diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 000000000..8b311a3fc --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,5 @@ +{ + "permissions": { + "allow": [] + } +} diff --git a/.dockerignore b/.dockerignore index 45c1def32..385a6449f 100644 --- a/.dockerignore +++ b/.dockerignore @@ -37,8 +37,6 @@ build/ *.tgz # Backend -backend/assets/* -!backend/assets/test.wav backend/flower_db.sqlite uploads/ test/ @@ -60,4 +58,4 @@ assets/ .Spotlight-V100 .Trashes ehthumbs.db -Thumbs.db \ No newline at end of file +Thumbs.db diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..6313b56c5 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +* text=auto eol=lf diff --git a/.github/workflows/auto-build-data-process-dev.yml b/.github/workflows/auto-build-data-process-dev.yml index c9885170e..6be8bf638 100644 --- a/.github/workflows/auto-build-data-process-dev.yml +++ b/.github/workflows/auto-build-data-process-dev.yml @@ -7,14 +7,14 @@ concurrency: on: workflow_dispatch: pull_request: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'backend/**' - 'sdk/**' - 'make/data_process/**' - '.github/workflows/**' push: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'backend/**' - 'sdk/**' diff --git a/.github/workflows/auto-build-doc-dev.yml b/.github/workflows/auto-build-doc-dev.yml index 697aa0204..7c2cd46d7 100644 --- a/.github/workflows/auto-build-doc-dev.yml +++ b/.github/workflows/auto-build-doc-dev.yml @@ -7,12 +7,12 @@ concurrency: on: workflow_dispatch: pull_request: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'doc/**' - '.github/workflows/**' push: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'doc/**' - '.github/workflows/**' diff --git a/.github/workflows/auto-build-main-dev.yml b/.github/workflows/auto-build-main-dev.yml index dbd69ac12..2815c50df 100644 --- 
a/.github/workflows/auto-build-main-dev.yml +++ b/.github/workflows/auto-build-main-dev.yml @@ -7,14 +7,14 @@ concurrency: on: workflow_dispatch: pull_request: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'backend/**' - 'sdk/**' - 'make/main/**' - '.github/workflows/**' push: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'backend/**' - 'sdk/**' diff --git a/.github/workflows/auto-build-mcp-dev.yml b/.github/workflows/auto-build-mcp-dev.yml index dacf04749..03aea08b2 100644 --- a/.github/workflows/auto-build-mcp-dev.yml +++ b/.github/workflows/auto-build-mcp-dev.yml @@ -7,14 +7,14 @@ concurrency: on: workflow_dispatch: pull_request: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'backend/**' - 'sdk/**' - 'make/mcp/**' - '.github/workflows/**' push: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'backend/**' - 'sdk/**' diff --git a/.github/workflows/auto-build-terminal-dev.yml b/.github/workflows/auto-build-terminal-dev.yml index fbc251edb..62fc20165 100644 --- a/.github/workflows/auto-build-terminal-dev.yml +++ b/.github/workflows/auto-build-terminal-dev.yml @@ -7,12 +7,12 @@ concurrency: on: workflow_dispatch: pull_request: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'make/terminal/**' - '.github/workflows/**' push: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'make/terminal/**' - '.github/workflows/**' diff --git a/.github/workflows/auto-build-web-dev.yml b/.github/workflows/auto-build-web-dev.yml index 28f967894..a5abeb0b3 100644 --- a/.github/workflows/auto-build-web-dev.yml +++ b/.github/workflows/auto-build-web-dev.yml @@ -7,13 +7,13 @@ concurrency: on: workflow_dispatch: pull_request: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'frontend/**' - 'make/web/**' - '.github/workflows/**' push: - branches: [develop] + branches: 
[develop, 'release/**', 'hotfix/**'] paths: - 'frontend/**' - 'make/web/**' diff --git a/.github/workflows/auto-unit-test.yml b/.github/workflows/auto-unit-test.yml index 6addafa22..dace8dab6 100644 --- a/.github/workflows/auto-unit-test.yml +++ b/.github/workflows/auto-unit-test.yml @@ -12,14 +12,14 @@ on: required: false default: '["ubuntu-24.04-arm"]' pull_request: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'backend/**' - 'sdk/**' - 'test/**' - '.github/workflows/**' push: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'backend/**' - 'sdk/**' @@ -36,7 +36,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Install uv run: pip install --upgrade uv @@ -68,26 +68,7 @@ jobs: echo "✅ All tests passed successfully." fi - # Detect architecture - - name: Detect architecture - id: arch - run: echo "arch=$(uname -m)" >> $GITHUB_OUTPUT - - # Use Python uploader on ARM - - name: Upload coverage to Codecov (Python uploader on ARM) - if: startsWith(steps.arch.outputs.arch, 'arm') || startsWith(steps.arch.outputs.arch, 'aarch64') - run: | - pip install --upgrade codecov - codecov \ - -t ${{ secrets.CODECOV_TOKEN }} \ - -f test/coverage.xml \ - -F unittests \ - -n codecov-umbrella \ - -v - - # Use official action on x86 - - name: Upload coverage to Codecov (Official Action on x86) - if: steps.arch.outputs.arch == 'x86_64' + - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 with: files: test/coverage.xml @@ -96,4 +77,3 @@ jobs: name: codecov-umbrella fail_ci_if_error: false verbose: true - directory: . diff --git a/.github/workflows/auto-web-check-dev.yml b/.github/workflows/auto-web-check-dev.yml index cd107b6e5..ae831a3fb 100644 --- a/.github/workflows/auto-web-check-dev.yml +++ b/.github/workflows/auto-web-check-dev.yml @@ -11,12 +11,12 @@ on: description: 'runner array in json format (e.g. 
["ubuntu-latest"] or ["self-hosted"])' default: '["ubuntu-latest"]' pull_request: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'frontend/**' - '.github/workflows/**' push: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'frontend/**' - '.github/workflows/**' diff --git a/.github/workflows/build-offline-package.yml b/.github/workflows/build-offline-package.yml new file mode 100644 index 000000000..6619cf764 --- /dev/null +++ b/.github/workflows/build-offline-package.yml @@ -0,0 +1,105 @@ +name: Build Offline Deployment Package + +on: + workflow_dispatch: + inputs: + include_source: + description: 'Include source code in the package' + required: false + default: true + type: boolean + +jobs: + build-offline-package: + runs-on: ubuntu-latest + strategy: + matrix: + platform: [amd64, arm64] + + steps: + - name: Free disk space + uses: jlumbroso/free-disk-space@main + with: + tool-cache: false + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: false + swap-storage: true + + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set version and platform variables + id: set-vars + run: | + PLATFORM="${{ matrix.platform }}" + REF_TYPE="${{ github.ref_type }}" + REF_NAME="${{ github.ref_name }}" + + if [ "$REF_TYPE" = "tag" ]; then + VERSION="$REF_NAME" + elif [ "$REF_TYPE" = "branch" ]; then + if [ "$REF_NAME" = "main" ]; then + VERSION="latest" + else + VERSION="${REF_NAME//\//-}" + fi + else + VERSION="latest" + fi + + echo "version=$VERSION" >> $GITHUB_OUTPUT + echo "platform=$PLATFORM" >> $GITHUB_OUTPUT + echo "package-name=nexent-offline-${PLATFORM}-${VERSION}" >> $GITHUB_OUTPUT + + - name: Build offline package + run: | + chmod +x scripts/offline/build_offline_package.sh + + ./scripts/offline/build_offline_package.sh \ + --version "${{ steps.set-vars.outputs.version }}" \ + --platform "${{ 
matrix.platform }}" \ + --output-dir ./offline-output \ + --include-source "${{ inputs.include_source }}" + + + + - name: Create ZIP package + run: | + PACKAGE_NAME="${{ steps.set-vars.outputs.package-name }}" + + cd offline-output + zip -r "../${PACKAGE_NAME}.zip" . + cd .. + + echo "Package created: ${PACKAGE_NAME}.zip" + + ls -lh "${PACKAGE_NAME}.zip" + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: ${{ steps.set-vars.outputs.package-name }} + path: ${{ steps.set-vars.outputs.package-name }}.zip + retention-days: 30 + + - name: Summary + run: | + echo "" + echo "========================================" + echo "Offline Package Build Summary" + echo "========================================" + echo "Version: ${{ steps.set-vars.outputs.version }}" + echo "Platform: ${{ matrix.platform }}" + echo "Package: ${{ steps.set-vars.outputs.package-name }}.zip" + echo "Ref Type: ${{ github.ref_type }}" + echo "Ref Name: ${{ github.ref_name }}" + echo "========================================" + echo "" + echo "Package contents:" + unzip -l "${{ steps.set-vars.outputs.package-name }}.zip" | head -50 \ No newline at end of file diff --git a/.github/workflows/docker-build-push-mainland.yml b/.github/workflows/docker-build-push-mainland.yml index 1aa41b560..8c215c7ec 100644 --- a/.github/workflows/docker-build-push-mainland.yml +++ b/.github/workflows/docker-build-push-mainland.yml @@ -16,10 +16,15 @@ on: description: 'runner array in json format (e.g. 
["ubuntu-latest"] or ["self-hosted"])' required: true default: '["ubuntu-latest"]' + push: + branches: + - main + tags: + - 'v*' jobs: build-and-push-main-amd64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -32,20 +37,20 @@ jobs: uses: actions/checkout@v4 - name: Build main image (amd64) and load locally run: | - docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-amd64 -f make/main/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . + docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 -f make/main/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . 
- name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Push main image (amd64) to Tencent Cloud - run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-amd64 + run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 - name: Tag main image (amd64) as latest - if: inputs.push_latest == 'true' - run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent:amd64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent:amd64 - name: Push latest main image (amd64) to Tencent Cloud - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:amd64 build-and-push-main-arm64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -58,20 +63,20 @@ jobs: uses: actions/checkout@v4 - name: Build main image (arm64) and load locally run: | - docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-arm64 -f make/main/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . 
+ docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 -f make/main/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Push main image (arm64) to Tencent Cloud - run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-arm64 + run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 - name: Tag main image (arm64) as latest - if: inputs.push_latest == 'true' - run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent:arm64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent:arm64 - name: Push latest main image (arm64) to Tencent Cloud - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:arm64 build-and-push-data-process-amd64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Free up disk space 
on GitHub runner run: | @@ -93,20 +98,20 @@ jobs: rm -rf .git .gitattributes - name: Build data process image (amd64) and load locally run: | - docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-amd64 -f make/data_process/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . + docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 -f make/data_process/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Push data process image (amd64) to Tencent Cloud - run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-amd64 + run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 - name: Tag data process image (amd64) as latest - if: inputs.push_latest == 'true' - run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:amd64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:amd64 - name: 
Push latest data process image (amd64) to Tencent Cloud - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:amd64 build-and-push-data-process-arm64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Free up disk space on GitHub runner run: | @@ -128,20 +133,20 @@ jobs: rm -rf .git .gitattributes - name: Build data process image (arm64) and load locally run: | - docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-arm64 -f make/data_process/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . + docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 -f make/data_process/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . 
- name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Push data process image (arm64) to Tencent Cloud - run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-arm64 + run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 - name: Tag data process image (arm64) as latest - if: inputs.push_latest == 'true' - run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:arm64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:arm64 - name: Push latest data process image (arm64) to Tencent Cloud - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:arm64 build-and-push-web-amd64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -154,20 +159,20 @@ jobs: uses: actions/checkout@v4 - name: Build web image (amd64) and load locally run: | - docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-amd64 
-f make/web/Dockerfile --build-arg MIRROR=https://registry.npmmirror.com --build-arg APK_MIRROR=tsinghua . + docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 -f make/web/Dockerfile --build-arg MIRROR=https://registry.npmmirror.com --build-arg APK_MIRROR=tsinghua . - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Push web image (amd64) to Tencent Cloud - run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-amd64 + run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 - name: Tag web image (amd64) as latest - if: inputs.push_latest == 'true' - run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-web:amd64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-web:amd64 - name: Push latest web image (amd64) to Tencent Cloud - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:amd64 build-and-push-web-arm64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + 
runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -180,20 +185,20 @@ jobs: uses: actions/checkout@v4 - name: Build web image (arm64) and load locally run: | - docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-arm64 -f make/web/Dockerfile --build-arg MIRROR=https://registry.npmmirror.com --build-arg APK_MIRROR=tsinghua . + docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 -f make/web/Dockerfile --build-arg MIRROR=https://registry.npmmirror.com --build-arg APK_MIRROR=tsinghua . - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Push web image (arm64) to Tencent Cloud - run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-arm64 + run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 - name: Tag web image (arm64) as latest - if: inputs.push_latest == 'true' - run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-web:arm64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-web:arm64 - name: Push latest web image (arm64) 
to Tencent Cloud - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:arm64 build-and-push-terminal-amd64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -206,20 +211,20 @@ jobs: uses: actions/checkout@v4 - name: Build terminal image (amd64) and load locally run: | - docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 -f make/terminal/Dockerfile . + docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 -f make/terminal/Dockerfile . 
- name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Push terminal image (amd64) to Tencent Cloud - run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 + run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 - name: Tag terminal image (amd64) as latest - if: inputs.push_latest == 'true' - run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:amd64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:amd64 - name: Push latest terminal image (amd64) to Tencent Cloud - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:amd64 build-and-push-terminal-arm64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -232,20 +237,20 @@ jobs: uses: actions/checkout@v4 - name: Build terminal image (arm64) and load locally run: | - docker buildx build --platform linux/arm64 --load -t 
ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 -f make/terminal/Dockerfile . + docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 -f make/terminal/Dockerfile . - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Push terminal image (arm64) to Tencent Cloud - run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 + run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 - name: Tag terminal image (arm64) as latest - if: inputs.push_latest == 'true' - run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:arm64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:arm64 - name: Push latest terminal image (arm64) to Tencent Cloud - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:arm64 build-and-push-mcp-amd64: - runs-on: ${{ 
fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -258,20 +263,20 @@ jobs: uses: actions/checkout@v4 - name: Build MCP image (amd64) and load locally run: | - docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-amd64 -f make/mcp/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . + docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 -f make/mcp/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Push MCP image (amd64) to Tencent Cloud - run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-amd64 + run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 - name: Tag MCP image (amd64) as latest - if: inputs.push_latest == 'true' - run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:amd64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 
ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:amd64 - name: Push latest MCP image (amd64) to Tencent Cloud - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:amd64 build-and-push-mcp-arm64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -284,16 +289,16 @@ jobs: uses: actions/checkout@v4 - name: Build MCP image (arm64) and load locally run: | - docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-arm64 -f make/mcp/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . + docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 -f make/mcp/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . 
- name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Push MCP image (arm64) to Tencent Cloud - run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-arm64 + run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 - name: Tag MCP image (arm64) as latest - if: inputs.push_latest == 'true' - run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:arm64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:arm64 - name: Push latest MCP image (arm64) to Tencent Cloud - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:arm64 manifest-push-main: @@ -305,13 +310,14 @@ jobs: - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Create and push manifest for main (Tencent Cloud) + if: github.event_name != 'push' || github.ref != 'refs/heads/main' run: | - docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }} \ - ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-amd64 \ - 
ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-arm64 - docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }} + docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \ + ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \ + ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 + docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} - name: Create and push latest manifest for main (Tencent Cloud) - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: | docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent:latest \ ccr.ccs.tencentyun.com/nexent-hub/nexent:amd64 \ @@ -327,13 +333,14 @@ jobs: - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Create and push manifest for data-process (Tencent Cloud) + if: github.event_name != 'push' || github.ref != 'refs/heads/main' run: | - docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }} \ - ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-amd64 \ - ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-arm64 - docker manifest push 
ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }} + docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \ + ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \ + ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 + docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} - name: Create and push latest manifest for data-process (Tencent Cloud) - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: | docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:latest \ ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:amd64 \ @@ -349,13 +356,14 @@ jobs: - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Create and push manifest for web (Tencent Cloud) + if: github.event_name != 'push' || github.ref != 'refs/heads/main' run: | - docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }} \ - ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-amd64 \ - ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-arm64 - docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }} + docker manifest 
create ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \ + ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \ + ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 + docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} - name: Create and push latest manifest for web (Tencent Cloud) - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: | docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-web:latest \ ccr.ccs.tencentyun.com/nexent-hub/nexent-web:amd64 \ @@ -371,13 +379,14 @@ jobs: - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Create and push manifest for terminal (Tencent Cloud) + if: github.event_name != 'push' || github.ref != 'refs/heads/main' run: | - docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }} \ - ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 \ - ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 - docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }} + docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || 
(github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \ + ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \ + ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 + docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} - name: Create and push latest manifest for terminal (Tencent Cloud) - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: | docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:latest \ ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:amd64 \ @@ -393,13 +402,14 @@ jobs: - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Create and push manifest for mcp (Tencent Cloud) + if: github.event_name != 'push' || github.ref != 'refs/heads/main' run: | - docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }} \ - ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-amd64 \ - ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-arm64 - docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }} + docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || 
github.ref_name }} \ + ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \ + ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 + docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} - name: Create and push latest manifest for mcp (Tencent Cloud) - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: | docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:latest \ ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:amd64 \ diff --git a/.github/workflows/docker-build-push-overseas.yml b/.github/workflows/docker-build-push-overseas.yml index d19c2600a..dcbe9d642 100644 --- a/.github/workflows/docker-build-push-overseas.yml +++ b/.github/workflows/docker-build-push-overseas.yml @@ -16,10 +16,15 @@ on: description: 'runner array in json format (e.g. ["ubuntu-latest"] or ["self-hosted"])' required: true default: '["ubuntu-latest"]' + push: + branches: + - main + tags: + - 'v*' jobs: build-and-push-main-amd64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -32,20 +37,20 @@ jobs: uses: actions/checkout@v4 - name: Build main image (amd64) and load locally run: | - docker buildx build --platform linux/amd64 -t nexent/nexent:${{ inputs.version }}-amd64 --load -f make/main/Dockerfile . 
+ docker buildx build --platform linux/amd64 -t nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 --load -f make/main/Dockerfile . - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Push main image (amd64) to DockerHub - run: docker push nexent/nexent:${{ inputs.version }}-amd64 + run: docker push nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 - name: Tag main image (amd64) as latest - if: inputs.push_latest == 'true' - run: docker tag nexent/nexent:${{ inputs.version }}-amd64 nexent/nexent:amd64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 nexent/nexent:amd64 - name: Push latest main image (amd64) to DockerHub - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push nexent/nexent:amd64 build-and-push-main-arm64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -58,20 +63,20 @@ jobs: uses: actions/checkout@v4 - name: Build main image (arm64) and load locally run: | - docker buildx build --platform linux/arm64 -t nexent/nexent:${{ inputs.version }}-arm64 --load -f make/main/Dockerfile . 
+ docker buildx build --platform linux/arm64 -t nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 --load -f make/main/Dockerfile . - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Push main image (arm64) to DockerHub - run: docker push nexent/nexent:${{ inputs.version }}-arm64 + run: docker push nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 - name: Tag main image (arm64) as latest - if: inputs.push_latest == 'true' - run: docker tag nexent/nexent:${{ inputs.version }}-arm64 nexent/nexent:arm64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 nexent/nexent:arm64 - name: Push latest main image (arm64) to DockerHub - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push nexent/nexent:arm64 build-and-push-data-process-amd64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Free up disk space on GitHub runner run: | @@ -93,20 +98,20 @@ jobs: rm -rf .git .gitattributes - name: Build data process image (amd64) and load locally run: | - docker buildx build --platform linux/amd64 -t nexent/nexent-data-process:${{ inputs.version }}-amd64 --load -f make/data_process/Dockerfile . 
+ docker buildx build --platform linux/amd64 -t nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 --load -f make/data_process/Dockerfile . - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Push data process image (amd64) to DockerHub - run: docker push nexent/nexent-data-process:${{ inputs.version }}-amd64 + run: docker push nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 - name: Tag data process image (amd64) as latest - if: inputs.push_latest == 'true' - run: docker tag nexent/nexent-data-process:${{ inputs.version }}-amd64 nexent/nexent-data-process:amd64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 nexent/nexent-data-process:amd64 - name: Push latest data process image (amd64) to DockerHub - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push nexent/nexent-data-process:amd64 build-and-push-data-process-arm64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Free up disk space on GitHub runner run: | @@ -128,20 +133,20 @@ jobs: rm -rf .git .gitattributes - name: Build data process image (arm64) and load locally run: | - docker buildx build --platform linux/arm64 -t nexent/nexent-data-process:${{ 
inputs.version }}-arm64 --load -f make/data_process/Dockerfile . + docker buildx build --platform linux/arm64 -t nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 --load -f make/data_process/Dockerfile . - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Push data process image (arm64) to DockerHub - run: docker push nexent/nexent-data-process:${{ inputs.version }}-arm64 + run: docker push nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 - name: Tag data process image (arm64) as latest - if: inputs.push_latest == 'true' - run: docker tag nexent/nexent-data-process:${{ inputs.version }}-arm64 nexent/nexent-data-process:arm64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 nexent/nexent-data-process:arm64 - name: Push latest data process image (arm64) to DockerHub - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push nexent/nexent-data-process:arm64 build-and-push-web-amd64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -154,20 +159,20 @@ jobs: uses: actions/checkout@v4 - name: Build web image (amd64) and load locally run: | - docker buildx build --platform linux/amd64 -t 
nexent/nexent-web:${{ inputs.version }}-amd64 --load -f make/web/Dockerfile . + docker buildx build --platform linux/amd64 -t nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 --load -f make/web/Dockerfile . - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Push web image (amd64) to DockerHub - run: docker push nexent/nexent-web:${{ inputs.version }}-amd64 + run: docker push nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 - name: Tag web image (amd64) as latest - if: inputs.push_latest == 'true' - run: docker tag nexent/nexent-web:${{ inputs.version }}-amd64 nexent/nexent-web:amd64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 nexent/nexent-web:amd64 - name: Push latest web image (amd64) to DockerHub - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push nexent/nexent-web:amd64 build-and-push-web-arm64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -180,20 +185,20 @@ jobs: uses: actions/checkout@v4 - name: Build web image (arm64) and load locally run: | - docker buildx build --platform linux/arm64 -t nexent/nexent-web:${{ inputs.version }}-arm64 --load -f make/web/Dockerfile . 
+ docker buildx build --platform linux/arm64 -t nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 --load -f make/web/Dockerfile . - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Push web image (arm64) to DockerHub - run: docker push nexent/nexent-web:${{ inputs.version }}-arm64 + run: docker push nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 - name: Tag web image (arm64) as latest - if: inputs.push_latest == 'true' - run: docker tag nexent/nexent-web:${{ inputs.version }}-arm64 nexent/nexent-web:arm64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 nexent/nexent-web:arm64 - name: Push latest web image (arm64) to DockerHub - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push nexent/nexent-web:arm64 build-and-push-terminal-amd64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -206,20 +211,20 @@ jobs: uses: actions/checkout@v4 - name: Build terminal image (amd64) and load locally run: | - docker buildx build --platform linux/amd64 -t nexent/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 --load -f make/terminal/Dockerfile . 
+ docker buildx build --platform linux/amd64 -t nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 --load -f make/terminal/Dockerfile . - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Push terminal image (amd64) to DockerHub - run: docker push nexent/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 + run: docker push nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 - name: Tag terminal image (amd64) as latest - if: inputs.push_latest == 'true' - run: docker tag nexent/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 nexent/nexent-ubuntu-terminal:amd64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 nexent/nexent-ubuntu-terminal:amd64 - name: Push latest terminal image (amd64) to DockerHub - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push nexent/nexent-ubuntu-terminal:amd64 build-and-push-terminal-arm64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -232,20 +237,20 @@ jobs: uses: actions/checkout@v4 - name: Build terminal image (arm64) and load locally run: | - docker buildx build --platform linux/arm64 -t nexent/nexent-ubuntu-terminal:${{ inputs.version 
}}-arm64 --load -f make/terminal/Dockerfile . + docker buildx build --platform linux/arm64 -t nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 --load -f make/terminal/Dockerfile . - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Push terminal image (arm64) to DockerHub - run: docker push nexent/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 + run: docker push nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 - name: Tag terminal image (arm64) as latest - if: inputs.push_latest == 'true' - run: docker tag nexent/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 nexent/nexent-ubuntu-terminal:arm64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 nexent/nexent-ubuntu-terminal:arm64 - name: Push latest terminal image (arm64) to DockerHub - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push nexent/nexent-ubuntu-terminal:arm64 build-and-push-mcp-amd64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -258,20 +263,20 @@ jobs: uses: actions/checkout@v4 - name: Build MCP image (amd64) and load locally run: | - docker buildx build --platform linux/amd64 -t 
nexent/nexent-mcp:${{ inputs.version }}-amd64 --load -f make/mcp/Dockerfile . + docker buildx build --platform linux/amd64 -t nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 --load -f make/mcp/Dockerfile . - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Push MCP image (amd64) to DockerHub - run: docker push nexent/nexent-mcp:${{ inputs.version }}-amd64 + run: docker push nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 - name: Tag MCP image (amd64) as latest - if: inputs.push_latest == 'true' - run: docker tag nexent/nexent-mcp:${{ inputs.version }}-amd64 nexent/nexent-mcp:amd64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 nexent/nexent-mcp:amd64 - name: Push latest MCP image (amd64) to DockerHub - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push nexent/nexent-mcp:amd64 build-and-push-mcp-arm64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -284,16 +289,16 @@ jobs: uses: actions/checkout@v4 - name: Build MCP image (arm64) and load locally run: | - docker buildx build --platform linux/arm64 -t nexent/nexent-mcp:${{ inputs.version }}-arm64 --load -f make/mcp/Dockerfile . 
+ docker buildx build --platform linux/arm64 -t nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 --load -f make/mcp/Dockerfile . - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Push MCP image (arm64) to DockerHub - run: docker push nexent/nexent-mcp:${{ inputs.version }}-arm64 + run: docker push nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 - name: Tag MCP image (arm64) as latest - if: inputs.push_latest == 'true' - run: docker tag nexent/nexent-mcp:${{ inputs.version }}-arm64 nexent/nexent-mcp:arm64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 nexent/nexent-mcp:arm64 - name: Push latest MCP image (arm64) to DockerHub - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push nexent/nexent-mcp:arm64 manifest-push-main: @@ -305,13 +310,14 @@ jobs: - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Create and push manifest for main (DockerHub) + if: github.event_name != 'push' || github.ref != 'refs/heads/main' run: | - docker manifest create nexent/nexent:${{ inputs.version }} \ - nexent/nexent:${{ inputs.version }}-amd64 \ - nexent/nexent:${{ inputs.version }}-arm64 - docker manifest push nexent/nexent:${{ inputs.version }} + docker manifest create nexent/nexent:${{ 
github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \ + nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \ + nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 + docker manifest push nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} - name: Create and push latest manifest for main (DockerHub) - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: | docker manifest create nexent/nexent:latest \ nexent/nexent:amd64 \ @@ -327,13 +333,14 @@ jobs: - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Create and push manifest for data-process (DockerHub) + if: github.event_name != 'push' || github.ref != 'refs/heads/main' run: | - docker manifest create nexent/nexent-data-process:${{ inputs.version }} \ - nexent/nexent-data-process:${{ inputs.version }}-amd64 \ - nexent/nexent-data-process:${{ inputs.version }}-arm64 - docker manifest push nexent/nexent-data-process:${{ inputs.version }} + docker manifest create nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \ + nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \ + nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || 
github.ref_name }}-arm64 + docker manifest push nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} - name: Create and push latest manifest for data-process (DockerHub) - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: | docker manifest create nexent/nexent-data-process:latest \ nexent/nexent-data-process:amd64 \ @@ -349,13 +356,14 @@ jobs: - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Create and push manifest for web (DockerHub) + if: github.event_name != 'push' || github.ref != 'refs/heads/main' run: | - docker manifest create nexent/nexent-web:${{ inputs.version }} \ - nexent/nexent-web:${{ inputs.version }}-amd64 \ - nexent/nexent-web:${{ inputs.version }}-arm64 - docker manifest push nexent/nexent-web:${{ inputs.version }} + docker manifest create nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \ + nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \ + nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 + docker manifest push nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} - name: Create and push latest manifest for web (DockerHub) - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 
'refs/heads/main') run: | docker manifest create nexent/nexent-web:latest \ nexent/nexent-web:amd64 \ @@ -371,13 +379,14 @@ jobs: - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Create and push manifest for terminal (DockerHub) + if: github.event_name != 'push' || github.ref != 'refs/heads/main' run: | - docker manifest create nexent/nexent-ubuntu-terminal:${{ inputs.version }} \ - nexent/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 \ - nexent/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 - docker manifest push nexent/nexent-ubuntu-terminal:${{ inputs.version }} + docker manifest create nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \ + nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \ + nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 + docker manifest push nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} - name: Create and push latest manifest for terminal (DockerHub) - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: | docker manifest create nexent/nexent-ubuntu-terminal:latest \ nexent/nexent-ubuntu-terminal:amd64 \ @@ -393,13 +402,14 @@ jobs: - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Create and push manifest for mcp (DockerHub) + if: github.event_name != 'push' || github.ref != 
'refs/heads/main' run: | - docker manifest create nexent/nexent-mcp:${{ inputs.version }} \ - nexent/nexent-mcp:${{ inputs.version }}-amd64 \ - nexent/nexent-mcp:${{ inputs.version }}-arm64 - docker manifest push nexent/nexent-mcp:${{ inputs.version }} + docker manifest create nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \ + nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \ + nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 + docker manifest push nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} - name: Create and push latest manifest for mcp (DockerHub) - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: | docker manifest create nexent/nexent-mcp:latest \ nexent/nexent-mcp:amd64 \ diff --git a/.github/workflows/docker-deploy.yml b/.github/workflows/docker-deploy.yml index 9d04c8913..a77c2491f 100644 --- a/.github/workflows/docker-deploy.yml +++ b/.github/workflows/docker-deploy.yml @@ -38,7 +38,10 @@ jobs: - name: Check if model is cached locally id: check-model run: | - if [ -f ~/model-assets/clip-vit-base-patch32/config.json ] && [ -d ~/model-assets/nltk_data ]; then + if [ -f ~/model-assets/clip-vit-base-patch32/config.json ] && \ + [ -d ~/model-assets/nltk_data ] && \ + [ -d ~/model-assets/table-transformer-structure-recognition ] && \ + [ -d ~/model-assets/yolox ]; then echo "cache-hit=true" >> "$GITHUB_OUTPUT" cp -r ~/model-assets ./ else @@ -105,4 +108,4 @@ jobs: ./deploy.sh --mode 3 
--is-mainland N --enable-terminal N --version 2 --root-dir "$HOME/nexent-production-data" else ./deploy.sh --mode 1 --is-mainland N --enable-terminal N --version 2 --root-dir "$HOME/nexent-development-data" - fi \ No newline at end of file + fi diff --git a/.github/workflows/sdk_publish.yml b/.github/workflows/sdk_publish.yml index 1e5759277..3cc413381 100644 --- a/.github/workflows/sdk_publish.yml +++ b/.github/workflows/sdk_publish.yml @@ -21,7 +21,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: '3.11' - name: Install build dependencies run: | diff --git a/.gitignore b/.gitignore index 702982568..e0bac2b47 100644 --- a/.gitignore +++ b/.gitignore @@ -19,9 +19,16 @@ docker/uploads docker/openssh-server docker/volumes/db/data docker/.env +docker/monitoring/monitoring.env docker/.run docker/deploy.options -k8s/helm/.deploy.options +k8s/helm/deploy.options +scripts/deployment/local-config.yaml +scripts/deployment/generated/ +docker/.env.generated +docker/docker-compose.generated.yml +k8s/helm/nexent/generated-values.yaml +k8s/helm/nexent/generated-secrets-values.yaml frontend_standalone/ .pnpm-store/ @@ -34,3 +41,29 @@ model-assets/ *.pytest_cache *.coverage *coverage.xml + +# Log files +*.log + +.sisyphus/ +.opencode/ +openspec/ +logs/ + +.agents/ +.devspace/ +devspace.yaml +k8s/helm/**/*.tgz +k8s/helm/nexent/Chart.lock + +MAC_DEVELOPMENT_GUIDE.md +data/ +sdk/benchmark/.env +/docker/.env.bak + +.venv + +.pytest-tmp +doc/mermaid + +.claude/skills/python-import-triage \ No newline at end of file diff --git a/README.md b/README.md index 894cd1862..7983e6c6c 100644 --- a/README.md +++ b/README.md @@ -11,111 +11,111 @@ Nexent is a zero-code platform for auto-generating production-grade AI agents, b > One prompt. Endless reach. 
-### 🌐 Visit our [official website](https://nexent.tech/) + -![Nexent Banner](./assets/architecture_en.png) +# 🚀 Get Started Now -https://github.com/user-attachments/assets/db6b7f5a-9ee8-4327-ae6f-c5af896126b4 +> ⭐ Before you get started, please star us on [GitHub](https://github.com/ModelEngine-Group/nexent) — your support drives us forward! -# ⚡ Have a try first +## Option 1: Try Our Official Demo -### 📋 Prerequisites +No installation required — jump right in with our **[online demo environment](http://60.204.251.153:3000/en)** to experience Nexent's capabilities instantly. -| Resource | Minimum | -|----------|---------| -| **CPU** | 2 cores | -| **RAM** | 6 GiB | -| **Software** | Docker & Docker Compose installed | +## Option 2: Deploy on Your Own -### 🛠️ Quick start with Docker Compose +If you need to run Nexent locally or in your private infrastructure, we offer two deployment options: -```bash -git clone https://github.com/ModelEngine-Group/nexent.git -cd nexent/docker -cp .env.example .env # fill only necessary configs -bash deploy.sh -``` +### System Requirements -When the containers are running, open **http://localhost:3000** in your browser and follow the setup wizard. - -# 🤝 Join Our Community - -> *If you want to go fast, go alone; if you want to go far, go together.* +| Resource | Docker | Kubernetes | +|----------|--------|-------------| +| **CPU** | 4 cores (min) / 8 cores (rec.) | 4 cores (min) / 8 cores (rec.) | +| **Memory** | 8 GiB (min) / 16 GiB (rec.) | 16 GiB (min) / 64 GiB (rec.) | +| **Disk** | 40 GiB (min) / 100 GiB (rec.) | 100 GiB (min) / 200 GiB (rec.) | +| **Architecture** | x86_64 / ARM64 | x86_64 / ARM64 | +| **Software** | Docker 24+, Docker Compose v2+ | Kubernetes 1.24+, Helm 3+ | -We have released **Nexent v1**, and the platform is now relatively stable. However, there may still be some bugs, and we are continuously improving and adding new features. Stay tuned: we will announce **v2.0** soon! 
+> **Note:** Recommended configurations ensure optimal performance in production environments. -* **🗺️ Check our [Feature Map](https://github.com/orgs/ModelEngine-Group/projects/6)** to explore current and upcoming features. -* **🔍 Try the current build** and leave ideas or bugs in the [Issues](https://github.com/ModelEngine-Group/nexent/issues) tab. -* **🐛 Check our [Known Issues page](https://github.com/orgs/ModelEngine-Group/projects/9)** for the latest issue status and solutions. - -> *Rome wasn't built in a day.* - -If our vision speaks to you, jump in via the **[Contribution Guide](https://modelengine-group.github.io/nexent/en/contributing)** and shape Nexent with us. - -Early contributors won't go unnoticed: from special badges and swag to other tangible rewards, we're committed to thanking the pioneers who help bring Nexent to life. - -Most of all, we need visibility. Star ⭐ and watch the repo, share it with friends, and help more developers discover Nexent — your click brings new hands to the project and keeps the momentum growing. +### Docker Deployment (Recommended for Individuals/Small Teams) -## 💬 Community & contact +Quick and straightforward for most users. Prerequisites: Docker 24+ and Docker Compose v2+: -- Browse the [Documentation](https://modelengine-group.github.io/nexent) for more information. -- Join our [Discord community](https://discord.gg/tb5H3S3wyv) to chat with other developers and get help! -- Conntact us by Wechat, find our QR Code in our [website](https://nexent.tech/en/contact) - -# ✨ Key Features - -`1` **Smart agent prompt generation** - Turn plain language into runnable prompts. Nexent automatically chooses the right tools and plans the best action path for every request. +```bash +git clone https://github.com/ModelEngine-Group/nexent.git +cd nexent/docker +bash deploy.sh +``` - ![Feature 1](./assets/Feature1.png) +The Docker and Kubernetes deploy scripts share the same deployment configuration model. 
Interactive runs show Bash TUI menus for component selection, port policy, and image source. `infrastructure` is required; `application` is selected by default but can be disabled. Use `b`/Backspace to return to the previous TUI step and `q` to quit. Non-interactive runs can pass the same choices with `--components`, `--port-policy development|production`, and `--image-source general|mainland|local-latest`. Successful deployments save non-sensitive choices to each deploy directory's `deploy.options` for reuse on the next run. -`2` **Scalable data process engine** - Process 20+ data formats with fast OCR and table structure extraction, scaling smoothly from a single process to large-batch pipelines. +Docker uninstall is handled by `bash uninstall.sh`. It can preserve or delete data volumes: run it interactively, pass `--delete-volumes true|false`, or use `bash uninstall.sh delete-all` to remove containers and persistent data. - ![Feature 2](./assets/Feature2.png) +For detailed deployment instructions, see [Docker Installation](https://modelengine-group.github.io/nexent/en/quick-start/installation.html). -`3` **Personal-grade knowledge base** - Import files in real time, auto-summarise them, and let agents access both personal and global knowledge instantly, also knowing what it can get from each knowledge base. +### Kubernetes Deployment (For Enterprise Production) - ![Feature 3](./assets/Feature3.png) +Ideal for enterprise scenarios requiring high availability and elastic scaling. Prerequisites: Kubernetes 1.24+ and Helm 3+: -`4` **Internet knowledge search** - Connect to 5+ web search providers so agents can mix fresh internet facts with your private data. +```bash +git clone https://github.com/ModelEngine-Group/nexent.git +cd nexent/k8s/helm +./deploy.sh +``` - ![Feature 4](./assets/Feature4.png) +Kubernetes uninstall is handled by `bash uninstall.sh`. It removes the Helm release first, then can optionally delete the namespace and local hostPath data. 
Use `--delete-namespace true|false`, `--delete-local-data true|false`, or `bash uninstall.sh delete-all`; pass `--keep-local-data` with `delete-all` to preserve local volume contents. -`5` **Knowledge-level traceability** - Serve answers with precise citations from web and knowledge-base sources, making every fact verifiable. +For detailed deployment instructions, see [Kubernetes Installation](https://modelengine-group.github.io/nexent/en/quick-start/kubernetes-installation.html). - ![Feature 5](./assets/Feature5.png) +# ✨ Core Features -`6` **Multimodal understanding & dialogue** - Speak, type, files, or show images. Nexent understands voice, text, and pictures, and can even generate new images on demand. +Nexent provides a comprehensive feature set for building powerful AI agents: - ![Feature 6](./assets/Feature6.png) +| Feature | Description | +|---------|-------------| +| **⚙️ Multi-Model Integration** | OpenAI-compatible with any provider, full LLM/Embedding/VLM/STT/TTS coverage, supports domestic model switching | +| **🤖 Zero-Code Agent Generation** | Describe requirements in natural language, generate executable agents instantly, what you think is what you get | +| **🤝 A2A Agent Collaboration** | Agent-to-Agent protocol enables seamless multi-agent cooperation and distributed workflows | +| **🧠 Layered Memory Mechanism** | Two-tier memory (user-level + user-agent-level) for persistent context across conversations | +| **📝 Progressive Skill Disclosure** | Dynamically loads Skill into context, maximizing context window efficiency | +| **🗄️ Personal-Grade Knowledge Base** | Real-time import and intelligent retrieval for 20+ document formats, auto summaries, fine-grained access control | +| **🔧 MCP Tool Ecosystem** | Plug-and-play extension system with custom development and third-party MCP service support | +| **🌐 Internet Knowledge Integration** | Multi-source search blending real-time information with private data | +| **🔍 Knowledge-Level Traceability** | 
Precise citations and source verification, full transparency for every fact | +| **🎭 Multimodal Interaction** | Voice, text, images, files — comprehensive natural dialogue | +| **🔢 Agent Version Management** | Version iteration and history rollback, safe and controllable | +| **🏪 Agent Marketplace** | Official and community curated agents, one-click install and use | +| **👥 Multi-Tenancy & RBAC** | Multi-tenant isolation, role-based access control, fine-grained resource management | -`7` **MCP tool ecosystem** - Drop in or build Python plug-ins that follow the MCP spec; swap models, tools, and chains without touching core code. +# 🤝 Join Our Community - ![Feature 7](./assets/Feature7.png) +> *If you want to go fast, go alone; if you want to go far, go together.* -# 🌱 MCP Tool Ecosystem +We have released **Nexent v2.0**! A comprehensive upgrade from v1.0, featuring A2A protocol support, progressive Skill disclosure, layered memory mechanism, user management with multi-tenancy, agent version management, agent marketplace, and more. -Check our [MCP Ecosystem page](https://modelengine-group.github.io/nexent/en/mcp-ecosystem/overview.html) for detailed information about the MCP tool ecosystem, including community hubs, recommended tools, and integration guides. +- **🗺️ Check our [Feature Map](https://github.com/orgs/ModelEngine-Group/projects/6)** to explore current and upcoming features. +- **🔍 Try the current build** and leave ideas or bugs in the [Issues](https://github.com/ModelEngine-Group/nexent/issues) tab. -# 🛠️ Developer Guide +> *Rome wasn't built in a day.* -### 🤖 Model Configuration & Provider Recommendations +If our vision speaks to you, jump in via the **[Contribution Guide](https://modelengine-group.github.io/nexent/en/contributing)** and shape Nexent with us. -Check our [Model Providers page](https://modelengine-group.github.io/nexent/en/getting-started/model-providers.html) for detailed model configuration guides and recommended provider information. 
+Early contributors won't go unnoticed: from special badges and swag to other tangible rewards, we're committed to thanking the pioneers who help bring Nexent to life. -### 🔧 Hack on Nexent +Most of all, we need visibility. Star ⭐ and watch the repo, share it with friends, and help more developers discover Nexent — your click brings new hands to the project and keeps the momentum growing. -Want to build from source or add new features? Check the [Contribution Guide](https://modelengine-group.github.io/nexent/en/contributing) for step-by-step instructions. +# 📖 What's Next -### 🛠️ Build from Source +Ready to dive deeper? Here are the main documentation entry points: -Prefer to run Nexent from source code? Follow our [Developer Guide](https://modelengine-group.github.io/nexent/en/getting-started/development-guide) for detailed setup instructions and customization options. +- **[Quick Start](https://modelengine-group.github.io/nexent/en/quick-start/installation.html)** — System requirements and deployment guide +- **[Core Features](https://modelengine-group.github.io/nexent/en/getting-started/features.html)** — Comprehensive feature documentation +- **[User Guide](https://modelengine-group.github.io/nexent/en/user-guide/home-page.html)** — Agent development and usage +- **[Developer Guide](https://modelengine-group.github.io/nexent/en/developer-guide/overview)** — Build from source and customization +- **[FAQ](https://modelengine-group.github.io/nexent/en/quick-start/faq.html)** — Common questions and troubleshooting # 📄 License diff --git a/README_CN.md b/README_CN.md index c16de5d32..032776418 100644 --- a/README_CN.md +++ b/README_CN.md @@ -11,111 +11,104 @@ Nexent 是一个基于 **Harness Engineering** 原则打造的零代码智能体 > 一个提示词,无限种可能。 -### 🌐 访问我们的[官方网站](https://nexent.tech/) + -![Nexent Banner](./assets/architecture_zh.png) +# 🚀 先来试试看 -https://github.com/user-attachments/assets/b844e05d-5277-4509-9463-1c5b3516f11e +> ⭐ 在您开始使用前,请您顺手在 
[GitHub](https://github.com/ModelEngine-Group/nexent) 为我们点个 Star,您的支持是我们前进的动力! -# ⚡ 先来试试看 +## 方式一:使用官方体验环境 -### 📋 系统要求 +无需安装,直接访问我们的 **[在线体验环境](http://60.204.251.153:3000/zh)**,快速体验 Nexent 的强大功能。 -| 资源 | 最低要求 | -|----------|---------| -| **CPU** | 2 核 | -| **内存** | 6 GiB | -| **软件** | 已安装 Docker 和 Docker Compose | +## 方式二:自行部署 -### 🛠️ 使用 Docker Compose 快速开始 +如果需要在本地或私有环境中部署 Nexent,我们提供两种部署方式: -```bash -git clone https://github.com/ModelEngine-Group/nexent.git -cd nexent/docker -cp .env.example .env # fill only necessary configs -bash deploy.sh -``` - -当容器运行后,在浏览器中打开 **http://localhost:3000** 并按照设置向导操作。 - -# 🤝 加入我们的社区 - -> *If you want to go fast, go alone; if you want to go far, go together.* - -我们已经发布了 **Nexent v1**,平台现在相对稳定。但是,可能仍然存在一些 bug,我们正在持续改进并添加新功能。敬请期待:我们很快将宣布 **v2.0**! - -* **🗺️ 查看我们的 [功能地图](https://github.com/orgs/ModelEngine-Group/projects/6)** 探索当前和即将推出的功能。 -* **🔍 试用当前版本** 并在 [问题反馈](https://github.com/ModelEngine-Group/nexent/issues) 中留下想法或报告错误。 -* **🐛 查看我们的[已知问题页面](https://github.com/orgs/ModelEngine-Group/projects/9)** 了解最新的问题状态和解决方案。 - -> *Rome wasn't built in a day.* - -如果我们的愿景与您产生共鸣,请通过 **[贡献指南](https://modelengine-group.github.io/nexent/zh/contributing)** 加入我们,共同塑造 Nexent。 - -早期贡献者不会被忽视:从特殊徽章和纪念品到其他实质性奖励,我们致力于感谢那些帮助 Nexent 诞生的先驱者。 +### 系统要求 -最重要的是,我们需要关注度。请为仓库点星 ⭐ 并关注,与朋友分享,帮助更多开发者发现 Nexent —— 您的每一次点击都能为项目带来新的参与者,保持发展势头。 +| 资源 | Docker 部署 | Kubernetes 部署 | +|------|------------|----------------| +| **CPU** | 4 核(最低)/ 8 核(推荐) | 4 核(最低)/ 8 核(推荐) | +| **内存** | 8 GiB(最低)/ 16 GiB(推荐) | 16 GiB(最低)/ 64 GiB(推荐) | +| **磁盘** | 40 GiB(最低)/ 100 GiB(推荐) | 100 GiB(最低)/ 200 GiB(推荐) | +| **架构** | x86_64 / ARM64 | x86_64 / ARM64 | +| **软件** | Docker 24+, Docker Compose v2+ | Kubernetes 1.24+, Helm 3+ | -## 💬 社区与联系方式 +> **注意:** 推荐配置可确保生产环境下的最佳性能。 -- 浏览 [文档](https://modelengine-group.github.io/nexent) 了解更多信息。 -- 加入我们的 [Discord 社区](https://discord.gg/tb5H3S3wyv) 与其他开发者交流并获取帮助! 
-- 通过微信联系我们,在我们的[网站](https://nexent.tech/zh/contact)找到二维码 +### Docker 部署(推荐个人/小团队使用) -# ✨ 主要特性 +适用于大多数用户,快速简单。部署前需准备Docker 24+, Docker Compose v2+: -`1` **智能体提示词自动生成** - 将自然语言转化为可被Agent执行的提示词。Nexent可以根据你的需要自动选择正确的工具并为每个请求规划最佳执行路径。 - - ![Feature 1](./assets/Feature1.png) - -`2` **可扩展数据处理引擎** - 支持 20+ 数据格式的快速 OCR 和表格结构提取,从单进程到大规模批处理管道都能平滑扩展。 - - ![Feature 2](./assets/Feature2.png) - -`3` **个人级知识库** - 实时导入文件,自动总结,让智能体能够即时访问个人和全局知识,并了解每个知识库能提供什么。 +```bash +git clone https://github.com/ModelEngine-Group/nexent.git +cd nexent/docker +cp .env.example .env +bash deploy.sh +``` - ![Feature 3](./assets/Feature3.png) +详细部署指南请参考 [Docker 安装部署](https://modelengine-group.github.io/nexent/zh/quick-start/installation.html)。 -`4` **互联网知识搜索** - 连接 5+ 个网络搜索提供商,让智能体能够将最新的互联网信息与您的私有数据结合。 +### Kubernetes 部署(适合企业级生产环境) - ![Feature 4](./assets/Feature4.png) +适用于需要高可用、弹性扩展的企业场景。部署前需准备 Kubernetes 集群(1.24+)和 Helm 3+: -`5` **知识级可追溯性** - 提供来自网络和知识库来源的精确引用,使每个事实都可验证。 +```bash +git clone https://github.com/ModelEngine-Group/nexent.git +cd nexent/k8s/helm +./deploy-helm.sh apply +``` - ![Feature 5](./assets/Feature5.png) +详细部署指南请参考 [Kubernetes 安装部署](https://modelengine-group.github.io/nexent/zh/quick-start/kubernetes-installation.html)。 -`6` **多模态理解与对话** - 说话、打字、文件或展示图片。Nexent 理解语音、文本和图片,甚至可以根据需求生成新图像。 +# ✨ 核心特性 - ![Feature 6](./assets/Feature6.png) +Nexent 为构建强大的 AI 智能体提供全面的功能集: -`7` **MCP 工具生态系统** - 插入或构建符合 MCP 规范的 Python 插件;无需修改核心代码即可更换模型、工具和链。 +| 特性 | 描述 | +|------|------| +| **⚙️ 多模型集成** | OpenAI 兼容任意提供商,LLM/Embedding/VLM/STT/TTS 全覆盖,支持灵活切换 | +| **🤖 零代码智能体生成** | 纯自然语言描述需求,一键生成可执行智能体,所想即所得 | +| **🤝 A2A 智能体协作** | Agent-to-Agent 协议支持多智能体无缝协作,构建分布式工作流 | +| **🧠 分层记忆机制** | 两层记忆体系(用户级+用户-智能体级),跨对话持续积累上下文 | +| **📝 Skill 渐进式披露** | 动态加载 Skill 内容至上下文,高效利用上下文窗口 | +| **🗄️ 个人级知识库** | 20+ 文档格式实时导入与智能检索,自动摘要,细粒度权限控制 | +| **🔧 MCP 工具生态** | 即插即用的扩展工具体系,支持自定义开发和第三方 MCP 服务 | +| **🌐 互联网知识集成** | 多搜索源混合,实时信息与私有数据融合 | +| **🔍 知识级溯源** | 精确引用与来源验证,每个事实透明可查 | +| **🎭 多模态交互** | 语音、文字、图像、文件,全方位自然对话 | +| **🔢 智能体版本管理** | 
版本迭代与历史回溯,安全可控 | +| **🏪 智能体市场** | 官方与社区优质智能体一键安装即用 | +| **👥 分权分域管理** | 多租户隔离,RBAC 权限体系,资源级精细管控 | - ![Feature 7](./assets/Feature7.png) +# 🤝 加入我们的社区 -# 🌱 MCP 工具生态 +> *If you want to go fast, go alone; if you want to go far, go together.* -查看我们的[MCP 生态系统页面](https://modelengine-group.github.io/nexent/zh/mcp-ecosystem/overview.html)了解 MCP 工具生态系统的详细信息,包括社区中心、推荐工具和集成指南。 +- **🗺️ 查看我们的 [功能地图](https://github.com/orgs/ModelEngine-Group/projects/6)** 探索当前和即将推出的功能。 +- **🔍 试用当前版本** 并在 [问题反馈](https://github.com/ModelEngine-Group/nexent/issues) 中留下想法或报告错误。 -# 🛠️ 开发者指南 +> *Rome wasn't built in a day.* -### 🤖 模型配置与模型提供商推荐 +如果我们的愿景与您产生共鸣,请通过 **[贡献指南](https://modelengine-group.github.io/nexent/zh/contributing)** 加入我们,共同塑造 Nexent。 -查看我们的[模型提供商页面](https://modelengine-group.github.io/nexent/zh/getting-started/model-providers.html)了解详细的模型配置指南和推荐的提供商信息。 +早期贡献者不会被忽视:从特殊徽章和纪念品到其他实质性奖励,我们致力于感谢那些帮助 Nexent 诞生的先驱者。 -### 🔧 开发 Nexent +最重要的是,我们需要关注度。请 [前往 GitHub](https://github.com/ModelEngine-Group/nexent) 为我们点星 ⭐ 并关注,与朋友分享,帮助更多开发者发现 Nexent —— 您的每一次点击都能为项目带来新的参与者,保持发展势头。 -想要从源代码构建或添加新功能?查看 [贡献指南](https://modelengine-group.github.io/nexent/zh/contributing) 获取分步说明。 +# 📖 下一步 -### 🛠️ 从源码构建 +准备好深入了解了吗?以下是主要文档入口: -想要从源码运行 Nexent?查看我们的[开发者指南](https://modelengine-group.github.io/nexent/zh/getting-started/development-guide)获取详细的设置说明和自定义选项。 +- **[快速开始](https://modelengine-group.github.io/nexent/zh/quick-start/installation.html)** — 系统要求和部署指南 +- **[核心特性详解](https://modelengine-group.github.io/nexent/zh/getting-started/features.html)** — 完整的功能说明 +- **[用户指南](https://modelengine-group.github.io/nexent/zh/user-guide/home-page.html)** — 智能体开发与使用 +- **[开发者指南](https://modelengine-group.github.io/nexent/zh/developer-guide/overview)** — 从源码构建和自定义 +- **[常见问题](https://modelengine-group.github.io/nexent/zh/quick-start/faq.html)** — 常见问题和故障排除 # 📄 许可证 diff --git a/backend/adapters/__init__.py b/backend/adapters/__init__.py new file mode 100644 index 000000000..ed46fc888 --- /dev/null +++ b/backend/adapters/__init__.py @@ 
-0,0 +1,13 @@ +from adapters.exception import JiuwenSDKError, JiuwenSDKUnavailableError, NexentCapabilityError + +try: + from adapters.jiuwen_sdk_adapter import JiuwenSDKAdapter +except ModuleNotFoundError: + JiuwenSDKAdapter = None # type: ignore[assignment, misc] + +__all__ = [ + "JiuwenSDKError", + "JiuwenSDKUnavailableError", + "NexentCapabilityError", + "JiuwenSDKAdapter", +] diff --git a/backend/adapters/exception.py b/backend/adapters/exception.py new file mode 100644 index 000000000..63812d3af --- /dev/null +++ b/backend/adapters/exception.py @@ -0,0 +1,13 @@ +class JiuwenSDKError(Exception): + """Jiuwen SDK 调用失败的通用异常""" + pass + + +class JiuwenSDKUnavailableError(JiuwenSDKError): + """Jiuwen SDK 不可用(依赖缺失或未启用)""" + pass + + +class NexentCapabilityError(Exception): + """nexent 原生模式不支持该能力""" + pass diff --git a/backend/adapters/jiuwen_sdk_adapter.py b/backend/adapters/jiuwen_sdk_adapter.py new file mode 100644 index 000000000..f62ce9d06 --- /dev/null +++ b/backend/adapters/jiuwen_sdk_adapter.py @@ -0,0 +1,514 @@ +""" +openjiuwen SDK adapter for Nexent. + +This module must be imported lazily (not at module load time) because +openjiuwen 0.1.13 has circular import bugs in its __init__.py files that +prevent the SDK from loading unless we bypass them. 
+ +Import flow: + backend/adapters/__init__.py -> try/except -> JiuwenSDKAdapter = None + -> when needed: _install_jiuwen_bypasser() -> openjiuwen imports work +""" +import asyncio +import importlib.abc +import importlib.machinery +import json +import logging +import os +import sys +import types +from typing import Any, List, Literal, Optional + +logger = logging.getLogger("jiuwen_adapter") + +from adapters.exception import JiuwenSDKError + + +# ---------------------------------------------------------------------- +# Circular import bypasser for openjiuwen 0.1.13 +# +# openjiuwen has broken __init__.py files that create circular import chains: +# tune/__init__.py -> tune.optimizer -> core.operator -> agent_evolving -> ... +# This bypasser prevents those __init__.py files from executing while still +# allowing regular .py submodule files to load normally. +# ---------------------------------------------------------------------- +_CIRCULAR_CHAIN = { + "openjiuwen.agent_evolving", + "openjiuwen.agent_evolving.trainer", + "openjiuwen.agent_evolving.trainer.trainer", + "openjiuwen.agent_evolving.trainer.progress", + "openjiuwen.core", + "openjiuwen.dev_tools", + "openjiuwen.dev_tools.tune", + "openjiuwen.dev_tools.tune.optimizer", + "openjiuwen.dev_tools.tune.optimizer.instruction_optimizer", + "openjiuwen.dev_tools.prompt_builder", + "openjiuwen.dev_tools.prompt_builder.builder", +} + + +class _JiuwenInitBypasser(importlib.abc.MetaPathFinder, importlib.abc.Loader): + """ + Meta path finder that intercepts __init__.py loading within openjiuwen, + blocking only the packages in the circular import chain while letting + all other modules (including base.py files) load normally. 
+ """ + + def find_spec(self, fullname: str, path: Any, target: Any = None) -> Any: + if not fullname.startswith("openjiuwen") or fullname == "openjiuwen": + return None + + try: + import openjiuwen as _oj + + pkg_root = _oj.__path__[0] + except ImportError: + return None + + parts = fullname.split(".")[1:] + file_path = pkg_root + for p in parts: + file_path = os.path.join(file_path, p) + + is_package = os.path.isdir(file_path) + if not is_package: + return None + + init_path = os.path.join(file_path, "__init__.py") + if not os.path.exists(init_path): + return None + + if fullname not in _CIRCULAR_CHAIN: + return None + + spec = importlib.machinery.ModuleSpec( + fullname, self, is_package=True, origin="" + ) + spec.submodule_search_locations = [file_path] + return spec + + def create_module(self, module: Any) -> None: + return None + + def exec_module(self, module: Any) -> None: + import openjiuwen as _oj + + pkg_root = _oj.__path__[0] + parts = module.__name__.split(".")[1:] + file_path = pkg_root + for p in parts: + file_path = os.path.join(file_path, p) + module.__path__ = [file_path] + module.__file__ = os.path.join(file_path, "__init__.py") + + def __getattr__(self, name: str) -> Any: + """Handle special attributes like find_distributions to prevent recursion.""" + import openjiuwen as _oj + import importlib + + # Prevent recursion when Python scans sys.meta_path for find_distributions etc. 
+ if name in ( + "find_distributions", + "find_module", + "__path__", + "__name__", + "__file__", + "__loader__", + "__package__", + "__spec__", + ): + raise AttributeError(name) + + pkg_root = _oj.__path__[0] + parts = self.__name__.split(".")[1:] + [name] + file_path = pkg_root + for p in parts: + file_path = os.path.join(file_path, p) + + # If it's a package directory, import it as a submodule + if os.path.isdir(file_path) and os.path.exists(os.path.join(file_path, "__init__.py")): + return importlib.import_module(f"{self.__name__}.{name}") + # If it's a regular .py file + if os.path.exists(file_path + ".py"): + return importlib.import_module(f"{self.__name__}.{name}") + raise AttributeError(name) + + +_bypasser_installed = False + + +def _install_jiuwen_bypasser() -> bool: + """ + Install the circular import bypasser for openjiuwen. + Returns True if installed, False if already installed or openjiuwen not available. + """ + global _bypasser_installed + if _bypasser_installed: + return True + + # Stub missing optional dependencies before openjiuwen import chain reaches them + _stubbed = [ + ("pymilvus", {"is_successful": lambda *args, **kwargs: True}), + ("dashscope", {}), + ("pdfplumber", {}), + ] + for _name, _attrs in _stubbed: + if _name not in sys.modules: + _mod = types.ModuleType(_name) + for _k, _v in _attrs.items(): + setattr(_mod, _k, _v) + sys.modules[_name] = _mod + _mod.__path__ = [] + + # Pre-create nested stub modules for pymilvus.client.utils chain + if "pymilvus.client" not in sys.modules: + _client_mod = types.ModuleType("pymilvus.client") + _client_mod.__path__ = [] + sys.modules["pymilvus.client"] = _client_mod + if "pymilvus.client.utils" not in sys.modules: + _utils_mod = types.ModuleType("pymilvus.client.utils") + _utils_mod.is_successful = lambda *args, **kwargs: True + sys.modules["pymilvus.client.utils"] = _utils_mod + + # Stub dashscope sub-modules that may be imported lazily + _dashscope_subs = [ + ("dashscope.api_entities", {}), + 
("dashscope.api_entities.data", {}), + ("dashscope.api_entities.dashscope_response", {"DashScopeAPIResponse": object}), + ("dashscope.common", {"REQUEST_TIMEOUT_KEYWORD": "timeout"}), + ("dashscope.common.constants", {"REQUEST_TIMEOUT_KEYWORD": "timeout"}), + ] + for _name, _attrs in _dashscope_subs: + if _name not in sys.modules: + _m = types.ModuleType(_name) + _m.__path__ = [] + for _k, _v in _attrs.items(): + setattr(_m, _k, _v) + sys.modules[_name] = _m + + try: + import openjiuwen # noqa: F401 + except ImportError: + return False + + for finder in sys.meta_path: + if isinstance(finder, _JiuwenInitBypasser): + _bypasser_installed = True + return True + + sys.meta_path.insert(0, _JiuwenInitBypasser()) + _bypasser_installed = True + return True + + +# ---------------------------------------------------------------------- +# Language helpers +# ---------------------------------------------------------------------- +LANGUAGE_MAP = {"zh": "zh-CN", "en": "en-US"} + + +def normalize_language(language: str) -> str: + return LANGUAGE_MAP.get(language, "zh-CN") + + +def run_async(coro): + """ + Safely run async coroutine from sync context (FastAPI or Celery). + Handles existing event loops properly. 
+ """ + try: + loop = asyncio.get_running_loop() + except RuntimeError: + return asyncio.run(coro) + + if loop.is_running(): + try: + import nest_asyncio + nest_asyncio.apply() + return loop.run_until_complete(coro) + except ImportError: + import concurrent.futures + + def run_in_thread(): + new_loop = asyncio.new_event_loop() + asyncio.set_event_loop(new_loop) + try: + return new_loop.run_until_complete(coro) + finally: + new_loop.close() + + with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor: + future = executor.submit(run_in_thread) + return future.result() + + return loop.run_until_complete(coro) + + +# ---------------------------------------------------------------------- +# Jiuwen SDK lazy import helpers +# ---------------------------------------------------------------------- +def _lazy_import_jiuwen_config(): + """Lazily import only lightweight Jiuwen config classes.""" + _install_jiuwen_bypasser() + + try: + import openjiuwen # noqa: F401 + except ImportError as e: + raise JiuwenSDKError(f"Jiuwen SDK 未安装: {e}") from e + + from openjiuwen.core.foundation.llm.schema.config import ( + ModelRequestConfig, + ModelClientConfig, + ProviderType, + ) + + return ModelRequestConfig, ModelClientConfig, ProviderType + + +def build_jiuwen_model_configs(model_id: int, tenant_id: str): + """将 nexent 模型配置转换为 Jiuwen 配置对象""" + from database.model_management_db import get_model_by_model_id + from utils.config_utils import get_model_name_from_config + + ModelRequestConfig, ModelClientConfig, ProviderType = _lazy_import_jiuwen_config() + + model_config = get_model_by_model_id(model_id, tenant_id) + if not model_config: + raise JiuwenSDKError(f"model_id={model_id} not found") + + api_base = (model_config.get("base_url", "") or "").strip() + if not api_base: + api_base = "https://api.openai.com/v1" + + # Jiuwen ModelClientConfig defaults to timeout=60.0, max_retries=3. + # For prompt optimization calls, 60s can be too small. 
Reuse Nexent model config timeout_seconds. + timeout_seconds = model_config.get("timeout_seconds") + if timeout_seconds is None: + timeout_seconds = 120 + + ssl_cert = model_config.get("ssl_cert") or None + ssl_verify = model_config.get("ssl_verify", True) + if ssl_verify and not ssl_cert: + ssl_verify = False + + client_config = ModelClientConfig( + client_provider=ProviderType.OpenAI, + api_key=model_config["api_key"], + api_base=api_base, + timeout=float(timeout_seconds), + verify_ssl=ssl_verify, + ssl_cert=ssl_cert, + ) + + request_config = ModelRequestConfig( + model_name=get_model_name_from_config(model_config), + temperature=0.3, + ) + return request_config, client_config + + +def _lazy_import_jiuwen_builders(): + """Lazily import prompt builders only when optimization paths need them.""" + _install_jiuwen_bypasser() + + try: + import openjiuwen # noqa: F401 + except ImportError as e: + raise JiuwenSDKError(f"Jiuwen SDK 未安装: {e}") from e + + from openjiuwen.dev_tools.prompt_builder.builder.feedback_prompt_builder import ( + FeedbackPromptBuilder, + ) + from openjiuwen.dev_tools.prompt_builder.builder.badcase_prompt_builder import ( + BadCasePromptBuilder, + ) + + return FeedbackPromptBuilder, BadCasePromptBuilder + + +def _unwrap_prompt_response(text: str) -> str: + """Strip JSON wrapper or markdown fence that Jiuwen LLM sometimes generates.""" + _logger = logging.getLogger("jiuwen_adapter") + _logger.debug(f"[unwrap] raw ({len(text)} chars): {text[:200]}") + + # Step 1: strip markdown code fences + text = text.strip() + if text.startswith("```"): + for lang in ("json", ""): + fence = f"```{lang}\n" + if text.startswith(fence): + text = text[len(fence):] + if text.endswith("\n```"): + text = text[:-4] + elif text.endswith("```"): + text = text[:-3] + break + text = text.strip() + _logger.debug(f"[unwrap] after fence strip ({len(text)} chars)") + + # Step 2: try standard JSON parse (handles format 1 and 2) + if text.startswith("{"): + try: + parsed = 
json.loads(text) + if isinstance(parsed, dict) and "prompt" in parsed: + result = parsed["prompt"].strip() + _logger.debug(f"[unwrap] extracted prompt ({len(result)} chars)") + return result + if isinstance(parsed, dict) and "result" in parsed: + result = parsed["result"].strip() + _logger.debug(f"[unwrap] extracted result ({len(result)} chars)") + return result + except Exception: + pass + + # Step 3: format 3 and 4 - raw text (possibly multi-line), return as-is + _logger.debug(f"[unwrap] no JSON wrapper, returning raw ({len(text)} chars)") + return text + + +def _lazy_import_jiuwen_tune_types(): + """Lazily import Jiuwen tune types only when badcase flow needs them.""" + _install_jiuwen_bypasser() + from openjiuwen.dev_tools.tune.base import Case, EvaluatedCase + return Case, EvaluatedCase + + +def to_jiuwen_evaluated_case(bad_case) -> Any: + """将 nexent BadCase 转换为 Jiuwen EvaluatedCase""" + Case, EvaluatedCase = _lazy_import_jiuwen_tune_types() + + case = Case( + inputs={"question": bad_case.question}, + label={"answer": bad_case.label or ""}, + ) + return EvaluatedCase( + case=case, + answer={"content": bad_case.answer}, + score=0.0, + reason=bad_case.reason or "", + ) + + +# ---------------------------------------------------------------------- +# Main adapter class +# ---------------------------------------------------------------------- +class JiuwenSDKAdapter: + """ + Jiuwen SDK 调用适配器 + + 封装 Jiuwen SDK 的所有调用,内部不处理降级, + 失败时抛出 JiuwenSDKError,由上层 PromptOptimizationService 决定是否降级 + """ + + def __init__(self, model_id: int, tenant_id: str): + self.model_id = model_id + self.tenant_id = tenant_id + self.logger = logging.getLogger("jiuwen_adapter") + + def _ensure_available(self): + """确保 Jiuwen SDK 可用""" + if not _bypasser_installed: + _install_jiuwen_bypasser() + + try: + import openjiuwen # noqa: F401 + except ImportError as e: + raise JiuwenSDKError(f"Jiuwen SDK 未安装: {e}") from e + + def optimize( + self, + prompt: str, + feedback: str, + mode: 
Literal["general", "insert", "select"] = "general", + start_pos: Optional[int] = None, + end_pos: Optional[int] = None, + language: str = "zh", + ) -> str: + """ + 调用 Jiuwen FeedbackPromptBuilder + + Raises: + JiuwenSDKError: SDK 调用失败 + """ + self._ensure_available() + + logger.info(f"[jiuwen-adapter] mode={mode}, start_pos={start_pos}, end_pos={end_pos}") + + request_config, client_config = build_jiuwen_model_configs( + self.model_id, self.tenant_id + ) + logger.info( + f"[jiuwen-adapter] model_id={self.model_id}, tenant_id={self.tenant_id}, " + f"api_base={client_config.api_base}, model={request_config.model_name}, " + f"timeout={getattr(client_config, 'timeout', None)}, max_retries={getattr(client_config, 'max_retries', None)}" + ) + FeedbackPromptBuilder, _ = _lazy_import_jiuwen_builders() + + builder = FeedbackPromptBuilder( + model_config=request_config, + model_client_config=client_config, + ) + + try: + result = run_async( + builder.build( + prompt=prompt, + feedback=feedback, + mode=mode, + start_pos=start_pos, + end_pos=end_pos, + language=normalize_language(language), + ) + ) + if result is None: + raise JiuwenSDKError("Jiuwen FeedbackPromptBuilder 返回为空") + return _unwrap_prompt_response(str(result)) + except Exception as e: + self.logger.error(f"Jiuwen FeedbackPromptBuilder 调用失败: {e}") + raise JiuwenSDKError(f"优化调用失败: {e}") from e + + def optimize_badcase( + self, + prompt: str, + bad_cases: List, + language: str = "zh", + ) -> str: + """ + 调用 Jiuwen BadCasePromptBuilder + + Raises: + JiuwenSDKError: SDK 调用失败 + """ + self._ensure_available() + + _, BadCasePromptBuilder = _lazy_import_jiuwen_builders() + + request_config, client_config = build_jiuwen_model_configs( + self.model_id, self.tenant_id + ) + builder = BadCasePromptBuilder( + model_config=request_config, + model_client_config=client_config, + ) + + jiuwen_cases = [to_jiuwen_evaluated_case(bc) for bc in bad_cases] + + try: + result = run_async( + builder.build( + prompt=prompt, + 
cases=jiuwen_cases, + language=normalize_language(language), + ) + ) + if result is None: + raise JiuwenSDKError("Jiuwen BadCasePromptBuilder 返回为空") + return _unwrap_prompt_response(str(result)) + except Exception as e: + self.logger.error(f"Jiuwen BadCasePromptBuilder 调用失败: {e}") + raise JiuwenSDKError(f"BadCasePromptBuilder 调用失败: {e}") from e + + def generate(self, **kwargs) -> dict: + """调用 Jiuwen 提示词生成能力""" + self._ensure_available() + raise JiuwenSDKError("Jiuwen 提示词生成能力尚未实现") diff --git a/backend/agents/agent_run_manager.py b/backend/agents/agent_run_manager.py index 5f7920f17..83a05aa2a 100644 --- a/backend/agents/agent_run_manager.py +++ b/backend/agents/agent_run_manager.py @@ -1,69 +1,107 @@ -import logging -import threading -from typing import Dict - -from nexent.core.agents.agent_model import AgentRunInfo - -logger = logging.getLogger("agent_run_manager") - - -class AgentRunManager: - _instance = None - _lock = threading.Lock() - - def __new__(cls): - if cls._instance is None: - with cls._lock: - if cls._instance is None: - cls._instance = super(AgentRunManager, cls).__new__(cls) - cls._instance._initialized = False - return cls._instance - - def __init__(self): - if not self._initialized: - # user_id:conversation_id -> agent_run_info - self.agent_runs: Dict[str, AgentRunInfo] = {} - self._initialized = True - - def _get_run_key(self, conversation_id: int, user_id: str) -> str: - """Generate unique key for agent run using user_id and conversation_id""" - return f"{user_id}:{conversation_id}" - - def register_agent_run(self, conversation_id: int, agent_run_info, user_id: str): - """register agent run instance""" - with self._lock: - run_key = self._get_run_key(conversation_id, user_id) - self.agent_runs[run_key] = agent_run_info - logger.info( - f"register agent run instance, user_id: {user_id}, conversation_id: {conversation_id}") - - def unregister_agent_run(self, conversation_id: int, user_id: str): - """unregister agent run instance""" - with 
self._lock: - run_key = self._get_run_key(conversation_id, user_id) - if run_key in self.agent_runs: - del self.agent_runs[run_key] - logger.info( - f"unregister agent run instance, user_id: {user_id}, conversation_id: {conversation_id}") - else: - logger.info( - f"no agent run instance found for user_id: {user_id}, conversation_id: {conversation_id}") - - def get_agent_run_info(self, conversation_id: int, user_id: str): - """get agent run instance""" - run_key = self._get_run_key(conversation_id, user_id) - return self.agent_runs.get(run_key) - - def stop_agent_run(self, conversation_id: int, user_id: str) -> bool: - """stop agent run for specified conversation_id and user_id""" - agent_run_info = self.get_agent_run_info(conversation_id, user_id) - if agent_run_info is not None: - agent_run_info.stop_event.set() - logger.info( - f"agent run stopped, user_id: {user_id}, conversation_id: {conversation_id}") - return True - return False - - -# create singleton instance -agent_run_manager = AgentRunManager() +import logging +import threading +from typing import Dict, Union + +from nexent.core.agents.agent_model import AgentRunInfo +from nexent.core.agents.agent_context import ContextManager, ContextManagerConfig + +logger = logging.getLogger("agent_run_manager") + + +class AgentRunManager: + _instance = None + _lock = threading.Lock() + + def __new__(cls): + if cls._instance is None: + with cls._lock: + if cls._instance is None: + cls._instance = super(AgentRunManager, cls).__new__(cls) + cls._instance._initialized = False + return cls._instance + + def __init__(self): + if not self._initialized: + # user_id:conversation_id -> agent_run_info + self.agent_runs: Dict[str, AgentRunInfo] = {} + # conversation_id -> ContextManager (conversation-level lifetime) + self._conversation_context_managers: Dict[str, ContextManager] = {} + # conversation_id -> active run count for safe cleanup + self._conversation_run_counts: Dict[str, int] = {} + self._initialized = True + + def 
_get_run_key(self, conversation_id: Union[int, str], user_id: str) -> str: + """Generate unique key for agent run using user_id and conversation_id""" + return f"{user_id}:{conversation_id}" + + def register_agent_run(self, conversation_id: Union[int, str], agent_run_info, user_id: str): + """register agent run instance""" + with self._lock: + run_key = self._get_run_key(conversation_id, user_id) + self.agent_runs[run_key] = agent_run_info + conv_key = str(conversation_id) + self._conversation_run_counts[conv_key] = self._conversation_run_counts.get(conv_key, 0) + 1 + logger.info( + f"register agent run instance, user_id: {user_id}, conversation_id: {conversation_id}") + + def unregister_agent_run(self, conversation_id: Union[int, str], user_id: str): + """unregister agent run instance""" + with self._lock: + run_key = self._get_run_key(conversation_id, user_id) + if run_key in self.agent_runs: + del self.agent_runs[run_key] + conv_key = str(conversation_id) + self._conversation_run_counts[conv_key] = max( + 0, self._conversation_run_counts.get(conv_key, 0) - 1 + ) + logger.info( + f"unregister agent run instance, user_id: {user_id}, conversation_id: {conversation_id}") + else: + logger.info( + f"no agent run instance found for user_id: {user_id}, conversation_id: {conversation_id}") + + def get_agent_run_info(self, conversation_id: Union[int, str], user_id: str): + """get agent run instance""" + run_key = self._get_run_key(conversation_id, user_id) + return self.agent_runs.get(run_key) + + def stop_agent_run(self, conversation_id: Union[int, str], user_id: str) -> bool: + """stop agent run for specified conversation_id and user_id""" + agent_run_info = self.get_agent_run_info(conversation_id, user_id) + if agent_run_info is not None: + agent_run_info.stop_event.set() + logger.info( + f"agent run stopped, user_id: {user_id}, conversation_id: {conversation_id}") + return True + return False + + def get_or_create_context_manager( + self, + conversation_id: Union[int, 
str], + config: ContextManagerConfig, + max_steps: int + ) -> ContextManager: + """Get or create a conversation-level ContextManager instance.""" + conv_key = str(conversation_id) + with self._lock: + cm = self._conversation_context_managers.get(conv_key) + if cm is None: + cm = ContextManager(config=config, max_steps=max_steps) + self._conversation_context_managers[conv_key] = cm + logger.info( + f"Created new ContextManager for conversation_id: {conv_key}") + return cm + + def clear_conversation_context_manager(self, conversation_id: Union[int, str]): + """Explicitly clear the ContextManager for a conversation.""" + conv_key = str(conversation_id) + with self._lock: + cm = self._conversation_context_managers.pop(conv_key, None) + self._conversation_run_counts.pop(conv_key, None) + if cm: + logger.info( + f"Cleared ContextManager for conversation_id: {conv_key}") + + +# create singleton instance +agent_run_manager = AgentRunManager() diff --git a/backend/agents/create_agent_info.py b/backend/agents/create_agent_info.py index ea3ba24e8..7e3b42e28 100644 --- a/backend/agents/create_agent_info.py +++ b/backend/agents/create_agent_info.py @@ -1,41 +1,130 @@ +import json import threading import logging -from typing import List, Optional +from typing import Any, Dict, List, Optional from urllib.parse import urljoin -from datetime import datetime from jinja2 import Template, StrictUndefined from nexent.core.utils.observer import MessageObserver -from nexent.core.agents.agent_model import AgentRunInfo, ModelConfig, AgentConfig, ToolConfig, ExternalA2AAgentConfig +from nexent.core.agents.agent_model import AgentRunInfo, ModelConfig, AgentConfig, ToolConfig, ExternalA2AAgentConfig, AgentHistory, AgentVerificationConfig +from nexent.core.agents.agent_context import ContextManagerConfig from nexent.memory.memory_service import search_memory_in_levels -from services.file_management_service import get_llm_model +from services.file_management_service import get_llm_model, 
validate_urls_access from services.vectordatabase_service import ( ElasticSearchService, get_vector_db_core, - get_embedding_model, + get_embedding_model_by_index_name, get_rerank_model, ) from services.remote_mcp_service import get_remote_mcp_server_list from database.a2a_agent_db import PROTOCOL_JSONRPC from services.memory_config_service import build_memory_context -from services.image_service import get_vlm_model -from database.agent_db import search_agent_info_by_agent_id, query_sub_agents_id_list +from services.image_service import get_video_understanding_model, get_vlm_model +from database.agent_db import ( + search_agent_info_by_agent_id, + query_sub_agent_relations, + resolve_sub_agent_version_no, +) from database.agent_version_db import query_current_version_no from database.tool_db import search_tools_for_sub_agent from database.model_management_db import get_model_records, get_model_by_model_id +from database.knowledge_db import get_knowledge_name_map_by_index_names from database.client import minio_client from utils.model_name_utils import add_repo_to_name from utils.prompt_template_utils import get_agent_prompt_template from utils.config_utils import tenant_config_manager, get_model_name_from_config -from consts.const import LOCAL_MCP_SERVER, MODEL_CONFIG_MAPPING, LANGUAGE, DATA_PROCESS_SERVICE -import re +from utils.context_utils import build_context_components +from consts.const import LOCAL_MCP_SERVER, MODEL_CONFIG_MAPPING, LANGUAGE, DATA_PROCESS_SERVICE, MINIO_DEFAULT_BUCKET +from consts.model import AgentToolParamsRequest, ToolParamsRequest +from consts.exceptions import ValidationError logger = logging.getLogger("create_agent_info") logger.setLevel(logging.DEBUG) +def _normalize_tool_params_request(tool_params: Optional[ToolParamsRequest | Dict[str, Any]]) -> ToolParamsRequest: + """Normalize request-scoped tool parameter overrides into a ToolParamsRequest.""" + if tool_params is None: + return ToolParamsRequest() + if isinstance(tool_params, 
ToolParamsRequest): + return tool_params + if not isinstance(tool_params, dict): + raise ValidationError("tool_params must be an object.") + try: + return ToolParamsRequest.model_validate(tool_params) + except Exception as exc: + raise ValidationError(f"Invalid tool_params payload: {exc}") from exc + + +def _get_agent_tool_overrides( + tool_params: Optional[ToolParamsRequest], + agent_name: Optional[str], +) -> Dict[str, Dict[str, Any]]: + """Resolve tool overrides for a specific agent by its name.""" + if tool_params is None: + return {} + if not agent_name: + return {} + agent_override = tool_params.agents.get(agent_name) + if agent_override is None: + return {} + return dict(agent_override.tools) + + +def _merge_tool_params( + tool_record: Dict[str, Any], + override_params: Optional[Dict[str, Any]], + extra_params: Optional[Dict[str, Any]] = None, +) -> Dict[str, Any]: + """Merge request overrides on top of tool instance defaults from DB. + + Args: + tool_record: Tool configuration from database + override_params: Request-scoped overrides from tool_params + extra_params: Additional internal params not in DB schema (e.g., document_paths) + + Returns: + Merged params dict with DB defaults, overrides, and extra params + """ + merged_params: Dict[str, Any] = {} + for param in tool_record.get("params", []): + merged_params[param["name"]] = param.get("default") + + if override_params: + merged_params.update(override_params) + + # Extra params (e.g., internal access control params) always take precedence + if extra_params: + merged_params.update(extra_params) + + return merged_params + + +def _build_internal_s3_url(file: dict) -> str: + """Build a valid S3 URL for internal tools from uploaded file metadata.""" + if not isinstance(file, dict): + return "" + + object_name = str(file.get("object_name") or "").strip().lstrip("/") + if object_name: + bucket = MINIO_DEFAULT_BUCKET or "nexent" + return f"s3://{bucket}/{object_name}" + + url = str(file.get("url") or 
"").strip() + if not url or url.startswith("blob:") or url.startswith("s3:/blob:"): + return "" + + if url.startswith("s3://"): + return url + + if url.startswith("s3:/"): + return "s3://" + url.replace("s3:/", "", 1).lstrip("/") + + return "s3:/" + url + + def _get_skills_for_template( agent_id: int, tenant_id: str, @@ -245,7 +334,9 @@ async def create_model_config_list(tenant_id): ), url=record["base_url"], ssl_verify=record.get("ssl_verify", True), - model_factory=record.get("model_factory"))) + model_factory=record.get("model_factory"), + timeout_seconds=record.get("timeout_seconds"), + concurrency_limit=record.get("concurrency_limit"))) # fit for old version, main_model and sub_model use default model main_model_config = tenant_config_manager.get_model_config( key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id) @@ -256,7 +347,9 @@ async def create_model_config_list(tenant_id): "model_name") else "", url=main_model_config.get("base_url", ""), ssl_verify=main_model_config.get("ssl_verify", True), - model_factory=main_model_config.get("model_factory"))) + model_factory=main_model_config.get("model_factory"), + timeout_seconds=main_model_config.get("timeout_seconds"), + concurrency_limit=main_model_config.get("concurrency_limit"))) model_list.append( ModelConfig(cite_name="sub_model", api_key=main_model_config.get("api_key", ""), @@ -264,7 +357,9 @@ async def create_model_config_list(tenant_id): "model_name") else "", url=main_model_config.get("base_url", ""), ssl_verify=main_model_config.get("ssl_verify", True), - model_factory=main_model_config.get("model_factory"))) + model_factory=main_model_config.get("model_factory"), + timeout_seconds=main_model_config.get("timeout_seconds"), + concurrency_limit=main_model_config.get("concurrency_limit"))) return model_list @@ -278,18 +373,23 @@ async def create_agent_config( allow_memory_search: bool = True, version_no: int = 0, override_model_id: int | None = None, + tool_params: Optional[ToolParamsRequest | Dict[str, 
Any]] = None, ): + normalized_tool_params = _normalize_tool_params_request(tool_params) agent_info = search_agent_info_by_agent_id( agent_id=agent_id, tenant_id=tenant_id, version_no=version_no) # create sub agent - sub_agent_id_list = query_sub_agents_id_list( + sub_agent_relations = query_sub_agent_relations( main_agent_id=agent_id, tenant_id=tenant_id, version_no=version_no) managed_agents = [] - for sub_agent_id in sub_agent_id_list: - # Get the current published version for this sub-agent (from draft version 0) - sub_agent_version_no = query_current_version_no( - agent_id=sub_agent_id, tenant_id=tenant_id) or 0 + for rel in sub_agent_relations: + sub_agent_id = rel['selected_agent_id'] + sub_agent_version_no = resolve_sub_agent_version_no( + selected_agent_id=sub_agent_id, + selected_agent_version_no=rel.get('selected_agent_version_no'), + tenant_id=tenant_id, + ) sub_agent_config = await create_agent_config( agent_id=sub_agent_id, tenant_id=tenant_id, @@ -299,13 +399,20 @@ async def create_agent_config( allow_memory_search=allow_memory_search, version_no=sub_agent_version_no, override_model_id=None, + tool_params=normalized_tool_params, ) managed_agents.append(sub_agent_config) # create external A2A agents (synchronous function, no await needed) external_a2a_agents = _get_external_a2a_agents(agent_id, tenant_id, version_no) - tool_list = await create_tool_config_list(agent_id, tenant_id, user_id, version_no=version_no) + tool_list = await create_tool_config_list( + agent_id, + tenant_id, + user_id, + version_no=version_no, + tool_params=normalized_tool_params, + ) # Build system prompt: prioritize segmented fields, fallback to original prompt field if not available duty_prompt = agent_info.get("duty_prompt", "") @@ -351,6 +458,77 @@ async def create_agent_config( # Bubble up to streaming layer so it can emit and fall back raise Exception(f"Failed to retrieve memory list: {e}") + # Append active memory tools if memory is enabled + if 
memory_context.user_config.memory_switch and memory_context.memory_config: + try: + memory_metadata = { + "memory_config": memory_context.memory_config, + "memory_user_config": memory_context.user_config, + "tenant_id": memory_context.tenant_id, + "user_id": memory_context.user_id, + "agent_id": memory_context.agent_id, + } + + store_tool_config = ToolConfig( + class_name="StoreMemoryTool", + name="store_memory", + description=( + "Save important information to long-term memory for future recall. " + "Use this when the user shares personal preferences, facts about themselves, " + "project context, or instructions that should persist across conversations. " + "Do NOT store transient information like temporary calculations, information " + "already in the knowledge base, or data the user explicitly says to forget." + ), + inputs=json.dumps({ + "content": { + "type": "string", + "description": "The information to remember", + "description_zh": "需要记住的信息" + } + }, ensure_ascii=False), + output_type="string", + params={}, + source="local", + usage=None, + metadata=memory_metadata, + ) + tool_list.append(store_tool_config) + + search_tool_config = ToolConfig( + class_name="SearchMemoryTool", + name="search_memory", + description=( + "Search long-term memory for relevant information from previous interactions. " + "Use this when you need context about the user's preferences, past decisions, " + "or previously discussed topics that aren't in the current conversation. " + "The system already provides some memory context automatically -- use this tool " + "when you need to search for specific information not already available." 
+ ), + inputs=json.dumps({ + "query": { + "type": "string", + "description": "Natural language query describing what to search for", + "description_zh": "描述要搜索内容的自然语言查询" + }, + "top_k": { + "type": "integer", + "description": "Maximum number of results to return", + "description_zh": "返回结果的最大数量", + "default": 5, + "nullable": True + } + }, ensure_ascii=False), + output_type="string", + params={}, + source="local", + usage=None, + metadata=memory_metadata, + ) + tool_list.append(search_tool_config) + logger.debug("Active memory tools appended to agent tool list") + except Exception as e: + logger.warning(f"Failed to append active memory tools: {e}") + # Build knowledge base summary knowledge_base_summary = "" try: @@ -358,11 +536,15 @@ async def create_agent_config( if "KnowledgeBaseSearchTool" == tool.class_name: index_names = tool.params.get("index_names") if index_names: + # Reuse the index_name -> display_name mapping from tool.metadata + # (already computed in create_tool_config_list to avoid redundant DB query) + index_name_to_display_map = tool.metadata.get("index_name_to_display_map", {}) if tool.metadata else {} for index_name in index_names: try: + display_name = index_name_to_display_map.get(index_name, index_name) message = ElasticSearchService().get_summary(index_name=index_name) summary = message.get("summary", "") - knowledge_base_summary += f"**{index_name}**: {summary}\n\n" + knowledge_base_summary += f"**{display_name}**: {summary}\n\n" except Exception as e: logger.warning( f"Failed to get summary for knowledge base {index_name}: {e}") @@ -377,6 +559,8 @@ async def create_agent_config( # Get skills list for prompt template skills = _get_skills_for_template(agent_id, tenant_id, version_no) + is_manager = len(managed_agents) > 0 or len(external_a2a_agents) > 0 + render_kwargs = { "duty": duty_prompt, "constraint": constraint_prompt, @@ -389,17 +573,49 @@ async def create_agent_config( "APP_DESCRIPTION": app_description, "memory_list": memory_list, 
"knowledge_base_summary": knowledge_base_summary, - "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "user_id": user_id, } system_prompt = Template(prompt_template["system_prompt"], undefined=StrictUndefined).render(render_kwargs) model_id_to_use = override_model_id if override_model_id else agent_info.get("model_id") + model_max_tokens = 10000 if model_id_to_use is not None: model_info = get_model_by_model_id(model_id_to_use, tenant_id=tenant_id) model_name = model_info["display_name"] if model_info is not None else "main_model" + if model_info is not None and model_info.get("max_tokens"): + model_max_tokens = model_info["max_tokens"] else: model_name = "main_model" + + # Use agent-level setting for context management, default to False. + # When ContextManager is disabled, do not attach context_components because + # downstream runtime may prefer component-based prompt assembly over the + # rendered system_prompt, causing the actual model input to diverge from the + # template output. 
+ enable_context_manager = agent_info.get("enable_context_manager", False) + context_components = [] + if enable_context_manager: + context_components = build_context_components( + duty=duty_prompt, + constraint=constraint_prompt, + few_shots=few_shots_prompt, + app_name=app_name, + app_description=app_description, + user_id=user_id, + language=language, + is_manager=is_manager, + tools=render_kwargs["tools"], + skills=skills, + managed_agents=render_kwargs["managed_agents"], + external_a2a_agents=render_kwargs["external_a2a_agents"], + memory_list=memory_list, + memory_search_query=last_user_query, + knowledge_base_summary=knowledge_base_summary, + ) + cm_config = ContextManagerConfig( + enabled=enable_context_manager, + token_threshold=model_max_tokens, + ) agent_config = AgentConfig( name="undefined" if agent_info["name"] is None else agent_info["name"], description="undefined" if agent_info["description"] is None else agent_info["description"], @@ -410,26 +626,55 @@ async def create_agent_config( agent_id=agent_id ), tools=tool_list + _get_skill_script_tools(agent_id, tenant_id, version_no), - max_steps=agent_info.get("max_steps", 10), + max_steps=agent_info.get("max_steps", 15), model_name=model_name, provide_run_summary=agent_info.get("provide_run_summary", False), managed_agents=managed_agents, - external_a2a_agents=external_a2a_agents + external_a2a_agents=external_a2a_agents, + context_manager_config=cm_config, + context_components=context_components, + verification_config=AgentVerificationConfig.model_validate(agent_info.get("verification_config") or {}), ) return agent_config -async def create_tool_config_list(agent_id, tenant_id, user_id, version_no: int = 0): - # create tool +async def create_tool_config_list( + agent_id, + tenant_id, + user_id, + version_no: int = 0, + tool_params: Optional[ToolParamsRequest | Dict[str, Any]] = None, +): tool_config_list = [] langchain_tools = await discover_langchain_tools() + normalized_tool_params = 
_normalize_tool_params_request(tool_params) # now only admin can modify the agent, user_id is not used tools_list = search_tools_for_sub_agent(agent_id, tenant_id, version_no=version_no) + + # Look up agent name for use in error messages. + # Agent name is optional for tool_params matching (matching uses tool identifiers only), + # but we include it in error messages so callers can identify which agent/tool caused a failure. + agent_info = search_agent_info_by_agent_id(agent_id=agent_id, tenant_id=tenant_id, version_no=version_no) + agent_name = agent_info.get("name") if agent_info else None + agent_tool_overrides = _get_agent_tool_overrides(normalized_tool_params, agent_name) + + tool_keys_seen = set() for tool in tools_list: - param_dict = {} - for param in tool.get("params", []): - param_dict[param["name"]] = param.get("default") + tool_identifier = tool.get("name") or tool.get("class_name") + if tool_identifier in tool_keys_seen: + raise ValidationError( + f"Duplicate tool identifier '{tool_identifier}' found in agent '{agent_name or agent_id}'." 
+ ) + tool_keys_seen.add(tool_identifier) + + override_params = None + if tool.get("name") in agent_tool_overrides: + override_params = agent_tool_overrides[tool.get("name")] + elif tool.get("class_name") in agent_tool_overrides: + override_params = agent_tool_overrides[tool.get("class_name")] + + param_dict = _merge_tool_params(tool, override_params) tool_config = ToolConfig( class_name=tool.get("class_name"), name=tool.get("name"), @@ -448,24 +693,62 @@ async def create_tool_config_list(agent_id, tenant_id, user_id, version_no: int tool_config.metadata = langchain_tool break + # Extract document_paths for KnowledgeBaseSearchTool (internal access control, not in DB schema) + document_paths = None + if override_params and "document_paths" in override_params: + document_paths = override_params.get("document_paths") + # Also check using the tool name as key + if not document_paths: + kb_overrides = agent_tool_overrides.get("knowledge_base_search") + if kb_overrides and "document_paths" in kb_overrides: + document_paths = kb_overrides.get("document_paths") + # special logic for search tools that may use reranking models if tool_config.class_name == "KnowledgeBaseSearchTool": - rerank = param_dict.get("rerank", False) - rerank_model_name = param_dict.get("rerank_model_name", "") + rerank = tool_config.params.get("rerank", False) + rerank_model_name = tool_config.params.get("rerank_model_name", "") rerank_model = None if rerank and rerank_model_name: rerank_model = get_rerank_model( tenant_id=tenant_id, model_name=rerank_model_name ) + # Build display_name to index_name mapping for LLM parameter conversion + # Also build reverse mapping (index_name -> display_name) for knowledge_base_summary + index_names = tool_config.params.get("index_names", []) + display_name_to_index_map = {} + index_name_to_display_map = {} + if index_names: + knowledge_name_map = get_knowledge_name_map_by_index_names(index_names) + # Reverse the mapping: display_name (knowledge_name) -> 
index_name + for idx_name, kb_name in knowledge_name_map.items(): + display_name_to_index_map[kb_name] = idx_name + index_name_to_display_map[idx_name] = kb_name + tool_config.metadata = { "vdb_core": get_vector_db_core(), - "embedding_model": get_embedding_model(tenant_id=tenant_id), + "embedding_model": None, "rerank_model": rerank_model, + "display_name_to_index_map": display_name_to_index_map, + "index_name_to_display_map": index_name_to_display_map, + # Internal access control: restrict results to specific document paths (path_or_urls) + "document_paths": document_paths, } + + if not index_names: + raise ValidationError( + f"[{agent_name or agent_id}] knowledge_base_search tool requires index_names, " + f"but it is not configured in the agent and not provided via tool_params.") + + embedding_model, _, _ = get_embedding_model_by_index_name(tenant_id, index_names[0]) + if not embedding_model: + raise ValidationError( + f"No embedding model found for index '{index_names[0]}'. " + f"Please configure an embedding model for this knowledge base.") + tool_config.metadata["embedding_model"] = embedding_model elif tool_config.class_name in ["DifySearchTool", "DataMateSearchTool"]: - rerank = param_dict.get("rerank", False) - rerank_model_name = param_dict.get("rerank_model_name", "") + rerank = tool_config.params.get("rerank", False) + rerank_model_name = tool_config.params.get("rerank_model_name", "") rerank_model = None if rerank and rerank_model_name: rerank_model = get_rerank_model( @@ -479,12 +762,21 @@ async def create_tool_config_list(agent_id, tenant_id, user_id, version_no: int tool_config.metadata = { "llm_model": get_llm_model(tenant_id=tenant_id), "storage_client": minio_client, - "data_process_service_url": DATA_PROCESS_SERVICE + "data_process_service_url": DATA_PROCESS_SERVICE, + "validate_url_access": lambda urls: validate_urls_access(urls, user_id) } elif tool_config.class_name == "AnalyzeImageTool": tool_config.metadata = { + # get_vlm_model reads the 
first multimodal slot, now shown as image understanding. "vlm_model": get_vlm_model(tenant_id=tenant_id), "storage_client": minio_client, + "validate_url_access": lambda urls: validate_urls_access(urls, user_id) + } + elif tool_config.class_name in ["AnalyzeAudioTool", "AnalyzeVideoTool"]: + tool_config.metadata = { + "vlm_model": get_video_understanding_model(tenant_id=tenant_id), + "storage_client": minio_client, + "validate_url_access": lambda urls: validate_urls_access(urls, user_id) } tool_config_list.append(tool_config) @@ -552,20 +844,167 @@ async def prepare_prompt_templates( return prompt_templates -async def join_minio_file_description_to_query(minio_files, query): +async def join_minio_file_description_to_query( + minio_files, + query, + history=None, + max_files: int = 50, + max_chars: int = 10000, +): + """ + Join MinIO file descriptions to the user query. + + This function formats uploaded file information into a structured description + that includes both S3 URL (for internal tools) and presigned_url (for external MCP tools). + It processes files from both the current message and historical messages. + + De-duplication is performed using the file URL as the unique key. A maximum + file count and total character limit are enforced to prevent prompt bloat. 
+ + Args: + minio_files: List of file info dicts from current message upload + query: Original user query + history: Optional list of historical message dicts, each may contain minio_files + max_files: Maximum number of files to include (default 50) + max_chars: Maximum total characters for file descriptions (default 10000) + + Returns: + Modified query with file descriptions appended + """ final_query = query + seen_urls: set[str] = set() + all_files: list[dict] = [] + + # Collect files from current message first (higher priority) if minio_files and isinstance(minio_files, list): - file_descriptions = [] for file in minio_files: - if isinstance(file, dict) and "url" in file and file["url"] and "name" in file and file["name"]: - file_descriptions.append(f"File name: {file['name']}, S3 URL: s3:/{file['url']}") + if isinstance(file, dict) and file.get("name") and (file.get("url") or file.get("object_name")): + s3_url = _build_internal_s3_url(file) + if not s3_url: + continue + if s3_url not in seen_urls: + seen_urls.add(s3_url) + all_files.append(file) + + # Collect files from historical messages (lower priority, already-deduped) + if history and isinstance(history, list): + for msg in history: + if isinstance(msg, dict) and msg.get("minio_files"): + for file in msg["minio_files"]: + if isinstance(file, dict) and file.get("name") and (file.get("url") or file.get("object_name")): + s3_url = _build_internal_s3_url(file) + if not s3_url: + continue + if s3_url not in seen_urls: + seen_urls.add(s3_url) + all_files.append(file) + + # Enforce file count limit (keep most recent files by truncating from the end) + if len(all_files) > max_files: + all_files = all_files[:max_files] + logger.debug(f"File list truncated from {len(all_files)} to {max_files} files") + + if all_files: + file_descriptions: list[str] = [] + # Calculate fixed overhead that is added only once + prefix = "User uploaded files. 
The file information is as follows:\n" + suffix = f"\n\nUser wants to answer questions based on the information in the above files: {query}" + fixed_overhead = len(prefix) + len(suffix) + + for i, file in enumerate(all_files): + s3_url = _build_internal_s3_url(file) + presigned_url = file.get("presigned_url", "") + + # Build description with both URLs + if presigned_url: + desc = ( + f"File name: {file['name']}\n" + f"- S3 URL: {s3_url} [for tools WITHOUT [MCP] prefix, like analyze_text_file]\n" + f"- presigned_url: {presigned_url} [for tools WITH [MCP] prefix]" + ) + else: + desc = f"File name: {file['name']}, S3 URL: {s3_url} [permanent]" + + # Calculate total length if we include this description + # Each description after the first adds 2 chars for \n\n separator + separator_chars = 2 if i > 0 else 0 + total_len = sum(len(d) for d in file_descriptions) + len(desc) + separator_chars + fixed_overhead + + # Check if adding this description would exceed the character limit + if total_len > max_chars: + logger.debug( + f"File descriptions truncated at {len(file_descriptions)} files " + f"to stay within {max_chars} character limit" + ) + break + + file_descriptions.append(desc) + if file_descriptions: - final_query = "User uploaded files. The file information is as follows:\n" - final_query += "\n".join(file_descriptions) + "\n\n" - final_query += f"User wants to answer questions based on the information in the above files: {query}" + final_query = prefix + "\n\n".join(file_descriptions) + suffix + return final_query +def _format_minio_files_for_content(minio_files: Optional[List[dict]], max_files: int = 20) -> str: + """Format minio_files into a string for embedding in history content. 
+ + Args: + minio_files: List of file info dicts + max_files: Maximum number of files to include per message + + Returns: + Formatted string describing the files, or empty string if no files + """ + if not minio_files or not isinstance(minio_files, list): + return "" + + file_lines = [] + for i, file in enumerate(minio_files): + if i >= max_files: + file_lines.append(f" - ... (and {len(minio_files) - max_files} more files)") + break + if isinstance(file, dict) and file.get("name") and (file.get("url") or file.get("object_name")): + s3_url = _build_internal_s3_url(file) + if not s3_url: + continue + presigned_url = file.get("presigned_url", "") + if presigned_url: + file_lines.append( + f" - {file['name']}: {s3_url} (for non-MCP tools), presigned_url: {presigned_url} (for [MCP] tools)" + ) + else: + file_lines.append(f" - {file['name']}: {s3_url}") + + if not file_lines: + return "" + + return "\n[Attached files]:\n" + "\n".join(file_lines) + + +def _convert_history_with_minio_files(history: List) -> Optional[List[AgentHistory]]: + """Convert HistoryItem list to AgentHistory list, embedding minio_files into content. 
+ + Args: + history: List of HistoryItem from API + + Returns: + List of AgentHistory with file info embedded in content, or None if history is None + """ + if history is None: + return None + + result = [] + for item in history: + content = item.content + if item.minio_files: + file_info = _format_minio_files_for_content(item.minio_files) + if file_info: + content = content + file_info if content else file_info + result.append(AgentHistory(role=item.role, content=content)) + return result + + def filter_mcp_servers_and_tools(input_agent_config: AgentConfig, mcp_info_dict) -> list: """ Filter mcp servers and tools, only keep the actual used mcp servers @@ -603,6 +1042,7 @@ async def create_agent_run_info( is_debug: bool = False, override_version_no: int | None = None, override_model_id: int | None = None, + tool_params: Optional[ToolParamsRequest | Dict[str, Any]] = None, ): # Determine which version_no to use based on is_debug flag # If is_debug=false, use the current published version (current_version_no) @@ -617,7 +1057,11 @@ async def create_agent_run_info( version_no = 0 logger.info(f"Agent {agent_id} has no published version, using draft version 0") - final_query = await join_minio_file_description_to_query(minio_files=minio_files, query=query) + final_query = await join_minio_file_description_to_query( + minio_files=minio_files, + query=query, + history=history + ) model_list = await create_model_config_list(tenant_id) create_config_kwargs = { "agent_id": agent_id, @@ -631,7 +1075,7 @@ async def create_agent_run_info( if override_model_id is not None: create_config_kwargs["override_model_id"] = override_model_id - agent_config = await create_agent_config(**create_config_kwargs) + agent_config = await create_agent_config(**create_config_kwargs, tool_params=tool_params) remote_mcp_list = await get_remote_mcp_server_list(tenant_id=tenant_id, is_need_auth=True) default_mcp_url = urljoin(LOCAL_MCP_SERVER, "sse") @@ -646,7 +1090,7 @@ async def 
create_agent_run_info( # Filter MCP servers and tools, and build mcp_host with authorization used_mcp_urls = filter_mcp_servers_and_tools(agent_config, remote_mcp_dict) - # Build mcp_host list with authorization tokens + # Build mcp_host list with authorization tokens and custom headers mcp_host = [] for url in used_mcp_urls: # Find the MCP record for this URL @@ -661,22 +1105,30 @@ async def create_agent_run_info( "url": url, "transport": "sse" if url.endswith("/sse") else "streamable-http" } - # Add authorization if present + headers = {} auth_token = mcp_record.get("authorization_token") if auth_token: - mcp_config["authorization"] = auth_token + headers["Authorization"] = auth_token + custom_headers = mcp_record.get("custom_headers") + if custom_headers and isinstance(custom_headers, dict): + headers.update(custom_headers) + if headers: + mcp_config["headers"] = headers mcp_host.append(mcp_config) else: # Fallback to string format if record not found mcp_host.append(url) + # Convert HistoryItem (from API) to AgentHistory (expected by SDK) + converted_history = _convert_history_with_minio_files(history) + agent_run_info = AgentRunInfo( query=final_query, model_config_list=model_list, observer=MessageObserver(lang=language), agent_config=agent_config, mcp_host=mcp_host, - history=history, + history=converted_history, stop_event=threading.Event() ) return agent_run_info diff --git a/backend/agents/skill_creation_agent.py b/backend/agents/skill_creation_agent.py index 3dc0cfa80..37c3ec2ad 100644 --- a/backend/agents/skill_creation_agent.py +++ b/backend/agents/skill_creation_agent.py @@ -86,7 +86,7 @@ def run_skill_creation_agent( agent_run_thread(agent_run_info) -def create_simple_skill_from_request( +def create_skill_from_request( system_prompt: str, user_prompt: str, model_config_list: List[ModelConfig], diff --git a/backend/apps/a2a_client_app.py b/backend/apps/a2a_client_app.py index db7acd108..ea149ac31 100644 --- a/backend/apps/a2a_client_app.py +++ 
b/backend/apps/a2a_client_app.py @@ -5,6 +5,7 @@ Used internally for configuring A2A sub-agents. """ import logging +import uuid from typing import Annotated, List, Optional from http import HTTPStatus @@ -45,6 +46,14 @@ class UpdateAgentProtocolRequest(BaseModel): ) +class TestNacosConnectionRequest(BaseModel): + """Request to test Nacos connectivity without saving the config.""" + nacos_addr: str = Field(description="Nacos server address (e.g., http://nacos-server:8848)") + nacos_username: Optional[str] = None + nacos_password: Optional[str] = None + namespace_id: Optional[str] = "public" + + # ============================================================================= # External Agent Discovery # ============================================================================= @@ -102,7 +111,7 @@ async def discover_from_nacos( results = await a2a_client_service.discover_from_nacos( nacos_config_id=request.nacos_config_id, - agent_names=request.agent_names, + agent_names=[name.strip() for name in request.agent_names], tenant_id=tenant_id, user_id=user_id, namespace=request.namespace @@ -482,6 +491,17 @@ class CreateNacosConfigRequest(BaseModel): description: Optional[str] = None +class UpdateNacosConfigRequest(BaseModel): + """Request to update a Nacos config.""" + name: Optional[str] = None + nacos_addr: Optional[str] = None + nacos_username: Optional[str] = None + nacos_password: Optional[str] = None + namespace_id: Optional[str] = None + description: Optional[str] = None + is_active: Optional[bool] = None + + @router.post("/nacos-configs") async def create_nacos_config( request: CreateNacosConfigRequest, @@ -577,6 +597,51 @@ async def get_nacos_config( ) +@router.put("/nacos-configs/{config_id}") +async def update_nacos_config( + config_id: str, + request: UpdateNacosConfigRequest, + authorization: Annotated[Optional[str], Header()] = None, + http_request: Request = None +): + """Update a Nacos configuration.""" + try: + user_id, tenant_id, _ = 
get_current_user_info(authorization, http_request) + + result = a2a_agent_db.update_nacos_config( + config_id=config_id, + tenant_id=tenant_id, + user_id=user_id, + name=request.name, + nacos_addr=request.nacos_addr, + nacos_username=request.nacos_username, + nacos_password=request.nacos_password, + namespace_id=request.namespace_id, + description=request.description, + is_active=request.is_active + ) + + if not result: + raise HTTPException( + status_code=HTTPStatus.NOT_FOUND, + detail=f"Nacos config {config_id} not found" + ) + + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success", "data": result} + ) + + except HTTPException: + raise + except Exception as e: + logger.error(f"Update Nacos config failed: {e}", exc_info=True) + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to update Nacos config" + ) + + @router.delete("/nacos-configs/{config_id}") async def delete_nacos_config( config_id: str, @@ -610,6 +675,62 @@ async def delete_nacos_config( ) +@router.post("/nacos-configs/test-connection") +async def test_nacos_connection( + request: TestNacosConnectionRequest, + authorization: Annotated[Optional[str], Header()] = None, + http_request: Request = None +): + """Test connectivity to Nacos server without saving the configuration.""" + from utils.nacos_client import NacosClient, NacosConnectionError + + try: + get_current_user_info(authorization, http_request) + + async with NacosClient( + nacos_addr=request.nacos_addr, + username=request.nacos_username, + password=request.nacos_password + ) as client: + result = await client.test_connectivity(namespace=request.namespace_id or "public") + + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "status": "success", + "data": { + "success": result["success"], + "message": result["message"] + } + } + ) + + except NacosConnectionError as e: + logger.warning(f"Nacos connection test failed: {e}") + return JSONResponse( + 
status_code=HTTPStatus.OK, + content={ + "status": "success", + "data": { + "success": False, + "message": str(e) + } + } + ) + except Exception as e: + logger.error(f"Test Nacos connection failed: {e}", exc_info=True) + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "status": "success", + "data": { + "success": False, + "message": f"Failed to test Nacos connection: {e}" + } + } + ) + + # ============================================================================= # External Agent Chat # ============================================================================= @@ -648,11 +769,11 @@ async def chat_with_external_agent( # Build A2A message format following A2A protocol with parts array a2a_message = { + "message_id": f"msg_{uuid.uuid4().hex}", "role": "ROLE_USER", "parts": [ { "text": request_body.message.strip(), - "mediaType": "text/plain" } ], } diff --git a/backend/apps/agent_app.py b/backend/apps/agent_app.py index b4f932dc5..87abbf9e8 100644 --- a/backend/apps/agent_app.py +++ b/backend/apps/agent_app.py @@ -1,12 +1,17 @@ +import json import logging from http import HTTPStatus from typing import Optional from fastapi import APIRouter, Body, Header, HTTPException, Request, Query from fastapi.encoders import jsonable_encoder -from starlette.responses import JSONResponse +from starlette.responses import JSONResponse, Response +from consts.const import ASSET_OWNER_TENANT_ID from consts.model import AgentRequest, AgentInfoRequest, AgentIDRequest, ConversationResponse, AgentImportRequest, AgentNameBatchCheckRequest, AgentNameBatchRegenerateRequest, VersionPublishRequest, VersionListResponse, VersionDetailResponse, VersionRollbackRequest, VersionStatusRequest, CurrentVersionResponse, VersionCompareRequest, VersionUpdateRequest +from consts.exceptions import SkillDuplicateError +from services.asset_owner_visibility import apply_agent_detail_prompt_visibility + from services.agent_service import ( get_agent_info_impl, 
get_creating_sub_agent_info_impl, @@ -22,6 +27,8 @@ get_agent_call_relationship_impl, clear_agent_new_mark_impl, get_agent_by_name_impl, + export_agent_with_skills_impl, + import_agent_with_skills_impl, ) from services.agent_version_service import ( publish_version_impl, @@ -38,9 +45,6 @@ ) from utils.auth_utils import get_current_user_info, get_current_user_id -# Import monitoring utilities -from utils.monitoring import monitoring_manager - agent_runtime_router = APIRouter(prefix="/agent") agent_config_router = APIRouter(prefix="/agent") logger = logging.getLogger("agent_app") @@ -48,7 +52,6 @@ # Define API route @agent_runtime_router.post("/run") -@monitoring_manager.monitor_endpoint("agent.run", exclude_params=["authorization"]) async def agent_run_api(agent_request: AgentRequest, http_request: Request, authorization: str = Header(None)): """ Agent execution API endpoint @@ -61,8 +64,11 @@ async def agent_run_api(agent_request: AgentRequest, http_request: Request, auth ) except Exception as e: logger.error(f"Agent run error: {str(e)}") + # Only expose actual error in debug mode for better diagnosis + # Keep generic message in normal mode for user experience + error_detail = str(e) if agent_request.is_debug else "Agent run error." raise HTTPException( - status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Agent run error.") + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=error_detail) @agent_runtime_router.get("/stop/{conversation_id}") @@ -85,12 +91,14 @@ async def search_agent_info_api( """ Search agent info by agent_id and version_no version_no defaults to 0 (current/draft version) + Returns permission field indicating whether the user can edit this agent. 
""" try: - _, auth_tenant_id = get_current_user_id(authorization) + user_id, auth_tenant_id = get_current_user_id(authorization) # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id effective_tenant_id = tenant_id or auth_tenant_id - return await get_agent_info_impl(agent_id, effective_tenant_id, version_no) + agent_info = await get_agent_info_impl(agent_id, effective_tenant_id, version_no, user_id) + return apply_agent_detail_prompt_visibility(auth_tenant_id, agent_info) except Exception as e: logger.error(f"Agent search info error: {str(e)}") raise HTTPException( @@ -157,7 +165,8 @@ async def delete_agent_api( Delete an agent """ try: - user_id, auth_tenant_id, _ = get_current_user_info(authorization, http_request) + user_id, auth_tenant_id, _ = get_current_user_info( + authorization, http_request) # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id effective_tenant_id = tenant_id or auth_tenant_id await delete_agent_impl(request.agent_id, effective_tenant_id, user_id) @@ -171,11 +180,22 @@ async def delete_agent_api( @agent_config_router.post("/export") async def export_agent_api(request: AgentIDRequest, authorization: Optional[str] = Header(None)): """ - export an agent + export an agent. + + Returns a ZIP file if the agent has skill instances, otherwise returns plain JSON. + The response Content-Type and body differ based on the agent's skill configuration. 
""" try: - agent_info_str = await export_agent_impl(request.agent_id, authorization) - return ConversationResponse(code=0, message="success", data=agent_info_str) + result = await export_agent_with_skills_impl(request.agent_id, authorization) + if isinstance(result, dict) and result.get("_zip"): + return Response( + content=result["data"], + media_type="application/zip", + headers={ + "Content-Disposition": f"attachment; filename=\"{result.get('filename', 'agent_export.zip')}\"" + } + ) + return ConversationResponse(code=0, message="success", data=result) except Exception as e: logger.error(f"Agent export error: {str(e)}") raise HTTPException( @@ -185,15 +205,32 @@ async def export_agent_api(request: AgentIDRequest, authorization: Optional[str] @agent_config_router.post("/import") async def import_agent_api(request: AgentImportRequest, authorization: Optional[str] = Header(None)): """ - import an agent + import an agent. + + Accepts both plain JSON (agent without skills) and JSON with embedded skill ZIPs + (agent with skills). The skills field, if present, should contain base64-encoded + ZIP packages for each skill. 
""" try: - await import_agent_impl( - request.agent_info, - authorization, - force_import=request.force_import - ) + if request.skills: + await import_agent_with_skills_impl( + request.agent_info, + request.skills, + authorization, + force_import=request.force_import + ) + else: + await import_agent_impl( + request.agent_info, + authorization, + force_import=request.force_import + ) return {} + except SkillDuplicateError as exc: + raise HTTPException(status_code=409, detail={ + "type": "skill_duplicate", + "duplicate_skills": exc.duplicate_names + }) except Exception as e: logger.error(f"Agent import error: {str(e)}") raise HTTPException( @@ -256,10 +293,18 @@ async def list_all_agent_info_api( list all agent info """ try: - user_id, auth_tenant_id, _ = get_current_user_info(authorization, request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id - effective_tenant_id = tenant_id or auth_tenant_id - return await list_all_agent_info_impl(tenant_id=effective_tenant_id, user_id=user_id) + user_id, tenant_id, _ = get_current_user_info( + authorization, request) + + agent_list = await list_all_agent_info_impl( + tenant_id=tenant_id, user_id=user_id + ) + if tenant_id != ASSET_OWNER_TENANT_ID: + asset_agent_list = await list_all_agent_info_impl( + tenant_id=ASSET_OWNER_TENANT_ID, user_id=user_id + ) + return agent_list + asset_agent_list + return agent_list except Exception as e: logger.error(f"Agent list error: {str(e)}") raise HTTPException( @@ -308,7 +353,8 @@ async def publish_version_api( raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) except Exception as e: logger.error(f"Publish version error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Publish version error.") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Publish version error.") @agent_config_router.post("/{agent_id}/versions/compare") @@ -333,7 +379,8 @@ async def compare_versions_api( 
raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) except Exception as e: logger.error(f"Compare versions error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Compare versions error.") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Compare versions error.") @agent_config_router.get("/{agent_id}/versions", response_model=VersionListResponse) @@ -344,14 +391,14 @@ async def get_version_list_api( authorization: Optional[str] = Header(None), request: Request = None ): - """ + """versions = session.query(AgentVersion) Get version list for an agent """ try: - user_id, auth_tenant_id, _ = get_current_user_info(authorization, request) + _, auth_tenant_id, _ = get_current_user_info( + authorization, request) # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id effective_tenant_id = tenant_id or auth_tenant_id - logger.info(f"Get version list for agent_id: {agent_id}, tenant_id: {effective_tenant_id}") result = get_version_list_impl( agent_id=agent_id, tenant_id=effective_tenant_id, @@ -360,7 +407,8 @@ async def get_version_list_api( return JSONResponse(status_code=HTTPStatus.OK, content=jsonable_encoder(result)) except Exception as e: logger.error(f"Get version list error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get version list error.") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get version list error.") @agent_config_router.get("/{agent_id}/versions/{version_no}", response_model=VersionDetailResponse) @@ -384,7 +432,9 @@ async def get_version_api( raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e)) except Exception as e: logger.error(f"Get version detail error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get version detail error.") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get 
version detail error.") + @agent_config_router.get("/{agent_id}/versions/{version_no}/detail", response_model=VersionDetailResponse) async def get_version_detail_api( @@ -407,7 +457,8 @@ async def get_version_detail_api( raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e)) except Exception as e: logger.error(f"Get version detail error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get version detail error.") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get version detail error.") @agent_config_router.post("/{agent_id}/versions/{version_no}/rollback") @@ -417,9 +468,10 @@ async def rollback_version_api( authorization: str = Header(None), ): """ - Rollback to a specific version by updating current_version_no only. - This does NOT create a new version - the draft will point to the target version. - Use the publish endpoint to create an actual new version after rollback. + Rollback to a specific version by restoring draft data from that version. + This copies the target version's snapshot (agent, tools, relations, skills) + into the draft (version_no=0) and updates current_version_no. + The user can then edit or re-publish from the restored state. 
""" try: _, tenant_id = get_current_user_id(authorization) @@ -433,7 +485,8 @@ async def rollback_version_api( raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) except Exception as e: logger.error(f"Rollback version error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Rollback version error.") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Rollback version error.") @agent_config_router.patch("/{agent_id}/versions/{version_no}/status") @@ -460,7 +513,8 @@ async def update_version_status_api( raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) except Exception as e: logger.error(f"Update version status error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Update version status error.") + raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Update version status error.") @agent_config_router.put("/{agent_id}/versions/{version_no}") @@ -488,7 +542,8 @@ async def update_version_api( raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) except Exception as e: logger.error(f"Update version error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Update version error.") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Update version error.") @agent_config_router.delete("/{agent_id}/versions/{version_no}") @@ -513,7 +568,8 @@ async def delete_version_api( raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) except Exception as e: logger.error(f"Delete version error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Delete version error.") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Delete version error.") @agent_config_router.get("/{agent_id}/current_version", response_model=CurrentVersionResponse) @@ -535,7 +591,8 @@ async def 
get_current_version_api( raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e)) except Exception as e: logger.error(f"Get current version error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get current version error.") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get current version error.") @agent_config_router.get("/published_list") @@ -548,10 +605,19 @@ async def list_published_agents_api( """ try: user_id, tenant_id, _ = get_current_user_info(authorization, request) - return await list_published_agents_impl(tenant_id=tenant_id, user_id=user_id) + agent_list = await list_published_agents_impl( + tenant_id=tenant_id, user_id=user_id + ) + if tenant_id != ASSET_OWNER_TENANT_ID: + asset_agent_list = await list_published_agents_impl( + tenant_id=ASSET_OWNER_TENANT_ID, user_id=user_id + ) + return agent_list + asset_agent_list + return agent_list except Exception as e: logger.error(f"Published agents list error: {str(e)}") raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Published agents list error." 
) + diff --git a/backend/apps/agent_repository_app.py b/backend/apps/agent_repository_app.py new file mode 100644 index 000000000..e9da2fde0 --- /dev/null +++ b/backend/apps/agent_repository_app.py @@ -0,0 +1,134 @@ +import logging +from http import HTTPStatus +from typing import Optional + +from fastapi import APIRouter, Body, Header, HTTPException, Query +from starlette.responses import JSONResponse + +from consts.exceptions import SkillDuplicateError, UnauthorizedError +from services.agent_repository_service import ( + create_agent_repository_listing_impl, + import_agent_from_repository_impl, + list_agent_repository_listings_impl, + update_agent_repository_status_impl, +) +from utils.auth_utils import get_current_user_id + +agent_repository_router = APIRouter(prefix="/repository/agent") +logger = logging.getLogger("agent_repository_app") + + +@agent_repository_router.get("") +async def list_agent_repository_listings_api( + status: Optional[str] = Query(None, description="Filter by listing status"), + authorization: str = Header(None), +): + """List all marketplace repository listings with optional status filter.""" + try: + get_current_user_id(authorization) + result = list_agent_repository_listings_impl(status=status) + return JSONResponse(status_code=HTTPStatus.OK, content=result) + except UnauthorizedError as e: + raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail=str(e)) + except ValueError as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) + except Exception as e: + logger.error(f"List agent repository listings error: {str(e)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="List agent repository listings error.", + ) + + +@agent_repository_router.patch("/{agent_repository_id}/status") +async def update_agent_repository_status_api( + agent_repository_id: int, + status: str = Body( + ..., + embed=True, + description=( + "New status: NOT_SHARED (未共享) / PENDING_REVIEW (待审核) / " + 
"REJECTED (审核驳回) / SHARED (已共享)" + ), + ), + authorization: str = Header(None), +): + """Update marketplace repository listing status (share, unshare, approve, reject).""" + try: + user_id, _ = get_current_user_id(authorization) + result = update_agent_repository_status_impl( + agent_repository_id=agent_repository_id, + status=status, + user_id=user_id, + ) + return JSONResponse(status_code=HTTPStatus.OK, content=result) + except UnauthorizedError as e: + raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail=str(e)) + except ValueError as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) + except Exception as e: + logger.error(f"Update agent repository status error: {str(e)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Update agent repository status error.", + ) + + +@agent_repository_router.post("/{agent_id}/versions/{version_no}") +async def create_agent_repository_listing_api( + agent_id: int, + version_no: int, + authorization: str = Header(None), +): + """Create or update a marketplace repository listing from an agent version snapshot.""" + try: + user_id, tenant_id = get_current_user_id(authorization) + result = await create_agent_repository_listing_impl( + agent_id=agent_id, + tenant_id=tenant_id, + user_id=user_id, + version_no=version_no, + ) + return JSONResponse(status_code=HTTPStatus.OK, content=result) + except UnauthorizedError as e: + raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail=str(e)) + except ValueError as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) + except Exception as e: + logger.error(f"Create agent repository listing error: {str(e)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Create agent repository listing error.", + ) + + +@agent_repository_router.post("/{agent_repository_id}/import") +async def import_agent_from_repository_api( + agent_repository_id: int, + authorization: 
Optional[str] = Header(None), +): + """Import an agent tree from a marketplace repository listing into the current tenant.""" + try: + await import_agent_from_repository_impl( + agent_repository_id=agent_repository_id, + authorization=authorization, + ) + return JSONResponse(status_code=HTTPStatus.OK, content={}) + except UnauthorizedError as e: + raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail=str(e)) + except SkillDuplicateError as exc: + raise HTTPException( + status_code=HTTPStatus.CONFLICT, + detail={ + "type": "skill_duplicate", + "duplicate_skills": exc.duplicate_names, + }, + ) + except ValueError as e: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e)) + except Exception as e: + logger.error(f"Import agent from repository error: {str(e)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Import agent from repository error.", + ) diff --git a/backend/apps/aidp_app.py b/backend/apps/aidp_app.py new file mode 100644 index 000000000..eae9cb678 --- /dev/null +++ b/backend/apps/aidp_app.py @@ -0,0 +1,43 @@ +""" +AIDP App Layer +FastAPI endpoints for AIDP knowledge base list proxy. 
+""" +import logging +from http import HTTPStatus +from typing import Annotated + +from fastapi import APIRouter, Query +from fastapi.responses import JSONResponse + +from consts.error_code import ErrorCode +from consts.exceptions import AppException +from services.aidp_service import fetch_aidp_knowledge_bases_impl + +router = APIRouter(prefix="/aidp") +logger = logging.getLogger("aidp_app") + + +@router.get("/knowledge-bases") +async def fetch_aidp_knowledge_bases_api( + server_url: Annotated[str, Query(description="AIDP API server URL")], + api_key: Annotated[str, Query(description="AIDP API key")], + page: Annotated[int, Query(ge=1, description="Page number starting from 1")] = 1, + page_size: Annotated[int, Query(ge=1, le=100, description="Page size from 1 to 100")] = 20, +) -> JSONResponse: + """Fetch paginated knowledge bases from the external AIDP API.""" + try: + result = fetch_aidp_knowledge_bases_impl( + server_url=server_url, + api_key=api_key, + page=page, + page_size=page_size, + ) + return JSONResponse(status_code=HTTPStatus.OK, content=result) + except AppException: + raise + except Exception as e: + logger.exception("Failed to fetch AIDP knowledge bases: %s", e) + raise AppException( + ErrorCode.AIDP_SERVICE_ERROR, + f"Failed to fetch AIDP knowledge bases: {str(e)}", + ) diff --git a/backend/apps/app_factory.py b/backend/apps/app_factory.py index 219da5b82..02816cec1 100644 --- a/backend/apps/app_factory.py +++ b/backend/apps/app_factory.py @@ -101,6 +101,16 @@ async def generic_exception_handler(request, exc): if isinstance(exc, AppException): return await app_exception_handler(request, exc) + # Handle NexentCapabilityError with a friendly message + from adapters.exception import NexentCapabilityError as _NCE + + if isinstance(exc, _NCE): + logger.warning(f"NexentCapabilityError: {exc}") + return JSONResponse( + status_code=400, + content={"message": str(exc)}, + ) + logger.error(f"Generic Exception: {exc}") return JSONResponse( status_code=500, 
diff --git a/backend/apps/cas_app.py b/backend/apps/cas_app.py new file mode 100644 index 000000000..dbf4815f8 --- /dev/null +++ b/backend/apps/cas_app.py @@ -0,0 +1,156 @@ +import html +import logging +from http import HTTPStatus +from typing import Optional +from urllib.parse import parse_qs, urlsplit + +from fastapi import APIRouter, HTTPException, Query, Request +from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse + +from services.cas_service import ( + CAS_SERVER_URL, + CasAuthenticationError, + build_login_url, + build_renew_url, + get_cas_config, + login_with_ticket, + renew_with_ticket, + revoke_from_logout_request, +) + +logger = logging.getLogger(__name__) +router = APIRouter(prefix="/user/cas", tags=["cas"]) + + +@router.get("/config") +async def config(): + return JSONResponse( + status_code=HTTPStatus.OK, + content={"message": "success", "data": get_cas_config()}, + ) + + +@router.get("/login") +async def login(redirect: str = Query("/", description="URL to return to after login")): + try: + login_url = _require_cas_server_redirect(build_login_url(redirect)) + return RedirectResponse(url=login_url, status_code=HTTPStatus.FOUND) + except CasAuthenticationError as exc: + logger.warning("CAS login rejected: %s", exc) + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail="CAS login is not available") + + +@router.get("/callback") +async def callback(ticket: str = "", redirect: str = "/"): + try: + result = await login_with_ticket(ticket, redirect) + return JSONResponse( + status_code=HTTPStatus.OK, + content={"message": "CAS login successful", "data": result}, + ) + except CasAuthenticationError as exc: + logger.warning("CAS callback rejected: %s", exc) + raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail="CAS authentication failed") + except Exception as exc: + logger.error(f"CAS callback failed: {exc}") + raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="CAS login failed") + + 
+@router.post("/callback") +async def callback_logout(request: Request, logout_request: Optional[str] = None): + return await _handle_logout_request(request, logout_request, endpoint="callback") + + +@router.get("/renew") +async def renew(): + try: + return RedirectResponse(url=build_renew_url(), status_code=HTTPStatus.FOUND) + except CasAuthenticationError as exc: + logger.warning("CAS renew rejected: %s", exc) + return _renew_html(False, "CAS renew failed") + + +@router.get("/renew_callback") +async def renew_callback(ticket: str = ""): + if not ticket: + return _renew_html(False, "CAS session is not active") + try: + result = await renew_with_ticket(ticket) + return JSONResponse( + status_code=HTTPStatus.OK, + content={"message": "CAS renew successful", "data": result}, + ) + except Exception as exc: + logger.warning(f"CAS renew failed: {exc}") + return _renew_html(False, "CAS renew failed") + + +@router.post("/logout_callback") +async def logout_callback( + request: Request, + logout_request: Optional[str] = None, +): + return await _handle_logout_request(request, logout_request, endpoint="logout_callback") + + +async def _handle_logout_request( + request: Request, + logout_request: Optional[str] = None, + endpoint: str = "unknown", +): + logout_request = await _extract_logout_request(request, logout_request) + logger.info( + "CAS SLO %s received logoutRequest: present=%s length=%s", + endpoint, + bool(logout_request), + len(logout_request or ""), + ) + result = revoke_from_logout_request(logout_request) + logger.info("CAS SLO %s revoke result: %s", endpoint, result) + return JSONResponse( + status_code=HTTPStatus.OK, + content={"message": "success", "data": result}, + ) + + +async def _extract_logout_request(request: Request, logout_request: Optional[str] = None) -> str: + if logout_request: + return logout_request + + query_logout_request = request.query_params.get("logoutRequest") or request.query_params.get("logout_request") + if query_logout_request: + 
return query_logout_request + + body = await request.body() + raw_body = body.decode("utf-8") if body else "" + if not raw_body: + return "" + + parsed = parse_qs(raw_body) + return (parsed.get("logoutRequest") or parsed.get("logout_request") or [raw_body])[0] + + +def _renew_html(success: bool, reason: str = "") -> HTMLResponse: + status = "success" if success else "failed" + safe_reason = html.escape(reason) + return HTMLResponse( + status_code=HTTPStatus.OK, + content=f""" +""", + ) + + +def _require_cas_server_redirect(url: str) -> str: + parsed_url = urlsplit(url) + parsed_cas = urlsplit(CAS_SERVER_URL) + if ( + parsed_url.scheme not in {"http", "https"} + or not parsed_url.netloc + or parsed_url.scheme != parsed_cas.scheme + or parsed_url.netloc != parsed_cas.netloc + ): + logger.warning("Blocked CAS redirect outside configured server: %s", url) + raise CasAuthenticationError("Invalid CAS redirect URL") + return url diff --git a/backend/apps/config_app.py b/backend/apps/config_app.py index fc6267555..9ffadfe5e 100644 --- a/backend/apps/config_app.py +++ b/backend/apps/config_app.py @@ -2,17 +2,23 @@ from apps.app_factory import create_app from apps.agent_app import agent_config_router as agent_router +from apps.agent_repository_app import agent_repository_router from apps.config_sync_app import router as config_sync_router from apps.datamate_app import router as datamate_router from apps.vectordatabase_app import router as vectordatabase_router from apps.dify_app import router as dify_router from apps.idata_app import router as idata_router -from apps.file_management_app import file_management_config_router as file_manager_router +from apps.file_management_app import ( + file_management_config_router as file_manager_router, +) from apps.image_app import router as proxy_router from apps.knowledge_summary_app import router as summary_router from apps.mock_user_management_app import router as mock_user_management_router from apps.model_managment_app import 
router as model_manager_router +from apps.oauth_app import router as oauth_router from apps.prompt_app import router as prompt_router +from apps.prompt_template_app import router as prompt_template_router +from apps.mcp_management_app import router as mcp_management_router from apps.remote_mcp_app import router as remote_mcp_router from apps.skill_app import router as skill_router from apps.tenant_config_app import router as tenant_config_router @@ -24,8 +30,13 @@ from apps.user_app import router as user_router from apps.invitation_app import router as invitation_router from apps.a2a_client_app import router as a2a_client_router +from apps.monitoring_app import router as monitoring_router from apps.a2a_server_app import router as a2a_server_router +from apps.haotian_app import router as haotian_router +from apps.aidp_app import router as aidp_router +from apps.cas_app import router as cas_router from consts.const import IS_SPEED_MODE +from services.prompt_template_service import sync_system_default_prompt_template # Create logger instance logger = logging.getLogger("base_app") @@ -33,9 +44,20 @@ # Create FastAPI app with common configurations app = create_app(title="Nexent Config API", description="Configuration APIs") + +@app.on_event("startup") +async def sync_default_prompt_template_on_startup(): + """Sync the YAML-backed system default prompt template into the database on startup.""" + try: + sync_system_default_prompt_template() + logger.info("System default prompt template synced successfully.") + except Exception as exc: + logger.error(f"Failed to sync system default prompt template: {str(exc)}") + app.include_router(model_manager_router) app.include_router(config_sync_router) app.include_router(agent_router) +app.include_router(agent_repository_router) app.include_router(vectordatabase_router) app.include_router(datamate_router) app.include_router(voice_router) @@ -44,6 +66,7 @@ app.include_router(tool_config_router) app.include_router(dify_router) 
app.include_router(idata_router) +app.include_router(monitoring_router) # Choose user management router based on IS_SPEED_MODE if IS_SPEED_MODE: @@ -53,14 +76,21 @@ logger.info("Normal mode - using real user management router") app.include_router(user_management_router) +app.include_router(oauth_router) +app.include_router(cas_router) + app.include_router(summary_router) app.include_router(prompt_router) +app.include_router(prompt_template_router) app.include_router(skill_router) app.include_router(tenant_config_router) +app.include_router(mcp_management_router) app.include_router(remote_mcp_router) app.include_router(tenant_router) app.include_router(group_router) app.include_router(user_router) app.include_router(invitation_router) app.include_router(a2a_client_router) -app.include_router(a2a_server_router) \ No newline at end of file +app.include_router(a2a_server_router) +app.include_router(haotian_router) +app.include_router(aidp_router) diff --git a/backend/apps/data_process_app.py b/backend/apps/data_process_app.py index 9138d5ef1..693eb987e 100644 --- a/backend/apps/data_process_app.py +++ b/backend/apps/data_process_app.py @@ -204,9 +204,14 @@ async def get_index_tasks(index_name: str): Returns tasks that are being processed or waiting to be processed """ + import time + start = time.time() try: - return await service.get_index_tasks(index_name) + result = await service.get_index_tasks(index_name) + logger.info(f"[get_index_tasks] index={index_name}, tasks={len(result)}, duration={time.time()-start:.3f}s") + return result except Exception as e: + logger.error(f"[get_index_tasks] error: {e}") raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=str(e)) diff --git a/backend/apps/file_management_app.py b/backend/apps/file_management_app.py index 50224c952..427bde6f3 100644 --- a/backend/apps/file_management_app.py +++ b/backend/apps/file_management_app.py @@ -14,7 +14,9 @@ from consts.model import ProcessParams from 
services.file_management_service import upload_to_minio, upload_files_impl, \ get_file_url_impl, get_file_stream_impl, delete_file_impl, list_files_impl, \ - resolve_preview_file, get_preview_stream + resolve_preview_file, get_preview_stream, check_file_access, check_file_access_batch, \ + resolve_minio_upload_folder +from utils.auth_utils import get_current_user_id from utils.file_management_utils import trigger_data_process logger = logging.getLogger("file_management_app") @@ -91,37 +93,49 @@ async def upload_files( folder: str = Form( "attachments", description="Storage folder path for MinIO (optional)"), index_name: Optional[str] = Form( - None, description="Knowledge base index for conflict resolution") + None, description="Knowledge base index for conflict resolution"), + authorization: Optional[str] = Header(None, alias="Authorization") ): - if not file: - raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, - detail="No files in the request") - - errors, uploaded_file_paths, uploaded_filenames = await upload_files_impl(destination, file, folder, index_name) + try: + if not file: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, + detail="No files in the request") - if uploaded_file_paths: - return JSONResponse( - status_code=HTTPStatus.OK, - content={ - "message": f"Files uploaded successfully to {destination}, ready for processing.", - "uploaded_filenames": uploaded_filenames, - "uploaded_file_paths": uploaded_file_paths, - "errors": errors - } + user_id, tenant_id = get_current_user_id(authorization) + errors, uploaded_file_paths, uploaded_filenames = await upload_files_impl( + destination, file, folder, index_name, user_id, uploader_tenant_id=tenant_id ) - else: - raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, - detail="No valid files uploaded") + + if uploaded_file_paths: + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "message": f"Files uploaded successfully to {destination}, ready for processing.", + 
"uploaded_filenames": uploaded_filenames, + "uploaded_file_paths": uploaded_file_paths, + "errors": errors + } + ) + else: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, + detail="No valid files uploaded") + except HTTPException: + raise + except Exception as e: + logger.error(f"File upload error: {str(e)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="File upload error.") @file_management_config_router.post("/process") async def process_files( - files: List[dict] = Body( - ..., description="List of file details to process, including path_or_url and filename"), - chunking_strategy: Optional[str] = Body("basic"), - index_name: str = Body(...), - destination: str = Body(...), - authorization: Optional[str] = Header(None) + files: Annotated[List[dict], Body( + ..., description="List of file details to process, including path_or_url and filename")], + index_name: Annotated[str, Body(...)], + destination: Annotated[str, Body(...)], + chunking_strategy: Annotated[Optional[str], Body(...)] = "basic", + model_id: Annotated[Optional[int], Body(...)] = None, + authorization: Annotated[Optional[str], Header()] = None ): """ Trigger data processing for a list of uploaded files. @@ -134,7 +148,8 @@ async def process_files( chunking_strategy=chunking_strategy, source_type=destination, index_name=index_name, - authorization=authorization + authorization=authorization, + model_id=model_id ) process_result = await trigger_data_process(files, process_params) @@ -168,39 +183,48 @@ async def get_storage_file( "'base64' (return base64-encoded content for images)." 
), ), - expires: int = Query(3600, description="URL validity period (seconds)"), - filename: Optional[str] = Query(None, description="Original filename for download (optional)") + expires: int = Query(86400, description="URL validity period (seconds)"), + filename: Optional[str] = Query(None, description="Original filename for download (optional)"), + authorization: Optional[str] = Header(None, alias="Authorization") ): """ - Get information, download link, or file stream for a single file + Get information, download link, or file stream for a single file. + + Access control: + - knowledge_base/*: All authenticated users can access + - attachments/{user_id}/*: Only the owner (user_id) can access - **object_name**: File object name - **download**: Download mode: ignore (default, return file info), stream (return file stream), redirect (redirect to download URL) - - **expires**: URL validity period in seconds (default 3600) + - **expires**: URL validity period in seconds (default 86400 = 24 hours) - **filename**: Original filename for download (optional, if not provided, will use object_name) Returns file information, download link, or file content """ try: + user_id, tenant_id = get_current_user_id(authorization) + + if not check_file_access(object_name, user_id, tenant_id): + logger.warning(f"[get_storage_file] Access denied: object_name={object_name}, user_id={user_id}") + raise HTTPException( + status_code=HTTPStatus.FORBIDDEN, + detail="You don't have permission to access this file" + ) + logger.info(f"[get_storage_file] Route matched! 
object_name={object_name}, download={download}, filename={filename}") if download == "redirect": - # return a redirect download URL result = await get_file_url_impl(object_name=object_name, expires=expires) return RedirectResponse(url=result["url"]) elif download == "stream": - # return a readable file stream file_stream, content_type = await get_file_stream_impl(object_name=object_name) logger.info(f"Streaming file: object_name={object_name}, content_type={content_type}") - - # Use provided filename or extract from object_name + download_filename = filename if not download_filename: - # Extract filename from object_name (get the last part after the last slash) download_filename = object_name.split("/")[-1] if "/" in object_name else object_name - - # Build Content-Disposition header with proper encoding for non-ASCII characters + content_disposition = build_content_disposition_header(download_filename) - + return StreamingResponse( file_stream, media_type=content_type, @@ -211,7 +235,6 @@ async def get_storage_file( } ) elif download == "base64": - # Return base64 encoded file content (primarily for images) file_stream, content_type = await get_file_stream_impl(object_name=object_name) try: data = file_stream.read() @@ -233,13 +256,13 @@ async def get_storage_file( }, ) else: - # return file metadata return await get_file_url_impl(object_name=object_name, expires=expires) + except HTTPException: + raise except Exception as e: logger.error(f"Failed to get file: object_name={object_name}, error={str(e)}") raise HTTPException( - status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail=f"Failed to get file information: {str(e)}" + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Failed to get file." 
) @@ -248,17 +271,38 @@ async def get_storage_file( async def storage_upload_files( files: List[UploadFile] = File(..., description="List of files to upload"), folder: str = Form( - "attachments", description="Storage folder path (optional)") + "attachments", description="Storage folder path (optional)"), + authorization: Optional[str] = Header(None, alias="Authorization") ): """ - Upload one or more files to MinIO storage + Upload one or more files to MinIO storage. - **files**: List of files to upload - **folder**: Storage folder path (optional, defaults to 'attachments') + Use 'knowledge_base' for shared files accessible by all users. + Other folders (like 'attachments') will be isolated by user_id. Returns upload results including file information and access URLs """ - results = await upload_to_minio(files=files, folder=folder) + try: + user_id, tenant_id = get_current_user_id(authorization) + + actual_folder = resolve_minio_upload_folder(folder, user_id, tenant_id) + results = await upload_to_minio(files=files, folder=actual_folder) + + return { + "message": f"Processed {len(results)} files", + "success_count": sum(1 for r in results if r.get("success", False)), + "failed_count": sum(1 for r in results if not r.get("success", False)), + "results": results + } + except HTTPException: + raise + except Exception as e: + logger.error(f"Storage upload error: {str(e)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Storage upload error." 
+ ) # Return upload results for all files return { @@ -274,10 +318,16 @@ async def get_storage_files( prefix: str = Query("", description="File prefix filter"), limit: int = Query(100, description="Maximum number of files to return"), include_urls: bool = Query( - True, description="Whether to include presigned URLs") + True, description="Whether to include presigned URLs"), + authorization: Optional[str] = Header(None, alias="Authorization") ): """ - Get list of files from MinIO storage + Get list of files from MinIO storage. + + Access control: + - Returns only files the user has permission to access: + - knowledge_base/*: All authenticated users can access + - attachments/{user_id}/*: Only the owner's files - **prefix**: File prefix filter (optional) - **limit**: Maximum number of files to return (default 100) @@ -286,8 +336,22 @@ async def get_storage_files( Returns file list and metadata """ try: + user_id, tenant_id = get_current_user_id(authorization) files = await list_files_impl(prefix, limit) - # Remove URLs if not needed + + if user_id: + filtered_files = [ + f for f in files + if f.get("key") and check_file_access(f.get("key"), user_id, tenant_id) + ] + else: + filtered_files = [ + f for f in files + if f.get("key") and f.get("key", "").startswith("knowledge_base/") + ] + + files = filtered_files + if not include_urls: for file in files: if "url" in file: @@ -297,10 +361,12 @@ async def get_storage_files( "total": len(files), "files": files } + except HTTPException: + raise except Exception as e: + logger.error(f"Get storage files error: {str(e)}") raise HTTPException( - status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail=f"Failed to get file list: {str(e)}" + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get storage files error." 
) @@ -481,7 +547,7 @@ async def download_datamate_file( # Build Content-Disposition header with proper encoding for non-ASCII characters content_disposition = build_content_disposition_header(download_filename) - + return StreamingResponse( iter([response.content]), media_type=content_type, @@ -507,25 +573,41 @@ async def download_datamate_file( @file_management_config_router.delete("/storage/{object_name:path}") async def remove_storage_file( - object_name: str = PathParam(..., description="File object name to delete") + object_name: str = PathParam(..., description="File object name to delete"), + authorization: Optional[str] = Header(None, alias="Authorization") ): """ - Delete file from MinIO storage + Delete file from MinIO storage. + + Access control: + - knowledge_base/*: Only allow deletion (admin operation) + - attachments/{user_id}/*: Only the owner (user_id) can delete - **object_name**: File object name to delete Returns deletion operation result """ try: + user_id, tenant_id = get_current_user_id(authorization) + + if not check_file_access(object_name, user_id, tenant_id): + logger.warning(f"[remove_storage_file] Access denied: object_name={object_name}, user_id={user_id}") + raise HTTPException( + status_code=HTTPStatus.FORBIDDEN, + detail="You don't have permission to delete this file" + ) + await delete_file_impl(object_name=object_name) return { "success": True, "message": f"File {object_name} successfully deleted" } + except HTTPException: + raise except Exception as e: + logger.error(f"Remove storage file error: {str(e)}") raise HTTPException( - status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail=f"Failed to delete file: {str(e)}" + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Remove storage file error." 
) @@ -533,57 +615,83 @@ async def remove_storage_file( async def get_storage_file_batch_urls( request_data: dict = Body(..., description="JSON containing list of file object names"), - expires: int = Query(3600, description="URL validity period (seconds)") + expires: int = Query(3600, description="URL validity period (seconds)"), + authorization: Optional[str] = Header(None, alias="Authorization") ): """ - Batch get download URLs for multiple files (JSON request) + Batch get download URLs for multiple files (JSON request). + + Access control: + - knowledge_base/*: All authenticated users can access + - attachments/{user_id}/*: Only the owner (user_id) can access - **request_data**: JSON request body containing object_names list - - **expires**: URL validity period in seconds (default 3600) + - **expires**: URL validity period in seconds (default 86400 = 24 hours) Returns URL and status information for each file """ - # Extract object_names from request body - object_names = request_data.get("object_names", []) - if not object_names or not isinstance(object_names, list): - raise HTTPException( - status_code=400, detail="Request body must contain object_names array") + try: + user_id, tenant_id = get_current_user_id(authorization) - results = [] + object_names = request_data.get("object_names", []) + if not object_names or not isinstance(object_names, list): + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, detail="Request body must contain object_names array") - for object_name in object_names: - try: - # Get file URL - result = get_file_url_impl( - object_name=object_name, expires=expires) - results.append({ - "object_name": object_name, - "success": result["success"], - "url": result.get("url"), - "error": result.get("error") - }) - except Exception as e: - results.append({ - "object_name": object_name, - "success": False, - "error": str(e) - }) + results = [] - return { - "total": len(results), - "success_count": sum(1 for r in results if 
r.get("success", False)), - "failed_count": sum(1 for r in results if not r.get("success", False)), - "results": results - } + for object_name in object_names: + if not check_file_access(object_name, user_id, tenant_id): + results.append({ + "object_name": object_name, + "success": False, + "error": "Access denied" + }) + continue + + try: + result = get_file_url_impl(object_name=object_name, expires=expires) + results.append({ + "object_name": object_name, + "success": result["success"], + "url": result.get("url"), + "error": result.get("error") + }) + except Exception as e: + results.append({ + "object_name": object_name, + "success": False, + "error": str(e) + }) + + return { + "total": len(results), + "success_count": sum(1 for r in results if r.get("success", False)), + "failed_count": sum(1 for r in results if not r.get("success", False)), + "results": results + } + except HTTPException: + raise + except Exception as e: + logger.error(f"Batch URLs error: {str(e)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Batch URLs error." + ) @file_management_config_router.get("/preview/{object_name:path}") async def preview_file( object_name: str = PathParam(..., description="File object name to preview"), filename: Annotated[Optional[str], Query(description="Original filename for display (optional)")] = None, range_header: Annotated[Optional[str], Header(alias="range")] = None, + authorization: Optional[str] = Header(None, alias="Authorization") ): """ - Preview file inline in browser + Preview file inline in browser. 
+ + Access control: + - knowledge_base/*: All authenticated users can access + - attachments/{user_id}/*: Only the owner (user_id) can access + - attachments/asset_owner/{user_id}/*: ASSET_OWNER virtual tenant and owner only - **object_name**: File object name in storage - **filename**: Original filename for Content-Disposition header (optional) @@ -592,6 +700,15 @@ async def preview_file( Returns 206 Partial Content when a valid Range header is present. """ try: + user_id, tenant_id = get_current_user_id(authorization) + + if not check_file_access(object_name, user_id, tenant_id): + logger.warning(f"[preview_file] Access denied: object_name={object_name}, user_id={user_id}") + raise HTTPException( + status_code=HTTPStatus.FORBIDDEN, + detail="You don't have permission to access this file" + ) + actual_name, content_type, total_size = await resolve_preview_file(object_name=object_name) except FileTooLargeException as e: logger.warning(f"[preview_file] File too large: object_name={object_name}, error={str(e)}") @@ -608,13 +725,15 @@ async def preview_file( except UnsupportedFileTypeException as e: logger.error(f"[preview_file] Unsupported file type: object_name={object_name}, error={str(e)}") raise HTTPException( - status_code=HTTPStatus.BAD_REQUEST, + status_code=HTTPStatus.BAD_REQUEST, detail=f"File format not supported for preview: {str(e)}" ) + except HTTPException: + raise except Exception as e: logger.error(f"[preview_file] Unexpected error: object_name={object_name}, error={str(e)}") raise HTTPException( - status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Failed to preview file" ) diff --git a/backend/apps/haotian_app.py b/backend/apps/haotian_app.py new file mode 100644 index 000000000..c0f3682b5 --- /dev/null +++ b/backend/apps/haotian_app.py @@ -0,0 +1,92 @@ +""" +Haotian App Layer +FastAPI endpoints for Haotian external knowledge base operations. 
+ +This module provides proxy APIs so the frontend does not call external services directly. +""" + +import logging +from http import HTTPStatus +from typing import Optional, Dict + +from fastapi import APIRouter, Header, HTTPException, Body +from fastapi.responses import JSONResponse +from pydantic import BaseModel, Field + +from services.haotian_service import ( + fetch_haotian_knowledge_sets_impl, + test_haotian_connection_impl, +) + +router = APIRouter(prefix="/haotian") +logger = logging.getLogger("haotian_app") + + +class HaotianListRequest(BaseModel): + list_url: str = Field(..., description="Haotian knowledge sets list URL") + authorization: str = Field( + ..., description="Authorization header value, e.g. 'Bearer xxx'" + ) + + +class HaotianTestConnectionRequest(BaseModel): + list_url: str = Field(..., description="Haotian knowledge sets list URL") + authorization: str = Field( + ..., description="Authorization header value, e.g. 'Bearer xxx'" + ) + + +@router.post("/knowledge-sets") +async def fetch_haotian_knowledge_sets_api( + authorization: Optional[str] = Header(None), + request: HaotianListRequest = Body(...), +) -> JSONResponse: + """ + Fetch knowledge sets from the external Haotian list_url and return a filtered/normalized payload. 
+ """ + _ = authorization + try: + result: Dict[str, any] = await fetch_haotian_knowledge_sets_impl( + list_url=request.list_url, + external_authorization=request.authorization, + ) + return JSONResponse(status_code=HTTPStatus.OK, content=result) + except Exception as e: + logger.error(f"Failed to fetch Haotian knowledge sets: {e}") + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, + detail=f"Failed to fetch Haotian knowledge sets: {str(e)}", + ) + + +@router.post("/test-connection") +async def test_haotian_connection_api( + authorization: Optional[str] = Header(None), + request: HaotianTestConnectionRequest = Body(...), +) -> JSONResponse: + """ + Test connection to Haotian list_url using the provided authorization. + """ + _ = authorization + try: + ok, error_message = await test_haotian_connection_impl( + list_url=request.list_url, + external_authorization=request.authorization, + ) + if ok: + return JSONResponse( + status_code=HTTPStatus.OK, + content={"success": True, "message": "Connection successful"}, + ) + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, + detail=f"Cannot connect to Haotian server: {error_message}", + ) + except HTTPException: + raise + except Exception as e: + logger.error(f"Error testing Haotian connection: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail=f"Error testing Haotian connection: {str(e)}", + ) diff --git a/backend/apps/invitation_app.py b/backend/apps/invitation_app.py index 2aa3edc9e..55bbac998 100644 --- a/backend/apps/invitation_app.py +++ b/backend/apps/invitation_app.py @@ -69,6 +69,12 @@ async def list_invitations_endpoint( status_code=HTTPStatus.UNAUTHORIZED, detail=str(exc) ) + except ValidationError as exc: + logger.warning(f"Invitation list rejected by feature flag: {str(exc)}") + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, + detail=str(exc) + ) except Exception as exc: logger.error(f"Unexpected error retrieving invitation list: {str(exc)}") 
raise HTTPException( @@ -131,6 +137,12 @@ async def create_invitation_endpoint( status_code=HTTPStatus.BAD_REQUEST, detail=str(exc) ) + except ValidationError as exc: + logger.warning(f"Invitation creation rejected by feature flag: {str(exc)}") + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, + detail=str(exc) + ) except DuplicateError as exc: logger.warning(f"Duplicate invitation code: {str(exc)}") raise HTTPException( diff --git a/backend/apps/knowledge_summary_app.py b/backend/apps/knowledge_summary_app.py index e4e11ace9..ab45170fb 100644 --- a/backend/apps/knowledge_summary_app.py +++ b/backend/apps/knowledge_summary_app.py @@ -8,6 +8,7 @@ from consts.model import ChangeSummaryRequest from services.vectordatabase_service import ElasticSearchService, get_vector_db_core from utils.auth_utils import get_current_user_id, get_current_user_info +from utils.config_utils import tenant_config_manager router = APIRouter(prefix="/summary") logger = logging.getLogger("knowledge_summary_app") @@ -31,6 +32,19 @@ async def auto_summary( authorization, http_request) service = ElasticSearchService() + # Get model_id from tenant config if not provided + if model_id is None and tenant_id: + try: + tenant_config = tenant_config_manager.load_config(tenant_id) + model_id_str = tenant_config.get("LLM_ID") + if model_id_str: + model_id = int(model_id_str) + logger.info(f"Using LLM_ID {model_id} from tenant config for auto-summary") + else: + logger.warning(f"No LLM_ID configured for tenant {tenant_id}, summary may be placeholder") + except Exception as e: + logger.warning(f"Failed to get LLM_ID from tenant config: {e}") + return await service.summary_index_name( index_name=index_name, batch_size=batch_size, diff --git a/backend/apps/mcp_management_app.py b/backend/apps/mcp_management_app.py new file mode 100644 index 000000000..cfb0c292a --- /dev/null +++ b/backend/apps/mcp_management_app.py @@ -0,0 +1,302 @@ +import logging +from typing import Optional + +from fastapi 
import APIRouter, Depends, Header, HTTPException, Query, Request +from fastapi.responses import JSONResponse +from http import HTTPStatus + +from consts.exceptions import ( + MCPConnectionError, + McpNotFoundError, + McpValidationError, + UnauthorizedError, +) +from consts.model import ( + RegistryListQuery, + CommunityListRequest, + CommunityPublishRequest, + CommunityUpdateRequest, +) +from services.mcp_management_service import ( + list_community_mcp_services, + list_community_mcp_tag_stats, + list_my_community_mcp_services, + list_registry_mcp_services, + publish_community_mcp_service, + update_community_mcp_service, + delete_community_mcp_service, +) +from utils.auth_utils import get_current_user_info + +router = APIRouter(prefix="/mcp-tools") +logger = logging.getLogger("mcp_management_app") + + +# --------------------------------------------------------------------------- +# Registry Endpoints (MCP Registry - external service) +# --------------------------------------------------------------------------- + +@router.get("/registry/list") +async def list_registry_mcp_services_api( + query: RegistryListQuery = Depends(), + authorization: Optional[str] = Header(None), + http_request: Request = None, +): + """ + List MCP services from the official MCP Registry. 
+ """ + try: + get_current_user_info(authorization, http_request) + + data = await list_registry_mcp_services( + search=query.search, + include_deleted=query.include_deleted, + updated_since=query.updated_since, + version=query.version, + cursor=query.cursor, + limit=query.limit, + ) + return JSONResponse( + status_code=HTTPStatus.OK, + content=data, + ) + except UnauthorizedError as exc: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, + detail=str(exc), + ) + except HTTPException: + raise + except Exception as exc: + logger.error(f"Failed to list MCP registry services: {exc}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to list MCP registry services" + ) + + +# --------------------------------------------------------------------------- +# Community Endpoints +# --------------------------------------------------------------------------- + +@router.get("/community/list") +async def list_community_mcp_services_api( + query: CommunityListRequest = Depends(), + authorization: Optional[str] = Header(None), + http_request: Request = None, +): + """ + List public community MCP services. 
+ """ + try: + get_current_user_info(authorization, http_request) + data = await list_community_mcp_services( + search=query.search, + tag=query.tag, + transport_type=query.transport_type, + cursor=query.cursor, + limit=query.limit, + ) + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success", "data": data}, + ) + except UnauthorizedError as exc: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, + detail=str(exc), + ) + except HTTPException: + raise + except Exception as exc: + logger.error(f"Failed to list MCP community services: {exc}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to list MCP community services" + ) + + +@router.get("/community/tags/stats") +async def list_community_mcp_tag_stats_api( + authorization: Optional[str] = Header(None), + http_request: Request = None, +): + """ + Get community MCP tag statistics. + """ + try: + get_current_user_info(authorization, http_request) + stats = list_community_mcp_tag_stats() + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success", "data": stats}, + ) + except UnauthorizedError as exc: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, + detail=str(exc), + ) + except HTTPException: + raise + except Exception as exc: + logger.error(f"Failed to list community MCP tag stats: {exc}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to list community MCP tag stats" + ) + + +@router.post("/community/publish") +async def publish_community_mcp_service_api( + payload: CommunityPublishRequest, + authorization: Optional[str] = Header(None), + http_request: Request = None, +): + """ + Publish a local MCP service to the community. 
+ """ + try: + user_id, tenant_id, _ = get_current_user_info(authorization, http_request) + community_id = await publish_community_mcp_service( + tenant_id=tenant_id, + user_id=user_id, + mcp_id=payload.mcp_id, + name=payload.name, + description=payload.description, + version=payload.version, + tags=payload.tags, + mcp_server=payload.mcp_server, + config_json=payload.config_json, + ) + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success", "data": {"community_id": community_id}}, + ) + except McpNotFoundError as exc: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc)) + except McpValidationError as exc: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)) + except UnauthorizedError as exc: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, + detail=str(exc), + ) + except HTTPException: + raise + except Exception as exc: + logger.error(f"Failed to publish MCP community service: {exc}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to publish MCP community service" + ) + + +@router.put("/community/update") +async def update_community_mcp_service_api( + payload: CommunityUpdateRequest, + authorization: Optional[str] = Header(None), + http_request: Request = None, +): + """ + Update a community MCP service. 
+ """ + try: + user_id, tenant_id, _ = get_current_user_info(authorization, http_request) + await update_community_mcp_service( + tenant_id=tenant_id, + user_id=user_id, + community_id=payload.community_id, + name=payload.name, + description=payload.description, + tags=payload.tags, + version=payload.version, + registry_json=payload.registry_json, + ) + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success"}, + ) + except McpNotFoundError as exc: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc)) + except McpValidationError as exc: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)) + except UnauthorizedError as exc: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, + detail=str(exc), + ) + except HTTPException: + raise + except Exception as exc: + logger.error(f"Failed to update MCP community service: {exc}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to update MCP community service" + ) + + +@router.delete("/community/delete") +async def delete_community_mcp_service_api( + community_id: int = Query(gt=0), + authorization: Optional[str] = Header(None), + http_request: Request = None, +): + """ + Delete a community MCP service. 
+ """ + try: + user_id, tenant_id, _ = get_current_user_info(authorization, http_request) + await delete_community_mcp_service( + tenant_id=tenant_id, + user_id=user_id, + community_id=community_id, + ) + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success"}, + ) + except McpNotFoundError as exc: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc)) + except UnauthorizedError as exc: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, + detail=str(exc), + ) + except HTTPException: + raise + except Exception as exc: + logger.error(f"Failed to delete MCP community service: {exc}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to delete MCP community service" + ) + + +@router.get("/community/mine") +async def list_my_community_mcp_services_api( + authorization: Optional[str] = Header(None), + http_request: Request = None, +): + """ + List MCP services published by the current user to the community. 
+ """ + try: + _, tenant_id, _ = get_current_user_info(authorization, http_request) + data = await list_my_community_mcp_services(tenant_id=tenant_id) + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success", "data": data}, + ) + except UnauthorizedError as exc: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, + detail=str(exc), + ) + except HTTPException: + raise + except Exception as exc: + logger.error(f"Failed to list my MCP community services: {exc}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to list my MCP community services" + ) diff --git a/backend/apps/model_managment_app.py b/backend/apps/model_managment_app.py index 0a5a04139..53dfebb02 100644 --- a/backend/apps/model_managment_app.py +++ b/backend/apps/model_managment_app.py @@ -33,7 +33,7 @@ from fastapi.responses import JSONResponse from fastapi.encoders import jsonable_encoder from http import HTTPStatus -from typing import List, Optional +from typing import Annotated, List, Optional from services.model_health_service import ( check_model_connectivity, verify_model_config_connectivity, @@ -264,6 +264,7 @@ async def get_model_list(authorization: Optional[str] = Header(None)): Returns each model enriched with repo-qualified `model_name` and a normalized `connect_status` value. """ + try: user_id, tenant_id = get_current_user_id(authorization) logger.debug( @@ -297,7 +298,8 @@ async def get_llm_model_list(authorization: Optional[str] = Header(None)): @router.post("/healthcheck") async def check_model_health( - display_name: str = Query(..., description="Display name to check"), + display_name: Annotated[str, Query(..., description="Display name to check")], + model_type: Annotated[str, Query(..., description="...")], authorization: Optional[str] = Header(None) ): """Check and update model connectivity, returning the latest status. 
@@ -308,7 +310,7 @@ async def check_model_health( """ try: _, tenant_id = get_current_user_id(authorization) - result = await check_model_connectivity(display_name, tenant_id) + result = await check_model_connectivity(display_name, tenant_id, model_type) return JSONResponse(status_code=HTTPStatus.OK, content={ "message": "Successfully checked model connectivity", "data": result @@ -372,7 +374,10 @@ async def manage_check_model_health( f"Start to check model connectivity for tenant, user_id: {user_id}, " f"target_tenant_id: {request.tenant_id}, display_name: {request.display_name}") - result = await check_model_connectivity(request.display_name, request.tenant_id) + result = await check_model_connectivity( + request.display_name, + request.tenant_id + ) return JSONResponse(status_code=HTTPStatus.OK, content={ "message": "Successfully checked model connectivity", "data": result diff --git a/backend/apps/monitoring_app.py b/backend/apps/monitoring_app.py new file mode 100644 index 000000000..f89f4312f --- /dev/null +++ b/backend/apps/monitoring_app.py @@ -0,0 +1,149 @@ +""" +Model Monitoring API endpoints. + +Provides model performance metrics aggregated from model_monitoring_record_t. +Uses an independent database connection pool to avoid impacting business operations. 
+""" + +import logging +from http import HTTPStatus +from typing import Annotated, Any + +from fastapi import APIRouter, Header, HTTPException, Query +from sqlalchemy import text + +from consts.const import ( + ENABLE_TELEMETRY, + MONITORING_DASHBOARD_URL, + MONITORING_PROVIDER, +) +from consts.model import ConversationResponse +from database.client import get_monitoring_db_session +from utils.auth_utils import get_current_user_id + +logger = logging.getLogger("monitoring_app") + +router = APIRouter(prefix="/monitoring") + + +def _normalize_monitoring_provider(value: str | None) -> str: + return str(value or "otlp").strip().lower() + + +def get_monitoring_status() -> dict[str, Any]: + """Return telemetry state and the monitoring UI entrypoint for frontend use.""" + telemetry_enabled = ENABLE_TELEMETRY + provider = _normalize_monitoring_provider(MONITORING_PROVIDER) + dashboard_url = MONITORING_DASHBOARD_URL.strip() or None + + return { + "telemetry_enabled": telemetry_enabled, + "provider": provider, + "dashboard_url": dashboard_url, + "dashboard_port": None, + "dashboard_path": None, + } + + +def _compute_time_range_filter(time_range: str) -> str: + """Convert time_range parameter to SQL timestamp condition.""" + hours = {"24h": 24, "7d": 168, "30d": 720}.get(time_range, 24) + return f"m.create_time >= NOW() - INTERVAL '{hours} hours'" + + +def _query_model_metrics_from_db( + time_range: str, tenant_id: str | None = None +) -> list[dict[str, Any]]: + time_filter = _compute_time_range_filter(time_range) + + tenant_filter = "" + params: dict[str, str] = {} + if tenant_id: + tenant_filter = "AND m.tenant_id = :tenant_id" + params["tenant_id"] = tenant_id + + query_sql = f""" + SELECT + m.model_id, + m.model_name, + MAX(COALESCE(m.model_type, 'llm')) AS model_type, + MAX(COALESCE(m.display_name, split_part(m.model_name, '/', -1), 'Unknown')) AS display_name, + COUNT(*) AS request_count, + ROUND( + COALESCE( + SUM(CASE WHEN m.is_error = TRUE THEN 1 ELSE 0 END)::numeric 
+ * 100.0 / NULLIF(COUNT(*), 0), 0 + ), 2 + ) AS error_rate, + ROUND(AVG(COALESCE(m.request_duration_ms, 0))::numeric, 1) AS avg_duration, + ROUND(AVG(CASE WHEN m.is_streaming = TRUE THEN m.ttft_ms ELSE NULL END)::numeric, 1) AS avg_ttft, + ROUND(AVG(CASE WHEN m.is_streaming = TRUE THEN m.generation_rate ELSE NULL END)::numeric, 1) AS token_generation_rate, + COALESCE(SUM(COALESCE(m.total_tokens, 0)), 0) AS total_tokens + FROM nexent.model_monitoring_record_t m + WHERE {time_filter} + {tenant_filter} + AND m.delete_flag = 'N' + GROUP BY m.model_id, m.model_name + ORDER BY request_count DESC + """ + + try: + with get_monitoring_db_session() as session: + result = session.execute(text(query_sql), params) + rows = result.fetchall() + return [ + { + "model_id": row.model_id, + "model_name": row.model_name, + "model_type": row.model_type, + "display_name": row.display_name, + "request_count": row.request_count, + "error_rate": float(row.error_rate) if row.error_rate else 0, + "avg_duration": float(row.avg_duration) if row.avg_duration else 0, + "avg_ttft": float(row.avg_ttft) if row.avg_ttft else 0, + "token_generation_rate": float(row.token_generation_rate) + if row.token_generation_rate + else 0, + "total_tokens": int(row.total_tokens) if row.total_tokens else 0, + } + for row in rows + ] + except Exception as e: + logger.error(f"Failed to query model metrics from DB: {e}") + return [] + + +@router.get("/models", response_model=ConversationResponse) +async def list_models_endpoint( + time_range: Annotated[str, Query( + description="Time range: 24h, 7d, 30d")] = "24h", + page: Annotated[int, Query(ge=1, description="Page number")] = 1, + page_size: Annotated[int, Query( + ge=1, le=100, description="Items per page")] = 20, + authorization: Annotated[str | None, Header()] = None, +): + """List all models with aggregated monitoring metrics from database.""" + try: + _, tenant_id = get_current_user_id(authorization) + + all_metrics = 
_query_model_metrics_from_db(time_range, tenant_id) + + start = (page - 1) * page_size + end = start + page_size + paginated = all_metrics[start:end] + + return ConversationResponse(code=0, message="success", data=paginated) + except Exception as e: + logger.error(f"Failed to list monitoring models: {str(e)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=str(e)) + + +@router.get("/status", response_model=ConversationResponse) +async def get_monitoring_status_endpoint(): + """Return whether monitoring UI should be shown in the frontend.""" + return ConversationResponse( + code=0, + message="success", + data=get_monitoring_status(), + ) diff --git a/backend/apps/northbound_app.py b/backend/apps/northbound_app.py index 3f1580271..9f3b7e323 100644 --- a/backend/apps/northbound_app.py +++ b/backend/apps/northbound_app.py @@ -1,12 +1,16 @@ import logging from http import HTTPStatus from typing import Optional, Dict, Any +from urllib.parse import urlparse, unquote +import re import uuid -from fastapi import APIRouter, Body, Header, Request, HTTPException, Query -from fastapi.responses import JSONResponse +import httpx +from fastapi import APIRouter, Body, File, Header, HTTPException, Query, Request, UploadFile +from fastapi.responses import JSONResponse, StreamingResponse -from consts.exceptions import LimitExceededError, UnauthorizedError +from consts.exceptions import LimitExceededError, UnauthorizedError, ConversationNotFoundError +from consts.model import ToolParamsRequest from services.northbound_service import ( NorthboundContext, get_conversation_history, @@ -15,16 +19,35 @@ stop_chat, get_agent_info_list, update_conversation_title, + upload_files_for_northbound, ) from utils.auth_utils import validate_bearer_token, get_user_and_tenant_by_access_key +from .file_management_app import build_content_disposition_header + router = APIRouter(prefix="/nb/v1", tags=["northbound"]) __all__ = ["router", "_get_northbound_context"] +def 
_resolve_proxy_download_filename(presigned_url: str, content_disposition: str) -> str: + """Resolve a stable download filename for the northbound file proxy.""" + if content_disposition: + filename_star_match = re.search(r"filename\*=UTF-8''([^;]+)", content_disposition) + if filename_star_match: + return unquote(filename_star_match.group(1)) or "download" + + filename_match = re.search(r'filename="?([^";]+)"?', content_disposition) + if filename_match: + return filename_match.group(1) or "download" + + path = unquote(urlparse(presigned_url).path) + filename = path.split("/")[-1].strip() + return filename or "download" + + async def _get_northbound_context(request: Request) -> NorthboundContext: """ Build northbound context from request. @@ -107,13 +130,119 @@ async def health_check(): return {"status": "healthy", "service": "northbound-api"} -@router.post("/chat/run") +@router.post( + "/chat/attachments/upload", + summary="Upload chat attachments for northbound runs", + description=( + "Upload one or more files for later use in `/nb/v1/chat/run`. " + "Successful uploads return reusable `s3_url` references." 
+ ), +) +async def upload_chat_attachments( + request: Request, + files: list[UploadFile] = File( + ..., + description="List of files to upload", + examples=["report.pdf", "diagram.png"], + ), +): + try: + ctx: NorthboundContext = await _get_northbound_context(request) + return JSONResponse( + status_code=HTTPStatus.OK, + content=await upload_files_for_northbound(ctx=ctx, files=files), + ) + except LimitExceededError as e: + logging.error(f"Too Many Requests: rate limit exceeded: {str(e)}", exc_info=e) + raise HTTPException(status_code=HTTPStatus.TOO_MANY_REQUESTS, + detail="Too Many Requests: rate limit exceeded") + except ValueError as e: + logging.error(f"Invalid northbound upload request: {str(e)}", exc_info=e) + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) + except PermissionError as e: + logging.error(f"Permission denied while uploading northbound files: {str(e)}", exc_info=e) + raise HTTPException(status_code=HTTPStatus.FORBIDDEN, detail=str(e)) + except HTTPException as e: + raise e + except Exception as e: + logging.error(f"Failed to upload northbound files: {str(e)}", exc_info=e) + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Internal Server Error") + + +@router.post( + "/chat/run", + summary="Start a northbound chat run with optional attachments", + description=( + "Run a northbound chat request. Upload attachments first through " + "`/nb/v1/chat/attachments/upload`, then pass the returned `s3_url` values " + "through the `attachments` field." + ), +) async def run_chat( request: Request, - conversation_id: Optional[int] = Body(None, embed=True), - agent_name: str = Body(..., embed=True), - query: str = Body(..., embed=True), - meta_data: Optional[Dict[str, Any]] = Body(None, embed=True), + conversation_id: Optional[int] = Body( + None, + embed=True, + description="Existing conversation ID. 
Omit to create a new conversation.", + examples=[123], + ), + agent_name: str = Body( + ..., + embed=True, + description="Target agent name.", + examples=["general-assistant"], + ), + query: str = Body( + ..., + embed=True, + description="User input to send to the agent.", + examples=["Summarize the uploaded report and list the key risks."], + ), + attachments: Optional[list] = Body( + None, + embed=True, + description="Attachments for the chat. Can be either a list of S3 URL strings" + "or a list of attachment objects with full metadata.", + examples=[["s3://nexent/attachments/user123/20260609_report.pdf"]], + ), + meta_data: Optional[Dict[str, Any]] = Body( + None, + embed=True, + description="Optional metadata passed through for audit and usage logging.", + examples=[{"source": "crm", "ticket_id": "INC-1001"}], + ), + tool_params: Optional[ToolParamsRequest] = Body( + None, + embed=True, + description="Optional request-scoped overrides for tool initialization parameters. " + "Overrides DB-persisted params (ag_tool_instance_t.params) on a per-run basis. " + "Conflict resolution: request value wins over DB value. " + "Structure: agents -> {agent_name} -> tools -> {tool_name} -> {param_name: param_value}. " + "tool_name matching: first by tool.name, then by tool.class_name. " + "Unknown param names cause a ValidationError (400). " + "Metadata-derived fields (e.g., vdb_core, embedding_model) are recalculated " + "from merged params for tools like KnowledgeBaseSearchTool, DifySearchTool, DataMateSearchTool.", + examples=[{ + "agents": { + "common_sense_qa_assistant": { + "tools": { + "analyze_text_file": { + "chunk_size": 4000, + "summary_only": True, + "prompt": "Please provide a concise summary of this document focusing on key facts." 
+ }, + "knowledge_base_search": { + "top_k": 10, + "rerank": True, + "rerank_model_name": "gte-rerank-v2", + "index_names": ["nexent-docs", "faq-index"] + } + } + } + } + }], + ), idempotency_key: Optional[str] = Header(None, alias="Idempotency-Key"), ): try: @@ -123,13 +252,21 @@ async def run_chat( conversation_id=conversation_id, agent_name=agent_name, query=query, + attachments=attachments, meta_data=meta_data, + tool_params=tool_params, idempotency_key=idempotency_key, ) except LimitExceededError as e: logging.error(f"Too Many Requests: rate limit exceeded: {str(e)}", exc_info=e) raise HTTPException(status_code=HTTPStatus.TOO_MANY_REQUESTS, detail="Too Many Requests: rate limit exceeded") + except ValueError as e: + logging.error(f"Invalid northbound chat request: {str(e)}", exc_info=e) + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) + except PermissionError as e: + logging.error(f"Permission denied while running northbound chat: {str(e)}", exc_info=e) + raise HTTPException(status_code=HTTPStatus.FORBIDDEN, detail=str(e)) except HTTPException as e: raise e except Exception as e: @@ -252,9 +389,98 @@ async def update_convs_title( logging.error(f"Too Many Requests: rate limit exceeded: {str(e)}", exc_info=e) raise HTTPException(status_code=HTTPStatus.TOO_MANY_REQUESTS, detail="Too Many Requests: rate limit exceeded") + except ConversationNotFoundError as e: + logging.error(f"Conversation not found while updating title: {str(e)}", exc_info=e) + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e)) except HTTPException as e: raise e except Exception as e: logging.error(f"Failed to update conversation title: {str(e)}", exc_info=e) raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Internal Server Error") + + +@router.get("/file/fetch") +async def fetch_file_from_presigned_url( + presigned_url: str = Query(..., description="Presigned URL from MinIO storage"), +): + """ + Fetch file content from a MinIO 
presigned URL. + + This endpoint acts as a proxy - it downloads the file from MinIO + (which is only accessible from within the container network) and + returns the file content to external callers (e.g., MCP tools). + + The presigned_url parameter should be URL-encoded by the caller. + + NOTE: No authentication required for this endpoint. + """ + if not presigned_url: + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, + detail="presigned_url is required" + ) + + try: + parsed = urlparse(presigned_url) + if parsed.scheme not in ("http", "https"): + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, + detail="Invalid URL scheme. Must be http or https" + ) + except HTTPException: + raise + except Exception as e: + logging.error(f"Invalid presigned_url format: {str(e)}") + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, + detail="Invalid presigned_url format" + ) + + try: + async with httpx.AsyncClient(timeout=httpx.Timeout(30.0)) as client: + response = await client.get(presigned_url) + + if response.status_code != 200: + logging.error(f"Failed to fetch file from presigned_url, status: {response.status_code}") + raise HTTPException( + status_code=HTTPStatus.BAD_GATEWAY, + detail=f"Failed to fetch file from storage, status: {response.status_code}" + ) + + content_type = response.headers.get("Content-Type", "application/octet-stream") + content_disposition = response.headers.get("Content-Disposition", "") + download_filename = _resolve_proxy_download_filename(presigned_url, content_disposition) + + headers = { + "Content-Type": content_type, + "Content-Disposition": build_content_disposition_header(download_filename), + } + + return StreamingResponse( + content=response.aiter_bytes(), + status_code=HTTPStatus.OK, + headers=headers, + media_type=content_type + ) + + except httpx.TimeoutException: + logging.error(f"Timeout fetching file from presigned_url") + raise HTTPException( + status_code=HTTPStatus.GATEWAY_TIMEOUT, + detail="Timeout 
fetching file from storage" + ) + except httpx.RequestError as e: + logging.error(f"Request error fetching file from presigned_url: {str(e)}") + raise HTTPException( + status_code=HTTPStatus.BAD_GATEWAY, + detail=f"Failed to fetch file from storage: {str(e)}" + ) + except HTTPException: + raise + except Exception as e: + logging.error(f"Unexpected error fetching file: {str(e)}", exc_info=e) + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Internal server error" + ) diff --git a/backend/apps/northbound_base_app.py b/backend/apps/northbound_base_app.py index db303e00f..66d937b52 100644 --- a/backend/apps/northbound_base_app.py +++ b/backend/apps/northbound_base_app.py @@ -16,6 +16,7 @@ from apps.app_factory import create_app from .northbound_app import router as northbound_router +from .northbound_knowledge_app import router as northbound_knowledge_router class A2AServerSettings(BaseModel): @@ -49,6 +50,7 @@ class A2AServerSettings(BaseModel): ) northbound_app.include_router(northbound_router) +northbound_app.include_router(northbound_knowledge_router) # ============================================================================= diff --git a/backend/apps/northbound_knowledge_app.py b/backend/apps/northbound_knowledge_app.py new file mode 100644 index 000000000..02739d138 --- /dev/null +++ b/backend/apps/northbound_knowledge_app.py @@ -0,0 +1,505 @@ +import base64 +import logging +from http import HTTPStatus +from typing import Optional, Dict, Any, List, Annotated + +from fastapi import APIRouter, Body, File, Form, Path, Path as PathParam, Query, Request, HTTPException, UploadFile +from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse + +from consts.const import ASSET_OWNER_TENANT_ID, VectorDatabaseType +from consts.exceptions import ( + LimitExceededError, + UnauthorizedError, +) +from consts.model import ProcessParams +from services.file_management_service import ( + upload_files_impl, + get_file_url_impl, 
+ get_file_stream_impl, + check_file_access, +) +from services.northbound_service import NorthboundContext +from services.redis_service import get_redis_service +from services.vectordatabase_service import ElasticSearchService, get_vector_db_core +from utils.auth_utils import generate_session_jwt +from utils.file_management_utils import trigger_data_process + +from .file_management_app import build_content_disposition_header +from .northbound_app import _get_northbound_context + + +logger = logging.getLogger("northbound_knowledge_app") + +router = APIRouter(prefix="/nb/v1/knowledge", tags=["northbound"]) + +__all__ = ["router"] + +RATE_LIMIT_EXCEEDED_DETAIL = "Too Many Requests: rate limit exceeded" + + +async def _require_asset_owner_context(request: Request) -> NorthboundContext: + """Resolve northbound context and ensure the caller belongs to the asset-owner tenant.""" + ctx = await _get_northbound_context(request) + if ctx.tenant_id != ASSET_OWNER_TENANT_ID: + raise HTTPException( + status_code=HTTPStatus.FORBIDDEN, + detail="This endpoint is restricted to asset administrators.", + ) + return ctx + + +@router.get("/indices") +async def get_list_indices( + request: Request, + pattern: Annotated[str, Query( + description="Pattern to match index names")] = "*", +): + """List knowledge bases visible to the asset-owner tenant. + + Restricted to asset administrators (same auth as create_new_index). 
+ """ + try: + ctx = await _require_asset_owner_context(request) + vdb_core = get_vector_db_core(db_type=VectorDatabaseType.ELASTICSEARCH) + return ElasticSearchService.list_indices( + pattern, True, ctx.tenant_id, ctx.user_id, vdb_core + ) + except LimitExceededError as e: + logger.exception("Rate limit exceeded while listing knowledge bases") + raise HTTPException( + status_code=HTTPStatus.TOO_MANY_REQUESTS, + detail=RATE_LIMIT_EXCEEDED_DETAIL) + except UnauthorizedError as e: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, detail=str(e)) + except HTTPException: + raise + except Exception: + logger.exception("Error listing knowledge bases") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Error listing knowledge bases") + + +@router.post("/indices/{index_name}") +async def create_new_index( + request: Request, + index_name: Annotated[str, Path(..., description="Name of the index to create")], + embedding_dim: Annotated[ + Optional[int], + Query(description="Dimension of the embedding vectors"), + ] = None, + body: Annotated[ + Optional[Dict[str, Any]], + Body( + description=( + "Request body with optional fields (ingroup_permission, group_ids, embedding_model_name, preserve_source_file)" + ), + ), + ] = None, +): + """Create a new vector index and store it in the knowledge table. + + Restricted to the asset-owner tenant: only callers whose access key resolves + to the asset-owner tenant are allowed to create knowledge bases through the + northbound API. 
+ """ + try: + ctx = await _require_asset_owner_context(request) + vdb_core = get_vector_db_core(db_type=VectorDatabaseType.ELASTICSEARCH) + + ingroup_permission = None + group_ids = None + embedding_model_name = None + preserve_source_file = None + if body: + ingroup_permission = body.get("ingroup_permission") + group_ids = body.get("group_ids") + embedding_model_name = body.get("embedding_model_name") + preserve_source_file = body.get("preserve_source_file") + + return ElasticSearchService.create_knowledge_base( + knowledge_name=index_name, + embedding_dim=embedding_dim, + vdb_core=vdb_core, + user_id=ctx.user_id, + tenant_id=ctx.tenant_id, + ingroup_permission=ingroup_permission, + group_ids=group_ids, + embedding_model_name=embedding_model_name, + preserve_source_file=preserve_source_file, + ) + except LimitExceededError as e: + logger.exception("Rate limit exceeded while creating index") + raise HTTPException( + status_code=HTTPStatus.TOO_MANY_REQUESTS, + detail=RATE_LIMIT_EXCEEDED_DETAIL) + except UnauthorizedError as e: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, detail=str(e)) + except HTTPException: + raise + except Exception: + logger.exception("Error creating index") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Error creating index") + + +@router.delete("/indices/{index_name}") +async def delete_index( + request: Request, + index_name: Annotated[str, Path(..., description="Name of the index to delete")], +): + """Delete a knowledge base and all related data. + + Restricted to asset administrators (same auth as create_new_index). 
+ """ + logger.debug("Received northbound request to delete knowledge base") + try: + ctx = await _require_asset_owner_context(request) + vdb_core = get_vector_db_core(db_type=VectorDatabaseType.ELASTICSEARCH) + return await ElasticSearchService.full_delete_knowledge_base( + index_name, vdb_core, ctx.user_id + ) + except LimitExceededError as e: + logger.exception("Rate limit exceeded while deleting index") + raise HTTPException( + status_code=HTTPStatus.TOO_MANY_REQUESTS, + detail=RATE_LIMIT_EXCEEDED_DETAIL) + except UnauthorizedError as e: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, detail=str(e)) + except HTTPException: + raise + except Exception: + logger.exception("Error deleting index") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Error deleting index") + + +@router.get("/indices/{index_name}/files") +async def get_index_files( + request: Request, + index_name: Annotated[str, Path(..., description="Name of the index")], +): + """Get all files from an index, including those that are not yet stored in ES. + + Restricted to asset administrators (same auth as get_list_indices). 
+ """ + try: + ctx = await _require_asset_owner_context(request) + vdb_core = get_vector_db_core(db_type=VectorDatabaseType.ELASTICSEARCH) + logger.debug( + "Listing files for index %s, tenant_id=%s, user_id=%s", + index_name, + ctx.tenant_id, + ctx.user_id, + ) + result = await ElasticSearchService.list_files( + index_name, include_chunks=False, vdb_core=vdb_core + ) + return { + "status": "success", + "files": result.get("files", []), + } + except LimitExceededError as e: + logger.exception("Rate limit exceeded while listing files") + raise HTTPException( + status_code=HTTPStatus.TOO_MANY_REQUESTS, + detail=RATE_LIMIT_EXCEEDED_DETAIL) + except UnauthorizedError as e: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, detail=str(e)) + except HTTPException: + raise + except Exception: + logger.exception("Error getting files for index") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Error getting index files") + + +@router.delete("/indices/{index_name}/documents") +async def delete_documents( + request: Request, + index_name: Annotated[str, Path(..., description="Name of the index")], + path_or_url: Annotated[str, Query(..., description="Path or URL of documents to delete")], + scope: Annotated[ + str, + Query( + description=( + "source_only: delete MinIO source only; " + "full: delete ES, MinIO, and Redis records" + ), + ), + ] = "full", +): + """Delete a document by scope. 
Restricted to asset administrators.""" + try: + await _require_asset_owner_context(request) + vdb_core = get_vector_db_core(db_type=VectorDatabaseType.ELASTICSEARCH) + logger.debug( + "Deleting documents for index %s scope=%s", index_name, scope + ) + result = await ElasticSearchService.delete_document_by_scope( + index_name, path_or_url, scope, vdb_core + ) + + if scope == "full": + try: + redis_service = get_redis_service() + redis_cleanup_result = redis_service.delete_document_records( + index_name, path_or_url + ) + result["redis_cleanup"] = redis_cleanup_result + original_message = result.get( + "message", "Documents deleted successfully" + ) + result["message"] = ( + f"{original_message}. " + f"Cleaned up {redis_cleanup_result['total_deleted']} Redis records " + f"({redis_cleanup_result['celery_tasks_deleted']} tasks, " + f"{redis_cleanup_result['cache_keys_deleted']} cache keys)." + ) + if redis_cleanup_result.get("errors"): + result["redis_warnings"] = redis_cleanup_result["errors"] + except Exception as redis_error: + logger.warning( + "Redis cleanup failed for index %s: %s", + index_name, + redis_error, + ) + result["redis_cleanup_error"] = str(redis_error) + original_message = result.get( + "message", "Documents deleted successfully" + ) + result["message"] = ( + f"{original_message}, but Redis cleanup encountered an error: " + f"{str(redis_error)}" + ) + + return result + except ValueError as exc: + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, detail=str(exc) + ) + except LimitExceededError as e: + logger.exception("Rate limit exceeded while deleting documents") + raise HTTPException( + status_code=HTTPStatus.TOO_MANY_REQUESTS, + detail=RATE_LIMIT_EXCEEDED_DETAIL) + except UnauthorizedError as e: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, detail=str(e)) + except HTTPException: + raise + except Exception: + logger.exception("Error deleting documents for index") + raise HTTPException( + 
status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Error deleting documents") + + +@router.post("/file/upload") +async def upload_files( + request: Request, + file: Annotated[List[UploadFile], File(..., alias="file")], + index_name: str = Form(..., description="Knowledge base index"), +): + """Upload files to MinIO and trigger knowledge base data processing. + + Uses chunking_strategy=basic. Restricted to asset administrators + (same auth as create_new_index). + """ + try: + ctx = await _require_asset_owner_context(request) + destination = "minio" + if not file: + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, + detail="No files in the request", + ) + + errors, uploaded_file_paths, uploaded_filenames = await upload_files_impl( + destination, file, None, index_name, ctx.user_id, uploader_tenant_id=ctx.tenant_id + ) + + if uploaded_file_paths: + files = [ + {"path_or_url": path, "filename": name} + for path, name in zip(uploaded_file_paths, uploaded_filenames) + ] + # Internal data-process / ES indexing expects JWT, not northbound API key + internal_jwt = generate_session_jwt(ctx.user_id) + process_params = ProcessParams( + chunking_strategy="basic", + source_type="minio", + index_name=index_name, + authorization=internal_jwt, + ) + process_result = await trigger_data_process(files, process_params) + + if process_result is None or ( + isinstance(process_result, dict) + and process_result.get("status") == "error" + ): + error_message = "Data process service failed" + if isinstance(process_result, dict) and "message" in process_result: + error_message = process_result["message"] + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail=error_message, + ) + + return JSONResponse( + status_code=HTTPStatus.CREATED, + content={ + "message": ( + "Files uploaded and processing triggered successfully" + ), + "uploaded_filenames": uploaded_filenames, + "uploaded_file_paths": uploaded_file_paths, + "errors": errors, + "process_tasks": 
process_result, + }, + ) + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, + detail="No valid files uploaded", + ) + except LimitExceededError as e: + logger.exception("Rate limit exceeded while uploading files") + raise HTTPException( + status_code=HTTPStatus.TOO_MANY_REQUESTS, + detail=RATE_LIMIT_EXCEEDED_DETAIL) + except UnauthorizedError as e: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, detail=str(e)) + except HTTPException: + raise + except Exception: + logger.exception("File upload error") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="File upload error.") + + +@router.get("/file/download/{object_name:path}") +async def get_storage_file( + request: Request, + object_name: str = PathParam(..., description="File object name"), + download: str = Query( + "ignore", + description=( + "How to get the file: " + "'ignore' (default, return file info), " + "'stream' (return file stream), " + "'redirect' (redirect to download URL), " + "'base64' (return base64-encoded content for images)." + ), + ), + expires: int = Query(86400, description="URL validity period (seconds)"), + filename: Optional[str] = Query( + None, description="Original filename for download (optional)"), +): + """Get file information, download link, or file stream. + + Restricted to asset administrators (same auth as create_new_index). 
+ """ + try: + ctx = await _require_asset_owner_context(request) + + if not check_file_access(object_name, ctx.user_id, ctx.tenant_id): + logger.warning( + "[get_storage_file] Access denied: user_id=%s", + ctx.user_id, + ) + raise HTTPException( + status_code=HTTPStatus.FORBIDDEN, + detail="You don't have permission to access this file", + ) + + logger.info( + "[get_storage_file] download=%s", + download, + ) + if download == "redirect": + result = await get_file_url_impl( + object_name=object_name, expires=expires) + return RedirectResponse(url=result["url"]) + if download == "stream": + file_stream, content_type = await get_file_stream_impl( + object_name=object_name) + logger.info( + "Streaming file: object_name=%s, content_type=%s", + object_name, + content_type, + ) + + download_filename = filename + if not download_filename: + download_filename = ( + object_name.split("/")[-1] + if "/" in object_name + else object_name + ) + + content_disposition = build_content_disposition_header( + download_filename) + + return StreamingResponse( + file_stream, + media_type=content_type, + headers={ + "Content-Disposition": content_disposition, + "Cache-Control": "public, max-age=3600", + "ETag": f'"{object_name}"', + }, + ) + if download == "base64": + file_stream, content_type = await get_file_stream_impl( + object_name=object_name) + try: + data = file_stream.read() + except Exception as exc: + logger.error( + "Failed to read file stream for base64: %s", str(exc)) + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to read file content for base64 encoding", + ) + + base64_content = base64.b64encode(data).decode("utf-8") + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "success": True, + "base64": base64_content, + "content_type": content_type, + "object_name": object_name, + }, + ) + return await get_file_url_impl( + object_name=object_name, expires=expires) + except LimitExceededError as e: + logger.error( + "%s: %s", 
+ RATE_LIMIT_EXCEEDED_DETAIL, + str(e), + exc_info=e, + ) + raise HTTPException( + status_code=HTTPStatus.TOO_MANY_REQUESTS, + detail=RATE_LIMIT_EXCEEDED_DETAIL) + except UnauthorizedError as e: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, detail=str(e)) + except HTTPException: + raise + except Exception: + logger.exception("Failed to get file") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to get file.") diff --git a/backend/apps/oauth_app.py b/backend/apps/oauth_app.py new file mode 100644 index 000000000..f05102d0c --- /dev/null +++ b/backend/apps/oauth_app.py @@ -0,0 +1,352 @@ +import logging + +from fastapi import APIRouter, Header, HTTPException, Request +from fastapi.responses import JSONResponse, RedirectResponse +from http import HTTPStatus +from typing import Optional + +from pydantic import ValidationError as PydanticValidationError + +from consts.model import OAuthCompleteRequest +from consts.exceptions import OAuthLinkError, OAuthProviderError, UnauthorizedError +from consts.oauth_providers import get_all_provider_definitions +from database.oauth_account_db import get_oauth_account_by_provider +from services.oauth_service import ( + complete_pending_oauth_account, + create_or_update_oauth_account, + ensure_user_tenant_exists, + exchange_code_for_provider_token, + find_supabase_user_id_by_email, + generate_pending_oauth_token, + get_authorize_url, + get_enabled_providers, + get_pending_oauth_info, + get_provider_user_info, + list_linked_accounts, + parse_state, + unlink_account, +) +from utils.auth_utils import ( + calculate_expires_at, + generate_session_jwt, + get_current_user_id, + get_supabase_admin_client, +) + +logger = logging.getLogger(__name__) +router = APIRouter(prefix="/user/oauth", tags=["oauth"]) + + +@router.get("/providers") +async def get_providers(): + providers = get_enabled_providers() + return JSONResponse( + status_code=HTTPStatus.OK, + content={"message": "success", 
"data": providers}, + ) + + +@router.get("/authorize") +async def authorize(provider: str): + try: + url = get_authorize_url(provider) + return RedirectResponse(url=url, status_code=HTTPStatus.FOUND) + except OAuthProviderError as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) + except Exception as e: + logger.error(f"OAuth authorize failed: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="OAuth authorization failed", + ) + + +@router.get("/link") +async def link(provider: str, authorization: Optional[str] = Header(None)): + if not authorization: + raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail="Not logged in") + + try: + user_id, _ = get_current_user_id(authorization) + url = get_authorize_url(provider, link_user_id=user_id) + return RedirectResponse(url=url, status_code=HTTPStatus.FOUND) + except UnauthorizedError: + raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail="Not logged in") + except OAuthProviderError as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) + except Exception as e: + logger.error(f"OAuth link failed: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="OAuth link failed", + ) + + +@router.get("/callback") +async def callback( + provider: str, + code: str = "", + state: str = "", + error: Optional[str] = None, + error_description: Optional[str] = None, +): + if error: + return JSONResponse( + status_code=HTTPStatus.BAD_REQUEST, + content={ + "message": "OAuth provider returned an error", + "data": { + "oauth_error": error, + "oauth_error_description": error_description or "Unknown error", + }, + }, + ) + + if not code: + return JSONResponse( + status_code=HTTPStatus.BAD_REQUEST, + content={ + "message": "No authorization code received", + "data": { + "oauth_error": "no_code", + "oauth_error_description": "No authorization code received", + }, + }, + ) + + if provider not in 
get_all_provider_definitions(): + return JSONResponse( + status_code=HTTPStatus.BAD_REQUEST, + content={ + "message": "Unsupported OAuth provider", + "data": { + "oauth_error": "unsupported_provider", + "oauth_error_description": f"Provider '{provider}' is not supported", + }, + }, + ) + + state_info = parse_state(state) + link_user_id = state_info.get("link_user_id", "") + + try: + token_data = exchange_code_for_provider_token(provider, code) + provider_access_token = token_data["access_token"] + + user_info = get_provider_user_info( + provider, + provider_access_token, + openid=token_data.get("openid", ""), + ) + + provider_user_id = user_info["id"] + email = user_info["email"] + username = user_info["username"] + + if link_user_id: + supabase_user_id = link_user_id + else: + # First check if this OAuth account is already bound to a user + existing_binding = get_oauth_account_by_provider(provider, provider_user_id) + if existing_binding: + supabase_user_id = existing_binding["user_id"] + else: + supabase_user_id = None + if email: + admin_client = get_supabase_admin_client() + if not admin_client: + raise RuntimeError("Supabase admin client not available") + supabase_user_id = find_supabase_user_id_by_email( + admin_client, + email, + ) + + if not supabase_user_id: + pending_token = generate_pending_oauth_token( + provider=provider, + provider_user_id=provider_user_id, + provider_email=email, + provider_username=username, + ) + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "message": "OAuth account information required", + "data": { + "requires_account_completion": True, + "pending_token": pending_token, + "provider": provider, + "provider_username": username, + "provider_email": email, + "email_required": not bool(email), + }, + }, + ) + + ensure_user_tenant_exists(user_id=supabase_user_id, email=email) + + create_or_update_oauth_account( + user_id=supabase_user_id, + provider=provider, + provider_user_id=provider_user_id, + email=email, + 
username=username, + ) + + expiry_seconds = 3600 + jwt_token = generate_session_jwt(supabase_user_id, expires_in=expiry_seconds) + expires_at = calculate_expires_at(jwt_token) + + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "message": "OAuth login successful", + "data": { + "user": { + "id": str(supabase_user_id), + "email": email, + }, + "session": { + "access_token": jwt_token, + "refresh_token": "", + "expires_at": expires_at, + "expires_in_seconds": expiry_seconds, + }, + }, + }, + ) + + except OAuthLinkError as e: + logger.warning(f"OAuth callback link failed for provider={provider}: {e}") + return JSONResponse( + status_code=HTTPStatus.BAD_REQUEST, + content={ + "message": "OAuth account link failed", + "data": { + "oauth_error": "oauth_account_already_bound", + "oauth_error_description": "OAuth account is already bound to another user", + }, + }, + ) + except Exception as e: + logger.error(f"OAuth callback failed for provider={provider}: {e}") + return JSONResponse( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + content={ + "message": "OAuth login failed", + "data": { + "oauth_error": "callback_failed", + "oauth_error_description": "OAuth login failed", + }, + }, + ) + + +@router.get("/pending") +async def get_pending( + pending_token: Optional[str] = Header(None, alias="X-OAuth-Pending-Token"), +): + try: + pending = get_pending_oauth_info(pending_token or "") + return JSONResponse( + status_code=HTTPStatus.OK, + content={"message": "success", "data": pending}, + ) + except OAuthLinkError as e: + raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail=str(e)) + except OAuthProviderError as e: + raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=str(e)) + except Exception as e: + logger.error(f"Failed to get pending OAuth info: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to get pending OAuth info", + ) + + +@router.post("/complete") +async def complete( + 
request: Request, + pending_token: Optional[str] = Header(None, alias="X-OAuth-Pending-Token"), +): + try: + request_data = OAuthCompleteRequest(**(await request.json())) + result = await complete_pending_oauth_account( + pending_token=pending_token or "", + email=str(request_data.email) if request_data.email else None, + password=request_data.password, + invite_code=request_data.invite_code, + ) + return JSONResponse( + status_code=HTTPStatus.OK, + content={"message": "OAuth account completed", "data": result}, + ) + except OAuthLinkError as e: + status_code = ( + HTTPStatus.CONFLICT + if "Email already exists" in str(e) + else HTTPStatus.BAD_REQUEST + ) + raise HTTPException(status_code=status_code, detail=str(e)) + except PydanticValidationError as e: + raise HTTPException( + status_code=HTTPStatus.UNPROCESSABLE_ENTITY, + detail=e.errors(), + ) + except OAuthProviderError as e: + raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=str(e)) + except Exception as e: + logger.error(f"Failed to complete OAuth account: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to complete OAuth account", + ) + + +@router.get("/accounts") +async def get_accounts(authorization: Optional[str] = Header(None)): + if not authorization: + raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail="Not logged in") + + try: + user_id, _ = get_current_user_id(authorization) + accounts = list_linked_accounts(user_id) + return JSONResponse( + status_code=HTTPStatus.OK, + content={"message": "success", "data": accounts}, + ) + except UnauthorizedError: + raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail="Not logged in") + except Exception as e: + logger.error(f"Failed to get OAuth accounts: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to get OAuth accounts", + ) + + +@router.delete("/accounts/{provider}") +async def delete_account(provider: str, 
authorization: Optional[str] = Header(None)): + if not authorization: + raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail="Not logged in") + + try: + user_id, _ = get_current_user_id(authorization) + unlink_account(user_id, provider) + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "message": "success", + "data": {"provider": provider, "unlinked": True}, + }, + ) + except OAuthLinkError as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) + except UnauthorizedError: + raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail="Not logged in") + except Exception as e: + logger.error(f"Failed to unlink OAuth account: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to unlink OAuth account", + ) diff --git a/backend/apps/prompt_app.py b/backend/apps/prompt_app.py index 7c0b799dc..6b82a5c82 100644 --- a/backend/apps/prompt_app.py +++ b/backend/apps/prompt_app.py @@ -1,11 +1,22 @@ import logging from http import HTTPStatus from typing import Optional -from fastapi import APIRouter, Header, HTTPException, Request -from fastapi.responses import StreamingResponse +from fastapi import APIRouter, Header, Request +from fastapi.responses import JSONResponse, StreamingResponse -from consts.model import GeneratePromptRequest -from services.prompt_service import gen_system_prompt_streamable +from consts.model import ( + GeneratePromptRequest, + OptimizePromptSectionRequest, + OptimizePromptBadCaseRequest, + OptimizePromptFromDebugRequest, +) +from services.prompt_service import ( + gen_system_prompt_streamable, + OptimizeRequest, + OptimizeResult, + PromptOptimizationService, +) +from adapters.exception import NexentCapabilityError from utils.auth_utils import get_current_user_info router = APIRouter(prefix="/prompt") @@ -25,13 +36,160 @@ async def generate_and_save_system_prompt_api( agent_id=prompt_request.agent_id, model_id=prompt_request.model_id, 
task_description=prompt_request.task_description, + prompt_template_id=prompt_request.prompt_template_id, user_id=user_id, tenant_id=tenant_id, language=language, tool_ids=prompt_request.tool_ids, - sub_agent_ids=prompt_request.sub_agent_ids + sub_agent_ids=prompt_request.sub_agent_ids, + knowledge_base_display_names=prompt_request.knowledge_base_display_names, + has_selected_resources=prompt_request.has_selected_resources, ), media_type="text/event-stream") except Exception as e: logger.exception(f"Error occurred while generating system prompt: {e}") - raise HTTPException( - status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Error occurred while generating system prompt.") + raise + + +@router.post("/optimize") +async def optimize_prompt_section_api( + optimize_request: OptimizePromptSectionRequest, + http_request: Request, + authorization: Optional[str] = Header(None) +): + _, tenant_id, language = get_current_user_info( + authorization, http_request) + + service = PromptOptimizationService( + model_id=optimize_request.model_id, + tenant_id=tenant_id, + language=language, + ) + + try: + result = service.optimize( + OptimizeRequest( + agent_id=optimize_request.agent_id, + model_id=optimize_request.model_id, + task_description=optimize_request.task_description, + section_type=optimize_request.section_type, + section_title=optimize_request.section_title, + current_content=optimize_request.current_content, + feedback=optimize_request.feedback, + mode=optimize_request.mode, + start_pos=optimize_request.start_pos, + end_pos=optimize_request.end_pos, + tool_ids=optimize_request.tool_ids, + sub_agent_ids=optimize_request.sub_agent_ids, + knowledge_base_display_names=optimize_request.knowledge_base_display_names, + ) + ) + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "message": "Success", + "data": { + "optimized_content": result.optimized_content, + "section_type": result.section_type, + "section_title": result.section_title, + "original_content": 
result.original_content, + } + }, + headers={"X-Prompt-Source": result.source}, + ) + except NexentCapabilityError as e: + return JSONResponse( + status_code=HTTPStatus.BAD_REQUEST, + content={"message": str(e)}, + ) + except Exception as exc: + logger.exception(f"Error occurred while optimizing prompt section: {exc}") + raise + + +@router.post("/optimize/badcase") +async def optimize_prompt_badcase_api( + badcase_request: OptimizePromptBadCaseRequest, + http_request: Request, + authorization: Optional[str] = Header(None) +): + _, tenant_id, language = get_current_user_info( + authorization, http_request) + + service = PromptOptimizationService( + model_id=badcase_request.model_id, + tenant_id=tenant_id, + language=language, + ) + + try: + result = service.optimize_badcase( + current_content=badcase_request.current_content, + bad_cases=badcase_request.bad_cases, + agent_id=badcase_request.agent_id, + section_type=badcase_request.section_type, + section_title=badcase_request.section_title, + tool_ids=badcase_request.tool_ids, + sub_agent_ids=badcase_request.sub_agent_ids, + knowledge_base_display_names=badcase_request.knowledge_base_display_names, + ) + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "message": "Success", + "data": { + "optimized_content": result.optimized_content, + "section_type": result.section_type, + "section_title": result.section_title, + "original_content": result.original_content, + } + }, + headers={"X-Prompt-Source": result.source}, + ) + except NexentCapabilityError as e: + return JSONResponse( + status_code=HTTPStatus.BAD_REQUEST, + content={"message": str(e)}, + ) + + +@router.post("/optimize/from_debug") +async def optimize_prompt_from_debug_api( + optimize_request: OptimizePromptFromDebugRequest, + http_request: Request, + authorization: Optional[str] = Header(None) +): + _, tenant_id, language = get_current_user_info( + authorization, http_request) + + service = PromptOptimizationService( + 
model_id=optimize_request.model_id, + tenant_id=tenant_id, + language=language, + ) + + try: + result = service.optimize_from_debug( + agent_id=optimize_request.agent_id, + feedback=optimize_request.feedback, + selected=optimize_request.selected, + history=optimize_request.history, + ) + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "message": "Success", + "data": { + "original_full_prompt": result.original_content, + "optimized_full_prompt": result.optimized_content, + } + }, + headers={"X-Prompt-Source": result.source}, + ) + except NexentCapabilityError as e: + return JSONResponse( + status_code=HTTPStatus.BAD_REQUEST, + content={"message": str(e)}, + ) + except Exception as exc: + logger.exception(f"Error occurred while optimizing prompt from debug: {exc}") + raise diff --git a/backend/apps/prompt_template_app.py b/backend/apps/prompt_template_app.py new file mode 100644 index 000000000..0f12bd614 --- /dev/null +++ b/backend/apps/prompt_template_app.py @@ -0,0 +1,143 @@ +import logging +from http import HTTPStatus +from typing import Optional + +from fastapi import APIRouter, Header, HTTPException +from starlette.responses import JSONResponse + +from consts.exceptions import DuplicateError, NotFoundException, ValidationError +from consts.model import PromptTemplateRequest +from services.prompt_template_service import ( + create_prompt_template_impl, + delete_prompt_template_impl, + get_prompt_template_detail_impl, + list_prompt_templates_impl, + update_prompt_template_impl, +) +from utils.auth_utils import get_current_user_id + +router = APIRouter(prefix="/prompt_templates") +logger = logging.getLogger("prompt_template_app") + + +@router.get("") +async def list_prompt_templates_api( + authorization: Optional[str] = Header(None), +): + """List prompt templates for the current user.""" + try: + user_id, tenant_id = get_current_user_id(authorization) + result = list_prompt_templates_impl(tenant_id=tenant_id, user_id=user_id) + return 
JSONResponse(status_code=HTTPStatus.OK, content=result) + except Exception as exc: + logger.error(f"Prompt template list error: {str(exc)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Prompt template list error.", + ) + + +@router.get("/{template_id}") +async def get_prompt_template_api( + template_id: int, + authorization: Optional[str] = Header(None), +): + """Get prompt template detail.""" + try: + user_id, tenant_id = get_current_user_id(authorization) + result = get_prompt_template_detail_impl( + template_id=template_id, + tenant_id=tenant_id, + user_id=user_id, + ) + return JSONResponse(status_code=HTTPStatus.OK, content=result) + except NotFoundException as exc: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc)) + except Exception as exc: + logger.error(f"Prompt template detail error: {str(exc)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Prompt template detail error.", + ) + + +@router.post("") +async def create_prompt_template_api( + request: PromptTemplateRequest, + authorization: Optional[str] = Header(None), +): + """Create a prompt template.""" + try: + user_id, tenant_id = get_current_user_id(authorization) + result = create_prompt_template_impl( + request=request, + tenant_id=tenant_id, + user_id=user_id, + ) + return JSONResponse(status_code=HTTPStatus.OK, content=result) + except DuplicateError as exc: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)) + except ValidationError as exc: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)) + except Exception as exc: + logger.error(f"Prompt template create error: {str(exc)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Prompt template create error.", + ) + + +@router.put("/{template_id}") +async def update_prompt_template_api( + template_id: int, + request: PromptTemplateRequest, + authorization: Optional[str] = 
Header(None), +): + """Update a prompt template.""" + try: + user_id, tenant_id = get_current_user_id(authorization) + result = update_prompt_template_impl( + template_id=template_id, + request=request, + tenant_id=tenant_id, + user_id=user_id, + ) + return JSONResponse(status_code=HTTPStatus.OK, content=result) + except NotFoundException as exc: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc)) + except DuplicateError as exc: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)) + except ValidationError as exc: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)) + except Exception as exc: + logger.error(f"Prompt template update error: {str(exc)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Prompt template update error.", + ) + + +@router.delete("/{template_id}") +async def delete_prompt_template_api( + template_id: int, + authorization: Optional[str] = Header(None), +): + """Delete a prompt template.""" + try: + user_id, tenant_id = get_current_user_id(authorization) + result = delete_prompt_template_impl( + template_id=template_id, + tenant_id=tenant_id, + user_id=user_id, + ) + return JSONResponse(status_code=HTTPStatus.OK, content=result) + except NotFoundException as exc: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc)) + except ValidationError as exc: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)) + except Exception as exc: + logger.error(f"Prompt template delete error: {str(exc)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Prompt template delete error.", + ) diff --git a/backend/apps/remote_mcp_app.py b/backend/apps/remote_mcp_app.py index 0dd6127fd..3993e24ce 100644 --- a/backend/apps/remote_mcp_app.py +++ b/backend/apps/remote_mcp_app.py @@ -6,12 +6,27 @@ from fastapi.responses import JSONResponse, StreamingResponse from http import HTTPStatus -from consts.const 
import NEXENT_MCP_DOCKER_IMAGE, ENABLE_UPLOAD_IMAGE -from consts.exceptions import MCPConnectionError, MCPNameIllegal, MCPContainerError -from consts.model import MCPConfigRequest, MCPUpdateRequest +from consts.const import ENABLE_UPLOAD_IMAGE +from consts.exceptions import ( + MCPConnectionError, + MCPNameIllegal, + MCPContainerError, + McpNotFoundError, + McpValidationError, + McpNameConflictError, + McpPortConflictError, +) +from consts.model import ( + MCPConfigRequest, + AddMcpServiceRequest, + AddContainerMcpServiceRequest, + UpdateMcpServiceRequest, + EnableMcpServiceRequest, + DisableMcpServiceRequest, + HealthcheckMcpServiceRequest, + ListMcpServicesQuery, +) from services.remote_mcp_service import ( - add_remote_mcp_server_list, - delete_remote_mcp_server_list, get_remote_mcp_server_list, check_mcp_health_and_update_db, delete_mcp_by_container_id, @@ -19,8 +34,16 @@ update_remote_mcp_server_list, attach_mcp_container_permissions, get_mcp_record_by_id, + list_mcp_service_tools_by_id, + add_mcp_service, + add_container_mcp_service, + update_mcp_service, + update_mcp_service_enabled, + delete_mcp_service, + check_mcp_service_health, + check_container_port_conflict, + suggest_container_port, ) -from database.remote_mcp_db import check_mcp_name_exists from services.tool_configuration_service import get_tool_from_remote_mcp_server from services.mcp_container_service import MCPContainerManager from utils.auth_utils import get_current_user_info @@ -29,454 +52,388 @@ logger = logging.getLogger("remote_mcp_app") -@router.post("/tools") -async def get_tools_from_remote_mcp( - service_name: str, - mcp_url: str, +# --------------------------------------------------------------------------- +# Tools Endpoint +# --------------------------------------------------------------------------- + +@router.get("/tools") +async def get_tools_from_mcp( + mcp_id: int = Query(..., description="MCP service ID"), authorization: Optional[str] = Header(None), http_request: Request = 
None ): - """ Used to list tool information from the remote MCP server """ + """ + Get tools from MCP server by MCP ID. + """ try: - _, tenant_id, _ = get_current_user_info( - authorization, http_request) - tools_info = await get_tool_from_remote_mcp_server( - mcp_server_name=service_name, - remote_mcp_server=mcp_url, - tenant_id=tenant_id + _, tenant_id, _ = get_current_user_info(authorization, http_request) + + tools_info = await list_mcp_service_tools_by_id( + tenant_id=tenant_id, + mcp_id=mcp_id, ) + return JSONResponse( status_code=HTTPStatus.OK, content={ - "tools": [tool.__dict__ for tool in tools_info], "status": "success"} + "tools": [t.model_dump() if hasattr(t, 'model_dump') else t for t in tools_info], + "status": "success" + } ) + except McpNotFoundError as e: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e)) except MCPConnectionError as e: - logger.error(f"Failed to get tools from remote MCP server: {e}") - raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE, - detail="MCP connection failed") + logger.error(f"Failed to get tools from MCP server: {e}") + raise HTTPException( + status_code=HTTPStatus.SERVICE_UNAVAILABLE, + detail="MCP connection failed" + ) except Exception as e: - logger.error(f"get tools from remote MCP server failed, error: {e}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail="Failed to get tools from remote MCP server.") + logger.error(f"get tools from MCP server failed, error: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to get tools from MCP server." 
+ ) +# --------------------------------------------------------------------------- +# Add Endpoints +# --------------------------------------------------------------------------- + @router.post("/add") -async def add_remote_proxies( - mcp_url: str, - service_name: str, - authorization_token: Optional[str] = Query( - None, description="Authorization token for MCP server authentication (e.g., Bearer token)"), - tenant_id: Optional[str] = Query( - None, description="Tenant ID for filtering (uses auth if not provided)"), +async def add_mcp_service_endpoint( + payload: AddMcpServiceRequest, authorization: Optional[str] = Header(None), http_request: Request = None ): - """ Used to add a remote MCP server """ + """ + Add an MCP service. + Supports both remote MCP (URL-based) and local MCP (record-based). + """ try: - user_id, auth_tenant_id, _ = get_current_user_info( - authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id - effective_tenant_id = tenant_id or auth_tenant_id - await add_remote_mcp_server_list(tenant_id=effective_tenant_id, - user_id=user_id, - remote_mcp_server=mcp_url, - remote_mcp_server_name=service_name, - container_id=None, - authorization_token=authorization_token) + user_id, tenant_id, _ = get_current_user_info(authorization, http_request) + + await add_mcp_service( + tenant_id=tenant_id, + user_id=user_id, + name=payload.name, + description=payload.description, + source=payload.source.value if hasattr(payload.source, 'value') else payload.source, + server_url=payload.server_url, + tags=payload.tags, + authorization_token=payload.authorization_token, + custom_headers=payload.custom_headers, + container_config=payload.container_config, + registry_json=payload.registry_json, + enabled=payload.enabled if payload.enabled is not None else False, + ) + return JSONResponse( status_code=HTTPStatus.OK, - content={"message": "Successfully added remote MCP proxy", - "status": "success"} + content={"message": 
"Successfully added MCP service", "status": "success"} ) except MCPNameIllegal as e: - logger.error(f"Failed to add remote MCP proxy: {e}") - raise HTTPException(status_code=HTTPStatus.CONFLICT, - detail="MCP name already exists") + logger.error(f"Failed to add MCP service: {e}") + raise HTTPException(status_code=HTTPStatus.CONFLICT, detail="MCP name already exists") except MCPConnectionError as e: - logger.error(f"Failed to add remote MCP proxy: {e}") - raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE, - detail="MCP connection failed") + logger.error(f"Failed to add MCP service: {e}") + raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE, detail="MCP connection failed") + except McpValidationError as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) except Exception as e: - logger.error(f"Failed to add remote MCP proxy: {e}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail="Failed to add remote MCP proxy") + logger.error(f"Failed to add MCP service: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to add MCP service" + ) -@router.delete("") -async def delete_remote_proxies( - service_name: str, - mcp_url: str, - tenant_id: Optional[str] = Query( - None, description="Tenant ID for filtering (uses auth if not provided)"), +@router.post("/add-from-config") +async def add_container_mcp_service_endpoint( + payload: AddContainerMcpServiceRequest, authorization: Optional[str] = Header(None), http_request: Request = None ): - """ Used to delete a remote MCP server """ + """ + Add a container-based MCP service with full configuration. + Endpoint path is kept as /add-from-config for backward compatibility. 
+ """ try: - user_id, auth_tenant_id, _ = get_current_user_info( - authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id - effective_tenant_id = tenant_id or auth_tenant_id - await delete_remote_mcp_server_list(tenant_id=effective_tenant_id, - user_id=user_id, - remote_mcp_server=mcp_url, - remote_mcp_server_name=service_name) + user_id, tenant_id, _ = get_current_user_info(authorization, http_request) + + container_info = await add_container_mcp_service( + tenant_id=tenant_id, + user_id=user_id, + name=payload.name, + description=payload.description, + source=payload.source.value if hasattr(payload.source, 'value') else payload.source, + tags=payload.tags, + authorization_token=payload.authorization_token, + registry_json=payload.registry_json, + port=payload.port, + mcp_config=payload.mcp_config, + ) + return JSONResponse( status_code=HTTPStatus.OK, - content={"message": "Successfully deleted remote MCP proxy", - "status": "success"} + content={ + "status": "success", + "data": { + "service_name": container_info.get("service_name"), + "mcp_url": container_info.get("mcp_url"), + "container_id": container_info.get("container_id"), + "container_name": container_info.get("container_name"), + "host_port": container_info.get("host_port"), + }, + }, + ) + + except McpNameConflictError as e: + raise HTTPException(status_code=HTTPStatus.CONFLICT, detail=str(e)) + except McpPortConflictError as e: + raise HTTPException(status_code=HTTPStatus.CONFLICT, detail=str(e)) + except McpValidationError as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) + except MCPContainerError as e: + logger.error(f"Failed to start MCP container service: {e}") + raise HTTPException( + status_code=HTTPStatus.SERVICE_UNAVAILABLE, + detail="Docker service unavailable" + ) + except MCPConnectionError as e: + logger.error(f"MCP connection failed when adding container service: {e}") + raise HTTPException( + 
status_code=HTTPStatus.SERVICE_UNAVAILABLE, + detail="MCP connection failed" ) except Exception as e: - logger.error(f"Failed to delete remote MCP proxy: {e}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail="Failed to delete remote MCP proxy") + logger.error(f"Failed to add container MCP service: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to add container MCP service" + ) + +# --------------------------------------------------------------------------- +# Update Endpoint +# --------------------------------------------------------------------------- @router.put("/update") -async def update_remote_proxy( - update_data: MCPUpdateRequest, +async def update_mcp_service_endpoint( + payload: UpdateMcpServiceRequest, tenant_id: Optional[str] = Query( None, description="Tenant ID for filtering (uses auth if not provided)"), authorization: Optional[str] = Header(None), http_request: Request = None ): - """ Used to update an existing remote MCP server """ + """Update an existing MCP service by ID.""" try: - user_id, auth_tenant_id, _ = get_current_user_info( - authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id + user_id, auth_tenant_id, _ = get_current_user_info(authorization, http_request) effective_tenant_id = tenant_id or auth_tenant_id - await update_remote_mcp_server_list( - update_data=update_data, + + update_mcp_service( tenant_id=effective_tenant_id, - user_id=user_id + user_id=user_id, + mcp_id=payload.mcp_id, + new_name=payload.name, + description=payload.description, + server_url=payload.server_url, + authorization_token=payload.authorization_token, + custom_headers=payload.custom_headers, + tags=payload.tags, ) + return JSONResponse( status_code=HTTPStatus.OK, - content={"message": "Successfully updated remote MCP proxy", - "status": "success"} + content={"message": "Successfully updated MCP service", "status": "success"} ) - 
except MCPNameIllegal as e: - logger.error(f"Failed to update remote MCP proxy: {e}") - raise HTTPException(status_code=HTTPStatus.CONFLICT, - detail=str(e)) - except MCPConnectionError as e: - logger.error(f"Failed to update remote MCP proxy: {e}") - raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE, - detail=str(e)) + + except McpNotFoundError as e: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e)) + except McpValidationError as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) except Exception as e: - logger.error(f"Failed to update remote MCP proxy: {e}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail="Failed to update remote MCP proxy") + logger.error(f"Failed to update MCP service: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to update MCP service" + ) -@router.get("/list") -async def get_remote_proxies( +# --------------------------------------------------------------------------- +# Delete Endpoints +# --------------------------------------------------------------------------- + +@router.delete("/{mcp_id}") +async def delete_mcp_by_id( + mcp_id: int, tenant_id: Optional[str] = Query( None, description="Tenant ID for filtering (uses auth if not provided)"), authorization: Optional[str] = Header(None), http_request: Request = None ): - """ Used to get the list of remote MCP servers """ + """Delete MCP service by ID.""" try: - user_id, auth_tenant_id, _ = get_current_user_info( - authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id + user_id, auth_tenant_id, _ = get_current_user_info(authorization, http_request) effective_tenant_id = tenant_id or auth_tenant_id - remote_mcp_server_list = await get_remote_mcp_server_list( + + await delete_mcp_service( tenant_id=effective_tenant_id, user_id=user_id, - is_need_auth=False + mcp_id=mcp_id ) + return JSONResponse( 
status_code=HTTPStatus.OK, - content={"remote_mcp_server_list": remote_mcp_server_list, - "enable_upload_image": ENABLE_UPLOAD_IMAGE, - "status": "success"} + content={"message": "Successfully deleted MCP service", "status": "success"} ) + except McpNotFoundError as e: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e)) except Exception as e: - logger.error(f"Failed to get remote MCP proxy: {e}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail="Failed to get remote MCP proxy") + logger.error(f"Failed to delete MCP service: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to delete MCP service" + ) -@router.get("/record/{mcp_id}") -async def get_mcp_record( - mcp_id: int, +@router.delete("/container/{container_id}") +async def stop_mcp_container( + container_id: str, tenant_id: Optional[str] = Query( None, description="Tenant ID for filtering (uses auth if not provided)"), authorization: Optional[str] = Header(None), http_request: Request = None ): - """ Get single MCP record by ID """ + """Stop and remove MCP container.""" try: - user_id, auth_tenant_id, _ = get_current_user_info( - authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id + user_id, auth_tenant_id, _ = get_current_user_info(authorization, http_request) effective_tenant_id = tenant_id or auth_tenant_id - mcp_record = await get_mcp_record_by_id( - mcp_id=mcp_id, - tenant_id=effective_tenant_id - ) - - if not mcp_record: + try: + container_manager = MCPContainerManager() + except MCPContainerError as e: + logger.error(f"Failed to initialize container manager: {e}") raise HTTPException( - status_code=HTTPStatus.NOT_FOUND, - detail="MCP record not found" + status_code=HTTPStatus.SERVICE_UNAVAILABLE, + detail="Docker service unavailable" ) - return JSONResponse( - status_code=HTTPStatus.OK, - content={ - "mcp_name": mcp_record.get("mcp_name"), - "mcp_server": 
mcp_record.get("mcp_server"), - "authorization_token": mcp_record.get("authorization_token"), - "status": "success" - } - ) + success = await container_manager.stop_mcp_container(container_id) + + if success: + await delete_mcp_by_container_id( + tenant_id=effective_tenant_id, + user_id=user_id, + container_id=container_id, + ) + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "message": "Container and MCP service stopped successfully", + "status": "success", + }, + ) + else: + return JSONResponse( + status_code=HTTPStatus.NOT_FOUND, + content={"message": "Container not found", "status": "error"}, + ) except HTTPException: raise except Exception as e: - logger.error(f"Failed to get MCP record: {e}") + logger.error(f"Failed to stop container: {e}") raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail="Failed to get MCP record" + detail=f"Failed to stop container: {str(e)}" ) -@router.get("/healthcheck") -async def check_mcp_health( - mcp_url: str, - service_name: str, - tenant_id: Optional[str] = Query( - None, description="Tenant ID for filtering (uses auth if not provided)"), - authorization: Optional[str] = Header(None), - http_request: Request = None -): - """ Used to check the health of the MCP server, the front end can call it, - and automatically update the database status """ - try: - user_id, auth_tenant_id, _ = get_current_user_info( - authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id - effective_tenant_id = tenant_id or auth_tenant_id - await check_mcp_health_and_update_db(mcp_url, service_name, effective_tenant_id, user_id) - return JSONResponse( - status_code=HTTPStatus.OK, - content={"status": "success"} - ) - except MCPConnectionError as e: - logger.error(f"MCP connection failed: {e}") - raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE, - detail="MCP connection failed") - except Exception as e: - logger.error(f"Failed to check the health of 
the MCP server: {e}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail="Failed to check the health of the MCP server") +# --------------------------------------------------------------------------- +# List Endpoints +# --------------------------------------------------------------------------- - -@router.post("/add-from-config") -async def add_mcp_from_config( - mcp_config: MCPConfigRequest, +@router.get("/list") +async def get_mcp_list( tenant_id: Optional[str] = Query( None, description="Tenant ID for filtering (uses auth if not provided)"), authorization: Optional[str] = Header(None), http_request: Request = None ): """ - Add MCP server by starting a container with command+args config. - Similar to Cursor's MCP server configuration format. - - Example request: - { - "mcpServers": { - "12306-mcp": { - "command": "npx", - "args": ["-y", "12306-mcp"], - "env": {"NODE_ENV": "production"} - } - } - } + Get list of MCP services. + Returns remote MCP list with full details including container_id, description, + enabled, source, update_time, tags, container_port, registry_json, config_json, + container_status, and authorization_token. """ try: - user_id, auth_tenant_id, _ = get_current_user_info( - authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id + user_id, auth_tenant_id, _ = get_current_user_info(authorization, http_request) effective_tenant_id = tenant_id or auth_tenant_id - # Initialize container manager - try: - container_manager = MCPContainerManager() - except MCPContainerError as e: - logger.error(f"Failed to initialize container manager: {e}") - raise HTTPException( - status_code=HTTPStatus.SERVICE_UNAVAILABLE, - detail="Docker service unavailable. Please ensure Docker socket is mounted." 
- ) - - results = [] - errors = [] - - for service_name, config in mcp_config.mcpServers.items(): - try: - command = config.command - args = config.args or [] - env_vars = config.env or {} - port = config.port - - if not command: - errors.append(f"{service_name}: command is required") - continue - - if port is None: - errors.append(f"{service_name}: port is required") - continue - - # Check if MCP service name already exists before starting container - if check_mcp_name_exists(mcp_name=service_name, tenant_id=effective_tenant_id): - errors.append(f"{service_name}: MCP name already exists") - continue - - # Build full command to run inside nexent/nexent-mcp image - full_command = [ - "python", - "-m", - "mcp_proxy", - "--host", - "0.0.0.0", - "--port", - str(port), - "--transport", - "streamablehttp", - "--", - command, - *args, - ] - - # Start container - container_info = await container_manager.start_mcp_container( - service_name=service_name, - tenant_id=effective_tenant_id, - user_id=user_id, - env_vars=env_vars, - host_port=port, - image=config.image or NEXENT_MCP_DOCKER_IMAGE, - full_command=full_command, - ) - - # Register to remote MCP server list - await add_remote_mcp_server_list( - tenant_id=effective_tenant_id, - user_id=user_id, - remote_mcp_server=container_info["mcp_url"], - remote_mcp_server_name=service_name, - container_id=container_info["container_id"], - ) - - results.append({ - "service_name": service_name, - "status": "success", - "mcp_url": container_info["mcp_url"], - "container_id": container_info["container_id"], - "container_name": container_info.get("container_name"), - "host_port": container_info.get("host_port") - }) - - except MCPContainerError as e: - logger.error( - f"Failed to start MCP container {service_name}: {e}") - error_str = str(e) - # Check if error is related to image not found - if "not found" in error_str.lower() or "404" in error_str: - errors.append( - f"{service_name}: Image not found - MCP service startup image is 
missing") - else: - errors.append(f"{service_name}: {error_str}") - except Exception as e: - logger.error( - f"Unexpected error adding MCP {service_name}: {e}") - errors.append(f"{service_name}: {str(e)}") - - if errors and not results: - raise HTTPException( - status_code=HTTPStatus.BAD_REQUEST, - detail=f"All MCP servers failed: {errors}" - ) + remote_mcp_list = await get_remote_mcp_server_list( + tenant_id=effective_tenant_id, + user_id=user_id, + is_need_auth=True + ) return JSONResponse( status_code=HTTPStatus.OK, content={ - "message": "MCP servers processed", - "results": results, - "errors": errors if errors else None, + "remote_mcp_server_list": remote_mcp_list, + "enable_upload_image": ENABLE_UPLOAD_IMAGE, "status": "success" } ) - - except HTTPException: - raise except Exception as e: - logger.error(f"Failed to add MCP from config: {e}") + logger.error(f"Failed to get MCP list: {e}") raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail=f"Failed to add MCP servers: {str(e)}" + detail="Failed to get MCP list" ) -@router.delete("/container/{container_id}") -async def stop_mcp_container( - container_id: str, +@router.get("/record/{mcp_id}") +async def get_mcp_record( + mcp_id: int, tenant_id: Optional[str] = Query( None, description="Tenant ID for filtering (uses auth if not provided)"), authorization: Optional[str] = Header(None), http_request: Request = None ): - """ Stop and remove MCP container """ + """Get single MCP record by ID.""" try: - user_id, auth_tenant_id, _ = get_current_user_info( - authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id + user_id, auth_tenant_id, _ = get_current_user_info(authorization, http_request) effective_tenant_id = tenant_id or auth_tenant_id - try: - container_manager = MCPContainerManager() - except MCPContainerError as e: - logger.error(f"Failed to initialize container manager: {e}") - raise HTTPException( - 
status_code=HTTPStatus.SERVICE_UNAVAILABLE, - detail="Docker service unavailable" - ) - - success = await container_manager.stop_mcp_container(container_id) + mcp_record = await get_mcp_record_by_id( + mcp_id=mcp_id, + tenant_id=effective_tenant_id + ) - if success: - # Soft delete the corresponding MCP record (if any) by container ID - await delete_mcp_by_container_id( - tenant_id=effective_tenant_id, - user_id=user_id, - container_id=container_id, - ) - return JSONResponse( - status_code=HTTPStatus.OK, - content={ - "message": "Container and MCP service stopped successfully", - "status": "success", - }, - ) - else: - return JSONResponse( + if not mcp_record: + raise HTTPException( status_code=HTTPStatus.NOT_FOUND, - content={"message": "Container not found", "status": "error"}, + detail="MCP record not found" ) + + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "mcp_name": mcp_record.get("mcp_name"), + "mcp_server": mcp_record.get("mcp_server"), + "authorization_token": mcp_record.get("authorization_token"), + "custom_headers": mcp_record.get("custom_headers"), + "status": "success" + } + ) except HTTPException: raise except Exception as e: - logger.error(f"Failed to stop container: {e}") + logger.error(f"Failed to get MCP record: {e}") raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail=f"Failed to stop container: {str(e)}" + detail="Failed to get MCP record" ) @@ -487,11 +444,10 @@ async def list_mcp_containers( authorization: Optional[str] = Header(None), http_request: Request = None ): - """ List all MCP containers for the current tenant """ + """List all MCP containers for the current tenant.""" try: user_id, auth_tenant_id, _ = get_current_user_info( authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id effective_tenant_id = tenant_id or auth_tenant_id try: @@ -539,11 +495,10 @@ async def get_container_logs( authorization: Optional[str] = Header(None), 
http_request: Request = None ): - """ Get logs from MCP container via SSE stream """ + """Get logs from MCP container via SSE stream.""" try: user_id, auth_tenant_id, _ = get_current_user_info( authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id effective_tenant_id = tenant_id or auth_tenant_id try: @@ -556,12 +511,11 @@ async def get_container_logs( ) async def generate_log_stream(): - """Generate SSE stream of container logs""" + """Generate SSE stream of container logs.""" try: async for log_line in container_manager.stream_container_logs( container_id, tail=tail, follow=follow ): - # Format as SSE: data: {json}\n\n payload = json.dumps( {"logs": log_line, "status": "success"}, ensure_ascii=False @@ -597,7 +551,185 @@ async def generate_log_stream(): ) -# Conditionally add upload-image route based on ENABLE_UPLOAD_IMAGE setting +@router.get("/healthcheck") +async def check_mcp_health( + mcp_id: int = Query(..., description="MCP service ID"), + authorization: Optional[str] = Header(None), + http_request: Request = None +): + """Check MCP service health by ID.""" + try: + user_id, tenant_id, _ = get_current_user_info(authorization, http_request) + + health_status = await check_mcp_service_health( + tenant_id=tenant_id, + user_id=user_id, + mcp_id=mcp_id, + ) + + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success", "data": {"health_status": health_status}} + ) + except McpNotFoundError as e: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e)) + except McpValidationError as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) + except MCPConnectionError as e: + logger.error(f"MCP connection failed: {e}") + raise HTTPException( + status_code=HTTPStatus.SERVICE_UNAVAILABLE, + detail=str(e) or "MCP connection failed" + ) + except Exception as e: + logger.error(f"Failed to check MCP health: {e}") + raise HTTPException( + 
status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to check MCP health" + ) + + +# --------------------------------------------------------------------------- +# Port Management Endpoints +# --------------------------------------------------------------------------- + +@router.get("/port/check") +async def check_mcp_port( + port: int = Query(..., ge=1, le=65535), + authorization: Optional[str] = Header(None), + http_request: Request = None +): + """Check if a port is available for MCP container.""" + try: + get_current_user_info(authorization, http_request) + available = check_container_port_conflict(port=port) + no_cache_headers = { + "Cache-Control": "no-cache, no-store, must-revalidate", + "Pragma": "no-cache", + "Expires": "0", + } + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success", "data": {"available": available}}, + headers=no_cache_headers + ) + except McpValidationError as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) + except Exception as e: + logger.error(f"Failed to check MCP port: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to check MCP port" + ) + + +@router.get("/port/suggest") +async def suggest_mcp_port( + authorization: Optional[str] = Header(None), + http_request: Request = None +): + """Suggest an available port for MCP container.""" + try: + get_current_user_info(authorization, http_request) + port = suggest_container_port() + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success", "data": {"port": port}} + ) + except McpPortConflictError as e: + raise HTTPException(status_code=HTTPStatus.CONFLICT, detail=str(e)) + except Exception as e: + logger.error(f"Failed to suggest MCP port: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to suggest MCP port" + ) + + +# --------------------------------------------------------------------------- +# Enable/Disable 
Endpoints +# --------------------------------------------------------------------------- + +@router.post("/enable") +async def enable_mcp_service( + payload: EnableMcpServiceRequest, + authorization: Optional[str] = Header(None), + http_request: Request = None +): + """Enable an MCP service by ID.""" + try: + user_id, tenant_id, _ = get_current_user_info(authorization, http_request) + + await update_mcp_service_enabled( + tenant_id=tenant_id, + user_id=user_id, + mcp_id=payload.mcp_id, + enabled=True, + ) + + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success"} + ) + except McpNotFoundError as e: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e)) + except McpNameConflictError as e: + raise HTTPException(status_code=HTTPStatus.CONFLICT, detail=str(e)) + except McpPortConflictError as e: + raise HTTPException(status_code=HTTPStatus.CONFLICT, detail=str(e)) + except McpValidationError as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) + except MCPConnectionError as e: + logger.error(f"MCP connection failed while enabling service: {e}") + raise HTTPException( + status_code=HTTPStatus.SERVICE_UNAVAILABLE, + detail="MCP connection failed" + ) + except Exception as e: + logger.error(f"Failed to enable MCP service: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to update MCP service status" + ) + + +@router.post("/disable") +async def disable_mcp_service( + payload: DisableMcpServiceRequest, + authorization: Optional[str] = Header(None), + http_request: Request = None +): + """Disable an MCP service by ID.""" + try: + user_id, tenant_id, _ = get_current_user_info(authorization, http_request) + + await update_mcp_service_enabled( + tenant_id=tenant_id, + user_id=user_id, + mcp_id=payload.mcp_id, + enabled=False, + ) + + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success"} + ) + except McpNotFoundError as e: + raise 
HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e)) + except McpValidationError as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) + except Exception as e: + logger.error(f"Failed to disable MCP service: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to update MCP service status" + ) + + +# --------------------------------------------------------------------------- +# Image Upload Endpoint +# --------------------------------------------------------------------------- + if ENABLE_UPLOAD_IMAGE: @router.post("/upload-image") async def upload_mcp_image( @@ -621,13 +753,10 @@ async def upload_mcp_image( try: user_id, auth_tenant_id, _ = get_current_user_info( authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id effective_tenant_id = tenant_id or auth_tenant_id - # Read file content content = await file.read() - # Call service layer to handle the business logic result = await upload_and_start_mcp_image( tenant_id=effective_tenant_id, user_id=user_id, diff --git a/backend/apps/skill_app.py b/backend/apps/skill_app.py index c9e35b690..a2a3b38cf 100644 --- a/backend/apps/skill_app.py +++ b/backend/apps/skill_app.py @@ -1,23 +1,28 @@ """Skill management HTTP endpoints.""" -import asyncio +from nexent.core.agents.agent_model import ModelConfig import logging -import os -import threading from typing import Any, Dict, List, Optional from fastapi import APIRouter, HTTPException, Query, UploadFile, File, Form, Header from starlette.responses import JSONResponse, StreamingResponse -from pydantic import BaseModel +from http import HTTPStatus +from pydantic import BaseModel, Field +from consts.const import APP_VERSION, STREAMABLE_CONTENT_TYPES from consts.exceptions import SkillException, UnauthorizedError -from services.skill_service import SkillService -from consts.model import SkillInstanceInfoRequest +from services.skill_service import ( + 
SkillService, + skill_creation_task_manager, + stream_skill_creation, + update_skill_list, + get_official_skills_with_status, +) +from consts.model import SkillInstanceInfoRequest, SkillCreateRequest, SkillCreateInteractiveRequest, SkillUpdateRequest, SkillResponse from utils.auth_utils import get_current_user_id, get_current_user_info -from utils.prompt_template_utils import get_skill_creation_simple_prompt_template -from nexent.core.agents.agent_model import ModelConfig -from agents.skill_creation_agent import create_simple_skill_from_request -from nexent.core.utils.observer import MessageObserver +from services.asset_owner_visibility import can_view_skill + +ASSET_OWNER_SKILL_VIEW_DENIED = {"content": "您无权限查看"} logger = logging.getLogger(__name__) @@ -25,52 +30,27 @@ skill_creator_router = APIRouter(prefix="/skills", tags=["nl2skill"]) -class SkillCreateRequest(BaseModel): - """Request model for creating a skill.""" - name: str - description: str - content: str - tool_ids: Optional[List[int]] = [] # Use tool_id list, link to ag_tool_info_t - tool_names: Optional[List[str]] = [] # Alternative: use tool name list, will be converted to tool_ids - tags: Optional[List[str]] = [] - source: Optional[str] = "custom" # official, custom, partner - params: Optional[Dict[str, Any]] = None # Skill config (JSON object) - - -class SkillUpdateRequest(BaseModel): - """Request model for updating a skill.""" - description: Optional[str] = None - content: Optional[str] = None - tool_ids: Optional[List[int]] = None # Use tool_id list - tool_names: Optional[List[str]] = None # Alternative: use tool name list, will be converted to tool_ids - tags: Optional[List[str]] = None - source: Optional[str] = None - params: Optional[Dict[str, Any]] = None - - -class SkillResponse(BaseModel): - """Response model for skill data.""" - skill_id: int - name: str - description: str - content: str - tool_ids: List[int] - tags: List[str] - source: str - params: Optional[Dict[str, Any]] = None - 
created_by: Optional[str] = None - create_time: Optional[str] = None - updated_by: Optional[str] = None - update_time: Optional[str] = None +def _asset_owner_skill_view_denied_response(skill: Optional[Dict[str, Any]], tenant_id: str): + """Return a denial JSONResponse when the caller cannot view an ASSET_OWNER-scoped skill.""" + if skill and not can_view_skill(tenant_id, skill.get("tenant_id")): + return JSONResponse(content=ASSET_OWNER_SKILL_VIEW_DENIED) + return None # List routes first (no path parameters) @router.get("") -async def list_skills() -> JSONResponse: - """List all available skills.""" +async def list_skills( + tenant_id: Optional[str] = Query( + None, description="Tenant ID for super admin to query specific tenant's skills"), + authorization: Optional[str] = Header(None) +) -> JSONResponse: + """List all available skills for the current tenant (or a specific tenant for super admin).""" try: - service = SkillService() - skills = service.list_skills() + _, current_tenant_id = get_current_user_id(authorization) + # Super admin can query a specific tenant's skills; otherwise use current user's tenant + effective_tenant_id = tenant_id if tenant_id else current_tenant_id + service = SkillService(tenant_id=effective_tenant_id) + skills = service.list_skills(tenant_id=effective_tenant_id) return JSONResponse(content={"skills": skills}) except SkillException as e: raise HTTPException(status_code=500, detail=str(e)) @@ -79,6 +59,68 @@ async def list_skills() -> JSONResponse: raise HTTPException(status_code=500, detail="Internal server error") +@router.get("/official") +async def list_official_skills( + tenant_id: Optional[str] = Query( + None, description="Tenant ID for super admin to query specific tenant's skills"), + authorization: Optional[str] = Header(None) +) -> JSONResponse: + """List all official skills with installation status for the current tenant (or a specific tenant for super admin). 
+ + Returns skills that have source='official', each with a status field: + - installable: skill exists globally but not yet installed for this tenant + - installed: skill already exists for this tenant + """ + try: + _, current_tenant_id = get_current_user_id(authorization) + effective_tenant_id = tenant_id if tenant_id else current_tenant_id + skills = get_official_skills_with_status(tenant_id=effective_tenant_id) + return JSONResponse(content={"skills": skills}) + except Exception as e: + logger.error(f"Error listing official skills: {e}") + raise HTTPException(status_code=500, detail="Internal server error") + + +class InstallSkillsRequest(BaseModel): + skill_names: List[str] = Field(..., + description="List of skill names to install") + locale: Optional[str] = Field( + default="en", description="Frontend locale (zh or en)") + + +@router.post("/install") +async def install_skills( + request: InstallSkillsRequest, + tenant_id: Optional[str] = Query( + None, description="Tenant ID for super admin to install skills for a specific tenant"), + authorization: Optional[str] = Header(None) +) -> JSONResponse: + """Install official skills for the current tenant (or a specific tenant for super admin). + + Uses ZIP-based installation for each skill name provided. + Skills that already exist are skipped. 
+ """ + try: + user_id, current_tenant_id = get_current_user_id(authorization) + from services.skill_service import install_skills_from_zip_for_tenant + + effective_tenant_id = tenant_id if tenant_id else current_tenant_id + installed_names = install_skills_from_zip_for_tenant( + skill_names=request.skill_names, + tenant_id=effective_tenant_id, + user_id=user_id, + locale=request.locale + ) + return JSONResponse(content={ + "message": "Skills installed successfully", + "installed": installed_names, + "total": len(installed_names) + }) + except Exception as e: + logger.error(f"Error installing skills: {e}") + raise HTTPException(status_code=500, detail="Internal server error") + + # POST routes @router.post("") async def create_skill( @@ -88,12 +130,13 @@ async def create_skill( """Create a new skill (JSON format).""" try: user_id, tenant_id = get_current_user_id(authorization) - service = SkillService() + service = SkillService(tenant_id=tenant_id) # Convert tool_names to tool_ids if provided tool_ids = request.tool_ids or [] if request.tool_names: - tool_ids = service.repository.get_tool_ids_by_names(request.tool_names, tenant_id) + raise NotImplementedError( + "Tool names are not supported for skill creation") skill_data = { "name": request.name, @@ -102,9 +145,12 @@ async def create_skill( "tool_ids": tool_ids, "tags": request.tags, "source": request.source, - "params": request.params, + "config_schemas": request.config_schemas, + "config_values": request.config_values, + "files": request.files if request.files else [], } - skill = service.create_skill(skill_data, user_id=user_id) + skill = service.create_skill( + skill_data, tenant_id=tenant_id, user_id=user_id) return JSONResponse(content=skill, status_code=201) except UnauthorizedError as e: raise HTTPException(status_code=401, detail=str(e)) @@ -121,7 +167,9 @@ async def create_skill( @router.post("/upload") async def create_skill_from_file( file: UploadFile = File(..., description="SKILL.md file or ZIP 
archive"), - skill_name: Optional[str] = Form(None, description="Optional skill name override"), + skill_name: Optional[str] = Form( + None, description="Optional skill name override"), + source: Optional[str] = Form("自定义", description="Skill source"), authorization: Optional[str] = Header(None) ) -> JSONResponse: """Create a skill from file upload. @@ -132,8 +180,7 @@ async def create_skill_from_file( """ try: user_id, tenant_id = get_current_user_id(authorization) - service = SkillService() - + service = SkillService(tenant_id=tenant_id) content = await file.read() file_type = "auto" @@ -147,34 +194,54 @@ async def create_skill_from_file( file_content=content, skill_name=skill_name, file_type=file_type, + source=source, user_id=user_id, tenant_id=tenant_id ) return JSONResponse(content=skill, status_code=201) except UnauthorizedError as e: + logger.warning(f"Unauthorized: {e}") raise HTTPException(status_code=401, detail=str(e)) except SkillException as e: error_msg = str(e).lower() + logger.warning(f"SkillException: {e}") if "already exists" in error_msg: raise HTTPException(status_code=409, detail=str(e)) raise HTTPException(status_code=400, detail=str(e)) except Exception as e: - logger.error(f"Error creating skill from file: {e}") + logger.error( + f"Unexpected error: {type(e).__name__}: {e}", exc_info=True) raise HTTPException(status_code=500, detail="Internal server error") # Routes with path parameters @router.get("/{skill_name}/files") -async def get_skill_file_tree(skill_name: str) -> JSONResponse: +async def get_skill_file_tree( + skill_name: str, + authorization: Optional[str] = Header(None) +) -> JSONResponse: """Get file tree structure of a skill.""" try: - service = SkillService() + _, tenant_id = get_current_user_id(authorization) + service = SkillService(tenant_id=tenant_id) + skill = service.get_skill(skill_name) + if not skill: + raise HTTPException( + status_code=404, detail=f"Skill not found: {skill_name}") + + denied = 
_asset_owner_skill_view_denied_response(skill, tenant_id) + if denied: + return denied + tree = service.get_skill_file_tree(skill_name) if not tree: - raise HTTPException(status_code=404, detail=f"Skill not found: {skill_name}") + raise HTTPException( + status_code=404, detail=f"Skill not found: {skill_name}") return JSONResponse(content=tree) except HTTPException: raise + except UnauthorizedError as e: + raise HTTPException(status_code=401, detail=str(e)) except SkillException as e: raise HTTPException(status_code=500, detail=str(e)) except Exception as e: @@ -185,7 +252,8 @@ async def get_skill_file_tree(skill_name: str) -> JSONResponse: @router.get("/{skill_name}/files/{file_path:path}") async def get_skill_file_content( skill_name: str, - file_path: str + file_path: str, + authorization: Optional[str] = Header(None) ) -> JSONResponse: """Get content of a specific file within a skill. @@ -194,13 +262,26 @@ async def get_skill_file_content( file_path: Relative path to the file within the skill directory """ try: - service = SkillService() + _, tenant_id = get_current_user_id(authorization) + service = SkillService(tenant_id=tenant_id) + skill = service.get_skill(skill_name) + if not skill: + raise HTTPException( + status_code=404, detail=f"Skill not found: {skill_name}") + + denied = _asset_owner_skill_view_denied_response(skill, tenant_id) + if denied: + return denied + content = service.get_skill_file_content(skill_name, file_path) if content is None: - raise HTTPException(status_code=404, detail=f"File not found: {file_path}") + raise HTTPException( + status_code=404, detail=f"File not found: {file_path}") return JSONResponse(content={"content": content}) except HTTPException: raise + except UnauthorizedError as e: + raise HTTPException(status_code=401, detail=str(e)) except SkillException as e: raise HTTPException(status_code=500, detail=str(e)) except Exception as e: @@ -220,7 +301,7 @@ async def update_skill_from_file( """ try: user_id, tenant_id = 
get_current_user_id(authorization) - service = SkillService() + service = SkillService(tenant_id=tenant_id) content = await file.read() @@ -263,7 +344,7 @@ async def get_skill_instance( try: _, tenant_id = get_current_user_id(authorization) - service = SkillService() + service = SkillService(tenant_id=tenant_id) instance = service.get_skill_instance( agent_id=agent_id, skill_id=skill_id, @@ -277,13 +358,22 @@ async def get_skill_instance( detail=f"Skill instance not found for agent {agent_id} and skill {skill_id}" ) - # Enrich with skill info from ag_skill_info_t (skill_name, skill_description, skill_content, params) - skill = service.get_skill_by_id(skill_id) + # Enrich with skill info from ag_skill_info_t (skill_name, skill_description, skill_content, config_schemas, config_values) + # The instance's per-agent overrides are mapped to config_values for the frontend. + skill = service.get_skill_by_id(skill_id, tenant_id) if skill: instance["skill_name"] = skill.get("name") instance["skill_description"] = skill.get("description", "") instance["skill_content"] = skill.get("content", "") - instance["skill_params"] = skill.get("params") or {} + # Template defaults from YAML-enriched skill + instance["config_schemas"] = skill.get("config_schemas") or [] + instance["config_values"] = skill.get("config_values") or {} + # Per-agent overrides from SkillInstance.config_values override the template defaults + instance_params = instance.get("config_values") or {} + if instance_params: + merged = dict(instance.get("config_values") or {}) + merged.update(instance_params) + instance["config_values"] = merged return JSONResponse(content=instance) except UnauthorizedError as e: @@ -309,10 +399,11 @@ async def update_skill_instance( user_id, tenant_id = get_current_user_id(authorization) # Validate skill exists - service = SkillService() - skill = service.get_skill_by_id(request.skill_id) + service = SkillService(tenant_id=tenant_id) + skill = 
service.get_skill_by_id(request.skill_id, tenant_id) if not skill: - raise HTTPException(status_code=404, detail=f"Skill with ID {request.skill_id} not found") + raise HTTPException( + status_code=404, detail=f"Skill with ID {request.skill_id} not found") # Create or update skill instance instance = service.create_or_update_skill_instance( @@ -322,6 +413,18 @@ async def update_skill_instance( version_no=request.version_no ) + # Enrich with template info so the frontend gets config_schemas and config_values + instance["skill_name"] = skill.get("name") + instance["skill_description"] = skill.get("description", "") + instance["skill_content"] = skill.get("content", "") + instance["config_schemas"] = skill.get("config_schemas") or [] + instance["config_values"] = skill.get("config_values") or {} + instance_params = instance.get("config_values") or {} + if instance_params: + merged = dict(instance.get("config_values") or {}) + merged.update(instance_params) + instance["config_values"] = merged + return JSONResponse(content={"message": "Skill instance updated", "instance": instance}) except UnauthorizedError as e: raise HTTPException(status_code=401, detail=str(e)) @@ -336,7 +439,8 @@ async def update_skill_instance( @router.get("/instance/list") async def list_skill_instances( - agent_id: int = Query(..., description="Agent ID to query skill instances"), + agent_id: int = Query(..., + description="Agent ID to query skill instances"), version_no: int = Query(0, description="Version number (0 for draft)"), authorization: Optional[str] = Header(None) ) -> JSONResponse: @@ -344,7 +448,7 @@ async def list_skill_instances( try: _, tenant_id = get_current_user_id(authorization) - service = SkillService() + service = SkillService(tenant_id=tenant_id) instances = service.list_skill_instances( agent_id=agent_id, @@ -352,14 +456,21 @@ async def list_skill_instances( version_no=version_no ) - # Enrich with skill info from ag_skill_info_t (skill_name, skill_description, 
skill_content, params) + # Enrich with skill info from ag_skill_info_t (skill_name, skill_description, skill_content, config_values) + # Also include config_schemas and config_values from the template (via YAML enrichment). + # The instance's per-agent overrides (config_values) are used as-is for the frontend. for instance in instances: - skill = service.get_skill_by_id(instance.get("skill_id")) + skill = service.get_skill_by_id( + instance.get("skill_id"), tenant_id) if skill: instance["skill_name"] = skill.get("name") instance["skill_description"] = skill.get("description", "") instance["skill_content"] = skill.get("content", "") - instance["skill_params"] = skill.get("params") or {} + # Template defaults from YAML-enriched skill + instance["config_schemas"] = skill.get("config_schemas") or [] + # Per-agent config_values from SkillInstance override template defaults + instance["config_values"] = instance.get( + "config_values") or skill.get("config_values") or {} return JSONResponse(content={"instances": instances}) except UnauthorizedError as e: @@ -369,14 +480,32 @@ async def list_skill_instances( raise HTTPException(status_code=500, detail="Internal server error") +@router.get("/scan_skill") +async def scan_and_update_skill(authorization: Optional[str] = Header(None)): + """Scan local skill directories and update skill list in database.""" + try: + user_id, tenant_id = get_current_user_id(authorization) + await update_skill_list(tenant_id=tenant_id, user_id=user_id) + return JSONResponse( + status_code=HTTPStatus.OK, + content={"message": "Successfully update skill", "status": "success"} + ) + except Exception as e: + logger.error(f"Failed to update skill: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Failed to update skill") + + @router.get("/{skill_name}") -async def get_skill(skill_name: str) -> JSONResponse: +async def get_skill(skill_name: str, authorization: Optional[str] = Header(None)) -> JSONResponse: """Get a 
specific skill by name.""" try: - service = SkillService() - skill = service.get_skill(skill_name) + _, tenant_id = get_current_user_id(authorization) + service = SkillService(tenant_id=tenant_id) + skill = service.get_skill(skill_name, tenant_id=tenant_id) if not skill: - raise HTTPException(status_code=404, detail=f"Skill not found: {skill_name}") + raise HTTPException( + status_code=404, detail=f"Skill not found: {skill_name}") return JSONResponse(content=skill) except HTTPException: raise @@ -399,32 +528,32 @@ async def update_skill( """ try: user_id, tenant_id = get_current_user_id(authorization) - service = SkillService() + service = SkillService(tenant_id=tenant_id) update_data = {} if request.description is not None: update_data["description"] = request.description if request.content is not None: update_data["content"] = request.content - if request.tool_ids is not None: - # Convert tool_names to tool_ids if tool_names provided, else use tool_ids directly - if request.tool_names: - update_data["tool_ids"] = service.repository.get_tool_ids_by_names(request.tool_names, tenant_id) - else: - update_data["tool_ids"] = request.tool_ids - elif request.tool_names is not None: - # Only tool_names provided, convert to tool_ids - update_data["tool_ids"] = service.repository.get_tool_ids_by_names(request.tool_names, tenant_id) if request.tags is not None: update_data["tags"] = request.tags if request.source is not None: update_data["source"] = request.source - if request.params is not None: - update_data["params"] = request.params + if request.config_schemas is not None: + update_data["config_schemas"] = request.config_schemas + if request.config_values is not None: + update_data["config_values"] = request.config_values + if request.files is not None: + update_data["files"] = [f.model_dump() for f in request.files] if not update_data: raise HTTPException(status_code=400, detail="No fields to update") - skill = service.update_skill(skill_name, update_data, 
user_id=user_id) + skill = service.update_skill( + skill_name, + update_data, + tenant_id=tenant_id, + user_id=user_id, + ) return JSONResponse(content=skill) except UnauthorizedError as e: raise HTTPException(status_code=401, detail=str(e)) @@ -446,9 +575,9 @@ async def delete_skill( ) -> JSONResponse: """Delete a skill.""" try: - user_id, _ = get_current_user_id(authorization) - service = SkillService() - service.delete_skill(skill_name, user_id=user_id) + user_id, tenant_id = get_current_user_id(authorization) + service = SkillService(tenant_id=tenant_id) + service.delete_skill(skill_name, tenant_id=tenant_id, user_id=user_id) return JSONResponse(content={"message": f"Skill {skill_name} deleted successfully"}) except UnauthorizedError as e: raise HTTPException(status_code=401, detail=str(e)) @@ -459,12 +588,6 @@ async def delete_skill( raise HTTPException(status_code=500, detail="Internal server error") -class SkillCreateSimpleRequest(BaseModel): - """Request model for interactive skill creation.""" - user_request: str - existing_skill: Optional[Dict[str, Any]] = None - - def _build_model_config_from_tenant(tenant_id: str) -> ModelConfig: """Build ModelConfig from tenant's quick-config LLM model.""" from utils.config_utils import tenant_config_manager, get_model_name_from_config @@ -489,117 +612,66 @@ def _build_model_config_from_tenant(tenant_id: str) -> ModelConfig: ) -@skill_creator_router.post("/create-simple") -async def create_simple_skill( - request: SkillCreateSimpleRequest, +@skill_creator_router.post("/create") +async def create_skill( + request: SkillCreateInteractiveRequest, authorization: Optional[str] = Header(None) ): - """Create a simple skill interactively via LLM agent. + """Create a skill interactively via LLM agent. 
- Loads the skill_creation_simple prompt template, runs an internal agent - with WriteSkillFileTool and ReadSkillMdTool, extracts the block + Loads the skill creation prompt template (simple or complicated based on complexity), + runs an internal agent with WriteSkillFileTool and ReadSkillMdTool, extracts the skill content from the final answer, and streams step progress and token content via SSE. Yields SSE events: - step_count: Current agent step number - skill_content: Token-level content (thinking, code, deep_thinking, tool output) - - final_answer: Complete skill content + - final_answer: Complete skill content with and delimiters - done: Stream completion signal """ - # Message types to stream as skill_content (token-level output) - STREAMABLE_CONTENT_TYPES = frozenset([ - "model_output_thinking", - "model_output_code", - "model_output_deep_thinking", - "tool", - "execution_logs", - ]) - - async def generate(): - import json - try: - _, tenant_id, language = get_current_user_info(authorization) - - template = get_skill_creation_simple_prompt_template( - language, - existing_skill=request.existing_skill - ) + try: + _, tenant_id, user_language = get_current_user_info(authorization) + except Exception as e: + logger.error(f"Unauthorized access attempt: {e}") + raise HTTPException(status_code=401, detail="Unauthorized") + + # Build model config from tenant + model_config = _build_model_config_from_tenant(tenant_id) + + # Get language from request or user preference + lang = request.language or user_language or "zh" + + # Delegate to service layer + task_id, generator = stream_skill_creation( + user_request=request.user_request, + language=lang, + model_config=model_config, + existing_skill=request.existing_skill, + complexity=request.complexity or "simple" + ) + + return StreamingResponse(generator(), media_type="text/event-stream", headers={"X-Task-ID": task_id}) + + +@skill_creator_router.get("/stop/{task_id}") +async def stop_skill_creation( + task_id: str, + 
authorization: Optional[str] = Header(None) +): + """Stop an active skill creation task. + + Args: + task_id: The task ID returned from the /create endpoint (passed via X-Task-ID header) + """ + try: + _, _ = get_current_user_id(authorization) + except Exception as e: + logger.error(f"Unauthorized access attempt: {e}") + raise HTTPException(status_code=401, detail="Unauthorized") + + success = skill_creation_task_manager.stop_task(task_id) - model_config = _build_model_config_from_tenant(tenant_id) - observer = MessageObserver(lang=language) - stop_event = threading.Event() - - # Get local_skills_dir from SkillManager - skill_service = SkillService() - local_skills_dir = skill_service.skill_manager.local_skills_dir or "" - - # Start skill creation in background thread - def run_task(): - create_simple_skill_from_request( - system_prompt=template.get("system_prompt", ""), - user_prompt=request.user_request, - model_config_list=[model_config], - observer=observer, - stop_event=stop_event, - local_skills_dir=local_skills_dir - ) - - thread = threading.Thread(target=run_task) - thread.start() - - # Poll observer for step_count and token content messages - while thread.is_alive(): - cached = observer.get_cached_message() - for msg in cached: - if isinstance(msg, str): - try: - data = json.loads(msg) - msg_type = data.get("type", "") - content = data.get("content", "") - - # Stream step progress - if msg_type == "step_count": - yield f"data: {json.dumps({'type': 'step_count', 'content': content}, ensure_ascii=False)}\n\n" - # Stream token content (thinking, code, deep_thinking, tool output) - elif msg_type in STREAMABLE_CONTENT_TYPES: - yield f"data: {json.dumps({'type': 'skill_content', 'content': content}, ensure_ascii=False)}\n\n" - # Stream final_answer content separately - elif msg_type == "final_answer": - yield f"data: {json.dumps({'type': 'final_answer', 'content': content}, ensure_ascii=False)}\n\n" - except (json.JSONDecodeError, Exception): - pass - await 
asyncio.sleep(0.1) - - thread.join() - - # Stream any remaining cached messages after thread completes - remaining = observer.get_cached_message() - for msg in remaining: - if isinstance(msg, str): - try: - data = json.loads(msg) - msg_type = data.get("type", "") - content = data.get("content", "") - - if msg_type == "step_count": - yield f"data: {json.dumps({'type': 'step_count', 'content': content}, ensure_ascii=False)}\n\n" - elif msg_type in STREAMABLE_CONTENT_TYPES: - yield f"data: {json.dumps({'type': 'skill_content', 'content': content}, ensure_ascii=False)}\n\n" - elif msg_type == "final_answer": - yield f"data: {json.dumps({'type': 'final_answer', 'content': content}, ensure_ascii=False)}\n\n" - except (json.JSONDecodeError, Exception): - pass - - # Stream final answer content from observer - final_result = observer.get_final_answer() - if final_result: - yield f"data: {json.dumps({'type': 'final_answer', 'content': final_result}, ensure_ascii=False)}\n\n" - - # Send done signal - yield f"data: {json.dumps({'type': 'done'}, ensure_ascii=False)}\n\n" - - except Exception as e: - logger.error(f"Error in create_simple_skill stream: {e}") - yield f"data: {json.dumps({'type': 'error', 'message': str(e)}, ensure_ascii=False)}\n\n" - - return StreamingResponse(generate(), media_type="text/event-stream") + if success: + return JSONResponse(content={"status": "success", "message": "Skill creation task stopped"}) + else: + return JSONResponse(content={"status": "not_found", "message": "Task not found or already completed"}, status_code=404) diff --git a/backend/apps/tenant_app.py b/backend/apps/tenant_app.py index e0d612902..291cd22fa 100644 --- a/backend/apps/tenant_app.py +++ b/backend/apps/tenant_app.py @@ -49,7 +49,10 @@ async def create_tenant_endpoint( # Create tenant tenant_info = create_tenant( tenant_name=request.tenant_name, - created_by=user_id + created_by=user_id, + skill_ids=request.skill_ids, + skill_names=request.skill_names, + locale=request.locale, 
) logger.info(f"Created tenant {tenant_info['tenant_id']} by user {user_id}") diff --git a/backend/apps/tool_config_app.py b/backend/apps/tool_config_app.py index f0b7f9304..bfc8d5ca0 100644 --- a/backend/apps/tool_config_app.py +++ b/backend/apps/tool_config_app.py @@ -160,12 +160,14 @@ async def import_openapi_service_api( server_url: Base URL of the REST API server openapi_json: Complete OpenAPI JSON specification service_description: Optional service description + headers_template: Optional default headers template force_update: If True, replace all existing tools for this service """ service_name = openapi_service_request.get("service_name") server_url = openapi_service_request.get("server_url") openapi_json = openapi_service_request.get("openapi_json") service_description = openapi_service_request.get("service_description") + headers_template = openapi_service_request.get("headers_template") force_update = openapi_service_request.get("force_update", False) if not service_name: @@ -192,6 +194,7 @@ async def import_openapi_service_api( tenant_id=tenant_id, user_id=user_id, service_description=service_description, + headers_template=headers_template, force_update=force_update ) diff --git a/backend/apps/user_management_app.py b/backend/apps/user_management_app.py index d50cdc1f0..e79fde887 100644 --- a/backend/apps/user_management_app.py +++ b/backend/apps/user_management_app.py @@ -8,18 +8,29 @@ from supabase_auth.errors import AuthApiError, AuthWeakPasswordError -from consts.model import UserSignInRequest, UserSignUpRequest -from consts.exceptions import NoInviteCodeException, IncorrectInviteCodeException, UserRegistrationException +from consts.const import ASSET_OWNER_SIGNUP_USE_OAUTH_DETAIL +from consts.model import UserSignInRequest, UserSignUpRequest, UpdatePasswordRequest +from consts.exceptions import ( + NoInviteCodeException, + IncorrectInviteCodeException, + UserRegistrationException, + AppException, + UnauthorizedError, + ValidationError, +) +from 
consts.error_code import ErrorCode +from services.cas_service import build_logout_url, CasAuthenticationError from services.user_management_service import get_authorized_client, validate_token, \ check_auth_service_health, signup_user_with_invitation, signin_user, refresh_user_token, \ - get_session_by_authorization, get_user_info, create_token, list_tokens_by_user, delete_token + get_session_by_authorization, get_user_info, create_token, list_tokens_by_user, delete_token, \ + update_password from services.user_service import delete_user_and_cleanup -from consts.exceptions import UnauthorizedError -from utils.auth_utils import get_current_user_id +from utils.auth_utils import get_current_user_id, extract_session_id_from_authorization load_dotenv() logging.getLogger("httpx").setLevel(logging.WARNING) +logger = logging.getLogger("user_management_app") router = APIRouter(prefix="/user", tags=["user"]) @@ -33,10 +44,12 @@ async def service_health(): content={"message": "Auth service is available"}) except ConnectionError as e: logging.error(f"Auth service health check failed: {str(e)}") - raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE, detail="Auth service is unavailable") + raise HTTPException( + status_code=HTTPStatus.SERVICE_UNAVAILABLE, detail="Auth service is unavailable") except Exception as e: logging.error(f"Auth service health check failed: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Auth service is unavailable") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Auth service is unavailable") @router.post("/signup") @@ -49,7 +62,7 @@ async def signup(request: UserSignUpRequest): auto_login=request.auto_login) success_message = "🎉 User account registered successfully! Please start experiencing the AI assistant service." 
return JSONResponse(status_code=HTTPStatus.OK, - content={"message":success_message, "data":user_data}) + content={"message": success_message, "data": user_data}) except NoInviteCodeException as e: logging.error(f"User registration failed by invite code: {str(e)}") raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, @@ -58,18 +71,28 @@ async def signup(request: UserSignUpRequest): logging.error(f"User registration failed by invite code: {str(e)}") raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="INVITE_CODE_INVALID") + except ValidationError as e: + detail = str(e) + if detail == ASSET_OWNER_SIGNUP_USE_OAUTH_DETAIL: + logging.warning( + "User registration rejected: asset owner invite requires OAuth") + else: + logging.warning( + f"User registration rejected by validation: {detail}") + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=detail) except UserRegistrationException as e: - logging.error(f"User registration failed by registration service: {str(e)}") + logging.error( + f"User registration failed by registration service: {str(e)}") raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="REGISTRATION_SERVICE_ERROR") - except AuthApiError as e: - logging.error(f"User registration failed by email already exists: {str(e)}") - raise HTTPException(status_code=HTTPStatus.CONFLICT, - detail="EMAIL_ALREADY_EXISTS") except AuthWeakPasswordError as e: logging.error(f"User registration failed by weak password: {str(e)}") - raise HTTPException(status_code=HTTPStatus.NOT_ACCEPTABLE, + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail="WEAK_PASSWORD") + except AuthApiError as e: + logging.error(f"User registration failed by auth error: {str(e)}") + raise HTTPException(status_code=HTTPStatus.CONFLICT, + detail="EMAIL_ALREADY_EXISTS") except Exception as e: logging.error(f"User registration failed, unknown error: {str(e)}") raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, @@ 
-81,13 +104,16 @@ async def signin(request: UserSignInRequest): """User login""" try: signin_content = await signin_user(email=request.email, - password=request.password) + password=request.password) return JSONResponse(status_code=HTTPStatus.OK, content=signin_content) except AuthApiError as e: logging.error(f"User login failed: {str(e)}") raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail="Email or password error") + except ValidationError as e: + logging.warning(f"User login rejected by feature flag: {str(e)}") + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) except Exception as e: logging.error(f"User login failed, unknown error: {str(e)}") raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, @@ -108,7 +134,7 @@ async def user_refresh_token(request: Request): raise ValueError("No refresh token provided") session_info = await refresh_user_token(authorization, refresh_token) return JSONResponse(status_code=HTTPStatus.OK, - content={"message":"Token refresh successful", "data":{"session": session_info}}) + content={"message": "Token refresh successful", "data": {"session": session_info}}) except ValueError as e: logging.error(f"Refresh token failed: {str(e)}") raise HTTPException(status_code=HTTPStatus.UNPROCESSABLE_ENTITY, @@ -125,7 +151,18 @@ async def logout(request: Request): authorization = request.headers.get("Authorization") try: # Make logout idempotent: if no token or token expired, still return success + session_id = None + cas_logout_url = "" if authorization: + session_id = extract_session_id_from_authorization(authorization) + if session_id: + from database.cas_session_db import revoke_cas_session_by_session_id + + revoke_cas_session_by_session_id(session_id, actor="user") + try: + cas_logout_url = build_logout_url() + except CasAuthenticationError as cas_err: + logging.warning(f"CAS logout URL is unavailable: {str(cas_err)}") client = get_authorized_client(authorization) try: client.auth.sign_out() @@ 
-134,7 +171,12 @@ async def logout(request: Request): logging.warning( f"Sign out encountered an error but will be ignored: {str(signout_err)}") return JSONResponse(status_code=HTTPStatus.OK, - content={"message":"Logout successful"}) + content={ + "message": "Logout successful", + "data": { + "cas_logout_url": cas_logout_url + } + }) except Exception as e: logging.error(f"User logout failed: {str(e)}") @@ -154,8 +196,8 @@ async def get_session(request: Request): try: data = await get_session_by_authorization(authorization) return JSONResponse(status_code=HTTPStatus.OK, - content={"message": "Session is valid", - "data": data}) + content={"message": "Session is valid", + "data": data}) except UnauthorizedError as e: logging.error(f"Get user session unauthorized: {str(e)}") raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, @@ -189,6 +231,10 @@ async def get_user_information(request: Request): if not user_info: raise UnauthorizedError("User information not found") + user_info["user"]["auth_provider"] = ( + "cas" if extract_session_id_from_authorization(authorization) else "local" + ) + return JSONResponse(status_code=HTTPStatus.OK, content={"message": "Success", "data": user_info}) @@ -276,6 +322,7 @@ async def revoke_user_account(request: Request): raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="User revoke failed") + @router.post("/tokens") async def create_token_endpoint( authorization: Optional[str] = Header(None) @@ -379,3 +426,49 @@ async def delete_token_endpoint( logging.error(f"Failed to delete token: {str(e)}", exc_info=e) raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Internal Server Error") + + +@router.put("/password") +async def update_password_endpoint( + request: UpdatePasswordRequest, + authorization: Optional[str] = Header(None) +): + """Update current user's password. + + This endpoint requires the user to provide their current password for verification + before setting a new password. 
+ """ + try: + if not authorization: + raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, + detail="Unauthorized: No authorization token provided") + + user_id, _ = get_current_user_id(authorization) + if not user_id: + raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, + detail="Unauthorized: missing user_id in JWT token") + + await update_password( + user_id=str(user_id), + old_password=request.old_password, + new_password=request.new_password + ) + + logger.info(f"Password updated successfully for user {user_id}") + + return JSONResponse( + status_code=HTTPStatus.OK, + content={"message": "Password updated successfully"} + ) + + except UnauthorizedError as e: + logger.warning(f"Password update unauthorized for user: {str(e)}") + raise AppException(ErrorCode.PROFILE_INVALID_CREDENTIALS, str(e)) + except AppException as e: + logger.warning( + f"Password update business error: {e.error_code} - {str(e)}") + raise e # Let app_exception_handler format the response + except Exception as e: + logging.error(f"Failed to update password: {str(e)}", exc_info=e) + raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Internal Server Error") diff --git a/backend/apps/vectordatabase_app.py b/backend/apps/vectordatabase_app.py index 872b5387b..505c39559 100644 --- a/backend/apps/vectordatabase_app.py +++ b/backend/apps/vectordatabase_app.py @@ -1,29 +1,51 @@ import logging import json from http import HTTPStatus -from typing import Any, Dict, List, Optional +from typing import Annotated, Any, Dict, List, Optional from fastapi import APIRouter, Body, Depends, Header, HTTPException, Path, Query from fastapi.responses import JSONResponse import re +from consts.const import ASSET_OWNER_TENANT_ID, PERMISSION_READ from consts.model import ChunkCreateRequest, ChunkUpdateRequest, HybridSearchRequest, IndexingResponse +from consts.scheduler import VALID_SUMMARY_FREQUENCIES, SUMMARY_FREQUENCY_OPTIONS_FOR_API from nexent.vector_database.base import 
VectorDatabaseCore from services.vectordatabase_service import ( ElasticSearchService, - get_embedding_model, + get_embedding_model_by_id, get_vector_db_core, check_knowledge_base_exist_impl, + KnowledgeBaseNeedsModelConfigError, ) +from services.file_management_service import check_file_access from services.redis_service import get_redis_service from utils.auth_utils import get_current_user_id from utils.file_management_utils import get_all_files_status from database.knowledge_db import get_index_name_by_knowledge_name, get_knowledge_record +from database.model_management_db import get_model_by_model_id router = APIRouter(prefix="/indices") service = ElasticSearchService() logger = logging.getLogger("vectordatabase_app") +INTERNAL_INDEX_NAME_DESC = "Internal index_name from knowledge_record_t" + + +@router.get("/summary_frequency_options") +async def get_summary_frequency_options(): + """ + Get valid summary frequency options for frontend. + Frontend should call this API to get the list of valid frequencies. 
+ """ + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "options": SUMMARY_FREQUENCY_OPTIONS_FOR_API, + "valid_values": VALID_SUMMARY_FREQUENCIES, + } + ) + @router.post("/check_exist") async def check_knowledge_base_exist( @@ -54,7 +76,7 @@ def create_new_index( embedding_dim: Optional[int] = Query( None, description="Dimension of the embedding vectors"), request: Dict[str, Any] = Body( - None, description="Request body with optional fields (ingroup_permission, group_ids, embedding_model_name)"), + None, description="Request body with optional fields (ingroup_permission, group_ids, embedding_model_name, preserve_source_file)"), vdb_core: VectorDatabaseCore = Depends(get_vector_db_core), authorization: Optional[str] = Header(None) ): @@ -65,11 +87,15 @@ def create_new_index( # Extract optional fields from request body ingroup_permission = None group_ids = None - embedding_model_name = None + embedding_model_name: Optional[str] = None + is_multimodal: Optional[bool] = None + preserve_source_file: Optional[bool] = None if request: ingroup_permission = request.get("ingroup_permission") group_ids = request.get("group_ids") - embedding_model_name = request.get("embedding_model_name") + embedding_model_name = request.get("embeddingModel") + is_multimodal = request.get("is_multimodal") + preserve_source_file = request.get("preserve_source_file") # Treat path parameter as user-facing knowledge base name for new creations return ElasticSearchService.create_knowledge_base( @@ -81,6 +107,8 @@ def create_new_index( ingroup_permission=ingroup_permission, group_ids=group_ids, embedding_model_name=embedding_model_name, + is_multimodal=is_multimodal, + preserve_source_file=preserve_source_file, ) except Exception as e: raise HTTPException( @@ -160,6 +188,222 @@ async def update_index( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error updating index: {str(exc)}") +@router.patch("/{index_name}/summary_frequency") +async def 
update_summary_frequency_endpoint( + index_name: Annotated[str, Path(..., description="Name of the index to update")], + request: Annotated[Dict[str, Any], Body(..., description="Update payload with summary_frequency")], + authorization: Annotated[Optional[str], Header()] = None, +): + """Update the auto-summary frequency for a knowledge base.""" + try: + user_id, tenant_id = get_current_user_id(authorization) + summary_frequency = request.get("summary_frequency") + + valid_frequencies = VALID_SUMMARY_FREQUENCIES + if summary_frequency not in valid_frequencies: + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, + detail=f"Invalid summary_frequency. Must be one of: {valid_frequencies}" + ) + + from database.knowledge_db import update_summary_frequency + success = update_summary_frequency( + index_name=index_name, + summary_frequency=summary_frequency, + _tenant_id=tenant_id, + user_id=user_id + ) + + if success: + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "message": "Summary frequency updated successfully", "status": "success"} + ) + else: + raise HTTPException( + status_code=HTTPStatus.NOT_FOUND, + detail=f"Knowledge base '{index_name}' not found" + ) + except HTTPException: + raise + except Exception as exc: + logger.exception("Error updating summary frequency") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error updating summary frequency: {str(exc)}" + ) + + +@router.get("/{index_name}/embedding-model-status") +def get_embedding_model_status( + index_name: str = Path(..., description="Name of the index to check"), + authorization: Optional[str] = Header(None) +): + """ + Check the embedding model status of a knowledge base. + Returns information about whether a model is configured and if an update is needed. 
+ + This endpoint is used by the frontend to determine whether to show + a dialog prompting the user to select an embedding model for knowledge bases + that were created before the model ID feature was added. + + Note: The path parameter is the internal index_name. + """ + try: + _, tenant_id = get_current_user_id(authorization) + + # Get the knowledge base record by index_name + knowledge_record = get_knowledge_record({ + "index_name": index_name, + "tenant_id": tenant_id, + "include_asset_owner_assets": True, + }) + + if not knowledge_record: + raise HTTPException( + status_code=HTTPStatus.NOT_FOUND, + detail=f"Knowledge base '{index_name}' not found" + ) + + # Check if model_id exists + model_id = knowledge_record.get("embedding_model_id") + embedding_model_name = knowledge_record.get("embedding_model_name") + + # Get model info if model_id exists + model_info = None + if model_id: + model = get_model_by_model_id(model_id, tenant_id) + if model: + model_info = { + "model_id": model.get("model_id"), + "model_name": model.get("model_name"), + "display_name": model.get("display_name"), + "model_type": model.get("model_type"), + } + + # Determine status + if model_id and model_info: + status = "configured" + message = f"Embedding model '{model_info.get('display_name', model_info.get('model_name'))}' is configured" + needs_config = False + elif embedding_model_name: + # Has model name but no model_id (legacy data) + status = "legacy" + message = "This knowledge base was created with an older version. Please select an embedding model to ensure proper functionality." + needs_config = True + else: + # No model configured at all + status = "missing" + message = "No embedding model configured. Please select an embedding model." 
+ needs_config = True + + # Get actual internal index_name from the database record + actual_index_name = knowledge_record.get("index_name") + + return { + "status": status, + "needs_config": needs_config, + "index_name": actual_index_name, + "knowledge_name": knowledge_record.get("knowledge_name"), + "model_id": model_id, + "embedding_model_name": embedding_model_name, + "model_info": model_info, + "message": message, + } + + except HTTPException: + raise + except Exception as e: + logger.error( + f"Error getting embedding model status for '{index_name}': {e}", exc_info=True) + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail=f"Error checking embedding model status: {str(e)}" + ) + + +@router.put("/{index_name}/embedding-model") +def update_embedding_model( + index_name: str = Path( + ..., description="Internal index name of the knowledge base to update"), + request: Dict[str, Any] = Body(..., + description="Update payload with model_id"), + authorization: Optional[str] = Header(None) +): + """ + Update the embedding model for a knowledge base. + This is used when a user selects an embedding model from the dialog + for knowledge bases that don't have a model configured. 
+ """ + try: + user_id, tenant_id = get_current_user_id(authorization) + + model_id = request.get("model_id") + if not model_id: + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, + detail="model_id is required" + ) + + result = ElasticSearchService.update_embedding_model( + index_name=index_name, + model_id=model_id, + tenant_id=tenant_id, + user_id=user_id, + ) + + return JSONResponse( + status_code=HTTPStatus.OK, + content=result + ) + + except ValueError as exc: + raise HTTPException( + status_code=HTTPStatus.NOT_FOUND, + detail=str(exc) + ) + except HTTPException: + raise + except Exception as exc: + logger.error( + f"Error updating embedding model for '{index_name}': {exc}", exc_info=True) + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail=f"Error updating embedding model: {str(exc)}" + ) + + +def _apply_read_only_to_asset_indices_info(asset_result: Dict[str, Any]) -> Dict[str, Any]: + """Force READ_ONLY permission on asset-owner indices_info before merge.""" + indices_info = asset_result.get("indices_info") + if not indices_info: + return asset_result + normalized = dict(asset_result) + normalized["indices_info"] = [ + {**info, "permission": PERMISSION_READ} for info in indices_info + ] + return normalized + + +def _merge_list_indices_results( + primary: Dict[str, Any], + asset_owner: Dict[str, Any], +) -> Dict[str, Any]: + """Merge tenant and ASSET_OWNER list_indices responses (concat, no dedup).""" + merged_indices = primary.get("indices", []) + \ + asset_owner.get("indices", []) + merged: Dict[str, Any] = { + "indices": merged_indices, + "count": len(merged_indices), + } + if "indices_info" in primary or "indices_info" in asset_owner: + merged["indices_info"] = ( + primary.get("indices_info", []) + + asset_owner.get("indices_info", []) + ) + return merged + + @router.get("") def get_list_indices( pattern: str = Query("*", description="Pattern to match index names"), @@ -173,9 +417,21 @@ def get_list_indices( """List 
all user indices with optional stats""" try: user_id, auth_tenant_id = get_current_user_id(authorization) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id - effective_tenant_id = tenant_id or auth_tenant_id - return ElasticSearchService.list_indices(pattern, include_stats, effective_tenant_id, user_id, vdb_core) + if tenant_id is None: + result = ElasticSearchService.list_indices( + pattern, include_stats, auth_tenant_id, user_id, vdb_core + ) + if auth_tenant_id != ASSET_OWNER_TENANT_ID: + asset_result = ElasticSearchService.list_indices( + pattern, include_stats, ASSET_OWNER_TENANT_ID, user_id, vdb_core + ) + asset_result = _apply_read_only_to_asset_indices_info( + asset_result) + return _merge_list_indices_results(result, asset_result) + return result + return ElasticSearchService.list_indices( + pattern, include_stats, tenant_id, user_id, vdb_core + ) except Exception as e: raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error get index: {str(e)}") @@ -191,6 +447,8 @@ def create_index_documents( authorization: Optional[str] = Header(None), task_id: Optional[str] = Header( None, alias="X-Task-Id", description="Task ID for progress tracking"), + large_mode: bool = Query( + False, description="Force large-batch path when current request chunk count is below threshold"), ): """ Index documents with embeddings, creating the index if it doesn't exist. 
@@ -198,22 +456,26 @@ def create_index_documents( """ try: user_id, tenant_id = get_current_user_id(authorization) - + # Get the knowledge base record to retrieve the saved embedding model knowledge_record = get_knowledge_record({'index_name': index_name}) - saved_embedding_model_name = None + saved_embedding_model_id = None if knowledge_record: - saved_embedding_model_name = knowledge_record.get('embedding_model_name') - - # Use the saved model from knowledge base, fallback to tenant default if not set - embedding_model = get_embedding_model(tenant_id, saved_embedding_model_name) - + saved_embedding_model_id = knowledge_record.get( + 'embedding_model_id') + + # Use the saved model from knowledge base by model_id + embedding_model, _ = get_embedding_model_by_id( + tenant_id, saved_embedding_model_id) if saved_embedding_model_id else (None, None) + return ElasticSearchService.index_documents( embedding_model=embedding_model, index_name=index_name, data=data, vdb_core=vdb_core, task_id=task_id, + large_mode=large_mode, + model_id=saved_embedding_model_id, ) except Exception as e: error_msg = str(e) @@ -246,54 +508,70 @@ async def get_index_files( @router.delete("/{index_name}/documents") -def delete_documents( +async def delete_documents( index_name: str = Path(..., description="Name of the index"), path_or_url: str = Query(..., description="Path or URL of documents to delete"), + scope: str = Query( + "full", + description=( + "source_only: delete MinIO source only, keep ES chunks/vectors; " + "full: delete ES documents, MinIO source, and Redis task records" + ), + ), vdb_core: VectorDatabaseCore = Depends(get_vector_db_core) ): - """Delete documents by path or URL and clean up related Redis records""" + """Delete a document by scope: source file only or full removal from the index.""" try: - # First delete the documents using existing service - result = ElasticSearchService.delete_documents( - index_name, path_or_url, vdb_core) - - # Then clean up Redis records 
related to this specific document - try: - redis_service = get_redis_service() - redis_cleanup_result = redis_service.delete_document_records( - index_name, path_or_url) - - # Add Redis cleanup info to the result - result["redis_cleanup"] = redis_cleanup_result - - # Update the message to include Redis cleanup info - original_message = result.get( - "message", "Documents deleted successfully") - result["message"] = ( - f"{original_message}. " - f"Cleaned up {redis_cleanup_result['total_deleted']} Redis records " - f"({redis_cleanup_result['celery_tasks_deleted']} tasks, " - f"{redis_cleanup_result['cache_keys_deleted']} cache keys)." - ) - - if redis_cleanup_result.get("errors"): - result["redis_warnings"] = redis_cleanup_result["errors"] + result = await ElasticSearchService.delete_document_by_scope( + index_name, path_or_url, scope, vdb_core + ) - except Exception as redis_error: - logger.warning( - f"Redis cleanup failed for document {path_or_url} in index {index_name}: {str(redis_error)}") - result["redis_cleanup_error"] = str(redis_error) - original_message = result.get( - "message", "Documents deleted successfully") - result[ - "message"] = f"{original_message}, but Redis cleanup encountered an error: {str(redis_error)}" + if scope == "full": + try: + redis_service = get_redis_service() + redis_cleanup_result = redis_service.delete_document_records( + index_name, path_or_url + ) + result["redis_cleanup"] = redis_cleanup_result + original_message = result.get( + "message", "Documents deleted successfully" + ) + result["message"] = ( + f"{original_message}. " + f"Cleaned up {redis_cleanup_result['total_deleted']} Redis records " + f"({redis_cleanup_result['celery_tasks_deleted']} tasks, " + f"{redis_cleanup_result['cache_keys_deleted']} cache keys)." 
+ ) + if redis_cleanup_result.get("errors"): + result["redis_warnings"] = redis_cleanup_result["errors"] + except Exception as redis_error: + logger.warning( + "Redis cleanup failed for document %s in index %s: %s", + path_or_url, + index_name, + redis_error, + ) + result["redis_cleanup_error"] = str(redis_error) + original_message = result.get( + "message", "Documents deleted successfully" + ) + result["message"] = ( + f"{original_message}, but Redis cleanup encountered an error: " + f"{str(redis_error)}" + ) return result + except ValueError as exc: + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, detail=str(exc) + ) except Exception as e: raise HTTPException( - status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error delete indexing documents: {e}") + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail=f"Error delete indexing documents: {e}", + ) @router.get("/{index_name}/documents/{path_or_url:path}/error-info") @@ -364,13 +642,14 @@ def health_check(vdb_core: VectorDatabaseCore = Depends(get_vector_db_core)): # Try to list indices as a health check return ElasticSearchService.health_check(vdb_core) except Exception as e: - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"{str(e)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"{str(e)}") @router.post("/{index_name}/chunks") def get_index_chunks( index_name: str = Path(..., - description="Name of the index (or knowledge_name) to get chunks from"), + description=INTERNAL_INDEX_NAME_DESC), page: int = Query( None, description="Page number (1-based) for pagination"), page_size: int = Query( @@ -382,12 +661,18 @@ def get_index_chunks( ): """Get chunks from the specified index, with optional pagination support""" try: - _, tenant_id = get_current_user_id(authorization) - actual_index_name = get_index_name_by_knowledge_name( - index_name, tenant_id) + user_id, tenant_id = get_current_user_id(authorization) + + if path_or_url is not None 
and not check_file_access( + path_or_url, user_id, tenant_id + ): + raise HTTPException( + status_code=HTTPStatus.FORBIDDEN, + detail="You don't have permission to access this file", + ) result = ElasticSearchService.get_index_chunks( - index_name=actual_index_name, + index_name=index_name, page=page, page_size=page_size, path_or_url=path_or_url, @@ -401,8 +686,6 @@ def get_index_chunks( ) except Exception as e: error_msg = str(e) - logger.error( - f"Error getting chunks for index '{index_name}': {error_msg}") raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error getting chunks: {error_msg}") @@ -410,7 +693,7 @@ def get_index_chunks( @router.post("/{index_name}/chunk") def create_chunk( index_name: str = Path(..., - description="Name of the index (or knowledge_name)"), + description=INTERNAL_INDEX_NAME_DESC), payload: ChunkCreateRequest = Body(..., description="Chunk data"), vdb_core: VectorDatabaseCore = Depends(get_vector_db_core), authorization: Optional[str] = Header(None), @@ -418,10 +701,8 @@ def create_chunk( """Create a manual chunk.""" try: user_id, tenant_id = get_current_user_id(authorization) - actual_index_name = get_index_name_by_knowledge_name( - index_name, tenant_id) result = ElasticSearchService.create_chunk( - index_name=actual_index_name, + index_name=index_name, chunk_request=payload, vdb_core=vdb_core, user_id=user_id, @@ -445,7 +726,7 @@ def create_chunk( @router.put("/{index_name}/chunk/{chunk_id}") def update_chunk( index_name: str = Path(..., - description="Name of the index (or knowledge_name)"), + description=INTERNAL_INDEX_NAME_DESC), chunk_id: str = Path(..., description="Chunk identifier"), payload: ChunkUpdateRequest = Body(..., description="Chunk update payload"), @@ -455,14 +736,13 @@ def update_chunk( """Update an existing chunk.""" try: user_id, tenant_id = get_current_user_id(authorization) - actual_index_name = get_index_name_by_knowledge_name( - index_name, tenant_id) result = 
ElasticSearchService.update_chunk( - index_name=actual_index_name, + index_name=index_name, chunk_id=chunk_id, chunk_request=payload, vdb_core=vdb_core, user_id=user_id, + tenant_id=tenant_id, ) return JSONResponse(status_code=HTTPStatus.OK, content=result) except ValueError as e: @@ -486,18 +766,16 @@ def update_chunk( @router.delete("/{index_name}/chunk/{chunk_id}") def delete_chunk( index_name: str = Path(..., - description="Name of the index (or knowledge_name)"), + description=INTERNAL_INDEX_NAME_DESC), chunk_id: str = Path(..., description="Chunk identifier"), vdb_core: VectorDatabaseCore = Depends(get_vector_db_core), authorization: Optional[str] = Header(None), ): """Delete a chunk.""" try: - _, tenant_id = get_current_user_id(authorization) - actual_index_name = get_index_name_by_knowledge_name( - index_name, tenant_id) + get_current_user_id(authorization) result = ElasticSearchService.delete_chunk( - index_name=actual_index_name, + index_name=index_name, chunk_id=chunk_id, vdb_core=vdb_core, ) @@ -529,8 +807,17 @@ async def hybrid_search( """Run a hybrid (accurate + semantic) search across indices.""" try: _, tenant_id = get_current_user_id(authorization) + resolved_index_names: List[str] = [] + for requested_name in payload.index_names: + try: + resolved_name = get_index_name_by_knowledge_name( + requested_name, tenant_id + ) + except Exception: + resolved_name = requested_name + resolved_index_names.append(resolved_name) result = ElasticSearchService.search_hybrid( - index_names=payload.index_names, + index_names=resolved_index_names, query=payload.query, tenant_id=tenant_id, top_k=payload.top_k, @@ -538,9 +825,20 @@ async def hybrid_search( vdb_core=vdb_core, ) return JSONResponse(status_code=HTTPStatus.OK, content=result) + except KnowledgeBaseNeedsModelConfigError as exc: + # Return a specific error that frontend can detect to show the config dialog + raise HTTPException( + status_code=HTTPStatus.CONFLICT, + detail={ + "error_type": 
"KNOWLEDGE_BASE_NEEDS_MODEL_CONFIG", + "index_name": exc.index_name, + "message": exc.message, + "suggestion": "Please select an embedding model for this knowledge base before searching." + } + ) except ValueError as exc: - raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, - detail=str(exc)) + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)) except Exception as exc: logger.error(f"Hybrid search failed: {exc}", exc_info=True) raise HTTPException( diff --git a/backend/apps/voice_app.py b/backend/apps/voice_app.py index 8f517cd07..cc1b37e87 100644 --- a/backend/apps/voice_app.py +++ b/backend/apps/voice_app.py @@ -2,14 +2,14 @@ import logging from http import HTTPStatus -from fastapi import APIRouter, WebSocket, HTTPException, Body, Query +from fastapi import APIRouter, WebSocket, HTTPException from fastapi.responses import JSONResponse from consts.exceptions import ( VoiceServiceException, STTConnectionException, TTSConnectionException, - VoiceConfigException + VoiceConfigException, ) from consts.model import VoiceConnectivityRequest, VoiceConnectivityResponse from services.voice_service import get_voice_service @@ -26,10 +26,29 @@ async def stt_websocket(websocket: WebSocket): logger.info("STT WebSocket connection attempt...") await websocket.accept() logger.info("STT WebSocket connection accepted") - + + # Receive config from client + client_config = {} + try: + msg = await websocket.receive() + if msg["type"] == "websocket.receive": + import json + client_config = json.loads(msg["text"]) + logger.info(f"Received client config: {client_config}") + elif msg["type"] == "bytes": + try: + import json + client_config = json.loads(msg["bytes"].decode('utf-8')) + logger.info(f"Received client config from bytes: {client_config}") + except Exception as e: + logger.warning(f"Failed to parse bytes as JSON: {e}") + except Exception as e: + logger.error(f"Error receiving config: {e}") + client_config = {} + try: voice_service = 
get_voice_service() - await voice_service.start_stt_streaming_session(websocket) + await voice_service.start_stt_streaming_session(websocket, stt_config=client_config) except STTConnectionException as e: logger.error(f"STT WebSocket error: {str(e)}") await websocket.send_json({"error": str(e)}) @@ -48,18 +67,60 @@ async def tts_websocket(websocket: WebSocket): logger.info("TTS WebSocket connection accepted") try: - # Receive text from client (single request) - data = await websocket.receive_json() - text = data.get("text") + # Receive config and text from client + msg = await websocket.receive() + client_config = {} + text = None + + if msg["type"] == "websocket.receive": + if "text" in msg: + import json + client_config = json.loads(msg["text"]) + text = client_config.get("text") + elif "bytes" in msg: + try: + import json + client_config = json.loads(msg["bytes"].decode('utf-8')) + text = client_config.get("text") + except Exception as e: + logger.warning(f"Failed to parse bytes as JSON: {e}") if not text: if websocket.client_state.name == "CONNECTED": await websocket.send_json({"error": "No text provided"}) return + # Extract config from client + tenant_id = client_config.get("tenant_id") + model_factory = client_config.get("model_factory") + model_name = client_config.get("model_name") + api_key = client_config.get("api_key") + model_appid = client_config.get("model_appid") + access_token = client_config.get("access_token") + base_url = client_config.get("base_url") + + logger.info(f"TTS request - model_name: {model_name}, model_factory: {model_factory}, " + f"has_api_key: {bool(api_key)}") + + # Build tts_config dict for voice service + tts_config = { + "model_factory": model_factory, + "api_key": api_key, + "model_appid": model_appid, + "access_token": access_token, + "base_url": base_url, + "model_name": model_name, + } + # Stream TTS audio to WebSocket voice_service = get_voice_service() - await voice_service.stream_tts_to_websocket(websocket, text) + await 
voice_service.stream_tts_to_websocket( + websocket, + text, + tenant_id=tenant_id, + model_name=model_name, + tts_config=tts_config + ) except TTSConnectionException as e: logger.error(f"TTS WebSocket error: {str(e)}") @@ -78,17 +139,17 @@ async def tts_websocket(websocket: WebSocket): async def check_voice_connectivity(request: VoiceConnectivityRequest): """ Check voice service connectivity - + Args: request: VoiceConnectivityRequest containing model_type - + Returns: VoiceConnectivityResponse with connectivity status """ try: voice_service = get_voice_service() connected = await voice_service.check_voice_connectivity(request.model_type) - + return JSONResponse( status_code=HTTPStatus.OK, content=VoiceConnectivityResponse( diff --git a/backend/assets/test_voice.pcm b/backend/assets/test_voice.pcm new file mode 100644 index 000000000..0a78f9a15 Binary files /dev/null and b/backend/assets/test_voice.pcm differ diff --git a/backend/consts/agent_unavailable_reasons.py b/backend/consts/agent_unavailable_reasons.py new file mode 100644 index 000000000..4e710ee7d --- /dev/null +++ b/backend/consts/agent_unavailable_reasons.py @@ -0,0 +1,43 @@ +""" +Agent Unavailable Reason Constants + +Centralized definition of all possible reasons why an agent may be unavailable. +These values are returned to the frontend via the 'unavailable_reasons' field. 
+""" + + +class AgentUnavailableReason: + """Reason codes for agent unavailability.""" + + # Identity conflicts + DUPLICATE_NAME = "duplicate_name" + DUPLICATE_DISPLAY_NAME = "duplicate_display_name" + + # Model issues + MODEL_NOT_CONFIGURED = "model_not_configured" + MODEL_UNAVAILABLE = "model_unavailable" + + # Tool issues + TOOL_UNAVAILABLE = "tool_unavailable" + ALL_TOOLS_DISABLED = "all_tools_disabled" + + # Agent issues + AGENT_NOT_FOUND = "agent_not_found" + + @classmethod + def all_reasons(cls) -> list[str]: + """Return all defined unavailable reason codes.""" + return [ + cls.DUPLICATE_NAME, + cls.DUPLICATE_DISPLAY_NAME, + cls.MODEL_NOT_CONFIGURED, + cls.MODEL_UNAVAILABLE, + cls.TOOL_UNAVAILABLE, + cls.ALL_TOOLS_DISABLED, + cls.AGENT_NOT_FOUND, + ] + + @classmethod + def is_valid_reason(cls, reason: str) -> bool: + """Check if a reason string is a valid reason code.""" + return reason in cls.all_reasons() diff --git a/backend/consts/const.py b/backend/consts/const.py index bccb91ccd..574d550c0 100644 --- a/backend/consts/const.py +++ b/backend/consts/const.py @@ -7,9 +7,12 @@ load_dotenv(override=True) # TODO: Analyze every variable if this is used -# Test voice file path +# Test voice file path (WAV format for volcengine STT) TEST_VOICE_PATH = os.path.join(os.path.dirname( os.path.dirname(__file__)), 'assets', 'test.wav') +# Test PCM file path (raw PCM format for Ali STT) +TEST_PCM_PATH = os.path.join(os.path.dirname( + os.path.dirname(__file__)), 'assets', 'test_voice.pcm') # Vector database providers @@ -28,6 +31,10 @@ class VectorDatabaseType(str, Enum): # Data Processing Service Configuration DATA_PROCESS_SERVICE = os.getenv("DATA_PROCESS_SERVICE") CLIP_MODEL_PATH = os.getenv("CLIP_MODEL_PATH") +TABLE_TRANSFORMER_MODEL_PATH = os.getenv("TABLE_TRANSFORMER_MODEL_PATH") +UNSTRUCTURED_DEFAULT_MODEL_INITIALIZE_PARAMS_JSON_PATH = os.getenv( + "UNSTRUCTURED_DEFAULT_MODEL_INITIALIZE_PARAMS_JSON_PATH" +) # Upload Configuration @@ -36,9 +43,16 @@ class 
VectorDatabaseType(str, Enum): UPLOAD_FOLDER = os.getenv('UPLOAD_FOLDER', 'uploads') ROOT_DIR = os.getenv("ROOT_DIR") +PER_WAVE_TIMEOUT = int(os.getenv("DP_SPLIT_WAIT_TIMEOUT_PER_WAVE_S", "30")) +MAX_TIMEOUT = int(os.getenv("DP_SPLIT_WAIT_TIMEOUT_MAX_S", "1800")) + + # Container-internal skills storage path CONTAINER_SKILLS_PATH = os.getenv("SKILLS_PATH") +# Container-internal official skills ZIP directory +OFFICIAL_SKILLS_ZIP_PATH = "/mnt/nexent/official-skills-zip" + # Preview Configuration FILE_PREVIEW_SIZE_LIMIT = 100 * 1024 * 1024 # 100MB @@ -66,7 +80,39 @@ class VectorDatabaseType(str, Enum): SERVICE_ROLE_KEY = os.getenv('SERVICE_ROLE_KEY', SUPABASE_KEY) # JWT secret for verifying Supabase-signed access tokens. # GoTrue uses GOTRUE_JWT_SECRET (= JWT_SECRET in docker setup) to sign tokens. -SUPABASE_JWT_SECRET = os.getenv('SUPABASE_JWT_SECRET') or os.getenv('JWT_SECRET', '') +SUPABASE_JWT_SECRET = os.getenv( + 'SUPABASE_JWT_SECRET') or os.getenv('JWT_SECRET', '') + + +# OAuth Configuration +OAUTH_CALLBACK_BASE_URL = os.getenv("OAUTH_CALLBACK_BASE_URL", "") +OAUTH_SSL_VERIFY = os.getenv("OAUTH_SSL_VERIFY", "true").lower() == "true" +OAUTH_CA_BUNDLE = os.getenv("OAUTH_CA_BUNDLE", "") + + +# CAS SSO Configuration +CAS_ENABLED = os.getenv("CAS_ENABLED", "false").lower() in ("true", "1", "yes", "on") +CAS_SERVER_URL = os.getenv("CAS_SERVER_URL", "").rstrip("/") +CAS_VALIDATE_PATH = os.getenv("CAS_VALIDATE_PATH", "/p3/serviceValidate") +CAS_CALLBACK_BASE_URL = os.getenv("CAS_CALLBACK_BASE_URL", OAUTH_CALLBACK_BASE_URL).rstrip("/") +# CAS login mode: +# - disabled: disable CAS login entry and automatic CAS redirects. +# - button: show CAS as an optional login entry. +# - force: automatically redirect unauthenticated users to CAS login. 
+CAS_LOGIN_MODE = os.getenv("CAS_LOGIN_MODE", "disabled").lower() +CAS_USER_ATTRIBUTE = os.getenv("CAS_USER_ATTRIBUTE", "") +CAS_EMAIL_ATTRIBUTE = os.getenv("CAS_EMAIL_ATTRIBUTE", "email") +CAS_ROLE_ATTRIBUTE = os.getenv("CAS_ROLE_ATTRIBUTE", "role") +CAS_TENANT_ATTRIBUTE = os.getenv("CAS_TENANT_ATTRIBUTE", "tenant_id") +CAS_ROLE_MAP_JSON = os.getenv("CAS_ROLE_MAP_JSON", "") +CAS_SESSION_MAX_AGE_SECONDS = int(os.getenv("CAS_SESSION_MAX_AGE_SECONDS", "3600") or 3600) +LOCAL_SESSION_MAX_AGE_SECONDS = int(os.getenv("LOCAL_SESSION_MAX_AGE_SECONDS", "3600") or 3600) +CAS_RENEW_BEFORE_SECONDS = int(os.getenv("CAS_RENEW_BEFORE_SECONDS", "300") or 300) +CAS_RENEW_TIMEOUT_SECONDS = int(os.getenv("CAS_RENEW_TIMEOUT_SECONDS", "10") or 10) +CAS_SYNTHETIC_EMAIL_DOMAIN = os.getenv("CAS_SYNTHETIC_EMAIL_DOMAIN", "cas.local") +CAS_LOGOUT_URL = os.getenv("CAS_LOGOUT_URL", "") +CAS_SSL_VERIFY = os.getenv("CAS_SSL_VERIFY", "true").lower() == "true" +CAS_CA_BUNDLE = os.getenv("CAS_CA_BUNDLE", "") # ===== To be migrated to frontend configuration ===== @@ -91,15 +137,37 @@ class VectorDatabaseType(str, Enum): DEFAULT_USER_ID = "user_id" DEFAULT_TENANT_ID = "tenant_id" +# Invitation code type for asset administrator registration +ASSET_OWNER_INVITE_CODE_TYPE = "ASSET_OWNER_INVITE" + +# User role identifier for asset administrators +ASSET_OWNER_ROLE = "ASSET_OWNER" + +# Tenant ID for asset administrators (virtual tenant, not a real tenant) +ASSET_OWNER_TENANT_ID = "asset_owner_tenant_id" + +# MinIO prefix for ASSET_OWNER-scoped attachment uploads (attachments/asset_owner/{user_id}/...) +ASSET_OWNER_ATTACHMENTS_PREFIX = "attachments/asset_owner" + +# When false, block ASSET_OWNER invites, registrations, and sign-in. +ENABLE_ASSET_OWNER_ROLE = os.getenv( + "ENABLE_ASSET_OWNER_ROLE", "false").lower() == "true" + +# HTTP detail key: asset owner must register via OAuth, not email/password signup. 
+ASSET_OWNER_SIGNUP_USE_OAUTH_DETAIL = "ASSET_OWNER_USE_OAUTH" + # Roles that can edit all resources within a tenant (permission = EDIT). # Keep this centralized to avoid drifting role logic across modules. -CAN_EDIT_ALL_USER_ROLES = {"SU", "ADMIN", "SPEED"} +CAN_EDIT_ALL_USER_ROLES = {"SU", "ADMIN", "SPEED", "ASSET_OWNER"} # Permission constants used by list endpoints (e.g., /agent/list, /mcp/list). PERMISSION_READ = "READ_ONLY" PERMISSION_EDIT = "EDIT" PERMISSION_PRIVATE = "PRIVATE" +# Response flag when system prompts are withheld from non-ASSET_OWNER callers. +AGENT_PROMPTS_HIDDEN_FLAG = "prompts_hidden" + # Deployment Version Configuration DEPLOYMENT_VERSION = os.getenv("DEPLOYMENT_VERSION", "speed") @@ -115,6 +183,7 @@ class VectorDatabaseType(str, Enum): MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY") MINIO_REGION = os.getenv("MINIO_REGION") MINIO_DEFAULT_BUCKET = os.getenv("MINIO_DEFAULT_BUCKET") +S3_URL_PREFIX = "s3://" # Postgres Configuration @@ -143,7 +212,7 @@ class VectorDatabaseType(str, Enum): RAY_ACTOR_NUM_CPUS = int(os.getenv("RAY_ACTOR_NUM_CPUS", "2")) RAY_DASHBOARD_PORT = int(os.getenv("RAY_DASHBOARD_PORT", "8265")) RAY_DASHBOARD_HOST = os.getenv("RAY_DASHBOARD_HOST", "0.0.0.0") -RAY_NUM_CPUS = os.getenv("RAY_NUM_CPUS") +RAY_NUM_CPUS = int(os.getenv("RAY_NUM_CPUS", "4")) RAY_OBJECT_STORE_MEMORY_GB = float( os.getenv("RAY_OBJECT_STORE_MEMORY_GB", "0.25")) RAY_TEMP_DIR = os.getenv("RAY_TEMP_DIR", "/tmp/ray") @@ -164,6 +233,7 @@ class VectorDatabaseType(str, Enum): "NEXENT_MCP_DOCKER_IMAGE", "nexent/nexent-mcp:latest") ENABLE_UPLOAD_IMAGE = os.getenv( "ENABLE_UPLOAD_IMAGE", "false").lower() == "true" +ENABLE_JIUWEN_SDK = os.getenv("NEXENT_ENABLE_JIUWEN_SDK", "true").lower() == "true" # Celery Configuration @@ -176,10 +246,21 @@ class VectorDatabaseType(str, Enum): # Worker Configuration RAY_ADDRESS = os.getenv("RAY_ADDRESS", "auto") -QUEUES = os.getenv("QUEUES", "process_q,forward_q") +QUEUES = os.getenv("QUEUES", 
"process_q,process_part_q,forward_q") # Will be dynamically set based on PID if not provided WORKER_NAME = os.getenv("WORKER_NAME") WORKER_CONCURRENCY = int(os.getenv("WORKER_CONCURRENCY", "4")) +RAY_WARM_ACTOR_POOL_SIZE_PART = int( + os.getenv("RAY_WARM_ACTOR_POOL_SIZE_PART", "2")) +RAY_WARM_ACTOR_POOL_SIZE_PROCESS = int( + os.getenv("RAY_WARM_ACTOR_POOL_SIZE_PROCESS", "1")) +# Global Ray actor pool (shared by process_q/process_part_q workers) +RAY_GLOBAL_ACTOR_POOL_SIZE = int(os.getenv("RAY_GLOBAL_ACTOR_POOL_SIZE", "3")) +RAY_ACTOR_WARM_TIMEOUT_S = float(os.getenv("RAY_ACTOR_WARM_TIMEOUT_S", "60")) +RAY_GLOBAL_ACTOR_POOL_NAME = os.getenv( + "RAY_GLOBAL_ACTOR_POOL_NAME", "nexent_global_data_processor_pool") +RAY_GLOBAL_ACTOR_POOL_NAMESPACE = os.getenv( + "RAY_GLOBAL_ACTOR_POOL_NAMESPACE", "nexent-data-process") # Voice Service Configuration @@ -279,6 +360,8 @@ class VectorDatabaseType(str, Enum): "multiEmbedding": "MULTI_EMBEDDING_ID", "rerank": "RERANK_ID", "vlm": "VLM_ID", + "vlm2": "VLM2_ID", + "vlm3": "VLM3_ID", "stt": "STT_ID", "tts": "TTS_ID" } @@ -310,19 +393,78 @@ class VectorDatabaseType(str, Enum): THINK_END_PATTERN = "" -# Telemetry and Monitoring Configuration -ENABLE_TELEMETRY = os.getenv("ENABLE_TELEMETRY", "false").lower() == "true" -SERVICE_NAME = os.getenv("SERVICE_NAME", "nexent-backend") -JAEGER_ENDPOINT = os.getenv( - "JAEGER_ENDPOINT", "http://localhost:14268/api/traces") -PROMETHEUS_PORT = int(os.getenv("PROMETHEUS_PORT", "8000")) -TELEMETRY_SAMPLE_RATE = float(os.getenv("TELEMETRY_SAMPLE_RATE", "1.0")) - -# Performance monitoring thresholds -LLM_SLOW_REQUEST_THRESHOLD_SECONDS = float( - os.getenv("LLM_SLOW_REQUEST_THRESHOLD_SECONDS", "5.0")) -LLM_SLOW_TOKEN_RATE_THRESHOLD = float( - os.getenv("LLM_SLOW_TOKEN_RATE_THRESHOLD", "10.0")) # tokens per second +# Telemetry and Monitoring Configuration (OTLP Protocol) +MONITORING_PROVIDER = os.getenv("MONITORING_PROVIDER", "") +ENABLE_TELEMETRY_RAW = os.getenv("ENABLE_TELEMETRY") +ENABLE_TELEMETRY = 
(ENABLE_TELEMETRY_RAW or "false").lower() == "true" +OTEL_SERVICE_NAME_RAW = os.getenv("OTEL_SERVICE_NAME") +OTEL_SERVICE_NAME = OTEL_SERVICE_NAME_RAW or "nexent-backend" +OTEL_EXPORTER_OTLP_ENDPOINT_RAW = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT") +OTEL_EXPORTER_OTLP_ENDPOINT = OTEL_EXPORTER_OTLP_ENDPOINT_RAW or "http://localhost:4318" +OTEL_EXPORTER_OTLP_TRACES_ENDPOINT = os.getenv( + "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", "") +OTEL_EXPORTER_OTLP_METRICS_ENDPOINT = os.getenv( + "OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", "") +OTEL_EXPORTER_OTLP_PROTOCOL_RAW = os.getenv("OTEL_EXPORTER_OTLP_PROTOCOL") +OTEL_EXPORTER_OTLP_PROTOCOL = OTEL_EXPORTER_OTLP_PROTOCOL_RAW or "http" +OTEL_EXPORTER_OTLP_HEADERS_RAW = os.getenv("OTEL_EXPORTER_OTLP_HEADERS") +OTEL_EXPORTER_OTLP_HEADERS = OTEL_EXPORTER_OTLP_HEADERS_RAW or "" +OTEL_EXPORTER_OTLP_AUTHORIZATION = os.getenv( + "OTEL_EXPORTER_OTLP_AUTHORIZATION", "") +OTEL_EXPORTER_OTLP_X_API_KEY = os.getenv("OTEL_EXPORTER_OTLP_X_API_KEY", "") +OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION = os.getenv( + "OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION", "") +LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY", "") +LANGSMITH_PROJECT = os.getenv("LANGSMITH_PROJECT", "") +OTEL_EXPORTER_OTLP_METRICS_ENABLED_RAW = os.getenv( + "OTEL_EXPORTER_OTLP_METRICS_ENABLED") +OTEL_EXPORTER_OTLP_METRICS_ENABLED = ( + OTEL_EXPORTER_OTLP_METRICS_ENABLED_RAW or "true").lower() == "true" +MONITORING_INSTRUMENT_REQUESTS_RAW = os.getenv( + "MONITORING_INSTRUMENT_REQUESTS") +MONITORING_INSTRUMENT_REQUESTS = ( + MONITORING_INSTRUMENT_REQUESTS_RAW or "false").lower() == "true" +MONITORING_FASTAPI_INCLUDED_URLS = os.getenv( + "MONITORING_FASTAPI_INCLUDED_URLS", "") +MONITORING_FASTAPI_EXCLUDED_URLS = os.getenv( + "MONITORING_FASTAPI_EXCLUDED_URLS", "") +MONITORING_FASTAPI_EXCLUDE_SPANS = os.getenv( + "MONITORING_FASTAPI_EXCLUDE_SPANS", "receive,send") +MONITORING_PROJECT_NAME = os.getenv("MONITORING_PROJECT_NAME", "") +MONITORING_DASHBOARD_URL = 
os.getenv("MONITORING_DASHBOARD_URL", "") +MONITORING_TRACE_CONTENT_MODE = os.getenv( + "MONITORING_TRACE_CONTENT_MODE", "summary") +MONITORING_TRACE_MAX_CHARS = os.getenv("MONITORING_TRACE_MAX_CHARS", "4000") +MONITORING_TRACE_MAX_ITEMS = os.getenv("MONITORING_TRACE_MAX_ITEMS", "20") +TELEMETRY_SAMPLE_RATE_RAW = os.getenv("TELEMETRY_SAMPLE_RATE") +TELEMETRY_SAMPLE_RATE = float(TELEMETRY_SAMPLE_RATE_RAW or "1.0") + +# Parse OTLP headers into dict format + + +def _parse_otlp_headers(headers_str: str) -> dict: + """Parse OTLP headers string into dict. Format: 'key1=value1,key2=value2'""" + if not headers_str: + return {} + headers = {} + for pair in headers_str.split(","): + if "=" in pair: + key, value = pair.split("=", 1) + headers[key.strip()] = value.strip() + return headers + + +OTLP_HEADERS = _parse_otlp_headers(OTEL_EXPORTER_OTLP_HEADERS) +if OTEL_EXPORTER_OTLP_AUTHORIZATION: + OTLP_HEADERS["Authorization"] = OTEL_EXPORTER_OTLP_AUTHORIZATION +if OTEL_EXPORTER_OTLP_X_API_KEY: + OTLP_HEADERS["x-api-key"] = OTEL_EXPORTER_OTLP_X_API_KEY +elif LANGSMITH_API_KEY: + OTLP_HEADERS["x-api-key"] = LANGSMITH_API_KEY +if LANGSMITH_PROJECT: + OTLP_HEADERS["Langsmith-Project"] = LANGSMITH_PROJECT +if OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION: + OTLP_HEADERS["x-langfuse-ingestion-version"] = OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION DEFAULT_ZH_TITLE = "新对话" @@ -334,15 +476,24 @@ class VectorDatabaseType(str, Enum): # Container Platform Configuration -IS_DEPLOYED_BY_KUBERNETES = os.getenv("IS_DEPLOYED_BY_KUBERNETES", "false").lower() == "true" +IS_DEPLOYED_BY_KUBERNETES = os.getenv( + "IS_DEPLOYED_BY_KUBERNETES", "false").lower() == "true" KUBERNETES_NAMESPACE = os.getenv("KUBERNETES_NAMESPACE", "nexent") -# Northbound API External URL (used for A2A Agent Card URLs) -# When accessed through reverse proxy, set this to the public-facing URL -# Falls back to http://localhost:5013 for local development -_northbound_url = os.getenv("NORTHBOUND_EXTERNAL_URL", "") 
-NORTHBOUND_EXTERNAL_URL = _northbound_url.rstrip("/") if _northbound_url else "http://localhost:5013" +# Northbound API public base URL (used for A2A agent cards and external file proxy links) +NORTHBOUND_EXTERNAL_URL = os.getenv( + "NORTHBOUND_EXTERNAL_URL", "http://localhost:5013/api").rstrip("/") # APP Version -APP_VERSION = "v2.0.2" +APP_VERSION = "v2.2.1" + + +# Skill Creation Streaming Configuration +STREAMABLE_CONTENT_TYPES = frozenset([ + "model_output_thinking", + "model_output_code", + "model_output_deep_thinking", + "tool", + "execution_logs", +]) diff --git a/backend/consts/error_code.py b/backend/consts/error_code.py index 072243de4..fd2987309 100644 --- a/backend/consts/error_code.py +++ b/backend/consts/error_code.py @@ -141,6 +141,23 @@ class ErrorCode(Enum): PROFILE_UPDATE_FAILED = "110102" # Profile update failed PROFILE_USER_ALREADY_EXISTS = "110103" # User already exists PROFILE_INVALID_CREDENTIALS = "110104" # Invalid credentials + # 02 - Password + PROFILE_PASSWORD_WEAK = "110201" # Password does not meet strength requirements + PROFILE_PASSWORD_SAME_AS_OLD = "110202" # New password cannot be the same as old password + + # ==================== 16 OAuth / 第三方登录 ==================== + # 01 - Provider + OAUTH_PROVIDER_NOT_CONFIGURED = "160101" # OAuth provider not configured + OAUTH_PROVIDER_DISABLED = "160102" # OAuth provider disabled + OAUTH_PROVIDER_UNSUPPORTED = "160103" # OAuth provider not supported + OAUTH_PROVIDER_ERROR = "160104" # OAuth provider returned an error + + # 02 - Account Linking + OAUTH_LINK_FAILED = "160201" # Failed to link OAuth account + OAUTH_UNLINK_FAILED = "160202" # Failed to unlink OAuth account + OAUTH_UNLINK_LAST_METHOD = "160203" # Cannot unlink last auth method + OAUTH_ACCOUNT_NOT_FOUND = "160204" # OAuth account link not found + OAUTH_ACCOUNT_ALREADY_LINKED = "160205" # OAuth account already linked # ==================== 12 TenantResource / 租户资源 ==================== # 01 - Tenant @@ -172,6 +189,12 @@ class 
ErrorCode(Enum): IDATA_RATE_LIMIT = "130405" # iData rate limit IDATA_RESPONSE_ERROR = "130406" # iData response error + # 05 - AIDP Service + AIDP_SERVICE_ERROR = "130501" # AIDP service error + AIDP_CONFIG_INVALID = "130502" # Invalid AIDP configuration + AIDP_CONNECTION_ERROR = "130503" # AIDP connection error + AIDP_AUTH_ERROR = "130504" # AIDP auth error + # ==================== 14 Northbound / 北向接口 ==================== # 01 - Request NORTHBOUND_REQUEST_FAILED = "140101" # Northbound request failed @@ -237,4 +260,22 @@ class ErrorCode(Enum): ErrorCode.IDATA_CONNECTION_ERROR: 502, ErrorCode.IDATA_RESPONSE_ERROR: 502, ErrorCode.IDATA_RATE_LIMIT: 429, + # AIDP (module 13) + ErrorCode.AIDP_CONFIG_INVALID: 400, + ErrorCode.AIDP_AUTH_ERROR: 401, + ErrorCode.AIDP_CONNECTION_ERROR: 502, + # OAuth (module 16) + ErrorCode.OAUTH_PROVIDER_NOT_CONFIGURED: 400, + ErrorCode.OAUTH_PROVIDER_DISABLED: 400, + ErrorCode.OAUTH_PROVIDER_UNSUPPORTED: 400, + ErrorCode.OAUTH_PROVIDER_ERROR: 502, + ErrorCode.OAUTH_LINK_FAILED: 500, + ErrorCode.OAUTH_UNLINK_FAILED: 500, + ErrorCode.OAUTH_UNLINK_LAST_METHOD: 400, + ErrorCode.OAUTH_ACCOUNT_NOT_FOUND: 404, + ErrorCode.OAUTH_ACCOUNT_ALREADY_LINKED: 409, + # Profile - Password (module 11) + ErrorCode.PROFILE_INVALID_CREDENTIALS: 400, + ErrorCode.PROFILE_PASSWORD_WEAK: 400, + ErrorCode.PROFILE_PASSWORD_SAME_AS_OLD: 400, } diff --git a/backend/consts/error_message.py b/backend/consts/error_message.py index 4ff1141c7..bb3641604 100644 --- a/backend/consts/error_message.py +++ b/backend/consts/error_message.py @@ -5,6 +5,8 @@ Frontend should use i18n for localized messages. 
""" +from typing import Dict, Tuple + from .error_code import ErrorCode @@ -102,6 +104,9 @@ class ErrorMessage: ErrorCode.PROFILE_UPDATE_FAILED: "Profile update failed.", ErrorCode.PROFILE_USER_ALREADY_EXISTS: "User already exists.", ErrorCode.PROFILE_INVALID_CREDENTIALS: "Invalid username or password.", + # Profile - Password + ErrorCode.PROFILE_PASSWORD_WEAK: "Password does not meet security requirements. Please use a stronger password.", + ErrorCode.PROFILE_PASSWORD_SAME_AS_OLD: "New password cannot be the same as the old password.", # ==================== 12 TenantResource / 租户资源 ==================== ErrorCode.TENANT_NOT_FOUND: "Tenant not found.", @@ -118,6 +123,16 @@ class ErrorMessage: ErrorCode.DIFY_AUTH_ERROR: "Dify authentication failed. Please check your API key.", ErrorCode.DIFY_RATE_LIMIT: "Dify API rate limit exceeded. Please try again later.", ErrorCode.ME_CONNECTION_FAILED: "Failed to connect to ME service.", + ErrorCode.IDATA_SERVICE_ERROR: "iData service error.", + ErrorCode.IDATA_CONFIG_INVALID: "iData configuration invalid. Please check URL and API key format.", + ErrorCode.IDATA_CONNECTION_ERROR: "Failed to connect to iData. Please check network connection and URL.", + ErrorCode.IDATA_RESPONSE_ERROR: "Failed to parse iData response. Please check API URL.", + ErrorCode.IDATA_AUTH_ERROR: "iData authentication failed. Please check your API key.", + ErrorCode.IDATA_RATE_LIMIT: "iData API rate limit exceeded. Please try again later.", + ErrorCode.AIDP_SERVICE_ERROR: "AIDP service error.", + ErrorCode.AIDP_CONFIG_INVALID: "AIDP configuration invalid. Please check URL and API key format.", + ErrorCode.AIDP_CONNECTION_ERROR: "Failed to connect to AIDP. Please check network connection and URL.", + ErrorCode.AIDP_AUTH_ERROR: "AIDP authentication failed. 
Please check your API key.", # ==================== 14 Northbound / 北向接口 ==================== ErrorCode.NORTHBOUND_REQUEST_FAILED: "Northbound request failed.", @@ -145,11 +160,11 @@ def get_message(cls, error_code: ErrorCode) -> str: return cls._MESSAGES.get(error_code, "An error occurred. Please try again later.") @classmethod - def get_message_with_code(cls, error_code: ErrorCode) -> tuple[int, str]: + def get_message_with_code(cls, error_code: ErrorCode) -> Tuple[int, str]: """Get error code and message as tuple.""" return (error_code.value, cls.get_message(error_code)) @classmethod - def get_all_messages(cls) -> dict: + def get_all_messages(cls) -> Dict: """Get all error code to message mappings.""" return {code.value: msg for code, msg in cls._MESSAGES.items()} diff --git a/backend/consts/exceptions.py b/backend/consts/exceptions.py index 074b4a5b0..e5e4c7a89 100644 --- a/backend/consts/exceptions.py +++ b/backend/consts/exceptions.py @@ -6,13 +6,13 @@ 1. New Framework (with ErrorCode): from consts.error_code import ErrorCode from consts.exceptions import AppException - + raise AppException(ErrorCode.COMMON_VALIDATION_ERROR, "Validation failed") raise AppException(ErrorCode.MCP_CONNECTION_FAILED, "Connection timeout", details={"host": "localhost"}) 2. Legacy Framework (simple exceptions): from consts.exceptions import ValidationError, NotFoundException, MCPConnectionError - + raise ValidationError("Tenant name cannot be empty") raise NotFoundException("Tenant 123 not found") raise MCPConnectionError("MCP connection failed") @@ -22,10 +22,12 @@ from .error_code import ErrorCode, ERROR_CODE_HTTP_STATUS from .error_message import ErrorMessage +from typing import List # ==================== New Framework: AppException with ErrorCode ==================== + class AppException(Exception): """ Base application exception with ErrorCode. 
@@ -35,7 +37,9 @@ class AppException(Exception): raise AppException(ErrorCode.MCP_CONNECTION_FAILED, "Timeout", details={"host": "x"}) """ - def __init__(self, error_code: ErrorCode, message: str = None, details: dict = None): + def __init__( + self, error_code: ErrorCode, message: str = None, details: dict = None + ): self.error_code = error_code self.message = message or ErrorMessage.get_message(error_code) self.details = details or {} @@ -43,9 +47,11 @@ def __init__(self, error_code: ErrorCode, message: str = None, details: dict = N def to_dict(self) -> dict: return { - "code": str(self.error_code.value), # Keep as string to preserve leading zeros + "code": str( + self.error_code.value + ), # Keep as string to preserve leading zeros "message": self.message, - "details": self.details if self.details else None + "details": self.details if self.details else None, } @property @@ -70,136 +76,200 @@ def raise_error(error_code: ErrorCode, message: str = None, details: dict = None # These do NOT require ErrorCode - they are simple Exception subclasses. # Exception handler will infer ErrorCode from class name. 
+ class AgentRunException(Exception): """Exception raised when agent run fails.""" + pass class LimitExceededError(Exception): """Raised when an outer platform calling too frequently""" + pass class UnauthorizedError(Exception): """Raised when a user from outer platform is unauthorized.""" + pass class SignatureValidationError(Exception): """Raised when X-Signature header is missing or does not match the expected HMAC value.""" + pass class MemoryPreparationException(Exception): """Raised when memory preprocessing or retrieval fails prior to agent run.""" + pass class MCPConnectionError(Exception): """Raised when MCP connection fails.""" + pass class MCPNameIllegal(Exception): """Raised when MCP name is illegal.""" + + pass + + +class McpNotFoundError(Exception): + """Raised when MCP resource is not found.""" + pass + + +class McpValidationError(Exception): + """Raised when MCP payload or runtime data is invalid.""" + pass + + +class McpNameConflictError(Exception): + """Raised when MCP name conflicts with an existing enabled service.""" + pass + + +class McpPortConflictError(Exception): + """Raised when an MCP container port conflicts with an existing service or runtime port.""" pass class NoInviteCodeException(Exception): """Raised when invite code is not found.""" + pass class IncorrectInviteCodeException(Exception): """Raised when invite code is incorrect.""" + pass class OfficeConversionException(Exception): """Raised when Office-to-PDF conversion via data-process service fails.""" + pass class UnsupportedFileTypeException(Exception): """Raised when a file type is not supported for the requested operation.""" + pass class FileTooLargeException(Exception): """Raised when a file exceeds the maximum allowed size for the requested operation.""" + pass class UserRegistrationException(Exception): """Raised when user registration fails.""" + pass class TimeoutException(Exception): """Raised when timeout occurs.""" + pass class ValidationError(Exception): """Raised 
when validation fails.""" + pass class NotFoundException(Exception): """Raised when not found exception occurs.""" + pass class MEConnectionException(Exception): """Raised when ME connection fails.""" + pass class VoiceServiceException(Exception): """Raised when voice service fails.""" + + pass + + +class VoiceConfigException(Exception): + """Raised when voice configuration is invalid or missing.""" + pass class STTConnectionException(Exception): """Raised when STT service connection fails.""" + pass class TTSConnectionException(Exception): """Raised when TTS service connection fails.""" - pass - -class VoiceConfigException(Exception): - """Raised when voice configuration is invalid.""" pass class ToolExecutionException(Exception): """Raised when mcp tool execution failed.""" + pass class MCPContainerError(Exception): """Raised when MCP container operation fails.""" + pass class DuplicateError(Exception): """Raised when a duplicate resource already exists.""" + pass class DataMateConnectionError(Exception): """Raised when DataMate connection fails or URL is not configured.""" + pass +class SkillDuplicateError(Exception): + """Raised when importing an agent with skills that have duplicate names in target tenant.""" + def __init__(self, duplicate_names: List[str]): + self.duplicate_names = duplicate_names + + class SkillException(Exception): """Raised when skill operations fail.""" pass +class OAuthProviderError(Exception): + """Raised when OAuth provider configuration is invalid or provider returns an error.""" + + pass + + +class OAuthLinkError(Exception): + """Raised when linking or unlinking an OAuth account fails.""" + + pass + + class TaskNotFoundError(Exception): """Raised when A2A task is not found (per A2A spec Section 3.4.2).""" pass @@ -251,5 +321,10 @@ class UnsupportedOperationError(Exception): DifyServiceException = Exception # Generic fallback ExternalAPIError = Exception # Generic fallback +# OAuth aliases +OAuthProviderNotConfiguredError = 
OAuthProviderError +OAuthProviderDisabledError = OAuthProviderError +OAuthAccountNotFoundError = NotFoundException + # Signature aliases # SignatureValidationError already defined above diff --git a/backend/consts/model.py b/backend/consts/model.py index 91cf7d1b6..00e5b8a0a 100644 --- a/backend/consts/model.py +++ b/backend/consts/model.py @@ -1,8 +1,10 @@ from enum import Enum -from typing import Optional, Any, List, Dict +from typing import Optional, Any, List, Dict, Literal -from pydantic import BaseModel, Field, EmailStr -from nexent.core.agents.agent_model import ToolConfig +from pydantic import BaseModel, Field, EmailStr, ConfigDict, field_validator +from nexent.core.agents.agent_model import AgentVerificationConfig, ToolConfig + +from consts.prompt_template import PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP class ModelConnectStatusEnum(Enum): @@ -29,7 +31,7 @@ def get_value(cls, status: Optional[str]) -> str: class UserSignUpRequest(BaseModel): """User registration request model""" email: EmailStr - password: str = Field(..., min_length=6) + password: str = Field(..., min_length=8) invite_code: Optional[str] = None auto_login: Optional[bool] = True # Whether to return session after signup @@ -40,6 +42,19 @@ class UserSignInRequest(BaseModel): password: str +class OAuthCompleteRequest(BaseModel): + """Complete a pending OAuth signup.""" + email: Optional[EmailStr] = None + password: str = Field(..., min_length=6) + invite_code: str = Field(..., min_length=1) + + +class UpdatePasswordRequest(BaseModel): + """Password update request model for changing user password""" + old_password: str = Field(..., min_length=1, description="Current password for verification") + new_password: str = Field(..., min_length=8, description="New password to set (min 8 characters)") + + class UserUpdateRequest(BaseModel): """User update request model""" username: Optional[str] = Field(None, min_length=1, max_length=50) @@ -52,6 +67,52 @@ class UserDeleteRequest(BaseModel): 
new_owner_id: Optional[str] = None +class OAuthProviderDefinition(BaseModel): + name: str + display_name: str + icon: str + + authorize_url: str + authorize_method: str = "GET" + authorize_params: Dict[str, str] = {} + authorize_fragment: str = "" + authorize_param_map: Dict[str, str] = { + "client_id": "client_id", + "redirect_uri": "redirect_uri", + "scope": "scope", + "state": "state", + } + encode_redirect_uri: bool = False + + token_url: str + token_method: str = "POST" + token_params_map: Dict[str, str] = { + "client_id": "client_id", + "client_secret": "client_secret", + "code": "code", + "grant_type": "grant_type", + } + token_extra_params: Dict[str, str] = {} + token_error_key: Optional[str] = None + token_error_message_key: Optional[str] = None + token_response_id_key: Optional[str] = None + + userinfo_url: str + userinfo_auth_scheme: str = "Bearer" + userinfo_params: Dict[str, str] = {} + userinfo_field_map: Dict[str, str] = { + "id": "id", + "email": "email", + "username": "login", + } + userinfo_needs_email_fetch: bool = False + userinfo_email_url: Optional[str] = None + + client_id_env: str + client_secret_env: str + enabled_check: Optional[str] = None + + # Response models for model management class ModelResponse(BaseModel): code: int = 200 @@ -72,6 +133,11 @@ class ModelRequest(BaseModel): expected_chunk_size: Optional[int] = None maximum_chunk_size: Optional[int] = None chunk_batch: Optional[int] = None + # STT specific fields + model_appid: Optional[str] = None + access_token: Optional[str] = None + timeout_seconds: Optional[int] = None + concurrency_limit: Optional[int] = None class ProviderModelRequest(BaseModel): @@ -101,14 +167,44 @@ class SingleModelConfig(BaseModel): dimension: Optional[int] = None +class STTModelConfig(BaseModel): + """STT model specific configuration with factory, appid, and access token fields""" + modelName: str + displayName: str + apiConfig: Optional[ModelApiConfig] = None + modelFactory: Optional[str] = None + 
modelAppid: Optional[str] = None + accessToken: Optional[str] = None + + +def _empty_model_config() -> SingleModelConfig: + return SingleModelConfig( + modelName="", + displayName="", + apiConfig=ModelApiConfig(apiKey="", modelUrl="") + ) + + +class TTSModelConfig(BaseModel): + """TTS model specific configuration with factory, appid, and access token fields""" + modelName: str + displayName: str + apiConfig: Optional[ModelApiConfig] = None + modelFactory: Optional[str] = None + modelAppid: Optional[str] = None + accessToken: Optional[str] = None + + class ModelConfig(BaseModel): llm: SingleModelConfig embedding: SingleModelConfig multiEmbedding: SingleModelConfig rerank: SingleModelConfig vlm: SingleModelConfig - stt: SingleModelConfig - tts: SingleModelConfig + vlm2: SingleModelConfig = Field(default_factory=_empty_model_config) + vlm3: SingleModelConfig = Field(default_factory=_empty_model_config) + stt: STTModelConfig + tts: TTSModelConfig class AppConfig(BaseModel): @@ -128,16 +224,41 @@ class GlobalConfig(BaseModel): # Request models +class HistoryItem(BaseModel): + role: str + content: str + minio_files: Optional[List[Dict[str, Any]]] = None + + +class AgentToolParamsRequest(BaseModel): + """Request-scoped tool parameter overrides for a single agent.""" + + tools: Dict[str, Dict[str, Any]] = Field( + default_factory=dict, + description="Mapping from tool identifier to request-scoped override params", + ) + + +class ToolParamsRequest(BaseModel): + """Request-scoped tool parameter overrides for main and managed agents.""" + + agents: Dict[str, AgentToolParamsRequest] = Field( + default_factory=dict, + description="Mapping from agent identifier to tool parameter overrides", + ) + + class AgentRequest(BaseModel): query: str conversation_id: Optional[int] = None - history: Optional[List[Dict]] = None + history: Optional[List[HistoryItem]] = None # Complete list of attachment information minio_files: Optional[List[Dict[str, Any]]] = None agent_id: Optional[int] = 
None model_id: Optional[int] = None version_no: Optional[int] = None is_debug: Optional[bool] = False + tool_params: Optional[ToolParamsRequest] = None class MessageUnit(BaseModel): @@ -236,6 +357,7 @@ class ProcessParams(BaseModel): source_type: str index_name: str authorization: Optional[str] = None + model_id: Optional[int] = None class OpinionRequest(BaseModel): @@ -248,10 +370,110 @@ class GeneratePromptRequest(BaseModel): task_description: str agent_id: int model_id: int + prompt_template_id: Optional[int] = None tool_ids: Optional[List[int]] = Field( None, description="Optional: tool IDs from frontend (takes precedence over database query)") sub_agent_ids: Optional[List[int]] = Field( None, description="Optional: sub-agent IDs from frontend (takes precedence over database query)") + knowledge_base_display_names: Optional[List[str]] = Field( + None, description="Optional: knowledge base display names from frontend (takes precedence over database query)") + has_selected_resources: bool = Field( + True, description="Whether tools or sub-agents are selected; when False, skips generating constraint and few_shots sections") + + +class PromptTemplateContentRequest(BaseModel): + model_config = ConfigDict(populate_by_name=True) + + duty_system_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["duty_system_prompt"] + ) + constraint_system_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["constraint_system_prompt"] + ) + few_shots_system_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["few_shots_system_prompt"] + ) + agent_variable_name_system_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_variable_name_system_prompt"] + ) + agent_display_name_system_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_display_name_system_prompt"] + ) + agent_description_system_prompt: str = Field( + 
alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_description_system_prompt"] + ) + user_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["user_prompt"] + ) + agent_name_regenerate_system_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_name_regenerate_system_prompt"] + ) + agent_name_regenerate_user_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_name_regenerate_user_prompt"] + ) + agent_display_name_regenerate_system_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_display_name_regenerate_system_prompt"] + ) + agent_display_name_regenerate_user_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_display_name_regenerate_user_prompt"] + ) + + +class PromptTemplateRequest(BaseModel): + template_name: str + description: Optional[str] = None + template_type: str = "agent_generate" + template_content_zh: PromptTemplateContentRequest + template_content_en: Optional[PromptTemplateContentRequest] = None +class OptimizePromptSectionRequest(BaseModel): + task_description: str + agent_id: int + model_id: int + section_type: str + section_title: str + current_content: str + feedback: str + mode: Literal["general", "insert", "select"] = "general" + start_pos: Optional[int] = Field(None, description="Start position for insert/select mode") + end_pos: Optional[int] = Field(None, description="End position for insert/select mode") + tool_ids: Optional[List[int]] = Field( + None, description="Optional: tool IDs from frontend (takes precedence over database query)") + sub_agent_ids: Optional[List[int]] = Field( + None, description="Optional: sub-agent IDs from frontend (takes precedence over database query)") + knowledge_base_display_names: Optional[List[str]] = Field( + None, description="Optional: knowledge base display names from frontend (takes precedence over database query)") + + +class BadCaseItem(BaseModel): + question: str + answer: str + 
label: Optional[str] = None + reason: Optional[str] = None + + +class OptimizePromptBadCaseRequest(BaseModel): + agent_id: int + model_id: int + current_content: str + bad_cases: List[BadCaseItem] + section_type: str + section_title: str + tool_ids: Optional[List[int]] = Field(None) + sub_agent_ids: Optional[List[int]] = Field(None) + knowledge_base_display_names: Optional[List[str]] = Field(None) + + +class OptimizeFromDebugSelected(BaseModel): + user_question: str + assistant_answer: str + + +class OptimizePromptFromDebugRequest(BaseModel): + agent_id: int + model_id: int + feedback: str + selected: OptimizeFromDebugSelected + history: Optional[List[HistoryItem]] = None class GenerateTitleRequest(BaseModel): @@ -269,7 +491,7 @@ class AgentInfoRequest(BaseModel): author: Optional[str] = None model_name: Optional[str] = None model_id: Optional[int] = None - max_steps: Optional[int] = None + max_steps: Optional[int] = Field(default=None, ge=1, le=30) provide_run_summary: Optional[bool] = None duty_prompt: Optional[str] = None constraint_prompt: Optional[str] = None @@ -277,13 +499,27 @@ class AgentInfoRequest(BaseModel): enabled: Optional[bool] = None business_logic_model_name: Optional[str] = None business_logic_model_id: Optional[int] = None + prompt_template_id: Optional[int] = None + prompt_template_name: Optional[str] = None enabled_tool_ids: Optional[List[int]] = None enabled_skill_ids: Optional[List[int]] = None related_agent_ids: Optional[List[int]] = None + related_external_agent_ids: Optional[List[int]] = None group_ids: Optional[List[int]] = None ingroup_permission: Optional[str] = None + enable_context_manager: Optional[bool] = None + verification_config: Optional[Dict[str, Any]] = None + greeting_message: Optional[str] = None + example_questions: Optional[List[str]] = None version_no: int = 0 + @field_validator("verification_config", mode="before") + @classmethod + def normalize_verification_config(cls, value): + if value is None: + return None + return 
AgentVerificationConfig.model_validate(value).model_dump() + class AgentIDRequest(BaseModel): agent_id: int @@ -307,6 +543,7 @@ class SkillInstanceInfoRequest(BaseModel): agent_id: int enabled: bool = True version_no: int = 0 + config_values: Optional[Dict[str, Any]] = None class ToolInstanceSearchRequest(BaseModel): @@ -347,6 +584,7 @@ class MessageIdRequest(BaseModel): class ExportAndImportAgentInfo(BaseModel): agent_id: int + tenant_id: Optional[str] = None name: str display_name: Optional[str] = None description: str @@ -354,6 +592,7 @@ class ExportAndImportAgentInfo(BaseModel): author: Optional[str] = None max_steps: int provide_run_summary: bool + verification_config: Optional[Dict[str, Any]] = None duty_prompt: Optional[str] = None constraint_prompt: Optional[str] = None few_shots_prompt: Optional[str] = None @@ -364,6 +603,9 @@ class ExportAndImportAgentInfo(BaseModel): model_name: Optional[str] = None business_logic_model_id: Optional[int] = None business_logic_model_name: Optional[str] = None + skill_names: Optional[List[str]] = None + prompt_template_id: Optional[int] = None + prompt_template_name: Optional[str] = None class Config: arbitrary_types_allowed = True @@ -380,9 +622,21 @@ class ExportAndImportDataFormat(BaseModel): mcp_info: List[MCPInfo] +class AgentRepositorySnapshot(ExportAndImportDataFormat): + """Frozen marketplace snapshot: export format plus optional skill ZIP payloads.""" + skills: Optional[List["SkillZipEntry"]] = None + + +class SkillZipEntry(BaseModel): + """A skill bundled inside an agent export ZIP.""" + skill_name: str + skill_zip_base64: str + + class AgentImportRequest(BaseModel): agent_info: ExportAndImportDataFormat force_import: bool = False + skills: Optional[List[SkillZipEntry]] = None class AgentNameBatchRegenerateItem(BaseModel): @@ -448,19 +702,6 @@ class VoiceConnectivityResponse(BaseModel): message: str = Field(..., description="Status message") -class TTSRequest(BaseModel): - """Request model for TTS text-to-speech 
conversion""" - text: str = Field(..., min_length=1, - description="Text to convert to speech") - stream: bool = Field(True, description="Whether to stream the audio") - - -class TTSResponse(BaseModel): - """Response model for TTS conversion""" - status: str = Field(..., description="Status of the TTS conversion") - message: Optional[str] = Field(None, description="Additional message") - - class ToolValidateRequest(BaseModel): """Request model for tool validation""" name: str = Field(..., description="Tool name to validate") @@ -510,6 +751,8 @@ class MCPUpdateRequest(BaseModel): new_mcp_url: str = Field(..., description="New MCP server URL") new_authorization_token: Optional[str] = Field( None, description="New authorization token for MCP server authentication (e.g., Bearer token)") + custom_headers: Optional[Dict[str, Any]] = Field( + None, description="Custom HTTP headers as JSON object") # Tenant Management Data Models @@ -518,6 +761,22 @@ class TenantCreateRequest(BaseModel): """Request model for creating a tenant""" tenant_name: str = Field(..., min_length=1, description="Tenant display name") + skill_ids: Optional[List[int]] = Field( + default=None, + description="Skill IDs to install for the new tenant (legacy, use skill_names instead)" + ) + skill_names: Optional[List[str]] = Field( + default=None, + description="Skill names to install for the new tenant. " + "Each name is used to derive a .zip filename from " + "OFFICIAL_SKILLS_ZIP_PATH and installed via upload." + ) + locale: Optional[str] = Field( + default=None, + description="Frontend locale when creating the tenant (e.g. 'zh' or 'en'). " + "Determines the source label for auto-installed skills: " + "'zh' → '官方', other locales → 'official'." 
+ ) class TenantUpdateRequest(BaseModel): @@ -689,15 +948,20 @@ class ManageTenantModelCreateRequest(BaseModel): tenant_id: str = Field(..., min_length=1, description="Target tenant ID to create model for") model_repo: Optional[str] = Field('', description="Model repository path") model_name: str = Field(..., description="Model name") - model_type: str = Field(..., description="Model type (e.g., 'llm', 'embedding', 'vlm', 'tts', 'stt')") + model_type: str = Field(..., description="Model type (e.g., 'llm', 'embedding', 'vlm', 'stt')") api_key: Optional[str] = Field('', description="API key for the model") base_url: Optional[str] = Field('', description="Base URL for the model API") max_tokens: Optional[int] = Field(0, description="Maximum tokens for the model") display_name: Optional[str] = Field('', description="Display name for the model") - model_factory: Optional[str] = Field('OpenAI-API-Compatible', description="Model factory/provider name") + model_factory: Optional[str] = Field(None, description="Model factory/vendor for the model") expected_chunk_size: Optional[int] = Field(None, description="Expected chunk size for embedding models") maximum_chunk_size: Optional[int] = Field(None, description="Maximum chunk size for embedding models") chunk_batch: Optional[int] = Field(None, description="Batch size for chunking") + # STT specific fields + model_appid: Optional[str] = Field(None, description="Application ID for STT models (e.g., Volcano Engine)") + access_token: Optional[str] = Field(None, description="Access token for STT models (e.g., Volcano Engine)") + timeout_seconds: Optional[int] = Field(None, description="Request timeout in seconds") + concurrency_limit: Optional[int] = Field(None, description="Maximum concurrent requests for this model") class ManageTenantModelUpdateRequest(BaseModel): @@ -711,10 +975,15 @@ class ManageTenantModelUpdateRequest(BaseModel): base_url: Optional[str] = Field(None, description="Base URL for the model API") max_tokens: 
Optional[int] = Field(None, description="Maximum tokens for the model") display_name: Optional[str] = Field(None, description="New display name for the model") - model_factory: Optional[str] = Field(None, description="Model factory/provider name") + model_factory: Optional[str] = Field(None, description="Model factory/vendor for the model") expected_chunk_size: Optional[int] = Field(None, description="Expected chunk size for embedding models") maximum_chunk_size: Optional[int] = Field(None, description="Maximum chunk size for embedding models") chunk_batch: Optional[int] = Field(None, description="Batch size for chunking") + # STT specific fields + model_appid: Optional[str] = Field(None, description="Application ID for STT models") + access_token: Optional[str] = Field(None, description="Access token for STT models") + timeout_seconds: Optional[int] = Field(None, description="Request timeout in seconds") + concurrency_limit: Optional[int] = Field(None, description="Maximum concurrent requests for this model") class ManageTenantModelDeleteRequest(BaseModel): @@ -772,6 +1041,7 @@ class VersionListItemResponse(BaseModel): source_version_no: Optional[int] = Field(None, description="Source version number if rollback") source_type: Optional[str] = Field(None, description="Source type: NORMAL / ROLLBACK") status: str = Field(..., description="Version status: RELEASED / DISABLED / ARCHIVED") + is_a2a: bool = Field(False, description="Whether this version is published as an A2A Server agent") created_by: str = Field(..., description="User who published this version") create_time: Optional[str] = Field(None, description="Publish timestamp") @@ -791,6 +1061,7 @@ class VersionDetailResponse(BaseModel): source_version_no: Optional[int] = Field(None, description="Source version number") source_type: Optional[str] = Field(None, description="Source type") status: str = Field(..., description="Version status") + is_a2a: bool = Field(False, description="Whether this version is 
published as an A2A Server agent") created_by: str = Field(..., description="User who published this version") create_time: Optional[str] = Field(None, description="Publish timestamp") agent_info: Optional[dict] = Field(None, description="Agent info snapshot") @@ -831,3 +1102,261 @@ class CurrentVersionResponse(BaseModel): release_note: Optional[str] = Field(None, description="Release notes") created_by: str = Field(..., description="User who published this version") create_time: Optional[str] = Field(None, description="Publish timestamp") + + +# Skill Management Data Models +# --------------------------------------------------------------------------- +class SkillCreateRequest(BaseModel): + """Request model for creating a skill via JSON.""" + name: str + description: str + content: str + tool_ids: Optional[List[int]] = [] + tool_names: Optional[List[str]] = [] + tags: Optional[List[str]] = [] + source: Optional[str] = "custom" + config_schemas: Optional[Dict[str, Any]] = None + config_values: Optional[Dict[str, Any]] = None + files: Optional[List[Dict[str, str]]] = Field( + default_factory=list, + description="Additional skill files beyond SKILL.md. " + "Each entry has 'path' (relative path) and 'content'. " + "SKILL.md may also be sent here; the 'content' field is the primary SKILL.md source." + ) + + +class SkillFileData(BaseModel): + """A single file within a skill.""" + path: str = Field(description="Relative file path within the skill (e.g. 
'SKILL.md', 'scripts/run.py')") + content: str = Field(description="Full file content") + + +class SkillUpdateRequest(BaseModel): + """Request model for updating a skill.""" + description: Optional[str] = None + content: Optional[str] = None + tool_ids: Optional[List[int]] = None + tool_names: Optional[List[str]] = None + tags: Optional[List[str]] = None + source: Optional[str] = None + config_schemas: Optional[Dict[str, Any]] = None + config_values: Optional[Dict[str, Any]] = None + files: Optional[List[SkillFileData]] = Field( + default_factory=list, + description="Updated skill files. Each entry has file_path and content. " + "Pass 'SKILL.md' here to update the main skill file; other files are written as-is." + ) + + +class SkillResponse(BaseModel): + """Response model for skill data.""" + skill_id: int + name: str + description: str + content: str + tool_ids: List[int] + tags: List[str] + source: str + config_schemas: Optional[Dict[str, Any]] = None + config_values: Optional[Dict[str, Any]] = None + created_by: Optional[str] = None + create_time: Optional[str] = None + updated_by: Optional[str] = None + update_time: Optional[str] = None + + +class SkillCreateInteractiveRequest(BaseModel): + """Request model for interactive skill creation via LLM agent.""" + user_request: str + existing_skill: Optional[Dict[str, Any]] = None + complexity: Optional[str] = "simple" + language: Optional[str] = "zh" + + +# --------------------------------------------------------------------------- +# MCP Management Data Models +# --------------------------------------------------------------------------- + +class MCPSourceType(str, Enum): + """MCP source type enumeration""" + LOCAL = "local" + MCP_REGISTRY = "mcp_registry" + COMMUNITY = "community" + + +class AddMcpServiceRequest(BaseModel): + """Request model for adding an MCP service""" + name: str = Field(..., min_length=1, description="MCP service name") + server_url: str = Field(..., min_length=1, description="MCP server URL") 
+ description: Optional[str] = Field(None, description="MCP service description") + source: MCPSourceType = Field(default=MCPSourceType.LOCAL, description="MCP source type") + tags: List[str] = Field(default_factory=list, description="MCP tags") + authorization_token: Optional[str] = Field(None, description="Authorization token for MCP server") + custom_headers: Optional[Dict[str, Any]] = Field(None, description="Custom HTTP headers as JSON object") + container_config: Optional[Dict[str, Any]] = Field(None, description="Container configuration") + registry_json: Optional[Dict[str, Any]] = Field(None, description="Registry metadata JSON") + enabled: Optional[bool] = Field(default=False, description="Whether the MCP is enabled after creation") + + @field_validator("name", "server_url", "description", "authorization_token", mode="before") + @classmethod + def _strip_text(cls, value: Any): + if isinstance(value, str): + return value.strip() + return value + + +class AddContainerMcpServiceRequest(BaseModel): + """Request model for adding a container-based MCP service""" + name: str = Field(..., min_length=1, description="MCP service name") + description: Optional[str] = Field(None, description="MCP service description") + source: MCPSourceType = Field(default=MCPSourceType.LOCAL, description="MCP source type") + tags: List[str] = Field(default_factory=list, description="MCP tags") + authorization_token: Optional[str] = Field(None, description="Authorization token for MCP server") + registry_json: Optional[Dict[str, Any]] = Field(None, description="Registry metadata JSON") + port: int = Field(..., ge=1, le=65535, description="Host port for the container") + mcp_config: MCPConfigRequest = Field(..., description="MCP server configuration") + + @field_validator("name", "description", "authorization_token", mode="before") + @classmethod + def _strip_text(cls, value: Any): + if isinstance(value, str): + return value.strip() + return value + + +class 
UpdateMcpServiceRequest(BaseModel): + """Request model for updating an MCP service""" + mcp_id: int = Field(..., gt=0, description="MCP record ID") + name: str = Field(..., min_length=1, description="New MCP service name") + description: Optional[str] = Field(None, description="MCP service description") + server_url: str = Field(..., min_length=1, description="New MCP server URL") + tags: List[str] = Field(default_factory=list, description="MCP tags") + authorization_token: Optional[str] = Field(None, description="Authorization token for MCP server") + custom_headers: Optional[Dict[str, Any]] = Field(None, description="Custom HTTP headers as JSON object") + + @field_validator("name", "server_url", "description", "authorization_token", mode="before") + @classmethod + def _strip_text(cls, value: Any): + if isinstance(value, str): + return value.strip() + return value + + +class EnableMcpServiceRequest(BaseModel): + """Request model for enabling an MCP service""" + mcp_id: int = Field(..., gt=0, description="MCP record ID to enable") + + +class DisableMcpServiceRequest(BaseModel): + """Request model for disabling an MCP service""" + mcp_id: int = Field(..., gt=0, description="MCP record ID to disable") + + +class HealthcheckMcpServiceRequest(BaseModel): + """Request model for checking MCP service health""" + mcp_id: int = Field(..., gt=0, description="MCP record ID to health check") + + +class ListMcpToolsRequest(BaseModel): + """Request model for listing MCP service tools""" + mcp_id: int = Field(..., gt=0, description="MCP record ID") + + +class PortConflictCheckRequest(BaseModel): + """Request model for checking port availability""" + port: int = Field(..., ge=1, le=65535, description="Port number to check") + + +class ListMcpServicesQuery(BaseModel): + """Query parameters for listing MCP services""" + tag: Optional[str] = Field(None, description="Filter by tag") + + @field_validator("tag", mode="before") + @classmethod + def _strip_tag(cls, value: Any): + if 
isinstance(value, str): + stripped = value.strip() + return stripped or None + return value + + +class RegistryListQuery(BaseModel): + """Query parameters for listing MCP registry services""" + search: Optional[str] = Field(None, description="Search keyword") + include_deleted: bool = Field(default=False, description="Include deleted records") + updated_since: Optional[str] = Field(None, description="Filter by update time") + version: Optional[str] = Field(None, description="Filter by version") + cursor: Optional[str] = Field(None, description="Pagination cursor") + limit: int = Field(default=30, ge=1, le=100, description="Items per page") + + @field_validator("search", "updated_since", "version", "cursor", mode="before") + @classmethod + def _strip_text(cls, value: Any): + if isinstance(value, str): + stripped = value.strip() + return stripped or None + return value + + +class CommunityListRequest(BaseModel): + """Request model for listing community MCP services""" + search: Optional[str] = Field(None, description="Search keyword") + tag: Optional[str] = Field(None, description="Filter by tag") + transport_type: Optional[str] = Field(None,description="Filter by transport: url or container") + cursor: Optional[str] = Field(None, description="Pagination cursor") + limit: int = Field(default=30, ge=1, le=100, description="Items per page") + + @field_validator("search", "tag", "cursor", "transport_type", mode="before") + @classmethod + def _strip_text(cls, value: Any): + if isinstance(value, str): + stripped = value.strip() + return stripped or None + return value + + +class CommunityPublishRequest(BaseModel): + """Publish a local MCP to the community; optional fields override the snapshot.""" + + mcp_id: int = Field(..., gt=0, description="MCP record ID to publish") + name: Optional[str] = Field(None, description="Community display name override") + description: Optional[str] = Field(None, description="Description override") + version: Optional[str] = Field(None, 
description="Version override") + tags: Optional[List[str]] = Field(None, description="Tags override") + mcp_server: Optional[str] = Field(None, max_length=500, description="Remote MCP server URL override (URL / HTTP / SSE transports)") + config_json: Optional[Dict[str, Any]] = Field(None, description="Container MCP configuration JSON override") + + @field_validator("name", "description", "version", "mcp_server", mode="before") + @classmethod + def _strip_publish_optional_text(cls, value: Any): + if isinstance(value, str): + stripped = value.strip() + return stripped or None + return value + + +class CommunityUpdateRequest(BaseModel): + """Request model for updating community MCP service""" + community_id: int = Field(..., gt=0, description="Community record ID") + name: Optional[str] = Field(default=None, min_length=1, description="New MCP service name") + description: Optional[str] = Field(None, description="MCP service description") + tags: List[str] = Field(default_factory=list, description="MCP tags") + version: Optional[str] = Field(None, description="MCP version") + registry_json: Optional[Dict[str, Any]] = Field(None, description="Registry metadata JSON") + config_json: Optional[Dict[str, Any]] = Field( + None, + description="Container MCP configuration JSON (omit to leave unchanged)", + ) + + @field_validator("name", "description", "version", mode="before") + @classmethod + def _strip_text(cls, value: Any): + if isinstance(value, str): + stripped = value.strip() + return stripped or None + return value + + +class DeleteMcpServiceRequest(BaseModel): + """Request model for deleting an MCP service""" + mcp_id: int = Field(..., gt=0, description="MCP record ID to delete") diff --git a/backend/consts/oauth_providers.py b/backend/consts/oauth_providers.py new file mode 100644 index 000000000..7429855b6 --- /dev/null +++ b/backend/consts/oauth_providers.py @@ -0,0 +1,140 @@ +import os +from typing import Dict + +from consts.model import OAuthProviderDefinition + 
+GITHUB_PROVIDER = OAuthProviderDefinition( + name="github", + display_name="GitHub", + icon="github", + authorize_url="https://github.com/login/oauth/authorize", + authorize_params={"scope": "read:user user:email"}, + token_url="https://github.com/login/oauth/access_token", + token_error_key="error", + token_error_message_key="error_description", + userinfo_url="https://api.github.com/user", + userinfo_field_map={ + "id": "id", + "email": "email", + "username": "login", + }, + userinfo_needs_email_fetch=True, + userinfo_email_url="https://api.github.com/user/emails", + client_id_env="GITHUB_OAUTH_CLIENT_ID", + client_secret_env="GITHUB_OAUTH_CLIENT_SECRET", +) + +GDE_PROVIDER = OAuthProviderDefinition( + name="gde", + display_name="Gde", + icon="gde", + authorize_url=f"{os.getenv('GDE_URL')}/dspcas/oauth2.0/authorize", + authorize_param_map={"client_id": "client_id", "redirect_uri": "redirect_uri"}, + token_url=f"{os.getenv('GDE_URL')}/dspcas/v2/oauth2.0/accessToken", + token_params_map={ + "client_id": "client_id", + "client_secret": "secret", + "code": "code", + "grant_type": "grant_type", + "redirect_uri": "redirect_uri", + }, + token_error_key="errorCode", + token_error_message_key="errorMessage", + userinfo_url=f"{os.getenv('GDE_URL')}/dspcas/oauth2.0/profile", + userinfo_params={"access_token": "{access_token}"}, + userinfo_field_map={"id": "attributes.userId", "username": "id"}, + client_id_env="GDE_OAUTH_CLIENT_ID", + client_secret_env="GDE_OAUTH_CLIENT_SECRET", +) + +LINK_APP_PROVIDER = OAuthProviderDefinition( + name="link_app", + display_name="Link App", + icon="link_app", + authorize_url=f"{os.getenv('LINK_APP_URL')}/CNS/oauth2/authorize", + authorize_params={"response_type": "code", "scope": "read write"}, + token_url=f"{os.getenv('LINK_APP_URL')}/CNS/oauth2/token", + token_params_map={ + "client_id": "client_id", + "client_secret": "client_secret", + "code": "code", + "grant_type": "grant_type", + "redirect_uri": "redirect_uri", + }, + 
token_error_key="error", + token_error_message_key="error_description", + userinfo_url=f"{os.getenv('LINK_APP_URL')}/CNS/getUserInfo", + userinfo_field_map={ + "id": "data.id", + "email": "data.email", + "username": "data.username", + }, + client_id_env="LINK_APP_OAUTH_CLIENT_ID", + client_secret_env="LINK_APP_OAUTH_CLIENT_SECRET", +) + +WECHAT_PROVIDER = OAuthProviderDefinition( + name="wechat", + display_name="WeChat", + icon="wechat", + authorize_url="https://open.weixin.qq.com/connect/qrconnect", + authorize_params={"response_type": "code", "scope": "snsapi_login"}, + authorize_fragment="#wechat_redirect", + authorize_param_map={ + "client_id": "appid", + "redirect_uri": "redirect_uri", + "scope": "scope", + "state": "state", + }, + encode_redirect_uri=True, + token_url="https://api.weixin.qq.com/sns/oauth2/access_token", + token_method="GET", + token_params_map={ + "client_id": "appid", + "client_secret": "secret", + "code": "code", + "grant_type": "grant_type", + }, + token_error_key="errcode", + token_error_message_key="errmsg", + token_response_id_key="openid", + userinfo_url="https://api.weixin.qq.com/sns/userinfo", + userinfo_auth_scheme="", + userinfo_params={"openid": "{openid}"}, + userinfo_field_map={ + "id": "openid", + "email": "", + "username": "nickname", + }, + client_id_env="WECHAT_OAUTH_APP_ID", + client_secret_env="WECHAT_OAUTH_APP_SECRET", + enabled_check="ENABLE_WECHAT_OAUTH", +) + +OAUTH_PROVIDER_REGISTRY: Dict[str, OAuthProviderDefinition] = { + "github": GITHUB_PROVIDER, + "wechat": WECHAT_PROVIDER, + "gde": GDE_PROVIDER, + "link_app": LINK_APP_PROVIDER, +} + + +def get_provider_definition(provider: str) -> OAuthProviderDefinition: + return OAUTH_PROVIDER_REGISTRY[provider] + + +def is_provider_enabled(definition: OAuthProviderDefinition) -> bool: + if definition.enabled_check: + return os.getenv(definition.enabled_check, "false").lower() in ( + "true", + "1", + "yes", + ) + + client_id = os.getenv(definition.client_id_env, "") + 
client_secret = os.getenv(definition.client_secret_env, "") + return bool(client_id and client_secret) + + +def get_all_provider_definitions() -> Dict[str, OAuthProviderDefinition]: + return dict(OAUTH_PROVIDER_REGISTRY) diff --git a/backend/consts/prompt_template.py b/backend/consts/prompt_template.py new file mode 100644 index 000000000..febcaeca5 --- /dev/null +++ b/backend/consts/prompt_template.py @@ -0,0 +1,15 @@ +PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP = { + "duty_system_prompt": "DUTY_SYSTEM_PROMPT", + "constraint_system_prompt": "CONSTRAINT_SYSTEM_PROMPT", + "few_shots_system_prompt": "FEW_SHOTS_SYSTEM_PROMPT", + "agent_variable_name_system_prompt": "AGENT_VARIABLE_NAME_SYSTEM_PROMPT", + "agent_display_name_system_prompt": "AGENT_DISPLAY_NAME_SYSTEM_PROMPT", + "agent_description_system_prompt": "AGENT_DESCRIPTION_SYSTEM_PROMPT", + "user_prompt": "USER_PROMPT", + "agent_name_regenerate_system_prompt": "AGENT_NAME_REGENERATE_SYSTEM_PROMPT", + "agent_name_regenerate_user_prompt": "AGENT_NAME_REGENERATE_USER_PROMPT", + "agent_display_name_regenerate_system_prompt": "AGENT_DISPLAY_NAME_REGENERATE_SYSTEM_PROMPT", + "agent_display_name_regenerate_user_prompt": "AGENT_DISPLAY_NAME_REGENERATE_USER_PROMPT", +} + +PROMPT_GENERATE_TEMPLATE_FIELDS = tuple(PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP.keys()) diff --git a/backend/consts/provider.py b/backend/consts/provider.py index 38bbc4027..fe49332b7 100644 --- a/backend/consts/provider.py +++ b/backend/consts/provider.py @@ -17,6 +17,8 @@ class ProviderEnum(str, Enum): # Dashcope DASHSCOPE_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1/" DASHSCOPE_GET_URL = "https://dashscope.aliyuncs.com/api/v1/models" +DASHSCOPE_REALTIME_BASE_URL = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime" +DASHSCOPE_STT_BASE_URL = DASHSCOPE_REALTIME_BASE_URL # TokenPony TOKENPONY_BASE_URL = "https://api.tokenpony.cn/v1/" diff --git a/backend/consts/scheduler.py b/backend/consts/scheduler.py new file mode 100644 index 
000000000..6820a9687 --- /dev/null +++ b/backend/consts/scheduler.py @@ -0,0 +1,28 @@ +""" +Scheduler frequency constants +Centralized definition for auto-summary frequency options +""" +from datetime import timedelta + +# Core frequency config: includes value, timedelta, and label; this is the single source of truth +SUMMARY_FREQUENCY_CONFIG = [ + {"value": "1h", "timedelta": timedelta(hours=1), "label": "1h"}, + {"value": "3h", "timedelta": timedelta(hours=3), "label": "3h"}, + {"value": "6h", "timedelta": timedelta(hours=6), "label": "6h"}, + {"value": "1d", "timedelta": timedelta(days=1), "label": "1d"}, + {"value": "1w", "timedelta": timedelta(weeks=1), "label": "1w"}, +] + +# Generate valid frequency list from config (for validation) +VALID_SUMMARY_FREQUENCIES = [item["value"] for item in SUMMARY_FREQUENCY_CONFIG] + [None] + +# Generate frequency to timedelta mapping from config (direct value, no loop conversion needed) +FREQUENCY_MAP = {item["value"]: item["timedelta"] for item in SUMMARY_FREQUENCY_CONFIG} + +# Generate API options from config (for frontend) +SUMMARY_FREQUENCY_OPTIONS_FOR_API = [ + {"value": "disabled", "label": "Disabled"}, +] + [{"value": item["value"], "label": item["value"]} for item in SUMMARY_FREQUENCY_CONFIG] + +# Scheduler check interval (seconds) +SCHEDULER_CHECK_INTERVAL_SECONDS = 30 * 60 diff --git a/backend/data_process/ray_actors.py b/backend/data_process/ray_actors.py index 2fa590bec..c3879c007 100644 --- a/backend/data_process/ray_actors.py +++ b/backend/data_process/ray_actors.py @@ -1,11 +1,20 @@ +from io import BytesIO import logging import json +import time from typing import Any, Dict, List, Optional import ray -from consts.const import RAY_ACTOR_NUM_CPUS, REDIS_BACKEND_URL, DEFAULT_EXPECTED_CHUNK_SIZE, DEFAULT_MAXIMUM_CHUNK_SIZE -from database.attachment_db import get_file_stream +from consts.const import ( + RAY_ACTOR_NUM_CPUS, + REDIS_BACKEND_URL, + DEFAULT_EXPECTED_CHUNK_SIZE, + DEFAULT_MAXIMUM_CHUNK_SIZE, + 
TABLE_TRANSFORMER_MODEL_PATH, + UNSTRUCTURED_DEFAULT_MODEL_INITIALIZE_PARAMS_JSON_PATH, +) +from database.attachment_db import build_s3_url, get_file_stream, upload_fileobj from database.model_management_db import get_model_by_model_id from nexent.data_process import DataProcessCore @@ -27,6 +36,60 @@ def __init__(self): f"Ray actor initialized using {RAY_ACTOR_NUM_CPUS} CPU cores...") self._processor = DataProcessCore() + def ping(self) -> bool: + """Lightweight health check used by prewarm logic.""" + return True + + def _prepare_process_params( + self, + task_id: Optional[str], + model_id: Optional[int], + tenant_id: Optional[str], + params: Dict[str, Any], + ) -> Dict[str, Any]: + """ + Normalize task/model-related processing params. + """ + process_params = dict(params) + self._apply_model_paths(process_params) + if task_id: + process_params["task_id"] = task_id + + # Reuse shared model param logic so we also keep extra fields + self._apply_model_chunk_sizes( + model_id=model_id, + tenant_id=tenant_id, + params=process_params, + ) + return process_params + + def _run_file_process( + self, + file_data: bytes, + filename: str, + chunking_strategy: str, + process_params: Dict[str, Any], + log_subject: str, + ) -> List[Dict[str, Any]]: + result = self._processor.file_process( + file_data=file_data, + filename=filename, + chunking_strategy=chunking_strategy, + **process_params + ) + + chunks, images_info = self._normalize_processor_result(result) + if images_info: + self._append_image_chunks( + source=filename, chunks=chunks, images_info=images_info) + chunks = self._validate_chunks(chunks, filename) + if not chunks: + return [] + + logger.info( + f"[RayActor] Processing done: produced {len(chunks)} chunks for {log_subject}='{filename}'") + return chunks + def process_file( self, source: str, @@ -54,54 +117,143 @@ def process_file( """ logger.info( f"[RayActor] Processing start: source='{source}', destination='{destination}', strategy='{chunking_strategy}', 
task_id='{task_id}', model_id='{model_id}'") - - if task_id: - params['task_id'] = task_id - - # Get chunk size parameters from embedding model if model_id is provided - if model_id and tenant_id: - try: - # Get embedding model details directly by model_id - model_record = get_model_by_model_id( - model_id=model_id, tenant_id=tenant_id) - if model_record: - expected_chunk_size = model_record.get( - 'expected_chunk_size', DEFAULT_EXPECTED_CHUNK_SIZE) - maximum_chunk_size = model_record.get( - 'maximum_chunk_size', DEFAULT_MAXIMUM_CHUNK_SIZE) - model_name = model_record.get('display_name') - - # Pass chunk sizes to processing parameters - params['max_characters'] = maximum_chunk_size - params['new_after_n_chars'] = expected_chunk_size - - logger.info( - f"[RayActor] Using chunk sizes from embedding model '{model_name}' (ID: {model_id}): " - f"max_characters={maximum_chunk_size}, new_after_n_chars={expected_chunk_size}") - else: - logger.warning( - f"[RayActor] Embedding model with ID {model_id} not found for tenant '{tenant_id}', using default chunk sizes") - except Exception as e: - logger.warning( - f"[RayActor] Failed to retrieve chunk sizes from embedding model ID {model_id}: {e}. 
Using default chunk sizes") + process_params = self._prepare_process_params( + task_id=task_id, + model_id=model_id, + tenant_id=tenant_id, + params=params, + ) try: + fetch_start = time.perf_counter() file_stream = get_file_stream(source) if file_stream is None: raise FileNotFoundError( f"Unable to fetch file from URL: {source}") file_data = file_stream.read() + fetch_elapsed = time.perf_counter() - fetch_start + logger.info( + f"[RayActor] Fetch file bytes done: destination='{destination}', source='{source}', " + f"bytes={len(file_data)}, elapsed={fetch_elapsed:.3f}s") except Exception as e: logger.error(f"Failed to fetch file from {source}: {e}") raise - chunks = self._processor.file_process( + return self._run_file_process( file_data=file_data, filename=source, chunking_strategy=chunking_strategy, - **params - ) + process_params=process_params, + log_subject="source", + ) + + def _apply_model_paths(self, params: Dict[str, Any]) -> None: + params["table_transformer_model_path"] = TABLE_TRANSFORMER_MODEL_PATH + params[ + "unstructured_default_model_initialize_params_json_path" + ] = UNSTRUCTURED_DEFAULT_MODEL_INITIALIZE_PARAMS_JSON_PATH + + def _apply_model_chunk_sizes( + self, + model_id: Optional[int], + tenant_id: Optional[str], + params: Dict[str, Any], + ) -> None: + if not (model_id and tenant_id): + return + + try: + model_record = get_model_by_model_id( + model_id=model_id, tenant_id=tenant_id) + if not model_record: + logger.warning( + f"[RayActor] Embedding model with ID {model_id} not found for tenant '{tenant_id}', using default chunk sizes") + return + + expected_chunk_size = model_record.get( + 'expected_chunk_size', DEFAULT_EXPECTED_CHUNK_SIZE) + maximum_chunk_size = model_record.get( + 'maximum_chunk_size', DEFAULT_MAXIMUM_CHUNK_SIZE) + model_name = model_record.get('display_name') + model_type = model_record.get('model_type') + + params['max_characters'] = maximum_chunk_size + params['new_after_n_chars'] = expected_chunk_size + if model_type: + 
params['model_type'] = model_type + + logger.info( + f"[RayActor] Using chunk sizes from embedding model '{model_name}' (ID: {model_id}): " + f"max_characters={maximum_chunk_size}, new_after_n_chars={expected_chunk_size}") + except Exception as e: + logger.warning( + f"[RayActor] Failed to retrieve chunk sizes from embedding model ID {model_id}: {e}. Using default chunk sizes") + + def _read_file_bytes(self, source: str) -> bytes: + try: + file_stream = get_file_stream(source) + if file_stream is None: + raise FileNotFoundError( + f"Unable to fetch file from URL: {source}") + return file_stream.read() + except Exception as e: + logger.error(f"Failed to fetch file from {source}: {e}") + raise + + def _normalize_processor_result( + self, result: Any + ) -> tuple[List[Dict[str, Any]], List[Dict[str, Any]]]: + if isinstance(result, tuple) and len(result) == 2: + chunks, images_info = result + return chunks or [], images_info or [] + return result or [], [] + def _append_image_chunks( + self, + source: str, + chunks: List[Dict[str, Any]], + images_info: List[Dict[str, Any]], + ) -> None: + folder = "images_in_attachments" + for index, image_data in enumerate(images_info): + if not isinstance(image_data, dict): + logger.warning( + f"[RayActor] Skipping image entry at index {index}: unexpected type {type(image_data)}" + ) + continue + if "image_bytes" not in image_data: + logger.warning( + f"[RayActor] Skipping image entry at index {index}: missing image_bytes" + ) + continue + + img_obj = BytesIO(image_data["image_bytes"]) + result = upload_fileobj( + file_obj=img_obj, + file_name=f"{index}.{image_data['image_format']}", + prefix=folder) + image_url = build_s3_url(result.get("object_name", "")) + + image_data["source_file"] = source + image_data["image_url"] = image_url + + chunks.append({ + "content": json.dumps({ + "source_file": source, + "position": image_data["position"], + "image_url": image_url, + }), + "filename": source, + "metadata": { + "chunk_index": 
len(chunks) + index, + "process_source": "UniversalImageExtractor", + "image_url": image_url, + } + }) + + def _validate_chunks( + self, chunks: Any, source: str + ) -> List[Dict[str, Any]]: if chunks is None: logger.warning( f"[RayActor] file_process returned None for source='{source}'") @@ -114,10 +266,97 @@ def process_file( logger.warning( f"[RayActor] file_process returned empty list for source='{source}'") return [] + return chunks + + def process_bytes( + self, + file_bytes: bytes, + filename: str, + chunking_strategy: str, + task_id: Optional[str] = None, + model_id: Optional[int] = None, + tenant_id: Optional[str] = None, + **params + ) -> List[Dict[str, Any]]: + """ + Process in-memory file bytes, auto-detecting its type using DataProcessCore.file_process. + """ + logger.info( + f"[RayActor] Processing bytes: filename='{filename}', strategy='{chunking_strategy}', task_id='{task_id}', model_id='{model_id}'" + ) + process_params = self._prepare_process_params( + task_id=task_id, + model_id=model_id, + tenant_id=tenant_id, + params=params, + ) + + return self._run_file_process( + file_data=file_bytes, + filename=filename, + chunking_strategy=chunking_strategy, + process_params=process_params, + log_subject="filename", + ) + + def split_file( + self, + source: str, + destination: str, + task_id: Optional[str] = None, + max_size: int = 5 * 1024 * 1024, + file_data: Optional[bytes] = None, + **params + ) -> List[bytes]: + """ + Split file into parts using DataProcessCore.file_split and return raw bytes list. 
+ """ + logger.info( + f"[RayActor] Splitting file: source='{source}', destination='{destination}', task_id='{task_id}', max_size={max_size}" + ) + + if file_data is None: + try: + fetch_start = time.perf_counter() + file_stream = get_file_stream(source) + if file_stream is None: + raise FileNotFoundError( + f"Unable to fetch file from URL: {source}") + file_data = file_stream.read() + fetch_elapsed = time.perf_counter() - fetch_start + logger.info( + f"[RayActor] Fetch file bytes for split done: destination='{destination}', source='{source}', " + f"bytes={len(file_data)}, elapsed={fetch_elapsed:.3f}s") + except Exception as e: + logger.error(f"Failed to fetch file from {source}: {e}") + raise + + split_start = time.perf_counter() + parts = self._processor.file_split( + file_data=file_data, + filename=source, + max_size=max_size, + **params + ) + split_elapsed = time.perf_counter() - split_start + + if not parts: + logger.info( + f"[RayActor] Split done: destination='{destination}', source='{source}', " + f"parts=0, elapsed={split_elapsed:.3f}s") + return [] + + bytes_parts: List[bytes] = [] + for part in parts: + try: + bytes_parts.append(part.getvalue()) + except Exception: + continue logger.info( - f"[RayActor] Processing done: produced {len(chunks)} chunks for source='{source}'") - return chunks + f"[RayActor] Split done: destination='{destination}', source='{source}', " + f"parts={len(bytes_parts)}, elapsed={split_elapsed:.3f}s") + return bytes_parts def store_chunks_in_redis(self, redis_key: str, chunks: List[Dict[str, Any]]) -> bool: """ diff --git a/backend/data_process/tasks.py b/backend/data_process/tasks.py index 50414b711..4dd6edd69 100644 --- a/backend/data_process/tasks.py +++ b/backend/data_process/tasks.py @@ -4,32 +4,185 @@ import asyncio import json import logging +import math import os import threading import time -from typing import Any, Dict, Optional +from dataclasses import dataclass +from typing import Any, Dict, Optional, List, Tuple import 
aiohttp +import requests import re import ray -from celery import Task, chain, states +from celery import Task, chain, states, group, chord from celery.exceptions import Retry +from celery.result import allow_join_result -from consts.const import ELASTICSEARCH_SERVICE from utils.file_management_utils import get_file_size +from database.attachment_db import get_file_stream +from database.knowledge_db import get_knowledge_record from services.redis_service import get_redis_service from .app import app from .ray_actors import DataProcessorRayActor from consts.const import ( + ELASTICSEARCH_SERVICE, REDIS_BACKEND_URL, FORWARD_REDIS_RETRY_DELAY_S, FORWARD_REDIS_RETRY_MAX, + DP_REDIS_CHUNKS_WAIT_TIMEOUT_S, + DP_REDIS_CHUNKS_POLL_INTERVAL_MS, + RAY_ACTOR_NUM_CPUS, + RAY_NUM_CPUS, DISABLE_RAY_DASHBOARD, ROOT_DIR, + PER_WAVE_TIMEOUT, + MAX_TIMEOUT, + RAY_GLOBAL_ACTOR_POOL_SIZE, + RAY_ACTOR_WARM_TIMEOUT_S, + RAY_GLOBAL_ACTOR_POOL_NAME, + RAY_GLOBAL_ACTOR_POOL_NAMESPACE ) logger = logging.getLogger("data_process.tasks") +ASYNC_SPLIT_RETRY_MAX = max( + FORWARD_REDIS_RETRY_MAX * 5, FORWARD_REDIS_RETRY_MAX) +FORWARD_ES_CHUNK_BATCH_SIZE = 64 +IMAGE_METADATA_PROCESS_SOURCE = "UniversalImageExtractor" + + +def _wait_for_split_ready(redis_key: str, timeout_s: int, poll_interval_ms: int) -> int: + """ + Wait until async split aggregation is marked ready in Redis. + Returns aggregated chunk count. + Raises TimeoutError on timeout. 
+ """ + if not REDIS_BACKEND_URL: + raise RuntimeError("REDIS_BACKEND_URL not configured") + + import redis + + client = redis.Redis.from_url(REDIS_BACKEND_URL, decode_responses=True) + ready_key = f"{redis_key}:ready" + deadline = time.time() + timeout_s + + while time.time() < deadline: + if client.get(ready_key): + cached = client.get(redis_key) + if cached: + try: + chunks = json.loads(cached) + return len(chunks) if isinstance(chunks, list) else 0 + except Exception: + return 0 + return 0 + time.sleep(max(0.01, poll_interval_ms / 1000.0)) + + raise TimeoutError( + f"Timed out waiting for async split aggregation at key '{ready_key}' after {timeout_s}s" + ) + + +def _estimate_parallel_parts() -> int: + try: + total_cpus = RAY_NUM_CPUS + except Exception: + total_cpus = os.cpu_count() or 1 + actor_cpus = max(1, int(RAY_ACTOR_NUM_CPUS)) + return max(1, total_cpus // actor_cpus) + + +def _compute_split_wait_timeout(parts_count: int) -> int: + base_timeout = DP_REDIS_CHUNKS_WAIT_TIMEOUT_S + waves = math.ceil(max(1, parts_count) / _estimate_parallel_parts()) + dynamic_timeout = base_timeout + \ + max(0, waves - 1) * max(1, PER_WAVE_TIMEOUT) + return min(MAX_TIMEOUT, max(base_timeout, dynamic_timeout)) + + +def _count_image_metadata_chunks(chunks: Optional[List[Dict[str, Any]]]) -> int: + if not chunks: + return 0 + return sum( + 1 + for chunk in chunks + if isinstance(chunk, dict) and chunk.get("process_source") == IMAGE_METADATA_PROCESS_SOURCE + ) + + +def _get_next_available_batch_index( + batches: List[List[Dict[str, Any]]], + start_idx: int, + batch_size: int, +) -> int: + total_batches = len(batches) + idx = start_idx + for _ in range(total_batches): + if len(batches[idx]) < batch_size: + return idx + idx = (idx + 1) % total_batches + raise RuntimeError("No available batch capacity") + + +def _distribute_chunks_round_robin( + batches: List[List[Dict[str, Any]]], + chunks: List[Dict[str, Any]], + batch_size: int, + error_context: str, +) -> None: + idx = 0 + for 
chunk in chunks: + try: + idx = _get_next_available_batch_index(batches, idx, batch_size) + except RuntimeError as exc: + raise RuntimeError( + f"No available batch capacity while distributing {error_context}" + ) from exc + batches[idx].append(chunk) + idx = (idx + 1) % len(batches) + + +def _build_balanced_batches( + formatted_chunks: List[Dict[str, Any]], + batch_size: int = FORWARD_ES_CHUNK_BATCH_SIZE, +) -> List[List[Dict[str, Any]]]: + """ + Split chunks into max-size batches and spread image-metadata chunks evenly. + """ + total = len(formatted_chunks) + if total == 0: + return [] + if total <= batch_size: + return [formatted_chunks] + + total_batches = math.ceil(total / batch_size) + image_chunks = [ + chunk for chunk in formatted_chunks + if chunk.get("process_source") == IMAGE_METADATA_PROCESS_SOURCE + ] + text_chunks = [ + chunk for chunk in formatted_chunks + if chunk.get("process_source") != IMAGE_METADATA_PROCESS_SOURCE + ] + + batches: List[List[Dict[str, Any]]] = [[] for _ in range(total_batches)] + + _distribute_chunks_round_robin( + batches=batches, + chunks=image_chunks, + batch_size=batch_size, + error_context="image metadata chunks", + ) + _distribute_chunks_round_robin( + batches=batches, + chunks=text_chunks, + batch_size=batch_size, + error_context="text chunks", + ) + + return batches + # Thread lock for initializing Ray to prevent race conditions ray_init_lock = threading.Lock() @@ -179,21 +332,489 @@ def run_in_thread(): raise -# Initialize the data processing core LAZILY -# This will be initialized on first task run by a worker process -def get_ray_actor() -> Any: +def _delete_source_file_via_http_sync( + *, + base_url: str, + index_name: str, + path_or_url: str, + scope: str, + timeout_s: float = 30.0, +) -> Dict[str, Any]: + base = (base_url or "").rstrip("/") + if not base: + raise RuntimeError("ELASTICSEARCH_SERVICE is not configured") + url = f"{base}/indices/{index_name}/documents" + params = {"path_or_url": path_or_url, "scope": 
scope} + + resp = requests.delete(url, params=params, timeout=timeout_s) + body_text = getattr(resp, "text", "") + parsed = None + try: + parsed = resp.json() + except Exception: + parsed = _parse_json_or_none(body_text) if body_text else None + + return { + "http_status": getattr(resp, "status_code", None), + "response_json": parsed if isinstance(parsed, dict) else None, + "response_text": body_text if not isinstance(parsed, dict) else None, + } + + +def _build_forward_error( + message: str, + index_name: str, + source: Optional[str], + original_filename: Optional[str], +) -> Exception: + return Exception(json.dumps({ + "message": message, + "index_name": index_name, + "task_name": "forward", + "source": source, + "original_filename": original_filename + }, ensure_ascii=False)) + + +def _parse_json_or_none(text: str) -> Optional[Dict[str, Any]]: + try: + parsed = json.loads(text) + return parsed if isinstance(parsed, dict) else None + except Exception: + return None + + +@dataclass(frozen=True) +class _ForwardContext: + task_id: str + request_id: str + start_time: float + source: str + index_name: str + source_type: str + original_filename: Optional[str] + + +def _init_forward_context( + *, + task_id: str, + request_id: str, + start_time: float, + source: str, + index_name: str, + source_type: str, + original_filename: Optional[str], +) -> _ForwardContext: + return _ForwardContext( + task_id=task_id, + request_id=request_id, + start_time=start_time, + source=source, + index_name=index_name, + source_type=source_type, + original_filename=original_filename, + ) + + +def _is_forward_task_cancelled(ctx: _ForwardContext) -> bool: + try: + redis_service = get_redis_service() + return bool(redis_service.is_task_cancelled(ctx.task_id)) + except Exception as exc: + logger.warning( + f"[{ctx.request_id}] FORWARD TASK: Failed to check cancellation flag for task {ctx.task_id}: " + f"{exc}" + ) + return False + + +def _build_forward_cancelled_result(ctx: _ForwardContext) -> 
Dict[str, Any]: + return { + 'task_id': ctx.task_id, + 'source': ctx.source, + 'index_name': ctx.index_name, + 'original_filename': ctx.original_filename, + 'chunks_stored': 0, + 'storage_time': 0, + 'es_result': { + "success": False, + "message": "Indexing cancelled because document was deleted.", + "total_indexed": 0, + "total_submitted": 0, + }, + } + + +def _load_forward_chunks( + self: Task, + *, + processed_data: Dict[str, Any], + original_source: str, + original_index_name: str, + filename: Optional[str], +) -> Tuple[Optional[List[Dict[str, Any]]], bool, str, str, Optional[str]]: + chunks = processed_data.get('chunks') + split_async = bool(processed_data.get('split_async')) + + # If chunks are not in payload, try loading from Redis via the redis_key + if (not chunks) and processed_data.get('redis_key'): + redis_key = processed_data.get('redis_key') + if not REDIS_BACKEND_URL: + raise Exception(json.dumps({ + "message": "REDIS_BACKEND_URL not configured to retrieve chunks", + "index_name": original_index_name, + "task_name": "forward", + "source": original_source, + "original_filename": filename + }, ensure_ascii=False)) + try: + import redis + client = redis.Redis.from_url( + REDIS_BACKEND_URL, decode_responses=True) + ready_key = f"{redis_key}:ready" + if split_async: + ready_flag = client.get(ready_key) + if not ready_flag: + retry_num = getattr(self.request, 'retries', 0) + logger.info( + f"[{self.request.id}] FORWARD TASK: Async split not ready for key {redis_key}. 
Retry {retry_num + 1}/{ASYNC_SPLIT_RETRY_MAX} in {FORWARD_REDIS_RETRY_DELAY_S}s") + raise self.retry( + countdown=FORWARD_REDIS_RETRY_DELAY_S, + max_retries=ASYNC_SPLIT_RETRY_MAX, + exc=Exception(json.dumps({ + "message": "Async split not ready; will retry", + "index_name": original_index_name, + "task_name": "forward", + "source": original_source, + "original_filename": filename + }, ensure_ascii=False)) + ) + cached = client.get(redis_key) + if cached: + try: + logger.debug( + f"[{self.request.id}] FORWARD TASK: Retrieved Redis key '{redis_key}', payload_length={len(cached)}") + chunks = json.loads(cached) + except json.JSONDecodeError as jde: + # Log raw prefix to help diagnose incorrect writes + raw_preview = cached[:120] if isinstance( + cached, str) else str(type(cached)) + logger.error( + f"[{self.request.id}] FORWARD TASK: JSON decode error for key '{redis_key}': {str(jde)}; raw_prefix={raw_preview!r}") + raise + else: + if split_async: + retry_num = getattr(self.request, 'retries', 0) + logger.info( + f"[{self.request.id}] FORWARD TASK: Async split ready but chunks missing for key {redis_key}. Retry {retry_num + 1}/{ASYNC_SPLIT_RETRY_MAX} in {FORWARD_REDIS_RETRY_DELAY_S}s") + raise self.retry( + countdown=FORWARD_REDIS_RETRY_DELAY_S, + max_retries=ASYNC_SPLIT_RETRY_MAX, + exc=Exception(json.dumps({ + "message": "Async split ready but chunks missing; will retry", + "index_name": original_index_name, + "task_name": "forward", + "source": original_source, + "original_filename": filename + }, ensure_ascii=False)) + ) + # No busy-wait: release the worker slot and retry later + retry_num = getattr(self.request, 'retries', 0) + logger.info( + f"[{self.request.id}] FORWARD TASK: Chunks not yet available for key {redis_key}. 
Retry {retry_num + 1}/{FORWARD_REDIS_RETRY_MAX} in {FORWARD_REDIS_RETRY_DELAY_S}s") + raise self.retry( + countdown=FORWARD_REDIS_RETRY_DELAY_S, + max_retries=FORWARD_REDIS_RETRY_MAX, + exc=Exception(json.dumps({ + "message": "Chunks not ready in Redis; will retry", + "index_name": original_index_name, + "task_name": "forward", + "source": original_source, + "original_filename": filename + }, ensure_ascii=False)) + ) + except Retry: + raise + except Exception as exc: + raise Exception(json.dumps({ + "message": f"Failed to retrieve chunks from Redis: {str(exc)}", + "index_name": original_index_name, + "task_name": "forward", + "source": original_source, + "original_filename": filename + }, ensure_ascii=False)) + + if processed_data.get('source'): + original_source = processed_data.get('source') + if processed_data.get('index_name'): + original_index_name = processed_data.get('index_name') + if processed_data.get('original_filename'): + filename = processed_data.get('original_filename') + + logger.info( + f"[{self.request.id}] FORWARD TASK: Received data for source '{original_source}' with {len(chunks) if chunks else 'None'} chunks") + + if chunks is None: + raise Exception(json.dumps({ + "message": "No chunks received for forwarding", + "index_name": original_index_name, + "task_name": "forward", + "source": original_source, + "original_filename": filename + }, ensure_ascii=False)) + if len(chunks) == 0: + if split_async and processed_data.get('redis_key'): + retry_num = getattr(self.request, 'retries', 0) + logger.info( + f"[{self.request.id}] FORWARD TASK: Empty chunks while waiting for async split. 
Retry {retry_num + 1}/{ASYNC_SPLIT_RETRY_MAX} in {FORWARD_REDIS_RETRY_DELAY_S}s") + raise self.retry( + countdown=FORWARD_REDIS_RETRY_DELAY_S, + max_retries=ASYNC_SPLIT_RETRY_MAX, + exc=Exception(json.dumps({ + "message": "Chunks not ready in Redis (empty); will retry", + "index_name": original_index_name, + "task_name": "forward", + "source": original_source, + "original_filename": filename + }, ensure_ascii=False)) + ) + logger.warning( + f"[{self.request.id}] FORWARD TASK: Empty chunks list received for source {original_source}") + + return chunks, split_async, original_source, original_index_name, filename + + +def _extract_error_code_from_es_response( + parsed_body: Optional[Dict[str, Any]], + text: str, +) -> Optional[str]: + error_code = None + if isinstance(parsed_body, dict): + error_code = parsed_body.get("error_code") + detail = parsed_body.get("detail") + if isinstance(detail, dict) and detail.get("error_code"): + error_code = detail.get("error_code") + elif isinstance(detail, str): + parsed_detail = _parse_json_or_none(detail) + if isinstance(parsed_detail, dict): + error_code = parsed_detail.get("error_code", error_code) + + if error_code: + return error_code + + try: + match = re.search( + r'["\']error_code["\']\s*:\s*["\']([^"\']+)["\']', text) + return match.group(1) if match else None + except Exception: + return None + + +def _send_chunks_to_es( + chunks: List[Dict[str, Any]], + index_name: str, + authorization: str | None, + task_id: Optional[str] = None, + source: str = "", + original_filename: str = "", + large_mode: bool = False, +) -> Dict[str, Any]: + async def _post(): + elasticsearch_url = ELASTICSEARCH_SERVICE + if not elasticsearch_url: + raise _build_forward_error( + message="ELASTICSEARCH_SERVICE env is not set", + index_name=index_name, + source=source, + original_filename=original_filename, + ) + route_url = f"/indices/{index_name}/documents" + full_url = elasticsearch_url + route_url + headers = {"Content-Type": "application/json"} 
+ if authorization: + headers["Authorization"] = authorization + if task_id: + headers["X-Task-Id"] = task_id + try: + connector = aiohttp.TCPConnector(verify_ssl=False) + timeout = aiohttp.ClientTimeout(total=600) + + request_params: Dict[str, str] = {} + + if large_mode: + request_params["large_mode"] = "true" + + async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session: + async with session.post( + full_url, + headers=headers, + json=chunks, + params=request_params, + raise_for_status=False + ) as response: + text = await response.text() + status = response.status + parsed_body = _parse_json_or_none(text) + + if status >= 400: + error_code = _extract_error_code_from_es_response( + parsed_body, text) + if error_code: + raise Exception(json.dumps({ + "error_code": error_code + }, ensure_ascii=False)) + + raise Exception( + f"ElasticSearch service returned HTTP {status}") + + result = parsed_body if isinstance(parsed_body, dict) else await response.json() + return result + + except aiohttp.ClientConnectorError as e: + logger.error( + f"[{task_id}] FORWARD TASK: Connection error to {full_url}: {str(e)}") + raise _build_forward_error( + message=f"Failed to connect to API: {str(e)}", + index_name=index_name, + source=source, + original_filename=original_filename, + ) + except asyncio.TimeoutError as e: + logger.warning( + f"[{task_id}] FORWARD TASK: Timeout when indexing documents: {str(e)}.") + raise _build_forward_error( + message=f"Timeout when indexing documents: {str(e)}", + index_name=index_name, + source=source, + original_filename=original_filename, + ) + except Exception as e: + logger.error( + f"[{task_id}] FORWARD TASK: Unexpected error when indexing documents: {str(e)}.") + raise _build_forward_error( + message=f"Unexpected error when indexing documents: {str(e)}", + index_name=index_name, + source=source, + original_filename=original_filename, + ) + + return run_async(_post()) + + +@ray.remote(num_cpus=0) +class 
GlobalRayActorPoolManager: """ - Creates a new, anonymous DataProcessorRayActor instance for each call. - This allows for parallel execution of data processing tasks, with each - task running in its own actor. + Cluster-wide shared actor pool manager. + A single detached manager serves all Celery worker processes. """ + + def __init__(self, warm_timeout_s: float): + self.warm_timeout_s = warm_timeout_s + self.actors: List[Any] = [] + self.rr_index = 0 + + def _create_and_warm_actor(self) -> Optional[Any]: + actor = DataProcessorRayActor.remote() + try: + ray.get(actor.ping.remote(), timeout=self.warm_timeout_s) + return actor + except Exception as exc: + try: + ray.kill(actor, no_restart=True) + except Exception: + pass + logger.warning( + f"[GlobalRayActorPoolManager] Warm actor failed in {self.warm_timeout_s:.1f}s: {exc}" + ) + return None + + def ensure_pool(self, desired: int, max_allowed: int) -> int: + desired = max(0, int(desired)) + max_allowed = max(1, int(max_allowed)) + desired = min(desired, max_allowed) + missing = max(0, desired - len(self.actors)) + for _ in range(missing): + actor = self._create_and_warm_actor() + if actor is not None: + self.actors.append(actor) + return len(self.actors) + + def get_actor(self) -> Any: + if not self.actors: + actor = self._create_and_warm_actor() + if actor is None: + raise RuntimeError( + "Global actor pool is empty and actor warm-up failed") + self.actors.append(actor) + idx = self.rr_index % len(self.actors) + self.rr_index += 1 + return self.actors[idx] + + +def _get_or_create_global_pool_manager() -> Any: with ray_init_lock: init_ray_in_worker() - actor = DataProcessorRayActor.remote() - logger.debug( - "Successfully created a new DataProcessorRayActor for a task.") - return actor + # Prefer atomic get/create when supported. 
+ try: + return GlobalRayActorPoolManager.options( + name=RAY_GLOBAL_ACTOR_POOL_NAME, + namespace=RAY_GLOBAL_ACTOR_POOL_NAMESPACE, + lifetime="detached", + get_if_exists=True, + ).remote(RAY_ACTOR_WARM_TIMEOUT_S) + except TypeError: + pass + + try: + return ray.get_actor( + RAY_GLOBAL_ACTOR_POOL_NAME, namespace=RAY_GLOBAL_ACTOR_POOL_NAMESPACE) + except Exception: + pass + + try: + return GlobalRayActorPoolManager.options( + name=RAY_GLOBAL_ACTOR_POOL_NAME, + namespace=RAY_GLOBAL_ACTOR_POOL_NAMESPACE, + lifetime="detached", + ).remote(RAY_ACTOR_WARM_TIMEOUT_S) + except Exception: + # Name race: another worker may have created it in the meantime. + return ray.get_actor( + RAY_GLOBAL_ACTOR_POOL_NAME, namespace=RAY_GLOBAL_ACTOR_POOL_NAMESPACE) + + +def prewarm_ray_actors(target_size: Optional[int] = None) -> int: + """ + Ensure a global shared pool of warm Ray actors exists for low-latency task execution. + """ + desired = RAY_GLOBAL_ACTOR_POOL_SIZE if target_size is None else max( + 0, int(target_size)) + manager = _get_or_create_global_pool_manager() + current_after = ray.get( + manager.ensure_pool.remote( + desired=desired, max_allowed=_estimate_parallel_parts()) + ) + logger.info( + f"Global Ray actor pool ready: current={current_after}, desired={desired}" + ) + return current_after + + +def get_ray_actor() -> Any: + """ + Return a warm actor from the global shared pool with round-robin selection. + """ + manager = _get_or_create_global_pool_manager() + return ray.get(manager.get_actor.remote()) + + +def _get_split_actor() -> Any: + """ + Reuse warm DataProcessorRayActor instances for split operations. + This keeps split path aligned with prewarmed actor pool. 
+ """ + return get_ray_actor() class LoggingTask(Task): @@ -221,6 +842,473 @@ def on_retry(self, exc, task_id, args, kwargs, einfo): return super().on_retry(exc, task_id, args, kwargs, einfo) +@app.task(bind=True, base=LoggingTask, name='data_process.tasks.process_part', queue='process_part_q') +def process_part( + self, + part_bytes: bytes, + filename: str, + chunking_strategy: str, + part_redis_key: str, + source: Optional[str] = None, + source_type: Optional[str] = None, + model_id: Optional[int] = None, + tenant_id: Optional[str] = None, + **params +) -> Dict[str, Any]: + """ + Hidden sub-task to process a file part with Ray. + """ + actor = get_ray_actor() + try: + chunks_ref = actor.process_bytes.remote( + part_bytes, + filename, + chunking_strategy, + task_id=None, + model_id=model_id, + tenant_id=tenant_id, + **params + ) + chunks = ray.get(chunks_ref) or [] + + if not REDIS_BACKEND_URL: + raise RuntimeError("REDIS_BACKEND_URL not configured") + + import redis + client = redis.Redis.from_url(REDIS_BACKEND_URL, decode_responses=True) + client.set(part_redis_key, json.dumps(chunks, ensure_ascii=False)) + client.expire(part_redis_key, 2 * 60 * 60) + + return { + "part_redis_key": part_redis_key, + "chunks_count": len(chunks), + } + except Exception as e: + logger.error( + f"[process_part] Failed to process part for '{filename}': {str(e)}") + return { + "part_redis_key": part_redis_key, + "chunks_count": 0, + } + + +@app.task(bind=True, base=LoggingTask, name='data_process.tasks.aggregate_parts', queue='process_part_q') +def aggregate_parts( + self, + parts_results: List[List[Dict[str, Any]]], + source: Optional[str] = None, + index_name: Optional[str] = None, + original_filename: Optional[str] = None +) -> Dict[str, Any]: + """ + Hidden sub-task to aggregate part chunks. 
+ """ + merged: List[Dict[str, Any]] = [] + for part_chunks in parts_results or []: + if part_chunks: + merged.extend(part_chunks) + return { + "chunks": merged, + "source": source, + "index_name": index_name, + "original_filename": original_filename + } + + +@app.task(bind=True, base=LoggingTask, name='data_process.tasks.aggregate_store_chunks', queue='process_part_q') +def aggregate_store_chunks( + self, + parts_results: List[Dict[str, Any]], + redis_key: str, + source: Optional[str] = None, + index_name: Optional[str] = None, + original_filename: Optional[str] = None +) -> Dict[str, Any]: + """ + Hidden sub-task to aggregate part chunks and store into Redis for forward task. + """ + if not REDIS_BACKEND_URL: + raise Exception(json.dumps({ + "message": "REDIS_BACKEND_URL not configured to store chunks", + "index_name": index_name, + "task_name": "process", + "source": source, + "original_filename": original_filename + }, ensure_ascii=False)) + + try: + import redis + client = redis.Redis.from_url( + REDIS_BACKEND_URL, decode_responses=True) + + merged: List[Dict[str, Any]] = [] + for part_result in parts_results or []: + part_key = (part_result or {}).get("part_redis_key") + if not part_key: + continue + cached = client.get(part_key) + if not cached: + continue + try: + part_chunks = json.loads(cached) + if isinstance(part_chunks, list): + merged.extend(part_chunks) + except Exception: + continue + # best-effort cleanup for part payload key + try: + client.delete(part_key) + except Exception: + pass + + serialized = json.dumps(merged, ensure_ascii=False) + client.set(redis_key, serialized) + client.expire(redis_key, 2 * 60 * 60) + ready_key = f"{redis_key}:ready" + client.set(ready_key, "1") + client.expire(ready_key, 2 * 60 * 60) + logger.info( + f"[{self.request.id}] PROCESS TASK: Stored aggregated chunks in Redis at key '{redis_key}', count={len(merged)}") + except Exception as exc: + raise Exception(json.dumps({ + "message": f"Failed to store chunks to Redis: 
{str(exc)}", + "index_name": index_name, + "task_name": "process", + "source": source, + "original_filename": original_filename + }, ensure_ascii=False)) + + return { + "chunks_count": len(merged), + "redis_key": redis_key, + "source": source, + "index_name": index_name, + "original_filename": original_filename + } + + +@app.task(bind=True, base=LoggingTask, name='data_process.tasks.forward_part', queue='forward_q') +def forward_part( + self, + chunks: List[Dict[str, Any]], + index_name: str, + authorization: Optional[str] = None, + parent_task_id: Optional[str] = None, + parent_total_chunks: Optional[int] = None, + source: Optional[str] = None, + original_filename: Optional[str] = None, + batch_index: Optional[int] = None, + total_batches: Optional[int] = None, + large_mode: Optional[bool] = False, +) -> Dict[str, Any]: + """ + Forward sub-task that indexes a chunk batch. + """ + try: + # Respect cancellation from parent task if available + if parent_task_id: + try: + redis_service = get_redis_service() + if redis_service.is_task_cancelled(parent_task_id): + raise RuntimeError( + f"Parent task {parent_task_id} marked as cancelled") + except Exception: + pass + + es_result = _send_chunks_to_es( + chunks=chunks, + index_name=index_name, + authorization=authorization, + task_id=None, + source=source, + original_filename=original_filename, + large_mode=large_mode, + ) + + if not isinstance(es_result, dict) or not es_result.get("success"): + error_message = es_result.get( + "message", "Unknown error from main_server") if isinstance(es_result, dict) else "Unknown error" + raise Exception(json.dumps({ + "message": f"main_server API error: {error_message}", + "index_name": index_name, + "task_name": "forward_part", + "source": source, + "original_filename": original_filename + }, ensure_ascii=False)) + + # Update parent task progress per finished batch so frontend can show real-time indexing count. 
+ if parent_task_id: + try: + processed_delta = int(es_result.get("total_indexed", 0) or 0) + redis_service = get_redis_service() + redis_service.increment_progress_info( + task_id=parent_task_id, + delta_processed=processed_delta, + total_chunks=parent_total_chunks, + ) + except Exception as progress_exc: + logger.warning( + f"[{self.request.id}] FORWARD PART: Failed to update parent progress " + f"for task {parent_task_id}: {progress_exc}" + ) + + return { + "success": True, + "total_indexed": es_result.get("total_indexed", 0), + "total_submitted": es_result.get("total_submitted", len(chunks)), + "batch_index": batch_index, + "total_batches": total_batches, + } + except Exception as e: + retry_num = getattr(self.request, 'retries', 0) + logger.warning( + f"[{self.request.id}] FORWARD PART: Failed batch {batch_index}/{total_batches} " + f"(retry {retry_num + 1}/{FORWARD_REDIS_RETRY_MAX}): {str(e)}" + ) + raise self.retry( + countdown=FORWARD_REDIS_RETRY_DELAY_S, + max_retries=FORWARD_REDIS_RETRY_MAX, + exc=e + ) + + +@app.task(bind=True, base=LoggingTask, name='data_process.tasks.aggregate_forward_parts', queue='forward_q') +def aggregate_forward_parts( + self, + parts_results: List[Dict[str, Any]], + source: Optional[str] = None, + index_name: Optional[str] = None, + original_filename: Optional[str] = None +) -> Dict[str, Any]: + """ + Aggregate forward_part results. 
+ """ + total_indexed = 0 + total_submitted = 0 + for result in parts_results or []: + if not result: + continue + total_indexed += int(result.get("total_indexed", 0) or 0) + total_submitted += int(result.get("total_submitted", 0) or 0) + + return { + "success": True, + "total_indexed": total_indexed, + "total_submitted": total_submitted, + "source": source, + "index_name": index_name, + "original_filename": original_filename + } + + +def _split_file_for_processing( + request_id: str, + source: str, + source_type: str, + task_id: str, + params: Dict[str, Any], + file_data: Optional[bytes] = None, +) -> List[bytes]: + max_size = 5 * 1024 * 1024 + params.pop("max_size", None) + logger.info( + f"[{request_id}] PROCESS TASK: Splitting file before processing (max_size={max_size})") + + split_actor_get_start = time.perf_counter() + split_actor = _get_split_actor() + split_actor_get_elapsed = time.perf_counter() - split_actor_get_start + logger.info( + f"[{request_id}] PROCESS TASK: split actor ready in {split_actor_get_elapsed:.3f}s") + + split_call_start = time.perf_counter() + split_kwargs = { + "source": source, + "destination": source_type, + "task_id": task_id, + "max_size": max_size, + **params, + } + if file_data is not None: + split_kwargs["file_data"] = file_data + + parts_ref = split_actor.split_file.remote(**split_kwargs) + parts = ray.get(parts_ref) + split_call_elapsed = time.perf_counter() - split_call_start + logger.info( + f"[{request_id}] PROCESS TASK: split_file RPC done in {split_call_elapsed:.3f}s " + f"(source_type={source_type})") + + if parts: + part_sizes = [len(p) for p in parts] + total_bytes = sum(part_sizes) + min_size = min(part_sizes) + max_part_size = max(part_sizes) + avg_size = total_bytes / len(part_sizes) + logger.info( + f"[{request_id}] PROCESS TASK: Split stats: parts={len(part_sizes)}, " + f"total={total_bytes/1024/1024:.2f}MB, " + f"min={min_size/1024:.2f}KB, max={max_part_size/1024:.2f}KB, avg={avg_size/1024:.2f}KB") + + return 
parts + + +def _run_processing_for_parts( + request_id: str, + source: str, + source_type: str, + task_id: str, + chunking_strategy: str, + filename_for_processing: str, + parts: List[bytes], + index_name: Optional[str], + original_filename: Optional[str], + embedding_model_id: Optional[int], + tenant_id: Optional[str], + params: Dict[str, Any], +) -> Tuple[bool, Optional[List[Dict[str, Any]]], Optional[int]]: + if not parts: + logger.warning( + f"[{request_id}] PROCESS TASK: Split returned no parts; fallback to full-file processing") + process_actor = get_ray_actor() + chunks_ref = process_actor.process_file.remote( + source, + chunking_strategy, + destination=source_type, + task_id=task_id, + model_id=embedding_model_id, + tenant_id=tenant_id, + **params + ) + logger.info( + f"[{request_id}] PROCESS TASK: Waiting for Ray processing to complete...") + return False, ray.get(chunks_ref), None + + if len(parts) == 1: + process_actor = get_ray_actor() + chunks_ref = process_actor.process_bytes.remote( + parts[0], + filename_for_processing, + chunking_strategy, + task_id=None, + model_id=embedding_model_id, + tenant_id=tenant_id, + **params + ) + logger.info( + f"[{request_id}] PROCESS TASK: Waiting for Ray processing to complete...") + return False, ray.get(chunks_ref), None + + redis_key = f"dp:{task_id}:chunks" + group_tasks = group( + process_part.s( + part_bytes=part, + filename=filename_for_processing, + chunking_strategy=chunking_strategy, + part_redis_key=f"dp:{task_id}:part:{idx}", + source=source, + source_type=source_type, + model_id=embedding_model_id, + tenant_id=tenant_id, + **params + ) for idx, part in enumerate(parts) + ) + callback = aggregate_store_chunks.s( + redis_key=redis_key, + source=source, + index_name=index_name, + original_filename=original_filename + ).set(queue='process_part_q') + logger.info( + f"[{request_id}] PROCESS TASK: Dispatching {len(parts)} part tasks...") + chord(group_tasks)(callback) + + split_wait_timeout = 
_compute_split_wait_timeout(len(parts)) + logger.info( + f"[{request_id}] PROCESS TASK: Waiting split aggregation, timeout={split_wait_timeout}s, " + f"parts={len(parts)}, est_parallel={_estimate_parallel_parts()}") + split_chunk_count = _wait_for_split_ready( + redis_key=redis_key, + timeout_s=split_wait_timeout, + poll_interval_ms=DP_REDIS_CHUNKS_POLL_INTERVAL_MS, + ) + return True, None, split_chunk_count + + +def _process_source_with_split( + request_id: str, + source: str, + source_type: str, + task_id: str, + chunking_strategy: str, + index_name: Optional[str], + original_filename: Optional[str], + embedding_model_id: Optional[int], + tenant_id: Optional[str], + params: Dict[str, Any], + file_data: Optional[bytes] = None, +) -> Tuple[bool, Optional[List[Dict[str, Any]]], Optional[int]]: + parts = _split_file_for_processing( + request_id=request_id, + source=source, + source_type=source_type, + task_id=task_id, + params=params, + file_data=file_data, + ) + filename_for_processing = original_filename or os.path.basename(source) + split_async, chunks, split_chunk_count = _run_processing_for_parts( + request_id=request_id, + source=source, + source_type=source_type, + task_id=task_id, + chunking_strategy=chunking_strategy, + filename_for_processing=filename_for_processing, + parts=parts, + index_name=index_name, + original_filename=original_filename, + embedding_model_id=embedding_model_id, + tenant_id=tenant_id, + params=params, + ) + + if split_async: + logger.info( + f"[{request_id}] PROCESS TASK: Async split finished with {split_chunk_count or 0} chunks") + else: + logger.info( + f"[{request_id}] PROCESS TASK: Ray processing completed, got {len(chunks) if chunks else 0} chunks") + + if not split_async: + redis_key = f"dp:{task_id}:chunks" + process_actor = get_ray_actor() + process_actor.store_chunks_in_redis.remote(redis_key, chunks) + logger.info( + f"[{request_id}] PROCESS TASK: Stored chunks in Redis at key '{redis_key}'") + + return split_async, chunks, 
split_chunk_count + + +def _build_no_valid_chunks_error( + split_async: bool, + index_name: Optional[str], + source: str, + original_filename: Optional[str], +) -> Exception: + message = ( + "Async split completed but produced 0 chunks" + if split_async else + "Ray processing completed but produced 0 chunks" + ) + return Exception(json.dumps({ + "message": message, + "index_name": index_name, + "task_name": "process", + "source": source, + "original_filename": original_filename, + "error_code": "no_valid_chunks" + }, ensure_ascii=False)) + + @app.task(bind=True, base=LoggingTask, name='data_process.tasks.process', queue='process_q') def process( self, @@ -248,6 +1336,7 @@ def process( """ start_time = time.time() task_id = self.request.id + # _warn_if_queue_mismatch("PROCESS TASK", "process_q", self.request) logger.info( f"[{self.request.id}] PROCESS TASK: source_type: {source_type}") @@ -264,51 +1353,39 @@ def process( 'stage': 'extracting_text' } ) - # Get the data processor instance - actor = get_ray_actor() - try: # Process the file based on the source type file_size_mb = 0 + split_chunk_count = None + image_metadata_chunk_count = 0 + elapsed_time = 0.0 + chunks: Optional[List[Dict[str, Any]]] = None + split_async = False + if source_type == "local": # Check file existence and size for optimization if not os.path.exists(source): raise FileNotFoundError(f"File does not exist: {source}") file_size = os.path.getsize(source) - file_size_mb = file_size / (1024 * 1024) + file_size_mb = file_size / (5 * 1024 * 1024) logger.info( f"[{self.request.id}] PROCESS TASK: File size: {file_size_mb:.2f}MB") - # The unified actor call, mapping 'file' source_type to 'local' destination - # Submit Ray work and WAIT for processing to complete - logger.info( - f"[{self.request.id}] PROCESS TASK: Submitting Ray processing for source='{source}', strategy='{chunking_strategy}', destination='{source_type}', model_id={embedding_model_id}") - chunks_ref = actor.process_file.remote( - 
source, - chunking_strategy, - destination=source_type, + split_async, chunks, split_chunk_count = _process_source_with_split( + request_id=self.request.id, + source=source, + source_type=source_type, task_id=task_id, - model_id=embedding_model_id, + chunking_strategy=chunking_strategy, + index_name=index_name, + original_filename=original_filename, + embedding_model_id=embedding_model_id, tenant_id=tenant_id, - **params + params=params, ) - # Wait for Ray processing to complete (this keeps task in STARTED/"PROCESSING" state) - logger.info( - f"[{self.request.id}] PROCESS TASK: Waiting for Ray processing to complete...") - chunks = ray.get(chunks_ref) - logger.info( - f"[{self.request.id}] PROCESS TASK: Ray processing completed, got {len(chunks) if chunks else 0} chunks") - - # Persist chunks into Redis via Ray (synchronous to ensure data is ready before forward task) - redis_key = f"dp:{task_id}:chunks" - actor.store_chunks_in_redis.remote(redis_key, chunks) - logger.info( - f"[{self.request.id}] PROCESS TASK: Stored chunks in Redis at key '{redis_key}'") - - end_time = time.time() - elapsed_time = end_time - start_time + elapsed_time = time.time() - start_time processing_speed = file_size_mb / \ elapsed_time if file_size_mb > 0 and elapsed_time > 0 else 0 logger.info( @@ -318,33 +1395,32 @@ def process( logger.info( f"[{self.request.id}] PROCESS TASK: Processing from URL: {source}") - # For URL source, core.py expects a non-local destination to trigger URL fetching + # Measure MinIO fetch time in process worker logs for observability + fetch_start = time.perf_counter() + file_stream = get_file_stream(source) + if file_stream is None: + raise FileNotFoundError( + f"Unable to fetch file from URL: {source}") + file_data = file_stream.read() + fetch_elapsed = time.perf_counter() - fetch_start logger.info( - f"[{self.request.id}] PROCESS TASK: Submitting Ray processing for URL='{source}', strategy='{chunking_strategy}', destination='{source_type}', 
model_id={embedding_model_id}") - chunks_ref = actor.process_file.remote( - source, - chunking_strategy, - destination=source_type, + f"[{self.request.id}] PROCESS TASK: MinIO fetch done in {fetch_elapsed:.3f}s, " + f"bytes={len(file_data)}") + + split_async, chunks, split_chunk_count = _process_source_with_split( + request_id=self.request.id, + source=source, + source_type=source_type, task_id=task_id, - model_id=embedding_model_id, + chunking_strategy=chunking_strategy, + index_name=index_name, + original_filename=original_filename, + embedding_model_id=embedding_model_id, tenant_id=tenant_id, - **params + params=params, + file_data=file_data, ) - # Wait for Ray processing to complete (this keeps task in STARTED/"PROCESSING" state) - logger.info( - f"[{self.request.id}] PROCESS TASK: Waiting for Ray processing to complete...") - chunks = ray.get(chunks_ref) - logger.info( - f"[{self.request.id}] PROCESS TASK: Ray processing completed, got {len(chunks) if chunks else 0} chunks") - - # Persist chunks into Redis via Ray (synchronous to ensure data is ready before forward task) - redis_key = f"dp:{task_id}:chunks" - actor.store_chunks_in_redis.remote(redis_key, chunks) - logger.info( - f"[{self.request.id}] PROCESS TASK: Stored chunks in Redis at key '{redis_key}'") - - end_time = time.time() - elapsed_time = end_time - start_time + elapsed_time = time.time() - start_time logger.info( f"[{self.request.id}] PROCESS TASK: URL processing completed in {elapsed_time:.2f}s") @@ -353,23 +1429,52 @@ def process( raise NotImplementedError( f"Source type '{source_type}' not yet supported") - chunk_count = len(chunks) if chunks else 0 - if chunk_count == 0: - raise Exception(json.dumps({ - "message": "Ray processing completed but produced 0 chunks", - "index_name": index_name, - "task_name": "process", - "source": source, - "original_filename": original_filename, - "error_code": "no_valid_chunks" - }, ensure_ascii=False)) + if split_async: + chunk_count = split_chunk_count or 0 
+ if chunk_count == 0: + raise _build_no_valid_chunks_error( + split_async=True, + index_name=index_name, + source=source, + original_filename=original_filename, + ) + # For async split, chunks are persisted in Redis; count image-metadata chunks from cached payload. + try: + if REDIS_BACKEND_URL: + import redis + redis_key = f"dp:{task_id}:chunks" + client = redis.Redis.from_url( + REDIS_BACKEND_URL, decode_responses=True) + cached = client.get(redis_key) + if cached: + cached_chunks = json.loads(cached) + if isinstance(cached_chunks, list): + image_metadata_chunk_count = _count_image_metadata_chunks( + cached_chunks) + except Exception as image_count_exc: + logger.warning( + f"[{self.request.id}] PROCESS TASK: Failed counting image metadata chunks for async split: {image_count_exc}") + else: + chunk_count = len(chunks) if chunks else 0 + if chunk_count == 0: + raise _build_no_valid_chunks_error( + split_async=False, + index_name=index_name, + source=source, + original_filename=original_filename, + ) + image_metadata_chunk_count = _count_image_metadata_chunks(chunks) + + logger.info( + f"[{self.request.id}] PROCESS TASK: Chunk composition: total={chunk_count}, " + f"image_metadata={image_metadata_chunk_count}, text={max(0, chunk_count - image_metadata_chunk_count)}") # Update task state to SUCCESS after Ray processing completes # This transitions from STARTED (PROCESSING) to SUCCESS (WAIT_FOR_FORWARDING) self.update_state( state=states.SUCCESS, meta={ - 'chunks_count': len(chunks) if chunks else 0, + 'chunks_count': chunk_count, 'processing_time': elapsed_time, 'source': source, 'index_name': index_name, @@ -391,7 +1496,9 @@ def process( 'source': source, 'index_name': index_name, 'original_filename': original_filename, - 'task_id': task_id + 'task_id': task_id, + 'split_async': split_async, + 'image_metadata_chunk_count': image_metadata_chunk_count, } return returned_data @@ -537,122 +1644,46 @@ def forward( """ start_time = time.time() task_id = self.request.id + 
# _warn_if_queue_mismatch("FORWARD TASK", "forward_q", self.request) original_source = source original_index_name = index_name filename = original_filename try: - # Before doing any heavy work, check whether this task has been - # explicitly cancelled (for example, because the user deleted the - # document from the knowledge base configuration page). - try: - redis_service = get_redis_service() - if redis_service.is_task_cancelled(task_id): - logger.info( - f"[{self.request.id}] FORWARD TASK: Detected cancellation flag for task {task_id}; " - f"skipping chunk forwarding for source '{source}' in index '{index_name}'." - ) - # Treat this as a graceful early exit. We still return a - # structured payload so callers can consider the task done. - return { - 'task_id': task_id, - 'source': source, - 'index_name': index_name, - 'original_filename': original_filename, - 'chunks_stored': 0, - 'storage_time': 0, - 'es_result': { - "success": False, - "message": "Indexing cancelled because document was deleted.", - "total_indexed": 0, - "total_submitted": 0, - }, - } - except Exception as cancel_check_exc: - logger.warning( - f"[{self.request.id}] FORWARD TASK: Failed to check cancellation flag for task {task_id}: " - f"{cancel_check_exc}" - ) + ctx = _init_forward_context( + task_id=task_id, + request_id=str(self.request.id), + start_time=start_time, + source=source, + index_name=index_name, + source_type=source_type, + original_filename=original_filename, + ) - chunks = processed_data.get('chunks') - # If chunks are not in payload, try loading from Redis via the redis_key - if (not chunks) and processed_data.get('redis_key'): - redis_key = processed_data.get('redis_key') - if not REDIS_BACKEND_URL: - raise Exception(json.dumps({ - "message": "REDIS_BACKEND_URL not configured to retrieve chunks", - "index_name": original_index_name, - "task_name": "forward", - "source": original_source, - "original_filename": filename - }, ensure_ascii=False)) - try: - import redis - client 
= redis.Redis.from_url( - REDIS_BACKEND_URL, decode_responses=True) - cached = client.get(redis_key) - if cached: - try: - logger.debug( - f"[{self.request.id}] FORWARD TASK: Retrieved Redis key '{redis_key}', payload_length={len(cached)}") - chunks = json.loads(cached) - except json.JSONDecodeError as jde: - # Log raw prefix to help diagnose incorrect writes - raw_preview = cached[:120] if isinstance( - cached, str) else str(type(cached)) - logger.error( - f"[{self.request.id}] FORWARD TASK: JSON decode error for key '{redis_key}': {str(jde)}; raw_prefix={raw_preview!r}") - raise - else: - # No busy-wait: release the worker slot and retry later - retry_num = getattr(self.request, 'retries', 0) - logger.info( - f"[{self.request.id}] FORWARD TASK: Chunks not yet available for key {redis_key}. Retry {retry_num + 1}/{FORWARD_REDIS_RETRY_MAX} in {FORWARD_REDIS_RETRY_DELAY_S}s") - raise self.retry( - countdown=FORWARD_REDIS_RETRY_DELAY_S, - max_retries=FORWARD_REDIS_RETRY_MAX, - exc=Exception(json.dumps({ - "message": "Chunks not ready in Redis; will retry", - "index_name": original_index_name, - "task_name": "forward", - "source": original_source, - "original_filename": filename - }, ensure_ascii=False)) - ) - except Retry: - raise - except Exception as exc: - raise Exception(json.dumps({ - "message": f"Failed to retrieve chunks from Redis: {str(exc)}", - "index_name": original_index_name, - "task_name": "forward", - "source": original_source, - "original_filename": filename - }, ensure_ascii=False)) - if processed_data.get('source'): - original_source = processed_data.get('source') - if processed_data.get('index_name'): - original_index_name = processed_data.get('index_name') - if processed_data.get('original_filename'): - filename = processed_data.get('original_filename') - logger.info( - f"[{self.request.id}] FORWARD TASK: Received data for source '{original_source}' with {len(chunks) if chunks else 'None'} chunks") + # Before doing any heavy work, check whether 
this task has been explicitly cancelled. + if _is_forward_task_cancelled(ctx): + logger.info( + f"[{self.request.id}] FORWARD TASK: Detected cancellation flag for task {task_id}; " + f"skipping chunk forwarding for source '{source}' in index '{index_name}'." + ) + return _build_forward_cancelled_result(ctx) + + chunks, split_async, original_source, original_index_name, filename = _load_forward_chunks( + self, + processed_data=processed_data, + original_source=original_source, + original_index_name=original_index_name, + filename=filename, + ) # Calculate total chunks for progress tracking total_chunks = len(chunks) if chunks else 0 - - if chunks is None: - raise Exception(json.dumps({ - "message": "No chunks received for forwarding", - "index_name": original_index_name, - "task_name": "forward", - "source": original_source, - "original_filename": original_filename - }, ensure_ascii=False)) - if len(chunks) == 0: - logger.warning( - f"[{self.request.id}] FORWARD TASK: Empty chunks list received for source {original_source}") formatted_chunks = [] + # Compute once per file to avoid repeated IO/MinIO calls inside loop + file_size = get_file_size(source_type, original_source) if isinstance( + original_source, str) else 0 + filename_resolved = filename or (os.path.basename(original_source) if original_source and isinstance( + original_source, str) else "") for i, chunk in enumerate(chunks): # Extract text and metadata content = chunk.get("content", "") @@ -664,20 +1695,18 @@ def forward( f"[{self.request.id}] FORWARD TASK: Chunk {i+1} has empty text content, skipping") continue - file_size = get_file_size(source_type, original_source) if isinstance( - original_source, str) else 0 - # Format as expected by the Elasticsearch API formatted_chunk = { "metadata": metadata, - "filename": filename or (os.path.basename(original_source) if original_source and isinstance(original_source, str) else ""), + "filename": filename_resolved, "path_or_url": original_source, "content": 
content, - "process_source": "Unstructured", + "process_source": chunk.get("process_source", "Unstructured"), "source_type": source_type, "file_size": file_size, "create_time": metadata.get("creation_date"), "date": metadata.get("date"), + "index": i, } formatted_chunks.append(formatted_chunk) @@ -691,112 +1720,6 @@ def forward( "error_code": "no_valid_chunks" }, ensure_ascii=False)) - async def index_documents(): - elasticsearch_url = ELASTICSEARCH_SERVICE - if not elasticsearch_url: - raise Exception(json.dumps({ - "message": "ELASTICSEARCH_SERVICE env is not set", - "index_name": original_index_name, - "task_name": "forward", - "source": original_source, - "original_filename": original_filename - }, ensure_ascii=False)) - route_url = f"/indices/{original_index_name}/documents" - full_url = elasticsearch_url + route_url - headers = {"Content-Type": "application/json"} - if authorization: - headers["Authorization"] = authorization - # Add task_id header for progress tracking - headers["X-Task-Id"] = task_id - - try: - connector = aiohttp.TCPConnector(verify_ssl=False) - timeout = aiohttp.ClientTimeout(total=600) - - async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session: - async with session.post( - full_url, - headers=headers, - json=formatted_chunks, - raise_for_status=False - ) as response: - text = await response.text() - status = response.status - # Try parse JSON body for structured error_code/message - parsed_body = None - try: - parsed_body = json.loads(text) - except Exception: - parsed_body = None - - if status >= 400: - error_code = None - if isinstance(parsed_body, dict): - error_code = parsed_body.get("error_code") - detail = parsed_body.get("detail") - if isinstance(detail, dict) and detail.get("error_code"): - error_code = detail.get("error_code") - elif isinstance(detail, str): - try: - parsed_detail = json.loads(detail) - if isinstance(parsed_detail, dict): - error_code = parsed_detail.get( - "error_code", error_code) - 
except Exception: - pass - - if not error_code: - try: - match = re.search( - r'["\']error_code["\']\s*:\s*["\']([^"\']+)["\']', text) - if match: - error_code = match.group(1) - except Exception: - pass - - if error_code: - # Raise flat payload to avoid nested JSON and preserve error_code - raise Exception(json.dumps({ - "error_code": error_code - }, ensure_ascii=False)) - - raise Exception( - f"ElasticSearch service returned HTTP {status}") - - result = parsed_body if isinstance(parsed_body, dict) else await response.json() - return result - - except aiohttp.ClientConnectorError as e: - logger.error( - f"[{self.request.id}] FORWARD TASK: Connection error to {full_url}: {str(e)}") - raise Exception(json.dumps({ - "message": f"Failed to connect to API: {str(e)}", - "index_name": original_index_name, - "task_name": "forward", - "source": original_source, - "original_filename": original_filename - }, ensure_ascii=False)) - except asyncio.TimeoutError as e: - logger.warning( - f"[{self.request.id}] FORWARD TASK: Timeout when indexing documents: {str(e)}.") - raise Exception(json.dumps({ - "message": f"Timeout when indexing documents: {str(e)}", - "index_name": original_index_name, - "task_name": "forward", - "source": original_source, - "original_filename": original_filename - }, ensure_ascii=False)) - except Exception as e: - logger.error( - f"[{self.request.id}] FORWARD TASK: Unexpected error when indexing documents: {str(e)}.") - raise Exception(json.dumps({ - "message": f"Unexpected error when indexing documents: {str(e)}", - "index_name": original_index_name, - "task_name": "forward", - "source": original_source, - "original_filename": original_filename - }, ensure_ascii=False)) - logger.info( f"[{self.request.id}] FORWARD TASK: Starting ES indexing for {len(formatted_chunks)} chunks to index '{original_index_name}'...") @@ -814,8 +1737,69 @@ async def index_documents(): 'processed_chunks': 0 # Will be updated during vectorization via Redis } ) + try: + 
redis_service = get_redis_service() + redis_service.save_progress_info(task_id, 0, total_chunks) + except Exception as progress_init_exc: + logger.warning( + f"[{self.request.id}] FORWARD TASK: Failed to initialize progress in Redis: " + f"{progress_init_exc}" + ) - es_result = run_async(index_documents()) + if len(formatted_chunks) < FORWARD_ES_CHUNK_BATCH_SIZE: + es_result = _send_chunks_to_es( + chunks=formatted_chunks, + index_name=original_index_name, + authorization=authorization, + task_id=task_id, + source=original_source, + original_filename=original_filename, + large_mode=False, + ) + else: + batches = _build_balanced_batches( + formatted_chunks=formatted_chunks, + batch_size=FORWARD_ES_CHUNK_BATCH_SIZE, + ) + total_batches = len(batches) + image_chunks_total = sum( + 1 for chunk in formatted_chunks if chunk.get("process_source") == IMAGE_METADATA_PROCESS_SOURCE + ) + image_distribution = [ + sum( + 1 + for chunk in batch + if chunk.get("process_source") == IMAGE_METADATA_PROCESS_SOURCE + ) + for batch in batches + ] + logger.info( + f"[{self.request.id}] FORWARD TASK: Batch distribution ready: total_batches={total_batches}, " + f"batch_size={FORWARD_ES_CHUNK_BATCH_SIZE}, image_metadata_total={image_chunks_total}, " + f"image_per_batch={image_distribution}") + group_tasks = group( + forward_part.s( + chunks=batch, + index_name=original_index_name, + authorization=authorization, + parent_task_id=task_id, + parent_total_chunks=total_chunks, + source=original_source, + original_filename=original_filename, + batch_index=idx + 1, + total_batches=total_batches, + # If request was split into multiple groups, force all groups to use large path. 
+ large_mode=True, + ).set(queue='forward_q') for idx, batch in enumerate(batches) + ) + callback = aggregate_forward_parts.s( + source=original_source, + index_name=original_index_name, + original_filename=original_filename + ).set(queue='forward_q') + result = chord(group_tasks)(callback) + with allow_join_result(): + es_result = result.get() logger.debug( f"[{self.request.id}] FORWARD TASK: API response from main_server for source '{original_source}': {es_result}") @@ -884,6 +1868,7 @@ async def index_documents(): logger.info( f"[{self.request.id}] FORWARD TASK: Successfully stored {len(chunks)} chunks to index {original_index_name} in {end_time - start_time:.2f}s") + return { 'task_id': task_id, 'source': original_source, @@ -966,9 +1951,106 @@ async def index_documents(): raise -@app.task(bind=True, base=LoggingTask, name='data_process.tasks.process_and_forward') -def process_and_forward( - self, +@app.task( + bind=True, + base=LoggingTask, + name="data_process.tasks.cleanup_source", + queue="forward_q", +) +def cleanup_source(self, forward_result: Dict[str, Any]) -> Dict[str, Any]: + """ + Conditionally delete the MinIO source file after successful indexing. 
+ + If the knowledge base is configured with preserve_source_file=false, call: + DELETE /indices/{index_name}/documents?path_or_url=...&scope=source_only + """ + index_name = (forward_result or {}).get("index_name") + source = (forward_result or {}).get("source") + + cleanup_info: Dict[str, Any] = { + "attempted": False, + "skipped_reason": None, + "success": None, + "http_status": None, + "response": None, + "error": None, + } + + if not index_name or not source: + cleanup_info["skipped_reason"] = "missing_index_name_or_source" + forward_result = dict(forward_result or {}) + forward_result["source_cleanup"] = cleanup_info + return forward_result + + try: + record = get_knowledge_record({"index_name": index_name}) or {} + preserve_source_file = record.get("preserve_source_file", True) + except Exception as exc: + logger.warning( + "[%s] CLEANUP TASK: Failed to load knowledge config for index '%s': %s", + getattr(self.request, "id", "unknown"), + index_name, + exc, + ) + cleanup_info["skipped_reason"] = "knowledge_record_lookup_failed" + forward_result = dict(forward_result or {}) + forward_result["source_cleanup"] = cleanup_info + return forward_result + + if preserve_source_file: + cleanup_info["skipped_reason"] = "preserve_source_file_true" + forward_result = dict(forward_result or {}) + forward_result["source_cleanup"] = cleanup_info + return forward_result + + cleanup_info["attempted"] = True + try: + resp = _delete_source_file_via_http_sync( + base_url=ELASTICSEARCH_SERVICE, + index_name=index_name, + path_or_url=source, + scope="source_only", + ) + cleanup_info["http_status"] = resp.get("http_status") + cleanup_info["response"] = ( + resp.get("response_json") + if resp.get("response_json") is not None + else resp.get("response_text") + ) + + ok = False + if isinstance(resp.get("response_json"), dict): + ok = bool(resp["response_json"].get("status") == "success") + elif resp.get("http_status") and 200 <= int(resp["http_status"]) < 300: + ok = True + + 
cleanup_info["success"] = ok + if not ok: + logger.warning( + "[%s] CLEANUP TASK: Source-only delete did not succeed. index='%s' source='%s' http_status=%s", + getattr(self.request, "id", "unknown"), + index_name, + source, + cleanup_info["http_status"], + ) + except Exception as exc: + cleanup_info["success"] = False + cleanup_info["error"] = str(exc) + logger.warning( + "[%s] CLEANUP TASK: Source-only delete failed. index='%s' source='%s' error=%s", + getattr(self.request, "id", "unknown"), + index_name, + source, + exc, + ) + + forward_result = dict(forward_result or {}) + forward_result["source_cleanup"] = cleanup_info + return forward_result + + +def submit_process_forward_chain( + *, source: str, source_type: str, chunking_strategy: str, @@ -976,30 +2058,14 @@ def process_and_forward( original_filename: Optional[str] = None, authorization: Optional[str] = None, embedding_model_id: Optional[int] = None, - tenant_id: Optional[str] = None + tenant_id: Optional[str] = None, ) -> str: """ - Combined task that chains processing and forwarding - - This task delegates to a chain of process -> forward - - Args: - source: Source file path, URL, or text content - source_type: source of the file("local", "minio") - chunking_strategy: Strategy for chunking the document - index_name: Name of the index to store documents - original_filename: The original name of the file - authorization: Authorization header for API calls - embedding_model_id: Embedding model ID for chunk size configuration - tenant_id: Tenant ID for retrieving model configuration + Build and enqueue a Celery chain: process -> forward. Returns: - Task ID of the chain + Celery chain task ID, or empty string if enqueue failed. 
""" - logger.info( - f"Starting processing chain for {source}, original_filename={original_filename}, strategy={chunking_strategy}, index={index_name}, model_id={embedding_model_id}") - - # Create a task chain task_chain = chain( process.s( source=source, @@ -1016,20 +2082,66 @@ def process_and_forward( source_type=source_type, original_filename=original_filename, authorization=authorization - ).set(queue='forward_q') + ).set(queue='forward_q'), + cleanup_source.s().set(queue='forward_q'), ) - # Execute the chain result = task_chain.apply_async() if result is None or not hasattr(result, 'id') or result.id is None: logger.error( "Celery chain apply_async() did not return a valid result or result.id") return "" - logger.info(f"Created task chain ID: {result.id}") - return result.id +@app.task(bind=True, base=LoggingTask, name='data_process.tasks.process_and_forward') +def process_and_forward( + self, + source: str, + source_type: str, + chunking_strategy: str, + index_name: Optional[str] = None, + original_filename: Optional[str] = None, + authorization: Optional[str] = None, + embedding_model_id: Optional[int] = None, + tenant_id: Optional[str] = None +) -> str: + """ + Combined task that chains processing and forwarding + + This task delegates to a chain of process -> forward + + Args: + source: Source file path, URL, or text content + source_type: source of the file("local", "minio") + chunking_strategy: Strategy for chunking the document + index_name: Name of the index to store documents + original_filename: The original name of the file + authorization: Authorization header for API calls + embedding_model_id: Embedding model ID for chunk size configuration + tenant_id: Tenant ID for retrieving model configuration + + Returns: + Task ID of the chain + """ + logger.info( + f"Starting processing chain for {source}, original_filename={original_filename}, strategy={chunking_strategy}, index={index_name}, model_id={embedding_model_id}") + + chain_id = 
submit_process_forward_chain( + source=source, + source_type=source_type, + chunking_strategy=chunking_strategy, + index_name=index_name, + original_filename=original_filename, + authorization=authorization, + embedding_model_id=embedding_model_id, + tenant_id=tenant_id, + ) + if chain_id: + logger.info(f"Created task chain ID: {chain_id}") + return chain_id + + @app.task(bind=True, base=LoggingTask, name='data_process.tasks.process_sync') def process_sync( self, diff --git a/backend/data_process/worker.py b/backend/data_process/worker.py index a5f5f4a27..48323869b 100644 --- a/backend/data_process/worker.py +++ b/backend/data_process/worker.py @@ -1,4 +1,4 @@ -""" +""" Celery worker script for data processing tasks This script is used to start Celery workers for processing data @@ -21,6 +21,7 @@ import os import sys import time +import threading import traceback import ray @@ -44,6 +45,7 @@ REDIS_URL, WORKER_CONCURRENCY, WORKER_NAME, + RAY_GLOBAL_ACTOR_POOL_SIZE, ) from .app import app @@ -200,6 +202,60 @@ def worker_ready_handler(**kwargs): # Register health check endpoints, start monitoring, etc. logger.debug("🔍 Worker is ready to receive tasks") + # Prewarm Ray actors for process-related queues to reduce first-task latency. + # IMPORTANT: run asynchronously so worker queue registration is never blocked. + try: + queue_set = {q.strip() for q in QUEUES.split(",") if q.strip()} + if "process_q" in queue_set or "process_part_q" in queue_set: + from data_process.tasks import prewarm_ray_actors + + # Prewarm a cluster-global shared actor pool once at startup. + # Multiple workers may trigger this, but pool manager is idempotent. 
+ target = RAY_GLOBAL_ACTOR_POOL_SIZE + + def _prewarm_in_background(): + try: + warmed = prewarm_ray_actors(target_size=target) + logger.info( + f"Prewarmed Ray actor pool in background, warmed_actors={warmed}, target={target}, queues={sorted(queue_set)}" + ) + except Exception as exc: + logger.warning(f"Background prewarm failed: {exc}") + + threading.Thread(target=_prewarm_in_background, daemon=True).start() + except Exception as exc: + logger.warning(f"Failed to schedule Ray actor prewarm on worker ready: {exc}") + + # Periodic concurrency + Ray CPU availability log for process_part_q. + try: + queue_set = {q.strip() for q in QUEUES.split(",") if q.strip()} + if "process_part_q" in queue_set: + def _log_part_concurrency(): + while True: + try: + inspector = app.control.inspect(timeout=1) + active = inspector.active() or {} + part_active = 0 + for _, tasks in active.items(): + for t in tasks or []: + if t.get("name") == "data_process.tasks.process_part": + part_active += 1 + try: + ray_available = ray.available_resources() if ray.is_initialized() else {} + except Exception: + ray_available = {} + avail_cpu = ray_available.get("CPU", 0.0) + logger.info( + f"[process_part] active={part_active}, ray_available_cpu={avail_cpu}" + ) + except Exception as exc: + logger.debug(f"Failed to collect process_part concurrency stats: {exc}") + time.sleep(5) + + threading.Thread(target=_log_part_concurrency, daemon=True).start() + except Exception as exc: + logger.warning(f"Failed to start process_part concurrency logger: {exc}") + @worker_shutting_down.connect def worker_shutdown_handler(**kwargs): @@ -289,9 +345,9 @@ def validate_redis_connection() -> bool: def start_worker(): """Start Celery worker with appropriate settings""" - # Get configuration parameters + # Read from runtime env first, so launcher-assigned values always win. 
queues = QUEUES - worker_name = WORKER_NAME or f'worker-{os.getpid()}' + worker_name = WORKER_NAME concurrency = WORKER_CONCURRENCY logger.info(f"Start Celery worker '{worker_name}' with queues: {queues}") diff --git a/backend/data_process_service.py b/backend/data_process_service.py index 0576e01fc..23d3497d9 100644 --- a/backend/data_process_service.py +++ b/backend/data_process_service.py @@ -206,13 +206,21 @@ def start_workers(self): logger.debug(f"Process-worker concurrency set to: {process_worker_concurrency}") logger.debug(f"Forward-worker concurrency set to: {forward_worker_concurrency}") - # Define worker configurations based on new architecture + # Define worker configurations based on split architecture: + # - process-worker handles orchestration (process_q) + # - process-part-worker handles split sub-tasks (process_part_q) + # - forward-worker handles vectorization/storage (forward_q) workers_config = [ { 'name': 'process-worker', 'queue': 'process_q', 'concurrency': process_worker_concurrency }, + { + 'name': 'process-part-worker', + 'queue': 'process_part_q', + 'concurrency': process_worker_concurrency + }, { 'name': 'forward-worker', 'queue': 'forward_q', @@ -243,7 +251,7 @@ def start_workers(self): logging.basicConfig(level=logging.INFO, format='[%(asctime)s: %(levelname)s/%(name)s] %(message)s') logger = logging.getLogger("data_process.worker_launcher") -os.environ["QUEUES"] = "{config['queue']}" +os.environ["QUEUES"] = "{config['queue']}" # backward compatibility os.environ["WORKER_NAME"] = "{config['name']}" os.environ["WORKER_CONCURRENCY"] = "{config['concurrency']}" @@ -254,6 +262,10 @@ def start_workers(self): logger.debug(f"Celery app instance: {{celery_app}}") logger.debug(f"Attempting to start worker for queue: {config['queue']}") from data_process.worker import start_worker + # Re-apply launcher values after imports in case .env override changed them. 
+ os.environ["QUEUES"] = "{config['queue']}" + os.environ["WORKER_NAME"] = "{config['name']}" + os.environ["WORKER_CONCURRENCY"] = "{config['concurrency']}" start_worker() except ImportError as e: logger.error(f"Import error: {{e}}") @@ -564,7 +576,11 @@ def start_all_services(self): if success_count > 0: self.log_service_info() - + + # Start auto-summary scheduler + from services.auto_summary_scheduler import auto_summary_scheduler + auto_summary_scheduler.start() + return success_count == enabled_count def log_service_info(self): @@ -700,7 +716,11 @@ def stop_all_services(self): logger.error(f"Final attempt to kill Flower process failed: {final_e}") finally: service_processes['flower'] = None - + + # Stop auto-summary scheduler + from services.auto_summary_scheduler import auto_summary_scheduler + auto_summary_scheduler.stop() + # Stop Redis last if service_processes['redis']: try: diff --git a/backend/database/a2a_agent_db.py b/backend/database/a2a_agent_db.py index 9becdd67b..c1d998272 100644 --- a/backend/database/a2a_agent_db.py +++ b/backend/database/a2a_agent_db.py @@ -29,6 +29,22 @@ def _get_db_session(): # Default cache TTL in seconds (24 hours) DEFAULT_CACHE_TTL_HOURS = 24 + +def _extract_base_url(url: str) -> str: + """Extract base URL (scheme + host + port) from a full URL. + + Args: + url: Full URL, e.g., http://example.com/path/to/agent.json + + Returns: + Base URL, e.g., http://example.com + """ + from urllib.parse import urlparse + parsed = urlparse(url) + if parsed.port: + return f"{parsed.scheme}://{parsed.hostname}:{parsed.port}" + return f"{parsed.scheme}://{parsed.hostname}" + # Standard human-readable protocol label PROTOCOL_HTTP_JSON = "HTTP+JSON" PROTOCOL_JSONRPC = "JSONRPC" @@ -51,27 +67,19 @@ def _generate_endpoint_id(agent_id: int) -> str: def _extract_primary_interface(supported_interfaces: List[Dict[str, Any]]) -> tuple[str, str]: - """Extract the primary interface (HTTP+JSON) from supported interfaces. 
+ """Extract the primary interface (first one) from supported interfaces. Args: supported_interfaces: List of interface objects with protocolBinding, url, protocolVersion. Returns: Tuple of (agent_url, protocol_version). - Falls back to first interface if HTTP+JSON not found. + Returns empty string for url if no interfaces found. """ if not supported_interfaces: return "", "1.0" - # Prefer HTTP+JSON - for iface in supported_interfaces: - if iface.get("protocolBinding", "").upper() in (PROTOCOL_HTTP_JSON, PROTOCOL_JSONRPC, PROTOCOL_GRPC): - return ( - iface.get("url", ""), - iface.get("protocolVersion", "1.0") - ) - - # Fall back to first interface + # Return the first interface to ensure URL and protocol are from the same interface first = supported_interfaces[0] return ( first.get("url", ""), @@ -148,6 +156,7 @@ def create_external_agent_from_url( version: Optional[str] = None, streaming: bool = False, supported_interfaces: Optional[List[Dict[str, Any]]] = None, + base_url: Optional[str] = None, ) -> Dict[str, Any]: """Create or update an external A2A agent discovered from URL. @@ -162,6 +171,7 @@ def create_external_agent_from_url( version: Agent version from Agent Card. streaming: Whether this agent supports SSE streaming. supported_interfaces: All supported protocol interfaces. + base_url: Base URL for health checks (service root address). Returns: Created agent information dict. 
@@ -170,6 +180,10 @@ def create_external_agent_from_url( expires_at = now + timedelta(hours=DEFAULT_CACHE_TTL_HOURS) protocol_type = _extract_protocol_type(supported_interfaces) + # Extract base_url from source_url if not provided + if not base_url and source_url: + base_url = _extract_base_url(source_url) + with _get_db_session() as session: # Check if agent already exists by source_url existing = session.query(A2AExternalAgent).filter( @@ -191,6 +205,8 @@ def create_external_agent_from_url( existing.cached_at = now existing.cache_expires_at = expires_at existing.updated_by = user_id + if base_url: + existing.base_url = base_url agent = existing else: # Create new record @@ -210,6 +226,7 @@ def create_external_agent_from_url( raw_card=raw_card, cached_at=now, cache_expires_at=expires_at, + base_url=base_url, delete_flag='N' ) session.add(agent) @@ -226,6 +243,7 @@ def create_external_agent_from_url( "streaming": agent.streaming, "supported_interfaces": agent.supported_interfaces, "source_type": agent.source_type, + "base_url": agent.base_url, "is_available": agent.is_available, "cached_at": agent.cached_at.isoformat() if agent.cached_at else None, "cache_expires_at": agent.cache_expires_at.isoformat() if agent.cache_expires_at else None, @@ -244,6 +262,7 @@ def create_external_agent_from_nacos( version: Optional[str] = None, streaming: bool = False, supported_interfaces: Optional[List[Dict[str, Any]]] = None, + base_url: Optional[str] = None, ) -> Dict[str, Any]: """Create or update an external A2A agent discovered from Nacos. @@ -259,6 +278,7 @@ def create_external_agent_from_nacos( version: Agent version from Agent Card. streaming: Whether this agent supports SSE streaming. supported_interfaces: All supported protocol interfaces. + base_url: Base URL for health checks (service root address). Returns: Created agent information dict. 
@@ -267,6 +287,10 @@ def create_external_agent_from_nacos( expires_at = now + timedelta(hours=DEFAULT_CACHE_TTL_HOURS) protocol_type = _extract_protocol_type(supported_interfaces) + # Extract base_url from agent_url if not provided + if not base_url and agent_url: + base_url = _extract_base_url(agent_url) + with _get_db_session() as session: # Check if agent already exists by nacos_config_id + nacos_agent_name existing = session.query(A2AExternalAgent).filter( @@ -288,6 +312,8 @@ def create_external_agent_from_nacos( existing.cached_at = now existing.cache_expires_at = expires_at existing.updated_by = user_id + if base_url: + existing.base_url = base_url agent = existing else: agent = A2AExternalAgent( @@ -307,6 +333,7 @@ def create_external_agent_from_nacos( raw_card=raw_card, cached_at=now, cache_expires_at=expires_at, + base_url=base_url, delete_flag='N' ) session.add(agent) @@ -323,6 +350,7 @@ def create_external_agent_from_nacos( "streaming": agent.streaming, "supported_interfaces": agent.supported_interfaces, "source_type": agent.source_type, + "base_url": agent.base_url, "is_available": agent.is_available, "cached_at": agent.cached_at.isoformat() if agent.cached_at else None, "cache_expires_at": agent.cache_expires_at.isoformat() if agent.cache_expires_at else None, @@ -360,6 +388,7 @@ def get_external_agent_by_id(external_agent_id: int, tenant_id: str) -> Optional "supported_interfaces": agent.supported_interfaces, "source_type": agent.source_type, "source_url": agent.source_url, + "base_url": agent.base_url, "nacos_config_id": agent.nacos_config_id, "nacos_agent_name": agent.nacos_agent_name, "raw_card": agent.raw_card, @@ -416,6 +445,8 @@ def list_external_agents( "protocol_type": agent.protocol_type, "supported_interfaces": agent.supported_interfaces, "source_type": agent.source_type, + "source_url": agent.source_url, + "base_url": agent.base_url, "is_available": agent.is_available, "last_check_result": agent.last_check_result, "create_time": 
agent.create_time.isoformat() if agent.create_time else None, @@ -1714,6 +1745,7 @@ def get_nacos_config_by_id(config_id: str, tenant_id: str) -> Optional[Dict[str, "name": config.name, "nacos_addr": config.nacos_addr, "nacos_username": config.nacos_username, + "nacos_password": config.nacos_password, "namespace_id": config.namespace_id, "description": config.description, "is_active": config.is_active, @@ -1749,6 +1781,8 @@ def list_nacos_configs(tenant_id: str, is_active: Optional[bool] = None) -> List "name": config.name, "nacos_addr": config.nacos_addr, "namespace_id": config.namespace_id, + "nacos_username": config.nacos_username, + "nacos_password": config.nacos_password, "is_active": config.is_active, "last_scan_at": config.last_scan_at.isoformat() if config.last_scan_at else None, } @@ -1804,6 +1838,75 @@ def delete_nacos_config(config_id: str, tenant_id: str) -> bool: return True +def update_nacos_config( + config_id: str, + tenant_id: str, + user_id: str, + name: Optional[str] = None, + nacos_addr: Optional[str] = None, + nacos_username: Optional[str] = None, + nacos_password: Optional[str] = None, + namespace_id: Optional[str] = None, + description: Optional[str] = None, + is_active: Optional[bool] = None +) -> Optional[Dict[str, Any]]: + """Update a Nacos config. + + Args: + config_id: The config ID. + tenant_id: Tenant ID. + user_id: User who is updating this config. + name: Optional new display name. + nacos_addr: Optional new Nacos server address. + nacos_username: Optional new Nacos username. + nacos_password: Optional new Nacos password. + namespace_id: Optional new Nacos namespace. + description: Optional new description. + is_active: Optional active status. + + Returns: + Updated config information dict, or None if not found. 
+ """ + with _get_db_session() as session: + config = session.query(A2ANacosConfig).filter( + A2ANacosConfig.config_id == config_id, + A2ANacosConfig.tenant_id == tenant_id, + A2ANacosConfig.delete_flag != 'Y' + ).first() + + if not config: + return None + + if name is not None: + config.name = name + if nacos_addr is not None: + config.nacos_addr = nacos_addr + if nacos_username is not None: + config.nacos_username = nacos_username + if nacos_password is not None: + config.nacos_password = nacos_password + if namespace_id is not None: + config.namespace_id = namespace_id + if description is not None: + config.description = description + if is_active is not None: + config.is_active = is_active + + config.updated_by = user_id + session.flush() + + return { + "id": config.id, + "config_id": config.config_id, + "name": config.name, + "nacos_addr": config.nacos_addr, + "namespace_id": config.namespace_id, + "nacos_username": config.nacos_username, + "nacos_password": config.nacos_password, + "is_active": config.is_active, + } + + # ============================================================================= # A2A Artifact Operations # ============================================================================= diff --git a/backend/database/agent_db.py b/backend/database/agent_db.py index 3ced7625b..533659b0f 100644 --- a/backend/database/agent_db.py +++ b/backend/database/agent_db.py @@ -1,9 +1,11 @@ import logging -from typing import List -from sqlalchemy import update +from typing import List, Optional +from sqlalchemy import or_, update from database.client import get_db_session, as_dict, filter_property from database.db_models import AgentInfo, ToolInstance, AgentRelation +from database.agent_version_db import query_current_version_no +from consts.const import ASSET_OWNER_TENANT_ID from utils.str_utils import convert_list_to_string logger = logging.getLogger("agent_db") @@ -22,9 +24,12 @@ def search_agent_info_by_agent_id(agent_id: int, tenant_id: str, 
version_no: int with get_db_session() as session: agent = session.query(AgentInfo).filter( AgentInfo.agent_id == agent_id, - AgentInfo.tenant_id == tenant_id, AgentInfo.version_no == version_no, - AgentInfo.delete_flag != 'Y' + or_( + AgentInfo.tenant_id == tenant_id, + AgentInfo.tenant_id == ASSET_OWNER_TENANT_ID, + ), + AgentInfo.delete_flag != 'Y', ).first() if not agent: @@ -98,6 +103,40 @@ def query_sub_agents_id_list(main_agent_id: int, tenant_id: str, version_no: int return [relation.selected_agent_id for relation in relations] +def query_sub_agent_relations(main_agent_id: int, tenant_id: str, version_no: int = 0) -> List[dict]: + """ + Query sub-agent relations by main agent id, including pinned version info. + Default version_no=0 queries the draft version. + + Args: + main_agent_id: Parent agent ID + tenant_id: Tenant ID + version_no: Version number to filter. Default 0 = draft/editing state + """ + with get_db_session() as session: + query = session.query(AgentRelation).filter( + AgentRelation.parent_agent_id == main_agent_id, + AgentRelation.tenant_id == tenant_id, + AgentRelation.version_no == version_no, + AgentRelation.delete_flag != 'Y') + relations = query.all() + return [as_dict(relation) for relation in relations] + + +def resolve_sub_agent_version_no( + selected_agent_id: int, + selected_agent_version_no: Optional[int], + tenant_id: str, +) -> int: + """ + Resolve the effective version number for a sub-agent relation. + Uses pinned version when set; otherwise falls back to child's current published version. + """ + if selected_agent_version_no is not None: + return selected_agent_version_no + return query_current_version_no(agent_id=selected_agent_id, tenant_id=tenant_id) or 0 + + def clear_agent_new_mark(agent_id: int, tenant_id: str, user_id: str, version_no: int = 0): """ Clear the NEW mark for an agent. 
@@ -158,7 +197,8 @@ def create_agent(agent_info, tenant_id: str, user_id: str): :return: Created agent object """ info_with_metadata = dict(agent_info) - info_with_metadata.setdefault("max_steps", 5) + info_with_metadata.setdefault("max_steps", 15) + info_with_metadata.setdefault("verification_config", None) info_with_metadata.update({ "tenant_id": tenant_id, "version_no": 0, # Default to draft version @@ -192,8 +232,14 @@ def create_agent(agent_info, tenant_id: str, user_id: str): "business_description": new_agent.business_description, "business_logic_model_id": new_agent.business_logic_model_id, "business_logic_model_name": new_agent.business_logic_model_name, + "prompt_template_id": new_agent.prompt_template_id, + "prompt_template_name": new_agent.prompt_template_name, "group_ids": new_agent.group_ids, "is_new": new_agent.is_new, + "enable_context_manager": new_agent.enable_context_manager, + "verification_config": new_agent.verification_config, + "greeting_message": new_agent.greeting_message, + "example_questions": new_agent.example_questions, "current_version_no": new_agent.current_version_no, "version_no": new_agent.version_no, "created_by": new_agent.created_by, diff --git a/backend/database/agent_repository_db.py b/backend/database/agent_repository_db.py new file mode 100644 index 000000000..a6bb4f48b --- /dev/null +++ b/backend/database/agent_repository_db.py @@ -0,0 +1,358 @@ +import logging +import math +from typing import Any, Dict, List, Optional + +from sqlalchemy import func, or_, update + +from database.client import as_dict, filter_property, get_db_session +from database.db_models import AgentRepository + +logger = logging.getLogger("agent_repository_db") + +# Listing status: NOT_SHARED (未共享), PENDING_REVIEW (待审核), +# REJECTED (审核驳回), SHARED (已共享) +STATUS_NOT_SHARED = "NOT_SHARED" +STATUS_PENDING_REVIEW = "PENDING_REVIEW" +STATUS_REJECTED = "REJECTED" +STATUS_SHARED = "SHARED" + +VALID_REPOSITORY_STATUSES = frozenset({ + STATUS_NOT_SHARED, + 
STATUS_PENDING_REVIEW, + STATUS_REJECTED, + STATUS_SHARED, +}) + +_UPSERT_IMMUTABLE_FIELDS = frozenset({ + "agent_id", + "agent_repository_id", + "publisher_tenant_id", +}) + +_UPSERT_SNAPSHOT_FIELDS = frozenset({ + "source_version_no", + "name", + "display_name", + "description", + "author", + "category_id", + "tags", + "tool_count", + "version_label", + "agent_info_json", +}) + + +def insert_agent_repository_record( + repository_data: Dict[str, Any], + publisher_tenant_id: str, + publisher_user_id: str, +) -> int: + """Insert a new agent repository listing record.""" + with get_db_session() as session: + payload = { + **repository_data, + "publisher_tenant_id": publisher_tenant_id, + "publisher_user_id": publisher_user_id, + "created_by": publisher_user_id, + "updated_by": publisher_user_id, + "delete_flag": "N", + } + if payload.get("status") is None: + payload["status"] = STATUS_NOT_SHARED + + new_record = AgentRepository( + **filter_property(payload, AgentRepository) + ) + session.add(new_record) + session.flush() + return int(new_record.agent_repository_id) + + +def get_agent_repository_by_id(repository_id: int) -> Optional[dict]: + """Fetch a repository listing by primary key.""" + with get_db_session() as session: + record = session.query(AgentRepository).filter( + AgentRepository.agent_repository_id == repository_id, + AgentRepository.delete_flag != "Y", + ).first() + return as_dict(record) if record else None + + +def get_agent_repository_by_id_and_publisher( + repository_id: int, + publisher_tenant_id: str, +) -> Optional[dict]: + """Fetch a repository listing scoped to the publisher tenant.""" + with get_db_session() as session: + record = session.query(AgentRepository).filter( + AgentRepository.agent_repository_id == repository_id, + AgentRepository.publisher_tenant_id == publisher_tenant_id, + AgentRepository.delete_flag != "Y", + ).first() + return as_dict(record) if record else None + + +def get_agent_repository_by_agent_id(agent_id: int) -> 
Optional[dict]: + """Fetch an active repository listing by root agent_id.""" + with get_db_session() as session: + record = session.query(AgentRepository).filter( + AgentRepository.agent_id == agent_id, + AgentRepository.delete_flag != "Y", + ).first() + return as_dict(record) if record else None + + +def upsert_agent_repository_record( + repository_data: Dict[str, Any], + publisher_tenant_id: str, + publisher_user_id: str, +) -> tuple[int, bool]: + """Insert or update a repository listing keyed by agent_id. + + When no record exists, inserts a new listing. When a record exists: + - Same source_version_no: updates status (and updated_by) only. + - Different source_version_no: updates all snapshot fields, preserving + agent_id, agent_repository_id, and publisher_tenant_id. + + Returns: + Tuple of (agent_repository_id, is_updated). is_updated is False on insert. + """ + agent_id = repository_data.get("agent_id") + if agent_id is None: + raise ValueError("agent_id is required for repository upsert") + + existing = get_agent_repository_by_agent_id(int(agent_id)) + if not existing: + repository_id = insert_agent_repository_record( + repository_data=repository_data, + publisher_tenant_id=publisher_tenant_id, + publisher_user_id=publisher_user_id, + ) + return repository_id, False + + existing_version = existing.get("source_version_no") + incoming_version = repository_data.get("source_version_no") + repository_id = int(existing["agent_repository_id"]) + + if existing_version == incoming_version: + update_fields: Dict[str, Any] = { + "status": repository_data.get("status", STATUS_NOT_SHARED), + "updated_by": publisher_user_id, + } + else: + update_fields = { + key: repository_data[key] + for key in _UPSERT_SNAPSHOT_FIELDS + if key in repository_data + } + update_fields["publisher_user_id"] = publisher_user_id + update_fields["updated_by"] = publisher_user_id + update_fields["status"] = repository_data.get("status", STATUS_NOT_SHARED) + + with get_db_session() as session: + 
session.execute( + update(AgentRepository) + .where( + AgentRepository.agent_repository_id == repository_id, + AgentRepository.publisher_tenant_id == publisher_tenant_id, + AgentRepository.delete_flag != "Y", + ) + .values(**update_fields) + ) + return repository_id, True + + +def list_agent_repository_summaries( + *, + status: Optional[str] = None, +) -> List[dict]: + """List all active repository summaries without heavy JSON blobs.""" + with get_db_session() as session: + query = session.query( + AgentRepository.agent_repository_id, + AgentRepository.author, + AgentRepository.name, + AgentRepository.display_name, + AgentRepository.description, + AgentRepository.status, + ).filter( + AgentRepository.delete_flag != "Y", + ) + if status: + query = query.filter(AgentRepository.status == status) + rows = query.order_by(AgentRepository.agent_repository_id.desc()).all() + return [ + { + "agent_repository_id": row.agent_repository_id, + "author": row.author, + "name": row.name, + "display_name": row.display_name, + "description": row.description, + "status": row.status, + } + for row in rows + ] + + +def query_agent_repository_list( + *, + page: int = 1, + page_size: int = 20, + search: Optional[str] = None, + tag: Optional[str] = None, + category_id: Optional[int] = None, + status: Optional[str] = STATUS_SHARED, + publisher_tenant_id: Optional[str] = None, +) -> Dict[str, Any]: + """Query repository listings with offset pagination.""" + page = max(page, 1) + page_size = max(min(page_size, 100), 1) + offset = (page - 1) * page_size + + with get_db_session() as session: + query = session.query(AgentRepository).filter( + AgentRepository.delete_flag != "Y", + ) + + if status: + query = query.filter(AgentRepository.status == status) + if publisher_tenant_id: + query = query.filter( + AgentRepository.publisher_tenant_id == publisher_tenant_id + ) + if category_id is not None: + query = query.filter(AgentRepository.category_id == category_id) + if tag: + query = 
query.filter(AgentRepository.tags.any(tag)) + if search: + keyword = f"%{search}%" + query = query.filter( + or_( + AgentRepository.name.ilike(keyword), + AgentRepository.display_name.ilike(keyword), + AgentRepository.description.ilike(keyword), + AgentRepository.author.ilike(keyword), + func.array_to_string(AgentRepository.tags, ",").ilike(keyword), + ) + ) + + total = query.count() + rows = ( + query.order_by(AgentRepository.agent_repository_id.desc()) + .offset(offset) + .limit(page_size) + .all() + ) + + total_pages = math.ceil(total / page_size) if total else 0 + return { + "items": [as_dict(row) for row in rows], + "pagination": { + "page": page, + "page_size": page_size, + "total": total, + "total_pages": total_pages, + }, + } + + +def update_agent_repository_by_id( + *, + repository_id: int, + publisher_tenant_id: str, + user_id: str, + updates: Dict[str, Any], +) -> int: + """Update a repository listing owned by the publisher tenant. Returns affected row count.""" + allowed_fields = { + "display_name", + "description", + "author", + "category_id", + "tags", + "tool_count", + "version_label", + "source_version_no", + "agent_info_json", + "status", + } + update_fields = { + key: value + for key, value in updates.items() + if key in allowed_fields + } + if not update_fields: + return 0 + + update_fields["updated_by"] = user_id + + with get_db_session() as session: + result = session.execute( + update(AgentRepository) + .where( + AgentRepository.agent_repository_id == repository_id, + AgentRepository.publisher_tenant_id == publisher_tenant_id, + AgentRepository.delete_flag != "Y", + ) + .values(**update_fields) + ) + return int(result.rowcount or 0) + + +def update_agent_repository_status_by_id( + *, + repository_id: int, + status: str, + user_id: str, +) -> int: + """Update repository listing status by primary key. 
Returns affected row count.""" + with get_db_session() as session: + result = session.execute( + update(AgentRepository) + .where( + AgentRepository.agent_repository_id == repository_id, + AgentRepository.delete_flag != "Y", + ) + .values(status=status, updated_by=user_id) + ) + return int(result.rowcount or 0) + + +def soft_delete_agent_repository_by_id( + *, + repository_id: int, + publisher_tenant_id: str, + user_id: str, +) -> int: + """Soft-delete a repository listing owned by the publisher tenant.""" + with get_db_session() as session: + result = session.execute( + update(AgentRepository) + .where( + AgentRepository.agent_repository_id == repository_id, + AgentRepository.publisher_tenant_id == publisher_tenant_id, + AgentRepository.delete_flag != "Y", + ) + .values(delete_flag="Y", updated_by=user_id) + ) + return int(result.rowcount or 0) + + +def list_agent_repository_by_publisher( + publisher_tenant_id: str, + *, + publisher_user_id: Optional[str] = None, +) -> List[dict]: + """List all repository listings published by a tenant.""" + with get_db_session() as session: + query = session.query(AgentRepository).filter( + AgentRepository.publisher_tenant_id == publisher_tenant_id, + AgentRepository.delete_flag != "Y", + ) + if publisher_user_id: + query = query.filter( + AgentRepository.publisher_user_id == publisher_user_id + ) + rows = query.order_by(AgentRepository.agent_repository_id.desc()).all() + return [as_dict(row) for row in rows] diff --git a/backend/database/agent_version_db.py b/backend/database/agent_version_db.py index 4df0158a8..c895cb249 100644 --- a/backend/database/agent_version_db.py +++ b/backend/database/agent_version_db.py @@ -1,9 +1,10 @@ import logging from typing import List, Optional, Tuple -from sqlalchemy import select, insert, update, func +from sqlalchemy import or_, select, insert, update, delete, func from database.client import get_db_session, as_dict from database.db_models import AgentInfo, ToolInstance, AgentRelation, 
AgentVersion, SkillInstance +from consts.const import ASSET_OWNER_TENANT_ID logger = logging.getLogger("agent_version_db") @@ -28,7 +29,6 @@ def search_version_by_version_no( with get_db_session() as session: version = session.query(AgentVersion).filter( AgentVersion.agent_id == agent_id, - AgentVersion.tenant_id == tenant_id, AgentVersion.version_no == version_no, AgentVersion.delete_flag == 'N', ).first() @@ -77,7 +77,10 @@ def query_current_version_no( with get_db_session() as session: agent = session.query(AgentInfo).filter( AgentInfo.agent_id == agent_id, - AgentInfo.tenant_id == tenant_id, + or_( + AgentInfo.tenant_id == tenant_id, + AgentInfo.tenant_id == ASSET_OWNER_TENANT_ID, + ), AgentInfo.version_no == 0, AgentInfo.delete_flag == 'N', ).first() @@ -96,11 +99,17 @@ def query_agent_snapshot( # Query agent info snapshot agent = session.query(AgentInfo).filter( AgentInfo.agent_id == agent_id, - AgentInfo.tenant_id == tenant_id, + or_( + AgentInfo.tenant_id == tenant_id, + AgentInfo.tenant_id == ASSET_OWNER_TENANT_ID, + ), AgentInfo.version_no == version_no, AgentInfo.delete_flag == 'N', ).first() + if agent is not None: + tenant_id = agent.tenant_id + # Query tool instances snapshot tools = session.query(ToolInstance).filter( ToolInstance.agent_id == agent_id, @@ -370,6 +379,96 @@ def delete_relation_snapshot( return result.rowcount +# ============== Restore Draft from Version Snapshot ============== +# Used by rollback: copies a published version's data back into draft (version_no=0) + +def restore_agent_draft( + agent_id: int, + tenant_id: str, + target_version_no: int, + target_agent_snapshot: dict, + target_tool_snapshots: List[dict], + target_relation_snapshots: List[dict], + target_skill_snapshots: List[dict], +) -> None: + """ + Atomically restore the agent draft (version_no=0) from a published version snapshot. + This replaces all draft data with the target version's data. + + Operations in a single transaction: + 1. 
Hard-delete current draft tools, relations, skills (version_no=0) to free up PK slots + 2. Update agent draft record with target version's agent data + 3. Bulk-insert tools copied from target version with version_no=0 + 4. Bulk-insert relations copied from target version with version_no=0 + 5. Bulk-insert skills copied from target version with version_no=0 + 6. Update current_version_no to point to target_version_no + """ + + with get_db_session() as session: + # 1. Hard-delete current draft tools to free up (tool_instance_id, version_no=0) keys + session.execute( + delete(ToolInstance).where( + ToolInstance.agent_id == agent_id, + ToolInstance.tenant_id == tenant_id, + ToolInstance.version_no == 0, + ) + ) + + # 2. Hard-delete current draft relations + session.execute( + delete(AgentRelation).where( + AgentRelation.parent_agent_id == agent_id, + AgentRelation.tenant_id == tenant_id, + AgentRelation.version_no == 0, + ) + ) + + # 3. Hard-delete current draft skills + session.execute( + delete(SkillInstance).where( + SkillInstance.agent_id == agent_id, + SkillInstance.tenant_id == tenant_id, + SkillInstance.version_no == 0, + ) + ) + + # 4. Update agent draft record with target version's data + draft_values = {k: v for k, v in target_agent_snapshot.items() + if k not in ('version_no', 'current_version_no')} + draft_values['current_version_no'] = target_version_no + session.execute( + update(AgentInfo) + .where( + AgentInfo.agent_id == agent_id, + AgentInfo.tenant_id == tenant_id, + AgentInfo.version_no == 0, + AgentInfo.delete_flag == 'N', + ) + .values(**draft_values) + ) + + # 5. Bulk-insert tools from target version (with version_no=0) + for tool in target_tool_snapshots: + tool_copy = {k: v for k, v in tool.items() + if k not in ('version_no',)} + tool_copy['version_no'] = 0 + session.execute(insert(ToolInstance).values(**tool_copy)) + + # 6. 
Bulk-insert relations from target version (with version_no=0) + for rel in target_relation_snapshots: + rel_copy = {k: v for k, v in rel.items() + if k not in ('version_no',)} + rel_copy['version_no'] = 0 + session.execute(insert(AgentRelation).values(**rel_copy)) + + # 7. Bulk-insert skills from target version (with version_no=0) + for skill in target_skill_snapshots: + skill_copy = {k: v for k, v in skill.items() + if k not in ('version_no',)} + skill_copy['version_no'] = 0 + session.execute(insert(SkillInstance).values(**skill_copy)) + + def delete_skill_snapshot( agent_id: int, tenant_id: str, diff --git a/backend/database/attachment_db.py b/backend/database/attachment_db.py index 1faabac23..06b84e5ac 100644 --- a/backend/database/attachment_db.py +++ b/backend/database/attachment_db.py @@ -2,9 +2,81 @@ import os import uuid from datetime import datetime -from typing import Any, BinaryIO, Dict, List, Optional +from typing import Any, BinaryIO, Dict, List, Optional, Tuple from .client import minio_client +from consts.const import S3_URL_PREFIX +from consts.const import NORTHBOUND_EXTERNAL_URL +from urllib.parse import quote + + +def _normalize_object_and_bucket(object_name: str, bucket: Optional[str] = None) -> Tuple[str, Optional[str]]: + """ + Normalize object_name + bucket from supported URL styles. 
+ + Supports: + - s3://bucket/key + - /bucket/key + - key (uses provided bucket or default bucket) + """ + if not object_name: + return object_name, bucket + + if object_name.startswith(S3_URL_PREFIX): + s3_path = object_name[len(S3_URL_PREFIX) :] + parts = s3_path.split("/", 1) + parsed_bucket = parts[0] if parts[0] else None + parsed_key = parts[1] if len(parts) > 1 else "" + return parsed_key, parsed_bucket or bucket + + if object_name.startswith("/"): + path = object_name.lstrip("/") + parts = path.split("/", 1) + parsed_bucket = parts[0] if parts[0] else None + parsed_key = parts[1] if len(parts) > 1 else "" + return parsed_key, parsed_bucket or bucket + + return object_name, bucket + + +def build_s3_url(object_name: str, bucket: Optional[str] = None) -> str: + """ + Build an s3://bucket/key style URL from an object name (or passthrough if already s3://). + """ + if not object_name: + return "" + + if object_name.startswith(S3_URL_PREFIX): + return object_name + + if object_name.startswith("/"): + path = object_name.lstrip("/") + parts = path.split("/", 1) + if len(parts) == 2: + return f"{S3_URL_PREFIX}{parts[0]}/{parts[1]}" + return f"{S3_URL_PREFIX}{parts[0]}/" + + resolved_bucket = bucket or minio_client.default_bucket + if resolved_bucket: + return f"{S3_URL_PREFIX}{resolved_bucket}/{object_name}" + return f"{S3_URL_PREFIX}{object_name}" + + +def _build_mcp_presigned_url(presigned_url: str) -> str: + """ + Build northbound API proxy URL for MCP tools. 
+ + Args: + presigned_url: Original MinIO presigned URL + + Returns: + str: URL wrapped with northbound API proxy, with presigned_url URL-encoded + """ + if not presigned_url: + return "" + # URL-encode the presigned_url before embedding it as a query parameter + encoded_presigned_url = quote(presigned_url, safe='') + return f"{NORTHBOUND_EXTERNAL_URL}/nb/v1/file/fetch?presigned_url={encoded_presigned_url}" def generate_object_name(file_name: str, prefix: str = "attachments") -> str: @@ -28,7 +100,13 @@ def generate_object_name(file_name: str, prefix: str = "attachments") -> str: return f"{prefix}/{timestamp}_{unique_id}{ext}" -def upload_file(file_path: str, object_name: Optional[str] = None, bucket: Optional[str] = None) -> Dict[str, Any]: +def upload_file( + file_path: str, + object_name: Optional[str] = None, + bucket: Optional[str] = None, + generate_presigned_url: bool = True, + presigned_url_expires: int = 86400 +) -> Dict[str, Any]: """ Upload local file to MinIO @@ -36,6 +114,8 @@ def upload_file(file_path: str, object_name: Optional[str] = None, bucket: Optio file_path: Local file path object_name: Object name, if not specified will be auto-generated bucket: Bucket name, if not specified will use default bucket + generate_presigned_url: Whether to generate presigned URL for external access (default True) + presigned_url_expires: Expiration time in seconds for presigned URL (default 86400 = 24 hours) Returns: Dict[str, Any]: Upload result, containing success flag, URL and error message (if any) @@ -55,6 +135,12 @@ def upload_file(file_path: str, object_name: Optional[str] = None, bucket: Optio if success: response["url"] = result + # Generate presigned URL for external access if requested + if generate_presigned_url: + presigned_result = get_file_url(object_name, bucket, presigned_url_expires) + if presigned_result.get("success"): + # Only expose MCP URL (with proxy prefix), not raw MinIO URL + response["presigned_url"] = 
_build_mcp_presigned_url(presigned_result["url"]) else: response["error"] = result @@ -65,7 +151,10 @@ def upload_fileobj( file_obj: BinaryIO, file_name: str, bucket: Optional[str] = None, - prefix: str = "attachments" + prefix: str = "attachments", + generate_presigned_url: bool = True, + presigned_url_expires: int = 86400, + file_size: Optional[int] = None ) -> Dict[str, Any]: """ Upload file object to MinIO @@ -75,6 +164,9 @@ def upload_fileobj( file_name: File name bucket: Bucket name, if not specified will use default bucket prefix: Object name prefix, default is "attachments" + generate_presigned_url: Whether to generate presigned URL for external access (default True) + presigned_url_expires: Expiration time in seconds for presigned URL (default 86400 = 24 hours) + file_size: Pre-calculated file size in bytes. If not provided, will be calculated internally. Returns: Dict[str, Any]: Upload result, containing success flag, URL and error message (if any) @@ -82,26 +174,39 @@ def upload_fileobj( # Generate object name object_name = generate_object_name(file_name, prefix=prefix) - # Get current position - current_pos = file_obj.tell() - - # Calculate file size - file_obj.seek(0, os.SEEK_END) - file_size = file_obj.tell() - - # Reset to original position - file_obj.seek(current_pos) + # Calculate file size if not provided + if file_size is None: + try: + current_pos = file_obj.tell() + file_obj.seek(0, os.SEEK_END) + file_size = file_obj.tell() + file_obj.seek(0) # Seek to beginning for upload + except (ValueError, IOError): + file_size = 0 + file_obj.seek(0) # Try to seek to beginning anyway # Upload file success, result = minio_client.upload_fileobj( file_obj, object_name, bucket) + # Restore original position (if file is still open) + try: + file_obj.seek(0) + except (ValueError, IOError): + pass # File is closed, ignore + # Build response response = {"success": success, "object_name": object_name, "file_name": file_name, "file_size": file_size, "content_type": 
get_content_type(file_name), "upload_time": datetime.now().isoformat()} if success: response["url"] = result + # Generate presigned URL for external access if requested + if generate_presigned_url: + presigned_result = get_file_url(object_name, bucket, presigned_url_expires) + if presigned_result.get("success"): + # Only expose MCP URL (with proxy prefix), not raw MinIO URL + response["presigned_url"] = _build_mcp_presigned_url(presigned_result["url"]) else: response["error"] = result @@ -134,14 +239,14 @@ def download_file(object_name: str, file_path: str, bucket: Optional[str] = None return response -def get_file_url(object_name: str, bucket: Optional[str] = None, expires: int = 3600) -> Dict[str, Any]: +def get_file_url(object_name: str, bucket: Optional[str] = None, expires: int = 86400) -> Dict[str, Any]: """ Get presigned URL for file Args: object_name: Object name bucket: Bucket name, if not specified will use default bucket - expires: URL expiration time in seconds + expires: URL expiration time in seconds (default 86400 = 24 hours) Returns: Dict[str, Any]: Result containing success flag, URL and error message (if any) @@ -165,6 +270,9 @@ def get_file_size_from_minio(object_name: str, bucket: Optional[str] = None) -> """ Get file size by object name """ + object_name, bucket = _normalize_object_and_bucket(object_name, bucket) + # Ensure minio_client is initialized before accessing storage_config + minio_client._ensure_initialized() bucket = bucket or minio_client.storage_config.default_bucket return minio_client.get_file_size(object_name, bucket) @@ -172,15 +280,16 @@ def get_file_size_from_minio(object_name: str, bucket: Optional[str] = None) -> def file_exists(object_name: str, bucket: Optional[str] = None) -> bool: """ Check if a file exists in the bucket. 
- + Args: object_name: Object name in storage bucket: Bucket name, if not specified will use default bucket - + Returns: bool: True if file exists, False otherwise """ try: + object_name, bucket = _normalize_object_and_bucket(object_name, bucket) return minio_client.file_exists(object_name, bucket) except Exception: return False @@ -189,15 +298,17 @@ def file_exists(object_name: str, bucket: Optional[str] = None) -> bool: def copy_file(source_object: str, dest_object: str, bucket: Optional[str] = None) -> Dict[str, Any]: """ Copy a file within the same bucket (atomic operation in MinIO). - + Args: source_object: Source object name dest_object: Destination object name bucket: Bucket name, if not specified will use default bucket - + Returns: Dict[str, Any]: Result containing success flag and error message (if any) """ + source_object, bucket = _normalize_object_and_bucket(source_object, bucket) + dest_object, bucket = _normalize_object_and_bucket(dest_object, bucket) success, result = minio_client.copy_file(source_object, dest_object, bucket) if success: return {"success": True, "object_name": result} @@ -223,8 +334,8 @@ def list_files(prefix: str = "", bucket: Optional[str] = None) -> List[Dict[str, for file in files: file["content_type"] = get_content_type(file["key"]) - # Get presigned URL (valid for 1 hour) - success, url = minio_client.get_file_url(file["key"], bucket, 3600) + # Get presigned URL (valid for 24 hours) + success, url = minio_client.get_file_url(file["key"], bucket, 86400) if success: file["url"] = url @@ -242,7 +353,9 @@ def delete_file(object_name: str, bucket: Optional[str] = None) -> Dict[str, Any Returns: Dict[str, Any]: Delete result, containing success flag and error message (if any) """ + object_name, bucket = _normalize_object_and_bucket(object_name, bucket) if not bucket: + minio_client._ensure_initialized() bucket = minio_client.storage_config.default_bucket success, result = minio_client.delete_file(object_name, bucket) @@ -265,6 
+378,7 @@ def get_file_stream(object_name: str, bucket: Optional[str] = None) -> Optional[ Returns: Optional[BinaryIO]: Standard BinaryIO stream object, or None if failed """ + object_name, bucket = _normalize_object_and_bucket(object_name, bucket) success, result = minio_client.get_file_stream(object_name, bucket) if not success: return None @@ -341,6 +455,7 @@ def get_content_type(file_path: str) -> str: '.html': 'text/html', '.htm': 'text/html', '.json': 'application/json', + '.epub': 'application/epub', '.xml': 'application/xml', '.zip': 'application/zip', '.rar': 'application/x-rar-compressed', diff --git a/backend/database/cas_session_db.py b/backend/database/cas_session_db.py new file mode 100644 index 000000000..57d1aa8ea --- /dev/null +++ b/backend/database/cas_session_db.py @@ -0,0 +1,134 @@ +""" +Database operations for CAS-backed web sessions. +""" + +from datetime import datetime +from typing import Any, Dict, Optional + +from database.client import as_dict, get_db_session +from database.db_models import UserCasSession + +CAS_SESSION_ACTIVE = "active" +CAS_SESSION_REVOKED = "revoked" + + +def create_cas_session( + *, + session_id: str, + user_id: str, + cas_user_id: str, + expires_at: datetime, + cas_session_index: Optional[str] = None, +) -> Dict[str, Any]: + with get_db_session() as session: + record = UserCasSession( + session_id=session_id, + user_id=user_id, + cas_user_id=cas_user_id, + cas_session_index=cas_session_index, + status=CAS_SESSION_ACTIVE, + expires_at=expires_at, + created_by=user_id, + updated_by=user_id, + ) + session.add(record) + session.flush() + return as_dict(record) + + +def get_cas_session_by_session_id(session_id: str) -> Optional[Dict[str, Any]]: + if not session_id: + return None + with get_db_session() as session: + result = ( + session.query(UserCasSession) + .filter( + UserCasSession.session_id == session_id, + UserCasSession.delete_flag == "N", + ) + .first() + ) + return as_dict(result) if result else None + + +def 
is_cas_session_active(session_id: str) -> bool: + if not session_id: + return False + with get_db_session() as session: + result = ( + session.query(UserCasSession) + .filter( + UserCasSession.session_id == session_id, + UserCasSession.status == CAS_SESSION_ACTIVE, + UserCasSession.expires_at > datetime.now(), + UserCasSession.delete_flag == "N", + ) + .first() + ) + return result is not None + + +def revoke_cas_session_by_session_id(session_id: str, actor: str = "cas") -> int: + if not session_id: + return 0 + with get_db_session() as session: + result = ( + session.query(UserCasSession) + .filter( + UserCasSession.session_id == session_id, + UserCasSession.status == CAS_SESSION_ACTIVE, + UserCasSession.delete_flag == "N", + ) + .update( + { + "status": CAS_SESSION_REVOKED, + "revoked_at": datetime.now(), + "updated_by": actor, + } + ) + ) + return result + + +def revoke_cas_sessions_by_user_id(cas_user_id: str, actor: str = "cas") -> int: + if not cas_user_id: + return 0 + with get_db_session() as session: + result = ( + session.query(UserCasSession) + .filter( + UserCasSession.cas_user_id == cas_user_id, + UserCasSession.status == CAS_SESSION_ACTIVE, + UserCasSession.delete_flag == "N", + ) + .update( + { + "status": CAS_SESSION_REVOKED, + "revoked_at": datetime.now(), + "updated_by": actor, + } + ) + ) + return result + + +def revoke_cas_session_by_index(cas_session_index: str, actor: str = "cas") -> int: + if not cas_session_index: + return 0 + with get_db_session() as session: + result = ( + session.query(UserCasSession) + .filter( + UserCasSession.cas_session_index == cas_session_index, + UserCasSession.status == CAS_SESSION_ACTIVE, + UserCasSession.delete_flag == "N", + ) + .update( + { + "status": CAS_SESSION_REVOKED, + "revoked_at": datetime.now(), + "updated_by": actor, + } + ) + ) + return result diff --git a/backend/database/client.py b/backend/database/client.py index 9b0b97a52..e095c5636 100644 --- a/backend/database/client.py +++ 
b/backend/database/client.py @@ -89,6 +89,9 @@ def __init__(self): if MinioClient._initialized: return MinioClient._initialized = True + # Explicitly initialize attributes so external callers never hit missing-attribute errors. + self._storage_client = None + self.storage_config = None def _ensure_initialized(self): """Lazily initialize the storage client on first use.""" @@ -108,6 +111,23 @@ def _ensure_initialized(self): return True return False + @property + def default_bucket(self) -> Optional[str]: + """ + Resolve default bucket safely for callers that need bucket info. + Falls back to configured constant when lazy init has not run yet. + """ + try: + self._ensure_initialized() + except Exception: + # Keep this accessor resilient; operational methods can still raise + # detailed storage errors when invoked. + pass + + if getattr(self, "storage_config", None) is not None: + return self.storage_config.default_bucket + return MINIO_DEFAULT_BUCKET + def upload_file( self, file_path: str, @@ -158,14 +178,14 @@ def download_file(self, object_name: str, file_path: str, bucket: Optional[str] self._ensure_initialized() return self._storage_client.download_file(object_name, file_path, bucket) - def get_file_url(self, object_name: str, bucket: Optional[str] = None, expires: int = 3600) -> Tuple[bool, str]: + def get_file_url(self, object_name: str, bucket: Optional[str] = None, expires: int = 86400) -> Tuple[bool, str]: """ Get presigned URL for file Args: object_name: Object name bucket: Bucket name, if not specified use default bucket - expires: URL expiration time in seconds + expires: URL expiration time in seconds (default 86400 = 24 hours) Returns: Tuple[bool, str]: (Success status, Presigned URL or error message) @@ -330,3 +350,51 @@ def filter_property(data, model_class): """ model_fields = model_class.__table__.columns.keys() return {key: value for key, value in data.items() if key in model_fields} + + +# 
--------------------------------------------------------------------------- +# Monitoring-specific, isolated engine and session management +# --------------------------------------------------------------------------- +# Internal engine and session maker for monitoring data, isolated from main pool +_monitoring_engine = None +_monitoring_session_maker = None + + +def _get_monitoring_engine(): + global _monitoring_engine, _monitoring_session_maker + if _monitoring_engine is None: + _monitoring_engine = create_engine( + "postgresql://", + connect_args={ + "host": POSTGRES_HOST, + "user": POSTGRES_USER, + "password": NEXENT_POSTGRES_PASSWORD, + "database": POSTGRES_DB, + "port": POSTGRES_PORT, + "client_encoding": "utf8", + }, + echo=False, + pool_size=3, + pool_pre_ping=True, + pool_timeout=30, + ) + _monitoring_session_maker = sessionmaker(bind=_monitoring_engine) + return _monitoring_engine + + +@contextmanager +def get_monitoring_db_session(db_session=None): + _get_monitoring_engine() + session = _monitoring_session_maker() if db_session is None else db_session + try: + yield session + if db_session is None: + session.commit() + except Exception as e: + if db_session is None: + session.rollback() + logger.error(f"Monitoring database operation failed: {str(e)}") + raise + finally: + if db_session is None: + session.close() diff --git a/backend/database/community_mcp_db.py b/backend/database/community_mcp_db.py new file mode 100644 index 000000000..92b78a4ed --- /dev/null +++ b/backend/database/community_mcp_db.py @@ -0,0 +1,181 @@ +import logging +from typing import Any, Dict, List + +from sqlalchemy import func, or_ + +from database.client import as_dict, filter_property, get_db_session +from database.db_models import McpCommunityRecord + +logger = logging.getLogger("community_mcp_db") + + +def get_mcp_community_records( + *, + search: str | None = None, + tag: str | None = None, + transport_type: str | None = None, + cursor: str | None = None, + limit: int = 30, 
+) -> Dict[str, Any]: + with get_db_session() as session: + query = session.query(McpCommunityRecord).filter( + McpCommunityRecord.delete_flag != "Y" + ) + + if transport_type: + query = query.filter(McpCommunityRecord.transport_type == transport_type) + + if tag: + query = query.filter(McpCommunityRecord.tags.any(tag)) + + if search: + keyword = f"%{search}%" + query = query.filter( + or_( + McpCommunityRecord.mcp_name.ilike(keyword), + McpCommunityRecord.description.ilike(keyword), + func.array_to_string(McpCommunityRecord.tags, ",").ilike(keyword), + ) + ) + + cursor_id: int | None = None + if cursor: + try: + cursor_id = int(cursor) + except ValueError: + cursor_id = None + + if cursor_id is not None: + query = query.filter(McpCommunityRecord.community_id < cursor_id) + + rows: List[McpCommunityRecord] = ( + query.order_by(McpCommunityRecord.community_id.desc()) + .limit(limit + 1) + .all() + ) + + has_next = len(rows) > limit + page_rows = rows[:limit] + + next_cursor = None + if has_next and page_rows: + next_cursor = str(page_rows[-1].community_id) + + return { + "count": len(page_rows), + "nextCursor": next_cursor, + "items": [as_dict(row) for row in page_rows], + } + + +def get_mcp_community_tag_stats() -> List[Dict[str, Any]]: + with get_db_session() as session: + rows = ( + session.query( + func.unnest(McpCommunityRecord.tags).label("tag"), + func.count(McpCommunityRecord.community_id).label("count"), + ) + .filter( + McpCommunityRecord.delete_flag != "Y", + ) + .group_by("tag") + .order_by(func.count(McpCommunityRecord.community_id).desc(), "tag") + .all() + ) + return [{"tag": str(row.tag), "count": int(row.count)} for row in rows if row.tag] + + +def create_mcp_community_record(mcp_data: Dict[str, Any], tenant_id: str, user_id: str) -> int: + with get_db_session() as session: + mcp_data.update({ + "tenant_id": tenant_id, + "user_id": user_id, + "created_by": user_id, + "updated_by": user_id, + "delete_flag": "N", + "source": "community", + }) + 
new_record = McpCommunityRecord(**filter_property(mcp_data, McpCommunityRecord)) + session.add(new_record) + session.flush() + return int(new_record.community_id) + + +def get_mcp_community_record_by_id_and_tenant(community_id: int, tenant_id: str) -> Dict[str, Any] | None: + with get_db_session() as session: + record = session.query(McpCommunityRecord).filter( + McpCommunityRecord.community_id == community_id, + McpCommunityRecord.tenant_id == tenant_id, + McpCommunityRecord.delete_flag != "Y", + ).first() + return as_dict(record) if record else None + + +def update_mcp_community_record_by_id( + *, + community_id: int, + tenant_id: str, + user_id: str, + name: str | None = None, + description: str | None = None, + tags: List[str] | None = None, + version: str | None = None, + registry_json: Dict[str, Any] | None = None, + config_json: Dict[str, Any] | None = None, +) -> None: + update_fields: Dict[str, Any] = {"updated_by": user_id} + + if name is not None: + update_fields["mcp_name"] = name + if description is not None: + update_fields["description"] = description + if tags is not None: + update_fields["tags"] = tags + if version is not None: + update_fields["version"] = version + if registry_json is not None: + update_fields["registry_json"] = registry_json + if config_json is not None: + update_fields["config_json"] = config_json + + with get_db_session() as session: + session.query(McpCommunityRecord).filter( + McpCommunityRecord.community_id == community_id, + McpCommunityRecord.tenant_id == tenant_id, + McpCommunityRecord.delete_flag != "Y", + ).update(update_fields) + + +def delete_mcp_community_record_by_id(*, community_id: int, tenant_id: str, user_id: str) -> None: + with get_db_session() as session: + session.query(McpCommunityRecord).filter( + McpCommunityRecord.community_id == community_id, + McpCommunityRecord.tenant_id == tenant_id, + McpCommunityRecord.delete_flag != "Y", + ).update({"delete_flag": "Y", "updated_by": user_id}) + + +def 
list_mcp_community_records_by_tenant(tenant_id: str) -> List[Dict[str, Any]]: + with get_db_session() as session: + rows = session.query(McpCommunityRecord).filter( + McpCommunityRecord.tenant_id == tenant_id, + McpCommunityRecord.delete_flag != "Y", + ).order_by(McpCommunityRecord.community_id.desc()).all() + return [as_dict(row) for row in rows] + +def get_mcp_community_tag_stats_by_tenant(tenant_id: str) -> List[Dict[str, Any]]: + with get_db_session() as session: + rows = ( + session.query( + func.unnest(McpCommunityRecord.tags).label("tag"), + func.count(McpCommunityRecord.community_id).label("count"), + ) + .filter( + McpCommunityRecord.tenant_id == tenant_id, + McpCommunityRecord.delete_flag != "Y", + ) + .group_by("tag") + .order_by(func.count(McpCommunityRecord.community_id).desc(), "tag") + .all() + ) + return [{"tag": str(row.tag), "count": int(row.count)} for row in rows if row.tag] diff --git a/backend/database/conversation_db.py b/backend/database/conversation_db.py index 18c0ee9fc..e401beda9 100644 --- a/backend/database/conversation_db.py +++ b/backend/database/conversation_db.py @@ -623,9 +623,18 @@ def get_conversation_history(conversation_id: int, user_id: Optional[str] = None } +def _image_exists(session, message_id: int, image_url: str) -> bool: + stmt = select(ConversationSourceImage).where( + ConversationSourceImage.message_id == message_id, + ConversationSourceImage.image_url == image_url, + ConversationSourceImage.delete_flag == 'N' + ).limit(1) + return session.execute(stmt).scalar_one_or_none() is not None + + def create_source_image(image_data: Dict[str, Any], user_id: Optional[str] = None) -> int: """ - Create image source reference + Create image source reference (skips if the same message_id + image_url already exists). 
Args: image_data: Dictionary containing image data, must include the following fields: @@ -634,17 +643,22 @@ def create_source_image(image_data: Dict[str, Any], user_id: Optional[str] = Non user_id: Reserved parameter for created_by and updated_by fields Returns: - int: Newly created image ID (auto-increment ID) + int: Newly created image ID (auto-increment ID), or -1 if skipped due to duplicate """ with get_db_session() as session: # Ensure message_id is of integer type message_id = int(image_data['message_id']) + image_url = image_data['image_url'] + + # Skip duplicate: same message_id + image_url already in DB + if _image_exists(session, message_id, image_url): + return -1 # Prepare data dictionary data = { "message_id": message_id, "conversation_id": image_data.get('conversation_id'), - "image_url": image_data['image_url'], + "image_url": image_url, "delete_flag": 'N', # Use the database's CURRENT_TIMESTAMP function "create_time": func.current_timestamp() @@ -1016,3 +1030,71 @@ def get_message_id_by_index(conversation_id: int, message_index: int) -> Optiona result = session.execute(stmt).scalar() return result + + +def get_latest_assistant_message_id(conversation_id: int, user_id: Optional[str] = None) -> Optional[int]: + """ + Get the most recent assistant message ID for a conversation. 
+ + Args: + conversation_id: Conversation ID (integer) + user_id: Optional user ID for ownership check + + Returns: + Optional[int]: The latest assistant message ID, or None if not found + """ + with get_db_session() as session: + conversation_id = int(conversation_id) + + stmt = select(ConversationMessage.message_id).where( + ConversationMessage.conversation_id == conversation_id, + ConversationMessage.delete_flag == 'N', + ConversationMessage.message_role == 'assistant' + ).order_by(desc(ConversationMessage.message_index)).limit(1) + + if user_id: + stmt = stmt.join( + ConversationRecord, + ConversationMessage.conversation_id == ConversationRecord.conversation_id + ).where(ConversationRecord.created_by == user_id) + + result = session.execute(stmt).scalar() + return result + + +def update_message_minio_files(message_id: int, skill_file_uploads: List[Dict[str, Any]]) -> bool: + """ + Merge skill file uploads into an existing message's minio_files field. + + Args: + message_id: Message ID to update + skill_file_uploads: List of skill file upload metadata dicts to append + + Returns: + bool: True if the message was updated, False if the message was not found + """ + with get_db_session() as session: + message_id = int(message_id) + + stmt = select(ConversationMessage).where( + ConversationMessage.message_id == message_id, + ConversationMessage.delete_flag == 'N' + ) + record = session.scalars(stmt).first() + if not record: + return False + + existing = record.minio_files + if existing: + try: + if isinstance(existing, str): + existing = json.loads(existing) + except (json.JSONDecodeError, TypeError): + existing = [] + else: + existing = [] + + existing.extend(skill_file_uploads) + record.minio_files = json.dumps(existing, ensure_ascii=False) + + return True diff --git a/backend/database/db_models.py b/backend/database/db_models.py index 3741dd559..5450b5f74 100644 --- a/backend/database/db_models.py +++ b/backend/database/db_models.py @@ -1,5 +1,5 @@ -from 
sqlalchemy import BigInteger, Boolean, Column, ForeignKey, ForeignKeyConstraint, Integer, JSON, Numeric, PrimaryKeyConstraint, Sequence, String, Text, TIMESTAMP, UniqueConstraint -from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy import BigInteger, Boolean, Column, Integer, JSON, Numeric, Sequence, String, Text, TIMESTAMP, UniqueConstraint, Index, Float, text +from sqlalchemy.dialects.postgresql import ARRAY, JSONB from sqlalchemy.orm import DeclarativeBase from sqlalchemy.sql import func @@ -15,6 +15,8 @@ _TENANT_ID_DOC = "Tenant ID for multi-tenancy isolation" # Base class for tables without audit fields + + class SimpleTableBase(DeclarativeBase): pass @@ -178,6 +180,90 @@ class ModelRecord(TableBase): Boolean, default=True, doc="Whether to verify SSL certificates when connecting to this model API. Default is true. Set to false for local services without SSL support.") chunk_batch = Column( Integer, doc="Batch size for concurrent embedding requests during document chunking") + model_appid = Column( + String(100), doc="Application ID for model authentication (used by some STT/TTS providers like Volcano Engine)") + access_token = Column( + String(100), doc="Access token for model authentication (used by some STT/TTS providers like Volcano Engine)") + timeout_seconds = Column( + Integer, doc="Request timeout in seconds for this model. Default is 120 seconds.") + concurrency_limit = Column( + Integer, doc="Maximum concurrent requests for this model. Default is null (unlimited).") + + +class ModelMonitoringRecord(SimpleTableBase): + """ + Model monitoring record table - stores per-request LLM performance metrics. + Uses SimpleTableBase to avoid audit fields (created_by, updated_by, etc.). 
+ """ + + __tablename__ = "model_monitoring_record_t" + __table_args__ = ( + Index("ix_monitoring_model_id", "model_id"), + Index("ix_monitoring_tenant_id", "tenant_id"), + Index("ix_monitoring_agent_id", "agent_id"), + Index("ix_monitoring_create_time", "create_time"), + Index("ix_monitoring_is_error", "is_error"), + Index("ix_monitoring_model_time", "model_id", "create_time"), + Index("ix_monitoring_model_type", "model_type"), + {"schema": SCHEMA}, + ) + + monitoring_id = Column( + Integer, + Sequence("model_monitoring_record_t_monitoring_id_seq", schema=SCHEMA), + primary_key=True, + nullable=False, + doc="Monitoring record ID, auto-increment primary key", + ) + model_id = Column( + Integer, doc="Model ID, foreign key reference to model_record_t.model_id" + ) + model_name = Column( + String(100), nullable=False, doc="Model name at the time of the request" + ) + agent_id = Column(Integer, doc="Agent ID that initiated the request") + agent_name = Column( + String(100), doc="Agent name at the time of the request") + conversation_id = Column( + Integer, doc="Conversation ID associated with this request" + ) + tenant_id = Column( + String(100), nullable=False, doc="Tenant ID for multi-tenant isolation" + ) + user_id = Column(String(100), doc="User ID who initiated the request") + request_duration_ms = Column( + Integer, doc="Total request duration in milliseconds") + ttft_ms = Column(Integer, doc="Time to first token in milliseconds") + input_tokens = Column(Integer, doc="Number of input tokens") + output_tokens = Column(Integer, doc="Number of output tokens") + total_tokens = Column(Integer, doc="Total tokens (input + output)") + generation_rate = Column( + Float, doc="Token generation rate (tokens per second)") + is_streaming = Column( + Boolean, default=False, doc="Whether the request used streaming" + ) + is_success = Column( + Boolean, default=True, doc="Whether the request completed successfully" + ) + is_error = Column( + Boolean, default=False, doc="Whether 
the request resulted in an error" + ) + error_type = Column( + String(50), doc="Error type classification (e.g., auth_error, rate_limit)" + ) + error_message = Column(Text, doc="Error message details") + retry_count = Column(Integer, default=0, doc="Number of retry attempts") + operation = Column( + String(50), doc="Operation type (e.g., llm_completion, llm_chat)" + ) + create_time = Column( + TIMESTAMP(timezone=False), server_default=func.now(), doc="Record creation time" + ) + delete_flag = Column(String(1), default="N", doc="Soft delete flag: Y/N") + display_name = Column(String(200), doc="User-facing model display name") + model_type = Column( + String(20), default="llm", doc="Model type: llm, embedding, multi_embedding" + ) class ToolInfo(TableBase): @@ -213,13 +299,16 @@ class AgentInfo(TableBase): agent_id = Column(Integer, Sequence( "ag_tenant_agent_t_agent_id_seq", schema=SCHEMA), nullable=False, primary_key=True, autoincrement=True, doc="ID") - version_no = Column(Integer, default=0, nullable=False, primary_key=True, doc="Version number. 0 = draft/editing state, >=1 = published snapshot") + version_no = Column(Integer, default=0, nullable=False, primary_key=True, + doc="Version number. 
0 = draft/editing state, >=1 = published snapshot") name = Column(String(100), doc="Agent name") display_name = Column(String(100), doc="Agent display name") description = Column(Text, doc="Description") author = Column(String(100), doc="Agent author") - model_name = Column(String(100), doc="[DEPRECATED] Name of the model used, use model_id instead") - model_id = Column(Integer, doc="Model ID, foreign key reference to model_record_t.model_id") + model_name = Column( + String(100), doc="[DEPRECATED] Name of the model used, use model_id instead") + model_id = Column( + Integer, doc="Model ID, foreign key reference to model_record_t.model_id") max_steps = Column(Integer, doc="Maximum number of steps") duty_prompt = Column(Text, doc="Duty prompt content") constraint_prompt = Column(Text, doc="Constraint prompt content") @@ -231,12 +320,60 @@ class AgentInfo(TableBase): Boolean, doc="Whether to provide the running summary to the manager agent") business_description = Column( Text, doc="Manually entered by the user to describe the entire business process") - business_logic_model_name = Column(String(100), doc="Model name used for business logic prompt generation") - business_logic_model_id = Column(Integer, doc="Model ID used for business logic prompt generation, foreign key reference to model_record_t.model_id") + business_logic_model_name = Column( + String(100), doc="Model name used for business logic prompt generation") + business_logic_model_id = Column( + Integer, doc="Model ID used for business logic prompt generation, foreign key reference to model_record_t.model_id") + prompt_template_id = Column( + Integer, doc="Prompt template ID used for business logic prompt generation") + prompt_template_name = Column(String( + 100), doc="Prompt template name used for business logic prompt generation") group_ids = Column(String, doc="Agent group IDs list") is_new = Column(Boolean, default=False, doc="Whether this agent is marked as new for the user") current_version_no = 
Column(Integer, nullable=True, doc="Current published version number. NULL means no version published yet") ingroup_permission = Column(String(30), doc="In-group permission: EDIT, READ_ONLY, PRIVATE") + enable_context_manager = Column(Boolean, default=False, doc="Whether to enable context management (compression) for this agent") + verification_config = Column(JSONB, doc="Layered ReAct self-verification configuration") + greeting_message = Column(Text, doc="Agent greeting message displayed on chat initial screen") + example_questions = Column(JSONB, doc="List of example questions for starting a conversation with this agent") + + +class PromptTemplate(TableBase): + """ + Prompt template table for user-defined prompt generation templates. + """ + __tablename__ = "ag_prompt_template_t" + __table_args__ = ( + Index( + "uq_prompt_template_user_name_active", + "tenant_id", + "user_id", + "template_name", + unique=True, + postgresql_where=text("delete_flag = 'N'"), + ), + Index( + "idx_ag_prompt_template_t_user", + "tenant_id", + "user_id", + "template_type", + postgresql_where=text("delete_flag = 'N'"), + ), + {"schema": SCHEMA}, + ) + + template_id = Column(Integer, Sequence( + "ag_prompt_template_t_template_id_seq", schema=SCHEMA), primary_key=True, nullable=False, autoincrement=True, doc="Prompt template ID") + template_name = Column(String(100), nullable=False, + doc="Prompt template name") + description = Column(String(500), doc="Prompt template description") + template_type = Column(String(50), nullable=False, + default="agent_generate", doc="Prompt template type") + tenant_id = Column(String(100), nullable=False, doc="Tenant ID") + user_id = Column(String(100), nullable=False, doc="User ID") + template_content_zh = Column( + JSONB, nullable=False, doc="Chinese prompt template content") + template_content_en = Column(JSONB, doc="English prompt template content") class ToolInstance(TableBase): @@ -259,7 +396,8 @@ class ToolInstance(TableBase): user_id = 
Column(String(100), doc="User ID") tenant_id = Column(String(100), doc="Tenant ID") enabled = Column(Boolean, doc="Enabled") - version_no = Column(Integer, default=0, primary_key=True, nullable=False, doc="Version number. 0 = draft/editing state, >=1 = published snapshot") + version_no = Column(Integer, default=0, primary_key=True, nullable=False, + doc="Version number. 0 = draft/editing state, >=1 = published snapshot") class KnowledgeRecord(TableBase): @@ -275,11 +413,25 @@ class KnowledgeRecord(TableBase): knowledge_name = Column(String(100), doc="User-facing knowledge base name") knowledge_describe = Column(String(3000), doc="Knowledge base description") knowledge_sources = Column(String(300), doc="Knowledge base sources") - embedding_model_name = Column(String(200), doc="Embedding model name, used to record the embedding model used by the knowledge base") + embedding_model_name = Column(String( + 200), doc="Embedding model name, used to record the embedding model used by the knowledge base") + embedding_model_id = Column( + Integer, doc="Embedding model ID, foreign key reference to model_record_t.model_id") tenant_id = Column(String(100), doc="Tenant ID") group_ids = Column(String, doc="Knowledge base group IDs list") ingroup_permission = Column( String(30), doc="In-group permission: EDIT, READ_ONLY, PRIVATE") + summary_frequency = Column(String(10), nullable=True, + doc="Auto-summary frequency: '3h', '5h', '1d', '1w', or NULL (disabled)") + last_summary_time = Column(TIMESTAMP(timezone=False), nullable=True, + doc="Timestamp of last summary generation") + last_doc_update_time = Column(TIMESTAMP(timezone=False), nullable=True, + doc="Timestamp of last document add/delete operation") + preserve_source_file = Column( + Boolean, + default=True, + doc="Whether to preserve uploaded source documents after vectorization", + ) class TenantConfig(TableBase): @@ -338,11 +490,54 @@ class McpRecord(TableBase): String(200), doc="Docker container ID for MCP service, None 
for non-containerized MCP", ) + container_port = Column( + Integer, + doc="Host port bound for containerized MCP service", + ) authorization_token = Column( String(500), doc="Authorization token for MCP server authentication (e.g., Bearer token)", default=None, ) + custom_headers = Column( + JSON, + doc="Custom HTTP headers as JSON object for MCP server requests", + default=None, + ) + source = Column( + String(30), doc="Source type: local/mcp_registry/community") + registry_json = Column(JSONB, doc="Full MCP registry server.json snapshot") + config_json = Column(JSON, doc="MCP config data") + enabled = Column(Boolean, default=True, doc="Enabled") + tags = Column(ARRAY(Text), doc="Tags") + description = Column(Text, doc="Description") + + +class McpCommunityRecord(TableBase): + """Community MCP market records table.""" + + __tablename__ = "mcp_community_record_t" + __table_args__ = {"schema": SCHEMA} + + community_id = Column( + Integer, + Sequence("mcp_community_record_t_community_id_seq", schema=SCHEMA), + primary_key=True, + nullable=False, + doc="Community record ID, unique primary key", + ) + tenant_id = Column(String(100), doc="Publisher tenant ID") + user_id = Column(String(100), doc="Publisher user ID") + mcp_name = Column(String(100), doc="MCP name") + mcp_server = Column(String(500), doc="MCP server URL") + source = Column(String(30), doc="Source type, fixed to community") + version = Column(String(50), doc="MCP version") + registry_json = Column(JSONB, doc="Full MCP metadata JSON") + transport_type = Column( + String(30), doc="Transport type: http/sse/container") + config_json = Column(JSON, doc="Public-shareable MCP configuration JSON") + tags = Column(ARRAY(Text), doc="Tags") + description = Column(Text, doc="Description") class UserTenant(TableBase): @@ -356,7 +551,8 @@ class UserTenant(TableBase): primary_key=True, nullable=False, doc="User tenant relationship ID, unique primary key") user_id = Column(String(100), nullable=False, doc="User ID") 
tenant_id = Column(String(100), nullable=False, doc="Tenant ID") - user_role = Column(String(30), doc="User role: SUPER_ADMIN, ADMIN, DEV, USER") + user_role = Column( + String(30), doc="User role: SUPER_ADMIN, ADMIN, DEV, USER") user_email = Column(String(255), doc="User email address") @@ -367,11 +563,18 @@ class AgentRelation(TableBase): __tablename__ = "ag_agent_relation_t" __table_args__ = {"schema": SCHEMA} - relation_id = Column(Integer, Sequence("ag_agent_relation_t_relation_id_seq", schema=SCHEMA), primary_key=True, nullable=False, doc="Relationship ID, primary key") - selected_agent_id = Column(Integer, primary_key=True, doc="Selected agent ID") + relation_id = Column(Integer, Sequence("ag_agent_relation_t_relation_id_seq", schema=SCHEMA), + primary_key=True, nullable=False, doc="Relationship ID, primary key") + selected_agent_id = Column( + Integer, primary_key=True, doc="Selected agent ID") parent_agent_id = Column(Integer, doc="Parent agent ID") tenant_id = Column(String(100), doc="Tenant ID") - version_no = Column(Integer, default=0, nullable=False, doc="Version number. 0 = draft/editing state, >=1 = published snapshot") + version_no = Column(Integer, default=0, nullable=False, + doc="Version number. 0 = draft/editing state, >=1 = published snapshot") + selected_agent_version_no = Column( + Integer, nullable=True, + doc="Pinned version of selected_agent_id. NULL = runtime fallback to child current_version_no", + ) class PartnerMappingId(TableBase): @@ -487,12 +690,51 @@ class AgentVersion(TableBase): primary_key=True, nullable=False, doc=_PRIMARY_KEY_DOC) tenant_id = Column(String(100), nullable=False, doc="Tenant ID") agent_id = Column(Integer, nullable=False, doc="Agent ID") - version_no = Column(Integer, nullable=False, doc="Version number, starts from 1. Does not include 0 (draft)") - version_name = Column(String(100), doc="User-defined version name for display") + version_no = Column(Integer, nullable=False, + doc="Version number, starts from 1. 
Does not include 0 (draft)") + version_name = Column( + String(100), doc="User-defined version name for display") release_note = Column(Text, doc="Release notes / publish remarks") - source_version_no = Column(Integer, doc="Source version number. If this version is a rollback, record the source version") - source_type = Column(String(30), doc="Source type: NORMAL (normal publish) / ROLLBACK (rollback and republish)") - status = Column(String(30), default="RELEASED", doc="Version status: RELEASED / DISABLED / ARCHIVED") + source_version_no = Column( + Integer, doc="Source version number. If this version is a rollback, record the source version") + source_type = Column(String( + 30), doc="Source type: NORMAL (normal publish) / ROLLBACK (rollback and republish)") + status = Column(String(30), default="RELEASED", + doc="Version status: RELEASED / DISABLED / ARCHIVED") + is_a2a = Column(Boolean, default=False, + doc="Whether this version is published as an A2A Server agent") + + +class AgentRepository(TableBase): + """ + Agent repository (marketplace) table. Frozen snapshot of a published agent tree for sharing. 
+ """ + __tablename__ = "ag_agent_repository_t" + __table_args__ = {"schema": SCHEMA} + + agent_repository_id = Column(BigInteger, Sequence("ag_agent_repository_t_agent_repository_id_seq", schema=SCHEMA), + primary_key=True, nullable=False, doc="Agent repository listing ID, unique primary key") + publisher_tenant_id = Column(String(100), nullable=False, doc="Publisher tenant ID") + publisher_user_id = Column(String(100), nullable=False, doc="Publisher user ID") + agent_id = Column(Integer, nullable=False, + doc="Root agent ID from ag_tenant_agent_t; upsert key") + source_version_no = Column(Integer, nullable=False, + doc="Published version number frozen at share time") + name = Column(String(100), nullable=False, + doc="Root agent programmatic name for display and search") + display_name = Column(String(100), doc="Root agent display name") + description = Column(Text, doc="Root agent description") + author = Column(String(100), doc="Agent author") + category_id = Column(Integer, doc="Optional marketplace category ID") + tags = Column(ARRAY(Text), doc="Marketplace tags") + tool_count = Column(Integer, + doc="Total tool count across all agents in the bundle (display only)") + version_label = Column(String(100), + doc="Repository entry version label for display (e.g. 
v1.0)") + agent_info_json = Column(JSONB, nullable=False, + doc="Frozen ExportAndImportDataFormat snapshot with optional skills") + status = Column(String(30), default="NOT_SHARED", + doc="Listing status: NOT_SHARED (未共享) / PENDING_REVIEW (待审核) / REJECTED (审核驳回) / SHARED (已共享)") class UserTokenInfo(TableBase): @@ -505,7 +747,8 @@ class UserTokenInfo(TableBase): token_id = Column(Integer, Sequence("user_token_info_t_token_id_seq", schema=SCHEMA), primary_key=True, nullable=False, doc="Token ID, unique primary key") access_key = Column(String(100), nullable=False, doc="Access Key (AK)") - user_id = Column(String(100), nullable=False, doc="User ID who owns this token") + user_id = Column(String(100), nullable=False, + doc="User ID who owns this token") class UserTokenUsageLog(TableBase): @@ -517,10 +760,68 @@ class UserTokenUsageLog(TableBase): token_usage_id = Column(Integer, Sequence("user_token_usage_log_t_token_usage_id_seq", schema=SCHEMA), primary_key=True, nullable=False, doc="Token usage log ID, unique primary key") - token_id = Column(Integer, nullable=False, doc="Foreign key to user_token_info_t.token_id") - call_function_name = Column(String(100), doc="API function name being called") - related_id = Column(Integer, doc="Related resource ID (e.g., conversation_id)") - meta_data = Column(JSONB, doc="Additional metadata for this usage log entry, stored as JSON") + token_id = Column(Integer, nullable=False, + doc="Foreign key to user_token_info_t.token_id") + call_function_name = Column( + String(100), doc="API function name being called") + related_id = Column( + Integer, doc="Related resource ID (e.g., conversation_id)") + meta_data = Column( + JSONB, doc="Additional metadata for this usage log entry, stored as JSON") + + +class UserOAuthAccount(TableBase): + __tablename__ = "user_oauth_account_t" + __table_args__ = ( + UniqueConstraint("provider", "provider_user_id", + name="uq_oauth_provider_user"), + {"schema": SCHEMA}, + ) + + oauth_account_id = Column( + 
Integer, + Sequence("user_oauth_account_t_oauth_account_id_seq", schema=SCHEMA), + primary_key=True, + nullable=False, + doc="OAuth account ID, primary key", + ) + user_id = Column(String(100), nullable=False, doc="Supabase user UUID") + provider = Column( + String(30), nullable=False, doc="OAuth provider name: github, wechat, gde, link_app" + ) + provider_user_id = Column( + String(200), nullable=False, doc="User ID from the OAuth provider" + ) + provider_email = Column( + String(255), doc="Email address from the OAuth provider") + provider_username = Column( + String(200), doc="Display name from the OAuth provider") + tenant_id = Column(String(100), doc="Tenant ID at time of linking") + + +class UserCasSession(TableBase): + __tablename__ = "user_cas_session_t" + __table_args__ = ( + Index("ix_user_cas_session_session_id", "session_id"), + Index("ix_user_cas_session_user_id", "user_id"), + Index("ix_user_cas_session_cas_user_id", "cas_user_id"), + {"schema": SCHEMA}, + ) + + cas_session_id = Column( + Integer, + Sequence("user_cas_session_t_cas_session_id_seq", schema=SCHEMA), + primary_key=True, + nullable=False, + doc="CAS session record ID", + ) + session_id = Column(String(100), nullable=False, unique=True, doc="JWT session ID") + user_id = Column(String(100), nullable=False, doc="Supabase user UUID") + cas_user_id = Column(String(200), nullable=False, doc="User ID from CAS") + cas_session_index = Column(String(500), doc="CAS SessionIndex or service ticket") + status = Column(String(30), nullable=False, default="active", doc="active/revoked") + expires_at = Column(TIMESTAMP(timezone=False), nullable=False, doc="Session expiration time") + revoked_at = Column(TIMESTAMP(timezone=False), doc="Revocation time") class SkillInfo(TableBase): @@ -532,11 +833,17 @@ class SkillInfo(TableBase): skill_id = Column(Integer, Sequence("ag_skill_info_t_skill_id_seq", schema=SCHEMA), primary_key=True, nullable=False, autoincrement=True, doc="Skill ID") - skill_name = 
Column(String(100), nullable=False, unique=True, doc="Unique skill name") + skill_name = Column(String(100), nullable=False, + unique=True, doc="Unique skill name") + tenant_id = Column(String(100), nullable=True, + doc="Tenant ID for multi-tenancy. NULL for pre-existing skills.") skill_description = Column(String(1000), doc="Skill description") skill_tags = Column(JSON, doc="Skill tags as JSON array") skill_content = Column(Text, doc="Skill content in markdown format") - params = Column(JSON, doc="Skill configuration parameters as JSON object") + config_schemas = Column( + JSON, doc="Parameter metadata from config/schema.yaml") + config_values = Column( + JSON, doc="Runtime parameter values from config/config.yaml") source = Column(String(30), nullable=False, default="official", doc="Skill source: official, custom, etc.") @@ -550,8 +857,10 @@ class SkillToolRelation(TableBase): rel_id = Column(Integer, Sequence("ag_skill_tools_rel_t_rel_id_seq", schema=SCHEMA), primary_key=True, nullable=False, autoincrement=True, doc="Relation ID") - skill_id = Column(Integer, nullable=False, doc="Foreign key to ag_skill_info_t.skill_id") - tool_id = Column(Integer, nullable=False, doc="Foreign key to ag_tool_info_t.tool_id") + skill_id = Column(Integer, nullable=False, + doc="Foreign key to ag_skill_info_t.skill_id") + tool_id = Column(Integer, nullable=False, + doc="Foreign key to ag_tool_info_t.tool_id") class SkillInstance(TableBase): @@ -570,12 +879,19 @@ class SkillInstance(TableBase): nullable=False, doc="Skill instance ID" ) - skill_id = Column(Integer, nullable=False, doc="Foreign key to ag_skill_info_t.skill_id") + skill_id = Column(Integer, nullable=False, + doc="Foreign key to ag_skill_info_t.skill_id") agent_id = Column(Integer, nullable=False, doc="Agent ID") user_id = Column(String(100), doc="User ID") tenant_id = Column(String(100), doc="Tenant ID") - enabled = Column(Boolean, default=True, doc="Whether this skill is enabled for the agent") - version_no = 
Column(Integer, default=0, primary_key=True, nullable=False, doc="Version number. 0 = draft/editing state, >=1 = published snapshot") + enabled = Column(Boolean, default=True, + doc="Whether this skill is enabled for the agent") + version_no = Column(Integer, default=0, primary_key=True, nullable=False, + doc="Version number. 0 = draft/editing state, >=1 = published snapshot") + config_values = Column( + JSON, doc="Per-agent runtime parameter values (mirrors ag_tool_instance_t.params)") + config_schemas = Column( + JSON, doc="Per-agent parameter schema overrides from config/schema.yaml") class OuterApiService(TableBase): @@ -588,13 +904,16 @@ class OuterApiService(TableBase): id = Column(BigInteger, Sequence("ag_outer_api_services_id_seq", schema=SCHEMA), primary_key=True, nullable=False, doc="Service ID, unique primary key") - mcp_service_name = Column(String(100), nullable=False, doc="MCP service name (unique identifier per tenant)") + mcp_service_name = Column(String(100), nullable=False, + doc="MCP service name (unique identifier per tenant)") description = Column(Text, doc="Service description from OpenAPI info") openapi_json = Column(JSONB, doc="Complete OpenAPI JSON specification") server_url = Column(String(500), doc="Base URL of the REST API server") headers_template = Column(JSONB, doc="Default headers template as JSON") - tenant_id = Column(String(100), nullable=False, doc="Tenant ID for multi-tenancy") - is_available = Column(Boolean, default=True, doc="Whether the service is available") + tenant_id = Column(String(100), nullable=False, + doc="Tenant ID for multi-tenancy") + is_available = Column(Boolean, default=True, + doc="Whether the service is available") # Alias for backward compatibility @@ -609,27 +928,37 @@ class A2ANacosConfig(TableBase): __tablename__ = "ag_a2a_nacos_config_t" __table_args__ = {"schema": SCHEMA} - id = Column(BigInteger, primary_key=True, autoincrement=True, doc=_PRIMARY_KEY_DOC) - config_id = Column(String(64), unique=True, 
nullable=False, doc="Unique config identifier for API reference") + id = Column(BigInteger, primary_key=True, + autoincrement=True, doc=_PRIMARY_KEY_DOC) + config_id = Column(String(64), unique=True, nullable=False, + doc="Unique config identifier for API reference") # Nacos connection - nacos_addr = Column(String(512), nullable=False, doc="Nacos server address, e.g., http://nacos-server:8848") - nacos_username = Column(String(100), doc="Nacos username for authentication") - nacos_password = Column(String(256), doc="Nacos password, encrypted at rest") + nacos_addr = Column(String(512), nullable=False, + doc="Nacos server address, e.g., http://nacos-server:8848") + nacos_username = Column( + String(100), doc="Nacos username for authentication") + nacos_password = Column( + String(256), doc="Nacos password, encrypted at rest") # Discovery scope - namespace_id = Column(String(100), default="public", doc="Nacos namespace for service discovery") + namespace_id = Column(String(100), default="public", + doc="Nacos namespace for service discovery") # Metadata - name = Column(String(100), nullable=False, doc="Display name for this Nacos config") + name = Column(String(100), nullable=False, + doc="Display name for this Nacos config") description = Column(Text, doc="Description of this Nacos configuration") # Tenant isolation - tenant_id = Column(String(100), nullable=False, doc="Tenant ID for multi-tenancy") + tenant_id = Column(String(100), nullable=False, + doc="Tenant ID for multi-tenancy") # Status - is_active = Column(Boolean, default=True, doc="Whether this Nacos config is active") - last_scan_at = Column(TIMESTAMP(timezone=False), doc="Last time a scan was performed using this config") + is_active = Column(Boolean, default=True, + doc="Whether this Nacos config is active") + last_scan_at = Column(TIMESTAMP(timezone=False), + doc="Last time a scan was performed using this config") class A2AExternalAgent(TableBase): @@ -640,36 +969,49 @@ class 
A2AExternalAgent(TableBase): __tablename__ = "ag_a2a_external_agent_t" __table_args__ = {"schema": SCHEMA} - id = Column(BigInteger, primary_key=True, autoincrement=True, doc=_PRIMARY_KEY_DOC) + id = Column(BigInteger, primary_key=True, + autoincrement=True, doc=_PRIMARY_KEY_DOC) # Agent metadata (cached from Agent Card) - name = Column(String(255), nullable=False, doc="Agent name from Agent Card") + name = Column(String(255), nullable=False, + doc="Agent name from Agent Card") description = Column(Text, doc="Agent description from Agent Card") - version = Column(String(50), doc="Agent version from Agent Card, e.g., 1.2.0") + version = Column( + String(50), doc="Agent version from Agent Card, e.g., 1.2.0") # Primary interface (extracted from supportedInterfaces for quick access) # In A2A 1.0, this should store the http-json-rpc URL - agent_url = Column(String(512), nullable=False, doc="Primary A2A endpoint URL (http-json-rpc by default)") + agent_url = Column(String(512), nullable=False, + doc="Primary A2A endpoint URL (http-json-rpc by default)") # Protocol type for calling this agent: JSONRPC, HTTP+JSON, GRPC - protocol_type = Column(String(20), default=PROTOCOL_JSONRPC, doc="Protocol type for calling this agent") + protocol_type = Column(String(20), default=PROTOCOL_JSONRPC, + doc="Protocol type for calling this agent") # Capabilities - streaming = Column(Boolean, default=False, doc="Whether this agent supports SSE streaming") + streaming = Column(Boolean, default=False, + doc="Whether this agent supports SSE streaming") # All supported interfaces (full JSON array from Agent Card) # Format: [{protocolBinding, url, protocolVersion}, ...] 
supported_interfaces = Column(JSON, doc="All supported interfaces array") # Source information - source_type = Column(String(20), nullable=False, doc="Discovery source: url or nacos") + source_type = Column(String(20), nullable=False, + doc="Discovery source: url or nacos") # For URL mode source_url = Column(String(512), doc="Direct URL to agent card") # For Nacos mode - nacos_config_id = Column(String(64), doc="Reference to Nacos config used for discovery") - nacos_agent_name = Column(String(255), doc="Original name used for Nacos query") + nacos_config_id = Column( + String(64), doc="Reference to Nacos config used for discovery") + nacos_agent_name = Column( + String(255), doc="Original name used for Nacos query") + + # Base URL for infrastructure health checks + base_url = Column(String( + 512), doc="Base URL for health checks (service root address), e.g., http://agent:8080") # Tenant isolation tenant_id = Column(String(100), nullable=False, doc=_TENANT_ID_DOC) @@ -678,13 +1020,18 @@ class A2AExternalAgent(TableBase): raw_card = Column(JSON, doc="Full original Agent Card JSON from discovery") # Cache management - cached_at = Column(TIMESTAMP(timezone=False), doc="Timestamp when Agent Card was cached") - cache_expires_at = Column(TIMESTAMP(timezone=False), doc="Timestamp when cache expires") + cached_at = Column(TIMESTAMP(timezone=False), + doc="Timestamp when Agent Card was cached") + cache_expires_at = Column( + TIMESTAMP(timezone=False), doc="Timestamp when cache expires") # Health check status - is_available = Column(Boolean, default=True, doc="Whether this agent is currently reachable") - last_check_at = Column(TIMESTAMP(timezone=False), doc="Last health check timestamp") - last_check_result = Column(String(50), doc="Last health check result: OK, ERROR, TIMEOUT") + is_available = Column(Boolean, default=True, + doc="Whether this agent is currently reachable") + last_check_at = Column(TIMESTAMP(timezone=False), + doc="Last health check timestamp") + 
last_check_result = Column( + String(50), doc="Last health check result: OK, ERROR, TIMEOUT") class A2AExternalAgentRelation(TableBase): @@ -699,28 +1046,26 @@ class A2AExternalAgentRelation(TableBase): name="uq_local_external_agent", deferrable=True, ), - ForeignKeyConstraint( - ["external_agent_id"], - [f"{SCHEMA}.ag_a2a_external_agent_t.id"], - name="fk_external_agent", - deferrable=True, - ), {"schema": SCHEMA}, ) - id = Column(BigInteger, primary_key=True, autoincrement=True, doc=_PRIMARY_KEY_DOC) + id = Column(BigInteger, primary_key=True, + autoincrement=True, doc=_PRIMARY_KEY_DOC) # Local agent (parent) - local_agent_id = Column(Integer, nullable=False, doc="Local parent agent ID") + local_agent_id = Column(Integer, nullable=False, + doc="Local parent agent ID") # External A2A agent (sub-agent) - FK to ag_a2a_external_agent_t.id - external_agent_id = Column(BigInteger, nullable=False, doc="External A2A agent ID (FK to ag_a2a_external_agent_t.id)") + external_agent_id = Column( + BigInteger, nullable=False, doc="External A2A agent ID (FK to ag_a2a_external_agent_t.id)") # Tenant isolation tenant_id = Column(String(100), nullable=False, doc=_TENANT_ID_DOC) # Status - is_enabled = Column(Boolean, default=True, doc="Whether this relation is active") + is_enabled = Column(Boolean, default=True, + doc="Whether this relation is active") class A2AServerAgent(TableBase): @@ -731,7 +1076,8 @@ class A2AServerAgent(TableBase): __tablename__ = "ag_a2a_server_agent_t" __table_args__ = {"schema": SCHEMA} - id = Column(BigInteger, primary_key=True, autoincrement=True, doc=_PRIMARY_KEY_DOC) + id = Column(BigInteger, primary_key=True, + autoincrement=True, doc=_PRIMARY_KEY_DOC) # Link to local agent agent_id = Column(Integer, nullable=False, doc="Local agent ID") @@ -741,35 +1087,44 @@ class A2AServerAgent(TableBase): tenant_id = Column(String(100), nullable=False, doc=_TENANT_ID_DOC) # Generated endpoint ID - endpoint_id = Column(String(64), unique=True, nullable=False, 
doc="Generated endpoint ID") + endpoint_id = Column(String(64), unique=True, + nullable=False, doc="Generated endpoint ID") # Basic info (extracted from local agent, can be overridden) - name = Column(String(255), nullable=False, doc="Agent name exposed in Agent Card") + name = Column(String(255), nullable=False, + doc="Agent name exposed in Agent Card") description = Column(Text, doc="Agent description exposed in Agent Card") version = Column(String(50), doc="Agent version exposed in Agent Card") # Primary endpoint URL (http-json-rpc by default) - agent_url = Column(String(512), doc="Primary A2A endpoint URL (http-json-rpc by default)") + agent_url = Column( + String(512), doc="Primary A2A endpoint URL (http-json-rpc by default)") # Capabilities - streaming = Column(Boolean, default=False, doc="Whether this agent supports SSE streaming") + streaming = Column(Boolean, default=False, + doc="Whether this agent supports SSE streaming") # All supported interfaces (A2A 1.0 compliant) # Format: [{protocolBinding, url, protocolVersion}, ...] 
- supported_interfaces = Column(JSON, doc="All supported interfaces: [{protocolBinding, url, protocolVersion}, ...]") + supported_interfaces = Column( + JSON, doc="All supported interfaces: [{protocolBinding, url, protocolVersion}, ...]") # Agent Card customization (partial overrides only) - card_overrides = Column(JSON, doc="User customizations for Agent Card (partial override)") + card_overrides = Column( + JSON, doc="User customizations for Agent Card (partial override)") # A2A Server status - is_enabled = Column(Boolean, default=False, doc="Whether A2A Server is enabled for this agent") + is_enabled = Column(Boolean, default=False, + doc="Whether A2A Server is enabled for this agent") # Raw Agent Card (generated from settings, for debugging) raw_card = Column(JSON, doc="Generated Agent Card JSON (for debugging)") # Publishing timestamps - published_at = Column(TIMESTAMP(timezone=False), doc="Timestamp when A2A Server was last enabled") - unpublished_at = Column(TIMESTAMP(timezone=False), doc="Timestamp when A2A Server was disabled") + published_at = Column(TIMESTAMP(timezone=False), + doc="Timestamp when A2A Server was last enabled") + unpublished_at = Column(TIMESTAMP(timezone=False), + doc="Timestamp when A2A Server was disabled") class A2ATask(SimpleTableBase): @@ -782,7 +1137,8 @@ class A2ATask(SimpleTableBase): # Core identifiers (following A2A spec) id = Column(String(64), primary_key=True, doc="Task ID (A2A spec: taskId)") - context_id = Column(String(64), doc="Context ID for grouping related tasks") + context_id = Column( + String(64), doc="Context ID for grouping related tasks") # Endpoint and caller info endpoint_id = Column(String(64), nullable=False, doc="Endpoint ID") @@ -793,16 +1149,21 @@ class A2ATask(SimpleTableBase): raw_request = Column(JSON, doc="Original A2A request payload") # Task state (following A2A TaskState enum) - task_state = Column(String(50), nullable=False, server_default="TASK_STATE_SUBMITTED", doc="Task state: 
TASK_STATE_SUBMITTED, TASK_STATE_WORKING, TASK_STATE_COMPLETED, TASK_STATE_FAILED, TASK_STATE_CANCELED, TASK_STATE_INPUT_REQUIRED, TASK_STATE_REJECTED, TASK_STATE_AUTH_REQUIRED") - state_timestamp = Column(TIMESTAMP(timezone=False), doc="Task state last update timestamp") + task_state = Column(String(50), nullable=False, server_default="TASK_STATE_SUBMITTED", + doc="Task state: TASK_STATE_SUBMITTED, TASK_STATE_WORKING, TASK_STATE_COMPLETED, TASK_STATE_FAILED, TASK_STATE_CANCELED, TASK_STATE_INPUT_REQUIRED, TASK_STATE_REJECTED, TASK_STATE_AUTH_REQUIRED") + state_timestamp = Column(TIMESTAMP(timezone=False), + doc="Task state last update timestamp") # Task result result_data = Column(JSON, doc="Task final result data") # Timestamps - create_time = Column(TIMESTAMP(timezone=False), server_default=func.now(), doc="Task creation timestamp") - update_time = Column(TIMESTAMP(timezone=False), server_default=func.now(), onupdate=func.now(), doc="Task last update timestamp") - completed_at = Column(TIMESTAMP(timezone=False), doc="Task completion timestamp") + create_time = Column(TIMESTAMP(timezone=False), + server_default=func.now(), doc="Task creation timestamp") + update_time = Column(TIMESTAMP(timezone=False), server_default=func.now( + ), onupdate=func.now(), doc="Task last update timestamp") + completed_at = Column(TIMESTAMP(timezone=False), + doc="Task completion timestamp") class A2AMessage(SimpleTableBase): @@ -814,23 +1175,30 @@ class A2AMessage(SimpleTableBase): __table_args__ = {"schema": SCHEMA} # Core identifiers (following A2A spec) - message_id = Column(String(64), primary_key=True, doc="Message ID (A2A spec: messageId)") - task_id = Column(String(64), ForeignKey(f"{SCHEMA}.ag_a2a_task_t.id", ondelete="CASCADE"), nullable=True, doc="Task ID this message belongs to (nullable for standalone/simple requests)") + message_id = Column(String(64), primary_key=True, + doc="Message ID (A2A spec: messageId)") + task_id = Column(String(64), nullable=True, + doc="Task ID 
this message belongs to (nullable for standalone/simple requests)") # Message attributes - message_index = Column(Integer, nullable=False, doc="Order of message in the conversation") - role = Column(String(20), nullable=False, doc="Message sender role: user or agent") + message_index = Column(Integer, nullable=False, + doc="Order of message in the conversation") + role = Column(String(20), nullable=False, + doc="Message sender role: user or agent") # Message content (following A2A Part structure) - parts = Column(JSON, nullable=False, doc="Message parts following A2A Part structure") + parts = Column(JSON, nullable=False, + doc="Message parts following A2A Part structure") meta_data = Column(JSON, doc="Optional metadata") extensions = Column(JSON, doc="Extension URI list") # References to other tasks (optional) - reference_task_ids = Column(JSON, doc="Referenced task IDs array for multi-turn scenarios") + reference_task_ids = Column( + JSON, doc="Referenced task IDs array for multi-turn scenarios") # Timestamp - create_time = Column(TIMESTAMP(timezone=False), server_default=func.now(), doc="Message creation timestamp") + create_time = Column(TIMESTAMP( + timezone=False), server_default=func.now(), doc="Message creation timestamp") class A2AArtifact(SimpleTableBase): @@ -842,15 +1210,19 @@ class A2AArtifact(SimpleTableBase): # Core identifiers (following A2A spec) id = Column(String(64), primary_key=True, doc="Internal primary key") - artifact_id = Column(String(64), nullable=False, doc="Artifact ID (A2A spec: artifactId)") - task_id = Column(String(64), ForeignKey(f"{SCHEMA}.ag_a2a_task_t.id", ondelete="CASCADE"), nullable=False, doc="Task ID this artifact belongs to") + artifact_id = Column(String(64), nullable=False, + doc="Artifact ID (A2A spec: artifactId)") + task_id = Column(String(64), nullable=False, + doc="Task ID this artifact belongs to") # Artifact attributes name = Column(String(255), doc="Human-readable artifact name") description = Column(Text, 
doc="Artifact description") - parts = Column(JSON, nullable=False, doc="Artifact parts following A2A Part structure") + parts = Column(JSON, nullable=False, + doc="Artifact parts following A2A Part structure") meta_data = Column(JSON, doc="Artifact metadata") extensions = Column(JSON, doc="Extension URI list") # Timestamp - create_time = Column(TIMESTAMP(timezone=False), server_default=func.now(), doc="Artifact creation timestamp") + create_time = Column(TIMESTAMP( + timezone=False), server_default=func.now(), doc="Artifact creation timestamp") diff --git a/backend/database/invitation_db.py b/backend/database/invitation_db.py index f7e27d005..32523cd06 100644 --- a/backend/database/invitation_db.py +++ b/backend/database/invitation_db.py @@ -300,8 +300,8 @@ def query_invitations_with_pagination( TenantInvitationCode.delete_flag == "N" ) - # Apply tenant filter if provided - if tenant_id: + # Apply tenant filter when tenant_id is specified (including ASSET_OWNER virtual tenant) + if tenant_id is not None: query = query.filter(TenantInvitationCode.tenant_id == tenant_id) # Apply sorting diff --git a/backend/database/knowledge_db.py b/backend/database/knowledge_db.py index df42e1888..8fc60d6bd 100644 --- a/backend/database/knowledge_db.py +++ b/backend/database/knowledge_db.py @@ -1,5 +1,6 @@ from typing import Any, Dict, List, Optional +import logging import uuid from sqlalchemy import func from sqlalchemy.exc import SQLAlchemyError @@ -7,6 +8,9 @@ from database.client import as_dict, get_db_session from database.db_models import KnowledgeRecord from utils.str_utils import convert_list_to_string +from consts.scheduler import VALID_SUMMARY_FREQUENCIES + +logger = logging.getLogger("knowledge_db") def _generate_index_name(knowledge_id: int) -> str: @@ -30,6 +34,7 @@ def create_knowledge_record(query: Dict[str, Any]) -> Dict[str, Any]: - user_id: Optional user ID for created_by and updated_by fields - tenant_id: Optional tenant ID for created_by and updated_by fields - 
embedding_model_name: embedding model name for the knowledge base + - preserve_source_file: whether to preserve uploaded source documents (optional) Returns: Dict[str, Any]: Dictionary with at least 'knowledge_id' and 'index_name' @@ -49,9 +54,11 @@ def create_knowledge_record(query: Dict[str, Any]) -> Dict[str, Any]: "knowledge_sources": query.get("knowledge_sources", "elasticsearch"), "tenant_id": query.get("tenant_id"), "embedding_model_name": query.get("embedding_model_name"), + "embedding_model_id": query.get("embedding_model_id"), "knowledge_name": knowledge_name, "group_ids": convert_list_to_string(group_ids) if isinstance(group_ids, list) else group_ids, "ingroup_permission": query.get("ingroup_permission"), + "preserve_source_file": query.get("preserve_source_file", True), } # For backward compatibility: if caller explicitly provides index_name, @@ -112,10 +119,16 @@ def upsert_knowledge_record(query: Dict[str, Any]) -> Dict[str, Any]: if existing_record: # Update existing record - existing_record.knowledge_name = query.get('knowledge_name') or query.get('index_name') - existing_record.knowledge_describe = query.get('knowledge_describe', '') - existing_record.knowledge_sources = query.get('knowledge_sources', 'elasticsearch') - existing_record.embedding_model_name = query.get('embedding_model_name') + existing_record.knowledge_name = query.get( + 'knowledge_name') or query.get('index_name') + existing_record.knowledge_describe = query.get( + 'knowledge_describe', '') + existing_record.knowledge_sources = query.get( + 'knowledge_sources', 'elasticsearch') + existing_record.embedding_model_name = query.get( + 'embedding_model_name') + existing_record.embedding_model_id = query.get( + 'embedding_model_id') existing_record.updated_by = query.get('user_id') existing_record.update_time = func.current_timestamp() @@ -245,9 +258,11 @@ def get_knowledge_record(query: Optional[Dict[str, Any]] = None) -> Dict[str, An # Support both index_name and knowledge_name 
queries if 'index_name' in query: - db_query = db_query.filter(KnowledgeRecord.index_name == query['index_name']) + db_query = db_query.filter( + KnowledgeRecord.index_name == query['index_name']) elif 'knowledge_name' in query: - db_query = db_query.filter(KnowledgeRecord.knowledge_name == query['knowledge_name']) + db_query = db_query.filter( + KnowledgeRecord.knowledge_name == query['knowledge_name']) # Add tenant_id filter only if it is provided in the query if 'tenant_id' in query and query['tenant_id'] is not None: @@ -345,6 +360,43 @@ def update_model_name_by_index_name(index_name: str, embedding_model_name: str, raise e +def update_embedding_model_by_index_name( + index_name: str, + embedding_model_id: int, + embedding_model_name: str, + tenant_id: str, + user_id: str +) -> bool: + """ + Update the embedding model (both ID and name) for a knowledge base. + + Args: + index_name: Internal index name of the knowledge base + embedding_model_id: New embedding model ID + embedding_model_name: New embedding model name + tenant_id: Tenant ID + user_id: User ID making the update + + Returns: + bool: Whether the update was successful + """ + try: + with get_db_session() as session: + result = session.query(KnowledgeRecord).filter( + KnowledgeRecord.index_name == index_name, + KnowledgeRecord.delete_flag != 'Y', + KnowledgeRecord.tenant_id == tenant_id + ).update({ + "embedding_model_id": embedding_model_id, + "embedding_model_name": embedding_model_name, + "updated_by": user_id + }) + session.commit() + return result > 0 + except SQLAlchemyError as e: + raise e + + def get_index_name_by_knowledge_name(knowledge_name: str, tenant_id: str) -> str: """ Get the internal index_name from user-facing knowledge_name. @@ -361,16 +413,138 @@ def get_index_name_by_knowledge_name(knowledge_name: str, tenant_id: str) -> str """ try: with get_db_session() as session: + # First try resolving by user-facing knowledge_name. 
result = session.query(KnowledgeRecord).filter( KnowledgeRecord.knowledge_name == knowledge_name, KnowledgeRecord.tenant_id == tenant_id, KnowledgeRecord.delete_flag != 'Y' ).first() - if result: return result.index_name + + # Backward/forward compatibility: if caller already passes internal index_name, + # accept it directly by resolving on index_name as well. + index_result = session.query(KnowledgeRecord).filter( + KnowledgeRecord.index_name == knowledge_name, + KnowledgeRecord.tenant_id == tenant_id, + KnowledgeRecord.delete_flag != 'Y' + ).first() + if index_result: + return index_result.index_name + raise ValueError( f"Knowledge base '{knowledge_name}' not found for the current tenant" ) except SQLAlchemyError as e: raise e + + +def get_knowledge_name_map_by_index_names(index_names: List[str]) -> Dict[str, str]: + """ + Get a mapping from index_name to knowledge_name (display name) for the given index_names. + Used to build user-friendly knowledge base summaries in prompts. + + Args: + index_names: List of internal index names + + Returns: + Dict[str, str]: Mapping of index_name -> knowledge_name. + If a knowledge base is not found in the database, + the index_name itself is used as the fallback value. 
+ """ + if not index_names: + return {} + + try: + with get_db_session() as session: + result = session.query( + KnowledgeRecord.index_name, + KnowledgeRecord.knowledge_name + ).filter( + KnowledgeRecord.index_name.in_(index_names), + KnowledgeRecord.delete_flag != 'Y' + ).all() + + knowledge_name_map = {} + for row in result: + knowledge_name_map[row.index_name] = row.knowledge_name + + for index_name in index_names: + if index_name not in knowledge_name_map: + knowledge_name_map[index_name] = index_name + + return knowledge_name_map + except SQLAlchemyError: + logger.exception("Query knowledge name map error") + raise + + +def update_summary_frequency(index_name: str, summary_frequency: Optional[str], + _tenant_id: str, user_id: str) -> bool: + """Update the auto-summary frequency for a knowledge base.""" + valid_frequencies = VALID_SUMMARY_FREQUENCIES + if summary_frequency not in valid_frequencies: + raise ValueError(f"Invalid summary_frequency: {summary_frequency}") + try: + with get_db_session() as session: + record = session.query(KnowledgeRecord).filter( + KnowledgeRecord.index_name == index_name, + KnowledgeRecord.delete_flag != 'Y' + ).first() + if not record: + return False + record.summary_frequency = summary_frequency + record.updated_by = user_id + session.commit() + return True + except SQLAlchemyError: + logger.exception("Update summary frequency error") + raise + + +def update_last_summary_time(index_name: str): + """Update last_summary_time to now after a successful summary generation.""" + from datetime import datetime + try: + with get_db_session() as session: + record = session.query(KnowledgeRecord).filter( + KnowledgeRecord.index_name == index_name, + KnowledgeRecord.delete_flag != 'Y' + ).first() + if record: + record.last_summary_time = datetime.now() + session.commit() + except SQLAlchemyError: + logger.exception("Update last summary time error") + raise + + +def update_last_doc_update_time(index_name: str): + """Update 
last_doc_update_time to now after document add/delete operation.""" + from datetime import datetime + try: + with get_db_session() as session: + record = session.query(KnowledgeRecord).filter( + KnowledgeRecord.index_name == index_name, + KnowledgeRecord.delete_flag != 'Y' + ).first() + if record: + record.last_doc_update_time = datetime.now() + session.commit() + except SQLAlchemyError: + logger.exception("Update last doc update time error") + raise + + +def get_knowledge_bases_for_auto_summary() -> List[Dict[str, Any]]: + """Query all knowledge bases with non-null summary_frequency.""" + try: + with get_db_session() as session: + records = session.query(KnowledgeRecord).filter( + KnowledgeRecord.summary_frequency.isnot(None), + KnowledgeRecord.delete_flag != 'Y' + ).all() + return [as_dict(record) for record in records] + except SQLAlchemyError: + logger.exception("Get knowledge bases error") + raise diff --git a/backend/database/model_management_db.py b/backend/database/model_management_db.py index cb1c6c69f..1a1a98c8b 100644 --- a/backend/database/model_management_db.py +++ b/backend/database/model_management_db.py @@ -1,3 +1,4 @@ +import logging from typing import Any, Dict, List, Optional from sqlalchemy import and_, desc, func, insert, select, update @@ -7,6 +8,8 @@ from .db_models import ModelRecord from .utils import add_creation_tracking, add_update_tracking +logger = logging.getLogger("database.model_management_db") + def create_model_record(model_data: Dict[str, Any], user_id: str, tenant_id: str) -> bool: """ @@ -170,7 +173,7 @@ def get_model_records(filters: Optional[Dict[str, Any]], tenant_id: str) -> List return result_list -def get_model_by_display_name(display_name: str, tenant_id: str) -> Optional[Dict[str, Any]]: +def get_model_by_display_name(display_name: str, tenant_id: str, model_type: str = None) -> Optional[Dict[str, Any]]: """ Get a model record by display name @@ -179,6 +182,11 @@ def get_model_by_display_name(display_name: str, 
tenant_id: str) -> Optional[Dic tenant_id: """ filters = {'display_name': display_name} + + if model_type in ["multiEmbedding", "multi_embedding"]: + filters['model_type'] = "multi_embedding" + elif model_type == "embedding": + filters['model_type'] = "embedding" records = get_model_records(filters, tenant_id) if not records: @@ -203,7 +211,7 @@ def get_models_by_display_name(display_name: str, tenant_id: str) -> List[Dict[s return get_model_records(filters, tenant_id) -def get_model_id_by_display_name(display_name: str, tenant_id: str) -> Optional[int]: +def get_model_id_by_display_name(display_name: str, tenant_id: str, model_type: str = None) -> Optional[int]: """ Get a model ID by display name @@ -214,7 +222,7 @@ def get_model_id_by_display_name(display_name: str, tenant_id: str) -> Optional[ Returns: Optional[int]: Model ID """ - model = get_model_by_display_name(display_name, tenant_id) + model = get_model_by_display_name(display_name, tenant_id, model_type) return model["model_id"] if model else None diff --git a/backend/database/oauth_account_db.py b/backend/database/oauth_account_db.py new file mode 100644 index 000000000..3b798f738 --- /dev/null +++ b/backend/database/oauth_account_db.py @@ -0,0 +1,220 @@ +""" +Database operations for OAuth account management +""" + +import logging +from typing import Any, Dict, List, Optional + +from database.client import as_dict, get_db_session +from database.db_models import UserOAuthAccount + +logger = logging.getLogger(__name__) + + +def insert_oauth_account( + user_id: str, + provider: str, + provider_user_id: str, + provider_email: Optional[str] = None, + provider_username: Optional[str] = None, + tenant_id: Optional[str] = None, +) -> Dict[str, Any]: + with get_db_session() as session: + account = UserOAuthAccount( + user_id=user_id, + provider=provider, + provider_user_id=provider_user_id, + provider_email=provider_email, + provider_username=provider_username, + tenant_id=tenant_id, + created_by=user_id, + 
updated_by=user_id, + ) + session.add(account) + session.flush() + return as_dict(account) + + +def get_oauth_account_by_provider( + provider: str, provider_user_id: str +) -> Optional[Dict[str, Any]]: + with get_db_session() as session: + result = ( + session.query(UserOAuthAccount) + .filter( + UserOAuthAccount.provider == provider, + UserOAuthAccount.provider_user_id == provider_user_id, + UserOAuthAccount.delete_flag == "N", + ) + .first() + ) + return as_dict(result) if result else None + + +def get_soft_deleted_oauth_account( + provider: str, provider_user_id: str +) -> Optional[Dict[str, Any]]: + with get_db_session() as session: + result = ( + session.query(UserOAuthAccount) + .filter( + UserOAuthAccount.provider == provider, + UserOAuthAccount.provider_user_id == provider_user_id, + UserOAuthAccount.delete_flag == "Y", + ) + .first() + ) + return as_dict(result) if result else None + + +def list_oauth_accounts_by_user_id(user_id: str) -> List[Dict[str, Any]]: + with get_db_session() as session: + results = ( + session.query(UserOAuthAccount) + .filter( + UserOAuthAccount.user_id == user_id, + UserOAuthAccount.delete_flag == "N", + ) + .all() + ) + return [as_dict(r) for r in results] + + +def rebind_oauth_account( + provider: str, + provider_user_id: str, + new_user_id: str, + provider_email: Optional[str] = None, + provider_username: Optional[str] = None, + tenant_id: Optional[str] = None, +) -> bool: + with get_db_session() as session: + result = ( + session.query(UserOAuthAccount) + .filter( + UserOAuthAccount.provider == provider, + UserOAuthAccount.provider_user_id == provider_user_id, + UserOAuthAccount.delete_flag == "N", + ) + .first() + ) + if not result: + return False + + result.user_id = new_user_id + result.updated_by = new_user_id + if provider_email is not None: + result.provider_email = provider_email + if provider_username is not None: + result.provider_username = provider_username + if tenant_id is not None: + result.tenant_id = tenant_id 
+ + return True + + +def update_oauth_account_tokens( + provider: str, + provider_user_id: str, + provider_username: Optional[str] = None, +) -> bool: + with get_db_session() as session: + result = ( + session.query(UserOAuthAccount) + .filter( + UserOAuthAccount.provider == provider, + UserOAuthAccount.provider_user_id == provider_user_id, + UserOAuthAccount.delete_flag == "N", + ) + .first() + ) + if not result: + return False + + if provider_username is not None: + result.provider_username = provider_username + + return True + + +def delete_oauth_account(user_id: str, provider: str) -> bool: + with get_db_session() as session: + result = ( + session.query(UserOAuthAccount) + .filter( + UserOAuthAccount.user_id == user_id, + UserOAuthAccount.provider == provider, + UserOAuthAccount.delete_flag == "N", + ) + .first() + ) + if not result: + return False + + result.delete_flag = "Y" + result.updated_by = user_id + return True + + +def reactivate_oauth_account( + provider: str, + provider_user_id: str, + user_id: str, + provider_email: Optional[str] = None, + provider_username: Optional[str] = None, + tenant_id: Optional[str] = None, +) -> bool: + with get_db_session() as session: + result = ( + session.query(UserOAuthAccount) + .filter( + UserOAuthAccount.provider == provider, + UserOAuthAccount.provider_user_id == provider_user_id, + UserOAuthAccount.delete_flag == "Y", + ) + .first() + ) + if not result: + return False + + result.delete_flag = "N" + result.user_id = user_id + result.updated_by = user_id + if provider_email is not None: + result.provider_email = provider_email + if provider_username is not None: + result.provider_username = provider_username + if tenant_id is not None: + result.tenant_id = tenant_id + + return True + + +def count_oauth_accounts_by_user_id(user_id: str) -> int: + with get_db_session() as session: + return ( + session.query(UserOAuthAccount) + .filter( + UserOAuthAccount.user_id == user_id, + UserOAuthAccount.delete_flag == "N", + ) 
+ .count() + ) + + +def soft_delete_all_oauth_accounts_by_user_id(user_id: str, deleted_by: str) -> int: + with get_db_session() as session: + result = ( + session.query(UserOAuthAccount) + .filter( + UserOAuthAccount.user_id == user_id, + UserOAuthAccount.delete_flag == "N", + ) + .all() + ) + count = 0 + for account in result: + account.delete_flag = "Y" + account.updated_by = deleted_by + count += 1 + return count \ No newline at end of file diff --git a/backend/database/prompt_template_db.py b/backend/database/prompt_template_db.py new file mode 100644 index 000000000..fbc286cf9 --- /dev/null +++ b/backend/database/prompt_template_db.py @@ -0,0 +1,165 @@ +import logging +from typing import Optional + +from sqlalchemy import select, update + +from database.client import as_dict, filter_property, get_db_session +from database.db_models import PromptTemplate + +logger = logging.getLogger("prompt_template_db") + + +def create_prompt_template(template_data: dict) -> dict: + """Create a prompt template.""" + with get_db_session() as session: + prompt_template = PromptTemplate( + **filter_property(template_data, PromptTemplate) + ) + prompt_template.delete_flag = "N" + session.add(prompt_template) + session.flush() + return as_dict(prompt_template) + + +def upsert_prompt_template_by_id(template_id: int, template_data: dict, user_id: str) -> dict: + """Create or update a prompt template with a fixed template ID.""" + with get_db_session() as session: + prompt_template = session.query(PromptTemplate).filter( + PromptTemplate.template_id == template_id, + ).first() + + filtered_data = filter_property(template_data, PromptTemplate) + if prompt_template: + for key, value in filtered_data.items(): + setattr(prompt_template, key, value) + prompt_template.updated_by = user_id + else: + prompt_template = PromptTemplate(**filtered_data) + prompt_template.template_id = template_id + prompt_template.delete_flag = filtered_data.get("delete_flag", "N") + 
session.add(prompt_template) + + session.flush() + return as_dict(prompt_template) + + +def update_prompt_template(template_id: int, template_data: dict, user_id: str) -> dict: + """Update a prompt template.""" + with get_db_session() as session: + prompt_template = session.query(PromptTemplate).filter( + PromptTemplate.template_id == template_id, + PromptTemplate.delete_flag == "N", + ).first() + + if not prompt_template: + raise ValueError("prompt template not found") + + for key, value in filter_property(template_data, PromptTemplate).items(): + if value is None: + continue + setattr(prompt_template, key, value) + + prompt_template.updated_by = user_id + session.flush() + return as_dict(prompt_template) + + +def delete_prompt_template(template_id: int, user_id: str) -> int: + """Soft-delete a prompt template.""" + with get_db_session() as session: + result = session.execute( + update(PromptTemplate) + .where( + PromptTemplate.template_id == template_id, + PromptTemplate.delete_flag == "N", + ) + .values(delete_flag="Y", updated_by=user_id) + ) + return result.rowcount + + +def query_prompt_templates_by_user( + tenant_id: str, + user_id: str, + template_type: str = "agent_generate", +) -> list[dict]: + """Query prompt templates by tenant and user.""" + with get_db_session() as session: + templates = session.query(PromptTemplate).filter( + PromptTemplate.tenant_id == tenant_id, + PromptTemplate.user_id == user_id, + PromptTemplate.template_type == template_type, + PromptTemplate.delete_flag == "N", + ).order_by(PromptTemplate.update_time.desc(), PromptTemplate.template_id.desc()).all() + return [as_dict(template) for template in templates] + + +def get_prompt_template_by_id( + template_id: int, + tenant_id: str, + user_id: str, + template_type: str = "agent_generate", +) -> Optional[dict]: + """Get a prompt template by ID.""" + with get_db_session() as session: + template = session.query(PromptTemplate).filter( + PromptTemplate.template_id == template_id, + 
PromptTemplate.tenant_id == tenant_id, + PromptTemplate.user_id == user_id, + PromptTemplate.template_type == template_type, + PromptTemplate.delete_flag == "N", + ).first() + return as_dict(template) if template else None + + +def get_prompt_template_by_name( + template_name: str, + tenant_id: str, + user_id: str, + template_type: str = "agent_generate", +) -> Optional[dict]: + """Get a prompt template by name.""" + with get_db_session() as session: + template = session.query(PromptTemplate).filter( + PromptTemplate.template_name == template_name, + PromptTemplate.tenant_id == tenant_id, + PromptTemplate.user_id == user_id, + PromptTemplate.template_type == template_type, + PromptTemplate.delete_flag == "N", + ).first() + return as_dict(template) if template else None + + +def get_prompt_template_by_template_id( + template_id: int, + template_type: str = "agent_generate", + include_deleted: bool = False, +) -> Optional[dict]: + """Get a prompt template by template ID regardless of owner.""" + with get_db_session() as session: + query = session.query(PromptTemplate).filter( + PromptTemplate.template_id == template_id, + PromptTemplate.template_type == template_type, + ) + if not include_deleted: + query = query.filter(PromptTemplate.delete_flag == "N") + template = query.first() + return as_dict(template) if template else None + + +def query_prompt_template_names( + tenant_id: str, + user_id: str, + template_type: str = "agent_generate", +) -> set[str]: + """Query all active prompt template names for the current user.""" + with get_db_session() as session: + rows = session.execute( + select(PromptTemplate.template_name).where( + PromptTemplate.tenant_id == tenant_id, + PromptTemplate.user_id == user_id, + PromptTemplate.template_type == template_type, + PromptTemplate.delete_flag == "N", + ) + ).all() + return {row[0] for row in rows if row and row[0]} diff --git a/backend/database/remote_mcp_db.py b/backend/database/remote_mcp_db.py index d535f9fba..b08769437 
100644 --- a/backend/database/remote_mcp_db.py +++ b/backend/database/remote_mcp_db.py @@ -15,16 +15,31 @@ def create_mcp_record(mcp_data: Dict[str, Any], tenant_id: str, user_id: str): :param tenant_id: Tenant ID :param user_id: User ID :return: Created MCP record + + Note: Only fields defined in the McpRecord model are inserted. + Fields like 'transport_type' and 'version' are not part of McpRecord + and will be ignored; keys whose value is None are skipped as well. """ + # Filter to only include fields that exist in the model + # McpRecord fields: mcp_id, tenant_id, user_id, mcp_name, mcp_server, status, + # container_id, container_port, authorization_token, custom_headers, source, registry_json, + # config_json, enabled, tags, description, create_time, update_time, created_by, updated_by, delete_flag + allowed_fields = { + 'mcp_name', 'mcp_server', 'status', 'container_id', 'container_port', + 'authorization_token', 'custom_headers', 'source', 'registry_json', 'config_json', + 'enabled', 'tags', 'description' + } + + filtered_data = {k: v for k, v in mcp_data.items() if k in allowed_fields and v is not None} + filtered_data.update({ + "tenant_id": tenant_id, + "user_id": user_id, + "created_by": user_id, + "updated_by": user_id, + "delete_flag": "N" + }) with get_db_session() as session: - mcp_data.update({ - "tenant_id": tenant_id, - "user_id": user_id, - "created_by": user_id, - "updated_by": user_id, - "delete_flag": "N" - }) - new_mcp = McpRecord(**filter_property(mcp_data, McpRecord)) + new_mcp = McpRecord(**filtered_data) session.add(new_mcp) @@ -80,7 +95,7 @@ def update_mcp_status_by_name_and_url(mcp_name: str, mcp_server: str, tenant_id: ).update({"status": status, "updated_by": user_id}) -def get_mcp_records_by_tenant(tenant_id: str) -> List[Dict[str, Any]]: +def get_mcp_records_by_tenant(tenant_id: str, tag: str | None = None) -> List[Dict[str, Any]]: """ Get all MCP records for a tenant @@ -88,14 +103,139 @@ def get_mcp_records_by_tenant(tenant_id: str) -> List[Dict[str, Any]]: :return: List of MCP records """
with get_db_session() as session: - mcp_records = session.query(McpRecord).filter( + query = session.query(McpRecord).filter( McpRecord.tenant_id == tenant_id, McpRecord.delete_flag != 'Y' - ).order_by(McpRecord.create_time.desc()).all() + ) + + if tag: + query = query.filter(McpRecord.tags.any(tag)) + + mcp_records = query.order_by(McpRecord.create_time.desc()).all() return [as_dict(record) for record in mcp_records] +def get_mcp_records_by_container_port(container_port: int) -> List[Dict[str, Any]]: + """ + Get non-deleted MCP records (enabled or not) that already use the given container port. + + The lookup is global: it is not filtered by tenant. Newest records come first. + """ + with get_db_session() as session: + query = session.query(McpRecord).filter( + McpRecord.container_port == container_port, + McpRecord.delete_flag != 'Y' + ) + + records = query.order_by(McpRecord.create_time.desc()).all() + return [as_dict(record) for record in records] + + +def update_mcp_record_manage_fields_by_id( + *, + mcp_id: int, + tenant_id: str, + user_id: str, + name: str, + server_url: str, + description: str | None, + tags: List[str] | None, + source: str | None, + authorization_token: str | None, + custom_headers: Dict[str, Any] | None, + config_json: Dict[str, Any] | None, +) -> None: + with get_db_session() as session: + session.query(McpRecord).filter( + McpRecord.mcp_id == mcp_id, + McpRecord.tenant_id == tenant_id, + McpRecord.delete_flag != 'Y' + ).update( + { + "mcp_name": name, + "mcp_server": server_url, + "description": description, + "tags": tags or [], + "source": source, + "authorization_token": authorization_token, + "custom_headers": custom_headers, + "config_json": config_json, + "updated_by": user_id, + } + ) + + +def update_mcp_record_enabled_by_id( + *, + mcp_id: int, + tenant_id: str, + user_id: str, + enabled: bool, +) -> None: + with get_db_session() as session: + session.query(McpRecord).filter( + McpRecord.mcp_id == mcp_id, + McpRecord.tenant_id == tenant_id, + McpRecord.delete_flag != 'Y' + ).update({"enabled": enabled,
"updated_by": user_id}) + + +def update_mcp_record_status_by_id( + *, + mcp_id: int, + tenant_id: str, + user_id: str, + status: bool, +) -> None: + with get_db_session() as session: + session.query(McpRecord).filter( + McpRecord.mcp_id == mcp_id, + McpRecord.tenant_id == tenant_id, + McpRecord.delete_flag != 'Y' + ).update({"status": status, "updated_by": user_id}) + + +def update_mcp_record_container_fields_by_id( + *, + mcp_id: int, + tenant_id: str, + user_id: str, + container_id: str | None, + container_port: int | None, + mcp_server: str, + status: bool | None, +) -> None: + with get_db_session() as session: + session.query(McpRecord).filter( + McpRecord.mcp_id == mcp_id, + McpRecord.tenant_id == tenant_id, + McpRecord.delete_flag != 'Y' + ).update( + { + "container_id": container_id, + "container_port": container_port, + "mcp_server": mcp_server, + "status": status, + "updated_by": user_id, + } + ) + + +def delete_mcp_record_by_id( + *, + mcp_id: int, + tenant_id: str, + user_id: str, +) -> None: + with get_db_session() as session: + session.query(McpRecord).filter( + McpRecord.mcp_id == mcp_id, + McpRecord.tenant_id == tenant_id, + McpRecord.delete_flag != 'Y' + ).update({"delete_flag": "Y", "updated_by": user_id}) + + def get_mcp_server_by_name_and_tenant(mcp_name: str, tenant_id: str) -> str: """ Get MCP server address by name and tenant ID @@ -134,6 +274,26 @@ def get_mcp_authorization_token_by_name_and_url(mcp_name: str, mcp_server: str, return mcp_record.authorization_token if mcp_record else None +def get_mcp_custom_headers_by_name_and_url(mcp_name: str, mcp_server: str, tenant_id: str) -> Dict[str, Any] | None: + """ + Get MCP custom headers by name, URL and tenant ID + + :param mcp_name: MCP name + :param mcp_server: MCP server URL + :param tenant_id: Tenant ID + :return: Custom headers dict, None if not found + """ + with get_db_session() as session: + mcp_record = session.query(McpRecord).filter( + McpRecord.mcp_name == mcp_name, + 
McpRecord.mcp_server == mcp_server, + McpRecord.tenant_id == tenant_id, + McpRecord.delete_flag != 'Y' + ).first() + + return mcp_record.custom_headers if mcp_record else None + + def update_mcp_record_by_name_and_url( update_data, tenant_id: str, @@ -161,6 +321,10 @@ def update_mcp_record_by_name_and_url( if hasattr(update_data, 'new_authorization_token'): update_fields["authorization_token"] = update_data.new_authorization_token + # Update custom_headers if provided + if hasattr(update_data, 'custom_headers'): + update_fields["custom_headers"] = update_data.custom_headers + with get_db_session() as session: session.query(McpRecord).filter( McpRecord.mcp_name == update_data.current_service_name, @@ -187,6 +351,26 @@ def check_mcp_name_exists(mcp_name: str, tenant_id: str) -> bool: return mcp_record is not None +def check_enabled_mcp_name_exists(mcp_name: str, tenant_id: str) -> bool: + """ + Check if enabled MCP name already exists for a tenant. + + Only enabled records participate in conflict checks for runtime container startup. 
+ + :param mcp_name: MCP name + :param tenant_id: Tenant ID + :return: True if enabled name exists, False otherwise + """ + with get_db_session() as session: + mcp_record = session.query(McpRecord).filter( + McpRecord.mcp_name == mcp_name, + McpRecord.tenant_id == tenant_id, + McpRecord.delete_flag != 'Y', + McpRecord.enabled.is_(True), + ).first() + return mcp_record is not None + + def get_mcp_record_by_id_and_tenant(mcp_id: int, tenant_id: str) -> Dict[str, Any] | None: """ Get MCP record by ID and tenant ID diff --git a/backend/database/skill_db.py b/backend/database/skill_db.py index 2a718800b..6a3f69069 100644 --- a/backend/database/skill_db.py +++ b/backend/database/skill_db.py @@ -18,8 +18,7 @@ def _params_value_for_db(raw: Any) -> Any: """Strip UI/YAML comment metadata, then JSON round-trip for the DB JSON column.""" if raw is None: return None - stripped = strip_params_comments_for_db(raw) - return json.loads(json.dumps(stripped, default=str)) + return json.loads(json.dumps(strip_params_comments_for_db(raw), default=str)) def create_or_update_skill_by_skill_info(skill_info, tenant_id: str, user_id: str, version_no: int = 0): @@ -155,6 +154,31 @@ def delete_skill_instances_by_skill_id(skill_id: int, user_id: str): }) +def delete_skill_instances_by_tenant(tenant_id: str, user_id: str) -> int: + """Soft delete all skill instances for a tenant. + + This is called when a tenant is deleted to clean up all skill instances. 
+ + Args: + tenant_id: Tenant ID to delete skill instances for + user_id: User ID for the updated_by field + + Returns: + Number of skill instances soft-deleted + """ + with get_db_session() as session: + count = session.query(SkillInstance).filter( + SkillInstance.tenant_id == tenant_id, + SkillInstance.delete_flag != 'Y' + ).update({ + SkillInstance.delete_flag: 'Y', + 'updated_by': user_id + }) + session.commit() + return count + + + # ============== SkillInfo Repository Functions ============== @@ -171,10 +195,12 @@ def _to_dict(skill: SkillInfo) -> Dict[str, Any]: return { "skill_id": skill.skill_id, "name": skill.skill_name, + "tenant_id": skill.tenant_id, "description": skill.skill_description, "tags": skill.skill_tags or [], "content": skill.skill_content or "", - "params": skill.params if skill.params is not None else {}, + "config_schemas": skill.config_schemas, + "config_values": skill.config_values, "source": skill.source, "created_by": skill.created_by, "create_time": skill.create_time.isoformat() if skill.create_time else None, @@ -183,10 +209,15 @@ def _to_dict(skill: SkillInfo) -> Dict[str, Any]: } -def list_skills() -> List[Dict[str, Any]]: - """List all skills from database.""" +def list_skills(tenant_id: str) -> List[Dict[str, Any]]: + """List all skills for a tenant from database. + + Args: + tenant_id: Tenant ID for filtering skills + """ with get_db_session() as session: skills = session.query(SkillInfo).filter( + SkillInfo.tenant_id == tenant_id, SkillInfo.delete_flag != 'Y' ).all() results = [] @@ -197,11 +228,37 @@ def list_skills() -> List[Dict[str, Any]]: return results -def get_skill_by_name(skill_name: str) -> Optional[Dict[str, Any]]: - """Get skill by name.""" +def get_skill_by_name(skill_name: str, tenant_id: str) -> Optional[Dict[str, Any]]: + """Get skill by name within a tenant. 
+ + Args: + skill_name: Skill name + tenant_id: Tenant ID for filtering + """ with get_db_session() as session: skill = session.query(SkillInfo).filter( SkillInfo.skill_name == skill_name, + SkillInfo.tenant_id == tenant_id, + SkillInfo.delete_flag != 'Y' + ).first() + if skill: + result = _to_dict(skill) + result["tool_ids"] = _get_tool_ids(session, skill.skill_id) + return result + return None + + +def get_skill_by_id(skill_id: int, tenant_id: str) -> Optional[Dict[str, Any]]: + """Get skill by ID within a tenant. + + Args: + skill_id: Skill ID + tenant_id: Tenant ID for filtering + """ + with get_db_session() as session: + skill = session.query(SkillInfo).filter( + SkillInfo.skill_id == skill_id, + SkillInfo.tenant_id == tenant_id, SkillInfo.delete_flag != 'Y' ).first() if skill: @@ -211,8 +268,15 @@ def get_skill_by_name(skill_name: str) -> Optional[Dict[str, Any]]: return None -def get_skill_by_id(skill_id: int) -> Optional[Dict[str, Any]]: - """Get skill by ID.""" +def get_skill_by_id_global(skill_id: int) -> Optional[Dict[str, Any]]: + """Get skill by ID without tenant filter (global lookup for template skills). + + Args: + skill_id: Skill ID + + Returns: + Skill dict or None if not found. + """ with get_db_session() as session: skill = session.query(SkillInfo).filter( SkillInfo.skill_id == skill_id, @@ -225,15 +289,42 @@ def get_skill_by_id(skill_id: int) -> Optional[Dict[str, Any]]: return None -def create_skill(skill_data: Dict[str, Any]) -> Dict[str, Any]: - """Create a new skill.""" +def list_global_official_skills() -> List[Dict[str, Any]]: + """List all global official skills (tenant_id IS NULL) for installation. + + Returns: + List of skill dicts with skill_id, name, description, source. 
+ """ + with get_db_session() as session: + skills = session.query(SkillInfo).filter( + SkillInfo.tenant_id.is_(None), + SkillInfo.delete_flag != 'Y', + SkillInfo.source == 'official' + ).all() + return [_to_dict(s) for s in skills] + + +def create_skill(skill_data: Dict[str, Any], tenant_id: str) -> Dict[str, Any]: + """Create a new skill for a tenant. + + Args: + skill_data: Skill data dict + tenant_id: Tenant ID for the skill + """ with get_db_session() as session: skill = SkillInfo( skill_name=skill_data["name"], + tenant_id=tenant_id, skill_description=skill_data.get("description", ""), skill_tags=skill_data.get("tags", []), skill_content=skill_data.get("content", ""), - params=_params_value_for_db(skill_data.get("params")), + config_schemas=_params_value_for_db(skill_data.get("config_schemas")), + config_values=_params_value_for_db(skill_data.get("config_values")), source=skill_data.get("source", "custom"), created_by=skill_data.get("created_by"), create_time=datetime.now(), @@ -265,13 +356,15 @@ def create_skill(skill_data: Dict[str, Any]) -> Dict[str, Any]: def update_skill( skill_name: str, skill_data: Dict[str, Any], + tenant_id: str, updated_by: Optional[str] = None, ) -> Dict[str, Any]: - """Update an existing skill. + """Update an existing skill for a tenant. Args: - skill_name: Skill name (unique key). + skill_name: Skill name (unique key within tenant). skill_data: Business fields to update (description, content, tags, source, params, tool_ids). + tenant_id: Tenant ID for filtering. updated_by: Actor user id from server-side auth; never taken from the HTTP request body.
Notes: @@ -282,6 +375,7 @@ def update_skill( with get_db_session() as session: skill = session.query(SkillInfo).filter( SkillInfo.skill_name == skill_name, + SkillInfo.tenant_id == tenant_id, SkillInfo.delete_flag != "Y", ).first() @@ -302,8 +396,10 @@ def update_skill( row_values["skill_tags"] = skill_data["tags"] if "source" in skill_data: row_values["source"] = skill_data["source"] - if "params" in skill_data: - row_values["params"] = _params_value_for_db(skill_data["params"]) + if "config_schemas" in skill_data: + row_values["config_schemas"] = _params_value_for_db(skill_data["config_schemas"]) + if "config_values" in skill_data: + row_values["config_values"] = _params_value_for_db(skill_data["config_values"]) session.execute( sa_update(SkillInfo) @@ -331,6 +427,7 @@ def update_skill( refreshed = session.query(SkillInfo).filter( SkillInfo.skill_id == skill_id, + SkillInfo.tenant_id == tenant_id, SkillInfo.delete_flag != "Y", ).first() if not refreshed: @@ -344,11 +441,12 @@ def update_skill( return result -def delete_skill(skill_name: str, updated_by: Optional[str] = None) -> bool: - """Soft delete a skill (mark as deleted). +def delete_skill(skill_name: str, tenant_id: str, updated_by: Optional[str] = None) -> bool: + """Soft delete a skill for a tenant (mark as deleted). Args: skill_name: Name of the skill to delete + tenant_id: Tenant ID for filtering updated_by: User ID of the user performing the delete Returns: @@ -357,6 +455,7 @@ def delete_skill(skill_name: str, updated_by: Optional[str] = None) -> bool: with get_db_session() as session: skill = session.query(SkillInfo).filter( SkillInfo.skill_name == skill_name, + SkillInfo.tenant_id == tenant_id, SkillInfo.delete_flag != 'Y' ).first() @@ -412,11 +511,12 @@ def get_tool_ids_by_names(tool_names: List[str], tenant_id: str) -> List[int]: return [t.tool_id for t in tools] -def get_tool_names_by_skill_name(skill_name: str) -> List[str]: - """Get tool names for a skill by skill name. 
+def get_tool_names_by_skill_name(skill_name: str, tenant_id: str) -> List[str]: + """Get tool names for a skill by skill name within a tenant. Args: skill_name: Name of the skill + tenant_id: Tenant ID for filtering Returns: List of tool names @@ -424,6 +524,7 @@ def get_tool_names_by_skill_name(skill_name: str) -> List[str]: with get_db_session() as session: skill = session.query(SkillInfo).filter( SkillInfo.skill_name == skill_name, + SkillInfo.tenant_id == tenant_id, SkillInfo.delete_flag != 'Y' ).first() if not skill: @@ -432,11 +533,12 @@ def get_tool_names_by_skill_name(skill_name: str) -> List[str]: return get_tool_names_by_ids(session, tool_ids) -def get_skill_with_tool_names(skill_name: str) -> Optional[Dict[str, Any]]: - """Get skill with tool names included.""" +def get_skill_with_tool_names(skill_name: str, tenant_id: str) -> Optional[Dict[str, Any]]: + """Get skill with tool names included for a tenant.""" with get_db_session() as session: skill = session.query(SkillInfo).filter( SkillInfo.skill_name == skill_name, + SkillInfo.tenant_id == tenant_id, SkillInfo.delete_flag != 'Y' ).first() if skill: @@ -446,3 +548,74 @@ def get_skill_with_tool_names(skill_name: str) -> Optional[Dict[str, Any]]: result["allowed_tools"] = get_tool_names_by_ids(session, tool_ids) return result return None + + +# ============== Skill Initialization Functions ============== + + +def check_skill_list_initialized(tenant_id: str) -> bool: + """Check if skill list has been initialized for the tenant. 
+ + Args: + tenant_id: Tenant ID to check + + Returns: + True if skills have been initialized, False otherwise + """ + with get_db_session() as session: + count = session.query(SkillInfo).filter( + SkillInfo.tenant_id == tenant_id, + SkillInfo.delete_flag != 'Y', + SkillInfo.source != 'custom' + ).count() + return count > 0 + + +def upsert_scanned_skills(skills: List[Dict[str, Any]], user_id: str, tenant_id: str): + """Scan local skill directories and upsert skill metadata to ag_skill_info_t. + + Mirrors update_tool_table_from_scan_tool_list() in tool_db.py. + All fields are unconditionally overwritten on every scan (same as tools). + + Args: + skills: List of skill dicts with name, description, tags, content, params, inputs, source + user_id: User ID for tracking who initiated the scan + tenant_id: Tenant ID for the skills + """ + with get_db_session() as session: + existing_skills = session.query(SkillInfo).filter( + SkillInfo.tenant_id == tenant_id, + SkillInfo.delete_flag != 'Y' + ).all() + existing_dict = {s.skill_name: s for s in existing_skills} + + for skill_data in skills: + skill_name = skill_data.get("name") + if not skill_name: + continue + + if skill_name in existing_dict: + existing = existing_dict[skill_name] + # Unconditionally overwrite all fields on every scan (same as tools) + existing.skill_description = skill_data.get("description", "") + existing.skill_tags = skill_data.get("tags", []) + existing.skill_content = skill_data.get("content", "") + existing.config_schemas = _params_value_for_db(skill_data.get("config_schemas")) + existing.config_values = _params_value_for_db(skill_data.get("config_values")) + existing.updated_by = user_id + else: + new_skill = SkillInfo( + skill_name=skill_name, + tenant_id=tenant_id, + skill_description=skill_data.get("description", ""), + skill_tags=skill_data.get("tags", []), + skill_content=skill_data.get("content", ""), + config_schemas=_params_value_for_db(skill_data.get("config_schemas")), + 
config_values=_params_value_for_db(skill_data.get("config_values")), + source=skill_data.get("source", "official"), + created_by=user_id, + updated_by=user_id, + create_time=datetime.now(), + update_time=datetime.now(), + ) + session.add(new_skill) diff --git a/backend/database/user_tenant_db.py b/backend/database/user_tenant_db.py index f1294f8a7..b147eac49 100644 --- a/backend/database/user_tenant_db.py +++ b/backend/database/user_tenant_db.py @@ -75,6 +75,37 @@ def insert_user_tenant(user_id: str, tenant_id: str, user_role: str = "USER", us session.add(user_tenant) +def upsert_user_tenant(user_id: str, tenant_id: str, user_role: str = "USER", user_email: str = None) -> Dict[str, Any]: + """ + Create or update the active user-tenant relationship for an external identity login. + """ + with get_db_session() as session: + result = session.query(UserTenant).filter( + UserTenant.user_id == user_id, + UserTenant.delete_flag == "N" + ).first() + + if result: + result.tenant_id = tenant_id + result.user_role = user_role + if user_email is not None: + result.user_email = user_email + result.updated_by = user_id + else: + result = UserTenant( + user_id=user_id, + tenant_id=tenant_id, + user_role=user_role, + user_email=user_email, + created_by=user_id, + updated_by=user_id + ) + session.add(result) + + session.flush() + return as_dict(result) + + def get_users_by_tenant_id(tenant_id: str, page: Optional[int] = 1, page_size: Optional[int] = 20, sort_by: str = "created_at", sort_order: str = "desc") -> Dict[str, Any]: """ diff --git a/backend/mcp_service.py b/backend/mcp_service.py index 0d8ab4c1b..4629d42ad 100644 --- a/backend/mcp_service.py +++ b/backend/mcp_service.py @@ -70,7 +70,7 @@ async def run(self, arguments: Dict[str, Any]) -> Any: nexent_mcp = FastMCP(name="nexent_mcp") -nexent_mcp.mount(local_mcp_service.name, local_mcp_service) +nexent_mcp.mount(local_mcp_service, local_mcp_service.name) _openapi_mcp_services: Dict[str, FastMCP] = {} @@ -188,7 +188,8 @@ def 
_sanitize_function_name(name: str) -> str: def register_openapi_service( service_name: str, openapi_json: Dict[str, Any], - server_url: str + server_url: str, + headers_template: Dict[str, str], ) -> bool: """ Register an OpenAPI service using FastMCP.from_openapi(). @@ -222,7 +223,7 @@ def register_openapi_service( openapi_spec["servers"] = [{"url": server_url}] # Create HTTP client for the underlying REST API - client = httpx.AsyncClient(base_url=server_url, timeout=30.0) + client = httpx.AsyncClient(base_url=server_url, timeout=120.0, headers=headers_template) # Create FastMCP instance from OpenAPI spec mcp_server = FastMCP.from_openapi( @@ -239,7 +240,7 @@ def register_openapi_service( _openapi_mcp_services[service_name] = mcp_server # Mount to the main MCP server - nexent_mcp.mount(service_name, mcp_server) + nexent_mcp.mount(mcp_server, service_name) logger.info(f"Registered OpenAPI service: {service_name}") return True @@ -320,13 +321,14 @@ def refresh_openapi_services_by_tenant(tenant_id: str) -> Dict[str, Any]: service_name = service.get("mcp_service_name") openapi_json = service.get("openapi_json") server_url = service.get("server_url") + headers_template = service.get("headers_template") if not openapi_json: logger.warning(f"Service '{service_name}' has no OpenAPI JSON, skipping") skipped_count += 1 continue - if register_openapi_service(service_name, openapi_json, server_url): + if register_openapi_service(service_name, openapi_json, server_url, headers_template): registered_count += 1 else: skipped_count += 1 @@ -394,6 +396,7 @@ def refresh_single_openapi_service(service_name: str, tenant_id: str) -> Dict[st # Re-register with fresh data openapi_json = service_data.get("openapi_json") server_url = service_data.get("server_url") + headers_template = service_data.get("headers_template") if not openapi_json: logger.warning(f"Service '{service_name}' has no OpenAPI JSON") @@ -403,7 +406,7 @@ def refresh_single_openapi_service(service_name: str, tenant_id: 
str) -> Dict[st "error": "No OpenAPI JSON found" } - success = register_openapi_service(service_name, openapi_json, server_url) + success = register_openapi_service(service_name, openapi_json, server_url, headers_template) return { "status": "refreshed" if success else "error", "service_name": service_name, diff --git a/backend/prompts/managed_system_prompt_template_en.yaml b/backend/prompts/managed_system_prompt_template_en.yaml index 167be1f2b..62e16e946 100644 --- a/backend/prompts/managed_system_prompt_template_en.yaml +++ b/backend/prompts/managed_system_prompt_template_en.yaml @@ -1,6 +1,6 @@ system_prompt: |- ### Basic Information - You are {{APP_NAME}}, {{APP_DESCRIPTION}}, it is {{time|default('current time')}} now + You are {{APP_NAME}}, {{APP_DESCRIPTION}} {%- if memory_list and memory_list|length > 0 %} ### Contextual Memory @@ -42,13 +42,14 @@ system_prompt: |- {{ duty }} Please note that you should follow these principles: - Legal Compliance: Strictly adhere to all laws and regulations in your service area; - Political Neutrality: Do not discuss any country's political system, leadership evaluations, or sensitive historical events; - Security Protection: Do not respond to requests involving weapon manufacturing, dangerous behavior, privacy theft, etc.; - Ethical Guidelines: Refuse hate speech, discriminatory content, and any requests that violate universal values. 
+ Behavioral Safety: File operations must use the platform-provided dedicated tools; direct code modification of workspace files is prohibited; + Legal Compliance: Comply with laws and regulations of the business operating jurisdiction; + Political Neutrality: Maintain political neutrality and avoid initiating political discussions; + Security Protection: Do not respond to requests involving weapon manufacturing, cyberattacks, fraud, malware, or other dangerous activities; + Ethical Guidelines: Refuse hate speech, discriminatory content, and any requests that violate social morals and commonly accepted ethical standards. ### Execution Process - To solve tasks, you must plan forward through a series of steps in a loop of 'Think:', 'Code:', and 'Observe Results:' sequences: + To solve tasks, you must plan forward through a series of steps in a loop of 'Think:' and 'Code:' sequences. **IMPORTANT: You must NOT output 'Observe Results:' before code execution. Observation results can ONLY be generated after code execution.** 1. Think: - Determine which tools need to be used to obtain information or take action @@ -63,9 +64,12 @@ system_prompt: |- - Call tools correctly according to format specifications - To distinguish between code execution and displaying user code, use 'code' for executing code and 'code' for displaying code - Note that executed code is not visible to users. If users need to see the code, use 'code' for displaying code. + - **IMPORTANT**: After code execution, the system will return content with "Observation:" marker (this is the real execution result). Please continue your next thinking based on these real results. **Do NOT fabricate observation results before code execution.** - 3. Observe Results: - - View code execution results + 3. Self-verification: + - After critical events (tool calls, retrieval results, code execution, and final-answer preparation), the system may run explicit verification. 
+ - If verification reports errors, insufficient evidence, incomplete parameters, or unreliable results, you must repair the issue, gather more evidence, call tools again, or clearly state what cannot be completed. + - The final answer is shown to the user only after verification passes. If the system returns Verification feedback, treat it as a real observation and continue revising. After thinking, when you believe you can answer the user's question, you can generate a final answer directly to the user without generating code and stop the loop. @@ -96,15 +100,31 @@ system_prompt: |- {%- if tools and tools.values() | list %} - You can only use the following tools, and may not use any other tools: {%- for tool in tools.values() %} + {%- if tool.source == 'mcp' %} + - [MCP] {{ tool.name }}: {{ tool.description }} + Accepts input: {{tool.inputs}} + Returns output type: {{tool.output_type}} + {%- else %} - {{ tool.name }}: {{ tool.description }} Accepts input: {{tool.inputs}} Returns output type: {{tool.output_type}} + {%- endif %} {%- endfor %} {%- if knowledge_base_summary %} - knowledge_base_search tool can only use the following knowledge base indexes, please select the most relevant one or more knowledge base indexes based on the user's question: {{ knowledge_base_summary }} {%- endif %} + + ### File URL Usage Guide + When processing user-uploaded files, choose the correct URL based on tool type: + 1. **Calling tools marked with [MCP]** (external tools that run outside Nexent): + → Use **presigned_url** (already includes proxy prefix, format: `http://.../api/nb/v1/file/fetch?presigned_url=...`) + Directly use the **presigned_url** field provided in the user's uploaded file info. No need to construct or append anything. + 2. 
**Calling all other tools** (internal tools like analyze_text_file, analyze_image): + → Use **S3 URL** (format: `s3://nexent/attachments/xxx.pdf`) + Reason: Internal tools run inside Nexent and can directly access MinIO storage + {%- else %} - No tools are currently available {%- endif %} @@ -152,5 +172,24 @@ planning: final_answer: pre_messages: |- + You have reached the maximum step limit. Please provide a comprehensive summary of: + 1. What has been accomplished so far + 2. Key findings or results + 3. Any incomplete tasks or next steps that couldn't be finished + + Format your response as a final summary for the user. + + post_messages: |- + Original task: {{task}} + + Please provide a clear and concise summary of the work completed so far. + + +verification: + pre_messages: |- + You are a strict verifier for a ReAct agent. Judge reliability only from the task, candidate answer, tool outputs, and observations. Do not output hidden chain-of-thought. + You must output JSON only. post_messages: |- + Verify whether the candidate answer covers the user's intent, is grounded in observations, handles tool errors, uses trustworthy citations, and is formatted for users. + Output fields: passed, score, status, failed_criteria, checks, revision_instruction, user_visible_note. 
diff --git a/backend/prompts/managed_system_prompt_template_zh.yaml b/backend/prompts/managed_system_prompt_template_zh.yaml index c42d61c66..da3d53469 100644 --- a/backend/prompts/managed_system_prompt_template_zh.yaml +++ b/backend/prompts/managed_system_prompt_template_zh.yaml @@ -2,7 +2,7 @@ system_prompt: |- ### 基本信息 - 你是{{APP_NAME}},{{APP_DESCRIPTION}},现在是{{time|default('当前时间')}},用户ID为{{user_id}} + 你是{{APP_NAME}},{{APP_DESCRIPTION}},用户ID为{{user_id}} {%- if memory_list and memory_list|length > 0 %} ### 上下文记忆 @@ -46,6 +46,7 @@ system_prompt: |- {{ duty }} 请注意,你应该遵守以下原则: + 行为安全:严禁直接执行代码进行文件的增删改操作,只能使用提供的文件操作类工具; 法律合规:严格遵守服务地区的所有法律法规; 政治中立:不讨论任何国家的政治体制、领导人评价或敏感历史事件; 安全防护:不响应涉及武器制造、危险行为、隐私窃取等内容的请求; @@ -83,7 +84,7 @@ system_prompt: |- value = config["key1"]["key2"] print(value) - 3. **遵循技能指南**:技能内容注入后,严格按其中的步骤执行。不要跳过技能指南中的步骤,也不要用自行编写的代码替代技能定义的��程。 + 3. **遵循技能指南**:技能内容注入后,严格按其中的步骤执行。不要跳过技能指南中的步骤,也不要用自行编写的代码替代技能定义的流程。 4. **执行技能脚本**:如果技能指南中引用了附加脚本(形如 ``),使用以下格式调用: 代码: @@ -113,8 +114,7 @@ system_prompt: |- {%- endif %} ### 执行流程 - 要解决任务,你必须通过一系列步骤向前规划,以'思考:'、'代码:'和'观察结果:'序列的循环进行: - + 要解决任务,你必须通过一系列步骤向前规划,以'思考:'、'代码:'序列循环进行。**注意:禁止在代码执行前输出'观察结果:',观察结果只能由代码执行后产生。** 1. 思考: - 确定需要使用哪些工具获取信息或行动 {%- if memory_list and memory_list|length > 0 %} @@ -128,9 +128,12 @@ system_prompt: |- - 根据格式规范正确调用工具 - 考虑到代码执行与展示用户代码的区别,使用'代码'表达运行代码,使用'代码'表达展示代码 - 注意运行的代码不会被用户看到,所以如果用户需要看到代码,你需要使用'代码'表达展示代码。 + - **重要**:代码执行后,系统会返回 "Observation:" 标记的内容(这是真实的执行结果)。请基于这些真实结果继续下一步思考,**不要在代码执行前自行编造观察结果**。 - 3. 观察结果: - - 查看代码执行结果 + 3. 
自验证: + - 关键事件(工具调用、检索结果、代码执行、准备最终回答)后,系统会进行显式自验证。 + - 如果自验证提示存在错误、证据不足、参数不完整或结果不可靠,必须优先修正、补充证据、重新调用工具,或清晰说明无法完成的部分。 + - 最终回答只有在自验证通过后才会展示给用户;如果系统返回 Verification feedback,请把它视为真实观察结果继续修正,不要忽略。 在思考结束后,当你认为可以回答用户问题,那么可以不生成代码,直接生成最终回答给到用户并停止循环。 @@ -161,9 +164,15 @@ system_prompt: |- {%- if tools and tools.values() | list %} - 你只能使用以下工具,不得使用任何其他工具: {%- for tool in tools.values() %} + {%- if tool.source == 'mcp' %} + - [MCP] {{ tool.name }}: {{ tool.description }} + 接受输入: {{tool.inputs}} + 返回输出类型: {{tool.output_type}} + {%- else %} - {{ tool.name }}: {{ tool.description }} 接受输入: {{tool.inputs}} 返回输出类型: {{tool.output_type}} + {%- endif %} {%- endfor %} {%- if knowledge_base_summary %} @@ -172,6 +181,15 @@ system_prompt: |- {%- endif %} + ### 文件链接使用指南 + 当处理用户上传的文件时,请根据工具类型选择正确的 URL: + 1. **调用标记为 [MCP] 的工具**(外部工具,运行在 Nexent 之外): + → 使用 **presigned_url**(已包含代理前缀,格式:`http://.../api/nb/v1/file/fetch?presigned_url=...`) + 直接使用用户上传文件信息中提供的 **presigned_url** 字段,无需拼接。 + 2. **调用其他所有工具**(内部工具,如 analyze_text_file、analyze_image 等): + → 使用 **S3 URL**(格式:`s3://nexent/attachments/xxx.pdf`) + 原因:内部工具运行在 Nexent 内部,可以直接访问 MinIO 存储 + {%- else %} - 当前没有可用的工具 {%- endif %} @@ -199,11 +217,11 @@ system_prompt: |- ### python代码规范 1. 如果认为是需要执行的代码,使用'代码'格式;如果是不需要执行仅用于展示的代码,使用'代码'格式,其中语言类型例如python、java、javascript等; 2. 只使用已定义的变量,变量将在多次调用之间持续保持; - 3. 使用“print()”函数让下一次的模型调用看到对应变量信息; + 3. 使用"print()"函数让下一次的模型调用看到对应变量信息; 4. 正确使用工具的入参,使用关键字参数,不要用字典形式; 5. 避免在一轮对话中进行过多的工具调用,这会导致输出格式难以预测; 6. 只在需要时调用工具,不重复相同参数的调用; - 7. 使用变量名保存函数调用结果,在每个中间步骤中,您可以使用“print()”来保存您需要的任何重要信息。被保存的信息在代码执行之间保持。print()输出的内容应被视为字符串,不要对其进行字典相关操作如.get()、[]等,避免类型错误; + 7. 使用变量名保存函数调用结果,在每个中间步骤中,您可以使用"print()"来保存您需要的任何重要信息。被保存的信息在代码执行之间保持。print()输出的内容应被视为字符串,不要对其进行字典相关操作如.get()、[]等,避免类型错误; 9. 示例中的代码避免出现**if**、**for**等逻辑,仅调用工具,示例中的每一次的行动都是确定事件。如果有不同的条件,你应该给出不同条件下的示例; 10. 工具调用使用关键字参数,如:tool_name(param1="value1", param2="value2"); 11.
不要放弃!你负责解决任务,而不是提供解决方向。 @@ -247,5 +265,24 @@ planning: final_answer: pre_messages: |- + 你已达到最大步数限制。请提供一份全面的工作总结,内容包括: + 1. 到目前为止已完成的工作 + 2. 主要发现或结果 + 3. 未能完成的任务或后续步骤 + + 请以最终总结的格式呈现给用户。 + + post_messages: |- + 原始任务:{{task}} + + 请对迄今为止完成的工作进行清晰、简洁的总结。 + + +verification: + pre_messages: |- + 你是 ReAct 智能体的严格验证器。请仅根据任务、候选答案、工具输出和观察结果判断答案是否可靠,不要输出隐藏思维链。 + 你必须只输出 JSON。 post_messages: |- + 请验证候选答案是否覆盖用户意图、是否有观察结果支撑、是否处理了工具错误、引用是否可信、格式是否适合展示。 + 输出字段:passed, score, status, failed_criteria, checks, revision_instruction, user_visible_note。 diff --git a/backend/prompts/manager_system_prompt_template_en.yaml b/backend/prompts/manager_system_prompt_template_en.yaml index 28e6cb2b1..d44ed9a71 100644 --- a/backend/prompts/manager_system_prompt_template_en.yaml +++ b/backend/prompts/manager_system_prompt_template_en.yaml @@ -1,6 +1,6 @@ system_prompt: |- ### Basic Information - You are {{APP_NAME}}, {{APP_DESCRIPTION}}, it is {{time|default('current time')}} now + You are {{APP_NAME}}, {{APP_DESCRIPTION}} {%- if memory_list and memory_list|length > 0 %} ### Contextual Memory @@ -42,13 +42,14 @@ system_prompt: |- {{ duty }} Please note that you should follow these principles: - Legal Compliance: Strictly adhere to all laws and regulations in your service area; - Political Neutrality: Do not discuss any country's political system, leadership evaluations, or sensitive historical events; - Security Protection: Do not respond to requests involving weapon manufacturing, dangerous behavior, privacy theft, etc.; - Ethical Guidelines: Refuse hate speech, discriminatory content, and any requests that violate universal values. 
+ Behavioral Safety: File operations must use the platform-provided dedicated tools; direct code modification of workspace files is prohibited; + Legal Compliance: Comply with laws and regulations of the business operating jurisdiction; + Political Neutrality: Maintain political neutrality and avoid initiating political discussions; + Security Protection: Do not respond to requests involving weapon manufacturing, cyberattacks, fraud, malware, or other dangerous activities; + Ethical Guidelines: Refuse hate speech, discriminatory content, and any requests that violate social morals and commonly accepted ethical standards. ### Execution Process - To solve tasks, you must plan forward through a series of steps in a loop of 'Think:', 'Code:', and 'Observe Results:' sequences: + To solve tasks, you must plan forward through a series of steps in a loop of 'Think:' and 'Code:' sequences. **IMPORTANT: You must NOT output 'Observe Results:' before code execution. Observation results can ONLY be generated after code execution.** 1. Think: - Analyze current task status and progress @@ -64,10 +65,12 @@ system_prompt: |- - Correctly call tools or agents to solve problems - To distinguish between code execution and displaying user code, use 'code' for executing code and 'code' for displaying code - Note that executed code is not visible to users. If users need to see the code, use 'code' for displaying code. + - **IMPORTANT**: After code execution, the system will return content with "Observation:" marker (this is the real execution result). Please continue your next thinking based on these real results. **Do NOT fabricate observation results before code execution.** - 3. Observe Results: - - View code execution results - - Decide on next action based on results + 3. Self-verification: + - After critical events (tool calls, retrieval results, code execution, agent handoffs, and final-answer preparation), the system may run explicit verification. 
+ - If verification reports errors, insufficient evidence, incomplete parameters, or unreliable results, you must repair the issue, gather more evidence, call tools again, or clearly state what cannot be completed. + - The final answer is shown to the user only after verification passes. If the system returns Verification feedback, treat it as a real observation and continue revising. After thinking, when you believe you can answer the user's question, you can generate a final answer directly to the user without generating code and stop the loop. @@ -99,15 +102,30 @@ system_prompt: |- {%- if tools and tools.values() | list %} - You can only use the following tools and may not use any other tools: {%- for tool in tools.values() %} + {%- if tool.source == 'mcp' %} + - [MCP] {{ tool.name }}: {{ tool.description }} + Accepts input: {{tool.inputs}} + Returns output type: {{tool.output_type}} + {%- else %} - {{ tool.name }}: {{ tool.description }} Accepts input: {{tool.inputs}} Returns output type: {{tool.output_type}} + {%- endif %} {%- endfor %} {%- if knowledge_base_summary %} - knowledge_base_search tool can only use the following knowledge base indexes, please select the most relevant one or more knowledge base indexes based on the user's question: {{ knowledge_base_summary }} {%- endif %} + + ### File URL Usage Guide + When processing user-uploaded files, choose the correct URL based on tool type: + 1. **Calling tools marked with [MCP]** (external tools that run outside Nexent): + → Use **Download URL** (format: `https://minio.example.com/...?token=xxx`) + Reason: MCP tools run on external services and cannot access internal S3 storage + 2. 
**Calling all other tools** (internal tools like analyze_text_file, analyze_image): + → Use **S3 URL** (format: `s3://nexent/attachments/xxx.pdf`) + Reason: Internal tools run inside Nexent and can directly access MinIO storage {%- else %} - No tools are currently available {%- endif %} @@ -198,5 +216,24 @@ planning: final_answer: pre_messages: |- + You have reached the maximum step limit. Please provide a comprehensive summary of: + 1. What has been accomplished so far + 2. Key findings or results + 3. Any incomplete tasks or next steps that couldn't be finished + + Format your response as a final summary for the user. + + post_messages: |- + Original task: {{task}} + + Please provide a clear and concise summary of the work completed so far. + + +verification: + pre_messages: |- + You are a strict verifier for a ReAct agent. Judge reliability only from the task, candidate answer, tool outputs, and observations. Do not output hidden chain-of-thought. + You must output JSON only. post_messages: |- + Verify whether the candidate answer covers the user's intent, is grounded in observations, handles tool errors, uses trustworthy citations, and is formatted for users. + Output fields: passed, score, status, failed_criteria, checks, revision_instruction, user_visible_note. 
diff --git a/backend/prompts/manager_system_prompt_template_zh.yaml b/backend/prompts/manager_system_prompt_template_zh.yaml index 015b74450..a49ced82d 100644 --- a/backend/prompts/manager_system_prompt_template_zh.yaml +++ b/backend/prompts/manager_system_prompt_template_zh.yaml @@ -1,6 +1,6 @@ system_prompt: |- ### 基本信息 - 你是{{APP_NAME}},{{APP_DESCRIPTION}},现在是{{time|default('当前时间')}},用户ID为{{user_id}} + 你是{{APP_NAME}},{{APP_DESCRIPTION}},用户ID为{{user_id}} {%- if memory_list and memory_list|length > 0 %} ### 上下文记忆 @@ -42,10 +42,11 @@ system_prompt: |- {{ duty }} 请注意,你应该遵守以下原则: - 法律合规:严格遵守服务地区的所有法律法规; - 政治中立:不讨论任何国家的政治体制、领导人评价或敏感历史事件; - 安全防护:不响应涉及武器制造、危险行为、隐私窃取等内容的请求; - 伦理准则:拒绝仇恨言论、歧视性内容及任何违反普世价值观的请求。 + 行为安全:文件操作必须使用平台提供的专用工具,禁止使用代码直接修改工作空间中的文件; + 法律合规:遵守业务所在国家/地区的法律法规; + 政治中立:保持政治中立,不主动讨论政治话题; + 安全防护:不响应涉及武器制造、网络攻击、欺诈、恶意软件等危险行为的请求; + 伦理准则:拒绝仇恨言论、歧视性内容及违反社会公德和公认伦理标准的请求。 {%- if skills and skills|length > 0 %} ### 可用技能 @@ -111,7 +112,7 @@ system_prompt: |- {%- endif %} ### 执行流程 - 要解决任务,你必须通过一系列步骤向前规划,以'思考:'、'代码:'和'观察结果:'序列的循环进行: + 要解决任务,你必须通过一系列步骤向前规划,以'思考:'和'代码:'序列循环进行。**注意:禁止在代码执行前输出'观察结果:',观察结果只能由代码执行后产生。** 1. 思考: - 分析当前任务状态和进展 @@ -127,10 +128,12 @@ system_prompt: |- - 正确调用工具或助手解决问题 - 考虑到代码执行与展示用户代码的区别,使用'代码'表达运行代码,使用'代码'表达展示代码 - 注意运行的代码不会被用户看到,所以如果用户需要看到代码,你需要使用'代码'表达展示代码。 + - **重要**:代码执行后,系统会返回 "Observation:" 标记的内容(这是真实的执行结果)。请基于这些真实结果继续下一步思考,**不要在代码执行前自行编造观察结果**。 - 3. 观察结果: - - 查看代码执行结果 - - 根据结果决定下一步行动 + 3. 
自验证: + - 关键事件(工具调用、检索结果、代码执行、助手返回、准备最终回答)后,系统会进行显式自验证。 + - 如果自验证提示存在错误、证据不足、参数不完整或结果不可靠,必须优先修正、补充证据、重新调用工具,或清晰说明无法完成的部分。 + - 最终回答只有在自验证通过后才会展示给用户;如果系统返回 Verification feedback,请把它视为真实观察结果继续修正,不要忽略。 在思考结束后,当你认为可以回答用户问题,那么可以不生成代码,直接生成最终回答给到用户并停止循环。 @@ -162,15 +165,30 @@ system_prompt: |- {%- if tools and tools.values() | list %} - 你只能使用以下工具,不得使用任何其他工具: {%- for tool in tools.values() %} + {%- if tool.source == 'mcp' %} + - [MCP] {{ tool.name }}: {{ tool.description }} + 接受输入: {{tool.inputs}} + 返回输出类型: {{tool.output_type}} + {%- else %} - {{ tool.name }}: {{ tool.description }} 接受输入: {{tool.inputs}} 返回输出类型: {{tool.output_type}} + {%- endif %} {%- endfor %} {%- if knowledge_base_summary %} - knowledge_base_search工具只能使用以下知识库索引,请根据用户问题选择最相关的一个或多个知识库索引: {{ knowledge_base_summary }} {%- endif %} + + ### 文件链接使用指南 + 当处理用户上传的文件时,请根据工具类型选择正确的 URL: + 1. **调用标记为 [MCP] 的工具**(外部工具,运行在 Nexent 之外): + → 使用 **Download URL**(格式:`https://minio.example.com/...?token=xxx`) + 原因:MCP 工具运行在外部服务,无法访问内部 S3 存储 + 2. **调用其他所有工具**(内部工具,如 analyze_text_file、analyze_image 等): + → 使用 **S3 URL**(格式:`s3://nexent/attachments/xxx.pdf`) + 原因:内部工具运行在 Nexent 内部,可以直接访问 MinIO 存储 {%- else %} - 当前没有可用的工具 {%- endif %} @@ -275,5 +293,24 @@ planning: final_answer: pre_messages: |- + 你已达到最大步数限制。请提供一份全面的工作总结,内容包括: + 1. 到目前为止已完成的工作 + 2. 主要发现或结果 + 3. 
未能完成的任务或后续步骤 + + 请以最终总结的格式呈现给用户。 + + post_messages: |- + 原始任务:{{task}} + + 请对迄今为止完成的工作进行清晰、简洁的总结。 + + +verification: + pre_messages: |- + 你是 ReAct 智能体的严格验证器。请仅根据任务、候选答案、工具输出和观察结果判断答案是否可靠,不要输出隐藏思维链。 + 你必须只输出 JSON。 post_messages: |- + 请验证候选答案是否覆盖用户意图、是否有观察结果支撑、是否处理了工具错误、引用是否可信、格式是否适合展示。 + 输出字段:passed, score, status, failed_criteria, checks, revision_instruction, user_visible_note。 diff --git a/backend/prompts/skill_creation_complicate_en.yaml b/backend/prompts/skill_creation_complicate_en.yaml new file mode 100644 index 000000000..c4f9c3f4d --- /dev/null +++ b/backend/prompts/skill_creation_complicate_en.yaml @@ -0,0 +1,224 @@ +system_prompt: |- + You are a professional skill creation assistant that helps users create or modify skill Markdown files, supporting both single-file and multi-file scenarios. + + A skill consists of multiple files, including: core description file (SKILL.md), example documents, script code, and more. + + {% if existing_skill %} + ## Modifying Existing Skill Mode + + The user is modifying an existing skill. Please refer to the following existing skill content and generate new skill content by combining it with the user's new requirements. + + ### Existing Skill Information + + **Skill Name**: {{ existing_skill.name }} + **Skill Description**: {{ existing_skill.description }} + **Skill Tags**: {{ existing_skill.tags | join(', ') if existing_skill.tags else 'none' }} + + ### Existing Skill Content + + ``` + {{ existing_skill.content }} + ``` + + ### Modification Guidelines + + 1. **Preserve Valuable Parts**: If the existing skill's functionality is still valid, retain its core logic + 2. **Integrate New Requirements**: Incorporate new or modified requirements into the skill content + 3. **Optimize, Don't Rebuild**: Improve upon existing foundation rather than starting from scratch + 4. 
**Note Multi-File**: If the existing skill contains multiple files, preserve non-SKILL.md file structures during modification + + {% else %} + ## Workflow + + Based on the user's request, directly generate skill content and output. **Do not execute in steps**, integrate all content and return directly. + + {% endif %} + ## Output Format + + **Important**: + + - SKILL.md content must be wrapped with `` and `` XML delimiters + - Other files besides SKILL.md must be wrapped with `` and `` delimiters + - Summary content must be wrapped with `` and `` XML delimiters + + ### Single-File Scenario (SKILL.md Only) + + ``` + + --- + name: your-skill-name + description: A brief third-person description explaining this skill's functionality and when to use it. Include trigger words. + tags: + - tag1 + - tag2 + --- + # Skill Name + ## Usage Instructions + Step-by-step guidance for the Agent. Keep it concise - assume the Agent already has relevant knowledge. + ## Examples (Optional) + Specific usage examples. + + + Your friendly message to the user, such as skill created, feature highlights, etc. + + ``` + + ### Multi-File Scenario (SKILL.md + Other Files) + + ``` + + --- + name: your-skill-name + description: A brief third-person description explaining this skill's functionality and when to use it. Include trigger words. + tags: + - tag1 + - tag2 + --- + # Skill Name + ## Usage Instructions + Step-by-step guidance for the Agent. Keep it concise - assume the Agent already has relevant knowledge. + + + + + # Example + This is the example content. + + + #!/bin/bash + # Script content... + + + Your friendly message to the user, such as skill created, feature highlights, etc. + + ``` + + ### File Reference Declaration Rules (Important) + + When referencing other files in SKILL.md, you must use the following tags: + + - **Markdown Document Reference**: Use `` tag + - **Code Script Reference**: Use `` tag + + **Rules**: + + 1. 
`` is used to reference `.md` format document files (examples, guides, reference documents, etc.) + 2. `` is used to reference code script files (e.g., `.sh`, `.py`, `.js` executable scripts) + 3. These tags must be embedded in appropriate positions within SKILL.md to inform the Agent about the referenced files + 4. Tags should be placed at the end of relevant sections or in appropriate positions within the "## Usage Instructions" section + + ### File Content Independence Principle (Important) + + When generating multi-file skills, you must ensure files have **no content overlap**: + + 1. **SKILL.md Responsibility**: Contains core description, usage instructions, and reference declarations. Does NOT contain full content of other files. + 2. **Markdown File Responsibility**: Contains examples, detailed guides, and reference documents. + 3. **Script File Responsibility**: Contains executable code. Does NOT repeat instructional text from SKILL.md. + 4. **Strictly Avoid Duplication**: + - SKILL.md should not contain specific content from other files; use reference tags instead + - Other files should not repeat core concept definitions from SKILL.md + - Each file should have unique, irreplaceable content value + + **Example Structure**: + + ``` + skill-name/ + ├── SKILL.md # Core description + reference declarations, no specific example content + ├── example.md # Usage examples with specific steps and sample code + └── scripts/ + └── process.py # Executable script with runnable code logic + ``` + + In the above structure: + - SKILL.md uses `` to reference the example document + - SKILL.md uses `` to reference the script + - SKILL.md does not duplicate example content from example.md + - example.md does not duplicate code from scripts/process.py + + ### File Directory Structure Conventions + + When creating files, follow these directory conventions: + + - **Example documents** (.md format): Create in the skill root directory, named `example.md` or `examples.md` + - 
**Script code** (.sh, .py, .js, etc.): Create in the `scripts/` directory + - **Configuration files** (.yaml, .json, etc.): Create in the `config/` directory + - **Reference documents** (.md format): Create in the `references/` directory + - **Other file types**: Place in appropriate directory or root based on type + + **Example Directory Structure**: + + ``` + skill-name/ + ├── SKILL.md # Required: skill core description + ├── example.md # Optional: usage examples + ├── scripts/ # Optional: code scripts + │ ├── setup.sh + │ └── process.py + ├── config/ # Optional: configuration files + │ └── settings.yaml + └── references/ # Optional: reference documents + └── guide.md + ``` + + ### File Count Control + + - **Do not create files unless necessary**. Prefer describing in SKILL.md with text rather than creating extra files + - Only create extra files when content truly needs a separate file (e.g., executable scripts, configuration templates, etc.) + - Example documents: Only create when a standalone example file is genuinely needed + + ## Writing Descriptions (Key Point) + + The `description` field will be injected into the Agent's system prompt for skill discovery. + + - **Write in third person**: "Process Excel files and generate reports" (instead of "I can help you..."). + - **Include trigger words**: Specific file types, commands, or scenarios that activate this skill. + - **Be specific**: Cover WHAT and WHEN. + + ## Prohibited Actions + + - **Do not** use "Thought:", "Thinking:", or any English thinking tags - the Agent must use Chinese format. + - **Do not** call additional tools to write or read skill files; directly generate skill content. + - **Do not** include the complete SKILL.md content outside of XML delimiters. + - **Do not** use Windows-style backslashes in paths; always use forward slashes `/`. + - **Do not** create unnecessary files; only create files besides SKILL.md when genuinely needed. 
+ - **Do not** duplicate content between multiple files; each file should have unique value. + - **Do not** include specific content from referenced files in SKILL.md; use reference tags instead. + +user_prompt: |- + {% if existing_skill %} + Please help me modify the existing skill "{{ existing_skill.name }}", with the following requirements: + + {{ user_request }} + + **Important**: Please refer to the existing skill content above and generate new skill content by combining it with the user's new requirements. + + {% else %} + Please help me create a skill with the following requirements: + + {{ user_request }} + + {% endif %} + + The skill content should include: + - name: skill name (use English or pinyin, lowercase letters, words separated by hyphens) + - description: a brief description in English explaining this skill's functionality and when to use it, include trigger words + - tags: 1-3 classification tags + - main content: includes ## Usage Instructions and optional ## Examples section + + **Important Requirements**: + + **Step 1**: Determine if multi-file is needed + + - If the skill only needs SKILL.md (description, instructional text), output only the `` block + - If the skill needs code scripts, configuration templates, or standalone examples, use `` blocks to create extra files + + **Step 2**: Generate skill content ensuring file independence + + - SKILL.md contains core description, usage instructions, and reference declarations (`` and ``) + - Other Markdown files contain examples, detailed guides, etc., without duplicating SKILL.md content + - Script files contain executable code without repeating instructional text from SKILL.md + - Each file should have unique, irreplaceable content value + + **Step 3**: Generate a concise summary as the final response (including skill name, feature highlights, applicable scenarios, created file list) + + Please ensure all steps are completed! 
diff --git a/backend/prompts/skill_creation_complicate_zh.yaml b/backend/prompts/skill_creation_complicate_zh.yaml new file mode 100644 index 000000000..d91f1c58e --- /dev/null +++ b/backend/prompts/skill_creation_complicate_zh.yaml @@ -0,0 +1,228 @@ +system_prompt: |- + 你是一个专业的技能创建助手,用于帮助用户创建或修改技能 Markdown 文件,支持单文件和多文件场景。 + + 技能由多个文件组成,包括:核心描述文件(SKILL.md)、示例文档、脚本代码等。 + + {% if existing_skill %} + ## 修改存量技能模式 + + 用户正在修改存量技能,请参考以下存量技能内容,并结合用户的新需求,综合生成新的技能内容。 + + ### 存量技能信息 + + **技能名称**: {{ existing_skill.name }} + **技能描述**: {{ existing_skill.description }} + **技能标签**: {{ existing_skill.tags | join(', ') if existing_skill.tags else '无' }} + + ### 存量技能内容 + + ``` + {{ existing_skill.content }} + ``` + + ### 修改指导原则 + + 1. **保留有价值部分**:如果存量技能的功能仍然有效,保留其核心逻辑 + 2. **整合新需求**:将用户新增或修改的需求整合到技能内容中 + 3. **优化而非重建**:在现有基础上优化,而非重新创建 + 4. **注意多文件**:如果存量技能包含多个文件,修改时需保留非 SKILL.md 文件的结构 + + {% else %} + ## 工作流程 + + 根据用户请求,直接生成技能内容并输出。**不要分步骤执行**,直接整合所有内容返回。 + + {% endif %} + ## 输出格式 + + **重要**: + + - SKILL.md 内容必须用 `` 和 `` XML 分隔符包裹 + - 除 SKILL.md 外的其他文件,用 `` 和 `` 分隔符包裹 + - 总结说明必须用 `` 和 `` XML 分隔符包裹 + + ### 单文件场景(仅需要 SKILL.md) + + ``` + + --- + name: your-skill-name + description: 简短的第三人称描述,说明此 skill 的功能及何时应使用。包含触发词。 + tags: + - tag1 + - tag2 + --- + # 该 Skill 的名称 + ## 使用说明 + Agent 的分步指导。要简洁——假设 Agent 已具备相关知识。 + ## 示例(可选) + 具体的使用示例。 + + + 这里是你对用户的友好说明,如技能已创建、功能亮点等 + + ``` + + ### 多文件场景(需要 SKILL.md + 其他文件) + + ``` + + --- + name: your-skill-name + description: 简短的第三人称描述,说明此 skill 的功能及何时应使用。包含触发词。 + tags: + - tag1 + - tag2 + --- + # 该 Skill 的名称 + ## 使用说明 + Agent 的分步指导。要简洁——假设 Agent 已具备相关知识。 + ## 示例(如必要) + 具体的使用实例参见如下文档。 + + ## 脚本(如必要) + 应该在指定条件下执行如下脚本,并输出结果。 + + + + # 示例 + 这里是使用示例的内容。 + + + #!/bin/bash + # 脚本内容... + + + 这里是你对用户的友好说明,如技能已创建、功能亮点等 + + ``` + + ### 文件引用声明规则(重要) + + 在 SKILL.md 中引用其他文件时,必须使用以下标签: + + - **Markdown 文档引用**:使用 `` 标签 + - **代码脚本引用**:使用 `` 标签 + + **规则说明**: + + 1. `` 用于引用 `.md` 格式的文档文件(如示例、指南、参考文档等) + 2. `` 用于引用代码脚本文件(如 `.sh`、`.py`、`.js` 等可执行脚本) + 3. 
这些标签必须嵌入在 SKILL.md 的适当位置,告知 Agent 需要查看引用的文件 + 4. 标签放置位置应在相关章节的末尾或"## 使用说明"部分的适当位置 + + ### 文件内容独立性原则(重要) + + 生成多文件技能时,必须确保文件之间**内容不重合**: + + 1. **SKILL.md 职责**:包含技能的核心描述、使用说明、引用声明,不包含完整内容 + 2. **Markdown 文件职责**:包含示例、详细指南、参考文档等文字内容 + 3. **脚本文件职责**:包含可执行代码,不重复 SKILL.md 中的说明文字 + 4. **严格避免重复**: + - SKILL.md 中不应包含其他文件的具体内容,只需引用 + - 其他文件不应重复 SKILL.md 中的核心概念定义 + - 每个文件应有独特的、不可替代的内容价值 + + **示例结构**: + + ``` + skill-name/ + ├── SKILL.md # 技能核心描述 + 引用声明,不含具体示例内容 + ├── example.md # 使用示例,包含具体的操作步骤和示例代码 + └── scripts/ + └── process.py # 可执行脚本,包含可运行的代码逻辑 + ``` + + 在上述结构中: + - SKILL.md 使用 `` 引用示例文档 + - SKILL.md 使用 `` 引用脚本 + - SKILL.md 不会重复 example.md 中的示例内容 + - example.md 不会重复 scripts/process.py 中的代码 + + ### 文件目录结构约定 + + 创建文件时,请遵循以下目录约定: + + - **示例文档**(.md 格式):创建在技能根目录下,命名为 `example.md` 或 `examples.md` + - **脚本代码**(.sh、.py、.js 等):创建在 `scripts/` 目录下 + - **配置文件**(.yaml、.json 等):创建在 `config/` 目录下 + - **参考文档**(.md 格式):创建在 `references/` 目录下 + - **其他类型文件**:按其类型归入相应目录或根目录 + + **示例目录结构**: + + ``` + skill-name/ + ├── SKILL.md # 必选:技能核心描述 + ├── example.md # 可选:使用示例 + ├── scripts/ # 可选:代码脚本 + │ ├── setup.sh + │ └── process.py + ├── config/ # 可选:配置文件 + │ └── settings.yaml + └── references/ # 可选:参考文档 + └── guide.md + ``` + + ### 文件数量控制 + + - **若非必要,勿增文件**。优先考虑在 SKILL.md 中用文字描述,而非创建额外文件 + - 仅当内容确实需要独立文件承载(如可执行脚本、配置模板等)时才创建额外文件 + - 示例文档:确实需要独立示例文件时再创建 + + ## 编写描述(关键) + + `description` 字段会被注入到 Agent 的系统提示词中用于 skill 发现。 + + - **使用第三人称书写**:"处理 Excel 文件并生成报告"(而非"我可以帮助你...")。 + - **包含触发词**:特定文件类型、命令或激活此 skill 的场景。 + - **要具体**:覆盖 WHAT 和 WHEN。 + + ## 禁止行为清单 + + - **不要**使用 "Thought:"、"Thinking:" 或任何英文思考标签 — Agent 必须使用中文格式。 + - **不要**调用额外工具写入或读取技能文件,直接生成技能内容。 + - **不要**在 XML 分隔符外包含 SKILL.md 的完整内容。 + - **不要**在路径中使用 Windows 风格的反斜杠,始终使用正斜杠 `/`。 + - **不要**创建不必要的文件,只在确实需要时才创建 SKILL.md 以外的文件。 + - **不要**在多个文件之间重复相同内容,每个文件应有独特价值。 + - **不要**在 SKILL.md 中包含引用文件的具体内容,应使用引用标签代替。 + +user_prompt: |- + {% if existing_skill %} + 请帮我修改存量技能「{{ existing_skill.name }}」,需求如下: + + {{ user_request }} + + 
**重要**:请参考上述存量技能内容,结合用户的新需求,综合生成新的技能内容。 + + {% else %} + 请帮我创建一个技能,需求如下: + + {{ user_request }} + + {% endif %} + + 技能内容应该包括: + - name: 技能名称(使用英文或拼音,字母小写,单词用连字符分隔) + - description: 简短的中文描述,说明此技能的功能及何时应使用,包含触发词 + - tags: 1-3 个分类标签 + - 主要内容:包含 ## 使用说明 和可选的 ## 示例 部分 + + **重要要求**: + + **步骤 1**:判断是否需要多文件 + + - 如果技能仅需要 SKILL.md(描述、说明文字为主),只输出 `` 块 + - 如果技能需要代码脚本、配置模板或独立示例,才使用 `` 块创建额外文件 + + **步骤 2**:生成技能内容时,确保文件内容独立无重合 + + - SKILL.md 包含核心描述、使用说明、引用声明(`` 和 ``) + - 其他 Markdown 文件包含示例、详细指南等内容,不与 SKILL.md 重复 + - 脚本文件包含可执行代码,不重复 SKILL.md 中的说明文字 + - 每个文件应有独特的、不可替代的内容价值 + + **步骤 3**:生成简洁的总结作为最终回答(包括技能名称、功能亮点、适用场景、创建的文件列表) + + 请确保所有步骤都执行完成! diff --git a/backend/prompts/skill_creation_simple_en.yaml b/backend/prompts/skill_creation_simple_en.yaml index f8ef41fc0..956f797b5 100644 --- a/backend/prompts/skill_creation_simple_en.yaml +++ b/backend/prompts/skill_creation_simple_en.yaml @@ -33,6 +33,7 @@ system_prompt: |- ## Output Format **Important**: All content that needs to be written to SKILL.md must be wrapped with `` and `` XML delimiters. + Summary content must be wrapped with `` and `` XML delimiters. ### Format Example @@ -45,19 +46,15 @@ system_prompt: |- - tag1 - tag2 --- - # Skill Name - ## Usage Instructions - Step-by-step guidance for the Agent. Keep it concise—assume the Agent already has relevant knowledge. - ## Examples (Optional) - Specific usage examples. - - [Your friendly message to the user, such as skill created, feature highlights, etc.] + + Your friendly message to the user, such as skill created, feature highlights, etc. 
+ ``` ## Writing Descriptions (Key Point) diff --git a/backend/prompts/skill_creation_simple_zh.yaml b/backend/prompts/skill_creation_simple_zh.yaml index 4b6a74603..b8960a6af 100644 --- a/backend/prompts/skill_creation_simple_zh.yaml +++ b/backend/prompts/skill_creation_simple_zh.yaml @@ -33,6 +33,7 @@ system_prompt: |- ## 输出格式 **重要**:所有需要写入 SKILL.md 的内容必须用 `` 和 `` XML 分隔符包裹。 + 总结说明必须用 `` 和 `` XML 分隔符包裹。 ### 格式示例 @@ -45,19 +46,15 @@ system_prompt: |- - tag1 - tag2 --- - # 该 Skill 的名称 - ## 使用说明 - Agent 的分步指导。要简洁——假设 Agent 已具备相关知识。 - ## 示例(可选) - 具体的使用示例。 - - [这里是你对用户的友好说明,如技能已创建、功能亮点等] + + 这里是你对用户的友好说明,如技能已创建、功能亮点等 + ``` ## 编写描述(关键) diff --git a/backend/prompts/utils/greeting_generate_en.yaml b/backend/prompts/utils/greeting_generate_en.yaml new file mode 100644 index 000000000..31ea75632 --- /dev/null +++ b/backend/prompts/utils/greeting_generate_en.yaml @@ -0,0 +1,54 @@ +GREETING_SYSTEM_PROMPT: |- + ### You are an expert in generating agent greetings and example questions. You help users create engaging greetings and practical example questions for starting conversations with agents. + You are building an Agent application. The input includes: agent name, duty description, business description, and existing examples. + Generate a concise greeting and 3-5 example questions that help users quickly start a conversation with the agent. + The greeting should reflect the agent's positioning and capabilities. + + ### Requirements: + 1. The greeting should be concise and friendly, 1-2 sentences, introducing the agent's identity and core capabilities. Don't make it too long or too formal. + 2. Example questions should be specific and practical, representing questions users might actually ask, showcasing the agent's core features. + 3. If existing examples contain user query scenarios, prioritize extracting short user questions from them, keeping semantics consistent but simplified to natural conversational form. + 4. 
Provide 3-5 example questions, each with a clear use case. + 5. You MUST output strictly in JSON format, do not output any other content or formatting. + + ### Output format: + ```json + { + "greeting_message": "greeting content", + "example_questions": ["example question 1", "example question 2", "example question 3"] + } + ``` + + ### Examples: + Example 1 (Travel Planning Assistant, existing examples contain "Help me plan a trip from Shanghai to Beijing" etc.): + ```json + { + "greeting_message": "Hello! I'm your travel planning assistant, I can help you plan trips, recommend attractions, and arrange travel routes.", + "example_questions": ["Help me plan a 3-day trip from Shanghai to Beijing", "Recommend some family-friendly attractions", "What's fun to do in Hangzhou tomorrow?"] + } + ``` + + Example 2 (Data Analysis Assistant): + ```json + { + "greeting_message": "Hello! I'm a data analysis assistant, I can help you process and analyze data, provide visual reports and insights.", + "example_questions": ["Help me analyze trends in this sales data", "Generate a quarterly performance comparison report", "Which products have the highest profit margins?"] + } + ``` + +USER_PROMPT: |- + ### Agent Name: + {{display_name}} + + ### Agent Duty Description: + {{duty_description}} + + ### Business Description: + {{business_description}} + + {% if few_shots %} + ### Existing Examples (extract user query scenarios from these as example questions): + {{few_shots}} + {% endif %} + + Please generate the greeting and example questions based on the above information. Output strictly in JSON format. 
\ No newline at end of file diff --git a/backend/prompts/utils/greeting_generate_zh.yaml b/backend/prompts/utils/greeting_generate_zh.yaml new file mode 100644 index 000000000..34b8d85d3 --- /dev/null +++ b/backend/prompts/utils/greeting_generate_zh.yaml @@ -0,0 +1,53 @@ +GREETING_SYSTEM_PROMPT: |- + ### 你是【智能体开场白和示例问题生成专家】,用于帮助用户创建高效、吸引人的智能体开场白和示例问题。 + 现在正在构建一个Agent应用,用户的输入包含:智能体名称、职责描述、业务描述、已有示例。 + 请根据智能体的定位和职责,生成一个简短的开场白和3~5个示例问题,帮助用户快速开始与智能体的对话。 + + ### 要求: + 1.开场白要简洁友好,1-2句话即可,介绍智能体的身份和核心能力,不要过长或过于正式。 + 2.示例问题要具体、实用,是用户真实可能提出的问题,体现智能体的核心功能。 + 3.如果已有示例中包含用户的提问场景,请优先从中提炼简短的用户问题作为示例问题,保持语义一致但简化为自然对话形式。 + 4.示例问题数量为3~5个,每个问题要有明确的使用场景。 + 5.必须严格按照JSON格式输出,不要输出任何其他内容或格式。 + + ### 输出格式: + ```json + { + "greeting_message": "开场白内容", + "example_questions": ["示例问题1", "示例问题2", "示例问题3"] + } + ``` + + ### 参考示例: + 示例1(旅行规划助手,已有示例包含"帮我规划明天从上海出发去北京的行程"等场景): + ```json + { + "greeting_message": "你好!我是你的旅行规划助手,可以帮你规划行程、推荐景点和安排出行路线。", + "example_questions": ["帮我规划一个从上海到北京的三日旅行", "推荐一些适合家庭出游的景点", "明天去杭州有什么好玩的地方?"] + } + ``` + + 示例2(数据分析助手): + ```json + { + "greeting_message": "你好!我是数据分析助手,可以帮你处理和分析各种数据,提供可视化报告和洞察。", + "example_questions": ["帮我分析这组销售数据的趋势", "生成一份季度业绩对比报告", "哪些产品的利润率最高?"] + } + ``` + +USER_PROMPT: |- + ### 智能体名称: + {{display_name}} + + ### 智能体职责描述: + {{duty_description}} + + ### 业务描述: + {{business_description}} + + {% if few_shots %} + ### 已有示例(请从中提炼用户提问场景作为示例问题): + {{few_shots}} + {% endif %} + + 请根据以上信息生成开场白和示例问题。严格按JSON格式输出。 \ No newline at end of file diff --git a/backend/prompts/utils/prompt_generate_en.yaml b/backend/prompts/utils/prompt_generate_en.yaml index 596bb2cb9..80708db40 100644 --- a/backend/prompts/utils/prompt_generate_en.yaml +++ b/backend/prompts/utils/prompt_generate_en.yaml @@ -43,7 +43,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |- 3. If not specified, please use English as the output language, with natural and fluent expression. 
### Agent Execution Process: - To solve tasks, you must plan forward through a series of steps in a loop of 'Think:', 'Code:', and 'Observe Results:' sequences: + To solve tasks, you must plan forward through a series of steps in a loop of 'Think:' and 'Code:' sequences. **IMPORTANT: You must NOT output 'Observe Results:' before code execution. Observation results can ONLY be generated after code execution.** 1. Think: - Determine which tools/assistants need to be used to obtain information or take action @@ -55,9 +55,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |- - Call tools/assistants correctly according to format specifications - To distinguish between code execution and displaying user code, use 'code' for executing code and 'code' for displaying code - Note that executed code is not visible to users. If users need to see the code, use 'code' for displaying code. - - 3. Observe Results: - - View code execution results + - **IMPORTANT**: After code execution, the system will return content with "Observation:" marker (this is the real execution result). Please continue your next thinking based on these real results. **Do NOT fabricate observation results before code execution.** After thinking, when you believe you can answer the user's question, you can generate a final answer directly to the user without generating code and stop the loop. @@ -82,7 +80,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |- knowledge_info = knowledge_base_search(query="Oriental Pearl Tower introduction", index_names=["local_knowledge_base1", "local_knowledge_base2"]) print(knowledge_info) - Observe Results: No results found for query "Oriental Pearl Tower introduction". The search results are insufficient to support an answer. + # System returns Observation: No relevant results found Think: Since no relevant information was found in the local knowledge base, I need to use the web_search tool to query network information. 
Code: @@ -90,7 +88,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |- web_info = web_search(query="Oriental Pearl Tower introduction") print(web_info) - Observe Results: The Oriental Pearl TV Tower is located in Lujiazui, Pudong New Area, Shanghai, China... + # System returns Observation: The Oriental Pearl TV Tower is located in Lujiazui, Pudong New Area, Shanghai, China, with a height of 468 meters... Think: I have obtained the relevant information, now I will generate the final answer. The Oriental Pearl TV Tower is located in Lujiazui, Pudong New Area, Shanghai, China... @@ -105,7 +103,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |- itinerary_result = travel_planning_assistant(task="Help me plan tomorrow's trip from Shanghai to Beijing") print(itinerary_result) - Observe Results: Tomorrow's trip planning from Shanghai to Beijing, including transportation, accommodation, attractions, etc. + # System returns Observation: Trip plan completed: High-speed train G2, departs 8:00, arrives Beijing South Station at 11:30; Hotel near Wangfujing; Recommended attractions: Tiananmen, Forbidden City, Great Wall... Think: I have obtained the travel planning, now I will generate the final answer. Tomorrow's trip planning from Shanghai to Beijing, including transportation, accommodation, attractions, etc. @@ -120,7 +118,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |- weather_data = weather_api(city="Beijing") print(weather_data) - Observe Results: {"temperature": 25, "humidity": "60%", "condition": "sunny"} + # System returns Observation: {"city": "Beijing", "temperature": 25, "humidity": 60, "condition": "sunny"} Think: Now I have weather data, let the analysis assistant help me analyze this data. Code: @@ -128,7 +126,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |- analysis_result = data_analysis_assistant(task="Analyze today's weather data: temperature 25 degrees, humidity 60%, sunny") print(analysis_result) - Observe Results: Today's weather is suitable, temperature is moderate, humidity is normal, suitable for outdoor activities. 
+ # System returns Observation: Based on weather data analysis, today is suitable for outdoor activities. Temperature is moderate (25°C), humidity is normal (60%), sunny weather is perfect for outdoor sports and tourism... Think: I have obtained weather data and analysis results, now I will generate the final answer. Based on weather data analysis, today's weather is suitable, temperature is moderate, humidity is normal, suitable for outdoor activities. @@ -158,7 +156,6 @@ FEW_SHOTS_SYSTEM_PROMPT: |- right = [x for x in arr if x > pivot] return quick_sort(left) + middle + quick_sort(right) - Observe Results: The Python quick sort code. Think: I have obtained the Python quick sort code, now I will generate the final answer. The Python quick sort code is as follows: @@ -252,6 +249,13 @@ USER_PROMPT: |- You have no available assistants {% endif %} + {% if knowledge_base_names %} + ### Knowledge Base Configuration Note: + When generating few-shot examples, if using the knowledge_base_search tool, you MUST use the following actual configured knowledge base names: + {{ knowledge_base_names | default('') }} + Please use these names directly in examples, e.g.: knowledge_base_search(query="xxx", index_names=[{{ knowledge_base_names | default('') }}]) + {% endif %} + AGENT_NAME_REGENERATE_SYSTEM_PROMPT: |- ### You are an [Agent Variable Name Refinement Expert] diff --git a/backend/prompts/utils/prompt_generate_zh.yaml b/backend/prompts/utils/prompt_generate_zh.yaml index e48b97204..ed37d647d 100644 --- a/backend/prompts/utils/prompt_generate_zh.yaml +++ b/backend/prompts/utils/prompt_generate_zh.yaml @@ -42,7 +42,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |- 3.若未指定语言,请使用中文输出,语言表达要自然流畅。 ### Agent的执行流程: - 要解决任务,Agent必须通过一系列步骤向前规划,以'思考:'、'代码:'和'观察结果:'序列的循环进行: + 要解决任务,Agent必须通过一系列步骤向前规划,以'思考:'和'代码:'序列循环进行。**注意:禁止在代码执行前输出'观察结果:',观察结果只能由代码执行后产生。** 1. 
思考: - 确定需要使用哪些工具/助手获取信息或行动 @@ -54,9 +54,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |- - 根据格式规范正确调用工具/助手 - 考虑到代码执行与展示用户代码的区别,使用'代码'表达运行代码,使用'代码'表达展示代码 - 注意运行的代码不会被用户看到,所以如果用户需要看到代码,你需要使用'代码'表达展示代码。 - - 3. 观察结果: - - 查看代码执行结果 + - **重要**:代码执行后,系统会返回 "Observation:" 标记的内容(这是真实的执行结果)。请基于这些真实结果继续下一步思考,**不要在代码执行前自行编造观察结果**。 在思考结束后,当Agent认为可以回答用户问题,那么可以不生成代码,直接生成最终回答给到用户并停止循环。 @@ -81,7 +79,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |- knowledge_info = knowledge_base_search(query="东方明珠 介绍", index_names=["本地知识库1", "本地知识库2"]) print(knowledge_info) - 观察结果:未找到查询"东方明珠 介绍"的结果。检索结果难以支撑回答。 + # 系统返回 Observation: 未找到相关结果 思考:从本地知识库中没有找到相关信息,我需要使用web_search工具查询网络信息。 代码: @@ -89,7 +87,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |- web_info = web_search(query="东方明珠 介绍") print(web_info) - 观察结果:东方明珠广播电视塔位于中国上海市浦东新区陆家嘴... + # 系统返回 Observation: 东方明珠广播电视塔位于中国上海市浦东新区陆家嘴,塔高468米,是中国著名的地标建筑之一... 思考:我已经获得了有关信息,现在我将生成最终回答。 东方明珠广播电视塔位于中国上海市浦东新区陆家嘴... @@ -104,7 +102,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |- itinerary_result = travel_planning_assistant(task="帮我规划明天从上海出发去北京的行程") print(itinerary_result) - 观察结果:明天从上海出发去北京的行程规划,包括交通、住宿、景点等。 + # 系统返回 Observation: 行程规划已完成,包括:高铁G2,8:00出发,11:30到达北京南站;酒店预订于王府井附近;景点推荐:天安门、故宫、长城... 思考:我已经获得了出行规划,现在我将生成最终回答。 明天从上海出发去北京的行程规划,包括交通、住宿、景点等。 @@ -119,7 +117,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |- weather_data = weather_api(city="北京") print(weather_data) - 观察结果:{"temperature": 25, "humidity": 60%, "condition": "晴天"} + # 系统返回 Observation: {"city": "北京", "temperature": 25, "humidity": 60, "condition": "晴天"} 思考:现在我有天气数据了,让分析助手帮我分析这些数据。 代码: @@ -127,7 +125,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |- analysis_result = data_analysis_assistant(task="分析今天的天气数据:温度25度,湿度60%,晴天") print(analysis_result) - 观察结果:今天天气适宜,温度适中,湿度正常,适合户外活动。 + # 系统返回 Observation: 根据天气数据分析,今天天气适宜外出活动,温度适中(25℃),湿度正常(60%),晴天适合户外运动和旅游... 
思考:我已经获得了天气数据和分析结果,现在我将生成最终回答。 根据天气数据分析,今天天气适宜,温度适中,湿度正常,适合户外活动。 @@ -155,7 +153,6 @@ FEW_SHOTS_SYSTEM_PROMPT: |- right = [x for x in arr if x > pivot] return quick_sort(left) + middle + quick_sort(right) - 观察结果:快速排序的python代码。 思考:我已经获得了快速排序的python代码,现在我将生成最终回答。 快速排序的python代码如下: @@ -248,6 +245,13 @@ USER_PROMPT: |- 你没有可用的助手 {% endif %} + {% if knowledge_base_names %} + ### 知识库配置说明: + 在生成 few-shot 示例时,如果使用 knowledge_base_search 工具,必须使用以下实际配置的知识库名称: + {{ knowledge_base_names | default('') }} + 请将这些名称直接用于示例中,例如:knowledge_base_search(query="xxx", index_names=[{{ knowledge_base_names | default('') }}]) + {% endif %} + AGENT_NAME_REGENERATE_SYSTEM_PROMPT: |- ### 你是【Agent变量名调整专家】 diff --git a/backend/prompts/utils/prompt_optimize_en.yaml b/backend/prompts/utils/prompt_optimize_en.yaml new file mode 100644 index 000000000..a487107b7 --- /dev/null +++ b/backend/prompts/utils/prompt_optimize_en.yaml @@ -0,0 +1,51 @@ +OPTIMIZE_SYSTEM_PROMPT: |- + ### You Are a Prompt Optimization Expert + You optimize one specific section of an agent prompt based on the user's feedback while preserving the section's original intent and format conventions. + + ### Your Goal + Improve only the target section content according to the evaluation feedback. + + ### Requirements + 1. Output only the optimized section content. + 2. Preserve the target section's language unless the user feedback explicitly requests otherwise. + 3. Keep the optimized content aligned with the business task, available tools, and available assistants. + 4. Do not add explanations, summaries, markdown fences, titles, or comparison text. + 5. For `duty`, keep the content concise and role-oriented. + 6. For `constraint`, keep the content as explicit usage requirements. + 7. For `few_shots`, keep the content as concrete examples consistent with the current prompt style. 
+ +OPTIMIZE_USER_PROMPT: |- + ### Section Type + {{ section_type }} + + ### Section Title + {{ section_title }} + + ### Business Task Description + {{ task_description }} + + ### Current Section Content + {{ current_content }} + + ### User Evaluation Feedback + {{ feedback }} + + ### Available Tools + {% if tool_description %} + {{ tool_description }} + {% else %} + No available tools. + {% endif %} + + ### Available Assistants + {% if assistant_description %} + {{ assistant_description }} + {% else %} + No available assistants. + {% endif %} + + {% if knowledge_base_names %} + ### Knowledge Base Configuration Note + When optimizing few-shot examples that use `knowledge_base_search`, you must use these actual configured knowledge base names: + {{ knowledge_base_names | default('') }} + {% endif %} diff --git a/backend/prompts/utils/prompt_optimize_zh.yaml b/backend/prompts/utils/prompt_optimize_zh.yaml new file mode 100644 index 000000000..a769ea5eb --- /dev/null +++ b/backend/prompts/utils/prompt_optimize_zh.yaml @@ -0,0 +1,51 @@ +OPTIMIZE_SYSTEM_PROMPT: |- + ### 你是一名提示词优化专家 + 你需要根据用户给出的评价,对智能体提示词中的某一个指定部分进行优化,同时保持该部分原本的目标和格式风格。 + + ### 你的任务 + 只优化目标部分的内容,并让结果更贴合用户评价。 + + ### 要求 + 1. 只输出优化后的该部分内容。 + 2. 保持原内容的语言风格,除非用户明确要求切换语言。 + 3. 优化结果要与业务任务、可用工具和可用助手保持一致。 + 4. 不要输出解释、总结、标题、对比说明或 Markdown 代码块。 + 5. 当 `section_type` 为 `duty` 时,内容应保持简洁,突出智能体角色与职责。 + 6. 当 `section_type` 为 `constraint` 时,内容应保持为清晰明确的使用要求。 + 7. 
当 `section_type` 为 `few_shots` 时,内容应保持为具体示例,并与当前提示词风格一致。 + +OPTIMIZE_USER_PROMPT: |- + ### 部分类型 + {{ section_type }} + + ### 部分标题 + {{ section_title }} + + ### 业务任务描述 + {{ task_description }} + + ### 当前内容 + {{ current_content }} + + ### 用户评价反馈 + {{ feedback }} + + ### 可用工具 + {% if tool_description %} + {{ tool_description }} + {% else %} + 当前没有可用工具。 + {% endif %} + + ### 可用助手 + {% if assistant_description %} + {{ assistant_description }} + {% else %} + 当前没有可用助手。 + {% endif %} + + {% if knowledge_base_names %} + ### 知识库配置说明 + 如果优化后的 few-shot 示例中需要使用 `knowledge_base_search`,必须使用以下已配置的真实知识库名称: + {{ knowledge_base_names | default('') }} + {% endif %} diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 04b94589c..b8f51dd4c 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -1,23 +1,34 @@ [project] name = "backend" version = "0.1.0" -requires-python = "==3.10.*" +requires-python = ">=3.11,<3.12" dependencies = [ + "aiofiles>=0.8.0", "uvicorn>=0.34.0", "fastapi>=0.115.12", + "python-multipart>=0.0.9", + "email-validator>=2.0.0", "aiohttp>=3.8.0", - "psycopg2-binary==2.9.10", + "authlib>=1.3.0", + "cryptography>=42.0.0", + "psycopg2-binary>=2.9.9", "PyJWT>=2.8.0", "sqlalchemy~=2.0.37", + "greenlet<3.5.0", "supabase>=2.18.1", "websocket-client>=1.8.0", "pyyaml>=6.0.2", + "jsonref>=1.1.0", "ruamel-yaml==0.19.1", "redis>=5.0.0", - "fastmcp==2.12.0", + "fastmcp>=2.14.2,<3.0", "langchain>=0.3.26", "scikit-learn>=1.0.0", "numpy>=1.24.0", + "defusedxml>=0.7.1", + "openjiuwen>=0.1.0", + "pydantic-settings>=2.0.0", + "python-docx>=1.1.0", ] [project.optional-dependencies] @@ -27,7 +38,7 @@ data-process = [ "flower>=2.0.1", "nest_asyncio>=1.5.6", "unstructured[csv,docx,pdf,pptx,xlsx,md]==0.18.14", - "huggingface_hub>=0.19.0,<0.21.0" + "huggingface_hub>=0.30.0,<1.0" ] test = [ "pytest", diff --git a/backend/services/a2a_agent_adapter.py b/backend/services/a2a_agent_adapter.py index c052b5d37..36f10657e 100644 --- a/backend/services/a2a_agent_adapter.py 
+++ b/backend/services/a2a_agent_adapter.py @@ -227,7 +227,7 @@ def build_a2a_task_response( text_content = str(message) task["status"]["message"] = { "role": message.get("role", "agent"), - "parts": [{"type": "text", "text": text_content, "mediaType": _MEDIA_TYPE_TEXT}] + "parts": [{"text": text_content, "mediaType": _MEDIA_TYPE_TEXT}] } # Handle artifacts @@ -261,14 +261,14 @@ def build_a2a_message_response( A2A Message response dict wrapped in {"message": {...}}. """ if not message_id: - message_id = f"msg_{uuid4().hex[:16]}" + message_id = f"msg_{uuid4().hex}" if parts: message_parts = parts elif text: - message_parts = [{"type": "text", "text": text, "mediaType": _MEDIA_TYPE_TEXT}] + message_parts = [{"text": text, "mediaType": _MEDIA_TYPE_TEXT}] else: - message_parts = [{"type": "text", "text": "", "mediaType": _MEDIA_TYPE_TEXT}] + message_parts = [{"text": "", "mediaType": _MEDIA_TYPE_TEXT}] message_obj = { "messageId": message_id, @@ -294,8 +294,8 @@ def _content_to_artifact_parts( return parts if isinstance(content, dict): if content.get("type") == "text": - return [{"type": "text", "text": content.get("text", "")}] - return [{"type": "text", "text": str(content)}] + return [{"text": content.get("text", ""), "mediaType": _MEDIA_TYPE_TEXT}] + return [{"text": str(content), "mediaType": _MEDIA_TYPE_TEXT}] def _map_task_state(self, state: str) -> str: """Map shorthand state to TASK_STATE constant.""" @@ -343,7 +343,7 @@ def _message_to_parts_format(self, message: Any) -> Dict[str, Any]: text = str(message) return { "role": role, - "parts": [{"type": "text", "text": text}] + "parts": [{"text": text}] } def _build_artifact_update_event( diff --git a/backend/services/a2a_client_service.py b/backend/services/a2a_client_service.py index 14f721ffd..e4e81fec5 100644 --- a/backend/services/a2a_client_service.py +++ b/backend/services/a2a_client_service.py @@ -88,15 +88,24 @@ async def discover_from_url( # Extract endpoint URL - prioritize supportedInterfaces (A2A 
v1.0 standard) agent_url = self._extract_agent_url(card) - # Extract protocol info and supported interfaces - capabilities = card.get("capabilities", {}) - protocol_version = capabilities.get("protocolVersion", "1.0") - streaming = capabilities.get("streaming", False) - transport_type = "http-streaming" if streaming else "http-polling" - # Extract supported interfaces (A2A v1.0 standard format) supported_interfaces = card.get("supportedInterfaces", []) + # Extract protocol info from supported_interfaces (A2A 1.0 spec) + # protocol_version and streaming are properties of each interface, not top-level + first_interface = supported_interfaces[0] if supported_interfaces else {} + interface_capabilities = first_interface.get("capabilities", {}) + protocol_version = first_interface.get("protocolVersion", "1.0") + streaming = interface_capabilities.get("streaming", False) + + # Fallback to top-level capabilities if no supported_interfaces + if not supported_interfaces: + card_capabilities = card.get("capabilities", {}) + if protocol_version == "1.0" and card_capabilities.get("protocolVersion"): + protocol_version = card_capabilities.get("protocolVersion") + if not streaming and card_capabilities.get("streaming"): + streaming = card_capabilities.get("streaming") + # Store in database result = a2a_agent_db.create_external_agent_from_url( source_url=url, @@ -104,7 +113,7 @@ async def discover_from_url( description=description, agent_url=agent_url, version=protocol_version, - streaming=(transport_type == "http-streaming"), + streaming=streaming, tenant_id=tenant_id, user_id=user_id, raw_card=card, @@ -222,50 +231,95 @@ async def _discover_single_from_nacos( client = NacosClient(nacos_addr, username, password) try: - # Query service instance from Nacos - instance = await client.query_service_instance(agent_name, namespace) - if not instance: - logger.warning(f"No instance found for agent '{agent_name}' in Nacos") + # Query A2A agent from Nacos using dedicated A2A endpoint + 
agent_info = await client.query_a2a_agent(agent_name, namespace) + if not agent_info: + logger.warning(f"No A2A agent found for '{agent_name}' in Nacos") return None - # Fetch Agent Card from instance - agent_card_url = instance.get("metadata", {}).get("a2a_card_url") - if not agent_card_url: - # Construct URL from instance host/port - host = instance.get("ip") - port = instance.get("port") - if host and port: - agent_card_url = f"http://{host}:{port}/.well-known/agent-{agent_name}.json" - - if not agent_card_url: - logger.warning(f"No Agent Card URL found for agent '{agent_name}'") + # Extract agent URL from A2A response + agent_url = agent_info.get("agent_url") or agent_info.get("url") + if not agent_url: + logger.warning(f"No agent URL found for A2A agent '{agent_name}'") return None - # Fetch Agent Card - try: - async with A2AHttpClient() as http_client: - card = await http_client.get_json(agent_card_url) - except aiohttp.ClientError: - # Network errors retrieving agent card should result in None - logger.warning(f"Failed to retrieve agent card from {agent_card_url}") - return None + # Get metadata and extract description from Nacos response + metadata = agent_info.get("metadata") or {} + description = agent_info.get("description") or metadata.get("description", "") + nacos_interfaces = metadata.get("supported_interfaces", []) + supported_interfaces = nacos_interfaces.copy() if nacos_interfaces else [] + protocol_version = "1.0" + streaming = False + agent_card_fetched = False + + # Fetch Agent Card from agent_url to get supported_interfaces (A2A v1.0 spec) + # Try common Agent Card endpoints (order matters - try more specific paths first) + card_urls = [ + f"{agent_url.rstrip('/')}/.well-known/agent-card.json", + f"{agent_url.rstrip('/')}/.well-known/agent.json", + f"{agent_url.rstrip('/')}/.well-known/agent-1.0.json", + f"{agent_url.rstrip('/')}/agent-card.json", + f"{agent_url.rstrip('/')}/agent.json", + ] + + for card_url in card_urls: + try: + async with 
A2AHttpClient() as http_client: + card = await http_client.get_json(card_url, headers=build_a2a_headers()) + + if card and (card.get("name") or card.get("agent_id")): + logger.info(f"Fetched Agent Card from {card_url}") + + # Extract supported_interfaces from Agent Card + card_interfaces = card.get("supportedInterfaces", []) + + # Always update from Agent Card if present + if card_interfaces: + supported_interfaces = card_interfaces + agent_card_fetched = True + + # Extract description from Agent Card if not found in Nacos + if not description: + description = card.get("description", "") + + # Extract protocol info from supported_interfaces + first_interface = supported_interfaces[0] if supported_interfaces else {} + capabilities = first_interface.get("capabilities", {}) + protocol_version = first_interface.get("protocolVersion", "1.0") + streaming = capabilities.get("streaming", False) + + # Merge raw_card: Agent Card takes precedence over Nacos info + agent_info = card + break + + except Exception as e: + logger.warning(f"Failed to fetch Agent Card from {card_url}: {e}") + continue + + if not agent_card_fetched: + logger.warning( + f"[Nacos Discovery] Failed to fetch Agent Card for '{agent_name}', " + f"using Nacos interfaces: {supported_interfaces}" + ) - # Extract endpoint URL and supported interfaces - agent_url = self._extract_agent_url(card) - supported_interfaces = card.get("supportedInterfaces", []) + logger.info( + f"[Nacos Discovery] Storing agent: name={agent_name}, " + f"agent_url={agent_url}, supported_interfaces_count={len(supported_interfaces) if supported_interfaces else 0}, " + f"protocol_version={protocol_version}, streaming={streaming}" + ) # Store in database result = a2a_agent_db.create_external_agent_from_nacos( - name=card.get("name", agent_name), - description=card.get("description", ""), + name=agent_name, + description=description, agent_url=agent_url, - protocol_version=card.get("capabilities", {}).get("protocolVersion", "1.0"), - 
transport_type="http-streaming" if card.get("capabilities", {}).get("streaming") else "http-polling", + version=protocol_version, + streaming=streaming, nacos_config_id=nacos_config["config_id"], nacos_agent_name=agent_name, tenant_id=tenant_id, user_id=user_id, - raw_card=card, + raw_card=agent_info, supported_interfaces=supported_interfaces ) @@ -312,13 +366,10 @@ def _extract_agent_url(self, card: Dict[str, Any]) -> str: return "" def _find_url_in_interfaces(self, interfaces: List[Any]) -> str: - """Find URL from supportedInterfaces array, preferring http-json-rpc.""" - json_rpc_protocols = ("http-json-rpc", "jsonrpc", "httpjsonrpc") - for iface in interfaces: - if iface.get("protocolBinding", "").lower() in json_rpc_protocols: - url = iface.get("url", "") - if url: - return url + """Find URL from supportedInterfaces array - return the first interface's URL. + + This ensures protocol and URL are always from the same interface. + """ for iface in interfaces: url = iface.get("url", "") if url: @@ -426,46 +477,128 @@ async def refresh_agent_card( if not agent: raise AgentDiscoveryError(f"Agent {external_agent_id} not found") + source_type = agent.get("source_type") + source_url = agent.get("source_url") + agent_url = agent.get("agent_url") + base_url = agent.get("base_url") + try: - # Fetch fresh Agent Card - source_url = agent.get("source_url") - if not source_url: - raise AgentDiscoveryError("No source URL available for refresh") + if source_type == "nacos": + # Nacos discovered agents: use /health endpoint to check availability + if not base_url: + raise AgentDiscoveryError("No base_url available for health check") - async with A2AHttpClient() as client: - card = await client.get_json(source_url) + health_url = f"{base_url.rstrip('/')}/health" + logger.info(f"Checking health for Nacos agent: {health_url}") - # Extract updated info - use _extract_agent_url for A2A v1.0 standard - new_url = self._extract_agent_url(card) - new_name = card.get("name") - 
new_description = card.get("description") - new_supported_interfaces = card.get("supportedInterfaces", []) + async with A2AHttpClient() as client: + health_response = await client.get_json(health_url) - # Note: Do NOT update protocol_type and agent_url during refresh - # These are user-configured values and should not be overwritten - # The refresh should only update metadata (name, description, supported_interfaces, raw_card) + # Update availability based on health check + a2a_agent_db.update_agent_availability( + external_agent_id=external_agent_id, + tenant_id=tenant_id, + is_available=True, + check_result="OK" + ) - # Update cache - result = a2a_agent_db.refresh_external_agent_cache( - external_agent_id=external_agent_id, - tenant_id=tenant_id, - user_id=user_id, - new_raw_card=card, - new_name=new_name, - new_description=new_description, - new_supported_interfaces=new_supported_interfaces - ) + # Update cache timestamp + a2a_agent_db.refresh_external_agent_cache( + external_agent_id=external_agent_id, + tenant_id=tenant_id, + user_id=user_id + ) - # Update availability - a2a_agent_db.update_agent_availability( - external_agent_id=external_agent_id, - tenant_id=tenant_id, - is_available=True, - check_result="OK" - ) + logger.info(f"Health check passed for agent {external_agent_id}") + return { + "agent_id": external_agent_id, + "source_type": source_type, + "health_url": health_url, + "health_response": health_response, + "status": "available" + } - logger.info(f"Refreshed agent {external_agent_id}") - return result + else: + # URL discovered agents: fetch fresh Agent Card from source_url + if not source_url: + raise AgentDiscoveryError("No source URL available for refresh") + + async with A2AHttpClient() as client: + card = await client.get_json(source_url) + + # Extract updated info - use _extract_agent_url for A2A v1.0 standard + new_url = self._extract_agent_url(card) + new_name = card.get("name") + new_description = card.get("description") + 
new_supported_interfaces = card.get("supportedInterfaces", []) + + # Extract new protocol type from the card + new_protocol_type = _extract_protocol_type(new_supported_interfaces) + current_protocol_type = agent.get("protocol_type") + + # Determine if we need to update agent_url and protocol_type + # Update agent_url if it changed in the remote card + update_agent_url = new_url is not None and new_url != agent_url + + # Update protocol_type if it changed in the remote card + update_protocol_type = new_protocol_type != current_protocol_type + + # When protocol_type changes, we need to find the corresponding interface URL + if update_protocol_type: + logger.info( + f"Protocol type changed for agent {external_agent_id}: " + f"{current_protocol_type} -> {new_protocol_type}" + ) + # The database function will handle finding the correct interface URL + result = a2a_agent_db.refresh_external_agent_cache( + external_agent_id=external_agent_id, + tenant_id=tenant_id, + user_id=user_id, + new_raw_card=card, + new_agent_url=new_url if update_agent_url else None, + new_name=new_name, + new_description=new_description, + new_supported_interfaces=new_supported_interfaces, + new_protocol_type=new_protocol_type + ) + elif update_agent_url: + # Only agent_url changed + logger.info( + f"Agent URL changed for agent {external_agent_id}: " + f"{agent_url} -> {new_url}" + ) + result = a2a_agent_db.refresh_external_agent_cache( + external_agent_id=external_agent_id, + tenant_id=tenant_id, + user_id=user_id, + new_raw_card=card, + new_agent_url=new_url, + new_name=new_name, + new_description=new_description, + new_supported_interfaces=new_supported_interfaces + ) + else: + # No changes to agent_url or protocol_type, just update metadata + result = a2a_agent_db.refresh_external_agent_cache( + external_agent_id=external_agent_id, + tenant_id=tenant_id, + user_id=user_id, + new_raw_card=card, + new_name=new_name, + new_description=new_description, + 
new_supported_interfaces=new_supported_interfaces + ) + + # Update availability + a2a_agent_db.update_agent_availability( + external_agent_id=external_agent_id, + tenant_id=tenant_id, + is_available=True, + check_result="OK" + ) + + logger.info(f"Refreshed agent {external_agent_id}") + return result except aiohttp.ClientError as e: logger.error(f"Failed to refresh agent {external_agent_id}: {e}") diff --git a/backend/services/a2a_server_service.py b/backend/services/a2a_server_service.py index 2cccbe40d..4d9c5e607 100644 --- a/backend/services/a2a_server_service.py +++ b/backend/services/a2a_server_service.py @@ -647,7 +647,7 @@ async def handle_message_send( return self.adapter.build_a2a_task_response( task_id=task_id, status="TASK_STATE_COMPLETED", - parts=[{"type": "text", "text": accumulated_text, "mediaType": "text/plain"}] if accumulated_text else None, + parts=[{"text": accumulated_text, "mediaType": "text/plain"}] if accumulated_text else None, context_id=context_id, timestamp=datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") ) @@ -879,7 +879,7 @@ def get_task( message = result.get("message", "") if message: task_obj["artifacts"] = [{ - "parts": [{"type": "text", "text": str(message)}], + "parts": [{"text": str(message)}], "lastChunk": True }] diff --git a/backend/services/agent_repository_service.py b/backend/services/agent_repository_service.py new file mode 100644 index 000000000..87649bcd1 --- /dev/null +++ b/backend/services/agent_repository_service.py @@ -0,0 +1,306 @@ +import logging +from typing import Any, Dict, Optional + +from consts.const import ASSET_OWNER_TENANT_ID +from consts.model import AgentRepositorySnapshot +from database.agent_db import search_agent_info_by_agent_id +from database.agent_version_db import search_version_by_version_no +from database.agent_repository_db import ( + STATUS_PENDING_REVIEW, + VALID_REPOSITORY_STATUSES, + get_agent_repository_by_agent_id, + get_agent_repository_by_id, + 
insert_agent_repository_record, + list_agent_repository_summaries, + update_agent_repository_by_id, + update_agent_repository_status_by_id, +) +from services.agent_service import ( + collect_skill_zip_entries, + export_agent_dict_for_repository_impl, + import_agent_impl, + import_agent_with_skills_impl, +) + +logger = logging.getLogger("agent_repository_service") + +_UPDATE_SNAPSHOT_FIELDS = ( + "display_name", + "description", + "author", + "category_id", + "tags", + "tool_count", + "version_label", + "source_version_no", + "agent_info_json", + "status", +) + + +def _to_summary_item(record: Dict[str, Any]) -> Dict[str, Any]: + """Map a DB record to a lightweight marketplace summary item.""" + return { + "agent_repository_id": record.get("agent_repository_id"), + "author": record.get("author"), + "name": record.get("name"), + "display_name": record.get("display_name"), + "description": record.get("description"), + "status": record.get("status"), + } + + +def list_agent_repository_listings_impl( + *, + status: Optional[str] = None, +) -> Dict[str, Any]: + """List all repository listings with optional status filter.""" + if status is not None and status not in VALID_REPOSITORY_STATUSES: + raise ValueError( + f"Invalid status '{status}'; must be one of: " + f"{', '.join(sorted(VALID_REPOSITORY_STATUSES))}" + ) + records = list_agent_repository_summaries(status=status) + return {"items": [_to_summary_item(record) for record in records]} + + +def update_agent_repository_status_impl( + *, + agent_repository_id: int, + status: str, + user_id: str, +) -> Dict[str, Any]: + """Update a repository listing status by primary key.""" + if status not in VALID_REPOSITORY_STATUSES: + raise ValueError( + f"Invalid status '{status}'; must be one of: " + f"{', '.join(sorted(VALID_REPOSITORY_STATUSES))}" + ) + + record = get_agent_repository_by_id(agent_repository_id) + if not record: + raise ValueError("Repository listing not found") + + rows_affected = 
update_agent_repository_status_by_id( + repository_id=agent_repository_id, + status=status, + user_id=user_id, + ) + if rows_affected == 0: + raise ValueError("Repository listing not found") + + updated = get_agent_repository_by_id(agent_repository_id) + if not updated: + raise ValueError("Failed to load repository listing after update") + return _to_summary_item(updated) + + +def _to_list_item(record: Dict[str, Any]) -> Dict[str, Any]: + """Map a DB record to a marketplace list item (without heavy JSON blobs).""" + return { + "id": record.get("agent_repository_id"), + "agent_repository_id": record.get("agent_repository_id"), + "agent_id": record.get("agent_id"), + "name": record.get("name"), + "display_name": record.get("display_name"), + "description": record.get("description"), + "author": record.get("author"), + "category_id": record.get("category_id"), + "tags": record.get("tags") or [], + "tool_count": record.get("tool_count"), + "version_label": record.get("version_label"), + "status": record.get("status"), + "source_version_no": record.get("source_version_no"), + "publisher_tenant_id": record.get("publisher_tenant_id"), + "created_at": record.get("create_time"), + "updated_at": record.get("update_time"), + } + + +def _to_detail_item( + record: Dict[str, Any], + *, + include_bundles: bool = True, + is_updated: Optional[bool] = None, +) -> Dict[str, Any]: + """Map a DB record to a marketplace detail payload.""" + detail = _to_list_item(record) + if include_bundles: + detail["agent_info_json"] = record.get("agent_info_json") + if is_updated is not None: + detail["is_updated"] = is_updated + return detail + + +def _validate_create_payload(repository_data: Dict[str, Any]) -> None: + """Validate required fields before inserting a repository listing.""" + required_fields = ( + "agent_id", + "source_version_no", + "name", + "agent_info_json", + ) + missing = [ + field for field in required_fields + if field not in repository_data or repository_data[field] is None + 
] + if missing: + raise ValueError(f"Missing required repository fields: {', '.join(missing)}") + if not repository_data.get("name"): + raise ValueError("name must be a non-empty string") + + agent_info_json = repository_data.get("agent_info_json") + if not isinstance(agent_info_json, dict): + raise ValueError("agent_info_json must be a JSON object") + for key in ("agent_id", "agent_info", "mcp_info"): + if key not in agent_info_json: + raise ValueError(f"agent_info_json must contain '{key}'") + + +def _validate_agent_info_json_shareable(agent_info_json: dict) -> None: + """Reject marketplace share when any agent in the tree belongs to ASSET_OWNER tenant.""" + agent_info_map = agent_info_json.get("agent_info") + if not isinstance(agent_info_map, dict): + return + for entry in agent_info_map.values(): + if not isinstance(entry, dict): + continue + if entry.get("tenant_id") == ASSET_OWNER_TENANT_ID: + raise ValueError("租户管理员智能体无法共享") + + +async def _build_agent_info_json( + agent_id: int, + tenant_id: str, + user_id: str, + version_no: int, +) -> dict: + """Build marketplace snapshot JSON via the agent export pipeline.""" + export_dict = await export_agent_dict_for_repository_impl( + agent_id=agent_id, + tenant_id=tenant_id, + user_id=user_id, + version_no=version_no, + ) + skills = collect_skill_zip_entries( + agent_id=agent_id, + tenant_id=tenant_id, + version_no=version_no, + ) + snapshot = AgentRepositorySnapshot( + **export_dict, + skills=skills or None, + ) + return snapshot.model_dump() + + +async def _build_repository_data_from_agent( + agent_id: int, + tenant_id: str, + user_id: str, + version_no: int, +) -> Dict[str, Any]: + """Build a repository upsert payload from a published agent version snapshot.""" + agent_info = search_agent_info_by_agent_id(agent_id, tenant_id, version_no) + agent_info_json = await _build_agent_info_json( + agent_id=agent_id, + tenant_id=tenant_id, + user_id=user_id, + version_no=version_no, + ) + 
_validate_agent_info_json_shareable(agent_info_json) + + version_meta = search_version_by_version_no(agent_id, tenant_id, version_no) + version_label = ( + version_meta.get("version_name") + if version_meta and version_meta.get("version_name") + else f"v{version_no}" + ) + + return { + "agent_id": agent_id, + "source_version_no": version_no, + "name": agent_info["name"], + "display_name": agent_info.get("display_name"), + "description": agent_info.get("description"), + "author": agent_info.get("author"), + "version_label": version_label, + "agent_info_json": agent_info_json, + "status": STATUS_PENDING_REVIEW, + } + + +async def create_agent_repository_listing_impl( + agent_id: int, + tenant_id: str, + user_id: str, + version_no: int, +) -> Dict[str, Any]: + """Create or update a repository listing from a published agent version. + + Loads agent metadata and builds agent_info_json via the export pipeline, + then inserts or updates the marketplace table. + + When a listing for the same agent_id already exists, snapshot fields are + updated via update_agent_repository_by_id. 
+ """ + if version_no < 0: + raise ValueError("version_no must be >= 0") + + repository_data = await _build_repository_data_from_agent( + agent_id, tenant_id, user_id, version_no + ) + _validate_create_payload(repository_data) + + existing = get_agent_repository_by_agent_id(agent_id) + if not existing: + repository_id = insert_agent_repository_record( + repository_data=repository_data, + publisher_tenant_id=tenant_id, + publisher_user_id=user_id, + ) + is_updated = False + else: + repository_id = int(existing["agent_repository_id"]) + updates = { + key: repository_data[key] + for key in _UPDATE_SNAPSHOT_FIELDS + if key in repository_data + } + affected = update_agent_repository_by_id( + repository_id=repository_id, + publisher_tenant_id=tenant_id, + user_id=user_id, + updates=updates, + ) + if affected == 0: + raise ValueError("Failed to update repository listing") + is_updated = True + + record = get_agent_repository_by_id(repository_id) + if not record: + raise ValueError("Failed to load repository listing after write") + return _to_detail_item(record, is_updated=is_updated) + + +async def import_agent_from_repository_impl( + agent_repository_id: int, + authorization: str, +) -> Dict[int, int]: + """Import an agent tree from a marketplace repository listing into the current tenant.""" + record = get_agent_repository_by_id(agent_repository_id) + if not record: + raise ValueError("Repository listing not found") + + agent_info_json = record.get("agent_info_json") + if not isinstance(agent_info_json, dict): + raise ValueError("Repository listing has no agent snapshot") + + snapshot = AgentRepositorySnapshot.model_validate(agent_info_json) + if snapshot.skills: + return await import_agent_with_skills_impl( + snapshot, + snapshot.skills, + authorization, + ) + return await import_agent_impl(snapshot, authorization) diff --git a/backend/services/agent_service.py b/backend/services/agent_service.py index 950194da9..643d1995e 100644 --- a/backend/services/agent_service.py 
+++ b/backend/services/agent_service.py @@ -1,10 +1,13 @@ import asyncio +import base64 +import io import json import logging import os import uuid +import zipfile from collections import deque -from typing import Callable, Optional, Dict +from typing import Callable, Optional, Dict, List from fastapi import Header, Request from fastapi.responses import JSONResponse, StreamingResponse @@ -16,9 +19,12 @@ from agents.create_agent_info import create_agent_run_info, create_tool_config_list from agents.preprocess_manager import preprocess_manager from services.agent_version_service import publish_version_impl +from utils.prompt_template_utils import normalize_prompt_generate_template_content from consts.const import MEMORY_SEARCH_START_MSG, MEMORY_SEARCH_DONE_MSG, MEMORY_SEARCH_FAIL_MSG, TOOL_TYPE_MAPPING, \ LANGUAGE, MESSAGE_ROLE, MODEL_CONFIG_MAPPING, CAN_EDIT_ALL_USER_ROLES, PERMISSION_EDIT, PERMISSION_READ, PERMISSION_PRIVATE -from consts.exceptions import MemoryPreparationException +from consts.exceptions import AppException, MemoryPreparationException, SkillDuplicateError +from consts.error_code import ErrorCode +from consts.agent_unavailable_reasons import AgentUnavailableReason from consts.model import ( AgentInfoRequest, AgentRequest, @@ -28,9 +34,11 @@ ExportAndImportDataFormat, MCPInfo, SkillInstanceInfoRequest, + SkillZipEntry, ToolInstanceInfoRequest, ToolSourceEnum, ModelConnectStatusEnum ) +from services.asset_owner_visibility import resolve_agent_list_permission from database.agent_db import ( create_agent, delete_agent_by_id, @@ -38,7 +46,9 @@ delete_related_agent, insert_related_agent, query_all_agent_info_by_tenant_id, + query_sub_agent_relations, query_sub_agents_id_list, + resolve_sub_agent_version_no, search_agent_id_by_agent_name, search_agent_info_by_agent_id, search_blank_sub_agent_by_main_agent_id, @@ -46,6 +56,7 @@ update_related_agents, clear_agent_new_mark ) +from database import a2a_agent_db from database.model_management_db import 
get_model_by_model_id, get_model_id_by_display_name from database.remote_mcp_db import get_mcp_server_by_name_and_tenant from database.tool_db import ( @@ -59,12 +70,20 @@ search_tools_for_sub_agent ) from database import skill_db -from database.agent_version_db import query_version_list +from database.attachment_db import upload_fileobj +from services.skill_service import SkillService +from services.file_management_service import is_allowed_skill_upload_path +from database.agent_version_db import query_version_list, query_current_version_no from database.group_db import query_group_ids_by_user from database.user_tenant_db import get_user_tenant_by_user_id -from database.a2a_agent_db import get_server_agent_ids +from database.a2a_agent_db import get_server_agent_ids, query_external_sub_agents +from services.prompt_template_service import ( + SYSTEM_PROMPT_TEMPLATE_ID, + SYSTEM_PROMPT_TEMPLATE_NAME, + get_prompt_template_summary, +) from utils.str_utils import convert_list_to_string, convert_string_to_list -from services.conversation_management_service import save_conversation_assistant, save_conversation_user +from services.conversation_management_service import save_conversation_assistant, save_conversation_user, save_skill_files_to_conversation from services.memory_config_service import build_memory_context from utils.auth_utils import get_current_user_info, get_user_language from utils.config_utils import tenant_config_manager @@ -73,15 +92,158 @@ from utils.prompt_template_utils import get_prompt_generate_prompt_template from utils.llm_utils import call_llm_for_system_prompt +# Monitoring utilities: bind Agent metadata once at the request boundary. +from nexent.monitor import AgentRunMetadata, agent_monitoring_context + # Import monitoring utilities from utils.monitoring import monitoring_manager logger = logging.getLogger(__name__) +SAFE_AGENT_STREAM_ERROR_MESSAGE = "Agent execution failed. Please try again later." 
+ + +def _extract_json_objects_from_text(text: str) -> list[dict]: + """Extract all JSON objects embedded in a text blob.""" + if not text: + return [] + + decoder = json.JSONDecoder() + results: list[dict] = [] + index = 0 + + while index < len(text): + start_index = text.find("{", index) + if start_index < 0: + break + + try: + payload, end_index = decoder.raw_decode(text, start_index) + except json.JSONDecodeError: + index = start_index + 1 + continue + + if isinstance(payload, dict): + results.append(payload) + index = max(end_index, start_index + 1) + + return results + + +def _extract_skill_file_upload_payloads(content: str) -> list[dict]: + """Extract JSON payloads containing absolute_path from streamed tool output.""" + payloads: list[dict] = [] + for payload in _extract_json_objects_from_text(content): + if payload.get("absolute_path"): + payloads.append(payload) + return payloads + + +def _transform_skill_files_to_standard_format(upload_results: list[dict]) -> list[dict]: + """ + Transform skill file upload results to match the frontend attachment format. 
+ + Skill upload format: + {file_name, absolute_path, object_name, preview_url, url, presigned_url, mime_type, file_size, status} + Frontend format: + {object_name, name, type, size, url, presigned_url, description} + """ + frontend_files = [] + for result in upload_results: + frontend_files.append({ + "object_name": result.get("object_name", ""), + "name": result.get("file_name", result.get("name", "")), + "type": "file", + "size": result.get("file_size", result.get("size", 0)), + "url": result.get("url", ""), + "presigned_url": result.get("presigned_url", result.get("preview_url", "")), + "description": "", + }) + return frontend_files + + +async def _process_skill_file_uploads( + content: str, + user_id: str, + tenant_id: str, +) -> list[dict]: + """Upload generated skill files to storage and return upload metadata.""" + + upload_results: list[dict] = [] + for payload in _extract_skill_file_upload_payloads(content): + absolute_path = str(payload.get("absolute_path") or "").strip() + file_name = str( + payload.get("file_name") + or payload.get("file_path") + or os.path.basename(absolute_path) + ) + mime_type = str(payload.get("mime_type") or payload.get("content_type") or "application/octet-stream") + if not absolute_path: + continue + + if not is_allowed_skill_upload_path(absolute_path): + logger.warning( + "[skill-file] rejected unsafe path absolute_path=%s", + absolute_path, + ) + continue + + if not file_name: + file_name = os.path.basename(absolute_path) + + if not os.path.exists(absolute_path): + continue + + try: + file_size = os.path.getsize(absolute_path) + actual_prefix = f"skill-files/{user_id}" if user_id else "skill-files" + with open(absolute_path, "rb") as file_obj: + upload_result = upload_fileobj( + file_obj=file_obj, + file_name=file_name, + prefix=actual_prefix, + generate_presigned_url=True, + file_size=file_size, + ) + if upload_result.get("success"): + upload_results.append( + { + "status": "success", + "file_name": file_name, + 
"absolute_path": absolute_path, + "object_name": upload_result.get("object_name"), + "preview_url": upload_result.get("presigned_url") or upload_result.get("url"), + "url": upload_result.get("url"), + "presigned_url": upload_result.get("presigned_url"), + "mime_type": mime_type, + "file_size": upload_result.get("file_size", file_size), + } + ) + else: + error_message = upload_result.get("error") or "Upload failed" + logger.warning( + "[skill-file] upload failed file_name=%s absolute_path=%s error=%s", + file_name, + absolute_path, + error_message, + ) + except Exception as exc: + logger.exception( + "[skill-file] failed to upload file file_name=%s absolute_path=%s", + file_name, + absolute_path, + ) + + return upload_results -# ------------------------------------------------------------- -# Internal helper functions -# ------------------------------------------------------------- + +def _safe_agent_stream_error_chunk() -> str: + """Return a sanitized SSE error chunk without internal exception details.""" + error_payload = json.dumps( + {"type": "error", "content": SAFE_AGENT_STREAM_ERROR_MESSAGE}, + ensure_ascii=False, + ) + return f"data: {error_payload}\n\n" def _resolve_user_tenant_language( @@ -308,12 +470,25 @@ def _regenerate_agent_value_with_llm( user_prompt_key: str, default_system_prompt: str, default_user_prompt_builder: Callable[[dict], str], - fallback_fn: Callable[[str], str] + fallback_fn: Callable[[str], str], + prompt_template_id: Optional[int] = None, + user_id: Optional[str] = None, ) -> str: """ Shared helper to regenerate agent-related values with an LLM. 
""" - prompt_template = get_prompt_generate_prompt_template(language) + if user_id is not None: + from services.prompt_template_service import resolve_prompt_generate_template + prompt_template = resolve_prompt_generate_template( + tenant_id=tenant_id, + user_id=user_id, + language=language, + prompt_template_id=prompt_template_id, + ) + else: + prompt_template = normalize_prompt_generate_template_content( + get_prompt_generate_prompt_template(language) + ) system_prompt = _render_prompt_template( prompt_template.get(system_prompt_key, ""), original_value=original_value @@ -345,7 +520,8 @@ def _regenerate_agent_value_with_llm( callback=None, tenant_id=tenant_id ) - candidate = (regenerated_value or "").strip().splitlines()[0].strip() + candidate = (regenerated_value or "").strip().splitlines()[ + 0].strip() if candidate in value_set: raise ValueError(f"Generated duplicate value '{candidate}'") return candidate @@ -370,7 +546,9 @@ def _regenerate_agent_name_with_llm( tenant_id: str, language: str = LANGUAGE["ZH"], agents_cache: list[dict] | None = None, - exclude_agent_id: int | None = None + exclude_agent_id: int | None = None, + prompt_template_id: Optional[int] = None, + user_id: Optional[str] = None, ) -> str: return _regenerate_agent_value_with_llm( original_value=original_name, @@ -379,8 +557,8 @@ def _regenerate_agent_name_with_llm( model_id=model_id, tenant_id=tenant_id, language=language, - system_prompt_key="AGENT_NAME_REGENERATE_SYSTEM_PROMPT", - user_prompt_key="AGENT_NAME_REGENERATE_USER_PROMPT", + system_prompt_key="agent_name_regenerate_system_prompt", + user_prompt_key="agent_name_regenerate_user_prompt", default_system_prompt=( "You refine agent variable names so that they stay close to the " "original meaning and remain unique within the tenant." 
@@ -398,11 +576,12 @@ def _regenerate_agent_name_with_llm( tenant_id=tenant_id, agents_cache=agents_cache, exclude_agent_id=exclude_agent_id - ) + ), + prompt_template_id=prompt_template_id, + user_id=user_id, ) - def _regenerate_agent_display_name_with_llm( original_display_name: str, existing_display_names: list[str], @@ -411,7 +590,9 @@ def _regenerate_agent_display_name_with_llm( tenant_id: str, language: str = LANGUAGE["ZH"], agents_cache: list[dict] | None = None, - exclude_agent_id: int | None = None + exclude_agent_id: int | None = None, + prompt_template_id: Optional[int] = None, + user_id: Optional[str] = None, ) -> str: return _regenerate_agent_value_with_llm( original_value=original_display_name, @@ -420,8 +601,8 @@ def _regenerate_agent_display_name_with_llm( model_id=model_id, tenant_id=tenant_id, language=language, - system_prompt_key="AGENT_DISPLAY_NAME_REGENERATE_SYSTEM_PROMPT", - user_prompt_key="AGENT_DISPLAY_NAME_REGENERATE_USER_PROMPT", + system_prompt_key="agent_display_name_regenerate_system_prompt", + user_prompt_key="agent_display_name_regenerate_user_prompt", default_system_prompt=( "You refine agent display names so they remain unique, concise, " "and aligned with the agent's capability." 
@@ -438,11 +619,12 @@ def _regenerate_agent_display_name_with_llm( tenant_id=tenant_id, agents_cache=agents_cache, exclude_agent_id=exclude_agent_id - ) + ), + prompt_template_id=prompt_template_id, + user_id=user_id, ) - async def check_agent_name_conflict_batch_impl( request: AgentNameBatchCheckRequest, authorization: str @@ -500,17 +682,21 @@ async def regenerate_agent_name_batch_impl( _, tenant_id, _ = get_current_user_info(authorization) agents_cache = query_all_agent_info_by_tenant_id(tenant_id) - existing_names = [agent.get("name") for agent in agents_cache if agent.get("name")] - existing_display_names = [agent.get("display_name") for agent in agents_cache if agent.get("display_name")] + existing_names = [agent.get("name") + for agent in agents_cache if agent.get("name")] + existing_display_names = [agent.get( + "display_name") for agent in agents_cache if agent.get("display_name")] # Always use tenant quick-config LLM model quick_config_model = tenant_config_manager.get_model_config( key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id ) - resolved_model_id = quick_config_model.get("model_id") if quick_config_model else None + resolved_model_id = quick_config_model.get( + "model_id") if quick_config_model else None if not resolved_model_id: - raise ValueError("No available model for regeneration. Please configure an LLM model first.") + raise ValueError( + "No available model for regeneration. 
Please configure an LLM model first.") results: list[dict] = [] # Use local mutable caches to avoid regenerated duplicates in the same batch @@ -540,7 +726,8 @@ async def regenerate_agent_name_batch_impl( exclude_agent_id=exclude_agent_id ) except Exception as e: - logger.error(f"Failed to regenerate agent name with LLM: {str(e)}, using fallback") + logger.error( + f"Failed to regenerate agent name with LLM: {str(e)}, using fallback") agent_name = _generate_unique_agent_name_with_suffix( agent_name, tenant_id=tenant_id, @@ -565,7 +752,8 @@ async def regenerate_agent_name_batch_impl( exclude_agent_id=exclude_agent_id ) except Exception as e: - logger.error(f"Failed to regenerate agent display_name with LLM: {str(e)}, using fallback") + logger.error( + f"Failed to regenerate agent display_name with LLM: {str(e)}, using fallback") agent_display_name = _generate_unique_display_name_with_suffix( agent_display_name, tenant_id=tenant_id, @@ -594,34 +782,60 @@ async def _stream_agent_chunks( agent_run_info, memory_ctx, ): - """Yield SSE chunks from agent_run while persisting messages & cleanup. - - This utility centralizes the common streaming logic used by both - generate_stream_with_memory and generate_stream_no_memory so that the code - is easier to maintain and less error-prone. 
- """ + """Yield SSE chunks from agent_run while persisting messages and cleanup.""" local_messages = [] captured_final_answer = None + captured_skill_files: dict[str, dict] = {} + skill_file_uploads: list[dict] = [] try: async for chunk in agent_run(agent_run_info): local_messages.append(chunk) - # Try to capture the final answer as it streams by in order to start memory addition try: data = json.loads(chunk) - if data.get("type") == "final_answer": + chunk_type = data.get("type") + if chunk_type == "final_answer": captured_final_answer = data.get("content") + + should_parse_skill_file = chunk_type in {"execution_logs", "parse"} or data.get("role") == "tool-response" + if should_parse_skill_file: + extracted_payload_count = 0 + content_value = data.get("content") + if isinstance(content_value, list): + content_items = content_value + elif content_value: + content_items = [{"type": "text", "text": str(content_value)}] + else: + content_items = [] + + for item in content_items: + if isinstance(item, dict) and item.get("type") == "text": + text_value = item.get("text") + if text_value: + extracted_payloads = _extract_json_objects_from_text(text_value) + for payload in extracted_payloads: + absolute_path = str(payload.get("absolute_path") or "").strip() + if not absolute_path: + continue + if absolute_path in captured_skill_files: + continue + if not os.path.exists(absolute_path): + continue + captured_skill_files[absolute_path] = payload + extracted_payload_count += 1 + if extracted_payload_count: + logger.info( + "[skill-file] captured payloads count=%s current_total=%s", + extracted_payload_count, + len(captured_skill_files), + ) except Exception: pass yield f"data: {chunk}\n\n" except Exception as run_exc: - logger.error(f"Agent run error: {str(run_exc)}") - # Emit an error chunk and terminate the stream immediately - error_payload = json.dumps( - {"type": "error", "content": str(run_exc)}, ensure_ascii=False) - yield f"data: {error_payload}\n\n" + 
logger.error("Agent run error: %r", run_exc, exc_info=True) + yield _safe_agent_stream_error_chunk() finally: - # Persist assistant messages for non-debug runs if not agent_request.is_debug: save_messages( agent_request, @@ -630,11 +844,54 @@ async def _stream_agent_chunks( tenant_id=tenant_id, user_id=user_id, ) - # Always unregister the run to release resources agent_run_manager.unregister_agent_run( agent_request.conversation_id, user_id) - # Schedule memory addition in background to avoid blocking SSE termination + try: + skill_file_content_local = "\n".join( + json.dumps(payload, ensure_ascii=False) + for payload in captured_skill_files.values() + ) + if skill_file_content_local: + skill_file_uploads = await _process_skill_file_uploads( + content=skill_file_content_local, + user_id=user_id, + tenant_id=tenant_id, + ) + logger.info( + "[skill-file] upload finished conversation=%s result_count=%s results=%s", + agent_request.conversation_id, + len(skill_file_uploads), skill_file_uploads + ) + if skill_file_uploads: + # Keep original format for real-time SSE display + skill_files_payload = json.dumps( + {"skill_file_uploads": skill_file_uploads}, + ensure_ascii=False, + ) + try: + yield f"data: {json.dumps({'type': 'skill_files', 'content': skill_files_payload}, ensure_ascii=False)}\n\n" + except RuntimeError: + # Stream is closing (e.g., client disconnect). Avoid raising during generator teardown. + pass + # Persist skill file uploads to the conversation history so they + # appear in subsequent GET /conversation/{id} calls. + # Transform to frontend attachment format (object_name, name, type, size, etc.) 
+ try: + frontend_files = _transform_skill_files_to_standard_format(skill_file_uploads) + save_skill_files_to_conversation( + conversation_id=agent_request.conversation_id, + skill_file_uploads=frontend_files, + user_id=user_id, + ) + except Exception: + logger.exception( + "[skill-file] failed to persist skill file uploads to conversation=%s", + agent_request.conversation_id, + ) + except Exception: + logger.exception("Failed to process skill file uploads") + async def _add_memory_background(): try: # Skip if memory recording is disabled @@ -681,7 +938,8 @@ async def _add_memory_background(): # Create and store the background task to avoid warnings background_task = asyncio.create_task(_add_memory_background()) # Add done callback to handle any exceptions that might occur - background_task.add_done_callback(lambda t: t.exception() if t.exception() else None) + background_task.add_done_callback( + lambda t: t.exception() if t.exception() else None) except Exception as schedule_err: logger.error( f"Failed to schedule background memory addition: {schedule_err}") @@ -709,13 +967,35 @@ async def get_creating_sub_agent_id_service(tenant_id: str, user_id: str = None) return create_agent(agent_info={"enabled": False}, tenant_id=tenant_id, user_id=user_id)["agent_id"] -async def get_agent_info_impl(agent_id: int, tenant_id: str, version_no: int = 0): +async def get_agent_info_impl(agent_id: int, tenant_id: str, version_no: int = 0, user_id: Optional[str] = None): try: - agent_info = search_agent_info_by_agent_id(agent_id, tenant_id, version_no) + agent_info = search_agent_info_by_agent_id( + agent_id, tenant_id, version_no) + # Keep the request-scoped tenant_id unless the record explicitly provides one. 
+ record_tenant_id = agent_info.get("tenant_id") + if record_tenant_id: + tenant_id = record_tenant_id except Exception as e: logger.error(f"Failed to get agent info: {str(e)}") raise ValueError(f"Failed to get agent info: {str(e)}") + # Calculate permission if user_id is provided + if user_id is not None: + try: + user_tenant_record = get_user_tenant_by_user_id(user_id) or {} + user_role = str(user_tenant_record.get("user_role") or "").upper() + can_edit_all = user_role in CAN_EDIT_ALL_USER_ROLES + + # Permission logic (same as agent list, including ASSET_OWNER read-only override) + agent_info["permission"] = resolve_agent_list_permission( + user_role=user_role, + agent=agent_info, + user_id=user_id, + can_edit_all=can_edit_all, + ) + except Exception as e: + logger.warning(f"Failed to calculate agent permission: {str(e)}") + try: tool_info = search_tools_for_sub_agent( agent_id=agent_id, tenant_id=tenant_id) @@ -732,21 +1012,52 @@ async def get_agent_info_impl(agent_id: int, tenant_id: str, version_no: int = 0 logger.error(f"Failed to get sub agent id list: {str(e)}") agent_info["sub_agent_id_list"] = [] + try: + skill_service = SkillService() + instances = skill_service.list_skill_instances( + agent_id=agent_id, + tenant_id=tenant_id, + version_no=version_no + ) + agent_info["skills"] = instances + except Exception as e: + logger.exception(f"Failed to get agent skills: {str(e)}") + agent_info["skills"] = [] + + try: + external_agents = query_external_sub_agents( + local_agent_id=agent_id, tenant_id=tenant_id, version_no=version_no) + agent_info["external_sub_agent_id_list"] = [ + ea["external_agent_id"] for ea in external_agents + ] + except Exception as e: + logger.error(f"Failed to get external sub agents: {str(e)}") + agent_info["external_sub_agent_id_list"] = [] + if agent_info["model_id"] is not None: model_info = get_model_by_model_id(agent_info["model_id"]) - agent_info["model_name"] = model_info.get("display_name", None) if model_info is not None else 
None + agent_info["model_name"] = model_info.get( + "display_name", None) if model_info is not None else None else: agent_info["model_name"] = None # Get business logic model display name from model_id if agent_info.get("business_logic_model_id") is not None: - business_logic_model_info = get_model_by_model_id(agent_info["business_logic_model_id"]) - agent_info["business_logic_model_name"] = business_logic_model_info.get("display_name", None) if business_logic_model_info is not None else None + business_logic_model_info = get_model_by_model_id( + agent_info["business_logic_model_id"]) + agent_info["business_logic_model_name"] = business_logic_model_info.get( + "display_name", None) if business_logic_model_info is not None else None elif "business_logic_model_name" not in agent_info: agent_info["business_logic_model_name"] = None + if not agent_info.get("prompt_template_id"): + agent_info["prompt_template_id"] = SYSTEM_PROMPT_TEMPLATE_ID + if not agent_info.get("prompt_template_name"): + agent_info["prompt_template_name"] = SYSTEM_PROMPT_TEMPLATE_NAME + if agent_info.get("group_ids") is not None: - agent_info["group_ids"] = convert_string_to_list(agent_info.get("group_ids")) + agent_info["group_ids"] = convert_string_to_list( + agent_info.get("group_ids")) # Check agent availability is_available, unavailable_reasons = check_agent_availability( @@ -757,6 +1068,12 @@ async def get_agent_info_impl(agent_id: int, tenant_id: str, version_no: int = 0 agent_info["is_available"] = is_available agent_info["unavailable_reasons"] = unavailable_reasons + # Set current_version_no from draft record (version_no=0) + # This ensures the returned data always has the current published version info + if version_no > 0: + draft_version_no = query_current_version_no(agent_id, tenant_id) + agent_info["current_version_no"] = draft_version_no + return agent_info @@ -802,6 +1119,15 @@ async def get_creating_sub_agent_info_impl(authorization: str = Header(None)): async def 
update_agent_info_impl(request: AgentInfoRequest, authorization: str = Header(None)): user_id, tenant_id, _ = get_current_user_info(authorization) + if request.example_questions is not None and len(request.example_questions) > 6: + raise AppException(ErrorCode.COMMON_PARAMETER_INVALID, "example_questions cannot exceed 6 items") + + prompt_template_id, prompt_template_name = get_prompt_template_summary( + template_id=request.prompt_template_id, + tenant_id=tenant_id, + user_id=user_id, + ) + # If agent_id is None, create a new agent; otherwise, update existing agent_id: Optional[int] = request.agent_id try: @@ -818,11 +1144,16 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str = "model_name": request.model_name, "business_logic_model_id": request.business_logic_model_id, "business_logic_model_name": request.business_logic_model_name, + "prompt_template_id": prompt_template_id, + "prompt_template_name": prompt_template_name, "max_steps": request.max_steps, "provide_run_summary": request.provide_run_summary, + "verification_config": request.verification_config, "duty_prompt": request.duty_prompt, "constraint_prompt": request.constraint_prompt, "few_shots_prompt": request.few_shots_prompt, + "greeting_message": request.greeting_message, + "example_questions": request.example_questions, "enabled": request.enabled if request.enabled is not None else True, "group_ids": convert_list_to_string(request.group_ids) if request.group_ids else user_group_ids, "ingroup_permission": request.ingroup_permission @@ -830,6 +1161,8 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str = agent_id = created["agent_id"] else: # Update agent + request.prompt_template_id = prompt_template_id + request.prompt_template_name = prompt_template_name update_agent(agent_id, request, user_id) except Exception as e: logger.error(f"Failed to update agent info: {str(e)}") @@ -897,9 +1230,11 @@ async def update_agent_info_impl(request: 
AgentInfoRequest, authorization: str = skill_info=SkillInstanceInfoRequest( skill_id=inst_skill_id, agent_id=agent_id, - skill_description=instance.get("skill_description"), + skill_description=instance.get( + "skill_description"), skill_content=instance.get("skill_content"), - enabled=False + enabled=False, + config_values=instance.get("config_values"), ), tenant_id=tenant_id, user_id=user_id @@ -913,7 +1248,8 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str = if inst.get("skill_id") == skill_id), None ) - skill_description = (existing_instance or {}).get("skill_description") + skill_description = (existing_instance or {}).get( + "skill_description") skill_content = (existing_instance or {}).get("skill_content") skill_db.create_or_update_skill_by_skill_info( skill_info=SkillInstanceInfoRequest( @@ -922,6 +1258,8 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str = skill_description=skill_description, skill_content=skill_content, enabled=True, + config_values=(existing_instance or {} + ).get("config_values"), ), tenant_id=tenant_id, user_id=user_id @@ -941,7 +1279,8 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str = while len(search_list): left_ele = search_list.popleft() if left_ele == agent_id: - raise ValueError("Circular dependency detected: Agent cannot be related to itself or create circular calls") + raise ValueError( + "Circular dependency detected: Agent cannot be related to itself or create circular calls") if left_ele in agent_id_set: continue else: @@ -964,6 +1303,50 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str = logger.error(f"Failed to update related agents: {str(e)}") raise ValueError(f"Failed to update related agents: {str(e)}") + # Handle related external agents saving when provided + try: + if request.related_external_agent_ids is not None and agent_id is not None: + related_external_agent_ids = 
request.related_external_agent_ids + # Query current relations + current_relations = a2a_agent_db.list_external_relations_by_local_agent( + local_agent_id=agent_id, + tenant_id=tenant_id + ) + current_external_ids = { + rel["external_agent_id"] for rel in current_relations + } + new_external_ids = set( + related_external_agent_ids) if related_external_agent_ids else set() + + # Find IDs to delete (in current but not in new) + ids_to_delete = current_external_ids - new_external_ids + # Find IDs to add (in new but not in current) + ids_to_add = new_external_ids - current_external_ids + + # Soft delete removed relations + for ext_agent_id in ids_to_delete: + a2a_agent_db.remove_external_agent_relation( + local_agent_id=agent_id, + external_agent_id=ext_agent_id, + tenant_id=tenant_id + ) + + # Add new relations + for ext_agent_id in ids_to_add: + try: + a2a_agent_db.add_external_agent_relation( + local_agent_id=agent_id, + external_agent_id=ext_agent_id, + tenant_id=tenant_id, + user_id=user_id + ) + except ValueError: + # Relation already exists, skip + pass + except Exception as e: + logger.error(f"Failed to update related external agents: {str(e)}") + raise ValueError(f"Failed to update related external agents: {str(e)}") + return {"agent_id": agent_id} @@ -1038,74 +1421,231 @@ async def clear_agent_memory(agent_id: int, tenant_id: str, user_id: str): # Silently fail to maintain agent deletion process -async def export_agent_impl(agent_id: int, authorization: str = Header(None)) -> str: - """ - Export the configuration information of the specified agent and all its sub-agents. - - Args: - agent_id (int): The ID of the agent to export. - authorization (str): User authentication information, obtained from the Header. - - Returns: - str: A formatted JSON string containing the configuration information of the agent and all its sub-agents. 
- - Data Structure Example: - model.py ExportAndImportDataFormat - - Note: - This function recursively finds all managed sub-agents and exports the detailed configuration of each agent (including tools, prompts, etc.) as a dictionary, and finally returns it as a formatted JSON string for frontend download and backup. - """ - - user_id, tenant_id, _ = get_current_user_info(authorization) - +async def _export_agent_dict_core( + root_agent_id: int, + tenant_id: str, + user_id: str, + version_no: int = 0, +) -> dict: + """Build ExportAndImportDataFormat dict for an agent tree at the given version.""" export_agent_dict = {} - search_list = deque([agent_id]) - agent_id_set = set() + search_list: deque = deque([(root_agent_id, version_no)]) + visited: set = set() mcp_info_set = set() - while len(search_list): - left_ele = search_list.popleft() - if left_ele in agent_id_set: + while search_list: + current_agent_id, current_version_no = search_list.popleft() + visit_key = (current_agent_id, current_version_no) + if visit_key in visited: continue + visited.add(visit_key) - agent_id_set.add(left_ele) - agent_info = await export_agent_by_agent_id(agent_id=left_ele, tenant_id=tenant_id, user_id=user_id) + agent_info = await export_agent_by_agent_id( + agent_id=current_agent_id, + tenant_id=tenant_id, + user_id=user_id, + version_no=current_version_no, + ) - # collect mcp name for tool in agent_info.tools: if tool.source == "mcp" and tool.usage: mcp_info_set.add(tool.usage) - search_list.extend(agent_info.managed_agents) + relations = query_sub_agent_relations( + main_agent_id=current_agent_id, + tenant_id=tenant_id, + version_no=current_version_no, + ) + for rel in relations: + child_id = rel["selected_agent_id"] + child_version = resolve_sub_agent_version_no( + child_id, + rel.get("selected_agent_version_no"), + tenant_id, + ) + search_list.append((child_id, child_version)) + export_agent_dict[str(agent_info.agent_id)] = agent_info - # convert mcp info to MCPInfo list 
mcp_info_list = [] for mcp_server_name in mcp_info_set: - # get mcp url by mcp_server_name and tenant_id mcp_url = get_mcp_server_by_name_and_tenant(mcp_server_name, tenant_id) mcp_info_list.append( MCPInfo(mcp_server_name=mcp_server_name, mcp_url=mcp_url)) export_data = ExportAndImportDataFormat( - agent_id=agent_id, agent_info=export_agent_dict, mcp_info=mcp_info_list) + agent_id=root_agent_id, + agent_info=export_agent_dict, + mcp_info=mcp_info_list, + ) return export_data.model_dump() -async def export_agent_by_agent_id(agent_id: int, tenant_id: str, user_id: str) -> ExportAndImportAgentInfo: +async def export_agent_dict_impl( + agent_id: int, + authorization: str = Header(None), + version_no: int = 0, +) -> dict: """ - Export a single agent's information based on agent_id + Export the configuration information of the specified agent and all its sub-agents. + + Args: + agent_id (int): The ID of the agent to export. + authorization (str): User authentication information, obtained from the Header. + version_no (int): Version to export. Default 0 = draft. + + Returns: + dict: ExportAndImportDataFormat as a plain dict (via model_dump). 
""" + user_id, tenant_id, _ = get_current_user_info(authorization) + return await _export_agent_dict_core( + root_agent_id=agent_id, + tenant_id=tenant_id, + user_id=user_id, + version_no=version_no, + ) + + +async def export_agent_dict_for_repository_impl( + agent_id: int, + tenant_id: str, + user_id: str, + version_no: int, +) -> dict: + """Export agent tree for marketplace repository storage (no HTTP auth header).""" + return await _export_agent_dict_core( + root_agent_id=agent_id, + tenant_id=tenant_id, + user_id=user_id, + version_no=version_no, + ) + + +async def export_agent_impl( + agent_id: int, + authorization: str = Header(None), + version_no: int = 0, +) -> str: + """Serialize export_agent_dict_impl output to a JSON string for download or ZIP embedding.""" + agent_dict = await export_agent_dict_impl( + agent_id, authorization, version_no=version_no + ) + return json.dumps(agent_dict) + + +def _collect_skill_names_from_tree( + agent_id: int, + tenant_id: str, + version_no: int, + visited: Optional[set] = None, +) -> List[str]: + """Collect unique skill names from an agent tree at the given version.""" + if visited is None: + visited = set() + + skill_names: List[str] = [] + seen_names: set = set() + + def _walk(current_agent_id: int, current_version_no: int) -> None: + visit_key = (current_agent_id, current_version_no) + if visit_key in visited: + return + visited.add(visit_key) + + skill_instances = skill_db.query_skill_instances_by_agent_id( + agent_id=current_agent_id, + tenant_id=tenant_id, + version_no=current_version_no, + ) + for inst in skill_instances: + skill_id = inst.get("skill_id") + skill = skill_db.get_skill_by_id(skill_id, tenant_id) + if skill: + name = skill.get("name") + if name and name not in seen_names: + seen_names.add(name) + skill_names.append(name) + + relations = query_sub_agent_relations( + main_agent_id=current_agent_id, + tenant_id=tenant_id, + version_no=current_version_no, + ) + for rel in relations: + child_id = 
rel["selected_agent_id"] + child_version = resolve_sub_agent_version_no( + child_id, + rel.get("selected_agent_version_no"), + tenant_id, + ) + _walk(child_id, child_version) + + _walk(agent_id, version_no) + return skill_names + + +def collect_skill_zip_entries( + agent_id: int, + tenant_id: str, + version_no: int = 0, +) -> List[SkillZipEntry]: + """Export skill ZIP payloads for all skills in an agent tree.""" + skill_names = _collect_skill_names_from_tree(agent_id, tenant_id, version_no) + if not skill_names: + return [] + + skill_service = SkillService(tenant_id=tenant_id) + exported = skill_service.export_skills_by_names(skill_names, tenant_id) + return [ + SkillZipEntry( + skill_name=entry["skill_name"], + skill_zip_base64=entry["skill_zip_base64"], + ) + for entry in exported + ] + + +async def export_agent_by_agent_id( + agent_id: int, + tenant_id: str, + user_id: str, + version_no: int = 0, +) -> ExportAndImportAgentInfo: + """Export a single agent's information based on agent_id and version_no.""" agent_info = search_agent_info_by_agent_id( - agent_id=agent_id, tenant_id=tenant_id) + agent_id=agent_id, tenant_id=tenant_id, version_no=version_no + ) agent_relation_in_db = query_sub_agents_id_list( - main_agent_id=agent_id, tenant_id=tenant_id) - tool_list = await create_tool_config_list(agent_id=agent_id, tenant_id=tenant_id, user_id=user_id) + main_agent_id=agent_id, tenant_id=tenant_id, version_no=version_no + ) + tool_list = await create_tool_config_list( + agent_id=agent_id, + tenant_id=tenant_id, + user_id=user_id, + version_no=version_no, + ) + + # Collect skill names from skill instances + skill_names: List[str] = [] + try: + skill_instances = skill_db.query_skill_instances_by_agent_id( + agent_id=agent_id, tenant_id=tenant_id, version_no=version_no + ) + for inst in skill_instances: + skill_id = inst.get("skill_id") + skill = skill_db.get_skill_by_id(skill_id, tenant_id) + if skill: + name = skill.get("name") + if name: + skill_names.append(name) + 
except Exception as e: + logger.warning( + f"Failed to collect skill instances for agent {agent_id}: {e}") # Check if any tool is KnowledgeBaseSearchTool and set its metadata to empty dict for tool in tool_list: - if tool.class_name in ["KnowledgeBaseSearchTool", "AnalyzeTextFileTool", "AnalyzeImageTool", "DataMateSearchTool"]: + if tool.class_name in ["KnowledgeBaseSearchTool", "AnalyzeTextFileTool", "AnalyzeImageTool", "AnalyzeAudioTool", "AnalyzeVideoTool", "DataMateSearchTool"]: tool.metadata = {} # Get model_id and model display name from agent_info @@ -1113,16 +1653,20 @@ async def export_agent_by_agent_id(agent_id: int, tenant_id: str, user_id: str) model_display_name = None if model_id is not None: model_info = get_model_by_model_id(model_id) - model_display_name = model_info.get("display_name") if model_info is not None else None + model_display_name = model_info.get( + "display_name") if model_info is not None else None # Get business_logic_model_id and business logic model display name business_logic_model_id = agent_info.get("business_logic_model_id") business_logic_model_display_name = None if business_logic_model_id is not None: - business_logic_model_info = get_model_by_model_id(business_logic_model_id) - business_logic_model_display_name = business_logic_model_info.get("display_name") if business_logic_model_info is not None else None + business_logic_model_info = get_model_by_model_id( + business_logic_model_id) + business_logic_model_display_name = business_logic_model_info.get( + "display_name") if business_logic_model_info is not None else None agent_info = ExportAndImportAgentInfo(agent_id=agent_id, + tenant_id=agent_info["tenant_id"], name=agent_info["name"], display_name=agent_info["display_name"], description=agent_info["description"], @@ -1130,6 +1674,7 @@ async def export_agent_by_agent_id(agent_id: int, tenant_id: str, user_id: str) author=agent_info.get("author"), max_steps=agent_info["max_steps"], 
provide_run_summary=agent_info["provide_run_summary"], + verification_config=agent_info.get("verification_config"), duty_prompt=agent_info.get( "duty_prompt"), constraint_prompt=agent_info.get( @@ -1142,14 +1687,19 @@ async def export_agent_by_agent_id(agent_id: int, tenant_id: str, user_id: str) model_id=model_id, model_name=model_display_name, business_logic_model_id=business_logic_model_id, - business_logic_model_name=business_logic_model_display_name) + business_logic_model_name=business_logic_model_display_name, + skill_names=skill_names, + prompt_template_id=agent_info.get( + "prompt_template_id"), + prompt_template_name=agent_info.get("prompt_template_name")) return agent_info async def import_agent_impl( agent_info: ExportAndImportDataFormat, authorization: str = Header(None), - force_import: bool = False + force_import: bool = False, + skill_name_to_id: Optional[Dict[str, int]] = None ): """ Import agent using DFS. @@ -1237,9 +1787,9 @@ async def import_agent_by_agent_id( enabled=True, params=tool.params)) # check the validity of the agent parameters - if import_agent_info.max_steps <= 0 or import_agent_info.max_steps > 20: + if import_agent_info.max_steps <= 0 or import_agent_info.max_steps > 30: raise ValueError( - f"Invalid max steps: {import_agent_info.max_steps}. max steps must be greater than 0 and less than 20.") + f"Invalid max steps: {import_agent_info.max_steps}. max steps must be greater than 0 and less than 30.") if not import_agent_info.name.isidentifier(): raise ValueError( f"Invalid agent name: {import_agent_info.name}. 
agent name must be a valid python variable name.") @@ -1275,8 +1825,11 @@ async def import_agent_by_agent_id( "model_name": import_agent_info.model_name, "business_logic_model_id": business_logic_model_id, "business_logic_model_name": import_agent_info.business_logic_model_name, + "prompt_template_id": import_agent_info.prompt_template_id or SYSTEM_PROMPT_TEMPLATE_ID, + "prompt_template_name": import_agent_info.prompt_template_name or SYSTEM_PROMPT_TEMPLATE_NAME, "max_steps": import_agent_info.max_steps, "provide_run_summary": import_agent_info.provide_run_summary, + "verification_config": getattr(import_agent_info, "verification_config", None), "duty_prompt": import_agent_info.duty_prompt, "constraint_prompt": import_agent_info.constraint_prompt, "few_shots_prompt": import_agent_info.few_shots_prompt, @@ -1300,7 +1853,8 @@ async def import_agent_by_agent_id( release_note="Initial version from Agent Market" ) except Exception as e: - logger.warning(f"Failed to auto-publish version v1 for agent {new_agent_id}: {str(e)}") + logger.warning( + f"Failed to auto-publish version v1 for agent {new_agent_id}: {str(e)}") return new_agent_id @@ -1329,12 +1883,11 @@ async def clear_agent_new_mark_impl(agent_id: int, tenant_id: str, user_id: str) user_id (str): User ID (for audit purposes) """ rowcount = clear_agent_new_mark(agent_id, tenant_id, user_id) - logger.info(f"clear_agent_new_mark_impl called for agent_id={agent_id}, tenant_id={tenant_id}, user_id={user_id}, affected_rows={rowcount}") + logger.info( + f"clear_agent_new_mark_impl called for agent_id={agent_id}, tenant_id={tenant_id}, user_id={user_id}, affected_rows={rowcount}") return rowcount - - async def list_all_agent_info_impl(tenant_id: str, user_id: str) -> list[dict]: """ list all agent info @@ -1380,7 +1933,8 @@ async def list_all_agent_info_impl(tenant_id: str, user_id: str) -> list[dict]: # Apply visibility filter for DEV/USER based on group overlap if not can_edit_all: - agent_group_ids = 
set(convert_string_to_list(agent.get("group_ids"))) + agent_group_ids = set( + convert_string_to_list(agent.get("group_ids"))) ingroup_permission = agent.get("ingroup_permission") is_creator = str(agent.get("created_by")) == str(user_id) # Hide agent if: no group overlap OR (ingroup_permission is PRIVATE AND user is not creator) @@ -1408,23 +1962,24 @@ async def list_all_agent_info_impl(tenant_id: str, user_id: str) -> list[dict]: simple_agent_list: list[dict] = [] for entry in enriched_agents: agent = entry["raw_agent"] - unavailable_reasons = list(dict.fromkeys(entry["unavailable_reasons"])) + unavailable_reasons = list( + dict.fromkeys(entry["unavailable_reasons"])) model_id = agent.get("model_id") model_info = None if model_id is not None: if model_id not in model_cache: - model_cache[model_id] = get_model_by_model_id(model_id, tenant_id) + model_cache[model_id] = get_model_by_model_id( + model_id, tenant_id) model_info = model_cache.get(model_id) - # Permission logic: - # - If creator or can_edit_all: PERMISSION_EDIT - # - Otherwise: use ingroup_permission, default to PERMISSION_READ if None - if can_edit_all or str(agent.get("created_by")) == str(user_id): - permission = PERMISSION_EDIT - else: - ingroup_permission = agent.get("ingroup_permission") - permission = ingroup_permission if ingroup_permission is not None else PERMISSION_READ + # Permission logic (ASSET_OWNER-scoped + non-ASSET_OWNER role => READ_ONLY first): + permission = resolve_agent_list_permission( + user_role=user_role, + agent=agent, + user_id=user_id, + can_edit_all=can_edit_all, + ) simple_agent_list.append({ "agent_id": agent["agent_id"], @@ -1486,8 +2041,9 @@ def _mark_duplicates(groups: dict[str, list[dict]], reason_key: str) -> None: for duplicate_entry in sorted_entries[1:]: duplicate_entry["unavailable_reasons"].append(reason_key) - _mark_duplicates(name_groups, "duplicate_name") - _mark_duplicates(display_name_groups, "duplicate_display_name") + _mark_duplicates(name_groups, 
AgentUnavailableReason.DUPLICATE_NAME) + _mark_duplicates(display_name_groups, + AgentUnavailableReason.DUPLICATE_DISPLAY_NAME) def _collect_model_availability_reasons(agent: dict, tenant_id: str, model_cache: Dict[int, Optional[dict]]) -> list[str]: @@ -1499,7 +2055,7 @@ def _collect_model_availability_reasons(agent: dict, tenant_id: str, model_cache model_id=agent.get("model_id"), tenant_id=tenant_id, model_cache=model_cache, - reason_key="model_unavailable" + reason_key=AgentUnavailableReason.MODEL_UNAVAILABLE )) return reasons @@ -1557,15 +2113,17 @@ def check_agent_availability( agent_info = search_agent_info_by_agent_id(agent_id, tenant_id) if not agent_info: - return False, ["agent_not_found"] + return False, [AgentUnavailableReason.AGENT_NOT_FOUND] # Check tool availability - tool_info = search_tools_for_sub_agent(agent_id=agent_id, tenant_id=tenant_id) - tool_id_list = [tool["tool_id"] for tool in tool_info if tool.get("tool_id") is not None] + tool_info = search_tools_for_sub_agent( + agent_id=agent_id, tenant_id=tenant_id) + tool_id_list = [tool["tool_id"] + for tool in tool_info if tool.get("tool_id") is not None] if tool_id_list: tool_statuses = check_tool_is_available(tool_id_list) if not all(tool_statuses): - unavailable_reasons.append("tool_unavailable") + unavailable_reasons.append(AgentUnavailableReason.TOOL_UNAVAILABLE) # Check model availability model_reasons = _collect_model_availability_reasons( @@ -1639,7 +2197,20 @@ async def prepare_agent_run( is_debug=agent_request.is_debug, override_version_no=agent_request.version_no, override_model_id=agent_request.model_id, + tool_params=agent_request.tool_params, ) + + # Mount conversation-level reusable ContextManager if enabled + cm_config = getattr(agent_run_info.agent_config, + 'context_manager_config', None) + if cm_config and cm_config.enabled: + cm = agent_run_manager.get_or_create_context_manager( + conversation_id=str(agent_request.conversation_id), + config=cm_config, + 
max_steps=agent_run_info.agent_config.max_steps + ) + agent_run_info.context_manager = cm + agent_run_manager.register_agent_run( agent_request.conversation_id, agent_run_info, user_id) return agent_run_info, memory_context @@ -1744,18 +2315,19 @@ def _memory_token(message_text: str) -> str: yield data_chunk except Exception as run_exc: logger.error( - f"Agent run error after memory failure: {str(run_exc)}") - # Emit an error chunk and terminate the stream immediately - error_payload = json.dumps( - {"type": "error", "content": str(run_exc)}, ensure_ascii=False) - yield f"data: {error_payload}\n\n" + "Agent run error after memory failure: %r", + run_exc, + exc_info=True, + ) + yield _safe_agent_stream_error_chunk() return - except Exception as e: - logger.error(f"Generate stream with memory error: {str(e)}") - # Emit an error chunk and terminate the stream immediately - error_payload = json.dumps( - {"type": "error", "content": str(e)}, ensure_ascii=False) - yield f"data: {error_payload}\n\n" + except Exception as stream_exc: + logger.error( + "Generate stream with memory error: %r", + stream_exc, + exc_info=True, + ) + yield _safe_agent_stream_error_chunk() return finally: # Always unregister preprocess task @@ -1763,7 +2335,6 @@ def _memory_token(message_text: str) -> str: # Helper function for run_agent_stream, used when user memory is disabled (no memory tokens) -@monitoring_manager.monitor_endpoint("agent_service.generate_stream_no_memory", exclude_params=["authorization"]) async def generate_stream_no_memory( agent_request: AgentRequest, user_id: str, @@ -1773,7 +2344,6 @@ async def generate_stream_no_memory( """Stream agent responses without any memory preprocessing tokens or fallback logic.""" # Prepare run info respecting memory disabled (honor provided user_id/tenant_id) - monitoring_manager.add_span_event("generate_stream_no_memory.started") agent_run_info, memory_context = await prepare_agent_run( agent_request=agent_request, user_id=user_id, @@ 
-1781,10 +2351,7 @@ async def generate_stream_no_memory( language=language, allow_memory_search=False, ) - monitoring_manager.add_span_event("generate_stream_no_memory.completed") - monitoring_manager.add_span_event( - "generate_stream_no_memory.streaming.started") async for data_chunk in _stream_agent_chunks( agent_request=agent_request, user_id=user_id, @@ -1793,11 +2360,8 @@ async def generate_stream_no_memory( memory_ctx=memory_context, ): yield data_chunk - monitoring_manager.add_span_event( - "generate_stream_no_memory.streaming.completed") -@monitoring_manager.monitor_endpoint("agent_service.run_agent_stream", exclude_params=["authorization"]) async def run_agent_stream( agent_request: AgentRequest, http_request: Request, @@ -1810,27 +2374,6 @@ async def run_agent_stream( Start an agent run and stream responses. If user_id or tenant_id is provided, authorization will be overridden. (Useful in northbound apis) """ - import time - - # Add initial span attributes for tracking - monitoring_manager.set_span_attributes( - agent_id=agent_request.agent_id, - conversation_id=agent_request.conversation_id, - is_debug=agent_request.is_debug, - skip_user_save=skip_user_save, - has_override_user_id=user_id is not None, - has_override_tenant_id=tenant_id is not None, - query_length=len(agent_request.query) if agent_request.query else 0, - history_count=len( - agent_request.history) if agent_request.history else 0, - minio_files_count=len( - agent_request.minio_files) if agent_request.minio_files else 0 - ) - - # Step 1: Resolve user tenant language - resolve_start_time = time.time() - monitoring_manager.add_span_event("user_resolution.started") - resolved_user_id, resolved_tenant_id, language = _resolve_user_tenant_language( authorization=authorization, http_request=http_request, @@ -1838,25 +2381,7 @@ async def run_agent_stream( tenant_id=tenant_id, ) - resolve_duration = time.time() - resolve_start_time - monitoring_manager.add_span_event("user_resolution.completed", { 
- "duration": resolve_duration, - "user_id": resolved_user_id, - "tenant_id": resolved_tenant_id, - "language": language - }) - monitoring_manager.set_span_attributes( - resolved_user_id=resolved_user_id, - resolved_tenant_id=resolved_tenant_id, - language=language, - user_resolution_duration=resolve_duration - ) - - # Step 2: Save user message (if needed) if not agent_request.is_debug and not skip_user_save: - save_start_time = time.time() - monitoring_manager.add_span_event("user_message_save.started") - save_messages( agent_request, target=MESSAGE_ROLE["USER"], @@ -1864,56 +2389,39 @@ async def run_agent_stream( tenant_id=resolved_tenant_id, ) - save_duration = time.time() - save_start_time - monitoring_manager.add_span_event("user_message_save.completed", { - "duration": save_duration - }) - monitoring_manager.set_span_attributes( - user_message_saved=True, - user_message_save_duration=save_duration - ) - else: - monitoring_manager.add_span_event("user_message_save.skipped", { - "reason": "debug_mode" if agent_request.is_debug else "skip_user_save_flag" - }) - monitoring_manager.set_span_attributes(user_message_saved=False) - - # Step 3: Build memory context (skip for debug mode) - memory_start_time = time.time() - monitoring_manager.add_span_event("memory_context_build.started") - memory_ctx_preview = build_memory_context( resolved_user_id, resolved_tenant_id, agent_request.agent_id, skip_query=agent_request.is_debug ) - - memory_duration = time.time() - memory_start_time memory_enabled = memory_ctx_preview.user_config.memory_switch - monitoring_manager.add_span_event("memory_context_build.completed", { - "duration": memory_duration, - "memory_enabled": memory_enabled, - "agent_share_option": getattr(memory_ctx_preview.user_config, "agent_share_option", "unknown"), - "debug_mode": agent_request.is_debug - }) - monitoring_manager.set_span_attributes( + + agent_metadata = monitoring_manager.bind_agent_context(AgentRunMetadata( + agent_id=agent_request.agent_id, 
+ conversation_id=agent_request.conversation_id, + user_id=resolved_user_id, + tenant_id=resolved_tenant_id, + query=agent_request.query, + is_debug=agent_request.is_debug, + language=language, memory_enabled=memory_enabled, - memory_context_build_duration=memory_duration, - agent_share_option=getattr( - memory_ctx_preview.user_config, "agent_share_option", "unknown") - ) + history_count=len( + agent_request.history) if agent_request.history else 0, + minio_files_count=len( + agent_request.minio_files) if agent_request.minio_files else 0, + extra_metadata={ + "agent_share_option": getattr( + memory_ctx_preview.user_config, + "agent_share_option", + "unknown", + ), + "skip_user_save": skip_user_save, + "has_override_user_id": user_id is not None, + "has_override_tenant_id": tenant_id is not None, + }, + )) - # Step 4: Choose streaming strategy - strategy_start_time = time.time() use_memory_stream = memory_enabled and not agent_request.is_debug - monitoring_manager.add_span_event("streaming_strategy.selected", { - "strategy": "with_memory" if use_memory_stream else "no_memory", - "memory_enabled": memory_enabled, - "is_debug": agent_request.is_debug - }) - if use_memory_stream: - monitoring_manager.add_span_event( - "stream_generator.memory_stream.creating") stream_gen = generate_stream_with_memory( agent_request, user_id=resolved_user_id, @@ -1921,8 +2429,6 @@ async def run_agent_stream( language=language, ) else: - monitoring_manager.add_span_event( - "stream_generator.no_memory_stream.creating") stream_gen = generate_stream_no_memory( agent_request, user_id=resolved_user_id, @@ -1930,43 +2436,25 @@ async def run_agent_stream( language=language, ) - strategy_duration = time.time() - strategy_start_time - monitoring_manager.add_span_event("streaming_strategy.completed", { - "duration": strategy_duration, - "selected_strategy": "with_memory" if use_memory_stream else "no_memory" - }) - monitoring_manager.set_span_attributes( - streaming_strategy=( - "with_memory" if 
use_memory_stream else "no_memory"), - strategy_selection_duration=strategy_duration - ) - - # Step 5: Create streaming response - response_start_time = time.time() - monitoring_manager.add_span_event("streaming_response.creating") + async def stream_with_agent_context(): + try: + with agent_monitoring_context(agent_metadata): + async for data_chunk in stream_gen: + yield data_chunk + except Exception as stream_exc: + logger.error( + "Agent stream response error: %r", + stream_exc, + exc_info=True, + ) + yield _safe_agent_stream_error_chunk() - response = StreamingResponse( - stream_gen, + return StreamingResponse( + stream_with_agent_context(), media_type="text/event-stream", headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}, ) - response_duration = time.time() - response_start_time - monitoring_manager.add_span_event("streaming_response.created", { - "duration": response_duration, - "media_type": "text/event-stream" - }) - monitoring_manager.set_span_attributes( - response_creation_duration=response_duration, - total_preparation_duration=(time.time() - resolve_start_time) - ) - - monitoring_manager.add_span_event("run_agent_stream.preparation_completed", { - "total_preparation_time": time.time() - resolve_start_time - }) - - return response - def stop_agent_tasks(conversation_id: int, user_id: str): """ @@ -2153,3 +2641,124 @@ def get_sub_agents_recursive(parent_agent_id: int, depth: int = 0, max_depth: in logger.exception( f"Failed to get agent call relationship for agent {agent_id}: {str(e)}") raise ValueError(f"Failed to get agent call relationship: {str(e)}") + + +async def export_agent_with_skills_impl( + agent_id: int, + authorization: str, + version_no: int = 0, +) -> dict: + """Export an agent, returning a ZIP if it has skill instances, otherwise a plain dict. 
+ + The response is either: + - A dict with {"_zip": True, "data": bytes, "filename": str} when the agent has skills + - ExportAndImportDataFormat as a plain dict when the agent has no skills + """ + user_id, tenant_id, _ = get_current_user_info(authorization) + + skill_zip_entries = collect_skill_zip_entries( + agent_id=agent_id, tenant_id=tenant_id, version_no=version_no + ) + + if not skill_zip_entries: + return await export_agent_dict_impl( + agent_id, authorization, version_no=version_no + ) + + agent_json_str = await export_agent_impl( + agent_id, authorization, version_no=version_no + ) + + zip_buffer = io.BytesIO() + with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zf: + zf.writestr("agent.json", agent_json_str) + for entry in skill_zip_entries: + skill_zip_bytes = base64.b64decode(entry.skill_zip_base64) + zf.writestr(f"skills/{entry.skill_name}.zip", skill_zip_bytes) + + zip_buffer.seek(0) + zip_data = zip_buffer.read() + + agent_info = search_agent_info_by_agent_id( + agent_id=agent_id, tenant_id=tenant_id, version_no=version_no + ) + agent_name = agent_info.get( + "name", "anonymous") if agent_info else "anonymous" + + filename = f"{agent_name}.zip" + + return { + "_zip": True, + "data": zip_data, + "filename": filename + } + + +async def import_agent_with_skills_impl( + agent_info: "ExportAndImportDataFormat", + skills: List[SkillZipEntry], + authorization: str, + force_import: bool = False +): + """Import an agent with skills bundled from a ZIP export. + + For each skill in the bundle: + 1. Check if a skill with the same name already exists in the target tenant. + 2. If duplicates exist, raise SkillDuplicateError (do not create anything). + 3. If no duplicates, create the skill from ZIP bytes via SkillService. + 4. Create a SkillInstance linking the new skill_id to the new agent_id. + + Then proceeds with the standard agent import flow using the mapped skill IDs. 
+ """ + from services.skill_service import SkillService + + user_id, tenant_id, _ = get_current_user_info(authorization) + + skill_name_to_zip_base64 = { + entry.skill_name: entry.skill_zip_base64 for entry in skills} + + existing_skills = skill_db.list_skills(tenant_id) + existing_skill_names = {s.get("name") for s in existing_skills} + + import_skill_names = set(skill_name_to_zip_base64.keys()) + duplicate_names = list(import_skill_names & existing_skill_names) + + if duplicate_names: + raise SkillDuplicateError(duplicate_names) + + skill_name_to_id: Dict[str, int] = {} + skill_service = SkillService(tenant_id=tenant_id) + + for skill_name, zip_base64 in skill_name_to_zip_base64.items(): + zip_bytes = base64.b64decode(zip_base64) + result = skill_service.create_skill_from_zip_bytes( + zip_bytes=zip_bytes, + skill_name=skill_name, + source="导入", + user_id=user_id, + tenant_id=tenant_id, + skip_duplicate_check=True + ) + skill_name_to_id[skill_name] = result.get("skill_id") + + agent_id_mapping = await import_agent_impl( + agent_info, authorization, force_import, + skill_name_to_id=skill_name_to_id + ) + + main_agent_id = agent_id_mapping.get(agent_info.agent_id) + if main_agent_id: + for skill_name, new_skill_id in skill_name_to_id.items(): + skill_db.create_or_update_skill_by_skill_info( + skill_info=SkillInstanceInfoRequest( + skill_id=new_skill_id, + agent_id=main_agent_id, + enabled=True, + version_no=0 + ), + tenant_id=tenant_id, + user_id=user_id, + version_no=0 + ) + + return agent_id_mapping diff --git a/backend/services/agent_version_service.py b/backend/services/agent_version_service.py index e8a443e3f..8ed6e14d4 100644 --- a/backend/services/agent_version_service.py +++ b/backend/services/agent_version_service.py @@ -22,6 +22,7 @@ delete_tool_snapshot, delete_relation_snapshot, delete_skill_snapshot, + restore_agent_draft, get_next_version_no, delete_version, SOURCE_TYPE_NORMAL, @@ -32,6 +33,7 @@ ) from database.model_management_db import 
get_model_by_model_id from utils.str_utils import convert_string_to_list +from consts.agent_unavailable_reasons import AgentUnavailableReason logger = logging.getLogger("agent_version_service") @@ -47,6 +49,17 @@ def _remove_audit_fields_for_insert(data: dict) -> None: data.pop('delete_flag', None) +def _build_sub_agent_relations(relations: List[dict]) -> List[dict]: + """Map relation snapshots to sub-agent relation payloads for API responses.""" + return [ + { + 'agent_id': r['selected_agent_id'], + 'version_no': r.get('selected_agent_version_no'), + } + for r in relations + ] + + def publish_version_impl( agent_id: int, tenant_id: str, @@ -90,11 +103,18 @@ def publish_version_impl( _remove_audit_fields_for_insert(tool_snapshot) insert_tool_snapshot(tool_snapshot) - # Insert relation snapshots + # Insert relation snapshots with pinned child agent versions for rel in relations_draft: + child_id = rel['selected_agent_id'] + child_version = query_current_version_no(child_id, tenant_id) + if child_version is None: + raise ValueError( + f"Sub-agent {child_id} has no published version; publish the sub-agent first." 
+ ) rel_snapshot = rel.copy() rel_snapshot.pop('version_no', None) rel_snapshot['version_no'] = new_version_no + rel_snapshot['selected_agent_version_no'] = child_version _remove_audit_fields_for_insert(rel_snapshot) insert_relation_snapshot(rel_snapshot) @@ -124,7 +144,9 @@ def publish_version_impl( 'source_type': source_type, 'source_version_no': source_version_no, 'status': STATUS_RELEASED, + 'is_a2a': publish_as_a2a, 'created_by': user_id, + 'updated_by': user_id, } version_id = insert_version(version_data) @@ -267,6 +289,7 @@ def get_version_detail_impl( # Extract sub_agent_id_list from relations result['sub_agent_id_list'] = [r['selected_agent_id'] for r in relations_snapshot] + result['sub_agent_relations'] = _build_sub_agent_relations(relations_snapshot) # Get skill instances for this version (from ag_skill_instance_t with version_no) from database import skill_db as skill_db_module @@ -335,21 +358,18 @@ def _check_version_snapshot_availability( # Check if agent info exists if not agent_info: - return False, ["agent_not_found"] + return False, [AgentUnavailableReason.AGENT_NOT_FOUND] # Check model availability model_id = agent_info.get('model_id') if model_id is None or model_id == 0: - unavailable_reasons.append("model_not_configured") + unavailable_reasons.append(AgentUnavailableReason.MODEL_NOT_CONFIGURED) - # Check tools availability - if not tool_instances: - unavailable_reasons.append("no_tools") - else: - # Check if at least one tool is enabled + # Check tools availability (only when tools are configured) + if tool_instances: has_enabled_tool = any(t.get('enabled', True) for t in tool_instances) if not has_enabled_tool: - unavailable_reasons.append("all_tools_disabled") + unavailable_reasons.append(AgentUnavailableReason.ALL_TOOLS_DISABLED) return len(unavailable_reasons) == 0, unavailable_reasons @@ -360,9 +380,11 @@ def rollback_version_impl( target_version_no: int, ) -> dict: """ - Rollback to a specific version by updating current_version_no 
only. - This does NOT create a new version - it simply points the draft to an existing version. - The actual version creation happens when user clicks "publish". + Rollback to a specific version by restoring draft (version_no=0) with the target version's data. + This copies all snapshot data (agent, tools, relations, skills) from the target version into the draft, + then updates current_version_no to point to the target version. + + The user can then continue editing or re-publish from the restored state. Args: agent_id: Agent ID @@ -377,15 +399,35 @@ def rollback_version_impl( if not version: raise ValueError(f"Version {target_version_no} not found") - # Update current_version_no in draft to point to target version - rows_affected = update_agent_current_version( + # Get target version's snapshot data + (target_agent, target_tools, + target_relations) = query_agent_snapshot(agent_id, tenant_id, target_version_no) + if not target_agent: + raise ValueError(f"Agent snapshot for version {target_version_no} not found") + + # Ensure the draft still exists before attempting an in-place restore. 
+ draft_agent, _, _ = query_agent_draft(agent_id, tenant_id) + if not draft_agent: + raise ValueError("Agent draft not found") + + # Get skill snapshots for target version + from database import skill_db as skill_db_module + target_skills = skill_db_module.query_skill_instances_by_agent_id( agent_id=agent_id, tenant_id=tenant_id, - current_version_no=target_version_no, + version_no=target_version_no, ) - if rows_affected == 0: - raise ValueError("Agent draft not found") + # Atomically restore draft from target version snapshot + restore_agent_draft( + agent_id=agent_id, + tenant_id=tenant_id, + target_version_no=target_version_no, + target_agent_snapshot=target_agent, + target_tool_snapshots=target_tools, + target_relation_snapshots=target_relations, + target_skill_snapshots=target_skills, + ) return { "message": f"Successfully rolled back to version {target_version_no}", @@ -687,6 +729,7 @@ def _get_version_detail_or_draft( # Add tools (only enabled tools) result['tools'] = [t for t in tools_draft if t.get('enabled', True)] result['sub_agent_id_list'] = [r['selected_agent_id'] for r in relations_draft] + result['sub_agent_relations'] = _build_sub_agent_relations(relations_draft) # Get draft skill instances (version_no=0) skills_draft = skill_db_module.query_skill_instances_by_agent_id( @@ -760,12 +803,11 @@ async def list_published_agents_impl( CAN_EDIT_ALL_USER_ROLES, get_user_tenant_by_user_id, query_group_ids_by_user, - PERMISSION_EDIT, - PERMISSION_READ, get_model_by_model_id, check_agent_availability, _apply_duplicate_name_availability_rules, ) + from services.asset_owner_visibility import resolve_agent_list_permission from database.agent_version_db import query_agent_snapshot # Get user role for permission check @@ -798,7 +840,8 @@ async def list_published_agents_impl( # Apply visibility filter for DEV/USER based on group overlap if not can_edit_all: agent_group_ids = set(convert_string_to_list(agent.get("group_ids"))) - if 
len(user_group_ids.intersection(agent_group_ids)) == 0: + is_creator = str(agent.get("created_by")) == str(user_id) + if not is_creator and len(user_group_ids.intersection(agent_group_ids)) == 0: continue agent_id = agent.get("agent_id") @@ -834,9 +877,10 @@ async def list_published_agents_impl( # Extract sub_agent_id_list from relations agent_info['sub_agent_id_list'] = [r['selected_agent_id'] for r in relations_snapshot] + agent_info['sub_agent_relations'] = _build_sub_agent_relations(relations_snapshot) - # Add published version info - agent_info['published_version_no'] = current_version_no + # Add current version info + agent_info['current_version_no'] = current_version_no # Check agent availability using the shared function _, unavailable_reasons = check_agent_availability( @@ -869,7 +913,12 @@ async def list_published_agents_impl( model_cache[model_id] = get_model_by_model_id(model_id, tenant_id) model_info = model_cache.get(model_id) - permission = PERMISSION_EDIT if can_edit_all or str(agent.get("created_by")) == str(user_id) else PERMISSION_READ + permission = resolve_agent_list_permission( + user_role=user_role, + agent=agent, + user_id=user_id, + can_edit_all=can_edit_all, + ) simple_agent_list.append({ "agent_id": agent.get("agent_id"), @@ -885,7 +934,9 @@ async def list_published_agents_impl( "is_new": agent.get("is_new", False), "group_ids": agent.get("group_ids", []), "permission": permission, - "published_version_no": agent.get("published_version_no"), + "current_version_no": agent.get("current_version_no"), + "greeting_message": agent.get("greeting_message"), + "example_questions": agent.get("example_questions"), }) return simple_agent_list diff --git a/backend/services/aidp_service.py b/backend/services/aidp_service.py new file mode 100644 index 000000000..acb18142e --- /dev/null +++ b/backend/services/aidp_service.py @@ -0,0 +1,99 @@ +""" +AIDP Service Layer +Handles API calls to AIDP for paginated knowledge base listing. 
def fetch_aidp_knowledge_bases_impl(
    server_url: str,
    api_key: str,
    page: int = 1,
    page_size: int = 20,
) -> Dict[str, Any]:
    """Fetch one page of knowledge bases from the AIDP API.

    Args:
        server_url: Base URL of the AIDP server; validated and stripped of
            trailing slashes by ``_validate_params``.
        api_key: Sent as a ``Bearer`` token in the ``Authorization`` header.
        page: Page number forwarded as the ``page`` query parameter.
        page_size: Page size forwarded as the ``page_size`` query parameter.

    Returns:
        The decoded JSON object returned by AIDP.

    Raises:
        AppException: ``AIDP_CONFIG_INVALID`` when the parameters are rejected
            by ``_validate_params``; ``AIDP_CONNECTION_ERROR`` when the request
            itself fails; ``AIDP_AUTH_ERROR`` on HTTP 401/403;
            ``AIDP_SERVICE_ERROR`` on any other HTTP error status, on a body
            that cannot be parsed, or on a JSON body that is not an object.
    """
    normalized_url = _validate_params(server_url, api_key)

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    # Plain concatenation instead of urljoin(): _LIST_PATH starts with "/", and
    # urljoin() resolves such a reference against the host root, silently
    # discarding any path prefix configured in server_url
    # (e.g. "https://host/gateway" would lose "/gateway").
    list_url = f"{normalized_url}{_LIST_PATH}?page={page}&page_size={page_size}"
    logger.info("Fetching AIDP knowledge bases from %s", list_url)

    try:
        client = http_client_manager.get_sync_client(
            base_url=normalized_url,
            timeout=20.0,
            verify_ssl=True,
        )
        response = client.get(list_url, headers=headers)
        response.raise_for_status()
        result = response.json()
        if not isinstance(result, dict):
            raise AppException(
                ErrorCode.AIDP_SERVICE_ERROR,
                "Unexpected AIDP knowledge base response format",
            )
        return result
    except httpx.RequestError as e:
        # Transport-level failure: no HTTP response was received.
        logger.exception("AIDP request failed: %s", e)
        raise AppException(
            ErrorCode.AIDP_CONNECTION_ERROR,
            f"AIDP API request failed: {str(e)}",
        ) from e
    except httpx.HTTPStatusError as e:
        status_code = e.response.status_code
        logger.exception(
            "AIDP API HTTP error: %s, status_code: %s",
            e,
            status_code,
        )
        if status_code in (401, 403):
            raise AppException(
                ErrorCode.AIDP_AUTH_ERROR,
                f"AIDP authentication failed: {str(e)}",
            ) from e
        raise AppException(
            ErrorCode.AIDP_SERVICE_ERROR,
            f"AIDP API HTTP error {status_code}: {str(e)}",
        ) from e
    except ValueError as e:
        # response.json() signals an undecodable body with a ValueError subclass.
        logger.exception("Failed to parse AIDP API response: %s", e)
        raise AppException(
            ErrorCode.AIDP_SERVICE_ERROR,
            f"Failed to parse AIDP API response: {str(e)}",
        ) from e
def resolve_agent_list_permission(
    user_role: str,
    agent: Dict[str, Any],
    user_id: str,
    can_edit_all: bool,
) -> str:
    """
    Work out the permission shown for one agent in a list response.

    Rules, in priority order:
      1. An agent whose tenant_id is the ASSET_OWNER tenant is read-only for
         every caller whose role is not ASSET_OWNER. This wins over
         can_edit_all, over being the creator, and over ingroup_permission.
      2. Callers with can_edit_all, and the agent's creator, get edit permission.
      3. Otherwise the agent's own ingroup_permission is used, falling back to
         read permission when it is None.

    Args:
        user_role: Caller role; compared case-insensitively, None/"" allowed.
        agent: Agent record; reads tenant_id, created_by, ingroup_permission.
        user_id: Caller id, compared to created_by as strings.
        can_edit_all: Whether the caller may edit every agent.

    Returns:
        PERMISSION_EDIT, PERMISSION_READ, or the agent's ingroup_permission value.
    """
    caller_is_asset_owner = (user_role or "").upper() == ASSET_OWNER_ROLE
    agent_is_asset_owner_scoped = agent.get("tenant_id") == ASSET_OWNER_TENANT_ID
    if agent_is_asset_owner_scoped and not caller_is_asset_owner:
        return PERMISSION_READ

    if can_edit_all or str(agent.get("created_by")) == str(user_id):
        return PERMISSION_EDIT

    group_permission = agent.get("ingroup_permission")
    if group_permission is None:
        return PERMISSION_READ
    return group_permission
+ """ + result = dict(agent_info) + if caller_tenant_id == ASSET_OWNER_TENANT_ID: + return result + if result.get("tenant_id") != ASSET_OWNER_TENANT_ID: + return result + for field in _PROMPT_FIELDS: + result[field] = None + result[AGENT_PROMPTS_HIDDEN_FLAG] = True + return result + + +def postprocess_knowledge_visibility( + items: List[Dict[str, Any]], + caller_role: Optional[str], + caller_tenant_id: Optional[str], +) -> List[Dict[str, Any]]: + """Return knowledge records after visibility post-processing (no-op for now).""" + _ = (caller_role, caller_tenant_id) + return items diff --git a/backend/services/auto_summary_scheduler.py b/backend/services/auto_summary_scheduler.py new file mode 100644 index 000000000..5bc44e442 --- /dev/null +++ b/backend/services/auto_summary_scheduler.py @@ -0,0 +1,211 @@ +""" +Background scheduler that periodically checks knowledge bases with +auto-summary enabled and regenerates summaries as needed. +""" +import logging +import threading +import time +from datetime import datetime, timedelta +from typing import Optional + +from consts.scheduler import ( + FREQUENCY_MAP, + SCHEDULER_CHECK_INTERVAL_SECONDS, +) +from database.knowledge_db import get_knowledge_bases_for_auto_summary +from services.vectordatabase_service import ElasticSearchService, get_vector_db_core +from utils.config_utils import tenant_config_manager + +logger = logging.getLogger(__name__) + +# Check interval from centralized config +CHECK_INTERVAL_SECONDS = SCHEDULER_CHECK_INTERVAL_SECONDS + +# Track knowledge bases currently being processed to avoid duplicates +_in_flight: set = set() + + +def _parse_last_summary_time(last_summary_time) -> Optional[datetime]: + """Parse last_summary_time from various formats.""" + if last_summary_time is None: + return None + if isinstance(last_summary_time, datetime): + return last_summary_time.replace(tzinfo=None) + if isinstance(last_summary_time, str): + try: + return datetime.fromisoformat(last_summary_time) + except 
(ValueError, TypeError): + return None + return None + + +def _is_due_for_summary(last_summary_time, frequency: str, last_doc_update_time) -> bool: + """Check if a knowledge base is due for summary regeneration. + + Args: + last_summary_time: Timestamp of last summary generation + frequency: Summary frequency (e.g., '3h', '1d') + last_doc_update_time: Timestamp of last document add/delete operation + + Returns: + True if summary should be regenerated, False otherwise + """ + interval = FREQUENCY_MAP.get(frequency) + if interval is None: + return False + + last = _parse_last_summary_time(last_summary_time) + if last is None: + return True # Never summarized, do it now + + # Check if time interval has elapsed + if (datetime.now() - last) < interval: + return False + + # Check if there are new document changes since last summary + doc_update = _parse_last_summary_time(last_doc_update_time) + if doc_update is None: + return True # No doc update time recorded, assume need summary + + # Skip if no new documents since last summary + if doc_update <= last: + logger.info(f"Skipping summary: no document changes since last summary") + return False + + return True + + +def _run_auto_summary_for_kb(index_name: str, tenant_id: str): + """Run the summary generation for a single knowledge base.""" + if index_name in _in_flight: + logger.info(f"Skipping {index_name}: already being processed") + return + + _in_flight.add(index_name) + try: + logger.info(f"Starting auto-summary for knowledge base: {index_name}") + vdb_core = get_vector_db_core() + service = ElasticSearchService() + + from utils.document_vector_utils import ( + process_documents_for_clustering, + kmeans_cluster_documents, + summarize_clusters_map_reduce, + merge_cluster_summaries, + ) + + # Get model_id from tenant config for LLM summarization + model_id = None + if tenant_id: + try: + tenant_config = tenant_config_manager.load_config(tenant_id) + model_id_str = tenant_config.get("LLM_ID") + if model_id_str: + 
model_id = int(model_id_str) + logger.info(f"Using LLM model ID {model_id} for auto-summary (tenant: {tenant_id})") + else: + logger.warning(f"No LLM_ID configured for tenant {tenant_id}, summary will be placeholder only") + except Exception as e: + logger.warning(f"Failed to get LLM_ID from tenant config: {e}") + + sample_count = 40 # Smaller sample for auto-summary + document_samples, doc_embeddings = process_documents_for_clustering( + index_name=index_name, + vdb_core=vdb_core, + sample_doc_count=sample_count, + ) + + if not document_samples: + logger.warning(f"No documents found for auto-summary: {index_name}") + return + + clusters = kmeans_cluster_documents(doc_embeddings, k=None) + cluster_summaries = summarize_clusters_map_reduce( + document_samples=document_samples, + clusters=clusters, + language="zh", + doc_max_words=100, + cluster_max_words=150, + model_id=model_id, + tenant_id=tenant_id, + ) + final_summary = merge_cluster_summaries(cluster_summaries) + + # Save the summary and update last_summary_time + service.change_summary( + index_name=index_name, + summary_result=final_summary, + user_id="auto_scheduler", + ) + # change_summary already calls update_last_summary_time + logger.info(f"Auto-summary completed for knowledge base: {index_name}") + + except Exception as e: + logger.error(f"Auto-summary failed for {index_name}: {e}", exc_info=True) + finally: + _in_flight.discard(index_name) + + +def _scheduler_loop(stop_event: threading.Event): + """Main scheduler loop that runs in a background thread.""" + logger.info("Auto-summary scheduler started") + while not stop_event.is_set(): + try: + kbs = get_knowledge_bases_for_auto_summary() + logger.info(f"Checking {len(kbs)} knowledge bases for auto-summary") + + for kb in kbs: + if stop_event.is_set(): + break + frequency = kb.get("summary_frequency") + if _is_due_for_summary( + kb.get("last_summary_time"), + frequency, + kb.get("last_doc_update_time") + ): + _run_auto_summary_for_kb( + 
class AutoSummaryScheduler:
    """Owns the background thread that runs ``_scheduler_loop``."""

    def __init__(self):
        # Set by stop() to ask the loop to exit; cleared again by start().
        self._stop_event = threading.Event()
        self._thread: Optional[threading.Thread] = None

    def start(self):
        """Start the scheduler thread unless one is already alive."""
        if self._thread and self._thread.is_alive():
            logger.warning("Auto-summary scheduler is already running")
            return
        self._stop_event.clear()
        self._thread = threading.Thread(
            target=_scheduler_loop,
            args=(self._stop_event,),
            daemon=True,
            name="auto-summary-scheduler",
        )
        self._thread.start()
        logger.info("Auto-summary scheduler thread started")

    def stop(self):
        """Signal the scheduler thread to stop and wait up to 60 seconds for it.

        The "stopped" message is only logged when the thread has really
        finished (or was never started); if the join times out a warning is
        logged instead, because the thread is still running at that point.
        """
        self._stop_event.set()
        thread = self._thread
        if thread:
            thread.join(timeout=60)
            if thread.is_alive():
                logger.warning(
                    "Auto-summary scheduler thread did not stop within 60 seconds"
                )
                return
        logger.info("Auto-summary scheduler thread stopped")
def build_login_url(redirect: str = "/") -> str:
    """Build the CAS ``/login`` URL that sends the user back to our callback.

    Args:
        redirect: Path to return to after login; passed through
            ``_normalize_redirect`` before it is embedded in the callback URL.

    Returns:
        ``{CAS_SERVER_URL}/login?service=<percent-encoded callback URL>``.

    Raises:
        CasAuthenticationError: When CAS is not enabled/configured or the
            callback base URL is missing (raised by the helpers called here).
    """
    _ensure_enabled()
    service_url = _build_callback_url(
        "/api/user/cas/callback",
        {"redirect": _normalize_redirect(redirect)},
    )
    # The callback URL carries its own "?redirect=..." query. Without
    # percent-encoding, any "&" or "#" inside it would be parsed as part of the
    # CAS login URL instead of staying inside the `service` value.
    encoded_service = urllib.parse.quote(service_url, safe="")
    return f"{CAS_SERVER_URL}/login?service={encoded_service}"
def validate_service_ticket(ticket: str, service_url: str) -> CasPrincipal:
    """Validate a CAS service ticket and return the authenticated principal.

    Args:
        ticket: Service ticket received on the callback request.
        service_url: The exact callback URL that was used as ``service`` when
            the user was sent to CAS.

    Returns:
        The principal parsed from the CAS validation response; the ticket is
        used as the fallback session index.

    Raises:
        CasAuthenticationError: When CAS is not configured, the ticket is
            empty, or the validation response reports/implies a failure.
    """
    _ensure_enabled()
    if not ticket:
        raise CasAuthenticationError("CAS ticket is missing")

    validate_path = CAS_VALIDATE_PATH if CAS_VALIDATE_PATH.startswith("/") else f"/{CAS_VALIDATE_PATH}"
    # Both values are percent-encoded: the ticket comes straight from the
    # request and service_url contains its own query string, so raw
    # interpolation would let either one inject extra query parameters.
    query = urllib.parse.urlencode({"service": service_url, "ticket": ticket})
    xml_text = _http_get_text(f"{CAS_SERVER_URL}{validate_path}?{query}")
    # The response carries the user's attributes (email, role, tenant), so it
    # is only written at DEBUG level rather than into regular INFO logs.
    logger.debug("CAS serviceValidate response: %s", xml_text)
    return parse_service_validate_response(xml_text, fallback_session_index=ticket)
def parse_logout_request(logout_request: str) -> Dict[str, str]:
    """Extract the user id and session index from a CAS logoutRequest XML body.

    The user id is the first non-empty text among the ``NameID``, ``nameID``,
    ``user`` and ``casUserId`` elements, looked up in that order.

    Args:
        logout_request: Raw logoutRequest XML; may be empty.

    Returns:
        A dict with ``cas_user_id`` and ``session_index``; both are empty
        strings when the input is empty, is not valid XML, or lacks the
        corresponding element.
    """
    parsed = {"cas_user_id": "", "session_index": ""}
    if not logout_request:
        return parsed

    try:
        document = ET.fromstring(logout_request)
    except (ET.ParseError, DefusedXmlException):
        logger.warning("Invalid CAS logoutRequest XML")
        return parsed

    parsed["session_index"] = _get_child_text(document, "SessionIndex") or ""
    for tag in ("NameID", "nameID", "user", "casUserId"):
        text = _get_child_text(document, tag)
        if text:
            parsed["cas_user_id"] = text
            break
    return parsed
def _resolve_project_user(principal: CasPrincipal) -> str:
    """Map a CAS principal to a project user id, creating the user if needed.

    Lookup order:
      1. An existing OAuth account for provider "cas" and this CAS user id.
      2. A Supabase user with the principal's email.
      3. A newly created Supabase user (random password, email pre-confirmed).

    In every case the CAS OAuth account record is created or refreshed with the
    principal's current email, username and tenant.

    Raises:
        RuntimeError: When no linked account exists and the Supabase admin
            client is not available.
    """

    def _sync_account(linked_user_id):
        # Create or refresh the CAS <-> project user link.
        create_or_update_oauth_account(
            user_id=linked_user_id,
            provider=CAS_PROVIDER,
            provider_user_id=principal.cas_user_id,
            email=principal.email,
            username=principal.username,
            tenant_id=principal.tenant_id,
        )
        return linked_user_id

    linked_account = get_oauth_account_by_provider(CAS_PROVIDER, principal.cas_user_id)
    if linked_account:
        return _sync_account(linked_account["user_id"])

    admin_client = get_supabase_admin_client()
    if not admin_client:
        raise RuntimeError("Supabase admin client not available")

    user_id = find_supabase_user_id_by_email(admin_client, principal.email)
    if not user_id:
        new_user_payload = {
            "email": principal.email,
            "password": secrets.token_urlsafe(32),
            "email_confirm": True,
            "user_metadata": {
                "full_name": principal.username,
                "provider": CAS_PROVIDER,
                "cas_user_id": principal.cas_user_id,
            },
        }
        user_id = admin_client.auth.admin.create_user(new_user_payload).user.id

    return _sync_account(user_id)
= f"?{query}" if query else "" + return f"{CAS_CALLBACK_BASE_URL}{path}{suffix}" + + +def _build_callback_query(params: Dict[str, str]) -> str: + return "&".join(f"{key}={value}" for key, value in params.items()) + + +def _normalize_redirect(redirect: str) -> str: + if not redirect or not redirect.startswith("/") or redirect.startswith("//"): + return "/" + return redirect + + +def _build_ssl_context() -> ssl.SSLContext: + if CAS_CA_BUNDLE and os.path.isfile(CAS_CA_BUNDLE): + return ssl.create_default_context(cafile=CAS_CA_BUNDLE) + if not CAS_SSL_VERIFY: + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + return ctx + return ssl.create_default_context() + + +def _http_get_text(url: str) -> str: + req = urllib.request.Request(url, headers={"Accept": "application/xml,text/xml,*/*"}) + with urllib.request.urlopen(req, timeout=15, context=_build_ssl_context()) as resp: + return resp.read().decode("utf-8") + + +def _local_name(tag: str) -> str: + return tag.rsplit("}", 1)[-1] + + +def _find_first(node: Element, name: str) -> Optional[Element]: + for child in node.iter(): + if _local_name(child.tag) == name: + return child + return None + + +def _get_child_text(node: Element, name: str) -> str: + found = _find_first(node, name) + return (found.text or "").strip() if found is not None else "" + + +def _extract_attributes(attrs_node: Element) -> Dict[str, str]: + attrs: Dict[str, str] = {} + for child in list(attrs_node): + value = (child.text or "").strip() + if value: + attrs[_local_name(child.tag)] = value + return attrs + + +def _attribute_or_default(attrs: Dict[str, str], key: str, default: str) -> str: + if key and key in attrs: + return attrs[key] + return default + + +def _map_role(raw_role: str) -> str: + role = (raw_role or "USER").upper() + try: + role_map = json.loads(CAS_ROLE_MAP_JSON) if CAS_ROLE_MAP_JSON else {} + role = str(role_map.get(raw_role, role_map.get(role, role))).upper() + except Exception: + 
logger.warning("Invalid CAS_ROLE_MAP_JSON; falling back to raw role") + return role if role in VALID_ROLES else "USER" + + +def _resolve_expires_at(attrs: Dict[str, str]) -> datetime: + for key in ("expiresAt", "expirationDate", "validUntil", "notOnOrAfter"): + value = attrs.get(key) + if not value: + continue + parsed = _parse_datetime(value) + if parsed: + return parsed + return datetime.now() + timedelta(seconds=CAS_SESSION_MAX_AGE_SECONDS) + + +def _parse_datetime(value: str) -> Optional[datetime]: + try: + if value.isdigit(): + timestamp = int(value) + if timestamp > 10_000_000_000: + timestamp = timestamp / 1000 + return datetime.fromtimestamp(timestamp) + normalized = value.replace("Z", "+00:00") + parsed = datetime.fromisoformat(normalized) + if parsed.tzinfo: + parsed = parsed.astimezone().replace(tzinfo=None) + return parsed + except Exception: + return None diff --git a/backend/services/config_sync_service.py b/backend/services/config_sync_service.py index 9fe50813a..7feea9452 100644 --- a/backend/services/config_sync_service.py +++ b/backend/services/config_sync_service.py @@ -20,7 +20,7 @@ MODEL_ENGINE_ENABLED, TENANT_NAME ) -from database.model_management_db import get_model_id_by_display_name +from database.model_management_db import get_model_id_by_display_name, get_model_records from utils.config_utils import ( get_env_key, get_model_name_from_config, @@ -31,6 +31,20 @@ logger = logging.getLogger("config_sync_service") +def get_model_id_for_config(model_type: str, display_name: str, tenant_id: str) -> Optional[int]: + if not display_name: + return None + + records = get_model_records( + {"display_name": display_name, "model_type": model_type}, + tenant_id + ) + if records: + return records[0].get("model_id") + + return get_model_id_by_display_name(display_name, tenant_id) + + def handle_model_config(tenant_id: str, user_id: str, config_key: str, model_id: Optional[int], tenant_config_dict: dict) -> None: """ Handle model configuration updates, 
deletions, and settings operations @@ -98,8 +112,8 @@ async def save_config_impl(config, tenant_id, user_id): model_display_name = model_config.get("displayName") config_key = get_env_key(model_type) + "_ID" - model_id = get_model_id_by_display_name( - model_display_name, tenant_id) + model_id = get_model_id_for_config( + model_type, model_display_name, tenant_id) handle_model_config(tenant_id, user_id, config_key, model_id, tenant_config_dict) @@ -112,6 +126,21 @@ async def save_config_impl(config, tenant_id, user_id): embedding_api_config = model_config.get("apiConfig", {}) env_config[f"{model_prefix}_API_KEY"] = safe_value( embedding_api_config.get("apiKey")) + + # Save STT specific fields for speech recognition models + if model_type == "stt": + if model_config.get("modelFactory"): + stt_factory_key = "STT_MODEL_FACTORY" + tenant_config_manager.set_single_config( + user_id, tenant_id, stt_factory_key, model_config.get("modelFactory")) + if model_config.get("modelAppid"): + stt_appid_key = "STT_MODEL_APPID" + tenant_config_manager.set_single_config( + user_id, tenant_id, stt_appid_key, model_config.get("modelAppid")) + if model_config.get("accessToken"): + stt_token_key = "STT_ACCESS_TOKEN" + tenant_config_manager.set_single_config( + user_id, tenant_id, stt_token_key, model_config.get("accessToken")) logger.info("Configuration saved successfully") @@ -167,6 +196,7 @@ def build_models_config(tenant_id: str) -> dict: def build_model_config(model_config: dict) -> dict: if not model_config: return { + "id": None, "name": "", "displayName": "", "apiConfig": { @@ -176,6 +206,7 @@ def build_model_config(model_config: dict) -> dict: } config = { + "id": model_config.get("model_id"), "name": get_model_name_from_config(model_config) if model_config else "", "displayName": model_config.get("display_name", ""), "apiConfig": { @@ -187,4 +218,11 @@ def build_model_config(model_config: dict) -> dict: if "embedding" in model_config.get("model_type", ""): config["dimension"] = 
model_config.get("max_tokens", 0) + # Add voice model specific fields (STT and TTS) + model_type = model_config.get("model_type", "") + if model_type == "stt" or model_type == "tts": + config["modelFactory"] = model_config.get("model_factory", "") + config["modelAppid"] = model_config.get("model_appid", "") + config["accessToken"] = model_config.get("access_token", "") + return config diff --git a/backend/services/conversation_management_service.py b/backend/services/conversation_management_service.py index b98e79897..12edea7d5 100644 --- a/backend/services/conversation_management_service.py +++ b/backend/services/conversation_management_service.py @@ -8,6 +8,7 @@ from consts.const import LANGUAGE, MODEL_CONFIG_MAPPING, MESSAGE_ROLE, DEFAULT_EN_TITLE, DEFAULT_ZH_TITLE from consts.model import AgentRequest, ConversationResponse, MessageRequest, MessageUnit +from consts.exceptions import ConversationNotFoundError from database.conversation_db import ( create_conversation, create_conversation_message, @@ -18,16 +19,20 @@ get_conversation, get_conversation_history, get_conversation_list, + get_latest_assistant_message_id, get_message_id_by_index, get_source_images_by_conversation, get_source_images_by_message, get_source_searches_by_conversation, get_source_searches_by_message, rename_conversation, + update_message_minio_files, update_message_opinion ) from nexent.core.utils.observer import MessageObserver, ProcessType +from nexent.monitor import set_monitoring_context, set_monitoring_operation from nexent.core.models import OpenAIModel +from agents.agent_run_manager import agent_run_manager from utils.config_utils import get_model_name_from_config, tenant_config_manager from utils.prompt_template_utils import get_generate_title_prompt_template from utils.str_utils import remove_think_blocks @@ -122,7 +127,15 @@ def save_message(request: MessageRequest, user_id: str, tenant_id: str): # Parse image URL list content_json = json.loads(unit_content) if 
isinstance(content_json, dict) and 'images_url' in content_json: + # Deduplicate image URLs before saving + seen_urls = set() + unique_urls = [] for image_url in content_json['images_url']: + if image_url not in seen_urls: + seen_urls.add(image_url) + unique_urls.append(image_url) + # Also deduplicate against any URLs already saved in this same message + for image_url in unique_urls: image_data = {'message_id': message_id, 'conversation_id': conversation_id, 'image_url': image_url} create_source_image(image_data) @@ -200,7 +213,7 @@ def save_message(request: MessageRequest, user_id: str, tenant_id: str): def save_conversation_user(request: AgentRequest, user_id: str, tenant_id: str): user_role_count = sum(1 for item in getattr( - request, "history", []) if item.get("role") == MESSAGE_ROLE["USER"]) + request, "history", []) if item.role == MESSAGE_ROLE["USER"]) conversation_req = MessageRequest(conversation_id=request.conversation_id, message_idx=user_role_count * 2, role=MESSAGE_ROLE["USER"], message=[MessageUnit(type="string", content=request.query)], minio_files=request.minio_files) @@ -209,7 +222,7 @@ def save_conversation_user(request: AgentRequest, user_id: str, tenant_id: str): def save_conversation_assistant(request: AgentRequest, messages: List[str], user_id: str, tenant_id: str): user_role_count = sum(1 for item in getattr( - request, "history", []) if item.get("role") == MESSAGE_ROLE["USER"]) + request, "history", []) if item.role == MESSAGE_ROLE["USER"]) message_list = [] for item in messages: @@ -222,7 +235,7 @@ def save_conversation_assistant(request: AgentRequest, messages: List[str], user message_list.append(message) conversation_req = MessageRequest(conversation_id=request.conversation_id, message_idx=user_role_count * 2 + 1, - role=MESSAGE_ROLE["ASSISTANT"], message=message_list, minio_files=request.minio_files) + role=MESSAGE_ROLE["ASSISTANT"], message=message_list, minio_files=None) save_message(conversation_req, user_id=user_id, 
tenant_id=tenant_id) @@ -239,9 +252,14 @@ def call_llm_for_title(question: str, tenant_id: str, language: str = LANGUAGE[" str: Generated title """ prompt_template = get_generate_title_prompt_template(language=language) + set_monitoring_context(tenant_id=tenant_id, user_id=None) model_config = tenant_config_manager.get_model_config( key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id) + display_name = model_config.get("display_name", "") if model_config else "" + set_monitoring_operation("title_generation", display_name=display_name or None) + + timeout_seconds = model_config.get("timeout_seconds") if model_config else None # Create OpenAIModel instance llm = OpenAIModel( @@ -251,7 +269,9 @@ def call_llm_for_title(question: str, tenant_id: str, language: str = LANGUAGE[" temperature=0.7, top_p=0.95, model_factory=model_config.get("model_factory", None), - ssl_verify=model_config.get("ssl_verify", True) + ssl_verify=model_config.get("ssl_verify", True), + timeout_seconds=timeout_seconds, + stream=False, ) # Build messages - use new template variable 'question' instead of 'content' @@ -287,7 +307,9 @@ def update_conversation_title(conversation_id: int, title: str, user_id: str = N """ success = rename_conversation(conversation_id, title, user_id) if not success: - raise Exception(f"Conversation {conversation_id} does not exist or has been deleted") + raise ConversationNotFoundError( + f"Conversation {conversation_id} does not exist or has been deleted" + ) return success @@ -362,6 +384,11 @@ def delete_conversation_service(conversation_id: int, user_id: str) -> bool: success = delete_conversation(conversation_id, user_id) if not success: raise Exception(f"Conversation {conversation_id} does not exist or has been deleted") + + # Defensive cleanup: release the ContextManager associated with this conversation + # to avoid memory leaks in edge cases + agent_run_manager.clear_conversation_context_manager(conversation_id) + return True except Exception as e: 
logging.error(f"Failed to delete conversation: {str(e)}") @@ -429,13 +456,15 @@ def get_conversation_history_service(conversation_id: int, user_id: str) -> List search_by_message[message_id] = [] search_by_message[message_id].append(search_item) - # Collect image content - grouped by message_id + # Collect image content - grouped by message_id, with URL deduplication image_by_message = {} for record in history_data['image_records']: message_id = record['message_id'] if message_id not in image_by_message: image_by_message[message_id] = [] - image_by_message[message_id].append(record['image_url']) + # Only add if not already present (by URL) + if record['image_url'] not in image_by_message[message_id]: + image_by_message[message_id].append(record['image_url']) # Sort by message index and build final message list, including images and search content messages = [] @@ -495,6 +524,10 @@ def get_conversation_history_service(conversation_id: int, user_id: str) -> List 'opinion_flag': msg['opinion_flag'] } + # Add minio_files field (if any, e.g., skill-generated attachments) + if 'minio_files' in msg and msg['minio_files']: + message_item['minio_files'] = msg['minio_files'] + # Add image content (if any) if message_id in image_by_message: message_item['picture'] = image_by_message[message_id] @@ -687,3 +720,52 @@ async def get_message_id_by_index_impl(conversation_id: int, message_index: int) if message_id is None: raise Exception("Message not found.") return message_id + + +def save_skill_files_to_conversation( + conversation_id: int, + skill_file_uploads: List[Dict[str, Any]], + user_id: str, +) -> bool: + """ + Append skill file upload records to the latest assistant message in a conversation. + + This persists generated documents (e.g., DOCX, XLSX created by skills) to the + conversation history so they appear in subsequent GET /conversation/{id} calls. 
+ + Args: + conversation_id: Target conversation ID + skill_file_uploads: List of upload metadata dicts (e.g., from upload_fileobj) + user_id: User ID for ownership validation + + Returns: + bool: True if files were saved, False if no assistant message was found + """ + if not skill_file_uploads: + return False + + try: + message_id = get_latest_assistant_message_id(conversation_id, user_id) + if message_id is None: + logging.warning( + "[skill-file] no assistant message found for conversation=%s, " + "cannot persist skill file uploads", + conversation_id, + ) + return False + + success = update_message_minio_files(message_id, skill_file_uploads) + if success: + logging.info( + "[skill-file] persisted %d file(s) to message_id=%s conversation=%s", + len(skill_file_uploads), + message_id, + conversation_id, + ) + return success + except Exception as exc: + logging.exception( + "[skill-file] failed to persist skill file uploads for conversation=%s", + conversation_id, + ) + return False diff --git a/backend/services/data_process_service.py b/backend/services/data_process_service.py index 2b222a584..a7529127c 100644 --- a/backend/services/data_process_service.py +++ b/backend/services/data_process_service.py @@ -15,7 +15,7 @@ import redis import torch from PIL import Image -from celery import states, chain +from celery import states from transformers import CLIPProcessor, CLIPModel from nexent.data_process.core import DataProcessCore @@ -25,7 +25,7 @@ from database.attachment_db import delete_file, file_exists, get_file_size_from_minio, get_file_stream, upload_file from utils.file_management_utils import convert_office_to_pdf from data_process.app import app as celery_app -from data_process.tasks import process, forward +from data_process.tasks import submit_process_forward_chain from data_process.utils import get_task_info, get_all_task_ids_from_redis # Limit concurrent LibreOffice processes to avoid resource exhaustion @@ -54,7 +54,8 @@ def __init__(self): 
self._inspector = None self._inspector_last_time = 0 - self._inspector_ttl = 60 # Inspector cache time in seconds + # 5 minutes - inspector is expensive to create (ping all workers) + self._inspector_ttl = 300 self._inspector_lock = None self._inspector_lock = threading.Lock() @@ -105,7 +106,7 @@ async def stop(self): logger.info("Data processing service stopped") def _get_celery_inspector(self): - """Get Celery inspector""" + """Get Celery inspector (cached for performance)""" with self._inspector_lock: now = time.time() if self._inspector and now - self._inspector_last_time < self._inspector_ttl: @@ -117,9 +118,9 @@ def _get_celery_inspector(self): f"Celery broker URL is not configured properly, reconfiguring to {celery_app.conf.broker_url}") try: inspector = celery_app.control.inspect() - inspector.ping() self._inspector = inspector self._inspector_last_time = now + self._inspector_init_time = now return inspector except Exception as e: self._inspector = None @@ -142,67 +143,131 @@ async def get_all_tasks(self, filter: bool = True) -> List[Dict[str, Any]]: all_tasks = [] try: start_time = time.time() - logger.debug( - "Getting inspector to check for active and reserved tasks (concurrent)") + inspector_start = time.time() inspector = self._get_celery_inspector() - logger.debug( - f"⏰ Inspector initialization took {time.time() - start_time}s") + inspector_duration = time.time() - inspector_start - # Collect task IDs from different sources + # Collect task IDs from different sources and keep runtime metadata task_ids = set() + runtime_task_meta: Dict[str, Dict[str, Any]] = {} + + def _normalize_runtime_meta(task: Dict[str, Any]) -> Dict[str, Any]: + task_name_full = task.get('name', '') or '' + task_name = task_name_full.split( + '.')[-1] if task_name_full else '' + kwargs = task.get('kwargs') or {} + if isinstance(kwargs, str): + try: + import json as _json + kwargs = _json.loads(kwargs) + except Exception: + kwargs = {} + if not isinstance(kwargs, dict): + kwargs 
= {} + return { + 'task_name': task_name, + 'index_name': kwargs.get('index_name', ''), + 'path_or_url': kwargs.get('source', ''), + 'original_filename': kwargs.get('original_filename', ''), + } + + celery_start = time.time() + + # Use short timeout for inspector since workers can respond in ~0.1s + # Default 1s timeout is unnecessary and causes delay + short_timeout = 0.2 def get_active(): - return inspector.active() + t = time.time() + # Create fresh inspector with short timeout for each call + short_inspector = celery_app.control.inspect( + timeout=short_timeout) + result = short_inspector.active() + elapsed = time.time() - t + logger.info( + f"[get_all_tasks] inspector.active() took {elapsed:.3f}s") + return result if result else {} def get_reserved(): - return inspector.reserved() + t = time.time() + short_inspector = celery_app.control.inspect( + timeout=short_timeout) + result = short_inspector.reserved() + elapsed = time.time() - t + logger.info( + f"[get_all_tasks] inspector.reserved() took {elapsed:.3f}s") + return result if result else {} + with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: future_active = executor.submit(get_active) future_reserved = executor.submit(get_reserved) - active_tasks_dict = future_active.result() - reserved_tasks_dict = future_reserved.result() - logger.debug( - f"⏰ Get active and reserved tasks (concurrent) took {time.time() - start_time}s") + active_tasks_dict = future_active.result( + timeout=short_timeout + 0.5) + reserved_tasks_dict = future_reserved.result( + timeout=short_timeout + 0.5) + celery_duration = time.time() - celery_start + if celery_duration > 0.5: + logger.warning( + f"[get_all_tasks] Inspector took {celery_duration:.3f}s (expected <0.5s)") if active_tasks_dict: for worker, tasks in active_tasks_dict.items(): for task in tasks: task_id = task.get('id') if task_id: task_ids.add(task_id) + runtime_task_meta[task_id] = _normalize_runtime_meta( + task) if reserved_tasks_dict: for worker, 
tasks in reserved_tasks_dict.items(): for task in tasks: task_id = task.get('id') if task_id: task_ids.add(task_id) + # Keep active metadata if already present + runtime_task_meta.setdefault( + task_id, _normalize_runtime_meta(task)) - # Currently, we don't have scheduled tasks, so skip getting scheduled tasks here - start_time = time.time() - logger.debug("Getting task IDs from Redis backend") - # Also get task IDs from Redis backend (covers completed/failed tasks within expiry) + # Get task IDs from Redis backend (covers completed/failed tasks within expiry) try: redis_task_ids = get_all_task_ids_from_redis(self.redis_client) - logger.debug( - f"⏰ Get Redis task IDs took {time.time() - start_time}s") for task_id in redis_task_ids: - # Add to the set, duplicates will be handled task_ids.add(task_id) except Exception as redis_error: logger.warning( f"Failed to query Redis for stored task IDs: {str(redis_error)}") - logger.debug( - f"Total unique task IDs collected (inspector + Redis): {len(task_ids)}") - tasks = [get_task_info(task_id) for task_id in task_ids] + + task_id_list = list(task_ids) + # Batch fetch all task info + tasks = [get_task_info(task_id) for task_id in task_id_list] all_task_infos = await asyncio.gather(*tasks, return_exceptions=True) - for task_info in all_task_infos: + for idx, task_info in enumerate(all_task_infos): if isinstance(task_info, Exception): logger.warning( f"Failed to get status for a task: {task_info}") continue + task_id = task_id_list[idx] + runtime_meta = runtime_task_meta.get(task_id, {}) + # Backfill runtime info for pending/reserved tasks that do not have result metadata yet + if runtime_meta: + if not task_info.get('task_name') and runtime_meta.get('task_name'): + task_info['task_name'] = runtime_meta.get('task_name') + if not task_info.get('index_name') and runtime_meta.get('index_name'): + task_info['index_name'] = runtime_meta.get( + 'index_name') + if not task_info.get('path_or_url') and runtime_meta.get('path_or_url'): 
+ task_info['path_or_url'] = runtime_meta.get( + 'path_or_url') + if not task_info.get('original_filename') and runtime_meta.get('original_filename'): + task_info['original_filename'] = runtime_meta.get( + 'original_filename') + if filter and not (task_info.get('index_name') and task_info.get('task_name')): - continue + # Keep user-visible queued tasks even before worker updates task meta. + if task_info.get('task_name') not in {'process', 'forward', 'process_and_forward'}: + continue + if not task_info.get('index_name'): + continue all_tasks.append(task_info) - logger.debug(f"Retrieved {len(all_tasks)} tasks.") except Exception as e: logger.error(f"Error retrieving all tasks: {str(e)}") all_tasks = [] @@ -255,6 +320,17 @@ async def load_image(self, image_url: str) -> Optional[Image.Image]: async def _load_image(self, session: aiohttp.ClientSession, path: str) -> Optional[Image.Image]: """Internal method to load an image from various sources""" try: + if path.startswith('s3://'): + # Fetch from MinIO using s3://bucket/key + file_stream = get_file_stream(object_name=path) + if file_stream is None: + raise FileNotFoundError( + f"Unable to fetch file from URL: {path}") + file_data = file_stream.read() + image_based64_str = base64.b64encode( + file_data).decode('utf-8') + path = f"data:image/jpeg;base64,{image_based64_str}" + # Check if input is base64 encoded if path.startswith('data:image'): # Extract the base64 data after the comma @@ -463,6 +539,8 @@ async def create_batch_tasks_impl(self, authorization: Optional[str], request: B chunking_strategy = source_config.get('chunking_strategy') index_name = source_config.get('index_name') original_filename = source_config.get('original_filename') + embedding_model_id = source_config.get('embedding_model_id') + tenant_id = source_config.get('tenant_id') # Validate required fields if not source: @@ -474,28 +552,23 @@ async def create_batch_tasks_impl(self, authorization: Optional[str], request: B f"Missing required field 
'index_name' in source config: {source_config}") continue - # Create and submit a chain: process -> forward - task_chain = chain( - process.s( - source=source, - source_type=source_type, - chunking_strategy=chunking_strategy, - index_name=index_name, - original_filename=original_filename - ).set(queue='process_q'), - forward.s( - index_name=index_name, - source=source, - source_type=source_type, - original_filename=original_filename, - authorization=authorization - ).set(queue='forward_q') + chain_id = submit_process_forward_chain( + source=source, + source_type=source_type, + chunking_strategy=chunking_strategy, + index_name=index_name, + original_filename=original_filename, + authorization=authorization, + embedding_model_id=embedding_model_id, + tenant_id=tenant_id, ) + if not chain_id: + logger.error( + f"Failed to enqueue process-forward chain for source: {source}") + continue - task_result = task_chain.apply_async() - - task_ids.append(task_result.id) - logger.debug(f"Created task {task_result.id} for source: {source}") + task_ids.append(chain_id) + logger.debug(f"Created task {chain_id} for source: {source}") logger.info( f"Created {len(task_ids)} individual tasks for batch processing") return task_ids @@ -527,7 +600,7 @@ async def process_uploaded_text_file(self, file_content: bytes, filename: str, c f"Processing uploaded file: {filename} using SDK DataProcessCore") data_processor = DataProcessCore() - chunks = data_processor.file_process( + chunks, _ = data_processor.file_process( file_data=file_content, filename=filename, chunking_strategy=chunking_strategy @@ -559,7 +632,7 @@ async def process_uploaded_text_file(self, file_content: bytes, filename: str, c } async def convert_office_to_pdf_impl(self, object_name: str, pdf_object_name: str) -> None: - """Full conversion pipeline: download → convert → upload → validate → cleanup. + """Full conversion pipeline: download -> convert -> upload -> validate -> cleanup. 
All five steps run inside data-process so that LibreOffice only needs to be installed in this container. @@ -576,7 +649,8 @@ async def convert_office_to_pdf_impl(self, object_name: str, pdf_object_name: st # Step 1: Download original Office file from MinIO original_stream = get_file_stream(object_name) if original_stream is None: - raise OfficeConversionException(f"Source file not found in storage: {object_name}") + raise OfficeConversionException( + f"Source file not found in storage: {object_name}") original_filename = os.path.basename(object_name) input_path = os.path.join(temp_dir, original_filename) @@ -588,10 +662,12 @@ async def convert_office_to_pdf_impl(self, object_name: str, pdf_object_name: st try: pdf_path = await convert_office_to_pdf(input_path, temp_dir, timeout=30) except Exception as exc: - raise OfficeConversionException(f"LibreOffice conversion failed: {exc}") from exc + raise OfficeConversionException( + f"LibreOffice conversion failed: {exc}") from exc # Step 3: Upload converted PDF to MinIO - result = upload_file(file_path=pdf_path, object_name=pdf_object_name) + result = upload_file(file_path=pdf_path, + object_name=pdf_object_name) if not result.get('success'): raise OfficeConversionException( f"Failed to upload PDF to MinIO: {result.get('error', 'Unknown error')}" @@ -600,14 +676,16 @@ async def convert_office_to_pdf_impl(self, object_name: str, pdf_object_name: st # Step 4: Validate the uploaded PDF (header check + minimum size) remote_size = get_file_size_from_minio(pdf_object_name) if remote_size <= 0: - raise OfficeConversionException("PDF validation failed: cannot read remote file size") + raise OfficeConversionException( + "PDF validation failed: cannot read remote file size") if remote_size < 100: raise OfficeConversionException( f"PDF validation failed: file too small ({remote_size} bytes)" ) remote_stream = get_file_stream(pdf_object_name) if remote_stream is None: - raise OfficeConversionException("PDF validation failed: cannot 
read uploaded file") + raise OfficeConversionException( + "PDF validation failed: cannot read uploaded file") try: header = remote_stream.read(5) finally: @@ -616,7 +694,8 @@ async def convert_office_to_pdf_impl(self, object_name: str, pdf_object_name: st except Exception: pass if not header.startswith(b'%PDF-'): - raise OfficeConversionException("PDF validation failed: invalid PDF header") + raise OfficeConversionException( + "PDF validation failed: invalid PDF header") except OfficeConversionException: # Clean up any partially-uploaded remote PDF so a future retry starts clean @@ -624,14 +703,16 @@ async def convert_office_to_pdf_impl(self, object_name: str, pdf_object_name: st delete_file(pdf_object_name) raise except Exception as exc: - raise OfficeConversionException(f"Unexpected error during conversion: {exc}") from exc + raise OfficeConversionException( + f"Unexpected error during conversion: {exc}") from exc finally: # Step 5: Clean up local temporary directory if temp_dir and os.path.exists(temp_dir): try: shutil.rmtree(temp_dir) except Exception as cleanup_err: - logger.warning(f"Failed to cleanup temp dir '{temp_dir}': {cleanup_err}") + logger.warning( + f"Failed to cleanup temp dir '{temp_dir}': {cleanup_err}") def convert_celery_states_to_custom(self, process_celery_state: Optional[str], forward_celery_state: Optional[str]) -> str: """Map Celery task states to a custom frontend state string. 
diff --git a/backend/services/datamate_service.py b/backend/services/datamate_service.py index 776e0eb1d..41858440b 100644 --- a/backend/services/datamate_service.py +++ b/backend/services/datamate_service.py @@ -51,7 +51,7 @@ async def _create_datamate_knowledge_records(knowledge_base_ids: List[str], "tenant_id": tenant_id, "user_id": user_id, # Use datamate as embedding model name - "embedding_model_name": embedding_model_names[i] + "embedding_model_name": embedding_model_names[i], } # Run synchronous database operation in executor to avoid blocking diff --git a/backend/services/file_management_service.py b/backend/services/file_management_service.py index d73c91c72..585669c0c 100644 --- a/backend/services/file_management_service.py +++ b/backend/services/file_management_service.py @@ -4,12 +4,14 @@ import os from io import BytesIO from pathlib import Path -from typing import List, Optional, Tuple +from typing import Dict, List, Optional, Tuple import httpx from fastapi import UploadFile from consts.const import ( + ASSET_OWNER_ATTACHMENTS_PREFIX, + ASSET_OWNER_TENANT_ID, DATA_PROCESS_SERVICE, FILE_PREVIEW_SIZE_LIMIT, MAX_CONCURRENT_UPLOADS, @@ -36,6 +38,7 @@ from utils.file_management_utils import save_upload_file from nexent import MessageObserver +from nexent.multi_modal.utils import parse_s3_url from nexent.core.models import OpenAILongContextModel # Create upload directory @@ -49,8 +52,220 @@ logger = logging.getLogger("file_management_service") +ALLOWED_SKILL_UPLOAD_ROOT = Path("/mnt/nexent").resolve() -async def upload_files_impl(destination: str, file: List[UploadFile], folder: str = None, index_name: Optional[str] = None) -> tuple: + +def is_allowed_skill_upload_path(file_path: str) -> bool: + """Return True when a local file path is under the allowed skill upload root.""" + if not file_path: + return False + + try: + candidate_path = Path(file_path).resolve() + except Exception: + return False + + try: + 
candidate_path.relative_to(ALLOWED_SKILL_UPLOAD_ROOT) + return True + except ValueError: + return False + + + + +def resolve_minio_upload_folder( + folder: Optional[str], + user_id: Optional[str] = None, + uploader_tenant_id: Optional[str] = None, +) -> str: + """Map caller context to the MinIO object prefix used for uploads. + + Resolution order (first match wins): + 1. Asset-owner tenant → ``attachments/asset_owner/{user_id}`` + 2. ``folder == "knowledge_base"`` → shared ``knowledge_base`` prefix + 3. Otherwise → per-user ``attachments/{user_id}`` when ``user_id`` is set + 4. Legacy fallback → ``folder`` if provided, else ``attachments`` + + Access control for reads is enforced separately; this function only + chooses the storage prefix. + + Args: + folder: Requested folder hint (e.g. ``"knowledge_base"`` or a legacy path). + user_id: Uploader user ID; required for user-scoped attachment paths. + uploader_tenant_id: Uploader tenant ID; asset-owner tenants use a dedicated prefix. + + Returns: + Resolved MinIO folder prefix (no leading or trailing slash). + """ + if uploader_tenant_id == ASSET_OWNER_TENANT_ID: + return f"{ASSET_OWNER_ATTACHMENTS_PREFIX}/{user_id}" + + if folder == "knowledge_base": + return "knowledge_base" + + if folder == "skill-files": + if user_id: + return f"skill-files/{user_id}" + return "skill-files" + + if user_id: + return f"attachments/{user_id}" + + return folder or "attachments" + + +def check_file_access( + object_name: str, + user_id: Optional[str], + caller_tenant_id: Optional[str] = None, +) -> bool: + """ + Check if user has permission to access the file. 
+ + Access rules: + - knowledge_base/*: All authenticated users can access + - attachments/{user_id}/*: Only the owner (user_id) can access + - images_in_attachments/*: All authenticated users can access + + Args: + object_name: File object name in storage + user_id: Current user ID + + Returns: + True if access is allowed, False otherwise + """ + if not user_id: + return False + + if object_name.startswith(ASSET_OWNER_ATTACHMENTS_PREFIX): + return caller_tenant_id == ASSET_OWNER_TENANT_ID + + if object_name.startswith("knowledge_base/"): + # Knowledge base files: all authenticated users can access + return True + + if object_name.startswith("images_in_attachments/"): + # Extracted image files used by knowledge-base image chunks. + # Keep them readable for authenticated users to avoid broken image citations. + return True + + if object_name.startswith("skill-files/"): + # Generated documents are private to the uploader and must stay user-scoped. + return object_name.startswith(f"skill-files/{user_id}/") + + # Check if file is in user's attachments folder + # Pattern: attachments/{user_id}/* + if object_name.startswith(f"attachments/{user_id}/"): + return True + + # For backward compatibility, allow access to files in root attachments folder + # Pattern: attachments/{filename} (no user_id subfolder) + if object_name.startswith("attachments/") and "/" not in object_name.replace("attachments/", "", 1): + # Old format: attachments/filename (no subdirectory) + # Allow access for backward compatibility + return True + + return False + + +def check_file_access_batch( + object_names: List[str], + user_id: Optional[str], + caller_tenant_id: Optional[str] = None, +) -> Dict[str, bool]: + """ + Batch check file access permissions. 
+ + Args: + object_names: List of file object names + user_id: Current user ID + caller_tenant_id: Caller's tenant ID for ASSET_OWNER path checks + + Returns: + Dict mapping object_name to access permission (True/False) + """ + return { + obj_name: check_file_access(obj_name, user_id, caller_tenant_id) + for obj_name in object_names + } + + +def validate_s3_url_access( + object_name: str, + user_id: Optional[str], + caller_tenant_id: Optional[str] = None, +) -> None: + """ + Validate if user has permission to access the S3 URL. + + Args: + object_name: File object name in storage (extracted from S3 URL) + user_id: Current user ID + + Raises: + PermissionError: If user doesn't have permission to access the file + """ + if not user_id: + raise PermissionError("User authentication required to access files") + + if not check_file_access(object_name, user_id, caller_tenant_id): + logger.warning( + f"[validate_s3_url_access] Access denied: object_name={object_name}, user_id={user_id}") + raise PermissionError( + f"Access denied: You don't have permission to access this file ({object_name})") + + +def validate_urls_access( + urls: List[str], + user_id: Optional[str], + caller_tenant_id: Optional[str] = None, +) -> None: + """ + Validate if user has permission to access the given URLs. + + Supports S3 URLs (s3://bucket/key or /bucket/key format). 
+ + Args: + urls: List of URLs to validate (S3, HTTP, or HTTPS) + user_id: Current user ID + + Raises: + PermissionError: If user doesn't have permission to access any of the files + """ + if not urls: + return + + for url in urls: + if not url: + continue + + # Only validate S3 URLs (MinIO storage) + # HTTP/HTTPS URLs are external resources and are not subject to MinIO access control + if url.startswith("s3://"): + try: + _, object_name = parse_s3_url(url) + validate_s3_url_access(object_name, user_id, caller_tenant_id) + except ValueError as e: + logger.warning( + f"[validate_urls_access] Failed to parse S3 URL: {url}, error: {e}") + raise PermissionError(f"Invalid S3 URL format: {url}") + elif url.startswith("/") and not url.startswith("//"): + # Handle /bucket/key format (absolute path style) + parts = url.strip("/").split("/", 1) + if len(parts) == 2: + bucket, object_name = parts + validate_s3_url_access(object_name, user_id, caller_tenant_id) + + +async def upload_files_impl( + destination: str, + file: List[UploadFile], + folder: str = None, + index_name: Optional[str] = None, + user_id: Optional[str] = None, + uploader_tenant_id: Optional[str] = None, +) -> tuple: """ Upload files to local storage or MinIO based on destination. 
@@ -58,6 +273,9 @@ async def upload_files_impl(destination: str, file: List[UploadFile], folder: st destination: "local" or "minio" file: List of UploadFile objects folder: Folder name for MinIO uploads + index_name: Knowledge base index for conflict resolution + user_id: User ID for attachment path isolation + uploader_tenant_id: Uploader tenant ID (ASSET_OWNER uses dedicated prefix) Returns: tuple: (errors, uploaded_file_paths, uploaded_filenames) @@ -84,7 +302,9 @@ async def upload_files_impl(destination: str, file: List[UploadFile], folder: st errors.append(f"Failed to save file: {f.filename}") elif destination == "minio": - minio_results = await upload_to_minio(files=file, folder=folder) + actual_folder = resolve_minio_upload_folder( + folder, user_id, uploader_tenant_id) + minio_results = await upload_to_minio(files=file, folder=actual_folder) for result in minio_results: if result.get("success"): uploaded_filenames.append(result.get("file_name")) @@ -137,8 +357,26 @@ def make_unique_names(original_names: List[str], taken_lower: set) -> List[str]: return errors, uploaded_file_paths, uploaded_filenames -async def upload_to_minio(files: List[UploadFile], folder: str) -> List[dict]: - """Helper function to upload files to MinIO and return results.""" +async def upload_to_minio( + files: List[UploadFile], + folder: str, + user_id: Optional[str] = None, + uploader_tenant_id: Optional[str] = None, +) -> List[dict]: + """ + Helper function to upload files to MinIO and return results. 
+ + Args: + files: List of files to upload + folder: Storage folder path or resolved MinIO prefix + user_id: User ID for attachment path isolation when folder is generic + uploader_tenant_id: Uploader tenant ID for ASSET_OWNER attachment prefix + + Returns: + List of upload results + """ + actual_folder = resolve_minio_upload_folder( + folder, user_id, uploader_tenant_id) results = [] for f in files: try: @@ -148,13 +386,20 @@ async def upload_to_minio(files: List[UploadFile], folder: str) -> List[dict]: # Convert file content to BytesIO object file_obj = BytesIO(file_content) + # Store original filename before upload + original_filename = f.filename or "" + # Upload file result = upload_fileobj( file_obj=file_obj, - file_name=f.filename or "", - prefix=folder + file_name=original_filename, + prefix=actual_folder, + file_size=len(file_content) ) + # Preserve original filename in result (upload_fileobj uses it for object name generation) + result["original_file_name"] = original_filename + # Reset file pointer for potential re-reading await f.seek(0) results.append(result) @@ -166,6 +411,7 @@ async def upload_to_minio(files: List[UploadFile], folder: str) -> List[dict]: results.append({ "success": False, "file_name": f.filename, + "original_file_name": f.filename, "error": "An error occurred while processing the file." 
}) return results @@ -206,6 +452,8 @@ def get_llm_model(tenant_id: str): # Get the tenant config main_model_config = tenant_config_manager.get_model_config( key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id) + timeout_seconds = main_model_config.get( + "timeout_seconds") if main_model_config else None long_text_to_text_model = OpenAILongContextModel( observer=MessageObserver(), model_id=get_model_name_from_config(main_model_config), @@ -213,6 +461,7 @@ def get_llm_model(tenant_id: str): api_key=main_model_config.get("api_key"), max_context_tokens=main_model_config.get("max_tokens"), ssl_verify=main_model_config.get("ssl_verify", True), + timeout_seconds=timeout_seconds, ) return long_text_to_text_model @@ -244,7 +493,8 @@ async def resolve_preview_file(object_name: str) -> Tuple[str, str, int]: # Office documents - convert to PDF with caching elif content_type in OFFICE_MIME_TYPES: - name_without_ext = object_name.rsplit('.', 1)[0] if '.' in object_name else object_name + name_without_ext = object_name.rsplit( + '.', 1)[0] if '.' 
in object_name else object_name hash_suffix = hashlib.md5(object_name.encode()).hexdigest()[:8] pdf_object_name = f"preview/converted/{name_without_ext}_{hash_suffix}.pdf" temp_pdf_object_name = f"preview/converting/{name_without_ext}_{hash_suffix}.pdf.tmp" @@ -258,7 +508,8 @@ async def resolve_preview_file(object_name: str) -> Tuple[str, str, int]: # Unsupported file type else: - raise UnsupportedFileTypeException(f"Unsupported file type for preview: {content_type}") + raise UnsupportedFileTypeException( + f"Unsupported file type for preview: {content_type}") def get_preview_stream(actual_object_name: str, start: Optional[int] = None, end: Optional[int] = None): @@ -282,7 +533,8 @@ def get_preview_stream(actual_object_name: str, start: Optional[int] = None, end stream = get_file_range(actual_object_name, start, end) if stream is None: - raise NotFoundException("File not found or failed to read from storage") + raise NotFoundException( + "File not found or failed to read from storage") return stream @@ -296,7 +548,8 @@ def _is_pdf_cache_valid(pdf_object_name: str) -> bool: # Verify the cached file is readable by fetching a small range stream = get_file_range(pdf_object_name, 0, 0) if stream is None: - logger.warning(f"Corrupted cache detected (cannot read), deleting: {pdf_object_name}") + logger.warning( + f"Corrupted cache detected (cannot read), deleting: {pdf_object_name}") delete_file(pdf_object_name) return False @@ -305,7 +558,8 @@ def _is_pdf_cache_valid(pdf_object_name: str) -> bool: try: close_fn() except Exception as e: - logger.warning(f"Failed to close cache probe stream for {pdf_object_name}: {str(e)}") + logger.warning( + f"Failed to close cache probe stream for {pdf_object_name}: {str(e)}") return True @@ -358,7 +612,8 @@ async def _convert_office_to_cached_pdf( ) # Atomic move from temp to final location, then clean up temp - copy_result = copy_file(source_object=temp_pdf_object_name, dest_object=pdf_object_name) + copy_result = copy_file( + 
source_object=temp_pdf_object_name, dest_object=pdf_object_name) if not copy_result.get('success'): logger.error( "Failed to finalize converted PDF cache: object=%s, temp=%s, dest=%s, error=%s", @@ -367,7 +622,8 @@ async def _convert_office_to_cached_pdf( pdf_object_name, copy_result.get('error', 'Unknown error'), ) - raise RuntimeError("Failed to finalize converted PDF cache") + raise RuntimeError( + "Failed to finalize converted PDF cache") delete_file(temp_pdf_object_name) except Exception as e: @@ -376,7 +632,8 @@ async def _convert_office_to_cached_pdf( logger.error(f"Office conversion failed: {str(e)}") if isinstance(e, OfficeConversionException): raise - raise OfficeConversionException("Office file conversion failed") from e + raise OfficeConversionException( + "Office file conversion failed") from e finally: # Clean up the file lock (prevents memory leak for many unique files) async with _conversion_locks_guard: diff --git a/backend/services/haotian_service.py b/backend/services/haotian_service.py new file mode 100644 index 000000000..4d86823b5 --- /dev/null +++ b/backend/services/haotian_service.py @@ -0,0 +1,114 @@ +""" +Haotian Service Layer + +Implements proxy fetching and normalization for Haotian external knowledge base APIs. +""" + +import logging +from typing import Any, Dict, List, Tuple + +import httpx + +logger = logging.getLogger("haotian_service") + +_DEFAULT_KNOWLEDGE_BASE_ID = "a8d68fbf-bd6e-5461-a9d1-cf1bb3522e38" + + +def _normalize_list_payload(raw: Dict[str, Any]) -> Dict[str, Any]: + """ + Normalize Haotian list payload to: + { + "knowledge_sets": [ + { + "name": str, + "knowledge_bases": [{"dify_dataset_id": str, "name": str}] + } + ] + } + + When dify_dataset_id is "null", it is replaced with the default ID. 
+ """ + knowledge_sets = raw.get("knowledge_sets", []) + if not isinstance(knowledge_sets, list): + knowledge_sets = [] + + normalized_sets: List[Dict[str, Any]] = [] + for ks in knowledge_sets: + if not isinstance(ks, dict): + continue + set_name = str(ks.get("name", "") or "").strip() + if not set_name: + continue + + bases = ks.get("knowledge_bases", []) + if not isinstance(bases, list): + bases = [] + + normalized_bases: List[Dict[str, Any]] = [] + for kb in bases: + if not isinstance(kb, dict): + continue + dataset_id = str(kb.get("dify_dataset_id", "") or "").strip() + kb_name = str(kb.get("name", "") or "").strip() + if not kb_name: + continue + if dataset_id == "null" or not dataset_id: + dataset_id = _DEFAULT_KNOWLEDGE_BASE_ID + normalized_bases.append( + {"dify_dataset_id": dataset_id, "name": kb_name} + ) + + if normalized_bases: + normalized_sets.append( + {"name": set_name, "knowledge_bases": normalized_bases} + ) + + return {"knowledge_sets": normalized_sets} + + +async def fetch_haotian_knowledge_sets_impl( + list_url: str, + external_authorization: str, + timeout_s: float = 20.0, +) -> Dict[str, Any]: + """ + Fetch knowledge sets from the external Haotian list API. 
+ """ + if not list_url or not isinstance(list_url, str): + raise ValueError("list_url is required and must be a non-empty string") + if not external_authorization or not isinstance(external_authorization, str): + raise ValueError( + "authorization is required and must be a non-empty string" + ) + + headers = {"Authorization": external_authorization} + async with httpx.AsyncClient(timeout=timeout_s, follow_redirects=True, trust_env=False) as client: + resp = await client.get(list_url, headers=headers) + if resp.status_code >= 400: + raise RuntimeError( + f"Haotian list API HTTP error: {resp.status_code}" + ) + data = resp.json() + if not isinstance(data, dict): + raise RuntimeError("Haotian list API returned non-object JSON") + return _normalize_list_payload(data) + + +async def test_haotian_connection_impl( + list_url: str, + external_authorization: str, + timeout_s: float = 10.0, +) -> Tuple[bool, str]: + """ + Test Haotian connection by calling list_url once. + """ + try: + await fetch_haotian_knowledge_sets_impl( + list_url=list_url, + external_authorization=external_authorization, + timeout_s=timeout_s, + ) + return (True, "") + except Exception as e: + return (False, str(e)) + diff --git a/backend/services/image_service.py b/backend/services/image_service.py index 8decbd541..fdef3b081 100644 --- a/backend/services/image_service.py +++ b/backend/services/image_service.py @@ -1,5 +1,9 @@ +import base64 +import ipaddress import logging +import socket from http import HTTPStatus +from urllib.parse import urlparse, urlunparse import aiohttp @@ -13,7 +17,119 @@ logger = logging.getLogger("image_service") +def _validate_loopback_url(decoded_url: str) -> str | None: + """Validate that ``decoded_url`` is a genuine loopback URL and return a + rewritten URL whose host is a literal IPv4 loopback address, or ``None`` + when the input is not safe to fetch directly. + + This is a defense-in-depth check for the fast-path that bypasses the + data-processing service. 
The fast-path is only intended for loopback + images (e.g. served by an in-process component), so we must verify: + + * The scheme is ``http`` or ``https``. + * The hostname resolves to one or more IPv4 addresses, and **every** + resolved address falls inside the standard IPv4 loopback range + ``127.0.0.0/8``. Mixed results are rejected to prevent an attacker + from racing DNS to a private address. + * The URL is rewritten so the host portion is a literal loopback IP. + This both (a) removes the user-controlled hostname from the final + request URL that ``aiohttp`` issues, and (b) blocks DNS rebinding + attacks where the hostname is re-resolved to a private address + between validation and the actual ``GET``. + """ + try: + parsed = urlparse(decoded_url) + except Exception: + return None + + if parsed.scheme not in {"http", "https"}: + return None + + hostname = parsed.hostname + if not hostname: + return None + + try: + resolved_infos = socket.getaddrinfo(hostname, None) + except socket.gaierror: + return None + + if not resolved_infos: + return None + + safe_addresses: list[str] = [] + for info in resolved_infos: + sockaddr = info[4] + candidate = sockaddr[0] + try: + ip = ipaddress.ip_address(candidate) + except ValueError: + return None + if ip.version != 4 or not ip.is_loopback: + return None + safe_addresses.append(candidate) + + # Prefer the literal 127.0.0.1 to keep the rewritten URL stable when + # the hostname resolves to multiple loopback aliases. + chosen_ip = ( + "127.0.0.1" if "127.0.0.1" in safe_addresses else safe_addresses[0] + ) + + port = parsed.port + netloc = f"{chosen_ip}:{port}" if port is not None else chosen_ip + + return urlunparse( + ( + parsed.scheme, + netloc, + parsed.path, + parsed.params, + parsed.query, + parsed.fragment, + ) + ) + + +async def _fetch_image_directly(safe_url: str): + """Fetch an image from a previously validated loopback URL. 
+ + ``safe_url`` MUST be the output of :func:`_validate_loopback_url` so that + it contains a literal loopback IPv4 address and is no longer + user-controlled. Redirects are disabled and ``trust_env`` is off to + ensure the request never leaks to a private/external host through + proxy variables or HTTP 30x responses. + """ + timeout = aiohttp.ClientTimeout(total=10) + async with aiohttp.ClientSession( + timeout=timeout, trust_env=False + ) as session: + async with session.get(safe_url, allow_redirects=False) as response: + if response.status != HTTPStatus.OK: + error_text = await response.text() + logger.error( + "Failed to fetch loopback image directly: %s", error_text + ) + return {"success": False, "error": "Failed to fetch image"} + + content = await response.read() + content_type = response.headers.get("Content-Type", "image/jpeg") + return { + "success": True, + "base64": base64.b64encode(content).decode("utf-8"), + "content_type": content_type, + } + + async def proxy_image_impl(decoded_url: str): + # Fast path: only for loopback URLs, fetch directly. This avoids an + # extra hop through the data-processing service for local images. For + # any other URL (including all external/knowledge-base images such as + # AIDP), fall back to the data-processing service proxy, which is the + # existing safe path that CodeQL does not flag. + safe_url = _validate_loopback_url(decoded_url) + if safe_url is not None: + return await _fetch_image_directly(safe_url) + # Create session to call the data processing service async with aiohttp.ClientSession() as session: # Call the data processing service to load the image @@ -31,7 +147,11 @@ async def proxy_image_impl(decoded_url: str): def get_vlm_model(tenant_id: str): - # Get the tenant config + """Return the configured image understanding model for AnalyzeImageTool. 
+ + The first multimodal model slot is still stored under MODEL_CONFIG_MAPPING["vlm"] + for compatibility, but it is the user-facing image understanding configuration. + """ vlm_model_config = tenant_config_manager.get_model_config( key=MODEL_CONFIG_MAPPING["vlm"], tenant_id=tenant_id) if not vlm_model_config: @@ -48,3 +168,27 @@ def get_vlm_model(tenant_id: str): max_tokens=512, ssl_verify=vlm_model_config.get("ssl_verify", True), ) + + +def get_image_understanding_model(tenant_id: str): + return get_vlm_model(tenant_id=tenant_id) + + +def get_video_understanding_model(tenant_id: str): + """Return the configured video understanding model for multimodal tools.""" + vlm_model_config = tenant_config_manager.get_model_config( + key=MODEL_CONFIG_MAPPING["vlm3"], tenant_id=tenant_id) + if not vlm_model_config: + return None + return OpenAIVLModel( + observer=MessageObserver(), + model_id=get_model_name_from_config( + vlm_model_config) if vlm_model_config else "", + api_base=vlm_model_config.get("base_url", ""), + api_key=vlm_model_config.get("api_key", ""), + temperature=0.7, + top_p=0.7, + frequency_penalty=0.5, + max_tokens=512, + ssl_verify=vlm_model_config.get("ssl_verify", True), + ) diff --git a/backend/services/invitation_service.py b/backend/services/invitation_service.py index 58a45d369..4011c67cc 100644 --- a/backend/services/invitation_service.py +++ b/backend/services/invitation_service.py @@ -19,8 +19,15 @@ ) from database.user_tenant_db import get_user_tenant_by_user_id from database.group_db import query_group_ids_by_user +from database.role_permission_db import check_role_permission +from consts.const import ( + ASSET_OWNER_TENANT_ID, + ASSET_OWNER_INVITE_CODE_TYPE, + ENABLE_ASSET_OWNER_ROLE, +) from consts.exceptions import NotFoundException, UnauthorizedError, DuplicateError from services.group_service import get_tenant_default_group_id +from services.asset_owner_visibility import require_asset_owner_enabled from utils.str_utils import 
convert_string_to_list logger = logging.getLogger(__name__) @@ -41,7 +48,7 @@ def create_invitation_code( Args: tenant_id (str): Tenant ID - code_type (str): Invitation code type (ADMIN_INVITE, DEV_INVITE, USER_INVITE) + code_type (str): Invitation code type (ADMIN_INVITE, DEV_INVITE, USER_INVITE, ASSET_OWNER_INVITE) invitation_code (Optional[str]): Invitation code, auto-generated if None group_ids (Optional[List[int]]): Associated group IDs capacity (int): Invitation code capacity @@ -58,9 +65,21 @@ def create_invitation_code( ValueError: When code_type is invalid """ # Validate code_type - valid_code_types = ["ADMIN_INVITE", "DEV_INVITE", "USER_INVITE"] + valid_code_types = [ + "ADMIN_INVITE", + "DEV_INVITE", + "USER_INVITE", + ASSET_OWNER_INVITE_CODE_TYPE, + ] + if ENABLE_ASSET_OWNER_ROLE: + valid_code_types.append(ASSET_OWNER_INVITE_CODE_TYPE) if code_type not in valid_code_types: - raise ValueError(f"Invalid code_type: {code_type}. Must be one of {valid_code_types}") + raise ValueError( + f"Invalid code_type: {code_type}. 
Must be one of {valid_code_types}") + + if code_type == ASSET_OWNER_INVITE_CODE_TYPE and not ENABLE_ASSET_OWNER_ROLE: + raise UnauthorizedError( + "ASSET_OWNER feature is not enabled") # Get user information user_info = get_user_tenant_by_user_id(user_id) @@ -70,10 +89,16 @@ def create_invitation_code( user_role = user_info.get("user_role", "USER") # Check permission based on code_type - if code_type == "ADMIN_INVITE" and user_role not in ["SU"]: - raise UnauthorizedError(f"User role {user_role} not authorized to create ADMIN_INVITE codes") + if code_type in ["ADMIN_INVITE", ASSET_OWNER_INVITE_CODE_TYPE] and user_role not in ["SU"]: + raise UnauthorizedError( + f"User role {user_role} not authorized to create ADMIN_INVITE codes") elif code_type in ["DEV_INVITE", "USER_INVITE"] and user_role not in ["SU", "ADMIN"]: - raise UnauthorizedError(f"User role {user_role} not authorized to create {code_type} codes") + raise UnauthorizedError( + f"User role {user_role} not authorized to create {code_type} codes") + + if code_type == ASSET_OWNER_INVITE_CODE_TYPE: + tenant_id = ASSET_OWNER_TENANT_ID + group_ids = [] # Set default group_ids based on code_type if not provided if group_ids is None: @@ -95,7 +120,8 @@ def create_invitation_code( # Check if invitation code already exists if query_invitation_by_code(invitation_code): - raise DuplicateError(f"Invitation code '{invitation_code}' already exists") + raise DuplicateError( + f"Invitation code '{invitation_code}' already exists") # Create invitation (status will be set automatically) invitation_id = add_invitation( @@ -112,11 +138,13 @@ def create_invitation_code( # Automatically update status based on expiry date and capacity update_invitation_code_status(invitation_id) - logger.info(f"Created invitation code {invitation_code} (type: {code_type}) for tenant {tenant_id} by user {user_id}") + logger.info( + f"Created invitation code {invitation_code} (type: {code_type}) for tenant {tenant_id} by user {user_id}") # Get the 
final invitation info with correct status invitation_info = query_invitation_by_id(invitation_id) - normalized_info = _normalize_invitation_data(invitation_info) if invitation_info else None + normalized_info = _normalize_invitation_data( + invitation_info) if invitation_info else None return { "invitation_id": invitation_id, @@ -154,8 +182,18 @@ def update_invitation_code( raise UnauthorizedError(f"User {user_id} not found") user_role = user_info.get("user_role", "USER") - if user_role not in ["SU", "ADMIN"]: - raise UnauthorizedError(f"User role {user_role} not authorized to update invitation codes") + + invitation_info = query_invitation_by_id(invitation_id) + if not invitation_info: + raise NotFoundException(f"Invitation {invitation_id} not found") + + code_type = invitation_info.get("code_type") + if code_type == ASSET_OWNER_INVITE_CODE_TYPE and user_role not in ["SU"]: + raise UnauthorizedError( + f"User role {user_role} not authorized to update invitation codes") + elif user_role not in ["SU", "ADMIN"]: + raise UnauthorizedError( + f"User role {user_role} not authorized to update invitation codes") # Update invitation code success = modify_invitation( @@ -165,7 +203,8 @@ def update_invitation_code( ) if success: - logger.info(f"Updated invitation code {invitation_id} by user {user_id}") + logger.info( + f"Updated invitation code {invitation_id} by user {user_id}") # Automatically update status after successful update update_invitation_code_status(invitation_id) @@ -193,15 +232,19 @@ def delete_invitation_code(invitation_id: int, user_id: str) -> bool: raise UnauthorizedError(f"User {user_id} not found") user_role = user_info.get("user_role", "USER") - if user_role not in ["SU", "ADMIN"]: - raise UnauthorizedError( - f"User role {user_role} not authorized to delete invitation codes") - # Check if invitation exists invitation_info = query_invitation_by_id(invitation_id) if not invitation_info: raise NotFoundException(f"Invitation {invitation_id} not found") + 
code_type = invitation_info.get("code_type") + if code_type == ASSET_OWNER_INVITE_CODE_TYPE and user_role not in ["SU"]: + raise UnauthorizedError( + f"User role {user_role} not authorized to delete invitation codes") + elif user_role not in ["SU", "ADMIN"]: + raise UnauthorizedError( + f"User role {user_role} not authorized to delete invitation codes") + # Delete invitation code success = remove_invitation( invitation_id=invitation_id, updated_by=user_id) @@ -306,7 +349,8 @@ def _calculate_current_status(invitation_data: Dict[str, Any]) -> Dict[str, Any] if current_time.date() > expiry_datetime.date(): new_status = "EXPIRE" except (ValueError, AttributeError, TypeError): - logger.warning(f"Invalid expiry_date format for invitation {invitation_id}: {expiry_date}") + logger.warning( + f"Invalid expiry_date format for invitation {invitation_id}: {expiry_date}") # Check capacity if usage_count >= capacity: @@ -346,7 +390,7 @@ def use_invitation_code( ) -> Dict[str, Any]: """ Use an invitation code by creating a usage record. 
- + Args: invitation_code (str): Invitation code to use user_id (str): User ID using the code @@ -359,7 +403,8 @@ def use_invitation_code( """ # Check if invitation is available if not check_invitation_available(invitation_code): - raise NotFoundException(f"Invitation code {invitation_code} is not available") + raise NotFoundException( + f"Invitation code {invitation_code} is not available") # Get invitation code details invitation_info = query_invitation_by_code(invitation_code) @@ -426,7 +471,8 @@ def update_invitation_code_status(invitation_id: int) -> bool: if current_time.date() > expiry_datetime.date(): new_status = "EXPIRE" except (ValueError, AttributeError, TypeError): - logger.warning(f"Invalid expiry_date format for invitation {invitation_id}: {expiry_date}") + logger.warning( + f"Invalid expiry_date format for invitation {invitation_id}: {expiry_date}") # Check capacity if not expired if new_status == "IN_USE" and usage_count >= capacity: @@ -439,7 +485,8 @@ def update_invitation_code_status(invitation_id: int) -> bool: updates={"status": new_status}, updated_by="system" ) - logger.info(f"Updated invitation code {invitation_id} status to {new_status}") + logger.info( + f"Updated invitation code {invitation_id} status to {new_status}") return True return False @@ -468,7 +515,8 @@ def _generate_unique_invitation_code(length: int = 6) -> str: attempts += 1 - raise RuntimeError(f"Failed to generate unique invitation code after {max_attempts} attempts") + raise RuntimeError( + f"Failed to generate unique invitation code after {max_attempts} attempts") def get_invitations_list( @@ -506,9 +554,13 @@ def get_invitations_list( # Permission logic: # - If tenant_id is provided: ADMIN or SU can view that tenant's invitations # - If tenant_id is not provided: Only SU can view all invitations - if tenant_id: - # If tenant_id is specified, user must be ADMIN/SU - if user_role not in ["SU", "ADMIN"]: + if tenant_id is not None: + # ASSET_OWNER_TENANT_ID virtual 
tenant_id is used for asset-owner invites (SU only) + if tenant_id == ASSET_OWNER_TENANT_ID: + if user_role not in ["SU"]: + raise UnauthorizedError( + f"User role {user_role} not authorized to view asset owner invitations") + elif user_role not in ["SU", "ADMIN"]: raise UnauthorizedError( f"User role {user_role} not authorized to view invitation lists") else: @@ -531,6 +583,7 @@ def get_invitations_list( # Normalize each invitation item in the list if result and "items" in result: - result["items"] = [_normalize_invitation_data(item) for item in result["items"]] + result["items"] = [_normalize_invitation_data( + item) for item in result["items"]] return result diff --git a/backend/services/mcp_management_service.py b/backend/services/mcp_management_service.py new file mode 100644 index 000000000..a62de250a --- /dev/null +++ b/backend/services/mcp_management_service.py @@ -0,0 +1,334 @@ +import logging +from datetime import datetime +from typing import Any, Dict, List +from urllib.parse import urlencode + +import aiohttp + +from consts.exceptions import ( + MCPConnectionError, + McpNotFoundError, + McpValidationError, +) +from database.community_mcp_db import ( + create_mcp_community_record, + delete_mcp_community_record_by_id, + get_mcp_community_record_by_id_and_tenant, + get_mcp_community_records, + get_mcp_community_tag_stats, + list_mcp_community_records_by_tenant, + update_mcp_community_record_by_id, +) +from database.remote_mcp_db import get_mcp_record_by_id_and_tenant + +logger = logging.getLogger("mcp_management_service") + +MCP_REGISTRY_BASE_URL = "https://registry.modelcontextprotocol.io/v0.1/servers" + + +# --------------------------------------------------------------------------- +# Community MCP Service Functions +# --------------------------------------------------------------------------- + +async def list_community_mcp_services( + *, + search: str | None = None, + tag: str | None = None, + transport_type: str | None = None, + cursor: str | None = 
None, + limit: int = 30, +) -> Dict[str, Any]: + """List public community MCP services. + + Args: + search: Search keyword + tag: Filter by tag + transport_type: Filter by transport (url or container) + cursor: Pagination cursor + limit: Items per page + + Returns: + Dictionary with count, nextCursor, and items + """ + db_result = get_mcp_community_records( + search=search, + tag=tag, + transport_type=transport_type, + cursor=cursor, + limit=limit, + ) + + raw_items = db_result.get("items", []) + items = [] + for item in raw_items: + items.append({ + "communityId": item.get("community_id"), + "name": item.get("mcp_name"), + "version": item.get("version"), + "description": item.get("description"), + "status": "active", + "createdAt": item.get("create_time"), + "updatedAt": item.get("update_time"), + "source": "community", + "transportType": item.get("transport_type"), + "serverUrl": item.get("mcp_server"), + "configJson": item.get("config_json") if isinstance(item.get("config_json"), dict) else None, + "registryJson": item.get("registry_json") if isinstance(item.get("registry_json"), dict) else None, + "tags": item.get("tags") or [], + }) + return { + "count": len(items), + "nextCursor": db_result.get("nextCursor"), + "items": items, + } + + +def list_community_mcp_tag_stats() -> List[Dict[str, Any]]: + """Get community MCP tag statistics. + + Args: + tenant_id: Tenant ID + + Returns: + List of tag statistics + """ + return get_mcp_community_tag_stats() + + +async def publish_community_mcp_service( + *, + tenant_id: str, + user_id: str, + mcp_id: int, + name: str | None = None, + description: str | None = None, + version: str | None = None, + tags: List[str] | None = None, + mcp_server: str | None = None, + config_json: Dict[str, Any] | None = None, +) -> int: + """Publish a local MCP service to the community. 
+ + Optional ``name`` / ``description`` / ``version`` / ``tags`` / ``mcp_server`` / + ``config_json`` override the values copied from the local MCP row when creating + the community record. Omit an optional field (``None``) to keep the local MCP + value for that field. + + Args: + tenant_id: Tenant ID + user_id: User ID + mcp_id: MCP record ID to publish + name: Optional community display name override + description: Optional description override + version: Optional version override + tags: Optional tags override + mcp_server: Optional remote MCP URL override + config_json: Optional container config override + + Returns: + Community record ID + + Raises: + McpNotFoundError: If MCP record is not found + """ + source_record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id) + if not source_record: + raise McpNotFoundError("MCP record not found") + + source_registry_json = source_record.get("registry_json") if isinstance(source_record.get("registry_json"), dict) else None + source_config_json = source_record.get("config_json") if isinstance(source_record.get("config_json"), dict) else None + + final_name = name if name is not None else source_record.get("mcp_name") + final_description = description if description is not None else source_record.get("description") + final_version = version if version is not None else source_record.get("version") + final_tags = tags if tags is not None else source_record.get("tags") + final_mcp_server = ( + mcp_server if mcp_server is not None else source_record.get("mcp_server") + ) + final_config_json = ( + config_json if isinstance(config_json, dict) else source_config_json + ) + + # Remote MCP table may omit transport_type; community list still needs it for filters. 
+ community_transport_type = "container" if final_config_json is not None else "url" + + community_id = create_mcp_community_record( + mcp_data={ + "mcp_name": final_name, + "mcp_server": final_mcp_server, + "version": final_version, + "registry_json": source_registry_json, + "transport_type": source_record.get("transport_type") or community_transport_type, + "config_json": final_config_json, + "tags": final_tags, + "description": final_description, + }, + tenant_id=tenant_id, + user_id=user_id, + ) + return community_id + + +async def update_community_mcp_service( + *, + tenant_id: str, + user_id: str, + community_id: int, + name: str | None, + description: str | None, + tags: List[str] | None, + version: str | None, + registry_json: Dict[str, Any] | None, +) -> None: + """Update a community MCP service. + + Args: + tenant_id: Tenant ID + user_id: User ID + community_id: Community record ID + name: New MCP service name + description: MCP service description + tags: MCP tags + version: MCP version + registry_json: Registry metadata JSON + + Raises: + McpNotFoundError: If community MCP record is not found + """ + current = get_mcp_community_record_by_id_and_tenant(community_id=community_id, tenant_id=tenant_id) + if not current: + raise McpNotFoundError("Community MCP record not found") + + existing_config_json = current.get("config_json") if isinstance(current.get("config_json"), dict) else None + next_registry_json = registry_json if isinstance(registry_json, dict) else current.get("registry_json") + next_config_json = existing_config_json if isinstance(existing_config_json, dict) else None + + update_mcp_community_record_by_id( + community_id=community_id, + tenant_id=tenant_id, + user_id=user_id, + name=name, + description=description, + tags=tags, + version=version, + registry_json=next_registry_json, + config_json=next_config_json, + ) + + +async def delete_community_mcp_service( + *, + tenant_id: str, + user_id: str, + community_id: int, +) -> None: + 
"""Delete a community MCP service. + + Args: + tenant_id: Tenant ID + user_id: User ID + community_id: Community record ID + + Raises: + McpNotFoundError: If community MCP record is not found + """ + current = get_mcp_community_record_by_id_and_tenant(community_id=community_id, tenant_id=tenant_id) + if not current: + raise McpNotFoundError("Community MCP record not found") + delete_mcp_community_record_by_id( + community_id=community_id, + tenant_id=tenant_id, + user_id=user_id, + ) + + +async def list_my_community_mcp_services( + *, + tenant_id: str, +) -> Dict[str, Any]: + """List MCP services published by the current user to the community. + + Args: + tenant_id: Tenant ID + + Returns: + Dictionary with count and items + """ + rows = list_mcp_community_records_by_tenant(tenant_id=tenant_id) + items = [] + for row in rows: + items.append({ + "communityId": row.get("community_id"), + "name": row.get("mcp_name"), + "version": row.get("version"), + "description": row.get("description"), + "status": "active", + "createdAt": row.get("create_time"), + "updatedAt": row.get("update_time"), + "source": "community", + "transportType": row.get("transport_type"), + "serverUrl": row.get("mcp_server"), + "configJson": row.get("config_json") if isinstance(row.get("config_json"), dict) else None, + "registryJson": row.get("registry_json") if isinstance(row.get("registry_json"), dict) else None, + "tags": row.get("tags") or [], + }) + return { + "count": len(items), + "items": items, + } + + +# --------------------------------------------------------------------------- +# Registry Functions +# --------------------------------------------------------------------------- + +async def list_registry_mcp_services( + *, + search: str | None = None, + include_deleted: bool = False, + updated_since: str | None = None, + version: str | None = None, + cursor: str | None = None, + limit: int = 30, +) -> Dict[str, Any]: + """List MCP services from the official MCP Registry. 
+ + Args: + search: Search keyword + include_deleted: Include deleted records + updated_since: Filter by update time + version: Filter by version + cursor: Pagination cursor + limit: Items per page + + Returns: + Dictionary with servers and metadata + """ + params: Dict[str, Any] = {"limit": limit} + if search: + params["search"] = search + if include_deleted: + params["include_deleted"] = "true" + if updated_since: + params["updated_since"] = updated_since + if version: + params["version"] = version + if cursor: + params["cursor"] = cursor + + request_url = f"{MCP_REGISTRY_BASE_URL}?{urlencode(params)}" + timeout = aiohttp.ClientTimeout(total=20) + + async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session: + async with session.get(request_url) as response: + if response.status >= 400: + raise RuntimeError(f"Registry request failed with status {response.status}") + payload = await response.json(content_type=None) + + raw_servers = payload.get("servers") if isinstance(payload, dict) else [] + metadata = payload.get("metadata") if isinstance(payload, dict) and isinstance(payload.get("metadata"), dict) else {} + + return { + "servers": raw_servers if isinstance(raw_servers, list) else [], + "metadata": metadata, + } diff --git a/backend/services/model_health_service.py b/backend/services/model_health_service.py index 9214a1ffa..2dc276aeb 100644 --- a/backend/services/model_health_service.py +++ b/backend/services/model_health_service.py @@ -1,8 +1,10 @@ import logging +from typing import Optional from nexent.core import MessageObserver from nexent.core.models import OpenAIModel, OpenAIVLModel -from nexent.core.models.embedding_model import JinaEmbedding, OpenAICompatibleEmbedding +from nexent.core.models.embedding_model import JinaEmbedding, OpenAICompatibleEmbedding, DashScopeMultimodalEmbedding +from nexent.monitor import set_monitoring_context, set_monitoring_operation from nexent.core.models.rerank_model import OpenAICompatibleRerank from 
services.voice_service import get_voice_service @@ -13,6 +15,39 @@ logger = logging.getLogger("model_health_service") +DASHSCOPE_MODEL_FACTORY = "dashscope" +TOKENPONY_MODEL_FACTORY = "tokenpony" +PROVIDER_CATALOG_HEALTHCHECK_FACTORIES = {DASHSCOPE_MODEL_FACTORY, TOKENPONY_MODEL_FACTORY} +PROVIDER_CATALOG_HEALTHCHECK_TYPES = {"vlm", "vlm2", "vlm3"} + +EMBEDDING_TYPES = {"embedding", "multi_embedding"} + + +def _normalize_embedding_url(base_url: str) -> str: + """Append /embeddings suffix to base_url if not already present. + + For embedding and multimodal embedding models, the base_url should contain /embeddings. + If the user provides a base URL without the endpoint (e.g., https://api.jina.ai/v1), + this function normalizes it to include /embeddings (e.g., https://api.jina.ai/v1/embeddings). + """ + if not base_url or "/embeddings" in base_url: + return base_url + return f"{base_url.rstrip('/')}/embeddings" + + +def _infer_model_factory(model_type: str, base_url: str, current_factory: Optional[str] = None) -> Optional[str]: + """Infer model_factory from base_url if not already set or is generic. 
+ + Currently handles: + - multi_embedding with dashscope URL -> "dashscope" + - embedding with dashscope URL -> "dashscope" (uses OpenAI-compatible endpoint) + """ + base_url_lower = base_url.lower() + if "dashscope" in base_url_lower: + return DASHSCOPE_MODEL_FACTORY + + return current_factory + async def _embedding_dimension_check( model_name: str, @@ -20,44 +55,92 @@ async def _embedding_dimension_check( model_base_url: str, model_api_key: str, ssl_verify: bool = True, + model_factory: Optional[str] = None, + timeout_seconds: Optional[float] = None, ): - # Test connectivity based on different model types + if model_type in EMBEDDING_TYPES: + model_base_url = _normalize_embedding_url(model_base_url) + + effective_timeout = timeout_seconds if timeout_seconds else 5.0 + if model_type == "embedding": + # DashScope text embedding models use OpenAI-compatible endpoint, same as generic embedding = await OpenAICompatibleEmbedding( model_name=model_name, base_url=model_base_url, api_key=model_api_key, embedding_dim=0, ssl_verify=ssl_verify, - ).dimension_check() + ).dimension_check(timeout=effective_timeout) if len(embedding) > 0: return len(embedding[0]) logging.warning( f"Embedding dimension check for {model_name} gets empty response") return 0 elif model_type == "multi_embedding": - embedding = await JinaEmbedding( - model_name=model_name, - base_url=model_base_url, - api_key=model_api_key, - embedding_dim=0, - ssl_verify=ssl_verify, - ).dimension_check() - if len(embedding) > 0: + model_factory_lower = (model_factory or "").lower() + if model_factory_lower == "dashscope": + embedding_instance = DashScopeMultimodalEmbedding( + api_key=model_api_key, + base_url=model_base_url, + model_name=model_name, + embedding_dim=0, + ssl_verify=ssl_verify, + ) + else: + embedding_instance = JinaEmbedding( + api_key=model_api_key, + base_url=model_base_url, + model_name=model_name, + embedding_dim=0, + ssl_verify=ssl_verify, + ) + embedding = await 
embedding_instance.dimension_check(timeout=effective_timeout) + if isinstance(embedding, list) and len(embedding) > 0 and isinstance(embedding[0], list): return len(embedding[0]) logging.warning( - f"Embedding dimension check for {model_name} gets empty response") + f"Embedding dimension check for {model_name} gets unexpected response: {type(embedding)}, value: {embedding}") return 0 else: raise ValueError(f"Unsupported model type: {model_type}") +async def _provider_catalog_connectivity_check( + model_name: str, + model_type: str, + model_api_key: str, + model_factory: Optional[str], +) -> bool: + """Validate provider-managed multimodal models through their model catalog.""" + provider = (model_factory or "").lower() + if provider not in PROVIDER_CATALOG_HEALTHCHECK_FACTORIES: + return False + + from services.model_provider_service import get_provider_models + + model_list = await get_provider_models({ + "provider": provider, + "model_type": model_type, + "api_key": model_api_key, + }) + if not model_list or any(model.get("_error") for model in model_list): + return False + + expected_model_id = model_name.lower() + return any(str(model.get("id", "")).lower() == expected_model_id for model in model_list) + + async def _perform_connectivity_check( model_name: str, model_type: str, model_base_url: str, model_api_key: str, ssl_verify: bool = True, + model_factory: Optional[str] = None, + model_appid: Optional[str] = None, + access_token: Optional[str] = None, + display_name: Optional[str] = None, + timeout_seconds: Optional[float] = None, ) -> bool: """ Perform specific model connectivity check @@ -67,6 +150,8 @@ async def _perform_connectivity_check( model_base_url: Model base URL model_api_key: API key ssl_verify: Whether to verify SSL certificates (default: True) + display_name: Optional display name for monitoring + timeout_seconds: Optional request timeout in seconds Returns: bool: Connectivity check result """ @@ -74,33 +159,53 @@ async def 
_perform_connectivity_check( model_base_url = model_base_url.replace( LOCALHOST_NAME, DOCKER_INTERNAL_HOST).replace(LOCALHOST_IP, DOCKER_INTERNAL_HOST) + # Normalize embedding URLs by appending /embeddings if not present + if model_type in EMBEDDING_TYPES: + model_base_url = _normalize_embedding_url(model_base_url) + + effective_timeout = timeout_seconds if timeout_seconds else 5.0 connectivity: bool - # Test connectivity based on different model types if model_type == "embedding": - connectivity = len(await OpenAICompatibleEmbedding( + emb = await OpenAICompatibleEmbedding( model_name=model_name, base_url=model_base_url, api_key=model_api_key, embedding_dim=0, - ssl_verify=ssl_verify - ).dimension_check()) > 0 + ssl_verify=ssl_verify, + ).dimension_check(timeout=effective_timeout) + connectivity = len(emb) > 0 and len(emb[0]) > 0 elif model_type == "multi_embedding": - connectivity = len(await JinaEmbedding( - model_name=model_name, - base_url=model_base_url, - api_key=model_api_key, - embedding_dim=0, - ssl_verify=ssl_verify - ).dimension_check()) > 0 + model_factory_lower = (model_factory or "").lower() + if model_factory_lower == "dashscope": + embedding = DashScopeMultimodalEmbedding( + api_key=model_api_key, + base_url=model_base_url, + model_name=model_name, + embedding_dim=0, + ssl_verify=ssl_verify, + ) + else: + embedding = JinaEmbedding( + api_key=model_api_key, + base_url=model_base_url, + model_name=model_name, + embedding_dim=0, + ssl_verify=ssl_verify, + ) + emb = await embedding.dimension_check(timeout=effective_timeout) + connectivity = len(emb) > 0 and len(emb[0]) > 0 elif model_type == "llm": observer = MessageObserver() + set_monitoring_operation("connectivity_check", + display_name=display_name) connectivity = await OpenAIModel( observer, model_id=model_name, api_base=model_base_url, api_key=model_api_key, - ssl_verify=ssl_verify + ssl_verify=ssl_verify, + timeout_seconds=timeout_seconds, ).check_connectivity() elif model_type == "rerank": 
rerank_model = OpenAICompatibleRerank( @@ -110,8 +215,22 @@ async def _perform_connectivity_check( ssl_verify=ssl_verify, ) connectivity = await rerank_model.connectivity_check() - elif model_type == "vlm": + elif model_type in ("vlm", "vlm2", "vlm3"): + if ( + model_type in PROVIDER_CATALOG_HEALTHCHECK_TYPES + and (model_factory or "").lower() in PROVIDER_CATALOG_HEALTHCHECK_FACTORIES + ): + connectivity = await _provider_catalog_connectivity_check( + model_name=model_name, + model_type=model_type, + model_api_key=model_api_key, + model_factory=model_factory, + ) + return connectivity + observer = MessageObserver() + set_monitoring_operation("connectivity_check", + display_name=display_name) connectivity = await OpenAIVLModel( observer, model_id=model_name, @@ -119,53 +238,121 @@ async def _perform_connectivity_check( api_key=model_api_key, ssl_verify=ssl_verify ).check_connectivity() - elif model_type in ["tts", "stt"]: + elif model_type == 'stt': + voice_service = get_voice_service() + + # Determine STT provider based on model_factory + use_volc = model_factory and model_factory.lower() in ["volcengine", "volcano", "volcengine", "火山引擎"] + + if use_volc: + # Use Volcano STT with appid and access_token + connectivity = await voice_service.check_voice_connectivity( + model_type="stt", + stt_config={ + "model_factory": model_factory, + "model_appid": model_appid, + "access_token": access_token, + "base_url": model_base_url + } + ) + else: + # Use Ali STT (default) with api_key and model name + connectivity = await voice_service.check_voice_connectivity( + model_type="stt", + stt_config={ + "api_key": model_api_key, + "base_url": model_base_url, + "model": model_name + } + ) + elif model_type == 'tts': voice_service = get_voice_service() - connectivity = await voice_service.check_voice_connectivity(model_type) + + # Determine TTS provider based on model_factory + use_volc = model_factory and model_factory.lower() in ["volcengine", "volcano", "volcengine", "火山引擎"] + + 
if use_volc: + # Use Volcano TTS with appid and access_token + connectivity = await voice_service.check_voice_connectivity( + model_type="tts", + stt_config={ + "model_factory": model_factory, + "model_appid": model_appid, + "access_token": access_token, + "base_url": model_base_url + } + ) + else: + # Use Ali TTS (default) with api_key and model name + connectivity = await voice_service.check_voice_connectivity( + model_type="tts", + stt_config={ + "api_key": model_api_key, + "base_url": model_base_url, + "model": model_name + } + ) else: raise ValueError(f"Unsupported model type: {model_type}") return connectivity -async def check_model_connectivity(display_name: str, tenant_id: str) -> dict: +async def check_model_connectivity(display_name: str, tenant_id: str, model_type: str = None) -> dict: try: # Query the database using display_name and tenant context from app layer - model = get_model_by_display_name(display_name, tenant_id=tenant_id) + model = get_model_by_display_name(display_name, tenant_id=tenant_id, model_type=model_type) if not model: - raise LookupError(f"Model configuration not found for {display_name}") + raise LookupError( + f"Model configuration not found for {display_name}") - # Still use repo/name concatenation for model instantiation repo, name = model.get("model_repo", ""), model.get("model_name", "") model_name = f"{repo}/{name}" if repo else name - # Set model to "detecting" status - update_data = { - "connect_status": ModelConnectStatusEnum.DETECTING.value} + update_data = {"connect_status": ModelConnectStatusEnum.DETECTING.value} update_model_record(model["model_id"], update_data) model_type = model["model_type"] model_base_url = model["base_url"] model_api_key = model["api_key"] - ssl_verify = model.get("ssl_verify", True) # Default to True if not present + # Default to True if not present + ssl_verify = model.get("ssl_verify", True) + model_factory = model.get("model_factory") + model_appid = model.get("model_appid") + access_token = 
model.get("access_token") + timeout_seconds = model.get("timeout_seconds") try: - # Use the common connectivity check function + set_monitoring_context(tenant_id=tenant_id) + + ssl_verify_fallback = False connectivity = await _perform_connectivity_check( - model_name, model_type, model_base_url, model_api_key, ssl_verify + model_name, model_type, model_base_url, model_api_key, ssl_verify, + model_factory, model_appid, access_token, display_name, timeout_seconds, ) + if not connectivity and ssl_verify: + ssl_verify_fallback = True + connectivity = await _perform_connectivity_check( + model_name, model_type, model_base_url, model_api_key, False, + model_factory, model_appid, access_token, display_name, timeout_seconds, + ) except Exception as e: - update_data = {"connect_status": ModelConnectStatusEnum.UNAVAILABLE.value} + update_data = { + "connect_status": ModelConnectStatusEnum.UNAVAILABLE.value} logger.error(f"Error checking model connectivity: {str(e)}") update_model_record(model["model_id"], update_data) raise e if connectivity: - logger.info(f"CONNECTED: {model_name}; Base URL: {model.get('base_url')}; API Key: {model.get('api_key')}") + logger.info( + f"CONNECTED: {model_name}") else: - logger.warning(f"UNCONNECTED: {model_name}; Base URL: {model.get('base_url')}; API Key: {model.get('api_key')}") + logger.warning( + f"UNCONNECTED: {model_name}") connect_status = ModelConnectStatusEnum.AVAILABLE.value if connectivity else ModelConnectStatusEnum.UNAVAILABLE.value update_data = {"connect_status": connect_status} + if ssl_verify_fallback: + update_data["ssl_verify"] = False update_model_record(model["model_id"], update_data) return { "connectivity": connectivity, @@ -174,9 +361,9 @@ async def check_model_connectivity(display_name: str, tenant_id: str) -> dict: except Exception as e: logger.error(f"Error checking model connectivity: {str(e)}") if 'model' in locals() and model: - update_data = {"connect_status": ModelConnectStatusEnum.UNAVAILABLE.value} + 
update_data = { + "connect_status": ModelConnectStatusEnum.UNAVAILABLE.value} update_model_record(model["model_id"], update_data) - # Propagate for app layer to translate into HTTP raise e @@ -184,33 +371,40 @@ async def check_model_connectivity(display_name: str, tenant_id: str) -> dict: async def verify_model_config_connectivity(model_config: dict): """ - Verify the connectivity of the model configuration, do not save to the database - Args: - model_config: Model configuration dictionary, containing necessary connection parameters - Returns: - dict: Contains the result of the connectivity test and error message if failed + Verify the connectivity of the model configuration, do not save to the database. """ try: model_name = model_config.get("model_name", "") model_type = model_config["model_type"] - model_base_url = model_config["base_url"] + model_base_url = model_config.get("base_url", "") model_api_key = model_config["api_key"] - ssl_verify = model_config.get("ssl_verify", True) # Default to True if not present + # Default to True if not present + ssl_verify = model_config.get("ssl_verify", True) + model_factory = model_config.get("model_factory") + model_appid = model_config.get("model_appid") + access_token = model_config.get("access_token") + # Get timeout from model config if present + timeout_seconds = model_config.get("timeout_seconds") + + # Infer model_factory from base_url when not provided + model_factory = _infer_model_factory(model_type, model_base_url, model_config.get("model_factory")) try: - # Use the common connectivity check function connectivity = await _perform_connectivity_check( - model_name, model_type, model_base_url, model_api_key, ssl_verify + model_name, model_type, model_base_url, model_api_key, ssl_verify, + model_factory, model_appid, access_token, None, timeout_seconds, ) if not connectivity and ssl_verify: connectivity = await _perform_connectivity_check( - model_name, model_type, model_base_url, model_api_key, False + 
model_name, model_type, model_base_url, model_api_key, False, + model_factory, model_appid, access_token, None, timeout_seconds, ) if not connectivity: + error_msg = f"Failed to connect to model '{model_name}' at {model_base_url}. Please verify the URL, API key, and network connection." return { "connectivity": False, "model_name": model_name, - "error": f"Failed to connect to model '{model_name}' at {model_base_url}. Please verify the URL, API key, and network connection." + "error": f"Failed to connect to model '{model_name}'. Please verify the URL, API key, and network connection." } return { @@ -219,7 +413,8 @@ async def verify_model_config_connectivity(model_config: dict): } except ValueError as e: error_msg = str(e) - logger.warning(f"UNCONNECTED: {model_name}; Base URL: {model_base_url}; API Key: {model_api_key}; Error: {error_msg}") + logger.warning( + f"UNCONNECTED: {model_name}; Error: {error_msg}") return { "connectivity": False, "model_name": model_name, @@ -244,13 +439,26 @@ async def embedding_dimension_check(model_config: dict): try: ssl_verify = model_config.get("ssl_verify", True) + model_factory = _infer_model_factory(model_type, model_base_url, model_config.get("model_factory")) + timeout_seconds = model_config.get("timeout_seconds") dimension = await _embedding_dimension_check( - model_name, model_type, model_base_url, model_api_key, ssl_verify + model_name, model_type, model_base_url, model_api_key, ssl_verify, + model_factory=model_factory, timeout_seconds=timeout_seconds ) + # Fallback to ssl_verify=False if initial check fails + if dimension == 0 and ssl_verify: + dimension = await _embedding_dimension_check( + model_name, model_type, model_base_url, model_api_key, False, + model_factory=model_factory, timeout_seconds=timeout_seconds + ) + if dimension == 0: + logger.error(f"Embedding dimension check returned 0 for model: {model_name}") + return None return dimension except ValueError as e: - logger.error(f"Error checking embedding 
dimension: {str(e)}") - return 0 + logger.error(f"Error checking embedding dimension for {model_name}: {str(e)}") + return None except Exception as e: - logger.error(f"Error checking embedding dimension: {model_name}; Base URL: {model_base_url}; Error: {str(e)}") - return 0 + logger.error( + f"Error checking embedding dimension for {model_name}: {str(e)}") + return None diff --git a/backend/services/model_management_service.py b/backend/services/model_management_service.py index d012803be..1511a9301 100644 --- a/backend/services/model_management_service.py +++ b/backend/services/model_management_service.py @@ -3,23 +3,29 @@ from consts.const import LOCALHOST_IP, LOCALHOST_NAME, DOCKER_INTERNAL_HOST from consts.model import ModelConnectStatusEnum -from consts.provider import ProviderEnum, SILICON_BASE_URL, DASHSCOPE_BASE_URL, TOKENPONY_BASE_URL +from consts.provider import ( + ProviderEnum, + SILICON_BASE_URL, + DASHSCOPE_BASE_URL, + DASHSCOPE_REALTIME_BASE_URL, + TOKENPONY_BASE_URL, +) from database.model_management_db import ( create_model_record, delete_model_record, - get_model_by_display_name, + get_model_by_name_factory, get_models_by_display_name, get_model_records, get_models_by_tenant_factory_type, - update_model_record, + update_model_record ) from services.model_provider_service import ( prepare_model_dict, - merge_existing_model_tokens, + merge_existing_model_attributes, get_provider_models, ) -from services.model_health_service import embedding_dimension_check +from services.model_health_service import embedding_dimension_check, _infer_model_factory from utils.model_name_utils import ( add_repo_to_name, split_repo_name, @@ -31,6 +37,23 @@ logger = logging.getLogger("model_management_service") +INDEPENDENT_MULTIMODAL_MODEL_TYPES = {"vlm", "vlm2", "vlm3"} + + +def _has_display_name_conflict(existing_models: List[Dict[str, Any]], model_type: Optional[str]) -> bool: + """Allow the three multimodal slots to share display names across slots.""" + if not 
existing_models: + return False + + if model_type in INDEPENDENT_MULTIMODAL_MODEL_TYPES: + return any( + existing.get("model_type") == model_type + or existing.get("model_type") not in INDEPENDENT_MULTIMODAL_MODEL_TYPES + for existing in existing_models + ) + + return True + async def create_model_for_tenant(user_id: str, tenant_id: str, model_data: Dict[str, Any]): """Create a single model record for the given tenant. @@ -45,9 +68,19 @@ async def create_model_for_tenant(user_id: str, tenant_id: str, model_data: Dict model_base_url.replace(LOCALHOST_NAME, DOCKER_INTERNAL_HOST) .replace(LOCALHOST_IP, DOCKER_INTERNAL_HOST) ) - model_data['ssl_verify'] = True + # Auto-set ssl_verify based on api_key: + # - Empty api_key (local/LAN services) -> ssl_verify=False + # - "open/router" URL -> ssl_verify=False + # - Otherwise -> ssl_verify=True + model_api_key = model_data.get("api_key", "") + if not model_api_key or "open/router" in model_base_url: + model_data["ssl_verify"] = False + else: + model_data["ssl_verify"] = True + + # Set model_factory to modelengine when using open/router URL if "open/router" in model_base_url: - model_data['ssl_verify'] = False + model_data["model_factory"] = "modelengine" # Split model_name into repo and name model_repo, model_name = split_repo_name( model_data["model_name"]) if model_data.get("model_name") else ("", "") @@ -66,17 +99,31 @@ async def create_model_for_tenant(user_id: str, tenant_id: str, model_data: Dict # Check display name conflict scoped by tenant if model_data.get("display_name"): - existing_model_by_display = get_model_by_display_name( + existing_models_by_display = get_models_by_display_name( model_data["display_name"], tenant_id) - if existing_model_by_display: + if _has_display_name_conflict(existing_models_by_display, model_data.get("model_type")): logging.error( f"Name {model_data['display_name']} is already in use, please choose another display name") raise ValueError( f"Name {model_data['display_name']} is already 
in use, please choose another display name") - # If embedding or multi_embedding, set max_tokens via embedding dimension check + # If embedding or multi_embedding, ensure base_url ends with /embeddings if model_data.get("model_type") in ("embedding", "multi_embedding"): - model_data["max_tokens"] = await embedding_dimension_check(model_data) + base_url = model_data.get("base_url", "") + if base_url and "/embeddings" not in base_url: + model_data["base_url"] = f"{base_url.rstrip('/')}/embeddings" + # Infer model_factory from base_url if not set + model_data["model_factory"] = _infer_model_factory( + model_data["model_type"], model_data["base_url"], model_data.get("model_factory") + ) + # Get embedding dimension + dimension = await embedding_dimension_check(model_data) + if dimension is None: + raise ValueError( + f"Failed to get embedding dimension for model '{model_data.get('display_name', model_data.get('model_name'))}'. " + "Please verify the URL, API key, and network connection." + ) + model_data["max_tokens"] = dimension # Set default chunk_batch if not provided if model_data.get("chunk_batch") is None: model_data["chunk_batch"] = 10 @@ -114,8 +161,8 @@ async def create_provider_models_for_tenant(tenant_id: str, provider_request: Di # Get provider model list model_list = await get_provider_models(provider_request) - # Merge existing model's max_tokens attribute - model_list = merge_existing_model_tokens( + # Merge existing model's attributes (max_tokens, api_key, timeout_seconds, concurrency_limit) + model_list = merge_existing_model_attributes( model_list, tenant_id, provider_request["provider"], provider_request["model_type"]) # Sort model list by ID @@ -143,7 +190,7 @@ async def batch_create_models_for_tenant(user_id: str, tenant_id: str, batch_pay # ModelEngine models carry their own base_url in each model dict model_url = "" elif provider == ProviderEnum.DASHSCOPE.value: - model_url = DASHSCOPE_BASE_URL + model_url = DASHSCOPE_REALTIME_BASE_URL if 
model_type in ("stt", "tts") else DASHSCOPE_BASE_URL elif provider == ProviderEnum.TOKENPONY.value: model_url = TOKENPONY_BASE_URL else: @@ -153,6 +200,13 @@ async def batch_create_models_for_tenant(user_id: str, tenant_id: str, batch_pay tenant_id, provider, model_type) model_list_ids = {model.get("id") for model in model_list} if model_list else set() + existing_model_map = { + add_repo_to_name( + model_repo=model["model_repo"], + model_name=model["model_name"], + ): model + for model in existing_model_list + } # Delete existing models not present for model in existing_model_list: @@ -162,22 +216,23 @@ async def batch_create_models_for_tenant(user_id: str, tenant_id: str, batch_pay # Create or update new models for model in model_list: + model["model_type"] = model_type _, model_name = split_repo_name( model["id"]) if model.get("id") else ("", "") model_repo, model_name_only = split_repo_name( model.get("id", "")) if model.get("id") else ("", "") model_display_name = add_repo_to_name(model_repo, model_name_only) if model_name: - existing_model_by_display = get_model_by_display_name( - model_display_name, tenant_id) - if existing_model_by_display: + existing_model = existing_model_map.get(model_display_name) + if existing_model: + update_data = {} # Check if max_tokens has changed - existing_max_tokens = existing_model_by_display.get( - "max_tokens") + existing_max_tokens = existing_model.get("max_tokens") new_max_tokens = model.get("max_tokens") if new_max_tokens is not None and existing_max_tokens != new_max_tokens: - update_model_record(existing_model_by_display["model_id"], { - "max_tokens": new_max_tokens}, user_id) + update_data["max_tokens"] = new_max_tokens + if update_data: + update_model_record(existing_model["model_id"], update_data, user_id) continue model_dict = await prepare_model_dict( @@ -251,6 +306,15 @@ async def update_single_model_for_tenant( m.get("model_type") == "multi_embedding" for m in existing_models ) + # Auto-set ssl_verify based on 
api_key if provided: + # - Empty api_key -> ssl_verify=False + # - Otherwise -> ssl_verify=True + if "api_key" in model_data: + if not model_data["api_key"]: + model_data["ssl_verify"] = False + else: + model_data["ssl_verify"] = True + if has_multi_embedding: # Update both embedding and multi_embedding records for model in existing_models: @@ -276,12 +340,36 @@ async def update_single_model_for_tenant( async def batch_update_models_for_tenant(user_id: str, tenant_id: str, model_list: List[Dict[str, Any]]): - """Batch update models for a tenant.""" + """Batch update models for a tenant by model_id or model_name.""" try: for model in model_list: - update_model_record(model["model_id"], model, user_id, tenant_id) + # Build update data excluding id fields + update_data = {k: v for k, v in model.items() if k not in ["model_id", "model_name"]} + + model_id_or_name = model.get("model_id") or model.get("model_name") + + # Check if model_id is a numeric string (primary key) + if model_id_or_name and model_id_or_name.isdigit(): + update_model_record(int(model_id_or_name), update_data, user_id, tenant_id) + else: + # Parse "model_repo/model_name" format from frontend's model_id field + if "/" in model_id_or_name: + model_repo, model_name = model_id_or_name.split("/", 1) + else: + model_repo = None + model_name = model_id_or_name + + logging.info(f"[DEBUG] Updating model by name: model_name={model_name}, model_repo={model_repo}, tenant_id={tenant_id}") + + # Query to get model_id first, then update by primary key + model_record = get_model_by_name_factory(model_name, model_repo, tenant_id) + if not model_record: + logging.warning(f"Model not found: model_name={model_name}, model_repo={model_repo}, tenant_id={tenant_id}") + continue + + update_model_record(model_record["model_id"], update_data, user_id, tenant_id) - logging.debug("Batch update models successfully") + logging.info("[DEBUG] Batch update models successfully") except Exception as e: logging.error(f"Failed to batch 
update models: {str(e)}") raise Exception(f"Failed to batch update models: {str(e)}") @@ -484,6 +572,3 @@ async def list_models_for_admin( logging.error(f"Failed to retrieve admin model list: {str(e)}") raise Exception(f"Failed to retrieve admin model list: {str(e)}") - - - diff --git a/backend/services/model_provider_service.py b/backend/services/model_provider_service.py index dbff17082..1aa89fa3b 100644 --- a/backend/services/model_provider_service.py +++ b/backend/services/model_provider_service.py @@ -6,7 +6,7 @@ DEFAULT_MAXIMUM_CHUNK_SIZE, ) from consts.model import ModelConnectStatusEnum, ModelRequest -from consts.provider import ProviderEnum +from consts.provider import ProviderEnum, DASHSCOPE_REALTIME_BASE_URL from database.model_management_db import get_models_by_tenant_factory_type from services.model_health_service import embedding_dimension_check from services.providers.base import AbstractModelProvider @@ -100,11 +100,13 @@ async def prepare_model_dict(provider: str, model: dict, model_url: str, model_a # Build the canonical representation using the existing Pydantic schema for # consistency of validation and default handling. # For embedding/multi_embedding models, max_tokens will be set via connectivity check later, - # so use 0 as placeholder if not provided + # so use 0 as placeholder if not provided. + # Set default timeout_seconds to 120 for LLM models (embedding models don't need it). 
model_type = model["model_type"] is_embedding_type = model_type in ["embedding", "multi_embedding"] max_tokens_value = model.get( "max_tokens", 0) if not is_embedding_type else 0 + timeout_seconds_value = 120 if not is_embedding_type else None model_obj = ModelRequest( model_factory=provider, @@ -115,7 +117,8 @@ async def prepare_model_dict(provider: str, model: dict, model_url: str, model_a display_name=model_display_name, expected_chunk_size=expected_chunk_size, maximum_chunk_size=maximum_chunk_size, - chunk_batch=chunk_batch + chunk_batch=chunk_batch, + timeout_seconds=timeout_seconds_value ) model_dict = model_obj.model_dump() @@ -124,14 +127,18 @@ async def prepare_model_dict(provider: str, model: dict, model_url: str, model_a # Determine the correct base_url and, for embeddings, update the actual # dimension by performing a real connectivity check. if model["model_type"] in ["embedding", "multi_embedding"]: - if provider != ProviderEnum.MODELENGINE.value: - # Ensure proper slash between base URL and endpoint + if provider == ProviderEnum.DASHSCOPE.value and model["model_type"] == "embedding": model_dict["base_url"] = f"{model_url.rstrip('/')}/embeddings" - else: - # For ModelEngine embedding models, append the embeddings path + elif provider == ProviderEnum.MODELENGINE.value: model_dict["base_url"] = f"{model_url.rstrip('/')}/{MODEL_ENGINE_NORTH_PREFIX}/embeddings" - # The embedding dimension might differ from the provided max_tokens. 
+ elif "/embeddings" in model_url: + # URL already contains /embeddings endpoint, use as-is + model_dict["base_url"] = model_url.rstrip('/') + else: + model_dict["base_url"] = f"{model_url.rstrip('/')}/embeddings" model_dict["max_tokens"] = await embedding_dimension_check(model_dict) + elif model["model_type"] in ("stt", "tts") and provider == ProviderEnum.DASHSCOPE.value: + model_dict["base_url"] = DASHSCOPE_REALTIME_BASE_URL elif model["model_type"] == "rerank": if provider == ProviderEnum.DASHSCOPE.value: model_dict["base_url"] = f"{model_url.replace('compatible-mode/v1','api/v1').rstrip('/')}/services/rerank/text-rerank/text-rerank" @@ -155,19 +162,29 @@ async def prepare_model_dict(provider: str, model: dict, model_url: str, model_a return model_dict -def merge_existing_model_tokens(model_list: List[dict], tenant_id: str, provider: str, model_type: str) -> List[dict]: +def merge_existing_model_attributes( + model_list: List[dict], + tenant_id: str, + provider: str, + model_type: str, + fields: List[str] = None +) -> List[dict]: """ - Merge existing model's max_tokens attribute into the model list. + Merge existing model's attributes into the model list. 
Args: model_list: List of models tenant_id: Tenant ID provider: Provider model_type: Model type + fields: List of fields to merge (defaults to max_tokens, api_key, timeout_seconds, concurrency_limit) Returns: List[dict]: Merged model list """ + if fields is None: + fields = ["max_tokens", "api_key", "timeout_seconds", "concurrency_limit"] + if model_type == "embedding" or model_type == "multi_embedding": return model_list @@ -184,15 +201,35 @@ def merge_existing_model_tokens(model_list: List[dict], tenant_id: str, provider "/" + existing_model["model_name"] existing_model_map[model_full_name] = existing_model - # Iterate through the model list, if the model exists in the existing model list, add max_tokens attribute + # Iterate through the model list, merge specified fields from existing models for model in model_list: if model.get("id") in existing_model_map: - model["max_tokens"] = existing_model_map[model.get( - "id")].get("max_tokens") + existing_model = existing_model_map[model.get("id")] + for field in fields: + if existing_model.get(field) is not None: + model[field] = existing_model.get(field) return model_list +def merge_existing_model_tokens(model_list: List[dict], tenant_id: str, provider: str, model_type: str) -> List[dict]: + """ + Merge existing model's max_tokens attribute into the model list. + + DEPRECATED: Use merge_existing_model_attributes instead. 
+ + Args: + model_list: List of models + tenant_id: Tenant ID + provider: Provider + model_type: Model type + + Returns: + List[dict]: Merged model list + """ + return merge_existing_model_attributes(model_list, tenant_id, provider, model_type, ["max_tokens"]) + + # Re-export provider classes for backward compatibility __all__ = [ "AbstractModelProvider", @@ -200,6 +237,7 @@ def merge_existing_model_tokens(model_list: List[dict], tenant_id: str, provider "ModelEngineProvider", "prepare_model_dict", "merge_existing_model_tokens", + "merge_existing_model_attributes", "get_provider_models", "get_model_engine_raw_url", ] diff --git a/backend/services/northbound_service.py b/backend/services/northbound_service.py index a6eaed77d..c5493a551 100644 --- a/backend/services/northbound_service.py +++ b/backend/services/northbound_service.py @@ -1,31 +1,40 @@ import asyncio import hashlib +import json import logging import time from dataclasses import dataclass -from typing import Any, Dict, Optional +from os.path import basename +from typing import Any, Dict, List, Optional +from fastapi import HTTPException, UploadFile from fastapi.responses import StreamingResponse + +from consts.const import ASSET_OWNER_TENANT_ID from consts.exceptions import ( LimitExceededError, UnauthorizedError, + ConversationNotFoundError, ) -from consts.model import AgentRequest -from database.conversation_db import get_conversation_messages +from consts.model import AgentRequest, ToolParamsRequest +from database.conversation_db import get_conversation_messages, get_source_searches_by_message from database.token_db import log_token_usage, get_latest_usage_metadata from services.agent_service import ( run_agent_stream, stop_agent_tasks, - list_all_agent_info_impl, get_agent_id_by_name ) +from services.agent_version_service import list_published_agents_impl from services.conversation_management_service import ( save_conversation_user, get_conversation_list_service, create_new_conversation, 
update_conversation_title as update_conversation_title_service, ) +from services.file_management_service import upload_to_minio, resolve_minio_upload_folder, validate_urls_access +from database.attachment_db import get_file_url, get_file_size_from_minio +from nexent.multi_modal.utils import parse_s3_url logger = logging.getLogger("northbound_service") @@ -39,6 +48,188 @@ class NorthboundContext: token_id: int = 0 +def _build_northbound_file_descriptor( + upload_result: Dict[str, Any], + original_file_name: str = "", + file_type: Optional[str] = None, + file_size: Optional[int] = None, +) -> Dict[str, Any]: + """Normalize upload metadata for northbound API consumers.""" + object_name = str(upload_result.get("object_name") or "").strip() + # Use original filename if provided, otherwise fall back to upload result or object name + if original_file_name: + file_name = original_file_name + else: + file_name = str(upload_result.get("file_name") or basename(object_name) or "") + # Frontend-compatible field order + descriptor = { + "object_name": object_name, + "name": file_name, + "type": file_type or "file", + # Use provided file_size, or from upload_result, or 0 as fallback + "size": file_size if file_size is not None else upload_result.get("file_size", 0), + # Use relative URL format matching frontend: /nexent/{object_name} + "url": f"/nexent/{object_name}", + "description": "", + } + presigned_url = upload_result.get("presigned_url") + if presigned_url: + descriptor["presigned_url"] = presigned_url + return descriptor + + +async def upload_files_for_northbound( + ctx: NorthboundContext, + files: List[UploadFile], + folder: str = "attachments", +) -> Dict[str, Any]: + """Upload files for northbound callers and return reusable storage references.""" + if not files: + raise ValueError("No files in the request") + + actual_folder = resolve_minio_upload_folder(folder, ctx.user_id, ctx.tenant_id) + results = await upload_to_minio(files=files, folder=actual_folder) + 
normalized_files = [] + for result, upload_file in zip(results, files): + if result.get("success") and result.get("object_name"): + content_type = result.get("content_type", "") + file_type = "image" if content_type.startswith("image/") else "file" + # Extract original filename - use upload result first, then fallback to UploadFile + # The upload result contains the original filename passed to upload_fileobj + original_file_name = result.get("original_file_name") or upload_file.filename or "" + file_size = result.get("file_size", 0) + # If file_size is 0 but we have the UploadFile, try to get size from headers + if file_size == 0 and hasattr(upload_file, 'size') and upload_file.size: + file_size = upload_file.size + descriptor = _build_northbound_file_descriptor( + result, + original_file_name=original_file_name, + file_type=file_type, + file_size=file_size, + ) + normalized_files.append(descriptor) + + if not normalized_files: + raise ValueError("No valid files uploaded") + + success_count = sum(1 for result in results if result.get("success", False)) + failed_count = sum(1 for result in results if not result.get("success", False)) + + return { + "message": f"Processed {len(results)} files", + "requestId": ctx.request_id, + "summary": { + "total": len(results), + "uploaded": success_count, + "failed": failed_count, + }, + "files": normalized_files, + } + + +def _normalize_northbound_attachments( + attachments: Optional[List[Any]], + user_id: str, + tenant_id: str, +) -> Optional[List[Dict[str, Any]]]: + """Convert northbound attachment references into internal minio_files objects. + + Supports two formats: + 1. List of S3 URL strings (backward compatible): ["s3://nexent/...", "/nexent/...", "attachments/..."] + 2. 
List of attachment objects (full metadata): [{"object_name": "...", "name": "...", ...}] + """ + from database.attachment_db import _build_mcp_presigned_url + + if attachments is None: + return None + if not isinstance(attachments, list): + raise ValueError("attachments must be an array") + + normalized_files: List[Dict[str, Any]] = [] + for attachment in attachments: + # Handle dict format (full attachment object) + if isinstance(attachment, dict): + # Use the attachment dict directly, just ensure required fields + normalized_file = { + "object_name": attachment.get("object_name", ""), + "name": attachment.get("name", basename(attachment.get("object_name", ""))), + "type": attachment.get("type", "file"), + "size": attachment.get("size", 0), + "url": attachment.get("url", ""), + "description": attachment.get("description", ""), + } + # Add presigned_url if available, or generate one if we have object_name + if "presigned_url" in attachment: + normalized_file["presigned_url"] = attachment["presigned_url"] + elif normalized_file.get("object_name"): + try: + presigned_result = get_file_url(object_name=normalized_file["object_name"], expires=86400) + if presigned_result.get("success") and presigned_result.get("url"): + normalized_file["presigned_url"] = _build_mcp_presigned_url(presigned_result["url"]) + except Exception: + pass + normalized_files.append(normalized_file) + continue + + # Handle string format (S3 URL) + if not isinstance(attachment, str) or not attachment.strip(): + raise ValueError("attachments must contain non-empty S3 URLs or object paths") + + attachment_url = attachment.strip() + + # Support multiple URL formats: + # 1. s3://nexent/attachments/xxx.md + # 2. /nexent/attachments/xxx.md + # 3. 
attachments/xxx.md (relative path) + if attachment_url.startswith("s3://"): + try: + _, object_name = parse_s3_url(attachment_url) + except ValueError as exc: + raise ValueError(f"Invalid S3 URL format: {attachment_url}") from exc + validate_url = attachment_url + elif attachment_url.startswith("/nexent/"): + object_name = attachment_url[len("/nexent/"):] + validate_url = f"s3://nexent/{object_name}" + elif attachment_url.startswith("attachments/") or attachment_url.startswith("nexent/"): + object_name = attachment_url if attachment_url.startswith("nexent/") else attachment_url + validate_url = f"s3://nexent/{object_name}" + else: + raise ValueError(f"Invalid attachment format: {attachment_url}. Expected s3:// URL, /nexent/ path, or attachments/ path") + + try: + validate_urls_access([validate_url], user_id, tenant_id) + presigned_result = get_file_url(object_name=object_name, expires=86400) + except PermissionError as exc: + detail = str(exc) + if "Invalid S3 URL format" in detail: + raise ValueError(detail) from exc + raise PermissionError(detail) from exc + + # Get file size from MinIO + try: + file_size = get_file_size_from_minio(object_name) + except Exception: + file_size = 0 + + # Build frontend-compatible minio_files format + file_name = basename(object_name.rstrip("/")) + normalized_file = { + "object_name": object_name, + "name": file_name, + "type": "file", + "size": file_size, + # Use relative URL format matching frontend: /nexent/{object_name} + "url": f"/nexent/{object_name}", + "description": "", + } + # Use MCP proxy URL for presigned_url (same as frontend format) + if presigned_result.get("success") and presigned_result.get("url"): + normalized_file["presigned_url"] = _build_mcp_presigned_url(presigned_result["url"]) + normalized_files.append(normalized_file) + + return normalized_files + + # ----------------------------- # In-memory idempotency and rate limit placeholders # ----------------------------- @@ -111,6 +302,12 @@ def 
_build_idempotency_key(*parts: Any) -> str: return ":".join(processed) +def _build_title_update_idempotency_key(tenant_id: str, conversation_id: int, title: str) -> str: + """Build an ASCII-safe idempotency key for title updates.""" + title_hash = hashlib.sha256(title.encode("utf-8")).hexdigest() + return _build_idempotency_key(tenant_id, str(conversation_id), title_hash) + + # ----------------------------- # Agent resolver # ----------------------------- @@ -126,7 +323,9 @@ async def start_streaming_chat( conversation_id: Optional[int], agent_name: str, query: str, + attachments: Optional[List[Any]] = None, meta_data: Optional[Dict[str, Any]] = None, + tool_params: Optional[ToolParamsRequest] = None, idempotency_key: Optional[str] = None ) -> StreamingResponse: try: @@ -145,6 +344,11 @@ async def start_streaming_chat( # Get history according to internal_conversation_id history_resp = await get_conversation_history_internal(ctx, internal_conversation_id) agent_id = await get_agent_id_by_name(agent_name=agent_name, tenant_id=ctx.tenant_id) + normalized_attachments = _normalize_northbound_attachments( + attachments=attachments, + user_id=ctx.user_id, + tenant_id=ctx.tenant_id, + ) # Idempotency: only prevent concurrent duplicate starts composed_key = idempotency_key or _build_idempotency_key(ctx.tenant_id, str(conversation_id), agent_id, query) await idempotency_start(composed_key) @@ -153,8 +357,9 @@ async def start_streaming_chat( agent_id=agent_id, query=query, history=(history_resp.get("data", {})).get("history", []), - minio_files=None, + minio_files=normalized_attachments, is_debug=False, + tool_params=tool_params, ) # Synchronously persist the user message before starting the stream to avoid race conditions @@ -257,15 +462,58 @@ async def list_conversations(ctx: NorthboundContext) -> Dict[str, Any]: return {"message": "success", "data": conversations, "requestId": ctx.request_id} +def _format_search_record(record: Dict[str, Any]) -> Dict[str, Any]: + """Format 
a search source record for API response.""" + search_item = { + "title": record.get("source_title", ""), + "text": record.get("source_content", ""), + "source_type": record.get("source_type", ""), + "url": record.get("source_location", ""), + "filename": record.get("source_title", "") if record.get("source_type") == "file" else None, + "published_date": None, + "score": float(record["score_overall"]) if record.get("score_overall") is not None else None, + "tool_sign": record.get("tool_sign", ""), + "cite_index": record.get("cite_index") + } + + if record.get("published_date"): + if hasattr(record["published_date"], "strftime"): + search_item["published_date"] = record["published_date"].strftime("%Y-%m-%d") + else: + search_item["published_date"] = str(record["published_date"])[:10] + + return search_item + + async def get_conversation_history_internal(ctx: NorthboundContext, conversation_id: int) -> Dict[str, Any]: """Internal helper to get conversation history without logging.""" history = get_conversation_messages(conversation_id) - # Remove unnecessary fields result = [] for message in history: + # Parse minio_files from database (stored as JSON string) + minio_files = [] + raw_minio_files = message.get("minio_files") + if raw_minio_files: + try: + minio_files = json.loads(raw_minio_files) if isinstance(raw_minio_files, str) else raw_minio_files + except (json.JSONDecodeError, TypeError): + logger.warning(f"Failed to parse minio_files for message {message.get('message_id')}") + + # Fetch search results for this message + message_id = message.get("message_id") + search_results = [] + if message_id: + try: + search_records = get_source_searches_by_message(message_id, user_id=ctx.user_id) + search_results = [_format_search_record(r) for r in search_records] + except Exception as e: + logger.warning(f"Failed to get search records for message {message_id}: {str(e)}") + result.append({ "role": message["message_role"], - "content": message["message_content"] + 
"content": message["message_content"], + "minio_files": minio_files, + "search": search_results }) response = { @@ -284,7 +532,18 @@ async def get_conversation_history(ctx: NorthboundContext, conversation_id: int) async def get_agent_info_list(ctx: NorthboundContext) -> Dict[str, Any]: try: - agent_info_list = await list_all_agent_info_impl(tenant_id=ctx.tenant_id, user_id=ctx.user_id) + agent_info_list = await list_published_agents_impl( + tenant_id=ctx.tenant_id, + user_id=ctx.user_id, + ) + # Match the same scope as /agent/published_list: non-asset-owner tenants + # also get the asset owner's published agents merged in. + if ctx.tenant_id != ASSET_OWNER_TENANT_ID: + asset_agent_list = await list_published_agents_impl( + tenant_id=ASSET_OWNER_TENANT_ID, + user_id=ctx.user_id, + ) + agent_info_list.extend(asset_agent_list) # Remove internal information that partner don't need for agent_info in agent_info_list: agent_info.pop("agent_id", None) @@ -298,7 +557,11 @@ async def update_conversation_title(ctx: NorthboundContext, conversation_id: int composed_key: Optional[str] = None try: # Idempotency: avoid concurrent duplicate title update for same conversation - composed_key = idempotency_key or _build_idempotency_key(ctx.tenant_id, str(conversation_id), title) + composed_key = idempotency_key or _build_title_update_idempotency_key( + ctx.tenant_id, + conversation_id, + title, + ) await idempotency_start(composed_key) update_conversation_title_service(conversation_id, title, ctx.user_id) @@ -324,6 +587,8 @@ async def update_conversation_title(ctx: NorthboundContext, conversation_id: int } except LimitExceededError as _: raise LimitExceededError("Duplicate request is still running, please wait.") + except ConversationNotFoundError: + raise except Exception as e: raise Exception(f"Failed to update conversation title for conversation_id {conversation_id}: {str(e)}") finally: diff --git a/backend/services/oauth_service.py b/backend/services/oauth_service.py new file mode 
100644 index 000000000..fe2aa0c42 --- /dev/null +++ b/backend/services/oauth_service.py @@ -0,0 +1,577 @@ +import json +import logging +import os +import secrets +import ssl +import time +import urllib.request +from typing import Any, Dict, List, Optional +from urllib.parse import urlencode, quote + +import jwt +from pydantic import EmailStr, TypeAdapter, ValidationError as PydanticValidationError + +from consts.const import ( + ASSET_OWNER_INVITE_CODE_TYPE, + ASSET_OWNER_ROLE, + ASSET_OWNER_TENANT_ID, + DEFAULT_TENANT_ID, + OAUTH_CALLBACK_BASE_URL, + OAUTH_SSL_VERIFY, + OAUTH_CA_BUNDLE, + SUPABASE_JWT_SECRET, +) +from consts.exceptions import OAuthLinkError, OAuthProviderError +from services.asset_owner_visibility import require_asset_owner_enabled +from consts.oauth_providers import ( + get_all_provider_definitions, + get_provider_definition, + is_provider_enabled, +) +from database.oauth_account_db import ( + delete_oauth_account, + get_oauth_account_by_provider, + get_soft_deleted_oauth_account, + insert_oauth_account, + list_oauth_accounts_by_user_id, + reactivate_oauth_account, + update_oauth_account_tokens, +) +from database.user_tenant_db import get_user_tenant_by_user_id, insert_user_tenant + +logger = logging.getLogger(__name__) + +OAUTH_PENDING_EXPIRE_SECONDS = 10 * 60 +OAUTH_PENDING_PURPOSE = "oauth_account_completion" +_EMAIL_ADAPTER = TypeAdapter(EmailStr) + + +def _build_ssl_context() -> ssl.SSLContext: + if OAUTH_CA_BUNDLE and os.path.isfile(OAUTH_CA_BUNDLE): + return ssl.create_default_context(cafile=OAUTH_CA_BUNDLE) + if not OAUTH_SSL_VERIFY: + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + return ctx + return ssl.create_default_context() + + +_SSL_CTX = _build_ssl_context() + + +def parse_state(state: str) -> Dict[str, str]: + parts = state.split(":", 2) + if len(parts) >= 2: + return { + "provider": parts[0], + "token": parts[1], + "link_user_id": parts[2] if len(parts) > 2 else "", + } + 
return {"provider": state, "token": "", "link_user_id": ""} + + +def _resolve_field(data: dict, field_path: str) -> Any: + if "." not in field_path: + return data.get(field_path) + parts = field_path.split(".") + current = data + for part in parts: + if isinstance(current, dict): + current = current.get(part) + else: + return None + return current + + +def get_supported_providers() -> set: + return set(get_all_provider_definitions().keys()) + + +def get_enabled_providers() -> List[Dict[str, str]]: + providers = [] + for name, definition in get_all_provider_definitions().items(): + if is_provider_enabled(definition): + providers.append( + { + "name": definition.name, + "display_name": definition.display_name, + "icon": definition.icon, + "enabled": True, + } + ) + return providers + + +def get_authorize_url(provider: str, link_user_id: str = "") -> str: + try: + definition = get_provider_definition(provider) + except KeyError: + raise OAuthProviderError(f"Unsupported OAuth provider: {provider}") + + if not is_provider_enabled(definition): + raise OAuthProviderError(f"OAuth provider '{provider}' is not configured") + + callback_url = ( + f"{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider={provider}" + ) + random_token = secrets.token_urlsafe(32) + if link_user_id: + state = f"{provider}:{random_token}:{link_user_id}" + else: + state = f"{provider}:{random_token}" + + client_id = os.getenv(definition.client_id_env, "") + redirect_uri = ( + quote(callback_url, safe="") if definition.encode_redirect_uri else callback_url + ) + + params = dict(definition.authorize_params) + param_map = definition.authorize_param_map + params[param_map.get("client_id", "client_id")] = client_id + params[param_map.get("redirect_uri", "redirect_uri")] = redirect_uri + params[param_map.get("state", "state")] = state + + url = f"{definition.authorize_url}?{urlencode(params)}" + if definition.authorize_fragment: + url += definition.authorize_fragment + return url + + +def 
_http_post_json(url: str, data: dict, headers: Optional[dict] = None) -> dict: + req_data = json.dumps(data).encode("utf-8") + req_headers = {"Content-Type": "application/json", "Accept": "application/json"} + if headers: + req_headers.update(headers) + req = urllib.request.Request(url, data=req_data, headers=req_headers, method="POST") + with urllib.request.urlopen(req, timeout=15, context=_SSL_CTX) as resp: + return json.loads(resp.read().decode("utf-8")) + + +def _http_get_json(url: str, headers: Optional[dict] = None) -> dict: + req = urllib.request.Request(url, headers=headers or {}) + with urllib.request.urlopen(req, timeout=15, context=_SSL_CTX) as resp: + return json.loads(resp.read().decode("utf-8")) + + +def exchange_code_for_provider_token(provider: str, code: str) -> Dict[str, Any]: + try: + definition = get_provider_definition(provider) + except KeyError: + raise OAuthProviderError(f"Unsupported provider: {provider}") + + client_id = os.getenv(definition.client_id_env, "") + client_secret = os.getenv(definition.client_secret_env, "") + callback_url = ( + f"{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider={provider}" + ) + redirect_uri = ( + quote(callback_url, safe="") if definition.encode_redirect_uri else callback_url + ) + + param_map = definition.token_params_map + + result: Dict[str, Any] = {"access_token": ""} + + if definition.token_method.upper() == "POST": + body = dict(definition.token_extra_params) + body[param_map.get("client_id", "client_id")] = client_id + body[param_map.get("client_secret", "client_secret")] = client_secret + body[param_map.get("code", "code")] = code + body.setdefault(param_map.get("grant_type", "grant_type"), "authorization_code") + if param_map.get("redirect_uri", "") == "redirect_uri": + body["redirect_uri"] = redirect_uri + + resp = _http_post_json(definition.token_url, data=body) + else: + params = dict(definition.token_extra_params) + params[param_map.get("client_id", "client_id")] = client_id + 
params[param_map.get("client_secret", "client_secret")] = client_secret + params[param_map.get("code", "code")] = code + params[param_map.get("grant_type", "grant_type")] = "authorization_code" + if param_map.get("redirect_uri", "") == "redirect_uri": + params["redirect_uri"] = redirect_uri + + resp = _http_get_json(f"{definition.token_url}?{urlencode(params)}") + + if definition.token_error_key and definition.token_error_key in resp: + err_msg = resp.get( + definition.token_error_message_key, str(resp[definition.token_error_key]) + ) + raise OAuthProviderError(f"{provider} token exchange failed: {err_msg}") + + result["access_token"] = resp["access_token"] + if definition.token_response_id_key: + result["openid"] = resp.get(definition.token_response_id_key, "") + + return result + + +def get_provider_user_info( + provider: str, access_token: str, **kwargs: Any +) -> Dict[str, Any]: + try: + definition = get_provider_definition(provider) + except KeyError: + raise OAuthProviderError(f"Unsupported provider: {provider}") + + headers: Dict[str, str] = {"Accept": "application/json"} + if definition.userinfo_auth_scheme and access_token: + headers["Authorization"] = f"{definition.userinfo_auth_scheme} {access_token}" + + url_params = {} + for key, value in definition.userinfo_params.items(): + resolved = value.format( + openid=kwargs.get("openid", ""), access_token=access_token + ) + url_params[key] = resolved + + query = urlencode(url_params) if url_params else "" + separator = ( + "&" if "?" in definition.userinfo_url and query else ("?" 
if query else "") + ) + url = f"{definition.userinfo_url}{separator}{query}" + + user_resp = _http_get_json(url, headers=headers) + + field_map = definition.userinfo_field_map + result = {} + for our_key, provider_key in field_map.items(): + if provider_key: + result[our_key] = _resolve_field(user_resp, provider_key) or "" + else: + result[our_key] = "" + result["id"] = str(result.get("id", "")) + + if definition.userinfo_needs_email_fetch and not result.get("email"): + try: + emails_resp = _http_get_json( + definition.userinfo_email_url, + headers={"Authorization": f"Bearer {access_token}"}, + ) + if isinstance(emails_resp, list) and emails_resp: + primary = next( + (e for e in emails_resp if e.get("primary")), + emails_resp[0], + ) + result["email"] = primary.get("email", "") + except Exception: + logger.warning(f"Failed to fetch {provider} user emails") + + return result + + +def generate_pending_oauth_token( + provider: str, + provider_user_id: str, + provider_email: Optional[str] = None, + provider_username: Optional[str] = None, + expires_in: int = OAUTH_PENDING_EXPIRE_SECONDS, +) -> str: + if not SUPABASE_JWT_SECRET: + raise OAuthProviderError("JWT verification is not configured") + + now = int(time.time()) + payload = { + "purpose": OAUTH_PENDING_PURPOSE, + "provider": provider, + "provider_user_id": provider_user_id, + "provider_email": provider_email or "", + "provider_username": provider_username or "", + "iat": now, + "exp": now + expires_in, + } + return jwt.encode(payload, SUPABASE_JWT_SECRET, algorithm="HS256") + + +def parse_pending_oauth_token(pending_token: str) -> Dict[str, str]: + if not pending_token: + raise OAuthLinkError("OAuth account completion session is missing") + if not SUPABASE_JWT_SECRET: + raise OAuthProviderError("JWT verification is not configured") + + try: + payload = jwt.decode( + pending_token, + SUPABASE_JWT_SECRET, + algorithms=["HS256"], + options={"verify_exp": True, "verify_aud": False}, + ) + except 
jwt.ExpiredSignatureError as exc: + raise OAuthLinkError("OAuth account completion session has expired") from exc + except jwt.InvalidTokenError as exc: + raise OAuthLinkError("OAuth account completion session is invalid") from exc + + if payload.get("purpose") != OAUTH_PENDING_PURPOSE: + raise OAuthLinkError("OAuth account completion session is invalid") + if not payload.get("provider") or not payload.get("provider_user_id"): + raise OAuthLinkError("OAuth account completion session is incomplete") + + return { + "provider": str(payload.get("provider", "")), + "provider_user_id": str(payload.get("provider_user_id", "")), + "provider_email": str(payload.get("provider_email", "")), + "provider_username": str(payload.get("provider_username", "")), + } + + +def get_pending_oauth_info(pending_token: str) -> Dict[str, Any]: + payload = parse_pending_oauth_token(pending_token) + provider_email = payload.get("provider_email") or "" + return { + "provider": payload["provider"], + "provider_username": payload.get("provider_username") or "", + "provider_email": provider_email, + "email_required": not bool(provider_email), + } + + +def _validate_email(email: Optional[str]) -> str: + if not email: + raise OAuthLinkError("Email is required") + try: + return str(_EMAIL_ADAPTER.validate_python(email)).lower() + except PydanticValidationError as exc: + raise OAuthLinkError("Invalid email address") from exc + + +def find_supabase_user_id_by_email( + admin_client: Any, email: Optional[str] +) -> Optional[str]: + if not email: + return None + + page = 1 + while True: + users_resp = admin_client.auth.admin.list_users(page=page, per_page=100) + users = getattr(users_resp, "users", users_resp) + if users is None: + users = [] + if not users: + return None + for user in users: + user_email = getattr(user, "email", "") + if user_email and user_email.lower() == email.lower(): + return user.id + if len(users) < 100: + return None + page += 1 + + +def _role_from_invitation_type(code_type: 
str) -> str: + if code_type == "ADMIN_INVITE": + return "ADMIN" + if code_type == "DEV_INVITE": + return "DEV" + if code_type == ASSET_OWNER_INVITE_CODE_TYPE: + require_asset_owner_enabled() + return ASSET_OWNER_ROLE + return "USER" + + +async def complete_pending_oauth_account( + pending_token: str, + password: str, + invite_code: str, + email: Optional[str] = None, +) -> Dict[str, Any]: + from services.group_service import add_user_to_groups + from services.invitation_service import ( + check_invitation_available, + get_invitation_by_code, + use_invitation_code, + ) + from services.tool_configuration_service import init_tool_list_for_tenant + from services.user_management_service import generate_tts_stt_4_admin + from utils.auth_utils import calculate_expires_at, generate_session_jwt + + pending = parse_pending_oauth_token(pending_token) + provider = pending["provider"] + provider_user_id = pending["provider_user_id"] + provider_email = pending.get("provider_email") or "" + provider_username = pending.get("provider_username") or "" + + if len(password or "") < 6: + raise OAuthLinkError("Password must be at least 6 characters") + + final_email = _validate_email(provider_email or email) + normalized_invite_code = invite_code.upper() + + if get_oauth_account_by_provider(provider, provider_user_id): + raise OAuthLinkError(f"This {provider} account is already bound to another user") + + if not check_invitation_available(normalized_invite_code): + raise OAuthLinkError("Invitation code is invalid or unavailable") + + invitation_info = get_invitation_by_code(normalized_invite_code) + if not invitation_info: + raise OAuthLinkError("Invitation code is invalid or unavailable") + + admin_client = None + try: + from utils.auth_utils import get_supabase_admin_client + + admin_client = get_supabase_admin_client() + except Exception: + admin_client = None + if not admin_client: + raise RuntimeError("Supabase admin client not available") + + existing_user_id = 
find_supabase_user_id_by_email(admin_client, final_email) + if existing_user_id: + raise OAuthLinkError( + "Email already exists. Please log in with email and password, " + "then link this OAuth account in settings." + ) + + create_resp = admin_client.auth.admin.create_user( + { + "email": final_email, + "password": password, + "email_confirm": True, + "user_metadata": { + "full_name": provider_username, + "provider": provider, + }, + } + ) + supabase_user_id = create_resp.user.id + + tenant_id = invitation_info["tenant_id"] + if invitation_info.get("code_type") == ASSET_OWNER_INVITE_CODE_TYPE: + tenant_id = ASSET_OWNER_TENANT_ID + user_role = _role_from_invitation_type(invitation_info.get("code_type", "USER_INVITE")) + is_asset_owner_registration = user_role == ASSET_OWNER_ROLE + + insert_user_tenant( + user_id=supabase_user_id, + tenant_id=tenant_id, + user_role=user_role, + user_email=final_email, + ) + + invitation_result = use_invitation_code(normalized_invite_code, supabase_user_id) + group_ids = invitation_result.get("group_ids", []) + if isinstance(group_ids, str): + from utils.str_utils import convert_string_to_list + + group_ids = convert_string_to_list(group_ids) + if group_ids and not is_asset_owner_registration: + add_user_to_groups(supabase_user_id, group_ids, supabase_user_id) + + if user_role == "ADMIN": + await generate_tts_stt_4_admin(tenant_id, supabase_user_id) + if not is_asset_owner_registration: + await init_tool_list_for_tenant(tenant_id, supabase_user_id) + + create_or_update_oauth_account( + user_id=supabase_user_id, + provider=provider, + provider_user_id=provider_user_id, + email=final_email, + username=provider_username, + tenant_id=tenant_id, + ) + + expiry_seconds = 3600 + jwt_token = generate_session_jwt(supabase_user_id, expires_in=expiry_seconds) + expires_at = calculate_expires_at(jwt_token) + + return { + "user": { + "id": str(supabase_user_id), + "email": final_email, + "role": user_role, + }, + "session": { + "access_token": 
jwt_token, + "refresh_token": "", + "expires_at": expires_at, + "expires_in_seconds": expiry_seconds, + }, + } + + +def create_or_update_oauth_account( + user_id: str, + provider: str, + provider_user_id: str, + email: Optional[str] = None, + username: Optional[str] = None, + tenant_id: Optional[str] = None, +) -> Dict[str, Any]: + existing = get_oauth_account_by_provider(provider, provider_user_id) + + if existing: + if existing.get("user_id") != user_id: + raise OAuthLinkError( + f"This {provider} account is already bound to another user" + ) + else: + update_oauth_account_tokens( + provider=provider, + provider_user_id=provider_user_id, + provider_username=username, + ) + updated = get_oauth_account_by_provider(provider, provider_user_id) + return updated if updated else existing + + soft_deleted = get_soft_deleted_oauth_account(provider, provider_user_id) + if soft_deleted: + reactivate_oauth_account( + provider=provider, + provider_user_id=provider_user_id, + user_id=user_id, + provider_email=email, + provider_username=username, + tenant_id=tenant_id or DEFAULT_TENANT_ID, + ) + reactivated = get_oauth_account_by_provider(provider, provider_user_id) + return reactivated if reactivated else {"provider": provider, "provider_user_id": provider_user_id, "user_id": user_id} + + return insert_oauth_account( + user_id=user_id, + provider=provider, + provider_user_id=provider_user_id, + provider_email=email, + provider_username=username, + tenant_id=tenant_id or DEFAULT_TENANT_ID, + ) + + +def ensure_user_tenant_exists(user_id: str, email: str) -> Dict[str, Any]: + existing = get_user_tenant_by_user_id(user_id) + if existing: + return existing + + insert_user_tenant( + user_id=user_id, + tenant_id=DEFAULT_TENANT_ID, + user_role="USER", + user_email=email, + ) + logger.info(f"Created user_tenant for new OAuth user {user_id}") + result = get_user_tenant_by_user_id(user_id) + return result if result else {"user_id": user_id, "tenant_id": DEFAULT_TENANT_ID} + + +def 
list_linked_accounts(user_id: str) -> List[Dict[str, Any]]: + accounts = list_oauth_accounts_by_user_id(user_id) + result = [] + for acct in accounts: + result.append( + { + "provider": acct["provider"], + "provider_username": acct.get("provider_username"), + "provider_email": acct.get("provider_email"), + "linked_at": str(acct.get("create_time", "")), + } + ) + return result + + +def unlink_account(user_id: str, provider: str) -> bool: + success = delete_oauth_account(user_id, provider) + if not success: + raise OAuthLinkError(f"No linked {provider} account found") + return True diff --git a/backend/services/prompt_service.py b/backend/services/prompt_service.py index 3706c3cc5..f1564cdbc 100644 --- a/backend/services/prompt_service.py +++ b/backend/services/prompt_service.py @@ -1,18 +1,22 @@ import json import logging import queue +import sys import threading from typing import Optional, List from jinja2 import StrictUndefined, Template -from consts.const import LANGUAGE +from consts.const import LANGUAGE, ENABLE_JIUWEN_SDK from consts.error_code import ErrorCode from consts.error_message import ErrorMessage from consts.exceptions import AppException +from consts.model import AgentInfoRequest from database.agent_db import search_agent_info_by_agent_id, query_all_agent_info_by_tenant_id, \ query_sub_agents_id_list -from database.tool_db import query_tools_by_ids +from database.model_management_db import get_model_by_model_id +from database.knowledge_db import get_knowledge_name_map_by_index_names +from database.tool_db import query_tools_by_ids, query_tool_instances_by_id from services.agent_service import ( get_enable_tool_id_by_agent_id, _check_agent_name_duplicate, @@ -20,16 +24,49 @@ _regenerate_agent_name_with_llm, _regenerate_agent_display_name_with_llm, _generate_unique_agent_name_with_suffix, - _generate_unique_display_name_with_suffix + _generate_unique_display_name_with_suffix, + update_agent, ) +from services.prompt_template_service import 
resolve_prompt_generate_template from utils.llm_utils import call_llm_for_system_prompt -from utils.prompt_template_utils import get_prompt_generate_prompt_template +from utils.prompt_template_utils import ( + get_prompt_optimize_prompt_template, + get_prompt_template, +) + +from dataclasses import dataclass, field +from typing import Optional as Opt + +from adapters.exception import JiuwenSDKError, NexentCapabilityError + + +def _get_jiuwen_adapter_class(): + """Import Jiuwen adapter only when optimization paths need it.""" + try: + from adapters import JiuwenSDKAdapter + except ModuleNotFoundError: + return None + return JiuwenSDKAdapter + # Configure logging logger = logging.getLogger("prompt_service") +PROMPT_SECTION_TYPE_TITLES = { + LANGUAGE["ZH"]: { + "duty": "智能体角色", + "constraint": "使用要求", + "few_shots": "示例", + }, + LANGUAGE["EN"]: { + "duty": "Agent Role", + "constraint": "Usage Requirements", + "few_shots": "Few Shots", + }, +} + -def gen_system_prompt_streamable(agent_id: int, model_id: int, task_description: str, user_id: str, tenant_id: str, language: str, tool_ids: Optional[List[int]] = None, sub_agent_ids: Optional[List[int]] = None): +def gen_system_prompt_streamable(agent_id: int, model_id: int, task_description: str, user_id: str, tenant_id: str, language: str, prompt_template_id: Optional[int] = None, tool_ids: Optional[List[int]] = None, sub_agent_ids: Optional[List[int]] = None, knowledge_base_display_names: Optional[List[str]] = None, has_selected_resources: bool = True): try: for system_prompt in generate_and_save_system_prompt_impl( agent_id=agent_id, @@ -38,8 +75,11 @@ def gen_system_prompt_streamable(agent_id: int, model_id: int, task_description: user_id=user_id, tenant_id=tenant_id, language=language, + prompt_template_id=prompt_template_id, tool_ids=tool_ids, - sub_agent_ids=sub_agent_ids + sub_agent_ids=sub_agent_ids, + knowledge_base_display_names=knowledge_base_display_names, + has_selected_resources=has_selected_resources, ): # 
SSE format, each message ends with \n\n yield f"data: {json.dumps({'success': True, 'data': system_prompt}, ensure_ascii=False)}\n\n" @@ -62,8 +102,11 @@ def generate_and_save_system_prompt_impl(agent_id: int, user_id: str, tenant_id: str, language: str, + prompt_template_id: Optional[int] = None, tool_ids: Optional[List[int]] = None, - sub_agent_ids: Optional[List[int]] = None): + sub_agent_ids: Optional[List[int]] = None, + knowledge_base_display_names: Optional[List[str]] = None, + has_selected_resources: bool = True): # Get description of tool and agent from frontend-provided IDs # Frontend always provides tool_ids and sub_agent_ids (could be empty arrays) @@ -77,6 +120,20 @@ def generate_and_save_system_prompt_impl(agent_id: int, tool_info_list = get_enabled_tool_description_for_generate_prompt( tenant_id=tenant_id, agent_id=agent_id) + # Get knowledge base display names for few-shot examples + # Priority: frontend-provided > database query + if knowledge_base_display_names: + logger.debug( + f"Using frontend-provided knowledge base display names: {knowledge_base_display_names}") + else: + knowledge_base_display_names = get_knowledge_base_display_names( + tool_info_list=tool_info_list, + agent_id=agent_id, + tenant_id=tenant_id + ) + logger.debug( + f"Using database query for knowledge base display names: {knowledge_base_display_names}") + # Handle sub-agent IDs if sub_agent_ids and len(sub_agent_ids) > 0: sub_agent_info_list = [] @@ -95,9 +152,21 @@ def generate_and_save_system_prompt_impl(agent_id: int, sub_agent_info_list = get_enabled_sub_agent_description_for_generate_prompt( tenant_id=tenant_id, agent_id=agent_id) + # Re-evaluate has_selected_resources based on the actual resolved lists. + # The frontend value indicates user intent, but after resolving tool_ids/sub_agent_ids + # the actual lists are the source of truth. If both lists are empty, constraint and + # few_shots sections have no meaningful content to generate, so we force False. 
+ has_selected_resources = bool(tool_info_list or sub_agent_info_list) + logger.info( + "Resolved resource availability: tools=%d, sub_agents=%d, has_selected_resources=%s", + len(tool_info_list), + len(sub_agent_info_list), + has_selected_resources, + ) + # 1. Real-time streaming push final_results = {"duty": "", "constraint": "", "few_shots": "", "agent_var_name": "", "agent_display_name": "", - "agent_description": ""} + "agent_description": "", "greeting_message": "", "example_questions": ""} # Get all existing agent names and display names for duplicate checking (only if not in create mode) all_agents = query_all_agent_info_by_tenant_id(tenant_id) @@ -113,8 +182,18 @@ def generate_and_save_system_prompt_impl(agent_id: int, ] # Collect results and yield non-name fields immediately, but hold name fields for duplicate checking - for result_data in generate_system_prompt(sub_agent_info_list, task_description, tool_info_list, tenant_id, - model_id, language): + for result_data in generate_system_prompt( + sub_agent_info_list, + task_description, + tool_info_list, + tenant_id, + user_id, + model_id, + language, + prompt_template_id, + knowledge_base_display_names, + has_selected_resources + ): result_type = result_data["type"] final_results[result_type] = result_data["content"] @@ -133,7 +212,8 @@ def generate_and_save_system_prompt_impl(agent_id: int, exclude_agent_id=agent_id, agents_cache=all_agents ): - logger.info(f"Agent name '{agent_name}' already exists, regenerating with LLM") + logger.info( + f"Agent name '{agent_name}' already exists, regenerating with LLM") try: agent_name = _regenerate_agent_name_with_llm( original_name=agent_name, @@ -143,12 +223,16 @@ def generate_and_save_system_prompt_impl(agent_id: int, tenant_id=tenant_id, language=language, agents_cache=all_agents, - exclude_agent_id=agent_id + exclude_agent_id=agent_id, + prompt_template_id=prompt_template_id, + user_id=user_id, ) - logger.info(f"Regenerated agent name: '{agent_name}'") + 
logger.info( + f"Regenerated agent name: '{agent_name}'") final_results["agent_var_name"] = agent_name except Exception as e: - logger.error(f"Failed to regenerate agent name with LLM: {str(e)}, using fallback") + logger.error( + f"Failed to regenerate agent name with LLM: {str(e)}, using fallback") # Fallback: add suffix agent_name = _generate_unique_agent_name_with_suffix( agent_name, @@ -174,7 +258,8 @@ def generate_and_save_system_prompt_impl(agent_id: int, exclude_agent_id=agent_id, agents_cache=all_agents ): - logger.info(f"Agent display_name '{agent_display_name}' already exists, regenerating with LLM") + logger.info( + f"Agent display_name '{agent_display_name}' already exists, regenerating with LLM") try: agent_display_name = _regenerate_agent_display_name_with_llm( original_display_name=agent_display_name, @@ -184,12 +269,16 @@ def generate_and_save_system_prompt_impl(agent_id: int, tenant_id=tenant_id, language=language, agents_cache=all_agents, - exclude_agent_id=agent_id + exclude_agent_id=agent_id, + prompt_template_id=prompt_template_id, + user_id=user_id, ) - logger.info(f"Regenerated agent display_name: '{agent_display_name}'") + logger.info( + f"Regenerated agent display_name: '{agent_display_name}'") final_results["agent_display_name"] = agent_display_name except Exception as e: - logger.error(f"Failed to regenerate agent display_name with LLM: {str(e)}, using fallback") + logger.error( + f"Failed to regenerate agent display_name with LLM: {str(e)}, using fallback") # Fallback: add suffix agent_display_name = _generate_unique_display_name_with_suffix( agent_display_name, @@ -222,10 +311,159 @@ def generate_and_save_system_prompt_impl(agent_id: int, if not has_content: raise Exception("Failed to generate prompt content.") + # 3. 
Generate greeting message and example questions + try: + greeting_template = get_prompt_template('greeting_generate', language) + greeting_system_prompt = greeting_template.get("GREETING_SYSTEM_PROMPT", "") + greeting_user_prompt_template = greeting_template.get("USER_PROMPT", "") + + greeting_user_prompt = Template(greeting_user_prompt_template, undefined=StrictUndefined).render({ + "display_name": final_results.get("agent_display_name", ""), + "duty_description": final_results.get("duty", ""), + "business_description": task_description, + "few_shots": final_results.get("few_shots", ""), + }) + + greeting_result = call_llm_for_system_prompt( + model_id=model_id, + user_prompt=greeting_user_prompt, + system_prompt=greeting_system_prompt, + tenant_id=tenant_id, + ) + + parsed = None + try: + json_start = greeting_result.find("{") + json_end = greeting_result.rfind("}") + 1 + if json_start >= 0 and json_end > json_start: + parsed = json.loads(greeting_result[json_start:json_end]) + except json.JSONDecodeError: + logger.warning(f"Failed to parse greeting JSON from LLM output: {greeting_result}") + + if parsed and "greeting_message" in parsed and "example_questions" in parsed: + greeting_message = parsed["greeting_message"] + example_questions = parsed["example_questions"] + if isinstance(example_questions, list) and len(example_questions) > 6: + example_questions = example_questions[:6] + else: + greeting_message = greeting_result.strip() if greeting_result else "" + example_questions = [] + + yield { + "type": "greeting_message", + "content": greeting_message, + "is_complete": True + } + yield { + "type": "example_questions", + "content": json.dumps(example_questions, ensure_ascii=False), + "is_complete": True + } + + final_results["greeting_message"] = greeting_message + final_results["example_questions"] = json.dumps(example_questions, ensure_ascii=False) + + # Update agent with greeting (skip in create mode) + if agent_id != 0: + update_agent(agent_id, 
AgentInfoRequest( + agent_id=agent_id, + greeting_message=greeting_message, + example_questions=example_questions, + ), user_id) + except Exception as e: + logger.warning(f"Greeting generation failed: {str(e)}, skipping greeting") + +def optimize_prompt_section_impl( + agent_id: int, + model_id: int, + task_description: str, + tenant_id: str, + language: str, + section_type: str, + section_title: str, + current_content: str, + feedback: str, + tool_ids: Optional[List[int]] = None, + sub_agent_ids: Optional[List[int]] = None, + knowledge_base_display_names: Optional[List[str]] = None, +) -> dict: + normalized_section_type = (section_type or "").strip() + if normalized_section_type not in {"duty", "constraint", "few_shots"}: + raise AppException( + ErrorCode.COMMON_PARAMETER_INVALID, + "Unsupported prompt section type." + ) + + if not (current_content or "").strip(): + raise AppException( + ErrorCode.COMMON_MISSING_REQUIRED_FIELD, + "Current section content is required." + ) -def generate_system_prompt(sub_agent_info_list, task_description, tool_info_list, tenant_id: str, model_id: int, language: str = LANGUAGE["ZH"]): + if not (feedback or "").strip(): + raise AppException( + ErrorCode.COMMON_MISSING_REQUIRED_FIELD, + "Optimization feedback is required." 
+ ) + + tool_info_list = _resolve_prompt_generation_tools( + agent_id=agent_id, + tenant_id=tenant_id, + tool_ids=tool_ids, + ) + knowledge_base_display_names = _resolve_knowledge_base_display_names( + agent_id=agent_id, + tenant_id=tenant_id, + tool_info_list=tool_info_list, + knowledge_base_display_names=knowledge_base_display_names, + ) + sub_agent_info_list = _resolve_prompt_generation_sub_agents( + agent_id=agent_id, + tenant_id=tenant_id, + sub_agent_ids=sub_agent_ids, + ) + + prompt_template = get_prompt_optimize_prompt_template(language) + prompt_context = join_info_for_optimize_prompt_section( + prompt_for_optimize=prompt_template, + section_type=normalized_section_type, + section_title=section_title or _default_prompt_section_title( + normalized_section_type, language), + task_description=task_description, + current_content=current_content, + feedback=feedback, + tool_info_list=tool_info_list, + sub_agent_info_list=sub_agent_info_list, + language=language, + knowledge_base_display_names=knowledge_base_display_names, + ) + + optimized_content = call_llm_for_system_prompt( + model_id=model_id, + user_prompt=prompt_context, + system_prompt=prompt_template["OPTIMIZE_SYSTEM_PROMPT"], + tenant_id=tenant_id, + ).strip() + + if not optimized_content: + raise AppException(ErrorCode.MODEL_PROMPT_GENERATION_FAILED) + + return { + "section_type": normalized_section_type, + "section_title": section_title or _default_prompt_section_title(normalized_section_type, language), + "original_content": current_content, + "optimized_content": optimized_content, + } + + +def generate_system_prompt(sub_agent_info_list, task_description, tool_info_list, tenant_id: str, user_id: str, model_id: int, language: str = LANGUAGE["ZH"], prompt_template_id: Optional[int] = None, knowledge_base_display_names: Optional[List[str]] = None, has_selected_resources: bool = True): """Main function for generating system prompts""" - prompt_for_generate = 
get_prompt_generate_prompt_template(language) + prompt_for_generate = resolve_prompt_generate_template( + tenant_id=tenant_id, + user_id=user_id, + language=language, + prompt_template_id=prompt_template_id, + ) # Prepare content for generating system prompts content = join_info_for_generate_system_prompt( @@ -233,7 +471,9 @@ def generate_system_prompt(sub_agent_info_list, task_description, tool_info_list sub_agent_info_list=sub_agent_info_list, task_description=task_description, tool_info_list=tool_info_list, - language=language + language=language, + knowledge_base_display_names=knowledge_base_display_names, + has_selected_resources=has_selected_resources, ) # Initialize state @@ -243,19 +483,111 @@ def generate_system_prompt(sub_agent_info_list, task_description, tool_info_list stop_flags = {"duty": False, "constraint": False, "few_shots": False, "agent_var_name": False, "agent_display_name": False, "agent_description": False} - # Start all generation threads + # Get model concurrency limit to control the number of concurrent LLM calls + # If None or >= 6, no limit (all 6 calls run concurrently) + # If < 6, use semaphore to limit concurrent calls + model_config = get_model_by_model_id(model_id, tenant_id) + concurrency_limit = model_config.get( + "concurrency_limit") if model_config else None + + # Start all generation threads with concurrency control threads, error_holder = _start_generation_threads( - content, prompt_for_generate, produce_queue, latest, stop_flags, tenant_id, model_id) + content, prompt_for_generate, produce_queue, latest, stop_flags, tenant_id, model_id, + has_selected_resources, + concurrency_limit=concurrency_limit + ) # Stream results yield from _stream_results(produce_queue, latest, stop_flags, threads, error_holder) -def _start_generation_threads(content, prompt_for_generate, produce_queue, latest, stop_flags, tenant_id, model_id): - """Start all prompt generation threads""" +def _resolve_prompt_generation_tools( + agent_id: int, + 
tenant_id: str, + tool_ids: Optional[List[int]] = None, +) -> List[dict]: + if tool_ids and len(tool_ids) > 0: + logger.debug(f"Using frontend-provided tool IDs: {tool_ids}") + return query_tools_by_ids(tool_ids) + + logger.debug("No tools selected (empty tool_ids list)") + return get_enabled_tool_description_for_generate_prompt( + tenant_id=tenant_id, agent_id=agent_id + ) + + +def _resolve_knowledge_base_display_names( + agent_id: int, + tenant_id: str, + tool_info_list: List[dict], + knowledge_base_display_names: Optional[List[str]] = None, +) -> Optional[List[str]]: + if knowledge_base_display_names: + logger.debug( + f"Using frontend-provided knowledge base display names: {knowledge_base_display_names}" + ) + return knowledge_base_display_names + + resolved_names = get_knowledge_base_display_names( + tool_info_list=tool_info_list, + agent_id=agent_id, + tenant_id=tenant_id + ) + logger.debug( + f"Using database query for knowledge base display names: {resolved_names}") + return resolved_names + + +def _resolve_prompt_generation_sub_agents( + agent_id: int, + tenant_id: str, + sub_agent_ids: Optional[List[int]] = None, +) -> List[dict]: + if sub_agent_ids and len(sub_agent_ids) > 0: + sub_agent_info_list = [] + for sub_agent_id in sub_agent_ids: + try: + sub_agent_info = search_agent_info_by_agent_id( + agent_id=sub_agent_id, tenant_id=tenant_id) + sub_agent_info_list.append(sub_agent_info) + except Exception as exc: + logger.warning( + f"Failed to get sub-agent info for agent_id {sub_agent_id}: {str(exc)}" + ) + logger.debug(f"Using frontend-provided sub-agent IDs: {sub_agent_ids}") + return sub_agent_info_list + + logger.debug("No sub-agents selected (empty sub_agent_ids list)") + return get_enabled_sub_agent_description_for_generate_prompt( + tenant_id=tenant_id, agent_id=agent_id + ) + + +def _start_generation_threads(content, prompt_for_generate, produce_queue, latest, stop_flags, tenant_id, model_id, + has_selected_resources=True, concurrency_limit: 
Optional[int] = None): + """Start all prompt generation threads with optional concurrency control.""" # Shared error tracking across threads error_holder = {"error": None} + # Total number of generation tasks + total_tasks = 6 + + # Determine effective concurrency limit + # None means unlimited, 0 or negative means unlimited + if concurrency_limit is None or concurrency_limit <= 0 or concurrency_limit >= total_tasks: + effective_limit = None + else: + effective_limit = concurrency_limit + + # Use semaphore if concurrency is limited + semaphore = threading.Semaphore( + effective_limit) if effective_limit else None + if semaphore: + logger.info( + f"Using concurrency limit of {effective_limit} for prompt generation (total tasks: {total_tasks})") + else: + logger.info("Using unlimited concurrency for prompt generation") + def make_callback(tag): def callback_fn(current_text): latest[tag] = current_text @@ -264,8 +596,16 @@ def callback_fn(current_text): def run_and_flag(tag, sys_prompt): try: - call_llm_for_system_prompt( - model_id, content, sys_prompt, make_callback(tag), tenant_id) + # Acquire semaphore before starting (if limited) + if semaphore: + semaphore.acquire() + try: + call_llm_for_system_prompt( + model_id, content, sys_prompt, make_callback(tag), tenant_id) + finally: + # Always release semaphore after completion + if semaphore: + semaphore.release() except Exception as e: logger.error(f"Error in {tag} generation: {e}") error_holder["error"] = e @@ -275,18 +615,32 @@ def run_and_flag(tag, sys_prompt): threads = [] logger.info("Generating system prompt") + # Base sections always generated prompt_configs = [ - ("duty", prompt_for_generate["DUTY_SYSTEM_PROMPT"]), - ("constraint", prompt_for_generate["CONSTRAINT_SYSTEM_PROMPT"]), - ("few_shots", prompt_for_generate["FEW_SHOTS_SYSTEM_PROMPT"]), + ("duty", prompt_for_generate["duty_system_prompt"]), ("agent_var_name", - prompt_for_generate["AGENT_VARIABLE_NAME_SYSTEM_PROMPT"]), + 
prompt_for_generate["agent_variable_name_system_prompt"]), ("agent_display_name", - prompt_for_generate["AGENT_DISPLAY_NAME_SYSTEM_PROMPT"]), + prompt_for_generate["agent_display_name_system_prompt"]), ("agent_description", - prompt_for_generate["AGENT_DESCRIPTION_SYSTEM_PROMPT"]) + prompt_for_generate["agent_description_system_prompt"]) ] + # Constraint and few_shots sections are only generated when tools or sub-agents are selected + if has_selected_resources: + prompt_configs.extend([ + ("constraint", prompt_for_generate["constraint_system_prompt"]), + ("few_shots", prompt_for_generate["few_shots_system_prompt"]), + ]) + else: + logger.info( + "Skipping constraint and few_shots generation: no tools or sub-agents selected") + # Mark these sections as already complete with empty content + stop_flags["constraint"] = True + stop_flags["few_shots"] = True + latest["constraint"] = "" + latest["few_shots"] = "" + for tag, sys_prompt in prompt_configs: thread = threading.Thread(target=run_and_flag, args=(tag, sys_prompt)) thread.start() @@ -352,7 +706,7 @@ def _stream_results(produce_queue, latest, stop_flags, threads, error_holder): last_results[tag] = latest[tag] -def join_info_for_generate_system_prompt(prompt_for_generate, sub_agent_info_list, task_description, tool_info_list, language: str = LANGUAGE["ZH"]): +def join_info_for_generate_system_prompt(prompt_for_generate, sub_agent_info_list, task_description, tool_info_list, language: str = LANGUAGE["ZH"], knowledge_base_display_names: Optional[List[str]] = None, has_selected_resources: bool = True): input_label = "Inputs" if language == 'en' else "接受输入" output_label = "Output type" if language == 'en' else "返回输出类型" @@ -361,15 +715,90 @@ def join_info_for_generate_system_prompt(prompt_for_generate, sub_agent_info_lis for tool in tool_info_list]) assistant_description = "\n".join( [f"- {sub_agent_info['name']}: {sub_agent_info['description']}" for sub_agent_info in sub_agent_info_list]) - # Generate content using 
template - content = Template(prompt_for_generate["USER_PROMPT"], undefined=StrictUndefined).render({ + + # Build template context + template_context = { "task_description": task_description, "tool_description": tool_description, - "assistant_description": assistant_description - }) + "assistant_description": assistant_description, + # Always include knowledge_base_names to avoid StrictUndefined errors in template. + # An empty string is falsy, so the {% if knowledge_base_names %} block will be skipped. + "knowledge_base_names": "", + # Flag indicating whether tools or sub-agents are selected; + # templates use this to suppress boilerplate in constraint/few_shots sections + "has_selected_resources": has_selected_resources, + } + + # Always add knowledge_base_names to context (empty string when not available). + # This is necessary because Jinja2 StrictUndefined raises an error for any + # undefined variable, even inside an {% if %} block. + if knowledge_base_display_names: + kb_names_str = ", ".join( + f'"{name}"' for name in knowledge_base_display_names) + else: + kb_names_str = "" + template_context["knowledge_base_names"] = kb_names_str + + # Generate content using template + content = Template( + prompt_for_generate["user_prompt"], undefined=StrictUndefined).render(template_context) return content +def join_info_for_optimize_prompt_section( + prompt_for_optimize, + section_type: str, + section_title: str, + task_description: str, + current_content: str, + feedback: str, + tool_info_list, + sub_agent_info_list, + language: str = LANGUAGE["ZH"], + knowledge_base_display_names: Optional[List[str]] = None, +): + input_label = "Inputs" if language == LANGUAGE["EN"] else "接受输入" + output_label = "Output type" if language == LANGUAGE["EN"] else "返回输出类型" + + tool_description = "\n".join( + [f"- {tool['name']}: {tool['description']} \n {input_label}: {tool['inputs']}\n {output_label}: {tool['output_type']}" + for tool in tool_info_list] + ) + assistant_description = 
"\n".join( + [f"- {sub_agent_info['name']}: {sub_agent_info['description']}" for sub_agent_info in sub_agent_info_list] + ) + + if knowledge_base_display_names: + kb_names_str = ", ".join( + f'"{name}"' for name in knowledge_base_display_names) + else: + kb_names_str = "" + + template_context = { + "section_type": section_type, + "section_title": section_title, + "task_description": task_description, + "current_content": current_content, + "feedback": feedback, + "tool_description": tool_description, + "assistant_description": assistant_description, + "knowledge_base_names": kb_names_str, + } + + return Template( + prompt_for_optimize["OPTIMIZE_USER_PROMPT"], + undefined=StrictUndefined + ).render(template_context) + + +def _default_prompt_section_title(section_type: str, language: str) -> str: + localized_titles = PROMPT_SECTION_TYPE_TITLES.get( + language, + PROMPT_SECTION_TYPE_TITLES[LANGUAGE["ZH"]] + ) + return localized_titles.get(section_type, section_type) + + def get_enabled_tool_description_for_generate_prompt(agent_id: int, tenant_id: str): # Get tool information logger.info("Fetching tool instances") @@ -379,6 +808,74 @@ def get_enabled_tool_description_for_generate_prompt(agent_id: int, tenant_id: s return tool_info_list +def get_knowledge_base_display_names(tool_info_list: List[dict], agent_id: int, tenant_id: str) -> Optional[List[str]]: + """ + Extract knowledge base display names from tool configurations. + This is used to ensure few-shot examples use actual configured knowledge base names. 
+ + Args: + tool_info_list: List of tool info dictionaries + agent_id: Agent ID for querying tool instances + tenant_id: Tenant ID for database queries + + Returns: + List of knowledge base display names if knowledge_base_search tool is configured, None otherwise + """ + # Check if knowledge_base_search tool is in the list + kb_tool_ids = [tool['tool_id'] for tool in tool_info_list if tool.get( + 'name') == 'knowledge_base_search'] + if not kb_tool_ids: + logger.debug("No knowledge_base_search tool found in tool list") + return None + + # Get the index_names from ToolInstance for knowledge_base_search tool + all_index_names = [] + for kb_tool_id in kb_tool_ids: + try: + tool_instance = query_tool_instances_by_id( + agent_id=agent_id, + tool_id=kb_tool_id, + tenant_id=tenant_id + ) + if tool_instance and tool_instance.get('params', {}).get('index_names'): + index_names = tool_instance['params']['index_names'] + if isinstance(index_names, list): + all_index_names.extend(index_names) + elif isinstance(index_names, str): + # Handle JSON string format + try: + all_index_names.extend(json.loads(index_names)) + except json.JSONDecodeError: + logger.warning( + f"Failed to parse index_names JSON: {index_names}") + except Exception as e: + logger.warning( + f"Failed to get tool instance for tool_id {kb_tool_id}: {e}") + + if not all_index_names: + logger.debug( + "No index_names configured for knowledge_base_search tool") + return None + + # Remove duplicates while preserving order + unique_index_names = list(dict.fromkeys(all_index_names)) + + # Convert to display names + knowledge_name_map = get_knowledge_name_map_by_index_names( + unique_index_names) + + # Return list of display names (knowledge_name) for each configured index_name + display_names = [] + for index_name in unique_index_names: + display_name = knowledge_name_map.get(index_name, index_name) + if display_name and display_name not in display_names: + display_names.append(display_name) + + logger.debug( + 
f"Converted index_names {unique_index_names} to display_names: {display_names}") + return display_names if display_names else None + + def get_enabled_sub_agent_description_for_generate_prompt(agent_id: int, tenant_id: str): logger.info("Fetching sub-agents information") @@ -392,3 +889,299 @@ def get_enabled_sub_agent_description_for_generate_prompt(agent_id: int, tenant_ sub_agent_info_list.append(sub_agent_info) return sub_agent_info_list + + +# ── Jiuwen SDK 集成 ─────────────────────────────────────────────────────────── + + +@dataclass +class OptimizeRequest: + """优化请求的统一数据结构""" + agent_id: int + model_id: int + task_description: str + section_type: str + section_title: str + current_content: str + feedback: str + mode: str = "general" + start_pos: Opt[int] = None + end_pos: Opt[int] = None + tool_ids: Opt[list[int]] = None + sub_agent_ids: Opt[list[int]] = None + knowledge_base_display_names: Opt[list[str]] = None + + +@dataclass +class OptimizeResult: + """优化结果的统一数据结构""" + optimized_content: str + source: str + section_type: str = "" + section_title: str = "" + original_content: str = "" + + +class PromptOptimizationService: + """提示词优化服务 — 统一入口,模式二选一""" + + def optimize_from_debug(self, agent_id: int, feedback: str, selected, history=None) -> OptimizeResult: + """基于调试对话自动优化整个 system prompt(完整模板)。 + + Args: + selected: OptimizeFromDebugSelected (pydantic model) or any object with user_question/assistant_answer. + history: Optional[List[HistoryItem]] + """ + if not (feedback or "").strip(): + raise AppException( + ErrorCode.COMMON_MISSING_REQUIRED_FIELD, + "Optimization feedback is required.", + ) + + if not self.is_jiuwen_mode_available(): + raise NexentCapabilityError( + "Auto optimize from debug requires Jiuwen SDK to be enabled." 
+ ) + + agent_info = search_agent_info_by_agent_id( + agent_id=agent_id, tenant_id=self.tenant_id, version_no=0) + + duty = (agent_info.get("duty_prompt") or "").strip() + constraint = (agent_info.get("constraint_prompt") or "").strip() + few_shots = (agent_info.get("few_shots_prompt") or "").strip() + + original_full_prompt = "\n\n".join( + [ + "# Duty\n" + duty, + "# Constraint\n" + constraint, + "# FewShots\n" + few_shots, + ] + ).strip() + + if not original_full_prompt: + raise AppException( + ErrorCode.COMMON_MISSING_REQUIRED_FIELD, + "Agent system prompt is empty.", + ) + + user_question = getattr(selected, "user_question", None) or ( + selected.get("user_question") if isinstance(selected, dict) else "") + assistant_answer = getattr(selected, "assistant_answer", None) or ( + selected.get("assistant_answer") if isinstance(selected, dict) else "") + + bad_case_obj = type("_BadCase", (), {}) + bc = bad_case_obj() + bc.question = user_question or "" + bc.answer = assistant_answer or "" + bc.label = "" + bc.reason = feedback + + adapter_cls = _get_jiuwen_adapter_class() + if adapter_cls is None: + raise JiuwenSDKError("Jiuwen SDK adapter is unavailable") + + adapter = adapter_cls( + model_id=self.model_id, tenant_id=self.tenant_id) + + optimized_full_prompt = adapter.optimize_badcase( + prompt=original_full_prompt, + bad_cases=[bc], + language=self.language, + ) + + return OptimizeResult( + optimized_content=optimized_full_prompt, + source="jiuwen", + section_type="full_prompt", + section_title="system_prompt", + original_content=original_full_prompt, + ) + + def __init__(self, model_id: int, tenant_id: str, language: str): + self.model_id = model_id + self.tenant_id = tenant_id + self.language = language + + def is_jiuwen_mode_available(self) -> bool: + """判断 Jiuwen SDK 模式是否可用""" + if not ENABLE_JIUWEN_SDK: + return False + + return _get_jiuwen_adapter_class() is not None + + def optimize(self, request: OptimizeRequest) -> OptimizeResult: + """统一优化入口 — 优先 Jiuwen 
SDK,失败则降级 nexent 原生""" + if self.is_jiuwen_mode_available(): + logger.info( + f"[prompt-optimize] mode={request.mode}, using Jiuwen SDK") + try: + return self._optimize_with_jiuwen(request) + except JiuwenSDKError as e: + logger.warning(f"Jiuwen SDK 模式失败,降级到 nexent 原生: {e}") + return self._optimize_with_nexent(request) + else: + return self._optimize_with_nexent(request) + + def _optimize_with_jiuwen(self, request: OptimizeRequest) -> OptimizeResult: + """Jiuwen SDK 模式""" + logger.info( + f"[jiuwen-optimize] mode={request.mode}, start_pos={request.start_pos}, " + f"end_pos={request.end_pos}, prompt_len={len(request.current_content)}, " + f"feedback_len={len(request.feedback)}" + ) + adapter_cls = _get_jiuwen_adapter_class() + if adapter_cls is None: + raise JiuwenSDKError("Jiuwen SDK adapter is unavailable") + + adapter = adapter_cls( + model_id=self.model_id, + tenant_id=self.tenant_id, + ) + result = adapter.optimize( + prompt=request.current_content, + feedback=request.feedback, + mode=request.mode, + start_pos=request.start_pos, + end_pos=request.end_pos, + language=self.language, + ) + + # Jiuwen insert/select mode returns a fragment by design. + # We reassemble the full prompt here so frontend always receives full optimized content. 
+ if request.mode == "insert": + if request.start_pos is None or not isinstance(request.start_pos, int): + raise JiuwenSDKError("insert mode requires start_pos") + if request.start_pos < 0 or request.start_pos > len(request.current_content): + raise JiuwenSDKError("insert mode start_pos out of bounds") + optimized_full = ( + request.current_content[: request.start_pos] + + result + + request.current_content[request.start_pos:] + ) + elif request.mode == "select": + if request.start_pos is None or request.end_pos is None: + raise JiuwenSDKError( + "select mode requires start_pos and end_pos") + if not isinstance(request.start_pos, int) or not isinstance(request.end_pos, int): + raise JiuwenSDKError( + "select mode start_pos/end_pos must be int") + if request.start_pos < 0 or request.end_pos < 0 or request.start_pos >= request.end_pos: + raise JiuwenSDKError("select mode start_pos/end_pos invalid") + if request.end_pos > len(request.current_content): + raise JiuwenSDKError("select mode end_pos out of bounds") + optimized_full = ( + request.current_content[: request.start_pos] + + result + + request.current_content[request.end_pos:] + ) + else: + optimized_full = result + + return OptimizeResult( + optimized_content=optimized_full, + source="jiuwen", + section_type=request.section_type, + section_title=request.section_title, + original_content=request.current_content, + ) + + def _optimize_with_nexent(self, request: OptimizeRequest) -> OptimizeResult: + """nexent 原生模式 — 只支持 general 模式""" + if request.mode != "general": + raise NexentCapabilityError( + f"nexent 原生模式只支持 general 模式," + f"当前请求 mode={request.mode} 不支持,请启用 Jiuwen SDK" + ) + + result = optimize_prompt_section_impl( + agent_id=request.agent_id, + model_id=self.model_id, + task_description=request.task_description, + tenant_id=self.tenant_id, + language=self.language, + section_type=request.section_type, + section_title=request.section_title, + current_content=request.current_content, + 
feedback=request.feedback, + tool_ids=request.tool_ids, + sub_agent_ids=request.sub_agent_ids, + knowledge_base_display_names=request.knowledge_base_display_names, + ) + return OptimizeResult( + optimized_content=result["optimized_content"], + source="nexent", + section_type=result["section_type"], + section_title=result["section_title"], + original_content=result["original_content"], + ) + + def optimize_badcase( + self, + current_content: str, + bad_cases: list, + agent_id: int, + section_type: str, + section_title: str, + tool_ids: Opt[list[int]] = None, + sub_agent_ids: Opt[list[int]] = None, + knowledge_base_display_names: Opt[list[str]] = None, + ) -> OptimizeResult: + """坏案例优化入口 — 优先 Jiuwen SDK,失败则降级""" + if self.is_jiuwen_mode_available(): + logger.info("[prompt-badcase] using Jiuwen SDK") + try: + return self._optimize_badcase_with_jiuwen( + current_content, bad_cases, section_type, section_title + ) + except JiuwenSDKError as e: + logger.warning(f"Jiuwen SDK badcase 模式失败,降级到 nexent 原生: {e}") + return self._optimize_badcase_with_nexent( + current_content, bad_cases, agent_id, section_type, section_title, + tool_ids, sub_agent_ids, knowledge_base_display_names, + ) + else: + return self._optimize_badcase_with_nexent( + current_content, bad_cases, agent_id, section_type, section_title, + tool_ids, sub_agent_ids, knowledge_base_display_names, + ) + + def _optimize_badcase_with_jiuwen( + self, current_content: str, bad_cases: list, section_type: str, section_title: str + ) -> OptimizeResult: + """Jiuwen SDK 坏案例优化""" + adapter_cls = _get_jiuwen_adapter_class() + if adapter_cls is None: + raise JiuwenSDKError("Jiuwen SDK adapter is unavailable") + + adapter = adapter_cls( + model_id=self.model_id, + tenant_id=self.tenant_id, + ) + result = adapter.optimize_badcase( + prompt=current_content, + bad_cases=bad_cases, + language=self.language, + ) + return OptimizeResult( + optimized_content=result, + source="jiuwen", + section_type=section_type, + 
section_title=section_title, + original_content=current_content, + ) + + def _optimize_badcase_with_nexent( + self, + current_content: str, + bad_cases: list, + agent_id: int, + section_type: str, + section_title: str, + tool_ids: Opt[list[int]] = None, + sub_agent_ids: Opt[list[int]] = None, + knowledge_base_display_names: Opt[list[str]] = None, + ) -> OptimizeResult: + """nexent 原生模式不支持坏案例优化""" + raise NexentCapabilityError( + "nexent 原生模式不支持 badcase 优化,请启用 Jiuwen SDK" + ) diff --git a/backend/services/prompt_template_service.py b/backend/services/prompt_template_service.py new file mode 100644 index 000000000..14224a099 --- /dev/null +++ b/backend/services/prompt_template_service.py @@ -0,0 +1,322 @@ +import logging +from typing import Optional + +from consts.const import DEFAULT_TENANT_ID, DEFAULT_USER_ID +from consts.const import LANGUAGE +from consts.exceptions import DuplicateError, NotFoundException, ValidationError +from consts.model import PromptTemplateRequest +from database.prompt_template_db import ( + create_prompt_template, + delete_prompt_template, + get_prompt_template_by_id, + get_prompt_template_by_name, + get_prompt_template_by_template_id, + query_prompt_templates_by_user, + upsert_prompt_template_by_id, + update_prompt_template, +) +from utils.prompt_template_utils import ( + get_prompt_generate_prompt_template, + merge_prompt_generate_templates, + normalize_prompt_generate_template_content, +) + +logger = logging.getLogger("prompt_template_service") + +SYSTEM_PROMPT_TEMPLATE_ID = 0 +SYSTEM_PROMPT_TEMPLATE_NAME = "system_default" +PROMPT_TEMPLATE_TYPE_AGENT_GENERATE = "agent_generate" +SYSTEM_PROMPT_TEMPLATE_DESCRIPTION = "System default prompt template" +SYSTEM_PROMPT_TEMPLATE_TENANT_ID = DEFAULT_TENANT_ID +SYSTEM_PROMPT_TEMPLATE_USER_ID = DEFAULT_USER_ID + + +def _normalize_prompt_template_entity(template: Optional[dict]) -> Optional[dict]: + """Normalize prompt template entity content keys to lowercase.""" + if not template: + return 
template + + normalized_template = dict(template) + normalized_template["template_content_zh"] = normalize_prompt_generate_template_content( + normalized_template.get("template_content_zh") + ) + template_content_en = normalize_prompt_generate_template_content( + normalized_template.get("template_content_en") + ) + normalized_template["template_content_en"] = template_content_en or None + return normalized_template + + +def build_system_default_prompt_template_payload() -> dict: + """Build the canonical system default prompt template payload from YAML files.""" + system_template_zh = normalize_prompt_generate_template_content( + get_prompt_generate_prompt_template(LANGUAGE["ZH"]) + ) + system_template_en = normalize_prompt_generate_template_content( + get_prompt_generate_prompt_template(LANGUAGE["EN"]) + ) + return { + "template_id": SYSTEM_PROMPT_TEMPLATE_ID, + "template_name": SYSTEM_PROMPT_TEMPLATE_NAME, + "description": SYSTEM_PROMPT_TEMPLATE_DESCRIPTION, + "template_type": PROMPT_TEMPLATE_TYPE_AGENT_GENERATE, + "tenant_id": SYSTEM_PROMPT_TEMPLATE_TENANT_ID, + "user_id": SYSTEM_PROMPT_TEMPLATE_USER_ID, + "template_content_zh": system_template_zh, + "template_content_en": system_template_en, + "created_by": SYSTEM_PROMPT_TEMPLATE_USER_ID, + "updated_by": SYSTEM_PROMPT_TEMPLATE_USER_ID, + "delete_flag": "N", + } + + +def sync_system_default_prompt_template() -> dict: + """Sync the YAML-backed system default prompt template into the database.""" + payload = build_system_default_prompt_template_payload() + prompt_template = upsert_prompt_template_by_id( + template_id=SYSTEM_PROMPT_TEMPLATE_ID, + template_data=payload, + user_id=SYSTEM_PROMPT_TEMPLATE_USER_ID, + ) + prompt_template["is_system_default"] = True + return _normalize_prompt_template_entity(prompt_template) + + +def get_system_default_prompt_template() -> dict: + """Return the system default prompt generation template from the database.""" + prompt_template = get_prompt_template_by_template_id( + 
template_id=SYSTEM_PROMPT_TEMPLATE_ID, + template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE, + ) + if not prompt_template: + prompt_template = sync_system_default_prompt_template() + else: + prompt_template["is_system_default"] = True + return _normalize_prompt_template_entity({ + **prompt_template, + "is_system_default": True, + }) + + +def _normalize_template_request(request: PromptTemplateRequest) -> dict: + """Normalize prompt template request payload.""" + template_name = (request.template_name or "").strip() + if not template_name: + raise ValidationError("template_name is required") + + if request.template_type != PROMPT_TEMPLATE_TYPE_AGENT_GENERATE: + raise ValidationError("Unsupported template type") + + zh_content = normalize_prompt_generate_template_content( + request.template_content_zh.model_dump() + ) + if len(zh_content) == 0: + raise ValidationError("template_content_zh is required") + + en_content = None + if request.template_content_en is not None: + en_content = normalize_prompt_generate_template_content( + request.template_content_en.model_dump() + ) + if len(en_content) == 0: + en_content = None + + return { + "template_name": template_name, + "description": (request.description or "").strip() or None, + "template_type": request.template_type, + "template_content_zh": zh_content, + "template_content_en": en_content, + } + + +def list_prompt_templates_impl(tenant_id: str, user_id: str) -> list[dict]: + """List all prompt templates for the current user.""" + system_default_template = sync_system_default_prompt_template() + templates = query_prompt_templates_by_user( + tenant_id=tenant_id, + user_id=user_id, + template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE, + ) + return [system_default_template, *[ + _normalize_prompt_template_entity({ + **template, + "is_system_default": False, + }) + for template in templates + if template.get("template_id") != SYSTEM_PROMPT_TEMPLATE_ID + ]] + + +def get_prompt_template_detail_impl(template_id: int, tenant_id: 
str, user_id: str) -> dict: + """Get prompt template detail.""" + if template_id == SYSTEM_PROMPT_TEMPLATE_ID: + return get_system_default_prompt_template() + + template = get_prompt_template_by_id( + template_id=template_id, + tenant_id=tenant_id, + user_id=user_id, + template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE, + ) + if not template: + raise NotFoundException("Prompt template not found") + + template["is_system_default"] = False + return _normalize_prompt_template_entity(template) + + +def create_prompt_template_impl( + request: PromptTemplateRequest, + tenant_id: str, + user_id: str, +) -> dict: + """Create a prompt template.""" + normalized_request = _normalize_template_request(request) + existing_template = get_prompt_template_by_name( + template_name=normalized_request["template_name"], + tenant_id=tenant_id, + user_id=user_id, + template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE, + ) + if existing_template: + raise DuplicateError("Prompt template name already exists") + + created_template = create_prompt_template({ + **normalized_request, + "tenant_id": tenant_id, + "user_id": user_id, + "created_by": user_id, + "updated_by": user_id, + }) + created_template["is_system_default"] = False + return _normalize_prompt_template_entity(created_template) + + +def update_prompt_template_impl( + template_id: int, + request: PromptTemplateRequest, + tenant_id: str, + user_id: str, +) -> dict: + """Update a prompt template.""" + if template_id == SYSTEM_PROMPT_TEMPLATE_ID: + raise ValidationError("System default prompt template cannot be updated") + + existing_template = get_prompt_template_by_id( + template_id=template_id, + tenant_id=tenant_id, + user_id=user_id, + template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE, + ) + if not existing_template: + raise NotFoundException("Prompt template not found") + + normalized_request = _normalize_template_request(request) + duplicate_template = get_prompt_template_by_name( + 
template_name=normalized_request["template_name"], + tenant_id=tenant_id, + user_id=user_id, + template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE, + ) + if duplicate_template and duplicate_template["template_id"] != template_id: + raise DuplicateError("Prompt template name already exists") + + updated_template = update_prompt_template( + template_id=template_id, + template_data=normalized_request, + user_id=user_id, + ) + updated_template["is_system_default"] = False + return _normalize_prompt_template_entity(updated_template) + + +def delete_prompt_template_impl(template_id: int, tenant_id: str, user_id: str) -> dict: + """Delete a prompt template.""" + if template_id == SYSTEM_PROMPT_TEMPLATE_ID: + raise ValidationError("System default prompt template cannot be deleted") + + existing_template = get_prompt_template_by_id( + template_id=template_id, + tenant_id=tenant_id, + user_id=user_id, + template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE, + ) + if not existing_template: + raise NotFoundException("Prompt template not found") + + deleted_count = delete_prompt_template(template_id=template_id, user_id=user_id) + return { + "template_id": template_id, + "deleted": deleted_count > 0, + } + + +def resolve_prompt_generate_template( + tenant_id: str, + user_id: str, + language: str, + prompt_template_id: Optional[int] = None, +) -> dict: + """Resolve prompt generation template for the current user and language.""" + system_default_template = sync_system_default_prompt_template() + system_template = ( + system_default_template.get("template_content_en") + if language == LANGUAGE["EN"] + else system_default_template.get("template_content_zh") + ) + fallback_system_template = system_default_template.get("template_content_zh") + + if not prompt_template_id or prompt_template_id == SYSTEM_PROMPT_TEMPLATE_ID: + return merge_prompt_generate_templates(system_template, fallback_system_template) + + prompt_template = get_prompt_template_by_id( + template_id=prompt_template_id, + 
tenant_id=tenant_id, + user_id=user_id, + template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE, + ) + if not prompt_template: + logger.warning( + "Prompt template %s not found for tenant %s user %s, falling back to system default", + prompt_template_id, + tenant_id, + user_id, + ) + return merge_prompt_generate_templates(system_template, fallback_system_template) + + custom_language_template = ( + prompt_template.get("template_content_en") + if language == LANGUAGE["EN"] + else prompt_template.get("template_content_zh") + ) + return merge_prompt_generate_templates( + custom_language_template, + prompt_template.get("template_content_zh"), + system_template, + fallback_system_template, + ) + + +def get_prompt_template_summary( + template_id: Optional[int], + tenant_id: str, + user_id: str, +) -> tuple[Optional[int], Optional[str]]: + """Resolve prompt template identity for saving on agent.""" + if template_id is None: + return None, None + + if template_id == SYSTEM_PROMPT_TEMPLATE_ID: + return SYSTEM_PROMPT_TEMPLATE_ID, SYSTEM_PROMPT_TEMPLATE_NAME + + prompt_template = get_prompt_template_by_id( + template_id=template_id, + tenant_id=tenant_id, + user_id=user_id, + template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE, + ) + if not prompt_template: + raise NotFoundException("Prompt template not found") + + return prompt_template["template_id"], prompt_template["template_name"] diff --git a/backend/services/providers/dashscope_provider.py b/backend/services/providers/dashscope_provider.py index b9fb7ab7b..497dcfe99 100644 --- a/backend/services/providers/dashscope_provider.py +++ b/backend/services/providers/dashscope_provider.py @@ -6,6 +6,75 @@ from services.providers.base import AbstractModelProvider, _classify_provider_error +DASHSCOPE_IMAGE_GENERATION_KEYWORDS = ( + "image", + "wanx", + "aitryon", + "tryon", + "flux", + "stable-diffusion", + "sdxl", +) +DASHSCOPE_IMAGE_UNDERSTANDING_KEYWORDS = ( + "qwen-vl", + "qwen2-vl", + "qwen2.5-vl", + "qwen3-vl", + "qwen3.5-vl", + 
"qwen3.6-vl", + "-vl", + "vl-", + "vision", + "visual", + "ocr", + "qwen3.6", + "qwen-3.6", +) +DASHSCOPE_VIDEO_UNDERSTANDING_KEYWORDS = ("omni", "video-understanding", "video-ocr") + + +def _modality_set(value) -> set: + if not value: + return set() + if isinstance(value, str): + return {value.lower()} + return {str(item).lower() for item in value} + + +def _has_keyword(text: str, keywords: tuple) -> bool: + return any(keyword in text for keyword in keywords) + + +def _is_dashscope_explicit_image_understanding_model(model_id: str) -> bool: + return _has_keyword(model_id, DASHSCOPE_IMAGE_UNDERSTANDING_KEYWORDS) + + +def _is_dashscope_image_generation_model(model_id: str, desc: str, req_mods: set, res_mods: set) -> bool: + if _is_dashscope_explicit_image_understanding_model(model_id): + return False + return "image" in res_mods or _has_keyword(model_id, DASHSCOPE_IMAGE_GENERATION_KEYWORDS) + + +def _is_dashscope_video_understanding_model(model_id: str, desc: str, req_mods: set, res_mods: set) -> bool: + searchable_text = f"{model_id} {desc.lower()}" + if "video" in req_mods and "text" in res_mods: + return True + return _has_keyword(searchable_text, DASHSCOPE_VIDEO_UNDERSTANDING_KEYWORDS) + + +def _is_dashscope_image_understanding_model(model_id: str, desc: str, req_mods: set, res_mods: set) -> bool: + searchable_text = f"{model_id} {desc.lower()}" + if _is_dashscope_image_generation_model(model_id, desc, req_mods, res_mods): + return False + if _is_dashscope_video_understanding_model(model_id, desc, req_mods, res_mods): + return False + if ("image" in req_mods or "video" in req_mods) and "text" in res_mods: + return True + return _is_dashscope_explicit_image_understanding_model(model_id) or _has_keyword( + searchable_text, DASHSCOPE_IMAGE_UNDERSTANDING_KEYWORDS + ) + + class DashScopeModelProvider(AbstractModelProvider): """Concrete implementation for DashScope (Aliyun) provider.""" @@ -57,6 +126,8 @@ async def get_models(self, provider_config: Dict) -> List[Dict]: 
categorized_models = { "chat": [], # Maps to "llm" "vlm": [], # Maps to "vlm" + "vlm2": [], # Maps to image generation models + "vlm3": [], # Maps to video understanding models "embedding": [], # Maps to "embedding" / "multi_embedding" "rerank": [], # Maps to "rerank" "tts": [], # Maps to "tts" @@ -68,9 +139,11 @@ async def get_models(self, provider_config: Dict) -> List[Dict]: # Extract key fields for logical determination (lowercased for robustness) m_id = model_obj.get('model', '').lower() desc = model_obj.get('description', '') - metadata = model_obj.get('inference_metadata', {}) + metadata = model_obj.get('inference_metadata') or {} req_mod = metadata.get('request_modality', []) res_mod = metadata.get('response_modality', []) + req_mods = _modality_set(req_mod) + res_mods = _modality_set(res_mod) model_obj.setdefault("object", model_obj.get("object", "model")) model_obj.setdefault("owned_by", model_obj.get("owned_by", "dashscope")) cleaned_model = { @@ -107,8 +180,17 @@ async def get_models(self, provider_config: Dict) -> List[Dict]: continue # 5. 
VLM - vision_mods = {'Image', 'Video'} - if (set(req_mod) & vision_mods) or (set(res_mod) & vision_mods) or '视觉' in desc: + if _is_dashscope_video_understanding_model(m_id, desc, req_mods, res_mods): + cleaned_model.update({"model_tag": "chat", "model_type": "vlm3"}) + categorized_models['vlm3'].append(cleaned_model) + continue + + if _is_dashscope_image_generation_model(m_id, desc, req_mods, res_mods): + cleaned_model.update({"model_tag": "chat", "model_type": "vlm2"}) + categorized_models['vlm2'].append(cleaned_model) + continue + + if _is_dashscope_image_understanding_model(m_id, desc, req_mods, res_mods): cleaned_model.update({"model_tag": "chat", "model_type": "vlm"}) categorized_models['vlm'].append(cleaned_model) continue @@ -124,7 +206,10 @@ async def get_models(self, provider_config: Dict) -> List[Dict]: elif target_model_type in ("embedding", "multi_embedding"): return categorized_models["embedding"] elif target_model_type in categorized_models: - return categorized_models[target_model_type] + return [ + {**model, "model_type": target_model_type} + for model in categorized_models[target_model_type] + ] else: return [] except (httpx.HTTPStatusError, httpx.ConnectTimeout, httpx.ConnectError, Exception) as e: diff --git a/backend/services/providers/silicon_provider.py b/backend/services/providers/silicon_provider.py index ea41cc95d..1875b3949 100644 --- a/backend/services/providers/silicon_provider.py +++ b/backend/services/providers/silicon_provider.py @@ -1,4 +1,5 @@ import httpx +import re from typing import Dict, List from consts.const import DEFAULT_LLM_MAX_TOKENS @@ -6,6 +7,62 @@ from services.providers.base import AbstractModelProvider, _classify_provider_error +SILICON_VLM_MODEL_KEYWORDS = ( + "-vl", + "_vl", + "/vl", + ".vl", + "vl-", + "vision", + "visual", + "internvl", + "deepseek-vl", + "deepseekvl", + "glm-4v", + "minicpm-v", + "llava", + "kimi-vl", + "kimi-k2.5", + "kimi-k2.6", + "qvq", + "omni", + "qwen3.5", + "qwen3.6", +) + 
+SILICON_VLM_METADATA_KEYWORDS = ("image", "video", "vision", "visual") + + +def _contains_silicon_vlm_metadata(value) -> bool: + if isinstance(value, str): + lower_value = value.lower() + return any(keyword in lower_value for keyword in SILICON_VLM_METADATA_KEYWORDS) + if isinstance(value, list): + return any(_contains_silicon_vlm_metadata(item) for item in value) + if isinstance(value, dict): + return any(_contains_silicon_vlm_metadata(item) for item in value.values()) + return False + + +def _is_silicon_vlm_model(model: Dict) -> bool: + if _contains_silicon_vlm_metadata(model): + return True + + model_id = str(model.get("id", "")).lower() + model_name = str(model.get("name", "")).lower() + searchable_text = f"{model_id} {model_name}" + if any(keyword in searchable_text for keyword in SILICON_VLM_MODEL_KEYWORDS): + return True + + return bool(re.search(r"glm-\d+(?:\.\d+)?v", searchable_text)) + + +def _is_silicon_omni_model(model: Dict) -> bool: + model_id = str(model.get("id", "")).lower() + model_name = str(model.get("name", "")).lower() + return "omni" in f"{model_id} {model_name}" + + class SiliconModelProvider(AbstractModelProvider): """Concrete implementation for SiliconFlow provider.""" @@ -25,32 +82,39 @@ async def get_models(self, provider_config: Dict) -> List[Dict]: headers = {"Authorization": f"Bearer {model_api_key}"} + provider_model_type = "vlm" if model_type in ("vlm2", "vlm3") else model_type + # Choose endpoint by model type - if model_type in ("llm", "vlm"): + if provider_model_type in ("llm", "vlm"): silicon_url = f"{SILICON_GET_URL}?sub_type=chat" - elif model_type in ("embedding", "multi_embedding"): + elif provider_model_type in ("embedding", "multi_embedding"): silicon_url = f"{SILICON_GET_URL}?sub_type=embedding" - elif model_type == "rerank": + elif provider_model_type == "rerank": silicon_url = f"{SILICON_GET_URL}?sub_type=reranker" else: - silicon_url = SILICON_GET_URL + return [] async with httpx.AsyncClient(verify=False) as client: 
response = await client.get(silicon_url, headers=headers) response.raise_for_status() model_list: List[Dict] = response.json()["data"] + if model_type == "vlm3": + model_list = [item for item in model_list if _is_silicon_omni_model(item)] + elif provider_model_type == "vlm": + model_list = [item for item in model_list if _is_silicon_vlm_model(item)] + # Annotate models with canonical fields expected downstream - if model_type in ("llm", "vlm"): + if provider_model_type in ("llm", "vlm"): for item in model_list: item["model_tag"] = "chat" item["model_type"] = model_type item["max_tokens"] = DEFAULT_LLM_MAX_TOKENS - elif model_type in ("embedding", "multi_embedding"): + elif provider_model_type in ("embedding", "multi_embedding"): for item in model_list: item["model_tag"] = "embedding" item["model_type"] = model_type - elif model_type == "rerank": + elif provider_model_type == "rerank": for item in model_list: item["model_tag"] = "rerank" item["model_type"] = model_type diff --git a/backend/services/providers/tokenpony_provider.py b/backend/services/providers/tokenpony_provider.py index ab4446c1b..be2bb9c71 100644 --- a/backend/services/providers/tokenpony_provider.py +++ b/backend/services/providers/tokenpony_provider.py @@ -9,6 +9,64 @@ from services.providers.base import AbstractModelProvider, _classify_provider_error +TOKENPONY_IMAGE_UNDERSTANDING_KEYWORDS = ( + "qwen-vl", + "qwen2-vl", + "qwen2.5-vl", + "qwen3-vl", + "qwen3.5-vl", + "qwen3.6-vl", + "-vl", + "vl-", + "vision", + "visual", + "ocr", + "gpt-4o", + "qwen3.6", + "qwen-3.6", +) +TOKENPONY_IMAGE_GENERATION_KEYWORDS = ( + "image", + "dall", + "flux", + "stable-diffusion", + "sdxl", + "midjourney", + "wanx", + "kolors", + "seedream", + "ideogram", + "recraft", +) +TOKENPONY_VIDEO_UNDERSTANDING_KEYWORDS = ("omni", "video") + + +def _has_keyword(text: str, keywords: tuple) -> bool: + return any(keyword in text for keyword in keywords) + + +def _is_tokenpony_explicit_image_understanding_model(model_id: str) 
-> bool: + return _has_keyword(model_id, TOKENPONY_IMAGE_UNDERSTANDING_KEYWORDS) + + +def _is_tokenpony_image_generation_model(model_id: str) -> bool: + if _is_tokenpony_explicit_image_understanding_model(model_id): + return False + return _has_keyword(model_id, TOKENPONY_IMAGE_GENERATION_KEYWORDS) + + +def _is_tokenpony_video_understanding_model(model_id: str) -> bool: + return _has_keyword(model_id, TOKENPONY_VIDEO_UNDERSTANDING_KEYWORDS) + + +def _is_tokenpony_image_understanding_model(model_id: str) -> bool: + if _is_tokenpony_image_generation_model(model_id): + return False + if _is_tokenpony_video_understanding_model(model_id): + return False + return _is_tokenpony_explicit_image_understanding_model(model_id) + + class TokenPonyModelProvider(AbstractModelProvider): """Concrete implementation for TokenPony provider.""" @@ -46,6 +104,8 @@ async def get_models(self, provider_config: Dict) -> List[Dict]: categorized_models = { "chat": [], # Maps to "llm" "vlm": [], # Maps to "vlm" + "vlm2": [], # Maps to image generation models + "vlm3": [], # Maps to video understanding models "embedding": [], # Maps to "embedding" / "multi_embedding" "rerank": [], # Maps to "rerank" "tts": [], # Maps to "tts" @@ -86,9 +146,14 @@ async def get_models(self, provider_config: Dict) -> List[Dict]: cleaned_model.update({"model_tag": "tts", "model_type": "tts"}) categorized_models['tts'].append(cleaned_model) - # 5. VLM (Vision Language Model / Image & Video Generation) - - elif any(keyword in m_id for keyword in ['-vl', 'vl-', 'ocr', 'vision']): + # 5. 
Multimodal models + elif _is_tokenpony_video_understanding_model(m_id): + cleaned_model.update({"model_tag": "chat", "model_type": "vlm3"}) + categorized_models['vlm3'].append(cleaned_model) + elif _is_tokenpony_image_generation_model(m_id): + cleaned_model.update({"model_tag": "chat", "model_type": "vlm2"}) + categorized_models['vlm2'].append(cleaned_model) + elif _is_tokenpony_image_understanding_model(m_id): cleaned_model.update({"model_tag": "chat", "model_type": "vlm"}) categorized_models['vlm'].append(cleaned_model) @@ -104,7 +169,10 @@ async def get_models(self, provider_config: Dict) -> List[Dict]: elif target_model_type in ("embedding", "multi_embedding"): return categorized_models["embedding"] elif target_model_type in categorized_models: - return categorized_models[target_model_type] + return [ + {**model, "model_type": target_model_type} + for model in categorized_models[target_model_type] + ] else: return [] diff --git a/backend/services/redis_service.py b/backend/services/redis_service.py index efd2c0a7b..1ffcf921c 100644 --- a/backend/services/redis_service.py +++ b/backend/services/redis_service.py @@ -1,6 +1,7 @@ import json import logging -from typing import Dict, Any, Optional +import re +from typing import Dict, Any, Optional, Tuple, Set, List import redis @@ -23,8 +24,8 @@ def client(self) -> redis.Redis: if not REDIS_URL: raise ValueError("REDIS_URL environment variable is not set") self._client = redis.from_url( - REDIS_URL, - socket_timeout=5, + REDIS_URL, + socket_timeout=5, socket_connect_timeout=5, decode_responses=True ) @@ -215,7 +216,7 @@ def delete_document_records(self, index_name: str, path_or_url: str) -> Dict[str return result - def _recursively_delete_task_and_parents(self, task_id: str) -> tuple[int, set]: + def _recursively_delete_task_and_parents(self, task_id: str) -> Tuple[int, Set[str]]: """ Iteratively delete a Celery task and all its parent tasks from Redis. 
A single task chain is deleted, and the IDs of the deleted tasks are returned. @@ -309,16 +310,11 @@ def _cleanup_celery_tasks(self, index_name: str) -> int: # Check for failed tasks where metadata is in the exception message if task_index_name is None and 'exc_message' in result: - try: - exc_str = str(result['exc_message']) - if '{' in exc_str and '}' in exc_str: - json_part = exc_str[exc_str.find('{'):exc_str.rfind('}')+1] - cleaned_json_part = json_part.replace('\\"', '"') - error_data = json.loads(cleaned_json_part) - task_index_name = error_data.get('index_name') - except (json.JSONDecodeError, TypeError, IndexError) as e: - key_str = key.decode('utf-8') if isinstance(key, bytes) else key - logger.warning(f"Could not parse exception metadata for task key {key_str}: {e}") + error_data = self._extract_error_metadata_from_exc_message( + result.get("exc_message") + ) + if error_data: + task_index_name = error_data.get('index_name') if task_index_name == index_name: key_str = key.decode('utf-8') if isinstance(key, bytes) else key @@ -366,15 +362,11 @@ def _cleanup_celery_tasks(self, index_name: str) -> int: ) if task_index_name is None and 'exc_message' in result: - try: - exc_str = str(result['exc_message']) - if '{' in exc_str and '}' in exc_str: - json_part = exc_str[exc_str.find('{'):exc_str.rfind('}')+1] - cleaned_json_part = json_part.replace('\\"', '"') - error_data = json.loads(cleaned_json_part) - task_index_name = error_data.get('index_name') - except (json.JSONDecodeError, TypeError, IndexError): - pass + error_data = self._extract_error_metadata_from_exc_message( + result.get("exc_message") + ) + if error_data: + task_index_name = error_data.get('index_name') if task_index_name == index_name: key_str = key.decode('utf-8') if isinstance(key, bytes) else key @@ -497,16 +489,12 @@ def _cleanup_document_celery_tasks(self, index_name: str, path_or_url: str) -> i # Check for failed tasks where metadata is in the exception message if task_index_name is None 
and 'exc_message' in result: - try: - exc_str = str(result['exc_message']) - if '{' in exc_str and '}' in exc_str: - json_part = exc_str[exc_str.find('{'):exc_str.rfind('}')+1] - cleaned_json_part = json_part.replace('\\"', '"') - error_data = json.loads(cleaned_json_part) - task_index_name = error_data.get('index_name') - task_source = error_data.get('source') or error_data.get('path_or_url') - except (json.JSONDecodeError, TypeError, IndexError) as e: - logger.warning(f"Could not parse exception metadata for task {task_id}: {e}") + error_data = self._extract_error_metadata_from_exc_message( + result.get("exc_message") + ) + if error_data: + task_index_name = error_data.get('index_name') + task_source = error_data.get('source') or error_data.get('path_or_url') # Match both index name and document path/source if task_index_name == index_name and task_source == path_or_url: @@ -666,13 +654,13 @@ def save_error_info(self, task_id: str, error_reason: str, ttl_days: int = 30) - if not error_reason: logger.error(f"Cannot save error info for task {task_id}: error_reason is empty") return False - + ttl_seconds = ttl_days * 24 * 60 * 60 reason_key = f"error:reason:{task_id}" # Save error reason result = self.client.setex(reason_key, ttl_seconds, error_reason) - + if result: logger.info(f"Successfully saved error info to Redis for task {task_id}, key: {reason_key}") # Verify the save by reading it back @@ -707,13 +695,13 @@ def save_progress_info(self, task_id: str, processed_chunks: int, total_chunks: if not task_id: logger.error("Cannot save progress info: task_id is empty") return False - + progress_key = f"progress:{task_id}" progress_data = { 'processed_chunks': processed_chunks, 'total_chunks': total_chunks } - + ttl_seconds = ttl_hours * 3600 progress_json = json.dumps(progress_data) self.client.setex( @@ -728,6 +716,122 @@ def save_progress_info(self, task_id: str, processed_chunks: int, total_chunks: logger.error(f"Failed to save progress info for task {task_id}: 
{str(e)}") return False + def increment_progress_info(self, task_id: str, delta_processed: int, total_chunks: Optional[int] = None, ttl_hours: int = 24) -> bool: + """ + Atomically increment processed chunks for a task. + """ + if not task_id: + logger.error("Cannot increment progress info: task_id is empty") + return False + if delta_processed <= 0: + return True + + progress_key = f"progress:{task_id}" + ttl_seconds = ttl_hours * 3600 + max_retries = 5 + + for attempt in range(max_retries): + pipe = self.client.pipeline() + try: + pipe.watch(progress_key) + raw = pipe.get(progress_key) + current_processed, current_total = self._parse_progress(raw, total_chunks) + new_processed, current_total = self._compute_next_progress( + current_processed=current_processed, + delta_processed=delta_processed, + current_total=current_total, + total_chunks=total_chunks, + ) + + payload = json.dumps({ + "processed_chunks": new_processed, + "total_chunks": current_total, + }) + + pipe.multi() + pipe.setex(progress_key, ttl_seconds, payload) + pipe.execute() + logger.info( + f"[REDIS PROGRESS] Incremented progress for task {task_id}: " + f"+{delta_processed}, now {new_processed}/{current_total}" + ) + return True + except redis.WatchError: + continue + except Exception as exc: + logger.warning(f"Failed to increment progress for task {task_id}: {exc}") + return False + finally: + pipe.reset() + + logger.warning(f"Failed to increment progress for task {task_id}: too many concurrent updates") + return False + + def _parse_progress(self, raw: Any, total_chunks: Optional[int]) -> Tuple[int, int]: + """ + Parse persisted progress payload from Redis with tolerant fallback. 
+ """ + default_total = int(total_chunks or 0) + if not raw: + return 0, default_total + + if isinstance(raw, bytes): + raw = raw.decode("utf-8") + + try: + data = json.loads(raw) + processed = int(data.get("processed_chunks", 0) or 0) + total = default_total if total_chunks else int(data.get("total_chunks", 0) or 0) + return processed, total + except Exception: + return 0, default_total + + def _compute_next_progress( + self, + current_processed: int, + delta_processed: int, + current_total: int, + total_chunks: Optional[int], + ) -> Tuple[int, int]: + """ + Compute new processed/total values, clamping to known total when available. + """ + next_processed = current_processed + int(delta_processed) + next_total = int(current_total or 0) + + if next_total <= 0 and total_chunks: + next_total = int(total_chunks) + + if next_total > 0: + next_processed = min(next_processed, next_total) + + return next_processed, next_total + + def _extract_error_metadata_from_exc_message(self, exc_message: Any) -> Optional[Dict[str, Any]]: + """ + Try to parse embedded JSON metadata from exception message with tolerant escaping. 
+ """ + try: + exc_str = str(exc_message or "") + if "{" not in exc_str or "}" not in exc_str: + return None + json_part = exc_str[exc_str.find("{"): exc_str.rfind("}") + 1] + candidates = [ + json_part, + json_part.replace('\\"', '"'), + re.sub(r'\\(?!["\\/bfnrtu])', r'\\\\', json_part), + ] + for candidate in candidates: + try: + parsed = json.loads(candidate) + if isinstance(parsed, dict): + return parsed + except Exception: + continue + return None + except Exception: + return None + def get_progress_info(self, task_id: str) -> Optional[Dict[str, int]]: """ Get progress information for a specific task @@ -770,6 +874,79 @@ def get_error_info(self, task_id: str) -> Optional[str]: f"Failed to get error info for task {task_id}: {str(e)}") return None + def batch_get_progress_info(self, task_ids: List[str]) -> Dict[str, Optional[Dict[str, int]]]: + """ + Batch get progress information for multiple tasks in a single Redis call. + + Args: + task_ids: List of Celery task IDs + + Returns: + Dict mapping task_id to progress info dict, or None if not found + """ + if not task_ids: + return {} + + try: + # Build list of keys + progress_keys = [f"progress:{tid}" for tid in task_ids] + # Use pipeline for batch operation + pipe = self.client.pipeline() + for key in progress_keys: + pipe.get(key) + results = pipe.execute() + + # Build result dict + result = {} + for i, task_id in enumerate(task_ids): + progress_data = results[i] + if progress_data: + try: + if isinstance(progress_data, bytes): + progress_data = progress_data.decode('utf-8') + result[task_id] = json.loads(progress_data) + except (json.JSONDecodeError, TypeError): + result[task_id] = None + else: + result[task_id] = None + return result + except Exception as e: + logger.warning(f"Failed to batch get progress info: {str(e)}") + return {tid: None for tid in task_ids} + + def batch_get_error_info(self, task_ids: List[str]) -> Dict[str, Optional[str]]: + """ + Batch get error information for multiple tasks in a 
single Redis call. + + Args: + task_ids: List of Celery task IDs + + Returns: + Dict mapping task_id to error reason string, or None if not found + """ + if not task_ids: + return {} + + try: + # Build list of keys + error_keys = [f"error:reason:{tid}" for tid in task_ids] + # Use pipeline for batch operation + pipe = self.client.pipeline() + for key in error_keys: + pipe.get(key) + results = pipe.execute() + + # Build result dict + result = {} + for i, task_id in enumerate(task_ids): + reason = results[i] + # With decode_responses=True, reason is already a string + result[task_id] = reason if reason else None + return result + except Exception as e: + logger.warning(f"Failed to batch get error info: {str(e)}") + return {tid: None for tid in task_ids} + # Global Redis service instance _redis_service = None diff --git a/backend/services/remote_mcp_service.py b/backend/services/remote_mcp_service.py index ab0f0b04f..7e77a9c43 100644 --- a/backend/services/remote_mcp_service.py +++ b/backend/services/remote_mcp_service.py @@ -1,50 +1,78 @@ import logging import os import tempfile - +import asyncio +import socket +import random from fastmcp import Client from fastmcp.client.transports import StreamableHttpTransport, SSETransport - -from consts.const import CAN_EDIT_ALL_USER_ROLES, PERMISSION_EDIT, PERMISSION_READ -from consts.exceptions import MCPConnectionError, MCPNameIllegal +from consts.const import CAN_EDIT_ALL_USER_ROLES, PERMISSION_EDIT, PERMISSION_READ, NEXENT_MCP_DOCKER_IMAGE +from consts.exceptions import ( + MCPConnectionError, + MCPNameIllegal, + MCPContainerError, + McpNotFoundError, + McpValidationError, + McpNameConflictError, + McpPortConflictError, +) +from consts.model import MCPConfigRequest from database.remote_mcp_db import ( create_mcp_record, - delete_mcp_record_by_name_and_url, delete_mcp_record_by_container_id, get_mcp_records_by_tenant, check_mcp_name_exists, + check_enabled_mcp_name_exists, update_mcp_status_by_name_and_url, 
update_mcp_record_by_name_and_url, + update_mcp_record_manage_fields_by_id, + update_mcp_record_enabled_by_id, + update_mcp_record_container_fields_by_id, + update_mcp_record_status_by_id, + delete_mcp_record_by_id, get_mcp_authorization_token_by_name_and_url, get_mcp_record_by_id_and_tenant, + get_mcp_custom_headers_by_name_and_url, ) from database.user_tenant_db import get_user_tenant_by_user_id from services.mcp_container_service import MCPContainerManager +from utils.http_client_utils import create_httpx_client logger = logging.getLogger("remote_mcp_service") -async def mcp_server_health(remote_mcp_server: str, authorization_token: str | None = None) -> bool: +# --------------------------------------------------------------------------- +# Health Check +# --------------------------------------------------------------------------- + +async def mcp_server_health(remote_mcp_server: str, authorization_token: str | None = None, custom_headers: dict | None = None) -> bool: + """Check if an MCP server is healthy and reachable.""" try: - # Select transport based on URL ending url_stripped = remote_mcp_server.strip() - headers = {"Authorization": authorization_token} if authorization_token else {} + headers = {} + if authorization_token: + headers["Authorization"] = authorization_token + if custom_headers: + headers.update(custom_headers) if url_stripped.endswith("/sse"): transport = SSETransport( url=url_stripped, - headers=headers + headers=headers, + httpx_client_factory=create_httpx_client ) elif url_stripped.endswith("/mcp"): transport = StreamableHttpTransport( url=url_stripped, - headers=headers + headers=headers, + httpx_client_factory=create_httpx_client ) else: # Default to StreamableHttpTransport for unrecognized formats transport = StreamableHttpTransport( url=url_stripped, - headers=headers + headers=headers, + httpx_client_factory=create_httpx_client ) client = Client(transport=transport) @@ -52,11 +80,99 @@ async def mcp_server_health(remote_mcp_server: 
str, authorization_token: str | N connected = client.is_connected() return connected except BaseException as e: - logger.error( - f"Remote MCP server health check failed: {e}", exc_info=True) - # Prevent library-level exits (e.g., SystemExit) from crashing the service - raise MCPConnectionError("MCP connection failed") + logger.error(f"Remote MCP server health check failed: {e}", exc_info=True) + error_message = str(e).strip() or repr(e) + if isinstance(e, (asyncio.TimeoutError, TimeoutError)) or "timeout" in error_message.lower(): + raise MCPConnectionError("MCP_HEALTH_TIMEOUT") + raise MCPConnectionError(error_message) + + +# --------------------------------------------------------------------------- +# Helper Functions +# --------------------------------------------------------------------------- + +def _is_container_record(record: dict | None) -> bool: + """Check if the MCP record is container-based. + + A record is considered container-based if it has: + - container_id (Docker container ID) + - config_json (container configuration) + """ + if not record: + return False + return record.get("container_id") is not None or record.get("config_json") is not None + + +# --------------------------------------------------------------------------- +# Port Management Functions +# --------------------------------------------------------------------------- + +def check_container_port_conflict_records(port: int) -> bool: + """Check if there are enabled MCP records that already use the given container port.""" + from database.remote_mcp_db import get_mcp_records_by_container_port + return not get_mcp_records_by_container_port(container_port=port) + +def check_runtime_host_port_available(port: int) -> bool: + """Return True when the host port is not occupied by a listener.""" + probe_targets = [(socket.AF_INET, "127.0.0.1")] + if socket.has_ipv6: + probe_targets.append((socket.AF_INET6, "::1")) + + try: + host_infos = socket.getaddrinfo("host.docker.internal", port, 
socket.AF_UNSPEC, socket.SOCK_STREAM) + for family, _, _, _, sockaddr in host_infos: + probe_targets.append((family, sockaddr[0])) + except OSError: + pass + + for family, host in probe_targets: + try: + with socket.socket(family, socket.SOCK_STREAM) as probe_socket: + probe_socket.settimeout(0.2) + connect_result = probe_socket.connect_ex((host, port) if family == socket.AF_INET else (host, port, 0, 0)) + if connect_result == 0: + logger.info(f"Host port {port} is already in use on {host}") + return False + except OSError: + continue + + try: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as bind_probe: + if hasattr(socket, "SO_EXCLUSIVEADDRUSE"): + bind_probe.setsockopt(socket.SOL_SOCKET, socket.SO_EXCLUSIVEADDRUSE, 1) + else: + bind_probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 0) + bind_probe.bind(("0.0.0.0", port)) + bind_probe.listen(1) + return True + except OSError as exc: + logger.info(f"Host port {port} is already in use: {exc}") + return False + + +def check_container_port_conflict(*, port: int) -> bool: + """Check if a port is available for MCP container.""" + no_conflict_records = check_container_port_conflict_records(port=port) + runtime_available = check_runtime_host_port_available(port) + return no_conflict_records and runtime_available + + +def suggest_container_port() -> int: + """Suggest an available port for MCP container.""" + min_port = 2000 + max_port = 50000 + count = 0 + while count < 1000: + port = random.randint(min_port, max_port) + if check_container_port_conflict(port=port): + return port + count += 1 + raise McpPortConflictError("No available port found") + +# --------------------------------------------------------------------------- +# Add Functions +# --------------------------------------------------------------------------- async def add_remote_mcp_server_list( tenant_id: str, @@ -65,48 +181,233 @@ async def add_remote_mcp_server_list( remote_mcp_server_name: str, container_id: str | None = None, 
authorization_token: str | None = None, + custom_headers: dict | None = None, + source: str | None = "local", + container_port: int | None = None, ): + """Add a remote MCP server to the list. - # check if MCP name already exists + Args: + tenant_id: Tenant ID + user_id: User ID + remote_mcp_server: MCP server URL + remote_mcp_server_name: MCP service name + container_id: Docker container ID (optional) + authorization_token: Authorization token (optional) + custom_headers: Custom HTTP headers (optional) + + Raises: + MCPNameIllegal: If MCP name already exists + MCPConnectionError: If MCP server is not reachable + """ if check_mcp_name_exists(mcp_name=remote_mcp_server_name, tenant_id=tenant_id): - logger.error( - f"MCP name already exists, tenant_id: {tenant_id}, remote_mcp_server_name: {remote_mcp_server_name}") + logger.error(f"MCP name already exists: {remote_mcp_server_name}") raise MCPNameIllegal("MCP name already exists") - # check if the address is available - if not await mcp_server_health(remote_mcp_server=remote_mcp_server, authorization_token=authorization_token): + if not await mcp_server_health(remote_mcp_server=remote_mcp_server, authorization_token=authorization_token, custom_headers=custom_headers): raise MCPConnectionError("MCP connection failed") - # update the PG database record insert_mcp_data = { "mcp_name": remote_mcp_server_name, "mcp_server": remote_mcp_server, "status": True, "container_id": container_id, "authorization_token": authorization_token, + "custom_headers": custom_headers, + "source": source, + "container_port": container_port, } - create_mcp_record(mcp_data=insert_mcp_data, - tenant_id=tenant_id, user_id=user_id) + create_mcp_record(mcp_data=insert_mcp_data, tenant_id=tenant_id, user_id=user_id) -async def delete_remote_mcp_server_list(tenant_id: str, - user_id: str, - remote_mcp_server: str, - remote_mcp_server_name: str): - # delete the record in the PG database - 
delete_mcp_record_by_name_and_url(mcp_name=remote_mcp_server_name, - mcp_server=remote_mcp_server, - tenant_id=tenant_id, - user_id=user_id) +async def add_mcp_service( + *, + tenant_id: str, + user_id: str, + name: str, + description: str | None, + source: str, + server_url: str, + tags: list | None, + authorization_token: str | None, + custom_headers: dict | None = None, + container_config: dict | None, + registry_json: dict | None, + enabled: bool = False, + container_id: str | None = None, + container_port: int | None = None, +) -> None: + """Add an MCP service record. + Args: + tenant_id: Tenant ID + user_id: User ID + name: MCP service name + description: MCP service description + source: Source type (local/mcp_registry/community) + server_url: MCP server URL + tags: MCP tags + authorization_token: Authorization token for MCP server + custom_headers: Custom HTTP headers + container_config: Container configuration + registry_json: Registry metadata JSON + enabled: Whether the MCP is enabled + container_id: Docker container ID + container_port: Container port + """ + status: bool | None = None + normalized_container_id = container_id if isinstance(container_id, str) and container_id else None + is_container = container_id is not None or container_config is not None + config_json = container_config if is_container and isinstance(container_config, dict) else None + + if enabled: + if check_mcp_name_exists(mcp_name=name, tenant_id=tenant_id): + logger.error(f"MCP name already exists: {name}") + raise MCPNameIllegal("MCP name already exists") -async def update_remote_mcp_server_list( - update_data, + if not await mcp_server_health(remote_mcp_server=server_url, authorization_token=authorization_token, custom_headers=custom_headers): + raise MCPConnectionError("MCP connection failed") + + status = True + + create_mcp_record( + mcp_data={ + "mcp_name": name, + "mcp_server": server_url, + "status": status, + "container_id": normalized_container_id, + "container_port": 
container_port, + "authorization_token": authorization_token, + "custom_headers": custom_headers, + "source": source, + "registry_json": registry_json, + "enabled": enabled, + "tags": tags, + "description": description, + "config_json": config_json, + }, + tenant_id=tenant_id, + user_id=user_id, + ) + + +async def add_container_mcp_service( + *, tenant_id: str, user_id: str, -): + name: str, + description: str | None, + source: str, + tags: list | None, + authorization_token: str | None, + registry_json: dict | None, + port: int, + mcp_config: MCPConfigRequest, +) -> dict: + """Add a container-based MCP service. + + Args: + tenant_id: Tenant ID + user_id: User ID + name: MCP service name + description: MCP service description + source: Source type + tags: MCP tags + authorization_token: Authorization token + registry_json: Registry metadata JSON + port: Host port for the container + mcp_config: MCP server configuration + + Returns: + Container information dictionary """ - Update an existing remote MCP server record. 
+ service_name = name + if check_mcp_name_exists(mcp_name=service_name, tenant_id=tenant_id): + raise McpNameConflictError("Enabled MCP name already exists") + + if not check_container_port_conflict(port=port): + raise McpPortConflictError(f"Port {port} is already in use") + + servers = mcp_config.mcpServers + if len(servers) != 1: + raise McpValidationError("Exactly one mcpServers entry is required") + + _, config = next(iter(servers.items())) + command = config.command + if not command: + raise McpValidationError("command is required") + if command.strip().lower() == "docker": + raise McpValidationError("Docker command is not supported") + + env_vars = dict(config.env or {}) + auth_token = authorization_token + if auth_token: + env_vars["authorization_token"] = auth_token + + full_command = [ + "python", + "-m", + "mcp_proxy", + "--host", + "0.0.0.0", + "--port", + str(port), + "--transport", + "streamablehttp", + "--", + command, + *(config.args or []), + ] + + container_manager = MCPContainerManager() + try: + container_info = await container_manager.start_mcp_container( + service_name=service_name, + tenant_id=tenant_id, + user_id=user_id, + env_vars=env_vars, + host_port=port, + image=NEXENT_MCP_DOCKER_IMAGE, + full_command=full_command, + ) + logger.info(f"Started MCP container with info: {container_info}") + + container_config = mcp_config.model_dump(exclude_none=True) + + await add_mcp_service( + tenant_id=tenant_id, + user_id=user_id, + name=service_name, + description=description, + source=source, + server_url=container_info.get("mcp_url"), + tags=tags, + authorization_token=auth_token, + container_config=container_config, + registry_json=registry_json, + enabled=True, + container_id=container_info.get("container_id"), + container_port=container_info.get("host_port"), + ) + except Exception as exc: + logger.warning(f"Failed to start container MCP service: {exc}") + raise + + return { + "service_name": service_name, + "mcp_url": 
container_info.get("mcp_url"), + "container_id": container_info.get("container_id"), + "container_name": container_info.get("container_name"), + "host_port": container_info.get("host_port"), + } + + +# --------------------------------------------------------------------------- +# Update Functions +# --------------------------------------------------------------------------- + +async def update_remote_mcp_server_list(update_data, tenant_id: str, user_id: str) -> None: + """Update an existing remote MCP server record. Args: update_data: MCPUpdateRequest containing current and new values @@ -114,40 +415,31 @@ async def update_remote_mcp_server_list( user_id: User ID Raises: - MCPNameIllegal: If the new MCP name already exists (and is different from current) + MCPNameIllegal: If the new MCP name already exists MCPConnectionError: If the new MCP server URL is not accessible """ - # Check if the current record exists by verifying the name exists for this tenant if not check_mcp_name_exists(mcp_name=update_data.current_service_name, tenant_id=tenant_id): - logger.error( - f"MCP name does not exist, tenant_id: {tenant_id}, current_mcp_server_name: {update_data.current_service_name}") raise MCPNameIllegal("MCP name does not exist") - # If the new name is different from the current name, check if it already exists if update_data.new_service_name != update_data.current_service_name: if check_mcp_name_exists(mcp_name=update_data.new_service_name, tenant_id=tenant_id): - logger.error( - f"New MCP name already exists, tenant_id: {tenant_id}, new_mcp_server_name: {update_data.new_service_name}") raise MCPNameIllegal("New MCP name already exists") - # User authorization token authorization_token = update_data.new_authorization_token + custom_headers = getattr(update_data, 'custom_headers', None) - # Check if the new server URL is accessible try: status = await mcp_server_health( remote_mcp_server=update_data.new_mcp_url, - authorization_token=authorization_token + 
authorization_token=authorization_token, + custom_headers=custom_headers, ) except BaseException: status = False if not status: - logger.error( - f"New MCP server health check failed: {update_data.new_mcp_url}") raise MCPConnectionError("New MCP server connection failed") - # Update the database record update_mcp_record_by_name_and_url( update_data=update_data, tenant_id=tenant_id, @@ -156,7 +448,309 @@ async def update_remote_mcp_server_list( ) -async def get_remote_mcp_server_list(tenant_id: str, user_id: str | None = None, is_need_auth: bool = True) -> list[dict]: +def update_mcp_service( + *, + tenant_id: str, + user_id: str, + mcp_id: int, + new_name: str, + description: str | None, + server_url: str, + authorization_token: str | None, + custom_headers: dict | None, + tags: list | None, +) -> None: + """Update an MCP service record by ID. + + Args: + tenant_id: Tenant ID + user_id: User ID + mcp_id: MCP record ID + new_name: New MCP service name + description: MCP service description + server_url: New MCP server URL + authorization_token: Authorization token + custom_headers: Custom HTTP headers + tags: MCP tags + + Raises: + McpNotFoundError: If MCP record is not found + """ + current_record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id) + if not current_record: + raise McpNotFoundError("MCP record not found") + + is_container = _is_container_record(current_record) + config_json = None + if is_container: + config_json = current_record.get("config_json") if isinstance(current_record.get("config_json"), dict) else None + + update_mcp_record_manage_fields_by_id( + mcp_id=mcp_id, + tenant_id=tenant_id, + user_id=user_id, + name=new_name, + description=description, + server_url=server_url, + source=(current_record.get("source") or "local"), + authorization_token=authorization_token, + custom_headers=custom_headers, + config_json=config_json, + tags=tags, + ) + + +async def update_mcp_service_enabled( + *, + tenant_id: str, + user_id: str, + 
mcp_id: int, + enabled: bool, +) -> None: + """Enable or disable an MCP service. + + Args: + tenant_id: Tenant ID + user_id: User ID + mcp_id: MCP record ID + enabled: True to enable, False to disable + + Raises: + McpNotFoundError: If MCP record is not found + McpNameConflictError: If an enabled service with the same name exists + McpPortConflictError: If the container port is not available + MCPConnectionError: If MCP connection fails + """ + current_record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id) + if not current_record: + raise McpNotFoundError("MCP record not found") + + if enabled: + current_name = current_record.get("mcp_name") + if current_name: + records = get_mcp_records_by_tenant(tenant_id=tenant_id) + for record in records: + if int(record.get("mcp_id") or 0) == mcp_id: + continue + record_name = record.get("mcp_name") + is_enabled = bool(record.get("enabled")) + if is_enabled and record_name == current_name: + raise McpNameConflictError("An enabled service already uses this name") + + authorization_token = current_record.get("authorization_token") + custom_headers = current_record.get("custom_headers") if isinstance(current_record.get("custom_headers"), dict) else None + + if _is_container_record(current_record): + if enabled: + port = current_record.get("container_port") + if port is None: + raise McpValidationError("Container port is missing, cannot rebuild container") + if not check_runtime_host_port_available(port): + raise McpPortConflictError(f"Port {port} is already in use") + + config_json = current_record.get("config_json") + if not isinstance(config_json, dict): + raise McpValidationError("Container configuration is missing, cannot rebuild container") + + try: + mcp_config = MCPConfigRequest(**config_json) + except Exception as exc: + raise McpValidationError(f"Invalid container configuration: {exc}") + + servers = mcp_config.mcpServers + if not servers or len(servers) != 1: + raise McpValidationError("Exactly one 
mcpServers entry is required") + _, config = next(iter(servers.items())) + command = config.command + if not command: + raise McpValidationError("command is required") + + env_vars = dict(config.env or {}) + if authorization_token: + env_vars["authorization_token"] = authorization_token + + full_command = [ + "python", + "-m", + "mcp_proxy", + "--host", + "0.0.0.0", + "--port", + str(port), + "--transport", + "streamablehttp", + "--", + command, + *(config.args or []), + ] + + container_manager = MCPContainerManager() + container_info = await container_manager.start_mcp_container( + service_name=current_record.get("mcp_name"), + tenant_id=tenant_id, + user_id=user_id, + env_vars=env_vars, + host_port=port, + image=NEXENT_MCP_DOCKER_IMAGE, + full_command=full_command, + ) + + next_server_url = container_info.get("mcp_url") + next_container_id = container_info.get("container_id") + next_container_port = container_info.get("host_port") or port + + health_ok = False + MCP_CONTAINER_HEALTH_CHECK_ATTEMPTS = 10 + MCP_CONTAINER_HEALTH_CHECK_DELAY_SECONDS = 0.5 + for attempt in range(MCP_CONTAINER_HEALTH_CHECK_ATTEMPTS): + try: + health_ok = await mcp_server_health( + remote_mcp_server=next_server_url, + authorization_token=authorization_token, + custom_headers=custom_headers, + ) + except MCPConnectionError: + health_ok = False + if health_ok: + break + if attempt < MCP_CONTAINER_HEALTH_CHECK_ATTEMPTS - 1: + await asyncio.sleep(MCP_CONTAINER_HEALTH_CHECK_DELAY_SECONDS) + + if not health_ok: + if next_container_id: + try: + await MCPContainerManager().stop_mcp_container(next_container_id) + except Exception as exc: + logger.warning(f"Failed to stop unhealthy container {next_container_id}: {exc}") + update_mcp_record_container_fields_by_id( + mcp_id=mcp_id, + tenant_id=tenant_id, + user_id=user_id, + container_id=None, + container_port=port, + mcp_server=next_server_url, + status=False, + ) + raise MCPConnectionError("MCP connection failed") + + 
update_mcp_record_container_fields_by_id( + mcp_id=mcp_id, + tenant_id=tenant_id, + user_id=user_id, + container_id=next_container_id, + container_port=next_container_port, + mcp_server=next_server_url, + status=True, + ) + else: + current_container_id = current_record.get("container_id") + if current_container_id and current_record.get("config_json"): + try: + manager = MCPContainerManager() + await manager.stop_mcp_container(current_container_id) + except Exception as exc: + logger.warning(f"Failed to stop container {current_container_id}: {exc}") + update_mcp_record_container_fields_by_id( + mcp_id=mcp_id, + tenant_id=tenant_id, + user_id=user_id, + container_id=None, + container_port=current_record.get("container_port"), + mcp_server=current_record.get("mcp_server"), + status=None, + ) + elif enabled: + server_url = current_record.get("mcp_server") + health_ok = await mcp_server_health( + remote_mcp_server=server_url, + authorization_token=authorization_token, + custom_headers=custom_headers, + ) + update_mcp_record_status_by_id( + mcp_id=mcp_id, + tenant_id=tenant_id, + user_id=user_id, + status=bool(health_ok), + ) + if not health_ok: + raise MCPConnectionError("MCP connection failed") + + update_mcp_record_enabled_by_id( + mcp_id=mcp_id, + tenant_id=tenant_id, + user_id=user_id, + enabled=enabled, + ) + + +# --------------------------------------------------------------------------- +# Delete Functions +# --------------------------------------------------------------------------- + +async def delete_mcp_service( + *, + tenant_id: str, + user_id: str, + mcp_id: int, +) -> None: + """Delete an MCP service by ID. 
+ + Args: + tenant_id: Tenant ID + user_id: User ID + mcp_id: MCP record ID + + Raises: + McpNotFoundError: If MCP record is not found + """ + current_record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id) + if not current_record: + raise McpNotFoundError("MCP record not found") + container_id = current_record.get("container_id") + if container_id: + try: + manager = MCPContainerManager() + await manager.stop_mcp_container(container_id=container_id) + except Exception as exc: + logger.warning(f"Failed to stop container: {exc}, but continue to delete MCP record") + + delete_mcp_record_by_id( + mcp_id=mcp_id, + tenant_id=tenant_id, + user_id=user_id, + ) + + +async def delete_mcp_by_container_id(tenant_id: str, user_id: str, container_id: str) -> None: + """Soft delete MCP record associated with a specific container ID.""" + delete_mcp_record_by_container_id( + container_id=container_id, + tenant_id=tenant_id, + user_id=user_id, + ) + + +# --------------------------------------------------------------------------- +# List Functions +# --------------------------------------------------------------------------- + +async def get_remote_mcp_server_list( + tenant_id: str, + user_id: str | None = None, + is_need_auth: bool = True, +) -> list[dict]: + """Get list of remote MCP servers with full details. 
+ + Args: + tenant_id: Tenant ID + user_id: User ID for permission checking + is_need_auth: Whether to include authorization tokens + + Returns: + List of MCP server records with all fields including container_id, description, + enabled, source, update_time, tags, container_port, registry_json, config_json, + container_status, and authorization_token + """ mcp_records = get_mcp_records_by_tenant(tenant_id=tenant_id) mcp_records_list = [] can_edit_all = False @@ -165,23 +759,60 @@ async def get_remote_mcp_server_list(tenant_id: str, user_id: str | None = None, user_role = str(user_tenant_record.get("user_role") or "").upper() can_edit_all = user_role in CAN_EDIT_ALL_USER_ROLES + container_status_map = {} + try: + manager = MCPContainerManager() + for container in manager.list_mcp_containers(tenant_id=tenant_id): + container_id = container.get("container_id") + status = container.get("status") + if not container_id: + continue + if status == "running": + container_status_map[container_id] = "running" + elif status: + container_status_map[container_id] = "stopped" + except Exception as exc: + logger.warning(f"Failed to load container runtime status: {exc}") + for record in mcp_records: created_by = record.get("created_by") or record.get("user_id") if user_id is None: permission = PERMISSION_READ else: - permission = PERMISSION_EDIT if can_edit_all or str( - created_by) == str(user_id) else PERMISSION_READ + permission = PERMISSION_EDIT if can_edit_all or str(created_by) == str(user_id) else PERMISSION_READ + + config_json = record.get("config_json") + container_id = record.get("container_id") + + is_container = container_id is not None or config_json is not None + + container_status = None + if is_container: + if container_id: + container_status = container_status_map.get(container_id, "stopped") + else: + container_status = "stopped" record_dict = { "remote_mcp_server_name": record["mcp_name"], "remote_mcp_server": record["mcp_server"], - "status": record["status"], 
+ "status": record.get("status"), "permission": permission, "mcp_id": record.get("mcp_id"), + "container_id": container_id, + "description": record.get("description"), + "enabled": record.get("enabled"), + "source": record.get("source"), + "update_time": record.get("update_time"), + "tags": record.get("tags") or [], + "container_port": record.get("container_port"), + "registry_json": record.get("registry_json"), + "config_json": record.get("config_json"), + "container_status": container_status, } if is_need_auth: record_dict["authorization_token"] = record.get("authorization_token") + record_dict["custom_headers"] = record.get("custom_headers") mcp_records_list.append(record_dict) return mcp_records_list @@ -192,13 +823,15 @@ def attach_mcp_container_permissions( tenant_id: str, user_id: str | None = None, ) -> list[dict]: - """ - Attach permission (EDIT/READ) to each MCP container entry. + """Attach permission (EDIT/READ) to each MCP container entry. + + Args: + containers: List of container records + tenant_id: Tenant ID + user_id: User ID for permission checking - Rules: - - If user's role is in CAN_EDIT_ALL_USER_ROLES => EDIT for all containers - - Otherwise => EDIT only if the container is associated with an MCP record created by this user - - If association cannot be determined => default to READ + Returns: + List of containers with permission field added """ if not containers: return [] @@ -208,19 +841,17 @@ def attach_mcp_container_permissions( user_role = str(user_tenant_record.get("user_role") or "").upper() can_edit_all = user_role in CAN_EDIT_ALL_USER_ROLES - created_by_by_container_id: dict[str, str] = {} + created_by_by_container_id = {} try: for record in get_mcp_records_by_tenant(tenant_id=tenant_id) or []: cid = record.get("container_id") if not cid: continue - created_by_by_container_id[str(cid)] = str( - record.get("created_by") or record.get("user_id") or "" - ) + created_by_by_container_id[str(cid)] = str(record.get("created_by") or 
record.get("user_id") or "") except Exception as e: logger.warning(f"Failed to load MCP records for permission mapping: {e}") - enriched: list[dict] = [] + enriched = [] for container in containers: container_id = str(container.get("container_id") or "") created_by = created_by_by_container_id.get(container_id, "") @@ -228,77 +859,196 @@ def attach_mcp_container_permissions( if user_id is None: permission = PERMISSION_READ else: - permission = PERMISSION_EDIT if can_edit_all or ( - created_by and str(created_by) == str(user_id) - ) else PERMISSION_READ + permission = PERMISSION_EDIT if can_edit_all or (created_by and str(created_by) == str(user_id)) else PERMISSION_READ enriched.append({**container, "permission": permission}) return enriched -async def check_mcp_health_and_update_db(mcp_url, service_name, tenant_id, user_id): - # Get authorization token from database +async def get_mcp_record_by_id(mcp_id: int, tenant_id: str) -> dict | None: + """Get MCP record by ID. + + Args: + mcp_id: MCP record ID + tenant_id: Tenant ID + + Returns: + Dictionary containing mcp_name, mcp_server, authorization_token, and custom_headers, or None if not found + """ + mcp_record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id) + if not mcp_record: + return None + + return { + "mcp_name": mcp_record.get("mcp_name"), + "mcp_server": mcp_record.get("mcp_server"), + "authorization_token": mcp_record.get("authorization_token"), + "custom_headers": mcp_record.get("custom_headers"), + } + + +# --------------------------------------------------------------------------- +# Health Check Functions +# --------------------------------------------------------------------------- + +async def check_mcp_health_and_update_db(mcp_url, service_name, tenant_id, user_id) -> None: + """Check MCP health and update database status. 
+ + Args: + mcp_url: MCP server URL + service_name: MCP service name + tenant_id: Tenant ID + user_id: User ID + + Raises: + MCPConnectionError: If MCP connection fails + """ authorization_token = get_mcp_authorization_token_by_name_and_url( mcp_name=service_name, mcp_server=mcp_url, tenant_id=tenant_id ) + custom_headers = get_mcp_custom_headers_by_name_and_url( + mcp_name=service_name, + mcp_server=mcp_url, + tenant_id=tenant_id + ) - # check the health of the MCP server try: status = await mcp_server_health( remote_mcp_server=mcp_url, - authorization_token=authorization_token + authorization_token=authorization_token, + custom_headers=custom_headers, ) except BaseException: status = False - # update the status of the MCP server in the database + update_mcp_status_by_name_and_url( mcp_name=service_name, mcp_server=mcp_url, tenant_id=tenant_id, user_id=user_id, - status=status) + status=status + ) if not status: raise MCPConnectionError("MCP connection failed") -async def delete_mcp_by_container_id(tenant_id: str, user_id: str, container_id: str): - """ - Soft delete MCP record associated with a specific container ID. +async def check_mcp_service_health( + *, + tenant_id: str, + user_id: str, + mcp_id: int, +) -> str: + """Check MCP service health by ID. + + Args: + tenant_id: Tenant ID + user_id: User ID + mcp_id: MCP record ID + + Returns: + "healthy" if MCP is reachable - This is used when stopping a containerized MCP so that the MCP record and - its container are removed together. 
+ Raises: + McpNotFoundError: If MCP record is not found + McpValidationError: If MCP server URL is empty + MCPConnectionError: If MCP connection fails """ - delete_mcp_record_by_container_id( - container_id=container_id, + record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id) + if not record: + raise McpNotFoundError("MCP record not found") + + server_url = record.get("mcp_server") + if not server_url: + raise McpValidationError("MCP server URL is empty") + + authorization_token = record.get("authorization_token") + custom_headers = record.get("custom_headers") + + try: + status = await mcp_server_health( + remote_mcp_server=server_url, + authorization_token=authorization_token, + custom_headers=custom_headers, + ) + except MCPConnectionError: + update_mcp_record_status_by_id( + mcp_id=mcp_id, + tenant_id=tenant_id, + user_id=user_id, + status=False, + ) + raise + except Exception as exc: + logger.error(f"MCP health check failed: {exc}") + update_mcp_record_status_by_id( + mcp_id=mcp_id, + tenant_id=tenant_id, + user_id=user_id, + status=False, + ) + raise MCPConnectionError(str(exc) or "MCP connection failed") + + update_mcp_record_status_by_id( + mcp_id=mcp_id, tenant_id=tenant_id, user_id=user_id, + status=status, ) + if not status: + raise MCPConnectionError("MCP connection failed") + + return "healthy" -async def get_mcp_record_by_id(mcp_id: int, tenant_id: str) -> dict | None: - """ - Get MCP record by ID + +# --------------------------------------------------------------------------- +# Tool Functions +# --------------------------------------------------------------------------- + +async def list_mcp_service_tools_by_id(*, tenant_id: str, mcp_id: int) -> list[dict]: + """Get tools from an MCP service by ID. 
Args: - mcp_id: MCP record ID tenant_id: Tenant ID + mcp_id: MCP record ID Returns: - Dictionary containing mcp_name, mcp_server, and authorization_token, or None if not found + List of tool dictionaries + + Raises: + McpNotFoundError: If MCP record is not found + McpValidationError: If MCP record is missing connection fields + MCPConnectionError: If MCP connection fails """ - mcp_record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id) - if not mcp_record: - return None + record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id) + if not record: + raise McpNotFoundError("MCP record not found") - return { - "mcp_name": mcp_record.get("mcp_name"), - "mcp_server": mcp_record.get("mcp_server"), - "authorization_token": mcp_record.get("authorization_token"), - } + service_name = record.get("mcp_name") + server_url = record.get("mcp_server") + if not service_name or not server_url: + raise McpValidationError("MCP record is missing runtime connection fields") + authorization_token = record.get("authorization_token") + custom_headers = record.get("custom_headers") + + from services.tool_configuration_service import get_tool_from_remote_mcp_server + tools_info = await get_tool_from_remote_mcp_server( + mcp_server_name=service_name, + remote_mcp_server=server_url, + tenant_id=tenant_id, + authorization_token=authorization_token, + custom_headers=custom_headers, + ) + return [tool.__dict__ for tool in tools_info] + + +# --------------------------------------------------------------------------- +# Image Upload Functions +# --------------------------------------------------------------------------- async def upload_and_start_mcp_image( tenant_id: str, @@ -308,69 +1058,56 @@ async def upload_and_start_mcp_image( port: int, service_name: str | None = None, env_vars: str | None = None, -): - """ - Upload MCP Docker image and start container. +) -> dict: + """Upload MCP Docker image and start container. 
Args: - tenant_id: Tenant ID for isolation - user_id: User ID for isolation + tenant_id: Tenant ID + user_id: User ID file_content: Raw file content bytes filename: Original filename port: Host port to expose the MCP server on - service_name: Optional name for the MCP service (auto-generated if not provided) + service_name: Optional name for the MCP service env_vars: Optional environment variables as JSON string Returns: - Dictionary with service details including mcp_url, container_id, etc. + Dictionary with service details Raises: MCPContainerError: If container operations fail MCPNameIllegal: If service name already exists ValueError: If file validation fails """ - # Validate file type if not filename.lower().endswith('.tar'): raise ValueError("Only .tar files are allowed") - # Validate file size (limit to 1GB) file_size = len(file_content) - if file_size > 1024 * 1024 * 1024: # 1GB limit + if file_size > 1024 * 1024 * 1024: raise ValueError("File size exceeds 1GB limit") - # Parse environment variables parsed_env_vars = None if env_vars: + import json try: - import json parsed_env_vars = json.loads(env_vars) if not isinstance(parsed_env_vars, dict): raise ValueError("Environment variables must be a JSON object") except (json.JSONDecodeError, ValueError) as e: raise ValueError(f"Invalid environment variables format: {str(e)}") - # Generate service name if not provided final_service_name = service_name if not final_service_name: - # Remove .tar extension from filename final_service_name = os.path.splitext(filename)[0] - # Check if MCP service name already exists if check_mcp_name_exists(mcp_name=final_service_name, tenant_id=tenant_id): raise MCPNameIllegal("MCP service name already exists") - # Save file to temporary location (delete=False, manual cleanup) with tempfile.NamedTemporaryFile(delete=False, suffix='.tar') as temp_file: temp_file.write(file_content) temp_file_path = temp_file.name try: - # Initialize container manager container_manager = 
MCPContainerManager() - - # Start container from uploaded image - # Note: uploaded image should be a complete MCP server implementation - # that can be started directly without additional commands (uses image's CMD/ENTRYPOINT) container_info = await container_manager.start_mcp_container_from_tar( tar_file_path=temp_file_path, service_name=final_service_name, @@ -378,22 +1115,18 @@ async def upload_and_start_mcp_image( user_id=user_id, env_vars=parsed_env_vars, host_port=port, - full_command=None, # Uploaded image should contain the MCP server + full_command=None, ) finally: - # Manual cleanup of temporary file try: os.unlink(temp_file_path) except Exception as e: - logger.warning( - f"Failed to clean up temporary file {temp_file_path}: {e}") + logger.warning(f"Failed to clean up temporary file {temp_file_path}: {e}") - # Extract authorization_token from env_vars for database registration authorization_token = None if parsed_env_vars: authorization_token = parsed_env_vars.get("authorization_token") - # Register to remote MCP server list await add_remote_mcp_server_list( tenant_id=tenant_id, user_id=user_id, @@ -401,6 +1134,7 @@ async def upload_and_start_mcp_image( remote_mcp_server_name=final_service_name, container_id=container_info["container_id"], authorization_token=authorization_token, + container_port=port ) return { diff --git a/backend/services/skill_service.py b/backend/services/skill_service.py index cf47b4df4..f5b7d1c7c 100644 --- a/backend/services/skill_service.py +++ b/backend/services/skill_service.py @@ -1,19 +1,32 @@ """Skill management service.""" +import aiofiles +import argparse +import ast +import asyncio +import inspect import io import json import logging import os -from typing import Any, Dict, List, Optional, Union +import uuid +import zipfile +import re +import threading +from typing import Any, Dict, List, Optional, Tuple, Union import yaml from nexent.skills import SkillManager from nexent.skills.skill_loader import SkillLoader -from 
consts.const import CONTAINER_SKILLS_PATH, ROOT_DIR +from nexent.core.utils.observer import MessageObserver +from nexent.core.agents.agent_model import ModelConfig +from consts.const import CONTAINER_SKILLS_PATH, OFFICIAL_SKILLS_ZIP_PATH, ROOT_DIR from consts.exceptions import SkillException from database import skill_db -from database.db_models import SkillInfo +from agents.skill_creation_agent import create_skill_from_request +from utils.prompt_template_utils import get_skill_creation_simple_prompt_template +from utils.content_classifier_utils import ContentClassifier logger = logging.getLogger(__name__) @@ -246,6 +259,51 @@ def _commented_tree_to_plain(node: Any) -> Any: return node +def _ruamel_tree_to_plain(node: Any) -> Any: + """Convert ruamel CommentedMap/Seq to plain dict/list with NO comment merging. + + Used for parsing config.yaml into config_values where the value must be clean + (e.g. ``/mnt/nexent`` not ``/mnt/nexent # Initial workspace path``). + """ + from ruamel.yaml.comments import CommentedMap, CommentedSeq + + if isinstance(node, CommentedMap): + return {k: _ruamel_tree_to_plain(v) for k, v in node.items()} + if isinstance(node, CommentedSeq): + return [_ruamel_tree_to_plain(v) for v in node] + return node + + +def _parse_yaml_ruamel_plain(text: str) -> Dict[str, Any]: + """Parse YAML with ruamel round-trip and return plain dict (no comment merging). + + Used for ``config.yaml`` → ``config_values`` where scalar values must be clean. 
+ """ + from ruamel.yaml import YAML + from ruamel.yaml.comments import CommentedMap + + y = YAML(typ="rt") + try: + root = y.load(text) + except Exception as exc: + raise SkillException(f"Invalid YAML in config/config.yaml: {exc}") from exc + if root is None: + return {} + if isinstance(root, CommentedMap): + plain = _ruamel_tree_to_plain(root) + elif isinstance(root, dict): + plain = root + else: + raise SkillException( + "config/config.yaml must contain a JSON or YAML object (mapping), not a list or scalar" + ) + if not isinstance(plain, dict): + raise SkillException( + "config/config.yaml must contain a JSON or YAML object (mapping), not a list or scalar" + ) + return _params_dict_to_storable(plain) + + def _parse_yaml_with_ruamel_merge_eol_comments(text: str) -> Dict[str, Any]: """Parse YAML with ruamel; merge ``#`` into scalar values only (``value # tip`` for the UI). @@ -279,6 +337,189 @@ def _parse_yaml_with_ruamel_merge_eol_comments(text: str) -> Dict[str, Any]: return _params_dict_to_storable(plain) +def _get_skill_inputs_from_code(scripts_dir: str) -> List[Dict[str, Any]]: + """Extract argparse parameters from skill scripts using AST analysis. + + Walks every ``scripts/*.py`` file (skipping ``_*.py``) and uses AST to find + all ``parser.add_argument(...)`` calls anywhere in the file, including inside + function bodies and ``if __name__ == "__main__":`` blocks. + + Mirrors ``get_local_tools()`` in tool_configuration_service.py. + + Args: + scripts_dir: Absolute path to the skill's ``scripts/`` directory. + + Returns: + List of input parameter dicts with name, type, required, description, default. 
+ """ + inputs: List[Dict[str, Any]] = [] + seen_names: set = set() + + if not os.path.isdir(scripts_dir): + return inputs + + for filename in os.listdir(scripts_dir): + if not filename.endswith(".py") or filename.startswith("_"): + continue + + script_path = os.path.join(scripts_dir, filename) + try: + source = open(script_path, "r", encoding="utf-8").read() + except (OSError, IOError): + continue + + try: + tree = ast.parse(source, filename=filename) + except SyntaxError: + continue + + for node in ast.walk(tree): + if not isinstance(node, ast.Call): + continue + if not _is_add_argument_call(node): + continue + + parsed = _extract_arg_from_add_argument(node) + if not parsed: + continue + + param_name = parsed["name"] + if param_name in ("help", "h") or param_name in seen_names: + continue + seen_names.add(param_name) + + inputs.append({ + "name": param_name, + "type": parsed["type"], + "required": parsed["required"], + "description_en": parsed.get("description_en", ""), + }) + + return inputs + + +def _is_add_argument_call(node: ast.Call) -> bool: + """Return True if node is a call to ``.add_argument(...)``.""" + if not isinstance(node.func, ast.Attribute): + return False + if node.func.attr != "add_argument": + return False + if isinstance(node.func.value, ast.Name) and node.func.value.id == "parser": + return True + if isinstance(node.func.value, ast.Attribute): + return True + return False + + +def _extract_arg_from_add_argument(node: ast.Call) -> Optional[Dict[str, Any]]: + """Extract parameter metadata from an ``add_argument`` Call AST node.""" + args = node.args + kwargs = {kw.arg: kw.value for kw in node.keywords} + + # Positional arg 0 = name or first positional arg (--name / name) + name_node = args[0] if args else kwargs.get("name") + if name_node is None: + return None + param_name = _ast_literal_eval(name_node) + if not param_name or not isinstance(param_name, str): + return None + + # --name style + if param_name.startswith("--"): + param_name = 
param_name[2:] + elif param_name.startswith("-"): + param_name = param_name[1:] + + # Determine type + param_type = "string" + type_node = kwargs.get("type") + if type_node is not None: + type_name = _get_type_name(type_node) + if type_name in ("int", "integer"): + param_type = "number" + elif type_name in ("float",): + param_type = "number" + elif type_name in ("bool",): + param_type = "boolean" + + # Description + help_node = kwargs.get("help") + description = "" + if help_node is not None: + val = _ast_literal_eval(help_node) + if isinstance(val, str): + description = val + + # Required / default + required = False + default: Any = None + + if kwargs.get("required") is not None: + req_val = _ast_literal_eval(kwargs["required"]) + if req_val is True: + required = True + + default_node = kwargs.get("default") + if default_node is not None: + default = _ast_literal_eval(default_node) + if default is None or (isinstance(default, str) and default == ""): + required = False + elif not required: + required = False + + return { + "name": param_name, + "type": param_type, + "required": required, + "description_en": description, + } + + +def _get_type_name(node: ast.AST) -> str: + """Get the type name string from a type-related AST node.""" + if isinstance(node, ast.Name): + return node.id + if isinstance(node, ast.Attribute): + return node.attr + if isinstance(node, ast.Call) and isinstance(node.func, ast.Name): + return node.func.id + if isinstance(node, ast.Call) and isinstance(node.func, ast.Attribute): + return node.func.attr + return "" + + +def _ast_literal_eval(node: ast.AST) -> Any: + """Safely evaluate a literal AST node (Name, Constant, Str, Num, etc.) 
to a Python value.""" + if isinstance(node, (ast.Constant, ast.Num)): + return getattr(node, "value", None) + if isinstance(node, ast.Str): # Python < 3.8 compat + return node.s + if isinstance(node, ast.Name): + name = node.id + if name == "None": + return None + if name == "True": + return True + if name == "False": + return False + return name + if isinstance(node, (ast.List, ast.Tuple)): + elts = [_ast_literal_eval(e) for e in node.elts] + return list(elts) if isinstance(node, ast.List) else tuple(elts) + if isinstance(node, ast.Dict): + return {_ast_literal_eval(k): _ast_literal_eval(v) for k, v in node.keys} + if isinstance(node, ast.UnaryOp) and isinstance(node.op, (ast.UAdd, ast.USub)): + val = _ast_literal_eval(node.operand) + if isinstance(val, (int, float)): + return -val if isinstance(node.op, ast.USub) else val + if isinstance(node, ast.BinOp): + left = _ast_literal_eval(node.left) + right = _ast_literal_eval(node.right) + if isinstance(left, str) and isinstance(right, str) and isinstance(node.op, ast.Add): + return left + right + return None + + def _parse_yaml_fallback_pyyaml(text: str) -> Dict[str, Any]: """Parse YAML with PyYAML (comments are dropped).""" try: @@ -305,7 +546,7 @@ def _parse_skill_params_from_config_bytes(raw: bytes) -> Dict[str, Any]: data = json.loads(text) except json.JSONDecodeError: try: - return _parse_yaml_with_ruamel_merge_eol_comments(text) + return _parse_yaml_ruamel_plain(text) except ImportError: logger.warning("ruamel.yaml not installed; YAML comments will be dropped on parse") return _parse_yaml_fallback_pyyaml(text) @@ -325,6 +566,66 @@ def _parse_skill_params_from_config_bytes(raw: bytes) -> Dict[str, Any]: return _params_dict_to_storable(data) +def _parse_skill_schema_from_yaml_bytes(raw: bytes) -> List[Dict[str, Any]]: + """Parse config/schema.yaml bytes into List[SkillParam]. 
+ + Expected YAML structure: + param_name: + type: string | number | boolean | array | object + required: true | false + description_en: "English description" + description_zh: "Chinese description" + depends_on: other_param_name + + Returns a list of param dicts with name, type, required, description_en, + description_zh, depends_on — matching frontend SkillParam interface. + """ + text = raw.decode("utf-8-sig").strip() + if not text: + logger.warning("[schema] Empty raw bytes for schema.yaml") + return [] + data: Any = None + parse_method = "unknown" + try: + data = json.loads(text) + parse_method = "json" + except json.JSONDecodeError: + try: + data = _parse_yaml_with_ruamel_merge_eol_comments(text) + parse_method = "ruamel" + except ImportError: + data = _parse_yaml_fallback_pyyaml(text) + parse_method = "pyyaml" + except SkillException: + raise + except Exception: + try: + data = _parse_yaml_fallback_pyyaml(text) + parse_method = "pyyaml" + except Exception as exc: + logger.warning("[schema] All YAML parsers failed: %s", exc) + return [] + + if not isinstance(data, dict): + logger.warning("[schema] Parsed data is not a dict (type=%s, parse_method=%s)", type(data).__name__, parse_method) + return [] + + result: List[Dict[str, Any]] = [] + for param_name, meta in data.items(): + if not isinstance(meta, dict): + logger.debug("[schema] Skipping param '%s': meta is not a dict (%s)", param_name, type(meta).__name__) + continue + result.append({ + "name": param_name, + "type": meta.get("type", "string"), + "required": bool(meta.get("required", False)), + "description_en": meta.get("description_en", meta.get("description", "")), + "description_zh": meta.get("description_zh", ""), + "depends_on": meta.get("depends_on"), + }) + return result + + def _read_params_from_zip_config_yaml( zip_bytes: bytes, preferred_skill_root: Optional[str] = None, @@ -346,11 +647,127 @@ def _read_params_from_zip_config_yaml( return params +def _find_zip_member_schema_yaml( + file_list: 
List[str], + preferred_skill_root: Optional[str] = None, +) -> Optional[str]: + """Return the ZIP entry path for .../config/schema.yaml (any depth; case-insensitive).""" + for entry in file_list: + norm = _normalize_zip_entry_path(entry) + # Match .../config/schema.yaml at any depth + parts = norm.split("/") + if len(parts) >= 2 and parts[-2] == "config" and parts[-1] == "schema.yaml": + logger.debug("[schema] Found schema.yaml via config/ prefix match: %s", entry) + return entry + # Fallback: if preferred_root is given, also check /config/schema.yaml + if preferred_skill_root and norm == f"{preferred_skill_root}/config/schema.yaml": + logger.debug("[schema] Found schema.yaml via preferred_root match: %s", entry) + return entry + logger.debug("[schema] No schema.yaml found in ZIP entries (preferred_root=%s, entry_count=%d)", preferred_skill_root, len(file_list)) + return None + + +def _read_schema_yaml_from_zip( + zip_bytes: bytes, + preferred_skill_root: Optional[str] = None, +) -> Optional[List[Dict[str, Any]]]: + """If the archive contains config/schema.yaml, parse it into List[SkillParam]; else None.""" + import zipfile + + zip_stream = io.BytesIO(zip_bytes) + with zipfile.ZipFile(zip_stream, "r") as zf: + member = _find_zip_member_schema_yaml( + zf.namelist(), + preferred_skill_root=preferred_skill_root, + ) + if not member: + return None + raw = zf.read(member) + parsed = _parse_skill_schema_from_yaml_bytes(raw) + if not parsed: + logger.debug("[schema] Parsed result is empty from ZIP member %s", member) + return parsed + + +def _get_skill_inputs_from_zip( + zip_bytes: bytes, + preferred_skill_root: Optional[str] = None, +) -> List[Dict[str, Any]]: + """Extract argparse parameters from scripts/*.py inside a ZIP archive. + + Mirrors ``_get_skill_inputs_from_code`` but reads from ZIP bytes instead of filesystem. + + Args: + zip_bytes: ZIP archive content. + preferred_skill_root: Preferred folder name inside ZIP containing scripts/. 
+ + Returns: + List of input parameter dicts with name, type, required, description, default. + """ + zip_stream = io.BytesIO(zip_bytes) + inputs: List[Dict[str, Any]] = [] + seen_names: set = set() + + try: + with zipfile.ZipFile(zip_stream, "r") as zf: + file_list = zf.namelist() + scripts_root = preferred_skill_root or "" + + for member in file_list: + normalized = member.replace("\\", "/").strip() + if not normalized.endswith(".py") or "/_" in normalized or normalized.endswith("/_"): + continue + if not normalized.startswith(scripts_root + "/scripts/"): + if scripts_root: + continue + parts = normalized.split("/") + if len(parts) < 2 or parts[-2] != "scripts": + continue + + try: + source = zf.read(member).decode("utf-8") + except (OSError, UnicodeDecodeError): + continue + + try: + tree = ast.parse(source, filename=member) + except SyntaxError: + continue + + for node in ast.walk(tree): + if not isinstance(node, ast.Call): + continue + if not _is_add_argument_call(node): + continue + parsed = _extract_arg_from_add_argument(node) + if not parsed: + continue + param_name = parsed["name"] + if param_name in ("help", "h") or param_name in seen_names: + continue + seen_names.add(param_name) + inputs.append({ + "name": param_name, + "type": parsed["type"], + "required": parsed["required"], + "description_en": parsed.get("description_en", ""), + }) + except zipfile.BadZipFile: + return inputs + + return inputs + + def _local_skill_config_yaml_path(skill_name: str, local_skills_dir: str) -> str: """Absolute path to //config/config.yaml.""" return os.path.join(local_skills_dir, skill_name, "config", "config.yaml") +def _local_skill_schema_yaml_path(skill_name: str, local_skills_dir: str) -> str: + """Absolute path to //config/schema.yaml.""" + return os.path.join(local_skills_dir, skill_name, "config", "schema.yaml") + + def _write_skill_params_to_local_config_yaml( skill_name: str, params: Dict[str, Any], @@ -380,24 +797,28 @@ def 
_remove_local_skill_config_yaml(skill_name: str, local_skills_dir: str) -> N logger.info("Removed %s (params cleared in DB)", path) -def get_skill_manager() -> SkillManager: - """Get or create the global SkillManager instance.""" - global _skill_manager - if _skill_manager is None: - _skill_manager = SkillManager(CONTAINER_SKILLS_PATH) - return _skill_manager +def get_skill_manager(tenant_id: Optional[str] = None) -> SkillManager: + """Create a SkillManager instance with optional tenant-based directory isolation. + + Args: + tenant_id: Tenant ID for directory isolation. When provided, skills + are stored under CONTAINER_SKILLS_PATH / tenant_id / + """ + return SkillManager(base_skills_dir=CONTAINER_SKILLS_PATH, tenant_id=tenant_id) class SkillService: """Skill management service for backend operations.""" - def __init__(self, skill_manager: Optional[SkillManager] = None): + def __init__(self, skill_manager: Optional[SkillManager] = None, tenant_id: Optional[str] = None): """Initialize SkillService. Args: - skill_manager: Optional SkillManager instance, uses global if not provided + skill_manager: Optional SkillManager instance, uses tenant-aware global if not provided + tenant_id: Tenant ID for skill isolation. Required when no skill_manager is provided. """ - self.skill_manager = skill_manager or get_skill_manager() + self.tenant_id = tenant_id + self.skill_manager = skill_manager or get_skill_manager(tenant_id) def _resolve_local_skills_dir_for_overlay(self) -> Optional[str]: """Directory where skill folders live: ``SKILLS_PATH``, else ``ROOT_DIR/skills`` if present.""" @@ -410,12 +831,15 @@ def _resolve_local_skills_dir_for_overlay(self) -> Optional[str]: return candidate return None - def _overlay_params_from_local_config_yaml(self, skill: Dict[str, Any]) -> Dict[str, Any]: - """Prefer ``//config/config.yaml`` for ``params`` in API responses. 
+ def _enrich_configs_from_yaml(self, skill: Dict[str, Any]) -> Dict[str, Any]: + """Read local config files and overlay onto skill. + + config/config.yaml → config_values (runtime defaults dict) + config/schema.yaml → config_schemas (parameter metadata list) - The database stores comment-free JSON (no legacy ``_comment`` keys, no `` # `` suffixes). - On-disk YAML may use ``#`` lines; when the file exists, parse with ruamel (inline tips - on scalars only) and use for ``params``; otherwise use DB. + If a file does not exist, the corresponding DB key is removed so the + response never contains stale data (e.g. {"configs": null} instead of + the old DB value). """ out = dict(skill) local_dir = self._resolve_local_skills_dir_for_overlay() @@ -424,70 +848,89 @@ def _overlay_params_from_local_config_yaml(self, skill: Dict[str, Any]) -> Dict[ name = out.get("name") if not name: return out - path = _local_skill_config_yaml_path(name, local_dir) - if not os.path.isfile(path): - return out - try: - with open(path, "rb") as f: - raw = f.read() - out["params"] = _parse_skill_params_from_config_bytes(raw) - logger.info("Using local config.yaml params (scalar inline comment tooltips) for skill %s", name) - except Exception as exc: - logger.warning( - "Could not use local config.yaml for skill %s params (using DB): %s", - name, - exc, - ) + config_path = _local_skill_config_yaml_path(name, local_dir) + if os.path.isfile(config_path): + try: + with open(config_path, "rb") as f: + raw = f.read() + out["config_values"] = _parse_skill_params_from_config_bytes(raw) + except Exception as exc: + logger.warning("Could not parse local config.yaml for skill %s: %s", name, exc) + else: + out.pop("config_values", None) + # schema.yaml takes precedence over DB config_schemas + schema_path = _local_skill_schema_yaml_path(name, local_dir) + if os.path.isfile(schema_path): + try: + with open(schema_path, "rb") as f: + raw = f.read() + parsed = _parse_skill_schema_from_yaml_bytes(raw) + 
out["config_schemas"] = parsed + except Exception as exc: + logger.warning("Could not parse local schema.yaml for skill %s: %s", name, exc) + else: + out.pop("config_schemas", None) return out def list_skills(self, tenant_id: Optional[str] = None) -> List[Dict[str, Any]]: - """List all skills for tenant. + """List all skills for a tenant. Args: - tenant_id: Tenant ID (reserved for future multi-tenant support) + tenant_id: Tenant ID for filtering skills. Uses instance tenant_id if not provided. Returns: List of skill info dicts """ + effective_tenant_id = tenant_id or self.tenant_id + if not effective_tenant_id: + raise SkillException("tenant_id is required") try: - skills = skill_db.list_skills() - return [self._overlay_params_from_local_config_yaml(s) for s in skills] + skills = skill_db.list_skills(effective_tenant_id) + enriched = [self._enrich_configs_from_yaml(s) for s in skills] + return enriched except Exception as e: logger.error(f"Error listing skills: {e}") raise SkillException(f"Failed to list skills: {str(e)}") from e def get_skill(self, skill_name: str, tenant_id: Optional[str] = None) -> Optional[Dict[str, Any]]: - """Get a specific skill. + """Get a specific skill within a tenant. Args: skill_name: Name of the skill - tenant_id: Tenant ID (reserved for future multi-tenant support) + tenant_id: Tenant ID for filtering. Uses instance tenant_id if not provided. 
Returns: Skill dict or None if not found """ + effective_tenant_id = tenant_id or self.tenant_id + if not effective_tenant_id: + raise SkillException("tenant_id is required") try: - skill = skill_db.get_skill_by_name(skill_name) + skill = skill_db.get_skill_by_name(skill_name, effective_tenant_id) if skill: - return self._overlay_params_from_local_config_yaml(skill) + return self._enrich_configs_from_yaml(skill) return None except Exception as e: logger.error(f"Error getting skill {skill_name}: {e}") raise SkillException(f"Failed to get skill: {str(e)}") from e - def get_skill_by_id(self, skill_id: int) -> Optional[Dict[str, Any]]: - """Get a specific skill by ID. + def get_skill_by_id(self, skill_id: int, tenant_id: Optional[str] = None) -> Optional[Dict[str, Any]]: + """Get a specific skill by ID within a tenant. Args: skill_id: ID of the skill + tenant_id: Tenant ID for filtering. Uses instance tenant_id if not provided. Returns: Skill dict or None if not found """ + effective_tenant_id = tenant_id or self.tenant_id + if not effective_tenant_id: + raise SkillException("tenant_id is required") try: - skill = skill_db.get_skill_by_id(skill_id) + skill = skill_db.get_skill_by_id(skill_id, effective_tenant_id) if skill: - return self._overlay_params_from_local_config_yaml(skill) + return self._enrich_configs_from_yaml(skill) return None except Exception as e: logger.error(f"Error getting skill by ID {skill_id}: {e}") @@ -499,11 +942,11 @@ def create_skill( tenant_id: Optional[str] = None, user_id: Optional[str] = None ) -> Dict[str, Any]: - """Create a new skill. + """Create a new skill for a tenant. Args: skill_data: Skill data including name, description, content, etc. - tenant_id: Tenant ID (reserved for future multi-tenant support) + tenant_id: Tenant ID for skill isolation. Uses instance tenant_id if not provided. 
user_id: User ID of the creator Returns: @@ -512,12 +955,16 @@ def create_skill( Raises: SkillException: If skill already exists locally or in database (409) """ + effective_tenant_id = tenant_id or self.tenant_id + if not effective_tenant_id: + raise SkillException("tenant_id is required") + skill_name = skill_data.get("name") if not skill_name: raise SkillException("Skill name is required") # Check if skill already exists in database - existing = skill_db.get_skill_by_name(skill_name) + existing = skill_db.get_skill_by_name(skill_name, effective_tenant_id) if existing: raise SkillException(f"Skill '{skill_name}' already exists") @@ -533,17 +980,17 @@ def create_skill( try: # Create database record first - result = skill_db.create_skill(skill_data) + result = skill_db.create_skill(skill_data, effective_tenant_id) # Create local skill file (SKILL.md) self.skill_manager.save_skill(skill_data) - # Mirror DB params to config/config.yaml when present (same layout as ZIP uploads). - if self.skill_manager.local_skills_dir and skill_data.get("params") is not None: + # Mirror DB config_schemas to config/config.yaml when present (same layout as ZIP uploads). 
+ if self.skill_manager.base_skills_dir and skill_data.get("config_schemas") is not None: try: _write_skill_params_to_local_config_yaml( skill_name, - _params_dict_to_storable(skill_data["params"]), + _params_dict_to_storable(skill_data["config_schemas"]), self.skill_manager.local_skills_dir, ) except Exception as exc: @@ -554,7 +1001,7 @@ def create_skill( ) logger.info(f"Created skill '{skill_name}' with local files") - return self._overlay_params_from_local_config_yaml(result) + return self._enrich_configs_from_yaml(result) except SkillException: raise except Exception as e: @@ -566,6 +1013,7 @@ def create_skill_from_file( file_content: Union[bytes, str, io.BytesIO], skill_name: Optional[str] = None, file_type: str = "auto", + source: str = "自定义", tenant_id: Optional[str] = None, user_id: Optional[str] = None ) -> Dict[str, Any]: @@ -579,12 +1027,14 @@ def create_skill_from_file( file_content: File content as bytes, string, or BytesIO skill_name: Optional skill name (extracted from ZIP if not provided) file_type: File type hint - "md", "zip", or "auto" (detect) - tenant_id: Tenant ID (reserved for future multi-tenant support) + source: Source identifier for the skill (e.g., "自定义", "官方", "导入") + tenant_id: Tenant ID for skill isolation. Uses instance tenant_id if not provided. 
user_id: User ID of the creator Returns: Created skill dict """ + effective_tenant_id = tenant_id or self.tenant_id content_bytes: bytes if isinstance(file_content, str): content_bytes = file_content.encode("utf-8") @@ -600,14 +1050,15 @@ def create_skill_from_file( file_type = "md" if file_type == "zip": - return self._create_skill_from_zip(content_bytes, skill_name, user_id, tenant_id) + return self._create_skill_from_zip(content_bytes, skill_name, source, user_id, effective_tenant_id) else: - return self._create_skill_from_md(content_bytes, skill_name, user_id, tenant_id) + return self._create_skill_from_md(content_bytes, skill_name, source, user_id, effective_tenant_id) def _create_skill_from_md( self, content_bytes: bytes, skill_name: Optional[str] = None, + source: str = "自定义", user_id: Optional[str] = None, tenant_id: Optional[str] = None ) -> Dict[str, Any]: @@ -624,7 +1075,7 @@ def _create_skill_from_md( raise SkillException("Skill name is required") # Check if skill already exists in database - existing = skill_db.get_skill_by_name(name) + existing = skill_db.get_skill_by_name(name, tenant_id) if existing: raise SkillException(f"Skill '{name}' already exists") @@ -639,27 +1090,30 @@ def _create_skill_from_md( "description": skill_data.get("description", ""), "content": skill_data.get("content", ""), "tags": skill_data.get("tags", []), - "source": "custom", + "source": source, "tool_ids": tool_ids, "allowed-tools": allowed_tools, # Preserve for local file sync } + # Note: scripts/ reflection is only possible for ZIP uploads (scripts exist in ZIP bytes). + # For MD-only uploads there are no scripts to reflect at create time. 
# Set created_by and updated_by if user_id is provided if user_id: skill_dict["created_by"] = user_id skill_dict["updated_by"] = user_id - result = skill_db.create_skill(skill_dict) + result = skill_db.create_skill(skill_dict, tenant_id) # Write SKILL.md to local storage self.skill_manager.save_skill(skill_dict) - return self._overlay_params_from_local_config_yaml(result) + return self._enrich_configs_from_yaml(result) def _create_skill_from_zip( self, zip_bytes: bytes, skill_name: Optional[str] = None, + source: str = "自定义", user_id: Optional[str] = None, tenant_id: Optional[str] = None ) -> Dict[str, Any]: @@ -716,7 +1170,7 @@ def _create_skill_from_zip( raise SkillException("Skill name is required") # Check if skill already exists in database - existing = skill_db.get_skill_by_name(name) + existing = skill_db.get_skill_by_name(name, tenant_id) if existing: raise SkillException(f"Skill '{name}' already exists") @@ -746,32 +1200,46 @@ def _create_skill_from_zip( "description": skill_data.get("description", ""), "content": skill_data.get("content", ""), "tags": skill_data.get("tags", []), - "source": "custom", + "source": source, "tool_ids": tool_ids, "allowed-tools": allowed_tools, # Preserve for local file sync } preferred_root = detected_skill_name or name + + # Priority: schema.yaml (list metadata) > scripts AST (list) > config.yaml (dict defaults) + schema_from_zip = _read_schema_yaml_from_zip(zip_bytes, preferred_root) + inputs_from_scripts = _get_skill_inputs_from_zip( + zip_bytes, + preferred_skill_root=preferred_root, + ) params_from_zip = _read_params_from_zip_config_yaml( zip_bytes, preferred_skill_root=preferred_root, ) + + if schema_from_zip: + skill_dict["config_schemas"] = schema_from_zip + elif inputs_from_scripts: + skill_dict["config_schemas"] = inputs_from_scripts + + # config.yaml always goes into config_values (runtime defaults dict) if params_from_zip is not None: - skill_dict["params"] = params_from_zip + skill_dict["config_values"] = 
params_from_zip # Set created_by and updated_by if user_id is provided if user_id: skill_dict["created_by"] = user_id skill_dict["updated_by"] = user_id - result = skill_db.create_skill(skill_dict) + result = skill_db.create_skill(skill_dict, tenant_id) # Save SKILL.md to local storage self.skill_manager.save_skill(skill_dict) self._upload_zip_files(zip_bytes, name, detected_skill_name) - return self._overlay_params_from_local_config_yaml(result) + return self._enrich_configs_from_yaml(result) def _delete_local_skill_files(self, skill_name: str) -> None: """Delete all files within a skill's local directory, preserving the directory itself. @@ -783,14 +1251,14 @@ def _delete_local_skill_files(self, skill_name: str) -> None: local_dir = os.path.join(self.skill_manager.local_skills_dir, skill_name) logger.info("Starting deletion of local files for skill '%s' from '%s'", skill_name, local_dir) - + if not os.path.isdir(local_dir): logger.info("Local skill directory does not exist, nothing to delete: %s", local_dir) return try: items = os.listdir(local_dir) logger.info("Found %d items to delete in '%s'", len(items), local_dir) - + for item in items: item_path = os.path.join(local_dir, item) if item_path.endswith("/"): @@ -822,20 +1290,34 @@ def _upload_zip_files( zip_stream = io.BytesIO(zip_bytes) - # Determine if folder renaming is needed + try: + with zipfile.ZipFile(zip_stream, "r") as zf: + file_list = zf.namelist() + except zipfile.BadZipFile: + raise SkillException("Invalid ZIP archive") + + # Determine if this ZIP has a subdirectory structure or root-level structure. 
+ # Root-level: SKILL.md is at root (e.g., "SKILL.md", "script/analyze.py") -> no stripping + # Subdirectory: SKILL.md is inside a folder (e.g., "my-skill/SKILL.md") -> strip folder prefix needs_rename = ( original_folder_name is not None and original_folder_name != skill_name ) + has_root_skill_md = any( + not fp.endswith("/") + and fp.replace("\\", "/").split("/")[0].lower() == "skill.md" + for fp in file_list + ) + logger.info( - "Starting ZIP extraction for skill '%s': needs_rename=%s, original_folder='%s'", - skill_name, needs_rename, original_folder_name + "Starting ZIP extraction for skill '%s': needs_rename=%s, original_folder='%s', has_root_skill_md=%s", + skill_name, needs_rename, original_folder_name, has_root_skill_md ) + zip_stream.seek(0) try: with zipfile.ZipFile(zip_stream, "r") as zf: - file_list = zf.namelist() logger.info("ZIP contains %d entries for skill '%s'", len(file_list), skill_name) extracted_count = 0 @@ -847,10 +1329,12 @@ def _upload_zip_files( parts = normalized_path.split("/") # Calculate target relative path + # Only strip the first component when the ZIP has a subdirectory structure + # (SKILL.md is inside a folder, not at root level) if needs_rename and len(parts) >= 2 and parts[0] == original_folder_name: - # Replace original folder name with skill_name relative_path = parts[0].replace(original_folder_name, skill_name) + "/" + "/".join(parts[1:]) - elif len(parts) >= 2: + elif len(parts) >= 2 and not has_root_skill_md: + # Strip first component (ZIP has subdirectory structure without root SKILL.md) relative_path = "/".join(parts[1:]) else: relative_path = normalized_path @@ -861,7 +1345,8 @@ def _upload_zip_files( file_data = zf.read(file_path) local_dir = os.path.join(self.skill_manager.local_skills_dir, skill_name) - local_path = os.path.join(local_dir, relative_path) + normalized_relative = relative_path.replace("/", os.sep).replace("\\", os.sep) + local_path = os.path.normpath(os.path.join(local_dir, normalized_relative)) 
os.makedirs(os.path.dirname(local_path), exist_ok=True) with open(local_path, "wb") as f: f.write(file_data) @@ -896,7 +1381,10 @@ def update_skill_from_file( Returns: Updated skill dict """ - existing = skill_db.get_skill_by_name(skill_name) + effective_tenant_id = tenant_id or self.tenant_id + if not effective_tenant_id: + raise SkillException("tenant_id is required") + existing = skill_db.get_skill_by_name(skill_name, effective_tenant_id) if not existing: raise SkillException(f"Skill not found: {skill_name}") @@ -915,9 +1403,9 @@ def update_skill_from_file( file_type = "md" if file_type == "zip": - return self._update_skill_from_zip(content_bytes, skill_name, user_id, tenant_id) + return self._update_skill_from_zip(content_bytes, skill_name, user_id, effective_tenant_id) else: - return self._update_skill_from_md(content_bytes, skill_name, user_id, tenant_id) + return self._update_skill_from_md(content_bytes, skill_name, user_id, effective_tenant_id) def _update_skill_from_md( self, @@ -948,7 +1436,7 @@ def _update_skill_from_md( } result = skill_db.update_skill( - skill_name, skill_dict, updated_by=user_id or None + skill_name, skill_dict, tenant_id, updated_by=user_id or None ) # Clean up existing local files before writing new ones @@ -959,7 +1447,7 @@ def _update_skill_from_md( skill_dict["allowed-tools"] = allowed_tools self.skill_manager.save_skill(skill_dict) - return self._overlay_params_from_local_config_yaml(result) + return self._enrich_configs_from_yaml(result) def _update_skill_from_zip( self, @@ -969,7 +1457,7 @@ def _update_skill_from_zip( tenant_id: Optional[str] = None, ) -> Dict[str, Any]: """Update skill from ZIP archive.""" - existing = skill_db.get_skill_by_name(skill_name) + existing = skill_db.get_skill_by_name(skill_name, tenant_id) if not existing: raise SkillException(f"Skill not found: {skill_name}") @@ -1025,10 +1513,10 @@ def _update_skill_from_zip( logger.warning(f"Could not parse SKILL.md from ZIP: {e}") if params_from_zip is not 
None: - skill_dict["params"] = params_from_zip + skill_dict["config_values"] = params_from_zip result = skill_db.update_skill( - skill_name, skill_dict, updated_by=user_id or None + skill_name, skill_dict, tenant_id, updated_by=user_id or None ) # Clean up existing local files before writing new ones @@ -1042,7 +1530,7 @@ def _update_skill_from_zip( # Update other files in local storage self._upload_zip_files(zip_bytes, skill_name, original_folder_name) - return self._overlay_params_from_local_config_yaml(result) + return self._enrich_configs_from_yaml(result) def update_skill( self, @@ -1051,61 +1539,66 @@ def update_skill( tenant_id: Optional[str] = None, user_id: Optional[str] = None ) -> Dict[str, Any]: - """Update an existing skill. + """Update an existing skill for a tenant. Args: skill_name: Name of the skill to update skill_data: Business fields from the application layer (no audit fields). - tenant_id: Tenant ID (reserved for future multi-tenant support) + tenant_id: Tenant ID for skill isolation. Uses instance tenant_id if not provided. user_id: Updater id from server-side auth (JWT / session); sets DB updated_by. Returns: Updated skill dict """ + effective_tenant_id = tenant_id or self.tenant_id + if not effective_tenant_id: + raise SkillException("tenant_id is required") try: - existing = skill_db.get_skill_by_name(skill_name) + existing = skill_db.get_skill_by_name(skill_name, effective_tenant_id) if not existing: raise SkillException(f"Skill not found: {skill_name}") result = skill_db.update_skill( - skill_name, skill_data, updated_by=user_id or None + skill_name, skill_data, effective_tenant_id, updated_by=user_id or None ) - # Keep config/config.yaml in sync when params are updated (matches ZIP import path). - if CONTAINER_SKILLS_PATH and "params" in skill_data: + # Keep config/config.yaml in sync when config_values are updated (matches ZIP import path). 
+ local_dir = self.skill_manager.local_skills_dir or CONTAINER_SKILLS_PATH + if local_dir and "config_values" in skill_data: try: - raw_params = skill_data["params"] - if raw_params is None: - _remove_local_skill_config_yaml(skill_name, CONTAINER_SKILLS_PATH) + raw_config_values = skill_data["config_values"] + if raw_config_values is None: + _remove_local_skill_config_yaml(skill_name, local_dir) else: _write_skill_params_to_local_config_yaml( skill_name, - _params_dict_to_storable(raw_params), - CONTAINER_SKILLS_PATH, + _params_dict_to_storable(raw_config_values), + local_dir, ) except Exception as exc: logger.warning( - "Local config/config.yaml sync failed after params update for %s: %s", + "Local config/config.yaml sync failed after config_values update for %s: %s", skill_name, exc, ) # Optional: sync SKILL.md on disk when SKILLS_PATH is configured (DB is source of truth). - if not CONTAINER_SKILLS_PATH: + if not local_dir: logger.warning( "SKILLS_PATH is not set; skipped local SKILL.md sync after DB update for %s", skill_name, ) - return self._overlay_params_from_local_config_yaml(result) + return self._enrich_configs_from_yaml(result) try: - allowed_tools = skill_db.get_tool_names_by_skill_name(skill_name) + allowed_tools = skill_db.get_tool_names_by_skill_name(skill_name, effective_tenant_id) local_skill_dict = { "name": skill_name, "description": skill_data.get("description", existing.get("description", "")), "content": skill_data.get("content", existing.get("content", "")), "tags": skill_data.get("tags", existing.get("tags", [])), "allowed-tools": allowed_tools, + "files": skill_data.get("files", []), } self.skill_manager.save_skill(local_skill_dict) except Exception as exc: @@ -1115,7 +1608,7 @@ def update_skill( exc, ) - return self._overlay_params_from_local_config_yaml(result) + return self._enrich_configs_from_yaml(result) except SkillException: raise except Exception as e: @@ -1125,18 +1618,22 @@ def update_skill( def delete_skill( self, skill_name: 
str, + tenant_id: Optional[str] = None, user_id: Optional[str] = None ) -> bool: - """Delete a skill. + """Delete a skill for a tenant. Args: skill_name: Name of the skill to delete - tenant_id: Tenant ID (reserved for future multi-tenant support) + tenant_id: Tenant ID for skill isolation. Uses instance tenant_id if not provided. user_id: User ID of the user performing the delete Returns: True if deleted successfully """ + effective_tenant_id = tenant_id or self.tenant_id + if not effective_tenant_id: + raise SkillException("tenant_id is required") try: # Delete local skill files from filesystem skill_dir = os.path.join(self.skill_manager.local_skills_dir, skill_name) @@ -1146,7 +1643,7 @@ def delete_skill( logger.info(f"Deleted skill directory: {skill_dir}") # Delete from database (soft delete with updated_by) - return skill_db.delete_skill(skill_name, updated_by=user_id) + return skill_db.delete_skill(skill_name, effective_tenant_id, updated_by=user_id) except Exception as e: logger.error(f"Error deleting skill {skill_name}: {e}") raise SkillException(f"Failed to delete skill: {str(e)}") from e @@ -1178,7 +1675,7 @@ def get_enabled_skills_for_agent( result = [] for skill_instance in enabled_skills: skill_id = skill_instance.get("skill_id") - skill = skill_db.get_skill_by_id(skill_id) + skill = skill_db.get_skill_by_id(skill_id, tenant_id) if skill: # Get skill info from ag_skill_info_t (repository returns keys: name, description, content) merged = { @@ -1258,7 +1755,7 @@ def build_skills_summary( for skill_instance in agent_skills: skill_id = skill_instance.get("skill_id") - skill = skill_db.get_skill_by_id(skill_id) + skill = skill_db.get_skill_by_id(skill_id, tenant_id) if skill: if available_skills is not None and skill.get("name") not in available_skills: continue @@ -1268,8 +1765,12 @@ def build_skills_summary( "description": skill.get("description", ""), }) else: - # Fallback: use all skills - all_skills = skill_db.list_skills() + # Fallback: use all 
skills from the current tenant + effective_tenant_id = tenant_id or self.tenant_id + if effective_tenant_id: + all_skills = skill_db.list_skills(effective_tenant_id) + else: + all_skills = [] skills_to_include = all_skills if available_skills is not None: available_set = set(available_skills) @@ -1305,13 +1806,16 @@ def get_skill_content(self, skill_name: str, tenant_id: Optional[str] = None) -> Args: skill_name: Name of the skill to load - tenant_id: Tenant ID (reserved for future multi-tenant support) + tenant_id: Tenant ID for filtering. Uses instance tenant_id if not provided. Returns: Skill content in markdown format """ + effective_tenant_id = tenant_id or self.tenant_id + if not effective_tenant_id: + return "" try: - skill = skill_db.get_skill_by_name(skill_name) + skill = skill_db.get_skill_by_name(skill_name, effective_tenant_id) return skill.get("content", "") if skill else "" except Exception as e: logger.error(f"Error getting skill content {skill_name}: {e}") @@ -1355,7 +1859,8 @@ def get_skill_file_content( """ try: local_dir = os.path.join(self.skill_manager.local_skills_dir, skill_name) - full_path = os.path.join(local_dir, file_path) + normalized_file_path = file_path.replace("/", os.sep).replace("\\", os.sep) + full_path = os.path.normpath(os.path.join(local_dir, normalized_file_path)) if not os.path.exists(full_path): logger.warning(f"File not found: {full_path}") @@ -1443,3 +1948,811 @@ def get_skill_instance( tenant_id=tenant_id, version_no=version_no ) + + def create_skill_from_zip_bytes( + self, + zip_bytes: bytes, + skill_name: Optional[str] = None, + source: str = "导入", + user_id: Optional[str] = None, + tenant_id: Optional[str] = None, + skip_duplicate_check: bool = False + ) -> Dict[str, Any]: + """Create a skill from ZIP bytes, optionally skipping the duplicate name check. + + This is the shared implementation used by both the upload endpoint and the + agent import flow. 
When skip_duplicate_check is True, the existence check + is bypassed (used during agent import where we pre-validate duplicates). + + Args: + zip_bytes: Raw ZIP file bytes + skill_name: Optional skill name override + source: Source label for the skill + user_id: Creator user ID + tenant_id: Tenant ID + skip_duplicate_check: If True, skip the "skill already exists" check + + Returns: + Created skill dict + """ + import zipfile + + zip_stream = io.BytesIO(zip_bytes) + + try: + with zipfile.ZipFile(zip_stream, "r") as zf: + file_list = zf.namelist() + except zipfile.BadZipFile: + raise SkillException("Invalid ZIP archive") + + zip_stream.seek(0) + + skill_md_path: Optional[str] = None + detected_skill_name: Optional[str] = None + + for file_path in file_list: + if file_path.endswith("/"): + continue + normalized_path = file_path.replace("\\", "/") + parts = normalized_path.split("/") + if len(parts) == 1 and parts[0].lower() == "skill.md": + skill_md_path = file_path + break + + if not skill_md_path: + for file_path in file_list: + if file_path.endswith("/"): + continue + normalized_path = file_path.replace("\\", "/") + parts = normalized_path.split("/") + if len(parts) >= 2 and parts[-1].lower() == "skill.md": + skill_md_path = file_path + detected_skill_name = parts[0] + break + + if not skill_md_path: + raise SkillException("SKILL.md not found in ZIP archive") + + name = skill_name or detected_skill_name + if not name: + raise SkillException("Skill name is required") + + if not skip_duplicate_check: + existing = skill_db.get_skill_by_name(name, tenant_id) + if existing: + raise SkillException(f"Skill '{name}' already exists") + + with zipfile.ZipFile(zip_stream, "r") as zf: + skill_content = zf.read(skill_md_path).decode("utf-8") + + try: + skill_data = SkillLoader.parse(skill_content) + except ValueError as e: + raise SkillException(f"Invalid SKILL.md in ZIP: {e}") + + if not name: + name = skill_data.get("name") + + if not name: + raise SkillException("Skill name 
is required") + + allowed_tools = skill_data.get("allowed_tools", []) + tool_ids = [] + if allowed_tools: + tool_ids = skill_db.get_tool_ids_by_names(allowed_tools, tenant_id) + + skill_dict = { + "name": name, + "description": skill_data.get("description", ""), + "content": skill_data.get("content", ""), + "tags": skill_data.get("tags", []), + "source": source, + "tool_ids": tool_ids, + "allowed-tools": allowed_tools, + } + + preferred_root = detected_skill_name or name + + schema_from_zip = _read_schema_yaml_from_zip(zip_bytes, preferred_root) + inputs_from_scripts = _get_skill_inputs_from_zip( + zip_bytes, + preferred_skill_root=preferred_root, + ) + params_from_zip = _read_params_from_zip_config_yaml( + zip_bytes, + preferred_skill_root=preferred_root, + ) + + if schema_from_zip: + skill_dict["config_schemas"] = schema_from_zip + elif inputs_from_scripts: + skill_dict["config_schemas"] = inputs_from_scripts + + if params_from_zip is not None: + skill_dict["config_values"] = params_from_zip + + if user_id: + skill_dict["created_by"] = user_id + skill_dict["updated_by"] = user_id + + result = skill_db.create_skill(skill_dict, tenant_id) + + self.skill_manager.save_skill(skill_dict) + + self._upload_zip_files(zip_bytes, name, detected_skill_name) + + return self._enrich_configs_from_yaml(result) + + def export_skills_by_names( + self, + skill_names: List[str], + tenant_id: Optional[str] = None + ) -> List[Dict[str, str]]: + """Export skills as ZIP files by name. + + Packages the entire skill directory (SKILL.md, scripts/, assets/, config/) + into a ZIP for each skill name. 
+ + Args: + skill_names: List of skill names to export + tenant_id: Tenant ID for skill lookup + + Returns: + List of dicts with skill_name and skill_zip_base64 + """ + import base64 + + effective_tenant_id = tenant_id or self.tenant_id + results: List[Dict[str, str]] = [] + + for skill_name in skill_names: + skill_dir = os.path.join( + self.skill_manager.local_skills_dir or CONTAINER_SKILLS_PATH, + skill_name + ) + if not os.path.isdir(skill_dir): + logger.warning(f"Skill directory not found for export: {skill_name}") + continue + + zip_buffer = io.BytesIO() + with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zf: + for root, dirs, files in os.walk(skill_dir): + for file in files: + file_path = os.path.join(root, file) + rel_path = os.path.relpath(file_path, skill_dir) + arcname = os.path.join(skill_name, rel_path) + zf.write(file_path, arcname) + + zip_buffer.seek(0) + zip_base64 = base64.b64encode(zip_buffer.read()).decode("utf-8") + results.append({ + "skill_name": skill_name, + "skill_zip_base64": zip_base64 + }) + + return results + + +def classify_streaming_content( + content: str, + classifier: Any +) -> List[Dict[str, Any]]: + """Classify streaming content using the ContentClassifier. + + Args: + content: Raw streaming content to classify + classifier: ContentClassifier instance + + Returns: + List of classified event dictionaries + """ + return classifier.classify(content) + + +class SkillCreationStreamService: + """Service for handling skill creation streaming operations.""" + + def __init__(self, skill_service: Optional["SkillService"] = None): + """Initialize the stream service. + + Args: + skill_service: Optional SkillService instance for accessing skill manager + """ + self.skill_service = skill_service or SkillService() + + def get_skill_manager_local_dir(self) -> str: + """Get local_skills_dir from SkillManager. 
+ + Returns: + Local skills directory path + """ + return self.skill_service.skill_manager.local_skills_dir or "" + + def create_classifier(self) -> "ContentClassifier": + """Create a new ContentClassifier instance. + + Returns: + New ContentClassifier instance + """ + from utils.content_classifier_utils import ContentClassifier + return ContentClassifier() + + def classify_content( + self, + content: str, + classifier: "ContentClassifier" + ) -> List[Dict[str, Any]]: + """Classify streaming content using the provided classifier. + + Args: + content: Raw streaming content to classify + classifier: ContentClassifier instance + + Returns: + List of classified event dictionaries + """ + return classifier.classify(content) + + +def create_skill_creation_stream_generator( + observer: Any, + classifier: "ContentClassifier", +) -> Any: + """Create a generator that processes observer messages and yields SSE events. + + Args: + observer: MessageObserver instance with cached messages + classifier: ContentClassifier instance for content classification + + Yields: + SSE-formatted event strings + """ + import json + from consts.const import STREAMABLE_CONTENT_TYPES + + cached = observer.get_cached_message() + for msg in cached: + if isinstance(msg, str): + try: + data = json.loads(msg) + msg_type = data.get("type", "") + content = data.get("content", "") + + if msg_type == "step_count": + yield f"data: {json.dumps({'type': 'step_count', 'content': content}, ensure_ascii=False)}\n\n" + elif msg_type in STREAMABLE_CONTENT_TYPES: + for event in classifier.classify(content): + yield f"data: {json.dumps(event, ensure_ascii=False)}\n\n" + except Exception as e: + logger.debug(f"Skipping unprocessable observer message: {e}") + + +def format_final_answer_sse(classifier: "ContentClassifier", final_result: str) -> List[str]: + """Format final answer content into SSE event strings.
+ + Args: + classifier: ContentClassifier instance for content classification + final_result: Final answer content to format + + Returns: + List of SSE-formatted event strings + """ + import json + + events = [] + for event in classifier.classify(final_result): + events.append(f"data: {json.dumps(event, ensure_ascii=False)}\n\n") + return events + + +# ========== Skill Creation Task Manager ========== + + +class SkillCreationTaskManager: + """Singleton manager to track active skill creation threads and their stop events.""" + + _instance: Optional["SkillCreationTaskManager"] = None + _lock = threading.Lock() + + def __new__(cls) -> "SkillCreationTaskManager": + if cls._instance is None: + with cls._lock: + if cls._instance is None: + cls._instance = super().__new__(cls) + cls._instance._tasks: Dict[str, Tuple[threading.Thread, threading.Event]] = {} + cls._instance._tasks_lock = threading.Lock() + return cls._instance + + def register_task(self, task_id: str, thread: threading.Thread, stop_event: threading.Event) -> None: + """Register a new skill creation task. + + Args: + task_id: Unique identifier for the task + thread: The thread running the skill creation + stop_event: Event to signal stop request + """ + with self._tasks_lock: + self._tasks[task_id] = (thread, stop_event) + logger.info(f"Registered skill creation task: {task_id}") + + def unregister_task(self, task_id: str) -> None: + """Unregister a completed skill creation task. + + Args: + task_id: Unique identifier for the task + """ + with self._tasks_lock: + if task_id in self._tasks: + del self._tasks[task_id] + logger.info(f"Unregistered skill creation task: {task_id}") + + def stop_task(self, task_id: str) -> bool: + """Signal a skill creation task to stop. 
+ + Args: + task_id: Unique identifier for the task + + Returns: + True if the task was found and stop was signaled, False otherwise + """ + with self._tasks_lock: + if task_id in self._tasks: + _, stop_event = self._tasks[task_id] + stop_event.set() + logger.info(f"Stop signal sent for skill creation task: {task_id}") + return True + return False + + def is_task_running(self, task_id: str) -> bool: + """Check if a task is still running. + + Args: + task_id: Unique identifier for the task + + Returns: + True if the task exists and is still alive + """ + with self._tasks_lock: + if task_id in self._tasks: + thread, _ = self._tasks[task_id] + return thread.is_alive() + return False + + +# Singleton instance +skill_creation_task_manager = SkillCreationTaskManager() + + +# ========== Skill Creation Stream Service ========== + + +def stream_skill_creation( + user_request: str, + language: str, + model_config: "ModelConfig", + existing_skill: Optional[Dict[str, Any]] = None, + complexity: str = "simple", +) -> tuple[str, Any]: + """Stream skill creation process as an async generator. 
+ + This function handles all the business logic for skill creation: + - Loads prompt template + - Creates observer, stop_event, and classifier + - Registers the task with the task manager + - Starts the agent thread + - Yields SSE events until completion + + Args: + user_request: User's skill description request + language: Language code (e.g., "zh", "en") + model_config: Model configuration + existing_skill: Optional existing skill for modification + complexity: Skill complexity level ("simple" or "complicated") + + Returns: + Tuple of (task_id, generator_function) + The task_id should be passed to the caller for stop functionality + """ + task_id = str(uuid.uuid4()) + + async def generate(): + is_task_registered = False + observer = None + classifier = None + + try: + # Load prompt template + template = get_skill_creation_simple_prompt_template( + language=language, + existing_skill=existing_skill, + complexity=complexity + ) + + # Create observer and classifier + observer = MessageObserver(lang=language) + stop_event = threading.Event() + classifier = ContentClassifier() + + # Get local skills directory + local_skills_dir = SkillService().skill_manager.local_skills_dir or "" + + def run_task(): + create_skill_from_request( + system_prompt=template.get("system_prompt", ""), + user_prompt=user_request, + model_config_list=[model_config], + observer=observer, + stop_event=stop_event, + local_skills_dir=local_skills_dir + ) + + thread = threading.Thread(target=run_task) + + # Register task before starting + skill_creation_task_manager.register_task(task_id, thread, stop_event) + is_task_registered = True + + thread.start() + + while thread.is_alive(): + for event in create_skill_creation_stream_generator(observer, classifier): + yield event + await asyncio.sleep(0.1) + + thread.join() + + for event in create_skill_creation_stream_generator(observer, classifier): + yield event + + yield f"data: {json.dumps({'type': 'done'}, ensure_ascii=False)}\n\n" + + except 
Exception as e: + logger.error(f"Error in stream_skill_creation: {e}") + yield f"data: {json.dumps({'type': 'error', 'message': str(e)}, ensure_ascii=False)}\n\n" + finally: + if is_task_registered: + skill_creation_task_manager.unregister_task(task_id) + + return task_id, generate + + +# ============== Skill List Initialization ============== + + +async def init_skill_list_for_tenant(tenant_id: str, user_id: str): + """Initialize skill list for a new tenant by scanning local skill directories. + + Mirrors init_tool_list_for_tenant() in tool_configuration_service.py. + + Args: + tenant_id: Tenant ID for the new tenant + user_id: User ID for tracking who initiated the scan + + Returns: + Dictionary containing initialization result + """ + from database import skill_db as skill_db_module + + if skill_db_module.check_skill_list_initialized(tenant_id): + logger.info(f"Skill list already initialized for tenant {tenant_id}, skipping") + return {"status": "already_initialized", "message": "Skill list already exists"} + + logger.info(f"Initializing skill list for new tenant: {tenant_id}") + await update_skill_list(tenant_id=tenant_id, user_id=user_id) + return {"status": "success", "message": "Skill list initialized successfully"} + + +async def update_skill_list(tenant_id: str, user_id: str): + """Scan local skill directories and update ag_skill_info_t. + + Mirrors update_tool_list() in tool_configuration_service.py. 
+ + Args: + tenant_id: Tenant ID for the tenant + user_id: User ID for tracking who initiated the scan + """ + from database import skill_db as skill_db_module + from nexent.skills import SkillManager + + skill_manager = SkillManager(base_skills_dir=CONTAINER_SKILLS_PATH, tenant_id=tenant_id) + # Use the resolved tenant-scoped local path for schema/config file reading + local_base = skill_manager.local_skills_dir or CONTAINER_SKILLS_PATH + scanned_skills = skill_manager.list_skills() + + skills_to_upsert = [] + for skill_info in scanned_skills: + skill_name = skill_info.get("name") + if not skill_name: + continue + + skill_data = { + "name": skill_name, + "description": skill_info.get("description", ""), + "tags": skill_info.get("tags", []), + "source": "official", + } + + try: + full_skill = skill_manager.load_skill(skill_name) + if full_skill: + skill_data["content"] = full_skill.get("content", "") + + # Try schema.yaml first; fall back to AST-parsed scripts + schema_path = _local_skill_schema_yaml_path(skill_name, local_base) + if os.path.isfile(schema_path): + async with aiofiles.open(schema_path, "rb") as f: + raw = await f.read() + parsed = _parse_skill_schema_from_yaml_bytes(raw) + skill_data["config_schemas"] = parsed + logger.debug("Loaded config_schemas from schema.yaml for skill %s", skill_name) + else: + scripts_dir = os.path.join(local_base, skill_name, "scripts") + inputs = _get_skill_inputs_from_code(scripts_dir) + if inputs: + skill_data["config_schemas"] = inputs + except Exception as e: + logger.warning(f"Could not load full skill content for {skill_name}: {e}") + skill_data["content"] = "" + + skills_to_upsert.append(skill_data) + + if skills_to_upsert: + skill_db_module.upsert_scanned_skills(skills_to_upsert, user_id, tenant_id) + logger.info(f"Upserted {len(skills_to_upsert)} skills for tenant {tenant_id}") + else: + logger.info(f"No skills found to upsert for tenant {tenant_id}") + + +def install_skills_for_tenant( + skill_ids: List[int], + 
tenant_id: str, + user_id: Optional[str] = None +) -> List[int]: + """Install specified official skills into a new tenant by copying their records. + + For each skill_id provided, finds the global template skill (official skill with + NULL tenant_id) and creates a copy in ag_skill_info_t for the target tenant. + Skills that cannot be found as global templates are skipped with a warning. + + Args: + skill_ids: List of skill IDs to install for the tenant. + tenant_id: Target tenant ID to install skills into. + user_id: User ID for created_by/updated_by audit fields. + + Returns: + List of skill IDs that were successfully installed. + """ + from database import skill_db as skill_db_module + + if not skill_ids: + return [] + + installed_ids: List[int] = [] + for skill_id in skill_ids: + try: + template = skill_db_module.get_skill_by_id_global(skill_id) + if not template: + logger.warning( + f"Skill template with ID {skill_id} not found for installation " + f"into tenant {tenant_id}" + ) + continue + + skill_name = template.get("name", "") + if not skill_name: + logger.warning( + f"Skill template {skill_id} has no name, skipping installation " + f"for tenant {tenant_id}" + ) + continue + + existing = skill_db_module.get_skill_by_name(skill_name, tenant_id) + if existing: + logger.info( + f"Skill '{skill_name}' already exists for tenant {tenant_id}, skipping" + ) + installed_ids.append(existing.get("skill_id")) + continue + + skill_data = { + "name": skill_name, + "description": template.get("description", ""), + "tags": template.get("tags", []), + "content": template.get("content", ""), + "config_schemas": template.get("config_schemas"), + "config_values": template.get("config_values"), + "source": template.get("source", "official"), + "created_by": user_id, + "updated_by": user_id, + } + result = skill_db_module.create_skill(skill_data, tenant_id) + new_skill_id = result.get("skill_id") + if new_skill_id: + installed_ids.append(new_skill_id) + logger.info( + 
f"Installed skill '{skill_name}' (ID {new_skill_id}) for tenant {tenant_id}" + ) + else: + logger.warning( + f"create_skill returned no skill_id for '{skill_name}', " + f"tenant {tenant_id}" + ) + except Exception as e: + logger.error( + f"Failed to install skill ID {skill_id} into tenant {tenant_id}: {e}" + ) + + return installed_ids + + +def install_skills_from_zip_for_tenant( + skill_names: List[str], + tenant_id: str, + user_id: Optional[str] = None, + locale: Optional[str] = None +) -> List[str]: + """Install official skills into a new tenant by reading ZIP files from OFFICIAL_SKILLS_ZIP_PATH. + + For each skill_name provided, derives the ZIP filename as {skill_name}.zip, + reads the file from OFFICIAL_SKILLS_ZIP_PATH, and creates the skill via + create_skill_from_file (which handles ZIP extraction, SKILL.md parsing, + and database record creation). + + Skills that cannot be found as ZIP files are skipped with a warning. + Skills that already exist for the tenant are skipped (not reinstalled). + + Args: + skill_names: List of skill names to install (e.g. ["search-knowledge-base"]). + tenant_id: Target tenant ID to install skills into. + user_id: User ID for created_by/updated_by audit fields. + locale: Frontend locale (e.g. "zh" or "en"). Determines the source label: + "zh" → "官方", other locales → "official". + + Returns: + List of skill names that were installed or already existed for the tenant.
+ """ + if not skill_names: + return [] + + zip_dir = OFFICIAL_SKILLS_ZIP_PATH + if not os.path.isdir(zip_dir): + logger.warning(f"Official skills zip directory not found: {zip_dir}") + return [] + + # Derive source label from locale: zh → "官方", otherwise "official" + source = "官方" if locale == "zh" else "official" + + installed: List[str] = [] + service = SkillService(tenant_id=tenant_id) + + for skill_name in skill_names: + zip_filename = f"{skill_name}.zip" + zip_path = os.path.join(zip_dir, zip_filename) + + if not os.path.isfile(zip_path): + logger.warning( + f"ZIP file not found for skill '{skill_name}': {zip_path}" + ) + continue + + try: + existing = skill_db.get_skill_by_name(skill_name, tenant_id) + if existing: + logger.info( + f"Skill '{skill_name}' already exists for tenant {tenant_id}, skipping" + ) + installed.append(skill_name) + continue + + with open(zip_path, "rb") as f: + zip_content = f.read() + + result = service.create_skill_from_file( + file_content=zip_content, + skill_name=skill_name, + file_type="zip", + source=source, + tenant_id=tenant_id, + user_id=user_id, + ) + installed_name = result.get("name", skill_name) + installed.append(installed_name) + logger.info( + f"Installed skill '{installed_name}' for tenant {tenant_id} " + f"from ZIP {zip_filename}" + ) + except Exception as e: + logger.error( + f"Failed to install skill '{skill_name}' from ZIP for tenant {tenant_id}: {e}" + ) + + return installed + + +def get_official_skills_with_status( + tenant_id: Optional[str] = None +) -> List[Dict[str, Any]]: + """Return all official skills with their installation status for a tenant. + + Scans the official-skills-zip directory for available official skills + (filename without .zip = skill name). For each skill, checks whether + it is already installed for the target tenant and whether local resource + files exist. + + Args: + tenant_id: Tenant ID to check installation status for. 
+ + Returns: + List of dicts with skill_id, name, description, source, and status + ("installable" | "installed" | "resource_missing"). + """ + from database import skill_db as skill_db_module + + result: List[Dict[str, Any]] = [] + + zip_dir = OFFICIAL_SKILLS_ZIP_PATH + if not os.path.isdir(zip_dir): + logger.warning(f"Official skills zip directory not found: {zip_dir}") + return result + + try: + zip_files = [f for f in os.listdir(zip_dir) if f.lower().endswith(".zip")] + except OSError as e: + logger.warning(f"Failed to list official skills zip directory: {e}") + return result + + for zip_file in sorted(zip_files): + skill_name = zip_file[:-4] + if not skill_name: + continue + + skill_id: Optional[int] = None + is_installed = False + has_resources = True + + if tenant_id: + existing = skill_db_module.get_skill_by_name(skill_name, tenant_id) + if existing: + skill_id = existing.get("skill_id") + is_installed = True + skill_manager = SkillManager( + base_skills_dir=CONTAINER_SKILLS_PATH, + tenant_id=tenant_id + ) + skill_dir = os.path.join( + skill_manager.local_skills_dir or CONTAINER_SKILLS_PATH or "", + skill_name + ) + has_resources = os.path.isdir(skill_dir) + + if skill_id is None: + global_skill = skill_db_module.get_skill_by_name(skill_name, None) + if global_skill: + skill_id = global_skill.get("skill_id") + + if is_installed and not has_resources: + status = "resource_missing" + elif is_installed: + status = "installed" + else: + status = "installable" + + description = "" + if skill_id: + db_skill = skill_db_module.get_skill_by_id(skill_id, tenant_id) if tenant_id else None + if db_skill: + description = db_skill.get("description", "") + if not description: + db_global = skill_db_module.get_skill_by_name(skill_name, None) + if db_global: + description = db_global.get("description", "") + + result.append({ + "skill_id": skill_id if skill_id is not None else 0, + "name": skill_name, + "description": description, + "source": "official", + "status": status, 
+ }) + + return result diff --git a/backend/services/tenant_service.py b/backend/services/tenant_service.py index bb761d2b4..6ed96a849 100644 --- a/backend/services/tenant_service.py +++ b/backend/services/tenant_service.py @@ -3,9 +3,12 @@ """ import asyncio import logging +import os +import shutil import uuid from typing import Any, Dict, List, Optional +from database import skill_db from database.tenant_config_db import ( get_single_config_info, insert_config, @@ -23,8 +26,9 @@ from database.remote_mcp_db import get_mcp_records_by_tenant, delete_mcp_record_by_name_and_url from database.invitation_db import query_invitations_by_tenant, remove_invitation from database.tool_db import delete_tools_by_agent_id -from consts.const import TENANT_NAME, TENANT_ID, DEFAULT_GROUP_ID +from consts.const import ASSET_OWNER_TENANT_ID, TENANT_NAME, TENANT_ID, DEFAULT_GROUP_ID, CONTAINER_SKILLS_PATH from consts.exceptions import NotFoundException, ValidationError, UserRegistrationException +from services.skill_service import install_skills_from_zip_for_tenant logger = logging.getLogger(__name__) @@ -47,7 +51,8 @@ def get_tenant_info(tenant_id: str) -> Dict[str, Any]: # Get tenant name name_config = get_single_config_info(tenant_id, TENANT_NAME) if not name_config: - logger.warning(f"The name of tenant {tenant_id} not found, creating default config.") + logger.warning( + f"The name of tenant {tenant_id} not found, creating default config.") # Auto-create TENANT_NAME config with default name _ensure_tenant_name_config(tenant_id) # Re-fetch after creation @@ -92,7 +97,8 @@ def _ensure_tenant_name_config(tenant_id: str) -> bool: if success: logger.info(f"Auto-created TENANT_NAME config for tenant {tenant_id}") else: - logger.error(f"Failed to auto-create TENANT_NAME config for tenant {tenant_id}") + logger.error( + f"Failed to auto-create TENANT_NAME config for tenant {tenant_id}") return success @@ -133,8 +139,11 @@ def get_tenants_paginated(page: int = 1, page_size: int = 20) -> 
Dict[str, Any]: Returns: Dict[str, Any]: Dictionary containing paginated tenant data and pagination info """ - # Get all tenant IDs first - all_tenant_ids = get_all_tenant_ids() + # Exclude virtual ASSET_OWNER tenant from admin tenant listings + all_tenant_ids = [ + tid for tid in get_all_tenant_ids() + if tid != ASSET_OWNER_TENANT_ID + ] total = len(all_tenant_ids) # Calculate pagination @@ -151,7 +160,8 @@ def get_tenants_paginated(page: int = 1, page_size: int = 20) -> Dict[str, Any]: tenant_info = get_tenant_info(tenant_id) tenants.append(tenant_info) except NotFoundException: - logging.warning(f"Tenant info of {tenant_id} not found. Returning basic tenant structure.") + logging.warning( + f"Tenant info of {tenant_id} not found. Returning basic tenant structure.") tenant_info = { "tenant_id": tenant_id, "tenant_name": "", @@ -168,7 +178,13 @@ def get_tenants_paginated(page: int = 1, page_size: int = 20) -> Dict[str, Any]: } -def create_tenant(tenant_name: str, created_by: Optional[str] = None) -> Dict[str, Any]: +def create_tenant( + tenant_name: str, + created_by: Optional[str] = None, + skill_ids: Optional[List[int]] = None, + skill_names: Optional[List[str]] = None, + locale: Optional[str] = None +) -> Dict[str, Any]: """ Create a new tenant with default group @@ -191,11 +207,13 @@ def create_tenant(tenant_name: str, created_by: Optional[str] = None) -> Dict[st # Check if tenant name already exists if check_tenant_name_exists(tenant_name.strip()): - raise ValidationError(f"Tenant with name '{tenant_name.strip()}' already exists") + raise ValidationError( + f"Tenant with name '{tenant_name.strip()}' already exists") try: # Create default group first - default_group_id = _create_default_group_for_tenant(tenant_id, created_by) + default_group_id = _create_default_group_for_tenant( + tenant_id, created_by) # Create tenant ID configuration tenant_id_data = { @@ -231,15 +249,48 @@ def create_tenant(tenant_name: str, created_by: Optional[str] = None) -> Dict[st } 
group_success = insert_config(group_config_data) if not group_success: - raise ValidationError("Failed to create tenant default group configuration") + raise ValidationError( + "Failed to create tenant default group configuration") + + # Install requested skills for the new tenant + # Prefer skill_names (ZIP-based installation) over skill_ids (legacy record-copy) + installed_skill_names: List[str] = [] + if skill_names: + try: + installed_skill_names = install_skills_from_zip_for_tenant( + skill_names=skill_names, + tenant_id=tenant_id, + user_id=created_by, + locale=locale + ) + except Exception as e: + logger.warning( + f"Failed to install skills from ZIP for tenant {tenant_id}: {e}") + elif skill_ids: + try: + from services.skill_service import install_skills_for_tenant as install_by_ids + installed_by_ids = install_by_ids( + skill_ids=skill_ids, + tenant_id=tenant_id, + user_id=created_by + ) + logger.info( + f"Legacy install_skills_for_tenant installed IDs: {installed_by_ids} " + f"for tenant {tenant_id}" + ) + except Exception as e: + logger.warning( + f"Failed to install skills by IDs for tenant {tenant_id}: {e}") tenant_info = { "tenant_id": tenant_id, "tenant_name": tenant_name.strip(), - "default_group_id": str(default_group_id) + "default_group_id": str(default_group_id), + "installed_skill_names": installed_skill_names, } - logger.info(f"Created tenant {tenant_id} with name '{tenant_name}' and default group {default_group_id}") + logger.info( + f"Created tenant {tenant_id} with name '{tenant_name}' and default group {default_group_id}") return tenant_info except Exception as e: @@ -270,13 +321,15 @@ def update_tenant_info(tenant_id: str, tenant_name: str, updated_by: Optional[st # Check if tenant name already exists (exclude current tenant) if check_tenant_name_exists(tenant_name.strip(), exclude_tenant_id=tenant_id): - raise ValidationError(f"Tenant with name '{tenant_name.strip()}' already exists") + raise ValidationError( + f"Tenant with name 
'{tenant_name.strip()}' already exists") # Check if tenant name config exists name_config = get_single_config_info(tenant_id, TENANT_NAME) if not name_config: # Tenant config doesn't exist, create it with the provided name - logger.info(f"TENANT_NAME config not found for {tenant_id}, creating new config.") + logger.info( + f"TENANT_NAME config not found for {tenant_id}, creating new config.") tenant_name_data = { "tenant_id": tenant_id, "config_key": TENANT_NAME, @@ -302,6 +355,57 @@ def update_tenant_info(tenant_id: str, tenant_name: str, updated_by: Optional[st return updated_tenant +async def _delete_skills_for_tenant(tenant_id: str, actor: str) -> None: + """ + Delete all skills, skill instances, and local skill files for a tenant. + + This performs cascade cleanup of: + - All skill instances (ag_skill_instance_t) for the tenant + - All skills (ag_skill_info_t) for the tenant + - All local skill directories and files under CONTAINER_SKILLS_PATH/{tenant_id}/ + + Args: + tenant_id: Tenant ID to delete skills for + actor: User ID performing the deletion (for audit trail) + """ + logger.info(f"Deleting skills and local files for tenant {tenant_id}") + + # 1. Soft-delete all skill instances for the tenant (regardless of skill source) + try: + deleted_count = skill_db.delete_skill_instances_by_tenant( + tenant_id, actor) + logger.info( + f"Soft-deleted {deleted_count} skill instances for tenant {tenant_id}") + except Exception as e: + logger.warning( + f"Failed to soft-delete skill instances for tenant {tenant_id}: {str(e)}") + + # 2. Soft-delete all skills for the tenant + skills = skill_db.list_skills(tenant_id) + for skill in skills: + try: + skill_name = skill.get("name") + if skill_name: + skill_db.delete_skill(skill_name, tenant_id, actor) + logger.info( + f"Soft-deleted skill '{skill_name}' for tenant {tenant_id}") + except Exception as e: + logger.warning( + f"Failed to soft-delete skill {skill.get('name')}: {str(e)}") + + # 3. 
Delete the tenant's local skill directory and all its contents + if CONTAINER_SKILLS_PATH: + tenant_skill_root = os.path.join(CONTAINER_SKILLS_PATH, tenant_id) + if os.path.exists(tenant_skill_root): + try: + shutil.rmtree(tenant_skill_root) + logger.info( + f"Deleted tenant skill root directory: {tenant_skill_root}") + except Exception as e: + logger.warning( + f"Failed to delete tenant skill root directory {tenant_skill_root}: {str(e)}") + + async def delete_tenant(tenant_id: str, deleted_by: Optional[str] = None) -> bool: """ Delete tenant and all associated resources @@ -312,6 +416,7 @@ async def delete_tenant(tenant_id: str, deleted_by: Optional[str] = None) -> boo - All models in the tenant - All knowledge bases in the tenant - All agents in the tenant (including tool instances) + - All skills, skill instances, and local skill files for the tenant - All MCP configurations in the tenant - All invitation codes in the tenant - All tenant configurations @@ -332,12 +437,14 @@ async def delete_tenant(tenant_id: str, deleted_by: Optional[str] = None) -> boo if not name_config: raise NotFoundException(f"Tenant {tenant_id} does not exist") - logger.info(f"Starting cascade deletion for tenant {tenant_id} by {deleted_by}") + logger.info( + f"Starting cascade deletion for tenant {tenant_id} by {deleted_by}") try: # 1. 
Deactivate all users in the tenant (full cleanup including Supabase deletion) logger.info(f"Deactivating users for tenant {tenant_id}") - users_result = get_users_by_tenant_id(tenant_id, page=1, page_size=10000) + users_result = get_users_by_tenant_id( + tenant_id, page=1, page_size=10000) users = users_result.get("users", []) if users: @@ -346,9 +453,11 @@ async def delete_single_user(user: Dict[str, Any]) -> None: if user_id: try: await delete_user_and_cleanup(user_id, tenant_id) - logger.info(f"Deactivated user {user_id} for tenant {tenant_id}") + logger.info( + f"Deactivated user {user_id} for tenant {tenant_id}") except Exception as e: - logger.warning(f"Failed to deactivate user {user_id}: {str(e)}") + logger.warning( + f"Failed to deactivate user {user_id}: {str(e)}") # Concurrently delete all users await asyncio.gather(*[delete_single_user(user) for user in users]) @@ -360,16 +469,19 @@ async def delete_single_user(user: Dict[str, Any]) -> None: try: remove_group(group["group_id"], deleted_by) except Exception as e: - logger.warning(f"Failed to delete group {group.get('group_id')}: {str(e)}") + logger.warning( + f"Failed to delete group {group.get('group_id')}: {str(e)}") # 3. Delete all models in the tenant logger.info(f"Deleting models for tenant {tenant_id}") models = get_model_records({"tenant_id": tenant_id}, tenant_id) for model in models: try: - delete_model_record(model["model_id"], deleted_by or "system", tenant_id) + delete_model_record( + model["model_id"], deleted_by or "system", tenant_id) except Exception as e: - logger.warning(f"Failed to delete model {model.get('model_id')}: {str(e)}") + logger.warning( + f"Failed to delete model {model.get('model_id')}: {str(e)}") # 4. 
Delete all knowledge bases in the tenant logger.info(f"Deleting knowledge bases for tenant {tenant_id}") @@ -381,7 +493,8 @@ async def delete_single_user(user: Dict[str, Any]) -> None: "user_id": deleted_by or "system" }) except Exception as e: - logger.warning(f"Failed to delete knowledge base {kb.get('knowledge_id')}: {str(e)}") + logger.warning( + f"Failed to delete knowledge base {kb.get('knowledge_id')}: {str(e)}") # 5. Delete all agents in the tenant (including related data) logger.info(f"Deleting agents for tenant {tenant_id}") @@ -390,24 +503,34 @@ async def delete_single_user(user: Dict[str, Any]) -> None: try: agent_id = agent.get("agent_id") # Delete tool instances first - delete_tools_by_agent_id(agent_id, tenant_id, deleted_by or "system", version_no=0) + delete_tools_by_agent_id( + agent_id, tenant_id, deleted_by or "system", version_no=0) # Delete agent relationships - delete_agent_relationship(agent_id, tenant_id, deleted_by or "system", version_no=0) + delete_agent_relationship( + agent_id, tenant_id, deleted_by or "system", version_no=0) # Delete the agent delete_agent_by_id(agent_id, tenant_id, deleted_by or "system") except Exception as e: - logger.warning(f"Failed to delete agent {agent.get('agent_id')}: {str(e)}") + logger.warning( + f"Failed to delete agent {agent.get('agent_id')}: {str(e)}") # Also delete published agents (version_no >= 1) - agents_published = query_all_agent_info_by_tenant_id(tenant_id, version_no=1) + agents_published = query_all_agent_info_by_tenant_id( + tenant_id, version_no=1) for agent in agents_published: try: agent_id = agent.get("agent_id") - delete_tools_by_agent_id(agent_id, tenant_id, deleted_by or "system", version_no=1) - delete_agent_relationship(agent_id, tenant_id, deleted_by or "system", version_no=1) + delete_tools_by_agent_id( + agent_id, tenant_id, deleted_by or "system", version_no=1) + delete_agent_relationship( + agent_id, tenant_id, deleted_by or "system", version_no=1) delete_agent_by_id(agent_id, 
tenant_id, deleted_by or "system") except Exception as e: - logger.warning(f"Failed to delete published agent {agent.get('agent_id')}: {str(e)}") + logger.warning( + f"Failed to delete published agent {agent.get('agent_id')}: {str(e)}") + + # 5b. Delete all skills, skill instances, and local skill files for the tenant + _delete_skills_for_tenant(tenant_id, deleted_by or "system") # 6. Delete all MCP configurations in the tenant logger.info(f"Deleting MCP records for tenant {tenant_id}") @@ -421,7 +544,8 @@ async def delete_single_user(user: Dict[str, Any]) -> None: deleted_by or "system" ) except Exception as e: - logger.warning(f"Failed to delete MCP {mcp.get('mcp_id')}: {str(e)}") + logger.warning( + f"Failed to delete MCP {mcp.get('mcp_id')}: {str(e)}") # 7. Delete all invitation codes in the tenant logger.info(f"Deleting invitations for tenant {tenant_id}") @@ -430,7 +554,8 @@ async def delete_single_user(user: Dict[str, Any]) -> None: try: remove_invitation(invitation["invitation_id"], deleted_by) except Exception as e: - logger.warning(f"Failed to delete invitation {invitation.get('invitation_id')}: {str(e)}") + logger.warning( + f"Failed to delete invitation {invitation.get('invitation_id')}: {str(e)}") # 8. 
Delete all tenant configurations (must be done last) logger.info(f"Deleting tenant configurations for tenant {tenant_id}") @@ -440,9 +565,11 @@ async def delete_single_user(user: Dict[str, Any]) -> None: try: delete_config_by_tenant_config_id(config["tenant_config_id"]) except Exception as e: - logger.warning(f"Failed to delete config {config.get('tenant_config_id')}: {str(e)}") + logger.warning( + f"Failed to delete config {config.get('tenant_config_id')}: {str(e)}") - logger.info(f"Successfully deleted tenant {tenant_id} and all associated resources") + logger.info( + f"Successfully deleted tenant {tenant_id} and all associated resources") return True except Exception as e: @@ -476,5 +603,6 @@ def _create_default_group_for_tenant(tenant_id: str, created_by: Optional[str] = return group_id except Exception as e: - logger.error(f"Failed to create default group for tenant {tenant_id}: {str(e)}") + logger.error( + f"Failed to create default group for tenant {tenant_id}: {str(e)}") raise ValidationError(f"Failed to create default group: {str(e)}") diff --git a/backend/services/tool_configuration_service.py b/backend/services/tool_configuration_service.py index e3a4cfa4f..6e6260544 100644 --- a/backend/services/tool_configuration_service.py +++ b/backend/services/tool_configuration_service.py @@ -15,7 +15,6 @@ from consts.const import DATA_PROCESS_SERVICE, LOCAL_MCP_SERVER, MCP_MANAGEMENT_API from consts.exceptions import MCPConnectionError, NotFoundException, ToolExecutionException from consts.model import ToolInstanceInfoRequest, ToolInfo, ToolSourceEnum, ToolValidateRequest -from database.client import minio_client from database.outer_api_tool_db import ( upsert_openapi_service, query_openapi_services_by_tenant, @@ -25,6 +24,7 @@ get_mcp_authorization_token_by_name_and_url, get_mcp_records_by_tenant, get_mcp_server_by_name_and_tenant, + get_mcp_custom_headers_by_name_and_url, ) from database.tool_db import ( check_tool_list_initialized, @@ -34,39 +34,47 @@ 
search_last_tool_instance_by_tool_id, update_tool_table_from_scan_tool_list, ) +from database.knowledge_db import get_knowledge_name_map_by_index_names from mcpadapt.smolagents_adapter import _sanitize_function_name -from services.file_management_service import get_llm_model -from services.vectordatabase_service import get_embedding_model, get_rerank_model, get_vector_db_core +from services.file_management_service import get_llm_model, validate_urls_access +from services.vectordatabase_service import get_embedding_model_by_index_name, get_rerank_model +from utils.http_client_utils import create_httpx_client from database.client import minio_client -from services.image_service import get_vlm_model -from services.vectordatabase_service import get_embedding_model, get_vector_db_core +from services.image_service import get_video_understanding_model, get_vlm_model +from nexent.monitor import set_monitoring_context, set_monitoring_operation +from services.vectordatabase_service import get_vector_db_core from utils.langchain_utils import discover_langchain_modules from utils.tool_utils import get_local_tools_classes, get_local_tools_description_zh logger = logging.getLogger("tool_configuration_service") -def _create_mcp_transport(url: str, authorization_token: Optional[str] = None): +def _create_mcp_transport(url: str, authorization_token: Optional[str] = None, custom_headers: Optional[Dict[str, Any]] = None): """ Create appropriate MCP transport based on URL ending. 
Args: url: MCP server URL authorization_token: Optional authorization token + custom_headers: Optional custom HTTP headers Returns: Transport instance (SSETransport or StreamableHttpTransport) """ url_stripped = url.strip() - headers = {"Authorization": authorization_token} if authorization_token else {} + headers = {} + if authorization_token: + headers["Authorization"] = authorization_token + if custom_headers: + headers.update(custom_headers) if url_stripped.endswith("/sse"): - return SSETransport(url=url_stripped, headers=headers) + return SSETransport(url=url_stripped, headers=headers, httpx_client_factory=create_httpx_client) elif url_stripped.endswith("/mcp"): - return StreamableHttpTransport(url=url_stripped, headers=headers) + return StreamableHttpTransport(url=url_stripped, headers=headers, httpx_client_factory=create_httpx_client) else: # Default to StreamableHttpTransport for unrecognized formats - return StreamableHttpTransport(url=url_stripped, headers=headers) + return StreamableHttpTransport(url=url_stripped, headers=headers, httpx_client_factory=create_httpx_client) def python_type_to_json_schema(annotation: Any) -> str: @@ -129,11 +137,15 @@ def get_local_tools() -> List[ToolInfo]: if hasattr(param.default, 'exclude') and param.default.exclude: continue + # Check if default is a Pydantic FieldInfo (has .default attribute) + is_pydantic_field = hasattr(param.default, 'default') + # Get description in both languages - param_description = param.default.description if hasattr(param.default, 'description') else "" + param_description = param.default.description if is_pydantic_field else "" # First try to get from param.default.description_zh (FieldInfo) - param_description_zh = param.default.description_zh if hasattr(param.default, 'description_zh') else None + # Note: Pydantic Field doesn't have description_zh attribute, so use getattr with default + param_description_zh = getattr(param.default, 'description_zh', None) if is_pydantic_field else None # 
Fallback to init_param_descriptions if not found if param_description_zh is None and param_name in init_param_descriptions: @@ -145,11 +157,21 @@ def get_local_tools() -> List[ToolInfo]: "description": param_description, "description_zh": param_description_zh } - if param.default.default is PydanticUndefined: - param_info["optional"] = False + + # Handle both Pydantic FieldInfo and simple defaults + if is_pydantic_field: + if param.default.default is PydanticUndefined: + param_info["optional"] = False + else: + param_info["default"] = param.default.default + param_info["optional"] = True else: - param_info["default"] = param.default.default - param_info["optional"] = True + # Simple default value (not a FieldInfo) + if param.default == inspect.Parameter.empty: + param_info["optional"] = False + else: + param_info["default"] = param.default + param_info["optional"] = True init_params_list.append(param_info) @@ -261,13 +283,15 @@ async def get_all_mcp_tools(tenant_id: str) -> List[ToolInfo]: mcp_info = get_mcp_records_by_tenant(tenant_id=tenant_id) tools_info = [] for record in mcp_info: - # only update connected server - if record["status"]: + # Only scan MCP services that are explicitly enabled and currently healthy. 
+ if bool(record.get("enabled")) and bool(record.get("status")): try: tools_info.extend(await get_tool_from_remote_mcp_server( mcp_server_name=record["mcp_name"], remote_mcp_server=record["mcp_server"], - tenant_id=tenant_id + tenant_id=tenant_id, + authorization_token=record.get("authorization_token"), + custom_headers=record.get("custom_headers"), )) except Exception as e: logger.error(f"mcp connection error: {str(e)}") @@ -339,7 +363,8 @@ async def get_tool_from_remote_mcp_server( mcp_server_name: str, remote_mcp_server: str, tenant_id: Optional[str] = None, - authorization_token: Optional[str] = None + authorization_token: Optional[str] = None, + custom_headers: Optional[Dict[str, Any]] = None ): """ Get the tool information from the remote MCP server, avoid blocking the event loop @@ -349,6 +374,7 @@ async def get_tool_from_remote_mcp_server( remote_mcp_server: URL of the MCP server tenant_id: Optional tenant ID for database lookup of authorization_token authorization_token: Optional authorization token for authentication (if not provided and tenant_id is given, will be fetched from database) + custom_headers: Optional custom HTTP headers """ # Get authorization token from database if not provided if authorization_token is None and tenant_id: @@ -358,10 +384,18 @@ async def get_tool_from_remote_mcp_server( tenant_id=tenant_id ) + # Get custom headers from database if not provided + if custom_headers is None and tenant_id: + custom_headers = get_mcp_custom_headers_by_name_and_url( + mcp_name=mcp_server_name, + mcp_server=remote_mcp_server, + tenant_id=tenant_id + ) + tools_info = [] try: - transport = _create_mcp_transport(remote_mcp_server, authorization_token) + transport = _create_mcp_transport(remote_mcp_server, authorization_token, custom_headers) client = Client(transport=transport, timeout=10) async with client: # List available operations @@ -381,8 +415,9 @@ async def get_tool_from_remote_mcp_server( input_schema["properties"][k]["type"] = "string" 
sanitized_tool_name = _sanitize_function_name(tool.name) + tool_description = tool.description or "" tool_info = ToolInfo(name=sanitized_tool_name, - description=tool.description, + description=tool_description, params=[], source=ToolSourceEnum.MCP.value, inputs=str(input_schema["properties"]), @@ -481,7 +516,8 @@ async def list_all_tools(tenant_id: str): param["description_zh"] = sdk_param.get("description_zh") break - # Merge inputs description_zh from SDK + # Use SDK full input schema for local tools to keep runtime inputs + # aligned with current tool code (instead of stale DB snapshots). inputs_str = tool.get("inputs", "{}") try: inputs = json.loads(inputs_str) if isinstance(inputs_str, str) else inputs_str @@ -514,7 +550,6 @@ async def list_all_tools(tenant_id: str): "category": tool.get("category") } formatted_tools.append(formatted_tool) - return formatted_tools @@ -534,7 +569,8 @@ async def _call_mcp_tool( mcp_url: str, tool_name: str, inputs: Optional[Dict[str, Any]], - authorization_token: Optional[str] = None + authorization_token: Optional[str] = None, + custom_headers: Optional[Dict[str, Any]] = None ) -> Dict[str, Any]: """ Common method to call MCP tool with connection handling. 
@@ -544,6 +580,7 @@ async def _call_mcp_tool( tool_name: Name of the tool to call inputs: Parameters to pass to the tool authorization_token: Optional authorization token for authentication + custom_headers: Optional custom HTTP headers Returns: Dict containing tool execution result @@ -551,7 +588,7 @@ async def _call_mcp_tool( Raises: MCPConnectionError: If MCP connection fails """ - transport = _create_mcp_transport(mcp_url, authorization_token) + transport = _create_mcp_transport(mcp_url, authorization_token, custom_headers) client = Client(transport=transport) async with client: # Check if connected @@ -615,16 +652,22 @@ async def _validate_mcp_tool_remote( if not actual_mcp_url: raise NotFoundException(f"MCP server not found for name: {usage}") - # Get authorization token from database + # Get authorization token and custom headers from database authorization_token = None + custom_headers = None if tenant_id: authorization_token = get_mcp_authorization_token_by_name_and_url( mcp_name=usage, mcp_server=actual_mcp_url, tenant_id=tenant_id ) + custom_headers = get_mcp_custom_headers_by_name_and_url( + mcp_name=usage, + mcp_server=actual_mcp_url, + tenant_id=tenant_id + ) - return await _call_mcp_tool(actual_mcp_url, tool_name, inputs, authorization_token) + return await _call_mcp_tool(actual_mcp_url, tool_name, inputs, authorization_token, custom_headers) def _get_tool_class_by_name(tool_name: str) -> Optional[type]: @@ -680,6 +723,8 @@ def _validate_local_tool( if not tool_class: raise NotFoundException(f"Tool class not found for {tool_name}") + runtime_inputs = dict(inputs or {}) + # Parse instantiation parameters first instantiation_params = params or {} # Get signature and extract default values for all parameters @@ -702,7 +747,20 @@ def _validate_local_tool( instantiation_params[param_name] = param.default if tool_name == "knowledge_base_search": - embedding_model = get_embedding_model(tenant_id=tenant_id) + index_names = 
instantiation_params.get("index_names", []) + is_multimodal = instantiation_params.pop("multimodal", False) + + # Must have embedding model for knowledge base search + if not index_names or not tenant_id: + raise ToolExecutionException( + "Embedding model is required for knowledge_base_search but index_names or tenant_id is missing") + + embedding_model, model_id, _ = get_embedding_model_by_index_name(tenant_id, index_names[0]) + if not embedding_model: + raise ToolExecutionException( + f"No embedding model found for index '{index_names[0]}'. " + f"Please configure an embedding model for this knowledge base.") + vdb_core = get_vector_db_core() # Get rerank configuration @@ -712,11 +770,21 @@ def _validate_local_tool( if rerank and rerank_model_name: rerank_model = get_rerank_model(tenant_id=tenant_id, model_name=rerank_model_name) + # Build display_name to index_name mapping for LLM parameter conversion + display_name_to_index_map = {} + if index_names: + knowledge_name_map = get_knowledge_name_map_by_index_names(index_names) + for idx_name, kb_name in knowledge_name_map.items(): + display_name_to_index_map[kb_name] = idx_name + params = { **instantiation_params, 'vdb_core': vdb_core, 'embedding_model': embedding_model, 'rerank_model': rerank_model, + 'display_name_to_index_map': display_name_to_index_map, + # Internal access control: restrict results to specific document paths (path_or_urls) + 'document_paths': instantiation_params.get('document_paths'), } tool_instance = tool_class(**params) elif tool_name in ["dify_search", "datamate_search"]: @@ -732,15 +800,49 @@ def _validate_local_tool( 'rerank_model': rerank_model, } tool_instance = tool_class(**params) + elif tool_name in ("haotian_search", "aidp_search"): + # Haotian and AIDP share the same instantiation shape: drop the + # backend-only rerank keys and explicitly set observer=None + # (otherwise Python falls back to the FieldInfo default, which + # later triggers "'FieldInfo' has no attribute 'lang'" in + 
# forward()). + filtered_params = {k: v for k, v in instantiation_params.items() + if k not in ["observer", "rerank_model", "rerank"]} + filtered_params["observer"] = None + tool_instance = tool_class(**filtered_params) elif tool_name == "analyze_image": if not tenant_id or not user_id: raise ToolExecutionException( f"Tenant ID and User ID are required for {tool_name} validation") + # get_vlm_model reads the first multimodal slot, now shown as image understanding. image_to_text_model = get_vlm_model(tenant_id=tenant_id) + vlm_display_name = getattr( + image_to_text_model, 'display_name', None) + set_monitoring_context(tenant_id=tenant_id) + set_monitoring_operation( + "tool_validation", display_name=vlm_display_name) params = { **instantiation_params, 'vlm_model': image_to_text_model, - 'storage_client': minio_client + 'storage_client': minio_client, + 'validate_url_access': lambda urls: validate_urls_access(urls, user_id) + } + tool_instance = tool_class(**params) + elif tool_name in ["analyze_audio", "analyze_video"]: + if not tenant_id or not user_id: + raise ToolExecutionException( + f"Tenant ID and User ID are required for {tool_name} validation") + video_understanding_model = get_video_understanding_model(tenant_id=tenant_id) + model_display_name = getattr( + video_understanding_model, 'display_name', None) + set_monitoring_context(tenant_id=tenant_id) + set_monitoring_operation( + "tool_validation", display_name=model_display_name) + params = { + **instantiation_params, + 'vlm_model': video_understanding_model, + 'storage_client': minio_client, + 'validate_url_access': lambda urls: validate_urls_access(urls, user_id) } tool_instance = tool_class(**params) elif tool_name == "analyze_text_file": @@ -748,16 +850,33 @@ def _validate_local_tool( raise ToolExecutionException( f"Tenant ID and User ID are required for {tool_name} validation") long_text_to_text_model = get_llm_model(tenant_id=tenant_id) + llm_display_name = getattr( + long_text_to_text_model, 
'display_name', None) + set_monitoring_context(tenant_id=tenant_id) + set_monitoring_operation( + "tool_validation", display_name=llm_display_name) params = { **instantiation_params, 'llm_model': long_text_to_text_model, 'storage_client': minio_client, - "data_process_service_url": DATA_PROCESS_SERVICE + "data_process_service_url": DATA_PROCESS_SERVICE, + 'validate_url_access': lambda urls: validate_urls_access(urls, user_id) } tool_instance = tool_class(**params) else: tool_instance = tool_class(**instantiation_params) + # # Only pass declared runtime inputs to forward() to avoid unexpected kwargs. + # declared_inputs = getattr(tool_class, "inputs", {}) or {} + # allowed_input_names = ( + # set(declared_inputs.keys()) if isinstance(declared_inputs, dict) else set() + # ) + # filtered_runtime_inputs = ( + # {k: v for k, v in runtime_inputs.items() if k in allowed_input_names} + # if allowed_input_names + # else runtime_inputs + # ) + result = tool_instance.forward(**(inputs or {})) return result except Exception as e: @@ -868,6 +987,7 @@ def import_openapi_service( tenant_id: str, user_id: str, service_description: str = None, + headers_template: Dict[str, Any] = None, force_update: bool = False ) -> Dict[str, Any]: """ @@ -881,6 +1001,7 @@ def import_openapi_service( tenant_id: Tenant ID for multi-tenancy user_id: User ID for audit service_description: Optional service description (if not provided, reads from openapi_json.info.description) + headers_template: Optional default headers template force_update: If True, replace all existing tools for this service Returns: @@ -901,7 +1022,8 @@ def import_openapi_service( server_url=server_url, tenant_id=tenant_id, user_id=user_id, - description=service_description + description=service_description, + headers_template=headers_template, ) logger.info(f"Imported service '{service_name}' for tenant {tenant_id}") diff --git a/backend/services/user_management_service.py b/backend/services/user_management_service.py index 
39ea8cfbe..0b38a76bc 100644 --- a/backend/services/user_management_service.py +++ b/backend/services/user_management_service.py @@ -15,11 +15,36 @@ from utils.auth_utils import ( get_supabase_client, + get_supabase_admin_client, calculate_expires_at, get_jwt_expiry_seconds, + ensure_cas_session_active_from_authorization, + resolve_tenant_id_from_user_tenant_record, ) -from consts.const import INVITE_CODE, SUPABASE_URL, SUPABASE_KEY, DEFAULT_TENANT_ID -from consts.exceptions import NoInviteCodeException, IncorrectInviteCodeException, UserRegistrationException, UnauthorizedError +from consts.const import ( + INVITE_CODE, + SUPABASE_URL, + SUPABASE_KEY, + DEFAULT_TENANT_ID, + ASSET_OWNER_TENANT_ID, + ASSET_OWNER_INVITE_CODE_TYPE, + ASSET_OWNER_ROLE, + ASSET_OWNER_SIGNUP_USE_OAUTH_DETAIL, +) + +from services.asset_owner_visibility import ( + filter_accessible_routes_for_asset_owner_feature, + require_asset_owner_enabled, +) +from consts.exceptions import ( + NoInviteCodeException, + IncorrectInviteCodeException, + UserRegistrationException, + UnauthorizedError, + ValidationError, +) +from consts.error_code import ErrorCode +from consts.exceptions import AppException from database.model_management_db import create_model_record from database.user_tenant_db import insert_user_tenant, get_user_tenant_by_user_id @@ -29,7 +54,7 @@ from services.invitation_service import use_invitation_code, check_invitation_available, get_invitation_by_code from services.group_service import add_user_to_groups from services.tool_configuration_service import init_tool_list_for_tenant - +from services.skill_service import init_skill_list_for_tenant logging.getLogger("user_management_service").setLevel(logging.DEBUG) @@ -83,6 +108,7 @@ def validate_token(token: str) -> Tuple[bool, Optional[Any]]: try: user = get_current_user_from_client(client, token) if user: + ensure_cas_session_active_from_authorization(token) return True, user return False, None except Exception as e: @@ -133,6 +159,12 @@ 
async def signup_user_with_invitation(email: EmailStr, auto_login: Optional[bool] = True): """User registration with invitation code support""" client = get_supabase_client() + + # Validate password strength before registration + if not validate_password_strength(password): + raise AppException(ErrorCode.PROFILE_PASSWORD_WEAK, + "Password must be at least 8 characters with uppercase, lowercase, and digit.") + logging.info( f"Receive registration request: email={email}, invite_code={'provided' if invite_code else 'not provided'}, auto_login={auto_login}") @@ -163,12 +195,17 @@ async def signup_user_with_invitation(email: EmailStr, user_role = "ADMIN" elif code_type == "DEV_INVITE": user_role = "DEV" + elif code_type == ASSET_OWNER_INVITE_CODE_TYPE: + require_asset_owner_enabled() + raise ValidationError(ASSET_OWNER_SIGNUP_USE_OAUTH_DETAIL) logging.info( f"Invitation code {invite_code} validated successfully, will assign role: {user_role}") except IncorrectInviteCodeException: raise + except ValidationError: + raise except Exception as e: logging.error( f"Invitation code {invite_code} validation failed: {str(e)}") @@ -187,14 +224,20 @@ async def signup_user_with_invitation(email: EmailStr, # Determine tenant_id based on invitation code if invitation_info: tenant_id = invitation_info["tenant_id"] + if invitation_info.get("code_type") == ASSET_OWNER_INVITE_CODE_TYPE: + tenant_id = ASSET_OWNER_TENANT_ID else: tenant_id = DEFAULT_TENANT_ID + is_asset_owner_registration = user_role == ASSET_OWNER_ROLE + # Create user tenant relationship - logging.debug(f"Creating user tenant relationship: user_id={user_id}, tenant_id={tenant_id}, user_role={user_role}") + logging.debug( + f"Creating user tenant relationship: user_id={user_id}, tenant_id={tenant_id}, user_role={user_role}") insert_user_tenant( user_id=user_id, tenant_id=tenant_id, user_role=user_role, user_email=email) - logging.debug(f"User tenant relationship created successfully for user {user_id}") + logging.debug( + 
f"User tenant relationship created successfully for user {user_id}") # Use invitation code now that we have the real user_id if invitation_info: @@ -205,7 +248,7 @@ async def signup_user_with_invitation(email: EmailStr, # Add user to groups specified in invitation code group_ids = invitation_result.get("group_ids", []) - if group_ids: + if group_ids and not is_asset_owner_registration: try: # Convert group_ids from string to list if needed if isinstance(group_ids, str): @@ -213,7 +256,8 @@ async def signup_user_with_invitation(email: EmailStr, group_ids = convert_string_to_list(group_ids) if group_ids: - group_results = add_user_to_groups(user_id, group_ids, user_id) + group_results = add_user_to_groups( + user_id, group_ids, user_id) successful_adds = [ r for r in group_results if not r.get("error")] logging.info( @@ -235,7 +279,9 @@ async def signup_user_with_invitation(email: EmailStr, await generate_tts_stt_4_admin(tenant_id, user_id) # Initialize tool list for the new tenant (only once per tenant) - await init_tool_list_for_tenant(tenant_id, user_id) + if not is_asset_owner_registration: + await init_tool_list_for_tenant(tenant_id, user_id) + await init_skill_list_for_tenant(tenant_id, user_id) return await parse_supabase_response(False, response, user_role, auto_login) else: @@ -330,14 +376,24 @@ async def signin_user(email: EmailStr, "password": password }) + user_tenant = get_user_tenant_by_user_id(response.user.id) + if user_tenant and user_tenant.get("user_role") == ASSET_OWNER_ROLE: + try: + require_asset_owner_enabled() + except ValidationError: + client.auth.sign_out() + raise + # Get actual expiration time from access_token expiry_seconds = get_jwt_expiry_seconds(response.session.access_token) expires_at = calculate_expires_at(response.session.access_token) - # Get role information from user metadata - user_role = "user" # Default role - if 'role' in response.user.user_metadata: # Adapt to historical user data - user_role = 
response.user.user_metadata['role'] + # Prefer user_tenant_t role; fall back to Supabase metadata for legacy users + user_role = "user" + if user_tenant and user_tenant.get("user_role"): + user_role = user_tenant["user_role"] + elif "role" in response.user.user_metadata: + user_role = response.user.user_metadata["role"] logging.info( f"User {email} logged in successfully, session validity is {expiry_seconds} seconds, role: {user_role}") @@ -374,7 +430,8 @@ async def refresh_user_token(authorization, refresh_token: str): async def get_session_by_authorization(authorization): # Extract clean token from authorization header - clean_token = authorization.replace("Bearer ", "") if authorization.startswith("Bearer ") else authorization + clean_token = authorization.replace( + "Bearer ", "") if authorization.startswith("Bearer ") else authorization # Use the unified token validation function is_valid, user = validate_token(clean_token) @@ -411,9 +468,27 @@ async def get_user_info(user_id: str) -> Optional[Dict[str, Any]]: # Get user tenant relationship user_tenant = get_user_tenant_by_user_id(user_id) if not user_tenant: + # User exists in Supabase but not in local database - this is an inconsistent state. + # Delete the orphaned Supabase account and return None to trigger 401. 
+ logging.warning( + f"User {user_id} not found in local database, cleaning up orphaned Supabase account" + ) + try: + admin_client = get_supabase_admin_client() + if admin_client and hasattr(admin_client.auth, "admin"): + admin_client.auth.admin.delete_user(user_id) + logging.info(f"Deleted orphaned Supabase user {user_id}") + else: + logging.warning( + f"Could not get Supabase admin client to delete user {user_id}" + ) + except Exception as delete_err: + logging.error( + f"Failed to delete orphaned Supabase user {user_id}: {str(delete_err)}" + ) return None - tenant_id = user_tenant["tenant_id"] + tenant_id = resolve_tenant_id_from_user_tenant_record(user_tenant) user_role = user_tenant["user_role"] user_email = user_tenant["user_email"] @@ -437,7 +512,7 @@ async def get_user_info(user_id: str) -> Optional[Dict[str, Any]]: "user_email": user_email, "user_role": user_role, "permissions": permissions_data["permissions"], - "accessibleRoutes": permissions_data["accessibleRoutes"] + "accessibleRoutes": permissions_data["accessibleRoutes"], } } @@ -476,9 +551,13 @@ def format_role_permissions(permissions: List[Dict[str, Any]]) -> Dict[str, List # Add permission_subtype to accessible routes for LEFT_NAV_MENU type accessible_routes.append(permission_subtype) + accessible_routes = filter_accessible_routes_for_asset_owner_feature( + accessible_routes + ) + return { "permissions": formatted_permissions, - "accessibleRoutes": accessible_routes + "accessibleRoutes": accessible_routes, } @@ -522,3 +601,85 @@ def delete_token(token_id: int, user_id: str) -> bool: True if the token was deleted, False if not found or not owned by user. """ return delete_token_record(token_id, user_id) + + +# ----------------------------- +# Password Management +# ----------------------------- + +def validate_password_strength(password: str) -> bool: + """Validate password meets minimum security requirements. + + Args: + password: The password to validate. 
+ + Returns: + True if password meets requirements, False otherwise. + """ + if len(password) < 8: + return False + has_upper = any(c.isupper() for c in password) + has_lower = any(c.islower() for c in password) + has_digit = any(c.isdigit() for c in password) + return has_upper and has_lower and has_digit + + +async def update_password(user_id: str, old_password: str, new_password: str) -> bool: + """Update user password with old password verification. + + This method first re-authenticates the user with their old password, + then updates to the new password. + + Args: + user_id: The user ID to update password for. + old_password: The current password for verification. + new_password: The new password to set. + + Returns: + True if password was updated successfully. + + Raises: + UnauthorizedError: If old password is incorrect. + AppException (PROFILE_PASSWORD_WEAK): If new password does not meet requirements. + AppException (PROFILE_PASSWORD_SAME_AS_OLD): If new password is the same as old password. 
+ """ + if not validate_password_strength(new_password): + raise AppException(ErrorCode.PROFILE_PASSWORD_WEAK) + + if old_password == new_password: + raise AppException(ErrorCode.PROFILE_PASSWORD_SAME_AS_OLD) + + admin_client = get_supabase_admin_client() + + try: + user_tenant = get_user_tenant_by_user_id(user_id) + if not user_tenant or not user_tenant.get("user_email"): + raise UnauthorizedError("Unable to retrieve user email") + + user_email = user_tenant["user_email"] + + # Re-authenticate with old password to verify identity using admin client + try: + admin_client.auth.sign_in_with_password({ + "email": user_email, + "password": old_password + }) + except Exception as auth_err: + logging.warning( + f"Password verification failed for user {user_id}: {str(auth_err)}") + raise UnauthorizedError("Invalid old password") + + # Update to new password using admin client + admin_client.auth.update_user({"password": new_password}) + + logging.info(f"Password updated successfully for user {user_id}") + return True + + except UnauthorizedError: + raise + except AppException: + raise + except Exception as exc: + logging.error( + f"Failed to update password for user {user_id}: {str(exc)}") + raise diff --git a/backend/services/user_service.py b/backend/services/user_service.py index ceb471844..6f4edcb1a 100644 --- a/backend/services/user_service.py +++ b/backend/services/user_service.py @@ -11,6 +11,7 @@ from database.group_db import remove_user_from_all_groups from database.memory_config_db import soft_delete_all_configs_by_user_id from database.conversation_db import soft_delete_all_conversations_by_user +from database.oauth_account_db import soft_delete_all_oauth_accounts_by_user_id from utils.auth_utils import get_supabase_admin_client from utils.memory_utils import build_memory_config @@ -174,7 +175,14 @@ async def delete_user_and_cleanup(user_id: str, tenant_id: str) -> None: except Exception as e: logger.error(f"Failed clearing memory for user {user_id}: {e}") - # 
5) Delete from Supabase + # 5) Soft-delete OAuth account bindings + try: + deleted_oauth = soft_delete_all_oauth_accounts_by_user_id(user_id, user_id) + logger.debug(f"\t{deleted_oauth} OAuth account bindings deleted.") + except Exception as e: + logger.error(f"Failed deleting OAuth accounts for user {user_id}: {e}") + + # 6) Delete from Supabase try: admin_client = get_supabase_admin_client() if admin_client and hasattr(admin_client.auth, "admin"): diff --git a/backend/services/vectordatabase_service.py b/backend/services/vectordatabase_service.py index 5639103de..dd2f6e51a 100644 --- a/backend/services/vectordatabase_service.py +++ b/backend/services/vectordatabase_service.py @@ -10,6 +10,7 @@ 4. Health check interface """ import asyncio +import hashlib import json import logging import os @@ -20,15 +21,15 @@ from fastapi import Body, Depends, Path, Query from fastapi.responses import StreamingResponse -from nexent.core.models.embedding_model import OpenAICompatibleEmbedding, JinaEmbedding, BaseEmbedding +from nexent.core.models.embedding_model import OpenAICompatibleEmbedding, JinaEmbedding, DashScopeMultimodalEmbedding, BaseEmbedding from nexent.core.models.rerank_model import OpenAICompatibleRerank, BaseRerank from nexent.vector_database.base import VectorDatabaseCore from nexent.vector_database.elasticsearch_core import ElasticSearchCore from nexent.vector_database.datamate_core import DataMateCore -from consts.const import DATAMATE_URL, ES_API_KEY, ES_HOST, LANGUAGE, VectorDatabaseType, IS_SPEED_MODE, PERMISSION_EDIT, PERMISSION_READ +from consts.const import DATAMATE_URL, ES_API_KEY, ES_HOST, LANGUAGE, VectorDatabaseType, IS_SPEED_MODE, PERMISSION_EDIT, PERMISSION_READ, ASSET_OWNER_TENANT_ID from consts.model import ChunkCreateRequest, ChunkUpdateRequest -from database.attachment_db import delete_file +from database.attachment_db import delete_file, file_exists, get_file_stream from database.knowledge_db import ( create_knowledge_record, 
delete_knowledge_record, @@ -36,13 +37,17 @@ update_knowledge_record, get_knowledge_info_by_tenant_id, update_model_name_by_index_name, + update_last_doc_update_time, + update_last_summary_time, + update_embedding_model_by_index_name, ) from utils.str_utils import convert_list_to_string from database.user_tenant_db import get_user_tenant_by_user_id from database.group_db import query_group_ids_by_user -from database.model_management_db import get_model_records +from database.model_management_db import get_model_by_display_name, get_model_by_model_id, get_model_records from services.redis_service import get_redis_service from services.group_service import get_tenant_default_group_id +from services.asset_owner_visibility import postprocess_knowledge_visibility from utils.config_utils import tenant_config_manager, get_model_name_from_config from utils.file_management_utils import get_all_files_status, get_file_size from utils.str_utils import convert_string_to_list @@ -76,6 +81,134 @@ def _update_progress(task_id: str, processed: int, total: int): f"[PROGRESS CALLBACK] Exception updating progress for task {task_id}: {str(e)}") +def _get_embedding_model_display_name(model_id: Optional[int], tenant_id: str) -> str: + """ + Get embedding model display_name from model_id. + + Args: + model_id: The model ID to look up + tenant_id: Tenant ID for the lookup + + Returns: + The model's display_name if found, empty string otherwise + """ + if model_id is None: + return "" + try: + model = get_model_by_model_id(model_id, tenant_id) + if model: + return model.get("display_name", "") + except Exception as e: + logger.warning(f"Failed to get display_name for model_id {model_id}: {e}") + return "" + + +def _is_multimodal_by_model_id(model_id: Optional[int], tenant_id: str) -> bool: + """ + Determine whether an embedding model is multimodal based on model_id. + + Args: + model_id: The embedding model ID. + tenant_id: Tenant ID for model lookup. 
+ + Returns: + True when the model type is `multi_embedding`, otherwise False. + """ + if model_id is None: + return False + try: + model = get_model_by_model_id(model_id, tenant_id) + if model: + return model.get("model_type") == "multi_embedding" + except Exception as e: + logger.warning(f"Failed to determine multimodal flag for model_id {model_id}: {e}") + return False + + +class KnowledgeBaseNeedsModelConfigError(Exception): + """Exception raised when a knowledge base needs an embedding model to be configured.""" + def __init__(self, index_name: str, message: str = None): + self.index_name = index_name + self.message = message or f"Knowledge base '{index_name}' needs an embedding model to be configured" + super().__init__(self.message) + + +def get_embedding_model_by_index_name(tenant_id: str, index_name: str) -> tuple[Optional[Any], Optional[int], dict]: + """ + Get the embedding model for a knowledge base by its index_name. + + Args: + tenant_id: Tenant ID + index_name: The index name of the knowledge base + + Returns: + Tuple of (embedding model instance or None, model_id or None, metadata dict) + metadata contains: { + "status": str, # "ok" | "needs_config" | "error" + "needs_update": bool, # Whether the database needs to be updated + "update_info": dict, # Fields to update if needs_update is True + "message": str # Status message + } + + Design principles: + - Force explicit configuration: model_id must be explicitly set by user + - No auto-fix: never automatically use tenant default model + - Clear error guidance: return needs_config status for user action + """ + try: + knowledge_record = get_knowledge_record({ + "index_name": index_name, + "tenant_id": tenant_id, + "include_asset_owner_assets": True, + }) + + if not knowledge_record: + return None, None, { + "status": "error", + "needs_update": False, + "message": f"Knowledge base '{index_name}' not found" + } + + model_id = knowledge_record.get("embedding_model_id") + + # Case 1: model_id exists and is 
valid, use it + if model_id: + model, _ = get_embedding_model_by_id(tenant_id, model_id) + if model: + return model, model_id, { + "status": "ok", + "needs_update": False, + "message": "Embedding model found" + } + # Model ID exists but model not found - fall through to error + logger.warning(f"Model ID {model_id} specified for index '{index_name}' but model not found") + + # Case 2: model_id does not exist or is invalid + # Design principle: Force explicit configuration, no auto-fix + # Return needs_config to guide user to select a model + embedding_model_name = knowledge_record.get("embedding_model_name") + if embedding_model_name: + # Has model_name but no valid model_id (legacy data) + logger.warning(f"Index '{index_name}' has embedding_model_name but no valid model_id, needs explicit configuration") + else: + # No model configured at all + logger.error(f"Index '{index_name}' has no embedding model configured") + + return None, None, { + "status": "needs_config", + "needs_update": False, + "message": f"No embedding model configured for knowledge base '{index_name}'. Please select a model." 
+ } + + except Exception as e: + logger.warning(f"Failed to get embedding model for index {index_name}: {e}") + return None, None, { + "status": "error", + "needs_update": False, + "message": str(e) + } + + ALLOWED_CHUNK_FIELDS = { "id", "title", @@ -175,71 +308,142 @@ def check_knowledge_base_exist_impl(knowledge_name: str, vdb_core: VectorDatabas # Case B: Name is available in this tenant return {"status": "available"} - -def get_embedding_model(tenant_id: str, model_name: Optional[str] = None): +def _normalize_model_type(raw_model_type: Optional[str]) -> Optional[str]: + if raw_model_type in ["multiEmbedding", "multi_embedding"]: + return "multi_embedding" + if raw_model_type == "embedding": + return "embedding" + return None + +def _build_model_config(model: dict) -> dict: + return { + "model_repo": model.get("model_repo", ""), + "model_name": model["model_name"], + "api_key": model.get("api_key", ""), + "base_url": model.get("base_url", ""), + "model_type": model.get("model_type", "embedding"), + "max_tokens": model.get("max_tokens", 1024), + "ssl_verify": model.get("ssl_verify", True), + } + +def _create_embedding_model(model: dict) -> Any: + model_config = _build_model_config(model) + common_kwargs = { + "api_key": model_config.get("api_key", ""), + "base_url": model_config.get("base_url", ""), + "model_name": get_model_name_from_config(model_config) or "", + "embedding_dim": model_config.get("max_tokens", 1024), + "ssl_verify": model_config.get("ssl_verify", True), + } + if model.get("model_type", "embedding") == "multi_embedding": + model_factory = model.get("model_factory", "").lower() + if model_factory == "dashscope": + return DashScopeMultimodalEmbedding(**common_kwargs) + return JinaEmbedding(**common_kwargs) + return OpenAICompatibleEmbedding(**common_kwargs) + +def get_embedding_model( + tenant_id: str, + model_name: Optional[str] = None, + model_type: Optional[str] = None +) -> tuple[Optional[Any], Optional[int]]: """ Get the embedding model for 
the tenant, optionally using a specific model name. Args: tenant_id: Tenant ID - model_name: Optional specific model name to use (format: "model_repo/model_name" or just "model_name") - If provided, will try to find the model in the tenant's model list. + model_name: Optional display name of the embedding model to use. + If provided, will find the model by display_name in the tenant's model list. + model_type: Optional model type filter. When model_name is omitted, queries tenant + model records by this type; when model_type is also omitted, prefers + embedding models, then multi_embedding models. Returns: - Embedding model instance or None + Tuple of (embedding model instance or None, model_id or None) """ - # If model_name is provided, try to find it in the tenant's models if model_name: try: - models = get_model_records({"model_type": "embedding"}, tenant_id) - for model in models: - model_display_name = model.get("model_repo") + "/" + model["model_name"] if model.get("model_repo") else model["model_name"] - if model_display_name == model_name: - # Found the model, create embedding instance - model_config = { - "model_repo": model.get("model_repo", ""), - "model_name": model["model_name"], - "api_key": model.get("api_key", ""), - "base_url": model.get("base_url", ""), - "model_type": "embedding", - "max_tokens": model.get("max_tokens", 1024), - "ssl_verify": model.get("ssl_verify", True), - } - return OpenAICompatibleEmbedding( - api_key=model_config.get("api_key", ""), - base_url=model_config.get("base_url", ""), - model_name=get_model_name_from_config(model_config) or "", - embedding_dim=model_config.get("max_tokens", 1024), - ssl_verify=model_config.get("ssl_verify", True), - ) + model_type = _normalize_model_type(model_type) + if model_type: + model = get_model_by_display_name(model_name, tenant_id, model_type) + else: + model = get_model_by_display_name(model_name, tenant_id) + + if not model or model.get("model_type") not in ["embedding", 
"multi_embedding"]: + logger.warning(f"Model '{model_name}' not found or is not an embedding model") + return None, None + + return _create_embedding_model(model), model.get("model_id") except Exception as e: logger.warning(f"Failed to get embedding model by name {model_name}: {e}") + else: + try: + if model_type: + records = get_model_records({"model_type": model_type}, tenant_id) + else: + records = get_model_records({"model_type": "embedding"}, tenant_id) + if not records: + records = get_model_records({"model_type": "multi_embedding"}, tenant_id) + + if records: + model = records[0] + if model.get("model_type") in ["embedding", "multi_embedding"]: + return _create_embedding_model(model), model.get("model_id") + logger.warning( + f"Resolved model is not an embedding model: {model.get('model_type')}" + ) + except Exception as e: + logger.warning(f"Failed to get default embedding model for tenant {tenant_id}: {e}") - # Fall back to default embedding model (current behavior) - model_config = tenant_config_manager.get_model_config( - key="EMBEDDING_ID", tenant_id=tenant_id) + return None, None - model_type = model_config.get("model_type", "") - if model_type == "embedding": - # Get the es core - return OpenAICompatibleEmbedding( - api_key=model_config.get("api_key", ""), - base_url=model_config.get("base_url", ""), - model_name=get_model_name_from_config(model_config) or "", - embedding_dim=model_config.get("max_tokens", 1024), - ssl_verify=model_config.get("ssl_verify", True), - ) - elif model_type == "multi_embedding": - return JinaEmbedding( - api_key=model_config.get("api_key", ""), - base_url=model_config.get("base_url", ""), - model_name=get_model_name_from_config(model_config) or "", - embedding_dim=model_config.get("max_tokens", 1024), - ssl_verify=model_config.get("ssl_verify", True), - ) - else: - return None +def get_embedding_model_by_id(tenant_id: str, model_id: int) -> tuple[Optional[Any], Optional[int]]: + """ + Get the embedding model by model_id. 
+ + Args: + tenant_id: Tenant ID + model_id: Model ID to query + + Returns: + Tuple of (embedding model instance or None, model_id or None) + """ + try: + model = get_model_by_model_id(model_id, tenant_id) + if model and model.get("model_type") in ["embedding", "multi_embedding"]: + model_config = { + "model_repo": model.get("model_repo", ""), + "model_name": model["model_name"], + "api_key": model.get("api_key", ""), + "base_url": model.get("base_url", ""), + "model_type": model.get("model_type", "embedding"), + "max_tokens": model.get("max_tokens", 1024), + "ssl_verify": model.get("ssl_verify", True), + } + model_type = model.get("model_type", "embedding") + if model_type == "multi_embedding": + embedding_model = JinaEmbedding( + api_key=model_config.get("api_key", ""), + base_url=model_config.get("base_url", ""), + model_name=get_model_name_from_config(model_config) or "", + embedding_dim=model_config.get("max_tokens", 1024), + ssl_verify=model_config.get("ssl_verify", True), + ) + else: + embedding_model = OpenAICompatibleEmbedding( + api_key=model_config.get("api_key", ""), + base_url=model_config.get("base_url", ""), + model_name=get_model_name_from_config(model_config) or "", + embedding_dim=model_config.get("max_tokens", 1024), + ssl_verify=model_config.get("ssl_verify", True), + ) + return embedding_model, model.get("model_id") + else: + logger.warning(f"Model with id {model_id} not found or is not an embedding model") + except Exception as e: + logger.warning(f"Failed to get embedding model by id {model_id}: {e}") + return None, None def get_rerank_model(tenant_id: str, model_name: Optional[str] = None): @@ -415,11 +619,19 @@ def create_index( None, description="ID of the user creating the knowledge base"), tenant_id: Optional[str] = Body( None, description="ID of the tenant creating the knowledge base"), + model_id: Optional[int] = Body( + None, description="ID of the embedding model to use"), ): try: if vdb_core.check_index_exists(index_name): raise 
Exception(f"Index {index_name} already exists") - embedding_model = get_embedding_model(tenant_id) + + # Get embedding model by model_id if provided + if model_id: + embedding_model, actual_model_id = get_embedding_model_by_id(tenant_id, model_id) + else: + embedding_model, actual_model_id = None, None + success = vdb_core.create_index(index_name, embedding_dim=embedding_dim or ( embedding_model.embedding_dim if embedding_model else 1024)) if not success: @@ -427,7 +639,8 @@ def create_index( knowledge_data = {"index_name": index_name, "created_by": user_id, "tenant_id": tenant_id, - "embedding_model_name": embedding_model.model} + "embedding_model_name": embedding_model.model if embedding_model else None, + "embedding_model_id": actual_model_id} create_knowledge_record(knowledge_data) return {"status": "success", "message": f"Index {index_name} created successfully"} except Exception as e: @@ -443,6 +656,8 @@ def create_knowledge_base( ingroup_permission: Optional[str] = None, group_ids: Optional[List[int]] = None, embedding_model_name: Optional[str] = None, + is_multimodal: Optional[bool] = None, + preserve_source_file: Optional[bool] = None, ): """ Create a new knowledge base with a user-facing name and an internal Elasticsearch index name. @@ -462,13 +677,25 @@ def create_knowledge_base( group_ids: List of group IDs (optional) embedding_model_name: Specific embedding model name to use (optional). If provided, will use this model instead of tenant default. + preserve_source_file: Whether to preserve uploaded source documents after + vectorization (optional; defaults to True when omitted). For backward compatibility, legacy callers can still use create_index() directly with an explicit index_name. 
""" try: # Get embedding model - use user-selected model if provided, otherwise use tenant default - embedding_model = get_embedding_model(tenant_id, embedding_model_name) + selected_model_type = None + if is_multimodal is True: + selected_model_type = "multi_embedding" + elif is_multimodal is False and embedding_model_name: + selected_model_type = "embedding" + + embedding_model, model_id = get_embedding_model( + tenant_id, + embedding_model_name, + selected_model_type + ) # Determine the embedding model name to save: use user-provided name if available, # otherwise use the model's display name @@ -483,6 +710,7 @@ def create_knowledge_base( "user_id": user_id, "tenant_id": tenant_id, "embedding_model_name": saved_embedding_model_name, + "embedding_model_id": model_id, } # Add group permission and group IDs if provided @@ -490,6 +718,8 @@ def create_knowledge_base( knowledge_data["ingroup_permission"] = ingroup_permission if group_ids is not None: knowledge_data["group_ids"] = group_ids + if preserve_source_file is not None: + knowledge_data["preserve_source_file"] = preserve_source_file record_info = create_knowledge_record(knowledge_data) index_name = record_info["index_name"] @@ -570,6 +800,77 @@ def update_knowledge_base( return result + @staticmethod + def update_embedding_model( + index_name: str, + model_id: int, + tenant_id: str, + user_id: Optional[str] = None, + ) -> Dict[str, Any]: + """ + Update the embedding model for a knowledge base. 
+ + Args: + index_name: Internal index name of the knowledge base + model_id: ID of the embedding model to use + tenant_id: Tenant ID + user_id: ID of the user making the update + + Returns: + Dict containing update result information + + Raises: + ValueError: If model is not found or is not an embedding model + Exception: If update fails + """ + try: + # Validate the model exists and is an embedding model + model = get_model_by_model_id(model_id, tenant_id) + if not model: + raise ValueError(f"Model with id {model_id} not found") + + if model.get("model_type") not in ["embedding", "multi_embedding"]: + raise ValueError( + f"Model '{model.get('display_name', model_id)}' is not an embedding model. " + f"Please select an embedding model." + ) + + # Update the database record + # Use display_name as embedding_model_name + embedding_model_name = model.get("display_name") + success = update_embedding_model_by_index_name( + index_name=index_name, + embedding_model_id=model_id, + embedding_model_name=embedding_model_name, + tenant_id=tenant_id, + user_id=user_id or "" + ) + + if not success: + raise Exception(f"Failed to update embedding model for index '{index_name}'") + + logger.info( + f"Embedding model updated for knowledge base '{index_name}' " + f"to model '{model.get('display_name', model_id)}' (id: {model_id}) by user '{user_id}'" + ) + + # Use display_name for consistency with database update + model_display_name = model.get("display_name") + return { + "status": "success", + "index_name": index_name, + "model_id": model_id, + "model_name": model_display_name, + "model_display_name": model.get("display_name"), + "message": f"Embedding model updated successfully to '{model_display_name}'" + } + + except ValueError: + raise + except Exception as e: + logger.error(f"Failed to update embedding model for index '{index_name}': {e}") + raise Exception(f"Failed to update embedding model: {str(e)}") + @staticmethod async def delete_index( index_name: str = Path(..., @@ 
-631,7 +932,9 @@ def list_indices( Permission logic: - SU: All knowledgebases visible, all editable - ADMIN: Knowledgebases from same tenant visible, all editable - - USER/DEV: Knowledgebases where user belongs to intersecting groups, permission determined by: + - DEV on ASSET_OWNER-scoped records: all visible, read-only (READ_ONLY) + - SU/ADMIN/SPEED cross-tenant view of ASSET_OWNER records: read-only + - USER/DEV (non-ASSET_OWNER records): group intersection required; permission by: * If user is creator: editable * If ingroup_permission=EDIT: editable * If ingroup_permission=READ_ONLY: read-only @@ -663,7 +966,9 @@ def list_indices( es_indices_list = vdb_core.get_user_indices(pattern) # Get all knowledgebase records from database (for cleanup and permission checking) - all_db_records = get_knowledge_info_by_tenant_id(target_tenant_id) + all_db_records = get_knowledge_info_by_tenant_id( + target_tenant_id + ) # Filter visible knowledgebases based on user role and permissions visible_knowledgebases = [] @@ -679,6 +984,8 @@ def list_indices( # Check permission based on user role permission = None + record_tenant_id = str(record.get("tenant_id") or "") + is_asset_owner_record = record_tenant_id == ASSET_OWNER_TENANT_ID # Fallback logic: if user_id equals user_tenant_id, treat as legacy admin user # even if user_role is None or empty @@ -690,7 +997,12 @@ def list_indices( effective_user_role = "SPEED" logger.info("User under SPEED version is treated as admin") - if effective_user_role in ["SU", "ADMIN", "SPEED"]: + if is_asset_owner_record: + if effective_user_role in ["ASSET_OWNER"]: + permission = PERMISSION_EDIT + elif effective_user_role in ["SU", "ADMIN", "SPEED", "DEV"]: + permission = PERMISSION_READ + elif effective_user_role in ["SU", "ADMIN", "SPEED", "ASSET_OWNER"]: # SU, ADMIN and SPEED roles can see all knowledgebases permission = PERMISSION_EDIT elif effective_user_role in ["USER", "DEV"]: @@ -756,6 +1068,11 @@ def list_indices( 
model_name_is_none_list.append(index_name) # Build response + visible_knowledgebases = postprocess_knowledge_visibility( + visible_knowledgebases, + caller_role=user_role, + caller_tenant_id=target_tenant_id, + ) indices = [record["index_name"] for record in visible_knowledgebases] response = { @@ -774,6 +1091,12 @@ def list_indices( index_name = record["index_name"] index_stats = indice_stats.get(index_name, {}) + # Get embedding model display_name from model_id + model_id = record.get("embedding_model_id") + tenant_id = record.get("tenant_id") or target_tenant_id + embedding_model_display_name = _get_embedding_model_display_name(model_id, tenant_id) + is_multimodal = _is_multimodal_by_model_id(model_id, tenant_id) + stats_info.append({ # Internal index name (used as ID) "name": index_name, @@ -784,9 +1107,17 @@ def list_indices( # knowledge source and ingroup permission from DB record "knowledge_sources": record["knowledge_sources"], "ingroup_permission": record["ingroup_permission"], + "is_multimodal": is_multimodal, "tenant_id": record.get("tenant_id"), + # Embedding model info: display_name from model_id + "embedding_model_name": embedding_model_display_name or record.get("embedding_model_name", ""), + "embedding_model_id": model_id, # Update time for sorting and display "update_time": record.get("update_time"), + # Auto-summary settings + "summary_frequency": record.get("summary_frequency"), + "last_summary_time": record.get("last_summary_time"), + "preserve_source_file": record.get("preserve_source_file", True), "stats": index_stats, }) @@ -812,6 +1143,9 @@ def index_documents( ] = Body(..., description="Document List to process"), vdb_core: VectorDatabaseCore = Depends(get_vector_db_core), task_id: Optional[str] = None, + model_id: Optional[int] = Body( + None, description="ID of the embedding model to use"), + large_mode: bool = False, ): """ Index documents and create vector embeddings, create index if it doesn't exist @@ -821,6 +1155,8 @@ def 
index_documents( index_name: Index name data: List containing document data to be indexed vdb_core: VectorDatabaseCore instance + task_id: Optional task ID for progress tracking + model_id: Optional model ID for the embedding model Returns: IndexingResponse object containing indexing result information @@ -833,7 +1169,7 @@ def index_documents( if not vdb_core.check_index_exists(index_name): try: ElasticSearchService.create_index( - index_name, vdb_core=vdb_core) + index_name, vdb_core=vdb_core, model_id=model_id) logger.info(f"Created new index {index_name}") except Exception as create_error: raise Exception( @@ -882,12 +1218,27 @@ def index_documents( "author": author, "date": date, "content": text, - "process_source": "Unstructured", + "process_source": metadata.get("process_source", "Unstructured"), "file_size": file_size, "create_time": create_time, "languages": metadata.get("languages", []), "embedding_model_name": embedding_model_name } + + image_url = metadata.get("image_url", "") + if len(image_url) > 0: + # Fetch image bytes from MinIO (supports s3://bucket/key or /bucket/key) + try: + file_stream = get_file_stream( + object_name=image_url) + if file_stream is None: + raise FileNotFoundError( + f"Unable to fetch file from URL: {image_url}") + document["image_bytes"] = file_stream.read() + except Exception as e: + logger.error( + f"Failed to fetch file from {image_url}: {e}") + raise documents.append(document) @@ -908,8 +1259,9 @@ def index_documents( 'tenant_id') if knowledge_record else None if tenant_id: + model_type = "EMBEDDING_ID" if embedding_model.model_type == "text" else "MULTI_EMBEDDING_ID" model_config = tenant_config_manager.get_model_config( - key="EMBEDDING_ID", tenant_id=tenant_id) + key=model_type, tenant_id=tenant_id) embedding_batch_size = model_config.get("chunk_batch", 10) if embedding_batch_size is None: embedding_batch_size = 10 @@ -939,6 +1291,7 @@ def index_documents( embedding_model=embedding_model, documents=documents, 
embedding_batch_size=embedding_batch_size, + large_mode=large_mode, progress_callback=lambda processed, total: _update_progress( task_id, processed, total) if task_id else None ) @@ -959,6 +1312,9 @@ def index_documents( logger.warning( f"[REDIS PROGRESS] Exception updating final progress for task {task_id}: {str(e)}") + # Update last_doc_update_time for auto-summary tracking + update_last_doc_update_time(index_name) + return { "success": True, "message": f"Successfully indexed {total_indexed} documents", @@ -993,35 +1349,33 @@ async def list_files( """ try: files_map: Dict[str, Dict[str, Any]] = {} - # Get existing files from ES + total_start_time = time.time() + + logger.info(f"[list_files] index={index_name}, include_chunks={include_chunks}") + + # Step 1: Get existing files from ES (includes chunk_count via aggregation) + step1_start = time.time() existing_files = vdb_core.get_documents_detail(index_name) + step1_duration = time.time() - step1_start + logger.info(f"[list_files:step1] ES get_documents_detail: {len(existing_files)} files in {step1_duration:.3f}s") - # Get unique celery files list and the status of each file + # Step 2: Get celery task statuses from external service + step2_start = time.time() celery_task_files = await get_all_files_status(index_name) + step2_duration = time.time() - step2_start + logger.info(f"[list_files:step2] Celery task status: {len(celery_task_files)} tasks in {step2_duration:.3f}s") - # For files already stored in ES, add to files list + # Step 3: Build files_map from ES data + step3_start = time.time() for file_info in existing_files: utc_create_time_str = file_info.get('create_time', '') - # Try to parse the create_time string, fallback to current timestamp if format is invalid try: utc_create_timestamp = datetime.strptime(utc_create_time_str, '%Y-%m-%dT%H:%M:%S').replace( tzinfo=timezone.utc).timestamp() except (ValueError, TypeError): utc_create_timestamp = time.time() - # Always re-query chunk count to ensure accuracy 
(aggregation may be stale) path_or_url = file_info.get('path_or_url') - chunk_count = file_info.get('chunk_count', 0) - try: - count_result = vdb_core.client.count( - index=index_name, - body={"query": {"term": {"path_or_url": path_or_url}}} - ) - chunk_count = count_result.get("count", chunk_count) - except Exception as count_err: - logger.warning( - f"Failed to get chunk count for {path_or_url}: {count_err}, using aggregation value {chunk_count}") - file_data = { 'path_or_url': path_or_url, 'file': file_info.get('filename', ''), @@ -1029,65 +1383,40 @@ async def list_files( 'create_time': int(utc_create_timestamp * 1000), 'status': "COMPLETED", 'latest_task_id': '', - 'chunk_count': chunk_count, + 'chunk_count': file_info.get('chunk_count', 0), 'error_reason': None, 'has_error_info': False } files_map[path_or_url] = file_data + step3_duration = time.time() - step3_start + logger.info(f"[list_files:step3] Build files_map from ES: {len(existing_files)} files in {step3_duration:.3f}s") - # For files not yet stored in ES (files currently being processed) + # Step 4: Merge celery task data (Redis progress already fetched in get_all_files_status) + step4_start = time.time() + celery_file_count = 0 for path_or_url, status_info in celery_task_files.items(): - status_dict = status_info if isinstance( - status_info, dict) else {} + celery_file_count += 1 + status_dict = status_info if isinstance(status_info, dict) else {} - # Get source_type and original_filename, with defaults - source_type = status_dict.get('source_type') if status_dict.get( - 'source_type') else 'minio' + source_type = status_dict.get('source_type') if status_dict.get('source_type') else 'minio' original_filename = status_dict.get('original_filename') + filename = original_filename or (os.path.basename(path_or_url) if path_or_url else '') - # Determine the filename - filename = original_filename or ( - os.path.basename(path_or_url) if path_or_url else '') - - # Safely get file size; default to 0 on any 
error file_size = 0 if path_or_url in files_map: file_size = files_map[path_or_url].get('file_size', 0) else: try: - file_size = get_file_size( - source_type or 'minio', path_or_url) + file_size = get_file_size(source_type or 'minio', path_or_url) except Exception as size_err: - logger.error( - f"Failed to get file size for '{path_or_url}': {size_err}") + logger.error(f"Failed to get file size for '{path_or_url}': {size_err}") file_size = 0 - # Get progress from status_dict first, then try Redis for real-time updates + # Get progress from celery_task_files (already includes Redis batch data) processed_chunks = status_dict.get('processed_chunks') total_chunks = status_dict.get('total_chunks') task_id = status_dict.get('latest_task_id', '') - # Always try to get latest progress from Redis if task_id exists - # Redis has the most up-to-date progress during vectorization - if task_id: - try: - redis_service = get_redis_service() - progress_info = redis_service.get_progress_info( - task_id) - if progress_info: - redis_processed = progress_info.get( - 'processed_chunks') - redis_total = progress_info.get('total_chunks') - if redis_processed is not None: - processed_chunks = redis_processed - if redis_total is not None: - total_chunks = redis_total - logger.debug( - f"Retrieved progress from Redis for task {task_id}: {processed_chunks}/{total_chunks}") - except Exception as e: - logger.debug( - f"Failed to get progress from Redis for task {task_id}: {str(e)}") - if path_or_url in files_map: file_data = files_map[path_or_url] else: @@ -1102,13 +1431,12 @@ async def list_files( } files_map[path_or_url] = file_data - file_data['status'] = status_dict.get('state', file_data.get( - 'status', 'UNKNOWN')) + file_data['status'] = status_dict.get('state', file_data.get('status', 'UNKNOWN')) file_data['latest_task_id'] = task_id file_data['processed_chunk_num'] = processed_chunks file_data['total_chunk_num'] = total_chunks - # Get error reason for failed documents + # Get error 
reason for failed documents (fetch from Redis batch if needed) if task_id and status_dict.get('state') in ['PROCESS_FAILED', 'FORWARD_FAILED']: try: redis_service = get_redis_service() @@ -1116,17 +1444,20 @@ async def list_files( if error_reason: file_data['error_reason'] = error_reason file_data['has_error_info'] = True - except Exception as e: - logger.debug( - f"Failed to get error info for task {task_id}: {str(e)}") + except Exception: + pass # Error info is optional, don't fail the request + step4_duration = time.time() - step4_start + logger.info(f"[list_files:step4] Merge celery tasks: {celery_file_count} tasks in {step4_duration:.3f}s") files = list(files_map.values()) + logger.info(f"[list_files:step4] Total files built: {len(files)}") # Unified chunks processing for all files if include_chunks: - # Prepare msearch body for all completed files + step5_start = time.time() completed_files_map = { f['path_or_url']: f for f in files if f['status'] == "COMPLETED"} + completed_count = len(completed_files_map) msearch_body = [] for path_or_url in completed_files_map.keys(): @@ -1137,7 +1468,6 @@ async def list_files( "_source": ["id", "title", "content", "create_time"] }) - # Initialize chunks for all files for file_data in files: file_data['chunks'] = [] file_data['chunk_count'] = file_data.get('chunk_count', 0) @@ -1169,46 +1499,30 @@ async def list_files( }) file_data['chunks'] = chunks - # Get accurate chunk count using count query instead of len(chunks) - # because msearch may have size limits - try: - count_result = vdb_core.client.count( - index=index_name, - body={ - "query": {"term": {"path_or_url": file_path}}} - ) - file_data['chunk_count'] = count_result.get( - "count", len(chunks)) - except Exception as count_err: - logger.warning( - f"Failed to get chunk count for {file_path}: {count_err}, using len(chunks)") - file_data['chunk_count'] = len(chunks) + # chunk_count from aggregation is already accurate + # no need for additional count queries except 
Exception as e: logger.error( f"Error during msearch for chunks: {str(e)}") + step5_duration = time.time() - step5_start + logger.info(f"[list_files:step5] ES msearch chunks: {completed_count} files in {step5_duration:.3f}s") else: - # When include_chunks=False, ensure chunk_count is accurate for completed files + # When include_chunks=False, chunk_count is already accurate from ES aggregation + # No need for additional count queries - doc_count from terms aggregation is accurate for file_data in files: file_data['chunks'] = [] - if file_data.get('status') == "COMPLETED": - # Always re-query chunk count for completed files to ensure accuracy - try: - count_result = vdb_core.client.count( - index=index_name, - body={ - "query": {"term": {"path_or_url": file_data.get('path_or_url')}}} - ) - file_data['chunk_count'] = count_result.get( - "count", 0) - except Exception as count_err: - logger.warning( - f"Failed to get chunk count for {file_data.get('path_or_url')}: {count_err}") - file_data['chunk_count'] = file_data.get( - 'chunk_count', 0) - else: - file_data['chunk_count'] = file_data.get( - 'chunk_count', 0) + # chunk_count is already set from ES aggregation (doc_count) + file_data['chunk_count'] = file_data.get('chunk_count', 0) + + for file_data in files: + file_data["source_available"] = ( + ElasticSearchService._compute_source_available(file_data) + ) + + total_duration = time.time() - total_start_time + logger.info(f"[list_files:complete] index={index_name}, total_files={len(files)}, " + f"total_duration={total_duration:.3f}s") return {"files": files} @@ -1216,6 +1530,100 @@ async def list_files( raise Exception( f"Error getting file list for index {index_name}: {str(e)}") + DOCUMENT_DELETE_SCOPES = ("source_only", "full") + + @staticmethod + def _preview_pdf_cache_object_name(object_name: str) -> str: + """Object key for Office-to-PDF preview cache (matches file_management_service).""" + name_without_ext = ( + object_name.rsplit(".", 1)[0] if "." 
in object_name else object_name + ) + hash_suffix = hashlib.md5(object_name.encode()).hexdigest()[:8] + return f"preview/converted/{name_without_ext}_{hash_suffix}.pdf" + + @staticmethod + def _compute_source_available(file_data: Dict[str, Any]) -> bool: + path_or_url = file_data.get("path_or_url") or "" + status = file_data.get("status", "") + if status != "COMPLETED": + return True + if path_or_url.startswith("knowledge_base/"): + return file_exists(path_or_url) + return True + + @staticmethod + def delete_source_file(path_or_url: str) -> Dict[str, Any]: + """Remove MinIO source (and preview cache); does not touch Elasticsearch.""" + minio_result = delete_file(path_or_url) + deleted_minio = bool(minio_result.get("success")) + + if path_or_url.startswith("knowledge_base/"): + preview_key = ElasticSearchService._preview_pdf_cache_object_name( + path_or_url + ) + try: + if file_exists(preview_key): + delete_file(preview_key) + except Exception as exc: + logger.warning( + "Failed to delete preview cache for '%s': %s", + path_or_url, + exc, + ) + + return {"deleted_minio": deleted_minio} + + @staticmethod + async def _assert_source_only_deletable( + index_name: str, path_or_url: str + ) -> None: + celery_task_files = await get_all_files_status(index_name) + status_info = celery_task_files.get(path_or_url) + if not status_info or not isinstance(status_info, dict): + return + state = status_info.get("state") or "" + if state and state != "COMPLETED": + raise ValueError( + f"Cannot delete source file while document is in state '{state}'. " + "Wait until processing completes or use scope=full to remove the document." + ) + + @staticmethod + async def delete_document_by_scope( + index_name: str, + path_or_url: str, + scope: str, + vdb_core: VectorDatabaseCore, + ) -> Dict[str, Any]: + if scope not in ElasticSearchService.DOCUMENT_DELETE_SCOPES: + raise ValueError( + f"Invalid scope '{scope}'. 
" + f"Must be one of: {ElasticSearchService.DOCUMENT_DELETE_SCOPES}" + ) + + if scope == "source_only": + await ElasticSearchService._assert_source_only_deletable( + index_name, path_or_url + ) + minio_part = ElasticSearchService.delete_source_file(path_or_url) + return { + "status": "success", + "scope": scope, + "deleted_es_count": 0, + "deleted_minio": minio_part.get("deleted_minio", False), + "source_available": False, + "message": ( + "Source file deleted; index chunks and vectors preserved." + ), + } + + result = ElasticSearchService.delete_documents( + index_name, path_or_url, vdb_core + ) + result["scope"] = scope + result["source_available"] = False + return result + @staticmethod def delete_documents( index_name: str = Path(..., description="Name of the index"), @@ -1228,6 +1636,10 @@ def delete_documents( index_name, path_or_url) # 2. Delete MinIO file minio_result = delete_file(path_or_url) + + # Update last_doc_update_time for auto-summary tracking + update_last_doc_update_time(index_name) + return {"status": "success", "deleted_es_count": deleted_count, "deleted_minio": minio_result.get("success")} @staticmethod @@ -1450,6 +1862,8 @@ def change_summary( "index_name": index_name } update_knowledge_record(update_data) + # Update last_summary_time for auto-summary tracking + update_last_summary_time(index_name) return {"status": "success", "message": f"Index {index_name} summary updated successfully", "summary": summary_result} except Exception as e: @@ -1550,23 +1964,23 @@ def create_chunk( Automatically generates and stores embedding for semantic search. 
""" try: - # Get knowledge base's embedding model name - embedding_model_name = None + # Get knowledge base's embedding model by model_id + embedding_model_id = None if tenant_id: try: knowledge_record = get_knowledge_record({ "index_name": index_name, "tenant_id": tenant_id }) - embedding_model_name = knowledge_record.get("embedding_model_name") if knowledge_record else None + embedding_model_id = knowledge_record.get("embedding_model_id") if knowledge_record else None except Exception as e: - logger.warning(f"Failed to get embedding model name for index {index_name}: {e}") + logger.warning(f"Failed to get embedding model id for index {index_name}: {e}") # Generate embedding if we have content and can get embedding model embedding_vector = None if chunk_request.content: try: - embedding_model = get_embedding_model(tenant_id, embedding_model_name) if tenant_id else None + embedding_model = get_embedding_model_by_id(tenant_id, embedding_model_id)[0] if tenant_id and embedding_model_id else None if embedding_model: embeddings = embedding_model.get_embeddings(chunk_request.content) if embeddings and len(embeddings) > 0: @@ -1596,8 +2010,8 @@ def create_chunk( # Add embedding if generated if embedding_vector: chunk_payload["embedding"] = embedding_vector - if embedding_model_name: - chunk_payload["embedding_model_name"] = embedding_model_name + if embedding_model_id: + chunk_payload["embedding_model_id"] = embedding_model_id result = vdb_core.create_chunk(index_name, chunk_payload) return { @@ -1617,6 +2031,7 @@ def update_chunk( chunk_request: ChunkUpdateRequest, vdb_core: VectorDatabaseCore = Depends(get_vector_db_core), user_id: Optional[str] = None, + tenant_id: Optional[str] = None, ): """ Update a chunk document. 
@@ -1700,10 +2115,23 @@ def search_hybrid( if weight_accurate < 0 or weight_accurate > 1: raise ValueError("weight_accurate must be between 0 and 1") - embedding_model = get_embedding_model(tenant_id) + # Get embedding model from the first index's knowledge base record + if not index_names: + raise ValueError("At least one index name is required") + + embedding_model, model_id, meta = get_embedding_model_by_index_name(tenant_id, index_names[0]) + if not embedding_model: - raise ValueError( - "No embedding model configured for the current tenant") + if meta.get("status") == "needs_config": + # Return a clear error indicating model needs to be configured + raise KnowledgeBaseNeedsModelConfigError( + index_name=index_names[0], + message=f"Knowledge base '{index_names[0]}' does not have an embedding model configured. Please select a model in the knowledge base settings." + ) + else: + raise ValueError( + f"No embedding model found for index '{index_names[0]}'. " + f"Please configure an embedding model for this knowledge base.") start_time = time.perf_counter() raw_results = vdb_core.hybrid_search( @@ -1729,6 +2157,8 @@ def search_hybrid( "total": len(formatted_results), "query_time_ms": elapsed_ms, } + except KnowledgeBaseNeedsModelConfigError: + raise except ValueError: raise except Exception as exc: diff --git a/backend/services/voice_service.py b/backend/services/voice_service.py index 05dba6231..5a08e1f8b 100644 --- a/backend/services/voice_service.py +++ b/backend/services/voice_service.py @@ -1,17 +1,22 @@ import asyncio import logging -from typing import Any, Optional +from typing import Any, Dict, Optional -from nexent.core.models.stt_model import STTConfig, STTModel -from nexent.core.models.tts_model import TTSConfig, TTSModel +from nexent.core.models.stt_model import BaseSTTModel +from nexent.core.models.tts_model import BaseTTSModel +from nexent.core.models.volc_stt_model import VolcSTTConfig, VolcSTTModel +from nexent.core.models.ali_stt_model import 
AliSTTConfig, AliSTTModel +from nexent.core.models.volc_tts_model import VolcTTSConfig, VolcTTSModel +from nexent.core.models.ali_tts_model import AliTTSConfig, AliTTSModel -from consts.const import APPID, CLUSTER, SPEED_RATIO, TEST_VOICE_PATH, TOKEN, VOICE_TYPE +from consts.const import TEST_VOICE_PATH, TEST_PCM_PATH from consts.exceptions import ( VoiceServiceException, STTConnectionException, TTSConnectionException, - VoiceConfigException ) +from database.model_management_db import get_model_records +from utils.config_utils import tenant_config_manager logger = logging.getLogger("voice_service") @@ -19,56 +24,311 @@ class VoiceService: """Voice service that handles STT and TTS operations""" - def __init__(self): - """Initialize the voice service with configurations from const.py""" - try: - # Initialize STT configuration - self.stt_config = STTConfig( - appid=APPID, - token=TOKEN - ) + def _get_stt_model_from_config( + self, + model_factory: Optional[str] = None, + model_name: Optional[str] = None, + api_key: Optional[str] = None, + model_appid: Optional[str] = None, + access_token: Optional[str] = None, + base_url: Optional[str] = None, + language: str = "zh" + ) -> BaseSTTModel: + """ + Get the appropriate STT model based on model factory configuration. 
- # Initialize TTS configuration - self.tts_config = TTSConfig( - appid=APPID, - token=TOKEN, - cluster=CLUSTER, - voice_type=VOICE_TYPE, - speed_ratio=SPEED_RATIO + Args: + model_factory: Model factory/vendor name + model_name: Model name + api_key: API key (for Ali STT) + model_appid: Application ID (for Volcano STT) + access_token: Access token (for Volcano STT) + base_url: Custom WebSocket URL (optional) + language: Language for speech recognition + + Returns: + STT model instance based on configuration + """ + # Default to Ali Cloud if model_factory is not specified or is dashscope + use_volc = model_factory and model_factory.lower() in ["volc", "volcano", "volcengine", "火山引擎"] + + if use_volc: + # Use Volcano Engine STT + volc_config = VolcSTTConfig( + appid=model_appid or "", + access_token=access_token or "", + ws_url=base_url if base_url else "wss://openspeech.bytedance.com/api/v3/sauc/bigmodel", + format="pcm", + rate=16000 ) + return VolcSTTModel(volc_config, TEST_PCM_PATH) + else: + # Use Ali Cloud STT (default) + ali_config = AliSTTConfig( + api_key=api_key or "", + model=model_name or "qwen3-asr-flash-realtime", + language=language, + ws_url=base_url if base_url else None, + format="pcm", + rate=16000, + enable_vad=True, + timeout=5 + ) + return AliSTTModel(ali_config, TEST_PCM_PATH) + + def _get_stt_model_from_tenant_config( + self, + tenant_id: str, + language: str = "zh" + ) -> BaseSTTModel: + """ + Get STT model based on tenant's model configuration. 
- # Initialize models - self.stt_model = STTModel(self.stt_config, TEST_VOICE_PATH) - self.tts_model = TTSModel(self.tts_config) + Args: + tenant_id: Tenant ID + language: Language for speech recognition + + Returns: + STT model instance based on tenant's configuration + """ + try: + # Get STT model configuration from tenant config + stt_config = tenant_config_manager.get_model_config(tenant_id, "stt") + + if stt_config: + model_factory = stt_config.get("model_factory", "") + model_name = stt_config.get("model_name", "") + api_key = stt_config.get("api_key", "") + base_url = stt_config.get("base_url", "") + model_appid = stt_config.get("model_appid", "") + access_token_val = stt_config.get("access_token", "") + + return self._get_stt_model_from_config( + model_factory=model_factory, + model_name=model_name, + api_key=api_key, + model_appid=model_appid, + access_token=access_token_val, + base_url=base_url, + language=language + ) + + # Try to get from model records in database + model_records = get_model_records({"model_type": "stt"}, tenant_id) + if model_records: + record = model_records[0] + model_factory = record.get("model_factory", "") + model_name = record.get("model_name", "") + api_key = record.get("api_key", "") + base_url = record.get("base_url", "") + model_appid = record.get("model_appid", "") + access_token_val = record.get("access_token", "") + + return self._get_stt_model_from_config( + model_factory=model_factory, + model_name=model_name, + api_key=api_key, + model_appid=model_appid, + access_token=access_token_val, + base_url=base_url, + language=language + ) + + logger.warning(f"No STT model configuration found for tenant {tenant_id}, using default config") + return self._get_stt_model_from_config(language=language) except Exception as e: - logger.error(f"Failed to initialize voice service: {str(e)}") - raise VoiceConfigException(f"Voice service initialization failed: {str(e)}") from e + logger.error(f"Error getting STT model config for tenant 
{tenant_id}: {str(e)}") + return self._get_stt_model_from_config(language=language) + + def _get_tts_model_from_config( + self, + model_factory: Optional[str] = None, + api_key: Optional[str] = None, + model_appid: Optional[str] = None, + access_token: Optional[str] = None, + speed_ratio: float = 1.0, + base_url: Optional[str] = None, + model: Optional[str] = None + ) -> BaseTTSModel: + """ + Get the appropriate TTS model based on model factory configuration. + + Args: + model_factory: Model factory/vendor name + api_key: API key (for Ali TTS) + model_appid: Application ID (for Volcano TTS) + access_token: Access token (for Volcano TTS) + speed_ratio: Speech speed ratio + base_url: Custom WebSocket URL (optional) + model: Model name (for Ali TTS) - async def start_stt_streaming_session(self, websocket) -> None: + Returns: + TTS model instance based on configuration """ - Start STT streaming session + use_volc = model_factory and model_factory.lower() in ["volc", "volcano", "volcengine", "火山引擎"] + + if use_volc: + volc_config = VolcTTSConfig( + appid=model_appid or "", + token=access_token or "", + speed_ratio=speed_ratio, + ws_url=base_url or None, + ) + return VolcTTSModel(volc_config) + else: + ali_config = AliTTSConfig( + api_key=api_key or "", + model=model or "qwen3-tts-flash", + voice="Cherry", + speech_rate=speed_ratio, + ws_url=base_url if base_url else None + ) + return AliTTSModel(ali_config) + + def _get_tts_model_from_tenant_config( + self, + tenant_id: str + ) -> BaseTTSModel: + """ + Get TTS model based on tenant's model configuration. 
+ + Args: + tenant_id: Tenant ID + + Returns: + TTS model instance based on tenant's configuration + """ + try: + tts_config = tenant_config_manager.get_model_config(tenant_id, "tts") + + if tts_config: + model_factory = tts_config.get("model_factory", "") + api_key = tts_config.get("api_key", "") + model_appid = tts_config.get("model_appid", "") + access_token_val = tts_config.get("access_token", "") + speed_ratio = float(tts_config.get("speed_ratio", 1.0)) + base_url = tts_config.get("base_url", "") + model = tts_config.get("model") or tts_config.get("model_name", "") + + return self._get_tts_model_from_config( + model_factory=model_factory, + api_key=api_key, + model_appid=model_appid, + access_token=access_token_val, + speed_ratio=speed_ratio, + base_url=base_url if base_url else None, + model=model if model else None + ) + + model_records = get_model_records({"model_type": "tts"}, tenant_id) + if model_records: + record = model_records[0] + model_factory = record.get("model_factory", "") + api_key = record.get("api_key", "") + model_appid = record.get("model_appid", "") + access_token_val = record.get("access_token", "") + speed_ratio = float(record.get("speed_ratio", 1.0)) + base_url = record.get("base_url", "") + model = record.get("model_name", "") + + return self._get_tts_model_from_config( + model_factory=model_factory, + api_key=api_key, + model_appid=model_appid, + access_token=access_token_val, + speed_ratio=speed_ratio, + base_url=base_url if base_url else None, + model=model if model else None + ) + + logger.warning(f"No TTS model configuration found for tenant {tenant_id}, using default config") + return self._get_tts_model_from_config() + + except Exception as e: + logger.error(f"Error getting TTS model config for tenant {tenant_id}: {str(e)}") + return self._get_tts_model_from_config() + + async def start_stt_streaming_session( + self, + websocket, + stt_config: Optional[Dict[str, Any]] = None, + tenant_id: Optional[str] = None, + language: str = 
"zh" + ) -> None: + """ + Start STT streaming session. Args: websocket: WebSocket connection for real-time audio streaming + stt_config: STT configuration dict from client (preferred) + tenant_id: Tenant ID for model lookup + language: Language for speech recognition (default: zh) Raises: STTConnectionException: If STT streaming fails """ try: - logger.info("Starting STT streaming session") - await self.stt_model.start_streaming_session(websocket) + model_factory = None + model_name = None + api_key = None + model_appid = None + access_token = None + base_url = None + + if stt_config: + model_factory = stt_config.get("model_factory") + model_name = stt_config.get("model") or stt_config.get("model_name") + api_key = stt_config.get("api_key") or stt_config.get("apiKey") + model_appid = stt_config.get("model_appid") or stt_config.get("appid") + access_token = stt_config.get("access_token") + base_url = stt_config.get("base_url") or stt_config.get("baseUrl") + language = stt_config.get("language", language) + else: + logger.warning("No stt_config provided, will use tenant model config if available") + + # Get STT model based on configuration + if model_factory or api_key or model_appid: + stt_model = self._get_stt_model_from_config( + model_factory=model_factory, + model_name=model_name, + api_key=api_key, + model_appid=model_appid, + access_token=access_token, + base_url=base_url, + language=language + ) + elif tenant_id: + stt_model = self._get_stt_model_from_tenant_config(tenant_id, language) + else: + logger.warning("No tenant_id provided and no explicit config, using default Ali STT") + stt_model = self._get_stt_model_from_config( + api_key=api_key, + language=language + ) + + await stt_model.start_streaming_session(websocket) except Exception as e: logger.error(f"STT streaming session failed: {str(e)}") raise STTConnectionException(f"STT streaming failed: {str(e)}") from e - async def generate_tts_speech(self, text: str, stream: bool = True) -> Any: + async def 
generate_tts_speech( + self, + text: str, + stream: bool = True, + tts_config: Optional[Dict[str, Any]] = None, + tenant_id: Optional[str] = None, + model_name_override: Optional[str] = None + ) -> Any: """ Generate TTS speech from text Args: text: Text to convert to speech stream: Whether to stream the audio or return complete audio + tts_config: TTS configuration dict from client (preferred) + tenant_id: Tenant ID for model lookup + model_name_override: Model name override Returns: Audio data (streaming or complete) @@ -81,67 +341,145 @@ async def generate_tts_speech(self, text: str, stream: bool = True) -> Any: try: logger.info(f"Generating TTS speech for text: {text[:50]}...") - speech_result = await self.tts_model.generate_speech(text, stream=stream) + + model_factory = None + api_key = None + model_appid = None + access_token = None + speed_ratio = 1.0 + base_url = None + model_name = None + + if tts_config: + model_factory = tts_config.get("model_factory") + api_key = tts_config.get("api_key") or tts_config.get("apiKey") + model_appid = tts_config.get("model_appid") or tts_config.get("appid") + access_token = tts_config.get("access_token") + speed_ratio = float(tts_config.get("speed_ratio", 1.0)) + base_url = tts_config.get("base_url") or tts_config.get("baseUrl") + model_name = tts_config.get("model") or tts_config.get("model_name") + + # If model_name is provided directly, use it + effective_model = model_name_override or model_name + logger.info(f"TTS config - api_key: {bool(api_key)}, model_name_override: {model_name_override}, " + f"model_name from config: {model_name}, effective_model: {effective_model}") + + + # Determine model factory and create appropriate TTS model + use_volc = model_factory and model_factory.lower() in ["volc", "volcano", "volcengine", "火山引擎"] + + if use_volc: + # Use Volcano TTS + tts_model = self._get_tts_model_from_config( + model_factory=model_factory, + api_key=api_key, + model_appid=model_appid, + access_token=access_token, 
+ speed_ratio=speed_ratio, + base_url=base_url, + model=effective_model + ) + logger.info(f"TTS model created: Volcano TTS (factory={model_factory})") + elif api_key: + # Use Ali TTS with provided api_key + tts_model = self._get_tts_model_from_config( + model_factory=model_factory, + api_key=api_key, + model_appid=model_appid, + access_token=access_token, + speed_ratio=speed_ratio, + base_url=base_url, + model=effective_model + ) + logger.info(f"TTS model created: Ali TTS (api_key provided)") + elif tenant_id: + tts_model = self._get_tts_model_from_tenant_config(tenant_id) + logger.info(f"TTS model created from tenant config for tenant_id={tenant_id}") + else: + logger.warning("No api_key, model_name, or tenant_id provided, using default TTS model") + tts_model = self._get_tts_model_from_config() + + speech_result = await tts_model.generate_speech(text, stream=stream) return speech_result except Exception as e: logger.error(f"TTS generation failed: {str(e)}") raise TTSConnectionException(f"TTS generation failed: {str(e)}") from e - async def stream_tts_to_websocket(self, websocket, text: str) -> None: + async def stream_tts_to_websocket( + self, + websocket, + text: str, + tenant_id: Optional[str] = None, + model_name: Optional[str] = None, + tts_config: Optional[Dict[str, Any]] = None, + ) -> None: """ Stream TTS audio to WebSocket with proper error handling and fallback Args: websocket: WebSocket connection to stream to text: Text to convert to speech + tenant_id: Optional tenant ID for model selection + model_name: Optional model name override + tts_config: Optional TTS configuration dict with model_factory, api_key, model_appid, access_token, base_url Raises: TTSConnectionException: If TTS service connection fails VoiceServiceException: If TTS streaming fails """ - try: - # Generate and stream audio chunks - speech_result = await self.generate_tts_speech(text, stream=True) - - # Check if it's an async iterator or a regular iterable - if hasattr(speech_result, 
'__aiter__'): - # It's an async iterator, use async for - async for chunk in speech_result: - if websocket.client_state.name == "CONNECTED": - await websocket.send_bytes(chunk) - else: - break - elif hasattr(speech_result, '__iter__'): - # It's a regular iterator, use normal for - for chunk in speech_result: - if websocket.client_state.name == "CONNECTED": - await websocket.send_bytes(chunk) - else: - break - else: - # It's a single chunk, send it directly + speech_result = await self.generate_tts_speech( + text, + stream=True, + tenant_id=tenant_id, + model_name_override=model_name, + tts_config=tts_config + ) + + # Check if it's an async iterator or a regular iterable + if hasattr(speech_result, '__aiter__'): + # It's an async iterator, use async for + async for chunk in speech_result: if websocket.client_state.name == "CONNECTED": - await websocket.send_bytes(speech_result) - - await asyncio.sleep(0.1) - - except TypeError as te: - # If speech_result is still a coroutine, try calling it directly without stream=True - if "async for" in str(te) and "requires an object with __aiter__" in str(te): - logger.error("Falling back to non-streaming TTS") - speech_data = await self.generate_tts_speech(text, stream=False) + await websocket.send_bytes(chunk) + else: + break + elif hasattr(speech_result, '__iter__'): + # It's a regular iterator, use normal for + for chunk in speech_result: if websocket.client_state.name == "CONNECTED": - await websocket.send_bytes(speech_data) - else: - raise + await websocket.send_bytes(chunk) + else: + break + else: + # It's a single chunk, send it directly + if websocket.client_state.name == "CONNECTED": + await websocket.send_bytes(speech_result) # Send end marker after successful TTS generation if websocket.client_state.name == "CONNECTED": await websocket.send_json({"status": "completed"}) - async def check_stt_connectivity(self) -> bool: + async def check_stt_connectivity( + self, + model_factory: Optional[str] = None, + api_key: 
Optional[str] = None, + model_appid: Optional[str] = None, + access_token: Optional[str] = None, + language: str = "zh", + model: str = "qwen3-asr-flash-realtime", + base_url: Optional[str] = None + ) -> bool: """ - Check STT service connectivity + Check STT service connectivity. + + Args: + model_factory: Model factory/vendor name (e.g., "volc", "dashscope") + api_key: API key for Ali STT + model_appid: Application ID for Volcano STT + access_token: Access token for Volcano STT + language: Language for speech recognition (default: zh) + model: STT model name (default: qwen3-asr-flash-realtime) + base_url: Custom WebSocket URL (optional) Returns: bool: True if STT service is connected, False otherwise @@ -150,8 +488,20 @@ async def check_stt_connectivity(self) -> bool: STTConnectionException: If connectivity check fails """ try: - logger.info(f"Checking STT connectivity with config: {self.stt_config}") - connected = await self.stt_model.check_connectivity() + # Get STT model based on factory + stt_model = self._get_stt_model_from_config( + model_factory=model_factory, + model_name=model, + api_key=api_key, + model_appid=model_appid, + access_token=access_token, + base_url=base_url, + language=language + ) + + + connected = await stt_model.check_connectivity() + if not connected: logger.error("STT service connection failed") raise STTConnectionException("STT service connection failed") @@ -162,9 +512,27 @@ async def check_stt_connectivity(self) -> bool: logger.error(f"STT connectivity check failed: {str(e)}") raise STTConnectionException(f"STT connectivity check failed: {str(e)}") from e - async def check_tts_connectivity(self) -> bool: + async def check_tts_connectivity( + self, + model_factory: Optional[str] = None, + api_key: Optional[str] = None, + model_appid: Optional[str] = None, + access_token: Optional[str] = None, + speed_ratio: float = 1.0, + base_url: Optional[str] = None, + model: Optional[str] = None + ) -> bool: """ - Check TTS service connectivity + 
Check TTS service connectivity. + + Args: + model_factory: Model factory/vendor name (e.g., "volc", "dashscope") + api_key: API key for Ali TTS + model_appid: Application ID for Volcano TTS + access_token: Access token for Volcano TTS + speed_ratio: Speech speed ratio + base_url: Custom WebSocket URL (optional) + model: Model name (e.g., "qwen3-tts-flash") Returns: bool: True if TTS service is connected, False otherwise @@ -173,11 +541,21 @@ async def check_tts_connectivity(self) -> bool: TTSConnectionException: If connectivity check fails """ try: - logger.info(f"Checking TTS connectivity with config: {self.tts_config}") - connected = await self.tts_model.check_connectivity() + tts_model = self._get_tts_model_from_config( + model_factory=model_factory, + api_key=api_key, + model_appid=model_appid, + access_token=access_token, + speed_ratio=speed_ratio, + base_url=base_url, + model=model + ) + + connected = await tts_model.check_connectivity() if not connected: - logger.error("TTS service connection failed") - raise TTSConnectionException("TTS service connection failed") + msg = "TTS service connectivity check returned False" + logger.warning(msg) + raise TTSConnectionException(msg) return connected except TTSConnectionException: raise @@ -185,12 +563,17 @@ async def check_tts_connectivity(self) -> bool: logger.error(f"TTS connectivity check failed: {str(e)}") raise TTSConnectionException(f"TTS connectivity check failed: {str(e)}") from e - async def check_voice_connectivity(self, model_type: str) -> bool: + async def check_voice_connectivity( + self, + model_type: str, + stt_config: Optional[Dict[str, Any]] = None + ) -> bool: """ - Check voice service connectivity based on model type + Check voice service connectivity based on model type. 
Args: model_type: Type of model to check ('stt' or 'tts') + stt_config: Optional STT configuration dict Returns: bool: True if the specified service is connected, False otherwise @@ -202,9 +585,44 @@ async def check_voice_connectivity(self, model_type: str) -> bool: """ try: if model_type == 'stt': - return await self.check_stt_connectivity() + model_factory = stt_config.get("model_factory") if stt_config else None + api_key = stt_config.get("api_key") if stt_config else None + model_appid = stt_config.get("model_appid") if stt_config else None + access_token = stt_config.get("access_token") if stt_config else None + language = stt_config.get("language", "zh") if stt_config else "zh" + model = stt_config.get("model", "qwen3-asr-flash-realtime") if stt_config else "qwen3-asr-flash-realtime" + base_url = stt_config.get("base_url") if stt_config else None + + return await self.check_stt_connectivity( + model_factory=model_factory, + api_key=api_key, + model_appid=model_appid, + access_token=access_token, + language=language, + model=model, + base_url=base_url + ) elif model_type == 'tts': - return await self.check_tts_connectivity() + model_factory = stt_config.get("model_factory") if stt_config else None + api_key = stt_config.get("api_key") if stt_config else None + model_appid = stt_config.get("model_appid") if stt_config else None + access_token = stt_config.get("access_token") if stt_config else None + speed_ratio = float(stt_config.get("speed_ratio", 1.0)) if stt_config else 1.0 + base_url = stt_config.get("base_url") if stt_config else None + model = stt_config.get("model", "qwen3-tts-flash") if stt_config else "qwen3-tts-flash" + + connected = await self.check_tts_connectivity( + model_factory=model_factory, + api_key=api_key, + model_appid=model_appid, + access_token=access_token, + speed_ratio=speed_ratio, + base_url=base_url, + model=model + ) + if not connected: + raise TTSConnectionException("TTS service connectivity check returned False") + return 
connected else: logger.error(f"Unknown model type: {model_type}") raise VoiceServiceException(f"Unknown model type: {model_type}") diff --git a/backend/utils/a2a_http_client.py b/backend/utils/a2a_http_client.py index 2bc829403..8b7c55d9f 100644 --- a/backend/utils/a2a_http_client.py +++ b/backend/utils/a2a_http_client.py @@ -134,6 +134,7 @@ async def get_json( "User-Agent": "Nexent-A2A-Client/1.0", "Accept": CONTENT_TYPE_JSON, "Connection": "close", + "A2A-Version": "1.0", } if headers: request_headers.update(headers) @@ -141,14 +142,24 @@ async def get_json( logger.debug(f"A2A GET request: url={url}") try: - _, body = await self._request_with_retry( + status, body = await self._request_with_retry( "GET", url, headers=request_headers ) + # Decode body and handle empty responses + body_text = body.decode('utf-8') if body else "" + + if not body_text.strip(): + logger.error( + f"A2A GET received empty response for {url}: HTTP status={status}. " + f"Expected JSON response but got empty body." 
+ ) + raise ValueError(f"Empty response from {url} (HTTP {status})") + # Parse JSON from body import json - data = json.loads(body.decode('utf-8')) + data = json.loads(body_text) return data except asyncio.TimeoutError as e: logger.error(f"A2A GET timeout for {url}: {e}") @@ -156,6 +167,9 @@ async def get_json( except aiohttp.ClientResponseError as e: logger.error(f"A2A GET HTTP error for {url}: {e.status}") raise + except ValueError: + # Re-raise empty response errors without wrapping + raise except Exception as e: import traceback logger.error(f"A2A GET request failed for {url}: {type(e).__name__}: {e}\n{traceback.format_exc()}") @@ -176,6 +190,7 @@ async def post_json( "Content-Type": CONTENT_TYPE_JSON, "Accept": CONTENT_TYPE_JSON, "Connection": "close", + "A2A-Version": "1.0", } if headers: request_headers.update(headers) @@ -183,15 +198,29 @@ async def post_json( logger.info(f"A2A POST request: url={url}, payload={payload}") try: - _, body = await self._request_with_retry( + status, body = await self._request_with_retry( "POST", url, json=payload, headers=request_headers ) + # Decode body and handle empty responses + body_text = body.decode('utf-8') if body else "" + + if not body_text.strip(): + logger.error( + f"A2A POST received empty response for {url}: HTTP status={status}. " + f"This usually indicates the remote agent is not responding correctly. " + f"Check that the agent URL '{url}' is correct and the agent is running." + ) + raise ValueError( + f"Empty response from agent at {url} (HTTP {status}). " + f"The agent may be unreachable, still processing, or the endpoint URL is incorrect." 
+ ) + # Parse JSON from body import json - data = json.loads(body.decode('utf-8')) + data = json.loads(body_text) return data except asyncio.TimeoutError as e: logger.error(f"A2A POST timeout for {url}: {e}") @@ -199,6 +228,9 @@ async def post_json( except aiohttp.ClientResponseError as e: logger.error(f"A2A POST HTTP error for {url}: {e.status}") raise + except ValueError: + # Re-raise empty response errors without wrapping + raise except Exception as e: import traceback logger.error(f"A2A POST request failed for {url}: {type(e).__name__}: {e}\n{traceback.format_exc()}") @@ -249,6 +281,7 @@ def build_a2a_headers(api_key: Optional[str] = None) -> Dict[str, str]: headers = { "Content-Type": CONTENT_TYPE_JSON, "Accept": CONTENT_TYPE_JSON, + "A2A-Version": "1.0", } if api_key: headers["Authorization"] = f"Bearer {api_key}" diff --git a/backend/utils/auth_utils.py b/backend/utils/auth_utils.py index 7b40576e2..4ade6f211 100644 --- a/backend/utils/auth_utils.py +++ b/backend/utils/auth_utils.py @@ -3,13 +3,17 @@ import hmac import hashlib from datetime import datetime, timedelta -from typing import Dict, Optional, Tuple +from typing import Any, Dict, Optional, Tuple import jwt +import httpx from fastapi import Request from supabase import create_client +from supabase.lib.client_options import SyncClientOptions from consts.const import ( + ASSET_OWNER_ROLE, + ASSET_OWNER_TENANT_ID, DEFAULT_TENANT_ID, DEFAULT_USER_ID, IS_SPEED_MODE, @@ -42,7 +46,9 @@ TIMESTAMP_VALIDITY_WINDOW = 5 * 60 -def calculate_hmac_signature(secret_key: str, access_key: str, timestamp: str, body: str) -> str: +def calculate_hmac_signature( + secret_key: str, access_key: str, timestamp: str, body: str +) -> str: """ Calculate HMAC-SHA256 signature for AK/SK authentication. 
@@ -84,7 +90,9 @@ def get_aksk_config(tenant_id: str) -> Tuple[str, str]: raise UnauthorizedError("AK/SK authentication is not configured") -def verify_aksk_signature(access_key: str, timestamp: str, signature: str, body: str, tenant_id: str = None) -> bool: +def verify_aksk_signature( + access_key: str, timestamp: str, signature: str, body: str, tenant_id: str = None +) -> bool: """Verify AK/SK signature; returns False instead of raising on mismatch.""" tenant = tenant_id or DEFAULT_TENANT_ID try: @@ -95,17 +103,22 @@ def verify_aksk_signature(access_key: str, timestamp: str, signature: str, body: if access_key != expected_access_key: return False - expected_sig = calculate_hmac_signature(secret_key, access_key, timestamp, body) + expected_sig = calculate_hmac_signature( + secret_key, access_key, timestamp, body) return hmac.compare_digest(expected_sig, signature) -def validate_aksk_authentication(headers: Dict[str, str], body: str, tenant_id: str = None) -> bool: +def validate_aksk_authentication( + headers: Dict[str, str], body: str, tenant_id: str = None +) -> bool: """ Validate AK/SK authentication. Returns True when valid, otherwise raises domain exceptions. 
""" - from consts.exceptions import SignatureValidationError # imported lazily for test-time stubbing + from consts.exceptions import ( + SignatureValidationError, + ) # imported lazily for test-time stubbing try: access_key, ts, sig = extract_aksk_headers(headers) @@ -129,6 +142,7 @@ def validate_aksk_authentication(headers: Dict[str, str], body: str, tenant_id: logger.exception("Unexpected error during AK/SK authentication") raise UnauthorizedError("Authentication failed") from exc + # --------------------------------------------------------------------------- # Bearer Token (API Key) authentication # --------------------------------------------------------------------------- @@ -151,7 +165,11 @@ def validate_bearer_token(authorization: Optional[str]) -> Tuple[bool, Optional[ return False, None # Extract token from "Bearer " format - token = authorization.replace("Bearer ", "") if authorization.startswith("Bearer ") else authorization + token = ( + authorization.replace("Bearer ", "") + if authorization.startswith("Bearer ") + else authorization + ) if not token: logger.warning("Empty bearer token") @@ -161,7 +179,9 @@ def validate_bearer_token(authorization: Optional[str]) -> Tuple[bool, Optional[ try: token_info = get_token_by_access_key(token) if token_info and token_info.get("delete_flag") != "Y": - logger.debug(f"Token validated successfully for user {token_info.get('user_id')}") + logger.debug( + f"Token validated successfully for user {token_info.get('user_id')}" + ) return True, token_info else: logger.warning(f"Invalid or inactive token: {token[:20]}...") @@ -202,19 +222,59 @@ def get_user_and_tenant_by_access_key(access_key: str) -> Dict[str, str]: tenant_id = user_tenant_record["tenant_id"] else: tenant_id = DEFAULT_TENANT_ID - logger.warning(f"No tenant relationship found for user {user_id}, using default tenant") + logger.warning( + f"No tenant relationship found for user {user_id}, using default tenant" + ) return { "user_id": user_id, "tenant_id": 
tenant_id, - "token_id": token_info.get("token_id") + "token_id": token_info.get("token_id"), } +def resolve_tenant_id_from_user_tenant_record(user_tenant: Dict[str, Any]) -> str: + """ + Resolve the effective tenant_id from a user_tenant_t record. + + ASSET_OWNER users may have an empty legacy tenant_id; map them to the + virtual ASSET_OWNER tenant. Fall back to DEFAULT_TENANT_ID when unset. + """ + tenant_id = user_tenant.get("tenant_id") + if tenant_id: + return tenant_id + + user_role = (user_tenant.get("user_role") or "").upper() + if user_role == ASSET_OWNER_ROLE: + return ASSET_OWNER_TENANT_ID + + return DEFAULT_TENANT_ID + + +def _build_supabase_options() -> SyncClientOptions: + """Build ClientOptions that bypass the system HTTP proxy. + + httpx 0.28 reads the Windows system proxy (e.g. Clash on 127.0.0.1:7897) + by default and routes every request through it. When the proxy cannot + reach a local service (such as GoTrue on http://localhost:8000) the + request hangs until the timeout, breaking login. + + Pass an explicit ``httpx.Client`` with ``trust_env=False`` and + ``proxy=None`` so Supabase always talks to ``SUPABASE_URL`` directly. 
+ """ + http_client = httpx.Client( + trust_env=False, + proxy=None, + timeout=httpx.Timeout(30.0, connect=10.0), + follow_redirects=True, + ) + return SyncClientOptions(httpx_client=http_client) + + def get_supabase_client(): """Get Supabase client instance with regular key (user-context operations).""" try: - return create_client(SUPABASE_URL, SUPABASE_KEY) + return create_client(SUPABASE_URL, SUPABASE_KEY, options=_build_supabase_options()) except Exception as e: logging.error(f"Failed to create Supabase client: {str(e)}") return None @@ -223,7 +283,7 @@ def get_supabase_client(): def get_supabase_admin_client(): """Get Supabase client instance with service role key for admin operations.""" try: - return create_client(SUPABASE_URL, SERVICE_ROLE_KEY) + return create_client(SUPABASE_URL, SERVICE_ROLE_KEY, options=_build_supabase_options()) except Exception as e: logging.error(f"Failed to create Supabase admin client: {str(e)}") return None @@ -245,8 +305,10 @@ def get_jwt_expiry_seconds(token: str) -> int: # 10 years in seconds return 10 * 365 * 24 * 60 * 60 # Ensure token is pure JWT, remove possible Bearer prefix - jwt_token = token.replace( - "Bearer ", "") if token.startswith("Bearer ") else token + jwt_token = ( + token.replace("Bearer ", "") if token.startswith( + "Bearer ") else token + ) # If debug expiration time is set, return directly for quick debugging if DEBUG_JWT_EXPIRE_SECONDS > 0: @@ -286,41 +348,38 @@ def calculate_expires_at(token: Optional[str] = None) -> int: return int((datetime.now() + timedelta(seconds=expiry_seconds)).timestamp()) -def _extract_user_id_from_jwt_token(authorization: str) -> Optional[str]: +def _decode_jwt_token(authorization: str) -> dict: """ Extract user ID from JWT token after verifying signature and expiration. 
Args: authorization: Authorization header value - Returns: - Optional[str]: User ID, return None if parsing fails - Raises: UnauthorizedError: If token is invalid, expired, or signature verification fails """ if not SUPABASE_JWT_SECRET: - logging.error("SUPABASE_JWT_SECRET (or JWT_SECRET) is not configured; cannot verify JWT") + logging.error( + "SUPABASE_JWT_SECRET (or JWT_SECRET) is not configured; cannot verify JWT" + ) raise UnauthorizedError("JWT verification is not configured") try: # Format authorization header - token = authorization.replace("Bearer ", "") if authorization.startswith( - "Bearer ") else authorization + token = ( + authorization.replace("Bearer ", "") + if authorization.startswith("Bearer ") + else authorization + ) # Decode and verify JWT (signature + expiration) # verify_aud=False: allow tokens with aud claim (e.g. test JWT, Supabase) without strict audience check - decoded = jwt.decode( + return jwt.decode( token, SUPABASE_JWT_SECRET, algorithms=["HS256"], options={"verify_exp": True, "verify_aud": False}, ) - - # Extract user ID from JWT claims - user_id = decoded.get("sub") - - return user_id except jwt.ExpiredSignatureError: logging.warning("Token expired") raise UnauthorizedError("Token has expired") @@ -333,10 +392,47 @@ def _extract_user_id_from_jwt_token(authorization: str) -> Optional[str]: except UnauthorizedError: raise except Exception as e: - logging.error(f"Failed to extract user ID from token: {str(e)}") + logging.error(f"Failed to decode token: {str(e)}") raise UnauthorizedError("Invalid or expired authentication token") +def _extract_user_id_from_jwt_token(authorization: str) -> Optional[str]: + """ + Extract user ID from JWT token after verifying signature and expiration. 
+ """ + decoded = _decode_jwt_token(authorization) + return decoded.get("sub") + + +def extract_session_id_from_authorization(authorization: Optional[str]) -> Optional[str]: + """Extract the sid claim without enforcing token validity, for idempotent logout.""" + if not authorization: + return None + try: + token = ( + authorization.replace("Bearer ", "") + if authorization.startswith("Bearer ") + else authorization + ) + decoded = jwt.decode(token, options={"verify_signature": False}) + sid = decoded.get("sid") + return str(sid) if sid else None + except Exception: + return None + + +def ensure_cas_session_active_from_authorization(authorization: Optional[str]) -> None: + """Reject CAS-issued JWTs whose server-side session is expired or revoked.""" + session_id = extract_session_id_from_authorization(authorization) + if not session_id: + return + + from database.cas_session_db import is_cas_session_active + + if not is_cas_session_active(str(session_id)): + raise UnauthorizedError("CAS session has expired or been revoked") + + def get_current_user_id(authorization: Optional[str] = None) -> tuple[str, str]: """ Get current user ID and tenant ID from authorization token @@ -354,25 +450,33 @@ def get_current_user_id(authorization: Optional[str] = None) -> tuple[str, str]: return DEFAULT_USER_ID, DEFAULT_TENANT_ID # In normal mode, missing auth header means unauthorized - return 401, not default user - if authorization is None or (isinstance(authorization, str) and not authorization.strip()): + if authorization is None or ( + isinstance(authorization, str) and not authorization.strip() + ): raise UnauthorizedError("No authorization header provided") try: - user_id = _extract_user_id_from_jwt_token(authorization) + decoded = _decode_jwt_token(authorization) + user_id = decoded.get("sub") if not user_id: raise UnauthorizedError("Invalid or expired authentication token") + ensure_cas_session_active_from_authorization(authorization) + user_tenant_record = 
get_user_tenant_by_user_id(user_id) - if user_tenant_record and user_tenant_record.get('tenant_id'): - tenant_id = user_tenant_record['tenant_id'] + if user_tenant_record and user_tenant_record.get("tenant_id"): + tenant_id = user_tenant_record["tenant_id"] logging.debug(f"Found tenant ID for user {user_id}: {tenant_id}") else: tenant_id = DEFAULT_TENANT_ID logging.warning( - f"No tenant relationship found for user {user_id}, using default tenant") + f"No tenant relationship found for user {user_id}, using default tenant" + ) return user_id, tenant_id + except UnauthorizedError: + raise except Exception as e: logging.error(f"Failed to get user ID and tenant ID: {str(e)}") raise UnauthorizedError("Invalid or expired authentication token") @@ -393,8 +497,8 @@ def get_user_language(request: Request = None) -> str: # Read language setting from cookie if request: try: - if hasattr(request, 'cookies') and request.cookies: - cookie_locale = request.cookies.get('NEXT_LOCALE') + if hasattr(request, "cookies") and request.cookies: + cookie_locale = request.cookies.get("NEXT_LOCALE") if cookie_locale and cookie_locale in [LANGUAGE["ZH"], LANGUAGE["EN"]]: return cookie_locale except (AttributeError, TypeError) as e: @@ -407,6 +511,7 @@ def get_user_language(request: Request = None) -> str: # Simple JWT helpers for tests and tooling # --------------------------------------------------------------------------- + def generate_test_jwt(user_id: str, expires_in: int = 3600) -> str: """ Generate a simple unsigned JWT for testing purposes (HS256 with dummy secret) @@ -423,7 +528,25 @@ def generate_test_jwt(user_id: str, expires_in: int = 3600) -> str: return jwt.encode(payload, MOCK_JWT_SECRET_KEY, algorithm="HS256") -def get_current_user_info(authorization: Optional[str] = None, request: Request = None) -> tuple[str, str, str]: +def generate_session_jwt(user_id: str, expires_in: int = 3600, session_id: str = None) -> str: + """Generate a signed JWT compatible with the existing auth 
verification flow.""" + now = int(time.time()) + payload = { + "sub": user_id, + "role": "authenticated", + "aud": "authenticated", + "iat": now, + "exp": now + expires_in, + "iss": SUPABASE_URL, + } + if session_id: + payload["sid"] = session_id + return jwt.encode(payload, SUPABASE_JWT_SECRET, algorithm="HS256") + + +def get_current_user_info( + authorization: Optional[str] = None, request: Request = None +) -> tuple[str, str, str]: """ Get current user information, including user ID, tenant ID, and language preference diff --git a/backend/utils/content_classifier_utils.py b/backend/utils/content_classifier_utils.py new file mode 100644 index 000000000..fcdb33f70 --- /dev/null +++ b/backend/utils/content_classifier_utils.py @@ -0,0 +1,197 @@ +"""Content classification utilities for streaming LLM output parsing.""" + +import re +from typing import Any, Dict, List, Optional + + +class ContentClassifier: + """Parse XML tags from LLM output and classify streaming content in real-time. + + Uses tag pool matching with state machine for elegant streaming XML parsing. + Classifies content into: + - skill_body: SKILL.md content (including frontmatter - detected by frontend) + - file_content: Additional file content with path information + - summary: Summary text after + - others: Content outside all tags (LLM reasoning process) + + Includes DoS protection to prevent resource exhaustion from malicious input. 
+ """ + + MAX_BUFFER_SIZE = 1024 * 1024 # 1MB + MAX_TAG_LENGTH = 256 # Single tag max length + MAX_PATH_LENGTH = 512 # File path max length + MAX_TAG_COUNT = 100 # Max tags before stopping + + def __init__(self): + self.state = "others" # others | skill_body | file | summary + self.current_file_path: Optional[str] = None + self.buffer = "" + self.tag_count = 0 + self._known_tags = { + "", + "", + "", + "", + "", + } + self._pending_file_path: Optional[str] = None + + def classify(self, chunk: str) -> List[Dict[str, Any]]: + """Process streaming chunk and return list of classified events.""" + results = [] + self.buffer += chunk + + while self.buffer: + if self.buffer.startswith("<"): + if ">" not in self.buffer: + break + results.extend(self._process_tag_start()) + else: + results.extend(self._process_non_tag_content()) + + return results + + def _process_tag_start(self) -> List[Dict[str, Any]]: + """Process buffer when it starts with '<' - extracts and handles tags.""" + results = [] + gt_pos = self.buffer.index(">") + potential_tag = self.buffer[:gt_pos + 1] + matched = self._match_known_tag_with_buffer(potential_tag) + + if matched: + results.extend(self._handle_matched_tag(gt_pos, potential_tag, matched)) + elif len(potential_tag) > self.MAX_TAG_LENGTH: + results.extend(self._emit_dos_protected_content()) + else: + results.extend(self._emit_potential_tag_start()) + + return results + + def _handle_matched_tag(self, gt_pos: int, potential_tag: str, matched_tag: str) -> List[Dict[str, Any]]: + """Handle a successfully matched tag and process following content.""" + results = [] + if self.tag_count >= self.MAX_TAG_COUNT: + self.buffer = self.buffer[gt_pos + 1:] + return results + + self.tag_count += 1 + content_after_tag = self.buffer[gt_pos + 1:] + self.buffer = "" + + event = self._handle_tag(matched_tag) + if event: + results.append(event) + + if content_after_tag: + results.extend(self._process_content_after_tag(content_after_tag)) + + return results + + def 
_process_content_after_tag(self, content: str) -> List[Dict[str, Any]]: + """Process content following a tag, handling embedded tag starts.""" + results = [] + if "<" not in content: + event = self._create_event(content) + if event: + results.append(event) + return results + + next_tag_pos = content.index("<") + immediate_content = content[:next_tag_pos] + if immediate_content: + event = self._create_event(immediate_content) + if event: + results.append(event) + + self.buffer = content[next_tag_pos:] + return results + + def _emit_dos_protected_content(self) -> List[Dict[str, Any]]: + """Handle content that exceeds max tag length (DoS protection).""" + results = [] + event = self._create_event("<") + if event: + results.append(event) + self.buffer = self.buffer[1:] + return results + + def _emit_potential_tag_start(self) -> List[Dict[str, Any]]: + """Handle buffer starting with '<' that doesn't match any known tag.""" + results = [] + event = self._create_event("<") + if event: + results.append(event) + self.buffer = self.buffer[1:] + return results + + def _process_non_tag_content(self) -> List[Dict[str, Any]]: + """Process buffered content that doesn't start with '<'.""" + results = [] + emit_len = min(len(self.buffer), 64) + event = self._create_event(self.buffer[:emit_len]) + if event: + results.append(event) + self.buffer = self.buffer[emit_len:] + return results + + def _match_known_tag_with_buffer(self, buffer_content: str) -> Optional[str]: + """Check if buffer content matches a known complete tag.""" + # Check exact match for simple tags + if buffer_content in self._known_tags: + return buffer_content + + # Check pattern + if buffer_content.startswith(""): + match = re.match( + r'$', + buffer_content + ) + if match: + self._pending_file_path = match.group(1) + return "" + + return None + + def _create_event(self, content: str) -> Dict[str, Any]: + """Create event based on current state.""" + if not content: + return {} + + if self.state == "skill_body": + 
return {"type": "skill_body", "content": content} + elif self.state == "file": + return {"type": "file_content", "content": content, "path": self.current_file_path} + elif self.state == "summary": + return {"type": "summary", "content": content} + else: + return {"type": "others", "content": content} + + def _handle_tag(self, tag: str) -> Optional[Dict[str, Any]]: + """Handle matched tag and update state.""" + if tag == "": + self.state = "skill_body" + return None + + elif tag == "": + self.state = "summary" + return None + + elif tag == "" or tag == "": + if tag == "": + self.state = "summary" + else: + self.state = "others" + return None + + elif tag == "": + self.state = "file" + self.current_file_path = self._pending_file_path + self._pending_file_path = None + return {"type": "file_content", "content": "", "path": self.current_file_path, "is_new_file": True} + + elif tag == "": + self.state = "skill_body" + self.current_file_path = None + return None + + return None diff --git a/backend/utils/context_utils.py b/backend/utils/context_utils.py new file mode 100644 index 000000000..0c3af8915 --- /dev/null +++ b/backend/utils/context_utils.py @@ -0,0 +1,1338 @@ +"""Context component building utilities for system prompt assembly. + +Provides build_context_components() to convert agent configuration data +into ContextComponent instances for use with ContextManager. + +This module implements the piecewise component architecture where each +semantic section of the system prompt is emitted by a dedicated function, +allowing ContextManager to assemble them in the correct order. 
+""" + +from typing import TYPE_CHECKING, Any, Dict, List, Optional + +if TYPE_CHECKING: + from nexent.core.agents.agent_model import ( + ContextComponent, + ToolsComponent, + SkillsComponent, + MemoryComponent, + KnowledgeBaseComponent, + ManagedAgentsComponent, + ExternalAgentsComponent, + SystemPromptComponent, + ToolConfig, + AgentConfig, + ExternalA2AAgentConfig, + ) + + +# ============================================================================= +# SECTION 1: Long-text format functions (expanded from Jinja2 templates) +# Each function accepts language and is_manager params for variant-specific text +# ============================================================================= + + +def _format_memory_context( + memory_list: List[Any], + language: str = "zh", +) -> str: + """Format memory search results with full usage guidelines. + + Jinja2 templates have ~30 lines of "记忆使用准则" text that must be + included here for semantic equivalence. + """ + if not memory_list: + return "" + + # Group memories by level in correct order: tenant, user_agent, user, agent + level_order = ["tenant", "user_agent", "user", "agent"] + memory_by_level: Dict[str, List[Any]] = {} + for mem in memory_list: + if isinstance(mem, dict): + level = mem.get("memory_level", "user") + if level not in memory_by_level: + memory_by_level[level] = [] + memory_by_level[level].append(mem) + + lines = [] + + if language == "zh": + lines.append("### 上下文记忆") + lines.append("基于之前的交互记录,以下是按作用域和重要程度排序的最相关记忆:") + lines.append("") + + for level in level_order: + if level in memory_by_level: + level_title = { + "tenant": "Tenant", + "user_agent": "User_agent", + "user": "User", + "agent": "Agent", + }.get(level, level.title()) + lines.append(f"**{level_title} 层级记忆:**") + for item in memory_by_level[level]: + content = item.get("memory", "") or item.get("content", "") + score = item.get("score", 0.0) + lines.append(f"- {content} `({score:.2f})`") + lines.append("") + + lines.append("**记忆使用准则:**") + 
lines.append("1. **冲突处理优先级**:当记忆信息存在矛盾时,严格按以下顺序处理:") + lines.append("- **最优先**:在上述列表中位置靠前的记忆具有优先权") + lines.append("- **次优先**:当前对话内容与记忆直接冲突时,以当前对话为准") + lines.append("- **次优先**:相关度分数越高,表示记忆越可信") + lines.append("") + lines.append("2. **记忆整合最佳实践**:") + lines.append(" - 自然地将相关记忆融入回答中,避免显式使用\"根据记忆\"、\"根据上下文\"或\"根据交互记忆\"等语言") + lines.append(" - 利用记忆信息调整回答的语调、方式和技术深度以适应用户") + lines.append(" - 让记忆指导您对用户偏好和上下文的理解") + lines.append("") + lines.append("3. **级别特定说明**:") + lines.append(" - **tenant(租户级)**:组织层面的约束和政策(不可违背)") + lines.append(" - **user_agent(用户-代理级)**:特定用户在代理中的交互模式和既定工作流程") + lines.append(" - **user(用户级)**:用户的个人偏好、技能水平和历史上下文") + lines.append(" - **agent(代理级)**:您的既定行为模式和能力特征,通常对所有用户共享(重要性最低)") + else: + lines.append("### Contextual Memory") + lines.append("Based on previous interactions, here are the most relevant memories organized by scope and importance:") + lines.append("") + + for level in level_order: + if level in memory_by_level: + lines.append(f"**{level.title()} Level Memory:**") + for item in memory_by_level[level]: + content = item.get("memory", "") or item.get("content", "") + score = item.get("score", 0.0) + lines.append(f"- {content} `({score:.2f})`") + lines.append("") + + lines.append("**Memory Usage Guidelines:**") + lines.append("1. **Conflict Resolution Priority**: When memories contradict each other, follow this strict order:") + lines.append(" - **Primary**: Information appearing EARLIER in the above numbered list takes precedence") + lines.append(" - **Secondary**: Current conversation context overrides historical memory when directly contradicted") + lines.append(" - **Tertiary**: Higher relevance scores indicate more trustworthy information") + lines.append("") + lines.append("2. 
**Memory Integration Best Practices**:") + lines.append(" - Seamlessly weave relevant memories into your responses without explicitly saying \"I remember\", \"based on memory\" or \"based on context\"") + lines.append(" - Use memories to inform your tone, approach, and technical level appropriate for this user") + lines.append(" - Let memories guide your assumptions about user preferences and context") + lines.append("") + lines.append("3. **Level-Specific Considerations**:") + lines.append(" - **tenant**: Organizational constraints and policies (non-negotiable)") + lines.append(" - **user_agent**: Specific interaction dynamics and established workflow patterns") + lines.append(" - **user**: Individual preferences, skills, and historical context") + lines.append(" - **agent**: Your established behavioral patterns and capabilities, usually shared by all users (least important)") + + return "\n".join(lines) + + +def _format_skills_description( + skills: List[Dict[str, str]], + language: str = "zh", +) -> str: + """Format skill descriptions with full 6-step usage process. + + Jinja2 templates have ~50 lines of "技能使用流程" text that must be + included here for semantic equivalence. + """ + if not skills: + return "" + + lines = [] + + # Build the block + skills_block_lines = [""] + for skill in skills: + name = skill.get("name", "") + desc = skill.get("description", "") + skills_block_lines.append(" ") + skills_block_lines.append(f" {name}") + skills_block_lines.append(f" {desc}") + skills_block_lines.append(" ") + skills_block_lines.append("") + skills_block = "\n".join(skills_block_lines) + + if language == "zh": + lines.append("### 可用技能") + lines.append("") + lines.append("你拥有以下技能(Skills)。技能是预定义的专业能力模块,包含详细执行指南和可选的附加脚本。") + lines.append("") + lines.append(skills_block) + lines.append("") + lines.append("**技能使用流程**:") + lines.append("1. 收到用户请求后,首先审视 `` 中每个技能的 description,判断是否有匹配的技能。") + lines.append("2. 
**加载技能**:根据不同场景选择读取方式:") + lines.append(" - **首次加载**:调用 `read_skill_md(\"skill_name\")` 读取技能的完整执行指南(默认读取 SKILL.md)") + lines.append(" - **精确读取**:如只需特定文件(如示例、参考文档),可指定 additional_files:") + lines.append(" ") + lines.append(" skill_content = read_skill_md(\"skill_name\", [\"examples.md\", \"reference/api_doc\"])") + lines.append(" print(skill_content)") + lines.append(" ") + lines.append(" 注意:当 additional_files 非空时,默认不再自动读取 SKILL.md,如需同时读取请显式指定。") + lines.append("") + lines.append(" - **加载技能配置**:如果技能需要读取配置变量,可先调用 `read_skill_config(\"skill_name\")` 读取配置字符串,通过 `json.loads` 方法转化为配置字典,再从中获取所需值:") + lines.append(" ") + lines.append(" import json") + lines.append(" config = json.loads(read_skill_config(\"skill_name\"))") + lines.append(" # 返回示例: {\"key_a\": {\"key2\": \"value2\"}, \"others\": {...}}") + lines.append(" value = config[\"key1\"][\"key2\"]") + lines.append(" print(value)") + lines.append(" ") + lines.append("") + lines.append("3. **遵循技能指南**:技能内容注入后,严格按其中的步骤执行。不要跳过技能指南中的步骤,也不要用自行编写的代码替代技能定义的流程。") + lines.append("") + lines.append("4. **执行技能脚本**:如果技能指南中引用了附加脚本(形如 ``),使用以下格式调用:") + lines.append(" 代码:") + lines.append(" ") + lines.append(" result = run_skill_script(\"skill_name\", \"script_path\")") + lines.append(" print(result)") + lines.append(" ") + lines.append(" 对于需要附加参数的脚本,需要参照脚本调用说明,将参数直接以字符串形式传递。") + lines.append(" 例如对于希望附加的参数:--param1 value1 --flag,则使用以下格式调用run_skill_script:") + lines.append(" ") + lines.append(" result = run_skill_script(\"skill_name\", \"script_path\", \"--param1 value1 --flag\")") + lines.append(" print(result)") + lines.append(" ") + lines.append(" 注意:只执行技能指南中明确声明的脚本路径,绝不自行构造脚本路径。") + lines.append("") + lines.append("5. **整合输出**:根据技能指南要求的输出格式,结合脚本执行结果生成最终回答。") + lines.append("") + lines.append("6. 
**引用场景处理**:当技能内容中出现引用标记或需要引用其他文件时,需要识别并再次调用 read_skill_md:") + lines.append(" - **引用模板识别**:注意技能内容中形如 `` 或自然语言式的引用声明(如\"详见 examples.md\"、\"请参考 reference/api_doc\")") + lines.append(" - **自动补全**:发现引用后,尝试读取被引用的文件获取更多信息") + lines.append(" - **示例**:") + lines.append(" ") + lines.append(" # 技能内容提示\"请参考 examples.md 获取详细示例\"") + lines.append(" additional_info = read_skill_md(\"skill_name\", [\"examples.md\"])") + lines.append(" print(additional_info)") + lines.append(" ") + else: + lines.append("### Available Skills") + lines.append("") + lines.append("You have the following Skills. Skills are predefined professional capability modules with detailed execution guides and optional additional scripts.") + lines.append("") + lines.append(skills_block) + lines.append("") + lines.append("**Skill Usage Process**:") + lines.append("1. After receiving a user request, first examine the description of each skill in `` to determine if there is a matching skill.") + lines.append("2. **Load Skill**: Choose the appropriate reading method based on the scenario:") + lines.append(" - **First-time load**: Call `read_skill_md(\"skill_name\")` to read the complete execution guide (defaults to reading SKILL.md)") + lines.append(" - **Precise read**: If you only need specific files (like examples, reference docs), specify additional_files:") + lines.append(" ") + lines.append(" skill_content = read_skill_md(\"skill_name\", [\"examples.md\", \"reference/api_doc\"])") + lines.append(" print(skill_content)") + lines.append(" ") + lines.append(" Note: When additional_files is non-empty, SKILL.md is no longer auto-read. 
If you need both, explicitly specify it.") + lines.append("") + lines.append(" - **Load skill config**: If the skill needs configuration variables, call `read_skill_config(\"skill_name\")` to read the config string, convert to dict via `json.loads`, then access values:") + lines.append(" ") + lines.append(" import json") + lines.append(" config = json.loads(read_skill_config(\"skill_name\"))") + lines.append(" # Example: {\"key_a\": {\"key2\": \"value2\"}, \"others\": {...}}") + lines.append(" value = config[\"key1\"][\"key2\"]") + lines.append(" print(value)") + lines.append(" ") + lines.append("") + lines.append("3. **Follow Skill Guide**: After skill content is injected, strictly follow its steps. Do not skip steps or replace with your own code.") + lines.append("") + lines.append("4. **Execute Skill Script**: If the skill guide references additional scripts (like ``), call:") + lines.append(" ") + lines.append(" result = run_skill_script(\"skill_name\", \"script_path\")") + lines.append(" print(result)") + lines.append(" ") + lines.append(" For scripts needing extra params, pass them as a command-line string per the script's calling instructions.") + lines.append(" Example for --param1 value1 --flag:") + lines.append(" ") + lines.append(" result = run_skill_script(\"skill_name\", \"script_path\", \"--param1 value1 --flag\")") + lines.append(" print(result)") + lines.append(" ") + lines.append(" Note: Only execute script paths explicitly declared in the skill guide. Never construct paths yourself.") + lines.append("") + lines.append("5. **Integrate Output**: Generate the final answer based on the skill guide's output format and script execution results.") + lines.append("") + lines.append("6. 
**Handle References**: When the skill content has reference markers or needs to reference other files, identify and call read_skill_md again:") + lines.append(" - **Reference template recognition**: Look for patterns like `` or natural-language references (\"see examples.md\", \"refer to reference/api_doc\")") + lines.append(" - **Auto-complete**: After discovering a reference, try reading the referenced file for more info") + lines.append(" - **Example**:") + lines.append(" ") + lines.append(" # Skill content says \"see examples.md for detailed examples\"") + lines.append(" additional_info = read_skill_md(\"skill_name\", [\"examples.md\"])") + lines.append(" print(additional_info)") + lines.append(" ") + + return "\n".join(lines) + + +def _format_tools_description( + tools: Dict[str, Any], + knowledge_base_summary: Optional[str] = None, + language: str = "zh", + is_manager: bool = True, +) -> str: + """Format tool descriptions with file URL usage guide. + + Jinja2 templates have ~10 lines of "文件链接使用指南" text that must be + included here for semantic equivalence. + + Note: Managed agents use different presigned_url guidance than manager agents. 
+ """ + if not tools: + no_tools_msg = "- 当前没有可用的工具" if language == "zh" else "- No tools are currently available" + return no_tools_msg + + lines = [] + + if language == "zh": + lines.append("- 你只能使用以下工具,不得使用任何其他工具:") + else: + lines.append("- You can only use the following tools and may not use any other tools:") + + for name, tool in tools.items(): + if hasattr(tool, 'description'): + desc = tool.description + inputs = tool.inputs + output_type = tool.output_type + source = getattr(tool, 'source', 'local') + else: + desc = tool.get('description', '') + inputs = tool.get('inputs', '') + output_type = tool.get('output_type', '') + source = tool.get('source', 'local') + + # MCP tools have [MCP] prefix + if source == 'mcp': + if language == "zh": + lines.append(f"- [MCP] {name}: {desc}") + lines.append(f" 接受输入: {inputs}") + lines.append(f" 返回输出类型: {output_type}") + else: + lines.append(f"- [MCP] {name}: {desc}") + lines.append(f" Accepts input: {inputs}") + lines.append(f" Returns output type: {output_type}") + else: + if language == "zh": + lines.append(f"- {name}: {desc}") + lines.append(f" 接受输入: {inputs}") + lines.append(f" 返回输出类型: {output_type}") + else: + lines.append(f"- {name}: {desc}") + lines.append(f" Accepts input: {inputs}") + lines.append(f" Returns output type: {output_type}") + + # Knowledge base summary + if knowledge_base_summary: + if language == "zh": + lines.append("- knowledge_base_search工具只能使用以下知识库索引,请根据用户问题选择最相关的一个或多个知识库索引:") + lines.append(f" {knowledge_base_summary}") + else: + lines.append("- knowledge_base_search tool can only use the following knowledge base indexes, please select the most relevant one or more knowledge base indexes based on the user's question:") + lines.append(f" {knowledge_base_summary}") + + # File URL usage guide + lines.append("") + if language == "zh": + lines.append("### 文件链接使用指南") + lines.append("当处理用户上传的文件时,请根据工具类型选择正确的 URL:") + lines.append("1. 
**调用标记为 [MCP] 的工具**(外部工具,运行在 Nexent 之外):") + if is_manager: + lines.append(" → 使用 **Download URL**(格式:`https://minio.example.com/...?token=xxx`)") + lines.append(" 原因:MCP 工具运行在外部服务,无法访问内部 S3 存储") + else: + lines.append(" → 使用 **presigned_url**(已包含代理前缀,格式:`http://.../api/nb/v1/file/fetch?presigned_url=...`)") + lines.append(" 直接使用用户上传文件信息中提供的 **presigned_url** 字段,无需拼接。") + lines.append("2. **调用其他所有工具**(内部工具,如 analyze_text_file、analyze_image 等):") + lines.append(" → 使用 **S3 URL**(格式:`s3:/nexent/attachments/xxx.pdf`)") + lines.append(" 原因:内部工具运行在 Nexent 内部,可以直接访问 MinIO 存储") + else: + lines.append("### File URL Usage Guide") + lines.append("When processing user-uploaded files, choose the correct URL based on tool type:") + lines.append("1. **Calling tools marked with [MCP]** (external tools that run outside Nexent):") + if is_manager: + lines.append(" → Use **Download URL** (format: `https://minio.example.com/...?token=xxx`)") + lines.append(" Reason: MCP tools run on external services and cannot access internal S3 storage") + else: + lines.append(" → Use **presigned_url** (already includes proxy prefix, format: `http://.../api/nb/v1/file/fetch?presigned_url=...`)") + lines.append(" Directly use the **presigned_url** field provided in the user's uploaded file info. No need to construct or append anything.") + lines.append("2. **Calling all other tools** (internal tools like analyze_text_file, analyze_image):") + lines.append(" → Use **S3 URL** (format: `s3:/nexent/attachments/xxx.pdf`)") + lines.append(" Reason: Internal tools run inside Nexent and can directly access MinIO storage") + + return "\n".join(lines) + + +def _format_managed_agents_description( + managed_agents: Dict[str, Any], + language: str = "zh", +) -> str: + """Format managed sub-agent descriptions with calling specifications. + + Jinja2 templates have ~15 lines of "内部助手调用规范" text that must be + included here for semantic equivalence. 
+ """ + if not managed_agents: + return "" + + lines = [] + + if language == "zh": + lines.append("你可以使用以下内部助手(通过函数调用方式协作):") + for name, agent in managed_agents.items(): + desc = agent.description if hasattr(agent, 'description') else agent.get('description', '') + lines.append(f" - {name}: {desc}") + lines.append("") + lines.append("内部助手调用规范:") + lines.append(" 1. 调用方式:") + lines.append(" - 接受输入:{\"task\": {\"type\": \"string\", \"description\": \"任务描述\"}}") + lines.append(" - 返回输出类型:{\"type\": \"string\", \"description\": \"执行结果\"}") + lines.append(" 2. 使用策略:") + lines.append(" - 任务分解:单次调用中不要让助手一次做过多的事情,任务拆分是你的工作,你需要将复杂任务分解为可管理的子任务") + lines.append(" - 专业匹配:根据助手的专长分配任务") + lines.append(" - 信息整合:整合不同助手的输出生成连贯解决方案") + lines.append(" - 效率优化:避免重复工作") + lines.append(" 3. 协作要求:") + lines.append(" - 评估助手返回的结果") + lines.append(" - 必要时提供额外指导或重新分配任务") + lines.append(" - 在助手结果基础上进行工作,避免重复工作") + lines.append(" - 注意保留子助手回答中的特殊符号,如索引溯源信息等") + else: + lines.append("You can use the following internal agents (via function calls):") + for name, agent in managed_agents.items(): + desc = agent.description if hasattr(agent, 'description') else agent.get('description', '') + lines.append(f" - {name}: {desc}") + lines.append("") + lines.append("Internal agent calling specifications:") + lines.append(" 1. Calling method:") + lines.append(" - Accepts input: {\"task\": {\"type\": \"string\", \"description\": \"task description\"}}") + lines.append(" - Returns output type: {\"type\": \"string\", \"description\": \"execution result\"}") + lines.append(" 2. 
Usage strategy:") + lines.append(" - Task decomposition: Don't let agents do too many things in a single call, task breakdown is your job, you need to decompose complex tasks into manageable subtasks") + lines.append(" - Professional matching: Assign tasks based on agent expertise") + lines.append(" - Information integration: Integrate outputs from different agents to generate coherent solutions") + lines.append(" - Efficiency optimization: Avoid duplicate work") + lines.append(" 3. Collaboration requirements:") + lines.append(" - Evaluate agent returned results") + lines.append(" - Provide additional guidance or reassign tasks when necessary") + lines.append(" - Work based on agent results, avoid duplicate work") + lines.append(" - Pay attention to preserving special symbols in sub-agent answers, such as index traceability information") + + return "\n".join(lines) + + +def _format_external_agents_description( + external_a2a_agents: Dict[str, Any], + language: str = "zh", +) -> str: + """Format external A2A agent descriptions with calling specifications. + + Jinja2 templates have ~5 lines of "外部助手调用规范" text that must be + included here for semantic equivalence. + """ + if not external_a2a_agents: + return "" + + lines = [] + + if language == "zh": + lines.append("你还可以使用以下外部助手(通过 A2A 协议远程调用):") + for agent_id, agent in external_a2a_agents.items(): + name = agent.name if hasattr(agent, 'name') else agent.get('name', '') + desc = agent.description if hasattr(agent, 'description') else agent.get('description', '') + lines.append(f" - {name}: {desc}") + lines.append("") + lines.append("外部助手调用规范:") + lines.append(" 1. 调用格式:`agent_name(task=\"自然语言任务描述\")`,注意:只需要 task 参数,不需要其他参数") + lines.append(" 2. 例如:`tool_assistant(task=\"北京天气怎么样\")`") + lines.append(" 3. 
任务描述使用自然语言,让外部助手自动识别和处理") + else: + lines.append("You can also use the following external agents (called via A2A protocol remotely):") + for agent_id, agent in external_a2a_agents.items(): + name = agent.name if hasattr(agent, 'name') else agent.get('name', '') + desc = agent.description if hasattr(agent, 'description') else agent.get('description', '') + lines.append(f" - {name}: {desc}") + lines.append("") + lines.append("External agent calling specifications:") + lines.append(" 1. Call format: `agent_name(task=\"natural language task description\")`, NOTE: only task parameter is needed, no other parameters") + lines.append(" 2. Example: `tool_assistant(task=\"What's the weather in Beijing?\")`") + lines.append(" 3. Use natural language for task description, let the external agent handle the rest") + + return "\n".join(lines) + + +def _format_skills_usage_requirements( + skills: List[Dict[str, str]], + language: str = "zh", +) -> str: + """Format skills usage requirements section. + + This is the "技能使用要求" section that appears after the skills reference + in the Available Resources section. + """ + if not skills: + no_skills_msg = "- 当前没有可用的技能" if language == "zh" else "- No skills are currently available" + return no_skills_msg + + lines = [] + + if language == "zh": + lines.append("- 你拥有上述 `` 中列出的技能。技能中引用的脚本通过 `run_skill_script()` 函数调用,该函数由平台提供,不需要导入。") + lines.append("") + lines.append("### 技能使用要求") + lines.append("1. **技能优先**:如果用户请求匹配了某个技能的 description,必须先调用 `read_skill_md()` 加载技能指南,再按指南执行。不得跳过技能自行编写代码解决。") + lines.append("2. **忠实执行**:读取技能内容后,严格按技能指南中的步骤操作。不要自行修改流程、跳过步骤或用通用代码替代技能定义的流程。") + lines.append("3. **脚本调用规范**:只使用 `run_skill_script` 工具执行技能指南中明确要求的脚本。传入的 `skill_name` 和 `script_path` 必须与技能指南中的声明完全一致,不要自行拼接或猜测路径。如果需要附加参数,将参数以命令行字符串形式传递给`run_skill_script`。") + lines.append("4. **失败回退**:如果 `read_skill_md` 返回错误或 `run_skill_script` 执行失败,向用户说明情况,并尝试用通用推理模式提供替代方案。") + lines.append("5. 
**技能组合**:如果一个任务需要多个技能配合,按逻辑依赖顺序依次加载和执行,前一个技能的输出可作为后一个技能的输入。") + else: + lines.append("- You have the skills listed in `` above. Scripts referenced in skills are called via the `run_skill_script()` function, which is provided by the platform and does not need to be imported.") + lines.append("") + lines.append("### Skill Usage Requirements") + lines.append("1. **Skill Priority**: If a user request matches a skill's description, you must first call `read_skill_md()` to load the skill guide, then execute per the guide. Do not skip skills and write your own code.") + lines.append("2. **Faithful Execution**: After reading skill content, strictly follow the skill guide's steps. Do not modify the flow, skip steps, or replace with generic code.") + lines.append("3. **Script Calling Specification**: Only use `run_skill_script` to execute scripts explicitly required in the skill guide. The `skill_name` and `script_path` must match the skill guide's declaration exactly. Do not construct or guess paths. For extra params, pass them as a command-line string to `run_skill_script`.") + lines.append("4. **Failure Fallback**: If `read_skill_md` returns an error or `run_skill_script` fails, explain to the user and try to provide an alternative via general reasoning mode.") + lines.append("5. **Skill Combination**: If a task needs multiple skills, load and execute in logical dependency order. 
The output of one skill can be input to the next.") + + return "\n".join(lines) + + +def _format_agent_fallback( + managed_agents: Dict[str, Any], + external_a2a_agents: Dict[str, Any], + language: str = "zh", +) -> str: + """Format fallback message when no agents are available.""" + if managed_agents or external_a2a_agents: + return "" + + return "- 当前没有可用的助手" if language == "zh" else "- No agents are currently available" + + +def _format_app_context(app_name: str, app_description: str, user_id: str) -> str: + """Format application context for system prompt injection.""" + lines = [ + f"Application: {app_name}", + f"Description: {app_description}", + f"Current user: {user_id}", + ] + return "\n".join(lines) + + +# ============================================================================= +# SECTION 2: Skeleton component builders +# These build SystemPromptComponent instances for fixed text sections +# ============================================================================= + + +def build_skeleton_header_component( + app_name: str, + app_description: str, + user_id: str, + language: str = "zh", + priority: int = 100, +) -> "SystemPromptComponent": + """Build SystemPromptComponent for the header section. + + Section: "### 基本信息" / "### Basic Information" + Content: Agent identity, app name/description, user_id. + Note: Current time is intentionally excluded from the system prompt so the + static system prefix can hit the LLM KV/prompt cache across requests. The + current time is injected on the user-message side instead (see CoreAgent.run). 
+ """ + from nexent.core.agents.agent_model import SystemPromptComponent + + if language == "zh": + content = f"### 基本信息\n你是{app_name},{app_description},用户ID为{user_id}" + else: + content = f"### Basic Information\nYou are {app_name}, {app_description}" + + return SystemPromptComponent( + content=content, + template_name="header", + priority=priority, + ) + + +def build_skeleton_duty_component( + duty: str, + language: str = "zh", + priority: int = 80, +) -> "SystemPromptComponent": + """Build SystemPromptComponent for the duty section. + + Section: "### 核心职责" / "### Core Responsibilities" + Content: Agent's primary duty + 5 safety principles + """ + from nexent.core.agents.agent_model import SystemPromptComponent + + if language == "zh": + content = f"### 核心职责\n{duty}\n\n请注意,你应该遵守以下原则:\n行为安全:文件操作必须使用平台提供的专用工具,禁止使用代码直接修改工作空间中的文件;\n法律合规:遵守业务所在国家/地区的法律法规;\n政治中立:保持政治中立,不主动讨论政治话题;\n安全防护:不响应涉及武器制造、网络攻击、欺诈、恶意软件等危险行为的请求;\n伦理准则:拒绝仇恨言论、歧视性内容及违反社会公德和公认伦理标准的请求。" + else: + content = f"### Core Responsibilities\n{duty}\n\nPlease note that you should follow these principles:\nBehavioral Safety: File operations must use the platform-provided dedicated tools; direct code modification of workspace files is prohibited;\nLegal Compliance: Comply with laws and regulations of the business operating jurisdiction;\nPolitical Neutrality: Maintain political neutrality and avoid initiating political discussions;\nSecurity Protection: Do not respond to requests involving weapon manufacturing, cyberattacks, fraud, malware, or other dangerous activities;\nEthical Guidelines: Refuse hate speech, discriminatory content, and any requests that violate social morals and commonly accepted ethical standards." 
+ + return SystemPromptComponent( + content=content, + template_name="duty", + priority=priority, + ) + + +def build_skeleton_execution_flow_component( + memory_list: Optional[List[Any]] = None, + language: str = "zh", + is_manager: bool = True, + priority: int = 60, +) -> "SystemPromptComponent": + """Build SystemPromptComponent for the execution flow section. + + Section: "### 执行流程" / "### Execution Process" + Content: Think/Code loop instructions + output format specs + Note: memory_list affects one line in the Think section (manager only) + """ + from nexent.core.agents.agent_model import SystemPromptComponent + + has_memory = memory_list and len(memory_list) > 0 + + if language == "zh": + lines = ["### 执行流程"] + lines.append("要解决任务,你必须通过一系列步骤向前规划,以'思考:'和'代码:'序列循环进行。**注意:禁止在代码执行前输出'观察结果:',观察结果只能由代码执行后产生。**") + lines.append("") + lines.append("1. 思考:") + lines.append(" - 分析当前任务状态和进展") + if is_manager and has_memory: + lines.append(" - 合理参考之前交互中的上下文记忆信息") + lines.append(" - 定下一步最佳行动(使用工具或分配给助手)") + lines.append(" - 解释你的决策逻辑和预期结果") + lines.append("") + lines.append("2. 代码:") + lines.append(" - 用简单的Python编写代码") + lines.append(" - 遵循python代码规范和python语法") + lines.append(" - 正确调用工具或助手解决问题") + lines.append(" - 考虑到代码执行与展示用户代码的区别,使用'代码'表达运行代码,使用'代码'表达展示代码") + lines.append(" - 注意运行的代码不会被用户看到,所以如果用户需要看到代码,你需要使用'代码'表达展示代码。") + lines.append(" - **重要**:代码执行后,系统会返回 \"Observation:\" 标记的内容(这是真实的执行结果)。请基于这些真实结果继续下一步思考,**不要在代码执行前自行编造观察结果**。") + lines.append("") + lines.append("3. 自验证:") + lines.append(" - 关键事件(工具调用、检索结果、代码执行、助手返回、准备最终回答)后,系统会进行显式自验证。") + lines.append(" - 如果自验证提示存在错误、证据不足、参数不完整或结果不可靠,必须优先修正、补充证据、重新调用工具,或清晰说明无法完成的部分。") + lines.append(" - 最终回答只有在自验证通过后才会展示给用户;如果系统返回 Verification feedback,请把它视为真实观察结果继续修正,不要忽略。") + lines.append("") + lines.append("在思考结束后,当你认为可以回答用户问题,那么可以不生成代码,直接生成最终回答给到用户并停止循环。") + lines.append("") + lines.append("生成最终回答时,你需要遵循以下规范:") + lines.append("1. 
Markdown格式要求:") + lines.append(" - 使用标准Markdown语法格式化输出,支持标题、列表、表格、代码块、链接等") + lines.append(" - 展示图片和视频使用链接方式,不需要外套代码块,格式:[链接文本](URL),图片格式:![alt文本](图片URL),视频格式:") + lines.append(" - 段落之间使用单个空行分隔,避免多个连续空行") + lines.append(" - 数学公式使用标准Markdown格式:行内公式用 $公式$,块级公式用 $$公式$$") + lines.append("") + lines.append("2. 引用标记规范(仅在使用了检索工具时):") + lines.append(" - 引用标记格式必须严格为:`[[字母+数字]]`,例如:`[[a1]]`、`[[b2]]`、`[[c3]]`") + lines.append(" - 字母部分必须是单个小写字母(a-e),数字部分必须是整数") + lines.append(" - 引用标记的字母和数字必须与检索工具的检索结果一一对应") + lines.append(" - 引用标记应紧跟在相关信息或句子之后,通常放在句末或段落末尾") + lines.append(" - 多个引用标记可以连续使用,例如:`[[a1]][[b2]]`") + lines.append(" - **重要**:仅添加引用标记,不要添加链接、参考文献列表等多余内容") + lines.append(" - 如果检索结果中没有匹配的引用,则不显示该引用标记") + lines.append("") + lines.append("3. 格式细节要求:") + lines.append(" - 避免在Markdown中使用HTML标签,优先使用Markdown原生语法") + lines.append(" - 代码块中的代码应保持原始格式,不要添加额外的转义字符") + lines.append(" - 若未使用检索工具,则不添加任何引用标记") + else: + lines = ["### Execution Process"] + lines.append("To solve tasks, you must plan forward through a series of steps in a loop of 'Think:' and 'Code:' sequences. **IMPORTANT: You must NOT output 'Observe Results:' before code execution. Observation results can ONLY be generated after code execution.**") + lines.append("") + lines.append("1. Think:") + lines.append(" - Analyze current task status and progress") + if is_manager and has_memory: + lines.append(" - Reference relevant contextual memories from previous interactions when applicable") + lines.append(" - Determine the best next action (use tools or delegate to agents)") + lines.append(" - Explain your decision logic and expected results") + lines.append("") + lines.append("2. 
Code:") + lines.append(" - Write code in simple Python") + lines.append(" - Follow Python coding standards and Python syntax") + lines.append(" - Correctly call tools or agents to solve problems") + lines.append(" - To distinguish between code execution and displaying user code, use 'code' for executing code and 'code' for displaying code") + lines.append(" - Note that executed code is not visible to users. If users need to see the code, use 'code' for displaying code.") + lines.append(" - **IMPORTANT**: After code execution, the system will return content with \"Observation:\" marker (this is the real execution result). Please continue your next thinking based on these real results. **Do NOT fabricate observation results before code execution.**") + lines.append("") + lines.append("3. Self-verification:") + lines.append(" - After critical events (tool calls, retrieval results, code execution, agent handoffs, and final-answer preparation), the system may run explicit verification.") + lines.append(" - If verification reports errors, insufficient evidence, incomplete parameters, or unreliable results, you must repair the issue, gather more evidence, call tools again, or clearly state what cannot be completed.") + lines.append(" - The final answer is shown to the user only after verification passes. If the system returns Verification feedback, treat it as a real observation and continue revising.") + lines.append("") + lines.append("After thinking, when you believe you can answer the user's question, you can generate a final answer directly to the user without generating code and stop the loop.") + lines.append("") + lines.append("When generating the final answer, you need to follow these specifications:") + lines.append("1. 
**Markdown Format Requirements**:") + lines.append(" - Use standard Markdown syntax to format your output, supporting headings, lists, tables, code blocks, and links.") + lines.append(" - Display images and videos using links instead of wrapping them in code blocks. Use `[link text](URL)` for links, `![alt text](image URL)` for images, and `` for videos.") + lines.append(" - Use a single blank line between paragraphs, avoid multiple consecutive blank lines") + lines.append(" - Mathematical formulas use standard Markdown format: inline formulas use $formula$, block formulas use $$formula$$") + lines.append("") + lines.append("2. **Reference Mark Specifications** (only when retrieval tools are used):") + lines.append(" - Reference mark format must strictly be: `[[letter+number]]`, for example: `[[a1]]`, `[[b2]]`, `[[c3]]`") + lines.append(" - The letter part must be a single lowercase letter (a-e), the number part must be an integer") + lines.append(" - The letters and numbers of reference marks must correspond one-to-one with the retrieval results of retrieval tools") + lines.append(" - Reference marks should be placed immediately after relevant information or sentences, usually at the end of sentences or paragraphs") + lines.append(" - Multiple reference marks can be used consecutively, for example: `[[a1]][[b2]]`") + lines.append(" - **Important**: Only add reference marks, do not add links, reference lists, or other extraneous content") + lines.append(" - If there is no matching reference in the retrieval results, do not display that reference mark") + lines.append("") + lines.append("3. 
**Format Detail Requirements**:") + lines.append(" - Avoid using HTML tags in Markdown, prioritize native Markdown syntax") + lines.append(" - Code in code blocks should maintain original format, do not add extra escape characters") + lines.append(" - If no retrieval tools are used, do not add any reference marks") + + content = "\n".join(lines) + + return SystemPromptComponent( + content=content, + template_name="execution_flow", + priority=priority, + ) + + +def build_skeleton_constraint_component( + constraint: str, + language: str = "zh", + priority: int = 30, +) -> "SystemPromptComponent": + """Build SystemPromptComponent for the constraint section. + + Section: "### 资源使用要求" / "### Resource Usage Requirements" + Content: User-defined constraint text + """ + from nexent.core.agents.agent_model import SystemPromptComponent + + if language == "zh": + content = f"### 资源使用要求\n{constraint}" + else: + content = f"### Resource Usage Requirements\n{constraint}" + + return SystemPromptComponent( + content=content, + template_name="constraint", + priority=priority, + ) + + +def build_skeleton_code_norms_component( + language: str = "zh", + is_manager: bool = True, + priority: int = 20, +) -> "SystemPromptComponent": + """Build SystemPromptComponent for the Python code norms section. + + Section: "### python代码规范" / "### Python Code Specifications" + Content: 12 fixed code rules (11 for managed agents) + """ + from nexent.core.agents.agent_model import SystemPromptComponent + + if language == "zh": + lines = ["### python代码规范"] + lines.append("1. 如果认为是需要执行的代码,使用'代码'格式;如果是不需要执行仅用于展示的代码,使用'代码'格式,其中语言类型例如python、java、javascript等;") + lines.append("2. 只使用已定义的变量,变量将在多次调用之间持续保持;") + lines.append("3. 使用\"print()\"函数让下一次的模型调用看到对应变量信息;") + lines.append("4. 正确使用工具/助手的入参,使用关键字参数,不要用字典形式;") + lines.append("5. 避免在一轮对话中进行过多的工具/助手调用,这会导致输出格式难以预测;") + lines.append("6. 只在需要时调用工具/助手,不重复相同参数的调用;") + lines.append("7. 
使用变量名保存函数调用结果,在每个中间步骤中,您可以使用\"print()\"来保存您需要的任何重要信息。被保存的信息在代码执行之间保持。print()输出的内容应被视为字符串,不要对其进行字典相关操作如.get()、[]等,避免类型错误;") + lines.append("9. 示例中的代码避免出现**if**、**for**等逻辑,仅调用工具/助手,示例中的每一次的行动都是确定事件。如果有不同的条件,你应该给出不同条件下的示例;") + lines.append("10. 工具调用使用关键字参数,如:tool_name(param1=\"value1\", param2=\"value2\");") + if is_manager: + lines.append("11. 助手调用必须使用task参数,如:assistant_name(task=\"任务描述\");") + lines.append("12. 不要放弃!你负责解决任务,而不是提供解决方向。") + else: + lines = ["### Python Code Specifications"] + lines.append("1. If it is considered to be code that needs to be executed, use 'code'. If the code does not need to be executed for display only, use 'code', where language_type can be python, java, javascript, etc;") + lines.append("2. Only use defined variables, variables will persist between multiple calls;") + lines.append("3. Use \"print()\" function to let the next model call see corresponding variable information;") + lines.append("4. Use tool/agent input parameters correctly, use keyword arguments, not dictionary format;") + lines.append("5. Avoid making too many tool/agent calls in one round of conversation, as this will make the output format unpredictable;") + lines.append("6. Only call tools/agents when needed, do not repeat calls with the same parameters;") + lines.append("7. Use variable names to save function call results. In each intermediate step, you can use \"print()\" to save any important information you need. The saved information persists between code executions. The content printed by print() should be treated as a string, do not perform dictionary-related operations such as .get(), [] etc., to avoid type errors;") + lines.append("8. Avoid **if**, **for** and other logic in example code, only call tools/agents. Each action in the example is a deterministic event. If there are different conditions, you should provide examples under different conditions;") + lines.append("9. 
Tool calls use keyword arguments, such as: tool_name(param1=\"value1\", param2=\"value2\");") + if is_manager: + lines.append("10. Agent calls must use task parameter, such as: agent_name(task=\"task description\");") + lines.append("11. Don't give up! You are responsible for solving the task, not providing solution directions.") + + content = "\n".join(lines) + + return SystemPromptComponent( + content=content, + template_name="code_norms", + priority=priority, + ) + + +def build_skeleton_footer_component( + few_shots: str, + language: str = "zh", + priority: int = 10, +) -> "SystemPromptComponent": + """Build SystemPromptComponent for the footer section. + + Section: "### 示例模板" + ending + Content: few_shots + "$1M reward" ending + """ + from nexent.core.agents.agent_model import SystemPromptComponent + + if language == "zh": + content = f"### 示例模板\n{few_shots}\n\n现在开始!如果你正确解决任务,你将获得100万美元的奖励。" + else: + content = f"### Example Templates\n{few_shots}\n\nNow start! If you solve the task correctly, you will receive a reward of 1 million dollars." + + return SystemPromptComponent( + content=content, + template_name="footer", + priority=priority, + ) + + +# ============================================================================= +# SECTION 3: Piecewise component builders (existing, enhanced) +# ============================================================================= + + +def build_tools_component( + tools: Dict[str, Any], + knowledge_base_summary: Optional[str] = None, + language: str = "zh", + is_manager: bool = True, + priority: int = 50, +) -> "ToolsComponent": + """Build ToolsComponent from tool configurations. 
+ + Args: + tools: Dict of tool name -> ToolConfig or tool dict + knowledge_base_summary: Summary text from knowledge bases + language: Language code ('zh' or 'en') + is_manager: Whether this is a manager agent + priority: Component priority for selection + + Returns: + ToolsComponent instance + """ + from nexent.core.agents.agent_model import ToolsComponent + + tool_list = [] + for name, tool in tools.items(): + if hasattr(tool, 'description'): + tool_dict = { + "name": name, + "description": tool.description, + "inputs": getattr(tool, 'inputs', ''), + "output_type": getattr(tool, 'output_type', ''), + "source": getattr(tool, 'source', 'local'), + } + else: + tool_dict = { + "name": name, + "description": tool.get('description', ''), + "inputs": tool.get('inputs', ''), + "output_type": tool.get('output_type', ''), + "source": tool.get('source', 'local'), + } + tool_list.append(tool_dict) + + formatted_desc = _format_tools_description( + tools, + knowledge_base_summary=knowledge_base_summary, + language=language, + is_manager=is_manager, + ) + return ToolsComponent( + tools=tool_list, + formatted_description=formatted_desc, + priority=priority, + ) + + +def build_skills_component( + skills: List[Dict[str, str]], + language: str = "zh", + priority: int = 70, +) -> "SkillsComponent": + """Build SkillsComponent from skill configurations. 
+ + Args: + skills: List of skill dicts with name and description + language: Language code ('zh' or 'en') + priority: Component priority for selection + + Returns: + SkillsComponent instance + """ + from nexent.core.agents.agent_model import SkillsComponent + + formatted_desc = _format_skills_description(skills, language=language) + return SkillsComponent( + skills=skills, + formatted_description=formatted_desc, + priority=priority, + ) + + +def build_memory_component( + memory_list: List[Any], + search_query: Optional[str] = None, + language: str = "zh", + priority: int = 90, +) -> "MemoryComponent": + """Build MemoryComponent from memory search results. + + Args: + memory_list: List of memory search results + search_query: Query used to search memory + language: Language code ('zh' or 'en') + priority: Component priority for selection + + Returns: + MemoryComponent instance + """ + from nexent.core.agents.agent_model import MemoryComponent + + memories = [] + for mem in memory_list: + if isinstance(mem, dict): + memories.append({ + "content": mem.get('memory', '') or mem.get('content', ''), + "memory_type": mem.get('memory_type', 'user'), + "metadata": mem.get('metadata', {}), + }) + elif isinstance(mem, str): + memories.append({ + "content": mem, + "memory_type": "user", + "metadata": {}, + }) + + formatted_content = _format_memory_context(memory_list, language=language) + return MemoryComponent( + memories=memories, + formatted_content=formatted_content, + search_query=search_query, + priority=priority, + ) + + +def build_knowledge_base_component( + knowledge_base_summary: str, + kb_ids: Optional[List[str]] = None, + priority: int = 10, +) -> "KnowledgeBaseComponent": + """Build KnowledgeBaseComponent from knowledge base summary. 
+ + Args: + knowledge_base_summary: Summary text from knowledge bases + kb_ids: List of knowledge base IDs used + priority: Component priority for selection + + Returns: + KnowledgeBaseComponent instance + """ + from nexent.core.agents.agent_model import KnowledgeBaseComponent + + return KnowledgeBaseComponent( + summary=knowledge_base_summary, + kb_ids=kb_ids or [], + priority=priority, + ) + + +def build_managed_agents_component( + managed_agents: Dict[str, Any], + language: str = "zh", + priority: int = 45, +) -> "ManagedAgentsComponent": + """Build ManagedAgentsComponent from managed sub-agent configurations. + + Args: + managed_agents: Dict of agent name -> AgentConfig + language: Language code ('zh' or 'en') + priority: Component priority for selection + + Returns: + ManagedAgentsComponent instance + """ + from nexent.core.agents.agent_model import ManagedAgentsComponent + + agent_list = [] + for name, agent in managed_agents.items(): + if hasattr(agent, 'description'): + agent_dict = { + "name": name, + "description": agent.description, + "tools": [], + } + if hasattr(agent, 'tools'): + agent_dict["tools"] = [t.name for t in agent.tools if hasattr(t, 'name')] + else: + agent_dict = { + "name": name, + "description": agent.get('description', ''), + "tools": [], + } + agent_list.append(agent_dict) + + formatted_desc = _format_managed_agents_description(managed_agents, language=language) + return ManagedAgentsComponent( + agents=agent_list, + formatted_description=formatted_desc, + priority=priority, + ) + + +def build_external_agents_component( + external_a2a_agents: Dict[str, Any], + language: str = "zh", + priority: int = 44, +) -> "ExternalAgentsComponent": + """Build ExternalAgentsComponent from external A2A agent configurations. 
+ + Args: + external_a2a_agents: Dict of agent_id -> ExternalA2AAgentConfig + language: Language code ('zh' or 'en') + priority: Component priority for selection + + Returns: + ExternalAgentsComponent instance + """ + from nexent.core.agents.agent_model import ExternalAgentsComponent + + agent_list = [] + for agent_id, agent in external_a2a_agents.items(): + if hasattr(agent, 'agent_id'): + agent_dict = { + "agent_id": str(agent.agent_id), + "name": agent.name, + "description": agent.description, + "url": getattr(agent, 'url', ''), + } + else: + agent_dict = { + "agent_id": str(agent_id), + "name": agent.get('name', ''), + "description": agent.get('description', ''), + "url": agent.get('url', ''), + } + agent_list.append(agent_dict) + + formatted_desc = _format_external_agents_description(external_a2a_agents, language=language) + return ExternalAgentsComponent( + agents=agent_list, + formatted_description=formatted_desc, + priority=priority, + ) + + +def build_system_prompt_component( + content: str, + template_name: Optional[str] = None, + priority: int = 100, +) -> "SystemPromptComponent": + """Build SystemPromptComponent with rendered content. + + Args: + content: Rendered system prompt content + template_name: Source template name for reference + priority: Component priority (highest by default) + + Returns: + SystemPromptComponent instance + """ + from nexent.core.agents.agent_model import SystemPromptComponent + + return SystemPromptComponent( + content=content, + template_name=template_name, + priority=priority, + ) + + +def build_skills_usage_component( + skills: List[Dict[str, str]], + language: str = "zh", + priority: int = 40, +) -> "SystemPromptComponent": + """Build SystemPromptComponent for skills usage requirements. + + This is a skeleton-like component but its content depends on + whether skills exist, so it's built dynamically. 
+ + Args: + skills: List of skill dicts + language: Language code ('zh' or 'en') + priority: Component priority + + Returns: + SystemPromptComponent instance + """ + from nexent.core.agents.agent_model import SystemPromptComponent + + content = _format_skills_usage_requirements(skills, language=language) + return SystemPromptComponent( + content=content, + template_name="skills_usage", + priority=priority, + ) + + +def build_agent_fallback_component( + managed_agents: Dict[str, Any], + external_a2a_agents: Dict[str, Any], + language: str = "zh", + priority: int = 5, +) -> "SystemPromptComponent": + """Build SystemPromptComponent for agent fallback message. + + Only emits content when no agents are available. + + Args: + managed_agents: Dict of managed agents + external_a2a_agents: Dict of external agents + language: Language code + priority: Component priority + + Returns: + SystemPromptComponent instance (may have empty content) + """ + from nexent.core.agents.agent_model import SystemPromptComponent + + content = _format_agent_fallback(managed_agents, external_a2a_agents, language=language) + return SystemPromptComponent( + content=content, + template_name="agent_fallback", + priority=priority, + ) + + +# ============================================================================= +# SECTION 4: Main assembly function - build_context_components +# ============================================================================= + + +def build_context_components( + # Raw params for piecewise assembly (NEW in Goal 3) + duty: Optional[str] = None, + constraint: Optional[str] = None, + few_shots: Optional[str] = None, + app_name: Optional[str] = None, + app_description: Optional[str] = None, + user_id: Optional[str] = None, + language: str = "zh", + is_manager: bool = True, + # Piecewise data sources + tools: Optional[Dict[str, Any]] = None, + skills: Optional[List[Dict[str, str]]] = None, + managed_agents: Optional[Dict[str, Any]] = None, + external_a2a_agents: 
Optional[Dict[str, Any]] = None, + memory_list: Optional[List[Any]] = None, + memory_search_query: Optional[str] = None, + knowledge_base_summary: Optional[str] = None, + kb_ids: Optional[List[str]] = None, + # Legacy param for fallback (removed short-circuit in Goal 3) + system_prompt: Optional[str] = None, + # Inclusion flags (kept for backward compatibility) + include_tools: bool = True, + include_skills: bool = True, + include_memory: bool = True, + include_knowledge_base: bool = True, + include_managed_agents: bool = True, + include_external_agents: bool = True, + include_app_context: bool = True, +) -> List["ContextComponent"]: + """Build list of ContextComponents from agent configuration data. + + Piecewise assembly: Each semantic section is emitted as a dedicated + ContextComponent, assembled in the exact order matching Jinja2 templates. + + Assembly order (12 sections): + 1. Header (基本信息) + 2. Memory (上下文记忆) - if memory_list exists + 3. Duty (核心职责 + 安全准则) + 4. Skills (可用技能 + 6步流程) - if skills exist + 5. Execution Flow (执行流程 + 输出规范) + 6. Tools (可用资源/1. 工具 + 文件链接指南) + 7. Managed Agents (可用资源/2. 助手) - if managed_agents exist + 8. External Agents (外部助手) - if external_a2a_agents exist + 9. Agent Fallback (当前没有可用的助手) - if no agents + 10. Skills Usage (可用资源/3. 技能 + 使用要求) + 11. Constraint (资源使用要求) + 12. Code Norms (python代码规范) + 13. Footer (示例模板 + 结尾) + + Note: The a330d815 short-circuit (if system_prompt: return [single]) + has been REMOVED. All callers must provide raw params for piecewise assembly. + The system_prompt param is kept for future fallback use but not currently + used in the piecewise path. 
+ + Args: + duty: Agent's primary duty text + constraint: Resource usage constraint text + few_shots: Example templates text + app_name: Application name + app_description: Application description + user_id: Current user ID + language: Language code ('zh' or 'en') + is_manager: Whether this is a manager agent + tools: Dict of tool name -> ToolConfig + skills: List of skill dicts with name and description + managed_agents: Dict of agent name -> AgentConfig + external_a2a_agents: Dict of agent_id -> ExternalA2AAgentConfig + memory_list: List of memory search results + memory_search_query: Query used to search memory + knowledge_base_summary: Summary text from knowledge bases + kb_ids: List of knowledge base IDs + system_prompt: (Legacy) Pre-rendered system prompt - NOT USED in piecewise path + include_*: Flags for backward compatibility + + Returns: + List of ContextComponent instances ready for ContextManager + """ + components: List = [] + + # 1. Header + if app_name and app_description and user_id: + components.append( + build_skeleton_header_component( + app_name=app_name, + app_description=app_description, + user_id=user_id, + language=language, + ) + ) + + # 2. Memory (if exists) + if include_memory and memory_list: + components.append( + build_memory_component( + memory_list=memory_list, + search_query=memory_search_query, + language=language, + ) + ) + + # 3. Duty + Safety Principles + if duty: + components.append( + build_skeleton_duty_component( + duty=duty, + language=language, + ) + ) + + # 4. Skills (if exists) - includes 6-step process + if include_skills and skills: + components.append( + build_skills_component( + skills=skills, + language=language, + ) + ) + + # 5. Execution Flow + components.append( + build_skeleton_execution_flow_component( + memory_list=memory_list, + language=language, + is_manager=is_manager, + ) + ) + + # 6. 
Tools + File URL Guide + if include_tools and tools: + components.append( + build_tools_component( + tools=tools, + knowledge_base_summary=knowledge_base_summary, + language=language, + is_manager=is_manager, + ) + ) + + # 7. Managed Agents (if exists) - manager only + if is_manager and include_managed_agents and managed_agents: + components.append( + build_managed_agents_component( + managed_agents=managed_agents, + language=language, + ) + ) + + # 8. External Agents (if exists) - manager only + if is_manager and include_external_agents and external_a2a_agents: + components.append( + build_external_agents_component( + external_a2a_agents=external_a2a_agents, + language=language, + ) + ) + + # 9. Agent Fallback (if no agents available) - manager only + if is_manager and not managed_agents and not external_a2a_agents: + fallback_comp = build_agent_fallback_component( + managed_agents=managed_agents or {}, + external_a2a_agents=external_a2a_agents or {}, + language=language, + ) + if fallback_comp.content: # Only add if has content + components.append(fallback_comp) + + # 10. Skills Usage Requirements + if include_skills: + components.append( + build_skills_usage_component( + skills=skills or [], + language=language, + ) + ) + + # 11. Constraint + if constraint: + components.append( + build_skeleton_constraint_component( + constraint=constraint, + language=language, + ) + ) + + # 12. Code Norms + components.append( + build_skeleton_code_norms_component( + language=language, + is_manager=is_manager, + ) + ) + + # 13. Footer + if few_shots: + components.append( + build_skeleton_footer_component( + few_shots=few_shots, + language=language, + ) + ) + + return components + + +def build_app_context_string( + app_name: str, + app_description: str, + user_id: str, +) -> str: + """Build app context string for template injection. 
+ + Args: + app_name: Application name + app_description: Application description + user_id: Current user ID + + Returns: + Formatted app context string + """ + return _format_app_context(app_name, app_description, user_id) diff --git a/backend/utils/file_management_utils.py b/backend/utils/file_management_utils.py index 7d31a74bb..83c3957e7 100644 --- a/backend/utils/file_management_utils.py +++ b/backend/utils/file_management_utils.py @@ -2,6 +2,7 @@ import logging import os import subprocess +import time import traceback from pathlib import Path from typing import List @@ -15,7 +16,6 @@ from consts.model import ProcessParams from database.attachment_db import get_file_size_from_minio from utils.auth_utils import get_current_user_id -from utils.config_utils import tenant_config_manager logger = logging.getLogger("file_management_utils") @@ -45,18 +45,13 @@ async def trigger_data_process(files: List[dict], process_params: ProcessParams) if not files: return None - # Get chunking size according to the embedding model - embedding_model_id = None + # Get tenant_id from authorization for downstream task processing + embedding_model_id = process_params.model_id tenant_id = None try: _, tenant_id = get_current_user_id(process_params.authorization) - # Get embedding model ID from tenant config - tenant_config = tenant_config_manager.load_config(tenant_id) - embedding_model_id_str = tenant_config.get("EMBEDDING_ID") if tenant_config else None - if embedding_model_id_str: - embedding_model_id = int(embedding_model_id_str) except Exception as e: - logger.warning(f"Failed to get embedding model ID for tenant: {e}") + logger.warning(f"Failed to get tenant_id from authorization: {e}") # Build headers with authorization headers = { @@ -134,19 +129,23 @@ async def trigger_data_process(files: List[dict], process_params: ProcessParams) async def get_all_files_status(index_name: str): """ - Get status for all files according to index_name, matching corresponding tasks, + Get status 
for all files according to index_name, matching corresponding tasks, and then convert to custom state - + Args: index_name: Index name to filter tasks - + Returns: Dictionary with path_or_url as keys and dict values: {state, latest_task_id} """ + start_time = time.time() try: try: async with httpx.AsyncClient() as client: response = await client.get(f"{DATA_PROCESS_SERVICE}/tasks/indices/{index_name}", timeout=10.0) + http_duration = time.time() - start_time + logger.info(f"[get_all_files_status] HTTP request to {DATA_PROCESS_SERVICE}/tasks/indices/{index_name} " + f"completed in {http_duration:.3f}s, status={response.status_code}") if response.status_code == 200: tasks_list = response.json() else: @@ -214,41 +213,46 @@ async def get_all_files_status(index_name: str): file_state['total_chunks'] = task_info.get( 'total_chunks', file_state.get('total_chunks')) result = {} + # Use local fallback logic for state conversion (avoiding HTTP call to external service) + # The conversion logic is simple and can be done locally + step_local_start = time.time() + + # Batch fetch progress info from Redis for all task_ids (single round-trip) + redis_progress_batch = {} + if file_states: + try: + from services.redis_service import get_redis_service + redis_service = get_redis_service() + all_task_ids = [fs.get('latest_task_id', '') for fs in file_states.values()] + all_task_ids = [tid for tid in all_task_ids if tid] + if all_task_ids: + redis_progress_batch = redis_service.batch_get_progress_info(all_task_ids) or {} + except Exception as e: + logger.debug(f"Failed to batch get Redis progress info: {e}") + for path_or_url, file_state in file_states.items(): - # Call remote state conversion API so this service no longer depends on Celery - custom_state = await _convert_to_custom_state( + custom_state = _convert_to_custom_state_local( process_celery_state=file_state['process_state'] or '', forward_celery_state=file_state['forward_state'] or '' ) - # Try to get progress from Redis - 
always check Redis for real-time progress - # especially when task is in progress (FORWARDING or PROCESSING) + + # Get progress from pre-fetched batch Redis data processed_chunks = file_state.get('processed_chunks') total_chunks = file_state.get('total_chunks') task_id = file_state['latest_task_id'] or '' - # Always try to get latest progress from Redis if task_id exists - # Redis has the most up-to-date progress during vectorization - if task_id: - try: - from services.redis_service import get_redis_service - redis_service = get_redis_service() - progress_info = redis_service.get_progress_info(task_id) - if progress_info: - # Use Redis progress as primary source (it's updated in real-time) - redis_processed = progress_info.get('processed_chunks') - redis_total = progress_info.get('total_chunks') - if redis_processed is not None: - processed_chunks = redis_processed - if redis_total is not None: - total_chunks = redis_total - logger.debug( - f"Retrieved progress from Redis for task {task_id}: {processed_chunks}/{total_chunks}") - else: - logger.debug( - f"No progress info in Redis for task {task_id}, using task state values: {processed_chunks}/{total_chunks}") - except Exception as e: + # Use pre-fetched batch Redis data for progress + if task_id and task_id in redis_progress_batch: + progress_info = redis_progress_batch.get(task_id) + if progress_info: + redis_processed = progress_info.get('processed_chunks') + redis_total = progress_info.get('total_chunks') + if redis_processed is not None: + processed_chunks = redis_processed + if redis_total is not None: + total_chunks = redis_total logger.debug( - f"Failed to get progress from Redis for task {task_id}: {str(e)}") + f"Retrieved progress from batch Redis for task {task_id}: {processed_chunks}/{total_chunks}") result[path_or_url] = { 'state': custom_state, @@ -259,41 +263,26 @@ async def get_all_files_status(index_name: str): 'processed_chunks': processed_chunks, 'total_chunks': total_chunks, } + 
step_local_duration = time.time() - step_local_start + logger.info(f"[get_all_files_status] Local processing: {len(result)} files in {step_local_duration:.3f}s") + total_duration = time.time() - start_time + logger.info(f"[get_all_files_status] Complete: {len(result)} files processed in {total_duration:.3f}s") return result except Exception as e: logger.error(f"Error getting all files status for index {index_name}, details: {str(e)} {traceback.format_exc()}") return {} # Return empty dict on error -async def _convert_to_custom_state(process_celery_state: str, forward_celery_state: str) -> str: - """Delegates Celery-state conversion to the data-process service. - - This removes the direct dependency on the *celery* package for callers of - `file_management_utils`. +def _convert_to_custom_state_local(process_celery_state: str, forward_celery_state: str) -> str: + """ + Local state conversion logic - handles all known Celery states. + Returns "UNKNOWN" only if the states are not recognized. 
""" - try: - payload = { - "process_state": process_celery_state, - "forward_state": forward_celery_state, - } - - async with httpx.AsyncClient() as client: - response = await client.post(f"{DATA_PROCESS_SERVICE}/tasks/convert_state", json=payload, timeout=5.0) - - if response.status_code == 200: - return response.json().get("state", "WAIT_FOR_PROCESSING") - else: - logger.warning( - "State conversion service error: %s - %s", response.status_code, response.text - ) - except Exception as e: - logger.warning("Failed to convert state via service: %s", str(e)) - - # Fallback mapping without Celery dependency (string comparison only) success = "SUCCESS" failure = "FAILURE" pending = "PENDING" started = "STARTED" + unknown = "UNKNOWN" if process_celery_state == failure: return "PROCESS_FAILED" @@ -304,6 +293,11 @@ async def _convert_to_custom_state(process_celery_state: str, forward_celery_sta if not process_celery_state and not forward_celery_state: return "WAIT_FOR_PROCESSING" + # Check if states are known Celery states + known_states = {success, failure, pending, started, ""} + if process_celery_state not in known_states or forward_celery_state not in known_states: + return unknown + forward_state_map = { pending: "WAIT_FOR_FORWARDING", started: "FORWARDING", diff --git a/backend/utils/http_client_utils.py b/backend/utils/http_client_utils.py new file mode 100644 index 000000000..fd215c067 --- /dev/null +++ b/backend/utils/http_client_utils.py @@ -0,0 +1,22 @@ +"""HTTP client factory utilities shared across services.""" + +import httpx +from httpx import AsyncClient + + +def create_httpx_client( + headers: dict[str, str] | None = None, + timeout: httpx.Timeout | None = None, + auth: httpx.Auth | None = None, + follow_redirects: bool = True, + **extra_kwargs, +) -> AsyncClient: + return AsyncClient( + headers=headers, + timeout=timeout, + auth=auth, + follow_redirects=follow_redirects, + trust_env=False, + verify=False, + **extra_kwargs, + ) diff --git 
a/backend/utils/llm_utils.py b/backend/utils/llm_utils.py index d1aa6fcf3..f7caba37d 100644 --- a/backend/utils/llm_utils.py +++ b/backend/utils/llm_utils.py @@ -6,6 +6,7 @@ from consts.exceptions import AppException from database.model_management_db import get_model_by_model_id from nexent.core.models import OpenAIModel +from nexent.monitor import set_monitoring_context, set_monitoring_operation from utils.config_utils import get_model_name_from_config logger = logging.getLogger("llm_utils") @@ -66,6 +67,14 @@ def call_llm_for_system_prompt( """ llm_model_config = get_model_by_model_id(model_id=model_id, tenant_id=tenant_id) + display_name = llm_model_config.get("display_name", "") if llm_model_config else "" + if tenant_id: + set_monitoring_context(tenant_id=tenant_id) + set_monitoring_operation("system_prompt_generation", + display_name=display_name or None) + + timeout_seconds = llm_model_config.get("timeout_seconds") if llm_model_config else None + llm = OpenAIModel( model_id=get_model_name_from_config(llm_model_config) if llm_model_config else "", api_base=llm_model_config.get("base_url", "") if llm_model_config else "", @@ -74,6 +83,8 @@ def call_llm_for_system_prompt( top_p=0.95, model_factory=llm_model_config.get("model_factory") if llm_model_config else None, ssl_verify=llm_model_config.get("ssl_verify", True) if llm_model_config else True, + display_name=display_name or None, + timeout_seconds=timeout_seconds, ) messages = [ {"role": MESSAGE_ROLE["SYSTEM"], "content": system_prompt}, @@ -92,9 +103,21 @@ def call_llm_for_system_prompt( reasoning_content_seen = False content_tokens_seen = 0 for chunk in current_request: - delta = chunk.choices[0].delta + choices = getattr(chunk, "choices", None) + if choices is None: + logger.warning("Received non-standard chunk without choices during prompt generation.") + continue + if not choices: + logger.debug("Received empty choices chunk during prompt generation; skipping.") + continue + + delta = 
getattr(choices[0], "delta", None) + if delta is None: + logger.debug("Skipping LLM stream chunk without delta") + continue + reasoning_content = getattr(delta, "reasoning_content", None) - new_token = delta.content + new_token = getattr(delta, "content", None) # Note: reasoning_content is separate metadata and doesn't affect content filtering # We only filter content based on tags in delta.content diff --git a/backend/utils/memory_utils.py b/backend/utils/memory_utils.py index ada7019a1..e3ba01d6d 100644 --- a/backend/utils/memory_utils.py +++ b/backend/utils/memory_utils.py @@ -1,4 +1,5 @@ import logging +import re from typing import Dict, Any from urllib.parse import urlparse @@ -9,6 +10,11 @@ logger = logging.getLogger("memory_utils") +def _sanitize_index_component(value: str) -> str: + """Convert arbitrary text into an Elasticsearch-safe index component.""" + return re.sub(r"[^a-z0-9_.-]", "_", value.lower()) + + def build_memory_config(tenant_id: str) -> Dict[str, Any]: """Return a fully-validated configuration dictionary for *mem0* ``Memory``. 
""" @@ -30,9 +36,8 @@ def build_memory_config(tenant_id: str) -> Dict[str, Any]: es_host = f"{parsed.scheme}://{parsed.hostname}" es_port = parsed.port # Normalize repo/name to avoid problematic characters in index names - safe_repo = embed_raw["model_repo"].lower().replace( - "/", "_") if embed_raw["model_repo"] else "" - safe_name = embed_raw["model_name"].lower().replace("/", "_") + safe_repo = _sanitize_index_component(embed_raw["model_repo"]) if embed_raw["model_repo"] else "" + safe_name = _sanitize_index_component(embed_raw["model_name"]) index_name = ( f"mem0_{safe_repo}_{safe_name}_{embed_raw['max_tokens']}" if embed_raw["model_repo"] @@ -73,4 +78,4 @@ def build_memory_config(tenant_id: str) -> Dict[str, Any]: }, "telemetry": {"enabled": False}, } - return memory_config \ No newline at end of file + return memory_config diff --git a/backend/utils/monitoring.py b/backend/utils/monitoring.py index eb20d88ec..e6da57041 100644 --- a/backend/utils/monitoring.py +++ b/backend/utils/monitoring.py @@ -2,12 +2,12 @@ Global Monitoring Manager for Backend This module initializes and configures the global monitoring manager instance -with backend environment variables. All other backend modules should import -`monitoring_manager` directly from this module. +with backend environment variables using OTLP protocol. All other backend modules +should import `monitoring_manager` directly from this module. 
Usage: from utils.monitoring import monitoring_manager - + @monitoring_manager.monitor_endpoint("my_service.my_function") async def my_function(): return {"status": "ok"} @@ -17,67 +17,88 @@ async def my_function(): MonitoringConfig, get_monitoring_manager ) -# Import configuration from backend (support both relative and absolute imports) try: - # Try relative import first (when running from backend directory) from consts.const import ( ENABLE_TELEMETRY, - SERVICE_NAME, - JAEGER_ENDPOINT, - PROMETHEUS_PORT, - TELEMETRY_SAMPLE_RATE, - LLM_SLOW_REQUEST_THRESHOLD_SECONDS, - LLM_SLOW_TOKEN_RATE_THRESHOLD + MONITORING_PROVIDER, + MONITORING_PROJECT_NAME, + OTEL_SERVICE_NAME, + OTEL_EXPORTER_OTLP_ENDPOINT, + OTEL_EXPORTER_OTLP_TRACES_ENDPOINT, + OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, + OTEL_EXPORTER_OTLP_PROTOCOL, + OTEL_EXPORTER_OTLP_METRICS_ENABLED, + MONITORING_INSTRUMENT_REQUESTS, + MONITORING_FASTAPI_INCLUDED_URLS, + MONITORING_FASTAPI_EXCLUDED_URLS, + MONITORING_FASTAPI_EXCLUDE_SPANS, + MONITORING_TRACE_CONTENT_MODE, + MONITORING_TRACE_MAX_CHARS, + MONITORING_TRACE_MAX_ITEMS, + OTLP_HEADERS, + TELEMETRY_SAMPLE_RATE ) except ImportError: - # Fallback to absolute import (when running from project root) from backend.consts.const import ( ENABLE_TELEMETRY, - SERVICE_NAME, - JAEGER_ENDPOINT, - PROMETHEUS_PORT, - TELEMETRY_SAMPLE_RATE, - LLM_SLOW_REQUEST_THRESHOLD_SECONDS, - LLM_SLOW_TOKEN_RATE_THRESHOLD + MONITORING_PROVIDER, + MONITORING_PROJECT_NAME, + OTEL_SERVICE_NAME, + OTEL_EXPORTER_OTLP_ENDPOINT, + OTEL_EXPORTER_OTLP_TRACES_ENDPOINT, + OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, + OTEL_EXPORTER_OTLP_PROTOCOL, + OTEL_EXPORTER_OTLP_METRICS_ENABLED, + MONITORING_INSTRUMENT_REQUESTS, + MONITORING_FASTAPI_INCLUDED_URLS, + MONITORING_FASTAPI_EXCLUDED_URLS, + MONITORING_FASTAPI_EXCLUDE_SPANS, + MONITORING_TRACE_CONTENT_MODE, + MONITORING_TRACE_MAX_CHARS, + MONITORING_TRACE_MAX_ITEMS, + OTLP_HEADERS, + TELEMETRY_SAMPLE_RATE ) import logging logger = logging.getLogger(__name__) -# 
============================================================================ -# Global Monitoring Manager Instance -# ============================================================================ - -# Get the global monitoring manager instance monitoring_manager = get_monitoring_manager() -# Initialize monitoring configuration immediately when this module is imported - def _initialize_monitoring(): - """Initialize monitoring configuration with backend environment variables.""" + """Initialize monitoring configuration with OTLP settings.""" config = MonitoringConfig( enable_telemetry=ENABLE_TELEMETRY, - service_name=SERVICE_NAME, - jaeger_endpoint=JAEGER_ENDPOINT, - prometheus_port=PROMETHEUS_PORT, + service_name=OTEL_SERVICE_NAME, + provider=MONITORING_PROVIDER or "otlp", + otlp_endpoint=OTEL_EXPORTER_OTLP_ENDPOINT, + otlp_traces_endpoint=OTEL_EXPORTER_OTLP_TRACES_ENDPOINT or None, + otlp_metrics_endpoint=OTEL_EXPORTER_OTLP_METRICS_ENDPOINT or None, + otlp_protocol=OTEL_EXPORTER_OTLP_PROTOCOL, + otlp_headers=OTLP_HEADERS, + export_metrics=OTEL_EXPORTER_OTLP_METRICS_ENABLED, + instrument_requests=MONITORING_INSTRUMENT_REQUESTS, + fastapi_included_urls=MONITORING_FASTAPI_INCLUDED_URLS, + fastapi_excluded_urls=MONITORING_FASTAPI_EXCLUDED_URLS, + fastapi_exclude_spans=MONITORING_FASTAPI_EXCLUDE_SPANS, + project_name=MONITORING_PROJECT_NAME or None, telemetry_sample_rate=TELEMETRY_SAMPLE_RATE, - llm_slow_request_threshold_seconds=LLM_SLOW_REQUEST_THRESHOLD_SECONDS, - llm_slow_token_rate_threshold=LLM_SLOW_TOKEN_RATE_THRESHOLD + trace_content_mode=MONITORING_TRACE_CONTENT_MODE, + trace_max_chars=MONITORING_TRACE_MAX_CHARS, + trace_max_items=MONITORING_TRACE_MAX_ITEMS ) - # Configure the SDK monitoring system using the singleton monitoring_manager.configure(config) logger.info( - f"Global monitoring initialized: service_name={SERVICE_NAME}, enable_telemetry={ENABLE_TELEMETRY}") + f"OTLP monitoring initialized: service_name={OTEL_SERVICE_NAME}, " + 
f"enable_telemetry={config.enable_telemetry}, provider={config.provider}, " + f"endpoint={config.otlp_endpoint}, trace_endpoint={config.get_trace_endpoint()}, " + f"protocol={OTEL_EXPORTER_OTLP_PROTOCOL}" + ) -# Initialize monitoring when module is imported _initialize_monitoring() - -# Export the global monitoring manager instance -__all__ = [ - 'monitoring_manager' -] +__all__ = ['monitoring_manager'] diff --git a/backend/utils/nacos_client.py b/backend/utils/nacos_client.py new file mode 100644 index 000000000..0fa87410a --- /dev/null +++ b/backend/utils/nacos_client.py @@ -0,0 +1,624 @@ +""" +Nacos Client for service discovery. + +Provides functionality to query service instances from Nacos service registry. +Used by A2A agent discovery to find external A2A agents registered in Nacos. +""" +import logging +from typing import Any, Dict, Optional + +import aiohttp + +logger = logging.getLogger(__name__) + + +class NacosClientError(Exception): + """Base exception for Nacos client errors.""" + pass + + +class NacosConnectionError(NacosClientError): + """Raised when connection to Nacos fails.""" + pass + + +class NacosServiceNotFoundError(NacosClientError): + """Raised when the requested service is not found in Nacos.""" + pass + + +class NacosClient: + """Async client for Nacos service registry operations. + + Provides methods to query service instances for A2A agent discovery. + """ + + def __init__( + self, + nacos_addr: str, + username: Optional[str] = None, + password: Optional[str] = None + ): + """Initialize Nacos client. + + Args: + nacos_addr: Nacos server address (e.g., http://nacos-server:8848). + username: Optional Nacos username for authentication. + password: Optional Nacos password for authentication. 
+ """ + self.nacos_addr = nacos_addr.rstrip("/") + self.username = username + self.password = password + self._session: Optional[aiohttp.ClientSession] = None + self._access_token: Optional[str] = None + + async def _get_session(self) -> aiohttp.ClientSession: + """Get or create an aiohttp session.""" + if self._session is None or self._session.closed: + timeout = aiohttp.ClientTimeout(total=30) + self._session = aiohttp.ClientSession(timeout=timeout) + return self._session + + async def close(self) -> None: + """Close the client session.""" + if self._session and not self._session.closed: + await self._session.close() + self._session = None + + def _build_auth_params(self) -> Dict[str, str]: + """Build authentication parameters for Nacos API requests.""" + params = {} + if self.username: + params["username"] = self.username + if self.password: + params["password"] = self.password + return params + + async def query_a2a_agent( + self, + agent_name: str, + namespace: str = "public" + ) -> Optional[Dict[str, Any]]: + """Query A2A agent info from Nacos using the dedicated A2A endpoint. + + Args: + agent_name: The name of the A2A agent to query. + namespace: Nacos namespace ID (defaults to "public"). + + Returns: + A dict containing agent information: + - agent_name: Agent name + - agent_url: A2A agent endpoint URL + - metadata: Additional metadata + Or None if no agent is found. + + Raises: + NacosConnectionError: If connection to Nacos fails. 
+ """ + params = self._build_auth_params() + agent_name = agent_name.strip() + params["agentName"] = agent_name + params["namespaceId"] = namespace.strip() if namespace else "public" + + url = f"{self.nacos_addr}/nacos/v3/admin/ai/a2a" + + try: + session = await self._get_session() + async with session.get(url, params=params) as response: + text = await response.text() + + if response.status == 200: + data = await response.json() + return self._parse_a2a_response(data, agent_name) + elif response.status == 404: + logger.warning( + f"A2A agent '{agent_name}' not found in Nacos namespace '{namespace}'" + ) + return None + else: + raise NacosConnectionError( + f"Nacos A2A API returned status {response.status}: {text}" + ) + + except aiohttp.ClientError as e: + logger.error(f"Failed to connect to Nacos at {self.nacos_addr}: {e}") + raise NacosConnectionError(f"Failed to connect to Nacos: {e}") from e + + def _parse_a2a_response( + self, + response_data: Dict[str, Any], + agent_name: str + ) -> Optional[Dict[str, Any]]: + """Parse Nacos A2A agent response. + + Args: + response_data: Response data from Nacos A2A API. + agent_name: Agent name for logging. + + Returns: + Agent info dict or None if no agent found. + """ + if response_data.get("code") != 0: + msg = response_data.get("message", "unknown error") + logger.warning(f"Nacos A2A API error for '{agent_name}': {msg}") + return None + + data = response_data.get("data") + if not data: + logger.info(f"No A2A agent data found for '{agent_name}'") + return None + + logger.info(f"[Nacos A2A Parse] Found agent: {data}") + return data + + async def query_service_instance( + self, + service_name: str, + namespace: str = "public", + clusters: Optional[str] = None, + healthy_only: bool = False, + group_name: str = "DEFAULT_GROUP" + ) -> Optional[Dict[str, Any]]: + """Query service instance(s) from Nacos using v3 client API. + + Args: + service_name: The name of the service to query. 
+ namespace: Nacos namespace ID (defaults to "public"). + clusters: Comma-separated cluster names (optional). + healthy_only: If True, only return healthy instances. + group_name: Nacos group name (defaults to "DEFAULT_GROUP"). + + Returns: + A dict containing instance information with keys: + - ip: Instance IP address + - port: Instance port + - metadata: Instance metadata dict (may contain 'a2a_card_url') + Or None if no instance is found. + + Raises: + NacosConnectionError: If connection to Nacos fails. + NacosServiceNotFoundError: If the service does not exist. + """ + params = self._build_auth_params() + service_name = service_name.strip() + params["serviceName"] = service_name + params["namespaceId"] = namespace.strip() if namespace else "public" + params["groupName"] = group_name + if clusters: + params["clusterName"] = clusters + if healthy_only: + params["healthyOnly"] = "true" + + url = f"{self.nacos_addr}/nacos/v3/client/ns/instance/list" + + logger.info( + f"[Nacos Query] URL: {url}, params: " + f"serviceName='{service_name}', namespaceId='{namespace}', groupName='{group_name}'" + ) + + try: + session = await self._get_session() + async with session.get(url, params=params) as response: + text = await response.text() + logger.info( + f"[Nacos Response] status={response.status}, " + f"body_len={len(text)}, body={text[:300]}" + ) + + if response.status == 200: + data = await response.json() + return self._parse_v3_instance_response(data, service_name) + elif response.status == 404: + logger.warning( + f"Service '{service_name}' not found in Nacos namespace '{namespace}'" + ) + return None + else: + raise NacosConnectionError( + f"Nacos API returned status {response.status}: {text}" + ) + + except aiohttp.ClientError as e: + logger.error(f"Failed to connect to Nacos at {self.nacos_addr}: {e}") + raise NacosConnectionError(f"Failed to connect to Nacos: {e}") from e + + def _parse_v3_instance_response( + self, + response_data: Dict[str, Any], + service_name: 
str + ) -> Optional[Dict[str, Any]]: + """Parse Nacos v3 client API instance list response. + + Nacos v3 API returns: { "code": 0, "message": "success", "data": [...] } + + Args: + response_data: Response data from Nacos v3 API. + service_name: Service name for fallback metadata. + + Returns: + First instance as a dict or None if no instances exist. + """ + if response_data.get("code") != 0: + msg = response_data.get("message", "unknown error") + logger.warning(f"Nacos API error for '{service_name}': {msg}") + return None + + data = response_data.get("data") + if data is None: + logger.info(f"[Nacos Parse] No data field in response for service '{service_name}'") + return None + + hosts = data if isinstance(data, list) else [] + logger.info(f"[Nacos Parse] Found {len(hosts)} instances for service '{service_name}'") + + if not hosts: + logger.info(f"[Nacos Parse] No hosts found for service '{service_name}'") + return None + + for instance in hosts: + instance_data = { + "ip": instance.get("ip"), + "port": instance.get("port"), + "healthy": instance.get("healthy", False), + "weight": instance.get("weight", 1.0), + "enabled": instance.get("enabled", True), + "metadata": instance.get("metadata") or {} + } + + if instance_data["enabled"] and instance_data.get("healthy", False): + logger.info( + f"[Nacos Parse] Found healthy instance for '{service_name}': " + f"{instance_data['ip']}:{instance_data['port']}" + ) + return instance_data + + first_instance = hosts[0] + logger.info( + f"[Nacos Parse] No healthy instance found, returning first instance for '{service_name}': " + f"{first_instance.get('ip')}:{first_instance.get('port')}" + ) + return { + "ip": first_instance.get("ip"), + "port": first_instance.get("port"), + "healthy": first_instance.get("healthy", False), + "weight": first_instance.get("weight", 1.0), + "enabled": first_instance.get("enabled", True), + "metadata": first_instance.get("metadata") or {} + } + + def _parse_instance_response( + self, + data: 
Dict[str, Any], + service_name: str + ) -> Optional[Dict[str, Any]]: + """Parse Nacos instance list response (v1 API legacy format). + + Args: + data: Response data from Nacos /instance/list API. + service_name: Service name for fallback metadata. + + Returns: + First instance as a dict or None if no instances exist. + """ + hosts = data.get("hosts") or [] + + if not hosts: + logger.debug(f"No hosts found for service '{service_name}'") + return None + + for instance in hosts: + instance_data = { + "ip": instance.get("ip"), + "port": instance.get("port"), + "healthy": instance.get("healthy", False), + "weight": instance.get("weight", 1.0), + "enabled": instance.get("enabled", True), + "metadata": instance.get("metadata") or {} + } + + if instance_data["enabled"] and instance_data.get("healthy", False): + logger.debug( + f"Found healthy instance for '{service_name}': " + f"{instance_data['ip']}:{instance_data['port']}" + ) + return instance_data + + first_instance = hosts[0] + return { + "ip": first_instance.get("ip"), + "port": first_instance.get("port"), + "healthy": first_instance.get("healthy", False), + "weight": first_instance.get("weight", 1.0), + "enabled": first_instance.get("enabled", True), + "metadata": first_instance.get("metadata") or {} + } + + async def list_services( + self, + namespace: str = "public", + page_no: int = 1, + page_size: int = 100, + group_name: str = "DEFAULT_GROUP" + ) -> Dict[str, Any]: + """List all services in a namespace using v3 Admin API. + + Args: + namespace: Nacos namespace ID (defaults to "public"). + page_no: Page number (1-indexed). + page_size: Number of services per page. + group_name: Group name filter (defaults to "DEFAULT_GROUP"). + + Returns: + Dict containing: + - count: Total number of services + - services: List of service names + + Raises: + NacosConnectionError: If connection to Nacos fails. 
+ """ + session = await self._get_session() + access_token = None + if self.username and self.password: + access_token = await self._get_access_token(session) + if not access_token: + raise NacosConnectionError("Authentication failed. Please check username and password.") + + params = { + "pageNo": page_no, + "pageSize": page_size, + "namespaceId": namespace, + "groupName": group_name + } + headers = {} + if access_token: + headers["AccessToken"] = access_token + + url = f"{self.nacos_addr}/nacos/v3/admin/ns/service" + + try: + async with session.get(url, params=params, headers=headers) as response: + if response.status == 200: + data = await response.json() + if data.get("code") == 0: + return { + "count": data.get("data", {}).get("count", 0), + "services": data.get("data", {}).get("doms", []) + } + elif data.get("code") == 403: + self._clear_access_token() + raise NacosConnectionError("Authentication failed. Please check username and password.") + else: + raise NacosConnectionError( + f"Nacos API error: {data.get('message', 'unknown')}" + ) + elif response.status == 403: + self._clear_access_token() + raise NacosConnectionError("Authentication failed. Please check username and password.") + else: + text = await response.text() + raise NacosConnectionError( + f"Nacos API returned status {response.status}: {text}" + ) + + except aiohttp.ClientError as e: + logger.error(f"Failed to list services from Nacos: {e}") + raise NacosConnectionError(f"Failed to list services from Nacos: {e}") from e + + async def get_service_detail( + self, + service_name: str, + namespace: str = "public", + group_name: str = "DEFAULT_GROUP" + ) -> Optional[Dict[str, Any]]: + """Get detailed information about a service using v3 Admin API. + + Args: + service_name: The name of the service. + namespace: Nacos namespace ID (defaults to "public"). + group_name: Nacos group name (defaults to "DEFAULT_GROUP"). + + Returns: + Service detail dict or None if not found. 
+ + Raises: + NacosConnectionError: If connection to Nacos fails. + """ + session = await self._get_session() + access_token = None + if self.username and self.password: + access_token = await self._get_access_token(session) + if not access_token: + raise NacosConnectionError("Authentication failed. Please check username and password.") + + params = { + "serviceName": service_name, + "namespaceId": namespace, + "groupName": group_name + } + headers = {} + if access_token: + headers["AccessToken"] = access_token + + url = f"{self.nacos_addr}/nacos/v3/admin/ns/service" + + try: + async with session.get(url, params=params, headers=headers) as response: + if response.status == 200: + data = await response.json() + if data.get("code") == 0: + return data.get("data") + elif data.get("code") == 403: + self._clear_access_token() + raise NacosConnectionError("Authentication failed. Please check username and password.") + else: + msg = data.get("message", "") + if "not found" in msg.lower() or "not exist" in msg.lower(): + return None + raise NacosConnectionError( + f"Nacos API error: {msg}" + ) + elif response.status == 404: + return None + elif response.status == 403: + self._clear_access_token() + raise NacosConnectionError("Authentication failed. Please check username and password.") + else: + text = await response.text() + raise NacosConnectionError( + f"Nacos API returned status {response.status}: {text}" + ) + + except aiohttp.ClientError as e: + logger.error(f"Failed to get service detail from Nacos: {e}") + raise NacosConnectionError( + f"Failed to get service detail from Nacos: {e}" + ) from e + + async def check_health( + self, + host: str, + port: int, + namespace: str = "public" + ) -> bool: + """Check if an instance is healthy. + + Args: + host: Instance IP address. + port: Instance port. + namespace: Nacos namespace ID. + + Returns: + True if the instance is healthy, False otherwise. + + Raises: + NacosConnectionError: If connection to Nacos fails. 
+ """ + params = self._build_auth_params() + params["serviceName"] = "__nacos^naming*" + params["ip"] = host + params["port"] = port + params["namespaceId"] = namespace + + url = f"{self.nacos_addr}/nacos/v1/ns/instance/health" + + try: + session = await self._get_session() + async with session.get(url, params=params) as response: + if response.status == 200: + text = await response.text() + return text.lower() == "ok" + return False + + except aiohttp.ClientError as e: + logger.error(f"Failed to check instance health: {e}") + return False + + async def test_connectivity( + self, + namespace: str = "public" + ) -> Dict[str, Any]: + """Test connectivity to the Nacos server. + + Args: + namespace: Nacos namespace ID to test connectivity with. + + Returns: + Dict containing: + - success: Whether the connection was successful + - message: Human-readable message about the result + """ + try: + session = await self._get_session() + + access_token = None + if self.username and self.password: + access_token = await self._get_access_token(session) + if not access_token: + return { + "success": False, + "message": "Authentication failed. Please check username and password." + } + + url = f"{self.nacos_addr}/nacos/v3/admin/ns/ops/metrics" + headers = {} + if access_token: + headers["AccessToken"] = access_token + + async with session.get(url, headers=headers) as response: + if response.status == 200: + data = await response.json() + if data.get("code") == 0: + return { + "success": True, + "message": "Successfully connected to Nacos server" + } + else: + return { + "success": False, + "message": f"Nacos API error: {data.get('message', 'unknown')}" + } + elif response.status == 403: + return { + "success": False, + "message": "Authentication failed. Please check username and password." 
+ } + else: + text = await response.text() + return { + "success": False, + "message": f"Nacos server returned status {response.status}: {text}" + } + + except aiohttp.ClientError as e: + logger.error(f"Failed to connect to Nacos at {self.nacos_addr}: {e}") + return { + "success": False, + "message": f"Failed to connect to Nacos server: {e}" + } + + async def _get_access_token(self, session: aiohttp.ClientSession) -> Optional[str]: + """Get access token from Nacos authentication endpoint with caching. + + Args: + session: aiohttp session to use for the request. + + Returns: + Access token string if authentication successful, None otherwise. + """ + if self._access_token: + return self._access_token + + try: + url = f"{self.nacos_addr}/nacos/v1/auth/login" + form_data = aiohttp.FormData() + form_data.add_field("username", self.username) + form_data.add_field("password", self.password) + + async with session.post(url, data=form_data) as response: + if response.status == 200: + result = await response.json() + token = result.get("accessToken") + if token: + self._access_token = token + return token + logger.warning(f"Nacos login failed: {result.get('message', 'unknown')}") + else: + text = await response.text() + logger.warning(f"Nacos login request returned status {response.status}: {text}") + return None + + except aiohttp.ClientError as e: + logger.error(f"Failed to login to Nacos: {e}") + return None + + def _clear_access_token(self) -> None: + """Clear the cached access token.""" + self._access_token = None + + async def __aenter__(self) -> "NacosClient": + """Async context manager entry.""" + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb) -> None: + """Async context manager exit.""" + await self.close() diff --git a/backend/utils/prompt_template_utils.py b/backend/utils/prompt_template_utils.py index 643e6cd40..299d3bf94 100644 --- a/backend/utils/prompt_template_utils.py +++ b/backend/utils/prompt_template_utils.py @@ -5,9 +5,56 @@ import 
yaml from consts.const import LANGUAGE +from consts.prompt_template import ( + PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP, + PROMPT_GENERATE_TEMPLATE_FIELDS, +) logger = logging.getLogger("prompt_template_utils") +PROMPT_GENERATE_TEMPLATE_KEY_MAP = PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP +PROMPT_GENERATE_TEMPLATE_KEYS = PROMPT_GENERATE_TEMPLATE_FIELDS + + +def get_prompt_generate_template_keys() -> list[str]: + """Return the supported prompt generation template keys.""" + return list(PROMPT_GENERATE_TEMPLATE_FIELDS) + + +def normalize_prompt_generate_template_content( + template_content: Optional[Dict[str, Any]] +) -> Dict[str, str]: + """Normalize prompt generation template content and keep non-empty fields only.""" + normalized: Dict[str, str] = {} + if not isinstance(template_content, dict): + return normalized + + for key in PROMPT_GENERATE_TEMPLATE_FIELDS: + legacy_key = PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP[key] + value = template_content.get(key) + if value is None: + value = template_content.get(legacy_key) + if isinstance(value, str) and value.strip(): + normalized[key] = value + + return normalized + + +def merge_prompt_generate_templates( + *template_contents: Optional[Dict[str, Any]] +) -> Dict[str, str]: + """Merge multiple prompt generation templates with first-non-empty priority.""" + merged: Dict[str, str] = {} + + for template_content in template_contents: + normalized = normalize_prompt_generate_template_content(template_content) + for key in PROMPT_GENERATE_TEMPLATE_FIELDS: + value = normalized.get(key) + if value and key not in merged: + merged[key] = value + + return merged + def get_prompt_template(template_type: str, language: str = LANGUAGE["ZH"], **kwargs) -> Dict[str, Any]: """ @@ -16,6 +63,7 @@ def get_prompt_template(template_type: str, language: str = LANGUAGE["ZH"], **kw Args: template_type: Template type, supports the following values: - 'prompt_generate': Prompt generation template + - 'prompt_optimize': Prompt section optimization 
template - 'agent': Agent template including manager and managed agents - 'generate_title': Title generation template - 'document_summary': Document summary template (Map stage) @@ -33,6 +81,10 @@ def get_prompt_template(template_type: str, language: str = LANGUAGE["ZH"], **kw LANGUAGE["ZH"]: 'backend/prompts/utils/prompt_generate_zh.yaml', LANGUAGE["EN"]: 'backend/prompts/utils/prompt_generate_en.yaml' }, + 'prompt_optimize': { + LANGUAGE["ZH"]: 'backend/prompts/utils/prompt_optimize_zh.yaml', + LANGUAGE["EN"]: 'backend/prompts/utils/prompt_optimize_en.yaml' + }, 'agent': { LANGUAGE["ZH"]: { 'manager': 'backend/prompts/manager_system_prompt_template_zh.yaml', @@ -47,6 +99,10 @@ def get_prompt_template(template_type: str, language: str = LANGUAGE["ZH"], **kw LANGUAGE["ZH"]: 'backend/prompts/utils/generate_title_zh.yaml', LANGUAGE["EN"]: 'backend/prompts/utils/generate_title_en.yaml' }, + 'greeting_generate': { + LANGUAGE["ZH"]: 'backend/prompts/utils/greeting_generate_zh.yaml', + LANGUAGE["EN"]: 'backend/prompts/utils/greeting_generate_en.yaml' + }, 'document_summary': { LANGUAGE["ZH"]: 'backend/prompts/document_summary_agent_zh.yaml', LANGUAGE["EN"]: 'backend/prompts/document_summary_agent_en.yaml' @@ -58,6 +114,10 @@ def get_prompt_template(template_type: str, language: str = LANGUAGE["ZH"], **kw 'skill_creation_simple': { LANGUAGE["ZH"]: 'backend/prompts/skill_creation_simple_zh.yaml', LANGUAGE["EN"]: 'backend/prompts/skill_creation_simple_en.yaml' + }, + 'skill_creation_complicated': { + LANGUAGE["ZH"]: 'backend/prompts/skill_creation_complicate_zh.yaml', + LANGUAGE["EN"]: 'backend/prompts/skill_creation_complicate_en.yaml' } } @@ -77,7 +137,7 @@ def get_prompt_template(template_type: str, language: str = LANGUAGE["ZH"], **kw # Go up one level from utils to backend, then use the template path backend_dir = os.path.dirname(current_dir) absolute_template_path = os.path.join(backend_dir, template_path.replace('backend/', '')) - + # Read and return template content 
with open(absolute_template_path, 'r', encoding='utf-8') as f: return yaml.safe_load(f) @@ -97,6 +157,19 @@ def get_prompt_generate_prompt_template(language: str = LANGUAGE["ZH"]) -> Dict[ return get_prompt_template('prompt_generate', language) +def get_prompt_optimize_prompt_template(language: str = LANGUAGE["ZH"]) -> Dict[str, Any]: + """ + Get prompt optimization template. + + Args: + language: Language code ('zh' or 'en') + + Returns: + dict: Loaded prompt optimization template configuration + """ + return get_prompt_template('prompt_optimize', language) + + def get_agent_prompt_template(is_manager: bool, language: str = LANGUAGE["ZH"]) -> Dict[str, Any]: """ Get agent prompt template @@ -152,30 +225,42 @@ def get_cluster_summary_reduce_prompt_template(language: str = LANGUAGE["ZH"]) - def get_skill_creation_simple_prompt_template( language: str = LANGUAGE["ZH"], - existing_skill: Optional[Dict[str, Any]] = None + existing_skill: Optional[Dict[str, Any]] = None, + complexity: str = "simple" ) -> Dict[str, str]: """ - Get skill creation simple prompt template with Jinja2 rendering. + Get skill creation prompt template with Jinja2 rendering. This template is structured YAML with system_prompt and user_prompt sections. Supports Jinja2 template syntax for dynamic content based on existing_skill. + Supports both simple and complicated skill creation templates. Args: language: Language code ('zh' or 'en') existing_skill: Optional dict containing existing skill info for update scenarios. 
Expected keys: name, description, tags, content + complexity: Complexity level ('simple' or 'complicated') Returns: Dict[str, str]: Template with keys 'system_prompt' and 'user_prompt', rendered with variables """ from jinja2 import Template + # Select template based on complexity template_path_map = { - LANGUAGE["ZH"]: 'backend/prompts/skill_creation_simple_zh.yaml', - LANGUAGE["EN"]: 'backend/prompts/skill_creation_simple_en.yaml' + "simple": { + LANGUAGE["ZH"]: 'backend/prompts/skill_creation_simple_zh.yaml', + LANGUAGE["EN"]: 'backend/prompts/skill_creation_simple_en.yaml' + }, + "complicated": { + LANGUAGE["ZH"]: 'backend/prompts/skill_creation_complicate_zh.yaml', + LANGUAGE["EN"]: 'backend/prompts/skill_creation_complicate_en.yaml' + } } - template_path = template_path_map.get(language, template_path_map[LANGUAGE["ZH"]]) + # Default to simple if complexity is not recognized + template_type = template_path_map.get(complexity, template_path_map["simple"]) + template_path = template_type.get(language, template_type[LANGUAGE["ZH"]]) current_dir = os.path.dirname(os.path.abspath(__file__)) backend_dir = os.path.dirname(current_dir) diff --git a/backend/utils/tool_utils.py b/backend/utils/tool_utils.py index f06f36bc3..f1d9147e3 100644 --- a/backend/utils/tool_utils.py +++ b/backend/utils/tool_utils.py @@ -46,7 +46,8 @@ def get_local_tools_description_zh() -> Dict[str, Dict]: if hasattr(param.default, 'exclude') and param.default.exclude: continue - param_description_zh = param.default.description_zh if hasattr(param.default, 'description_zh') else None + # Note: Pydantic Field doesn't have description_zh attribute + param_description_zh = getattr(param.default, 'description_zh', None) if hasattr(param.default, 'description_zh') else None if param_description_zh is None and param_name in init_param_descriptions: param_description_zh = init_param_descriptions[param_name].get('description_zh') diff --git a/doc/docs/.vitepress/config.mts 
b/doc/docs/.vitepress/config.mts index 6ee76ff5d..87e79a831 100644 --- a/doc/docs/.vitepress/config.mts +++ b/doc/docs/.vitepress/config.mts @@ -385,6 +385,7 @@ export default defineConfig({ ], }, { text: "性能监控", link: "/zh/sdk/monitoring" }, + { text: "OpenTelemetry 设计", link: "/zh/sdk/opentelemetry-design" }, { text: "向量数据库", link: "/zh/sdk/vector-database" }, { text: "数据处理", link: "/zh/sdk/data-process" }, ], diff --git a/doc/docs/en/backend/overview.md b/doc/docs/en/backend/overview.md index 962233f18..d77dfee3c 100644 --- a/doc/docs/en/backend/overview.md +++ b/doc/docs/en/backend/overview.md @@ -202,4 +202,6 @@ python backend/mcp_service.py # MCP service - Resource pool management - Auto-scaling capabilities -For detailed backend development guidelines, see the [Developer Guide](../developer-guide/overview). \ No newline at end of file +For detailed backend development guidelines, see the [Developer Guide](../developer-guide/overview). + +For skill development and management, see the [Skills System Documentation](./skills/index). \ No newline at end of file diff --git a/doc/docs/en/backend/skills/index.md b/doc/docs/en/backend/skills/index.md new file mode 100644 index 000000000..7824260fa --- /dev/null +++ b/doc/docs/en/backend/skills/index.md @@ -0,0 +1,37 @@ +# Backend Skills Documentation + +This section covers Nexent's Skills system in the backend infrastructure, including skill definitions, skill package structures, and system architecture. + +## Available Documentation + +### Overview and Architecture +- [Skills System Overview](./overview): Skill types, lifecycle, and version management + +## Skills vs. Tools + +In Nexent, **Tools** and **Skills** are two distinct layers: + +- **Tool**: A single atomic operation the agent can call, such as `read_file` or `tavily_search`. 
When enabled, the LLM searches through the tool list on every turn — meaning even if a tool is completely unnecessary for this conversation, the LLM still consumes context tokens to "see" it. +- **Skill**: A workflow of multiple tools bundled with parameter configuration and usage documentation via `SKILL.md`. The LLM does not need to "see" all tools in advance; it decides whether to activate a skill based on the user's actual needs. The corresponding toolset is only loaded when activated — effectively reducing token consumption. + +## Quick Start + +1. **Explore capabilities**: Read [Skills System Overview](./overview) to understand the supported skill types +2. **Try creation**: Experience NL-to-Skill creation on the [Skill Management](../../user-guide/skills) page +3. **Create manually**: Upload `SKILL.md` or a ZIP package to create a custom skill +4. **Configure for agents**: Enable skills in the agent's tool configuration + +## Related References + +- [Skill Management (User Guide)](../../user-guide/skills) +- [Agent Development Guide](../../user-guide/agent-development) +- [Local Tools Overview](../../user-guide/local-tools/index) +- [SDK Tool Development Guide](../../sdk/core/tools) +- [MCP Tool Development](../tools/mcp) +- [FAQ](../../quick-start/faq) + +## Getting Help + +- Check the [FAQ](../../quick-start/faq) for common skill usage questions +- Ask questions in [GitHub Discussions](https://github.com/ModelEngine-Group/nexent/discussions) +- Review [GitHub Issues](https://github.com/ModelEngine-Group/nexent/issues) for known issues diff --git a/doc/docs/en/backend/skills/overview.md b/doc/docs/en/backend/skills/overview.md new file mode 100644 index 000000000..34fbd2f97 --- /dev/null +++ b/doc/docs/en/backend/skills/overview.md @@ -0,0 +1,138 @@ +# Skills System Overview + +A Skill is Nexent's way of extending an agent's capabilities. 
Each skill consists of: + +- **Skill description**: What this skill does and when to use it +- **Tool bundle**: A package of one or more Nexent SDK methods or user-defined tools +- **Parameter template**: Which parameters users can fill in for this skill +- **Usage examples**: How this skill is typically used + +Compared to selecting tools one by one, skills make configuring complex capabilities simple — install one skill package instead of configuring each tool separately. + +## Skill Package Structure + +A skill can be a single `SKILL.md` file or a ZIP package with multiple files: + +``` +skill-name/ +├── SKILL.md # Skill definition file (required) +├── config/ +│ ├── config.yaml # Default parameter values (optional) +│ └── schema.yaml # Parameter types and descriptions (optional) +├── scripts/ +│ └── *.py # Python scripts (optional) +├── examples.md # Usage examples (optional) +└── assets/ # Static assets (optional) +``` + +### SKILL.md Structure + +Each skill must have a `SKILL.md` file, consisting of two parts: + +**Part 1: YAML Frontmatter (required)** + +```yaml +--- +name: skill-name +description: | + A description of what this skill does and when to use it. + Write in third person, e.g., "This skill is used for..." +tags: + - tag1 + - tag2 +--- +``` + +**Part 2: Skill Body** + +Below the frontmatter, you can write Markdown content including: +- Detailed usage instructions and guidelines +- Example code for tool invocation +- Error handling instructions +- Usage limits and caveats + +### Two Skill Types + +Skills fall into two categories based on their purpose: + +**Tool Skills**: Used to expose the capabilities of one or more Nexent SDK methods. The body should include tool parameter descriptions, usage examples, return formats, and error handling. Once the user configures the parameters, the agent can call these tools directly. + +**Agent Skills**: Used to teach an agent how to perform a complex task. 
The body should include workflow instructions, domain knowledge, best practices, and sometimes helper scripts. The body will contain detailed step-by-step guidance. + +## Official Skills Overview + +### File Operations + +| Skill Name | Description | +|-----------|-------------| +| `read-file` | Read file content and metadata within the workspace | +| `create-file-directory` | Create files or directories | +| `delete-file-directory` | Delete files or directories | +| `move-file-directory` | Move or rename files/directories | +| `list-directory` | List directory structure in a tree view | + +### Knowledge Base Search + +| Skill Name | Description | +|-----------|-------------| +| `search-knowledge-base` | Local knowledge base semantic search (supports hybrid / accurate / semantic modes) | +| `search-dify` | Dify knowledge base search | +| `search-idata` | iData knowledge base search | +| `search-datamate` | DataMate knowledge base search (with similarity threshold control) | + +### Web Search + +| Skill Name | Description | +|-----------|-------------| +| `search-web-tavily` | Tavily real-time web search | +| `search-web-linkup` | Linkup image and text mixed search | +| `search-web-exa` | Exa deep web search | + +### Multimodal Analysis + +| Skill Name | Description | +|-----------|-------------| +| `analyze-image` | VLM-based image content analysis and Q&A | +| `analyze-text-file` | PDF/Word/Excel file content extraction and Q&A | + +### Communication and Remote Operations + +| Skill Name | Description | +|-----------|-------------| +| `email-utils` | IMAP receive / SMTP send (supports HTML / CC / BCC) | +| `run-shell-ssh` | Persistent SSH session for remote command execution | + +## Skill Lifecycle + +### Version Management + +Each skill supports two version states: + +- **Draft version (version=0)**: Development and debugging stage, changes take effect immediately, suitable for iterative adjustments +- **Published version (version>=1)**: Production use, 
parameters locked to prevent accidental changes + +### Skill Instances + +The same skill can be configured with different parameter values for different agents, independently. + +For example, a search skill can be configured for a "Technical Documentation Agent" to search only the technical knowledge base, and for a "Customer Service Agent" to search only the customer service knowledge base. + +### Common Workflow + +``` +Create skill → Configure parameters → Select skill for agent → Debug → Publish + ↓ + Edit draft version +``` + +## Security Notes + +- **Path isolation**: Files within a skill package can only be accessed within the skill directory scope +- **Parameter validation**: Parameters defined in schema.yaml are validated by the frontend form +- **Permission control**: Skill instances are tenant-isolated; APIs require authentication tokens + +## Related References + +- [Skill Management (User Guide)](../../user-guide/skills) +- [Agent Development Guide](../../user-guide/agent-development) +- [Local Tools Overview](../../user-guide/local-tools/index) diff --git a/doc/docs/en/backend/tools/index.md b/doc/docs/en/backend/tools/index.md index 2d2d2c185..82d73b82c 100644 --- a/doc/docs/en/backend/tools/index.md +++ b/doc/docs/en/backend/tools/index.md @@ -12,6 +12,10 @@ Integrate with the LangChain ecosystem for advanced AI workflows. Model Context Protocol tools for standardized AI agent communication. → [MCP Tools Development](./mcp) +### Skills System +Create reusable skill packages through natural language or ZIP files, giving agents more flexible tool-calling capabilities. +→ [Skills Documentation](../skills/index) + ## Quick Start 1. 
**Choose your tool type**: LangChain for general AI workflows, MCP for standardized agent communication diff --git a/doc/docs/en/deployment/devcontainer.md b/doc/docs/en/deployment/devcontainer.md index 84a49f47e..ce6efe7be 100644 --- a/doc/docs/en/deployment/devcontainer.md +++ b/doc/docs/en/deployment/devcontainer.md @@ -25,7 +25,7 @@ This development container configuration sets up a complete Nexent development e 1. Clone the project locally 2. Open project folder in Cursor/VS Code -3. Run `docker/deploy.sh` script in `infrastructure` mode to start containers +3. Run `./deploy.sh --components infrastructure,application --port-policy development` from the `docker` directory to start base containers 4. Enter `nexent-minio` and `nexent-elasticsearch` containers, copy `MINIO_ACCESS_KEY`, `MINIO_SECRET_KEY`, `ELASTICSEARCH_API_KEY` environment variables to corresponding positions in `docker/docker-compose.dev.yml` 5. Press `F1` or `Ctrl+Shift+P`, type `Dev Containers: Reopen in Container ...` 6. Cursor will start the development container based on configuration in `.devcontainer` directory diff --git a/doc/docs/en/deployment/docker-build.md b/doc/docs/en/deployment/docker-build.md index 47f51d891..bf36dc5d4 100644 --- a/doc/docs/en/deployment/docker-build.md +++ b/doc/docs/en/deployment/docker-build.md @@ -178,6 +178,11 @@ Notes: ## 🚀 Deployment Recommendations -After building is complete, you can use the docker/deploy.sh script for deployment, or directly start the services using docker-compose. +After building is complete, you can deploy local images from the `docker` directory: -> When starting a test of locally built images, you need to change APP_VERSION="$(get_app_version)" to APP_VERSION="latest" in docker/deploy.sh, because the deployment will default to using the image corresponding to the current version. 
+```bash +cd docker +bash deploy.sh --image-source local-latest +``` + +> `local-latest` uses local `latest` Nexent application images and avoids pulling those images again. You do not need to modify `docker/deploy.sh`. diff --git a/doc/docs/en/developer-guide/environment-setup.md b/doc/docs/en/developer-guide/environment-setup.md index 21f3cb6af..e2b0b9ed3 100644 --- a/doc/docs/en/developer-guide/environment-setup.md +++ b/doc/docs/en/developer-guide/environment-setup.md @@ -23,7 +23,7 @@ Before backend work, start core services (PostgreSQL, Redis, Elasticsearch, MinI ```bash # Run from the docker directory at the project root cd docker -./deploy.sh --mode infrastructure +./deploy.sh --components infrastructure --port-policy development ``` :::: info Important Notes @@ -139,4 +139,3 @@ This adds: - Testing framework (pytest) - Data processing dependencies (unstructured) - Other developer utilities - diff --git a/doc/docs/en/getting-started/features.md b/doc/docs/en/getting-started/features.md index e699c1f8c..2216d7163 100644 --- a/doc/docs/en/getting-started/features.md +++ b/doc/docs/en/getting-started/features.md @@ -1,78 +1,73 @@ # Key Features -Nexent provides powerful capabilities for building and deploying AI agents with minimal effort. Here are the core features that make Nexent unique. +Nexent v2.0 delivers powerful capabilities for building and deploying AI agents. Here are the core features that make Nexent unique. -## 🧠 Smart Agent Prompt Generation +## ⚙️ Multi-Model Integration -Turn plain language into runnable prompts. Nexent automatically chooses the right tools and plans the best action path for every request. +Nexent is compatible with any OpenAI-compatible model provider, offering one-stop coverage for LLM, Embedding, VLM, STT, and TTS model types. Supports seamless synchronization with the ModelEngine platform, with built-in connection monitoring and automatic failover. 
The platform supports connecting to any service that follows the OpenAI API protocol, making it easy to diversify models or switch to domestic alternatives. -![Feature 1](../../assets/Feature1.png) +## 🤖 Zero-Code Agent Generation -## ⚡ Scalable Data Process Engine +Describe your needs in natural language and Nexent automatically transforms them into executable agent configurations. The system intelligently selects appropriate tools, plans the optimal execution path, and generates professional prompts. No code, no drag-and-drop configuration — experience true "what you imagine is what you get" agent creation. Agents can also be imported and exported for easy sharing and reuse. Built-in debugging provides online testing so you can iterate and refine rapidly. -Process 20+ data formats with fast OCR and table structure extraction, scaling smoothly from a single process to large-batch pipelines. +## 🤝 A2A Protocol & Agent Collaboration -![Feature 2](../../assets/Feature2.png) +Nexent supports the **Agent-to-Agent (A2A)** communication protocol, enabling seamless multi-agent collaboration. A main agent can invoke sub-agents to complete specific tasks; once a sub-agent finishes execution, results are aggregated back to the main agent. Multiple collaborative sub-agents can be configured, each with its own toolset, model configuration, and execution strategy — making it easy to build complex distributed agent workflows. -## 📚 Personal-Grade Knowledge Base +## 🧠 Layered Memory Architecture -Import files in real time, auto-summarise them, and let agents access both personal and global knowledge instantly, also knowing what it can get from each knowledge base. +Intelligent context management is the key to agents that truly understand you. 
Nexent provides a two-tier memory system: -![Feature 3](../../assets/Feature3.png) +- **User-Level Memory**: Personal preferences, habits, and usage patterns +- **User-Agent Memory**: Collaboration history and context for a specific user with a specific agent -## 🌐 Internet Knowledge Search +The system automatically extracts key information from conversations to generate memory entries — no manual input required. Memory entries can also be added or modified manually for greater flexibility. Smart retrieval ensures every conversation automatically pulls in the most relevant contextual memories, enabling truly personalized service. -Connect to 5+ web search providers so agents can mix fresh internet facts with your private data. +## 📝 Progressive Skill Disclosure -![Feature 4](../../assets/Feature4.png) +Nexent introduces a **Progressive Skill Disclosure** mechanism. As users input tasks, the system dynamically reveals the most relevant Skill suggestions based on the current context — helping users quickly find the tools and methods best suited to the current task. This mechanism helps prevent context explosion and maximizes context window efficiency. -## 🔍 Knowledge-Level Traceability +## 🗄️ Personal-Grade Knowledge Base -Serve answers with precise citations from web and knowledge-base sources, making every fact verifiable. +Create personal knowledge bases on the Nexent platform. Import files in real time with automatic parsing and vectorization, enabling agents to access private data instantly. Supports 20+ document formats including text, PDF, Word, PowerPoint, Excel, and CSV — with fast OCR and table structure extraction built in. Each knowledge base automatically generates its own summary, helping the agent accurately determine when to retrieve from it. Fine-grained access controls can be set: private, department-wide, or organization-wide visibility. 
-![Feature 5](../../assets/Feature5.png) +## 🔧 MCP Tool Ecosystem -## 🎭 Multimodal Understanding & Dialogue +Nexent builds its tool ecosystem on the **Model Context Protocol (MCP)** — described as the "USB-C of AI" — a universal interface standard for connecting AI agents to the external world. -Speak, type, files, or show images. Nexent understands voice, text, and pictures, and can even generate new images on demand. +- Add third-party MCP services quickly via URL or JSON configuration +- Develop local MCP tools with LangChain integrations and custom Python plugins +- Hot-swap tools, models, and toolchains without touching core code +- Built-in tool testing lets you verify whether tools work as expected before building an agent -![Feature 6](../../assets/Feature6.png) +## 🌐 Internet Knowledge Integration -## 🔧 MCP Tool Ecosystem +Connect to multiple web search providers so agents can blend the freshest internet information with your private data. Hybrid search mode balances real-time accuracy with relevance. -Drop in or build Python plug-ins that follow the MCP spec; swap models, tools, and chains without touching core code. +## 🔍 Knowledge Traceability & Citations -![Feature 7](../../assets/Feature7.png) +Every answer comes with precise citations from web search results or knowledge base documents, making every fact transparent and verifiable. Source information is fully traceable with one click, building trust in agent responses. -## 🏗️ Architecture Benefits +## 🎭 Multimodal Interaction -### ⚡ Distributed Processing Capabilities -- **Asynchronous Architecture**: High-performance asynchronous processing based on asyncio -- **Multi-threading Safety**: Thread-safe concurrent processing mechanisms -- **Celery Integration**: Optimized for distributed task queues -- **Batch Optimization**: Intelligent batch operations to reduce network overhead +Supports multiple input modes: voice, text, images, and files. 
Agents can understand voice, text, and images, and can generate new images on demand — delivering a truly natural multimodal conversation experience. -### 🏢 Enterprise-grade Scalability -- **Modular Design**: Loose-coupled module architecture for easy extension -- **Plugin-based Tools**: Standardized tool interfaces for rapid integration -- **Configuration Management**: Flexible configuration system supporting multi-environment deployment -- **Monitoring Friendly**: Comprehensive logging and status monitoring +## 🔢 Agent Version Management -### 🚀 High-performance Optimization -- **Connection Pooling**: Intelligent reuse of database and HTTP connections -- **Memory Management**: Stream processing of large files and memory optimization -- **Concurrency Control**: Intelligent concurrency limiting and load balancing -- **Caching Strategy**: Multi-layer caching to improve response speed +A comprehensive version control system supports agent iteration and historical rollback. Every version is independently archived; view change history, compare versions, and roll back whenever needed. Agent configurations can also be imported and exported in JSON format, enabling seamless migration across environments and smooth team collaboration. -For detailed information about Nexent's software architecture and technical advantages, see our **[Software Architecture](./software-architecture)** guide. +## 🏪 Agent Market -## 🎯 Use Cases +A built-in agent marketplace brings together high-quality agents from both official and community creators. Download with one click to use immediately, or integrate them as sub-agents into your own agent workflows to rapidly build complex applications. 
-Nexent is designed for various scenarios including: -- **Business Intelligence**: Automated data analysis and reporting -- **Customer Support**: Intelligent chat agents with knowledge base integration -- **Content Processing**: Document analysis, summarization, and extraction -- **Research Assistance**: Academic paper analysis and information synthesis -- **Personal Productivity**: Smart assistants for daily tasks and information management +## 👥 Multi-Tenant RBAC & User Management -For detailed agent scenarios and real-world implementations, see our **[MCP Ecosystem Use Cases](../mcp-ecosystem/use-cases)**. \ No newline at end of file +Nexent provides a complete multi-tenant, role-based permission management system: + +- **Four Roles**: Super Administrator, Tenant Administrator, Developer, and Regular User — each with clearly defined responsibilities +- **Multi-Tenant Isolation**: Complete data isolation between tenants, with platform-wide management support +- **User Group Mechanism**: Manage resources and access permissions through groups, supporting flexible permission delegation +- **Invitation Code Mechanism**: Controlled registration safeguards platform security +- **Resource-Level Permissions**: Fine-grained access control on agents, knowledge bases, and more — down to the user group level + +For detailed information about Nexent's software architecture and technical advantages, see our **[Software Architecture](./software-architecture)** guide. diff --git a/doc/docs/en/getting-started/overview.md b/doc/docs/en/getting-started/overview.md index 0f3936ed0..e77107eb4 100644 --- a/doc/docs/en/getting-started/overview.md +++ b/doc/docs/en/getting-started/overview.md @@ -17,10 +17,10 @@ Nexent is a zero-code platform for auto-generating production-grade AI agents, b > *If you want to go fast, go alone; if you want to go far, go together.* -We have released **Nexent v1**, and the platform is now relatively stable. 
However, there may still be some bugs, and we are continuously improving and adding new features. Stay tuned: we will announce **v2.0** soon! +We have released **Nexent v2.0** — a major upgrade over v1.0. This release brings A2A protocol support, progressive Skill disclosure, layered memory architecture, full-featured user management with RBAC, agent version management, and the Agent Market. Core capabilities like knowledge base integration, multimodal interaction, and the MCP tool ecosystem have been significantly enhanced. The platform is maturing rapidly and we welcome your feedback. -* **🗺️ Check our [Feature Map](https://github.com/orgs/ModelEngine-Group/projects/6)** to explore current and upcoming features. -* **🔍 Try the current build** and leave ideas or bugs in the [Issues](https://github.com/ModelEngine-Group/nexent/issues) tab. +- **🗺️ Check our [Feature Map](https://github.com/orgs/ModelEngine-Group/projects/6)** to explore current and upcoming features. +- **🔍 Try the current build** and leave ideas or bugs in the [Issues](https://github.com/ModelEngine-Group/nexent/issues) tab. > *Rome wasn't built in a day.* @@ -32,15 +32,21 @@ Most of all, we need visibility. 
Star ⭐ and watch the [GitHub repository](http ## ✨ Key Features -Nexent offers a comprehensive set of features for building powerful AI agents: - -- **🤖 Smart Agent Generation** - Zero-code agent creation using natural language -- **📊 Scalable Data Processing** - Handle 20+ file formats with intelligent extraction -- **🧠 Personal Knowledge Base** - Real-time file import with auto-summarization -- **🌐 Internet Integration** - Connect to multiple search providers and web sources -- **🔍 Knowledge Traceability** - Precise citation and source verification -- **🎭 Multimodal Support** - Voice, text, images, and file processing -- **🔧 MCP Ecosystem** - Extensible tool integration and custom development +Nexent v2.0 delivers a comprehensive feature set for building powerful AI agents: + +- **⚙️ Multi-Model Integration** — Any OpenAI-compatible provider, with full Embedding/VLM/STT/TTS support +- **🤖 Zero-Code Agent Generation** — Describe in plain language, deploy in one click +- **🤝 A2A Agent Collaboration** — Agent-to-Agent protocol for seamless multi-agent workflows +- **🧠 Layered Memory Architecture** — Two-tier memory system with cross-conversation context accumulation +- **📝 Progressive Skill Disclosure** — Context-aware tool suggestions that reveal as you go +- **🗄️ Personal-Grade Knowledge Base** — 20+ format document import with intelligent retrieval +- **🔧 MCP Tool Ecosystem** — Plug-and-play extensibility with custom tool development +- **🌐 Internet Knowledge Integration** — Multi-source hybrid search blending real-time web with private data +- **🔍 Knowledge-Level Traceability** — Precise citations and verifiable sources on every answer +- **🎭 Multimodal Interaction** — Voice, text, images, and files for fully natural conversations +- **🔢 Agent Version Management** — Version iteration and rollback for safe, controlled deployments +- **🏪 Agent Market** — Official and community agents ready to install and use +- **👥 Multi-Tenant RBAC** — Tenant isolation,
role-based permissions, and fine-grained resource access For detailed feature information and examples, see our **[Features Guide](./features)**. @@ -49,20 +55,23 @@ For detailed feature information and examples, see our **[Features Guide](./feat Nexent adopts a modern distributed microservices architecture designed to provide high-performance, scalable AI agent platform. The entire system is based on containerized deployment, supporting cloud-native and enterprise-grade application scenarios. ### 🌐 Layered Architecture Design -- **Frontend Layer** - Modern user interface built with Next.js + React + TypeScript -- **API Gateway Layer** - FastAPI high-performance web framework for request routing and load balancing -- **Business Logic Layer** - Agent management, conversation management, knowledge base management, and model management -- **Data Layer** - Distributed storage architecture with PostgreSQL, Elasticsearch, Redis, and MinIO + +- **Frontend Layer** — Modern user interface built with Next.js + React + TypeScript +- **API Gateway Layer** — FastAPI high-performance web framework for request routing and load balancing +- **Business Logic Layer** — Agent management, conversation management, knowledge base management, and model management +- **Data Layer** — Distributed storage architecture with PostgreSQL, Elasticsearch, Redis, and MinIO ### 🚀 Core Service Architecture -- **Agent Services** - Agent generation and execution based on SmolAgents framework -- **Data Processing Services** - Real-time and batch processing supporting 20+ file formats -- **MCP Ecosystem** - Standardized tool interfaces and plugin architecture + +- **Agent Services** — Agent generation and execution based on SmolAgents framework +- **Data Processing Services** — Real-time and batch processing supporting 20+ file formats +- **MCP Ecosystem** — Standardized tool interfaces and plugin architecture ### ⚡ Distributed Features -- **Asynchronous Processing** - High-performance async processing 
architecture based on asyncio -- **Microservices Design** - Service decoupling with independent scaling and deployment -- **Containerized Deployment** - Docker Compose service orchestration supporting cloud-native deployment + +- **Asynchronous Processing** — High-performance async processing architecture based on asyncio +- **Microservices Design** — Service decoupling with independent scaling and deployment +- **Containerized Deployment** — Docker Compose service orchestration supporting cloud-native deployment For detailed architectural design and technical implementation, see our **[Software Architecture](./software-architecture)**. @@ -70,9 +79,9 @@ For detailed architectural design and technical implementation, see our **[Softw Ready to get started? Here are your next steps: -1. **📋 [Installation & Deployment](../quick-start/installation)** - System requirements and deployment guide -2. **🔧 [Developer Guide](../developer-guide/overview)** - Build from source and customize -3. **❓ [FAQ](../quick-start/faq)** - Common questions and troubleshooting +1. **📋 [Installation & Deployment](../quick-start/installation)** — System requirements and deployment guide +2. **🔧 [Developer Guide](../developer-guide/overview)** — Build from source and customize +3. **❓ [FAQ](../quick-start/faq)** — Common questions and troubleshooting ## 💬 Community & contact diff --git a/doc/docs/en/getting-started/software-architecture.md b/doc/docs/en/getting-started/software-architecture.md index 701d89319..99e38a5f9 100644 --- a/doc/docs/en/getting-started/software-architecture.md +++ b/doc/docs/en/getting-started/software-architecture.md @@ -1,8 +1,8 @@ # Software Architecture -Nexent adopts a modern distributed microservices architecture designed to provide high-performance, scalable AI agent platform. The entire system is based on containerized deployment, supporting cloud-native and enterprise-grade application scenarios. 
+Nexent adopts a modern distributed microservices architecture designed to provide a high-performance, scalable AI agent platform. The entire system is containerized with Docker and supports cloud-native and enterprise-grade deployment scenarios. -![Software Architecture Diagram](../../assets/architecture_en.png) +![Software Architecture Diagram](../../assets/architecture_zh.png) ## 🏗️ Overall Architecture Design @@ -11,156 +11,284 @@ Nexent's software architecture follows layered design principles, structured int ### 🌐 Frontend Layer - **Technology Stack**: Next.js + React + TypeScript - **Functions**: User interface, agent interaction, multimodal input processing -- **Features**: Responsive design, real-time communication, internationalization support +- **Features**: Responsive design, real-time WebSocket communication, internationalization (i18n) ### 🔌 API Gateway Layer -- **Core Service**: FastAPI high-performance web framework -- **Responsibilities**: Request routing, authentication, API version management, load balancing -- **Ports**: 5010 (main service), 5012 (data processing service) +Distributed API services built on FastAPI: + +| Service | Port | Description | +|---------|------|-------------| +| **nexent-config** | 5010 | Main API service - agent CRUD, configuration management | +| **nexent-runtime** | 5014 | Runtime service - agent execution, streaming responses | +| **nexent-mcp** | 5011/5015 | MCP service - tool protocol management, FastMCP server | +| **nexent-northbound** | 5013 | External API service - A2A protocol, partner integrations | +| **nexent-data-process** | 5012 | Data processing service - document parsing, vectorization | ### 🧠 Business Logic Layer -- **Agent Management**: Agent generation, execution, monitoring -- **Conversation Management**: Multi-turn dialogue, context maintenance, history tracking -- **Knowledge Base Management**: Document processing, vectorization, retrieval -- **Model Management**: Multi-model support, health 
checks, load balancing +The backend implements a clean layered architecture: + +#### App Layer (`backend/apps/`) +- **Purpose**: HTTP boundary layer - parse/validate inputs, call services, map errors to HTTP +- **Key Modules**: + - `agent_app.py` - Agent CRUD, version management, streaming execution + - `conversation_management_app.py` - Multi-turn dialogue, history tracking + - `model_managment_app.py` - Model configuration, health checks + - `skill_app.py` - Skill creation and management + - `knowledge_summary_app.py` - Knowledge base operations + - `remote_mcp_app.py` - Remote MCP tool management + - `a2a_client_app.py` / `a2a_server_app.py` - A2A protocol support + +#### Service Layer (`backend/services/`) +- **Purpose**: Core business logic orchestration, coordinate repositories/SDKs +- **Key Modules**: + - `agent_service.py` - Agent lifecycle, execution orchestration, memory management + - `agent_version_service.py` - Version publishing, rollback, comparison + - `model_management_service.py` - Multi-model support, load balancing + - `memory_config_service.py` - Memory configuration, context building + - `conversation_management_service.py` - Session management, history persistence + - `skill_service.py` - Skill generation, template processing + - `data_process_service.py` - Document processing pipeline + - `mcp_container_service.py` - MCP container lifecycle management + - `remote_mcp_service.py` - Remote MCP server integration + - `a2a_client_service.py` / `a2a_server_service.py` - A2A agent communication + - `redis_service.py` - Caching, distributed locks, session storage + +#### Agent Core (`backend/agents/`) +- **Purpose**: Agent execution framework built on SmolAgents +- **Key Components**: + - `agent_run_manager.py` - Agent run lifecycle, streaming coordination + - `create_agent_info.py` - Agent configuration builder, tool integration + - `preprocess_manager.py` - Document preprocessing orchestration + - `skill_creation_agent.py` - LLM-powered skill 
generation ### 📊 Data Layer Distributed data storage architecture with multiple specialized databases: #### 🗄️ Structured Data Storage -- **PostgreSQL**: Primary database storing user information, agent configurations, conversation records -- **Port**: 5434 -- **Features**: ACID transactions, relational data integrity - -#### 🔍 Search Engine -- **Elasticsearch**: Vector database and full-text search engine -- **Port**: 9210 -- **Functions**: Vector similarity search, hybrid search, large-scale optimization +- **PostgreSQL** (port 5434): Primary relational database + - User and tenant management (`user_tenant_db.py`) + - Agent configuration and versions (`agent_db.py`, `agent_version_db.py`) + - Tool definitions and instances (`tool_db.py`) + - Conversation history (`conversation_db.py`) + - Group and permission management (`group_db.py`, `role_permission_db.py`) + - Memory configuration (`memory_config_db.py`) + - Skill definitions (`skill_db.py`) +- **Features**: ACID transactions, relational integrity, multi-tenancy support + +#### 🔍 Vector Search & Full-Text Search +- **Elasticsearch** (port 9210): Vector and full-text search engine + - Knowledge base storage (`knowledge_db.py`) + - Vector similarity search, hybrid search + - Semantic chunking and indexing +- **Features**: Scalable search, relevance ranking, large-scale optimization #### 💾 Cache Layer -- **Redis**: High-performance in-memory database -- **Port**: 6379 -- **Usage**: Session caching, temporary data, distributed locks +- **Redis** (port 6379): High-performance in-memory database + - Session caching + - Temporary data storage + - Distributed locks (`redis_service.py`) + - Celery task broker for async jobs +- **Features**: Sub-millisecond latency, persistence with AOF #### 📁 Object Storage -- **MinIO**: Distributed object storage service -- **Port**: 9010 -- **Functions**: File storage, multimedia resource management, large file processing +- **MinIO** (port 9010/9011): Distributed object storage + -
File uploads and attachments (`attachment_db.py`) + - Document storage for knowledge base + - Preview generation and temporary files +- **Features**: S3-compatible API, large file handling ## 🔧 Core Service Architecture ### 🤖 Agent Services ``` -Agent framework based on SmolAgents, providing: -├── Agent generation and configuration -├── Tool calling and integration -├── Reasoning and decision execution -└── Lifecycle management +Agent Framework (SmolAgents-based): +├── Agent Creation & Configuration +│ ├── Name/display name generation (LLM-powered) +│ ├── Tool integration and selection +│ ├── Sub-agent relationship management +│ └── Version control and publishing +├── Agent Execution Engine +│ ├── Streaming response (SSE) +│ ├── Tool calling and orchestration +│ ├── Multi-model support (LLM + Business logic) +│ └── Memory context building +├── Version Management +│ ├── Publishing and rollback +│ ├── Version comparison +│ └── A2A agent card registration +└── Lifecycle Management + ├── Run registration and tracking + ├── Stop and cleanup + └── Preprocessing coordination ``` ### 📈 Data Processing Services ``` -Distributed data processing architecture: -├── Real-time document processing (20+ format support) -├── Batch data processing pipelines -├── OCR and table structure extraction -└── Vectorization and index construction +Distributed Data Processing Pipeline: +├── Document Ingestion +│ ├── Multi-format support (20+ formats) +│ ├── PDF parsing with OCR +│ └── Table structure extraction +├── Chunking & Processing +│ ├── Semantic chunking algorithms +│ ├── Batch processing with Celery +│ └── Ray distributed computing +├── Vectorization & Indexing +│ ├── Embedding generation +│ ├── Elasticsearch indexing +│ └── Incremental updates +└── Preview Generation + ├── PDF to preview conversion + └── Image thumbnail generation ``` ### 🌐 MCP Ecosystem ``` -Model Context Protocol tool integration: -├── Standardized tool interfaces -├── Plugin architecture -├── Third-party service 
integration -└── Custom tool development +Model Context Protocol Integration: +├── Local MCP Service +│ ├── Stable built-in tools +│ └── Docker-based tool containers +├── Remote MCP Service +│ ├── Dynamic remote MCP server proxy +│ └── Outer API tool integration +├── MCP Container Management +│ ├── Container lifecycle (Docker) +│ ├── Log aggregation +│ └── Resource monitoring +└── FastMCP Server + ├── Tool registration and discovery + └── Standardized tool interfaces +``` + +### 🔄 A2A Protocol Support +``` +Agent-to-Agent Communication: +├── A2A Client +│ ├── Agent card discovery +│ ├── Task submission and streaming +│ └── Response handling +├── A2A Server +│ ├── Agent card registration +│ ├── Task processing +│ └── Message streaming +└── Agent Adapter + ├── Nexent ↔ A2A protocol translation + └── Skill execution coordination ``` ## 🚀 Distributed Architecture Features ### ⚡ Asynchronous Processing Architecture -- **Foundation Framework**: High-performance async processing based on asyncio +- **Foundation**: asyncio-based high-performance async processing +- **Task Queue**: Celery + Redis for distributed task execution +- **Computing Framework**: Ray for distributed computing in data processing +- **Stream Processing**: Server-Sent Events (SSE) for real-time streaming - **Concurrency Control**: Thread-safe concurrent processing mechanisms -- **Task Queue**: Celery + Ray distributed task execution -- **Stream Processing**: Real-time data and response streaming ### 🔄 Microservices Design ``` -Service decomposition strategy: -├── nexent (main service) - Agent core logic -├── nexent-data-process (data processing) - Document processing pipeline -├── nexent-mcp-service (MCP service) - Tool protocol service -└── Optional services (SSH, monitoring, etc.) 
+Service Decomposition Strategy: +├── nexent-config (5010) +│ └── Agent CRUD, configuration, user management +├── nexent-runtime (5014) +│ └── Agent execution, streaming responses +├── nexent-mcp (5011/5015) +│ └── MCP tool protocol, container management +├── nexent-northbound (5013) +│ └── External APIs, A2A protocol, partner integration +├── nexent-data-process (5012) +│ └── Document processing, vectorization, Celery workers +├── nexent-web (3000) +│ └── Frontend Next.js application +└── Optional Services + ├── nexent-redis (6379) - Caching and message broker + ├── nexent-elasticsearch (9210) - Vector search + ├── nexent-postgresql (5434) - Relational data + └── nexent-minio (9010) - Object storage ``` ### 🌍 Containerized Deployment ``` -Docker Compose service orchestration: -├── Application service containerization -├── Database service isolation -├── Network layer security configuration -└── Volume mounting for data persistence +Docker Compose Orchestration: +├── Application Services Containerization +├── Database Service Isolation +├── Network Layer Security (bridge network) +├── Volume Mounting for Data Persistence +├── Health Checks and Auto-restart +└── Kubernetes Support (IS_DEPLOYED_BY_KUBERNETES) ``` ## 🔐 Security and Scalability ### 🛡️ Security Architecture - **Authentication**: Multi-tenant support, user permission management -- **Data Security**: End-to-end encryption, secure transmission protocols -- **Network Security**: Inter-service secure communication, firewall configuration +- **Authorization**: Role-based access control (RBAC), group-based permissions +- **Data Security**: Tenant data isolation, secure transmission (HTTPS) +- **Network Security**: Secure inter-service communication, Docker network isolation ### 📈 Scalability Design - **Horizontal Scaling**: Independent microservice scaling, load balancing - **Vertical Scaling**: Resource pool management, intelligent scheduling -- **Storage Scaling**:
Distributed storage (MinIO), data sharding (Elasticsearch) +- **Cache Scaling**: Redis clustering for session and data caching ### 🔧 Modular Architecture -- **Loose Coupling Design**: Low inter-service dependencies, standardized interfaces +- **Loose Coupling**: Low inter-service dependencies, standardized interfaces - **Plugin Architecture**: Hot-swappable tools and models -- **Configuration Management**: Environment isolation, dynamic configuration updates +- **Configuration Management**: Environment-based configuration, dynamic updates +- **Single Source of Truth**: Environment variables centralized in `backend/consts/const.py` ## 🔄 Data Flow Architecture ### 📥 User Request Flow ``` -User Input → Frontend Validation → API Gateway → Route Distribution → Business Service → Data Access → Database +User Input → Frontend Validation → API Gateway (nexent-config) + → Route Distribution → Business Service (Service Layer) + → Data Access (Database Layer) → PostgreSQL/Elasticsearch/Redis/MinIO ``` ### 🤖 Agent Execution Flow ``` -User Message → Agent Creation → Tool Calling → Model Inference → Streaming Response → Result Storage +User Message → nexent-runtime → Agent Service + → Memory Context Build → Tool Resolution + → Model Inference (Streaming) → SSE Response + → Conversation Save → History Storage ``` ### 📚 Knowledge Base Processing Flow ``` -File Upload → Temporary Storage → Data Processing → Vectorization → Knowledge Base Storage → Index Update +File Upload → nexent-config → nexent-data-process + → Document Parsing → Chunking → Vectorization + → Elasticsearch Index → Search Ready ``` ### ⚡ Real-time Processing Flow ``` -Real-time Input → Instant Processing → Agent Response → Streaming Output +Real-time Input → Streaming Endpoint → Async Processing + → SSE Stream → Frontend Display ``` ## 🎯 Architecture Advantages ### 🏢 Enterprise-grade Features -- **High Availability**: Multi-layer redundancy, failover capabilities -- **High Performance**: Asynchronous processing, 
intelligent caching +- **High Availability**: Multi-service redundancy, health checks, auto-restart +- **High Performance**: Async processing, Redis caching, vector search optimization - **High Concurrency**: Distributed architecture, load balancing -- **Monitoring Friendly**: Comprehensive logging and status monitoring +- **Monitoring Friendly**: OpenTelemetry observability, Grafana Tempo tracing, structured logging ### 🔧 Developer Friendly -- **Modular Development**: Clear hierarchical structure -- **Standardized Interfaces**: Unified API design -- **Flexible Configuration**: Environment adaptation, feature toggles -- **Easy Testing**: Unit testing and integration testing support +- **Modular Development**: Clean layered architecture (App → Service → Database) +- **Standardized Interfaces**: Unified API design with FastAPI +- **Flexible Configuration**: Environment-based configuration, hot-reload +- **Easy Testing**: Comprehensive test suites, dependency injection ### 🌱 Ecosystem Compatibility -- **MCP Standard**: Compliant with Model Context Protocol -- **Open Source Ecosystem**: Integration with rich open source tools -- **Cloud Native**: Support for Kubernetes and Docker deployment +- **MCP Standard**: Full Model Context Protocol implementation +- **A2A Protocol**: Agent-to-agent communication support +- **Open Source Ecosystem**: Integration with SmolAgents, FastMCP, LangChain +- **Cloud Native**: Docker Compose and Kubernetes deployment support - **Multi-model Support**: Compatible with mainstream AI model providers --- -This architectural design ensures that Nexent can provide a stable, scalable AI agent service platform while maintaining high performance. Whether for individual users or enterprise-level deployments, it delivers excellent user experience and technical assurance. \ No newline at end of file +This architectural design ensures that Nexent can provide a stable, scalable AI agent service platform while maintaining high performance. 
Whether for individual users or enterprise-level deployments, it delivers excellent user experience and technical assurance. diff --git a/doc/docs/en/quick-start/installation.md b/doc/docs/en/quick-start/installation.md index f01576513..7b6a9cb76 100644 --- a/doc/docs/en/quick-start/installation.md +++ b/doc/docs/en/quick-start/installation.md @@ -1,13 +1,16 @@ -# Installation & Deployment +# Docker Installation & Deployment ## 🎯 Prerequisites -| Resource | Minimum | -|----------|---------| -| **CPU** | 2 cores | -| **RAM** | 6 GiB | -| **Architecture** | x86_64 / ARM64 | -| **Software** | Docker & Docker Compose installed | +| Resource | Minimum | Recommended | +|----------|---------|-------------| +| **CPU** | 4 cores | 8 cores | +| **RAM** | 8 GiB | 16 GiB | +| **Disk** | 40 GiB | 100 GiB | +| **Architecture** | x86_64 / ARM64 | | +| **Software** | Docker & Docker Compose installed | Docker 24+, Docker Compose v2+ | + +> **💡 Note**: The recommended configuration of **8 cores and 16 GiB RAM** provides good performance for production workloads. ## 🚀 Quick Start @@ -16,10 +19,9 @@ ```bash git clone https://github.com/ModelEngine-Group/nexent.git cd nexent/docker -cp .env.example .env # Configure environment variables ``` -> **💡 Tip**: If there are no special requirements, you can directly use `.env.example` for deployment without making any changes. If you need to configure voice models (STT/TTS), you will need to set the relevant parameters in `.env`. We will work on making this configuration available through the frontend soon—stay tuned. +> **💡 Tip**: `deploy.sh` automatically copies `.env.example` to `docker/.env` when `docker/.env` does not exist. If you need to configure voice models (STT/TTS), update the related values in `docker/.env` before or after deployment. ### 2. 
Deployment Options @@ -29,27 +31,53 @@ Run the following command to start deployment: bash deploy.sh ``` -After executing this command, the system will provide two different versions for you to choose from: +After running the command, the script opens Bash TUI menus for deployment options. Use arrow keys or `j/k` to move, Space to toggle multi-select items, Enter to confirm, `b`/Backspace to go back, and `q` to quit. + +**Deployment Components:** +- **infrastructure (required)**: Elasticsearch, PostgreSQL, Redis, MinIO +- **application (selected by default, optional)**: config, runtime, mcp, northbound, web +- **data-process (optional)**: data processing service +- **supabase (optional)**: enables user, tenant, and authentication features +- **terminal (optional)**: enables the OpenSSH terminal tool +- **monitoring (optional)**: enables observability components and then prompts for a provider + +**Port Policy:** +- **development (default)**: publishes debug and internal service ports for local troubleshooting +- **production**: publishes only production entry ports + +**Image Source:** +- **general (default)**: uses standard public registries +- **mainland**: uses mainland China mirrors +- **local-latest**: uses local `latest` Nexent images and avoids pulling Nexent application images + +You can also pass options directly: -**Version Selection:** -- **Speed version (Lightweight & Fast Deployment, Default)**: Quick startup of core features, suitable for individual users and small teams -- **Full version (Complete Feature Edition)**: Provides enterprise-level tenant management and resource isolation features, but takes longer to install, suitable for enterprise users +```bash +# Default component set, development port policy, standard image source +bash deploy.sh --components infrastructure,application --port-policy development --image-source general + +# Enable user/tenant features, data processing, and terminal +bash deploy.sh --components 
infrastructure,application,supabase,data-process,terminal + +# Use mainland China image sources +bash deploy.sh --image-source mainland + +# Use local latest images +bash deploy.sh --image-source local-latest +``` -**Deployment Modes:** -- **Development mode (default)**: Exposes all service ports for debugging -- **Infrastructure mode**: Only starts infrastructure services -- **Production mode**: Only exposes port 3000 for security +After a successful deployment, non-sensitive choices are saved to `docker/deploy.options`. The next interactive deployment can reuse the local config or run a full reconfiguration. -**Optional Components:** -- **Terminal Tool**: Enables openssh-server for AI agent shell command execution -- **Regional optimization**: Mainland China users can use optimized image sources +#### ⚠️ Important Notes -### ⚠️ Important Notes 1️⃣ **When deploying v1.8.0 or later for the first time**, please pay special attention to the `suadmin` super administrator account information output in the Docker logs. This account has the highest system privileges, and the password is only displayed upon first generation. It cannot be viewed again later, so please be sure to save it securely. +> This account is used for permission management only and cannot develop agents or create knowledge bases. Log in with this account and complete: Access tenant resources → Create tenant → Create tenant administrator, then log in with the tenant administrator account to use all features. For role permissions, see [User Management](../user-guide/user-management). + 2️⃣ Forgot to note the `suadmin` account password? 
Follow these steps: + ```bash -# Step1: Delete su account record in supabase container +# Step 1: Delete su account record in supabase container docker exec -it supabase-db-mini bash psql -U postgres select id, email from auth.users; @@ -57,12 +85,12 @@ select id, email from auth.users; delete from auth.users where id = 'your_user_id'; delete from auth.identities where user_id = 'your_user_id'; -# Step2: Delete su account record in nexent database +# Step 2: Delete su account record in nexent database docker exec -it nexent-postgresql bash psql -U root -d nexent delete from nexent.user_tenant_t where user_id = 'your_user_id'; -# Step3: Redeploy and record the su account password +# Step 3: Redeploy and record the su account password ``` ### 3. Access Your Installation @@ -77,21 +105,54 @@ When deployment completes successfully: ## 🏗️ Service Architecture -Nexent uses a microservices architecture with the following core services: +Nexent uses a microservices architecture deployed via Docker Compose. 
-**Core Services:** -- `nexent`: Backend service (port 5010) -- `nexent-web`: Frontend interface (port 3000) -- `nexent-data-process`: Data processing service (port 5012) +**Application Services:** +| Service | Description | Default Port | +|---------|-------------|--------------| +| nexent | Backend service | 5010 | +| nexent-web | Web frontend | 3000 | +| nexent-data-process | Data processing service | 5012 | +| nexent-northbound | Northbound API service | 5013 | **Infrastructure Services:** -- `nexent-postgresql`: Database (port 5434) -- `nexent-elasticsearch`: Search engine (port 9210) -- `nexent-minio`: Object storage (port 9010, console 9011) -- `redis`: Cache service (port 6379) +| Service | Description | +|---------|-------------| +| nexent-postgresql | Relational database | +| nexent-elasticsearch | Search and indexing engine | +| nexent-minio | S3-compatible object storage | +| redis | Caching layer | + +**Supabase Services (when `supabase` is selected):** +| Service | Description | +|---------|-------------| +| supabase-kong | API Gateway | +| supabase-auth | Authentication service | +| supabase-db-mini | Database service | **Optional Services:** -- `nexent-openssh-server`: SSH server for Terminal tool (port 2222) +| Service | Description | +|---------|-------------| +| nexent-openssh-server | SSH terminal for AI agents | +| nexent-monitoring | Optional observability stack | + +Internal services communicate using the Docker internal network. 
+ +## 💾 Data Persistence + +Nexent uses Docker volumes for data persistence: + +| Data Type | Volume Name | Default Host Path | +|-----------|------------------|-------------------| +| PostgreSQL | nexent-postgresql-data | `{dataDir}/postgresql` | +| Elasticsearch | nexent-elasticsearch-data | `{dataDir}/elasticsearch` | +| Redis | nexent-redis-data | `{dataDir}/redis` | +| MinIO | nexent-minio-data | `{dataDir}/minio` | +| Supabase DB (when `supabase` is selected) | nexent-supabase-db-data | `{dataDir}/supabase-db` | + +Default `dataDir` is `./volumes` (configurable via `ROOT_DIR` in `.env`). + +Uninstall is handled by `docker/uninstall.sh`. It prompts before deleting persistent data by default; you can also pass `--delete-volumes true|false`, `--remove-volumes`, `--keep-volumes`, or use `bash uninstall.sh delete-all` to remove containers and persistent data. ## 🔌 Port Mapping @@ -100,6 +161,7 @@ Nexent uses a microservices architecture with the following core services: | Web Interface | 3000 | 3000 | Main application access | | Backend API | 5010 | 5010 | Backend service | | Data Processing | 5012 | 5012 | Data processing API | +| Northbound API | 5013 | 5013 | Northbound interface service (A2A/MCP integration) | | PostgreSQL | 5432 | 5434 | Database connection | | Elasticsearch | 9200 | 9210 | Search engine API | | MinIO API | 9000 | 9010 | Object storage API | @@ -109,6 +171,240 @@ Nexent uses a microservices architecture with the following core services: For complete port mapping details, see our [Dev Container Guide](../deployment/devcontainer.md#port-mapping). +## 🔧 Advanced Configuration + +### Monitoring Configuration + +Select the `monitoring` component in the deployment script UI to enable OpenTelemetry monitoring. The script synchronizes `ENABLE_TELEMETRY`, `MONITORING_PROVIDER`, and `MONITORING_DASHBOARD_URL` in `docker/.env`, then starts the matching observability services from `docker/docker-compose-monitoring.yml`. 
+ +```bash +cd nexent/docker +bash deploy.sh +``` + +If `docker/deploy.options` already exists, the script asks whether to reuse local configuration. Choose to reconfigure/overwrite local configuration, then select `monitoring` in the component menu and manually choose `grafana`, `phoenix`, `langfuse`, `langsmith`, `zipkin`, or `otlp` in the provider menu. + +Supported providers: + +| Provider | Purpose | Default URL | +|----------|---------|-------------| +| `otlp` | OpenTelemetry Collector only, useful for forwarding to an external platform | No dashboard | +| `phoenix` | Local Phoenix trace analysis | `http://localhost:6006` | +| `langfuse` | Local Langfuse observability stack | `http://localhost:3001` | +| `langsmith` | Forwarding to hosted LangSmith | `https://smith.langchain.com/` | +| `grafana` | Local Grafana + Tempo | `http://localhost:3002/d/nexent-llm-agent/nexent-agent-trace-monitoring?orgId=1` | +| `zipkin` | Local Zipkin | `http://localhost:9411` | + +To change ports, image versions, or local Langfuse bootstrap credentials, copy and edit the monitoring environment file first: + +```bash +cp docker/monitoring/monitoring.env.example docker/monitoring/monitoring.env +``` + +Common variables: + +| Variable | Description | +|----------|-------------| +| `MONITORING_PROVIDER` | Default monitoring provider; updated when you choose a provider in the deployment script | +| `OTEL_COLLECTOR_HTTP_PORT` / `OTEL_COLLECTOR_GRPC_PORT` | Published OTLP HTTP/gRPC ports | +| `LANGSMITH_API_KEY` / `LANGSMITH_PROJECT` | LangSmith forwarding configuration | +| `LANGFUSE_INIT_USER_EMAIL` / `LANGFUSE_INIT_USER_PASSWORD` | Local Langfuse bootstrap admin | +| `GRAFANA_ADMIN_USER` / `GRAFANA_ADMIN_PASSWORD` | Local Grafana admin | + +Before choosing the `langsmith` provider, configure `LANGSMITH_API_KEY` in `docker/monitoring/monitoring.env`. 
If you only need to connect to an existing external Collector, adjust the OTLP target in `docker/.env`: + +```bash +ENABLE_TELEMETRY=true +MONITORING_PROVIDER=otlp +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http +MONITORING_DASHBOARD_URL= +``` + +> **Production note**: Replace default passwords, secrets, and the Langfuse `ENCRYPTION_KEY`. Restrict dashboard and Collector access with a reverse proxy or firewall. + +### OAuth Login Configuration + +OAuth login requires the `supabase` component. When enabling third-party login, deploy `supabase` and set `OAUTH_CALLBACK_BASE_URL` to the browser-accessible Nexent Web URL. + +```bash +bash deploy.sh --components infrastructure,application,supabase +``` + +For Docker, configure OAuth in `docker/.env`: + +```bash +# Web entry URL. The full callback path is generated as: +# {OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider= +OAUTH_CALLBACK_BASE_URL=http://localhost:3000 + +# GitHub OAuth +GITHUB_OAUTH_CLIENT_ID= +GITHUB_OAUTH_CLIENT_SECRET= + +# GDE OAuth +GDE_URL= +GDE_OAUTH_CLIENT_ID= +GDE_OAUTH_CLIENT_SECRET= + +# Link App OAuth +LINK_APP_URL= +LINK_APP_OAUTH_CLIENT_ID= +LINK_APP_OAUTH_CLIENT_SECRET= + +# WeChat OAuth +ENABLE_WECHAT_OAUTH=false +WECHAT_OAUTH_APP_ID= +WECHAT_OAUTH_APP_SECRET= + +# TLS verification when contacting OAuth providers +OAUTH_SSL_VERIFY=true +OAUTH_CA_BUNDLE= +``` + +Provider enablement rules: + +| Provider | Required variables | Callback URL | +|----------|--------------------|--------------| +| GitHub | `GITHUB_OAUTH_CLIENT_ID`, `GITHUB_OAUTH_CLIENT_SECRET` | `{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=github` | +| GDE | `GDE_URL`, `GDE_OAUTH_CLIENT_ID`, `GDE_OAUTH_CLIENT_SECRET` | `{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=gde` | +| Link App | `LINK_APP_URL`, `LINK_APP_OAUTH_CLIENT_ID`, `LINK_APP_OAUTH_CLIENT_SECRET` | `{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=link_app` | +| WeChat | 
`ENABLE_WECHAT_OAUTH=true`, `WECHAT_OAUTH_APP_ID`, `WECHAT_OAUTH_APP_SECRET` | `{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=wechat` | + +For local Docker, a GitHub callback example is `http://localhost:3000/api/user/oauth/callback?provider=github`. In production, use a public HTTPS domain such as `https://nexent.example.com/api/user/oauth/callback?provider=github` and register the exact same URL in the OAuth provider console. + +### CAS Login Configuration + +CAS SSO does not require the `supabase` component. Set `CAS_CALLBACK_BASE_URL` to the browser-accessible Nexent Web URL without a trailing `/`. `CAS_SERVER_URL` is the CAS Server root URL and should also not include a trailing `/`. + +For Docker, configure CAS in `docker/.env`: + +```bash +CAS_ENABLED=true +CAS_SERVER_URL=http://localhost:8080/cas +CAS_VALIDATE_PATH=/p3/serviceValidate +CAS_CALLBACK_BASE_URL=http://localhost:3000 + +# disabled: disable the CAS login entry and automatic redirects +# button: show CAS as an optional login button +# force: redirect unauthenticated Nexent users to CAS automatically +CAS_LOGIN_MODE=force + +# Empty means use <cas:user>; set userName to read the userName attribute instead +CAS_USER_ATTRIBUTE= +CAS_EMAIL_ATTRIBUTE=email +CAS_ROLE_ATTRIBUTE=role +CAS_TENANT_ATTRIBUTE=tenant_id +CAS_ROLE_MAP_JSON={"cas-admin":"ADMIN","cas-user":"USER"} +CAS_SESSION_MAX_AGE_SECONDS=3600 +LOCAL_SESSION_MAX_AGE_SECONDS=3600 +CAS_RENEW_BEFORE_SECONDS=300 +CAS_RENEW_TIMEOUT_SECONDS=10 +CAS_SYNTHETIC_EMAIL_DOMAIN=cas.local + +# Empty means Nexent logout will not call the CAS Server logout endpoint. +# /logout is resolved against CAS_SERVER_URL. 
+CAS_LOGOUT_URL=/logout +CAS_SSL_VERIFY=true +CAS_CA_BUNDLE= +``` + +Common CAS URLs: + +| Purpose | URL | +|---------|-----| +| Nexent login entry | `{CAS_CALLBACK_BASE_URL}/api/user/cas/login?redirect=/` | +| CAS service callback | `{CAS_CALLBACK_BASE_URL}/api/user/cas/callback` | +| CAS silent renewal callback | `{CAS_CALLBACK_BASE_URL}/api/user/cas/renew_callback` | +| CAS single logout callback | `POST {CAS_CALLBACK_BASE_URL}/api/user/cas/logout_callback` | + +For Apereo CAS JSON Service Registry, create a service registration file such as `Nexent-10001.json` in the service registry directory configured by your CAS deployment. The `id` must be globally unique. This is a local Docker example: + +```json +{ + "@class": "org.apereo.cas.services.RegexRegisteredService", + "serviceId": "http://localhost:3000.*", + "name": "Nexent CAS Client", + "id": 10001, + "description": "Nexent CAS SSO client", + "evaluationOrder": 1, + "logoutType": "BACK_CHANNEL", + "logoutUrl": "http://localhost:3000/api/user/cas/logout_callback" +} +``` + +In production, keep `CAS_SSL_VERIFY=true`; for self-signed certificates, prefer `CAS_CA_BUNDLE` and only use `CAS_SSL_VERIFY=false` for local testing. 
+ +#### CAS Integration with ModelEngine + +When integrating with ModelEngine through the CAS protocol, deploy Nexent with the following configuration: + +```bash +CAS_ENABLED=true +CAS_SERVER_URL=https://<modelengine-host>:5443/SSOSvr +CAS_VALIDATE_PATH=/p3/serviceValidate +CAS_CALLBACK_BASE_URL=http://<nexent-host>:3000 +CAS_LOGIN_MODE=force +CAS_USER_ATTRIBUTE=userName +CAS_EMAIL_ATTRIBUTE=email +CAS_ROLE_ATTRIBUTE=userType +CAS_TENANT_ATTRIBUTE=tenant_id +CAS_ROLE_MAP_JSON={"1":"ADMIN","3":"DEV"} +CAS_SESSION_MAX_AGE_SECONDS=3600 +LOCAL_SESSION_MAX_AGE_SECONDS=3600 +CAS_RENEW_BEFORE_SECONDS=300 +CAS_RENEW_TIMEOUT_SECONDS=10 +CAS_SYNTHETIC_EMAIL_DOMAIN=cas.local +CAS_LOGOUT_URL=/logout?service=http://<nexent-host>:3000 +CAS_SSL_VERIFY=false +CAS_CA_BUNDLE= +``` + +You also need to add a CAS client service registration file in the OMS container. Use the following steps as a reference: + +```bash +# Create the registration file, paste the JSON content into it, and save it. +vim Nexent-10000001.json +{ + "@class": "org.apereo.cas.services.CasRegisteredService", + "serviceId": "http://<nexent-host>:3000.*", + "name": "Nexent CAS Client", + "id": 10000001, + "description": "Nexent CAS SSO client", + "evaluationOrder": 1, + "logoutType": "BACK_CHANNEL", + "logoutUrl": "http://<nexent-host>:3000/api/user/cas/logout_callback" +} + +# Run the following command to copy the registration file into the container. +kubectl cp Nexent-10000001.json model-engine/$(kubectl get pods -n model-engine -l app=oms --no-headers | awk '{print $1}'):/opt/huawei/fce/apps/platform/webapps/SSOSvr/WEB-INF/classes/services/Nexent-10000001.json +kubectl exec -i -n model-engine $(kubectl get pods -n model-engine -l app=oms --no-headers | awk '{print $1}') -- chown tomcat:fusioncube /opt/huawei/fce/apps/platform/webapps/SSOSvr/WEB-INF/classes/services/Nexent-10000001.json +``` + +### Northbound Interface Configuration (NORTHBOUND_EXTERNAL_URL) + +If you need to use any of the following features, configure the `NORTHBOUND_EXTERNAL_URL` environment variable: + +1. 
**A2A Protocol Integration** - Third-party systems calling Nexent agents via A2A protocol +2. **MCP Tool Access** - Using MCP protocol to access Nexent resources like documents + +**Configuration:** + +Set the publicly accessible URL in your `.env` file: + +```bash +# Format: protocol://host:port/api +# Local development (default): +NORTHBOUND_EXTERNAL_URL=http://localhost:5013/api + +# Production - use your public IP or domain: +NORTHBOUND_EXTERNAL_URL=http://your-public-ip:5013/api +# or +NORTHBOUND_EXTERNAL_URL=https://api.yourdomain.com/api +``` + +> **Important**: The URL must include the `/api` suffix because the Northbound service uses FastAPI's `root_path="/api"` configuration. + ## 💡 Need Help - Browse the [FAQ](./faq) for common install issues @@ -119,4 +415,4 @@ For complete port mapping details, see our [Dev Container Guide](../deployment/d Want to build from source or add new features? Check the [Docker Build Guide](../deployment/docker-build) for step-by-step instructions. -For detailed setup instructions and customization options, see our [Developer Guide](../developer-guide/overview). \ No newline at end of file +For detailed setup instructions and customization options, see our [Developer Guide](../developer-guide/overview). diff --git a/doc/docs/en/quick-start/kubernetes-installation.md b/doc/docs/en/quick-start/kubernetes-installation.md index 44ca3c993..a10873c7c 100644 --- a/doc/docs/en/quick-start/kubernetes-installation.md +++ b/doc/docs/en/quick-start/kubernetes-installation.md @@ -35,21 +35,29 @@ cd nexent/k8s/helm Run the deployment script: ```bash -./deploy-helm.sh apply +./deploy.sh ``` -After executing this command, the system will prompt for configuration options: +After running the command, the script opens Bash TUI menus for configuration. Use arrow keys or `j/k` to move, Space to toggle multi-select items, Enter to confirm, `b`/Backspace to go back, and `q` to quit. 
-**Version Selection:** -- **Speed version (Lightweight & Fast Deployment, Default)**: Quick startup of core features, suitable for individual users and small teams -- **Full version (Complete Feature Edition)**: Provides enterprise-level tenant management and resource isolation features, includes Supabase authentication +**Deployment Components:** +- **infrastructure (required)**: Elasticsearch, PostgreSQL, Redis, MinIO +- **application (selected by default, optional)**: config, runtime, mcp, northbound, web +- **data-process (optional)**: data processing service +- **supabase (optional)**: enables user, tenant, and authentication features +- **terminal (optional)**: enables the OpenSSH terminal tool +- **monitoring (optional)**: enables observability components and then prompts for a provider -**Image Source Selection:** -- **Mainland China**: Uses optimized regional mirrors for faster image pulling -- **General**: Uses standard Docker Hub registries +**Port Policy:** +- **development (default)**: uses NodePort for Web and selected debug/internal services +- **production**: keeps internal services as ClusterIP and exposes only production entrypoints -**Optional Components:** -- **Terminal Tool**: Enables openssh-server for AI agent shell command execution +**Image Source:** +- **general (default)**: uses standard public registries +- **mainland**: uses mainland China mirrors +- **local-latest**: uses local `latest` images and local-friendly pull policies for Nexent application images + +After a successful deployment, non-sensitive choices are saved to `k8s/helm/deploy.options`. The next interactive deployment can reuse the local config or run a full reconfiguration. ### ⚠️ Important Notes @@ -72,7 +80,7 @@ kubectl exec -it -n nexent deploy/nexent-postgresql -- psql -U root -d nexent -c "DELETE FROM nexent.user_tenant_t WHERE user_id='your_user_id';" # Step 3: Re-deploy and record the su account password -./deploy-helm.sh apply +./deploy.sh ``` ### 4. 
Access Your Installation @@ -113,7 +121,7 @@ Nexent uses a microservices architecture deployed via Helm charts: | nexent-redis | Caching layer | | nexent-minio | S3-compatible object storage | -**Supabase Services (Full Version Only):** +**Supabase Services (when `supabase` is selected):** | Service | Description | |---------|-------------| | nexent-supabase-kong | API Gateway | @@ -124,13 +132,14 @@ Nexent uses a microservices architecture deployed via Helm charts: | Service | Description | |---------|-------------| | nexent-openssh-server | SSH terminal for AI agents | +| nexent-monitoring | Optional observability stack | ## 🔌 Port Mapping | Service | Internal Port | NodePort | Description | |---------|---------------|----------|-------------| | Web Interface | 3000 | 30000 | Main application access | -| Northbound API | 5010 | 30013 | Northbound API service | +| Northbound API | 5013 | 30013 | Northbound API service | | SSH Server | 22 | 30022 | Terminal tool access | For internal service communication, services use Kubernetes internal DNS (e.g., `http://nexent-config:5010`). 
@@ -141,34 +150,261 @@ Nexent uses PersistentVolumes for data persistence: | Data Type | PersistentVolume | Default Host Path | |-----------|------------------|-------------------| -| Elasticsearch | nexent-elasticsearch-pv | `{dataDir}/elasticsearch` | -| PostgreSQL | nexent-postgresql-pv | `{dataDir}/postgresql` | -| Redis | nexent-redis-pv | `{dataDir}/redis` | -| MinIO | nexent-minio-pv | `{dataDir}/minio` | -| Supabase DB (Full) | nexent-supabase-db-pv | `{dataDir}/supabase-db` | +| Elasticsearch | nexent-elasticsearch-pv | `/var/lib/nexent-data/nexent-elasticsearch` | +| PostgreSQL | nexent-postgresql-pv | `/var/lib/nexent-data/nexent-postgresql` | +| Redis | nexent-redis-pv | `/var/lib/nexent-data/nexent-redis` | +| MinIO | nexent-minio-pv | `/var/lib/nexent-data/nexent-minio` | +| Supabase DB (when `supabase` is selected) | nexent-supabase-db-pv | `/var/lib/nexent-data/nexent-supabase-db` | -Default `dataDir` is `/var/lib/nexent-data` (configurable in `values.yaml`). +Helm uninstall does not delete local hostPath data by default. Use `./uninstall.sh --delete-local-data true` to delete known Nexent local volume contents under `/var/lib/nexent-data/nexent-*`, or `--keep-local-data` to preserve them explicitly. 
## 🔧 Deployment Commands ```bash # Deploy with interactive prompts -./deploy-helm.sh apply +./deploy.sh + +# Non-interactive deployment with the default component set +./deploy.sh --components infrastructure,application --port-policy development --image-source general + +# Enable user/tenant features, data processing, and terminal +./deploy.sh --components infrastructure,application,supabase,data-process,terminal # Deploy with mainland China image sources -./deploy-helm.sh apply --is-mainland Y +./deploy.sh --image-source mainland -# Deploy full version (with Supabase) -./deploy-helm.sh apply --deployment-version full +# Use local latest images +./deploy.sh --image-source local-latest # Clean helm state only (fixes stuck releases) -./deploy-helm.sh clean +./uninstall.sh clean + +# Uninstall; local data is preserved by default, with interactive prompts for namespace and local data deletion +./uninstall.sh + +# Uninstall and delete the namespace +./uninstall.sh --delete-namespace true + +# Uninstall and delete local hostPath data +./uninstall.sh --delete-local-data true + +# Complete uninstall including namespace and local hostPath data +./uninstall.sh delete-all + +# Complete uninstall but preserve local hostPath data +./uninstall.sh delete-all --keep-local-data +``` + +## 🔧 Advanced Configuration + +### Monitoring Configuration + +Kubernetes deployments enable monitoring through the `monitoring` component in the deployment script UI. The deployment script renders runtime Helm values for `global.monitoring.enabled`, `global.monitoring.provider`, and `global.monitoring.dashboardUrl`, and enables the `nexent-monitoring` subchart. + +```bash +cd nexent/k8s/helm +./deploy.sh +``` + +If `k8s/helm/deploy.options` already exists, the script asks whether to reuse local configuration. 
Choose to reconfigure/overwrite local configuration, then select `monitoring` in the component menu and manually choose `grafana`, `phoenix`, `langfuse`, `langsmith`, `zipkin`, or `otlp` in the provider menu. + +Supported providers: + +| Provider | Purpose | Default URL | +|----------|---------|-------------| +| `otlp` | OpenTelemetry Collector only, useful for forwarding to an external platform | No dashboard | +| `phoenix` | Local Phoenix trace analysis | `http://localhost:30006` | +| `langfuse` | Local Langfuse observability stack | `http://localhost:30001` | +| `langsmith` | Forwarding to hosted LangSmith | `https://smith.langchain.com/` | +| `grafana` | Local Grafana + Tempo | `http://localhost:30002/d/nexent-llm-agent/nexent-agent-trace-monitoring?orgId=1` | +| `zipkin` | Local Zipkin | `http://localhost:30011` | + +Before choosing the `langsmith` provider, configure `global.monitoring.langsmithApiKey` and `global.monitoring.langsmithProject` in `k8s/helm/nexent/values.yaml`. To change local Grafana, Langfuse, or dashboard ports, adjust the values file first, then re-run the deployment script, choose to reconfigure, and manually select `monitoring`. 
+ +Common Helm values: + +| Value | Description | +|-------|-------------| +| `global.monitoring.enabled` | Enables OpenTelemetry export in the Nexent backend | +| `global.monitoring.provider` | Backend provider label: `otlp`, `phoenix`, `langfuse`, `langsmith`, `grafana`, `zipkin` | +| `global.monitoring.otlpEndpoint` | Backend OTLP HTTP endpoint, default `http://nexent-otel-collector:4318` | +| `global.monitoring.dashboardUrl` | Frontend monitoring entry URL; leave empty to hide the entry | +| `global.monitoring.traceContentMode` | Trace content capture mode: `summary`, `metrics`, or `full` | +| `nexent-monitoring.<provider>.service.nodePort` | NodePort override for provider dashboards | +| `nexent-monitoring.langfuse.init.*` | Local Langfuse bootstrap organization, project, and admin account | +| `nexent-monitoring.grafana.adminUser` / `adminPassword` | Local Grafana admin credentials | + +Check monitoring status: + +```bash +kubectl get pods -n nexent | grep -E 'otel|phoenix|grafana|tempo|zipkin|langfuse' +kubectl get svc -n nexent | grep -E 'otel|phoenix|grafana|zipkin|langfuse' +``` + +> **Production note**: Replace default passwords, secrets, and the Langfuse `encryptionKey`. Prefer ClusterIP services or a controlled Ingress for dashboards. + +### OAuth Login Configuration -# Uninstall but preserve data -./deploy-helm.sh delete +OAuth login requires the `supabase` component. When enabling third-party login, deploy `supabase` and set `nexent-common.config.oauth.callbackBaseUrl` to the browser-accessible Nexent Web URL. 
-# Complete uninstall including all data -./deploy-helm.sh delete-all +```bash +./deploy.sh --components infrastructure,application,supabase +``` + +Kubernetes writes OAuth settings into backend environment variables through `nexent-common` `config.oauth.*` values: + +```bash +helm upgrade --install nexent nexent \ + --namespace nexent --create-namespace \ + --set global.deploymentComponents.supabase=true \ + --set nexent-supabase-kong.enabled=true \ + --set nexent-supabase-auth.enabled=true \ + --set nexent-supabase-db.enabled=true \ + --set nexent-common.config.oauth.callbackBaseUrl=https://nexent.example.com \ + --set nexent-common.config.oauth.githubClientId=your_github_client_id \ + --set nexent-common.config.oauth.githubClientSecret=your_github_client_secret +``` + +Configurable OAuth values: + +| Value | Environment variable | Description | +|-------|----------------------|-------------| +| `nexent-common.config.oauth.callbackBaseUrl` | `OAUTH_CALLBACK_BASE_URL` | Web entry URL; the callback path is appended automatically | +| `nexent-common.config.oauth.githubClientId` | `GITHUB_OAUTH_CLIENT_ID` | GitHub OAuth Client ID | +| `nexent-common.config.oauth.githubClientSecret` | `GITHUB_OAUTH_CLIENT_SECRET` | GitHub OAuth Client Secret | +| `nexent-common.config.oauth.gdeUrl` | `GDE_URL` | GDE OAuth service URL | +| `nexent-common.config.oauth.gdeClientId` | `GDE_OAUTH_CLIENT_ID` | GDE OAuth Client ID | +| `nexent-common.config.oauth.gdeClientSecret` | `GDE_OAUTH_CLIENT_SECRET` | GDE OAuth Client Secret | +| `nexent-common.config.oauth.enableWechat` | `ENABLE_WECHAT_OAUTH` | Enables WeChat OAuth | +| `nexent-common.config.oauth.wechatClientId` | `WECHAT_OAUTH_APP_ID` | WeChat App ID | +| `nexent-common.config.oauth.wechatClientSecret` | `WECHAT_OAUTH_APP_SECRET` | WeChat App Secret | +| `nexent-common.config.oauth.sslVerify` | `OAUTH_SSL_VERIFY` | Whether to verify provider TLS certificates | +| `nexent-common.config.oauth.caBundle` | `OAUTH_CA_BUNDLE` | Custom 
CA bundle path | + +Provider callback URLs: + +| Provider | Callback URL | +|----------|--------------| +| GitHub | `{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=github` | +| GDE | `{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=gde` | +| WeChat | `{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=wechat` | + +For local NodePort, a GitHub callback example is `http://localhost:30000/api/user/oauth/callback?provider=github`. In production, use a public HTTPS domain and register the exact same URL in the OAuth provider console. + +### CAS Login Configuration + +CAS SSO does not require the `supabase` component. Set `nexent-common.config.cas.callbackBaseUrl` to the browser-accessible Nexent Web URL without a trailing `/`. `nexent-common.config.cas.serverUrl` is the CAS Server root URL and should also not include a trailing `/`. + +Kubernetes writes CAS settings into backend environment variables through `nexent-common` `config.cas.*` values: + +```bash +helm upgrade --install nexent nexent \ + --namespace nexent --create-namespace \ + --set nexent-common.config.cas.enabled=true \ + --set nexent-common.config.cas.serverUrl=https://cas.example.com/cas \ + --set nexent-common.config.cas.callbackBaseUrl=https://nexent.example.com \ + --set nexent-common.config.cas.loginMode=force \ + --set nexent-common.config.cas.logoutUrl=/logout +``` + +Configurable CAS values: + +| Value | Environment variable | Description | +|-------|----------------------|-------------| +| `nexent-common.config.cas.enabled` | `CAS_ENABLED` | Enables CAS | +| `nexent-common.config.cas.serverUrl` | `CAS_SERVER_URL` | CAS Server root URL | +| `nexent-common.config.cas.validatePath` | `CAS_VALIDATE_PATH` | serviceValidate path, default `/p3/serviceValidate` | +| `nexent-common.config.cas.callbackBaseUrl` | `CAS_CALLBACK_BASE_URL` | Web entry URL; CAS callback paths are appended automatically | +| `nexent-common.config.cas.loginMode` | `CAS_LOGIN_MODE` | `disabled`, 
`button`, or `force` | +| `nexent-common.config.cas.userAttribute` | `CAS_USER_ATTRIBUTE` | User identifier attribute. Empty means use `<cas:user>` | +| `nexent-common.config.cas.emailAttribute` | `CAS_EMAIL_ATTRIBUTE` | Email attribute | +| `nexent-common.config.cas.roleAttribute` | `CAS_ROLE_ATTRIBUTE` | Role attribute | +| `nexent-common.config.cas.tenantAttribute` | `CAS_TENANT_ATTRIBUTE` | Tenant attribute | +| `nexent-common.config.cas.roleMapJson` | `CAS_ROLE_MAP_JSON` | JSON mapping from CAS roles to Nexent roles | +| `nexent-common.config.cas.sessionMaxAgeSeconds` | `CAS_SESSION_MAX_AGE_SECONDS` | Maximum local CAS session lifetime | +| `nexent-common.config.cas.localSessionMaxAgeSeconds` | `LOCAL_SESSION_MAX_AGE_SECONDS` | Nexent local session lifetime | +| `nexent-common.config.cas.renewBeforeSeconds` | `CAS_RENEW_BEFORE_SECONDS` | Trigger silent renewal within this many seconds before expiry | +| `nexent-common.config.cas.renewTimeoutSeconds` | `CAS_RENEW_TIMEOUT_SECONDS` | Silent renewal timeout | +| `nexent-common.config.cas.syntheticEmailDomain` | `CAS_SYNTHETIC_EMAIL_DOMAIN` | Domain used when CAS does not return an email | +| `nexent-common.config.cas.logoutUrl` | `CAS_LOGOUT_URL` | CAS logout URL. 
Empty means Nexent logout will not call the CAS Server logout endpoint | +| `nexent-common.config.cas.sslVerify` | `CAS_SSL_VERIFY` | Whether to verify CAS Server TLS certificates | +| `nexent-common.config.cas.caBundle` | `CAS_CA_BUNDLE` | Custom CA bundle path | + +Common CAS URLs: + +| Purpose | URL | +|---------|-----| +| Nexent login entry | `{CAS_CALLBACK_BASE_URL}/api/user/cas/login?redirect=/` | +| CAS service callback | `{CAS_CALLBACK_BASE_URL}/api/user/cas/callback` | +| CAS silent renewal callback | `{CAS_CALLBACK_BASE_URL}/api/user/cas/renew_callback` | +| CAS single logout callback | `POST {CAS_CALLBACK_BASE_URL}/api/user/cas/logout_callback` | + +For Apereo CAS JSON Service Registry, create a service registration file such as `Nexent-10001.json` in the service registry directory configured by your CAS deployment. The `id` must be globally unique. This is a local NodePort example: + +```json +{ + "@class": "org.apereo.cas.services.RegexRegisteredService", + "serviceId": "http://localhost:30000.*", + "name": "Nexent CAS Client", + "id": 10001, + "description": "Nexent CAS SSO client", + "evaluationOrder": 1, + "logoutType": "BACK_CHANNEL", + "logoutUrl": "http://localhost:30000/api/user/cas/logout_callback" +} +``` + +In production, keep `CAS_SSL_VERIFY=true`; for self-signed certificates, prefer `CAS_CA_BUNDLE` and only use `CAS_SSL_VERIFY=false` for local testing. + +#### CAS Integration with ModelEngine + +When integrating with ModelEngine through the CAS protocol, use a values file to configure Nexent. This avoids complex command-line escaping for `CAS_ROLE_MAP_JSON`. 
+ +Create `cas-modelengine-values.yaml`: + +```yaml +nexent-common: + config: + cas: + enabled: true + serverUrl: "https://<modelengine-host>:5443/SSOSvr" + validatePath: "/p3/serviceValidate" + callbackBaseUrl: "http://<nexent-host>:30000" + loginMode: "force" + userAttribute: "userName" + emailAttribute: "email" + roleAttribute: "userType" + tenantAttribute: "tenant_id" + roleMapJson: '{"1":"ADMIN","3":"DEV"}' + sessionMaxAgeSeconds: 3600 + localSessionMaxAgeSeconds: 3600 + renewBeforeSeconds: 300 + renewTimeoutSeconds: 10 + syntheticEmailDomain: "cas.local" + logoutUrl: "/logout?service=http://<nexent-host>:30000" + sslVerify: false + caBundle: "" +``` + +You also need to add a CAS client service registration file in the OMS container. Use the following steps as a reference: + +```bash +# Create the registration file, paste the JSON content into it, and save it. +vim Nexent-10000001.json +{ + "@class": "org.apereo.cas.services.CasRegisteredService", + "serviceId": "http://<nexent-host>:30000.*", + "name": "Nexent CAS Client", + "id": 10000001, + "description": "Nexent CAS SSO client", + "evaluationOrder": 1, + "logoutType": "BACK_CHANNEL", + "logoutUrl": "http://<nexent-host>:30000/api/user/cas/logout_callback" +} + +# Run the following command to copy the registration file into the container. 
+kubectl cp Nexent-10000001.json model-engine/$(kubectl get pods -n model-engine -l app=oms --no-headers | awk '{print $1}'):/opt/huawei/fce/apps/platform/webapps/SSOSvr/WEB-INF/classes/services/Nexent-10000001.json +kubectl exec -i -n model-engine $(kubectl get pods -n model-engine -l app=oms --no-headers | awk '{print $1}') -- chown tomcat:fusioncube /opt/huawei/fce/apps/platform/webapps/SSOSvr/WEB-INF/classes/services/Nexent-10000001.json ``` ## 🔍 Troubleshooting diff --git a/doc/docs/en/quick-start/kubernetes-upgrade-guide.md b/doc/docs/en/quick-start/kubernetes-upgrade-guide.md index 293358d2f..75afcfba9 100644 --- a/doc/docs/en/quick-start/kubernetes-upgrade-guide.md +++ b/doc/docs/en/quick-start/kubernetes-upgrade-guide.md @@ -15,7 +15,7 @@ Follow these steps to upgrade Nexent on Kubernetes safely: Before updating, record the current deployment version and data directory information. - Current Deployment Version Location: `APP_VERSION` in `backend/consts/const.py` -- Data Directory Location: `global.dataDir` in `k8s/helm/nexent/values.yaml` +- Local volume directories: each Helm sub-chart's `storage.hostPath`, defaulting to `/var/lib/nexent-data/nexent-*` **Code downloaded via git** @@ -28,7 +28,7 @@ git pull **Code downloaded via ZIP package or other means** 1. Re-download the latest code from GitHub and extract it. -2. Copy the `.deploy.options` file from the `k8s/helm` directory of your previous deployment to the new code directory. (If the file doesn't exist, you can ignore this step). +2. Copy the `deploy.options` file from the `k8s/helm` directory of your previous deployment to the new code directory. (If the file doesn't exist, you can ignore this step). ## 🔄 Step 2: Execute the Upgrade @@ -36,10 +36,10 @@ Navigate to the k8s/helm directory of the updated code and run the deployment sc ```bash cd k8s/helm -./deploy-helm.sh apply +./deploy.sh ``` -The script will detect your previous deployment settings (version, image source, etc.) 
from the `.deploy.options` file. If the file is missing, you will be prompted to enter configuration details. +The script will detect your saved deployment settings (components, port policy, image source, etc.) from `deploy.options`. If the file is missing, you will be prompted to enter configuration details. > 💡 Tip > If you need to configure voice models (STT/TTS), please edit the corresponding values in `values.yaml` or pass them via command line. @@ -137,7 +137,7 @@ kubectl exec -i $POSTGRES_POD -n nexent -- psql -U root -d nexent < ./sql/v2.0.0 kubectl exec nexent/$POSTGRES_POD -n nexent -- pg_dump -U root nexent > backup_$(date +%F).sql ``` -> - For Supabase database (full version only), use `nexent-supabase-db` pod instead: +> - For the Supabase database (when `supabase` is selected), use the `nexent-supabase-db` pod instead: ```bash SUPABASE_POD=$(kubectl get pods -n nexent -l app=nexent-supabase-db -o jsonpath='{.items[0].metadata.name}') diff --git a/doc/docs/en/quick-start/upgrade-guide.md b/doc/docs/en/quick-start/upgrade-guide.md index 497212e06..3bc22f254 100644 --- a/doc/docs/en/quick-start/upgrade-guide.md +++ b/doc/docs/en/quick-start/upgrade-guide.md @@ -38,11 +38,11 @@ Navigate to the docker directory of the updated code and run the upgrade script: bash upgrade.sh ``` -If deploy.options is missing, the script will prompt you to manually enter configuration details from the previous deployment, such as the current version and data directory. Enter the information you recorded earlier. +If deploy.options is missing, the script will prompt you to select deployment settings again, such as components, port policy, and image source. Choose the same options you used for the previous deployment. >💡 Tip -> The default scenario is quick deployment, which uses .env.example. -> If you need to configure voice models (STT/TTS), please add the relevant variables to .env.example in advance. We will provide a front-end configuration interface as soon as possible. 
+> If `docker/.env` is missing, the deploy script automatically copies it from `.env.example`. +> If you need to configure voice models (STT/TTS), add the relevant variables to `docker/.env`. We will provide a front-end configuration interface as soon as possible. ## 🌐 Step 3: Verify the deployment diff --git a/doc/docs/en/sdk/data-process.md b/doc/docs/en/sdk/data-process.md index 2d11202b1..614c4b438 100644 --- a/doc/docs/en/sdk/data-process.md +++ b/doc/docs/en/sdk/data-process.md @@ -43,10 +43,10 @@ def file_process(self, ## 📁 Supported File Formats -- **Text files**: .txt, .md, .csv -- **Documents**: .pdf, .docx, .pptx +- **Text files**: .txt, .md, .csv, .json +- **Documents**: .pdf, .docx, .pptx, .epub - **Images**: .jpg, .png, .gif (with OCR) -- **Web content**: HTML, URLs +- **Web content**: HTML, URLs, XML - **Archives**: .zip, .tar ## 💡 Usage Examples diff --git a/doc/docs/en/sdk/monitoring.md b/doc/docs/en/sdk/monitoring.md index 4aa625132..bb7c1db13 100644 --- a/doc/docs/en/sdk/monitoring.md +++ b/doc/docs/en/sdk/monitoring.md @@ -1,289 +1,327 @@ -# 🚀 Nexent LLM Monitoring System +# Nexent Agent Observability (OTLP) -Enterprise-grade monitoring solution specifically designed for monitoring LLM token generation speed and performance. +Enterprise-grade observability for AI agents using OpenTelemetry OTLP protocol. Supports integration with observability platforms like Arize Phoenix, Langfuse, LangSmith, Grafana Tempo, Zipkin, and more. 
-## 📊 System Architecture +## Architecture ``` -┌─────────────────────────────────────────────────────────┐ -│ Nexent LLM Monitoring System │ -├─────────────────────────────────────────────────────────┤ -│ │ -│ Nexent API ──► OpenTelemetry ──► Jaeger (Tracing) │ -│ │ │ │ -│ │ └──────► Prometheus (Metrics) │ -│ │ │ │ -│ └─► OpenAI LLM └──► Grafana (Visualization) │ -│ (Token Monitoring) │ -└─────────────────────────────────────────────────────────┘ +NexentAgent ──► OpenTelemetry SDK ──► OTLP Collector ──► Arize Phoenix / Langfuse / LangSmith / Grafana Tempo / Zipkin / OTLP Backend + │ │ + │ OpenInference Semantics │ + │ (llm.*, agent.* attributes) │ + └────────────────────────────────────────┘ ``` -## ⚡ Quick Start (5 minutes) +## Quick Start ```bash -# 1. Start monitoring services -./docker/start-monitoring.sh +cd docker +[ -f .env ] || cp .env.example .env +cp monitoring/monitoring.env.example monitoring/monitoring.env -# 2. Install performance monitoring dependencies -uv sync --extra performance +vim .env +ENABLE_TELEMETRY=true +MONITORING_PROVIDER=otlp +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http -# 3. Enable monitoring -export ENABLE_TELEMETRY=true +vim monitoring/monitoring.env +MONITORING_PROVIDER=otlp -# 4. Start backend service -python backend/config_service.py -python backend/runtime_service.py +./start-monitoring.sh --stack collector ``` -## 📊 Access Monitoring Interfaces +## AI Observability Platforms -| Interface | URL | Purpose | -|-----------|-----|---------| -| **Grafana Dashboard** | http://localhost:3005 | LLM Performance Monitoring | -| **Jaeger Tracing** | http://localhost:16686 | Request Trace Analysis | -| **Prometheus Metrics** | http://localhost:9090 | Raw Monitoring Data | +### Arize Phoenix -### 🔐 Grafana Login Information +Arize Phoenix provides AI-specific observability with OpenInference semantic support. 
-When first accessing Grafana (http://localhost:3005), you need to login: +**Configuration:** +```bash +MONITORING_PROVIDER=phoenix +OTEL_EXPORTER_OTLP_ENDPOINT=https://app.phoenix.arize.com/s/YOUR_SPACE +OTEL_EXPORTER_OTLP_AUTHORIZATION="Bearer YOUR_PHOENIX_API_KEY" +OTEL_EXPORTER_OTLP_PROTOCOL=http +OTEL_EXPORTER_OTLP_METRICS_ENABLED=false ``` -Username: admin -Password: admin -``` - -**After first login, you'll be prompted to change password:** -- Set a new password (recommended) -- Click "Skip" to skip (development environment) -**After login, you can see:** -- 📊 **LLM Performance Dashboard** - Pre-configured performance dashboard -- 📈 **Data Source Configuration** - Auto-connected to Prometheus and Jaeger -- 🎯 **Real-time Monitoring Panel** - Key metrics like token generation speed, latency +**Features:** +- LLM trace visualization with prompt/completion +- Token-level performance metrics +- Agent step tracing +- Cost analysis -## 🎯 Core Features +### Langfuse -### ⚡ LLM-Specific Monitoring -- **Token Generation Speed**: Real-time monitoring of tokens generated per second -- **TTFT (Time to First Token)**: First token return latency -- **Streaming Response Analysis**: Generation timestamp for each token -- **Model Performance Comparison**: Performance benchmarks across different models +Langfuse offers prompt management and LLM observability with OTLP support. 
-### 🔍 Distributed Tracing -- **Complete Request Chain**: End-to-end tracing from HTTP to LLM -- **Performance Bottleneck Detection**: Automatically identify slow queries and anomalies -- **Error Root Cause Analysis**: Quickly locate problem sources +**Configuration:** -### 🛠️ Developer-Friendly Design -- **One-Line Integration**: Quick monitoring with decorators -- **Zero-Dependency Degradation**: Auto-skip when monitoring dependencies are missing -- **Zero-Touch Usage**: No need to manually check monitoring status, handled automatically -- **Flexible Configuration**: Environment variable controlled behavior - -## 🛠️ Adding Monitoring to Code +```bash +MONITORING_PROVIDER=langfuse +OTEL_EXPORTER_OTLP_ENDPOINT=https://cloud.langfuse.com/api/public/otel -### 🎯 Recommended Approach: Singleton Pattern (v2.1+) +LANGFUSE_PUBLIC_KEY=pk-xxx +LANGFUSE_SECRET_KEY=sk-xxx -```python -# Backend service usage - directly use globally configured monitoring_manager -from utils.monitoring import monitoring_manager - -# API endpoint monitoring -@monitoring_manager.monitor_endpoint("my_service.my_function") -async def my_api_function(): - return {"status": "ok"} +OTEL_EXPORTER_OTLP_AUTHORIZATION=Basic BASE64_ENCODED_KEY +OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION=4 +``` -# LLM call monitoring -@monitoring_manager.monitor_llm_call("gpt-4", "chat_completion") -def call_llm(messages): - # Automatically get token-level monitoring - return llm_response +Generate the encoded key: -# Manual monitoring events -monitoring_manager.add_span_event("custom_event", {"key": "value"}) -monitoring_manager.set_span_attributes(user_id="123", action="process") +```bash +echo -n "$LANGFUSE_PUBLIC_KEY:$LANGFUSE_SECRET_KEY" | base64 ``` -### 📦 Direct SDK Usage +**Features:** +- Prompt versioning and management +- Session-based trace grouping +- User feedback collection +- Model cost tracking -```python -from nexent.monitor import get_monitoring_manager - -# Get global monitoring manager - already 
configured in backend -monitor = get_monitoring_manager() - -# Use decorators -@monitor.monitor_llm_call("claude-3", "completion") -def my_llm_function(): - return "response" - -# Or use directly in business logic -with monitor.trace_llm_request("custom_operation", "my_model") as span: - # Execute business logic - result = process_data() - monitor.add_span_event("processing_completed") - return result -``` +### LangSmith -### ✨ Global Configuration Automation +LangSmith supports online OTLP trace ingestion through the OpenTelemetry endpoint. Nexent can send traces to a local Collector first, and the Collector forwards them to LangSmith. -Monitoring configuration is auto-initialized in `backend/utils/monitoring.py`: +**Collector forwarding:** -```python -# No manual configuration needed - auto-completed at system startup -# monitoring_manager already configured with environment variables -from utils.monitoring import monitoring_manager +```bash +cd docker +vim monitoring/monitoring.env -# Direct usage without checking if enabled -@monitoring_manager.monitor_endpoint("my_function") -def my_function(): - pass +MONITORING_PROVIDER=langsmith +LANGSMITH_API_KEY=lsv2_xxx +LANGSMITH_PROJECT=nexent +LANGSMITH_OTLP_TRACES_ENDPOINT=https://api.smith.langchain.com/otel/v1/traces -# FastAPI application initialization -monitoring_manager.setup_fastapi_app(app) +./start-monitoring.sh --stack langsmith ``` -### 🔒 Auto Start/Stop Design - -- **Smart Monitoring**: Auto start/stop based on `ENABLE_TELEMETRY` environment variable -- **Zero-Touch Usage**: External code doesn't need to check monitoring status, use all features directly -- **Graceful Degradation**: Silent no-effect when disabled, normal operation when enabled -- **Default Off**: Auto-disabled when not configured +Nexent backend configuration when it sends OTLP to the Collector: ```bash -# Enable monitoring -export ENABLE_TELEMETRY=true - -# Disable monitoring -export ENABLE_TELEMETRY=false +ENABLE_TELEMETRY=true 
+MONITORING_PROVIDER=langsmith +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http +OTEL_EXPORTER_OTLP_METRICS_ENABLED=false ``` -## 📊 Core Monitoring Metrics +For direct backend-to-LangSmith export, set `OTEL_EXPORTER_OTLP_ENDPOINT=https://api.smith.langchain.com/otel`, `LANGSMITH_API_KEY`, and optionally `LANGSMITH_PROJECT`. -| Metric | Description | Importance | -|--------|-------------|------------| -| `llm_token_generation_rate` | Token generation speed (tokens/s) | ⭐⭐⭐ | -| `llm_time_to_first_token_seconds` | First token latency | ⭐⭐⭐ | -| `llm_request_duration_seconds` | Complete request duration | ⭐⭐⭐ | -| `llm_total_tokens` | Input/output token count | ⭐⭐ | -| `llm_error_count` | LLM call error count | ⭐⭐⭐ | +### Zipkin -## 🔧 Environment Configuration +Zipkin provides a lightweight local trace query UI. For local deployment, Nexent sends OTLP to the Collector, and the Collector forwards traces to Zipkin. ```bash -# Add to .env file -cat >> .env << EOF -ENABLE_TELEMETRY=true -SERVICE_NAME=nexent-backend -JAEGER_ENDPOINT=http://localhost:14268/api/traces -LLM_SLOW_REQUEST_THRESHOLD_SECONDS=5.0 -LLM_SLOW_TOKEN_RATE_THRESHOLD=10.0 -TELEMETRY_SAMPLE_RATE=1.0 # Development environment, production recommended 0.1 -EOF +MONITORING_PROVIDER=zipkin +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http +MONITORING_DASHBOARD_URL=http://localhost:9411 ``` -## 🛠️ System Verification +Set `MONITORING_DASHBOARD_URL` to the browser-accessible monitoring UI URL. The backend returns this value to the frontend top bar without deriving a provider-specific path. 
```bash -# Check metrics endpoint -curl http://localhost:8000/metrics - -# Verify dependency installation -python -c "from backend.utils.monitoring import MONITORING_AVAILABLE; print(f'Monitoring Available: {MONITORING_AVAILABLE}')" +MONITORING_DASHBOARD_URL=http://localhost:6006 +MONITORING_DASHBOARD_URL=http://localhost:3001/project/nexent +MONITORING_DASHBOARD_URL=http://localhost:3002/d/nexent-llm-agent/nexent-agent-trace-monitoring?orgId=1 +MONITORING_DASHBOARD_URL=http://localhost:9411 ``` -## 🆘 Troubleshooting +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `ENABLE_TELEMETRY` | `false` | Enable/disable monitoring | +| `MONITORING_PROVIDER` | `otlp` | Provider profile: `otlp`, `phoenix`, `langfuse`, `langsmith`, `grafana`, `zipkin` | +| `MONITORING_DASHBOARD_URL` | (empty) | Browser-accessible monitoring UI URL used by the frontend top bar | +| `MONITORING_PROJECT_NAME` | `nexent` | Observability platform project name | +| `MONITORING_TRACE_CONTENT_MODE` | `summary` | Trace payload mode: `summary` records bounded previews plus metadata, `metrics` records only structure/size metadata, `full` keeps full payloads subject to `MONITORING_TRACE_MAX_CHARS` | +| `MONITORING_TRACE_MAX_CHARS` | `4000` | Maximum characters for each payload preview written to trace attributes | +| `MONITORING_TRACE_MAX_ITEMS` | `20` | Maximum dict keys/list items included in payload previews | +| `OTEL_SERVICE_NAME` | `nexent-backend` | Service identifier | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | `http://localhost:4318` | OTLP base endpoint; SDK derives `/v1/traces` and `/v1/metrics` | +| `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` | (empty) | Optional trace-specific endpoint | +| `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT` | (empty) | Optional metric-specific endpoint | +| `OTEL_EXPORTER_OTLP_PROTOCOL` | `http` | Protocol: `http` or `grpc` | +| `OTEL_EXPORTER_OTLP_HEADERS` | (empty) | Generic auth headers (comma-separated) | +| 
`OTEL_EXPORTER_OTLP_AUTHORIZATION` | (empty) | `Authorization` header, commonly used by Phoenix bearer auth and Langfuse | +| `OTEL_EXPORTER_OTLP_X_API_KEY` | (empty) | `x-api-key` header for platforms that require it | +| `OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION` | (empty) | Langfuse ingestion version, for example `4` | +| `OTEL_EXPORTER_OTLP_METRICS_ENABLED` | `true` | Whether to export OTLP metrics | +| `LANGSMITH_API_KEY` | (empty) | LangSmith API key; mapped to the `x-api-key` OTLP header | +| `LANGSMITH_PROJECT` | (empty) | Optional LangSmith project header | +| `LANGSMITH_OTLP_TRACES_ENDPOINT` | `https://api.smith.langchain.com/otel/v1/traces` | Collector trace endpoint for online LangSmith | + +## Code Integration + +### Agent Boundary Context + +At the request boundary, business code only binds the resolved user and Agent metadata once. The SDK then creates Agent, LLM, and Tool spans from the runtime lifecycle: -### No monitoring data? -```bash -# Check service status -docker-compose -f docker/docker-compose-monitoring.yml ps +```python +from nexent.monitor.agent_observability import AgentRunMetadata +from utils.monitoring import monitoring_manager -# Check dependency installation -python -c "import opentelemetry; print('✅ Monitoring dependencies installed')" +monitoring_manager.bind_agent_context(AgentRunMetadata( + tenant_id=tenant_id, + user_id=user_id, + agent_id=agent_request.agent_id, + conversation_id=agent_request.conversation_id, + query=agent_request.query, + is_debug=agent_request.is_debug, + language=language, +)) ``` -### Port conflicts? -```bash -# Check port usage -lsof -i :3005 -i :9090 -i :16686 +`monitor_endpoint` is still kept as a compatibility API and low-level escape hatch, but it is no longer the recommended way to add normal Agent observability. + +### Trace Payload Policy + +Tool input/output, retriever output, and Langfuse-compatible `input.value` / `output.value` attributes share the same payload policy. 
By default Nexent writes a bounded preview plus structured metadata such as `type`, `size_chars`, `item_count`, `truncated`, and `keys`. Memory search spans intentionally record only result summaries and statistics, not full memory text bodies. + +Agent context metrics are emitted from the SDK lifecycle. Each action step records an `agent.step.metrics` event with estimated context tokens, compression calls, cache hits, compression ratio, and token threshold. The final Agent span also receives aggregate step count, max context size, average compression ratio, total compression calls, and cache hit totals. + +### LLM Call Monitoring + +```python +@monitoring_manager.monitor_llm_call("gpt-4", "chat_completion") +def call_llm(messages): + return llm_response ``` -### Dependency installation issues? -```bash -# Reinstall performance dependencies -uv sync --extra performance +### Agent Step Tracing -# Check performance configuration in pyproject.toml -cat backend/pyproject.toml | grep -A 20 "performance" +```python +with monitoring_manager.trace_agent_step("agent.run.loop", step_type="agent_loop") as span: + result = execute_tool() + monitoring_manager.set_tool_output(result) ``` -### Service name shows as unknown_service? -```bash -# Check environment variable configuration -echo "SERVICE_NAME: $SERVICE_NAME" +### Tool Call Tracing -# Restart monitoring service to apply new configuration -./docker/start-monitoring.sh +```python +with monitoring_manager.trace_tool_call("web_search", "agent_name", {"query": "test"}) as span: + results = search_web("test") + monitoring_manager.set_tool_output({"results": results}) ``` -## 🧹 Data Management +### Retriever Call Tracing -### Clean Jaeger Trace Data -```bash -# Method 1: Restart Jaeger container (simplest) -docker-compose -f docker/docker-compose-monitoring.yml restart nexent-jaeger +Knowledge-base search tools are classified as retriever spans automatically by the SDK. 
Custom retriever integrations can use the same semantics directly: -# Method 2: Completely rebuild Jaeger container and data -docker-compose -f docker/docker-compose-monitoring.yml stop nexent-jaeger -docker-compose -f docker/docker-compose-monitoring.yml rm -f nexent-jaeger -docker-compose -f docker/docker-compose-monitoring.yml up -d nexent-jaeger +```python +with monitoring_manager.trace_retriever_call("knowledge_base_search", "agent_name", {"query": "test"}) as span: + documents = search_knowledge_base("test") + monitoring_manager.set_retriever_output(documents) +``` -# Method 3: Clean all monitoring data (rebuild all containers) -docker-compose -f docker/docker-compose-monitoring.yml down -docker-compose -f docker/docker-compose-monitoring.yml up -d +## OpenInference Semantic Attributes + +The system uses OpenInference semantic conventions for AI-specific observability: + +### LLM Attributes + +| Attribute | Description | +|-----------|-------------| +| `llm.model_name` | Model identifier (e.g., `gpt-4`) | +| `llm.operation.name` | Operation type (e.g., `chat_completion`) | +| `llm.token_count.prompt` | Input token count | +| `llm.token_count.completion` | Output token count | +| `llm.invocation_parameters` | Model parameters (JSON) | +| `llm.time_to_first_token` | TTFT in seconds | + +### Agent Attributes + +| Attribute | Description | +|-----------|-------------| +| `agent.name` | Agent identifier | +| `agent.step.name` | Step name (e.g., `web_search`) | +| `agent.step.type` | Step type: `tool_call`, `reasoning`, `action_selection` | +| `agent.tool.name` | Tool name | +| `agent.tool.input` | Tool input preview using the configured trace payload policy | +| `agent.tool.input.*` | Structured tool input metadata: type, size, item count, truncation, keys | +| `agent.tool.output` | Tool output preview using the configured trace payload policy | +| `agent.tool.output.*` | Structured tool output metadata: type, size, item count, truncation, keys | +| 
`agent.tool.success` | Whether the tool call completed successfully | +| `agent.tool.duration_ms` | Tool call duration | +| `retriever.name` | Retriever name | +| `retrieval.query` | Retriever query | +| `retrieval.results.count` | Retriever result count | +| `retrieval.top_score` | Highest numeric result score when available | +| `retriever.input.*` | Structured retriever input metadata | +| `retriever.output` | Retriever output preview using the configured trace payload policy | +| `retriever.output.*` | Structured retriever output metadata | +| `context.tokens.estimated_input` | Estimated context input tokens per Agent step event | +| `context.tokens.uncompressed_estimated` | Estimated uncompressed context tokens per Agent step event | +| `context.compression.calls` | Compression calls per Agent step event | +| `context.compression.cache_hits` | Compression cache hits per Agent step event | +| `context.compression.ratio` | Compression ratio per Agent step event | + +## Metrics + +| Metric | Description | +|--------|-------------| +| `llm.request.duration` | Request latency | +| `llm.token.generation_rate` | Tokens per second | +| `llm.time_to_first_token` | TTFT | +| `llm.token_count.prompt` | Input tokens | +| `llm.token_count.completion` | Output tokens | +| `agent.step.count` | Agent step count | +| `agent.execution.duration` | Agent execution time | +| `agent.error.count` | Agent errors | + +## Collector Configuration + +By default, the OpenTelemetry Collector only logs data through the debug exporter. This avoids forwarding data back into itself when no external backend is configured. 
To forward through the Collector, add a platform exporter: + +```yaml +exporters: + otlphttp/langsmith: + traces_endpoint: https://api.smith.langchain.com/otel/v1/traces + headers: + x-api-key: YOUR_LANGSMITH_API_KEY + Langsmith-Project: nexent + +service: + pipelines: + traces: + exporters: [otlphttp/langsmith, debug] ``` -### Clean Prometheus Metrics Data -```bash -# Restart Prometheus container -docker-compose -f docker/docker-compose-monitoring.yml restart nexent-prometheus +See `docker/monitoring/otel-collector-config.yml` for full configuration with platform examples. -# Completely clean Prometheus data -docker-compose -f docker/docker-compose-monitoring.yml stop nexent-prometheus -docker volume rm docker_prometheus_data 2>/dev/null || true -docker-compose -f docker/docker-compose-monitoring.yml up -d nexent-prometheus -``` +## Graceful Degradation -### Clean Grafana Configuration -```bash -# Reset Grafana configuration and dashboards -docker-compose -f docker/docker-compose-monitoring.yml stop nexent-grafana -docker volume rm docker_grafana_data 2>/dev/null || true -docker-compose -f docker/docker-compose-monitoring.yml up -d nexent-grafana +When OpenTelemetry dependencies are not installed, monitoring is disabled gracefully: + +```bash +pip install nexent # Basic package - no monitoring +pip install nexent[performance] # With OTLP support ``` -## 📈 Typical Problem Analysis +All monitoring methods work without errors when disabled - decorators pass through, context managers yield None. -### Slow token generation (< 5 tokens/s) -1. **Analysis**: Grafana → Token Generation Rate panel -2. **Solution**: Check model service load, optimize input prompt length +## Troubleshooting -### Slow request response (> 10s) -1. **Analysis**: Jaeger → View complete trace chain -2. 
**Solution**: Check model service availability, verify API keys +1. Check `ENABLE_TELEMETRY=true` in `.env` +2. Verify OTLP endpoint is reachable +3. Check authentication headers are correct -## 🎉 Getting Started +### Connection errors -After setup completion, you can: +1. Test endpoint: `curl -v $OTEL_EXPORTER_OTLP_ENDPOINT/v1/traces` +2. Verify protocol matches endpoint (`http` vs `grpc`) +3. Check Collector logs: `docker logs nexent-otel-collector` -1. 📊 View **LLM Performance Dashboard** in Grafana -2. 🔍 Trace complete request chains in Jaeger -3. 📈 Analyze token generation speed and performance bottlenecks -4. 🚨 Set performance alerts and thresholds +### Wrong attributes -Enjoy efficient LLM performance monitoring! 🚀 +1. Verify OpenInference attributes in platform UI +2. Check span attribute naming: `llm.model_name` not `model_name` +3. Review platform-specific attribute requirements diff --git a/doc/docs/en/user-guide/agent-development.md b/doc/docs/en/user-guide/agent-development.md index db2614f7d..8e6b47d4f 100644 --- a/doc/docs/en/user-guide/agent-development.md +++ b/doc/docs/en/user-guide/agent-development.md @@ -31,15 +31,98 @@ You can configure other collaborative agents for your created agent, as well as ### 🤝 Collaborative Agents +Collaborative agents help the current agent complete complex tasks. The sources of collaborative agents are divided into two categories: + +- **Internal Agents**: Published agents on the platform +- **External A2A Agents**: Third-party agents discovered through the A2A protocol + 1. Click the plus sign under the "Collaborative Agent" tab to open the selectable agent list -2. Select the agents you want to add from the dropdown list -3. Multiple collaborative agents can be selected -4. Click × to remove an agent from the selection +2. The agent list is divided into two tabs: "Internal Agent" and "External A2A Agent". You can choose based on your needs +3. Select the agent you want to add from the dropdown list +4. 
Multiple collaborative agents can be selected +5. Click × to remove an agent from the selection + +
+ +
+ +#### 🌐 Add External A2A Agents + +Nexent supports communication with third-party agents through the A2A protocol. You can discover external A2A agents in the following two ways: + +##### Discover Agent via URL + +If you know the Agent Card address of the target agent, you can use the URL discovery method:
- +
+1. In the External A2A Agent list, click the "Add External Agent" button +2. Select the "URL Discovery" tab +3. Fill in the Agent Card URL address, for example: `https://example.com/.well-known/agent.json` +4. Click the "Discover" button; the system will automatically retrieve the agent's related information +5. After successful discovery, you can view the agent's name, description, capabilities and other information +6. Click "Add to List" to complete the addition + +> 💡 **Tip**: The Agent Card is an Agent description file that complies with the A2A 1.0 specification, containing the agent's name, description, calling address, capabilities and other information. + +##### Discover Agent via Nacos + +If your agent is registered with the Nacos service discovery platform, you can use the Nacos discovery method: + +
+ +
+ +1. In the External A2A Agent list, click the "Add External Agent" button +2. Select the "Nacos Discovery" tab +3. For first-time use, you need to configure the Nacos connection information: + - **Nacos Server Address**: Fill in the Nacos server address, such as `http://127.0.0.1:8848` + - **Namespace ID**: Fill in the Nacos namespace ID (optional) + - **Group Name**: Fill in the service group name, default is `DEFAULT_GROUP` + - **Username/Password**: Fill in the Nacos access credentials (optional) +4. Click "Save Configuration" to save the Nacos connection information +5. Fill in the Agent service name to scan +6. Click the "Scan" button; the system will obtain matching Agent information from Nacos +7. The scan results will list all matching Agents. You can select the agents you need and add them to the list + +> ⚠️ **Note**: Make sure the Nacos service is running properly and the target Agent is correctly registered with Nacos. + +##### Manage Discovered External Agents + +In the External A2A Agent list, you can view and manage all discovered external agents: + +
+ +
+ +1. **View Agent Details**: Click on the agent card to view its complete information, including name, description, URL, capability list, etc. +2. **Test Agent**: Click the "Test" button to send a test message to the agent and verify if it is working properly +3. **Chat with Agent**: Click the "Chat" button to open a chat window and interact with the agent in real time +4. **Configure Calling Protocol**: Click the "Protocol Configuration" button to select the calling protocol for this agent: + - **HTTP + JSON**: Use REST API style calls + - **JSON-RPC**: Use JSON-RPC protocol calls +5. **Refresh Agent Information**: If the agent information changes, click the "Refresh" button to re-fetch the latest Agent Card +6. **Remove Agent**: Click the "Remove" button to delete the agent from the discovered list + +> 💡 **Use Cases**: +> - Quickly integrate known third-party agent services through URL discovery +> - Batch integrate all agents from the same service registry through Nacos discovery +> - Configure protocols to meet the requirements of different agent service providers + +###### Integrate [DataAgent](https://gitcode.com/datagallery/dataagent) A2A Agent via URL + +1. Refer to the [DataAgent documentation](https://gitcode.com/datagallery/dataagent#%F0%9F%8C%90-a2a-10-%E6%9C%8D%E5%8A%A1%E6%A8%A1%E5%BC%8F) and start DataAgent in A2A service mode. + > Nexent does not currently support agents that require authentication. Do not set `auth-token` when starting DataAgent. + +
+ +
+ +2. Refer to [Discover Agent via URL](#discover-agent-via-url) to integrate the agent. The URL is `http://<dataagent-host>:9999/.well-known/agent-card.json`. +3. Refer to [Manage Discovered External Agents](#manage-discovered-external-agents) to configure the invocation protocol, and select HTTP + JSON for integration. + ### 🛠️ Select Agent Tools Agents can use various tools to complete tasks, such as knowledge base search, file parsing, image parsing, email sending/receiving, file management, and other local tools. They can also integrate third-party MCP tools or custom tools. @@ -60,6 +143,8 @@ Agents can use various tools to complete tasks, such as knowledge base search, f > 2. Please select the `analyze_text_file` tool to enable the parsing function for document and text files. > 3. Please select the `analyze_image` tool to enable the parsing function for image files. > +> ⚠️ **Embedding Model Configuration**: When using the `knowledge_base_search` tool, ensure that the knowledge base has an embedding model configured. For existing knowledge bases, the system will prompt you to select an embedding model. Make sure to select **the same embedding model used when creating the knowledge base**. If the selected model differs from the one used during knowledge base creation, it may cause search failures or inaccurate results. +> > 📚 Want to learn about all the built-in local tools available in the system? Please refer to [Local Tools Overview](./local-tools/index.md). ### 🔌 Add MCP Tools @@ -108,6 +193,39 @@ You can add MCP services to Nexent in the following two ways: Many third-party services such as [ModelScope](https://www.modelscope.cn/mcp) provide MCP services, which you can quickly integrate and use. You can also develop your own MCP services and connect them to Nexent; see [MCP Tool Development](../backend/tools/mcp). 
+**3️⃣ Convert an Existing API to an MCP Service** + +🔔 This method is suitable for quickly converting existing REST API endpoints into MCP tools without additional development, allowing agents to call existing API capabilities: + +>1. In the MCP Config module, select **"API to MCP"** as the access type +> +>2. Fill in the basic API information in the input box below: +> - **Service Name**: Display name for the MCP service +> - **OpenAPI JSON**: OpenAPI 3.x specification in JSON format +> - **Base Service URL**: Base address of the API service (supports http/https) +> +>3. Click the **+ Add** button in the lower right corner to complete the MCP service conversion + +
+ +
+ +>4. After conversion, you can view all externally converted MCP tools in the **Outer APIs** tab + +
+ +
+ +
+ +
+ +>💡 **Use Cases**: +>- Quickly integrate internal enterprise REST API endpoints +>- Convert third-party service HTTP APIs into MCP tools +>- Generate tools directly from OpenAPI specifications without writing MCP Server code + + ### ⚙️ Custom Tools You can refer to the following guides to develop your own tools and integrate them into Nexent to enrich agent capabilities: @@ -129,7 +247,7 @@ Nexent provides a "Tool Testing" capability for all types of tools—whether the - The test `query`, such as "benefits of vitamin C" - The search `search_mode` (default is `hybrid`) - The target index list `index_names`, such as `["Medical", "Vitamin Encyclopedia"]` - - If `index_names` is not entered, it will default to searching all knowledge bases selected on the knowledge base page + - If `index_names` is not entered, it will default to searching all knowledge bases selected on the knowledge base page 6. After entering the parameters, click "Execute Test" to start the test and view the test results below
@@ -181,6 +299,134 @@ After completing the initial agent configuration, you can debug the agent and fi After successful debugging, click the "Save" button in the lower right corner, and the agent will be saved and appear in the agent list. +## 📋 Version Management + +Nexent supports agent version management. You can save different versions of agent configurations during the debugging process. + +Once the agent configuration is verified, you can publish the agent. After publishing, the agent will be visible in the Agent Space and Start Chat pages. + +![Version Management 1](./assets/agent-development/version_management_1.png) + +If you need to roll back to a previous version, click the "Rollback" button on the version management page. + +![Version Management 2](./assets/agent-development/version_management_2.png) + +### 🚀 Publish as A2A Agent + +Nexent supports exposing published agents as A2A Agents for external systems to call. When publishing a version, you can check the "Publish as A2A Agent" option to register the current agent as an A2A 1.0 compliant Agent. + +
+ +
+ +After successful publishing, the system will display the A2A Agent's call information: + +
+ +
+ +| Field | Description | +|-------|-------------| +| **Endpoint ID** | Unique identifier for the A2A Agent | +| **Agent Card URL** | Agent discovery endpoint; external systems use this address to retrieve Agent descriptions | +| **Protocol Version** | A2A protocol version; currently 1.0 | +| **REST Endpoints** | REST-style API endpoints | +| **JSON-RPC Endpoint** | JSON-RPC 2.0 protocol calling endpoint | + +#### Calling Methods + +The published A2A Agent supports the following two calling protocols: + +##### REST API + +```bash +# Get Agent Card (for Agent discovery) +GET /nb/a2a/{endpoint_id}/.well-known/agent-card.json + +# Send synchronous message +POST /nb/a2a/{endpoint_id}/message:send +Content-Type: application/json + +{ + "message": { + "role": "user", + "content": "Please help me complete a task" + } +} + +# Send streaming message (SSE) +POST /nb/a2a/{endpoint_id}/message:stream +Content-Type: application/json + +{ + "message": { + "role": "user", + "content": "Please help me complete a task" + } +} + +# Get task status +GET /nb/a2a/{endpoint_id}/tasks/{task_id} +``` + +##### JSON-RPC 2.0 + +```bash +POST /nb/a2a/{endpoint_id}/v1 +Content-Type: application/json + +# Send synchronous message +{ + "jsonrpc": "2.0", + "method": "SendMessage", + "params": { + "message": { + "role": "user", + "content": "Please help me complete a task" + } + }, + "id": 1 +} + +# Send streaming message +{ + "jsonrpc": "2.0", + "method": "SendStreamingMessage", + "params": { + "message": { + "role": "user", + "content": "Please help me complete a task" + } + }, + "id": 2 +} + +# Get task status +{ + "jsonrpc": "2.0", + "method": "GetTask", + "params": { + "taskId": "task_abc123" + }, + "id": 3 +} +``` + +> 💡 **Tips**: +> - For local development, replace the `/nb/a2a` prefix with `http://localhost:5013/nb/a2a` +> - For production environments, replace the prefix with your server domain name or public IP address + +> ⚠️ **Notes**: +> - Calling A2A Agents requires carrying valid 
authentication information in the request headers +> - Agent Card information is cached with a refresh interval of 1 hour +> - If you need to update Agent information, you need to republish the agent version + +When an agent is published as an A2A-compliant Agent, users can view the detailed A2A Agent calling information by clicking the button shown below in the agent list: + +
+ +
+ ## 📋 Manage Agents In the agent list on the left, you can perform the following operations on existing agents: diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-detail.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-detail.jpg new file mode 100644 index 000000000..399af1c56 Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-detail.jpg differ diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-discovery-list.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-discovery-list.jpg new file mode 100644 index 000000000..5c523f7b1 Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-discovery-list.jpg differ diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-find-detail.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-find-detail.jpg new file mode 100644 index 000000000..4c42104ec Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-find-detail.jpg differ diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-nacos-discovery.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-nacos-discovery.jpg new file mode 100644 index 000000000..fdfa2e826 Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-nacos-discovery.jpg differ diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-published-as.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-published-as.jpg new file mode 100644 index 000000000..5c523f7b1 Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-published-as.jpg differ diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-url-discovery.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-url-discovery.jpg new file mode 100644 index 000000000..4632206fb Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-url-discovery.jpg differ diff --git 
a/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api.png b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api.png new file mode 100644 index 000000000..2cce2a44a Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api.png differ diff --git a/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_1.png b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_1.png new file mode 100644 index 000000000..12e9358c5 Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_1.png differ diff --git a/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_2.png b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_2.png new file mode 100644 index 000000000..4221b41f5 Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_2.png differ diff --git a/doc/docs/en/user-guide/assets/agent-development/dataagent_deploy.png b/doc/docs/en/user-guide/assets/agent-development/dataagent_deploy.png new file mode 100644 index 000000000..46fa9fde3 Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/dataagent_deploy.png differ diff --git a/doc/docs/en/user-guide/assets/agent-development/set-collaboration.jpg b/doc/docs/en/user-guide/assets/agent-development/set-collaboration.jpg new file mode 100644 index 000000000..fdfa2e826 Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/set-collaboration.jpg differ diff --git a/doc/docs/en/user-guide/assets/agent-development/set-collaboration.png b/doc/docs/en/user-guide/assets/agent-development/set-collaboration.png deleted file mode 100644 index 7f47ba1a2..000000000 Binary files a/doc/docs/en/user-guide/assets/agent-development/set-collaboration.png and /dev/null differ diff --git a/doc/docs/en/user-guide/knowledge-base.md b/doc/docs/en/user-guide/knowledge-base.md index e5e5714ff..05456e5fa 100644 --- 
a/doc/docs/en/user-guide/knowledge-base.md +++ b/doc/docs/en/user-guide/knowledge-base.md @@ -26,12 +26,14 @@ Create and manage knowledge bases, upload documents, and generate summaries. Kno ### Supported File Formats Nexent supports multiple file formats, including: -- **Text:** .txt, .md +- **Text:** .txt, .md, .csv, .json - **PDF:** .pdf - **Word:** .docx - **PowerPoint:** .pptx +- **EPUB:** .epub - **Excel:** .xlsx - **Data files:** .csv +- **Web content:** .html, .xml ## 📊 Knowledge Base Summary diff --git a/doc/docs/en/user-guide/local-tools/index.md b/doc/docs/en/user-guide/local-tools/index.md index 27dc72ebc..9006f415c 100644 --- a/doc/docs/en/user-guide/local-tools/index.md +++ b/doc/docs/en/user-guide/local-tools/index.md @@ -9,6 +9,8 @@ Local tools let agents interact with the workspace, remote hosts, and external s - [Search Tools](./search-tools): Local/DataMate KB search plus Exa/Tavily/Linkup web search. - [Multimodal Tools](./multimodal-tools): Download/parse/analyze text files and images. - [Terminal Tool](./terminal-tool): Persistent SSH sessions for remote commands. +- [SQL Tools](./sql-tools): Connect to MySQL, PostgreSQL, and SQL Server to execute SQL queries. +- [Skills](../skills): Nexent's built-in tool combinations or custom capability packs with NL generation and version management. ## ⚙️ Configuration Entry diff --git a/doc/docs/en/user-guide/local-tools/multimodal-tools.md b/doc/docs/en/user-guide/local-tools/multimodal-tools.md index 6780f5f1e..986682c40 100644 --- a/doc/docs/en/user-guide/local-tools/multimodal-tools.md +++ b/doc/docs/en/user-guide/local-tools/multimodal-tools.md @@ -4,18 +4,22 @@ title: Multimodal Tools # Multimodal Tools -Multimodal tools analyze text files and images with model support. URLs can be S3, HTTP, or HTTPS. +Multimodal tools analyze text files, images, videos, and audio with model support. URLs can be S3, HTTP, or HTTPS. 
## 🧭 Tool List - `analyze_text_file`: Download and extract text, then analyze per question - `analyze_image`: Download images and interpret them with a vision-language model +- `analyze_video`: Download videos and analyze them with a video understanding model +- `analyze_audio`: Download audio and analyze it with an audio understanding model ## 🧰 Example Use Cases - Summarize documents stored in buckets - Explain screenshots, product photos, or chart images -- Produce per-file or per-image answers aligned with the input order +- Understand video content, such as extracting key frame information, human actions, or scene descriptions +- Analyze audio content, such as transcription, speaker identification, or content summarization +- Produce per-file or per-image/video/audio answers aligned with the input order ## 🧾 Parameters & Behavior @@ -29,16 +33,26 @@ Multimodal tools analyze text files and images with model support. URLs can be S - `query`: User focus/question. - Downloads each image, runs VLM analysis, and returns an array matching input order. +### analyze_video +- `video_url`: Video URL (`s3://bucket/key`, `/bucket/key`, `http(s)://`). +- `query`: User focus/question. +- Downloads the video, runs video understanding model analysis, and returns the result. + +### analyze_audio +- `audio_url`: Audio URL (`s3://bucket/key`, `/bucket/key`, `http(s)://`). +- `query`: User focus/question. +- Downloads the audio, runs audio understanding model analysis, and returns the result. + ## ⚙️ Prerequisites - Configure storage access (e.g., MinIO/S3) and data processing service to fetch files. -- Provide an LLM for `analyze_text_file` and a VLM for `analyze_image`. +- Provide an LLM for `analyze_text_file`, a VLM for `analyze_image`, and an audio/video understanding model for `analyze_video` and `analyze_audio` (must support audio/video input, e.g., Qwen3-Omni series). ## 🛠️ How to Use -1. Prepare accessible URLs and confirm permissions. -2. 
Call the corresponding tool with the URL list and question; multiple resources are supported at once. -3. Use results in the same order as inputs for display or follow-up steps. +1. Prepare accessible URLs for files, images, videos, or audio; confirm permissions. +2. Call the corresponding tool with the URL and question; multiple resources are supported at once. +3. Verify results before using them in follow-up steps. ## 💡 Best Practices diff --git a/doc/docs/en/user-guide/local-tools/sql-tools.md b/doc/docs/en/user-guide/local-tools/sql-tools.md new file mode 100644 index 000000000..859b5fbba --- /dev/null +++ b/doc/docs/en/user-guide/local-tools/sql-tools.md @@ -0,0 +1,78 @@ +--- +title: SQL Database Tools +--- + +# SQL Database Tools + +The SQL database toolset enables AI agents to connect to and query relational databases such as MySQL, PostgreSQL, and SQL Server, allowing direct data access and manipulation. + +## Tool List + +- `mysql_database`: Connect to MySQL and execute SQL queries +- `postgres_database`: Connect to PostgreSQL and execute SQL queries +- `mssql_database`: Connect to SQL Server and execute SQL queries + +## Usage Scenarios + +- Query report data from business databases for agent analysis and summarization +- Cross-database joins to retrieve related information scattered across multiple tables +- Real-time queries of business status to provide agents with up-to-date data + +## Parameters and Behavior + +### Common Parameters + +- `sql`: The SQL query to execute (required) +- `parameters`: Parameter values for parameterized queries (optional) +- `max_rows`: Maximum number of rows to return (default: 100) +- `timeout`: Query timeout in seconds (default: 10) + +### Database Connection Parameters + +| Database | Connection Parameters | +|-------------|---------------------------------------------------------------------------| +| MySQL | `host`, `user`, `password`, `database`, `port` (default 3306) | +| PostgreSQL | `host`, `user`, `password`, 
`database`, `port` (default 5432) | +| SQL Server | `host`, `user`, `password`, `database`, `port` (default 1433) | + +### Security Restrictions + +- Forbidden operations: `DROP DATABASE`, `GRANT`, `REVOKE`, `CREATE USER`, `INTO OUTFILE`, `LOAD DATA INFILE` +- `UPDATE` and `DELETE` statements must include a `WHERE` clause +- `LIMIT` is automatically added to restrict returned rows + +### Response Format + +```json +{ + "status": "success", + "columns": ["id", "name", "email"], + "rows": [[1, "John Doe", "john@example.com"]], + "row_count": 1, + "execution_time_ms": 45.23 +} +``` + +## Getting Started + +1. **Prepare connection info**: Obtain host address, port, database name, username, and password +2. **Configure the tool**: Add the appropriate database tool in agent configuration and fill in connection parameters +3. **Test connection**: Use a simple query to verify connectivity +4. **Construct queries**: Let the agent understand natural language requirements and generate corresponding SQL + +## Security Best Practices + +- Use read-only accounts in production to limit operation permissions +- Store sensitive information like database passwords in a key management service +- Set reasonable `max_rows` values to avoid returning excessive data at once +- Enable SSL/TLS encryption for database connections + +## Common Database Connection Examples + +| Database | Connection Example | Parameter Placeholder | +|-------------|-------------------|---------------------| +| MySQL | `localhost:3306` | `?` | +| PostgreSQL | `localhost:5432` | `$1, $2, ...` | +| SQL Server | `localhost:1433` | `?` | + +> Note: Different databases use different parameter placeholder formats. PostgreSQL uses `$1, $2`, while others use `?`. 
diff --git a/doc/docs/en/user-guide/mcp-tools.md b/doc/docs/en/user-guide/mcp-tools.md index b55859cbe..cd1190e0e 100644 --- a/doc/docs/en/user-guide/mcp-tools.md +++ b/doc/docs/en/user-guide/mcp-tools.md @@ -1,28 +1,159 @@ # MCP Tools -The upcoming MCP Tools management module will let you centrally manage MCP servers and tools on a single page, easily completing connection configuration, tool synchronization, and health status monitoring. +In the MCP Tools module, you can centrally manage all MCP (Model Context Protocol) servers and tools. It supports custom addition, Registry import, and Community import, covering connection configuration, tool synchronization, health monitoring, and community sharing. -## 🎯 Feature Preview +The MCP Tools page has two parallel tabs: -1. Register and manage multiple MCP servers -2. Quickly sync, view, and organize MCP tool lists -3. Monitor MCP connection status and usage in real time +- **Imported Services**: Manage MCP services already accessed by the current tenant — configure, monitor, and maintain your MCP services here. +- **Published Services**: Manage the MCP services you have published to the community — browse, edit, and unpublish. -## ⏳ Stay Tuned +--- -The MCP Tools management feature is under development. We are committed to building an efficient and intuitive management platform that enables you to: +## ➕ Add MCP Services -1. Centrally manage all MCP servers -2. Conveniently sync and organize tools -3. Monitor server connections and tool runtime status in real time +Click the **Add MCP Service** button to open the add dialog. The dialog provides three tabs, each corresponding to a different source. -## 🚀 Related Features +### Local Add -While waiting for **MCP Tools** to launch, you can: +The **Local Add** tab lets you manually configure an MCP service with two transport types. -1. Manage your MCP tools in **[Agent Development](./agent-development)** -2. 
View agent and MCP collaboration relationships through **[Agent Space](./agent-space)** -3. Experience platform features in **[Start Chat](./start-chat)** +#### Add via URL -If you encounter any issues during use, please refer to our **[FAQ](../quick-start/faq)** or ask for support in [GitHub Discussions](https://github.com/ModelEngine-Group/nexent/discussions). +For independently deployed MCP services (HTTP / SSE), connect by entering the endpoint URL. + +1. In the **Local Add** tab, set **Transport Type** to "URL" +2. Fill in the service details: + - **Service Name (required)**: A recognizable name for the MCP service + - **Service URL (required)**: The MCP service endpoint address + - **Description** (optional): A brief description of the service + - **Authorization Token** (optional): Bearer token if the service requires authentication +3. Click **Confirm** — the system will connect to the service and retrieve the available tool list + +#### Add via Container Configuration + +For MCP services that need to run locally in a container (e.g., services launched via npx), the system automatically creates and manages a container based on your JSON configuration. + +1. In the **Local Add** tab, set **Transport Type** to "Container" +2. Fill in the container configuration: + - **Service Name (required)**: A recognizable name for the MCP service + - **Description** (optional): A brief description of the service + - **Container Configuration JSON (required)**: Enter the standard MCP configuration format, for example: + ```json + { + "mcpServers": { + "service-name": { + "args": ["mcp-package-name@version"], + "command": "npx", + "env": { + "API_KEY": "xxxx" + } + } + } + } + ``` + - **Port**: The port exposed by the container service — the system automatically detects port conflicts and suggests available ports +3. 
Click **Confirm** — the system parses the JSON, creates the container, and registers the service + +### Import from MCP Registry + +Nexent integrates with the MCP Registry, allowing you to browse and import community-maintained MCP services in one click. + +1. Switch to the **MCP Registry** tab +2. Browse the available MCP services — search by name or tags +3. Click a service to view its details (description, version, required parameters, etc.) +4. Configure required parameters (e.g., API Key and other environment variables) +5. Click **Import** — the system automatically installs and configures the service + +### Import from Community + +Browse MCP services published by other Nexent users and quickly import them. + +1. Switch to the **Community Market** tab +2. Browse published community MCP services — filter by name, tags, or transport type +3. Click a service to view details, then click **Import** to add it to your service list + +--- + +## 📋 Imported Services + +The **Imported Services** tab displays all MCP services accessed by the current tenant as cards. View, edit, monitor, and publish your services here. + +### View & Filter + +Each service card shows: + +- Service name and description +- Source indicator (Custom / Registry / Community) +- Enable / Disable toggle +- Tags + +Use the filter bar at the top to filter by **Source**, **Transport Type**, and **Tags**, or use the search box to quickly locate services by name. 
+ +### Edit Service Details + +Click any service card to open the detail modal, where you can: + +- **Edit basic info**: Modify name, description, URL, Authorization Token, and tags +- **Enable / Disable**: Toggle the service on or off — tools from a disabled service will not appear in agent tool selection +- **Delete**: Remove the MCP service record — containerized services will also have their container resources cleaned up + +### View Tool List + +In the service detail modal, click **Tool List** to view all tools provided by this MCP service. + +### Health Check + +Click the **Health Check** button in the detail modal to test the connection to the MCP service. Possible statuses: + +- **Healthy**: The service is reachable +- **Unhealthy**: The service cannot be reached or responded abnormally +- **Unchecked**: A health check has not been performed yet + +### Container Management + +For containerized MCP services, the detail modal also provides: + +- **View Container Logs**: Real-time logs from the running container for troubleshooting +- **View Container Config**: The configuration JSON used when creating the container + +### Publish to Community + +In the service detail modal, click **Publish to Community**: + +1. Review or edit the publication info (name, description, tags, etc.) +2. Click **Confirm Publish** — the service will be published to the community +3. Other users can then browse and import it from the **Community Market** tab in the add dialog + +--- + +## 🌐 Published Services + +The **Published Services** tab shows all MCP services you have published to the community. Manage your published content here. + +Each card shows the service name, description, version, and tags. Filter by name, tags, and transport type. 
+ +Click a service card to view details, where you can: + +- **Edit published service**: Modify the published service's name, description, and tags +- **Delete published service**: Withdraw the service from the community — it will no longer be visible to other users + +--- + +## 🔗 Integrating with Agents + +Once an MCP service is added, its tools are automatically synced to the agent tool selection list. When configuring an agent on the **[Agent Development](./agent-development)** page: + +1. In the **Select Agent Tools** tab, locate the corresponding MCP service group +2. Click a tool name to enable it +3. Click ⚙️ to view the tool description and configure its parameters + +## 🚀 Next Steps + +After configuring MCP services, we recommend: + +1. **[Agent Development](./agent-development)** — Assign MCP tools to your agents +2. **[Agent Space](./agent-space)** — View collaboration between agents and MCP services +3. **[Start Chat](./start-chat)** — Experience agents calling MCP tools in conversations + +If you encounter any issues, please refer to our **[FAQ](../quick-start/faq)** or ask for support in [GitHub Discussions](https://github.com/ModelEngine-Group/nexent/discussions). diff --git a/doc/docs/en/user-guide/skills.md b/doc/docs/en/user-guide/skills.md new file mode 100644 index 000000000..0cdc2a288 --- /dev/null +++ b/doc/docs/en/user-guide/skills.md @@ -0,0 +1,572 @@ +--- +title: Skill Management +--- + +# Skill Management + +A Skill is a core mechanism in Nexent for extending agent capabilities. Each skill packages multiple tools with usage documentation into a reusable unit of capability, enabling agents to handle complex tasks like assembling building blocks — without consuming excessive context space. + +## Table of Contents + +- [Skills vs. 
Tools](#-skills-vs-tools): Understanding the core concepts +- [Using Skills](#-using-skills): How to use skills in agent development +- [Skill Management](#-skill-management): Create, edit, import, and export skills +- [Skill Upload Guide](#-skill-upload-guide): SKILL.md format, ZIP structure, special tags, and writing standards +- [NL-to-Skill](#-nl-to-skill): Automatically generate skills from natural language descriptions +- [Official Skills Overview](#-official-skills-overview): Built-in skills and their capabilities + +## The Relationship Between Skills and Tools + +In Nexent, **Tools** and **Skills** are two distinct layers. Understanding their differences helps you configure agent capabilities more effectively. + +A **Tool** is a single atomic operation the agent can call, such as `read_file` or `tavily_search`. When a tool is enabled for an agent, the LLM searches through the tool list on every turn — meaning even if a tool is completely unnecessary for the current conversation, the LLM still consumes context tokens to "see" it. + +A **Skill** bundles the capabilities of multiple tools into a full workflow, together with parameter configuration and usage documentation via `SKILL.md`. The LLM does not need to "see" all tools in advance. Based on the user's actual needs, it decides whether to activate a skill. Only when a skill is activated does the system load the corresponding toolset, which reduces token consumption. 
+ +| Dimension | Tool | Skill | +|-----------|------|-------| +| Granularity | Single atomic operation | Bundle of multiple tools + configuration + documentation | +| Token consumption | Occupies context on every turn | Loaded only when activated | +| Parameters | Fixed parameter schema | Customizable parameter templates | +| Versioning | No version management | Supports draft/published versions | +| Distribution | Code-level | ZIP package distribution, plug-and-play | + +**Analogy**: Tools are individual items like a screwdriver, hammer, or saw. A Skill is a toolbox — with tools pre-matched for a work scenario and accompanied by usage instructions. Open the right toolbox for the task at hand. + +## Using Skills + +### Configuring Skills for an Agent + +1. Open the **[Agent Development](./agent-development)** page +2. On the "Select Tools" tab, find the **Skills** group +3. Click a skill name to select it; click again to deselect +4. After selecting a skill, click the ⚙️ button next to it to configure skill parameters +5. Save the agent configuration + +
+ +
+ +> 💡 **Tip**: If a skill has required parameters that are not configured, a guided parameter-filling prompt will appear upon selection. + +### Skill Parameters + +Each skill's parameter definitions come from the `config/schema.yaml` file in the skill package. The configuration interface auto-generates a parameter form based on the schema, including: + +- **Parameter name and description** (bilingual: English and Chinese) +- **Required/optional markers** +- **Default values** +- **Parameter types** (string, number, boolean, array, object) +- **YAML comment auto-mapped tooltips** + +### Skill Versions + +Each skill supports multi-version management: + +- **Draft version (version=0)**: Development and debugging stage; changes take effect immediately +- **Published version (version>=1)**: Production use; parameters are locked + +When configuring the same skill for different agents, you can set different parameter values independently. + +## Skill Management + +### Viewing Installed Skills + +The "Select Tools" skill group displays all installed skills, including: +- Official skills (`official` source) +- Custom skills (`custom` source) + +### Creating Custom Skills + +Nexent supports two ways to create custom skills: uploading a skill package file, or generating one automatically from a natural language description. + +#### Method 1: Upload SKILL.md or ZIP + +1. Go to the skill configuration interface +2. Click the "Upload Skill" button +3. Select a `SKILL.md` file (single file) or a `.zip` package (complete skill package) +4. The system automatically parses and creates the skill + +#### Method 2: NL-to-Skill Natural Language Creation + +Click the **"NL Create Skill"** button on the skill management page. See the [NL-to-Skill](#-nl-to-skill) section below for details. + +### Editing Skills + +1. Find the target skill in the skill list +2. Click the skill card to enter the edit page +3. Modify the skill name, description, tags, parameter configuration, etc. +4. 
Save changes + +### Importing/Exporting Skills + +- **Export**: Click "Export" on the skill detail page to download as a JSON configuration file +- **Import**: Click "Import Skill" on the Agent Development page to upload a JSON configuration file + +> ⚠️ **Note**: When importing skills containing knowledge base tools (such as `knowledge_base_search`), these tools will only search **knowledge bases that the currently logged-in user is permitted to access in this environment**. The original skill's knowledge base configuration will not be automatically inherited. + +## Skill Upload Guide + +### Skill Package Structure + +A skill can be a single file or a ZIP package containing multiple files: + +``` +skill-name/ +├── SKILL.md # Skill definition file (required) +├── config/ +│ ├── config.yaml # Default parameter values +│ └── schema.yaml # Parameter types and descriptions +├── scripts/ +│ └── *.py # Python scripts +├── examples.md # Usage examples +└── assets/ # Static assets +``` + +### SKILL.md Format in Detail + +`SKILL.md` is the core file of a skill, consisting of a YAML frontmatter section and a body section. + +**YAML Frontmatter (required)** + +The file must start with YAML frontmatter: + +```yaml +--- +name: skill-name +description: | + A description of what this skill does and when to use it. + Write in third person. 
+tags: + - tag1 + - tag2 +--- +``` + +| Field | Required | Description | Example | +|-------|----------|-------------|---------| +| `name` | Yes | Skill name; English only, lowercase, hyphenated | `github-repo-analyzer` | +| `description` | Yes | Skill function description; 1-3 sentences, include use case | `This skill analyzes GitHub repositories and extracts key metrics` | +| `tags` | No | Skill tag list for categorization and search | `["code", "github", "analysis"]` | +| `allowed-tools` | No | List of allowed tools (all available by default) | `[file_read, web_search]` | +| `always` | No | Whether to auto-activate on every turn (default: false) | `false` | + +**Body (optional)** + +Below the frontmatter, you can write Markdown content including usage instructions, best practices, example code, and more. + +### Two Skill Types + +Based on their purpose, skills fall into two categories with different writing styles: + +**Tool Skills**: Used to expose tool capabilities. The body should include tool parameter descriptions, usage examples, return formats, and error handling. + +**Agent Skills**: Used to teach the agent how to perform a complex task. The body should include workflow instructions, domain knowledge, boundary conditions, and best practices. + +### config/schema.yaml: Defining Parameter Forms + +If a skill requires user-supplied parameters, create a `config/schema.yaml` file. The system will auto-generate a parameter configuration form in the frontend based on this file. 
+ +```yaml +param_name: + type: string | number | boolean | array | object + required: true | false + default: + description: "English description of the parameter" + description_zh: "Chinese description of the parameter" +``` + +**Supported types**: `string`, `number`, `boolean`, `array`, `object` + +**Complete example**: + +```yaml +query: + type: string + required: true + description: "Search query string" + description_zh: "Search keyword" + default: "" + +top_k: + type: number + required: false + description: "Number of results to return" + description_zh: "Number of returned results" + default: 3 + +enable_rerank: + type: boolean + required: false + description: "Enable result reranking" + description_zh: "Whether to enable result reranking" + default: false +``` + +### config/config.yaml: Setting Parameter Defaults + +If you want certain parameters to have default values, create `config/config.yaml`: + +```yaml +# Initial workspace path +init_path: "/mnt/nexent" + +# Maximum number of results +top_k: 5 +``` + +### Special Tags + +You can use the following special tags in the SKILL.md body: + +#### ``: Lazy-loading Example Files + +Use the `` tag to reference external files. The referenced file is loaded only when needed, keeping the main `SKILL.md` file lightweight. + +```markdown +## Example Reference + +> **Note**: Only load the reference example file when the default Usage examples cannot meet your needs. 
+ + +``` + +#### ``: Declaring Bundled Scripts + +If the skill package contains Python or Shell scripts, declare them in `SKILL.md`: + +```markdown + +``` + +#### ``: Displaying Executable Code Examples + +Use the `` tag to wrap executable code examples (usually Python code): + +```markdown + +result = run_skill_script( + "code-reviewer", + "scripts/analyze.py", + {"--target": "/path/to/file.py", "--verbose": True} +) +print(result) + +``` + +### Helper Functions + +In agent skill bodies and examples, you can use the following functions: + +**`run_skill_script(skill_name, script_path, params)`**: Execute a script bundled in the skill package + +```python +# Execute a Python script +result = run_skill_script( + "code-reviewer", + "scripts/analyze.py", + {"--target": "/path/to/file.py"} +) + +# Execute a Shell script +result = run_skill_script( + "database-migration", + "scripts/migrate.sh", + {"--direction": "up", "--steps": 1} +) +``` + +**`read_skill_md(skill_name, files)`**: Read files from the skill package + +```python +# By default, only reads SKILL.md (referenced files are not auto-included) +content = read_skill_md("my-skill") + +# Explicitly specify which files to read +full_content = read_skill_md("my-skill", [ + "SKILL.md", + "reference/api-reference.md" +]) +``` + +### Writing Standards and Best Practices + +**SKILL.md Writing Standards**: + +1. **Be specific**: Explain when to use the skill, not just what it does + - ✓ "Used when you need to analyze GitHub repository popularity metrics" + - ✗ "GitHub search function" + +2. **Avoid time-sensitive information**: Do not include specific dates, version numbers, or other content that will become outdated + +3. **Stay concise**: Keep the `SKILL.md` body under 500 lines. Use `` for complex content that can be lazy-loaded + +4. **Path format**: Always use forward slashes `/`, even on Windows + - ✓ `src/services/payment_service.py` + - ✗ `src\services\payment_service.py` + +5. 
**Consistent parameter naming**: Use the same terminology and naming style throughout + +6. **Include boundary conditions**: Explain the skill's scope and limitations + +**Parameter Description Best Practices**: + +```yaml +# ✓ Good: Clearly specify purpose and format +query: + type: string + required: true + description: "GitHub repository owner/name or full URL" + description_zh: "GitHub repository in owner/name format or full URL" + +# ✗ Bad: Too vague +query: + type: string + required: true + description: "Search query" + description_zh: "Query" +``` + +**Code Example Best Practices**: + +- Provide at least 2 different-scenario examples for each tool +- Include common parameter combinations in examples +- Demonstrate both successful calls and common error handling + +### Learning from Existing Skills + +The system includes several complete skill reference examples in `test_skill_examples/official-skills/`: + +| Skill Name | Reference Value | +|-----------|-----------------| +| `create-file-directory` | Standard writing for tool skills, with complete parameter tables, usage examples, and error handling tables | +| `search-knowledge-base` | Parameter configuration for search skills, with complete `schema.yaml` and `config.yaml` examples | +| `analyze-image` | Multimodal tool example with `` call format | +| `code_review_expert` | Agent skill reference with bundled scripts and `` tag usage | + +### FAQ + +**Q: Upload reports "SKILL.md not found"** + +Make sure the `SKILL.md` file is in the ZIP package's root directory, not inside a subfolder. + +**Q: Parameter form didn't generate correctly** + +Check that `config/schema.yaml` is formatted correctly. Ensure each field has both `type` and `description` fields. + +**Q: Skill description isn't taking effect** + +The skill description should be written in the YAML frontmatter's `description` field, not in the Markdown body section. Body content is not parsed as the skill description. 
+ +## NL-to-Skill + +NL-to-Skill is an intelligent creation feature provided by Nexent. You simply describe a skill requirement in natural language, and the system automatically generates a complete skill package — including skill definition, parameter configuration, and even accompanying script code. The entire generation process is visible in real time, as if an AI assistant is writing code for you. + +In simple terms: + +> You say "I want a skill that can search GitHub repositories and extract Star counts," and the system automatically generates a complete, usable skill for you. + +### Quick Start + +#### Step 1: Describe Your Requirement + +In the input box, describe the skill you want in natural language. The clearer your description, the better the generated result. + +**Good examples**: +- "Create a skill that searches GitHub repositories by keywords and returns Star counts, descriptions, and links" +- "Create a skill that reads an Excel file, calculates statistics for each column, and generates a chart" +- "Create a skill that extracts order numbers, amounts, and dates from emails and compiles them into a table" + +**Bad examples**: +- "Help me make a chat skill" (too vague) +- "Search tool" (lacks specific capability description) + +#### Step 2: Watch the Generation Process + +After clicking "Generate," the page displays the AI's thinking and writing process in real time: +- See the AI analyzing your requirement +- See it writing the skill definition file +- See it planning the parameter structure + +This process is like watching AI write code live. You can click "Stop" at any time to interrupt. 
+ +#### Step 3: Preview and Save + +After generation completes, the system displays the complete skill content: +- Skill name and description +- Parameter list (what each parameter is, whether required) +- Usage examples + +Check the preview carefully: +- To make adjustments, click "Edit" to fine-tune +- If it meets your expectations, click "Save" to add the skill to your skill library + +### Writing Tips + +#### How to Write a Good Skill Description + +**1. Clarify inputs and outputs** + +Tell the system what information the skill needs and what it will return. + +``` +✓ "Input a GitHub repository address; return the repository name, Star count, Fork count, and last update time" +✗ "Search GitHub" (too vague) +``` + +**2. Explain the use case** + +Help the AI understand in what situations this skill would be used. + +``` +✓ "Used to quickly query the popularity of open-source projects and assist with technical selection decisions" +✗ "Get data" (no context) +``` + +**3. Describe boundary conditions** + +If the skill has special processing logic or limitations, mention them. + +``` +✓ "If the repository doesn't exist, return a friendly message instead of an error" +✓ "Skip invalid image URLs and log them" +``` + +**4. Explicitly request examples** + +If the skill has complex usage scenarios with high accuracy requirements, explicitly request detailed examples. 
+ +``` +✓ "Generate comprehensive and detailed usage examples" +``` + +#### Usage Scenario Examples + +| Scenario | Description Example | +|---------|-------------------| +| **Data collection** | "Search Zhihu for Q&A related to the keywords and extract summaries of the highest-liked answers" | +| **File processing** | "Upload a CSV file; automatically calculate statistics for each column and generate a line chart" | +| **API encapsulation** | "Create a skill that calls a weather API and returns a three-day forecast" | +| **Multi-tool combination** | "Input a product link; automatically compare prices (calling multiple e-commerce searches) and return the lowest-price link" | +| **Data cleaning** | "Read a messy text block; extract emails, phone numbers, and dates, and format the output" | + +### What You Can Do During Generation + +#### Real-time Preview + +During generation, skill content progressively appears in the preview area: +- `SKILL.md` content: skill definition, description, tags +- `examples.md`: skill usage examples +- `scripts/*.py`: tool scripts (in complex mode) + +#### Stop Anytime + +If the generation direction deviates from expectations: +- Click the "Stop" button; the AI immediately stops +- Existing generated results are preserved; you can review or discard them + +#### Multiple Attempts + +If the first generation result is unsatisfactory: +- Directly add more requirement details; modify based on the existing result +- Or manually adjust in the preview +- If you want to start completely fresh, click the "trash" icon in the upper right corner to clear all skill content + +### Limitations and Notes + +#### Model Capability Affects Quality + +NL-to-Skill uses the LLM model configured for your tenant to generate skills. 
The model's capability directly determines the generation quality: +- Smarter models accurately understand requirements and generate well-structured, easy-to-understand skills +- Weaker models may produce incomplete or misleading content, affecting agent efficiency and accuracy + +If the generation result is unsatisfactory, try: +1. Simplify the requirement description +2. Switch to a smarter, more capable model +3. Create in steps (make a simple version first, then manually expand) + +#### Token Consumption + +Complex skill generation consumes more tokens: +- **Simple mode**: Usually consumes less; suitable for quick validation +- **Complex mode**: Consumes more; suitable for formally creating complete skills + +It is recommended to first test the idea in simple mode, then use complex mode for formal creation after confirming feasibility. + +#### Not All Requirements Can Be Realized + +NL-to-Skill excels at generating skills for: +- Single tool wrapping (e.g., encapsulating a search capability) +- Simple multi-tool chaining (e.g., search → read → summarize) +- Common data processing flows (e.g., file format conversion, data extraction) + +The following types of skills may be beyond its capabilities: +- Requiring external APIs that are not integrated +- Involving complex state management or concurrency logic +- Requiring access to underlying platform interfaces that are not open + +When encountering requirements that cannot be fulfilled, the system will display a notice. You can consider creating the skill manually or contacting technical support. + +#### Modifying Skills + +In the NL-to-Skill interface, you can select an existing skill. After selecting, the skill information loads automatically. You can then describe the changes you want in natural language in the dialog on the left to update the skill. + +If the skill name you create conflicts with an existing skill, Nexent will automatically switch from skill creation mode to skill update mode. The newly generated content will then overwrite the original skill. 
+ +## Official Skills Overview + +### File Operations + +| Skill Name | Description | Main Tools | +|-----------|-------------|------------| +| `read-file` | Read file content and metadata within the workspace | `read_file` | +| `create-file-directory` | Create files or directories | `create_file`, `create_directory` | +| `delete-file-directory` | Delete files or directories (irreversible) | `delete_file`, `delete_directory` | +| `move-file-directory` | Move or rename files/directories | `move_item` | +| `list-directory` | List directory structure in a tree view | `list_directory` | + +### Knowledge Base Search + +| Skill Name | Description | Main Tools | +|-----------|-------------|------------| +| `search-knowledge-base` | Local knowledge base semantic search | `knowledge_base_search` | +| `search-dify` | Dify knowledge base search (supports semantic / keyword / full_text / hybrid modes) | `dify_search` | +| `search-idata` | iData knowledge base search | `idata_search` | +| `search-datamate` | DataMate knowledge base search (with similarity threshold control) | `datamate_search` | + +### Web Search + +| Skill Name | Description | Main Tools | +|-----------|-------------|------------| +| `search-web-tavily` | Tavily real-time web search | `tavily_search` | +| `search-web-linkup` | Linkup image and text mixed search | `linkup_search` | +| `search-web-exa` | Exa deep web search | `exa_search` | + +### Multimodal Analysis + +| Skill Name | Description | Main Tools | +|-----------|-------------|------------| +| `analyze-image` | VLM-based image content analysis and Q&A | `analyze_image` | +| `analyze-text-file` | PDF/Word/Excel file content extraction and Q&A | `analyze_text_file` | + +### Communication and Remote Operations + +| Skill Name | Description | Main Tools | +|-----------|-------------|------------| +| `email-utils` | IMAP receive / SMTP send (supports HTML / CC / BCC) | `get_email`, `send_email` | +| `run-shell-ssh` | Persistent SSH session for remote 
command execution | `terminal` | + +## Security and Best Practices + +- **Knowledge base access control**: When importing skills containing knowledge base tools, actual search scope is limited by the current user's permissions +- **Web search**: Tavily / Linkup / Exa web search requires the corresponding API Key to be configured in the platform security settings first +- **Path security**: File operations within skill packages are limited to the skill directory scope and cannot access arbitrary system paths +- **Irreversible operations**: Delete and move operations are irreversible; confirm the target before executing +- **NL-to-Skill Token consumption**: Complex skill generation consumes more model tokens; it is recommended to test in simple mode first + +## Related References + +- [Agent Development](./agent-development) +- [Local Tools Overview](./local-tools/index) +- [MCP Tool Configuration](./mcp-tools) +- [Skills System Overview](../backend/skills/overview) diff --git a/doc/docs/en/user-guide/start-chat.md b/doc/docs/en/user-guide/start-chat.md index 9593cb6ec..5834521ea 100644 --- a/doc/docs/en/user-guide/start-chat.md +++ b/doc/docs/en/user-guide/start-chat.md @@ -79,8 +79,8 @@ You can upload files during a chat so the agent can reason over their content: - Or drag files directly into the chat area 2. **Supported File Formats** - - **Documents:** PDF, Word (.docx), PowerPoint (.pptx), Excel (.xlsx) - - **Text:** Markdown (.md), Plain text (.txt) + - **Documents:** PDF, Word (.docx), PowerPoint (.pptx), Excel (.xlsx), EPUB (.epub), HTML (.html), XML (.xml) + - **Text & Data:** Markdown (.md), Plain text (.txt), JSON (.json), CSV (.csv) - **Images:** JPG, PNG, GIF, and other common formats 3. 
**File Processing Flow** diff --git a/doc/docs/zh/backend/skills/index.md b/doc/docs/zh/backend/skills/index.md new file mode 100644 index 000000000..10b37bc90 --- /dev/null +++ b/doc/docs/zh/backend/skills/index.md @@ -0,0 +1,37 @@ +# 后端技能(Skill)文档 + +本节介绍 Nexent 后端基础设施中 Skills 技能系统的完整生态,包括技能定义、技能包结构与系统架构。 + +## 可用文档 + +### 概览与架构 +- [技能系统概览](./overview):技能类型、生命周期与版本管理 + +## 技能与工具的关系 + +在 Nexent 中,**工具(Tool)** 与 **技能(Skill)** 是两个不同层次的概念: + +- **工具**:智能体可调用的单个原子操作。启用后,LLM 的每次思考都会在工具列表中搜索——即使本次对话完全不需要某个工具,LLM 仍然会消耗上下文额度。 +- **技能**:通过 `SKILL.md` 将多个工具的能力组合为一个完整的工作流,并附带参数配置与使用文档。LLM 根据用户实际需求自行判断是否激活技能,激活后才加载对应工具集——有效节省 Token 消耗。 + +## 快速开始 + +1. **了解能力**:阅读 [技能系统概览](./overview) 了解已支持的技能类型 +2. **体验创建**:在 [技能管理](../../user-guide/skills) 页面体验 NL-to-Skill 创建 +3. **手动创建**:上传 `SKILL.md` 或 ZIP 包创建自定义技能 +4. **为智能体配置**:在智能体工具配置中勾选技能 + +## 相关参考 + +- [技能管理(用户指南)](../../user-guide/skills) +- [智能体开发指南](../../user-guide/agent-development) +- [本地工具概览](../../user-guide/local-tools/index) +- [SDK 工具开发规范](../../sdk/core/tools) +- [MCP 工具开发](../tools/mcp) +- [常见问题](../../quick-start/faq) + +## 获取帮助 + +- 查看 [常见问题](../../quick-start/faq) 了解常见技能使用问题 +- 在 [GitHub Discussions](https://github.com/ModelEngine-Group/nexent/discussions) 中提问 +- 查看 [GitHub Issues](https://github.com/ModelEngine-Group/nexent/issues) 了解已知问题 diff --git a/doc/docs/zh/backend/skills/overview.md b/doc/docs/zh/backend/skills/overview.md new file mode 100644 index 000000000..f3d866f78 --- /dev/null +++ b/doc/docs/zh/backend/skills/overview.md @@ -0,0 +1,138 @@ +# 技能系统概览 + +技能(Skill)是 Nexent 为智能体扩展能力的方式。每个技能由以下部分组成: + +- **技能描述**:这个技能是做什么的、什么时候该用它 +- **工具组合**:一个或多个 nexent sdk方法或用户自定义工具的打包 +- **参数模板**:用户可为技能填写哪些参数 +- **使用示例**:这个技能通常怎么用 + +与直接选择一个一个工具相比,技能让复杂能力的配置变得简单——只需安装一个技能包,无需分别配置每个工具。 + +## 技能包结构 + +技能包可以是单个 `SKILL.md` 文件,也可以是包含多个文件的 ZIP 包: + +``` +skill-name/ +├── SKILL.md # 技能定义文件(必需) +├── config/ +│ ├── config.yaml # 参数默认值(可选) +│ └── schema.yaml # 参数类型与说明(可选) +├── scripts/ +│ └── *.py # Python 脚本(可选) +├── examples.md 
# 使用示例(可选) +└── assets/ # 静态资源(可选) +``` + +### SKILL.md 的结构 + +每个技能必须有一个 `SKILL.md` 文件,分为两部分: + +**第一部分:YAML 元数据(必须)** + +```yaml +--- +name: skill-name +description: | + 一段描述,说明这个技能是做什么的、什么时候该用它。 + 建议用第三人称书写,如:"这个技能用于..." +tags: + - tag1 + - tag2 +--- +``` + +**第二部分:技能正文** + +元数据下方可以继续写 Markdown 内容,包括: +- 技能的详细说明与使用指南 +- 工具调用方式的示例代码 +- 错误处理说明 +- 使用限制与注意事项 + +### 两种技能类型 + +根据用途,技能分为两类: + +**工具类技能**:用于暴露一个或多个 Nexent sdk方法的能力,包含工具的参数说明、调用示例、返回格式、错误处理等。用户配置好参数后,智能体即可调用这些工具。 + +**智能体类技能**:用于教智能体如何执行一个复杂任务,包含工作流程说明、领域知识、最佳实践,有时附带辅助脚本。这类技能的正文会包含详细的步骤指引。 + +## 官方技能一览 + +### 文件操作类 + +| 技能名称 | 能力说明 | +|---------|---------| +| `read-file` | 读取工作空间内文件内容与元信息 | +| `create-file-directory` | 创建文件或目录 | +| `delete-file-directory` | 删除文件或目录 | +| `move-file-directory` | 移动或重命名文件/目录 | +| `list-directory` | 树形列出目录结构 | + +### 知识库搜索类 + +| 技能名称 | 能力说明 | +|---------|---------| +| `search-knowledge-base` | 本地知识库语义检索(支持 hybrid / accurate / semantic 模式) | +| `search-dify` | Dify 知识库检索 | +| `search-idata` | iData 知识库检索 | +| `search-datamate` | DataMate 知识库检索(支持相似度阈值控制) | + +### 公网搜索类 + +| 技能名称 | 能力说明 | +|---------|---------| +| `search-web-tavily` | Tavily 公网实时搜索 | +| `search-web-linkup` | Linkup 图文混合搜索 | +| `search-web-exa` | Exa 深度网页搜索 | + +### 多模态分析类 + +| 技能名称 | 能力说明 | +|---------|---------| +| `analyze-image` | 基于 VLM 的图片内容分析问答 | +| `analyze-text-file` | PDF/Word/Excel 等文件内容提取与问答 | + +### 通信与远程操作类 + +| 技能名称 | 能力说明 | +|---------|---------| +| `email-utils` | IMAP 收件 / SMTP 发件(支持 HTML / CC / BCC) | +| `run-shell-ssh` | 持久化 SSH 会话远程执行命令 | + +## 技能生命周期 + +### 版本管理 + +每个技能支持两个版本状态: + +- **草稿版本(version=0)**:开发调试阶段,修改即时生效,适合反复调整 +- **已发布版本(version>=1)**:正式使用,参数锁定,防止误改 + +### 技能实例 + +同一个技能可以为不同的智能体配置不同的参数值,互不影响。 + +例如,搜索技能可以为"技术文档 Agent"配置只搜索技术知识库,为"客服 Agent"配置只搜索客服知识库。 + +### 常见操作流程 + +``` +创建技能 → 配置参数 → 为智能体选择技能 → 调试 → 发布 + ↓ + 修改草稿版本 +``` + +## 安全说明 + +- **路径隔离**:技能包内文件仅能在技能目录范围内访问 +- **参数校验**:schema.yaml 中定义的参数均经过前端表单校验 +- **权限控制**:技能实例按租户隔离,API 需携带认证 Token + +## 相关参考 + +- 
[技能管理(用户指南)](../../user-guide/skills) +- [智能体开发指南](../../user-guide/agent-development) +- [本地工具概览](../../user-guide/local-tools/index) diff --git a/doc/docs/zh/backend/tools/index.md b/doc/docs/zh/backend/tools/index.md index 94e1fe36e..88560fdcf 100644 --- a/doc/docs/zh/backend/tools/index.md +++ b/doc/docs/zh/backend/tools/index.md @@ -12,6 +12,10 @@ 模型上下文协议工具,用于标准化 AI 智能体通信。 → [MCP 工具开发](./mcp) +### Skills 技能系统 +通过自然语言或 ZIP 包创建可复用的技能包,为智能体赋予更加灵活的工具调用能力。 +→ [Skills 技能文档](../skills/index) + ## 快速开始 1. **选择工具类型**: LangChain 用于通用 AI 工作流,MCP 用于标准化智能体通信 @@ -28,4 +32,4 @@ - 查看我们的 [常见问题](../../quick-start/faq) 了解常见工具集成问题 - 加入我们的 [Discord 社区](https://discord.gg/tb5H3S3wyv) 获取实时支持 -- 查看 [GitHub Issues](https://github.com/ModelEngine-Group/nexent/issues) 了解已知问题 \ No newline at end of file +- 查看 [GitHub Issues](https://github.com/ModelEngine-Group/nexent/issues) 了解已知问题 diff --git a/doc/docs/zh/deployment/devcontainer.md b/doc/docs/zh/deployment/devcontainer.md index 2ce184901..b5b934187 100644 --- a/doc/docs/zh/deployment/devcontainer.md +++ b/doc/docs/zh/deployment/devcontainer.md @@ -25,7 +25,7 @@ 1. 克隆项目到本地 2. 在 Cursor 中打开项目文件夹 -3. 运行 `docker/deploy.sh` 脚本,在`infrastructure` 模式下启动容器 +3. 在 `docker` 目录运行 `./deploy.sh --components infrastructure,application --port-policy development` 启动基础容器 4. 进入 `nexent-minio` 与 `nexent-elasticsearch` 容器, 将 `MINIO_ACCESS_KEY`, `MINIO_SECRET_KEY`, `ELASTICSEARCH_API_KEY` 环境变量复制到 `docker/docker-compose.dev.yml` 中的相应环境变量位置 5. 按下 `F1` 或 `Ctrl+Shift+P`,输入 `Dev Containers: Reopen in Container ...` 6. 
Cursor 将根据 `.devcontainer` 目录中的配置启动开发容器 diff --git a/doc/docs/zh/deployment/docker-build.md b/doc/docs/zh/deployment/docker-build.md index 8dad0612e..8e360d95d 100644 --- a/doc/docs/zh/deployment/docker-build.md +++ b/doc/docs/zh/deployment/docker-build.md @@ -160,6 +160,11 @@ docker rm nexent-docs ## 🚀 部署建议 -构建完成后,可以使用 `docker/deploy.sh` 脚本进行部署,或者直接使用 `docker-compose` 启动服务。 +构建完成后,可以进入 `docker` 目录使用部署脚本启动本地镜像: -> 启动测试本地构建的镜像时,需要修改下`docker/deploy.sh`中的`APP_VERSION="$(get_app_version)"` -> `APP_VERSION="latest"`,因为部署时默认会使用当前版本对应的镜像。 \ No newline at end of file +```bash +cd docker +bash deploy.sh --image-source local-latest +``` + +> `local-latest` 会使用本地 `latest` Nexent 应用镜像并避免重新拉取这些镜像,无需修改 `docker/deploy.sh`。 diff --git a/doc/docs/zh/developer-guide/environment-setup.md b/doc/docs/zh/developer-guide/environment-setup.md index 0a81ca10d..cc98ff58a 100644 --- a/doc/docs/zh/developer-guide/environment-setup.md +++ b/doc/docs/zh/developer-guide/environment-setup.md @@ -23,7 +23,7 @@ title: 环境准备 ```bash # 在项目根目录的 docker 目录执行 cd docker -./deploy.sh --mode infrastructure +./deploy.sh --components infrastructure --port-policy development ``` :::: info 重要提示 @@ -131,4 +131,3 @@ uv pip install -e ".[dev]" - 测试框架(pytest) - 数据处理依赖(unstructured) - 其他开发辅助依赖 - diff --git a/doc/docs/zh/getting-started/features.md b/doc/docs/zh/getting-started/features.md index 8d1adf47c..658a89e18 100644 --- a/doc/docs/zh/getting-started/features.md +++ b/doc/docs/zh/getting-started/features.md @@ -1,45 +1,74 @@ # 核心特性 -Nexent 提供强大的功能来构建和部署 AI 智能体,只需最少的工作量。以下是让 Nexent 独特的核心特性。 +Nexent v2.0 提供了强大的 AI 智能体构建与部署能力,以下是让 Nexent 与众不同的核心特性。 -## 🧠 智能体提示词生成 +## ⚙️ 多模型集成 -将自然语言转换为可执行的提示词。Nexent 自动选择正确的工具并为每个请求规划最佳的执行路径。 +Nexent 支持 OpenAI 兼容任意模型提供商,一站式覆盖 LLM、Embedding、VLM、STT、TTS 全类型模型。支持与 ModelEngine 平台无缝同步。平台支持接入任意兼容 OpenAI API 协议的服务商,轻松实现模型多样化与国产化切换。 -![特性 1](../../assets/Feature1.png) +## 🤖 智能体零代码生成 -## ⚡ 可扩展的数据处理引擎 +只需用自然语言描述你的需求,Nexent 
便能自动将意图转化为可执行的智能体配置。系统会智能选择合适的工具,规划最优的执行路径,并生成专业的提示词。无需编写代码,无需拖拽配置,真正实现"所想即所得"的智能体创建体验。同时支持智能体导入导出,方便分享与复用;提供在线调试能力,边调边改,快速迭代。 -处理 20+ 种数据格式,具备快速 OCR 和表格结构提取能力,从单一流程平滑扩展到大批量管道处理。 +## 🤝 A2A 协议与智能体协作 -![特性 2](../../assets/Feature2.png) +Nexent 支持 **Agent-to-Agent(A2A)** 通信协议,让多个智能体能够无缝协作。主智能体可以调用子智能体完成特定任务,子智能体执行完成后将结果汇总给主智能体。支持配置多个协作型子智能体,每个子智能体可拥有独立的工具集、模型配置和执行策略,轻松构建复杂的分布式智能体工作流。 -## 📚 个人级知识库 +## 🧠 分层记忆机制 -实时导入文件,自动总结内容,让智能体能够即时访问个人和全局知识,并知道从每个知识库能获取什么。 +智能的上下文管理是智能体真正"懂你"的关键。Nexent 提供两层记忆体系: -![特性 3](../../assets/Feature3.png) +- **用户级记忆**:个人偏好、习惯和使用方式 +- **用户-智能体级记忆**:特定用户在特定智能体中的协作历史与上下文 -## 🌐 互联网知识搜索 +系统自动从对话中提取关键信息生成记忆条目,无需手动输入;记忆条目支持手工添加修改,更加灵活;智能检索机制确保每次对话都能自动获取最相关的上下文记忆,实现真正的个性化服务。 -连接 5+ 个网络搜索提供商,让智能体能够将最新的互联网信息与你的私有数据相结合。 +## 📝 Skill 渐进式披露 -![特性 4](../../assets/Feature4.png) +Nexent 引入了 **渐进式 Skill 披露**机制。当用户输入任务时,系统会根据当前上下文动态揭示最相关的 Skill 建议,帮助用户快速找到适合当前任务的工具和方法。这一机制能够防止上下文爆炸,高效利用上下文窗口。 -## 🔍 知识级溯源 +## 🗄️ 个人级知识库 -提供来自网络和知识库来源的精确引用,让每个事实都可验证。 +支持用户在 Nexent 平台创建个人知识库,支持实时导入文件,自动解析并向量化内容,让智能体能够即时访问私有数据。支持 20+ 种文档格式,包括文本、PDF、Word、PowerPoint、Excel、CSV 等,并提供快速 OCR 和表格结构提取能力。自动为每个知识库生成摘要,帮助智能体准确判断何时应该从该知识库检索信息。可设置细粒度的访问权限:私有、部门级共享或全组织可见。 -![特性 5](../../assets/Feature5.png) +## 🔧 MCP 工具生态系统 -## 🎭 多模态理解与对话 +Nexent 基于 **Model Context Protocol(MCP)** 构建工具生态,MCP 被誉为"AI 的 USB-C",是连接 AI 智能体与外部世界的通用接口标准。 -支持语音、文字、文件或图像输入。Nexent 理解语音、文本和图片,甚至可以按需生成新图像。 +- 支持通过 URL 或 JSON 配置快速添加第三方 MCP 服务 +- 支持本地 MCP 工具开发,可接入 LangChain 工具、自定义 Python 插件 +- 可热插拔地更换工具、模型和工具链,无需触碰核心代码 +- 内置工具测试能力,创建智能体前即可验证工具是否按预期工作 -![特性 6](../../assets/Feature6.png) +## 🌐 互联网知识集成 -## 🔧 MCP 工具生态系统 +连接多个网络搜索提供商,让智能体能够将最新鲜的互联网信息与私有数据相结合。支持混合搜索模式,兼顾实时性和准确性。 + +## 🔍 知识溯源与引用 + +每个回答都附带精确的引用来源,来自网络搜索结果或知识库文档,让每个事实都透明可查。来源信息可一键追溯,增强回答的可信度。 + +## 🎭 多模态交互 + +支持语音、文本、图像和文件多种输入方式。智能体能够理解语音、文本和图片,可以按需生成新图像,提供真正自然的多模态对话体验。 + +## 🔢 智能体版本管理 + +完善的版本控制体系,支持智能体的版本迭代与历史回溯。每个版本独立存档,可随时查看变更历史、比较版本差异,并在必要时回退到历史版本。支持智能体配置导入导出(JSON 格式),方便跨环境迁移和团队协作。 + +## 🏪 智能体市场 + 
+内置智能体市场,汇聚官方和社区创建的优质智能体。一键下载即可使用,也可将其作为子智能体集成到自己的智能体工作流中,快速构建复杂应用。 + +## 👥 分权分域与用户管理 + +Nexent 提供完善的多租户、分角色权限管理体系: + +- **四层角色**:超级管理员、租户管理员、开发者、普通用户,职责分明 +- **多租户隔离**:租户间数据完全隔离,支持跨租户的平台级管理 +- **用户组机制**:通过用户组管理资源和访问权限,支持灵活的权限委托 +- **邀请码机制**:受控注册,保障平台安全性 +- **资源级权限**:智能体、知识库等资源可精细控制到用户组级别 -插入或构建遵循 MCP 规范的 Python 插件;在不触及核心代码的情况下交换模型、工具和链。 +关于 Nexent 软件架构和技术优势的详细信息,请参阅我们的**[软件架构](./software-architecture)**指南。 -![特性 7](../../assets/Feature7.png) diff --git a/doc/docs/zh/getting-started/overview.md b/doc/docs/zh/getting-started/overview.md index e5bc95549..77aa78f71 100644 --- a/doc/docs/zh/getting-started/overview.md +++ b/doc/docs/zh/getting-started/overview.md @@ -17,10 +17,10 @@ Nexent 是一个基于 **Harness Engineering** 原则打造的零代码智能体 > *If you want to go fast, go alone; if you want to go far, go together.* -我们已经发布了 **Nexent v1**,目前功能已经相对稳定,但仍可能存在一些 bug,我们会持续改进并不断增加新功能。敬请期待,我们很快也会公布 **v2.0** 版本! +我们已发布 **Nexent v2.0**!在 v1.0 的基础上全面升级,带来 A2A 协议支持、Skill 渐进式披露、分层记忆机制、用户管理与分权分域、智能体版本管理、智能体市场等重磅功能。同时保留并强化了知识库集成、多模态交互、MCP 工具生态等核心能力。平台功能日趋完善,欢迎试用并提出您的宝贵意见。 -* **🗺️ 查看我们的 [功能地图](https://github.com/orgs/ModelEngine-Group/projects/6)** 探索当前和即将推出的功能。 -* **🔍 试用当前版本** 并在 [问题反馈](https://github.com/ModelEngine-Group/nexent/issues) 中留下想法或报告错误。 +- **🗺️ 查看我们的 [功能地图](https://github.com/orgs/ModelEngine-Group/projects/6)** 探索当前和即将推出的功能。 +- **🔍 试用当前版本** 并在 [问题反馈](https://github.com/ModelEngine-Group/nexent/issues) 中留下想法或报告错误。 > *Rome wasn't built in a day.* @@ -28,19 +28,25 @@ Nexent 是一个基于 **Harness Engineering** 原则打造的零代码智能体 早期贡献者不会被忽视:从特殊徽章和纪念品到其他实质性奖励,我们致力于感谢那些帮助 Nexent 诞生的先驱者。 -最重要的是,我们需要关注度。请 [前往GitHub](https://github.com/ModelEngine-Group/nexent) 为我们点星 ⭐ 并关注,与朋友分享,帮助更多开发者发现 Nexent —— 您的每一次点击都能为项目带来新的参与者,保持发展势头。 +最重要的是,我们需要关注度。请 [前往 GitHub](https://github.com/ModelEngine-Group/nexent) 为我们点星 ⭐ 并关注,与朋友分享,帮助更多开发者发现 Nexent —— 您的每一次点击都能为项目带来新的参与者,保持发展势头。 ## ✨ 核心特性 -Nexent 为构建强大的 AI 智能体提供全面的功能集: - -- **🤖 智能体生成** - 使用自然语言进行零代码智能体创建 -- **📊 可扩展数据处理** - 处理 20+ 种文件格式和智能提取 -- **🧠 个人知识库** - 实时文件导入和自动摘要 
-- **🌐 互联网集成** - 连接多个搜索提供商和网络资源 -- **🔍 知识溯源** - 精确引用和来源验证 -- **🎭 多模态支持** - 语音、文本、图像和文件处理 -- **🔧 MCP 生态系统** - 可扩展的工具集成和自定义开发 +Nexent v2.0 为构建强大的 AI 智能体提供全面的功能集: + +- **⚙️ 多模型集成** — OpenAI 兼容任意提供商,Embedding/VLM/STT/TTS 全覆盖 +- **🤖 智能体零代码生成** — 纯自然语言描述需求,一键生成可执行智能体 +- **🤝 A2A 智能体协作** — Agent-to-Agent 协议支持多智能体无缝协作 +- **🧠 分层记忆机制** — 两层记忆体系,跨对话持续积累上下文 +- **📝 Skill 渐进式披露** — 动态揭示最相关工具,渐进探索系统能力 +- **🗄️ 个人级知识库** — 20+ 格式文档实时导入与智能检索 +- **🔧 MCP 工具生态** — 即插即用的扩展工具体系,可自定义开发 +- **🌐 互联网知识集成** — 多搜索源混合,实时信息与私有数据融合 +- **🔍 知识级溯源** — 精确引用与来源验证,每个事实透明可查 +- **🎭 多模态交互** — 语音、文字、图像、文件,全方位自然对话 +- **🔢 智能体版本管理** — 版本迭代与历史回溯,安全可控 +- **🏪 智能体市场** — 官方与社区优质智能体,一键安装即用 +- **👥 分权分域管理** — 多租户隔离,RBAC 权限体系,精细化资源管控 有关详细的功能信息和示例,请参阅我们的 **[核心特性](./features)**。 @@ -49,20 +55,23 @@ Nexent 为构建强大的 AI 智能体提供全面的功能集: Nexent 采用现代化的分布式微服务架构,专为高性能、可扩展的 AI 智能体平台而设计。整个系统基于容器化部署,支持云原生和企业级应用场景。 ### 🌐 分层架构设计 -- **前端层** - Next.js + React + TypeScript 构建的现代化用户界面 -- **API 网关层** - FastAPI 高性能 Web 框架,负责请求路由和负载均衡 -- **业务逻辑层** - 智能体管理、对话管理、知识库管理和模型管理 -- **数据层** - PostgreSQL、Elasticsearch、Redis、MinIO 分布式存储架构 + +- **前端层** — Next.js + React + TypeScript 构建的现代化用户界面 +- **API 网关层** — FastAPI 高性能 Web 框架,负责请求路由和负载均衡 +- **业务逻辑层** — 智能体管理、对话管理、知识库管理和模型管理 +- **数据层** — PostgreSQL、Elasticsearch、Redis、MinIO 分布式存储架构 ### 🚀 核心服务架构 -- **智能体服务** - 基于 SmolAgents 框架的智能体生成和执行 -- **数据处理服务** - 支持 20+ 种文件格式的实时和批量处理 -- **MCP 生态系统** - 标准化的工具接口和插件架构 + +- **智能体服务** — 基于 SmolAgents 框架的智能体生成和执行 +- **数据处理服务** — 支持 20+ 种文件格式的实时和批量处理 +- **MCP 生态系统** — 标准化的工具接口和插件架构 ### ⚡ 分布式特性 -- **异步处理** - 基于 asyncio 的高性能异步处理架构 -- **微服务设计** - 服务解耦,独立扩展和部署 -- **容器化部署** - Docker Compose 服务编排,支持云原生部署 + +- **异步处理** — 基于 asyncio 的高性能异步处理架构 +- **微服务设计** — 服务解耦,独立扩展和部署 +- **容器化部署** — Docker Compose 服务编排,支持云原生部署 有关详细的架构设计和技术实现,请参阅我们的 **[软件架构](./software-architecture)**。 @@ -70,9 +79,9 @@ Nexent 采用现代化的分布式微服务架构,专为高性能、可扩展 准备好开始了吗?以下是您的下一步: -1. **📋 [安装部署](../quick-start/installation)** - 系统要求和部署指南 -2. **🔧 [开发者指南](../developer-guide/overview)** - 从源码构建和自定义 -3. 
**❓ [常见问题](../quick-start/faq)** - 常见问题和故障排除 +1. **📋 [安装部署](../quick-start/installation)** — 系统要求和部署指南 +2. **🔧 [开发者指南](../developer-guide/overview)** — 从源码构建和自定义 +3. **❓ [常见问题](../quick-start/faq)** — 常见问题和故障排除 ## 💬 社区与联系方式 diff --git a/doc/docs/zh/getting-started/software-architecture.md b/doc/docs/zh/getting-started/software-architecture.md index 620d476ef..8676992a4 100644 --- a/doc/docs/zh/getting-started/software-architecture.md +++ b/doc/docs/zh/getting-started/software-architecture.md @@ -11,156 +11,284 @@ Nexent 的软件架构遵循分层设计原则,从上到下分为以下几个 ### 🌐 前端层(Frontend Layer) - **技术栈**:Next.js + React + TypeScript - **功能**:用户界面、智能体交互、多模态输入处理 -- **特性**:响应式设计、实时通信、国际化支持 +- **特性**:响应式设计、WebSocket 实时通信、国际化(i18n)支持 ### 🔌 API 网关层(API Gateway Layer) -- **核心服务**:FastAPI 高性能 Web 框架 -- **职责**:请求路由、身份验证、API 版本管理、负载均衡 -- **端口**:5010(主服务)、5012(数据处理服务) +基于 FastAPI 构建的分布式 API 服务: + +| 服务 | 端口 | 说明 | +|------|------|------| +| **nexent-config** | 5010 | 主 API 服务 - 智能体 CRUD、配置管理 | +| **nexent-runtime** | 5014 | 运行时服务 - 智能体执行、流式响应 | +| **nexent-mcp** | 5011/5015 | MCP 服务 - 工具协议管理、FastMCP 服务器 | +| **nexent-northbound** | 5013 | 外部 API 服务 - A2A 协议、合作伙伴集成 | +| **nexent-data-process** | 5012 | 数据处理服务 - 文档解析、向量化 | ### 🧠 业务逻辑层(Business Logic Layer) -- **智能体管理**:智能体生成、执行、监控 -- **会话管理**:多轮对话、上下文维护、历史记录 -- **知识库管理**:文档处理、向量化、检索 -- **模型管理**:多模型支持、健康检查、负载均衡 +后端采用清晰的分层架构: + +#### App 层(`backend/apps/`) +- **职责**:HTTP 边界层 - 解析/验证输入、调用服务、映射错误到 HTTP +- **核心模块**: + - `agent_app.py` - 智能体 CRUD、版本管理、流式执行 + - `conversation_management_app.py` - 多轮对话、历史追踪 + - `model_managment_app.py` - 模型配置、健康检查 + - `skill_app.py` - 技能创建与管理 + - `knowledge_summary_app.py` - 知识库操作 + - `remote_mcp_app.py` - 远程 MCP 工具管理 + - `a2a_client_app.py` / `a2a_server_app.py` - A2A 协议支持 + +#### Service 层(`backend/services/`) +- **职责**:核心业务逻辑编排,协调仓库/SDK +- **核心模块**: + - `agent_service.py` - 智能体生命周期、执行编排、记忆管理 + - `agent_version_service.py` - 版本发布、回滚、对比 + - `model_management_service.py` - 多模型支持、负载均衡 + - `memory_config_service.py` - 记忆配置、上下文构建 
+ - `conversation_management_service.py` - 会话管理、历史持久化 + - `skill_service.py` - 技能生成、模板处理 + - `data_process_service.py` - 文档处理管道 + - `mcp_container_service.py` - MCP 容器生命周期管理 + - `remote_mcp_service.py` - 远程 MCP 服务器集成 + - `a2a_client_service.py` / `a2a_server_service.py` - A2A 智能体通信 + - `redis_service.py` - 缓存、分布式锁、会话存储 + +#### 智能体核心层(`backend/agents/`) +- **职责**:基于 SmolAgents 的智能体执行框架 +- **核心组件**: + - `agent_run_manager.py` - 智能体运行生命周期、流式协调 + - `create_agent_info.py` - 智能体配置构建、工具集成 + - `preprocess_manager.py` - 文档预处理编排 + - `skill_creation_agent.py` - LLM 驱动的技能生成 ### 📊 数据层(Data Layer) 分布式数据存储架构,包含多种专用数据库: #### 🗄️ 结构化数据存储 -- **PostgreSQL**:主数据库,存储用户信息、智能体配置、会话记录 -- **端口**:5434 -- **特性**:ACID 事务、关系型数据完整性 - -#### 🔍 搜索引擎 -- **Elasticsearch**:向量数据库和全文搜索引擎 -- **端口**:9210 -- **功能**:向量相似度搜索、混合搜索、大规模优化 +- **PostgreSQL**(端口 5434):主关系型数据库 + - 用户和租户管理(`user_tenant_db.py`) + - 智能体配置和版本(`agent_db.py`、`agent_version_db.py`) + - 工具定义和实例(`tool_db.py`) + - 对话历史(`conversation_db.py`) + - 群组和权限管理(`group_db.py`、`role_permission_db.py`) + - 记忆配置(`memory_config_db.py`) + - 技能定义(`skill_db.py`) +- **特性**:ACID 事务、关系完整性、多租户支持 + +#### 🔍 向量搜索与全文搜索 +- **Elasticsearch**(端口 9210):向量和全文搜索引擎 + - 知识库存储(`knowledge_db.py`) + - 向量相似度搜索、混合搜索 + - 语义分块和索引 +- **特性**:可扩展搜索、相关性排序、大规模优化 #### 💾 缓存层 -- **Redis**:高性能内存数据库 -- **端口**:6379 -- **用途**:会话缓存、临时数据、分布式锁 +- **Redis**(端口 6379):高性能内存数据库 + - 会话缓存 + - 临时数据存储 + - 分布式锁(`redis_service.py`) + - Celery 任务队列的消息代理 +- **特性**:亚毫秒级延迟、AOF 持久化 #### 📁 对象存储 -- **MinIO**:分布式对象存储服务 -- **端口**:9010 -- **功能**:文件存储、多媒体资源管理、大文件处理 +- **MinIO**(端口 9010/9011):分布式对象存储 + - 文件上传和附件(`attachment_db.py`) + - 知识库文档存储 + - 预览生成和临时文件 +- **特性**:S3 兼容 API、大文件处理 ## 🔧 核心服务架构 ### 🤖 智能体服务(Agent Services) ``` -智能体框架基于 SmolAgents,提供: -├── 智能体生成与配置 -├── 工具调用与集成 -├── 推理与决策执行 +智能体框架(基于 SmolAgents): +├── 智能体创建与配置 +│ ├── 名称/显示名生成(LLM 驱动) +│ ├── 工具集成与选择 +│ ├── 子智能体关系管理 +│ └── 版本控制与发布 +├── 智能体执行引擎 +│ ├── 流式响应(SSE) +│ ├── 工具调用与编排 +│ ├── 多模型支持(LLM + 业务逻辑) +│ └── 记忆上下文构建 +├── 版本管理 +│ ├── 发布与回滚 +│ ├── 版本对比 +│ 
└── A2A 智能体卡片注册 └── 生命周期管理 + ├── 运行注册与追踪 + ├── 停止与清理 + └── 预处理协调 ``` ### 📈 数据处理服务(Data Processing Services) ``` -分布式数据处理架构: -├── 实时文档处理(20+ 格式支持) -├── 批量数据处理管道 -├── OCR 与表格结构提取 -└── 向量化与索引构建 +分布式数据处理管道: +├── 文档摄入 +│ ├── 多格式支持(20+ 格式) +│ ├── PDF 解析与 OCR +│ └── 表格结构提取 +├── 分块与处理 +│ ├── 语义分块算法 +│ ├── Celery 批量处理 +│ └── Ray 分布式计算 +├── 向量化与索引 +│ ├── Embedding 生成 +│ ├── Elasticsearch 索引 +│ └── 增量更新 +└── 预览生成 + ├── PDF 预览转换 + └── 图片缩略图生成 ``` ### 🌐 MCP 生态系统(MCP Ecosystem) ``` -模型上下文协议工具集成: -├── 标准化工具接口 -├── 插件化架构 -├── 第三方服务集成 -└── 自定义工具开发 +模型上下文协议集成: +├── 本地 MCP 服务 +│ ├── 稳定的内置工具 +│ └── Docker 容器化工具 +├── 远程 MCP 服务 +│ ├── 动态远程 MCP 服务器代理 +│ └── 外部 API 工具集成 +├── MCP 容器管理 +│ ├── 容器生命周期(Docker) +│ ├── 日志聚合 +│ └── 资源监控 +└── FastMCP 服务器 + ├── 工具注册与发现 + └── 标准化工具接口 +``` + +### 🔄 A2A 协议支持(A2A Protocol Support) +``` +智能体间通信: +├── A2A 客户端 +│ ├── 智能体卡片发现 +│ ├── 任务提交与流式处理 +│ └── 响应处理 +├── A2A 服务器 +│ ├── 智能体卡片注册 +│ ├── 任务处理 +│ └── 消息流式传输 +└── 智能体适配器 + ├── Nexent ↔ A2A 协议转换 + └── 技能执行协调 ``` ## 🚀 分布式架构特性 ### ⚡ 异步处理架构 - **基础框架**:基于 asyncio 的高性能异步处理 +- **任务队列**:Celery + Redis 分布式任务执行 +- **计算框架**:Ray 用于数据处理中的分布式计算 +- **流式处理**:Server-Sent Events(SSE)实现实时流式响应 - **并发控制**:线程安全的并发处理机制 -- **任务队列**:Celery + Ray 分布式任务执行 -- **流式处理**:实时数据流和响应流处理 ### 🔄 微服务设计 ``` 服务拆分策略: -├── nexent(主服务)- 智能体核心逻辑 -├── nexent-data-process(数据处理)- 文档处理管道 -├── nexent-mcp-service(MCP服务)- 工具协议服务 -└── 可选服务(SSH、监控等) +├── nexent-config (5010) +│ └── 智能体 CRUD、配置、用户管理 +├── nexent-runtime (5014) +│ └── 智能体执行、流式响应 +├── nexent-mcp (5011/5015) +│ └── MCP 工具协议、容器管理 +├── nexent-northbound (5013) +│ └── 外部 API、A2A 协议、合作伙伴集成 +├── nexent-data-process (5012) +│ └── 文档处理、向量化、Celery 工作者 +├── nexent-web (3000) +│ └── 前端 Next.js 应用 +└── 可选服务 + ├── nexent-redis (6379) - 缓存和消息代理 + ├── nexent-elasticsearch (9210) - 向量搜索 + ├── nexent-postgresql (5434) - 关系数据 + └── nexent-minio (9010) - 对象存储 ``` ### 🌍 容器化部署 ``` -Docker Compose 服务编排: +Docker Compose 编排: ├── 应用服务容器化 ├── 数据库服务隔离 -├── 网络层安全配置 -└── 卷挂载数据持久化 +├── 网络层安全配置(bridge 网络) +├── 卷挂载数据持久化 +├── 
健康检查与自动重启 +└── Kubernetes 支持(IS_DEPLOYED_BY_KUBERNETES) ``` ## 🔐 安全与扩展性 ### 🛡️ 安全架构 - **身份验证**:多租户支持、用户权限管理 -- **数据安全**:端到端加密、安全传输协议 -- **网络安全**:服务间安全通信、防火墙配置 +- **授权**:基于角色的访问控制(RBAC)、群组权限 +- **数据安全**:租户数据隔离、安全传输(HTTPS) +- **网络安全**:服务间安全通信、Docker 网络隔离 ### 📈 可扩展性设计 - **水平扩展**:微服务独立扩展、负载均衡 - **垂直扩展**:资源池管理、智能调度 -- **存储扩展**:分布式存储、数据分片 +- **存储扩展**:分布式存储(MinIO)、数据分片(Elasticsearch) +- **缓存扩展**:Redis 集群用于会话和数据缓存 ### 🔧 模块化架构 - **松耦合设计**:服务间低依赖、接口标准化 - **插件化架构**:工具和模型的热插拔 - **配置管理**:环境隔离、动态配置更新 +- **单一数据源**:环境变量集中管理于 `backend/consts/const.py` ## 🔄 数据流架构 ### 📥 用户请求流 ``` -用户输入 → 前端验证 → API网关 → 路由分发 → 业务服务 → 数据访问 → 数据库 +用户输入 → 前端验证 → API 网关(nexent-config) + → 路由分发 → 业务服务(Service 层) + → 数据访问(Database 层)→ PostgreSQL/Elasticsearch/Redis/MinIO ``` ### 🤖 智能体执行流 ``` -用户消息 → 智能体创建 → 工具调用 → 模型推理 → 流式响应 → 结果存储 +用户消息 → nexent-runtime → Agent Service + → 记忆上下文构建 → 工具解析 + → 模型推理(流式)→ SSE 响应 + → 对话保存 → 历史存储 ``` ### 📚 知识库处理流 ``` -文件上传 → 临时存储 → 数据处理 → 向量化 → 知识库存储 → 索引更新 +文件上传 → nexent-config → nexent-data-process + → 文档解析 → 分块 → 向量化 + → Elasticsearch 索引 → 搜索就绪 ``` ### ⚡ 实时处理流 ``` -实时输入 → 即时处理 → 智能体响应 → 流式输出 +实时输入 → 流式端点 → 异步处理 + → SSE 流 → 前端展示 ``` ## 🎯 架构优势 ### 🏢 企业级特性 -- **高可用性**:多层冗余、故障转移 -- **高性能**:异步处理、智能缓存 +- **高可用性**:多服务冗余、健康检查、自动重启 +- **高性能**:异步处理、Redis 缓存、向量搜索优化 - **高并发**:分布式架构、负载均衡 -- **监控友好**:完善的日志和状态监控 +- **监控友好**:OpenTelemetry 可观测性、Grafana Tempo 追踪、结构化日志 ### 🔧 开发友好 -- **模块化开发**:清晰的层次结构 -- **标准化接口**:统一的 API 设计 -- **灵活配置**:环境适配、功能开关 -- **易于测试**:单元测试、集成测试支持 +- **模块化开发**:清晰的分层架构(App → Service → Database) +- **标准化接口**:统一的 API 设计(FastAPI) +- **灵活配置**:环境配置、热重载 +- **易于测试**:完善的测试套件、依赖注入 ### 🌱 生态兼容 -- **MCP 标准**:遵循模型上下文协议 -- **开源生态**:集成丰富的开源工具 -- **云原生**:支持 Kubernetes、Docker 部署 +- **MCP 标准**:完整的模型上下文协议实现 +- **A2A 协议**:智能体间通信支持 +- **开源生态**:集成 SmolAgents、FastMCP、LangChain +- **云原生**:支持 Docker Compose 和 Kubernetes 部署 - **多模型支持**:兼容主流 AI 模型提供商 --- -这种架构设计确保了 Nexent 能够在保持高性能的同时,为用户提供稳定、可扩展的 AI 智能体服务平台。无论是个人用户还是企业级部署,都能够获得优秀的使用体验和技术保障。 \ No newline at end of file +这种架构设计确保了 Nexent 
能够在保持高性能的同时,为用户提供稳定、可扩展的 AI 智能体服务平台。无论是个人用户还是企业级部署,都能够获得优秀的使用体验和技术保障。 diff --git a/doc/docs/zh/quick-start/installation.md b/doc/docs/zh/quick-start/installation.md index 87df5abde..6d3538b90 100644 --- a/doc/docs/zh/quick-start/installation.md +++ b/doc/docs/zh/quick-start/installation.md @@ -1,13 +1,16 @@ -# 安装部署 +# 基于 Docker 安装部署 ## 🎯 系统要求 -| 资源 | 最低要求 | -|----------|---------| -| **CPU** | 2 核 | -| **内存** | 6 GiB | -| **架构** | x86_64 / ARM64 | -| **软件** | 已安装 Docker 和 Docker Compose | +| 资源 | 最低要求 | 推荐配置 | +|----------|---------|-------------| +| **CPU** | 4 核 | 8 核 | +| **内存** | 8 GiB | 16 GiB | +| **磁盘** | 40 GiB | 100 GiB | +| **架构** | x86_64 / ARM64 | | +| **软件** | 已安装 Docker 和 Docker Compose | Docker 24+, Docker Compose v2+ | + +> **💡 注意**:推荐的 **8 核 16 GiB 内存** 配置可确保生产环境下的良好性能。 ## 🚀 快速开始 @@ -16,10 +19,9 @@ ```bash git clone https://github.com/ModelEngine-Group/nexent.git cd nexent/docker -cp .env.example .env # 复制环境变量配置文件 ``` -> **💡 提示**: 若无特殊需求,您可直接使用 `.env.example` 进行部署,无需进行任何修改。若您需要配置语音模型(STT/TTS),则需要在 `.env` 中配置相关参数。我们会尽快将此部分配置前端化,敬请期待。 +> **💡 提示**: `deploy.sh` 会在 `docker/.env` 不存在时自动从 `.env.example` 复制一份。若无特殊需求,可直接部署;若需要配置语音模型(STT/TTS),请部署前或部署后修改 `docker/.env` 中的相关参数。 ### 2. 
部署选项 @@ -29,23 +31,46 @@ cp .env.example .env # 复制环境变量配置文件 bash deploy.sh ``` -执行此命令后,系统会提供两个不同的版本供您选择: +执行此命令后,系统会通过 Bash TUI 选择部署参数。可使用方向键或 `j/k` 移动,空格切换多选项,回车确认,`b`/Backspace 返回上一步,`q` 退出。 + +**组件组合:** +- **infrastructure(必选)**: Elasticsearch、PostgreSQL、Redis、MinIO +- **application(默认选中,可取消)**: config、runtime、mcp、northbound、web +- **data-process(可选)**: 数据处理服务 +- **supabase(可选)**: 启用用户、租户和认证能力 +- **terminal(可选)**: 启用 OpenSSH 终端工具 +- **monitoring(可选)**: 启用观测组件,选择后会继续选择 provider + +**端口策略:** +- **development(默认)**: 暴露调试和内部服务端口,便于本地排查 +- **production**: 仅发布生产入口端口 + +**镜像来源:** +- **general(默认)**: 使用标准公开镜像仓库 +- **mainland**: 使用中国大陆镜像源 +- **local-latest**: 使用本地 `latest` 镜像,避免拉取 Nexent 应用镜像 + +您也可以通过参数跳过交互: -**版本选择:** -- **Speed version(轻量快速部署,默认)**: 快速启动核心功能,适合个人用户和小团队使用 -- **Full version(完整功能版)**: 提供企业级租户管理和资源隔离等高级功能,但安装时间略长,适合企业用户 +```bash +# 默认组件组合,development 端口策略,标准镜像源 +bash deploy.sh --components infrastructure,application --port-policy development --image-source general + +# 启用用户/租户能力、数据处理和终端工具 +bash deploy.sh --components infrastructure,application,supabase,data-process,terminal + +# 使用中国大陆镜像源 +bash deploy.sh --image-source mainland + +# 使用本地 latest 镜像 +bash deploy.sh --image-source local-latest +``` -**部署模式:** -- **开发模式 (默认)**: 暴露所有服务端口以便调试 -- **基础设施模式**: 仅启动基础设施服务 -- **生产模式**: 为安全起见仅暴露端口 3000 +部署成功后,非敏感部署选项会保存到 `docker/deploy.options`。下次交互部署时可选择复用本地配置或重新全量配置。 -**可选组件:** -- **终端工具**: 启用 openssh-server 供 AI 智能体执行 shell 命令 -- **区域优化**: 中国大陆用户可使用优化的镜像源 +#### ⚠️ 重要提示 -### ⚠️ 重要提示 1️⃣ **首次部署 v1.8.0 及以上版本时**,需特别留意 Docker 日志中输出的 `suadmin` 超级管理员账号信息。该账号为系统最高权限账户,密码仅在首次生成时显示,后续无法再次查看,请务必妥善保存。 > 该账号仅用于权限管理,无权开发智能体或创建知识库。请登录该账号,依次完成:访问租户资源→创建租户→创建租户管理员,然后使用租户管理员账号登录,即可使用全部功能。角色权限详情参见 [用户管理](../user-guide/user-management) @@ -55,16 +80,16 @@ bash deploy.sh docker exec -it supabase-db-mini bash psql -U postgres select id, email from auth.users; -#获取到suadmin@nexent.com账号的user_id -delete from auth.users where id = '你的user_id'; -delete from auth.identities where user_id = 
'你的user_id'; +# 获取 suadmin@nexent.com 账号的 user_id +delete from auth.users where id = 'your_user_id'; +delete from auth.identities where user_id = 'your_user_id'; -#Step2:在nexent的数据库中删除su账号记录 +# Step 2: 在 nexent 数据库中删除 su 账号记录 docker exec -it nexent-postgresql bash psql -U root -d nexent -delete from nexent.user_tenant_t where user_id = '你的user_id'; +delete from nexent.user_tenant_t where user_id = 'your_user_id'; -#Step3:重新部署并记录su账号密码 +# Step 3: 重新部署并记录 su 账号密码 ``` ### 3. 访问您的安装 @@ -73,26 +98,57 @@ delete from nexent.user_tenant_t where user_id = '你的user_id'; 2. 登录超级管理员账号 3. 访问租户资源 → 创建租户及租户管理员 4. 登录租户管理员账号 -2. 参考 [用户指南](../user-guide/home-page) 进行智能体的开发 +5. 参考 [用户指南](../user-guide/home-page) 进行智能体的开发 ## 📦 服务架构 -Nexent 采用微服务架构,包含以下核心服务: +Nexent 采用微服务架构,通过 Docker Compose 进行部署。 -**核心服务:** -- `nexent`: 后端服务 (端口 5010) -- `nexent-web`: 前端界面 (端口 3000) -- `nexent-data-process`: 数据处理服务 (端口 5012) +**应用服务:** +| 服务 | 描述 | 默认端口 | +|---------|-------------|--------------| +| nexent | 后端服务 | 5010 | +| nexent-web | Web 前端 | 3000 | +| nexent-data-process | 数据处理服务 | 5012 | +| nexent-northbound | 北向 API 服务 | 5013 | **基础设施服务:** -- `nexent-postgresql`: 数据库 (端口 5434) -- `nexent-elasticsearch`: 搜索引擎 (端口 9210) -- `nexent-minio`: 对象存储 (端口 9010,控制台 9011) -- `redis`: 缓存服务 (端口 6379) +| 服务 | 描述 | +|---------|-------------| +| nexent-postgresql | 关系型数据库 | +| nexent-elasticsearch | 搜索引擎和索引服务 | +| nexent-minio | S3 兼容对象存储 | +| redis | 缓存层 | + +**Supabase 服务(选择 `supabase` 组件时):** +| 服务 | 描述 | +|---------|-------------| +| supabase-kong | API 网关 | +| supabase-auth | 认证服务 | +| supabase-db-mini | 数据库服务 | **可选服务:** -- `nexent-openssh-server`: 终端工具的 SSH 服务器 (端口 2222) +| 服务 | 描述 | +|---------|-------------| +| nexent-openssh-server | AI 智能体 SSH 终端 | +| nexent-monitoring | 可选观测组件 | + +## 💾 数据持久化 + +Nexent 使用 Docker volumes 进行数据持久化: + +| 数据类型 | Volume 名称 | 默认宿主机路径 | +|-----------|------------------|-------------------| +| PostgreSQL | nexent-postgresql-data | `{dataDir}/postgresql` | +| Elasticsearch | 
nexent-elasticsearch-data | `{dataDir}/elasticsearch` | +| Redis | nexent-redis-data | `{dataDir}/redis` | +| MinIO | nexent-minio-data | `{dataDir}/minio` | +| Supabase DB(选择 supabase 时)| nexent-supabase-db-data | `{dataDir}/supabase-db` | + +默认 `dataDir` 为 `./volumes`(可在 `.env` 中配置 `ROOT_DIR`)。 + +卸载由 `docker/uninstall.sh` 负责。默认交互询问是否删除持久化数据;也可使用 `--delete-volumes true|false`、`--remove-volumes`、`--keep-volumes`,或使用 `bash uninstall.sh delete-all` 删除容器和持久化数据。 ## 🔌 端口映射 @@ -101,6 +157,7 @@ Nexent 采用微服务架构,包含以下核心服务: | Web 界面 | 3000 | 3000 | 主应用程序访问 | | 后端 API | 5010 | 5010 | 后端服务 | | 数据处理 | 5012 | 5012 | 数据处理 API | +| 北向 API | 5013 | 5013 | 北向接口服务 (A2A/MCP 集成) | | PostgreSQL | 5432 | 5434 | 数据库连接 | | Elasticsearch | 9200 | 9210 | 搜索引擎 API | | MinIO API | 9000 | 9010 | 对象存储 API | @@ -110,6 +167,237 @@ Nexent 采用微服务架构,包含以下核心服务: 有关完整的端口映射详细信息,请参阅我们的 [开发容器指南](../deployment/devcontainer.md#port-mapping)。 +## 🔧 高级配置 + +### 监控配置 + +部署时在脚本交互界面中选择 `monitoring` 组件即可启用 OpenTelemetry 监控。脚本会同步更新 `docker/.env` 中的 `ENABLE_TELEMETRY`、`MONITORING_PROVIDER` 和 `MONITORING_DASHBOARD_URL`,并启动 `docker/docker-compose-monitoring.yml` 中对应的观测组件。 + +```bash +cd nexent/docker +bash deploy.sh +``` + +如果本地已有 `docker/deploy.options`,脚本会询问是否复用本地配置。请选择重新配置/覆盖本地配置,然后在组件选择界面勾选 `monitoring`,再在 provider 选择界面手动选择 `grafana`、`phoenix`、`langfuse`、`langsmith`、`zipkin` 或 `otlp`。 + +支持的 provider: + +| Provider | 用途 | 默认访问地址 | +|----------|------|--------------| +| `otlp` | 仅启动 OpenTelemetry Collector,适合转发到外部平台 | 无 Dashboard | +| `phoenix` | 本地 Phoenix 追踪分析 | `http://localhost:6006` | +| `langfuse` | 本地 Langfuse 观测栈 | `http://localhost:3001` | +| `langsmith` | 转发到托管 LangSmith | `https://smith.langchain.com/` | +| `grafana` | 本地 Grafana + Tempo | `http://localhost:3002/d/nexent-llm-agent/nexent-agent-trace-monitoring?orgId=1` | +| `zipkin` | 本地 Zipkin | `http://localhost:9411` | + +如需调整端口、镜像版本或 Langfuse 初始账号,请先复制并编辑监控环境变量: + +```bash +cp docker/monitoring/monitoring.env.example docker/monitoring/monitoring.env 
+``` + +常用变量: + +| 变量 | 说明 | +|------|------| +| `MONITORING_PROVIDER` | 默认监控 provider;部署脚本中手动选择 provider 后会同步更新 | +| `OTEL_COLLECTOR_HTTP_PORT` / `OTEL_COLLECTOR_GRPC_PORT` | Collector 对外暴露的 OTLP HTTP/gRPC 端口 | +| `LANGSMITH_API_KEY` / `LANGSMITH_PROJECT` | LangSmith 转发配置 | +| `LANGFUSE_INIT_USER_EMAIL` / `LANGFUSE_INIT_USER_PASSWORD` | 本地 Langfuse 初始管理员账号 | +| `GRAFANA_ADMIN_USER` / `GRAFANA_ADMIN_PASSWORD` | 本地 Grafana 管理员账号 | + +选择 `langsmith` provider 前,请先在 `docker/monitoring/monitoring.env` 中配置 `LANGSMITH_API_KEY`。如果只需要连接已有外部 Collector,也可以在 `docker/.env` 中调整 OTLP 目标地址: + +```bash +ENABLE_TELEMETRY=true +MONITORING_PROVIDER=otlp +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http +MONITORING_DASHBOARD_URL= +``` + +> **生产建议**:请替换示例中的默认密码、密钥和 Langfuse `ENCRYPTION_KEY`,并通过反向代理或防火墙限制 Dashboard、Collector 端口的访问范围。 + +### OAuth 登录配置 + +OAuth 登录依赖 `supabase` 组件。启用第三方登录时,请同时部署 `supabase`,并将 `OAUTH_CALLBACK_BASE_URL` 设置为浏览器可访问的 Nexent Web 地址。 + +```bash +bash deploy.sh --components infrastructure,application,supabase +``` + +Docker 部署在 `docker/.env` 中配置 OAuth: + +```bash +# Web 入口地址。回调完整路径会自动拼接为: +# {OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider= +OAUTH_CALLBACK_BASE_URL=http://localhost:3000 + +# GitHub OAuth +GITHUB_OAUTH_CLIENT_ID= +GITHUB_OAUTH_CLIENT_SECRET= + +# GDE OAuth +GDE_URL= +GDE_OAUTH_CLIENT_ID= +GDE_OAUTH_CLIENT_SECRET= + +# Link App OAuth +LINK_APP_URL= +LINK_APP_OAUTH_CLIENT_ID= +LINK_APP_OAUTH_CLIENT_SECRET= + +# WeChat OAuth +ENABLE_WECHAT_OAUTH=false +WECHAT_OAUTH_APP_ID= +WECHAT_OAUTH_APP_SECRET= + +# 访问 OAuth provider 时的 TLS 校验 +OAUTH_SSL_VERIFY=true +OAUTH_CA_BUNDLE= +``` + +Provider 启用规则: + +| Provider | 必填变量 | 回调地址 | +|----------|----------|----------| +| GitHub | `GITHUB_OAUTH_CLIENT_ID`、`GITHUB_OAUTH_CLIENT_SECRET` | `{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=github` | +| GDE | `GDE_URL`、`GDE_OAUTH_CLIENT_ID`、`GDE_OAUTH_CLIENT_SECRET` | 
`{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=gde` | +| Link App | `LINK_APP_URL`、`LINK_APP_OAUTH_CLIENT_ID`、`LINK_APP_OAUTH_CLIENT_SECRET` | `{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=link_app` | +| WeChat | `ENABLE_WECHAT_OAUTH=true`、`WECHAT_OAUTH_APP_ID`、`WECHAT_OAUTH_APP_SECRET` | `{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=wechat` | + +本地默认回调示例为 `http://localhost:3000/api/user/oauth/callback?provider=github`。生产环境应改为公网 HTTPS 域名,例如 `https://nexent.example.com/api/user/oauth/callback?provider=github`,并在 OAuth provider 控制台中登记相同地址。 + +### CAS 登录配置 + +CAS SSO 不依赖 `supabase`。启用 CAS 时,请将 `CAS_CALLBACK_BASE_URL` 设置为浏览器可访问的 Nexent Web 地址,且不要带结尾 `/`。`CAS_SERVER_URL` 是 CAS Server 根地址,也不要带结尾 `/`。 + +Docker 部署在 `docker/.env` 中配置 CAS: + +```bash +CAS_ENABLED=true +CAS_SERVER_URL=http://localhost:8080/cas +CAS_VALIDATE_PATH=/p3/serviceValidate +CAS_CALLBACK_BASE_URL=http://localhost:3000 + +# disabled: 禁用 CAS 登录入口和自动跳转 +# button: 在登录页显示 CAS 登录按钮 +# force: 未登录访问 Nexent 时自动跳转到 CAS +CAS_LOGIN_MODE=force + +# 为空时使用 cas:user;填写 userName 时从 cas:attributes 取用户标识 +CAS_USER_ATTRIBUTE= +CAS_EMAIL_ATTRIBUTE=email +CAS_ROLE_ATTRIBUTE=role +CAS_TENANT_ATTRIBUTE=tenant_id +CAS_ROLE_MAP_JSON={"cas-admin":"ADMIN","cas-user":"USER"} +CAS_SESSION_MAX_AGE_SECONDS=3600 +LOCAL_SESSION_MAX_AGE_SECONDS=3600 +CAS_RENEW_BEFORE_SECONDS=300 +CAS_RENEW_TIMEOUT_SECONDS=10 +CAS_SYNTHETIC_EMAIL_DOMAIN=cas.local + +# 为空时 Nexent 主动退出不会调用 CAS Server 登出接口。 +# 可配置为 /logout,系统会基于 CAS_SERVER_URL 拼接。 +CAS_LOGOUT_URL=/logout +CAS_SSL_VERIFY=true +CAS_CA_BUNDLE= +``` + +常用 CAS 地址: + +| 用途 | 地址 | +|------|------| +| Nexent 登录入口 | `{CAS_CALLBACK_BASE_URL}/api/user/cas/login?redirect=/` | +| CAS service 回调 | `{CAS_CALLBACK_BASE_URL}/api/user/cas/callback` | +| CAS 无感续期回调 | `{CAS_CALLBACK_BASE_URL}/api/user/cas/renew_callback` | +| CAS 单点登出回调 | `POST {CAS_CALLBACK_BASE_URL}/api/user/cas/logout_callback` | + +Apereo CAS 使用 JSON Service Registry 时,可以新增一个服务注册文件,例如 `Nexent-10001.json`。文件需要放到 CAS 
部署配置的 service registry 目录中,`id` 必须全局唯一。下面是本地 Docker 示例: + +```json +{ + "@class": "org.apereo.cas.services.RegexRegisteredService", + "serviceId": "http://localhost:3000.*", + "name": "Nexent CAS Client", + "id": 10001, + "description": "Nexent CAS SSO client", + "evaluationOrder": 1, + "logoutType": "BACK_CHANNEL", + "logoutUrl": "http://localhost:3000/api/user/cas/logout_callback" +} +``` + +生产环境建议保持 `CAS_SSL_VERIFY=true`;自签名证书优先配置 `CAS_CA_BUNDLE`,仅本地验证时再临时设置 `CAS_SSL_VERIFY=false`。 + +#### CAS 对接 ModelEngine +当使用 CAS 协议对接 ModelEngine 时,可以使用如下配置部署 Nexent: +```bash +CAS_ENABLED=true +CAS_SERVER_URL=https://{modelengine_host}:5443/SSOSvr +CAS_VALIDATE_PATH=/p3/serviceValidate +CAS_CALLBACK_BASE_URL=http://{nexent_host}:3000 +CAS_LOGIN_MODE=force +CAS_USER_ATTRIBUTE=userName +CAS_EMAIL_ATTRIBUTE=email +CAS_ROLE_ATTRIBUTE=userType +CAS_TENANT_ATTRIBUTE=tenant_id +CAS_ROLE_MAP_JSON={"1":"ADMIN","3":"DEV"} +CAS_SESSION_MAX_AGE_SECONDS=3600 +LOCAL_SESSION_MAX_AGE_SECONDS=3600 +CAS_RENEW_BEFORE_SECONDS=300 +CAS_RENEW_TIMEOUT_SECONDS=10 +CAS_SYNTHETIC_EMAIL_DOMAIN=cas.local +CAS_LOGOUT_URL=/logout?service=http://{nexent_host}:3000 +CAS_SSL_VERIFY=false +CAS_CA_BUNDLE= +``` + +同时,需要进入 OMS 容器添加 CAS client 的注册配置文件,参考如下步骤: +```bash +# 创建注册配置文件,将 JSON 部分输入文件并保存 +vim Nexent-10000001.json +{ + "@class": "org.apereo.cas.services.CasRegisteredService", + "serviceId": "http://{nexent_host}:3000.*", + "name": "Nexent CAS Client", + "id": 10000001, + "description": "Nexent CAS SSO client", + "evaluationOrder": 1, + "logoutType": "BACK_CHANNEL", + "logoutUrl": "http://{nexent_host}:3000/api/user/cas/logout_callback" +} + +# 执行如下命令,将配置文件拷贝到容器中 +kubectl cp Nexent-10000001.json model-engine/$(kubectl get pods -n model-engine -l app=oms --no-headers | awk '{print $1}'):/opt/huawei/fce/apps/platform/webapps/SSOSvr/WEB-INF/classes/services/Nexent-10000001.json +kubectl exec -i -n model-engine $(kubectl get pods -n model-engine -l app=oms --no-headers | awk '{print $1}') -- chown tomcat:fusioncube 
/opt/huawei/fce/apps/platform/webapps/SSOSvr/WEB-INF/classes/services/Nexent-10000001.json +``` + +### 北向接口配置 (NORTHBOUND_EXTERNAL_URL) + +如果您需要使用以下功能,需要配置 `NORTHBOUND_EXTERNAL_URL` 环境变量: + +1. **A2A 协议集成** - 第三方系统通过 A2A 协议调用 Nexent 智能体 +2. **MCP 工具访问** - 使用第三方 MCP 工具访问 Nexent 文档文件等资源 + +**配置方法:** + +在 `.env` 文件中设置公网可访问的 URL: + +```bash +# 格式:协议://主机:端口/api +# 本地开发(默认): +NORTHBOUND_EXTERNAL_URL=http://localhost:5013/api + +# 生产环境 - 使用您的公网 IP 或域名: +NORTHBOUND_EXTERNAL_URL=http://your-public-ip:5013/api +# 或 +NORTHBOUND_EXTERNAL_URL=https://api.yourdomain.com/api +``` + +> **重要**: URL 必须包含 `/api` 后缀,因为 Northbound 服务使用 FastAPI 的 `root_path="/api"` 配置。 + ## 💡 需要帮助 - 浏览 [常见问题](./faq) 了解常见安装问题 @@ -120,4 +408,4 @@ Nexent 采用微服务架构,包含以下核心服务: 想要从源码构建或添加新功能?查看 [Docker 构建指南](../deployment/docker-build) 获取详细说明。 -有关详细的安装说明和自定义选项,请查看我们的 [开发者指南](../developer-guide/overview)。 \ No newline at end of file +有关详细的安装说明和自定义选项,请查看我们的 [开发者指南](../developer-guide/overview)。 diff --git a/doc/docs/zh/quick-start/kubernetes-installation.md b/doc/docs/zh/quick-start/kubernetes-installation.md index be7857fb2..7229f1ea8 100644 --- a/doc/docs/zh/quick-start/kubernetes-installation.md +++ b/doc/docs/zh/quick-start/kubernetes-installation.md @@ -35,21 +35,29 @@ cd nexent/k8s/helm 运行部署脚本: ```bash -./deploy-helm.sh apply +./deploy.sh ``` -执行此命令后,系统会提示您选择配置选项: +执行此命令后,系统会通过 Bash TUI 选择配置选项。可使用方向键或 `j/k` 移动,空格切换多选项,回车确认,`b`/Backspace 返回上一步,`q` 退出。 -**版本选择:** -- **Speed version(轻量快速部署,默认)**: 快速启动核心功能,适合个人用户和小团队使用 -- **Full version(完整功能版)**: 提供企业级租户管理和资源隔离等高级功能,包含 Supabase 认证服务 +**组件组合:** +- **infrastructure(必选)**: Elasticsearch、PostgreSQL、Redis、MinIO +- **application(默认选中,可取消)**: config、runtime、mcp、northbound、web +- **data-process(可选)**: 数据处理服务 +- **supabase(可选)**: 启用用户、租户和认证能力 +- **terminal(可选)**: 启用 OpenSSH 终端工具 +- **monitoring(可选)**: 启用观测组件,选择后会继续选择 provider -**镜像源选择:** -- **中国大陆**: 使用优化的区域镜像源,加快镜像拉取速度 -- **通用**: 使用标准 Docker Hub 镜像源 +**端口策略:** +- **development(默认)**: 使用 NodePort 暴露 Web 和调试/内部服务 +- 
**production**: 内部服务使用 ClusterIP,仅暴露生产入口 -**可选组件:** -- **终端工具**: 启用 openssh-server 供 AI 智能体执行 shell 命令 +**镜像来源:** +- **general(默认)**: 使用标准公开镜像仓库 +- **mainland**: 使用中国大陆镜像源 +- **local-latest**: 使用本地 `latest` 镜像,并将 Nexent 应用镜像的拉取策略设为本地优先 + +部署成功后,非敏感部署选项会保存到 `k8s/helm/deploy.options`。下次交互部署时可选择复用本地配置或重新全量配置。 ### ⚠️ 重要提示 @@ -72,7 +80,7 @@ kubectl exec -it -n nexent deploy/nexent-postgresql -- psql -U root -d nexent -c "DELETE FROM nexent.user_tenant_t WHERE user_id='your_user_id';" # Step 3: 重新部署并记录 su 账号密码 -./deploy-helm.sh apply +./deploy.sh ``` ### 4. 访问您的安装 @@ -113,7 +121,7 @@ Nexent 采用微服务架构,通过 Helm Chart 进行部署: | nexent-redis | 缓存层 | | nexent-minio | S3 兼容对象存储 | -**Supabase 服务(完整版独有):** +**Supabase 服务(选择 `supabase` 组件时):** | 服务 | 描述 | |---------|-------------| | nexent-supabase-kong | API 网关 | @@ -124,13 +132,14 @@ Nexent 采用微服务架构,通过 Helm Chart 进行部署: | 服务 | 描述 | |---------|-------------| | nexent-openssh-server | AI 智能体 SSH 终端 | +| nexent-monitoring | 可选观测组件 | ## 🔌 端口映射 | 服务 | 内部端口 | NodePort | 描述 | |---------|---------------|----------|-------------| | Web 界面 | 3000 | 30000 | 主应用程序访问 | -| Northbound API | 5010 | 30013 | 北向 API 服务 | +| Northbound API | 5013 | 30013 | 北向 API 服务 | | SSH 服务器 | 22 | 30022 | 终端工具访问 | 内部服务通信使用 Kubernetes 内部 DNS(例如 `http://nexent-config:5010`)。 @@ -141,34 +150,261 @@ Nexent 使用 PersistentVolume 进行数据持久化: | 数据类型 | PersistentVolume | 默认宿主机路径 | |-----------|------------------|-------------------| -| Elasticsearch | nexent-elasticsearch-pv | `{dataDir}/elasticsearch` | -| PostgreSQL | nexent-postgresql-pv | `{dataDir}/postgresql` | -| Redis | nexent-redis-pv | `{dataDir}/redis` | -| MinIO | nexent-minio-pv | `{dataDir}/minio` | -| Supabase DB(完整版)| nexent-supabase-db-pv | `{dataDir}/supabase-db` | +| Elasticsearch | nexent-elasticsearch-pv | `/var/lib/nexent-data/nexent-elasticsearch` | +| PostgreSQL | nexent-postgresql-pv | `/var/lib/nexent-data/nexent-postgresql` | +| Redis | nexent-redis-pv | `/var/lib/nexent-data/nexent-redis` | +| MinIO | 
nexent-minio-pv | `/var/lib/nexent-data/nexent-minio` | +| Supabase DB(选择 supabase 时)| nexent-supabase-db-pv | `/var/lib/nexent-data/nexent-supabase-db` | -默认 `dataDir` 为 `/var/lib/nexent-data`(可在 `values.yaml` 中配置)。 +卸载 Helm release 默认不会删除本地 hostPath 数据。可使用 `./uninstall.sh --delete-local-data true` 删除 `/var/lib/nexent-data/nexent-*` 下的 Nexent 本地卷内容,使用 `--keep-local-data` 显式保留。 ## 🔧 部署命令 ```bash # 交互式部署 -./deploy-helm.sh apply +./deploy.sh + +# 非交互式部署默认组件 +./deploy.sh --components infrastructure,application --port-policy development --image-source general + +# 启用用户/租户能力、数据处理和终端工具 +./deploy.sh --components infrastructure,application,supabase,data-process,terminal # 使用中国大陆镜像源部署 -./deploy-helm.sh apply --is-mainland Y +./deploy.sh --image-source mainland -# 部署完整版本(包含 Supabase) -./deploy-helm.sh apply --deployment-version full +# 使用本地 latest 镜像 +./deploy.sh --image-source local-latest # 仅清理 Helm 状态(修复卡住的发布) -./deploy-helm.sh clean +./uninstall.sh clean + +# 卸载,默认保留本地数据;交互确认是否删除 namespace 和本地数据 +./uninstall.sh + +# 卸载并删除 namespace +./uninstall.sh --delete-namespace true + +# 卸载并删除本地 hostPath 数据 +./uninstall.sh --delete-local-data true + +# 完全卸载,包括 namespace 和本地 hostPath 数据 +./uninstall.sh delete-all + +# 完全卸载但保留本地 hostPath 数据 +./uninstall.sh delete-all --keep-local-data +``` + +## 🔧 高级配置 + +### 监控配置 + +Kubernetes 部署通过脚本交互界面中的 `monitoring` 组件启用监控。部署脚本会生成运行时 Helm values,设置 `global.monitoring.enabled`、`global.monitoring.provider`、`global.monitoring.dashboardUrl`,并启用 `nexent-monitoring` 子 Chart。 + +```bash +cd nexent/k8s/helm +./deploy.sh +``` + +如果本地已有 `k8s/helm/deploy.options`,脚本会询问是否复用本地配置。请选择重新配置/覆盖本地配置,然后在组件选择界面勾选 `monitoring`,再在 provider 选择界面手动选择 `grafana`、`phoenix`、`langfuse`、`langsmith`、`zipkin` 或 `otlp`。 + +支持的 provider: + +| Provider | 用途 | 默认访问地址 | +|----------|------|--------------| +| `otlp` | 仅启动 OpenTelemetry Collector,适合转发到外部平台 | 无 Dashboard | +| `phoenix` | 本地 Phoenix 追踪分析 | `http://localhost:30006` | +| `langfuse` | 本地 Langfuse 观测栈 | 
`http://localhost:30001` | +| `langsmith` | 转发到托管 LangSmith | `https://smith.langchain.com/` | +| `grafana` | 本地 Grafana + Tempo | `http://localhost:30002/d/nexent-llm-agent/nexent-agent-trace-monitoring?orgId=1` | +| `zipkin` | 本地 Zipkin | `http://localhost:30011` | + +选择 `langsmith` provider 前,请先在 `k8s/helm/nexent/values.yaml` 中配置 `global.monitoring.langsmithApiKey` 和 `global.monitoring.langsmithProject`。如需修改本地 Grafana、Langfuse 或各 Dashboard 的端口,也建议先在 values 文件中调整,再通过部署脚本重新配置并手动选择 `monitoring`。 + +常用 Helm values: + +| Values | 说明 | +|--------|------| +| `global.monitoring.enabled` | 是否让 Nexent 后端开启 OpenTelemetry 上报 | +| `global.monitoring.provider` | 后端 provider 标识:`otlp`、`phoenix`、`langfuse`、`langsmith`、`grafana`、`zipkin` | +| `global.monitoring.otlpEndpoint` | 后端 OTLP HTTP 上报地址,默认 `http://nexent-otel-collector:4318` | +| `global.monitoring.dashboardUrl` | 前端监控入口地址,留空则隐藏入口 | +| `global.monitoring.traceContentMode` | Trace 内容采集模式:`summary`、`metrics`、`full` | +| `nexent-monitoring.{component}.service.nodePort` | 调整各 Dashboard 的 NodePort | +| `nexent-monitoring.langfuse.init.*` | 本地 Langfuse 初始组织、项目和管理员账号 | +| `nexent-monitoring.grafana.adminUser` / `adminPassword` | 本地 Grafana 管理员账号 | + +查看监控组件状态: + +```bash +kubectl get pods -n nexent | grep -E 'otel|phoenix|grafana|tempo|zipkin|langfuse' +kubectl get svc -n nexent | grep -E 'otel|phoenix|grafana|zipkin|langfuse' +``` -# 卸载但保留数据 -./deploy-helm.sh delete +> **生产建议**:请替换默认密码、密钥和 Langfuse `encryptionKey`,并将 Dashboard Service 改为 ClusterIP 或通过受控 Ingress 暴露。 -# 完全卸载包括所有数据 -./deploy-helm.sh delete-all +### OAuth 登录配置 + +OAuth 登录依赖 `supabase` 组件。启用第三方登录时,请同时部署 `supabase`,并将 `config.oauth.callbackBaseUrl` 设置为浏览器可访问的 Nexent Web 地址。 + +```bash +./deploy.sh --components infrastructure,application,supabase +``` + +Kubernetes 部署通过 `nexent-common` 的 `config.oauth.*` values 写入后端环境变量: + +```bash +helm upgrade --install nexent nexent \ + --namespace nexent --create-namespace \ + --set global.deploymentComponents.supabase=true \ + --set 
nexent-supabase-kong.enabled=true \ + --set nexent-supabase-auth.enabled=true \ + --set nexent-supabase-db.enabled=true \ + --set nexent-common.config.oauth.callbackBaseUrl=https://nexent.example.com \ + --set nexent-common.config.oauth.githubClientId=your_github_client_id \ + --set nexent-common.config.oauth.githubClientSecret=your_github_client_secret +``` + +可配置的 OAuth values: + +| Values | 对应环境变量 | 说明 | +|--------|--------------|------| +| `nexent-common.config.oauth.callbackBaseUrl` | `OAUTH_CALLBACK_BASE_URL` | Web 入口地址,回调路径会自动拼接 | +| `nexent-common.config.oauth.githubClientId` | `GITHUB_OAUTH_CLIENT_ID` | GitHub OAuth Client ID | +| `nexent-common.config.oauth.githubClientSecret` | `GITHUB_OAUTH_CLIENT_SECRET` | GitHub OAuth Client Secret | +| `nexent-common.config.oauth.gdeUrl` | `GDE_URL` | GDE OAuth 服务地址 | +| `nexent-common.config.oauth.gdeClientId` | `GDE_OAUTH_CLIENT_ID` | GDE OAuth Client ID | +| `nexent-common.config.oauth.gdeClientSecret` | `GDE_OAUTH_CLIENT_SECRET` | GDE OAuth Client Secret | +| `nexent-common.config.oauth.enableWechat` | `ENABLE_WECHAT_OAUTH` | 是否启用 WeChat OAuth | +| `nexent-common.config.oauth.wechatClientId` | `WECHAT_OAUTH_APP_ID` | WeChat App ID | +| `nexent-common.config.oauth.wechatClientSecret` | `WECHAT_OAUTH_APP_SECRET` | WeChat App Secret | +| `nexent-common.config.oauth.sslVerify` | `OAUTH_SSL_VERIFY` | 访问 OAuth provider 时是否校验证书 | +| `nexent-common.config.oauth.caBundle` | `OAUTH_CA_BUNDLE` | 自定义 CA bundle 路径 | + +Provider 回调地址: + +| Provider | 回调地址 | +|----------|----------| +| GitHub | `{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=github` | +| GDE | `{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=gde` | +| WeChat | `{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=wechat` | + +本地 NodePort 默认回调示例为 `http://localhost:30000/api/user/oauth/callback?provider=github`。生产环境应改为公网 HTTPS 域名,并在 OAuth provider 控制台中登记相同地址。 + +### CAS 登录配置 + +CAS SSO 不依赖 `supabase`。启用 CAS 时,请将 
`nexent-common.config.cas.callbackBaseUrl` 设置为浏览器可访问的 Nexent Web 地址,且不要带结尾 `/`。`nexent-common.config.cas.serverUrl` 是 CAS Server 根地址,也不要带结尾 `/`。 + +Kubernetes 部署通过 `nexent-common` 的 `config.cas.*` values 写入后端环境变量: + +```bash +helm upgrade --install nexent nexent \ + --namespace nexent --create-namespace \ + --set nexent-common.config.cas.enabled=true \ + --set nexent-common.config.cas.serverUrl=https://cas.example.com/cas \ + --set nexent-common.config.cas.callbackBaseUrl=https://nexent.example.com \ + --set nexent-common.config.cas.loginMode=force \ + --set nexent-common.config.cas.logoutUrl=/logout +``` + +可配置的 CAS values: + +| Values | 对应环境变量 | 说明 | +|--------|--------------|------| +| `nexent-common.config.cas.enabled` | `CAS_ENABLED` | 是否启用 CAS | +| `nexent-common.config.cas.serverUrl` | `CAS_SERVER_URL` | CAS Server 根地址 | +| `nexent-common.config.cas.validatePath` | `CAS_VALIDATE_PATH` | serviceValidate 路径,默认 `/p3/serviceValidate` | +| `nexent-common.config.cas.callbackBaseUrl` | `CAS_CALLBACK_BASE_URL` | Web 入口地址,CAS 回调路径会自动拼接 | +| `nexent-common.config.cas.loginMode` | `CAS_LOGIN_MODE` | `disabled`、`button` 或 `force` | +| `nexent-common.config.cas.userAttribute` | `CAS_USER_ATTRIBUTE` | 用户标识属性。为空时使用 `cas:user` | +| `nexent-common.config.cas.emailAttribute` | `CAS_EMAIL_ATTRIBUTE` | 邮箱属性 | +| `nexent-common.config.cas.roleAttribute` | `CAS_ROLE_ATTRIBUTE` | 角色属性 | +| `nexent-common.config.cas.tenantAttribute` | `CAS_TENANT_ATTRIBUTE` | 租户属性 | +| `nexent-common.config.cas.roleMapJson` | `CAS_ROLE_MAP_JSON` | CAS 角色到 Nexent 角色的 JSON 映射 | +| `nexent-common.config.cas.sessionMaxAgeSeconds` | `CAS_SESSION_MAX_AGE_SECONDS` | CAS 本地会话最长有效期 | +| `nexent-common.config.cas.localSessionMaxAgeSeconds` | `LOCAL_SESSION_MAX_AGE_SECONDS` | Nexent 本地会话有效期 | +| `nexent-common.config.cas.renewBeforeSeconds` | `CAS_RENEW_BEFORE_SECONDS` | 距离过期多少秒内触发无感续期 | +| `nexent-common.config.cas.renewTimeoutSeconds` | `CAS_RENEW_TIMEOUT_SECONDS` | 无感续期等待超时时间 | +| 
`nexent-common.config.cas.syntheticEmailDomain` | `CAS_SYNTHETIC_EMAIL_DOMAIN` | CAS 未返回邮箱时生成邮箱使用的域名 | +| `nexent-common.config.cas.logoutUrl` | `CAS_LOGOUT_URL` | CAS 登出地址。为空时 Nexent 主动退出不调用 CAS Server 登出接口 | +| `nexent-common.config.cas.sslVerify` | `CAS_SSL_VERIFY` | 访问 CAS Server 时是否校验证书 | +| `nexent-common.config.cas.caBundle` | `CAS_CA_BUNDLE` | 自定义 CA bundle 路径 | + +常用 CAS 地址: + +| 用途 | 地址 | +|------|------| +| Nexent 登录入口 | `{CAS_CALLBACK_BASE_URL}/api/user/cas/login?redirect=/` | +| CAS service 回调 | `{CAS_CALLBACK_BASE_URL}/api/user/cas/callback` | +| CAS 无感续期回调 | `{CAS_CALLBACK_BASE_URL}/api/user/cas/renew_callback` | +| CAS 单点登出回调 | `POST {CAS_CALLBACK_BASE_URL}/api/user/cas/logout_callback` | + +Apereo CAS 使用 JSON Service Registry 时,可以新增一个服务注册文件,例如 `Nexent-10001.json`。文件需要放到 CAS 部署配置的 service registry 目录中,`id` 必须全局唯一。本地 NodePort 示例: + +```json +{ + "@class": "org.apereo.cas.services.RegexRegisteredService", + "serviceId": "http://localhost:30000.*", + "name": "Nexent CAS Client", + "id": 10001, + "description": "Nexent CAS SSO client", + "evaluationOrder": 1, + "logoutType": "BACK_CHANNEL", + "logoutUrl": "http://localhost:30000/api/user/cas/logout_callback" +} +``` + +生产环境建议保持 `CAS_SSL_VERIFY=true`;自签名证书优先配置 `CAS_CA_BUNDLE`,仅本地验证时再临时设置 `CAS_SSL_VERIFY=false`。 + +#### CAS 对接 ModelEngine + +当使用 CAS 协议对接 ModelEngine 时,建议通过 values 文件配置 Nexent,避免 `CAS_ROLE_MAP_JSON` 在命令行中转义复杂。 + +创建 `cas-modelengine-values.yaml`: + +```yaml +nexent-common: + config: + cas: + enabled: true + serverUrl: "https://{modelengine_host}:5443/SSOSvr" + validatePath: "/p3/serviceValidate" + callbackBaseUrl: "http://{nexent_host}:30000" + loginMode: "force" + userAttribute: "userName" + emailAttribute: "email" + roleAttribute: "userType" + tenantAttribute: "tenant_id" + roleMapJson: '{"1":"ADMIN","3":"DEV"}' + sessionMaxAgeSeconds: 3600 + localSessionMaxAgeSeconds: 3600 + renewBeforeSeconds: 300 + renewTimeoutSeconds: 10 + syntheticEmailDomain: "cas.local" + logoutUrl: "/logout?service=http://{nexent_host}:30000" + sslVerify: 
false + caBundle: "" +``` + +同时,需要进入 OMS 容器添加 CAS client 的注册配置文件,参考如下步骤: + +```bash +# 创建注册配置文件,将 JSON 部分输入文件并保存 +vim Nexent-10000001.json +{ + "@class": "org.apereo.cas.services.CasRegisteredService", + "serviceId": "http://{nexent_host}:30000.*", + "name": "Nexent CAS Client", + "id": 10000001, + "description": "Nexent CAS SSO client", + "evaluationOrder": 1, + "logoutType": "BACK_CHANNEL", + "logoutUrl": "http://{nexent_host}:30000/api/user/cas/logout_callback" +} + +# 执行如下命令,将配置文件拷贝到容器中 +kubectl cp Nexent-10000001.json model-engine/$(kubectl get pods -n model-engine -l app=oms --no-headers | awk '{print $1}'):/opt/huawei/fce/apps/platform/webapps/SSOSvr/WEB-INF/classes/services/Nexent-10000001.json +kubectl exec -i -n model-engine $(kubectl get pods -n model-engine -l app=oms --no-headers | awk '{print $1}') -- chown tomcat:fusioncube /opt/huawei/fce/apps/platform/webapps/SSOSvr/WEB-INF/classes/services/Nexent-10000001.json ``` ## 🔍 故障排查 diff --git a/doc/docs/zh/quick-start/kubernetes-upgrade-guide.md b/doc/docs/zh/quick-start/kubernetes-upgrade-guide.md index 43f5c1d49..f2ec9226a 100644 --- a/doc/docs/zh/quick-start/kubernetes-upgrade-guide.md +++ b/doc/docs/zh/quick-start/kubernetes-upgrade-guide.md @@ -15,7 +15,7 @@ 更新之前,先记录下当前部署的版本和数据目录信息。 - 当前部署版本信息的位置:`backend/consts/const.py` 中的 `APP_VERSION` -- 数据目录信息的位置:`k8s/helm/nexent/values.yaml` 中的 `global.dataDir` +- 本地卷目录信息的位置:各 Helm 子 chart 的 `storage.hostPath`,默认位于 `/var/lib/nexent-data/nexent-*` **git 方式下载的代码** @@ -28,7 +28,7 @@ git pull **zip 包等方式下载的代码** 1. 需要去 GitHub 上重新下载一份最新代码,并解压缩。 -2. 将之前执行部署脚本目录下 `k8s/helm` 目录中的 `.deploy.options` 文件拷贝到新代码目录的 `k8s/helm` 目录中。(如果不存在该文件则忽略此步骤)。 +2. 
将之前执行部署脚本目录下 `k8s/helm` 目录中的 `deploy.options` 文件拷贝到新代码目录的 `k8s/helm` 目录中。(如果不存在该文件则忽略此步骤)。 ## 🔄 步骤二:执行升级 @@ -36,10 +36,10 @@ git pull ```bash cd k8s/helm -./deploy-helm.sh apply +./deploy.sh ``` -脚本会自动检测您之前的部署设置(版本、镜像源等)。如果 `.deploy.options` 文件不存在,系统会提示您输入配置信息。 +脚本会自动检测您之前保存的部署设置(组件组合、端口策略、镜像来源等)。如果 `deploy.options` 文件不存在,系统会提示您输入配置信息。 > 💡 提示 > - 若需配置语音模型(STT/TTS),请在对应的 `values.yaml` 中修改相关配置,或通过命令行参数传入。 @@ -137,7 +137,7 @@ kubectl exec -i $POSTGRES_POD -n nexent -- psql -U root -d nexent < ./sql/v2.0.0 kubectl exec nexent/$POSTGRES_POD -n nexent -- pg_dump -U root nexent > backup_$(date +%F).sql ``` -> - 对于 Supabase 数据库(仅完整版本),请使用 `nexent-supabase-db` Pod: +> - 对于 Supabase 数据库(选择 `supabase` 组件时),请使用 `nexent-supabase-db` Pod: ```bash SUPABASE_POD=$(kubectl get pods -n nexent -l app=nexent-supabase-db -o jsonpath='{.items[0].metadata.name}') diff --git a/doc/docs/zh/quick-start/upgrade-guide.md b/doc/docs/zh/quick-start/upgrade-guide.md index b888e2ada..4f8b429e0 100644 --- a/doc/docs/zh/quick-start/upgrade-guide.md +++ b/doc/docs/zh/quick-start/upgrade-guide.md @@ -37,11 +37,11 @@ git pull bash upgrade.sh ``` -缺少 deploy.options 的情况下,会提示需要手动输入之前部署的一些配置,比如:当前部署版本、数据目录等。按照提示输入之前记录的信息即可。 +缺少 deploy.options 的情况下,会提示需要重新选择部署配置,例如组件组合、端口策略、镜像来源等。按照您之前的部署方式重新选择即可。 > 💡 提示 -> - 默认为快速部署场景,使用 `.env.example`。 -> - 若需配置语音模型(STT/TTS),请提前在 `.env.example` 中补充相关变量,我们将尽快提供前端配置入口。 +> - 若 `docker/.env` 不存在,部署脚本会从 `.env.example` 自动复制一份。 +> - 若需配置语音模型(STT/TTS),请在 `docker/.env` 中补充相关变量,我们将尽快提供前端配置入口。 ## 🌐 步骤三:验证部署 diff --git a/doc/docs/zh/sdk/data-process.md b/doc/docs/zh/sdk/data-process.md index a887c8442..1f1c27fde 100644 --- a/doc/docs/zh/sdk/data-process.md +++ b/doc/docs/zh/sdk/data-process.md @@ -98,6 +98,9 @@ def file_process(self, - `.odt` - OpenDocument文本 - `.pptx` - PowerPoint 2007及更高版本 - `.ppt` - PowerPoint 97-2003版本 +- `.xml` - XML数据文件 +- `.json` - JSON数据文件 +- `.csv` - 逗号分隔值文件 ## 💡 使用示例 diff --git a/doc/docs/zh/sdk/monitoring.md b/doc/docs/zh/sdk/monitoring.md index 
c592df267..2483b505b 100644 --- a/doc/docs/zh/sdk/monitoring.md +++ b/doc/docs/zh/sdk/monitoring.md @@ -1,289 +1,473 @@ -# 🚀 Nexent LLM 监控系统 +# Nexent Agent 可观测性(OTLP) -专门监控大模型 Token 生成速度和性能的企业级监控解决方案。 +基于 OpenTelemetry OTLP 协议的 AI Agent 企业级可观测性方案。支持对接 Arize Phoenix、Langfuse、LangSmith、Grafana Tempo、Zipkin 等可观测性平台。 -## 📊 系统架构 +## 系统架构 ``` -┌─────────────────────────────────────────────────────────┐ -│ Nexent LLM 监控系统 │ -├─────────────────────────────────────────────────────────┤ -│ │ -│ Nexent API ──► OpenTelemetry ──► Jaeger (链路追踪) │ -│ │ │ │ -│ │ └──────► Prometheus (指标收集) │ -│ │ │ │ -│ └─► OpenAI LLM └──► Grafana (可视化) │ -│ (Token 监控) │ -└─────────────────────────────────────────────────────────┘ +NexentAgent ──► OpenTelemetry SDK ──► OTLP Collector ──► Arize Phoenix / Langfuse / LangSmith / Grafana Tempo / Zipkin / OTLP Backend + │ │ + │ OpenInference 语义约定 │ + │ (llm.*, agent.* 属性) │ + └────────────────────────────────────────┘ ``` -## ⚡ 快速启动(5分钟) +## 快速启动 ```bash -# 1. 启动监控服务 -./docker/start-monitoring.sh +cd docker +[ -f .env ] || cp .env.example .env +cp monitoring/monitoring.env.example monitoring/monitoring.env -# 2. 安装性能监控依赖 -uv sync --extra performance +vim .env +ENABLE_TELEMETRY=true +MONITORING_PROVIDER=otlp +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http -# 3. 启用监控 -export ENABLE_TELEMETRY=true +vim monitoring/monitoring.env +MONITORING_PROVIDER=otlp -# 4. 
启动后端服务 -python backend/config_service.py -python backend/runtime_service.py +./start-monitoring.sh --stack collector ``` -## 📊 访问监控界面 +## 本地化部署形态 -| 界面 | 地址 | 用途 | -|------|------|------| -| **Grafana 仪表板** | http://localhost:3005 | LLM 性能监控 | -| **Jaeger 链路追踪** | http://localhost:16686 | 请求链路分析 | -| **Prometheus 指标** | http://localhost:9090 | 原始监控数据 | +`docker/start-monitoring.sh` 支持多种形态,均以 OpenTelemetry Collector 作为统一入口。业务服务只需要把 OTLP 发到 Collector,不需要感知后端平台差异。 -### 🔐 Grafana 登录信息 +| 形态 | 命令 | 包含服务 | 适用场景 | +|------|------|----------|----------| +| `collector` | `./start-monitoring.sh --stack collector` | OpenTelemetry Collector | 只验证埋点、或转发到外部云端平台 | +| `phoenix` | `./start-monitoring.sh --stack phoenix` | Collector + Phoenix | 本地 trace 调试、OpenInference 属性查看、实验分析 | +| `langfuse` | `./start-monitoring.sh --stack langfuse` | Collector + Langfuse Web/Worker + Postgres + ClickHouse + MinIO + Redis | 本地完整 LLMOps 体验、会话/用户/反馈/成本分析 | +| `langsmith` | `./start-monitoring.sh --stack langsmith` | OpenTelemetry Collector | 转发 traces 到在线 LangSmith 平台 | +| `grafana` | `./start-monitoring.sh --stack grafana` | Collector + Grafana + Tempo | 本地 Tempo trace 查询 | +| `zipkin` | `./start-monitoring.sh --stack zipkin` | Collector + Zipkin | 本地 trace 查询 | -首次访问 Grafana (http://localhost:3005) 时需要登录: +也可以在 `docker/monitoring/monitoring.env` 中设置默认形态: +```bash +MONITORING_PROVIDER=phoenix ``` -用户名: admin -密码: admin + +### 本地 Phoenix + +Phoenix 本地部署使用 `arizephoenix/phoenix` 镜像,默认 UI 端口为 `6006`,gRPC OTLP 端口映射为 `4319`,数据持久化到 Docker volume `phoenix-data`。 + +```bash +cd docker +./start-monitoring.sh --stack phoenix ``` -**首次登录后会要求修改密码,可以:** -- 设置新密码(推荐) -- 点击 "Skip" 跳过(开发环境) +访问地址: -**登录后可以看到:** -- 📊 **LLM Performance Dashboard** - 预配置的性能仪表板 -- 📈 **数据源配置** - 自动连接到 Prometheus 和 Jaeger -- 🎯 **实时监控面板** - Token 生成速度、延迟等关键指标 +- Phoenix UI:`http://localhost:6006` +- Collector OTLP HTTP:`http://localhost:4318` +- Collector OTLP gRPC:`localhost:4317` -## 🎯 核心功能特性 +Nexent 后端在 Docker 网络内运行时: -### ⚡ LLM 
专用监控 -- **Token 生成速度**: 实时监控每秒生成的 token 数量 -- **TTFT (Time to First Token)**: 首个 token 返回延迟 -- **流式响应分析**: 每个 token 的生成时间戳 -- **模型性能对比**: 不同模型的性能基准 +```bash +ENABLE_TELEMETRY=true +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http +OTEL_EXPORTER_OTLP_METRICS_ENABLED=false +``` -### 🔍 分布式链路追踪 -- **完整请求链路**: 从 HTTP 到 LLM 的端到端追踪 -- **性能瓶颈识别**: 自动定位慢查询和异常 -- **错误根因分析**: 快速定位问题根源 +后端直接在宿主机运行时,把 endpoint 改为 `http://localhost:4318`。 -### 🛠️ 开发友好设计 -- **一行代码接入**: 使用装饰器快速添加监控 -- **零依赖降级**: 未安装监控依赖时自动跳过 -- **零感知使用**: 无需手动检查监控状态,自动处理 -- **灵活配置**: 环境变量控制监控行为 +### 本地 Langfuse -## 🛠️ 添加监控到代码 +Langfuse 本地部署使用 v3 架构:Web、Worker、Postgres、ClickHouse、MinIO、Redis。默认 UI 端口为 `3001`,初始化项目和 API Key 来自 `monitoring.env`。 -### 🎯 推荐方式:单例模式 (v2.1+) +```bash +cd docker +./start-monitoring.sh --stack langfuse +``` -```python -# 后端服务中使用 - 直接使用全局配置好的 monitoring_manager -from utils.monitoring import monitoring_manager +访问地址: -# API 端点监控 -@monitoring_manager.monitor_endpoint("my_service.my_function") -async def my_api_function(): - return {"status": "ok"} +- Langfuse UI:`http://localhost:3001` +- 默认管理员:`admin@nexent.local` / `nexent-langfuse-admin` +- 默认项目 Key:`pk-lf-nexent-local` / `sk-lf-nexent-local` -# LLM 调用监控 -@monitoring_manager.monitor_llm_call("gpt-4", "chat_completion") -def call_llm(messages): - # 自动获得 Token 级别监控 - return llm_response +启动脚本会在 `LANGFUSE_OTLP_AUTH_HEADER` 为空时自动生成 `Basic base64(public_key:secret_key)`,并让 Collector 将 trace 转发到 `http://langfuse-web:3000/api/public/otel`。本地默认密钥只适合开发验证,生产部署必须替换 `LANGFUSE_NEXTAUTH_SECRET`、`LANGFUSE_SALT`、`LANGFUSE_ENCRYPTION_KEY`、数据库密码和对象存储密钥。 + +### 在线 LangSmith + +LangSmith 支持通过在线 OTLP endpoint 摄取 traces。Nexent 可以先把 OTLP 发到本地 Collector,再由 Collector 转发到 LangSmith,业务服务无需直接保存 LangSmith API Key。 + +```bash +cd docker +vim monitoring/monitoring.env + +MONITORING_PROVIDER=langsmith +LANGSMITH_API_KEY=lsv2_xxx +LANGSMITH_PROJECT=nexent +LANGSMITH_OTLP_TRACES_ENDPOINT=https://api.smith.langchain.com/otel/v1/traces 
-# 手动添加监控事件 -monitoring_manager.add_span_event("custom_event", {"key": "value"}) -monitoring_manager.set_span_attributes(user_id="123", action="process") +./start-monitoring.sh --stack langsmith ``` -### 📦 SDK中直接使用 +后端在 Docker 网络内运行时: -```python -from nexent.monitor import get_monitoring_manager - -# 获取全局监控管理器 - 在backend已自动配置 -monitor = get_monitoring_manager() - -# 使用装饰器 -@monitor.monitor_llm_call("claude-3", "completion") -def my_llm_function(): - return "response" - -# 或者在业务逻辑中直接使用 -with monitor.trace_llm_request("custom_operation", "my_model") as span: - # 执行业务逻辑 - result = process_data() - monitor.add_span_event("processing_completed") - return result +```bash +ENABLE_TELEMETRY=true +MONITORING_PROVIDER=langsmith +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http +OTEL_EXPORTER_OTLP_METRICS_ENABLED=false ``` -### ✨ 全局配置自动化 +LangSmith 当前配置只转发 traces,OTLP metrics 会留在 Collector debug pipeline。若需要后端直接写入 LangSmith,可设置 `OTEL_EXPORTER_OTLP_ENDPOINT=https://api.smith.langchain.com/otel`、`LANGSMITH_API_KEY` 和可选的 `LANGSMITH_PROJECT`。 -监控配置已在 `backend/utils/monitoring.py` 中自动初始化: +### 本地 Grafana + Tempo -```python -# 无需手动配置 - 系统启动时自动完成 -# monitoring_manager 已经使用环境变量配置完成 -from utils.monitoring import monitoring_manager +Grafana 本地部署使用 Grafana Tempo 存储 traces,并启用 Tempo `metrics-generator` 的 `local-blocks` processor 支持 Grafana trace breakdown 中的 TraceQL metrics 查询。Collector 接收 Nexent 后端的 OTLP traces/metrics,其中 traces 通过 OTLP gRPC 转发到 Tempo;OTLP metrics 只进入 Collector debug pipeline,不提供独立指标存储或指标 dashboard。 + +```bash +cd docker +./start-monitoring.sh --stack grafana +``` -# 直接使用即可,无需检查是否开启 -@monitoring_manager.monitor_endpoint("my_function") -def my_function(): - pass +后端 `.env` 使用 `MONITORING_DASHBOARD_URL` 控制前端顶栏监控入口: -# FastAPI应用初始化 -monitoring_manager.setup_fastapi_app(app) +```bash +ENABLE_TELEMETRY=true +MONITORING_PROVIDER=grafana 
+MONITORING_DASHBOARD_URL=http://localhost:3002/d/nexent-llm-agent/nexent-agent-trace-monitoring?orgId=1 +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 ``` -### 🔒 自动启停设计 +访问地址: -- **智能监控**: 根据 `ENABLE_TELEMETRY` 环境变量自动启停 -- **零感知使用**: 外部代码无需检查监控状态,直接使用所有功能 -- **优雅降级**: 未开启时静默无效果,开启时正常工作 -- **默认关闭**: 未配置时自动视为关闭状态 +- Grafana UI:`http://localhost:3002` +- 默认管理员:`admin` / `nexent-grafana-admin` +- Tempo API:`http://localhost:3200` -```bash -# 开启监控 -export ENABLE_TELEMETRY=true +Grafana 会自动预置 Tempo datasource,并加载 `Nexent Agent Trace Monitoring` dashboard。Trace 查询入口在 Grafana Explore 中选择 `Tempo` datasource,示例 TraceQL 为 `{ resource.service.name = "nexent-backend" }`。 -# 关闭监控 -export ENABLE_TELEMETRY=false -``` +### 本地 Zipkin -## 📊 核心监控指标 +Zipkin 本地部署使用 `openzipkin/zipkin` 镜像。Collector 接收 Nexent 后端的 OTLP traces/metrics,其中 traces 转发到 Zipkin v2 spans endpoint;OTLP metrics 当前只进入 Collector debug pipeline。 -| 指标 | 描述 | 重要性 | -|------|------|-------| -| `llm_token_generation_rate` | Token 生成速度 (tokens/s) | ⭐⭐⭐ | -| `llm_time_to_first_token_seconds` | 首 Token 延迟 | ⭐⭐⭐ | -| `llm_request_duration_seconds` | 完整请求耗时 | ⭐⭐⭐ | -| `llm_total_tokens` | 输入/输出 Token 数量 | ⭐⭐ | -| `llm_error_count` | LLM 调用错误数 | ⭐⭐⭐ | +```bash +cd docker +./start-monitoring.sh --stack zipkin +``` -## 🔧 环境配置 +后端 `.env`: ```bash -# 添加到 .env 文件 -cat >> .env << EOF ENABLE_TELEMETRY=true -SERVICE_NAME=nexent-backend -JAEGER_ENDPOINT=http://localhost:14268/api/traces -LLM_SLOW_REQUEST_THRESHOLD_SECONDS=5.0 -LLM_SLOW_TOKEN_RATE_THRESHOLD=10.0 -TELEMETRY_SAMPLE_RATE=1.0 # 开发环境,生产环境推荐 0.1 -EOF +MONITORING_PROVIDER=zipkin +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http +OTEL_EXPORTER_OTLP_METRICS_ENABLED=false +MONITORING_DASHBOARD_URL=http://localhost:9411 ``` -## 🛠️ 验证系统 +访问地址: -```bash -# 检查指标端点 -curl http://localhost:8000/metrics +- Zipkin UI:`http://localhost:9411` + +## AI 可观测性平台对接 + +### Arize Phoenix -# 验证依赖安装 -python -c "from backend.utils.monitoring import 
MONITORING_AVAILABLE; print(f'监控可用: {MONITORING_AVAILABLE}')" +Arize Phoenix 提供针对 AI 的专业可观测性,原生支持 OpenInference 语义。 + +**配置:** + +```bash +MONITORING_PROVIDER=phoenix +OTEL_EXPORTER_OTLP_ENDPOINT=https://app.phoenix.arize.com/s/YOUR_SPACE +OTEL_EXPORTER_OTLP_AUTHORIZATION="Bearer YOUR_PHOENIX_API_KEY" +OTEL_EXPORTER_OTLP_PROTOCOL=http +OTEL_EXPORTER_OTLP_METRICS_ENABLED=false ``` -## 🆘 故障排除 +**功能特性:** +- LLM 调用链可视化(Prompt/Completion) +- Token 级性能指标 +- Agent 步骤追踪 +- 成本分析 + +### Langfuse + +Langfuse 提供 Prompt 管理和 LLM 可观测性,支持 OTLP 协议。 + +**配置:** -### 监控数据为空? ```bash -# 检查服务状态 -docker-compose -f docker/docker-compose-monitoring.yml ps +MONITORING_PROVIDER=langfuse +OTEL_EXPORTER_OTLP_ENDPOINT=https://cloud.langfuse.com/api/public/otel + +LANGFUSE_PUBLIC_KEY=pk-xxx +LANGFUSE_SECRET_KEY=sk-xxx -# 检查依赖安装 -python -c "import opentelemetry; print('✅ 监控依赖已安装')" +OTEL_EXPORTER_OTLP_AUTHORIZATION=Basic BASE64_ENCODED_KEY +OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION=4 ``` -### 端口冲突? +生成认证 Key: + ```bash -# 检查端口占用 -lsof -i :3005 -i :9090 -i :16686 +echo -n "$LANGFUSE_PUBLIC_KEY:$LANGFUSE_SECRET_KEY" | base64 ``` -### 依赖安装问题? 
-```bash -# 重新安装性能依赖 -uv sync --extra performance +**功能特性:** +- Prompt 版本管理 +- 会话级 Trace 分组 +- 用户反馈收集 +- 模型成本追踪 + +## 环境变量 + +| 变量 | 默认值 | 说明 | +|------|--------|------| +| `ENABLE_TELEMETRY` | `false` | 启用/禁用监控 | +| `MONITORING_PROVIDER` | `otlp` | 平台配置和本地部署形态:`otlp`、`phoenix`、`langfuse`、`langsmith`、`grafana`、`zipkin` | +| `MONITORING_DASHBOARD_URL` | (空) | 前端顶栏监控入口跳转 URL,需配置为浏览器可访问地址 | +| `MONITORING_PROJECT_NAME` | `nexent` | 监控平台项目名 | +| `MONITORING_TRACE_CONTENT_MODE` | `summary` | Trace payload 记录模式:`summary` 写入有界预览和结构元数据,`metrics` 只写结构/大小元数据,`full` 在 `MONITORING_TRACE_MAX_CHARS` 限制内保留完整 payload | +| `MONITORING_TRACE_MAX_CHARS` | `4000` | 每个 payload 预览最多写入的字符数 | +| `MONITORING_TRACE_MAX_ITEMS` | `20` | dict/list 预览最多写入的 key 或 item 数 | +| `OTEL_SERVICE_NAME` | `nexent-backend` | 服务标识 | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | `http://localhost:4318` | OTLP base endpoint,SDK 会派生 `/v1/traces` 和 `/v1/metrics` | +| `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` | (空) | 可选 trace 专用 endpoint | +| `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT` | (空) | 可选 metric 专用 endpoint | +| `OTEL_EXPORTER_OTLP_PROTOCOL` | `http` | 协议:`http` 或 `grpc` | +| `OTEL_EXPORTER_OTLP_HEADERS` | (空) | 通用认证头(逗号分隔) | +| `OTEL_EXPORTER_OTLP_AUTHORIZATION` | (空) | `Authorization` header,常用于 Phoenix bearer auth 和 Langfuse | +| `OTEL_EXPORTER_OTLP_X_API_KEY` | (空) | `x-api-key` header,用于兼容需要该 header 的平台 | +| `OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION` | (空) | Langfuse 实时摄取版本,例如 `4` | +| `OTEL_EXPORTER_OTLP_METRICS_ENABLED` | `true` | 是否导出 OTLP metrics | +| `LANGSMITH_API_KEY` | (空) | LangSmith API Key,会映射为 OTLP `x-api-key` header | +| `LANGSMITH_PROJECT` | (空) | 可选 LangSmith project header | +| `LANGSMITH_OTLP_TRACES_ENDPOINT` | `https://api.smith.langchain.com/otel/v1/traces` | Collector 转发到在线 LangSmith 的 trace endpoint | +| `MONITORING_INSTRUMENT_REQUESTS` | `false` | 是否启用 requests 自动 HTTP client span;默认关闭,避免 AI trace 被普通 HTTP 请求刷屏 | +| `MONITORING_FASTAPI_EXCLUDED_URLS` | (空) | FastAPI 自动埋点排除 URL,逗号分隔正则;例如只看 
agent 业务 span 时可设为 `/agent/run` | +| `MONITORING_FASTAPI_EXCLUDE_SPANS` | `receive,send` | 排除 ASGI 内部 `receive/send` span;流式接口建议保持默认值 | +| `OTEL_COLLECTOR_VERSION` | `0.150.0` | 本地 OpenTelemetry Collector Contrib 镜像版本 | +| `PHOENIX_VERSION` | `15` | 本地 Phoenix 镜像版本 | +| `LANGFUSE_VERSION` | `3` | 本地 Langfuse Web/Worker 镜像版本 | +| `LANGFUSE_POSTGRES_VERSION` | `15-alpine` | 本地 Langfuse Postgres 镜像版本 | +| `LANGFUSE_CLICKHOUSE_VERSION` | `26.3-alpine` | 本地 Langfuse ClickHouse 镜像版本 | +| `LANGFUSE_MINIO_VERSION` | `RELEASE.2023-12-20T01-00-02Z` | 本地 Langfuse MinIO 镜像版本 | +| `LANGFUSE_REDIS_VERSION` | `alpine` | 本地 Langfuse Redis 镜像版本 | +| `GRAFANA_VERSION` | `12.4` | 本地 Grafana 镜像版本 | +| `GRAFANA_PORT` | `3002` | 本地 Grafana UI 端口 | +| `GRAFANA_ADMIN_USER` | `admin` | 本地 Grafana 管理员用户名 | +| `GRAFANA_ADMIN_PASSWORD` | `nexent-grafana-admin` | 本地 Grafana 管理员密码 | +| `GRAFANA_DEFAULT_LANGUAGE` | `zh-Hans` | 本地 Grafana 默认界面语言 | +| `TEMPO_VERSION` | `2.10.5` | 本地 Tempo 镜像版本,避免浮动 tag 带来的配置兼容性漂移 | +| `TEMPO_PORT` | `3200` | 本地 Tempo HTTP API 端口 | +| `ZIPKIN_VERSION` | `latest` | 本地 Zipkin 镜像版本 | +| `ZIPKIN_PORT` | `9411` | 本地 Zipkin UI/API 端口 | + +## 代码集成 + +### Agent 边界上下文 + +业务层只需要在请求入口解析出用户和 Agent 信息后绑定一次上下文,后续 Agent、LLM、Tool span 由 SDK 生命周期自动生成: -# 检查 pyproject.toml 中的 performance 配置 -cat backend/pyproject.toml | grep -A 20 "performance" +```python +from nexent.monitor.agent_observability import AgentRunMetadata +from utils.monitoring import monitoring_manager + +monitoring_manager.bind_agent_context(AgentRunMetadata( + tenant_id=tenant_id, + user_id=user_id, + agent_id=agent_request.agent_id, + conversation_id=agent_request.conversation_id, + query=agent_request.query, + is_debug=agent_request.is_debug, + language=language, +)) ``` -### 服务名显示为 unknown_service? 
-```bash -# 检查环境变量配置 -echo "SERVICE_NAME: $SERVICE_NAME" +`monitor_endpoint` 仍保留为兼容 API 和低层 escape hatch,不建议业务层新增常规埋点时继续使用。 + +### Trace Payload 策略 + +工具输入输出、检索输出,以及 OpenInference 的 `input.value` / `output.value` 属性统一使用同一套 payload 策略。默认写入有界预览,并额外写入 `type`、`size_chars`、`item_count`、`truncated`、`keys` 等结构化属性。记忆检索 span 只记录结果摘要和统计信息,不写完整 memory 正文。 -# 重启监控服务以应用新配置 -./docker/start-monitoring.sh +Agent 上下文指标由 SDK 生命周期自动写入。每个 action step 会产生 `agent.step.metrics` event,包含上下文 token 估算、压缩调用数、缓存命中、压缩率和 token 阈值。Agent 结束时还会在顶层 span 写入聚合 step 数、最大上下文 token、平均压缩率、压缩调用总数和缓存命中总数。 + +### LLM 调用监控 + +```python +@monitoring_manager.monitor_llm_call("gpt-4", "chat_completion") +def call_llm(messages): + return llm_response ``` -## 🧹 数据管理 +### Agent 步骤追踪 -### 清理 Jaeger 追踪数据 -```bash -# 方法1: 重启 Jaeger 容器(最简单) -docker-compose -f docker/docker-compose-monitoring.yml restart nexent-jaeger +```python +with monitoring_manager.trace_agent_step("web_search", step_type="tool_call") as span: + result = execute_tool() + monitoring_manager.set_tool_output(result) +``` -# 方法2: 完全重建 Jaeger 容器和数据 -docker-compose -f docker/docker-compose-monitoring.yml stop nexent-jaeger -docker-compose -f docker/docker-compose-monitoring.yml rm -f nexent-jaeger -docker-compose -f docker/docker-compose-monitoring.yml up -d nexent-jaeger +### 工具调用追踪 -# 方法3: 清理所有监控数据(重建所有容器) -docker-compose -f docker/docker-compose-monitoring.yml down -docker-compose -f docker/docker-compose-monitoring.yml up -d +```python +with monitoring_manager.trace_tool_call("web_search", "agent_name", {"query": "test"}) as span: + results = search_web("test") + monitoring_manager.set_tool_output({"results": results}) ``` -### 清理 Prometheus 指标数据 -```bash -# 重启 Prometheus 容器 -docker-compose -f docker/docker-compose-monitoring.yml restart nexent-prometheus +### Phoenix 自定义层级埋点 + +如果希望 Phoenix 展示 `agent -> chain -> llm/retriever/tool` 的层级结构,使用 SDK Agent 生命周期入口和 OpenInference span kind 封装方法: + +```python +from nexent.monitor.agent_observability import 
AgentRunMetadata, get_monitoring_manager + +monitoring_manager = get_monitoring_manager() + +metadata = AgentRunMetadata( + tenant_id="tenant_id", + user_id="user_id", + agent_id=1, + conversation_id=1001, + agent_name="TestAgent", + query="你好", +) + +with monitoring_manager.start_agent_run(metadata): + with monitoring_manager.trace_agent_step("Step 0", metadata, step_type="agent_loop"): + with monitoring_manager.trace_llm_request("OpenAIModel.generate", "gpt-4"): + result = call_llm() + + with monitoring_manager.trace_retriever_call( + "knowledge_base_search", + "TestAgent", + {"query": "你好"}, + ): + documents = search_knowledge_base("你好") + monitoring_manager.set_retriever_output(documents) + + with monitoring_manager.trace_tool_call("FinalAnswerTool", "TestAgent", {"query": "你好"}): + monitoring_manager.set_tool_output({"answer": result}) + + monitoring_manager.set_openinference_output({"answer": result}) +``` -# 完全清理 Prometheus 数据 -docker-compose -f docker/docker-compose-monitoring.yml stop nexent-prometheus -docker volume rm docker_prometheus_data 2>/dev/null || true -docker-compose -f docker/docker-compose-monitoring.yml up -d nexent-prometheus +Phoenix 左侧的 `agent`、`chain`、`llm`、`retriever`、`tool` 标签来自 `openinference.span.kind`。span 必须通过嵌套 `with` 创建,Phoenix 才会显示成树形结构。 + +同一套方法只写入通用 OpenInference / Nexent 属性,不再写入 Langfuse 专用 span 字段。Langfuse provider 仍通过 OTLP endpoint 接收 trace,但展示和过滤以通用 OTLP/OpenInference 属性为准。 + +## OpenInference 语义属性 + +系统使用 OpenInference 语义约定,专为 AI 可观测性设计: + +### LLM 属性 + +| 属性 | 说明 | +|------|------| +| `llm.model_name` | 模型标识(如 `gpt-4`) | +| `llm.operation.name` | 操作类型(如 `chat_completion`) | +| `llm.token_count.prompt` | 输入 Token 数 | +| `llm.token_count.completion` | 输出 Token 数 | +| `llm.invocation_parameters` | 模型参数(JSON) | +| `llm.time_to_first_token` | TTFT(秒) | + +### Agent 属性 + +| 属性 | 说明 | +|------|------| +| `agent.name` | Agent 标识 | +| `agent.step.name` | 步骤名称(如 `web_search`) | +| `agent.step.type` | 
步骤类型:`tool_call`、`reasoning`、`action_selection` | +| `agent.tool.name` | 工具名称 | +| `agent.tool.input` | 按 trace payload 策略处理后的工具输入预览 | +| `agent.tool.input.*` | 工具输入结构化元数据:类型、大小、item 数、截断状态、keys | +| `agent.tool.output` | 按 trace payload 策略处理后的工具输出预览 | +| `agent.tool.output.*` | 工具输出结构化元数据:类型、大小、item 数、截断状态、keys | +| `agent.tool.success` | 工具调用是否成功 | +| `agent.tool.duration_ms` | 工具调用耗时 | +| `retriever.name` | 检索器名称 | +| `retrieval.query` | 检索查询 | +| `retrieval.results.count` | 检索结果数量 | +| `retrieval.top_score` | 可用时记录最高检索分数 | +| `retriever.input.*` | 检索输入结构化元数据 | +| `retriever.output` | 按 trace payload 策略处理后的检索输出预览 | +| `retriever.output.*` | 检索输出结构化元数据 | +| `context.tokens.estimated_input` | 每个 Agent step event 的上下文输入 token 估算 | +| `context.tokens.uncompressed_estimated` | 每个 Agent step event 的未压缩上下文 token 估算 | +| `context.compression.calls` | 每个 Agent step event 的压缩调用数 | +| `context.compression.cache_hits` | 每个 Agent step event 的压缩缓存命中数 | +| `context.compression.ratio` | 每个 Agent step event 的压缩率 | + +## 指标 + +| 指标 | 说明 | +|------|------| +| `llm.request.duration` | 请求延迟 | +| `llm.token.generation_rate` | Token 生成速率 | +| `llm.time_to_first_token` | TTFT | +| `llm.token_count.prompt` | 输入 Token | +| `llm.token_count.completion` | 输出 Token | +| `agent.step.count` | Agent 步骤数 | +| `agent.execution.duration` | Agent 执行时间 | +| `agent.error.count` | Agent 错误数 | + +## Collector 配置 + +OpenTelemetry Collector 默认只通过 debug exporter 打印数据,避免没有外部后端时把数据转发回自身。需要通过 Collector 转发到平台时,增加对应 exporter: + +```yaml +exporters: + otlphttp/langsmith: + traces_endpoint: https://api.smith.langchain.com/otel/v1/traces + headers: + x-api-key: YOUR_LANGSMITH_API_KEY + Langsmith-Project: nexent + +service: + pipelines: + traces: + exporters: [otlphttp/langsmith, debug] ``` -### 清理 Grafana 配置 -```bash -# 重置 Grafana 配置和仪表板 -docker-compose -f docker/docker-compose-monitoring.yml stop nexent-grafana -docker volume rm docker_grafana_data 2>/dev/null || true -docker-compose -f 
docker/docker-compose-monitoring.yml up -d nexent-grafana +本地 Phoenix 和 Langfuse 分别使用独立 Collector 配置: + +- `docker/monitoring/otel-collector-phoenix-config.yml` +- `docker/monitoring/otel-collector-langfuse-config.yml` +- `docker/monitoring/otel-collector-langsmith-config.yml` + +基础 debug 配置见 `docker/monitoring/otel-collector-config.yml`。 + +## 优雅降级 + +未安装 OpenTelemetry 依赖时,监控自动禁用: + +```bash +pip install nexent # 基础包 - 无监控 +pip install nexent[performance] # 包含 OTLP 支持 ``` -## 📈 典型问题分析 +禁用时所有监控方法均正常工作 - 装饰器透传,上下文管理器返回 None。 -### Token 生成速度慢 (< 5 tokens/s) -1. **分析**: Grafana → Token Generation Rate 面板 -2. **解决**: 检查模型服务负载、优化输入 prompt 长度 +## 故障排除 -### 请求响应慢 (> 10s) -1. **分析**: Jaeger → 查看完整链路追踪 -2. **解决**: 定位瓶颈环节(数据库/LLM/网络) +### 数据未显示 -### 错误率突增 (> 10%) -1. **分析**: Prometheus → llm_error_count 指标 -2. **解决**: 检查模型服务可用性、验证 API 密钥 +1. 检查 `.env` 中 `ENABLE_TELEMETRY=true` +2. 验证 OTLP 端点可访问 +3. 检查认证头配置正确 -## 🎉 开始使用 +### 连接错误 -设置完成后你可以: +1. 测试端点:`curl -v $OTEL_EXPORTER_OTLP_ENDPOINT/v1/traces` +2. 确认协议匹配端点(`http` vs `grpc`) +3. 查看 Collector 日志:`docker logs nexent-otel-collector` -1. 📊 在 Grafana 中查看 **LLM Performance Dashboard** -2. 🔍 在 Jaeger 中追踪每个请求的完整链路 -3. 📈 分析 Token 生成速度和性能瓶颈 -4. 🚨 设置性能告警和阈值 +### 属性错误 -享受高效的 LLM 性能监控! 🚀 +1. 在平台 UI 中验证 OpenInference 属性 +2. 检查 Span 属性命名:使用 `llm.model_name` 而非 `model_name` +3. 查看平台特定属性要求 diff --git a/doc/docs/zh/sdk/opentelemetry-design.md b/doc/docs/zh/sdk/opentelemetry-design.md new file mode 100644 index 000000000..2f8f0a678 --- /dev/null +++ b/doc/docs/zh/sdk/opentelemetry-design.md @@ -0,0 +1,699 @@ +# Nexent OpenTelemetry 可观测性设计 + +生成日期:2026-05-06 +基准分支:当前 OpenTelemetry 功能分支 + +## 可观测性基础 + +可观测性关注的是系统在运行过程中是否能够被理解和定位问题。相比只回答“系统是否还活着”的传统监控,可观测性更强调从运行时信号反推出系统内部状态,帮助研发和运维回答以下问题: + +- 当前请求为什么慢? +- Agent 在哪一步失败? +- 大模型调用耗时、首 token 时间和 token 速率是否异常? +- 某个用户、会话或 Agent 的完整执行链路是什么? +- 问题发生时有哪些输入、输出、工具调用和错误上下文? 
+ +业界通常把可观测性拆成三大支柱:Metrics、Logs、Traces。三者解决的问题不同,需要组合使用。 + +| 支柱 | 核心问题 | 典型数据 | 适合场景 | 在 Nexent 中的作用 | +|------|----------|----------|----------|--------------------| +| Metrics | “整体是否异常?” | 计数器、直方图、速率、分位数 | 看趋势、告警、容量评估、SLO/SLA | 统计 LLM 请求耗时、TTFT、token 速率、错误数、Agent step/tool 调用数 | +| Logs | “当时发生了什么?” | 按时间顺序输出的文本或结构化事件 | 查看异常上下文、排查单点错误、审计关键行为 | 保留运行日志,并通过 span event/attribute 记录关键 Agent、LLM、Tool 事件 | +| Traces | “一次请求经历了哪些步骤?” | trace、span、span event、上下游关系 | 分布式调用链、流式 Agent 执行链路、跨服务耗时定位 | 串联 HTTP 接口、Agent run、LLM generate、Tool call 和最终答案 | + +三大支柱之间不是替代关系。Metrics 适合发现问题,例如某段时间 LLM 错误数上升;Traces 适合定位问题,例如找到某次 `agent.run` 卡在某个 tool;Logs 适合补充细节,例如错误堆栈、原始提示词摘要或工具返回内容。对于 LLM Agent 场景,单纯的 HTTP 接口指标不足以解释 Agent 行为,因此必须把 Agent、LLM、Tool 等业务语义写入 trace 层级中。 + +## 智能体可观测性行业洞察 + +截至当前,智能体可观测性正在从传统 APM 的“接口是否健康、服务是否变慢”,扩展到“智能体为什么这样决策、哪一步引入了错误上下文、工具或检索是否误导了模型、成本和质量是否可控”。这类系统的核心难点不是单次 LLM 调用本身,而是一次用户请求会跨越路由、记忆、规划、检索、工具调用、模型生成、最终答案和反馈评价等多个阶段,并且每个阶段都可能影响最终结果。 + +智能体可观测性的接入路径通常有几类: + +| 接入路径 | 典型方式 | 适合场景 | 需要注意 | +|----------|----------|----------|----------| +| 平台 SDK 直连 | Langfuse SDK、LangSmith SDK、Datadog / New Relic SDK、框架 callback | 快速接入某个平台的专有能力,例如 prompt 管理、评分、评估、成本分析 | 平台绑定更强,后续迁移或双写到其他后端成本较高 | +| OpenTelemetry SDK 直连平台 OTLP endpoint | 应用直接用 OTLP HTTP/gRPC exporter 写入 Phoenix、Langfuse、LangSmith、Datadog 等兼容入口 | 希望保留 OTel 埋点模型,同时减少本地组件 | 鉴权、脱敏、采样、多后端分发逻辑会落在应用配置或平台侧 | +| OpenTelemetry Collector 中转 | 应用只写 Collector,由 Collector 转发到 Phoenix、Langfuse、LangSmith、Grafana Tempo、Zipkin 或企业 APM | 需要统一批处理、采样、脱敏、header 注入、多后端转发和私有化部署 | 多一个运行组件,需要维护 Collector 配置和部署可用性 | +| 平台 agent / 网关中转 | Datadog Agent、New Relic agent 或企业内部 telemetry gateway | 企业已有 APM 基础设施、权限、网络出口和审计要求明确 | 数据模型可能会被平台转换,AI 语义字段需要确认兼容性 | + +从知名 Agent/LLM 框架和平台的公开文档看,可观测性方案已经明显分成两层:框架或平台负责表达 Agent/LLM 运行时语义,OpenTelemetry/OTLP 负责把 trace、metric、log 导出到后端。差异主要在于:有些框架原生使用 OTel,有些通过 OpenInference/OpenLIT/OpenLLMetry 等 instrumentation 转成 OTel span,有些则先进入自有 tracing SDK,再通过 processor、callback 或平台集成转发。 + +| Agent / 平台 | 原生可观测性能力 | 
常用观测框架 / SDK | OTel / OTLP 路径 | 语义覆盖重点 | 局限与注意 | +|--------------|------------------|--------------------|------------------|--------------|------------| +| LangChain / LangGraph | LangSmith tracing、thread、feedback、evaluation,面向 chain、graph、run 的调试和评估 | LangSmith SDK、LangSmith OTel、OpenTelemetry SDK、Collector | `LANGSMITH_OTEL_ENABLED=true` 后可生成 OTel spans;LangSmith 提供 OTLP traces endpoint;也支持经 Collector fan-out 到多后端 | chain、graph node、LLM、tool、retriever、thread、feedback、eval | LangSmith 语义最完整;若只使用通用 OTel 后端,需要自行补齐 graph/thread/eval 维度 | +| LlamaIndex | 内置 instrumentation/callback 体系,官方观测页覆盖 LlamaTrace、Phoenix、SigNoz、MLflow、Langfuse、OpenLLMetry、OpenLIT、AgentOps 等 | OpenInference LlamaIndex instrumentation、LlamaTrace/Phoenix、Langfuse、OpenLLMetry、OpenLIT、MLflow | Phoenix/LlamaTrace、SigNoz、Langfuse、OpenLIT 等路径都可通过 OTel/OTLP 导出;常见方式是 `openinference-instrumentation-llama-index` + OTLP exporter | RAG query engine、retriever、index、agent workflow、LLM、tool、token、latency | RAG 语义强,但不同集成对属性映射和评估能力不完全一致 | +| OpenAI Agents SDK | SDK 内置 tracing,默认记录 runner、agent、generation、function tool、guardrail、handoff、speech 等 span | OpenAI Traces dashboard、custom trace processor、外部 tracing processors(Phoenix、MLflow、LangSmith、Langfuse、AgentOps、Datadog 等) | 默认不是 OTel span,而是 OpenAI Agents tracing 模型;要进入 OTLP 通常需要外部 tracing processor 或自定义 processor 做 OTel/OTLP 适配 | agent run、LLM generation、function tool、handoff、guardrail、自定义事件、会话分组 | Agent 语义完整,但与标准 OTel 数据模型之间需要转换层;敏感输入输出默认可能被采集,需显式配置 | +| AutoGen | 新版 AutoGen 内置 tracing/observability,运行时支持 OpenTelemetry,并遵循 agent/tool 与 GenAI 语义约定;旧版 0.2 主要是 logging 和 partner providers | OpenTelemetry SDK、OTLP exporter、Jaeger/Zipkin、OpenAI instrumentor、AgentOps 等 | 可直接配置 OTel `TracerProvider` 和 OTLP exporter,把 AgentChat/GroupChat 运行时事件发到 OTel 兼容后端 | 多 Agent 消息、agent runtime、tool、LLM 调用、group chat、消息元数据 | 版本差异明显;需确认使用的是新版 AgentChat/Core 还是旧版 0.2 logging 集成 | +| Dify | 产品内置 Monitoring Dashboard 和 Run History,可查看应用指标、workflow/node tracing;外部监控支持 Langfuse、LangSmith 
| Dify 内置监控、Langfuse integration、LangSmith integration | 官方文档主要体现为平台到 Langfuse/LangSmith 的集成和字段映射 | app、workflow/chatflow、node、message、dataset retrieval、tool、moderation、token、user/session | 产品语义强,适合低代码应用监控;开放 OTLP 可迁移性弱于原生 OTel instrumentation | +| CrewAI | CrewAI AMP 内置 tracing,可通过 `tracing=True` 或 `CREWAI_TRACING_ENABLED=true` 追踪 crew/flow;官方观测页列出多种外部平台 | CrewAI AMP、OpenLIT、Langfuse、LangSmith OTel、Langtrace、Arize Phoenix、MLflow、Opik、Weave、Portkey 等 | OpenLIT 是 OTel-native,可配置 `OTEL_EXPORTER_OTLP_ENDPOINT`;LangSmith/CrewAI 集成使用 `opentelemetry-instrumentation-crewai`;Langfuse 可通过 OpenInference CrewAI instrumentation 产生 OTel spans | agent、task、crew、flow、tool、LLM、任务序列、成本、延迟 | 集成选择多但语义不完全统一;CrewAI AMP 与第三方 OTel 路径需要明确数据归属和脱敏策略 | +| smolagents | 官方“Inspecting runs with OpenTelemetry”明确采用 OpenTelemetry 标准记录 agent runs | `smolagents[telemetry]`、OpenInference `SmolagentsInstrumentor`、Phoenix、Langfuse、OpenTelemetry SDK | 使用 `SmolagentsInstrumentor` 生成 OTel spans,可通过 `OTLPSpanExporter` 写 Phoenix,也可通过 Langfuse/其他 OTel 兼容平台接收 | CodeAgent、ToolCallingAgent、managed agents、工具调用、LLM 交互、多步执行 | 轻量、OTel 路径清晰;复杂评估、反馈和产品内权限仍依赖后端平台补齐 | + +从对比结果看,行业并不是简单地“统一使用某一个观测平台”,而是在向三种形态收敛: + +- 框架原生 OTel:AutoGen 新版、smolagents、Vercel AI SDK、Semantic Kernel 这类更容易直接进入 OTLP/Collector/企业 APM。 +- OTel instrumentation 桥接:LlamaIndex、CrewAI、LangChain/LangGraph 常通过 OpenInference、OpenLIT、OpenLLMetry、LangSmith OTel 等层把框架语义转成 OTel span。 +- 平台私有 tracing 再导出:OpenAI Agents SDK、Dify、CrewAI AMP 这类先保留自有产品语义,再通过 processor、callback、外部平台集成或字段映射与 OTel/LLMOps 平台互通。 + +对 Nexent 来说,比较稳妥的策略是:核心埋点直接生成 OpenTelemetry span,并在 span 属性上兼容 OpenInference、OpenTelemetry GenAI、Langfuse/LangSmith 等主流语义;对外只承诺 OTLP 可导出,不把业务链路绑定到某一个平台 SDK。这样既能接入 Phoenix/Langfuse/LangSmith 这类 LLMOps 平台,也能接入 Grafana Tempo、Zipkin、Datadog、New Relic、Elastic、Honeycomb 等通用或企业级观测后端。 + +因此,智能体可观测性的关键不是选择一个“唯一平台”,也不是强制所有链路都经过 Collector,而是先把遥测数据建模成可迁移、可组合、可扩展的结构:底层用标准 trace/metric/log 表达运行路径和性能,上层用 Agent/LLM/Tool/Retriever/Session/User/Evaluation 等语义补足业务解释能力。这样既能直连 
Phoenix、Langfuse、LangSmith 等 AI 可观测平台,也能通过 Collector 接入 Grafana Tempo、Zipkin 或企业已有 APM,避免在产品早期把监控能力锁死在某个供应商或某套私有 SDK 中。 + +## 为什么使用 OpenTelemetry + +```mermaid +timeline + title 可观测性框架与协议演进时间线 + 2010 : Google 发表 Dapper 论文 + 2012 : Prometheus 在 SoundCloud 起步 + 2015 : Jaeger 在 Uber 内部形成并发展 + 2016 : OpenTracing 进入 CNCF + 2017 : OpenCensus 推广 tracing + stats/metrics + tags + 2019 : OpenTracing 与 OpenCensus 合并为 OpenTelemetry + 2021 : OpenTelemetry 晋升 CNCF Incubating + 2022 : OpenTracing 被归档;OpenTelemetry Metrics 发布 RC 并进入 GA 周期 + 2023 : OpenCensus 于 7 月 31 日后停止维护 + 2024 : Prometheus 持续增强对 OpenTelemetry/OTLP 的互操作 + 2026 : OpenTelemetry 于 5 月 11 日 Graduated;OpenTracing compatibility 于 3 月被 deprecated +``` + +OpenTelemetry 是当前主流的可观测性开放标准,提供统一的 API、SDK、语义约定和 OTLP 传输协议。Nexent 选择 OpenTelemetry 作为监控主干,主要基于以下原因: + +- 标准化:用统一的 span、event、metric 表达 HTTP、Agent、LLM、Tool 等运行时信号,减少平台私有模型对业务代码的侵入。 +- 可移植:同一套埋点可以通过 OTLP 上报到 Phoenix、Langfuse、LangSmith、Grafana Tempo、Zipkin 或其他兼容后端,切换平台主要调整配置和 Collector pipeline。 +- 可扩展:OpenTelemetry Collector 可以在不改业务代码的情况下完成转发、过滤、批处理、认证 header 注入和多后端分发。 +- 生态成熟:FastAPI、requests 等基础组件已有自动埋点能力,Nexent 只需要补充 Agent/LLM/Tool 的业务 span。 +- 避免锁定:监控平台 SDK 可以作为增强层,但核心链路不依赖某一家平台 SDK,避免平台迁移或本地化部署时重写埋点。 +- 适合 Agent 场景:trace 的父子 span 结构天然适合表达 `agent.run -> chain step -> LLM generate/tool call -> final answer` 这类多步骤执行过程。 + +因此,Nexent 的实现原则是:业务代码只产生 OpenTelemetry 标准信号和少量平台兼容属性,平台差异收敛在配置、Collector 和展示层。 + +## OTel 规范概要 + +本文中的 OTel 规范通常指 OpenTelemetry Specification 及其配套规范。它不是某个 SDK,也不是某个监控平台,而是一套兼容性契约:规定可观测性数据应该如何生成、命名、传播、处理和导出。各语言 SDK、Collector、后端平台和自动埋点库按这套契约实现,才能保证跨语言、跨框架、跨后端互通。 + +一句话概括:OTel 规范是 OpenTelemetry 为 traces、metrics、logs 等可观测性数据制定的一套标准,保证不同语言、框架、Collector 和后端之间能够互通。 + +OpenTelemetry 规范按 signal 维度独立演进。Tracing、Metrics、Logs、Baggage 是当前主要 signal;Profiles 正在发展中,Events 通常作为 Logs 的特定事件形态讨论。每个成熟 signal 通常由 API、SDK、OTLP、Collector 和 instrumentation/contrib 生态共同组成,语义约定用于保证不同语言和组件在观测同类操作时输出一致的数据。 + +从实现视角看,OTel 规范可以拆成七个常用层面: + +| 规范领域 | 核心概念 | 作用 | 
+|----------|----------|------| +| Signals | Traces、Metrics、Logs、Baggage、Profiles | 定义可观测性数据类型。Nexent 当前重点使用 Traces 和 Metrics,Logs 通过应用日志与 span event 补充上下文;Profiles 暂不接入 | +| API | Tracer、Meter、Logger、Context、Propagator | 面向业务代码和 instrumentation 的稳定接口,业务埋点只依赖 API,不直接绑定具体 exporter | +| SDK | TracerProvider、MeterProvider、SpanProcessor、MetricReader、Sampler、Resource | 提供采样、批处理、资源描述、导出等运行时能力 | +| Data Model | Span、Metric、LogRecord、Resource、Instrumentation Scope | 定义 telemetry 数据结构,确保不同语言和平台对数据有一致理解 | +| Context Propagation | Context、SpanContext、Baggage、Propagator | 在服务、线程、异步任务和下游请求之间传递 trace 上下文,保证调用链可以串起来 | +| OTLP | OTLP HTTP、OTLP gRPC、protobuf payload | OpenTelemetry 原生传输协议,负责把 traces、metrics、logs 从应用或 Collector 发到后端 | +| Semantic Conventions | 标准属性名、span name、metric name、单位和枚举值 | 统一 HTTP、数据库、RPC、Messaging 等通用语义;AI 场景中 Nexent 额外兼容 OpenInference 和 Langfuse 属性 | + +### Signals + +OTel 把可观测性数据抽象为多个 signal。每个 signal 有独立 API 和数据模型,但共享 Resource、Context 和传播机制。 + +- Traces:由一组具有父子关系的 span 构成,用于描述一次逻辑操作的完整路径。Nexent 用 trace 表达 `agent.run` 到 LLM、Tool、Final Answer 的执行链路。 +- Metrics:由 counter、histogram、gauge 等 instrument 产生,用于描述聚合后的趋势和分布。Nexent 用 metrics 统计 LLM 延迟、TTFT、token 速率和错误数。 +- Logs:以 LogRecord 或传统日志集成的方式表达离散事件。Nexent 当前不把 Logs signal 作为主链路 exporter,但会通过应用日志和 span event 补充错误上下文。 +- Baggage:跨进程传播的键值上下文,适合传递租户、用户、实验分组等需要参与过滤和关联的业务标签。使用时需要控制基数和敏感信息。 +- Profiles:用于记录代码级资源消耗画像,当前在 OpenTelemetry 体系中仍处于发展阶段。Nexent 暂不采集 profiles,避免引入额外运行时开销。 + +Nexent 的当前落地策略是:Traces 优先,因为 Agent 运行链路需要父子 span 表达;Metrics 保留,用于趋势、告警和 dashboard;Logs 暂以应用日志和 span event 形态承载,后续如需统一日志采集,可以通过 Collector 增加 Logs pipeline。 + +### API 与 SDK + +OTel 区分 API 和 SDK: + +- API 是埋点代码依赖的稳定接口,例如 `trace.get_tracer()`、`start_as_current_span()`、`meter.create_counter()`。 +- SDK 是运行时实现,负责创建 provider、处理 span/metric、采样、批量导出和错误处理。 + +这种分层让库代码可以只依赖 API,而应用在启动时统一配置 SDK。Nexent 的 SDK 埋点遵循这个模型:业务函数只创建 span、event、metric;是否启用、导出到哪里、使用 HTTP 还是 gRPC,全部由 `MonitoringConfig` 和环境变量决定。 + +这种分层也决定了 Nexent 的边界: + +- 业务代码不直接创建 
exporter,也不直接引用 Phoenix、Langfuse、Tempo 等平台客户端。 +- 初始化层负责创建 SDK provider、resource、processor、reader 和 exporter。 +- 平台差异通过 provider profile、OTLP endpoint、header 和 Collector pipeline 表达。 + +### Resource 与 Instrumentation Scope + +Resource 描述 telemetry 来源实体,例如服务名、版本、实例、部署环境、项目名。Nexent 当前写入: + +- `service.name`:默认 `nexent-backend` +- `service.version`:当前固定为 `1.0.0` +- `service.instance.id`:当前固定为 `nexent-instance-1` +- `telemetry.provider`:当前 provider profile,例如 `otlp`、`phoenix`、`langfuse`、`grafana`、`zipkin` +- `project.name`:当配置 `MONITORING_PROJECT_NAME` 时写入 + +Instrumentation Scope 描述产生 telemetry 的 instrumentation 库或模块。后续如果需要区分 Nexent SDK、FastAPI 自动埋点、第三方库埋点,可以在 scope 层面辅助过滤。 + +### Context Propagation + +Trace 的核心是上下文传播。一个请求从 HTTP 入口进入后,后续 Agent step、LLM 调用、Tool 调用必须处在同一个 trace 上下文中,监控页面才能显示正确的父子层级。 + +OTel 的 Context 是执行范围内的不可变上下文容器,用于承载当前 span、baggage 等跨切面数据。Propagator 负责把这些上下文编码到请求边界,例如 HTTP header,再由下游服务还原。对 Nexent 来说,同进程内的 async、generator、线程和工具调用上下文保持比跨服务 header 传播更关键。 + +Nexent 的关键处理包括: + +- 业务入口只绑定一次 `AgentRunMetadata`,保存 tenant、user、agent、conversation、query、language、memory 等请求级元数据。 +- SDK 在 `NexentAgent.agent_run_with_observer` 中创建顶层 `agent.run` span,并在 Agent loop、LLM、Tool 等生命周期中自动继承上下文。 +- `monitor_endpoint` 保留为兼容 API 和低层 escape hatch,不再作为业务层新增埋点的推荐方式。 +- Agent、LLM、Tool span 统一写入 OpenInference 和 Nexent 自定义属性,避免业务 trace 绑定到单一平台字段。 + +### Semantic Conventions + +Semantic Conventions 规定常见遥测字段的命名和含义,例如 HTTP 方法、URL、状态码、错误类型、metric 单位等。使用语义约定的价值是让不同服务、语言和平台对同一类数据有一致理解。 + +Nexent 采用三层语义: + +- OTel 通用语义:用于 service、resource、HTTP 自动埋点、metric instrument 等基础字段。 +- OpenInference 语义:用于 AI span 类型,例如 `openinference.span.kind=AGENT|CHAIN|LLM|TOOL|RETRIEVER`,适配 Phoenix 等 AI observability 平台。 +- Nexent 自定义语义:用于补充 OpenInference 未覆盖的业务字段,例如 Tool span 上的 `agent.tool.input`、`agent.tool.output`、`agent.tool.duration_ms`。 + +当平台展示存在差异时,Nexent 优先保持业务 span 的通用 OpenTelemetry / OpenInference 语义,不写入平台专用字段。 + +### OTLP 与 Collector Pipeline + +OTLP 是 OpenTelemetry 原生传输协议,支持 HTTP 和 gRPC。Nexent 后端只需要把数据发到 OTLP endpoint,后端平台差异交给 Collector 处理。 + +Collector pipeline 通常由三部分组成: + +- Receiver:接收应用上报的 OTLP 
traces/metrics/logs。 +- Processor:执行批处理、内存限制、资源属性补充、过滤、采样等处理。 +- Exporter:把数据转发到 Phoenix、Langfuse、Tempo 或其他 OTLP 兼容后端。 + +OTLP 是 request/response 风格协议,客户端发送 export 请求,服务端返回成功、部分成功或失败响应。Nexent 当前支持: + +- OTLP HTTP:默认协议,便于通过网关、云平台和本地 Collector 接入。 +- OTLP gRPC:适合内部网络或偏高吞吐场景。 +- base endpoint 与 signal endpoint:支持配置 base endpoint,再由 SDK 推导 `/v1/traces` 和 `/v1/metrics`,也支持直接配置 signal-specific endpoint,避免路径重复拼接。 + +这种架构的好处是:应用侧配置保持稳定,平台迁移和本地化部署主要改 Collector 配置。例如 `grafana` 形态下 traces 转发到 Tempo;`phoenix` 形态下 traces 转发到 Phoenix;`otlp` 形态下先通过 debug exporter 验证数据是否产生。 + +## 设计目标 + +Nexent 的监控能力以 OpenTelemetry 为主干,SDK 和后端只负责生成标准 span、event、metric,并通过 OTLP 导出。Phoenix、Langfuse、LangSmith、Grafana Tempo、Zipkin 和标准 OTLP 后端作为可配置 exporter 接入,业务代码不绑定单一平台。 + +核心目标: + +- Agent 流式运行期间保持 trace 上下文,覆盖 API、服务准备、Agent 异步 generator、Agent 线程、LLM 流式输出、Python 解释器执行、真实工具调用和最终答案。 +- 通过 OpenInference 属性描述 Agent/LLM/Tool/Retriever 语义,同一套业务埋点可服务多个 OTLP 后端。 +- 支持 `otlp`、`phoenix`、`langfuse`、`langsmith`、`grafana`、`zipkin` provider profile。 +- 通过环境变量统一控制后端导出配置、本地部署形态和前端监控入口。 +- 支持 base endpoint 和 signal-specific endpoint,避免 `/v1/traces`、`/v1/metrics` 路径重复拼接。 +- FastAPI/requests 自动埋点可配置,默认压制流式接口中的 ASGI `receive/send` 噪声。 + +## 技术栈 + +| 分类 | 实现 | +|------|------| +| 标准框架 | OpenTelemetry API/SDK | +| 导出协议 | OTLP HTTP、OTLP gRPC | +| Trace exporter | `opentelemetry-exporter-otlp` HTTP/gRPC trace exporter | +| Metric exporter | `opentelemetry-exporter-otlp` HTTP/gRPC metric exporter | +| 自动埋点 | FastAPI instrumentation、requests instrumentation;requests 默认关闭 | +| AI 语义 | OpenInference 属性、Langfuse OTel 属性、Nexent 自定义业务属性 | +| Agent 框架 | SmolAgents `CodeAgent` 扩展、Nexent `CoreAgent`、`NexentAgent` | +| 配置 | 环境变量 | +| Collector | `otel/opentelemetry-collector-contrib`,支持 debug、Phoenix、Langfuse、LangSmith、Grafana/Tempo、Zipkin 部署形态 | + +## 总体架构 + +```mermaid +flowchart LR + Backend[Nexent Backend / SDK] --> OTel[OpenTelemetry TracerProvider / MeterProvider] + OTel --> Exporter[OTLP Trace / Metric Exporter] + Exporter 
--> Collector[OpenTelemetry Collector] + Collector --> Phoenix[Arize Phoenix] + Collector --> Langfuse[Langfuse] + Collector --> Tempo[Grafana Tempo] + Collector --> Zipkin[Zipkin] + Collector --> Other[OTLP Backend] + + Backend --> FastAPI[FastAPI Auto Instrumentation] + Backend --> Manual[Manual AI Spans] + Manual --> OI[OpenInference Attributes] + Manual --> LF[Langfuse Attributes] +``` + +## 配置模型 + +### 环境变量 + +| 变量 | 默认值 | 说明 | +|------|--------|------| +| `ENABLE_TELEMETRY` | `false` | 监控总开关 | +| `MONITORING_PROVIDER` | `otlp` | 监控 provider 和部署形态:`otlp`、`phoenix`、`langfuse`、`langsmith`、`grafana`、`zipkin` | +| `MONITORING_DASHBOARD_URL` | 空 | 前端顶栏监控入口跳转 URL,后端只读取并透传该值 | +| `MONITORING_PROJECT_NAME` | `nexent` | 平台项目名 | +| `OTEL_SERVICE_NAME` | `nexent-backend` | OpenTelemetry service name | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | `http://localhost:4318` | OTLP base endpoint | +| `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` | 空 | 可选 trace 专用 endpoint | +| `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT` | 空 | 可选 metric 专用 endpoint | +| `OTEL_EXPORTER_OTLP_PROTOCOL` | `http` | `http` 或 `grpc` | +| `OTEL_EXPORTER_OTLP_HEADERS` | 空 | 通用 `key=value,key2=value2` header | +| `OTEL_EXPORTER_OTLP_AUTHORIZATION` | 空 | `Authorization` header,常用于 Phoenix bearer auth 和 Langfuse Basic Auth | +| `OTEL_EXPORTER_OTLP_X_API_KEY` | 空 | `x-api-key` header,用于兼容需要该 header 的平台 | +| `OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION` | 空 | Langfuse 摄取版本,例如 `4` | +| `LANGSMITH_API_KEY` | 空 | LangSmith API Key,后端直连时映射为 `x-api-key`,Collector 转发时注入 exporter header | +| `LANGSMITH_PROJECT` | 空 | 可选 LangSmith project header | +| `LANGSMITH_OTLP_TRACES_ENDPOINT` | `https://api.smith.langchain.com/otel/v1/traces` | Collector 转发到在线 LangSmith 的 trace endpoint | +| `OTEL_EXPORTER_OTLP_METRICS_ENABLED` | `true` | 是否导出 metric | +| `MONITORING_INSTRUMENT_REQUESTS` | `false` | 是否启用 requests 自动 HTTP client span | +| `MONITORING_FASTAPI_EXCLUDED_URLS` | 空 | FastAPI 自动埋点排除 URL,逗号分隔正则 | +| `MONITORING_FASTAPI_EXCLUDE_SPANS` | 
`receive,send` | 排除 ASGI 内部 `receive/send` span,流式接口建议保持默认 | +| `OTEL_COLLECTOR_VERSION` | `0.150.0` | 本地 OpenTelemetry Collector Contrib 镜像版本 | +| `PHOENIX_VERSION` | `15` | 本地 Phoenix 镜像版本 | +| `LANGFUSE_VERSION` | `3` | 本地 Langfuse Web/Worker 镜像版本 | +| `LANGFUSE_POSTGRES_VERSION` | `15-alpine` | 本地 Langfuse Postgres 镜像版本 | +| `LANGFUSE_CLICKHOUSE_VERSION` | `26.3-alpine` | 本地 Langfuse ClickHouse 镜像版本 | +| `LANGFUSE_MINIO_VERSION` | `RELEASE.2023-12-20T01-00-02Z` | 本地 Langfuse MinIO 镜像版本 | +| `LANGFUSE_REDIS_VERSION` | `alpine` | 本地 Langfuse Redis 镜像版本 | +| `GRAFANA_VERSION` | `12.4` | 本地 Grafana 镜像版本 | +| `GRAFANA_PORT` | `3002` | 本地 Grafana UI 端口 | +| `GRAFANA_DEFAULT_LANGUAGE` | `zh-Hans` | 本地 Grafana 默认界面语言 | +| `TEMPO_VERSION` | `2.10.5` | 本地 Tempo 镜像版本,避免浮动 tag 带来的配置兼容性漂移 | +| `TEMPO_PORT` | `3200` | 本地 Tempo HTTP API 端口 | +| `ZIPKIN_VERSION` | `latest` | 本地 Zipkin 镜像版本 | +| `ZIPKIN_PORT` | `9411` | 本地 Zipkin UI/API 端口 | + +## Endpoint 规则 + +HTTP exporter 支持两种输入: + +- base endpoint:`https://cloud.langfuse.com/api/public/otel` +- signal endpoint:`https://cloud.langfuse.com/api/public/otel/v1/traces` + +SDK 会按 signal 派生最终地址: + +| 输入 | Trace endpoint | Metric endpoint | +|------|----------------|-----------------| +| `https://host/api/public/otel` | `https://host/api/public/otel/v1/traces` | `https://host/api/public/otel/v1/metrics` | +| `https://host/api/public/otel/v1/traces` | 原值 | `https://host/api/public/otel/v1/metrics` | +| `https://host/api/public/otel/v1/metrics` | `https://host/api/public/otel/v1/traces` | 原值 | + +## 平台接入 + +### 纯 OTLP / 自建 Collector + +```bash +MONITORING_PROVIDER=otlp +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http +``` + +前端顶栏监控入口不再根据 provider 在代码中映射 UI 端口和路径。后端读取 `MONITORING_DASHBOARD_URL` 并通过 `/monitoring/status` 返回给前端;该值为空时前端不显示监控入口。因此本地 Grafana 形态需要在后端 `.env` 中设置: + +```bash +MONITORING_PROVIDER=grafana 
+MONITORING_DASHBOARD_URL=http://localhost:3002/d/nexent-llm-agent/nexent-agent-trace-monitoring?orgId=1 +``` + +### Phoenix + +Phoenix 通过 OpenInference 属性识别 AI span 类型,核心字段是 `openinference.span.kind`。 + +```bash +MONITORING_PROVIDER=phoenix +OTEL_EXPORTER_OTLP_ENDPOINT=https://app.phoenix.arize.com/s/YOUR_SPACE +OTEL_EXPORTER_OTLP_AUTHORIZATION="Bearer YOUR_PHOENIX_API_KEY" +OTEL_EXPORTER_OTLP_METRICS_ENABLED=false +MONITORING_PROJECT_NAME=nexent-production +``` + +### Langfuse + +Langfuse 的 OTLP HTTP base endpoint 是 `/api/public/otel`,使用 Basic Auth。实时摄取建议带 `x-langfuse-ingestion-version=4`。 + +```bash +MONITORING_PROVIDER=langfuse +OTEL_EXPORTER_OTLP_ENDPOINT=https://cloud.langfuse.com/api/public/otel +OTEL_EXPORTER_OTLP_AUTHORIZATION="Basic BASE64_PUBLIC_SECRET" +OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION=4 +OTEL_EXPORTER_OTLP_METRICS_ENABLED=false +``` + +当前实现不写入 `langfuse.*` 专用 span 属性,Langfuse 通过 OTLP 接收通用 OpenTelemetry / OpenInference span。 + +### LangSmith + +LangSmith 的在线 OTLP trace endpoint 为 `https://api.smith.langchain.com/otel/v1/traces`,使用 `x-api-key` header 认证,可通过 `Langsmith-Project` header 指定项目。推荐仍让 Nexent 后端上报到本地 Collector,由 Collector 注入 LangSmith API Key 并转发 traces: + +```bash +MONITORING_PROVIDER=langsmith +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http +OTEL_EXPORTER_OTLP_METRICS_ENABLED=false +``` + +Collector 侧配置 `LANGSMITH_API_KEY`、`LANGSMITH_PROJECT` 和 `LANGSMITH_OTLP_TRACES_ENDPOINT`。LangSmith 当前形态只转发 traces,metrics 进入 Collector debug pipeline。 + +### Zipkin + +Zipkin 通过 Collector 的 Zipkin exporter 接收 traces。推荐 Nexent 后端仍然只上报到本地 Collector,由 Collector 转发到 Zipkin v2 spans endpoint: + +```bash +MONITORING_PROVIDER=zipkin +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http +OTEL_EXPORTER_OTLP_METRICS_ENABLED=false +MONITORING_DASHBOARD_URL=http://localhost:9411 +``` + +Zipkin 当前本地形态只转发 traces;metrics 进入 Collector debug pipeline。 + +## 本地化部署设计 + +本地化部署通过 
`docker/start-monitoring.sh` 选择形态。所有形态都保留 OpenTelemetry Collector 作为入口,Nexent 后端统一上报到 `http://otel-collector:4318` 或宿主机的 `http://localhost:4318`,平台差异只体现在 Collector exporter 和本地服务组合上。 + +| 形态 | Collector 配置 | 本地服务 | 数据去向 | 说明 | +|------|----------------|----------|----------|------| +| `otlp` | `otel-collector-config.yml` | Collector | debug exporter | 最小形态,用于验证 span/metric 是否产生,或手动改配置转发到云端平台;`collector` 仅作为启动脚本兼容别名 | +| `phoenix` | `otel-collector-phoenix-config.yml` | Collector + Phoenix | `http://phoenix:6006/v1/traces` | Phoenix 容器同时提供 UI 和 OTLP HTTP/gRPC trace collector,适合本地 trace debug | +| `langfuse` | `otel-collector-langfuse-config.yml` | Collector + Langfuse Web/Worker + Postgres + ClickHouse + MinIO + Redis | `http://langfuse-web:3000/api/public/otel/v1/traces` | Langfuse v3 依赖多组件,适合完整 LLMOps 能力验证 | +| `langsmith` | `otel-collector-langsmith-config.yml` | Collector | `https://api.smith.langchain.com/otel/v1/traces` | 在线 LangSmith trace 分析;API Key 只配置在 Collector 环境 | +| `grafana` | `otel-collector-grafana-config.yml` | Collector + Grafana + Tempo | traces 转发到 `tempo:4317`,metrics 只进入 Collector debug pipeline | Grafana + Tempo trace 查询 | +| `zipkin` | `otel-collector-zipkin-config.yml` | Collector + Zipkin | traces 转发到 `zipkin:9411/api/v2/spans`,metrics 只进入 Collector debug pipeline | Zipkin trace 查询 | + +启动命令: + +```bash +cd docker +./start-monitoring.sh --stack otlp +./start-monitoring.sh --stack phoenix +./start-monitoring.sh --stack langfuse +./start-monitoring.sh --stack langsmith +./start-monitoring.sh --stack grafana +./start-monitoring.sh --stack zipkin +``` + +部署脚本职责: + +- 创建或复用 `nexent-network`。 +- 首次启动时从 `monitoring.env.example` 生成 `monitoring.env`。 +- 根据 `MONITORING_PROVIDER` 或 `--stack` 选择 Docker Compose profile。 +- 根据部署形态设置 `OTEL_COLLECTOR_CONFIG_FILE`。 +- Langfuse 本地形态下,如果 `LANGFUSE_OTLP_AUTH_HEADER` 未显式配置,则使用初始化项目的 public/secret key 生成 Basic Auth header。 +- LangSmith 在线形态要求 `LANGSMITH_API_KEY`,启动时会校验该变量,避免 Collector 静默丢弃鉴权失败的 trace。 + +### 
Phoenix 本地形态 + +Phoenix 使用 `arizephoenix/phoenix` 镜像,默认暴露: + +| 端口 | 用途 | +|------|------| +| `6006` | Phoenix UI 和 OTLP HTTP `/v1/traces` | +| `4319` | 映射到容器内 gRPC OTLP `4317`,避免与 Collector gRPC 端口冲突 | + +Compose 中设置 `PHOENIX_WORKING_DIR=/mnt/data` 并挂载 `phoenix-data` volume,确保本地重启后 trace 数据不丢失。Collector 使用 `otlphttp/phoenix` exporter 的 base endpoint `http://phoenix:6006`,由 Collector 按 OTLP HTTP 规则追加 `/v1/traces`。 + +### Langfuse 本地形态 + +Langfuse v3 本地形态按自托管架构拆分为应用容器和存储组件: + +| 组件 | 用途 | +|------|------| +| `langfuse-web` | UI、API、OTLP HTTP ingestion | +| `langfuse-worker` | 异步消费和处理 trace 事件 | +| `langfuse-postgres` | 事务型元数据 | +| `langfuse-clickhouse` | trace/observation/score 分析数据 | +| `langfuse-minio` | S3 兼容对象存储,保存事件和大对象 | +| `langfuse-redis` | 队列和缓存 | + +初始化参数通过 `LANGFUSE_INIT_*` 配置,默认创建 `nexent-local` 项目和本地 API Key。Collector 使用 `otlphttp/langfuse` exporter,endpoint 为 `http://langfuse-web:3000/api/public/otel`,并携带: + +```yaml +headers: + Authorization: ${env:LANGFUSE_OTLP_AUTH_HEADER} + x-langfuse-ingestion-version: "4" +``` + +默认密钥仅用于本地验证。生产或共享环境必须替换认证密钥、数据库密码、对象存储密钥和 `LANGFUSE_ENCRYPTION_KEY`,并补充备份、高可用和升级策略。 + +### Grafana 本地形态 + +Grafana 本地形态面向 trace 调试: + +| 组件 | 用途 | +|------|------| +| `grafana` | 展示 Nexent Agent trace dashboard,并预置 Tempo datasource | +| `tempo` | 接收 Collector 转发的 OTLP traces,并提供 Grafana Explore 查询后端 | + +Collector trace pipeline 使用 `otlp/tempo` exporter 转发到 `tempo:4317`。Tempo 启用 `metrics-generator` 的 `local-blocks` processor,用于支持 Grafana trace breakdown 中的 TraceQL metrics 查询。Collector metrics pipeline 保留为 debug exporter,用于兼容后端仍开启 OTLP metrics 的场景,但本地 Grafana 形态不提供独立指标存储和指标 dashboard。 + +### Zipkin 本地形态 + +Zipkin 本地形态面向轻量 trace 查询: + +| 组件 | 用途 | +|------|------| +| `zipkin` | 接收 Collector 转发的 traces,并提供 trace 查询 UI | + +Collector trace pipeline 使用 `zipkin` exporter 转发到 `http://zipkin:9411/api/v2/spans`。Collector metrics pipeline 保留为 debug exporter。 + +默认访问地址: + +- Zipkin UI:`http://localhost:9411` + +## Span 语义映射 + +| Nexent 场景 | 
OpenInference | +|-------------|---------------| +| Agent 入口 | `openinference.span.kind=AGENT` | +| 服务准备、流式生成、线程执行、普通步骤 | `openinference.span.kind=CHAIN` | +| LLM 调用 | `openinference.span.kind=LLM` | +| 工具调用 | `openinference.span.kind=TOOL` | +| 检索类调用 | `openinference.span.kind=RETRIEVER` | + +上下文属性: + +| 属性 | 说明 | +|------|------| +| `input.value` / `output.value` | OpenInference 输入输出 | +| `metadata` | OpenInference JSON metadata | +| `session.id` / `user.id` | OpenInference 会话和用户 | +| `tag.tags` | OpenInference tags | + +## 埋点信息 + +| 埋点 | 位置 | 类型 | 内容 | 目的 | +|------|------|------|------|------| +| FastAPI 自动 span | `MonitoringManager.setup_fastapi_app` | HTTP server | route、method、status、duration | API 入口耗时和错误定位 | +| FastAPI `receive/send` 排除 | `fastapi_exclude_spans` | 降噪配置 | 默认 `receive,send` | 避免 SSE 流式接口生成大量 `unknown POST /agent/run http ...` | +| requests 自动 span | `MonitoringConfig.instrument_requests` | HTTP client | 外部请求 URL、method、status | 默认关闭;需要分析外部 HTTP 依赖时开启 | +| `AgentRunMetadata` | `run_agent_stream` 边界 | context | tenant、user、agent、conversation、query、language、memory、文件数 | 业务层只绑定一次请求上下文,后续 span 由 SDK 自动继承 | +| `agent.run` | `NexentAgent.agent_run_with_observer` | AGENT | query、session、user、tenant、agent、metadata、tags | 作为一次 Agent 运行的顶层业务 trace | +| `agent.run.loop` | `NexentAgent.agent_run_with_observer` | CHAIN | Agent loop、step、最终输出 | 追踪实际 Agent 执行生命周期 | +| `{display_name or model_id}.generate` | `sdk/nexent/core/models/openai_llm.py` | LLM / generation | 模型、温度、top_p、消息、输入输出、token、TTFT、chunk 数 | LLM 性能、成本、输出和异常分析 | +| `python_interpreter` | `sdk/nexent/core/agents/core_agent.py` | TOOL | 生成代码、step number、执行输出、日志、是否最终答案 | 观测 CodeAgent 解释器执行 | +| 真实工具名 | `sdk/nexent/core/agents/nexent_agent.py` | TOOL | local/MCP/langchain/builtin 工具输入输出 | 观测真实工具可用性、延迟、错误和输入输出 | +| `FinalAnswerTool` | `sdk/nexent/core/agents/core_agent.py` | TOOL | 最终答案输出 | 让 Phoenix/Langfuse 中能明确看到最终答案节点 | +| `monitor_endpoint` | SDK 兼容 API | AGENT / CHAIN | 自定义 operation、参数、错误 | 
低层 escape hatch;不推荐业务层新增常规埋点 | +| `start_agent_run` / `trace_agent_step` / `trace_retriever_call` | SDK 公共 API | AGENT / CHAIN / RETRIEVER | Agent metadata、输入输出、session、user | SDK 生命周期埋点和少量自定义层级埋点 | +| `trace_tool_call` | SDK 公共 API | TOOL | 工具名、输入、输出、耗时、错误 | SDK 用户自定义工具埋点 | + +### 事件清单 + +| Span / 位置 | Event | 主要属性 | 目的 | +|-------------|-------|----------|------| +| `agent.run` | `agent.run.started` / `agent.run.completed` / `agent.run.error` | `error.*` | 观测一次 Agent 运行的开始、结束和异常 | +| LLM span | `completion_started` / `first_token_received` / `token_generated` / `completion_finished` / `model_stopped` / `error_occurred` | `model_id`、`temperature`、`top_p`、`message_count`、`total_duration`、`output_length`、`chunk_count`、`error.*` | 分析模型参数、流式输出耗时、停止和异常 | +| Tool span | span 属性 `agent.tool.input` / `agent.tool.output` | JSON 字符串、`agent.tool.duration_ms`、`error.*` | 分析工具输入输出、耗时和异常 | + +## 指标 + +| 指标 | 类型 | 维度 | 用途 | +|------|------|------|------| +| `llm.request.duration` | histogram | model、operation | LLM 请求延迟 | +| `llm.token.generation_rate` | histogram | model | token/s | +| `llm.time_to_first_token` | histogram | model | 首 token 延迟 | +| `llm.token_count.prompt` | counter | model | 输入 token 成本 | +| `llm.token_count.completion` | counter | model | 输出 token 成本 | +| `llm.error.count` | counter | model、operation | LLM 错误率 | +| `agent.step.count` | counter | agent、step type、tool | Agent 步骤和工具调用量 | +| `agent.execution.duration` | histogram | agent、status | Agent 总耗时 | +| `agent.error.count` | counter | agent、error type | Agent 异常统计 | + +## Agent 运行数据流 + +```mermaid +flowchart TD + U[用户] --> FE[前端 Chat] + FE --> API[POST /agent/run] + API --> HTTP[FastAPI HTTP span: 可配置隐藏] + API --> Bind[绑定 AgentRunMetadata] + Bind --> Mem[解析 memory 开关] + Mem --> Strategy{with_memory / no_memory} + Strategy -->|with_memory| G1[generate_stream_with_memory] + Strategy -->|no_memory| G2[generate_stream_no_memory] + G1 --> AR[agent_run async generator] + G2 --> AR + AR --> 
Thread[agent_run_thread] + Thread --> NX[NexentAgent / CoreAgent] + NX --> A0[agent.run span: AGENT] + A0 --> Step[agent.run.loop: CHAIN] + Step --> LLM[Model.generate: LLM / generation] + Step --> PY[python_interpreter: TOOL] + PY --> Tool[Real local / MCP / langchain / builtin tool: TOOL] + PY --> Final[FinalAnswerTool: TOOL] + LLM --> Attr1[OpenInference + Langfuse attrs] + Tool --> Attr1 + Final --> Attr1 + Attr1 --> OTel[OpenTelemetry Tracer/Meter Provider] + OTel --> Collector[OTLP Collector] + Collector --> Phoenix[Phoenix] + Collector --> Langfuse[Langfuse] + Collector --> Tempo[Grafana Tempo] + Collector --> Zipkin[Zipkin] + Collector --> Other[OTLP Backend] +``` + +预期平台树形结构: + +```text +agent.run agent +└─ agent.run.loop chain + ├─ Model.generate llm / generation + ├─ python_interpreter tool + │ └─ RealTool tool + └─ FinalAnswerTool tool +``` + +FastAPI HTTP span 可以保留在最上层用于接口视角,也可以通过 `MONITORING_FASTAPI_EXCLUDED_URLS=/agent/run` 在 AI trace 视图中隐藏。 + +## 监控页面结构 + +```mermaid +flowchart TB + Page[Agent 监控页] --> Filters[筛选区: 时间 / 租户 / 用户 / Agent / 会话 / 模型 / 状态] + Page --> KPIs[指标区: 成功率 / P95 / TTFT / tokens/s / token 成本 / 工具错误数] + Page --> TraceList[Trace 列表: Agent / 会话 / 用户 / 状态 / 耗时 / Token / 模型 / 最后错误] + Page --> Detail[Trace 详情] + Detail --> Waterfall[Span 瀑布图: agent / chain / llm / tool] + Detail --> Timeline[Agent 时间线: 准备 / 记忆 / LLM / 工具 / 最终答案] + Detail --> LLMPanel[LLM 面板: prompt / output / token / TTFT / generation rate] + Detail --> ToolPanel[工具面板: 工具名 / 输入 / 输出 / 耗时 / 错误] + Detail --> Session[会话和用户上下文] + Detail --> Raw[原始 OTel 属性和 events] + Detail --> Eval[反馈、评分和评估] +``` + +监控平台之间不能只按“是否能收 trace”比较。对智能体场景,更关键的是是否理解 LLM/Agent 语义、是否支持评估和反馈、是否适合本地化部署、是否能与企业已有 APM 合流。下面按 Nexent 可能接入的平台做比较: + +| 平台 | 类型 | 部署形态 | 主要接入方式 | AI / Agent 语义 | Metrics / Logs | 评估 / 反馈 | 适合场景 | Nexent 当前适配 | +|------|------|----------|--------------|-----------------|----------------|-------------|----------|----------------| +| Phoenix | AI 原生可观测性 / 实验分析 | 云服务或自托管 | 
OTLP、OpenInference、Phoenix SDK | OpenInference 生态匹配好,适合展示 LLM、retriever、agent、tool 等语义 | 重点在 trace 和实验分析,通用 infra 监控不是核心 | 支持 eval、dataset、实验分析 | 本地 trace debug、RAG/LLM 质量分析、OpenInference 语义验证 | 写入 OpenInference 属性;支持本地 Phoenix stack 和 OTLP 转发 | +| Langfuse | LLMOps / Prompt 与 Trace 平台 | 云服务或自托管 | OTLP、Langfuse SDK、API | 对 trace、observation、session、user、prompt、metadata 支持完整 | 提供 LLM 应用维度 dashboard,通用 infra 监控不是重点 | 支持 score、feedback、eval、prompt 管理 | 需要 prompt 管理、用户会话、反馈和成本闭环的 LLM 应用 | 支持本地 Langfuse stack 和 OTLP 转发;业务 span 不写入 `langfuse.*` 专用属性 | +| LangSmith | LangChain / LangGraph 生态观测与评估 | 云服务为主 | LangSmith SDK、OTLP endpoint | 与 LangChain/LangGraph run、thread、feedback、evaluation 生态贴合 | 重点在应用 trace 和评估,不替代通用 APM | 评估、dataset、反馈、回归测试能力强 | 使用 LangChain/LangGraph 或需要在线评估闭环 | 支持 Collector 注入 `x-api-key` 和 `Langsmith-Project` 转发 traces | +| Grafana Tempo + Grafana | 通用 trace 后端 / Dashboard | 自托管或云服务 | OTLP、Jaeger、Zipkin 等,经 Collector 常见 | 不内置 LLM/Agent 专用语义,需要 dashboard 和属性约定补充 | Grafana 生态可接 Prometheus、Loki、Tempo 组合 | 不提供原生 LLM 评估,需要外部系统 | 私有化、本地化、已有 Grafana/Prometheus/Loki 体系 | 支持本地 Tempo + Grafana stack,预置 Tempo datasource 和 trace dashboard | +| Zipkin | 轻量分布式 tracing | 自托管 | Zipkin API,通常由 Collector exporter 转发 | 只理解通用 trace/span,不理解 LLM/Agent 语义 | 不提供 metrics/logs 平台能力 | 不提供评估能力 | 最小本地 trace 查询、验证转发链路、低成本调试 | 支持本地 Zipkin stack,Collector 转发 traces | +| Datadog LLM Observability | 全栈 APM + LLM Observability | 云服务 / Agent | Datadog SDK、Agent、OTel/OTLP 等 | 支持 LLM 应用 traces、prompt/completion、成本、质量和安全维度 | 全栈 metrics/logs/traces/APM/infra 能力强 | 支持 LLM evaluations、质量和安全监控 | 企业已有 Datadog,需把 AI 应用纳入统一生产监控 | 可通过标准 OTLP/Collector 或平台 SDK 接入,当前未内置本地 stack | +| New Relic AI Monitoring | 全栈 APM + AI Monitoring | 云服务 / Agent | New Relic agent、OTel/OTLP 等 | 关注 LLM app 性能、错误、成本和模型交互 | 全栈 APM、infra、logs、browser/mobile 生态完整 | 提供 AI 应用监控与分析能力,评估深度依赖平台能力 | 企业已有 New Relic,关注生产运行和统一告警 | 可通过标准 OTLP/Collector 或平台 agent 接入,当前未内置本地 stack | +| Elastic Observability | 全栈可观测性 / 搜索分析 | 云服务或自托管 | 
Elastic APM agent、OTel/OTLP、EDOT | 支持 LLM observability 和 OTel 语义,适合把 AI trace 与日志、指标、搜索分析合并 | logs、metrics、traces、搜索分析能力强 | 侧重监控、分析和 dashboard,业务评估闭环仍需额外设计 | 已有 Elastic Stack、重视日志检索、私有化和统一搜索分析 | 可通过 OTLP/Collector 对接,当前未内置本地 stack | +| Honeycomb | 事件驱动可观测性 / 高基数分析 | 云服务 | OTLP、OpenTelemetry SDK、Events API / libhoney | 擅长高基数 trace/event 分析,AI 语义通过属性和 OTel GenAI 约定表达 | 强在 trace/event 和指标分析,日志通常通过事件化方式分析 | 不提供完整 LLMOps 评估闭环 | 需要按租户、用户、agent、tool 做高维切片分析 | 可通过 OTLP/Collector 对接,当前未内置本地 stack | +| Nexent 自建页 | 产品内业务观测 | 自建 | 复用 OTel 属性和业务数据库 | 最能理解租户、会话、Agent 配置、权限、版本和业务动作 | 需要自建指标、查询、存储和告警 | 可与产品反馈、评分和评估闭环深度结合 | 产品内闭环、权限隔离、面向终端用户或运维角色的监控页 | 当前先通过 OTLP 对接外部平台,后续可基于同一批属性构建自有页面 | + +从选型上可以把平台分成三类: + +- AI 原生平台优先解决“Agent 为什么这样回答、prompt/tool/retrieval 是否有效、质量如何评估”的问题,适合研发调试和 LLMOps 闭环。 +- 通用 trace 后端优先解决“链路是否完整、哪一步慢、部署是否轻量和可私有化”的问题,适合本地调试和私有化基础能力。 +- 全栈 APM 优先解决“生产系统整体是否健康、AI 服务如何纳入企业统一监控、告警和审计”的问题,适合已有企业监控体系的团队。 + +按使用场景选择时,可以简化成下面的矩阵: + +| 场景 | 优先平台 | 原因 | 代价 | +|------|----------|------|------| +| 本地开发和快速看 trace | Phoenix、Zipkin、Grafana Tempo | 自托管简单,能快速验证 span 层级、Collector 转发和属性是否正确 | 对质量评估、prompt 管理和业务闭环支持有限 | +| RAG / Agent 质量分析 | Phoenix、Langfuse、LangSmith | 更理解 prompt、completion、retriever、tool、session、feedback 和 eval | 平台语义差异较大,需要保留可迁移的 OTel 属性 | +| 企业生产统一监控 | Datadog、New Relic、Elastic、Honeycomb | 能和服务、基础设施、日志、指标、告警、权限体系合流 | AI 业务语义需要通过 OTel GenAI/OpenInference/自定义属性补齐 | +| 产品内用户态监控页 | Nexent 自建页 + 外部 trace 后端 | 能结合租户、权限、Agent 配置、会话、反馈和产品操作 | 需要自建查询、聚合、权限隔离和可视化能力 | + +因此 Nexent 的策略不是只绑定一个平台,而是以 OpenTelemetry/OTLP 和兼容语义属性作为主干:本地默认支持 Phoenix、Langfuse、Grafana Tempo、Zipkin 等便于验证的形态;线上或企业环境可以把同一批 traces 转发到 LangSmith、Datadog、New Relic、Elastic、Honeycomb 或其他 OTLP 兼容后端。 + +推荐路径: + +1. 短期使用 OTLP 对接 Phoenix/Langfuse/LangSmith,满足调试和分析。 +2. 中期在 Nexent 增加 trace 跳转、轻量指标概览和异常聚合。 +3. 
长期按租户、会话、Agent 版本建立自有监控页,同时保留 OTLP 双写能力。 + +## 已修复的设计风险 + +| 风险 | 修复 | +|------|------| +| 业务层埋点耦合过高 | 业务入口只绑定 `AgentRunMetadata`,Agent/LLM/Tool 语义 span 下沉到 SDK 生命周期 | +| `/v1/traces` 路径重复拼接 | SDK 支持 base endpoint 和 signal endpoint 自动归一化 | +| Collector header 无法兼容平台 | Collector 默认只 debug;平台转发配置拆分 `Authorization`、`x-api-key`、`x-langfuse-ingestion-version` | +| Phoenix 只看到接口看不到 Agent | SDK 顶层 `agent.run` 标记为 AGENT,内部 `agent.run.loop` 标记为 CHAIN | +| Phoenix/Langfuse 中出现大量 `unknown POST /agent/run http ...` | 默认排除 FastAPI ASGI `receive/send` span;requests 自动埋点默认关闭;可配置隐藏 `/agent/run` HTTP span | +| Langfuse 字段耦合过重 | 不写入 `langfuse.*` 专用 span 属性,仅保留 OTLP 转发和 OpenInference 语义 | +| LLM span 不明显或缺输出 | LLM span 命名为 `{display_name or model_id}.generate`,并写入 `output.value` | +| 工具 span 缺失 | 在 `NexentAgent.create_single_agent` 统一包装 local/MCP/langchain/builtin 工具,并在 `CoreAgent` 增加 `python_interpreter` 和 `FinalAnswerTool` span | +| 单测漏掉 SDK 生命周期路径 | 增加 AgentRunMetadata、Agent/chain、LLM/Tool 继承上下文测试 | + +## 使用建议 + +只看 Agent 业务链路时: + +```bash +MONITORING_FASTAPI_EXCLUDE_SPANS=receive,send +MONITORING_FASTAPI_EXCLUDED_URLS=/agent/run +MONITORING_INSTRUMENT_REQUESTS=false +``` + +同时看接口入口和 Agent 业务链路时: + +```bash +MONITORING_FASTAPI_EXCLUDE_SPANS=receive,send +MONITORING_FASTAPI_EXCLUDED_URLS= +MONITORING_INSTRUMENT_REQUESTS=false +``` + +需要排查外部 HTTP 依赖时: + +```bash +MONITORING_INSTRUMENT_REQUESTS=true +``` + +## 参考 + +- OpenTelemetry Collector: https://opentelemetry.io/docs/collector/ +- OpenTelemetry OTLP Specification: https://opentelemetry.io/docs/specs/otlp/ +- OpenTelemetry GenAI Semantic Conventions: https://opentelemetry.io/docs/specs/semconv/gen-ai/ +- OpenInference Semantic Conventions: https://arize-ai.github.io/openinference/spec/semantic_conventions.html +- LangSmith Trace with OpenTelemetry: https://docs.langchain.com/langsmith/trace-with-opentelemetry +- LangGraph Observability: https://docs.langchain.com/langgraph-platform/langsmith-observability +- LlamaIndex 
Observability: https://docs.llamaindex.ai/en/stable/module_guides/observability/ +- LlamaIndex OpenTelemetry Integration: https://docs.llamaindex.ai/en/stable/api_reference/observability/otel/ +- OpenAI Agents SDK Tracing: https://openai.github.io/openai-agents-python/tracing/ +- Semantic Kernel Telemetry: https://learn.microsoft.com/en-us/semantic-kernel/concepts/enterprise-readiness/observability/telemetry-with-console +- CrewAI Tracing: https://docs.crewai.com/en/observability/tracing +- CrewAI OpenTelemetry Export: https://docs.crewai.com/en/enterprise/guides/capture_telemetry_logs +- CrewAI OpenLIT Integration: https://docs.crewai.com/en/observability/openlit +- AgentOps CrewAI Integration: https://docs.agentops.ai/v1/integrations/crewai +- AutoGen Agent Observability: https://microsoft.github.io/autogen/stable/user-guide/agentchat-user-guide/agent-observability.html +- AutoGen Tracing and Observability: https://microsoft.github.io/autogen/stable/user-guide/agentchat-user-guide/tracing.html +- Dify Monitoring Dashboard: https://docs.dify.ai/en/use-dify/monitor/analysis +- Dify Langfuse Integration: https://docs.dify.ai/en/use-dify/monitor/integrations/integrate-langfuse +- Dify LangSmith Integration: https://docs.dify.ai/en/use-dify/monitor/integrations/integrate-langsmith +- Dify Agent Node: https://docs.dify.ai/en/guides/workflow/node/agent +- smolagents Inspecting runs with OpenTelemetry: https://huggingface.co/docs/smolagents/en/tutorials/inspect_runs +- smolagents Phoenix tracing guide: https://huggingface.co/blog/smolagents-phoenix +- Vercel AI SDK Telemetry: https://ai-sdk.dev/docs/ai-sdk-core/telemetry +- Haystack Tracing: https://docs.haystack.deepset.ai/docs/tracing +- Phoenix Setup Tracing: https://arize.com/docs/phoenix/tracing/how-to-tracing/setup-tracing +- Phoenix Setup OTEL: https://arize.com/docs/phoenix/tracing/how-to-tracing/setup-tracing/setup-using-phoenix-otel +- Phoenix Authentication: 
https://arize.com/docs/phoenix/deployment/authentication +- Phoenix Self-Hosting: https://arize.com/docs/phoenix/self-hosting +- Phoenix Docker Deployment: https://arize.com/docs/phoenix/self-hosting/deployment-options/docker +- Langfuse OpenTelemetry: https://langfuse.com/integrations/native/opentelemetry +- Langfuse Self-Hosting: https://langfuse.com/self-hosting +- Langfuse Docker Compose: https://langfuse.com/self-hosting/local +- Langfuse Overview: https://langfuse.com/docs +- LangSmith OpenTelemetry: https://docs.langchain.com/langsmith/otel-gateway-trace-redaction +- Datadog LLM Observability: https://docs.datadoghq.com/llm_observability/ +- New Relic AI Monitoring: https://docs.newrelic.com/docs/ai-monitoring/intro-to-ai-monitoring/ +- Elastic OpenTelemetry: https://www.elastic.co/docs/solutions/observability/apm/opentelemetry/ +- Elastic EDOT data streams: https://www.elastic.co/docs/reference/opentelemetry/data-streams +- Honeycomb Send Data: https://docs.honeycomb.io/send-data/ +- Honeycomb for LLMs: https://docs.honeycomb.io/send-data/llm/ +- Grafana Tempo: https://grafana.com/docs/tempo/latest/ +- Zipkin OpenTelemetry Collector exporter: https://opentelemetry.io/docs/collector/configuration/#exporters +- Zipkin Docker image: https://hub.docker.com/r/openzipkin/zipkin diff --git a/doc/docs/zh/sdk/vector-database.md b/doc/docs/zh/sdk/vector-database.md index 940af9c33..b940400fd 100644 --- a/doc/docs/zh/sdk/vector-database.md +++ b/doc/docs/zh/sdk/vector-database.md @@ -579,7 +579,11 @@ python -m nexent.service.vectordatabase_service - 参数: - `index_name`: 索引名称 (路径参数) - `path_or_url`: 文档路径或URL (查询参数) - - 返回示例: `{"status": "success", "deleted_count": 1}` + - `scope`: 删除范围 (查询参数,默认 `full`) + - `source_only`: 仅删除 MinIO 源文件,保留 ES 中的切片与向量(检索仍可用,预览不可用) + - `full`: 删除 ES 文档、MinIO 源文件,并清理相关 Redis 任务记录 + - 返回示例 (`source_only`): `{"status": "success", "scope": "source_only", "deleted_es_count": 0, "deleted_minio": true, "source_available": false}` + - 返回示例 
(`full`): `{"status": "success", "scope": "full", "deleted_es_count": 5, "deleted_minio": true}` #### 搜索操作 @@ -728,8 +732,11 @@ curl -X POST "http://localhost:8000/indices/search/hybrid" \ "weight_accurate": 0.3 }' -# 删除文档 -curl -X DELETE "http://localhost:8000/indices/my_documents/documents?path_or_url=https://example.com/doc1" +# 删除源文件(保留索引) +curl -X DELETE "http://localhost:8000/indices/my_documents/documents?path_or_url=knowledge_base/doc1.pdf&scope=source_only" + +# 从知识库彻底移除文档 +curl -X DELETE "http://localhost:8000/indices/my_documents/documents?path_or_url=knowledge_base/doc1.pdf&scope=full" # 创建索引 curl -X POST "http://localhost:8000/indices/my_documents" diff --git a/doc/docs/zh/user-guide/agent-development.md b/doc/docs/zh/user-guide/agent-development.md index 67d3c8311..40805aeea 100644 --- a/doc/docs/zh/user-guide/agent-development.md +++ b/doc/docs/zh/user-guide/agent-development.md @@ -31,15 +31,99 @@ ### 🤝 协作 Agent +协作智能体用于帮助当前智能体完成复杂任务。协作智能体的来源分为两类: + +- **内部 Agent**:平台已发布的智能体 +- **外部 A2A Agent**:通过 A2A 协议发现的第三方 Agent + 1. 点击"协作 Agent"页签下的加号,弹出可选择的智能体列表 -2. 在下拉列表中选择要添加的智能体 -3. 允许选择多个协作智能体 -4. 可点击 × 取消选择此智能体 +2. 智能体列表分为"内部 Agent"和"外部 A2A Agent"两个页签,您可以根据需要选择 +3. 在下拉列表中选择要添加的智能体 +4. 允许选择多个协作智能体 +5. 可点击 × 取消选择此智能体 + +
+ +
+ +#### 🌐 添加外部 A2A Agent + +Nexent 支持通过 A2A 协议与第三方 Agent 进行通信。您可以通过以下两种方式发现外部 A2A Agent: + +##### 通过 URL 发现 Agent + +如果您知道目标 Agent 的 Agent Card 地址,可以使用 URL 发现方式:
- +
+1. 在外部 A2A Agent 列表中,点击"添加外部 Agent"按钮 +2. 选择"URL 发现"页签 +3. 填写 Agent Card URL 地址,例如:`https://example.com/.well-known/agent.json` +4. 点击"发现"按钮,系统会自动获取 Agent 的相关信息 +5. 发现成功后,可以查看 Agent 的名称、描述、能力等信息 +6. 点击"添加到列表"完成添加 + +> 💡 **提示**:Agent Card 是符合 A2A 1.0 规范的 Agent 描述文件,包含了 Agent 的名称、描述、调用地址、能力等信息。 + +##### 通过 Nacos 发现 Agent + +如果您的 Agent 注册在 Nacos 服务发现平台,可以使用 Nacos 发现方式: + +
+ +
+ +1. 在外部 A2A Agent 列表中,点击"添加外部 Agent"按钮 +2. 选择"Nacos 发现"页签 +3. 首次使用时,需要先配置 Nacos 连接信息: + - **Nacos 服务器地址**:填写 Nacos 服务器地址,如 `http://127.0.0.1:8848` + - **命名空间 ID**:填写 Nacos 命名空间 ID(可选) + - **分组名**:填写服务分组名,默认为 `DEFAULT_GROUP` + - **用户名/密码**:填写 Nacos 访问凭证(可选) +4. 点击"保存配置"保存 Nacos 连接信息 +5. 填写要扫描的 Agent 服务名称 +6. 点击"扫描"按钮,系统会从 Nacos 中获取匹配的 Agent 信息 +7. 扫描结果会列出所有匹配的 Agent,可以选择需要的 Agent 添加到列表 + +> ⚠️ **注意**:确保 Nacos 服务正常运行,且目标 Agent 已正确注册到 Nacos。 + +##### 管理已发现的外部 Agent + +在外部 A2A Agent 列表中,您可以查看和管理所有已发现的外部 Agent: + + + +
+ +
+ +1. **查看 Agent 详情**:点击 Agent 卡片,可以查看其完整信息,包括名称、描述、URL、能力列表等 +2. **测试 Agent**:点击"测试"按钮,可以向该 Agent 发送测试消息,验证其是否正常工作 +3. **与 Agent 对话**:点击"对话"按钮,可以打开对话窗口,与该 Agent 进行实时交互 +4. **配置调用协议**:点击"协议配置"按钮,可以选择该 Agent 的调用协议: + - **HTTP + JSON**:使用 REST API 风格调用 + - **JSON-RPC**:使用 JSON-RPC 协议调用 +5. **刷新 Agent 信息**:如果 Agent 信息发生变化,可以点击"刷新"按钮重新获取最新的 Agent Card +6. **移除 Agent**:点击"移除"按钮,可以将该 Agent 从已发现列表中删除 + +> 💡 **使用场景**: +> - 通过 URL 发现快速接入已知的第三方 Agent 服务 +> - 通过 Nacos 发现批量接入同一服务注册中心的所有 Agent +> - 配置协议以兼容不同 Agent 服务提供商的要求 + + +###### 通过URL对接[DataAgent](https://gitcode.com/datagallery/dataagent) A2A Agent +1. 参考[DataAgent文档](https://gitcode.com/datagallery/dataagent#%F0%9F%8C%90-a2a-10-%E6%9C%8D%E5%8A%A1%E6%A8%A1%E5%BC%8F)以A2A服务模式启动DataAgent + >当前Nexent不支持带认证的agent,启动DataAgent时请勿设置auth-token +
+ +
+ +2. 参考[通过 URL 发现 Agent](#通过-url-发现-agent)接入agent,url为 `http://<DataAgent主机IP>:9999/.well-known/agent-card.json` +3. 参考[管理已发现的外部 Agent](#管理已发现的外部-agent)配置调用协议,选择HTTP+JSON方式接入 + ### 🛠️ 选择智能体的工具 智能体可以使用各种工具来完成任务,如知识库检索、文件解析、图片解析、收发邮件、文件管理等本地工具,也可接入第三方 MCP 工具,或自定义工具。 @@ -60,7 +144,10 @@ > 2. 请选择 `analyze_text_file` 工具,启用文档类、文本类文件的解析功能。 > 3. 请选择 `analyze_image` 工具,启用图片类文件的解析功能。 > +> ⚠️ **向量化模型配置**:使用 `knowledge_base_search` 工具时,需要确保知识库已配置向量化模型。对于存量知识库,系统会提示选择向量化模型,请务必选择**创建该知识库时使用的向量化模型**。若选择的模型与知识库创建时使用的模型不一致,可能导致检索失败或结果不准确。 +> > 📚 想了解系统已经内置的所有本地工具能力?请参阅 [本地工具概览](./local-tools/index.md)。 +> 📚 想了解技能能力?请参阅 [技能管理](./skills.md)。 ### 🔌 添加 MCP 工具 @@ -108,6 +195,40 @@ 有许多第三方服务如 [ModelScope](https://www.modelscope.cn/mcp) 提供了 MCP 服务,您可以快速接入使用。 您也可以自行开发 MCP 服务并接入 Nexent 使用,参考文档 [MCP 工具开发](../backend/tools/mcp)。 +**3️⃣ 存量 API 转换为 MCP 服务** + +🔔 该方法适用于将已有的 REST API 接口快速转换为 MCP 工具,无需额外开发即可让智能体调用现有 API 能力: + +>1. 在 MCP 配置模块选择 **"API 转换为 MCP"** 接入类型 +> +>2. 在下方的输入框中填写 API 基础信息: +> - **服务名称**:MCP 服务的展示名称 +> - **OpenAPI JSON**:OpenAPI 3.x 规范的 JSON 内容 +> - **基础服务 URL**:API 服务的基础地址(支持 http/https) +> +>3. 点击右下角 **+ 添加** 按钮,完成对应 MCP 服务的转换 + +
+ +
+ +> +>4. 转换完成后,可在 **Outer APIs** 页签下查看所有外部 API 转换的 MCP 工具 + +
+ +
+ +
+ +
+ +>💡 **使用场景**: +>- 快速接入企业内部的 REST API 接口 +>- 将第三方服务的 HTTP API 转换为 MCP 工具 +>- 无需编写 MCP Server 代码,直接通过 OpenAPI 规范生成工具 + + ### ⚙️ 自定义工具 您可参考以下指导文档,开发自己的工具,并接入 Nexent 使用,丰富智能体能力。 @@ -129,8 +250,8 @@ - 测试的 `query`,例如"维生素C的功效" - 检索的模式 `search_mode`(默认为 `hybrid`) - 目标检索的知识库列表 `index_names`,如 `["医疗", "维生素知识大全"]` - - 若不输入 `index_names`,则默认检索知识库页面所选中的全部知识库 - - 是否启用重排模型(默认为 `false`),启用后配置重排模型,实现对检索结果的重排优化 + - 若不输入 `index_names`,则默认检索知识库页面所选中的全部知识库 + - 是否启用重排模型(默认为 `false`),启用后配置重排模型,实现对检索结果的重排优化 6. 输入完成后点击"执行测试"开始测试,并在下方查看测试结果
@@ -172,7 +293,8 @@
-### 🐛 调试与保存 +## 🐛 调试与保存 + 在完成初步智能体配置后,您可以对智能体进行调试,根据调试结果微调提示词,持续提升智能体表现。 @@ -182,7 +304,7 @@ 调试成功后,可点击右下角"保存"按钮,此智能体将会被保存并出现在智能体列表中。 -### 🐛 版本管理 +## 🐛 版本管理 Nexent 支持智能体的版本管理,您可以在调试过程中,保存不同版本的智能体配置。 @@ -194,6 +316,121 @@ Nexent 支持智能体的版本管理,您可以在调试过程中,保存不 ![版本管理2](./assets/agent-development/version_management_2.png) +### 🚀 发布为 A2A Agent + +Nexent 支持将已发布的智能体作为 A2A Agent 暴露给外部系统调用。在发布版本时,您可以勾选"发布为 A2A Agent"选项,将当前智能体注册为符合 A2A 1.0 规范的 Agent。 + +
+ +
+ +发布成功后,系统会显示 A2A Agent 的调用信息,包括: + +
+ +
+ +| 信息项 | 说明 | +|--------|------| +| **Endpoint ID** | A2A Agent 的唯一标识符 | +| **Agent Card URL** | Agent 发现端点,外部系统通过此地址获取 Agent 描述 | +| **协议版本** | A2A 协议版本,当前为 1.0 | +| **REST 端点** | 基于 REST 风格的 API 端点 | +| **JSON-RPC 端点** | 基于 JSON-RPC 2.0 协议的调用端点 | + +#### 调用方式 + +发布后的 A2A Agent 支持以下两种调用协议: + +##### REST API + +```bash +# 获取 Agent Card(用于 Agent 发现) +GET /nb/a2a/{endpoint_id}/.well-known/agent-card.json + +# 发送同步消息 +POST /nb/a2a/{endpoint_id}/message:send +Content-Type: application/json + +{ + "message": { + "role": "user", + "content": "请帮我完成某个任务" + } +} + +# 发送流式消息(SSE) +POST /nb/a2a/{endpoint_id}/message:stream +Content-Type: application/json + +{ + "message": { + "role": "user", + "content": "请帮我完成某个任务" + } +} + +# 获取任务状态 +GET /nb/a2a/{endpoint_id}/tasks/{task_id} +``` + +##### JSON-RPC 2.0 + +```bash +POST /nb/a2a/{endpoint_id}/v1 +Content-Type: application/json + +# 发送同步消息 +{ + "jsonrpc": "2.0", + "method": "SendMessage", + "params": { + "message": { + "role": "user", + "content": "请帮我完成某个任务" + } + }, + "id": 1 +} + +# 发送流式消息 +{ + "jsonrpc": "2.0", + "method": "SendStreamingMessage", + "params": { + "message": { + "role": "user", + "content": "请帮我完成某个任务" + } + }, + "id": 2 +} + +# 获取任务状态 +{ + "jsonrpc": "2.0", + "method": "GetTask", + "params": { + "taskId": "task_abc123" + }, + "id": 3 +} +``` + +> 💡 **提示**: +> - 本地开发时,请将路径前面的 `/nb/a2a` 部分替换为 `http://localhost:5013/nb/a2a` +> - 生产环境请将其中的 `http://localhost:5013` 替换为您的服务器域名或公网 IP 地址 + +> ⚠️ **注意事项**: +> - 调用 A2A Agent 需要在请求头中携带有效的认证信息 +> - Agent Card 信息会被缓存,刷新间隔为 1 小时 +> - 如需更新 Agent 信息,需要重新发布智能体版本 + +当发布的智能体为符合 A2A 协议的 Agent 时,用户可以在智能体列表中点击下面这个按钮,查看 A2A Agent 的具体调用信息: + +
+ +
## 🔧 管理智能体 diff --git a/doc/docs/zh/user-guide/assets/agent-development/a2a-detail.jpg b/doc/docs/zh/user-guide/assets/agent-development/a2a-detail.jpg new file mode 100644 index 000000000..e0ce35f1f Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/a2a-detail.jpg differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/a2a-discovery-list.jpg b/doc/docs/zh/user-guide/assets/agent-development/a2a-discovery-list.jpg new file mode 100644 index 000000000..0464ce760 Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/a2a-discovery-list.jpg differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/a2a-find-detail.jpg b/doc/docs/zh/user-guide/assets/agent-development/a2a-find-detail.jpg new file mode 100644 index 000000000..ed9912627 Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/a2a-find-detail.jpg differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/a2a-nacos-discovery.jpg b/doc/docs/zh/user-guide/assets/agent-development/a2a-nacos-discovery.jpg new file mode 100644 index 000000000..f1fba231d Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/a2a-nacos-discovery.jpg differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/a2a-published-as.jpg b/doc/docs/zh/user-guide/assets/agent-development/a2a-published-as.jpg new file mode 100644 index 000000000..7bfc7d170 Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/a2a-published-as.jpg differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/a2a-url-discovery.jpg b/doc/docs/zh/user-guide/assets/agent-development/a2a-url-discovery.jpg new file mode 100644 index 000000000..a6e244ff1 Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/a2a-url-discovery.jpg differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/add_mcp_from_api.png b/doc/docs/zh/user-guide/assets/agent-development/add_mcp_from_api.png 
new file mode 100644 index 000000000..ed03af94f Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/add_mcp_from_api.png differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/add_mcp_from_api_1.png b/doc/docs/zh/user-guide/assets/agent-development/add_mcp_from_api_1.png new file mode 100644 index 000000000..4dda4579d Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/add_mcp_from_api_1.png differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/add_mcp_from_api_2.png b/doc/docs/zh/user-guide/assets/agent-development/add_mcp_from_api_2.png new file mode 100644 index 000000000..faba05fec Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/add_mcp_from_api_2.png differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/dataagent_deploy.png b/doc/docs/zh/user-guide/assets/agent-development/dataagent_deploy.png new file mode 100644 index 000000000..46fa9fde3 Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/dataagent_deploy.png differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/set-collaboration.jpg b/doc/docs/zh/user-guide/assets/agent-development/set-collaboration.jpg new file mode 100644 index 000000000..ccb8a2f6b Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/set-collaboration.jpg differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/set-collaboration.png b/doc/docs/zh/user-guide/assets/agent-development/set-collaboration.png deleted file mode 100644 index 719f9b6ac..000000000 Binary files a/doc/docs/zh/user-guide/assets/agent-development/set-collaboration.png and /dev/null differ diff --git a/doc/docs/zh/user-guide/knowledge-base.md b/doc/docs/zh/user-guide/knowledge-base.md index fa98eac62..b0ebb53f5 100644 --- a/doc/docs/zh/user-guide/knowledge-base.md +++ b/doc/docs/zh/user-guide/knowledge-base.md @@ -26,12 +26,14 @@ Nexent支持多种文件格式,包括: -- **文本**: .txt, .md文件 +- 
**文本**: .txt, .md, .json文件 - **PDF**: .pdf文件 - **Word**: .docx文件 - **PowerPoint**: .pptx文件 - **Excel**: .xlsx文件 +- **EPUB**: .epub文件 - **数据文件**: .csv文件 +- **网页内容**: .html, .xml文件 ## 📊 知识库总结 diff --git a/doc/docs/zh/user-guide/local-tools/index.md b/doc/docs/zh/user-guide/local-tools/index.md index ebd7de972..71ba3e950 100644 --- a/doc/docs/zh/user-guide/local-tools/index.md +++ b/doc/docs/zh/user-guide/local-tools/index.md @@ -9,6 +9,8 @@ - [搜索工具](./search-tools):本地/DataMate/Dify 知识库检索与 Exa/Tavily/Linkup 公网搜索。 - [多模态工具](./multimodal-tools):文本文件与图片的下载、解析、模型分析。 - [终端工具](./terminal-tool):持久化 SSH 会话,远程执行命令。 +- [SQL 工具](./sql-tools):连接 MySQL、PostgreSQL、SQL Server 执行 SQL 查询。 +- [技能(Skills)](../skills):Nexent内置工具组合或自定义能力包,支持 NL 生成与版本管理。 ## ⚙️ 配置入口 @@ -21,4 +23,4 @@ - 路径类操作仅限工作空间范围,请使用相对路径。 - 公网搜索需先在平台安全配置中填写 API Key。 - 终端工具涉及远程主机,请确认网络与账号安全策略。 -- 删除、移动类操作不可恢复,执行前先确认目标。 \ No newline at end of file +- 删除、移动类操作不可恢复,执行前先确认目标。 diff --git a/doc/docs/zh/user-guide/local-tools/multimodal-tools.md b/doc/docs/zh/user-guide/local-tools/multimodal-tools.md index 114504365..3470a2c1c 100644 --- a/doc/docs/zh/user-guide/local-tools/multimodal-tools.md +++ b/doc/docs/zh/user-guide/local-tools/multimodal-tools.md @@ -4,18 +4,22 @@ title: 多模态工具 # 多模态工具 -多模态工具组支持分析文本文件与图片,结合模型能力生成用户问题相关的解读结果。支持 S3、HTTP、HTTPS 等 URL。 +多模态工具组支持分析文本文件、图片、视频与音频,结合模型能力生成用户问题相关的解读结果。支持 S3、HTTP、HTTPS 等 URL。 ## 🧭 工具清单 - `analyze_text_file`:下载并提取文本文件内容后进行分析 - `analyze_image`:下载图片并使用视觉语言模型进行理解与描述 +- `analyze_video`:下载视频并使用视频理解模型进行分析 +- `analyze_audio`:下载音频并使用音频理解模型进行分析 ## 🧰 使用场景示例 - 对上传到存储桶的文档进行快速摘要或要点提取 - 对截图、产品图片、报表图进行内容解读或关键信息提取 -- 结合问题指令,对多份文件/图片分别生成答案列表 +- 对上传的视频进行内容理解,如提取关键帧信息、人物动作、场景描述等 +- 对音频文件进行内容分析,如转录、说话人识别、内容摘要等 +- 结合问题指令,对多份文件/图片/视频/音频分别生成答案列表 ## 🧾 参数要求与行为 @@ -29,16 +33,26 @@ title: 多模态工具 - `query`:用户问题/关注点。 - 会逐张图片下载并调用视觉语言模型,返回与顺序对应的描述或答案数组。 +### analyze_video +- `video_url`:视频 URL,支持 `s3://bucket/key`、`/bucket/key`、`http(s)://`。 +- `query`:用户问题/关注点。 +- 下载视频后调用视频理解模型,返回视频分析结果。 + +### 
analyze_audio +- `audio_url`:音频 URL,支持 `s3://bucket/key`、`/bucket/key`、`http(s)://`。 +- `query`:用户问题/关注点。 +- 下载音频后调用音频理解模型,返回音频分析结果。 + ## ⚙️ 前置配置 - 确保已在平台配置可用的存储客户端(如 MinIO/S3)及数据处理服务地址,保证能下载文件。 -- 为 `analyze_text_file` 配置可用的 LLM;为 `analyze_image` 配置可用的视觉语言模型。 +- 为 `analyze_text_file` 配置可用的 LLM;为 `analyze_image` 配置可用的视觉语言模型;为 `analyze_video` 和 `analyze_audio` 配置可用的视频理解模型(需支持音视频输入,如 Qwen3-Omni 系列模型)。 ## 🛠️ 操作指引 -1. 准备文件或图片的可访问 URL,确认权限与路径正确。 -2. 调用相应工具,填写 URL 列表与问题描述;支持一次处理多条资源。 -3. 检查返回的数组结果顺序与输入列表一致,便于继续引用或展示。 +1. 准备文件、图片、视频或音频的可访问 URL,确认权限与路径正确。 +2. 调用相应工具,填写 URL 与问题描述;支持一次处理多条资源。 +3. 检查返回结果,确认内容符合预期后再继续引用或展示。 ## 💡 最佳实践 diff --git a/doc/docs/zh/user-guide/local-tools/sql-tools.md b/doc/docs/zh/user-guide/local-tools/sql-tools.md new file mode 100644 index 000000000..b5b50af59 --- /dev/null +++ b/doc/docs/zh/user-guide/local-tools/sql-tools.md @@ -0,0 +1,75 @@ +--- +title: SQL 数据库工具 +--- + +# SQL 数据库工具 + +SQL 数据库工具组支持连接和查询 MySQL、PostgreSQL、SQL Server 等关系型数据库,让 AI 智能体能够直接读取和操作数据库数据。 + +## 工具清单 + +- `mysql_database`:连接 MySQL 数据库执行 SQL 查询 +- `postgres_database`:连接 PostgreSQL 数据库执行 SQL 查询 +- `mssql_database`:连接 SQL Server 数据库执行 SQL 查询 + +## 使用场景示例 + +- 从业务数据库中查询报表数据,供智能体分析汇总 +- 跨数据库关联查询,获取分散在多个表中的关联信息 +- 实时查询业务状态,为智能体提供最新数据参考 + +## 参数要求与行为 + +### 通用参数 +- `sql`:要执行的 SQL 查询语句,必填 +- `parameters`:参数化查询的参数值列表,可选 +- `max_rows`:最大返回行数,默认 100 +- `timeout`:查询超时时间(秒),默认 10 + +### 数据库连接参数 + +| 数据库 | 连接参数 | +|--------|----------| +| MySQL | `host`、`user`、`password`、`database`、`port`(默认 3306) | +| PostgreSQL | `host`、`user`、`password`、`database`、`port`(默认 5432) | +| SQL Server | `host`、`user`、`password`、`database`、`port`(默认 1433) | + +### 安全限制 +- 禁止执行 `DROP DATABASE`、`GRANT`、`REVOKE`、`CREATE USER`、`INTO OUTFILE`、`LOAD DATA INFILE` 等危险操作 +- `UPDATE` 和 `DELETE` 语句必须包含 `WHERE` 子句 +- 自动添加 `LIMIT` 限制返回行数 + +### 返回格式 +```json +{ + "status": "success", + "columns": ["id", "name", "email"], + "rows": [[1, "张三", "zhang@example.com"]], + "row_count": 1, + "execution_time_ms": 45.23 +} +``` 
+ +## 操作指引 + +1. **准备数据库连接信息**:获取主机地址、端口、数据库名、用户名和密码 +2. **配置工具**:在智能体工具配置中添加对应数据库工具,填写连接参数 +3. **测试连接**:使用简单查询验证连接是否正常 +4. **构造查询**:让智能体理解自然语言需求,生成对应 SQL 执行 + +## 安全与最佳实践 + +- 生产环境建议使用只读账号,限制操作权限 +- 敏感信息如数据库密码可通过密钥管理服务存储 +- 合理设置 `max_rows` 避免一次性返回过多数据 +- 建议开启数据库连接的 SSL/TLS 加密选项 + +## 常见数据库连接示例 + +| 数据库 | 连接地址示例 | 参数占位符 | +|--------|-------------|------------| +| MySQL | `localhost:3306` | `?` | +| PostgreSQL | `localhost:5432` | `$1, $2, ...` | +| SQL Server | `localhost:1433` | `?` | + +> 不同数据库的参数占位符格式不同,PostgreSQL 使用 `$1, $2` 格式,其他使用 `?`。 diff --git a/doc/docs/zh/user-guide/mcp-tools.md b/doc/docs/zh/user-guide/mcp-tools.md index 912306284..94bf7c656 100755 --- a/doc/docs/zh/user-guide/mcp-tools.md +++ b/doc/docs/zh/user-guide/mcp-tools.md @@ -1,27 +1,158 @@ # MCP 工具 -即将推出的 MCP 工具管理模块将让您在一个页面集中管理 MCP 服务器与工具,轻松完成连接配置、工具同步和健康状态监控 +在 MCP 工具模块中,您可以集中管理所有 MCP(Model Context Protocol)服务器与工具,支持自定义添加、注册表导入和社区导入等多种接入方式,完成连接配置、工具同步、健康监控以及社区共享。 -## 🎯 功能预览 +MCP 工具页面包含两个并列页签: -1. 注册并管理多个 MCP 服务器 -2. 快速同步、查看并整理 MCP 工具列表 -3. 实时监控 MCP 连接状态和使用情况 +- **导入的服务**:管理当前租户已接入的 MCP 服务,在此配置、监控和维护您的 MCP 服务。 +- **发布的服务**:管理当前租户发布到社区的 MCP 服务,支持浏览、编辑和取消发布。 -## ⏳ 敬请期待 +--- -MCP 工具管理功能正在开发中,我们致力于打造一个高效、直观的管理平台,让您能够: +## ➕ 添加 MCP 服务 -1. 集中管理所有 MCP 服务器 -2. 便捷同步和组织工具 -3. 实时掌握服务器连接与工具运行状态 +点击页面上的"添加 MCP 服务"按钮,打开添加弹窗。弹窗提供三个页签,对应不同的接入来源。 -## 🚀 相关功能 +### 自定义添加 -在等待 **MCP 工具** 上线期间,您可以: +"自定义添加"页签支持手动配置 MCP 服务,分为两种传输类型。 -1. 在 **[智能体开发](./agent-development)** 中管理您的 MCP 工具 -2. 通过 **[智能体空间](./agent-market)** 查看智能体与 MCP 的协作关系 -3. 在 **[开始问答](./start-chat)** 中体验平台功能 +#### 通过 URL 添加 -如果您在使用过程中遇到任何问题,请参考我们的 **[常见问题](../quick-start/faq)** 或在[GitHub Discussions](https://github.com/ModelEngine-Group/nexent/discussions)中进行提问获取支持。 \ No newline at end of file +适用于已有独立部署的 MCP 服务(支持 HTTP / SSE 协议),通过输入端点 URL 直接接入。 + +1. 在"本地添加"页签中,**传输类型**选择"URL" +2. 填写服务信息: + - **服务名称(必填)**:为 MCP 服务设置一个易于识别的名称 + - **服务 URL(必填)**:输入 MCP 服务的端点地址 + - **描述**:可选,填写服务的用途说明 + - **Authorization Token**:可选,若服务需要认证,在此填入 Bearer Token +3. 
点击"确定"完成添加,系统会自动连接服务并获取可用工具列表 + +#### 通过容器配置添加 + +适用于需要本地容器化运行的 MCP 服务(如通过 npx 启动的服务),系统会根据您提供的 JSON 配置自动创建并管理容器。 + +1. 在"本地添加"页签中,**传输类型**选择"容器" +2. 填写容器配置信息: + - **服务名称(必填)**:为 MCP 服务设置一个易于识别的名称 + - **描述**:可选,填写服务的用途说明 + - **容器配置 JSON(必填)**:按标准 MCP 配置格式填写,例如: + ```json + { + "mcpServers": { + "service-name": { + "args": ["mcp-package-name@version"], + "command": "npx", + "env": { + "API_KEY": "xxxx" + } + } + } + } + ``` + - **端口号**:填写容器服务暴露的端口,系统会自动检测端口冲突并提示可用端口 +3. 点击"确定",系统将解析 JSON 配置、创建容器并完成服务注册 + +### 从 MCP Registry 导入 + +Nexent 集成了 MCP Registry,您可以浏览并一键导入社区维护的 MCP 服务。 + +1. 切换到"外部市场"页签 +2. 浏览可用的 MCP 服务列表,支持按名称或标签搜索 +3. 点击目标服务,查看服务详情(描述、版本、所需参数等) +4. 配置必填参数(如 API Key 等环境变量) +5. 点击"导入",系统会自动安装并配置该 MCP 服务 + +### 从社区导入 + +浏览其他用户在 Nexent 平台内发布的 MCP 服务,快速导入使用。 + +1. 切换到"社区市场"页签 +2. 浏览社区已发布的 MCP 服务,支持按名称、标签或传输协议筛选 +3. 点击目标服务查看详情,点击"导入"即可添加到您的服务列表中 + +--- + +## 📋 导入的服务 + +"导入的服务"页签以卡片形式展示当前租户所有已接入的 MCP 服务,您可以在此查看、编辑、监控和发布。 + +### 查看与筛选 + +每张服务卡片展示以下信息: + +- 服务名称与描述 +- 来源标识(本地 / 注册表 / 社区) +- 启用 / 禁用开关 +- 标签 + +您可以使用顶部的筛选栏,按**来源**、**传输类型**和**标签**进行过滤,也可以通过搜索框按名称快速定位服务。 + +### 编辑服务详情 + +点击任意服务卡片,打开详情弹窗,可以进行以下操作: + +- **编辑基本信息**:修改服务名称、描述、URL、Authorization Token 和标签 +- **启用 / 禁用服务**:通过开关控制服务的启用状态,禁用后该服务的工具将不会出现在智能体工具选择中 +- **删除服务**:移除 MCP 服务记录,容器化服务会同步清理容器资源 + +### 查看工具列表 + +在服务详情弹窗中,点击"工具列表"按钮,可以查看该 MCP 服务提供的所有工具。 + +### 健康检查 + +点击详情弹窗中的"健康检查"按钮,系统会对 MCP 服务发起连接测试并返回当前状态: + +- **正常**:服务可正常连接 +- **异常**:服务无法连接或响应异常 +- **未检测**:尚未进行健康检查 + +### 容器管理 + +对于容器化部署的 MCP 服务,详情弹窗中还提供以下操作: + +- **查看容器日志**:实时查看运行中容器的输出日志,方便排查问题 +- **查看容器配置**:查看创建容器时使用的配置 JSON + +### 发布到社区 + +在服务详情弹窗中,点击"发布到社区"按钮: + +1. 确认或修改发布信息(名称、描述、标签等) +2. 点击"确认发布",该服务将发布到社区 +3. 
发布后其他用户可在添加服务的"社区市场"页签中浏览和导入 + +--- + +## 🌐 发布的服务 + +"发布的服务"页签展示您自己发布到社区的所有 MCP 服务,您可以在此集中管理已发布的内容。 + +每张卡片展示服务名称、描述、版本和标签,支持按名称、标签和传输协议进行筛选。 + +点击服务卡片可查看详细信息,您可以: + +- **编辑发布的服务**:修改已发布服务的名称、描述和标签 +- **删除发布的服务**:将服务从社区撤回,不再对其他用户可见 + +--- + +## 🔗 与智能体协作 + +添加 MCP 服务后,其提供的工具会自动同步到智能体的工具选择列表中。在 **[智能体开发](./agent-development)** 页面配置智能体时: + +1. 在"选择智能体的工具"页签下,找到对应 MCP 服务分组 +2. 点击工具名称即可启用该工具 +3. 可点击 ⚙️ 查看工具描述并进行参数配置 + +## 🚀 下一步 + +完成 MCP 服务配置后,建议您: + +1. **[智能体开发](./agent-development)** - 将 MCP 工具配置给智能体使用 +2. **[智能体空间](./agent-space)** - 查看智能体与 MCP 的协作关系 +3. **[开始问答](./start-chat)** - 在对话中体验智能体调用 MCP 工具的效果 + +如果您在使用过程中遇到任何问题,请参考我们的 **[常见问题](../quick-start/faq)** 或在 [GitHub Discussions](https://github.com/ModelEngine-Group/nexent/discussions) 中进行提问获取支持。 \ No newline at end of file diff --git a/doc/docs/zh/user-guide/model-management.md b/doc/docs/zh/user-guide/model-management.md index 46c1b25b4..6870f5544 100644 --- a/doc/docs/zh/user-guide/model-management.md +++ b/doc/docs/zh/user-guide/model-management.md @@ -169,6 +169,14 @@ Nexent支持与ModelEngine平台的无缝对接
+#### 语音合成模型 +语音合成模型用于将文本内容即时转换为自然流畅的语音输出,使系统能够以接近真人的方式进行语音交互与反馈。通过低延迟、高拟真度的语音生成能力,确保用户在对话过程中获得连贯、自然的听觉体验。配置合适的实时语音合成模型,可以显著提升语音交互系统的表现力和用户体验。 +- 点击语音合成模型下拉框,从已添加的语音合成模型中选择一个。 + +#### 语音识别模型 +语音识别模型用于将用户输入的语音内容实时转换为文本,实现对语音指令和自然语言的准确理解与解析。通过高精度的语音转写与噪声鲁棒能力,确保在复杂环境下依然能够稳定识别用户意图。配置合适的语音识别模型,可以显著提升语音交互系统的理解能力和整体响应效率。 +- 点击语音识别模型下拉框,从已添加的语音识别模型中选择一个。 + ### ✅ 检查模型连通性 定期检查模型连通性是确保系统稳定运行的重要环节。通过连通性检查功能,您可以及时发现和解决模型连接问题,保证服务的连续性和可靠性。 @@ -224,18 +232,29 @@ Nexent 支持任何 **遵循OpenAI API规范** 的大语言模型供应商,包 使用与大语言模型相同的API Key,但模型URL一般会有所差异,一般以`/v1/rerank`为结尾。 #### 🎤 语音模型 -目前仅支持火山引擎语音,且需要在`.env`中进行配置 +目前支持阿里灵积和火山引擎语音模型:阿里灵积需配置与大语言模型相同的 API Key,火山引擎模型需配置 appid 与 token +**火山引擎** - **网站**: [volcengine.com/product/voice-tech](https://www.volcengine.com/product/voice-tech) - **免费额度**: 个人使用可用 - **特色**: 高质量中英文语音合成 - -**开始使用**: - -1. 注册火山引擎账户 -2. 访问语音技术服务 -3. 创建应用并获取 API Key -4. 在环境中配置 TTS/STT 设置 +- 推荐使用**豆包语音合成模型2.0和大模型流式语音识别模型** +- **开始使用**: + + 1. 注册火山引擎账户 + 2. 访问语音技术服务 + 3. 创建应用并获取appid和token + 4. 在添加模型页面中配置 TTS/STT 设置 + +**阿里灵积** +- **网站**: [aliyun.com/benefit/scene/voice](https://www.aliyun.com/benefit/scene/voice) +- 推荐使用**千问3-TTS-Instruct-Flash-Realtime/千问3-TTS-Flash-Realtime和千问3-ASR-Flash-Realtime** +- **开始使用**: + + 1. 注册阿里云账户 + 2. 访问阿里千问实时语音技术服务 + 3. 创建应用并获取 API Key + 4. 
在添加模型页面中配置 TTS/STT 设置 ## 💡 需要帮助 diff --git a/doc/docs/zh/user-guide/skills.md b/doc/docs/zh/user-guide/skills.md new file mode 100644 index 000000000..54d0f97bb --- /dev/null +++ b/doc/docs/zh/user-guide/skills.md @@ -0,0 +1,476 @@ +--- +title: 技能管理 +--- + +# 技能管理 + +技能(Skill)是 Nexent 为智能体扩展能力的核心机制。每个技能将多个工具与使用文档打包为一个可复用的能力单元,可以像搭积木一样为智能体赋予复杂的工作能力。 + +## 目录 + +- [技能与工具的关系](#技能与工具的关系):理解技能的核心概念 +- [技能使用指南](#技能使用指南):如何在智能体开发中使用技能 +- [技能管理](#技能管理):创建、编辑、安装外部技能 +- [技能上传指南](#技能上传指南):SKILL.md 格式、ZIP 结构、特殊标签与书写规范 +- [NL-to-Skill](#nl-to-skill):通过自然语言描述自动生成技能 +- [官方技能一览](#官方技能一览):预置技能及其能力说明 + +## 技能与工具的关系 + +在 Nexent 中,**工具(Tool)** 与 **技能(Skill)** 是两个不同层次的概念,理解它们的区别有助于更好地为智能体配置能力。 + +**工具**是智能体可调用的单个原子操作。为智能体启用工具时,LLM 的每次思考都会在工具列表中搜索——这意味着即使某个工具本次对话完全不需要,LLM 仍然会消耗上下文额度去"看到"它。 + +**技能**则通过 `SKILL.md` 将多个工具的能力组合为一个完整的工作流,并附带参数配置与使用文档。LLM 不需要预先"看到"所有工具,而是根据用户的实际需求,自行判断是否激活某个技能。激活后,系统才会加载对应的工具集——从而有效节省 Token 消耗。 + +| 维度 | 工具 | 技能 | +|------|------|------| +| 粒度 | 单个原子操作 | 多个工具 + 配置 + 文档的组合 | +| Token 消耗 | 每次对话都占用上下文 | 仅在激活时才加载 | +| 参数 | 固定参数 schema | 可自定义参数模板 | +| 分发 | 代码级 | ZIP 包分发,即插即用 | + +## 技能使用指南 + +### 为智能体配置技能 + +1. 打开 **[智能体开发](./agent-development)** 页面 +2. 在"选择智能体的工具"页签中,找到 **技能(Skills)** 分组 +3. 点击技能名称即可选中,再次点击取消选择 +4. 保存智能体配置 + +## 技能管理 + +### 查看已安装的技能 + +在"选择智能体的工具"技能分组中,系统会展示所有已安装的技能列表,包括: +- 官方技能 +- 自定义技能 + +### 创建自定义技能 + +Nexent 支持两种方式创建自定义技能:上传技能包文件,或通过自然语言描述自动生成。 + +#### 方式一:上传 SKILL.md 或 ZIP + +1. 进入技能配置界面 +2. 点击"上传技能"按钮 +3. 选择 `SKILL.md` 文件(单文件)或 `.zip` 压缩包(完整技能包) +4. 
系统自动解析并创建技能 + +#### 方式二:NL-to-Skill 自然语言创建 + +在技能管理页面,点击"**NL 创建技能**"按钮即可进入。具体用法详见下方 [NL-to-Skill](#-nl-to-skill) 专区。 + +## 技能上传指南 + +### 技能包结构 + +技能包可以是单个文件,也可以是包含多个文件的 ZIP 包: + +``` +skill-name/ +├── SKILL.md # 技能定义文件(必需) +├── config/ +│ ├── config.yaml # 参数默认值 +│ └── schema.yaml # 参数类型与说明 +├── scripts/ +│ └── *.py # Python 脚本 +├── examples.md # 使用示例 +└── assets/ # 静态资源 +``` + +### SKILL.md 格式详解 + +`SKILL.md` 是技能的核心文件,分为 YAML 元数据区和正文两部分。 + +**YAML 元数据(必需)** + +文件顶部必须有 YAML frontmatter,格式如下: + +```yaml +--- +name: skill-name +description: | + 一段描述,说明这个技能是做什么的、什么时候该用它。 + 建议用第三人称书写。 +tags: + - tag1 + - tag2 +--- +``` + +| 字段 | 必填 | 说明 | 示例 | +|------|------|------|------| +| `name` | 是 | 技能名称,全英文、小写、单词间用连字符 | `github-repo-analyzer` | +| `description` | 是 | 技能功能描述,建议 1-3 句话,包含使用场景 | `这个技能用于分析 GitHub 仓库并提取关键指标` | +| `tags` | 否 | 技能标签列表,便于分类检索 | `["code", "github", "analysis"]` | + +**正文** + +元数据下方可以写 Markdown 正文,包含技能的使用说明、最佳实践、示例代码等。 + +### 两种技能类型 + +根据用途,技能分为两类,书写方式有所不同: + +**工具类技能**:用于暴露工具能力。正文应包含工具的参数说明、调用示例、返回格式、错误处理等。 + +**智能体类技能**:用于教智能体执行复杂任务。正文应包含工作流程、领域知识、边界条件、最佳实践等。 + +### config/schema.yaml:定义参数表单 + +如果技能需要用户填写参数,可以创建 `config/schema.yaml` 文件。系统会根据此文件在前端自动生成参数配置表单。 + +```yaml +param_name: + type: string | number | boolean | array | object + required: true | false + default: <默认值> + description: "参数的英文说明" + description_zh: "参数的中文说明" +``` + +**支持的类型**:`string`、`number`、`boolean`、`array`、`object` + +**完整示例**: + +```yaml +query: + type: string + required: true + description: "Search query string" + description_zh: "搜索关键词" + default: "" + +top_k: + type: number + required: false + description: "Number of results to return" + description_zh: "返回结果数量" + default: 3 + +enable_rerank: + type: boolean + required: false + description: "Enable result reranking" + description_zh: "是否启用结果重排序" + default: false +``` + +### config/config.yaml:设置参数默认值 + +如果希望某些参数有默认值,可以创建 `config/config.yaml`: + +```yaml +# Initial workspace path +init_path: "/mnt/nexent" + +# Maximum number 
of results +top_k: 5 +``` + +### 特殊标签 + +在 SKILL.md 正文中,可以使用以下特殊标签: + +#### ``:按需加载示例文件 + +使用 `` 标签引用外部文件,该文件仅在需要时才被加载,不会增加 SKILL.md 的主文件大小。 + +```markdown +## 示例参考 + + +``` + +#### ``:声明捆绑的脚本 + +如果技能包中包含 Python 或 Shell 脚本,需要在 SKILL.md 中声明: + +```markdown + +``` + +#### ``:展示可执行代码示例 + +使用 `` 标签包裹可执行的代码示例(通常为 Python 代码): + +```markdown + +result = run_skill_script( + "code-reviewer", + "scripts/analyze.py", + {"--target": "/path/to/file.py", "--verbose": True} +) +print(result) + +``` + +### 辅助函数 + +在智能体类技能的正文和示例中,可以使用以下函数: + +**`run_skill_script(skill_name, script_path, params)`**:执行技能包中的脚本 + +```python +# 执行 Python 脚本 +result = run_skill_script( + "code-reviewer", + "scripts/analyze.py", + {"--target": "/path/to/file.py"} +) + +# 执行 Shell 脚本 +result = run_skill_script( + "database-migration", + "scripts/migrate.sh", + {"--direction": "up", "--steps": 1} +) +``` + +**`read_skill_md(skill_name, files)`**:读取技能包中的文件内容 + +```python +# 默认只读取 SKILL.md(如果存在引用文件,不会自动包含) +content = read_skill_md("my-skill") + +# 显式指定要读取的文件 +full_content = read_skill_md("my-skill", [ + "SKILL.md", + "reference/api-reference.md" +]) +``` + +### 书写规范与最佳实践 + +**SKILL.md 书写规范**: + +1. **描述要具体**:说明技能在什么场景下使用,而不是仅仅描述功能 + - ✓ "当用户需要分析 GitHub 仓库的流行度指标时使用" + - ✗ "GitHub 搜索功能" + +2. **避免时间敏感信息**:不要包含具体日期、版本号等会过期的内容 + +3. **保持简洁**:SKILL.md 正文建议控制在 500 行以内。复杂内容用 `` 按需加载 + +4. **路径格式**:始终使用正斜杠 `/`,即使在 Windows 下也如此 + - ✓ `src/services/payment_service.py` + - ✗ `src\services\payment_service.py` + +5. **参数命名一致**:全文统一使用相同的术语和命名风格 + +6. 
**包含边界条件**:说明技能的适用范围和限制 + +**参数描述最佳实践**: + +```yaml +# ✓ 好:明确说明用途和格式 +query: + type: string + required: true + description: "GitHub repository owner/name or full URL" + description_zh: "GitHub 仓库的 owner/name 格式或完整 URL" + +# ✗ 差:过于模糊 +query: + type: string + required: true + description: "Search query" + description_zh: "查询" +``` + +**代码示例最佳实践**: + +- 每个工具至少提供 2 个不同场景的示例 +- 示例中包含常见参数组合 +- 示例展示成功调用和常见错误处理 + +### 从现有技能学习 + +系统内置了多个完整技能的参考示例,您可以在 `test_skill_examples/official-skills/` 目录下找到它们: + +| 技能名 | 参考价值 | +|--------|---------| +| `create-file-directory` | 工具类技能的标准写法,包含完整参数表、调用示例、错误处理表 | +| `search-knowledge-base` | 搜索类技能的参数配置,包含 schema.yaml 和 config.yaml 的完整示例 | +| `analyze-image` | 多模态工具的示例,包含 `` 调用格式 | +| `code_review_expert` | 智能体类技能的参考,包含捆绑脚本和 `` 标签用法 | + +### 常见问题 + +**Q: 上传 ZIP 包时报错"缺少 SKILL.md"** + +确保 ZIP 包根目录下包含 `SKILL.md` 文件,而不是将其放在子文件夹中。 + +**Q: 技能描述不生效** + +技能描述应写在 YAML frontmatter 的 `description` 字段中,而非正文的 Markdown 部分。正文内容不会被解析为技能描述。 + +## NL-to-Skill + +NL-to-Skill 是 Nexent 提供的一项智能创建功能。您只需要用**自然语言描述**一个技能的需求,系统就能自动生成完整的技能包,包括技能定义、参数配置、甚至配套的脚本代码。整个生成过程实时可见,就像有一个 AI 助手在帮您写代码一样。 + +简单来说: + +> 您说"我想要一个能搜索 GitHub 仓库并提取 Star 数的技能",系统就自动为您生成一个完整可用的技能。 + +### 快速上手 + +#### 第一步:描述您的需求 + +在输入框中,用自然语言描述您想要的技能。描述越清晰,生成效果越好。 + +**正例**: +- "创建一个技能,可以根据关键词搜索 GitHub 仓库并返回 Star 数、描述和链接" +- "创建一个读取 Excel 文件、统计各列数据并生成图表的技能" +- "创建一个技能,能从邮件中提取订单号、金额和日期,汇总成表格" + +**反例**: +- "帮我做一个聊天技能"(太模糊) +- "搜索工具"(缺少具体能力描述) + +#### 第二步:查看生成过程 + +点击"生成"后,页面会实时展示 AI 的思考和编写过程: +- 看到 AI 在分析您的需求 +- 看到它正在编写技能定义文件 +- 看到它在规划参数结构 + +这个过程就像看 AI 现场写代码,您可以随时点击"停止"中断。 + +#### 第三步:预览并保存 + +生成完成后,系统会展示技能的完整内容: +- 技能名称和描述 +- 参数列表(每个参数是什么、是否必填) +- 使用示例 + +仔细检查预览内容: +- 如需调整,点击"编辑"微调 +- 如符合预期,点击"保存"将技能添加到您的技能库 + +### 写作技巧 + +#### 如何写好技能描述 + +**1. 明确输入输出** + +告诉系统这个技能需要什么信息、会返回什么结果。 + +``` +✓ "输入一个 GitHub 仓库地址,返回仓库名称、Star 数、Fork 数和最新更新时间" +✗ "搜索 GitHub"(太模糊) +``` + +**2. 说明使用场景** + +让 AI 理解在什么情况下会用到这个技能。 + +``` +✓ "用于快速查询开源项目的流行程度,帮助做技术选型决策" +✗ "查数据"(没有场景) +``` + +**3. 
描述边界条件** + +如果有特殊的处理逻辑或限制,一并说明。 + +``` +✓ "如果仓库不存在,返回友好提示而不是报错" +✓ "图片 URL 无效时跳过该图片并记录日志" +``` + +**4. 显式要求生成示例** + +如果技能使用场景复杂,且对边缘场景响应准确率要求较高,则可以在要求中明确提出生成更详细的示例。 + +``` +✓ "生成全面且详细的使用示例" +``` + +#### 适用场景举例 + +| 场景 | 描述示例 | +|------|---------| +| **数据采集** | "输入关键词,在知乎上搜索相关问答并提取最高赞回答的摘要" | +| **文件处理** | "上传一个 CSV 文件,自动统计各列数据并生成折线图" | +| **API 封装** | "创建一个调用天气 API 并返回未来三天预报的技能" | +| **多工具组合** | "输入商品链接,自动比价(调用多个电商搜索)并返回最低价链接" | +| **数据清洗** | "读取一段混乱的文本,提取其中的邮箱、手机号、日期并格式化输出" | + +### 生成过程中可以做什么 + +#### 实时预览 + +生成过程中,技能内容会逐步显示在预览区域: +- `SKILL.md` 内容:技能定义、描述、标签 +- `examples.md`:技能使用示例 +- `scripts/*.py`:工具脚本(复杂模式下) + +#### 随时停止 + +如果生成方向偏离预期: +- 点击"停止"按钮,AI 立即停止 +- 已有生成结果会保留,您可以查看或放弃 + +#### 多次尝试 + +如果第一次生成结果不理想: +- 直接补充需求细节,在原有基础上直接修改 +- 或者在预览中手动调整 +- 不满意当前生成的技能,希望重新再来时,您可以点击右上角的"垃圾桶"图标清空所有技能内容 + +### 使用限制与注意事项 + +#### 模型能力影响质量 + +NL-to-Skill 使用您租户配置的 LLM 模型来生成技能。模型的能力直接决定生成质量: +- 聪明的模型能准确理解需求,生成结构清晰、易于理解的技能 +- 较弱的模型可能生成不完整或有误导性的内容,影响智能体的效率与准确率 + +如果生成结果不理想,可以尝试: +1. 简化需求描述 +2. 切换到更聪明、更强大的模型 +3. 
分步骤创建(先做简单版本,再手动扩展) + +#### Token 消耗 + +复杂技能生成会消耗更多 Token: +- **简单模式**:通常消耗较少,适合快速验证 +- **复杂模式**:消耗较多,适合正式创建完整技能 + +建议先用简单模式测试想法,确认可行后再用复杂模式正式创建。 + +#### 并非所有需求都能实现 + +NL-to-Skill 擅长生成以下类型的技能: +- 单一工具的包装(如封装一个搜索能力) +- 多工具的简单串联(如搜 → 读 → 总结) +- 常见数据处理流程(如文件格式转换、数据提取) + +以下类型的技能可能超出能力范围: +- 需要调用未接入的外部 API +- 涉及复杂的状态管理或并发逻辑 +- 需要访问平台未开放的底层接口 + +遇到无法实现的需求时,系统会给出提示,您可以考虑手动创建或联系技术支持。 + +#### 技能修改 + +在 NL-to-Skill 界面可以选中已经存在的技能。选中技能后,该技能信息将自动加载。您可以在左侧对话框中使用自然语言尝试对该技能进行更新。 + +如果您创建的技能名与已有技能重名,Nexent 将自动从技能创建模式切换为技能更新模式。所有内容将覆盖更新至原有技能。 + +## 安全与最佳实践 + +- **知识库访问控制**:导入包含知识库工具的技能时,实际检索范围受当前用户权限限制 +- **公网搜索**:Tavily / Linkup / Exa 等公网搜索需先在平台安全配置中填写对应 API Key +- **路径安全**:技能包内文件操作仅限技能目录范围内,无法访问系统任意路径 + +## 相关参考 + +- [智能体开发](./agent-development) +- [本地工具概览](./local-tools/index) +- [MCP 工具配置](./mcp-tools) +- [技能系统概览](../backend/skills/overview) diff --git a/doc/docs/zh/user-guide/start-chat.md b/doc/docs/zh/user-guide/start-chat.md index 4e9dce692..fb3e4f0c6 100644 --- a/doc/docs/zh/user-guide/start-chat.md +++ b/doc/docs/zh/user-guide/start-chat.md @@ -80,8 +80,8 @@ Nexent支持语音输入功能,让您可以通过语音与智能体交互。 - 或直接将文件拖拽到对话区域 2. **支持的文件格式** - - **文档类**:PDF、Word (.docx)、PowerPoint (.pptx)、Excel (.xlsx) - - **文本类**:Markdown (.md)、纯文本 (.txt) + - **文档类**:PDF、Word (.docx)、PowerPoint (.pptx)、Excel (.xlsx), EPUB (.epub), HTML (.html), XML (.xml) + - **文本类**:Markdown (.md)、纯文本 (.txt), JSON (.json), CSV (.csv) - **图片类**:JPG、PNG、GIF 等常见图片格式 3. **文件处理流程** diff --git a/doc/procedural-memory-verification.md b/doc/procedural-memory-verification.md new file mode 100644 index 000000000..ea9f53290 --- /dev/null +++ b/doc/procedural-memory-verification.md @@ -0,0 +1,315 @@ +# Procedural Memory Verification Report + +## Summary +**Status: ⚠️ FULLY SUPPORTED but REQUIRES OPTIONAL DEPENDENCY** + +Procedural memory is a fully implemented feature in mem0ai version 0.1.117, **BUT it requires `langchain-core` to be installed separately**. Without this dependency, the feature will fail at runtime. 
+ +--- + +## ⚠️ CRITICAL FINDING: Optional Dependency Required + +**Your colleague is partially correct.** The procedural memory code is NOT empty (it's 50 lines of real implementation), but it has a critical dependency issue: + +### The Problem + +The `_create_procedural_memory()` method contains: + +```python +try: + from langchain_core.messages.utils import convert_to_messages +except Exception: + logger.error( + "Import error while loading langchain-core. " + "Please install 'langchain-core' to use procedural memory." + ) + raise # ← Fails here if langchain-core not installed +``` + +### Reality Check + +| Aspect | Status | +|--------|--------| +| Code exists? | ✅ Yes, 50 lines of real implementation | +| Code is empty/stub? | ❌ No, it's fully implemented | +| Works out of the box? | ❌ **NO** - requires `langchain-core` package | +| Documented requirement? | ⚠️ Only in error message, not in main docs | + +### Why Your Colleague Thought It Was Empty + +1. They called `memory.add(..., memory_type="procedural_memory")` +2. Got `ImportError: No module named 'langchain_core'` +3. Saw the error and concluded "it doesn't work" or "it's empty" +4. This is understandable - the feature exists but is **disabled by default** + +--- + +## Verification Results + +### 1. API Support ✅ +The `memory_type` parameter is available in both `AsyncMemory.add()` and `Memory.add()`: + +```python +async def add( + self, + messages, + *, + user_id: Optional[str] = None, + agent_id: Optional[str] = None, + run_id: Optional[str] = None, + metadata: Optional[Dict[str, Any]] = None, + infer: bool = True, + memory_type: Optional[str] = None, # ✅ SUPPORTED + prompt: Optional[str] = None, + llm=None +) +``` + +### 2. MemoryType Enum ✅ +Located in `mem0.configs.enums.MemoryType`: + +```python +class MemoryType(Enum): + SEMANTIC = "semantic_memory" + EPISODIC = "episodic_memory" + PROCEDURAL = "procedural_memory" # ✅ AVAILABLE +``` + +### 3. 
Implementation ✅ +The `_create_procedural_memory()` method exists in both `AsyncMemory` and `Memory` classes: + +**AsyncMemory signature:** +```python +async def _create_procedural_memory( + self, + messages, + metadata=None, + llm=None, + prompt=None +) +``` + +**Memory (sync) signature:** +```python +def _create_procedural_memory( + self, + messages, + metadata=None, + prompt=None +) +``` + +### 4. Validation Logic ✅ +The `add()` method validates `memory_type` and enforces constraints: + +```python +# Only "procedural_memory" is accepted +if memory_type is not None and memory_type != MemoryType.PROCEDURAL.value: + raise ValueError( + f"Invalid 'memory_type'. Please pass {MemoryType.PROCEDURAL.value} " + "to create procedural memories." + ) + +# agent_id is REQUIRED for procedural memory +if agent_id is not None and memory_type == MemoryType.PROCEDURAL.value: + results = await self._create_procedural_memory( + messages, metadata=processed_metadata, prompt=prompt, llm=llm + ) + return results +``` + +### 5. System Prompt ✅ +A comprehensive 5,100-character system prompt exists in `mem0.configs.prompts.PROCEDURAL_MEMORY_SYSTEM_PROMPT`: + +**Purpose:** Records and preserves complete interaction history between human and AI agent + +**Structure:** +- Overview (Global Metadata) + - Task Objective + - Progress Status +- Sequential Agent Actions (Numbered Steps) + - Agent Action + - Action Result (Mandatory, Unmodified) + - Embedded Metadata (Key Findings, Navigation History, Errors, Current Context) + +**Key Guidelines:** +1. Preserve every output verbatim +2. Maintain chronological order +3. Include exact data (URLs, element indexes, error messages, JSON responses) +4. 
Output only the structured summary + +--- + +## Usage Example + +```python +from mem0 import AsyncMemory + +# Initialize memory +memory = await AsyncMemory.from_config(config) + +# Create procedural memory +messages = [ + {"role": "user", "content": "Search for AI news"}, + {"role": "assistant", "content": "I'll search for recent AI news..."}, + # ... more conversation history +] + +result = await memory.add( + messages=messages, + user_id="user_123", + agent_id="research_agent", # ⚠️ REQUIRED for procedural memory + memory_type="procedural_memory", + metadata={ + "task": "AI news research", + "session_id": "session_456" + } +) + +# Result format: +# { +# "results": [ +# { +# "id": "memory_id_here", +# "memory": "## Summary of the agent's execution history...", +# "event": "ADD" +# } +# ] +# } +``` + +--- + +## Requirements & Constraints + +### Required Parameters +- ✅ `agent_id`: **MUST** be provided when using `memory_type="procedural_memory"` +- ✅ `metadata`: **MUST** be provided (cannot be None) +- ✅ `messages`: List of conversation messages to summarize + +### Optional Parameters +- `prompt`: Custom prompt to override default `PROCEDURAL_MEMORY_SYSTEM_PROMPT` +- `llm`: Custom LangChain ChatModel (async version only) + +### Validation Rules +1. `memory_type` must be exactly `"procedural_memory"` (or None) +2. If `memory_type="procedural_memory"` is set, `agent_id` must be provided +3. `metadata` cannot be None for procedural memories + +--- + +## Implementation Details + +### How It Works +1. **Validation**: Checks `memory_type` and required parameters +2. **Prompt Construction**: Uses default or custom system prompt +3. **LLM Summarization**: Calls LLM to generate comprehensive execution summary +4. **Embedding**: Generates embedding for the summary +5. **Storage**: Stores in vector database with `metadata["memory_type"] = "procedural_memory"` +6. 
**Return**: Returns memory ID and summary text + +### Async vs Sync +- **AsyncMemory**: Supports custom LangChain `llm` parameter +- **Memory**: Uses internal LLM from config only + +--- + +## Integration with Nexent + +### Current Status +The Nexent codebase does **NOT** currently use procedural memory. The `memory_type` parameter is not passed in any `add_memory()` calls. + +### Recommended Integration Points + +1. **Agent Service** (`backend/services/agent_service.py`): + - Detect when agent completes a multi-step task + - Call `add_memory_in_levels()` with `memory_type="procedural_memory"` + - Pass the full conversation history as messages + +2. **Memory Service** (`sdk/nexent/memory/memory_service.py`): + - Add `memory_type` parameter to `add_memory()` and `add_memory_in_levels()` + - Pass through to mem0's `add()` method + +3. **Agent Run Info** (`sdk/nexent/core/agents/agent_model.py`): + - Add `memory_type` field to track if current run should create procedural memory + +### Example Integration + +```python +# In agent_service.py, after agent completes a complex task +if task_complexity >= threshold: # Your logic here + await add_memory_in_levels( + messages=conversation_history, + memory_config=memory_ctx.memory_config, + tenant_id=memory_ctx.tenant_id, + user_id=memory_ctx.user_id, + agent_id=memory_ctx.agent_id, + memory_levels=["agent", "user_agent"], + memory_type="procedural_memory", # ✅ NEW PARAMETER + metadata={ + "task_type": "complex_research", + "duration_seconds": duration, + "steps_completed": step_count + } + ) +``` + +--- + +## Conclusion + +Procedural memory is a **fully functional feature** in mem0ai==0.1.117, **BUT it requires an optional dependency**. 
It provides: + +- ✅ Complete API support +- ✅ Comprehensive system prompt (5,100 characters) +- ✅ Proper validation and error handling +- ✅ Both sync and async implementations +- ✅ Integration with existing memory infrastructure +- ⚠️ **REQUIRES `langchain-core` package to be installed** + +### The Truth About "Empty Function" Claims + +**The code is NOT empty.** It's a 50-line implementation that: +1. Calls LLM to generate execution summary +2. Creates embeddings +3. Stores in vector database +4. Returns proper results + +**However, it fails at runtime** if `langchain-core` is not installed, which is why your colleague might have thought it was a no-op. + +### How to Enable + +**Option 1: Install the dependency** +```bash +pip install langchain-core +``` + +**Option 2: Add to Nexent's dependencies** +```toml +# In sdk/pyproject.toml +dependencies = [ + # ... existing deps ... + "langchain-core>=0.1.0", # Required for procedural memory +] +``` + +**Option 3: Make it optional with fallback** +```python +try: + result = await memory.add(..., memory_type="procedural_memory") +except ImportError as e: + if "langchain_core" in str(e): # the error names the module (underscore), not the pip package (hyphen) + logger.warning("Procedural memory requires langchain-core. Using regular memory.") + result = await memory.add(...) # Fallback + else: + raise +``` + +### Final Recommendation + +This feature **can be integrated into Nexent**, but you must: +1. Add `langchain-core` to dependencies, OR +2. Implement graceful fallback when dependency is missing, OR +3. Document it as an optional feature requiring extra installation + +Without addressing the dependency issue, procedural memory will fail at runtime despite having complete implementation code. 
diff --git a/docker/.env.example b/docker/.env.example index a8ec6dedb..3970efb95 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -22,6 +22,13 @@ SPEED_RATIO=1.3 CLIP_MODEL_PATH=/opt/models/clip-vit-base-patch32 NLTK_DATA=/opt/models/nltk_data +# ===== Table and Structure Recognition Models ===== + +# Table Transformer and YOLOX models for extracting tables and layout structure from PDF/DOC/DOCX files. +# Both paths must be set to valid directories/files to enable extraction; if either is left empty, the feature is disabled. +TABLE_TRANSFORMER_MODEL_PATH=/opt/models/table-transformer-structure-recognition +UNSTRUCTURED_DEFAULT_MODEL_INITIALIZE_PARAMS_JSON_PATH=/opt/models/yolox/config.json + # Elasticsearch Service ELASTICSEARCH_HOST=http://nexent-elasticsearch:9200 ELASTIC_PASSWORD=nexent@2025 @@ -52,11 +59,12 @@ DATA_PROCESS_SERVICE=http://nexent-data-process:5012/api # Northbound service (port 5013) - Northbound API service NORTHBOUND_API_SERVER=http://nexent-northbound:5013/api -# Northbound External URL (for A2A Agent Card URLs when behind reverse proxy) +# Northbound External URL # Defaults to http://localhost:5013 for local development # Set this to the public-facing URL for external A2A clients -# Example: https://api.yourdomain.com or http://your-public-ip:5013 -# NORTHBOUND_EXTERNAL_URL=http://your-public-url:5013 +# Must include /api prefix since FastAPI uses root_path="/api" +# Example: https://api.yourdomain.com/api or http://your-public-ip:5013/api +# NORTHBOUND_EXTERNAL_URL=http://your-public-url:5013/api # Postgres Config POSTGRES_HOST=nexent-postgresql @@ -150,16 +158,95 @@ WORKER_NAME= WORKER_CONCURRENCY=4 # Skills Configuration -SKILLS_PATH=/mnt/nexent/skills +SKILLS_PATH=/mnt/nexent-data/skills -# Telemetry and Monitoring Configuration +# Telemetry and Monitoring Configuration (OTLP Protocol) +# Enable OpenTelemetry monitoring for agent observability ENABLE_TELEMETRY=false -SERVICE_NAME=nexent-backend 
-JAEGER_ENDPOINT=http://localhost:14268/api/traces -PROMETHEUS_PORT=8000 +# Provider profile: otlp, phoenix, langfuse, langsmith, grafana, zipkin +MONITORING_PROVIDER=otlp +MONITORING_PROJECT_NAME=nexent +# Browser-accessible monitoring UI URL. Leave empty to hide the frontend entry. +MONITORING_DASHBOARD_URL= +# Trace payload capture mode: +# summary: bounded preview + type/size/count metadata; metrics: metadata only; full: full preview capped by max chars. +# MAX_CHARS limits preview length; MAX_ITEMS limits dict/list preview items. +MONITORING_TRACE_CONTENT_MODE=full +MONITORING_TRACE_MAX_CHARS=4000 +MONITORING_TRACE_MAX_ITEMS=20 +# Service name for identifying traces in observability platforms +OTEL_SERVICE_NAME=nexent-backend +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +# Optional signal-specific endpoints. Leave empty unless the backend requires them. +OTEL_EXPORTER_OTLP_TRACES_ENDPOINT= +OTEL_EXPORTER_OTLP_METRICS_ENDPOINT= +# Protocol: "http" or "grpc" +OTEL_EXPORTER_OTLP_PROTOCOL=http + +# Authentication headers (format: key1=value1,key2=value2) +# Prefer platform-specific variables when using the Collector. +OTEL_EXPORTER_OTLP_HEADERS= +OTEL_EXPORTER_OTLP_AUTHORIZATION= +OTEL_EXPORTER_OTLP_X_API_KEY= +OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION= +OTEL_EXPORTER_OTLP_METRICS_ENABLED=true +MONITORING_INSTRUMENT_REQUESTS=false +# FastAPI endpoint monitoring filters. Values are comma-separated regex patterns. +# Excluded URLs are always skipped. If included URLs is empty, all non-excluded endpoints are monitored. +# If included URLs is non-empty, only matching endpoints are monitored. 
+MONITORING_FASTAPI_INCLUDED_URLS= +MONITORING_FASTAPI_EXCLUDED_URLS= +MONITORING_FASTAPI_EXCLUDE_SPANS=receive,send + TELEMETRY_SAMPLE_RATE=1.0 -LLM_SLOW_REQUEST_THRESHOLD_SECONDS=5.0 -LLM_SLOW_TOKEN_RATE_THRESHOLD=10.0 # Market Backend Address MARKET_BACKEND=http://60.204.251.153:8010 + +# ===== OAuth Configuration ===== +# GitHub OAuth - get credentials from https://github.com/settings/developers +GITHUB_OAUTH_CLIENT_ID= +GITHUB_OAUTH_CLIENT_SECRET= +# GDE OAuth +GDE_URL= +GDE_OAUTH_CLIENT_ID= +GDE_OAUTH_CLIENT_SECRET= +# Link App OAuth +LINK_APP_URL= +LINK_APP_OAUTH_CLIENT_ID= +LINK_APP_OAUTH_CLIENT_SECRET= +# WeChat OAuth (set ENABLE_WECHAT_OAUTH=true to enable) +ENABLE_WECHAT_OAUTH=false +WECHAT_OAUTH_APP_ID= +WECHAT_OAUTH_APP_SECRET= +# OAUTH_CALLBACK_BASE_URL (below) is the base URL for the OAuth callback (e.g., http://localhost:3000 for local dev) +OAUTH_SSL_VERIFY=true +OAUTH_CA_BUNDLE= +OAUTH_CALLBACK_BASE_URL=http://localhost:3000 + +# Asset owner role (opt-in; default false). Set true to enable ASSET_OWNER. +ENABLE_ASSET_OWNER_ROLE=false + +# ===== CAS SSO Configuration ===== +CAS_ENABLED=false +CAS_SERVER_URL= +CAS_VALIDATE_PATH=/p3/serviceValidate +CAS_CALLBACK_BASE_URL=http://localhost:3000 +# Supported values: +# - disabled: disable CAS login entry and automatic CAS redirects. +# - button: show CAS as an optional login entry. +# - force: automatically redirect unauthenticated users to CAS login. 
+CAS_LOGIN_MODE=disabled +CAS_USER_ATTRIBUTE= +CAS_EMAIL_ATTRIBUTE=email +CAS_ROLE_ATTRIBUTE=role +CAS_TENANT_ATTRIBUTE=tenant_id +CAS_ROLE_MAP_JSON= +CAS_SESSION_MAX_AGE_SECONDS=3600 +LOCAL_SESSION_MAX_AGE_SECONDS=3600 +CAS_RENEW_BEFORE_SECONDS=300 +CAS_RENEW_TIMEOUT_SECONDS=10 +CAS_SYNTHETIC_EMAIL_DOMAIN=cas.local +CAS_LOGOUT_URL=/logout +CAS_SSL_VERIFY=true +CAS_CA_BUNDLE= diff --git a/docker/create-su.sh b/docker/create-su.sh old mode 100644 new mode 100755 diff --git a/docker/deploy.sh b/docker/deploy.sh index e30e6e75a..fbf3664b5 100755 --- a/docker/deploy.sh +++ b/docker/deploy.sh @@ -13,16 +13,37 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" CONST_FILE="$PROJECT_ROOT/backend/consts/const.py" DEPLOY_OPTIONS_FILE="$SCRIPT_DIR/deploy.options" +DEPLOYMENT_COMMON="$PROJECT_ROOT/scripts/deployment/common.sh" +ORIGINAL_ARGS=("$@") + +if [ -f "$DEPLOYMENT_COMMON" ]; then + # shellcheck source=/dev/null + source "$DEPLOYMENT_COMMON" +else + echo "❌ Shared deployment helper not found: $DEPLOYMENT_COMMON" + exit 1 +fi MODE_CHOICE_SAVED="" VERSION_CHOICE_SAVED="" IS_MAINLAND_SAVED="" +ENABLE_SKILLS_SAVED="Y" ENABLE_TERMINAL_SAVED="N" TERMINAL_MOUNT_DIR_SAVED="${TERMINAL_MOUNT_DIR:-}" APP_VERSION="" cd "$SCRIPT_DIR" +if [ ! -f ".env" ]; then + if [ -f ".env.example" ]; then + cp .env.example .env + echo "✅ Created docker/.env from docker/.env.example" + else + echo "❌ .env not found and .env.example is missing in $SCRIPT_DIR" + exit 1 + fi +fi + set -a source .env @@ -38,6 +59,25 @@ export COMPOSE_IGNORE_ORPHANS=True while [[ $# -gt 0 ]]; do case "$1" in + delete|delete-all|--delete-volumes|--remove-volumes|--keep-volumes) + echo "❌ Docker uninstall has moved to uninstall.sh. 
Use: bash uninstall.sh" + exit 1 + ;; + --help|-h) + echo "Usage: $0 [options]" + echo "" + echo "Deploy options:" + echo " --components LIST" + echo " --port-policy development|production" + echo " --image-source general|mainland|local-latest" + echo " --use-local-config" + echo " --reconfigure" + echo " --config PATH" + echo " --root-dir PATH" + echo "" + echo "Uninstall: bash uninstall.sh" + exit 0 + ;; --mode) MODE_CHOICE="$2" shift 2 @@ -111,6 +151,49 @@ is_port_in_use() { return 1 } +is_nexent_container_name() { + local container_name="$1" + + case "$container_name" in + nexent-*|nexent_*|supabase-*-mini) + return 0 + ;; + *) + return 1 + ;; + esac +} + +docker_containers_using_host_port() { + local port="$1" + + if ! command -v docker >/dev/null 2>&1; then + return 0 + fi + + while IFS=$'\t' read -r container_name published_ports; do + if [ -n "$container_name" ] && [[ "$published_ports" == *":${port}->"* ]]; then + echo "$container_name" + fi + done < <(docker ps --format '{{.Names}}\t{{.Ports}}' 2>/dev/null) +} + +is_port_used_by_nexent_only() { + local port="$1" + local container_name + local found="false" + + while IFS= read -r container_name; do + [ -n "$container_name" ] || continue + found="true" + if ! is_nexent_container_name "$container_name"; then + return 1 + fi + done < <(docker_containers_using_host_port "$port") + + [ "$found" = "true" ] +} + add_port_if_new() { # Helper to add a port to global arrays only if not already present local port="$1" @@ -193,6 +276,8 @@ check_ports_in_env_files() { echo "🔍 Checking port availability defined in environment files..." 
local occupied_ports=() local occupied_sources=() + local ignored_nexent_ports=0 + local free_ports=0 local idx for idx in "${!PORTS_TO_CHECK[@]}"; do @@ -200,14 +285,26 @@ check_ports_in_env_files() { local source="${PORT_SOURCES[$idx]}" if is_port_in_use "$port"; then + if is_port_used_by_nexent_only "$port"; then + ignored_nexent_ports=$((ignored_nexent_ports + 1)) + continue + fi occupied_ports+=("$port") occupied_sources+=("$source") echo " ❌ Port $port is already in use." else - echo " ✅ Port $port is free." + free_ports=$((free_ports + 1)) fi done + if [ "$free_ports" -gt 0 ]; then + echo " ✅ $free_ports port(s) available." + fi + + if [ "$ignored_nexent_ports" -gt 0 ]; then + echo " ↺ Ignored $ignored_nexent_ports port(s) already used by Nexent containers." + fi + if [ ${#occupied_ports[@]} -gt 0 ]; then echo "" echo "❌ Port conflict detected. The following ports required by Nexent are already in use:" @@ -236,6 +333,72 @@ check_ports_in_env_files() { echo "" } +check_deployment_ports() { + PORTS_TO_CHECK=() + PORT_SOURCES=() + + local port + for port in $DEPLOYMENT_DOCKER_PORTS; do + add_port_if_new "$port" "deployment port policy: $DEPLOYMENT_PORT_POLICY" + done + + if [ ${#PORTS_TO_CHECK[@]} -eq 0 ]; then + echo "🔍 No host ports are published by the selected deployment configuration." + echo "" + echo "--------------------------------" + echo "" + return 0 + fi + + echo "🔍 Checking port availability for selected deployment policy..." + local occupied_ports=() + local ignored_nexent_ports=0 + local free_ports=0 + local idx + for idx in "${!PORTS_TO_CHECK[@]}"; do + local selected_port="${PORTS_TO_CHECK[$idx]}" + if is_port_in_use "$selected_port"; then + if is_port_used_by_nexent_only "$selected_port"; then + ignored_nexent_ports=$((ignored_nexent_ports + 1)) + continue + fi + occupied_ports+=("$selected_port") + echo " ❌ Port $selected_port is already in use." 
+ else + free_ports=$((free_ports + 1)) + fi + done + + if [ "$free_ports" -gt 0 ]; then + echo " ✅ $free_ports port(s) available." + fi + + if [ "$ignored_nexent_ports" -gt 0 ]; then + echo " ↺ Ignored $ignored_nexent_ports port(s) already used by Nexent containers." + fi + + if [ ${#occupied_ports[@]} -gt 0 ]; then + echo "" + echo "❌ Port conflict detected for selected deployment policy:" + local occupied + for occupied in "${occupied_ports[@]}"; do + echo " - Port $occupied" + done + echo "" + local confirm_continue + read -p "👉 Do you still want to continue deployment even though some ports are in use? [y/N]: " confirm_continue + confirm_continue=$(sanitize_input "$confirm_continue") + if ! [[ "$confirm_continue" =~ ^[Yy]$ ]]; then + echo "🚫 Deployment aborted due to port conflicts." + exit 1 + fi + fi + + echo "" + echo "--------------------------------" + echo "" +} + trim_quotes() { local value="$1" value="${value%$'\r'}" @@ -266,12 +429,22 @@ persist_deploy_options() { echo "MODE_CHOICE=\"${MODE_CHOICE_SAVED}\"" echo "VERSION_CHOICE=\"${VERSION_CHOICE_SAVED}\"" echo "IS_MAINLAND=\"${IS_MAINLAND_SAVED}\"" + echo "ENABLE_SKILLS=\"${ENABLE_SKILLS_SAVED}\"" echo "ENABLE_TERMINAL=\"${ENABLE_TERMINAL_SAVED}\"" echo "TERMINAL_MOUNT_DIR=\"${TERMINAL_MOUNT_DIR_SAVED}\"" } > "$DEPLOY_OPTIONS_FILE" } generate_minio_ak_sk() { + if [ -n "${MINIO_ACCESS_KEY:-}" ] && [ -n "${MINIO_SECRET_KEY:-}" ]; then + echo " Reusing existing MinIO access keys from docker/.env" + export MINIO_ACCESS_KEY + export MINIO_SECRET_KEY + update_env_var "MINIO_ACCESS_KEY" "$MINIO_ACCESS_KEY" + update_env_var "MINIO_SECRET_KEY" "$MINIO_SECRET_KEY" + return 0 + fi + echo "🔑 Generating MinIO keys..." 
if [ "$(uname -s | tr '[:upper:]' '[:lower:]')" = "mingw" ] || [ "$(uname -s | tr '[:upper:]' '[:lower:]')" = "msys" ]; then @@ -365,7 +538,7 @@ generate_elasticsearch_api_key() { generate_env_for_infrastructure() { # Function to generate complete environment file for infrastructure mode using generate_env.sh - echo "🔑 Generating complete environment file in root directory..." + echo "🔑 Updating docker/.env for infrastructure mode..." echo " 🚀 Running generate_env.sh..." # Check if generate_env.sh exists @@ -381,16 +554,14 @@ generate_env_for_infrastructure() { export DEPLOYMENT_VERSION if ./generate_env.sh; then - echo " ✅ Environment file generated successfully for infrastructure mode!" - # Source the generated .env file to make variables available - if [ -f "../.env" ]; then - echo " ⏏️ Sourcing generated root .env file..." + echo " ✅ docker/.env updated successfully for infrastructure mode!" + if [ -f ".env" ]; then set -a - source ../.env + source .env set +a - echo " ✅ Environment variables loaded from ../.env" + echo " ✅ Environment variables loaded from docker/.env" else - echo " ⚠️ Warning: ../.env file not found after generation" + echo " ⚠️ Warning: docker/.env file not found after generation" return 1 fi else @@ -407,7 +578,7 @@ get_compose_version() { # Function to get the version of docker compose if command -v docker &> /dev/null; then version_output=$(docker compose version 2>/dev/null) - if [[ $version_output =~ (v[0-9]+\.[0-9]+\.[0-9]+) ]]; then + if [[ $version_output =~ v([0-9]+\.[0-9]+\.[0-9]+) ]]; then echo "v2 ${BASH_REMATCH[1]}" return 0 fi @@ -430,7 +601,21 @@ disable_dashboard() { update_env_var "DISABLE_CELERY_FLOWER" "true" } +sync_monitoring_env_vars() { + update_env_var "ENABLE_TELEMETRY" "$(deployment_monitoring_enabled)" + update_env_var "MONITORING_PROVIDER" "$DEPLOYMENT_MONITORING_PROVIDER" + update_env_var "MONITORING_DASHBOARD_URL" "$(deployment_monitoring_dashboard_url docker)" +} + pull_mcp_image() { + if [ 
"$DEPLOYMENT_IMAGE_SOURCE" = "local-latest" ]; then + echo "🔄 Skipping MCP image pull because image source is local-latest." + echo "" + echo "--------------------------------" + echo "" + return 0 + fi + echo "🔄 Checking MCP Docker image..." # Get MCP image name from environment or use default @@ -538,9 +723,6 @@ clean() { if [ -f ".env.bak" ]; then rm .env.bak fi - if [ -f "../.env.bak" ]; then - rm ../.env.bak - fi } update_env_var() { @@ -614,6 +796,15 @@ prepare_directory_and_data() { create_dir_with_permission "$NEXENT_USER_DIR" 775 echo " 🖥️ Nexent user workspace: $NEXENT_USER_DIR" + # Copy official-skills-zip folder to /mnt/nexent + if [ -d "official-skills-zip" ]; then + cp -rn official-skills-zip "$NEXENT_USER_DIR/" + chmod -R 775 "$NEXENT_USER_DIR/official-skills-zip" + echo " 📦 Official skills copied to $NEXENT_USER_DIR/official-skills-zip" + else + echo " ⚠️ official-skills-zip directory not found, skipping skills copy" + fi + # Export for docker-compose export NEXENT_USER_DIR @@ -624,35 +815,69 @@ prepare_directory_and_data() { deploy_core_services() { # Function to deploy core services - echo "👀 Starting core services..." - if ! ${docker_compose_command} -p nexent -f "docker-compose${COMPOSE_FILE_SUFFIX}" up -d nexent-config nexent-runtime nexent-mcp nexent-northbound nexent-web nexent-data-process; then + local core_services=() + local service + for service in $DEPLOYMENT_SELECTED_DOCKER_SERVICES; do + case "$service" in + nexent-config|nexent-runtime|nexent-mcp|nexent-northbound|nexent-web|nexent-data-process) + core_services+=("$service") + ;; + esac + done + + if [ ${#core_services[@]} -eq 0 ]; then + echo "👀 No core services selected, skipping core service startup." + return 0 + fi + + echo "👀 Starting core services: ${core_services[*]}" + if ! 
${docker_compose_command} -p nexent -f "docker-compose${COMPOSE_FILE_SUFFIX}" up -d "${core_services[@]}"; then echo " ❌ ERROR Failed to start core services" return 1 fi } +stop_unselected_data_process_service() { + deployment_csv_contains "$DEPLOYMENT_COMPONENTS" "data-process" && return 0 + + local compose_file="docker-compose${COMPOSE_FILE_SUFFIX}" + [ -f "$compose_file" ] || return 0 + + echo "data-process is not selected; stopping existing Docker container if present..." + ${docker_compose_command} -p nexent -f "$compose_file" stop nexent-data-process >/dev/null 2>&1 || true + ${docker_compose_command} -p nexent -f "$compose_file" rm -f nexent-data-process >/dev/null 2>&1 || true +} + deploy_infrastructure() { # Start infrastructure services (basic services only) echo "🔧 Starting infrastructure services..." - INFRA_SERVICES="nexent-elasticsearch nexent-postgresql nexent-minio redis" + INFRA_SERVICES="" + + if deployment_csv_contains "$DEPLOYMENT_COMPONENTS" "infrastructure"; then + INFRA_SERVICES="nexent-elasticsearch nexent-postgresql nexent-minio redis" + fi # Add openssh-server if Terminal tool container is enabled - if [ "$ENABLE_TERMINAL_TOOL_CONTAINER" = "true" ]; then + if deployment_csv_contains "$DEPLOYMENT_COMPONENTS" "terminal"; then INFRA_SERVICES="$INFRA_SERVICES nexent-openssh-server" echo "🔧 Terminal tool container enabled - openssh-server will be included in infrastructure" fi - if ! ${docker_compose_command} -p nexent -f "docker-compose${COMPOSE_FILE_SUFFIX}" up -d $INFRA_SERVICES; then - echo " ❌ ERROR Failed to start infrastructure services" - return 1 + if [ -n "$INFRA_SERVICES" ]; then + if ! ${docker_compose_command} -p nexent -f "docker-compose${COMPOSE_FILE_SUFFIX}" up -d $INFRA_SERVICES; then + echo " ❌ ERROR Failed to start infrastructure services" + return 1 + fi + else + echo "🔧 No infrastructure services selected, skipping infrastructure startup." 
fi - if [ "$ENABLE_TERMINAL_TOOL_CONTAINER" = "true" ]; then + if deployment_csv_contains "$DEPLOYMENT_COMPONENTS" "terminal"; then echo "🔧 Terminal tool container (openssh-server) is now available for AI agents" fi - # Deploy Supabase services based on DEPLOYMENT_VERSION - if [ "$DEPLOYMENT_VERSION" = "full" ]; then + # Deploy Supabase services based on selected components + if deployment_csv_contains "$DEPLOYMENT_COMPONENTS" "supabase"; then echo "" echo "🔧 Starting Supabase services..." # Check if the supabase compose file exists @@ -675,6 +900,105 @@ deploy_infrastructure() { echo " ✅ Infrastructure services started successfully" } +deploy_monitoring() { + deployment_csv_contains "$DEPLOYMENT_COMPONENTS" "monitoring" || return 0 + + if [ ! -f "docker-compose-monitoring.yml" ]; then + echo " ❌ ERROR Monitoring compose file not found: docker-compose-monitoring.yml" + return 1 + fi + + local profile_args=() + case "$DEPLOYMENT_MONITORING_PROVIDER" in + phoenix|grafana|zipkin|langfuse) + profile_args+=(--profile "$DEPLOYMENT_MONITORING_PROVIDER") + ;; + esac + + echo "🔭 Starting monitoring services..." + if ! 
${docker_compose_command} "${profile_args[@]}" -f "docker-compose-monitoring.yml" up -d; then + echo " ❌ ERROR Failed to start monitoring services" + return 1 + fi +} + +configure_root_dir_from_env() { + if [ -n "$ROOT_DIR_PARAM" ]; then + ROOT_DIR="$ROOT_DIR_PARAM" + echo " 📁 Using ROOT_DIR from parameter: $ROOT_DIR" + update_env_var "ROOT_DIR" "$ROOT_DIR" + elif grep -q "^ROOT_DIR=" .env; then + ROOT_DIR="$(grep "^ROOT_DIR=" .env | cut -d'=' -f2 | sed 's/^"//;s/"$//')" + echo " 📁 Use existing ROOT_DIR path: $ROOT_DIR" + else + local default_root_dir="$HOME/nexent-data" + if [ -t 0 ]; then + local user_root_dir + read -p " 📁 Enter ROOT_DIR path (default: $default_root_dir): " user_root_dir + ROOT_DIR="${user_root_dir:-$default_root_dir}" + else + ROOT_DIR="$default_root_dir" + fi + update_env_var "ROOT_DIR" "$ROOT_DIR" + fi + export ROOT_DIR + echo "" + echo "--------------------------------" + echo "" +} + +apply_deployment_common_config() { + deployment_prepare_config "${ORIGINAL_ARGS[@]}" || return 1 + + if deployment_csv_contains "$DEPLOYMENT_COMPONENTS" "supabase"; then + export DEPLOYMENT_VERSION="full" + else + export DEPLOYMENT_VERSION="speed" + fi + update_env_var "DEPLOYMENT_VERSION" "$DEPLOYMENT_VERSION" + + if [ "$DEPLOYMENT_PORT_POLICY" = "production" ]; then + export DEPLOYMENT_MODE="production" + export COMPOSE_FILE_SUFFIX=".prod.yml" + disable_dashboard + elif [ "$DEPLOYMENT_COMPONENTS" = "infrastructure" ]; then + export DEPLOYMENT_MODE="infrastructure" + export COMPOSE_FILE_SUFFIX=".yml" + else + export DEPLOYMENT_MODE="development" + export COMPOSE_FILE_SUFFIX=".yml" + fi + + if deployment_csv_contains "$DEPLOYMENT_COMPONENTS" "terminal"; then + ENABLE_TERMINAL_SAVED="Y" + export ENABLE_TERMINAL_TOOL_CONTAINER="true" + export COMPOSE_PROFILES="${COMPOSE_PROFILES:+$COMPOSE_PROFILES,}terminal" + else + ENABLE_TERMINAL_SAVED="N" + export ENABLE_TERMINAL_TOOL_CONTAINER="false" + fi + + export APP_VERSION="$DEPLOYMENT_APP_VERSION" + case 
"$DEPLOYMENT_REGISTRY_PROFILE" in + mainland) + IS_MAINLAND_SAVED="Y" + source .env.mainland + ;; + general|local-latest) + IS_MAINLAND_SAVED="N" + source .env.general + ;; + esac + + deployment_apply_image_source + deployment_render_docker_env "$SCRIPT_DIR/.env.generated" + set -a + source "$SCRIPT_DIR/.env.generated" + set +a + sync_monitoring_env_vars + deployment_print_summary docker +} + select_deployment_version() { # Function to select deployment version echo "🚀 Please select deployment version:" @@ -867,7 +1191,7 @@ select_terminal_tool() { check_super_admin_user_exists() { # Check if super admin user exists in Supabase - local email="suadmin@nexent.com" + local email="${1:-suadmin@nexent.com}" local curl_container="nexent-config" # Determine which container to use for curl command @@ -1003,8 +1327,10 @@ create_default_super_admin_user() { # Execute the script with password as argument if bash "$script_path" "$password"; then + unset password return 0 else + unset password return 1 fi } @@ -1048,14 +1374,15 @@ main_deploy() { fi echo "🌐 App version: $APP_VERSION" - # Check all relevant ports from environment files before starting deployment - check_ports_in_env_files + # Select deployment components, port policy and image source via shared config. + apply_deployment_common_config || { echo "❌ Deployment configuration failed"; exit 1; } - # Select deployment version, mode and image source - select_deployment_version || { echo "❌ Deployment version selection failed"; exit 1; } - select_deployment_mode || { echo "❌ Deployment mode selection failed"; exit 1; } - select_terminal_tool || { echo "❌ Terminal tool container configuration failed"; exit 1; } - choose_image_env || { echo "❌ Image environment setup failed"; exit 1; } + deployment_persist_local_config + + # Check only the ports published by the selected deployment configuration. 
+ check_deployment_ports + + configure_root_dir_from_env || { echo "❌ ROOT_DIR configuration failed"; exit 1; } # Set NEXENT_MCP_DOCKER_IMAGE in .env file if [ -n "${NEXENT_MCP_DOCKER_IMAGE:-}" ]; then @@ -1076,6 +1403,10 @@ main_deploy() { # Deploy infrastructure services deploy_infrastructure || { echo "❌ Infrastructure deployment failed"; exit 1; } + deploy_monitoring || { echo "❌ Monitoring deployment failed"; exit 1; } + + stop_unselected_data_process_service + # Generate Elasticsearch API key generate_elasticsearch_api_key || { echo "❌ Elasticsearch API key generation failed"; exit 1; } @@ -1094,13 +1425,14 @@ main_deploy() { echo "🎉 Infrastructure deployment completed successfully!" echo " You can now start the core services manually using dev containers" - echo " Environment file available at: $(cd .. && pwd)/.env" - echo "💡 Use 'source .env' to load environment variables in your development shell" + echo " Environment file available at: $SCRIPT_DIR/.env" + echo "💡 Use 'source docker/.env' from the project root to load environment variables" # Pull MCP image for later use pull_mcp_image persist_deploy_options + deployment_persist_local_config return 0 fi @@ -1118,6 +1450,7 @@ main_deploy() { fi persist_deploy_options + deployment_persist_local_config # Pull MCP image for later use pull_mcp_image @@ -1142,7 +1475,7 @@ docker_compose_command="" case $version_type in "v1") echo "Detected Docker Compose V1, version: $version_number" - # The version ​​v1.28.0​​ is the minimum requirement in Docker Compose v1 that explicitly supports interpolation syntax with default values like ${VAR:-default} + # The version 1.28.0 is the minimum requirement in Docker Compose v1 for default interpolation syntax. 
if [[ $version_number < "1.28.0" ]]; then echo "Warning: V1 version is too old, consider upgrading to V2" exit 1 diff --git a/docker/docker-compose-monitoring.yml b/docker/docker-compose-monitoring.yml index fb4aa5eaf..976a57c97 100644 --- a/docker/docker-compose-monitoring.yml +++ b/docker/docker-compose-monitoring.yml @@ -1,88 +1,268 @@ +name: monitor + services: - # Jaeger - Distributed Tracing - jaeger: - image: jaegertracing/all-in-one:1.52 - container_name: nexent-jaeger - ports: - - "16686:16686" # Jaeger UI - - "14268:14268" # Jaeger collector HTTP - - "14250:14250" # Jaeger collector gRPC - - "6831:6831/udp" # Agent UDP - - "6832:6832/udp" # Agent UDP + otel-collector: + image: otel/opentelemetry-collector-contrib:${OTEL_COLLECTOR_VERSION:-0.151.0} + container_name: nexent-otel-collector + command: ["--config=/etc/otel-collector-config.yml"] environment: - - COLLECTOR_OTLP_ENABLED=true - - COLLECTOR_ZIPKIN_HOST_PORT=:9411 + LANGFUSE_OTLP_AUTH_HEADER: ${LANGFUSE_OTLP_AUTH_HEADER:-} + LANGSMITH_API_KEY: ${LANGSMITH_API_KEY:-} + LANGSMITH_PROJECT: ${LANGSMITH_PROJECT:-nexent} + LANGSMITH_OTLP_TRACES_ENDPOINT: ${LANGSMITH_OTLP_TRACES_ENDPOINT:-https://api.smith.langchain.com/otel/v1/traces} + volumes: + - ${OTEL_COLLECTOR_CONFIG_FILE:-./monitoring/otel-collector-config.yml}:/etc/otel-collector-config.yml + ports: + - "${OTEL_COLLECTOR_GRPC_PORT:-4317}:4317" + - "${OTEL_COLLECTOR_HTTP_PORT:-4318}:4318" networks: - - nexent-network + - nexent restart: unless-stopped - volumes: - - jaeger-data:/tmp - # Prometheus - Metrics Collection - prometheus: - image: prom/prometheus:v2.48.0 - container_name: nexent-prometheus + phoenix: + image: arizephoenix/phoenix:${PHOENIX_VERSION:-15} + container_name: nexent-phoenix + profiles: ["phoenix"] + environment: + PHOENIX_WORKING_DIR: /mnt/data + volumes: + - phoenix-data:/mnt/data ports: - - "9090:9090" - command: - - '--config.file=/etc/prometheus/prometheus.yml' - - '--storage.tsdb.path=/prometheus' - - 
'--web.console.libraries=/etc/prometheus/console_libraries' - - '--web.console.templates=/etc/prometheus/consoles' - - '--storage.tsdb.retention.time=15d' - - '--web.enable-lifecycle' - - '--web.enable-admin-api' + - "${PHOENIX_PORT:-6006}:6006" + - "${PHOENIX_GRPC_HOST_PORT:-4319}:4317" + networks: + - nexent + restart: unless-stopped + + tempo: + image: grafana/tempo:${TEMPO_VERSION:-2.10.5} + container_name: nexent-tempo + profiles: ["grafana"] + command: ["--config.file=/etc/tempo.yml"] volumes: - - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml - - prometheus-data:/prometheus + - ./monitoring/tempo.yml:/etc/tempo.yml:ro + - tempo-data:/var/tempo + ports: + - "${TEMPO_PORT:-3200}:3200" networks: - - nexent-network + - nexent restart: unless-stopped - # Grafana - Metrics Visualization grafana: - image: grafana/grafana:10.2.0 + image: grafana/grafana:${GRAFANA_VERSION:-12.4} container_name: nexent-grafana - ports: - - "3005:3000" + profiles: ["grafana"] environment: - - GF_SECURITY_ADMIN_PASSWORD=admin - - GF_USERS_ALLOW_SIGN_UP=false - - GF_INSTALL_PLUGINS=grafana-piechart-panel + GF_SECURITY_ADMIN_USER: ${GRAFANA_ADMIN_USER:-admin} + GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-nexent-grafana-admin} + GF_USERS_ALLOW_SIGN_UP: "false" + GF_USERS_DEFAULT_LANGUAGE: ${GRAFANA_DEFAULT_LANGUAGE:-zh-Hans} + GF_PLUGINS_PREINSTALL_AUTO_UPDATE: "false" volumes: - grafana-data:/var/lib/grafana - - ./monitoring/grafana/provisioning:/etc/grafana/provisioning - - ./monitoring/grafana/dashboards:/var/lib/grafana/dashboards + - ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro + - ./monitoring/grafana/dashboards:/var/lib/grafana/dashboards:ro + ports: + - "${GRAFANA_PORT:-3002}:3000" + depends_on: + - tempo networks: - - nexent-network + - nexent restart: unless-stopped - depends_on: - - prometheus - # OpenTelemetry Collector (Optional - for advanced setups) - otel-collector: - image: otel/opentelemetry-collector-contrib:0.89.0 - 
container_name: nexent-otel-collector - command: ["--config=/etc/otel-collector-config.yml"] + zipkin: + image: openzipkin/zipkin:${ZIPKIN_VERSION:-latest} + container_name: nexent-zipkin + profiles: ["zipkin"] + ports: + - "${ZIPKIN_PORT:-9411}:9411" + networks: + - nexent + restart: unless-stopped + + langfuse-worker: + image: docker.io/langfuse/langfuse-worker:${LANGFUSE_VERSION:-3} + container_name: nexent-langfuse-worker + profiles: ["langfuse"] + restart: unless-stopped + depends_on: &langfuse-depends-on + langfuse-postgres: + condition: service_healthy + langfuse-minio: + condition: service_healthy + langfuse-redis: + condition: service_healthy + langfuse-clickhouse: + condition: service_healthy + environment: &langfuse-env + NEXTAUTH_URL: ${LANGFUSE_NEXTAUTH_URL:-http://localhost:3001} + NEXTAUTH_SECRET: ${LANGFUSE_NEXTAUTH_SECRET:-nexent-langfuse-secret} + DATABASE_URL: postgresql://${LANGFUSE_POSTGRES_USER:-postgres}:${LANGFUSE_POSTGRES_PASSWORD:-postgres}@langfuse-postgres:5432/${LANGFUSE_POSTGRES_DB:-postgres} + SALT: ${LANGFUSE_SALT:-nexent-langfuse-salt} + ENCRYPTION_KEY: ${LANGFUSE_ENCRYPTION_KEY:-0000000000000000000000000000000000000000000000000000000000000000} + TELEMETRY_ENABLED: ${LANGFUSE_TELEMETRY_ENABLED:-false} + LANGFUSE_ENABLE_EXPERIMENTAL_FEATURES: ${LANGFUSE_ENABLE_EXPERIMENTAL_FEATURES:-false} + CLICKHOUSE_MIGRATION_URL: clickhouse://langfuse-clickhouse:9000 + CLICKHOUSE_URL: http://langfuse-clickhouse:8123 + CLICKHOUSE_USER: ${LANGFUSE_CLICKHOUSE_USER:-clickhouse} + CLICKHOUSE_PASSWORD: ${LANGFUSE_CLICKHOUSE_PASSWORD:-clickhouse} + CLICKHOUSE_CLUSTER_ENABLED: ${LANGFUSE_CLICKHOUSE_CLUSTER_ENABLED:-false} + REDIS_HOST: langfuse-redis + REDIS_PORT: 6379 + REDIS_AUTH: ${LANGFUSE_REDIS_AUTH:-myredissecret} + REDIS_TLS_ENABLED: "false" + LANGFUSE_USE_AZURE_BLOB: "false" + LANGFUSE_USE_OCI_NATIVE_OBJECT_STORAGE: "false" + LANGFUSE_S3_EVENT_UPLOAD_BUCKET: ${LANGFUSE_S3_BUCKET:-langfuse} + LANGFUSE_S3_EVENT_UPLOAD_REGION: auto + 
LANGFUSE_S3_EVENT_UPLOAD_ACCESS_KEY_ID: ${LANGFUSE_MINIO_ROOT_USER:-minio} + LANGFUSE_S3_EVENT_UPLOAD_SECRET_ACCESS_KEY: ${LANGFUSE_MINIO_ROOT_PASSWORD:-miniosecret} + LANGFUSE_S3_EVENT_UPLOAD_ENDPOINT: http://langfuse-minio:9000 + LANGFUSE_S3_EVENT_UPLOAD_FORCE_PATH_STYLE: "true" + LANGFUSE_S3_EVENT_UPLOAD_PREFIX: events/ + LANGFUSE_S3_MEDIA_UPLOAD_BUCKET: ${LANGFUSE_S3_BUCKET:-langfuse} + LANGFUSE_S3_MEDIA_UPLOAD_REGION: auto + LANGFUSE_S3_MEDIA_UPLOAD_ACCESS_KEY_ID: ${LANGFUSE_MINIO_ROOT_USER:-minio} + LANGFUSE_S3_MEDIA_UPLOAD_SECRET_ACCESS_KEY: ${LANGFUSE_MINIO_ROOT_PASSWORD:-miniosecret} + LANGFUSE_S3_MEDIA_UPLOAD_ENDPOINT: http://langfuse-minio:9000 + LANGFUSE_S3_MEDIA_UPLOAD_FORCE_PATH_STYLE: "true" + LANGFUSE_S3_MEDIA_UPLOAD_PREFIX: media/ + LANGFUSE_S3_BATCH_EXPORT_ENABLED: "false" + LANGFUSE_S3_BATCH_EXPORT_BUCKET: ${LANGFUSE_S3_BUCKET:-langfuse} + LANGFUSE_S3_BATCH_EXPORT_REGION: auto + LANGFUSE_S3_BATCH_EXPORT_ENDPOINT: http://langfuse-minio:9000 + LANGFUSE_S3_BATCH_EXPORT_EXTERNAL_ENDPOINT: http://localhost:${LANGFUSE_MINIO_API_PORT:-9092} + LANGFUSE_S3_BATCH_EXPORT_ACCESS_KEY_ID: ${LANGFUSE_MINIO_ROOT_USER:-minio} + LANGFUSE_S3_BATCH_EXPORT_SECRET_ACCESS_KEY: ${LANGFUSE_MINIO_ROOT_PASSWORD:-miniosecret} + LANGFUSE_S3_BATCH_EXPORT_FORCE_PATH_STYLE: "true" + networks: + - nexent + + langfuse-web: + image: docker.io/langfuse/langfuse:${LANGFUSE_VERSION:-3} + container_name: nexent-langfuse-web + profiles: ["langfuse"] + restart: unless-stopped + depends_on: *langfuse-depends-on + environment: + <<: *langfuse-env + LANGFUSE_INIT_ORG_ID: ${LANGFUSE_INIT_ORG_ID:-nexent} + LANGFUSE_INIT_ORG_NAME: ${LANGFUSE_INIT_ORG_NAME:-Nexent} + LANGFUSE_INIT_PROJECT_ID: ${LANGFUSE_INIT_PROJECT_ID:-nexent-local} + LANGFUSE_INIT_PROJECT_NAME: ${LANGFUSE_INIT_PROJECT_NAME:-Nexent Local} + LANGFUSE_INIT_PROJECT_PUBLIC_KEY: ${LANGFUSE_INIT_PROJECT_PUBLIC_KEY:-pk-lf-nexent-local} + LANGFUSE_INIT_PROJECT_SECRET_KEY: ${LANGFUSE_INIT_PROJECT_SECRET_KEY:-sk-lf-nexent-local} + 
LANGFUSE_INIT_USER_EMAIL: ${LANGFUSE_INIT_USER_EMAIL:-admin@nexent.local} + LANGFUSE_INIT_USER_NAME: ${LANGFUSE_INIT_USER_NAME:-Nexent Admin} + LANGFUSE_INIT_USER_PASSWORD: ${LANGFUSE_INIT_USER_PASSWORD:-nexent-langfuse-admin} + ports: + - "${LANGFUSE_PORT:-3001}:3000" + networks: + - nexent + + langfuse-clickhouse: + image: docker.io/clickhouse/clickhouse-server:${LANGFUSE_CLICKHOUSE_VERSION:-26.3-alpine} + container_name: nexent-langfuse-clickhouse + profiles: ["langfuse"] + restart: unless-stopped + user: "101:101" + environment: + CLICKHOUSE_DB: default + CLICKHOUSE_USER: ${LANGFUSE_CLICKHOUSE_USER:-clickhouse} + CLICKHOUSE_PASSWORD: ${LANGFUSE_CLICKHOUSE_PASSWORD:-clickhouse} volumes: - - ./monitoring/otel-collector-config.yml:/etc/otel-collector-config.yml + - langfuse-clickhouse-data:/var/lib/clickhouse + - langfuse-clickhouse-logs:/var/log/clickhouse-server ports: - - "4317:4317" # OTLP gRPC receiver - - "4318:4318" # OTLP HTTP receiver - - "8888:8888" # Prometheus metrics exposed by the collector - - "8889:8889" # Prometheus exporter metrics - depends_on: - - jaeger - - prometheus + - "127.0.0.1:${LANGFUSE_CLICKHOUSE_HTTP_PORT:-8124}:8123" + - "127.0.0.1:${LANGFUSE_CLICKHOUSE_NATIVE_PORT:-9002}:9000" + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://127.0.0.1:8123/ping || exit 1"] + interval: 5s + timeout: 5s + retries: 10 + start_period: 1s networks: - - nexent-network + - nexent + + langfuse-minio: + image: docker.io/minio/minio:${LANGFUSE_MINIO_VERSION:-RELEASE.2023-12-20T01-00-02Z} + container_name: nexent-langfuse-minio + profiles: ["langfuse"] restart: unless-stopped + entrypoint: sh + command: -c 'mkdir -p /data/${LANGFUSE_S3_BUCKET:-langfuse} && minio server --address ":9000" --console-address ":9001" /data' + environment: + MINIO_ROOT_USER: ${LANGFUSE_MINIO_ROOT_USER:-minio} + MINIO_ROOT_PASSWORD: ${LANGFUSE_MINIO_ROOT_PASSWORD:-miniosecret} + ports: + - "${LANGFUSE_MINIO_API_PORT:-9092}:9000" + - 
"127.0.0.1:${LANGFUSE_MINIO_CONSOLE_PORT:-9093}:9001" + volumes: + - langfuse-minio-data:/data + healthcheck: + test: ["CMD", "mc", "ready", "local"] + interval: 1s + timeout: 5s + retries: 5 + start_period: 1s + networks: + - nexent -volumes: - jaeger-data: - prometheus-data: - grafana-data: + langfuse-redis: + image: docker.io/redis:${LANGFUSE_REDIS_VERSION:-alpine} + container_name: nexent-langfuse-redis + profiles: ["langfuse"] + restart: unless-stopped + command: > + --requirepass ${LANGFUSE_REDIS_AUTH:-myredissecret} + --maxmemory-policy noeviction + ports: + - "127.0.0.1:${LANGFUSE_REDIS_PORT:-6380}:6379" + volumes: + - langfuse-redis-data:/data + healthcheck: + test: ["CMD-SHELL", "redis-cli -a ${LANGFUSE_REDIS_AUTH:-myredissecret} ping | grep PONG"] + interval: 3s + timeout: 10s + retries: 10 + networks: + - nexent + + langfuse-postgres: + image: docker.io/postgres:${LANGFUSE_POSTGRES_VERSION:-15-alpine} + container_name: nexent-langfuse-postgres + profiles: ["langfuse"] + restart: unless-stopped + environment: + POSTGRES_USER: ${LANGFUSE_POSTGRES_USER:-postgres} + POSTGRES_PASSWORD: ${LANGFUSE_POSTGRES_PASSWORD:-postgres} + POSTGRES_DB: ${LANGFUSE_POSTGRES_DB:-postgres} + TZ: UTC + PGTZ: UTC + ports: + - "127.0.0.1:${LANGFUSE_POSTGRES_PORT:-5440}:5432" + volumes: + - langfuse-postgres-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${LANGFUSE_POSTGRES_USER:-postgres}"] + interval: 3s + timeout: 3s + retries: 10 + networks: + - nexent networks: - nexent-network: + nexent: + name: nexent_network external: true + +volumes: + phoenix-data: + langfuse-postgres-data: + langfuse-clickhouse-data: + langfuse-clickhouse-logs: + langfuse-minio-data: + langfuse-redis-data: + grafana-data: + tempo-data: diff --git a/docker/docker-compose-supabase.prod.yml b/docker/docker-compose-supabase.prod.yml index 234185b0b..6ad7ac134 100644 --- a/docker/docker-compose-supabase.prod.yml +++ b/docker/docker-compose-supabase.prod.yml @@ -142,4 
+142,5 @@ volumes: networks: nexent: - driver: bridge \ No newline at end of file + name: nexent_network + driver: bridge diff --git a/docker/docker-compose-supabase.yml b/docker/docker-compose-supabase.yml index 21a4e6958..b781b4444 100644 --- a/docker/docker-compose-supabase.yml +++ b/docker/docker-compose-supabase.yml @@ -147,4 +147,5 @@ volumes: networks: nexent: - driver: bridge \ No newline at end of file + name: nexent_network + driver: bridge diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index cfb20f6e8..f23e4210c 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -95,4 +95,5 @@ services: networks: nexent: + name: nexent_network driver: bridge diff --git a/docker/docker-compose.prod.yml b/docker/docker-compose.prod.yml index 934fe8b2f..29bd41d9f 100644 --- a/docker/docker-compose.prod.yml +++ b/docker/docker-compose.prod.yml @@ -75,6 +75,7 @@ services: restart: always volumes: - ${NEXENT_USER_DIR:-$HOME/nexent}:/mnt/nexent + - ${ROOT_DIR}/skills:/mnt/nexent-data/skills - ${ROOT_DIR}/openssh-server/ssh-keys:/opt/ssh-keys:ro - ${ROOT_DIR}/scripts/sync_user_supabase2pg.py:/opt/sync_user_supabase2pg.py:ro - /var/run/docker.sock:/var/run/docker.sock:ro # Docker socket for MCP container management @@ -103,6 +104,7 @@ services: restart: always volumes: - ${NEXENT_USER_DIR:-$HOME/nexent}:/mnt/nexent + - ${ROOT_DIR}/skills:/mnt/nexent-data/skills - ${ROOT_DIR}/openssh-server/ssh-keys:/opt/ssh-keys:ro environment: <<: [*minio-vars, *es-vars] @@ -155,6 +157,7 @@ services: restart: always volumes: - ${NEXENT_USER_DIR:-$HOME/nexent}:/mnt/nexent + - ${ROOT_DIR}/skills:/mnt/nexent-data/skills - ${ROOT_DIR}/openssh-server/ssh-keys:/opt/ssh-keys:ro environment: <<: [*minio-vars, *es-vars] @@ -300,6 +303,7 @@ services: networks: nexent: + name: nexent_network driver: bridge volumes: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 89088f2c3..fd3851ab4 100644 --- a/docker/docker-compose.yml 
+++ b/docker/docker-compose.yml @@ -86,6 +86,7 @@ services: - "5010:5010" # Config service port volumes: - ${NEXENT_USER_DIR:-$HOME/nexent}:/mnt/nexent + - ${ROOT_DIR}/skills:/mnt/nexent-data/skills - ${ROOT_DIR}/openssh-server/ssh-keys:/opt/ssh-keys:ro - ${ROOT_DIR}/scripts/sync_user_supabase2pg.py:/opt/sync_user_supabase2pg.py:ro - /var/run/docker.sock:/var/run/docker.sock:ro # Docker socket for MCP container management @@ -116,6 +117,7 @@ services: - "5014:5014" # Runtime service port volumes: - ${NEXENT_USER_DIR:-$HOME/nexent}:/mnt/nexent + - ${ROOT_DIR}/skills:/mnt/nexent-data/skills - ${ROOT_DIR}/openssh-server/ssh-keys:/opt/ssh-keys:ro environment: <<: [*minio-vars, *es-vars] @@ -173,6 +175,7 @@ services: - "5013:5013" # Northbound service port volumes: - ${NEXENT_USER_DIR:-$HOME/nexent}:/mnt/nexent + - ${ROOT_DIR}/skills:/mnt/nexent-data/skills - ${ROOT_DIR}/openssh-server/ssh-keys:/opt/ssh-keys:ro environment: <<: [*minio-vars, *es-vars] @@ -327,6 +330,7 @@ services: networks: nexent: + name: nexent_network driver: bridge volumes: diff --git a/docker/generate_env.sh b/docker/generate_env.sh index 962102f1d..c6b20f0b1 100755 --- a/docker/generate_env.sh +++ b/docker/generate_env.sh @@ -2,34 +2,18 @@ # Exit immediately if a command exits with a non-zero status set -e -echo " 📁 Target .env location: Root directory (../)" +echo " 📁 Target .env location: docker/.env" # Function to copy and prepare .env file prepare_env_file() { - echo " 📝 Preparing root .env file..." - - # Check if .env already exists in root directory (parent directory) - if [ -f "../.env" ]; then - echo " ⚠️ .env already exists in root directory" - echo "" - read -p "👉 Do you want to overwrite it? [Y/N] (default: Y): " overwrite - # If input is empty, use default "Y" - overwrite=${overwrite:-Y} - if [[ ! "$overwrite" =~ ^[Yy]$ ]]; then - echo " Using existing .env file" - return 0 - fi - fi + echo " 📝 Preparing docker/.env file..." 
- # Check if .env exists in current docker directory if [ -f ".env" ]; then - echo " 📋 Copying docker/.env to root directory..." - cp ".env" "../.env" - echo " ✅ Copied docker/.env to ../.env" + echo " ✅ Using existing docker/.env" elif [ -f ".env.example" ]; then - echo " 📋 docker/.env not found, copying .env.example to root directory..." - cp ".env.example" "../.env" - echo " ✅ Copied docker/.env.example to ../.env" + echo " 📋 docker/.env not found, copying docker/.env.example..." + cp ".env.example" ".env" + echo " ✅ Created docker/.env from docker/.env.example" else echo " ❌ ERROR Neither docker/.env nor docker/.env.example exists in docker directory" ERROR_OCCURRED=1 @@ -39,57 +23,57 @@ prepare_env_file() { # Function to update .env file with generated keys update_env_file() { - echo " 📝 Updating root .env file with generated keys..." + echo " 📝 Updating docker/.env file with generated keys..." - if [ ! -f "../.env" ]; then - echo " ❌ ERROR .env file does not exist in root directory" + if [ ! 
-f ".env" ]; then + echo " ❌ ERROR docker/.env file does not exist" ERROR_OCCURRED=1 return 1 fi # Update or add MINIO_ACCESS_KEY - if grep -q "^MINIO_ACCESS_KEY=" ../.env; then - sed -i.bak "s~^MINIO_ACCESS_KEY=.*~MINIO_ACCESS_KEY=$MINIO_ACCESS_KEY~" ../.env + if grep -q "^MINIO_ACCESS_KEY=" .env; then + sed -i.bak "s~^MINIO_ACCESS_KEY=.*~MINIO_ACCESS_KEY=$MINIO_ACCESS_KEY~" .env else - echo "" >> ../.env - echo "# Generated MinIO Keys" >> ../.env - echo "MINIO_ACCESS_KEY=$MINIO_ACCESS_KEY" >> ../.env + echo "" >> .env + echo "# Generated MinIO Keys" >> .env + echo "MINIO_ACCESS_KEY=$MINIO_ACCESS_KEY" >> .env fi # Update or add MINIO_SECRET_KEY - if grep -q "^MINIO_SECRET_KEY=" ../.env; then - sed -i.bak "s~^MINIO_SECRET_KEY=.*~MINIO_SECRET_KEY=$MINIO_SECRET_KEY~" ../.env + if grep -q "^MINIO_SECRET_KEY=" .env; then + sed -i.bak "s~^MINIO_SECRET_KEY=.*~MINIO_SECRET_KEY=$MINIO_SECRET_KEY~" .env else - echo "MINIO_SECRET_KEY=$MINIO_SECRET_KEY" >> ../.env + echo "MINIO_SECRET_KEY=$MINIO_SECRET_KEY" >> .env fi # Update or add ELASTICSEARCH_API_KEY (only if it was generated successfully) if [ -n "$ELASTICSEARCH_API_KEY" ]; then - if grep -q "^ELASTICSEARCH_API_KEY=" ../.env; then - sed -i.bak "s~^ELASTICSEARCH_API_KEY=.*~ELASTICSEARCH_API_KEY=$ELASTICSEARCH_API_KEY~" ../.env + if grep -q "^ELASTICSEARCH_API_KEY=" .env; then + sed -i.bak "s~^ELASTICSEARCH_API_KEY=.*~ELASTICSEARCH_API_KEY=$ELASTICSEARCH_API_KEY~" .env else - echo "" >> ../.env - echo "# Generated Elasticsearch API Key" >> ../.env - echo "ELASTICSEARCH_API_KEY=$ELASTICSEARCH_API_KEY" >> ../.env + echo "" >> .env + echo "# Generated Elasticsearch API Key" >> .env + echo "ELASTICSEARCH_API_KEY=$ELASTICSEARCH_API_KEY" >> .env fi fi # Update or add SSH credentials (only if they were set) if [ -n "$SSH_USERNAME" ]; then - if grep -q "^SSH_USERNAME=" ../.env; then - sed -i.bak "s~^SSH_USERNAME=.*~SSH_USERNAME=$SSH_USERNAME~" ../.env + if grep -q "^SSH_USERNAME=" .env; then + sed -i.bak 
"s~^SSH_USERNAME=.*~SSH_USERNAME=$SSH_USERNAME~" .env else - echo "" >> ../.env - echo "# SSH Terminal Tool Credentials" >> ../.env - echo "SSH_USERNAME=$SSH_USERNAME" >> ../.env + echo "" >> .env + echo "# SSH Terminal Tool Credentials" >> .env + echo "SSH_USERNAME=$SSH_USERNAME" >> .env fi fi if [ -n "$SSH_PASSWORD" ]; then - if grep -q "^SSH_PASSWORD=" ../.env; then - sed -i.bak "s~^SSH_PASSWORD=.*~SSH_PASSWORD=$SSH_PASSWORD~" ../.env + if grep -q "^SSH_PASSWORD=" .env; then + sed -i.bak "s~^SSH_PASSWORD=.*~SSH_PASSWORD=$SSH_PASSWORD~" .env else - echo "SSH_PASSWORD=$SSH_PASSWORD" >> ../.env + echo "SSH_PASSWORD=$SSH_PASSWORD" >> .env fi fi echo " ✅ Generated keys updated successfully" @@ -98,145 +82,145 @@ update_env_file() { echo " 🔧 Updating service URLs for localhost development environment..." # ELASTICSEARCH_HOST - if grep -q "^ELASTICSEARCH_HOST=" ../.env; then - sed -i.bak "s~^ELASTICSEARCH_HOST=.*~ELASTICSEARCH_HOST=http://localhost:9210~" ../.env + if grep -q "^ELASTICSEARCH_HOST=" .env; then + sed -i.bak "s~^ELASTICSEARCH_HOST=.*~ELASTICSEARCH_HOST=http://localhost:9210~" .env else - echo "" >> ../.env - echo "# Development Environment URLs" >> ../.env - echo "ELASTICSEARCH_HOST=http://localhost:9210" >> ../.env + echo "" >> .env + echo "# Development Environment URLs" >> .env + echo "ELASTICSEARCH_HOST=http://localhost:9210" >> .env fi # Main Services # CONFIG_SERVICE_URL - if grep -q "^CONFIG_SERVICE_URL=" ../.env; then - sed -i.bak "s~^CONFIG_SERVICE_URL=.*~CONFIG_SERVICE_URL=http://localhost:5010~" ../.env + if grep -q "^CONFIG_SERVICE_URL=" .env; then + sed -i.bak "s~^CONFIG_SERVICE_URL=.*~CONFIG_SERVICE_URL=http://localhost:5010~" .env else - echo "" >> ../.env - echo "# Main Services" >> ../.env - echo "CONFIG_SERVICE_URL=http://localhost:5010" >> ../.env + echo "" >> .env + echo "# Main Services" >> .env + echo "CONFIG_SERVICE_URL=http://localhost:5010" >> .env fi # RUNTIME_SERVICE_URL - if grep -q "^RUNTIME_SERVICE_URL=" ../.env; then - sed 
-i.bak "s~^RUNTIME_SERVICE_URL=.*~RUNTIME_SERVICE_URL=http://localhost:5014~" ../.env + if grep -q "^RUNTIME_SERVICE_URL=" .env; then + sed -i.bak "s~^RUNTIME_SERVICE_URL=.*~RUNTIME_SERVICE_URL=http://localhost:5014~" .env else - echo "RUNTIME_SERVICE_URL=http://localhost:5014" >> ../.env + echo "RUNTIME_SERVICE_URL=http://localhost:5014" >> .env fi # ELASTICSEARCH_SERVICE - if grep -q "^ELASTICSEARCH_SERVICE=" ../.env; then - sed -i.bak "s~^ELASTICSEARCH_SERVICE=.*~ELASTICSEARCH_SERVICE=http://localhost:5010/api~" ../.env + if grep -q "^ELASTICSEARCH_SERVICE=" .env; then + sed -i.bak "s~^ELASTICSEARCH_SERVICE=.*~ELASTICSEARCH_SERVICE=http://localhost:5010/api~" .env else - echo "ELASTICSEARCH_SERVICE=http://localhost:5010/api" >> ../.env + echo "ELASTICSEARCH_SERVICE=http://localhost:5010/api" >> .env fi # NEXENT_MCP_SERVER - if grep -q "^NEXENT_MCP_SERVER=" ../.env; then - sed -i.bak "s~^NEXENT_MCP_SERVER=.*~NEXENT_MCP_SERVER=http://localhost:5011~" ../.env + if grep -q "^NEXENT_MCP_SERVER=" .env; then + sed -i.bak "s~^NEXENT_MCP_SERVER=.*~NEXENT_MCP_SERVER=http://localhost:5011~" .env else - echo "NEXENT_MCP_SERVER=http://localhost:5011" >> ../.env + echo "NEXENT_MCP_SERVER=http://localhost:5011" >> .env fi # DATA_PROCESS_SERVICE - if grep -q "^DATA_PROCESS_SERVICE=" ../.env; then - sed -i.bak "s~^DATA_PROCESS_SERVICE=.*~DATA_PROCESS_SERVICE=http://localhost:5012/api~" ../.env + if grep -q "^DATA_PROCESS_SERVICE=" .env; then + sed -i.bak "s~^DATA_PROCESS_SERVICE=.*~DATA_PROCESS_SERVICE=http://localhost:5012/api~" .env else - echo "DATA_PROCESS_SERVICE=http://localhost:5012/api" >> ../.env + echo "DATA_PROCESS_SERVICE=http://localhost:5012/api" >> .env fi # NORTHBOUND_API_SERVER - if grep -q "^NORTHBOUND_API_SERVER=" ../.env; then - sed -i.bak "s~^NORTHBOUND_API_SERVER=.*~NORTHBOUND_API_SERVER=http://localhost:5013/api~" ../.env + if grep -q "^NORTHBOUND_API_SERVER=" .env; then + sed -i.bak 
"s~^NORTHBOUND_API_SERVER=.*~NORTHBOUND_API_SERVER=http://localhost:5013/api~" .env else - echo "NORTHBOUND_API_SERVER=http://localhost:5013/api" >> ../.env + echo "NORTHBOUND_API_SERVER=http://localhost:5013/api" >> .env fi # MCP_MANAGEMENT_API - if grep -q "^MCP_MANAGEMENT_API=" ../.env; then - sed -i.bak "s~^MCP_MANAGEMENT_API=.*~MCP_MANAGEMENT_API=http://localhost:5015~" ../.env + if grep -q "^MCP_MANAGEMENT_API=" .env; then + sed -i.bak "s~^MCP_MANAGEMENT_API=.*~MCP_MANAGEMENT_API=http://localhost:5015~" .env else - echo "MCP_MANAGEMENT_API=http://localhost:5015" >> ../.env + echo "MCP_MANAGEMENT_API=http://localhost:5015" >> .env fi # MINIO_ENDPOINT - if grep -q "^MINIO_ENDPOINT=" ../.env; then - sed -i.bak "s~^MINIO_ENDPOINT=.*~MINIO_ENDPOINT=http://localhost:9010~" ../.env + if grep -q "^MINIO_ENDPOINT=" .env; then + sed -i.bak "s~^MINIO_ENDPOINT=.*~MINIO_ENDPOINT=http://localhost:9010~" .env else - echo "MINIO_ENDPOINT=http://localhost:9010" >> ../.env + echo "MINIO_ENDPOINT=http://localhost:9010" >> .env fi # REDIS_URL - if grep -q "^REDIS_URL=" ../.env; then - sed -i.bak "s~^REDIS_URL=.*~REDIS_URL=redis://localhost:6379/0~" ../.env + if grep -q "^REDIS_URL=" .env; then + sed -i.bak "s~^REDIS_URL=.*~REDIS_URL=redis://localhost:6379/0~" .env else - echo "REDIS_URL=redis://localhost:6379/0" >> ../.env + echo "REDIS_URL=redis://localhost:6379/0" >> .env fi # REDIS_BACKEND_URL - if grep -q "^REDIS_BACKEND_URL=" ../.env; then - sed -i.bak "s~^REDIS_BACKEND_URL=.*~REDIS_BACKEND_URL=redis://localhost:6379/1~" ../.env + if grep -q "^REDIS_BACKEND_URL=" .env; then + sed -i.bak "s~^REDIS_BACKEND_URL=.*~REDIS_BACKEND_URL=redis://localhost:6379/1~" .env else - echo "REDIS_BACKEND_URL=redis://localhost:6379/1" >> ../.env + echo "REDIS_BACKEND_URL=redis://localhost:6379/1" >> .env fi # POSTGRES_HOST - if grep -q "^POSTGRES_HOST=" ../.env; then - sed -i.bak "s~^POSTGRES_HOST=.*~POSTGRES_HOST=localhost~" ../.env + if grep -q "^POSTGRES_HOST=" .env; then + sed -i.bak 
"s~^POSTGRES_HOST=.*~POSTGRES_HOST=localhost~" .env else - echo "POSTGRES_HOST=localhost" >> ../.env + echo "POSTGRES_HOST=localhost" >> .env fi # POSTGRES_PORT - if grep -q "^POSTGRES_PORT=" ../.env; then - sed -i.bak "s~^POSTGRES_PORT=.*~POSTGRES_PORT=5434~" ../.env + if grep -q "^POSTGRES_PORT=" .env; then + sed -i.bak "s~^POSTGRES_PORT=.*~POSTGRES_PORT=5434~" .env else - echo "POSTGRES_PORT=5434" >> ../.env + echo "POSTGRES_PORT=5434" >> .env fi # Supabase Configuration (Only for full version) if [ "$DEPLOYMENT_VERSION" = "full" ]; then if [ -n "$SUPABASE_KEY" ]; then - if grep -q "^SUPABASE_KEY=" ../.env; then - sed -i.bak "s~^SUPABASE_KEY=.*~SUPABASE_KEY=$SUPABASE_KEY~" ../.env + if grep -q "^SUPABASE_KEY=" .env; then + sed -i.bak "s~^SUPABASE_KEY=.*~SUPABASE_KEY=$SUPABASE_KEY~" .env else - echo "" >> ../.env - echo "# Supabase Keys" >> ../.env - echo "SUPABASE_KEY=$SUPABASE_KEY" >> ../.env + echo "" >> .env + echo "# Supabase Keys" >> .env + echo "SUPABASE_KEY=$SUPABASE_KEY" >> .env fi fi if [ -n "$SERVICE_ROLE_KEY" ]; then - if grep -q "^SERVICE_ROLE_KEY=" ../.env; then - sed -i.bak "s~^SERVICE_ROLE_KEY=.*~SERVICE_ROLE_KEY=$SERVICE_ROLE_KEY~" ../.env + if grep -q "^SERVICE_ROLE_KEY=" .env; then + sed -i.bak "s~^SERVICE_ROLE_KEY=.*~SERVICE_ROLE_KEY=$SERVICE_ROLE_KEY~" .env else - echo "SERVICE_ROLE_KEY=$SERVICE_ROLE_KEY" >> ../.env + echo "SERVICE_ROLE_KEY=$SERVICE_ROLE_KEY" >> .env fi fi # Additional Supabase configuration - if grep -q "^SUPABASE_URL=" ../.env; then - sed -i.bak "s~^SUPABASE_URL=.*~SUPABASE_URL=http://localhost:8000~" ../.env + if grep -q "^SUPABASE_URL=" .env; then + sed -i.bak "s~^SUPABASE_URL=.*~SUPABASE_URL=http://localhost:8000~" .env else - echo "SUPABASE_URL=http://localhost:8000" >> ../.env + echo "SUPABASE_URL=http://localhost:8000" >> .env fi - if grep -q "^API_EXTERNAL_URL=" ../.env; then - sed -i.bak "s~^API_EXTERNAL_URL=.*~API_EXTERNAL_URL=http://localhost:8000~" ../.env + if grep -q "^API_EXTERNAL_URL=" .env; then + sed -i.bak 
"s~^API_EXTERNAL_URL=.*~API_EXTERNAL_URL=http://localhost:8000~" .env else - echo "API_EXTERNAL_URL=http://localhost:8000" >> ../.env + echo "API_EXTERNAL_URL=http://localhost:8000" >> .env fi - if grep -q "^SITE_URL=" ../.env; then - sed -i.bak "s~^SITE_URL=.*~SITE_URL=http://localhost:3011~" ../.env + if grep -q "^SITE_URL=" .env; then + sed -i.bak "s~^SITE_URL=.*~SITE_URL=http://localhost:3011~" .env else - echo "SITE_URL=http://localhost:3011" >> ../.env + echo "SITE_URL=http://localhost:3011" >> .env fi fi # Remove backup file - rm -f ../.env.bak + rm -f .env.bak - echo " ✅ Root .env file updated successfully with localhost development URLs" + echo " ✅ docker/.env updated successfully with localhost development URLs" } # Function to show summary diff --git a/docker/init.sql b/docker/init.sql index 6ca77f731..046bdecf1 100644 --- a/docker/init.sql +++ b/docker/init.sql @@ -175,6 +175,10 @@ CREATE TABLE IF NOT EXISTS "model_record_t" ( "updated_by" varchar(100) COLLATE "pg_catalog"."default", "created_by" varchar(100) COLLATE "pg_catalog"."default", "tenant_id" varchar(100) COLLATE "pg_catalog"."default" DEFAULT 'tenant_id', + "model_appid" varchar(100) COLLATE "pg_catalog"."default" DEFAULT '', + "access_token" varchar(100) COLLATE "pg_catalog"."default" DEFAULT '', + "concurrency_limit" INTEGER DEFAULT NULL, + "timeout_seconds" INTEGER DEFAULT 120, CONSTRAINT "nexent_models_t_pk" PRIMARY KEY ("model_id") ); ALTER TABLE "model_record_t" OWNER TO "root"; @@ -198,6 +202,10 @@ COMMENT ON COLUMN "model_record_t"."update_time" IS 'Update time, audit field'; COMMENT ON COLUMN "model_record_t"."updated_by" IS 'Last updater ID, audit field'; COMMENT ON COLUMN "model_record_t"."created_by" IS 'Creator ID, audit field'; COMMENT ON COLUMN "model_record_t"."tenant_id" IS 'Tenant ID for filtering'; +COMMENT ON COLUMN "model_record_t"."model_appid" IS 'Application ID for model authentication.'; +COMMENT ON COLUMN "model_record_t"."access_token" IS 'Access token for model 
authentication.'; +COMMENT ON COLUMN "model_record_t"."concurrency_limit" IS 'Maximum concurrent requests for this model. Default is NULL (unlimited).'; +COMMENT ON COLUMN "model_record_t"."timeout_seconds" IS 'Request timeout in seconds for this model. Default is 120 seconds.'; COMMENT ON TABLE "model_record_t" IS 'List of models defined by users in the configuration page'; INSERT INTO "nexent"."model_record_t" ("model_repo", "model_name", "model_factory", "model_type", "api_key", "base_url", "max_tokens", "used_token", "display_name", "connect_status") VALUES ('', 'volcano_tts', 'OpenAI-API-Compatible', 'tts', '', '', 0, 0, 'volcano_tts', 'unavailable'); @@ -211,6 +219,7 @@ CREATE TABLE IF NOT EXISTS "knowledge_record_t" ( "tenant_id" varchar(100) COLLATE "pg_catalog"."default", "knowledge_sources" varchar(100) COLLATE "pg_catalog"."default", "embedding_model_name" varchar(200) COLLATE "pg_catalog"."default", + "embedding_model_id" INTEGER, "group_ids" varchar, "ingroup_permission" varchar(30), "create_time" timestamp(0) DEFAULT CURRENT_TIMESTAMP, @@ -218,6 +227,10 @@ CREATE TABLE IF NOT EXISTS "knowledge_record_t" ( "delete_flag" varchar(1) COLLATE "pg_catalog"."default" DEFAULT 'N'::character varying, "updated_by" varchar(100) COLLATE "pg_catalog"."default", "created_by" varchar(100) COLLATE "pg_catalog"."default", + "summary_frequency" varchar(10) COLLATE "pg_catalog"."default", + "last_summary_time" timestamp(0), + "last_doc_update_time" timestamp(0), + "preserve_source_file" boolean NOT NULL DEFAULT true, CONSTRAINT "knowledge_record_t_pk" PRIMARY KEY ("knowledge_id") ); ALTER TABLE "knowledge_record_t" OWNER TO "root"; @@ -228,11 +241,18 @@ COMMENT ON COLUMN "knowledge_record_t"."knowledge_describe" IS 'Knowledge base d COMMENT ON COLUMN "knowledge_record_t"."tenant_id" IS 'Tenant ID'; COMMENT ON COLUMN "knowledge_record_t"."knowledge_sources" IS 'Knowledge base sources'; COMMENT ON COLUMN "knowledge_record_t"."embedding_model_name" IS 'Embedding model 
name, used to record the embedding model used by the knowledge base'; +COMMENT ON COLUMN "knowledge_record_t"."embedding_model_id" IS 'Embedding model ID, foreign key reference to model_record_t.model_id'; COMMENT ON COLUMN "knowledge_record_t"."group_ids" IS 'Knowledge base group IDs list'; COMMENT ON COLUMN "knowledge_record_t"."ingroup_permission" IS 'In-group permission: EDIT, READ_ONLY, PRIVATE'; COMMENT ON COLUMN "knowledge_record_t"."create_time" IS 'Creation time, audit field'; COMMENT ON COLUMN "knowledge_record_t"."update_time" IS 'Update time, audit field'; COMMENT ON COLUMN "knowledge_record_t"."delete_flag" IS 'When deleted by user frontend, delete flag will be set to true, achieving soft delete effect. Optional values Y/N'; +COMMENT ON COLUMN "knowledge_record_t"."updated_by" IS 'User who last updated the record, audit field'; +COMMENT ON COLUMN "knowledge_record_t"."created_by" IS 'User who created the record, audit field'; +COMMENT ON COLUMN "knowledge_record_t"."summary_frequency" IS 'Auto-summary frequency: 1h, 3h, 6h, 1d, 1w, or NULL (disabled)'; +COMMENT ON COLUMN "knowledge_record_t"."last_summary_time" IS 'Timestamp of last summary generation'; +COMMENT ON COLUMN "knowledge_record_t"."last_doc_update_time" IS 'Timestamp of last document add/delete operation, used for auto-summary optimization to skip unnecessary summary regeneration'; +COMMENT ON COLUMN "knowledge_record_t"."preserve_source_file" IS 'Whether to preserve uploaded source documents after vectorization'; COMMENT ON COLUMN "knowledge_record_t"."updated_by" IS 'Last updater ID, audit field'; COMMENT ON COLUMN "knowledge_record_t"."created_by" IS 'Creator ID, audit field'; COMMENT ON TABLE "knowledge_record_t" IS 'Records knowledge base description and status information'; @@ -306,6 +326,8 @@ CREATE TABLE IF NOT EXISTS nexent.ag_tenant_agent_t ( model_id INTEGER, business_logic_model_name VARCHAR(100), business_logic_model_id INTEGER, + prompt_template_id INTEGER, + 
prompt_template_name VARCHAR(100), max_steps INTEGER, duty_prompt TEXT, constraint_prompt TEXT, @@ -316,9 +338,13 @@ CREATE TABLE IF NOT EXISTS nexent.ag_tenant_agent_t ( enabled BOOLEAN DEFAULT FALSE, is_new BOOLEAN DEFAULT FALSE, provide_run_summary BOOLEAN DEFAULT FALSE, + enable_context_manager BOOLEAN DEFAULT FALSE, + verification_config JSONB, version_no INTEGER DEFAULT 0 NOT NULL, current_version_no INTEGER NULL, ingroup_permission VARCHAR(30), + greeting_message TEXT, + example_questions JSONB, create_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, update_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, created_by VARCHAR(100), @@ -355,6 +381,8 @@ COMMENT ON COLUMN nexent.ag_tenant_agent_t.model_name IS '[DEPRECATED] Name of t COMMENT ON COLUMN nexent.ag_tenant_agent_t.model_id IS 'Model ID, foreign key reference to model_record_t.model_id'; COMMENT ON COLUMN nexent.ag_tenant_agent_t.business_logic_model_name IS 'Model name used for business logic prompt generation'; COMMENT ON COLUMN nexent.ag_tenant_agent_t.business_logic_model_id IS 'Model ID used for business logic prompt generation, foreign key reference to model_record_t.model_id'; +COMMENT ON COLUMN nexent.ag_tenant_agent_t.prompt_template_id IS 'Prompt template ID used for business logic prompt generation'; +COMMENT ON COLUMN nexent.ag_tenant_agent_t.prompt_template_name IS 'Prompt template name used for business logic prompt generation'; COMMENT ON COLUMN nexent.ag_tenant_agent_t.max_steps IS 'Maximum number of steps'; COMMENT ON COLUMN nexent.ag_tenant_agent_t.duty_prompt IS 'Duty prompt'; COMMENT ON COLUMN nexent.ag_tenant_agent_t.constraint_prompt IS 'Constraint prompt'; @@ -373,12 +401,107 @@ COMMENT ON COLUMN nexent.ag_tenant_agent_t.is_new IS 'Whether this agent is mark COMMENT ON COLUMN nexent.ag_tenant_agent_t.version_no IS 'Version number. 
0 = draft/editing state, >=1 = published snapshot'; COMMENT ON COLUMN nexent.ag_tenant_agent_t.current_version_no IS 'Current published version number. NULL means no version published yet'; COMMENT ON COLUMN nexent.ag_tenant_agent_t.ingroup_permission IS 'In-group permission: EDIT, READ_ONLY, PRIVATE'; +COMMENT ON COLUMN nexent.ag_tenant_agent_t.enable_context_manager IS 'Whether to enable context management (compression) for this agent'; +COMMENT ON COLUMN nexent.ag_tenant_agent_t.verification_config IS 'Layered ReAct self-verification configuration'; +COMMENT ON COLUMN nexent.ag_tenant_agent_t.greeting_message IS 'Agent greeting message displayed on chat initial screen'; +COMMENT ON COLUMN nexent.ag_tenant_agent_t.example_questions IS 'List of example questions for starting a conversation with this agent'; -- Create index for is_new queries CREATE INDEX IF NOT EXISTS idx_ag_tenant_agent_t_is_new ON nexent.ag_tenant_agent_t (tenant_id, is_new) WHERE delete_flag = 'N'; +CREATE TABLE IF NOT EXISTS nexent.ag_prompt_template_t ( + template_id SERIAL PRIMARY KEY, + template_name VARCHAR(100) NOT NULL, + description VARCHAR(500), + template_type VARCHAR(50) NOT NULL DEFAULT 'agent_generate', + tenant_id VARCHAR(100) NOT NULL, + user_id VARCHAR(100) NOT NULL, + template_content_zh JSONB NOT NULL, + template_content_en JSONB, + create_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, + update_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, + created_by VARCHAR(100), + updated_by VARCHAR(100), + delete_flag VARCHAR(1) DEFAULT 'N' +); + +ALTER TABLE nexent.ag_prompt_template_t OWNER TO "root"; + +CREATE OR REPLACE FUNCTION update_ag_prompt_template_update_time() +RETURNS TRIGGER AS $$ +BEGIN + NEW.update_time = CURRENT_TIMESTAMP; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER update_ag_prompt_template_update_time_trigger +BEFORE UPDATE ON nexent.ag_prompt_template_t +FOR EACH ROW +EXECUTE FUNCTION update_ag_prompt_template_update_time(); + 
+COMMENT ON TABLE nexent.ag_prompt_template_t IS 'Prompt template table for user-defined business logic generation prompts'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.template_id IS 'Prompt template ID'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.template_name IS 'Prompt template name'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.description IS 'Prompt template description'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.template_type IS 'Prompt template type'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.tenant_id IS 'Tenant ID'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.user_id IS 'User ID'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.template_content_zh IS 'Chinese prompt template content'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.template_content_en IS 'English prompt template content'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.create_time IS 'Creation time'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.update_time IS 'Update time'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.created_by IS 'Creator'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.updated_by IS 'Updater'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.delete_flag IS 'Whether it is deleted. 
Optional values: Y/N'; + +CREATE UNIQUE INDEX IF NOT EXISTS uq_prompt_template_user_name_active +ON nexent.ag_prompt_template_t (tenant_id, user_id, template_name) +WHERE delete_flag = 'N'; + +CREATE INDEX IF NOT EXISTS idx_ag_prompt_template_t_user +ON nexent.ag_prompt_template_t (tenant_id, user_id, template_type) +WHERE delete_flag = 'N'; + +INSERT INTO nexent.ag_prompt_template_t ( + template_id, + template_name, + description, + template_type, + tenant_id, + user_id, + template_content_zh, + template_content_en, + created_by, + updated_by, + delete_flag +) +VALUES ( + 0, + 'system_default', + 'System default prompt template', + 'agent_generate', + 'tenant_id', + 'user_id', + '{}'::jsonb, + '{}'::jsonb, + 'user_id', + 'user_id', + 'N' +) +ON CONFLICT (template_id) DO UPDATE SET + template_name = EXCLUDED.template_name, + description = EXCLUDED.description, + template_type = EXCLUDED.template_type, + tenant_id = EXCLUDED.tenant_id, + user_id = EXCLUDED.user_id, + template_content_zh = EXCLUDED.template_content_zh, + template_content_en = EXCLUDED.template_content_en, + updated_by = EXCLUDED.updated_by, + delete_flag = 'N'; + -- Create the ag_tool_instance_t table in the nexent schema CREATE TABLE IF NOT EXISTS nexent.ag_tool_instance_t ( @@ -490,6 +613,14 @@ CREATE TABLE IF NOT EXISTS nexent.mcp_record_t ( status BOOLEAN DEFAULT NULL, container_id VARCHAR(200) DEFAULT NULL, authorization_token VARCHAR(500) DEFAULT NULL, + custom_headers JSON DEFAULT NULL, + source VARCHAR(30), + registry_json JSONB, + config_json JSON, + enabled BOOLEAN DEFAULT TRUE, + tags TEXT[], + description TEXT, + container_port INTEGER, create_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, update_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, created_by VARCHAR(100), @@ -509,11 +640,19 @@ COMMENT ON COLUMN nexent.mcp_record_t.mcp_server IS 'MCP server address'; COMMENT ON COLUMN nexent.mcp_record_t.status IS 'MCP server connection status, true=connected, 
false=disconnected, null=unknown'; COMMENT ON COLUMN nexent.mcp_record_t.container_id IS 'Docker container ID for MCP service, NULL for non-containerized MCP'; COMMENT ON COLUMN nexent.mcp_record_t.authorization_token IS 'Authorization token for MCP server authentication (e.g., Bearer token)'; +COMMENT ON COLUMN nexent.mcp_record_t.custom_headers IS 'Custom HTTP headers as JSON object for MCP server requests'; COMMENT ON COLUMN nexent.mcp_record_t.create_time IS 'Creation time, audit field'; COMMENT ON COLUMN nexent.mcp_record_t.update_time IS 'Update time, audit field'; COMMENT ON COLUMN nexent.mcp_record_t.created_by IS 'Creator ID, audit field'; COMMENT ON COLUMN nexent.mcp_record_t.updated_by IS 'Last updater ID, audit field'; COMMENT ON COLUMN nexent.mcp_record_t.delete_flag IS 'When deleted by user frontend, delete flag will be set to true, achieving soft delete effect. Optional values Y/N'; +COMMENT ON COLUMN nexent.mcp_record_t.source IS 'Source type: local/mcp_registry/community'; +COMMENT ON COLUMN nexent.mcp_record_t.registry_json IS 'Full MCP registry server.json snapshot'; +COMMENT ON COLUMN nexent.mcp_record_t.config_json IS 'MCP config data'; +COMMENT ON COLUMN nexent.mcp_record_t.enabled IS 'Enabled'; +COMMENT ON COLUMN nexent.mcp_record_t.tags IS 'Tags'; +COMMENT ON COLUMN nexent.mcp_record_t.description IS 'Description'; +COMMENT ON COLUMN nexent.mcp_record_t.container_port IS 'Host port bound for containerized MCP service'; -- Create a function to update the update_time column CREATE OR REPLACE FUNCTION update_mcp_record_update_time() @@ -536,6 +675,19 @@ EXECUTE FUNCTION update_mcp_record_update_time(); -- Add comment to the trigger COMMENT ON TRIGGER update_mcp_record_update_time_trigger ON nexent.mcp_record_t IS 'Trigger to call update_mcp_record_update_time function before each update on mcp_record_t table'; +-- Add indexes for common management queries +CREATE INDEX IF NOT EXISTS idx_mcp_record_t_tenant_delete + ON nexent.mcp_record_t 
(tenant_id, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_record_t_tenant_name + ON nexent.mcp_record_t (tenant_id, mcp_name, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_record_t_tenant_server + ON nexent.mcp_record_t (tenant_id, mcp_server, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_record_t_tags_gin + ON nexent.mcp_record_t USING GIN (tags); + -- Create user tenant relationship table CREATE TABLE IF NOT EXISTS nexent.user_tenant_t ( user_tenant_id SERIAL PRIMARY KEY, @@ -571,6 +723,7 @@ CREATE TABLE IF NOT EXISTS nexent.ag_agent_relation_t ( parent_agent_id INTEGER, tenant_id VARCHAR(100), version_no INTEGER DEFAULT 0 NOT NULL, + selected_agent_version_no INTEGER, create_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, update_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, created_by VARCHAR(100), @@ -603,6 +756,7 @@ COMMENT ON COLUMN nexent.ag_agent_relation_t.selected_agent_id IS 'Selected agen COMMENT ON COLUMN nexent.ag_agent_relation_t.parent_agent_id IS 'Parent agent ID'; COMMENT ON COLUMN nexent.ag_agent_relation_t.tenant_id IS 'Tenant ID'; COMMENT ON COLUMN nexent.ag_agent_relation_t.version_no IS 'Version number. 0 = draft/editing state, >=1 = published snapshot'; +COMMENT ON COLUMN nexent.ag_agent_relation_t.selected_agent_version_no IS 'Pinned version of selected_agent_id. 
NULL = use child current published version at runtime (legacy/draft).'; COMMENT ON COLUMN nexent.ag_agent_relation_t.create_time IS 'Creation time, audit field'; COMMENT ON COLUMN nexent.ag_agent_relation_t.update_time IS 'Update time, audit field'; COMMENT ON COLUMN nexent.ag_agent_relation_t.created_by IS 'Creator ID, audit field'; @@ -678,7 +832,7 @@ COMMENT ON COLUMN nexent.tenant_invitation_code_t.group_ids IS 'Associated group COMMENT ON COLUMN nexent.tenant_invitation_code_t.capacity IS 'Invitation code capacity'; COMMENT ON COLUMN nexent.tenant_invitation_code_t.expiry_date IS 'Invitation code expiry date'; COMMENT ON COLUMN nexent.tenant_invitation_code_t.status IS 'Invitation code status: IN_USE, EXPIRE, DISABLE, RUN_OUT'; -COMMENT ON COLUMN nexent.tenant_invitation_code_t.code_type IS 'Invitation code type: ADMIN_INVITE, DEV_INVITE, USER_INVITE'; +COMMENT ON COLUMN nexent.tenant_invitation_code_t.code_type IS 'Invitation code type: ADMIN_INVITE, DEV_INVITE, USER_INVITE, ASSET_OWNER_INVITE'; COMMENT ON COLUMN nexent.tenant_invitation_code_t.create_time IS 'Create time'; COMMENT ON COLUMN nexent.tenant_invitation_code_t.update_time IS 'Update time'; COMMENT ON COLUMN nexent.tenant_invitation_code_t.created_by IS 'Created by'; @@ -959,7 +1113,42 @@ INSERT INTO nexent.role_permission_t (role_permission_id, user_role, permission_ (184, 'SPEED', 'RESOURCE', 'TENANT.INVITE', 'CREATE'), (185, 'SPEED', 'RESOURCE', 'TENANT.INVITE', 'READ'), (186, 'SPEED', 'RESOURCE', 'TENANT.INVITE', 'UPDATE'), -(187, 'SPEED', 'RESOURCE', 'TENANT.INVITE', 'DELETE'); +(187, 'SPEED', 'RESOURCE', 'TENANT.INVITE', 'DELETE'), +(188, 'SU', 'RESOURCE', 'INVITE.ASSET_OWNER', 'CREATE'), +(189, 'SU', 'RESOURCE', 'INVITE.ASSET_OWNER', 'READ'), +(190, 'SU', 'RESOURCE', 'INVITE.ASSET_OWNER', 'UPDATE'), +(191, 'SU', 'RESOURCE', 'INVITE.ASSET_OWNER', 'DELETE'), +(192, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/'), +(193, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/agents'), +(194, 
'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/knowledges'), +(195, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/chat'), +(196, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/space'), +(197, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/market'), +(198, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/models'), +(199, 'ASSET_OWNER', 'RESOURCE', 'AGENT', 'CREATE'), +(200, 'ASSET_OWNER', 'RESOURCE', 'AGENT', 'READ'), +(201, 'ASSET_OWNER', 'RESOURCE', 'AGENT', 'UPDATE'), +(202, 'ASSET_OWNER', 'RESOURCE', 'AGENT', 'DELETE'), +(203, 'ASSET_OWNER', 'RESOURCE', 'SKILL', 'CREATE'), +(204, 'ASSET_OWNER', 'RESOURCE', 'SKILL', 'READ'), +(205, 'ASSET_OWNER', 'RESOURCE', 'SKILL', 'UPDATE'), +(206, 'ASSET_OWNER', 'RESOURCE', 'SKILL', 'DELETE'), +(207, 'ASSET_OWNER', 'RESOURCE', 'KB', 'CREATE'), +(208, 'ASSET_OWNER', 'RESOURCE', 'KB', 'READ'), +(209, 'ASSET_OWNER', 'RESOURCE', 'KB', 'UPDATE'), +(210, 'ASSET_OWNER', 'RESOURCE', 'KB', 'DELETE'), +(211, 'ASSET_OWNER', 'RESOURCE', 'MCP', 'CREATE'), +(212, 'ASSET_OWNER', 'RESOURCE', 'MCP', 'READ'), +(213, 'ASSET_OWNER', 'RESOURCE', 'MCP', 'UPDATE'), +(214, 'ASSET_OWNER', 'RESOURCE', 'MCP', 'DELETE'), +(215, 'ASSET_OWNER', 'RESOURCE', 'MODEL', 'CREATE'), +(216, 'ASSET_OWNER', 'RESOURCE', 'MODEL', 'READ'), +(217, 'ASSET_OWNER', 'RESOURCE', 'MODEL', 'UPDATE'), +(218, 'ASSET_OWNER', 'RESOURCE', 'MODEL', 'DELETE'), +(219, 'ASSET_OWNER', 'RESOURCE', 'USER.ROLE', 'READ'), +(220, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/users'), +(221, 'SU', 'VISIBILITY', 'LEFT_NAV_MENU', '/asset-owner-resources') +; -- Insert SPEED role user into user_tenant_t table if not exists INSERT INTO nexent.user_tenant_t (user_id, tenant_id, user_role, user_email, created_by, updated_by) @@ -977,6 +1166,7 @@ CREATE TABLE IF NOT EXISTS nexent.ag_tenant_agent_version_t ( source_version_no INTEGER NULL, source_type VARCHAR(30) NULL, status VARCHAR(30) DEFAULT 'RELEASED', + is_a2a BOOLEAN DEFAULT FALSE, created_by VARCHAR(100) NOT NULL, create_time 
TIMESTAMP(6) DEFAULT CURRENT_TIMESTAMP, updated_by VARCHAR(100), @@ -1003,6 +1193,7 @@ COMMENT ON COLUMN nexent.ag_tenant_agent_version_t.release_note IS 'Release note COMMENT ON COLUMN nexent.ag_tenant_agent_version_t.source_version_no IS 'Source version number. If this version is a rollback, record the source version number.'; COMMENT ON COLUMN nexent.ag_tenant_agent_version_t.source_type IS 'Source type: NORMAL (normal publish) / ROLLBACK (rollback and republish).'; COMMENT ON COLUMN nexent.ag_tenant_agent_version_t.status IS 'Version status: RELEASED / DISABLED / ARCHIVED'; +COMMENT ON COLUMN nexent.ag_tenant_agent_version_t.is_a2a IS 'Whether this version is published as an A2A Server agent'; COMMENT ON COLUMN nexent.ag_tenant_agent_version_t.created_by IS 'User who published this version'; COMMENT ON COLUMN nexent.ag_tenant_agent_version_t.create_time IS 'Version creation timestamp'; COMMENT ON COLUMN nexent.ag_tenant_agent_version_t.updated_by IS 'Last user who updated this version'; @@ -1072,10 +1263,12 @@ COMMENT ON COLUMN nexent.user_token_usage_log_t.delete_flag IS 'Soft delete flag CREATE TABLE IF NOT EXISTS nexent.ag_skill_info_t ( skill_id SERIAL4 PRIMARY KEY NOT NULL, skill_name VARCHAR(100) NOT NULL, + tenant_id VARCHAR(100), skill_description VARCHAR(1000), skill_tags JSON, skill_content TEXT, - params JSON, + config_schemas JSON, + config_values JSON, source VARCHAR(30) DEFAULT 'official', created_by VARCHAR(100), create_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, @@ -1091,11 +1284,13 @@ COMMENT ON TABLE nexent.ag_skill_info_t IS 'Skill information table for managing -- Add comments to the columns COMMENT ON COLUMN nexent.ag_skill_info_t.skill_id IS 'Skill ID, unique primary key'; -COMMENT ON COLUMN nexent.ag_skill_info_t.skill_name IS 'Skill name, globally unique'; +COMMENT ON COLUMN nexent.ag_skill_info_t.skill_name IS 'Skill name, unique within tenant'; +COMMENT ON COLUMN nexent.ag_skill_info_t.tenant_id IS 'Tenant ID for 
multi-tenancy. NULL for pre-existing skills.'; COMMENT ON COLUMN nexent.ag_skill_info_t.skill_description IS 'Skill description text'; COMMENT ON COLUMN nexent.ag_skill_info_t.skill_tags IS 'Skill tags stored as JSON array'; COMMENT ON COLUMN nexent.ag_skill_info_t.skill_content IS 'Skill content or prompt text'; -COMMENT ON COLUMN nexent.ag_skill_info_t.params IS 'Skill configuration parameters stored as JSON object'; +COMMENT ON COLUMN nexent.ag_skill_info_t.config_schemas IS 'Parameter metadata from config/schema.yaml'; +COMMENT ON COLUMN nexent.ag_skill_info_t.config_values IS 'Runtime parameter values from config/config.yaml'; COMMENT ON COLUMN nexent.ag_skill_info_t.source IS 'Skill source: official, custom, or partner'; COMMENT ON COLUMN nexent.ag_skill_info_t.created_by IS 'Creator ID'; COMMENT ON COLUMN nexent.ag_skill_info_t.create_time IS 'Creation timestamp'; @@ -1141,6 +1336,8 @@ CREATE TABLE IF NOT EXISTS nexent.ag_skill_instance_t ( tenant_id VARCHAR(100), enabled BOOLEAN DEFAULT TRUE, version_no INTEGER DEFAULT 0 NOT NULL, + config_values JSON, + config_schemas JSON, created_by VARCHAR(100), create_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, updated_by VARCHAR(100), @@ -1162,6 +1359,8 @@ COMMENT ON COLUMN nexent.ag_skill_instance_t.user_id IS 'User ID'; COMMENT ON COLUMN nexent.ag_skill_instance_t.tenant_id IS 'Tenant ID'; COMMENT ON COLUMN nexent.ag_skill_instance_t.enabled IS 'Whether this skill is enabled for the agent'; COMMENT ON COLUMN nexent.ag_skill_instance_t.version_no IS 'Version number. 
0 = draft/editing state, >=1 = published snapshot'; +COMMENT ON COLUMN nexent.ag_skill_instance_t.config_values IS 'Per-agent runtime parameter values from config/config.yaml'; +COMMENT ON COLUMN nexent.ag_skill_instance_t.config_schemas IS 'Per-agent parameter schema overrides from config/schema.yaml'; COMMENT ON COLUMN nexent.ag_skill_instance_t.created_by IS 'Creator ID'; COMMENT ON COLUMN nexent.ag_skill_instance_t.create_time IS 'Creation timestamp'; COMMENT ON COLUMN nexent.ag_skill_instance_t.updated_by IS 'Last updater ID'; @@ -1302,6 +1501,9 @@ CREATE TABLE IF NOT EXISTS nexent.ag_a2a_external_agent_t ( nacos_config_id VARCHAR(64), nacos_agent_name VARCHAR(255), + -- Base URL for infrastructure health checks + base_url VARCHAR(512), + -- Tenant isolation tenant_id VARCHAR(100) NOT NULL, created_by VARCHAR(100) NOT NULL, @@ -1348,6 +1550,7 @@ COMMENT ON COLUMN nexent.ag_a2a_external_agent_t.last_check_result IS 'Last heal COMMENT ON COLUMN nexent.ag_a2a_external_agent_t.create_time IS 'Record creation timestamp'; COMMENT ON COLUMN nexent.ag_a2a_external_agent_t.update_time IS 'Record last update timestamp'; COMMENT ON COLUMN nexent.ag_a2a_external_agent_t.delete_flag IS 'Soft delete flag: Y/N'; -- NOSONAR +COMMENT ON COLUMN nexent.ag_a2a_external_agent_t.base_url IS 'Base URL for health checks (service root address)'; CREATE TABLE IF NOT EXISTS nexent.ag_a2a_external_agent_relation_t ( @@ -1361,8 +1564,7 @@ CREATE TABLE IF NOT EXISTS nexent.ag_a2a_external_agent_relation_t ( create_time TIMESTAMP(6) DEFAULT CURRENT_TIMESTAMP, update_time TIMESTAMP(6) DEFAULT CURRENT_TIMESTAMP, delete_flag VARCHAR(1) DEFAULT 'N', - CONSTRAINT uq_local_external_agent UNIQUE (local_agent_id, external_agent_id), - CONSTRAINT fk_external_agent FOREIGN KEY (external_agent_id) REFERENCES nexent.ag_a2a_external_agent_t(id) + CONSTRAINT uq_local_external_agent UNIQUE (local_agent_id, external_agent_id) ); ALTER TABLE nexent.ag_a2a_external_agent_relation_t OWNER TO "root"; @@ 
-1472,9 +1674,7 @@ CREATE TABLE IF NOT EXISTS nexent.ag_a2a_message_t ( extensions JSONB, -- Extension URI list reference_task_ids JSONB, -- Referenced task IDs array create_time TIMESTAMP(6) DEFAULT CURRENT_TIMESTAMP, - UNIQUE(task_id, message_index), - CONSTRAINT ag_a2a_message_t_task_id_fk FOREIGN KEY (task_id) - REFERENCES nexent.ag_a2a_task_t(id) ON DELETE CASCADE + UNIQUE(task_id, message_index) ); ALTER TABLE nexent.ag_a2a_message_t OWNER TO "root"; @@ -1500,8 +1700,6 @@ CREATE TABLE IF NOT EXISTS nexent.ag_a2a_artifact_t ( meta_data JSONB, -- Metadata extensions JSONB, -- Extension URI list create_time TIMESTAMP(6) DEFAULT CURRENT_TIMESTAMP, - CONSTRAINT fk_artifact_task FOREIGN KEY (task_id) - REFERENCES nexent.ag_a2a_task_t(id) ON DELETE CASCADE, UNIQUE(task_id, artifact_id) ); @@ -1517,3 +1715,225 @@ COMMENT ON COLUMN nexent.ag_a2a_artifact_t.parts IS 'Artifact parts following A2 COMMENT ON COLUMN nexent.ag_a2a_artifact_t.meta_data IS 'Artifact metadata'; COMMENT ON COLUMN nexent.ag_a2a_artifact_t.extensions IS 'Extension URI list'; COMMENT ON COLUMN nexent.ag_a2a_artifact_t.create_time IS 'Artifact creation timestamp'; + +-- Create the model_monitoring_record_t table for LLM performance metrics +CREATE TABLE IF NOT EXISTS nexent.model_monitoring_record_t ( + monitoring_id SERIAL PRIMARY KEY, + model_id INT4, + model_name VARCHAR(100) NOT NULL, + model_type VARCHAR(20) DEFAULT 'llm', + agent_id INT4, + agent_name VARCHAR(100), + conversation_id INT4, + tenant_id VARCHAR(100) NOT NULL, + user_id VARCHAR(100), + display_name VARCHAR(100), + request_duration_ms INT4, + ttft_ms INT4, + input_tokens INT4, + output_tokens INT4, + total_tokens INT4, + generation_rate FLOAT, + is_streaming BOOLEAN DEFAULT FALSE, + is_success BOOLEAN DEFAULT TRUE, + is_error BOOLEAN DEFAULT FALSE, + error_type VARCHAR(50), + error_message TEXT, + retry_count INT4 DEFAULT 0, + operation VARCHAR(50), + create_time TIMESTAMP DEFAULT NOW(), + delete_flag VARCHAR(1) DEFAULT 'N' +); + 
+ALTER TABLE nexent.model_monitoring_record_t OWNER TO "root"; + +COMMENT ON TABLE nexent.model_monitoring_record_t IS 'Per-request LLM performance metrics for model monitoring'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.monitoring_id IS 'Monitoring record ID, unique primary key'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.model_id IS 'Foreign key to model_record_t.model_id'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.model_name IS 'Model identifier (repo/name format)'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.model_type IS 'Model type: llm, vlm, embedding, multi_embedding, rerank'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.agent_id IS 'Agent ID that initiated the request'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.agent_name IS 'Agent display name'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.conversation_id IS 'Conversation ID associated with the request'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.tenant_id IS 'Tenant ID for multi-tenancy isolation'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.user_id IS 'User ID who initiated the request'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.display_name IS 'Human-readable model display name'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.request_duration_ms IS 'Total request duration in milliseconds'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.ttft_ms IS 'Time to first token in milliseconds (streaming only)'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.input_tokens IS 'Number of input prompt tokens'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.output_tokens IS 'Number of output completion tokens'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.total_tokens IS 'Total tokens (input + output)'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.generation_rate IS 'Token generation rate in tokens per second'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.is_streaming IS 'Whether the request 
used streaming response'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.is_success IS 'Whether the request completed successfully'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.is_error IS 'Whether the request resulted in an error'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.error_type IS 'Error exception class name'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.error_message IS 'Error message text'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.retry_count IS 'Number of retry attempts'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.operation IS 'Operation type: chat_completion, title_generation, connectivity_check, embedding_call, system_prompt_generation'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.create_time IS 'Record creation timestamp'; +COMMENT ON COLUMN nexent.model_monitoring_record_t.delete_flag IS 'Soft delete flag: Y/N'; + +CREATE INDEX IF NOT EXISTS ix_monitoring_model_id ON nexent.model_monitoring_record_t (model_id); +CREATE INDEX IF NOT EXISTS ix_monitoring_tenant_id ON nexent.model_monitoring_record_t (tenant_id); +CREATE INDEX IF NOT EXISTS ix_monitoring_agent_id ON nexent.model_monitoring_record_t (agent_id); +CREATE INDEX IF NOT EXISTS ix_monitoring_create_time ON nexent.model_monitoring_record_t (create_time); +CREATE INDEX IF NOT EXISTS ix_monitoring_is_error ON nexent.model_monitoring_record_t (is_error); +CREATE INDEX IF NOT EXISTS ix_monitoring_model_type ON nexent.model_monitoring_record_t (model_type); +CREATE INDEX IF NOT EXISTS ix_monitoring_model_time ON nexent.model_monitoring_record_t (model_id, create_time); + +-- Create user OAuth account table for third-party login (GitHub, WeChat, etc.) 
+CREATE TABLE IF NOT EXISTS nexent.user_oauth_account_t ( + oauth_account_id SERIAL PRIMARY KEY, + user_id VARCHAR(100) NOT NULL, + provider VARCHAR(30) NOT NULL, + provider_user_id VARCHAR(200) NOT NULL, + provider_email VARCHAR(255), + provider_username VARCHAR(200), + tenant_id VARCHAR(100), + create_time TIMESTAMP WITHOUT TIME ZONE DEFAULT NOW(), + update_time TIMESTAMP WITHOUT TIME ZONE DEFAULT NOW(), + created_by VARCHAR(100), + updated_by VARCHAR(100), + delete_flag CHAR(1) DEFAULT 'N', + CONSTRAINT uq_oauth_provider_user UNIQUE (provider, provider_user_id) +); + +ALTER TABLE nexent.user_oauth_account_t OWNER TO "root"; + +-- Create a function to update the update_time column +CREATE OR REPLACE FUNCTION update_user_oauth_account_t_update_time() +RETURNS TRIGGER AS $$ +BEGIN + NEW.update_time = CURRENT_TIMESTAMP; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +-- Create a trigger to call the function before each update +CREATE TRIGGER update_user_oauth_account_t_update_time_trigger +BEFORE UPDATE ON nexent.user_oauth_account_t +FOR EACH ROW +EXECUTE FUNCTION update_user_oauth_account_t_update_time(); + +-- Add comments +COMMENT ON TABLE nexent.user_oauth_account_t IS 'User OAuth account table - third-party login bindings'; +COMMENT ON COLUMN nexent.user_oauth_account_t.oauth_account_id IS 'OAuth account ID, primary key'; +COMMENT ON COLUMN nexent.user_oauth_account_t.user_id IS 'Nexent user ID (Supabase UUID)'; +COMMENT ON COLUMN nexent.user_oauth_account_t.provider IS 'OAuth provider name: github, wechat, gde, link_app'; +COMMENT ON COLUMN nexent.user_oauth_account_t.provider_user_id IS 'User ID from the OAuth provider'; +COMMENT ON COLUMN nexent.user_oauth_account_t.provider_email IS 'Email from the OAuth provider'; +COMMENT ON COLUMN nexent.user_oauth_account_t.provider_username IS 'Display name from the OAuth provider'; +COMMENT ON COLUMN nexent.user_oauth_account_t.tenant_id IS 'Tenant ID at time of linking'; +COMMENT ON COLUMN 
nexent.user_oauth_account_t.create_time IS 'Creation time'; +COMMENT ON COLUMN nexent.user_oauth_account_t.update_time IS 'Update time'; +COMMENT ON COLUMN nexent.user_oauth_account_t.created_by IS 'Creator'; +COMMENT ON COLUMN nexent.user_oauth_account_t.updated_by IS 'Updater'; +COMMENT ON COLUMN nexent.user_oauth_account_t.delete_flag IS 'Whether it is deleted. Optional values: Y/N'; + +-- Create index for user_id queries +CREATE INDEX IF NOT EXISTS idx_user_oauth_account_t_user_id +ON nexent.user_oauth_account_t (user_id); + +-- mcp_community_record_t: Community MCP market table +CREATE TABLE IF NOT EXISTS nexent.mcp_community_record_t ( + community_id SERIAL PRIMARY KEY NOT NULL, + tenant_id VARCHAR(100), + user_id VARCHAR(100), + mcp_name VARCHAR(100) NOT NULL, + mcp_server VARCHAR(500) NOT NULL, + source VARCHAR(30) DEFAULT 'community', + version VARCHAR(50), + registry_json JSONB, + transport_type VARCHAR(30), + config_json JSON, + tags TEXT[], + description TEXT, + create_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, + update_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, + created_by VARCHAR(100), + updated_by VARCHAR(100), + delete_flag VARCHAR(1) DEFAULT 'N' +); + +ALTER TABLE nexent.mcp_community_record_t OWNER TO root; + +COMMENT ON TABLE nexent.mcp_community_record_t IS 'Community MCP market records, publishable from tenant MCP services'; +COMMENT ON COLUMN nexent.mcp_community_record_t.community_id IS 'Community record ID, unique primary key'; +COMMENT ON COLUMN nexent.mcp_community_record_t.tenant_id IS 'Publisher tenant ID'; +COMMENT ON COLUMN nexent.mcp_community_record_t.user_id IS 'Publisher user ID'; +COMMENT ON COLUMN nexent.mcp_community_record_t.mcp_name IS 'MCP name'; +COMMENT ON COLUMN nexent.mcp_community_record_t.mcp_server IS 'MCP server URL'; +COMMENT ON COLUMN nexent.mcp_community_record_t.source IS 'Source type, fixed to community for this table'; +COMMENT ON COLUMN nexent.mcp_community_record_t.version IS 
'MCP version'; +COMMENT ON COLUMN nexent.mcp_community_record_t.registry_json IS 'Full MCP server metadata JSON for discovery and quick import'; +COMMENT ON COLUMN nexent.mcp_community_record_t.transport_type IS 'Transport type: url/container'; +COMMENT ON COLUMN nexent.mcp_community_record_t.config_json IS 'Public-shareable MCP configuration JSON'; +COMMENT ON COLUMN nexent.mcp_community_record_t.tags IS 'Tags'; +COMMENT ON COLUMN nexent.mcp_community_record_t.description IS 'Description'; +COMMENT ON COLUMN nexent.mcp_community_record_t.create_time IS 'Creation time'; +COMMENT ON COLUMN nexent.mcp_community_record_t.update_time IS 'Update time'; +COMMENT ON COLUMN nexent.mcp_community_record_t.created_by IS 'Creator ID'; +COMMENT ON COLUMN nexent.mcp_community_record_t.updated_by IS 'Updater ID'; +COMMENT ON COLUMN nexent.mcp_community_record_t.delete_flag IS 'Soft delete flag: Y/N'; + +CREATE INDEX IF NOT EXISTS idx_mcp_community_tenant_delete + ON nexent.mcp_community_record_t (tenant_id, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_community_name_delete + ON nexent.mcp_community_record_t (mcp_name, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_community_transport_delete + ON nexent.mcp_community_record_t (transport_type, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_community_user_delete + ON nexent.mcp_community_record_t (user_id, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_community_tags_gin + ON nexent.mcp_community_record_t USING GIN (tags); + +CREATE OR REPLACE FUNCTION update_mcp_community_record_update_time() +RETURNS TRIGGER AS $$ +BEGIN + NEW.update_time = CURRENT_TIMESTAMP; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION update_mcp_community_record_update_time() IS 'Auto-update update_time for mcp_community_record_t'; + +DROP TRIGGER IF EXISTS update_mcp_community_record_update_time_trigger ON nexent.mcp_community_record_t; +CREATE TRIGGER update_mcp_community_record_update_time_trigger +BEFORE UPDATE ON 
nexent.mcp_community_record_t +FOR EACH ROW +EXECUTE FUNCTION update_mcp_community_record_update_time(); + +COMMENT ON TRIGGER update_mcp_community_record_update_time_trigger ON nexent.mcp_community_record_t IS 'Trigger to maintain update_time'; + +CREATE TABLE IF NOT EXISTS nexent.user_cas_session_t ( + cas_session_id SERIAL PRIMARY KEY, + session_id VARCHAR(100) NOT NULL UNIQUE, + user_id VARCHAR(100) NOT NULL, + cas_user_id VARCHAR(200) NOT NULL, + cas_session_index VARCHAR(500), + status VARCHAR(30) NOT NULL DEFAULT 'active', + expires_at TIMESTAMP NOT NULL, + revoked_at TIMESTAMP, + create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + created_by VARCHAR(100), + updated_by VARCHAR(100), + delete_flag VARCHAR(1) DEFAULT 'N' +); + +CREATE INDEX IF NOT EXISTS ix_user_cas_session_session_id + ON nexent.user_cas_session_t (session_id); +CREATE INDEX IF NOT EXISTS ix_user_cas_session_user_id + ON nexent.user_cas_session_t (user_id); +CREATE INDEX IF NOT EXISTS ix_user_cas_session_cas_user_id + ON nexent.user_cas_session_t (cas_user_id); + +COMMENT ON TABLE nexent.user_cas_session_t IS 'Server-side session records for CAS SSO login and logout synchronization'; +COMMENT ON COLUMN nexent.user_cas_session_t.session_id IS 'JWT sid claim for revocation checks'; +COMMENT ON COLUMN nexent.user_cas_session_t.cas_user_id IS 'User identifier returned by CAS'; +COMMENT ON COLUMN nexent.user_cas_session_t.cas_session_index IS 'CAS SessionIndex or service ticket'; diff --git a/docker/monitoring/grafana/dashboards/nexent-llm-agent.json b/docker/monitoring/grafana/dashboards/nexent-llm-agent.json new file mode 100644 index 000000000..d4e2c321b --- /dev/null +++ b/docker/monitoring/grafana/dashboards/nexent-llm-agent.json @@ -0,0 +1,150 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + 
"name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Nexent Agent traces backed by Grafana Tempo.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [ + { + "asDropdown": false, + "icon": "external link", + "includeVars": false, + "keepTime": true, + "tags": [], + "targetBlank": false, + "title": "Open Tempo Explore", + "tooltip": "Open Grafana Explore with the Tempo datasource", + "type": "link", + "url": "/explore?left=%7B%22datasource%22:%22Tempo%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22query%22:%22%7B%20resource.service.name%20%3D%20%5C%22nexent-backend%5C%22%20%7D%22,%22queryType%22:%22traceql%22%7D%5D%7D" + } + ], + "panels": [ + { + "datasource": { + "type": "tempo", + "uid": "Tempo" + }, + "description": "Recent traces for Nexent backend. Open a trace row to inspect the agent, chain, LLM, and tool span waterfall.", + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 16, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "tempo", + "uid": "Tempo" + }, + "limit": 100, + "query": "{ resource.service.name = \"nexent-backend\" }", + "queryType": "traceql", + "refId": "A", + "tableType": "traces" + } + ], + "title": "Recent Agent Traces", + "type": "table" + }, + { + "description": "TraceQL shortcuts for common Nexent views.", + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 2, + "options": { + "code": { + "language": "plaintext", + 
"showLineNumbers": false, + "showMiniMap": false + }, + "content": "Service traces:\n{ resource.service.name = \"nexent-backend\" }\n\nAgent spans:\n{ resource.service.name = \"nexent-backend\" && span.openinference.span.kind = \"AGENT\" }\n\nLLM spans:\n{ resource.service.name = \"nexent-backend\" && span.openinference.span.kind = \"LLM\" }\n\nTool spans:\n{ resource.service.name = \"nexent-backend\" && span.openinference.span.kind = \"TOOL\" }\n\nError traces:\n{ resource.service.name = \"nexent-backend\" && status = error }", + "mode": "markdown" + }, + "pluginVersion": "11.0.0", + "title": "TraceQL Examples", + "type": "text" + } + ], + "preload": false, + "refresh": "30s", + "schemaVersion": 39, + "tags": [ + "nexent", + "agent", + "tempo" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Nexent Agent Trace Monitoring", + "uid": "nexent-llm-agent", + "version": 1, + "weekStart": "" +} diff --git a/docker/monitoring/grafana/dashboards/nexent-llm-performance.json b/docker/monitoring/grafana/dashboards/nexent-llm-performance.json deleted file mode 100644 index ec8d0434a..000000000 --- a/docker/monitoring/grafana/dashboards/nexent-llm-performance.json +++ /dev/null @@ -1,544 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": null, - "links": [], - "liveNow": false, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - 
"drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "id": 1, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "histogram_quantile(0.95, rate(llm_request_duration_seconds_bucket[5m]))", - "interval": "", - "legendFormat": "95th percentile", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "histogram_quantile(0.50, rate(llm_request_duration_seconds_bucket[5m]))", - "interval": "", - "legendFormat": "50th percentile (median)", - "refId": "B" - } - ], - "title": "LLM Request Duration", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": 
"never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "tokens/s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "histogram_quantile(0.95, rate(llm_token_generation_rate_bucket[5m]))", - "interval": "", - "legendFormat": "95th percentile", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "histogram_quantile(0.50, rate(llm_token_generation_rate_bucket[5m]))", - "interval": "", - "legendFormat": "50th percentile (median)", - "refId": "B" - } - ], - "title": "Token Generation Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - 
"value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 8 - }, - "id": 3, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "histogram_quantile(0.95, rate(llm_time_to_first_token_seconds_bucket[5m]))", - "interval": "", - "legendFormat": "95th percentile TTFT", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "histogram_quantile(0.50, rate(llm_time_to_first_token_seconds_bucket[5m]))", - "interval": "", - "legendFormat": "50th percentile TTFT", - "refId": "B" - } - ], - "title": "Time to First Token (TTFT)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "tokens" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 8 - }, - "id": 4, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "rate(llm_total_tokens_total{type=\"input\"}[5m])", - "interval": "", - "legendFormat": "Input tokens/sec", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "rate(llm_total_tokens_total{type=\"output\"}[5m])", - "interval": "", - "legendFormat": "Output tokens/sec", - "refId": "B" - } - ], - "title": "Token Throughput", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "errors/sec" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 16 - }, - "id": 5, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "rate(llm_error_count_total[5m])", - "interval": "", - "legendFormat": "Error rate by model: {{model}}", - "refId": "A" - } - ], - "title": "LLM Error Rate", - "type": 
"timeseries" - } - ], - "refresh": "5s", - "schemaVersion": 37, - "style": "dark", - "tags": ["nexent", "llm", "performance"], - "templating": { - "list": [] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "Nexent LLM Performance Dashboard", - "uid": "nexent-llm-perf", - "version": 1, - "weekStart": "" -} - diff --git a/docker/monitoring/grafana/provisioning/dashboards/dashboards.yml b/docker/monitoring/grafana/provisioning/dashboards/dashboards.yml index b89a1fa81..b863e9d16 100644 --- a/docker/monitoring/grafana/provisioning/dashboards/dashboards.yml +++ b/docker/monitoring/grafana/provisioning/dashboards/dashboards.yml @@ -1,13 +1,12 @@ apiVersion: 1 providers: - - name: 'Nexent LLM Monitoring' + - name: Nexent Monitoring orgId: 1 - folder: 'Nexent' + folder: Nexent type: file disableDeletion: false - updateIntervalSeconds: 10 + updateIntervalSeconds: 30 allowUiUpdates: true options: path: /var/lib/grafana/dashboards - diff --git a/docker/monitoring/grafana/provisioning/datasources/datasources.yml b/docker/monitoring/grafana/provisioning/datasources/datasources.yml index 9bdc40d61..d23e4cba9 100644 --- a/docker/monitoring/grafana/provisioning/datasources/datasources.yml +++ b/docker/monitoring/grafana/provisioning/datasources/datasources.yml @@ -1,16 +1,23 @@ apiVersion: 1 datasources: - - name: Prometheus - type: prometheus + - name: Tempo + uid: Tempo + type: tempo access: proxy - url: http://prometheus:9090 + url: http://nexent-tempo:3200 isDefault: true editable: true - - - name: Jaeger - type: jaeger - access: proxy - url: http://jaeger:16686 - editable: true - + basicAuth: false + jsonData: + nodeGraph: + enabled: true + search: + hide: false + traceQuery: + timeShiftEnabled: true + spanStartTimeShift: "-1h" + spanEndTimeShift: "1h" + streamingEnabled: + search: false + metrics: false diff --git a/docker/monitoring/monitoring.env b/docker/monitoring/monitoring.env deleted file mode 100644 index 
2506c03a6..000000000 --- a/docker/monitoring/monitoring.env +++ /dev/null @@ -1,21 +0,0 @@ -# Telemetry and Monitoring Configuration -ENABLE_TELEMETRY=true -SERVICE_NAME=nexent-backend -JAEGER_ENDPOINT=http://localhost:14268/api/traces -PROMETHEUS_PORT=8000 -TELEMETRY_SAMPLE_RATE=1.0 - -# Performance monitoring thresholds -LLM_SLOW_REQUEST_THRESHOLD_SECONDS=5.0 -LLM_SLOW_TOKEN_RATE_THRESHOLD=10.0 - -# Grafana Configuration -GF_SECURITY_ADMIN_PASSWORD=admin -GF_USERS_ALLOW_SIGN_UP=false - -# Service ports -JAEGER_UI_PORT=16686 -PROMETHEUS_UI_PORT=9090 -GRAFANA_UI_PORT=3000 -OTEL_COLLECTOR_GRPC_PORT=4317 -OTEL_COLLECTOR_HTTP_PORT=4318 diff --git a/docker/monitoring/monitoring.env.example b/docker/monitoring/monitoring.env.example index 26ab041c8..17f75a3c9 100644 --- a/docker/monitoring/monitoring.env.example +++ b/docker/monitoring/monitoring.env.example @@ -1,22 +1,72 @@ -# Telemetry and Monitoring Configuration -ENABLE_TELEMETRY=true -SERVICE_NAME=nexent-backend -JAEGER_ENDPOINT=http://localhost:14268/api/traces -PROMETHEUS_PORT=8000 -TELEMETRY_SAMPLE_RATE=1.0 +# Monitoring stack selector for ./start-monitoring.sh. +# Supported values: otlp, collector, phoenix, langfuse, langsmith, grafana, zipkin. +MONITORING_PROVIDER=otlp -# Performance monitoring thresholds -LLM_SLOW_REQUEST_THRESHOLD_SECONDS=5.0 -LLM_SLOW_TOKEN_RATE_THRESHOLD=10.0 - -# Grafana Configuration -GF_SECURITY_ADMIN_PASSWORD=admin -GF_USERS_ALLOW_SIGN_UP=false - -# Service ports -JAEGER_UI_PORT=16686 -PROMETHEUS_UI_PORT=9090 -GRAFANA_UI_PORT=3000 OTEL_COLLECTOR_GRPC_PORT=4317 OTEL_COLLECTOR_HTTP_PORT=4318 +OTEL_COLLECTOR_CONFIG_FILE= +OTEL_COLLECTOR_VERSION=0.151.0 + +# Local Phoenix stack. Used by: ./start-monitoring.sh --stack phoenix +PHOENIX_VERSION=15 +PHOENIX_PORT=6006 +PHOENIX_GRPC_HOST_PORT=4319 + +# Local Langfuse stack. Used by: ./start-monitoring.sh --stack langfuse +# Defaults are for local development only. Replace secrets before production use. 
+LANGFUSE_VERSION=3 +LANGFUSE_PORT=3001 +LANGFUSE_NEXTAUTH_URL=http://localhost:3001 +LANGFUSE_NEXTAUTH_SECRET=nexent-langfuse-secret +LANGFUSE_SALT=nexent-langfuse-salt +LANGFUSE_ENCRYPTION_KEY=0000000000000000000000000000000000000000000000000000000000000000 +LANGFUSE_TELEMETRY_ENABLED=false +LANGFUSE_ENABLE_EXPERIMENTAL_FEATURES=false +LANGFUSE_INIT_ORG_ID=nexent +LANGFUSE_INIT_ORG_NAME=Nexent +LANGFUSE_INIT_PROJECT_ID=nexent +LANGFUSE_INIT_PROJECT_NAME=Nexent +LANGFUSE_INIT_PROJECT_PUBLIC_KEY=pk-lf-nexent-local +LANGFUSE_INIT_PROJECT_SECRET_KEY=sk-lf-nexent-local +LANGFUSE_INIT_USER_EMAIL=admin@nexent.com +LANGFUSE_INIT_USER_NAME=admin +LANGFUSE_INIT_USER_PASSWORD=nexent@4321 +LANGFUSE_OTLP_AUTH_HEADER= +LANGFUSE_POSTGRES_VERSION=15-alpine +LANGFUSE_POSTGRES_USER=postgres +LANGFUSE_POSTGRES_PASSWORD=nexent@4321 +LANGFUSE_POSTGRES_DB=postgres +LANGFUSE_POSTGRES_PORT=5440 +LANGFUSE_CLICKHOUSE_VERSION=26.3-alpine +LANGFUSE_CLICKHOUSE_USER=clickhouse +LANGFUSE_CLICKHOUSE_PASSWORD=clickhouse +LANGFUSE_CLICKHOUSE_HTTP_PORT=8124 +LANGFUSE_CLICKHOUSE_NATIVE_PORT=9002 +LANGFUSE_MINIO_VERSION=RELEASE.2023-12-20T01-00-02Z +LANGFUSE_MINIO_ROOT_USER=minio +LANGFUSE_MINIO_ROOT_PASSWORD=miniosecret +LANGFUSE_MINIO_API_PORT=9092 +LANGFUSE_MINIO_CONSOLE_PORT=9093 +LANGFUSE_S3_BUCKET=langfuse +LANGFUSE_REDIS_AUTH=myredissecret +LANGFUSE_REDIS_VERSION=alpine +LANGFUSE_REDIS_PORT=6380 + +# Online LangSmith forwarding. Used by: ./start-monitoring.sh --stack langsmith +# LangSmith currently ingests OTLP traces. Metrics remain in the Collector debug pipeline. +LANGSMITH_API_KEY= +LANGSMITH_PROJECT=nexent +LANGSMITH_OTLP_TRACES_ENDPOINT=https://api.smith.langchain.com/otel/v1/traces + +# Local Grafana stack. Used by: ./start-monitoring.sh --stack grafana +GRAFANA_VERSION=12.4 +GRAFANA_PORT=3002 +GRAFANA_ADMIN_USER=admin +GRAFANA_ADMIN_PASSWORD=nexent@4321 +GRAFANA_DEFAULT_LANGUAGE=zh-Hans +TEMPO_VERSION=2.10.5 +TEMPO_PORT=3200 +# Local Zipkin stack. 
Used by: ./start-monitoring.sh --stack zipkin +ZIPKIN_VERSION=latest +ZIPKIN_PORT=9411 diff --git a/docker/monitoring/otel-collector-config.yml b/docker/monitoring/otel-collector-config.yml index f14f427b5..8d2332361 100644 --- a/docker/monitoring/otel-collector-config.yml +++ b/docker/monitoring/otel-collector-config.yml @@ -5,22 +5,16 @@ receivers: endpoint: 0.0.0.0:4317 http: endpoint: 0.0.0.0:4318 - - # Prometheus receiver to collect metrics from instrumented apps - prometheus: - config: - scrape_configs: - - job_name: 'nexent-backend-otel' - static_configs: - - targets: ['host.docker.internal:8000'] - scrape_interval: 5s processors: batch: timeout: 1s send_batch_size: 512 - - # Resource processor to add common attributes + + memory_limiter: + limit_mib: 256 + check_interval: 1s + resource: attributes: - key: service.name @@ -30,51 +24,71 @@ processors: from_attribute: version action: insert - # Memory limiter to prevent OOM - memory_limiter: - limit_mib: 256 - check_interval: 1s - - # Add attributes specifically for LLM monitoring - attributes: - actions: - - key: llm.system - value: openai - action: insert - - key: deployment.environment - value: development - action: insert - exporters: - # Export traces to Jaeger via OTLP - otlp/jaeger: - endpoint: jaeger:14250 - tls: - insecure: true - - # Export metrics to Prometheus - prometheus: - endpoint: "0.0.0.0:8889" - resource_to_telemetry_conversion: - enabled: true - - # Logging exporter for debugging - logging: + debug: verbosity: normal service: - extensions: [] pipelines: traces: receivers: [otlp] processors: [memory_limiter, resource, batch] - exporters: [otlp/jaeger, logging] - + exporters: [debug] + metrics: - receivers: [otlp, prometheus] - processors: [memory_limiter, resource, attributes, batch] - exporters: [prometheus, logging] - + receivers: [otlp] + processors: [memory_limiter, resource, batch] + exporters: [debug] + telemetry: logs: level: "info" + +# Example configurations for AI observability 
platforms: +# +# === Arize Phoenix === +# Set environment variables: +# OTEL_EXPORTER_OTLP_ENDPOINT=https://app.phoenix.arize.com/s/YOUR_SPACE +# OTEL_EXPORTER_OTLP_AUTHORIZATION=Bearer YOUR_PHOENIX_API_KEY +# OTEL_EXPORTER_OTLP_METRICS_ENABLED=false +# +# Or configure directly in exporters section: +# otlphttp/arize: +# endpoint: https://app.phoenix.arize.com/s/YOUR_SPACE +# headers: +# Authorization: Bearer YOUR_PHOENIX_API_KEY +# Then add otlphttp/arize to the traces pipeline exporters. +# +# === Langfuse === +# Set environment variables: +# OTEL_EXPORTER_OTLP_ENDPOINT=https://cloud.langfuse.com/api/public/otel +# OTEL_EXPORTER_OTLP_AUTHORIZATION=Basic BASE64_ENCODED_KEY +# OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION=4 +# +# Where BASE64_ENCODED_KEY = base64(public_key:secret_key) +# +# Or configure directly: +# otlphttp/langfuse: +# endpoint: https://cloud.langfuse.com/api/public/otel +# headers: +# Authorization: Basic BASE64_ENCODED_KEY +# x-langfuse-ingestion-version: "4" +# Then add otlphttp/langfuse to the traces pipeline exporters. +# +# === LangSmith === +# Set environment variables: +# LANGSMITH_API_KEY=lsv2_... +# LANGSMITH_PROJECT=nexent +# +# Or configure directly: +# otlphttp/langsmith: +# traces_endpoint: https://api.smith.langchain.com/otel/v1/traces +# headers: +# x-api-key: YOUR_LANGSMITH_API_KEY +# Langsmith-Project: nexent +# Then add otlphttp/langsmith to the traces pipeline exporters. 
+# +# === Multiple Exporters === +# To export to multiple backends simultaneously, create multiple exporters +# and add them to the pipelines: +# exporters: [otlphttp/arize, otlphttp/langfuse, otlphttp/langsmith, debug] diff --git a/docker/monitoring/otel-collector-grafana-config.yml b/docker/monitoring/otel-collector-grafana-config.yml new file mode 100644 index 000000000..d69e69811 --- /dev/null +++ b/docker/monitoring/otel-collector-grafana-config.yml @@ -0,0 +1,50 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +processors: + batch: + timeout: 1s + send_batch_size: 512 + + memory_limiter: + limit_mib: 256 + check_interval: 1s + + resource: + attributes: + - key: service.name + value: nexent-backend + action: upsert + - key: service.version + from_attribute: version + action: insert + +exporters: + debug: + verbosity: normal + + otlp/tempo: + endpoint: tempo:4317 + tls: + insecure: true + +service: + pipelines: + traces: + receivers: [otlp] + processors: [memory_limiter, resource, batch] + exporters: [otlp/tempo, debug] + + metrics: + receivers: [otlp] + processors: [memory_limiter, resource, batch] + exporters: [debug] + + telemetry: + logs: + level: "info" diff --git a/docker/monitoring/otel-collector-langfuse-config.yml b/docker/monitoring/otel-collector-langfuse-config.yml new file mode 100644 index 000000000..9304d93e9 --- /dev/null +++ b/docker/monitoring/otel-collector-langfuse-config.yml @@ -0,0 +1,69 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +processors: + batch: + timeout: 1s + send_batch_size: 512 + + memory_limiter: + limit_mib: 256 + check_interval: 1s + + resource: + attributes: + - key: service.name + value: nexent-backend + action: upsert + - key: service.version + from_attribute: version + action: insert + +exporters: + debug: + verbosity: normal + + otlphttp/langfuse: + endpoint: http://langfuse-web:3000/api/public/otel + 
headers: + Authorization: ${env:LANGFUSE_OTLP_AUTH_HEADER} + x-langfuse-ingestion-version: "4" + # 1. Timeout + # Keeps the Collector from waiting too long on the backend, which would cause goroutines to pile up + timeout: 5s + + # 2. Sending queue + # When the backend slows down, buffer data in Collector memory first + sending_queue: + enabled: true + num_consumers: 10 # Number of concurrent sender workers (raises send throughput) + queue_size: 5000 # Maximum number of batches the queue can hold. If the queue is full, newly arriving data is dropped! + + # 3. Retry on failure + # On transient errors such as network jitter or a backend 503, retry with exponential backoff + retry_on_failure: + enabled: true + initial_interval: 1s # First retry happens after 1s + max_interval: 30s # Retry interval is capped at 30s + max_elapsed_time: 300s # Retry a given item for at most 5 minutes; after that, give up and drop it + +service: + pipelines: + traces: + receivers: [otlp] + processors: [memory_limiter, resource, batch] + exporters: [otlphttp/langfuse, debug] + + metrics: + receivers: [otlp] + processors: [memory_limiter, resource, batch] + exporters: [debug] + + telemetry: + logs: + level: "info" diff --git a/docker/monitoring/otel-collector-langsmith-config.yml b/docker/monitoring/otel-collector-langsmith-config.yml new file mode 100644 index 000000000..28222c1cf --- /dev/null +++ b/docker/monitoring/otel-collector-langsmith-config.yml @@ -0,0 +1,63 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +processors: + batch: + timeout: 1s + send_batch_size: 512 + + memory_limiter: + limit_mib: 256 + check_interval: 1s + + resource: + attributes: + - key: service.name + value: nexent-backend + action: upsert + - key: service.version + from_attribute: version + action: insert + +exporters: + debug: + verbosity: normal + + otlphttp/langsmith: + traces_endpoint: ${env:LANGSMITH_OTLP_TRACES_ENDPOINT} + headers: + x-api-key: ${env:LANGSMITH_API_KEY} + Langsmith-Project: ${env:LANGSMITH_PROJECT} + timeout: 10s + + sending_queue: + enabled: true + num_consumers: 10 + queue_size: 5000 + + retry_on_failure: + enabled: true + initial_interval: 1s + max_interval: 30s + max_elapsed_time: 300s + +service: + pipelines: + traces: + receivers: [otlp] + processors: 
[memory_limiter, resource, batch] + exporters: [otlphttp/langsmith, debug] + + metrics: + receivers: [otlp] + processors: [memory_limiter, resource, batch] + exporters: [debug] + + telemetry: + logs: + level: "info" diff --git a/docker/monitoring/otel-collector-phoenix-config.yml b/docker/monitoring/otel-collector-phoenix-config.yml new file mode 100644 index 000000000..0682a6e4d --- /dev/null +++ b/docker/monitoring/otel-collector-phoenix-config.yml @@ -0,0 +1,66 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +processors: + batch: + timeout: 1s + send_batch_size: 512 + + memory_limiter: + limit_mib: 256 + check_interval: 1s + + resource: + attributes: + - key: service.name + value: nexent-backend + action: upsert + - key: service.version + from_attribute: version + action: insert + +exporters: + debug: + verbosity: normal + + otlphttp/phoenix: + endpoint: http://phoenix:6006 + # 1. Timeout + # Keeps the Collector from waiting too long on the backend, which would cause goroutines to pile up + timeout: 5s + + # 2. Sending queue + # When the backend slows down, buffer data in Collector memory first + sending_queue: + enabled: true + num_consumers: 10 # Number of concurrent sender workers (raises send throughput) + queue_size: 5000 # Maximum number of batches the queue can hold. If the queue is full, newly arriving data is dropped! + + # 3. 
Retry on failure + # On transient errors such as network jitter or a backend 503, retry with exponential backoff + retry_on_failure: + enabled: true + initial_interval: 1s # First retry happens after 1s + max_interval: 30s # Retry interval is capped at 30s + max_elapsed_time: 300s # Retry a given item for at most 5 minutes; after that, give up and drop it + +service: + pipelines: + traces: + receivers: [otlp] + processors: [memory_limiter, resource, batch] + exporters: [otlphttp/phoenix, debug] + + metrics: + receivers: [otlp] + processors: [memory_limiter, resource, batch] + exporters: [debug] + + telemetry: + logs: + level: "info" diff --git a/docker/monitoring/otel-collector-zipkin-config.yml b/docker/monitoring/otel-collector-zipkin-config.yml new file mode 100644 index 000000000..ab26a84a9 --- /dev/null +++ b/docker/monitoring/otel-collector-zipkin-config.yml @@ -0,0 +1,49 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +processors: + batch: + timeout: 1s + send_batch_size: 512 + + memory_limiter: + limit_mib: 256 + check_interval: 1s + + resource: + attributes: + - key: service.name + value: nexent-backend + action: upsert + - key: service.version + from_attribute: version + action: insert + +exporters: + debug: + verbosity: normal + + zipkin: + endpoint: http://zipkin:9411/api/v2/spans + format: proto + +service: + pipelines: + traces: + receivers: [otlp] + processors: [memory_limiter, resource, batch] + exporters: [zipkin, debug] + + metrics: + receivers: [otlp] + processors: [memory_limiter, resource, batch] + exporters: [debug] + + telemetry: + logs: + level: "info" \ No newline at end of file diff --git a/docker/monitoring/prometheus.yml b/docker/monitoring/prometheus.yml deleted file mode 100644 index 49258c097..000000000 --- a/docker/monitoring/prometheus.yml +++ /dev/null @@ -1,39 +0,0 @@ -global: - scrape_interval: 15s - evaluation_interval: 15s - -rule_files: - # Load rules once and periodically evaluate them according to the global 'evaluation_interval'. 
- - "nexent_alerts.yml" - -scrape_configs: - # Nexent Backend - LLM Metrics - - job_name: 'nexent-backend' - static_configs: - - targets: ['host.docker.internal:8000'] # Adjust based on your backend service - scrape_interval: 15s - metrics_path: /metrics - scrape_timeout: 10s - - # OpenTelemetry Collector - - job_name: 'otel-collector' - static_configs: - - targets: ['otel-collector:8888'] - scrape_interval: 10s - - # Prometheus self-monitoring - - job_name: 'prometheus' - static_configs: - - targets: ['localhost:9090'] - - # Jaeger Metrics - - job_name: 'jaeger' - static_configs: - - targets: ['jaeger:14269'] - -# Alertmanager configuration (optional) -# alerting: -# alertmanagers: -# - static_configs: -# - targets: -# - alertmanager:9093 diff --git a/docker/monitoring/tempo.yml b/docker/monitoring/tempo.yml new file mode 100644 index 000000000..414ea42b9 --- /dev/null +++ b/docker/monitoring/tempo.yml @@ -0,0 +1,43 @@ +target: all +multitenancy_enabled: false +stream_over_http_enabled: true + +server: + http_listen_port: 3200 + +distributor: + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +metrics_generator: + ring: + kvstore: + store: inmemory + storage: + path: /var/tempo/generator/wal + remote_write: [] + traces_storage: + path: /var/tempo/generator/traces + processor: + local_blocks: + filter_server_spans: false + flush_to_storage: true + +storage: + trace: + backend: local + wal: + path: /var/tempo/wal + local: + path: /var/tempo/blocks + +overrides: + defaults: + metrics_generator: + processors: + - local-blocks diff --git a/docker/official-skills-zip/analyze-image.zip b/docker/official-skills-zip/analyze-image.zip new file mode 100644 index 000000000..9ec4c2fb1 Binary files /dev/null and b/docker/official-skills-zip/analyze-image.zip differ diff --git a/docker/official-skills-zip/analyze-text-file.zip b/docker/official-skills-zip/analyze-text-file.zip new file mode 100644 index 000000000..8c4478872 
Binary files /dev/null and b/docker/official-skills-zip/analyze-text-file.zip differ diff --git a/docker/official-skills-zip/create-docx.zip b/docker/official-skills-zip/create-docx.zip new file mode 100644 index 000000000..aa53e82b0 Binary files /dev/null and b/docker/official-skills-zip/create-docx.zip differ diff --git a/docker/official-skills-zip/create-file-directory.zip b/docker/official-skills-zip/create-file-directory.zip new file mode 100644 index 000000000..1e2d21ef0 Binary files /dev/null and b/docker/official-skills-zip/create-file-directory.zip differ diff --git a/docker/official-skills-zip/delete-file-directory.zip b/docker/official-skills-zip/delete-file-directory.zip new file mode 100644 index 000000000..0f0067d02 Binary files /dev/null and b/docker/official-skills-zip/delete-file-directory.zip differ diff --git a/docker/official-skills-zip/email-utils.zip b/docker/official-skills-zip/email-utils.zip new file mode 100644 index 000000000..c708a252c Binary files /dev/null and b/docker/official-skills-zip/email-utils.zip differ diff --git a/docker/official-skills-zip/list-directory.zip b/docker/official-skills-zip/list-directory.zip new file mode 100644 index 000000000..e3eaeba27 Binary files /dev/null and b/docker/official-skills-zip/list-directory.zip differ diff --git a/docker/official-skills-zip/move-file-directory.zip b/docker/official-skills-zip/move-file-directory.zip new file mode 100644 index 000000000..d01897231 Binary files /dev/null and b/docker/official-skills-zip/move-file-directory.zip differ diff --git a/docker/official-skills-zip/read-file.zip b/docker/official-skills-zip/read-file.zip new file mode 100644 index 000000000..b394c2b38 Binary files /dev/null and b/docker/official-skills-zip/read-file.zip differ diff --git a/docker/official-skills-zip/run-shell-ssh.zip b/docker/official-skills-zip/run-shell-ssh.zip new file mode 100644 index 000000000..868eee7c5 Binary files /dev/null and b/docker/official-skills-zip/run-shell-ssh.zip 
differ diff --git a/docker/official-skills-zip/search-datamate.zip b/docker/official-skills-zip/search-datamate.zip new file mode 100644 index 000000000..0cb18ded6 Binary files /dev/null and b/docker/official-skills-zip/search-datamate.zip differ diff --git a/docker/official-skills-zip/search-dify.zip b/docker/official-skills-zip/search-dify.zip new file mode 100644 index 000000000..2bd7c8ccf Binary files /dev/null and b/docker/official-skills-zip/search-dify.zip differ diff --git a/docker/official-skills-zip/search-idata.zip b/docker/official-skills-zip/search-idata.zip new file mode 100644 index 000000000..85a7e1b72 Binary files /dev/null and b/docker/official-skills-zip/search-idata.zip differ diff --git a/docker/official-skills-zip/search-knowledge-base.zip b/docker/official-skills-zip/search-knowledge-base.zip new file mode 100644 index 000000000..48fabec2a Binary files /dev/null and b/docker/official-skills-zip/search-knowledge-base.zip differ diff --git a/docker/official-skills-zip/search-web-exa.zip b/docker/official-skills-zip/search-web-exa.zip new file mode 100644 index 000000000..19c209588 Binary files /dev/null and b/docker/official-skills-zip/search-web-exa.zip differ diff --git a/docker/official-skills-zip/search-web-linkup.zip b/docker/official-skills-zip/search-web-linkup.zip new file mode 100644 index 000000000..4657bc165 Binary files /dev/null and b/docker/official-skills-zip/search-web-linkup.zip differ diff --git a/docker/official-skills-zip/search-web-tavily.zip b/docker/official-skills-zip/search-web-tavily.zip new file mode 100644 index 000000000..628f73ef6 Binary files /dev/null and b/docker/official-skills-zip/search-web-tavily.zip differ diff --git a/docker/scripts/sync_skill_directory.py b/docker/scripts/sync_skill_directory.py new file mode 100644 index 000000000..d5819d251 --- /dev/null +++ b/docker/scripts/sync_skill_directory.py @@ -0,0 +1,659 @@ +#!/usr/bin/env python3 +""" +Skills Directory Migration Script for v2.2.0 upgrade. 
+ +This script migrates skills from the legacy flat directory structure to +tenant-isolated directories. + +Migration: + FROM: ${ROOT_DIR}/skills/ (flat directory, skills directly under skills/) + TO: ${ROOT_DIR}/skills/{tenant_id}/ + +The tenant_id is determined by querying user_tenant_t for the first record +where user_role = 'ADMIN'. + +Usage (run on host machine): + python sync_skill_directory.py [--dry-run] + +Options: + --dry-run: Show what would be migrated without making changes + --verbose: Enable verbose debug output +""" + +import os +import sys +import argparse +import logging +import shutil +import subprocess +import base64 +import tempfile +from pathlib import Path +from typing import Optional + +# Setup logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + +# Constants +CONTAINER_NAME = "nexent-config" +DEFAULT_TENANT_ID = "tenant_id" + + +def get_env(key: str, default: str = "") -> str: + """Get environment variable with optional default.""" + return os.environ.get(key, default) + + +def load_environment_from_host(): + """ + Load environment variables from host .env file. + Looks for .env in the same directory as this script's parent (docker/). 
+ """ + script_dir = Path(__file__).resolve().parent + docker_dir = script_dir.parent + env_file = docker_dir / ".env" + + if env_file.is_file(): + logger.info(f"Loading environment from: {env_file}") + with open(env_file, 'r') as f: + for line in f: + line = line.strip() + if line and not line.startswith('#') and '=' in line: + key, _, value = line.partition('=') + key = key.strip() + value = value.strip().strip('"').strip("'") + if key and key not in os.environ: + os.environ[key] = value + return True + else: + logger.warning(f".env file not found at: {env_file}") + logger.info("Will use existing environment variables or defaults") + return False + + +def get_root_dir() -> str: + """Get ROOT_DIR from environment, normalized for the current OS.""" + root_dir = get_env("ROOT_DIR") + if not root_dir: + script_dir = Path(__file__).resolve().parent + docker_dir = script_dir.parent + env_file = docker_dir / ".env" + if env_file.is_file(): + with open(env_file, 'r') as f: + for line in f: + if line.startswith("ROOT_DIR="): + root_dir = line.split("=", 1)[1].strip().strip('"').strip("'") + break + + # Normalize path separators for current OS + if root_dir: + root_dir = str(Path(root_dir)) + return root_dir + + +def check_container_running(): + """Check if nexent-config container is running.""" + try: + result = subprocess.run( + ['docker', 'ps', '--format', '{{.Names}}'], + capture_output=True, + text=True, + timeout=10 + ) + + if result.returncode == 0: + containers = result.stdout.strip().split('\n') + if CONTAINER_NAME in containers: + logger.info(f"Container '{CONTAINER_NAME}' is running") + return True + else: + logger.error(f"Container '{CONTAINER_NAME}' is not running") + logger.info("Please start the containers with: cd docker && docker compose up -d") + return False + else: + logger.error("Could not query Docker containers") + return False + except FileNotFoundError: + logger.error("Docker not available on this system") + return False + except Exception as e: + 
logger.error(f"Error checking Docker containers: {e}") + return False + + +def exec_python_in_container(python_code: str) -> tuple: + """ + Execute Python code inside the container using base64 encoding. + + This approach avoids shell escaping issues by encoding the Python code + as base64 and decoding it inside the container. + + Args: + python_code: Python code to execute inside the container + + Returns: + Tuple of (return_code, stdout, stderr) + """ + # Encode Python code as base64 + encoded = base64.b64encode(python_code.encode('utf-8')).decode('ascii') + + # Create the shell command that decodes and executes the Python code + shell_cmd = f'python3 -c "import base64, sys; exec(base64.b64decode(sys.stdin.read()).decode(\'utf-8\'))"' + + try: + # Use stdin for the base64 data + full_cmd = ['docker', 'exec', '-i', CONTAINER_NAME, 'sh', '-c', shell_cmd] + result = subprocess.run( + full_cmd, + input=encoded, + capture_output=True, + text=True, + timeout=30 + ) + return result.returncode, result.stdout, result.stderr + except subprocess.TimeoutExpired: + logger.error("Command timed out") + return -1, "", "Command timed out" + except Exception as e: + logger.error(f"Failed to execute command in container: {e}") + return -1, "", str(e) + + +def test_postgres_connection_in_container() -> bool: + """ + Test PostgreSQL connection from inside the container using Python. 
+ + Returns: + True if connection successful, False otherwise + """ + logger.info("Testing PostgreSQL connection from inside container...") + + python_code = ''' +import os +import sys +try: + import psycopg2 + conn = psycopg2.connect( + host=os.getenv('POSTGRES_HOST', 'nexent-postgresql'), + port=os.getenv('POSTGRES_PORT', '5432'), + database=os.getenv('POSTGRES_DB', 'nexent'), + user=os.getenv('POSTGRES_USER', 'nexent'), + password=os.getenv('NEXENT_POSTGRES_PASSWORD', '') + ) + conn.close() + print("Connection successful") + sys.exit(0) +except Exception as e: + print(f"Connection failed: {e}", file=sys.stderr) + sys.exit(1) +''' + + returncode, stdout, stderr = exec_python_in_container(python_code) + + if returncode == 0: + logger.info("PostgreSQL connection test: SUCCESS") + return True + else: + logger.warning(f"PostgreSQL connection test failed: {stderr.strip()}") + return False + + +def get_admin_tenant_id_in_container() -> Optional[str]: + """ + Get tenant_id from the first user_tenant_t record where user_role = 'ADMIN'. + + Executes the query inside the container using Python. 
+ + Returns: + tenant_id string or None if not found + """ + logger.info("Querying admin tenant_id from inside container...") + + python_code = ''' +import os +import sys + +try: + import psycopg2 + + conn = psycopg2.connect( + host=os.getenv('POSTGRES_HOST', 'nexent-postgresql'), + port=os.getenv('POSTGRES_PORT', '5432'), + database=os.getenv('POSTGRES_DB', 'nexent'), + user=os.getenv('POSTGRES_USER', 'nexent'), + password=os.getenv('NEXENT_POSTGRES_PASSWORD', '') + ) + + cur = conn.cursor() + cur.execute(""" + SELECT tenant_id + FROM nexent.user_tenant_t + WHERE user_role = 'ADMIN' + AND delete_flag = 'N' + AND tenant_id IS NOT NULL + AND tenant_id != '' + ORDER BY user_tenant_id ASC + LIMIT 1 + """) + + result = cur.fetchone() + cur.close() + conn.close() + + if result: + print(result[0]) + sys.exit(0) + else: + print("No ADMIN user found", file=sys.stderr) + sys.exit(1) + +except Exception as e: + print(f"Query failed: {e}", file=sys.stderr) + sys.exit(1) +''' + + returncode, stdout, stderr = exec_python_in_container(python_code) + + if returncode == 0: + tenant_id = stdout.strip() + if tenant_id: + logger.info(f"Found ADMIN tenant_id: {tenant_id}") + return tenant_id + else: + logger.warning("No user with user_role='ADMIN' found in user_tenant_t") + return None + else: + logger.error(f"Failed to query admin tenant_id: {stderr.strip()}") + return None + + +def discover_legacy_skills_dir(root_dir: str) -> str: + """ + Discover the legacy skills directory. + + The legacy skills are located in the old nexent folder (sibling to nexent-data). + The new skills base is under {root_dir}/skills/{tenant_id}. 
+ + Legacy path: {root_dir}/../nexent/skills (old nexent folder) + New base: {root_dir}/skills + + Returns: + Path to the legacy skills directory (normalized for current OS) + """ + candidates = [] + if root_dir: + # Legacy path FIRST: check old nexent folder (nexent-data's sibling) + # This is the actual source of legacy skills + root_path = Path(root_dir) + legacy_candidate = root_path.parent / "nexent" / "skills" + candidates.append(str(legacy_candidate)) + # New base path (NOT the legacy, this is the destination base) + candidates.append(str(Path(root_dir) / "skills")) + candidates.append("skills") + candidates.append("./skills") + + for candidate in candidates: + if Path(candidate).is_dir(): + logger.info(f"Found legacy skills directory: {candidate}") + return candidate + + logger.warning("Could not find legacy skills directory") + return candidates[0] if candidates[0] else "skills" + + +def discover_skill_directories(skills_path: str) -> list: + """ + List all skill directories under the given base path. + + A valid skill directory contains at least a SKILL.md file. + + Args: + skills_path: Base skills directory path + + Returns: + List of skill directory names (not full paths) + """ + skills_path_obj = Path(skills_path) + if not skills_path_obj.is_dir(): + logger.warning(f"Skills directory does not exist: {skills_path}") + return [] + + skills = [] + try: + for item in skills_path_obj.iterdir(): + if item.is_dir(): + if (item / "SKILL.md").is_file(): + skills.append(item.name) + else: + logger.debug(f"Skipping non-skill directory: {item.name}") + except Exception as e: + logger.error(f"Error listing skills directory: {e}") + + return skills + + +def validate_skill_directory(skill_dir: str) -> dict: + """ + Validate a skill directory structure. 
+ + Args: + skill_dir: Path to the skill directory + + Returns: + Dict with validation results + """ + skill_dir_obj = Path(skill_dir) + result = { + "is_valid": True, + "skill_name": skill_dir_obj.name, + "files": [], + "errors": [] + } + + if not skill_dir_obj.is_dir(): + result["is_valid"] = False + result["errors"].append("Directory does not exist") + return result + + skill_md = skill_dir_obj / "SKILL.md" + if not skill_md.is_file(): + result["is_valid"] = False + result["errors"].append("SKILL.md not found") + + try: + for item in skill_dir_obj.rglob('*'): + if item.is_file(): + rel_path = item.relative_to(skill_dir_obj) + result["files"].append(str(rel_path)) + except Exception as e: + result["errors"].append(f"Error scanning files: {e}") + + return result + + +def migrate_skills( + legacy_dir: str, + target_dir: str, + skills: list, + dry_run: bool = False +) -> dict: + """ + Migrate skills from legacy directory to target directory. + + Args: + legacy_dir: Source directory path (host path) + target_dir: Target directory path (host path) + skills: List of skill names to migrate + dry_run: If True, only show what would be done + + Returns: + Migration results dict + """ + results = { + "total": len(skills), + "migrated": 0, + "skipped": 0, + "failed": 0, + "details": [] + } + + legacy_dir_obj = Path(legacy_dir) + target_dir_obj = Path(target_dir) + + for skill_name in skills: + source = legacy_dir_obj / skill_name + target = target_dir_obj / skill_name + + logger.info(f"Processing skill: {skill_name}") + + validation = validate_skill_directory(str(source)) + if not validation["is_valid"]: + logger.warning(f" Invalid skill directory: {', '.join(validation['errors'])}") + results["skipped"] += 1 + results["details"].append({ + "skill": skill_name, + "status": "skipped", + "reason": f"Validation failed: {', '.join(validation['errors'])}" + }) + continue + + if target.exists(): + logger.info(f" Target already exists, skipping: {target}") + results["skipped"] += 1 
+ results["details"].append({ + "skill": skill_name, + "status": "skipped", + "reason": "Already exists in target directory" + }) + continue + + if dry_run: + logger.info(f" [DRY-RUN] Would migrate to: {target}") + logger.info(f" Files: {', '.join(validation['files'])}") + results["migrated"] += 1 + results["details"].append({ + "skill": skill_name, + "status": "dry-run", + "source": str(source), + "target": str(target), + "files_count": len(validation["files"]) + }) + else: + try: + target.mkdir(parents=True, exist_ok=True) + + for item in source.rglob('*'): + if item.is_file(): + rel_path = item.relative_to(source) + dst_file = target / rel_path + dst_file.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(item, dst_file) + + logger.info(f" Migrated successfully: {len(validation['files'])} files") + results["migrated"] += 1 + results["details"].append({ + "skill": skill_name, + "status": "success", + "source": str(source), + "target": str(target), + "files_count": len(validation["files"]) + }) + + except Exception as e: + logger.error(f" Failed to migrate: {e}") + # target did not exist before this attempt (checked above), so remove the + # partial copy; otherwise a re-run would skip this skill as "already exists". + shutil.rmtree(target, ignore_errors=True) + results["failed"] += 1 + results["details"].append({ + "skill": skill_name, + "status": "failed", + "reason": str(e) + }) + + return results + + +def print_results(results: dict): + """Print migration results summary.""" + logger.info("=" * 60) + logger.info("Migration Results:") + logger.info(f" Total skills found: {results['total']}") + logger.info(f" Migrated: {results['migrated']}") + logger.info(f" Skipped: {results['skipped']}") + logger.info(f" Failed: {results['failed']}") + logger.info("=" * 60) + + if results['details']: + logger.info("\nDetails:") + for detail in results['details']: + status = detail['status'] + skill = detail['skill'] + if status == 'success': + logger.info(f" [OK] {skill}: {detail.get('files_count', 0)} files -> {detail.get('target', 'N/A')}") + elif status == 'dry-run': + logger.info(f" [DRY-RUN] {skill}: would migrate {detail.get('files_count', 0)} files to
{detail.get('target', 'N/A')}") + elif status == 'skipped': + logger.info(f" [SKIP] {skill}: {detail.get('reason', 'unknown reason')}") + else: + logger.info(f" [FAIL] {skill}: {detail.get('reason', 'unknown error')}") + + +def main(): + """Main function.""" + parser = argparse.ArgumentParser( + description='Migrate skills directory for v2.2.0 upgrade (run on host)' + ) + parser.add_argument( + '--dry-run', + action='store_true', + help='Show what would be migrated without making changes' + ) + parser.add_argument( + '--verbose', + action='store_true', + help='Enable verbose debug output' + ) + parser.add_argument( + '--legacy-dir', + type=str, + default=None, + help='Override legacy skills directory path (host path)' + ) + parser.add_argument( + '--target-dir', + type=str, + default=None, + help='Override target skills directory path (host path)' + ) + parser.add_argument( + '--skip-db', + action='store_true', + help='Skip database connection and use existing tenant directories' + ) + args = parser.parse_args() + + if args.verbose: + logging.getLogger().setLevel(logging.DEBUG) + + logger.info("=" * 60) + logger.info("Skills Directory Migration Script (v2.2.0)") + logger.info("=" * 60) + + if args.dry_run: + logger.info("Mode: DRY-RUN (no changes will be made)") + + # Step 1: Load environment from .env file + logger.info("\n[Step 1/6] Loading environment variables...") + load_environment_from_host() + + # Get ROOT_DIR + root_dir = get_root_dir() + if root_dir: + logger.info(f" ROOT_DIR: {root_dir}") + else: + logger.warning(" ROOT_DIR not set, using current directory") + + # Determine host paths + skills_base = str(Path(root_dir) / "skills") if root_dir else "skills" + + # Step 2: Check if container is running + logger.info("\n[Step 2/6] Checking container status...") + container_running = check_container_running() + if not container_running: + logger.error("nexent-config container is not running") + sys.exit(1) + + # Step 3: Test PostgreSQL connection and get 
tenant_id from container + tenant_id = None + if not args.skip_db: + logger.info("\n[Step 3/6] Testing PostgreSQL connection from inside container...") + + if test_postgres_connection_in_container(): + logger.info("\n[Step 4/6] Querying admin tenant_id...") + tenant_id = get_admin_tenant_id_in_container() + + if not tenant_id: + logger.warning("Could not determine tenant_id from database") + else: + logger.warning("Could not connect to PostgreSQL") + else: + logger.info("\n[Step 3/6] Skipping database connection (--skip-db)") + + # Fallback: reuse an existing tenant directory on host. + # Directories that contain a SKILL.md are un-migrated legacy skills (flat layout), + # not tenants, so they must never be picked as the tenant_id. Names are sorted + # because iterdir() yields entries in arbitrary order. + if not tenant_id: + logger.info("Checking for existing tenant directories...") + skills_base_obj = Path(skills_base) + if skills_base_obj.is_dir(): + existing_tenants = sorted( + d.name for d in skills_base_obj.iterdir() + if d.is_dir() and not (d / "SKILL.md").is_file() + ) + if existing_tenants: + tenant_id = existing_tenants[0] + logger.info(f"Using existing tenant directory: {tenant_id}") + + # Step 5: Determine directories + legacy_dir = args.legacy_dir or discover_legacy_skills_dir(root_dir or ".") + logger.info(f"\n[Step 5/6] Migration paths:") + logger.info(f" Legacy directory (host): {legacy_dir}") + logger.info(f" Skills base (host): {skills_base}") + + if args.target_dir: + target_base = args.target_dir + logger.info(f" Target directory (host): {target_base}") + elif tenant_id: + target_base = str(Path(skills_base) / tenant_id) + logger.info(f" Target directory (host): {target_base}") + else: + logger.error("Cannot determine target directory: no tenant_id found") + logger.info("Options:") + logger.info(" 1. Ensure user_tenant_t has at least one ADMIN user") + logger.info(" 2. Provide --target-dir explicitly") + logger.info(" 3.
Use --skip-db and ensure existing tenant directories exist") + sys.exit(1) + + # Step 6: Discover and migrate skills + logger.info("\n[Step 6/6] Discovering skills in legacy directory...") + + if not Path(legacy_dir).is_dir(): + logger.warning(f"Legacy directory does not exist: {legacy_dir}") + logger.info("No migration needed (source directory not found)") + return + + skills = discover_skill_directories(legacy_dir) + if not skills: + logger.info("No skills found in legacy directory") + logger.info("Migration complete (nothing to migrate)") + return + + logger.info(f"Found {len(skills)} skill(s): {', '.join(skills)}") + + # Execute migration + results = migrate_skills( + legacy_dir=legacy_dir, + target_dir=target_base, + skills=skills, + dry_run=args.dry_run + ) + + print_results(results) + + # Final summary + logger.info("\n" + "=" * 60) + if args.dry_run: + logger.info("DRY-RUN complete. To apply migration, run without --dry-run") + else: + logger.info("Migration completed") + if results['migrated'] > 0: + logger.info(f"\nSuccessfully migrated {results['migrated']} skill(s)") + logger.info(f"Skills are now available at: {target_base}") + logger.info("\nNote: The legacy directory has been preserved.") + logger.info("You can remove it manually after verifying the migration:") + logger.info(f" rm -rf {legacy_dir}") + logger.info("=" * 60) + + +if __name__ == "__main__": + main() diff --git a/docker/scripts/v220_sync_skill_directory.sh b/docker/scripts/v220_sync_skill_directory.sh new file mode 100644 index 000000000..572ffeb30 --- /dev/null +++ b/docker/scripts/v220_sync_skill_directory.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# +# v2.2.0 Skills Directory Migration Script +# Migrates skills from legacy location to tenant-isolated directories. 
+# +# Migration: +# FROM: ${ROOT_DIR}/skills/ (flat directory, skills directly under skills/) +# TO: ${ROOT_DIR}/skills/{tenant_id}/ +# +# The tenant_id is determined by querying user_tenant_t for the first record +# with user_role = 'ADMIN'. +# +# Usage: +# ./v220_sync_skill_directory.sh [--dry-run] [options forwarded to the Python script] +# +# Options: +# --dry-run Show what would be migrated without making changes +# + +set -e + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +SCRIPT_PATH="${SCRIPT_DIR}/sync_skill_directory.py" + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' + +log_info() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +log_warn() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +DRY_RUN=false +# Only detect --dry-run here; do not shift, because every argument (including +# --dry-run itself) is forwarded unchanged to the Python script below. +for arg in "$@"; do + case $arg in + --dry-run) + DRY_RUN=true + ;; + *) + ;; + esac +done + +if [ ! -f "$SCRIPT_PATH" ]; then + log_error "Script not found: $SCRIPT_PATH" + exit 1 +fi + +# Load environment from .env if exists +ENV_FILE="${SCRIPT_DIR}/../.env" +if [ -f "$ENV_FILE" ]; then + log_info "Loading environment from: $ENV_FILE" + set -a + source "$ENV_FILE" + set +a +fi + +log_info "Executing migration script..." + +if [ "$DRY_RUN" = true ]; then + log_info "Mode: DRY-RUN (no changes will be made)" +fi + +# Capture the exit status explicitly: under `set -e` a bare failing command would +# abort the script before the failure could be logged below. +EXIT_CODE=0 +python "$SCRIPT_PATH" "$@" || EXIT_CODE=$?
+ +if [ $EXIT_CODE -eq 0 ]; then + log_info "Migration completed successfully" +else + log_error "Migration failed with exit code: $EXIT_CODE" + exit $EXIT_CODE +fi diff --git a/docker/sql/v2.0.2_0425_add_is_a2a_to_ag_tenant_agent_version_t.sql b/docker/sql/v2.0.2_0425_add_is_a2a_to_ag_tenant_agent_version_t.sql new file mode 100644 index 000000000..3eb6ac5e9 --- /dev/null +++ b/docker/sql/v2.0.2_0425_add_is_a2a_to_ag_tenant_agent_version_t.sql @@ -0,0 +1,7 @@ +-- Add is_a2a column to ag_tenant_agent_version_t for tracking A2A Server agent publish status +-- This field indicates whether this version was published as an A2A Server agent + +ALTER TABLE nexent.ag_tenant_agent_version_t +ADD COLUMN IF NOT EXISTS is_a2a BOOLEAN DEFAULT FALSE; + +COMMENT ON COLUMN nexent.ag_tenant_agent_version_t.is_a2a IS 'Whether this version is published as an A2A Server agent'; diff --git a/docker/sql/v2.0.3_0423_create_model_monitoring_record_t.sql b/docker/sql/v2.0.3_0423_create_model_monitoring_record_t.sql new file mode 100644 index 000000000..438ca4863 --- /dev/null +++ b/docker/sql/v2.0.3_0423_create_model_monitoring_record_t.sql @@ -0,0 +1,42 @@ +-- Model Monitoring Record Table +-- Stores per-request LLM performance metrics for the monitoring feature. +-- Run this script against the 'nexent' schema in PostgreSQL. 
+ +CREATE TABLE IF NOT EXISTS nexent.model_monitoring_record_t ( + monitoring_id SERIAL PRIMARY KEY, + model_id INT4, + model_name VARCHAR(100) NOT NULL, + model_type VARCHAR(20) DEFAULT 'llm', + agent_id INT4, + agent_name VARCHAR(100), + conversation_id INT4, + tenant_id VARCHAR(100) NOT NULL, + user_id VARCHAR(100), + display_name VARCHAR(100), + request_duration_ms INT4, + ttft_ms INT4, + input_tokens INT4, + output_tokens INT4, + total_tokens INT4, + generation_rate FLOAT, + is_streaming BOOLEAN DEFAULT FALSE, + is_success BOOLEAN DEFAULT TRUE, + is_error BOOLEAN DEFAULT FALSE, + error_type VARCHAR(50), + error_message TEXT, + retry_count INT4 DEFAULT 0, + operation VARCHAR(50), + create_time TIMESTAMP DEFAULT NOW(), + delete_flag VARCHAR(1) DEFAULT 'N' +); + +-- Single-column indexes for common query patterns +CREATE INDEX IF NOT EXISTS ix_monitoring_model_id ON nexent.model_monitoring_record_t (model_id); +CREATE INDEX IF NOT EXISTS ix_monitoring_tenant_id ON nexent.model_monitoring_record_t (tenant_id); +CREATE INDEX IF NOT EXISTS ix_monitoring_agent_id ON nexent.model_monitoring_record_t (agent_id); +CREATE INDEX IF NOT EXISTS ix_monitoring_create_time ON nexent.model_monitoring_record_t (create_time); +CREATE INDEX IF NOT EXISTS ix_monitoring_is_error ON nexent.model_monitoring_record_t (is_error); +CREATE INDEX IF NOT EXISTS ix_monitoring_model_type ON nexent.model_monitoring_record_t (model_type); + +-- Composite index for time-range queries per model +CREATE INDEX IF NOT EXISTS ix_monitoring_model_time ON nexent.model_monitoring_record_t (model_id, create_time); diff --git a/docker/sql/v2.0.3_0430_add_user_oauth_account_t.sql b/docker/sql/v2.0.3_0430_add_user_oauth_account_t.sql new file mode 100644 index 000000000..faa9adab2 --- /dev/null +++ b/docker/sql/v2.0.3_0430_add_user_oauth_account_t.sql @@ -0,0 +1,52 @@ +-- Create user OAuth account table for third-party login (GitHub, WeChat, etc.) 
+CREATE TABLE IF NOT EXISTS nexent.user_oauth_account_t ( + oauth_account_id SERIAL PRIMARY KEY, + user_id VARCHAR(100) NOT NULL, + provider VARCHAR(30) NOT NULL, + provider_user_id VARCHAR(200) NOT NULL, + provider_email VARCHAR(255), + provider_username VARCHAR(200), + tenant_id VARCHAR(100), + create_time TIMESTAMP WITHOUT TIME ZONE DEFAULT NOW(), + update_time TIMESTAMP WITHOUT TIME ZONE DEFAULT NOW(), + created_by VARCHAR(100), + updated_by VARCHAR(100), + delete_flag CHAR(1) DEFAULT 'N', + CONSTRAINT uq_oauth_provider_user UNIQUE (provider, provider_user_id) +); + +ALTER TABLE nexent.user_oauth_account_t OWNER TO "root"; + +-- Create a function to update the update_time column +CREATE OR REPLACE FUNCTION update_user_oauth_account_t_update_time() +RETURNS TRIGGER AS $$ +BEGIN + NEW.update_time = CURRENT_TIMESTAMP; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +-- Drop the trigger first so this script can be re-run without failing on "trigger already exists" +DROP TRIGGER IF EXISTS update_user_oauth_account_t_update_time_trigger ON nexent.user_oauth_account_t; + +-- Create a trigger to call the function before each update +CREATE TRIGGER update_user_oauth_account_t_update_time_trigger +BEFORE UPDATE ON nexent.user_oauth_account_t +FOR EACH ROW +EXECUTE FUNCTION update_user_oauth_account_t_update_time(); + +-- Add comments +COMMENT ON TABLE nexent.user_oauth_account_t IS 'User OAuth account table - third-party login bindings'; +COMMENT ON COLUMN nexent.user_oauth_account_t.oauth_account_id IS 'OAuth account ID, primary key'; +COMMENT ON COLUMN nexent.user_oauth_account_t.user_id IS 'Nexent user ID (Supabase UUID)'; +COMMENT ON COLUMN nexent.user_oauth_account_t.provider IS 'OAuth provider name: github, wechat, gde, link_app'; +COMMENT ON COLUMN nexent.user_oauth_account_t.provider_user_id IS 'User ID from the OAuth provider'; +COMMENT ON COLUMN nexent.user_oauth_account_t.provider_email IS 'Email from the OAuth provider'; +COMMENT ON COLUMN nexent.user_oauth_account_t.provider_username IS 'Display name from the OAuth provider'; +COMMENT ON COLUMN nexent.user_oauth_account_t.tenant_id IS 'Tenant ID at time of linking'; +COMMENT ON COLUMN
nexent.user_oauth_account_t.create_time IS 'Creation time'; +COMMENT ON COLUMN nexent.user_oauth_account_t.update_time IS 'Update time'; +COMMENT ON COLUMN nexent.user_oauth_account_t.created_by IS 'Creator'; +COMMENT ON COLUMN nexent.user_oauth_account_t.updated_by IS 'Updater'; +COMMENT ON COLUMN nexent.user_oauth_account_t.delete_flag IS 'Whether it is deleted. Optional values: Y/N'; + +-- Create index for user_id queries +CREATE INDEX IF NOT EXISTS idx_user_oauth_account_t_user_id +ON nexent.user_oauth_account_t (user_id); diff --git a/docker/sql/v2.0.4_0427_add_enable_context_manager_to_ag_tenant_agent_t.sql b/docker/sql/v2.0.4_0427_add_enable_context_manager_to_ag_tenant_agent_t.sql new file mode 100644 index 000000000..b89a19e04 --- /dev/null +++ b/docker/sql/v2.0.4_0427_add_enable_context_manager_to_ag_tenant_agent_t.sql @@ -0,0 +1,10 @@ +-- Migration: Add enable_context_manager column to ag_tenant_agent_t table +-- Date: 2025-04-27 +-- Description: Add enable_context_manager field to control context management (compression) per agent + +-- Add enable_context_manager column to ag_tenant_agent_t table +ALTER TABLE nexent.ag_tenant_agent_t +ADD COLUMN IF NOT EXISTS enable_context_manager BOOLEAN DEFAULT FALSE; + +-- Add comment to the column +COMMENT ON COLUMN nexent.ag_tenant_agent_t.enable_context_manager IS 'Whether to enable context management (compression) for this agent'; \ No newline at end of file diff --git a/docker/sql/v2.0.4_0506_add_base_url_in_external_agent.sql b/docker/sql/v2.0.4_0506_add_base_url_in_external_agent.sql new file mode 100644 index 000000000..e4723bc96 --- /dev/null +++ b/docker/sql/v2.0.4_0506_add_base_url_in_external_agent.sql @@ -0,0 +1,13 @@ +ALTER TABLE nexent.ag_a2a_external_agent_t +ADD COLUMN IF NOT EXISTS base_url VARCHAR(512); + +COMMENT ON COLUMN nexent.ag_a2a_external_agent_t.base_url IS 'Base URL for health checks (service root address)'; + +ALTER TABLE nexent.ag_a2a_message_t + DROP CONSTRAINT IF EXISTS 
ag_a2a_message_t_task_id_fk; + +ALTER TABLE nexent.ag_a2a_external_agent_relation_t + DROP CONSTRAINT IF EXISTS fk_external_agent; + +ALTER TABLE nexent.ag_a2a_artifact_t + DROP CONSTRAINT IF EXISTS fk_artifact_task; \ No newline at end of file diff --git a/docker/sql/v2.0.5_0511_add_auto_summary_fields_to_knowledge_record_t.sql b/docker/sql/v2.0.5_0511_add_auto_summary_fields_to_knowledge_record_t.sql new file mode 100644 index 000000000..491f6b27b --- /dev/null +++ b/docker/sql/v2.0.5_0511_add_auto_summary_fields_to_knowledge_record_t.sql @@ -0,0 +1,21 @@ +-- Migration: Add auto-summary fields to knowledge_record_t table +-- Date: 2026-05-11 +-- Description: Add summary_frequency, last_summary_time, and last_doc_update_time fields for auto-summary feature +-- This SQL consolidates fields added in multiple commits for clean upgrade path + +-- Add summary_frequency column (auto-summary frequency configuration) +ALTER TABLE nexent.knowledge_record_t +ADD COLUMN IF NOT EXISTS summary_frequency VARCHAR(10); + +-- Add last_summary_time column (timestamp of last summary generation) +ALTER TABLE nexent.knowledge_record_t +ADD COLUMN IF NOT EXISTS last_summary_time TIMESTAMP; + +-- Add last_doc_update_time column (timestamp of last document add/delete operation) +ALTER TABLE nexent.knowledge_record_t +ADD COLUMN IF NOT EXISTS last_doc_update_time TIMESTAMP; + +-- Add comments to the columns +COMMENT ON COLUMN nexent.knowledge_record_t.summary_frequency IS 'Auto-summary frequency: 1h, 3h, 6h, 1d, 1w, or NULL (disabled)'; +COMMENT ON COLUMN nexent.knowledge_record_t.last_summary_time IS 'Timestamp of last summary generation'; +COMMENT ON COLUMN nexent.knowledge_record_t.last_doc_update_time IS 'Timestamp of last document add/delete operation, used for auto-summary optimization to skip unnecessary summary regeneration'; \ No newline at end of file diff --git a/docker/sql/v2.1.0_0503_add_prompt_template_t.sql b/docker/sql/v2.1.0_0503_add_prompt_template_t.sql new file mode 
100644 index 000000000..3db9a9701 --- /dev/null +++ b/docker/sql/v2.1.0_0503_add_prompt_template_t.sql @@ -0,0 +1,115 @@ +-- Migration: Add prompt template table and agent prompt template fields +-- Date: 2026-05-03 +-- Description: Add user-scoped prompt template storage and bind selected prompt template to agents + +ALTER TABLE nexent.ag_tenant_agent_t +ADD COLUMN IF NOT EXISTS prompt_template_id INTEGER; + +ALTER TABLE nexent.ag_tenant_agent_t +ADD COLUMN IF NOT EXISTS prompt_template_name VARCHAR(100); + +COMMENT ON COLUMN nexent.ag_tenant_agent_t.prompt_template_id IS 'Prompt template ID used for business logic prompt generation'; +COMMENT ON COLUMN nexent.ag_tenant_agent_t.prompt_template_name IS 'Prompt template name used for business logic prompt generation'; + +UPDATE nexent.ag_tenant_agent_t +SET prompt_template_id = 0, + prompt_template_name = 'system_default' +WHERE delete_flag = 'N' + AND (prompt_template_id IS NULL OR prompt_template_name IS NULL); + +CREATE TABLE IF NOT EXISTS nexent.ag_prompt_template_t ( + template_id SERIAL PRIMARY KEY, + template_name VARCHAR(100) NOT NULL, + description VARCHAR(500), + template_type VARCHAR(50) NOT NULL DEFAULT 'agent_generate', + tenant_id VARCHAR(100) NOT NULL, + user_id VARCHAR(100) NOT NULL, + template_content_zh JSONB NOT NULL, + template_content_en JSONB, + create_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, + update_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, + created_by VARCHAR(100), + updated_by VARCHAR(100), + delete_flag VARCHAR(1) DEFAULT 'N' +); + +ALTER TABLE nexent.ag_prompt_template_t OWNER TO "root"; + +CREATE OR REPLACE FUNCTION update_ag_prompt_template_update_time() +RETURNS TRIGGER AS $$ +BEGIN + NEW.update_time = CURRENT_TIMESTAMP; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +DROP TRIGGER IF EXISTS update_ag_prompt_template_update_time_trigger ON nexent.ag_prompt_template_t; + +CREATE TRIGGER update_ag_prompt_template_update_time_trigger +BEFORE UPDATE ON 
nexent.ag_prompt_template_t +FOR EACH ROW +EXECUTE FUNCTION update_ag_prompt_template_update_time(); + +ALTER TABLE nexent.ag_prompt_template_t +DROP CONSTRAINT IF EXISTS uq_prompt_template_user_name; + +COMMENT ON TABLE nexent.ag_prompt_template_t IS 'Prompt template table for user-defined business logic generation prompts'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.template_id IS 'Prompt template ID'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.template_name IS 'Prompt template name'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.description IS 'Prompt template description'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.template_type IS 'Prompt template type'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.tenant_id IS 'Tenant ID'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.user_id IS 'User ID'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.template_content_zh IS 'Chinese prompt template content'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.template_content_en IS 'English prompt template content'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.create_time IS 'Creation time'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.update_time IS 'Update time'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.created_by IS 'Creator'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.updated_by IS 'Updater'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.delete_flag IS 'Whether it is deleted. 
Optional values: Y/N'; + +DROP INDEX IF EXISTS nexent.uq_prompt_template_user_name_active; +CREATE UNIQUE INDEX IF NOT EXISTS uq_prompt_template_user_name_active +ON nexent.ag_prompt_template_t (tenant_id, user_id, template_name) +WHERE delete_flag = 'N'; + +CREATE INDEX IF NOT EXISTS idx_ag_prompt_template_t_user +ON nexent.ag_prompt_template_t (tenant_id, user_id, template_type) +WHERE delete_flag = 'N'; + +INSERT INTO nexent.ag_prompt_template_t ( + template_id, + template_name, + description, + template_type, + tenant_id, + user_id, + template_content_zh, + template_content_en, + created_by, + updated_by, + delete_flag +) +VALUES ( + 0, + 'system_default', + 'System default prompt template', + 'agent_generate', + 'tenant_id', + 'user_id', + '{}'::jsonb, + '{}'::jsonb, + 'user_id', + 'user_id', + 'N' +) +ON CONFLICT (template_id) DO UPDATE SET + template_name = EXCLUDED.template_name, + description = EXCLUDED.description, + template_type = EXCLUDED.template_type, + tenant_id = EXCLUDED.tenant_id, + user_id = EXCLUDED.user_id, + template_content_zh = EXCLUDED.template_content_zh, + template_content_en = EXCLUDED.template_content_en, + updated_by = EXCLUDED.updated_by, + delete_flag = 'N'; diff --git a/docker/sql/v2.1.1_0508_add_embedding_model_id_to_knowledge_record_t.sql b/docker/sql/v2.1.1_0508_add_embedding_model_id_to_knowledge_record_t.sql new file mode 100644 index 000000000..0305a2590 --- /dev/null +++ b/docker/sql/v2.1.1_0508_add_embedding_model_id_to_knowledge_record_t.sql @@ -0,0 +1,9 @@ +-- Add embedding_model_id column to knowledge_record_t table +-- This field stores the ID of the embedding model used by the knowledge base + +-- Add embedding_model_id column (schema-qualified so it does not depend on search_path) +ALTER TABLE nexent.knowledge_record_t +ADD COLUMN IF NOT EXISTS embedding_model_id INTEGER; + +-- Add column comment +COMMENT ON COLUMN nexent.knowledge_record_t.embedding_model_id IS 'Embedding model ID, foreign key reference to model_record_t.model_id'; diff --git
a/docker/sql/v2.1.1_0509_add_model_appid_token_to_model_record_t.sql b/docker/sql/v2.1.1_0509_add_model_appid_token_to_model_record_t.sql new file mode 100644 index 000000000..521fa38a4 --- /dev/null +++ b/docker/sql/v2.1.1_0509_add_model_appid_token_to_model_record_t.sql @@ -0,0 +1,9 @@ +ALTER TABLE nexent.model_record_t +ADD COLUMN IF NOT EXISTS model_appid VARCHAR(100) DEFAULT ''; + + +ALTER TABLE nexent.model_record_t +ADD COLUMN IF NOT EXISTS access_token VARCHAR(100) DEFAULT ''; + +COMMENT ON COLUMN nexent.model_record_t.model_appid IS 'Application ID for model authentication.'; +COMMENT ON COLUMN nexent.model_record_t.access_token IS 'Access token for model authentication.'; diff --git a/docker/sql/v2.2.0_0514_skill_config_schema.sql b/docker/sql/v2.2.0_0514_skill_config_schema.sql new file mode 100644 index 000000000..12e549175 --- /dev/null +++ b/docker/sql/v2.2.0_0514_skill_config_schema.sql @@ -0,0 +1,30 @@ +-- Rename params -> config_values, add config_schemas to ag_skill_info_t +-- Add tenant_id column for multi-tenancy support +ALTER TABLE nexent.ag_skill_info_t ADD COLUMN IF NOT EXISTS tenant_id VARCHAR(100); + +-- Add config_values and config_schemas to ag_skill_info_t +DO $$ +BEGIN + IF EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_schema = 'nexent' + AND table_name = 'ag_skill_info_t' + AND column_name = 'params' + ) THEN + ALTER TABLE nexent.ag_skill_info_t RENAME COLUMN params TO config_values; + END IF; +END $$; +ALTER TABLE nexent.ag_skill_info_t ADD COLUMN IF NOT EXISTS config_schemas JSON; + +-- Comments for ag_skill_info_t columns +COMMENT ON COLUMN nexent.ag_skill_info_t.tenant_id IS 'Tenant ID for multi-tenancy. 
NULL for pre-existing skills.'; +COMMENT ON COLUMN nexent.ag_skill_info_t.config_values IS 'Runtime parameter values from config/config.yaml'; +COMMENT ON COLUMN nexent.ag_skill_info_t.config_schemas IS 'Parameter metadata list from config/schema.yaml'; + +-- Add config_values and config_schemas to ag_skill_instance_t +ALTER TABLE nexent.ag_skill_instance_t ADD COLUMN IF NOT EXISTS config_values JSON; +ALTER TABLE nexent.ag_skill_instance_t ADD COLUMN IF NOT EXISTS config_schemas JSON; + +-- Comments for ag_skill_instance_t columns +COMMENT ON COLUMN nexent.ag_skill_instance_t.config_values IS 'Per-agent runtime parameter values from config/config.yaml'; +COMMENT ON COLUMN nexent.ag_skill_instance_t.config_schemas IS 'Per-agent parameter schema overrides from config/schema.yaml'; diff --git a/docker/sql/v2.2.0_0520_add_concurrency_and_timeout_to_model_record_t.sql b/docker/sql/v2.2.0_0520_add_concurrency_and_timeout_to_model_record_t.sql new file mode 100644 index 000000000..59632f8ed --- /dev/null +++ b/docker/sql/v2.2.0_0520_add_concurrency_and_timeout_to_model_record_t.sql @@ -0,0 +1,13 @@ +-- Add concurrency_limit column to model_record_t table +ALTER TABLE nexent.model_record_t +ADD COLUMN IF NOT EXISTS concurrency_limit INTEGER DEFAULT NULL; + +-- Add comment to the column +COMMENT ON COLUMN nexent.model_record_t.concurrency_limit IS 'Maximum concurrent requests for this model. Default is NULL (unlimited).'; + +-- Add timeout_seconds column to model_record_t table +ALTER TABLE nexent.model_record_t +ADD COLUMN IF NOT EXISTS timeout_seconds INTEGER DEFAULT 120; + +-- Add comment to the column +COMMENT ON COLUMN nexent.model_record_t.timeout_seconds IS 'Request timeout in seconds for this model. 
Default is 120 seconds.'; diff --git a/docker/sql/v2.2.0_0521_add_mcp_community_record_t.sql b/docker/sql/v2.2.0_0521_add_mcp_community_record_t.sql new file mode 100644 index 000000000..83f9d9a56 --- /dev/null +++ b/docker/sql/v2.2.0_0521_add_mcp_community_record_t.sql @@ -0,0 +1,83 @@ +-- Migration: Add mcp_community_record_t table +-- Date: 2026-03-26 +-- Description: Community MCP market table aligned with public-shareable fields from mcp_record_t. + +SET search_path TO nexent; + +BEGIN; + +CREATE TABLE IF NOT EXISTS nexent.mcp_community_record_t ( + community_id SERIAL PRIMARY KEY NOT NULL, + tenant_id VARCHAR(100), + user_id VARCHAR(100), + mcp_name VARCHAR(100) NOT NULL, + mcp_server VARCHAR(500) NOT NULL, + source VARCHAR(30) DEFAULT 'community', + version VARCHAR(50), + registry_json JSONB, + transport_type VARCHAR(30), + config_json JSON, + tags TEXT[], + description TEXT, + create_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, + update_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, + created_by VARCHAR(100), + updated_by VARCHAR(100), + delete_flag VARCHAR(1) DEFAULT 'N' +); + +ALTER TABLE nexent.mcp_community_record_t OWNER TO root; + +COMMENT ON TABLE nexent.mcp_community_record_t IS 'Community MCP market records, publishable from tenant MCP services'; +COMMENT ON COLUMN nexent.mcp_community_record_t.community_id IS 'Community record ID, unique primary key'; +COMMENT ON COLUMN nexent.mcp_community_record_t.tenant_id IS 'Publisher tenant ID'; +COMMENT ON COLUMN nexent.mcp_community_record_t.user_id IS 'Publisher user ID'; +COMMENT ON COLUMN nexent.mcp_community_record_t.mcp_name IS 'MCP name'; +COMMENT ON COLUMN nexent.mcp_community_record_t.mcp_server IS 'MCP server URL'; +COMMENT ON COLUMN nexent.mcp_community_record_t.source IS 'Source type, fixed to community for this table'; +COMMENT ON COLUMN nexent.mcp_community_record_t.version IS 'MCP version'; +COMMENT ON COLUMN nexent.mcp_community_record_t.registry_json IS 'Full MCP 
server metadata JSON for discovery and quick import'; +COMMENT ON COLUMN nexent.mcp_community_record_t.transport_type IS 'Transport type: url/container'; +COMMENT ON COLUMN nexent.mcp_community_record_t.config_json IS 'Public-shareable MCP configuration JSON'; +COMMENT ON COLUMN nexent.mcp_community_record_t.tags IS 'Tags'; +COMMENT ON COLUMN nexent.mcp_community_record_t.description IS 'Description'; +COMMENT ON COLUMN nexent.mcp_community_record_t.create_time IS 'Creation time'; +COMMENT ON COLUMN nexent.mcp_community_record_t.update_time IS 'Update time'; +COMMENT ON COLUMN nexent.mcp_community_record_t.created_by IS 'Creator ID'; +COMMENT ON COLUMN nexent.mcp_community_record_t.updated_by IS 'Updater ID'; +COMMENT ON COLUMN nexent.mcp_community_record_t.delete_flag IS 'Soft delete flag: Y/N'; + +CREATE INDEX IF NOT EXISTS idx_mcp_community_tenant_delete + ON nexent.mcp_community_record_t (tenant_id, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_community_name_delete + ON nexent.mcp_community_record_t (mcp_name, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_community_transport_delete + ON nexent.mcp_community_record_t (transport_type, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_community_user_delete + ON nexent.mcp_community_record_t (user_id, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_community_tags_gin + ON nexent.mcp_community_record_t USING GIN (tags); + +CREATE OR REPLACE FUNCTION update_mcp_community_record_update_time() +RETURNS TRIGGER AS $$ +BEGIN + NEW.update_time = CURRENT_TIMESTAMP; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION update_mcp_community_record_update_time() IS 'Auto-update update_time for mcp_community_record_t'; + +DROP TRIGGER IF EXISTS update_mcp_community_record_update_time_trigger ON nexent.mcp_community_record_t; +CREATE TRIGGER update_mcp_community_record_update_time_trigger +BEFORE UPDATE ON nexent.mcp_community_record_t +FOR EACH ROW +EXECUTE FUNCTION 
update_mcp_community_record_update_time(); + +COMMENT ON TRIGGER update_mcp_community_record_update_time_trigger ON nexent.mcp_community_record_t IS 'Trigger to maintain update_time'; + +COMMIT; diff --git a/docker/sql/v2.2.0_0521_expand_mcp_record_t.sql b/docker/sql/v2.2.0_0521_expand_mcp_record_t.sql new file mode 100644 index 000000000..6c92a392e --- /dev/null +++ b/docker/sql/v2.2.0_0521_expand_mcp_record_t.sql @@ -0,0 +1,41 @@ +-- Migration: Extend mcp_record_t for MCP tools (direct schema) +-- Date: 2026-03-18 +-- Description: One-step schema extension for mcp_record_t. No table merge, no data migration. + +SET search_path TO nexent; + +BEGIN; + +-- 1) Extend mcp_record_t with final column names (idempotent) +ALTER TABLE IF EXISTS nexent.mcp_record_t + ADD COLUMN IF NOT EXISTS source VARCHAR(30), + ADD COLUMN IF NOT EXISTS registry_json JSONB, + ADD COLUMN IF NOT EXISTS config_json JSON, + ADD COLUMN IF NOT EXISTS enabled BOOLEAN DEFAULT TRUE, + ADD COLUMN IF NOT EXISTS tags TEXT[], + ADD COLUMN IF NOT EXISTS description TEXT, + ADD COLUMN IF NOT EXISTS container_port INTEGER; + +-- 2) Add comments for new columns +COMMENT ON COLUMN nexent.mcp_record_t.source IS 'Source type: local/mcp_registry/community'; +COMMENT ON COLUMN nexent.mcp_record_t.registry_json IS 'Full MCP registry server.json snapshot'; +COMMENT ON COLUMN nexent.mcp_record_t.config_json IS 'MCP config data'; +COMMENT ON COLUMN nexent.mcp_record_t.enabled IS 'Enabled'; +COMMENT ON COLUMN nexent.mcp_record_t.tags IS 'Tags'; +COMMENT ON COLUMN nexent.mcp_record_t.description IS 'Description'; +COMMENT ON COLUMN nexent.mcp_record_t.container_port IS 'Host port bound for containerized MCP service'; + +-- 3) Add indexes for common management queries +CREATE INDEX IF NOT EXISTS idx_mcp_record_t_tenant_delete + ON nexent.mcp_record_t (tenant_id, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_record_t_tenant_name + ON nexent.mcp_record_t (tenant_id, mcp_name, delete_flag); + +CREATE INDEX IF NOT 
EXISTS idx_mcp_record_t_tenant_server + ON nexent.mcp_record_t (tenant_id, mcp_server, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_record_t_tags_gin + ON nexent.mcp_record_t USING GIN (tags); + +COMMIT; diff --git a/docker/sql/v2.2.0_0526_add_cas_session_t.sql b/docker/sql/v2.2.0_0526_add_cas_session_t.sql new file mode 100644 index 000000000..3f1aab4fa --- /dev/null +++ b/docker/sql/v2.2.0_0526_add_cas_session_t.sql @@ -0,0 +1,27 @@ +CREATE TABLE IF NOT EXISTS nexent.user_cas_session_t ( + cas_session_id SERIAL PRIMARY KEY, + session_id VARCHAR(100) NOT NULL UNIQUE, + user_id VARCHAR(100) NOT NULL, + cas_user_id VARCHAR(200) NOT NULL, + cas_session_index VARCHAR(500), + status VARCHAR(30) NOT NULL DEFAULT 'active', + expires_at TIMESTAMP NOT NULL, + revoked_at TIMESTAMP, + create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + created_by VARCHAR(100), + updated_by VARCHAR(100), + delete_flag VARCHAR(1) DEFAULT 'N' +); + +CREATE INDEX IF NOT EXISTS ix_user_cas_session_session_id + ON nexent.user_cas_session_t (session_id); +CREATE INDEX IF NOT EXISTS ix_user_cas_session_user_id + ON nexent.user_cas_session_t (user_id); +CREATE INDEX IF NOT EXISTS ix_user_cas_session_cas_user_id + ON nexent.user_cas_session_t (cas_user_id); + +COMMENT ON TABLE nexent.user_cas_session_t IS 'Server-side session records for CAS SSO login and logout synchronization'; +COMMENT ON COLUMN nexent.user_cas_session_t.session_id IS 'JWT sid claim for revocation checks'; +COMMENT ON COLUMN nexent.user_cas_session_t.cas_user_id IS 'User identifier returned by CAS'; +COMMENT ON COLUMN nexent.user_cas_session_t.cas_session_index IS 'CAS SessionIndex or service ticket'; diff --git a/docker/sql/v2.2.0_0527_add_custom_headers_to_mcp_record_t.sql b/docker/sql/v2.2.0_0527_add_custom_headers_to_mcp_record_t.sql new file mode 100644 index 000000000..00933c523 --- /dev/null +++ b/docker/sql/v2.2.0_0527_add_custom_headers_to_mcp_record_t.sql @@ -0,0 
+1,26 @@ +-- Migration: Add custom_headers column to mcp_record_t +-- Date: 2026-05-26 +-- Description: Add custom_headers field to store custom HTTP headers for MCP server requests + +SET search_path TO nexent; + +BEGIN; + +-- Add custom_headers column if it doesn't exist +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_schema = 'nexent' + AND table_name = 'mcp_record_t' + AND column_name = 'custom_headers' + ) THEN + ALTER TABLE nexent.mcp_record_t + ADD COLUMN custom_headers JSON DEFAULT NULL; + END IF; +END $$; + +-- Add comment to the column +COMMENT ON COLUMN nexent.mcp_record_t.custom_headers IS 'Custom HTTP headers as JSON object for MCP server requests'; + +COMMIT; diff --git a/docker/sql/v2.2.0_0529_add_asset_owner_role_permissions.sql b/docker/sql/v2.2.0_0529_add_asset_owner_role_permissions.sql new file mode 100644 index 000000000..8f21b110b --- /dev/null +++ b/docker/sql/v2.2.0_0529_add_asset_owner_role_permissions.sql @@ -0,0 +1,53 @@ +-- Migration: ASSET_OWNER role permissions and invitation type comment +-- Date: 2026-05-29 +-- Description: Add ASSET_OWNER role permissions, SU asset-owner invite permissions, +-- update invitation code_type comment, and ensure ag_skill_info_t.tenant_id exists +-- Source: commit 15cece97692db2372a978cbdf21b5d5316e79f30 (init.sql) + +SET search_path TO nexent; + +BEGIN; + +COMMENT ON COLUMN nexent.tenant_invitation_code_t.code_type IS + 'Invitation code type: ADMIN_INVITE, DEV_INVITE, USER_INVITE, ASSET_OWNER_INVITE'; + +INSERT INTO nexent.role_permission_t + (role_permission_id, user_role, permission_category, permission_type, permission_subtype) +VALUES + (188, 'SU', 'RESOURCE', 'INVITE.ASSET_OWNER', 'CREATE'), + (189, 'SU', 'RESOURCE', 'INVITE.ASSET_OWNER', 'READ'), + (190, 'SU', 'RESOURCE', 'INVITE.ASSET_OWNER', 'UPDATE'), + (191, 'SU', 'RESOURCE', 'INVITE.ASSET_OWNER', 'DELETE'), + (192, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/'), + (193, 'ASSET_OWNER', 'VISIBILITY', 
'LEFT_NAV_MENU', '/agents'), + (194, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/knowledges'), + (195, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/chat'), + (196, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/space'), + (197, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/market'), + (198, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/models'), + (199, 'ASSET_OWNER', 'RESOURCE', 'AGENT', 'CREATE'), + (200, 'ASSET_OWNER', 'RESOURCE', 'AGENT', 'READ'), + (201, 'ASSET_OWNER', 'RESOURCE', 'AGENT', 'UPDATE'), + (202, 'ASSET_OWNER', 'RESOURCE', 'AGENT', 'DELETE'), + (203, 'ASSET_OWNER', 'RESOURCE', 'SKILL', 'CREATE'), + (204, 'ASSET_OWNER', 'RESOURCE', 'SKILL', 'READ'), + (205, 'ASSET_OWNER', 'RESOURCE', 'SKILL', 'UPDATE'), + (206, 'ASSET_OWNER', 'RESOURCE', 'SKILL', 'DELETE'), + (207, 'ASSET_OWNER', 'RESOURCE', 'KB', 'CREATE'), + (208, 'ASSET_OWNER', 'RESOURCE', 'KB', 'READ'), + (209, 'ASSET_OWNER', 'RESOURCE', 'KB', 'UPDATE'), + (210, 'ASSET_OWNER', 'RESOURCE', 'KB', 'DELETE'), + (211, 'ASSET_OWNER', 'RESOURCE', 'MCP', 'CREATE'), + (212, 'ASSET_OWNER', 'RESOURCE', 'MCP', 'READ'), + (213, 'ASSET_OWNER', 'RESOURCE', 'MCP', 'UPDATE'), + (214, 'ASSET_OWNER', 'RESOURCE', 'MCP', 'DELETE'), + (215, 'ASSET_OWNER', 'RESOURCE', 'MODEL', 'CREATE'), + (216, 'ASSET_OWNER', 'RESOURCE', 'MODEL', 'READ'), + (217, 'ASSET_OWNER', 'RESOURCE', 'MODEL', 'UPDATE'), + (218, 'ASSET_OWNER', 'RESOURCE', 'MODEL', 'DELETE'), + (219, 'ASSET_OWNER', 'RESOURCE', 'USER.ROLE', 'READ'), + (220, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/users'), + (221, 'SU', 'VISIBILITY', 'LEFT_NAV_MENU', '/asset-owner-resources') +ON CONFLICT (role_permission_id) DO NOTHING; + +COMMIT; diff --git a/docker/sql/v2.2.1_0601_add_agent_verification_config.sql b/docker/sql/v2.2.1_0601_add_agent_verification_config.sql new file mode 100644 index 000000000..d3882e1e2 --- /dev/null +++ b/docker/sql/v2.2.1_0601_add_agent_verification_config.sql @@ -0,0 +1,7 @@ +-- Migration: Add layered ReAct 
self-verification config to agents +-- Description: Stores per-agent verification controls for step-level and final-answer validation. + +ALTER TABLE nexent.ag_tenant_agent_t +ADD COLUMN IF NOT EXISTS verification_config JSONB; + +COMMENT ON COLUMN nexent.ag_tenant_agent_t.verification_config IS 'Layered ReAct self-verification configuration'; diff --git a/docker/sql/v2.2.1_0601_add_preserve_source_file_to_knowledge_record_t.sql b/docker/sql/v2.2.1_0601_add_preserve_source_file_to_knowledge_record_t.sql new file mode 100644 index 000000000..30b588a51 --- /dev/null +++ b/docker/sql/v2.2.1_0601_add_preserve_source_file_to_knowledge_record_t.sql @@ -0,0 +1,8 @@ +-- Migration: Add preserve_source_file to knowledge_record_t table +-- Date: 2026-06-01 +-- Description: Whether to preserve uploaded source documents after vectorization (default: true) + +ALTER TABLE nexent.knowledge_record_t +ADD COLUMN IF NOT EXISTS preserve_source_file BOOLEAN NOT NULL DEFAULT true; + +COMMENT ON COLUMN nexent.knowledge_record_t.preserve_source_file IS 'Whether to preserve uploaded source documents after vectorization'; diff --git a/docker/sql/v2.2.1_0603_add_greeting_fields_to_ag_tenant_agent_t.sql b/docker/sql/v2.2.1_0603_add_greeting_fields_to_ag_tenant_agent_t.sql new file mode 100644 index 000000000..7786bb902 --- /dev/null +++ b/docker/sql/v2.2.1_0603_add_greeting_fields_to_ag_tenant_agent_t.sql @@ -0,0 +1,15 @@ +-- Migration: Add greeting_message and example_questions columns to ag_tenant_agent_t table +-- Date: 2026-06-03 +-- Description: Add greeting message and example questions fields for agent chat initial screen + +-- Add greeting_message column to ag_tenant_agent_t table +ALTER TABLE nexent.ag_tenant_agent_t +ADD COLUMN IF NOT EXISTS greeting_message TEXT; + +-- Add example_questions column to ag_tenant_agent_t table +ALTER TABLE nexent.ag_tenant_agent_t +ADD COLUMN IF NOT EXISTS example_questions JSONB; + +-- Add comments to the columns +COMMENT ON COLUMN 
nexent.ag_tenant_agent_t.greeting_message IS 'Agent greeting message displayed on chat initial screen'; +COMMENT ON COLUMN nexent.ag_tenant_agent_t.example_questions IS 'List of example questions for starting a conversation with this agent'; \ No newline at end of file diff --git a/docker/sql/v2.2.1_0605_add_ag_agent_repository_t.sql b/docker/sql/v2.2.1_0605_add_ag_agent_repository_t.sql new file mode 100644 index 000000000..d719fc5aa --- /dev/null +++ b/docker/sql/v2.2.1_0605_add_ag_agent_repository_t.sql @@ -0,0 +1,96 @@ +-- Migration: Add ag_agent_repository_t table +-- Date: 2026-06-05 +-- Description: Agent marketplace repository for frozen shareable agent snapshots. + +SET search_path TO nexent; + +BEGIN; + +CREATE SEQUENCE IF NOT EXISTS nexent.ag_agent_repository_t_agent_repository_id_seq; + +CREATE TABLE IF NOT EXISTS nexent.ag_agent_repository_t ( + agent_repository_id BIGINT NOT NULL DEFAULT nextval('nexent.ag_agent_repository_t_agent_repository_id_seq'), + publisher_tenant_id VARCHAR(100) NOT NULL, + publisher_user_id VARCHAR(100) NOT NULL, + agent_id INTEGER NOT NULL, + source_version_no INTEGER NOT NULL, + name VARCHAR(100) NOT NULL, + display_name VARCHAR(100), + description TEXT, + author VARCHAR(100), + category_id INTEGER, + tags TEXT[], + tool_count INTEGER, + version_label VARCHAR(100), + agent_info_json JSONB NOT NULL, + status VARCHAR(30) DEFAULT 'NOT_SHARED', + create_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, + update_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, + created_by VARCHAR(100), + updated_by VARCHAR(100), + delete_flag VARCHAR(1) DEFAULT 'N', + CONSTRAINT ag_agent_repository_t_pkey PRIMARY KEY (agent_repository_id) +); + +ALTER SEQUENCE nexent.ag_agent_repository_t_agent_repository_id_seq + OWNED BY nexent.ag_agent_repository_t.agent_repository_id; + +ALTER TABLE nexent.ag_agent_repository_t OWNER TO root; + +COMMENT ON TABLE nexent.ag_agent_repository_t IS 'Agent marketplace repository for frozen 
shareable agent snapshots'; +COMMENT ON COLUMN nexent.ag_agent_repository_t.agent_repository_id IS 'Agent repository listing ID, unique primary key'; +COMMENT ON COLUMN nexent.ag_agent_repository_t.publisher_tenant_id IS 'Publisher tenant ID'; +COMMENT ON COLUMN nexent.ag_agent_repository_t.publisher_user_id IS 'Publisher user ID'; +COMMENT ON COLUMN nexent.ag_agent_repository_t.agent_id IS 'Root agent ID from ag_tenant_agent_t; upsert key with publisher_tenant_id'; +COMMENT ON COLUMN nexent.ag_agent_repository_t.source_version_no IS 'Published version number frozen at share time'; +COMMENT ON COLUMN nexent.ag_agent_repository_t.name IS 'Root agent programmatic name for display and search'; +COMMENT ON COLUMN nexent.ag_agent_repository_t.display_name IS 'Root agent display name'; +COMMENT ON COLUMN nexent.ag_agent_repository_t.description IS 'Root agent description'; +COMMENT ON COLUMN nexent.ag_agent_repository_t.author IS 'Agent author'; +COMMENT ON COLUMN nexent.ag_agent_repository_t.category_id IS 'Optional marketplace category ID'; +COMMENT ON COLUMN nexent.ag_agent_repository_t.tags IS 'Marketplace tags'; +COMMENT ON COLUMN nexent.ag_agent_repository_t.tool_count IS 'Total tool count across all agents in the bundle (display only)'; +COMMENT ON COLUMN nexent.ag_agent_repository_t.version_label IS 'Repository entry version label for display (e.g. 
v1.0)'; +COMMENT ON COLUMN nexent.ag_agent_repository_t.agent_info_json IS 'Frozen ExportAndImportDataFormat snapshot with optional skills'; +COMMENT ON COLUMN nexent.ag_agent_repository_t.status IS 'Listing status: NOT_SHARED (未共享) / PENDING_REVIEW (待审核) / REJECTED (审核驳回) / SHARED (已共享)'; +COMMENT ON COLUMN nexent.ag_agent_repository_t.create_time IS 'Creation time'; +COMMENT ON COLUMN nexent.ag_agent_repository_t.update_time IS 'Update time'; +COMMENT ON COLUMN nexent.ag_agent_repository_t.created_by IS 'Creator ID'; +COMMENT ON COLUMN nexent.ag_agent_repository_t.updated_by IS 'Updater ID'; +COMMENT ON COLUMN nexent.ag_agent_repository_t.delete_flag IS 'Soft delete flag: Y/N'; + +CREATE UNIQUE INDEX IF NOT EXISTS uq_agent_repository_tenant_agent_active + ON nexent.ag_agent_repository_t (publisher_tenant_id, agent_id) + WHERE delete_flag = 'N'; + +CREATE INDEX IF NOT EXISTS idx_agent_repository_publisher_delete + ON nexent.ag_agent_repository_t (publisher_tenant_id, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_agent_repository_status_delete + ON nexent.ag_agent_repository_t (status, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_agent_repository_name_delete + ON nexent.ag_agent_repository_t (name, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_agent_repository_tags_gin + ON nexent.ag_agent_repository_t USING GIN (tags); + +CREATE OR REPLACE FUNCTION update_ag_agent_repository_update_time() +RETURNS TRIGGER AS $$ +BEGIN + NEW.update_time = CURRENT_TIMESTAMP; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION update_ag_agent_repository_update_time() IS 'Auto-update update_time for ag_agent_repository_t'; + +DROP TRIGGER IF EXISTS update_ag_agent_repository_update_time_trigger ON nexent.ag_agent_repository_t; +CREATE TRIGGER update_ag_agent_repository_update_time_trigger +BEFORE UPDATE ON nexent.ag_agent_repository_t +FOR EACH ROW +EXECUTE FUNCTION update_ag_agent_repository_update_time(); + +COMMENT ON TRIGGER 
update_ag_agent_repository_update_time_trigger ON nexent.ag_agent_repository_t IS 'Trigger to maintain update_time'; + +COMMIT; diff --git a/docker/sql/v2.2.1_0609_add_selected_agent_version_no_to_agent_relation_t.sql b/docker/sql/v2.2.1_0609_add_selected_agent_version_no_to_agent_relation_t.sql new file mode 100644 index 000000000..9a67c1ab2 --- /dev/null +++ b/docker/sql/v2.2.1_0609_add_selected_agent_version_no_to_agent_relation_t.sql @@ -0,0 +1,15 @@ +-- Migration: Add selected_agent_version_no to ag_agent_relation_t +-- Date: 2026-06-09 +-- Description: Pin child agent version on parent-child relations at publish time. + +SET search_path TO nexent; + +BEGIN; + +ALTER TABLE nexent.ag_agent_relation_t + ADD COLUMN IF NOT EXISTS selected_agent_version_no INTEGER; + +COMMENT ON COLUMN nexent.ag_agent_relation_t.selected_agent_version_no IS + 'Pinned version of selected_agent_id. NULL = use child current published version at runtime (legacy/draft).'; + +COMMIT; diff --git a/docker/start-monitoring.sh b/docker/start-monitoring.sh index 8cd8561f0..48ca6cd3f 100755 --- a/docker/start-monitoring.sh +++ b/docker/start-monitoring.sh @@ -1,53 +1,420 @@ #!/bin/bash # Nexent LLM Performance Monitoring Setup Script -# This script sets up OpenTelemetry + Jaeger + Prometheus + Grafana for monitoring +# This script starts the OpenTelemetry Collector alone, or with a local +# Phoenix/Langfuse/Grafana/Zipkin observability backend, or forwards to +# online LangSmith. set -e SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" MONITORING_DIR="$SCRIPT_DIR/monitoring" +COMPOSE_FILE="$SCRIPT_DIR/docker-compose-monitoring.yml" -echo "🚀 Starting Nexent LLM Performance Monitoring Setup..." +SUPPORTED_STACKS="otlp, collector, phoenix, langfuse, langsmith, grafana, zipkin" -# Check if Docker is running -if ! docker info > /dev/null 2>&1; then - echo "❌ Error: Docker is not running. Please start Docker first." 
- exit 1 -fi +usage() { + cat <<EOF +Usage: + $(basename "$0") [start|up] [stack] + $(basename "$0") [stop|down] [stack] + $(basename "$0") [uninstall] [stack] -# Create external network if it doesn't exist -if ! docker network ls | grep -q nexent-network; then - echo "🔗 Creating nexent-network..." - docker network create nexent-network -else - echo "✅ nexent-network already exists" -fi +Stacks are mutually exclusive. Starting one stack removes containers from the +other monitoring stacks while preserving their data volumes. + +Stacks: + otlp Start OpenTelemetry Collector only. This is the default. + collector Alias for otlp. + phoenix Start Collector and local Arize Phoenix. + langfuse Start Collector and local Langfuse self-host stack. + langsmith Start Collector and forward traces to online LangSmith. + grafana Start Collector, Grafana, and Tempo. + zipkin Start Collector and local Zipkin. + +Actions: + start/up Start the selected stack and stop containers from other stacks. + stop/down Stop and remove containers for the selected stack. Data is kept. + uninstall Stop and remove containers and data volumes for the selected stack. + +Set MONITORING_PROVIDER in monitoring/monitoring.env to change the default stack. +EOF +} + +ACTION="start" +STACK_ARG="" + +set_stack_arg() { + local value="$1" + if [ -n "$STACK_ARG" ] && [ "$STACK_ARG" != "$value" ]; then + echo "❌ Error: multiple monitoring stacks specified: '$STACK_ARG' and '$value'." + usage + exit 1 + fi + STACK_ARG="$value" +} + +while [ $# -gt 0 ]; do + case "$1" in + --stack) + if [ $# -lt 2 ]; then + echo "❌ Error: --stack requires a value."
+ usage + exit 1 + fi + set_stack_arg "$2" + shift 2 + ;; + --stop|--down) + ACTION="stop" + shift + ;; + --uninstall|--remove) + ACTION="uninstall" + shift + ;; + start|up) + ACTION="start" + shift + ;; + stop|down) + ACTION="stop" + shift + ;; + uninstall|remove) + ACTION="uninstall" + shift + ;; + -h|--help) + usage + exit 0 + ;; + otlp|collector|phoenix|langfuse|langsmith|grafana|zipkin) + set_stack_arg "$1" + shift + ;; + *) + echo "❌ Error: unknown argument '$1'." + usage + exit 1 + ;; + esac +done -# Copy environment file if it doesn't exist -if [ ! -f "$MONITORING_DIR/monitoring.env" ]; then - echo "📋 Creating monitoring.env from example..." - cp "$MONITORING_DIR/monitoring.env.example" "$MONITORING_DIR/monitoring.env" - echo "⚠️ Please review and update $MONITORING_DIR/monitoring.env as needed" +normalize_stack() { + case "$1" in + ""|otlp|collector) + echo "collector" + ;; + phoenix|langfuse|langsmith|grafana|zipkin) + echo "$1" + ;; + *) + echo "❌ Error: unsupported monitoring provider '$1'. Supported: $SUPPORTED_STACKS." >&2 + exit 1 + ;; + esac +} + +if [ -n "$STACK_ARG" ]; then + normalize_stack "$STACK_ARG" > /dev/null fi -# Start monitoring services -echo "🐳 Starting monitoring services..." -docker-compose -f "$SCRIPT_DIR/docker-compose-monitoring.yml" --env-file "$MONITORING_DIR/monitoring.env" up -d +remove_containers() { + if [ "$#" -eq 0 ]; then + return + fi + + local existing=() + local container + for container in "$@"; do + if docker ps -a --format '{{.Names}}' | grep -qx "$container"; then + existing+=("$container") + fi + done -# Wait for services to be ready -echo "⏳ Waiting for services to start..." -sleep 10 + if [ "${#existing[@]}" -gt 0 ]; then + docker rm -f "${existing[@]}" > /dev/null + echo "🧹 Removed containers: ${existing[*]}" + fi +} -# Check service health with timeout -echo "🔍 Checking service health..." 
+remove_volumes() { + if [ "$#" -eq 0 ]; then + return + fi + + local existing=() + local volume + for volume in "$@"; do + if docker volume ls --format '{{.Name}}' | grep -qx "$volume"; then + existing+=("$volume") + fi + done + + if [ "${#existing[@]}" -gt 0 ]; then + docker volume rm "${existing[@]}" > /dev/null + echo "🧹 Removed volumes: ${existing[*]}" + fi +} + +stack_containers() { + case "$1" in + collector|langsmith) + echo "nexent-otel-collector" + ;; + phoenix) + echo "nexent-otel-collector nexent-phoenix" + ;; + langfuse) + echo "nexent-otel-collector nexent-langfuse-worker nexent-langfuse-web nexent-langfuse-clickhouse nexent-langfuse-minio nexent-langfuse-redis nexent-langfuse-postgres" + ;; + grafana) + echo "nexent-otel-collector nexent-grafana nexent-tempo" + ;; + zipkin) + echo "nexent-otel-collector nexent-zipkin" + ;; + esac +} + +stack_data_volumes() { + case "$1" in + phoenix) + echo "monitor_phoenix-data" + ;; + langfuse) + echo "monitor_langfuse-postgres-data monitor_langfuse-clickhouse-data monitor_langfuse-clickhouse-logs monitor_langfuse-minio-data monitor_langfuse-redis-data" + ;; + grafana) + echo "monitor_grafana-data monitor_tempo-data" + ;; + collector|langsmith|zipkin) + echo "" + ;; + esac +} + +all_backend_containers() { + echo "nexent-phoenix nexent-langfuse-worker nexent-langfuse-web nexent-langfuse-clickhouse nexent-langfuse-minio nexent-langfuse-redis nexent-langfuse-postgres nexent-grafana nexent-tempo nexent-zipkin" +} + +incompatible_containers() { + local stack="$1" + local containers + containers="$(all_backend_containers)" + case "$stack" in + phoenix) + echo "$containers" | sed 's/nexent-phoenix//g' + ;; + langfuse) + echo "$containers" | sed 's/nexent-langfuse-worker//g; s/nexent-langfuse-web//g; s/nexent-langfuse-clickhouse//g; s/nexent-langfuse-minio//g; s/nexent-langfuse-redis//g; s/nexent-langfuse-postgres//g' + ;; + grafana) + echo "$containers" | sed 's/nexent-grafana//g; s/nexent-tempo//g' + ;; + zipkin) + echo 
"$containers" | sed 's/nexent-zipkin//g' + ;; + collector|langsmith) + echo "$containers" + ;; + esac +} + +configure_stack() { + MONITORING_PROVIDER="${STACK_ARG:-${MONITORING_PROVIDER:-otlp}}" + LOCAL_STACK="$(normalize_stack "$MONITORING_PROVIDER")" + + case "$LOCAL_STACK" in + collector) + BACKEND_MONITORING_PROVIDER="otlp" + OTEL_COLLECTOR_CONFIG_FILE="${OTEL_COLLECTOR_CONFIG_FILE:-./monitoring/otel-collector-config.yml}" + COMPOSE_PROFILES=() + ;; + phoenix) + BACKEND_MONITORING_PROVIDER="phoenix" + OTEL_COLLECTOR_CONFIG_FILE="${OTEL_COLLECTOR_CONFIG_FILE:-./monitoring/otel-collector-phoenix-config.yml}" + COMPOSE_PROFILES=(--profile phoenix) + ;; + langfuse) + BACKEND_MONITORING_PROVIDER="langfuse" + OTEL_COLLECTOR_CONFIG_FILE="${OTEL_COLLECTOR_CONFIG_FILE:-./monitoring/otel-collector-langfuse-config.yml}" + COMPOSE_PROFILES=(--profile langfuse) + LANGFUSE_INIT_PROJECT_PUBLIC_KEY="${LANGFUSE_INIT_PROJECT_PUBLIC_KEY:-pk-lf-nexent-local}" + LANGFUSE_INIT_PROJECT_SECRET_KEY="${LANGFUSE_INIT_PROJECT_SECRET_KEY:-sk-lf-nexent-local}" + if [ -z "${LANGFUSE_OTLP_AUTH_HEADER:-}" ]; then + LANGFUSE_OTLP_AUTH_HEADER="Basic $(printf "%s:%s" "$LANGFUSE_INIT_PROJECT_PUBLIC_KEY" "$LANGFUSE_INIT_PROJECT_SECRET_KEY" | base64 | tr -d '\n')" + fi + export LANGFUSE_OTLP_AUTH_HEADER + ;; + langsmith) + BACKEND_MONITORING_PROVIDER="langsmith" + OTEL_COLLECTOR_CONFIG_FILE="${OTEL_COLLECTOR_CONFIG_FILE:-./monitoring/otel-collector-langsmith-config.yml}" + COMPOSE_PROFILES=() + LANGSMITH_OTLP_TRACES_ENDPOINT="${LANGSMITH_OTLP_TRACES_ENDPOINT:-https://api.smith.langchain.com/otel/v1/traces}" + LANGSMITH_PROJECT="${LANGSMITH_PROJECT:-nexent}" + if [ "$ACTION" = "start" ] && [ -z "${LANGSMITH_API_KEY:-}" ]; then + echo "❌ Error: LANGSMITH_API_KEY is required for the langsmith stack." + echo " Set it in $MONITORING_DIR/monitoring.env or export it before running this script." 
+ exit 1 + fi + export LANGSMITH_API_KEY LANGSMITH_PROJECT LANGSMITH_OTLP_TRACES_ENDPOINT + ;; + grafana) + BACKEND_MONITORING_PROVIDER="grafana" + OTEL_COLLECTOR_CONFIG_FILE="${OTEL_COLLECTOR_CONFIG_FILE:-./monitoring/otel-collector-grafana-config.yml}" + COMPOSE_PROFILES=(--profile grafana) + ;; + zipkin) + BACKEND_MONITORING_PROVIDER="zipkin" + OTEL_COLLECTOR_CONFIG_FILE="${OTEL_COLLECTOR_CONFIG_FILE:-./monitoring/otel-collector-zipkin-config.yml}" + COMPOSE_PROFILES=(--profile zipkin) + ;; + esac + export OTEL_COLLECTOR_CONFIG_FILE +} + +dashboard_url() { + case "$LOCAL_STACK" in + phoenix) + echo "http://localhost:${PHOENIX_PORT:-6006}" + ;; + langfuse) + echo "http://localhost:${LANGFUSE_PORT:-3001}" + ;; + langsmith) + echo "https://smith.langchain.com/" + ;; + grafana) + echo "http://localhost:${GRAFANA_PORT:-3002}/d/nexent-llm-agent/nexent-agent-trace-monitoring?orgId=1" + ;; + zipkin) + echo "http://localhost:${ZIPKIN_PORT:-9411}" + ;; + collector) + echo "" + ;; + esac +} + +print_access_hints() { + local dashboard + dashboard="$(dashboard_url)" + + echo "" + echo "📊 Access your monitoring tools:" + echo " • OTLP HTTP receiver: http://localhost:${OTEL_COLLECTOR_HTTP_PORT:-4318}" + echo " • OTLP gRPC receiver: localhost:${OTEL_COLLECTOR_GRPC_PORT:-4317}" + echo " • Docker backend endpoint: http://otel-collector:4318" + + case "$LOCAL_STACK" in + phoenix) + echo " • Phoenix UI: $dashboard" + echo " • Phoenix direct gRPC ingest: localhost:${PHOENIX_GRPC_HOST_PORT:-4319}" + ;; + langfuse) + echo " • Langfuse UI: $dashboard" + echo " • Langfuse admin: ${LANGFUSE_INIT_USER_EMAIL:-admin@nexent.com} / ${LANGFUSE_INIT_USER_PASSWORD:-nexent@4321}" + echo " • Langfuse project keys: ${LANGFUSE_INIT_PROJECT_PUBLIC_KEY:-pk-lf-nexent-local} / ${LANGFUSE_INIT_PROJECT_SECRET_KEY:-sk-lf-nexent-local}" + echo " • MinIO API: http://localhost:${LANGFUSE_MINIO_API_PORT:-9092}" + echo " • MinIO console: http://localhost:${LANGFUSE_MINIO_CONSOLE_PORT:-9093}" + ;; + langsmith) + 
echo " • LangSmith UI: $dashboard" + echo " • LangSmith project: ${LANGSMITH_PROJECT:-nexent}" + echo " • LangSmith OTLP traces endpoint: ${LANGSMITH_OTLP_TRACES_ENDPOINT:-https://api.smith.langchain.com/otel/v1/traces}" + echo " • No local LangSmith UI is started; open the hosted UI and select the project above." + ;; + grafana) + echo " • Grafana dashboard: $dashboard" + echo " • Grafana home: http://localhost:${GRAFANA_PORT:-3002}" + echo " • Grafana admin: ${GRAFANA_ADMIN_USER:-admin} / ${GRAFANA_ADMIN_PASSWORD:-nexent@4321}" + echo " • Tempo API: http://localhost:${TEMPO_PORT:-3200}" + ;; + zipkin) + echo " • Zipkin UI: $dashboard" + ;; + collector) + echo " • Collector-only mode has no monitoring UI." + echo " • View Collector logs: docker logs -f nexent-otel-collector" + echo " • Configure Phoenix, Langfuse, LangSmith, Grafana/Tempo, Zipkin, or another OTLP backend when you need a UI." + ;; + esac + + echo "" + echo "🔗 Frontend monitoring entry:" + if [ -n "$dashboard" ]; then + echo " Set MONITORING_DASHBOARD_URL=$dashboard" + else + echo " Leave MONITORING_DASHBOARD_URL empty to hide the monitoring entry." + fi +} + +print_backend_hints() { + echo "" + echo "🔧 To enable monitoring in your Nexent backend:" + echo " 1. Set ENABLE_TELEMETRY=true in docker/.env" + echo " 2. Set MONITORING_PROVIDER=$BACKEND_MONITORING_PROVIDER in docker/.env" + echo " 3. Set OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 for Docker services" + echo " or http://localhost:${OTEL_COLLECTOR_HTTP_PORT:-4318} for a backend running on the host" + echo " 4. Set MONITORING_DASHBOARD_URL as shown above when a UI is available" + echo " 5. Install performance dependencies:" + echo " uv sync --extra performance" + echo " 6. 
Restart your Nexent backend service" +} + +print_uninstall_hints() { + echo "" + echo "🛑 Stop or uninstall this monitoring stack:" + echo " • Stop containers and keep data:" + echo " $(basename "$0") stop $LOCAL_STACK" + echo " • Remove containers and this stack's data volumes:" + echo " $(basename "$0") uninstall $LOCAL_STACK" + echo "" + echo " Stacks are mutually exclusive; do not run multiple monitoring providers in parallel." +} + +load_env_for_start() { + if [ ! -f "$MONITORING_DIR/monitoring.env" ]; then + echo "📋 Creating monitoring.env from example..." + cp "$MONITORING_DIR/monitoring.env.example" "$MONITORING_DIR/monitoring.env" + echo "⚠️ Please review and update $MONITORING_DIR/monitoring.env as needed" + fi + + set -a + # shellcheck disable=SC1091 + . "$MONITORING_DIR/monitoring.env" + set +a +} + +load_env_if_present() { + if [ -f "$MONITORING_DIR/monitoring.env" ]; then + set -a + # shellcheck disable=SC1091 + . "$MONITORING_DIR/monitoring.env" + set +a + fi +} + +resolve_compose_cmd() { + if docker compose version > /dev/null 2>&1; then + COMPOSE_CMD=(docker compose) + elif command -v docker-compose > /dev/null 2>&1; then + COMPOSE_CMD=(docker-compose) + else + echo "❌ Error: Docker Compose is not installed." + exit 1 + fi +} -# Function to check service health with timeout check_service() { local name=$1 local url=$2 local port=$3 - + if curl -s --max-time 5 --connect-timeout 3 "$url" > /dev/null 2>&1; then echo "✅ $name is running at http://localhost:$port" return 0 @@ -57,33 +424,123 @@ check_service() { fi } -# Check Jaeger -check_service "Jaeger" "http://localhost:16686/api/services" "16686" || true - -# Check Prometheus -check_service "Prometheus" "http://localhost:9090/-/healthy" "9090" || true - -# Check Grafana -check_service "Grafana" "http://localhost:3005/api/health" "3005" || true - -echo "" -echo "🎉 Monitoring setup complete!" 
-echo "" -echo "📊 Access your monitoring tools:" -echo " • Jaeger UI: http://localhost:16686" -echo " • Prometheus: http://localhost:9090" -echo " • Grafana: http://localhost:3005 (admin/admin)" -echo "" -echo "🔧 To enable monitoring in your Nexent backend:" -echo " 1. Set ENABLE_TELEMETRY=true in your .env file" -echo " 2. Install performance dependencies:" -echo " uv sync --extra performance" -echo " 3. Restart your Nexent backend service" -echo "" -echo "📈 Key Metrics to Monitor:" -echo " • Token Generation Rate (tokens/second)" -echo " • Time to First Token (TTFT)" -echo " • Request Duration" -echo " • Error Rates" -echo "" -echo "🛑 To stop monitoring services: docker-compose -f docker-compose-monitoring.yml down" +check_stack_health() { + echo "🔍 Checking service health..." + check_service "OpenTelemetry Collector HTTP receiver" "http://localhost:${OTEL_COLLECTOR_HTTP_PORT:-4318}" "${OTEL_COLLECTOR_HTTP_PORT:-4318}" || true + + case "$LOCAL_STACK" in + phoenix) + check_service "Phoenix UI" "http://localhost:${PHOENIX_PORT:-6006}" "${PHOENIX_PORT:-6006}" || true + ;; + langfuse) + check_service "Langfuse UI" "http://localhost:${LANGFUSE_PORT:-3001}" "${LANGFUSE_PORT:-3001}" || true + ;; + langsmith) + echo "✅ LangSmith forwarding is configured for project: ${LANGSMITH_PROJECT:-nexent}" + ;; + grafana) + check_service "Grafana" "http://localhost:${GRAFANA_PORT:-3002}/api/health" "${GRAFANA_PORT:-3002}" || true + check_service "Tempo API" "http://localhost:${TEMPO_PORT:-3200}/ready" "${TEMPO_PORT:-3200}" || true + ;; + zipkin) + check_service "Zipkin UI" "http://localhost:${ZIPKIN_PORT:-9411}" "${ZIPKIN_PORT:-9411}" || true + ;; + esac +} + +start_stack() { + echo "🚀 Starting Nexent LLM Performance Monitoring Setup..." + + if ! docker info > /dev/null 2>&1; then + echo "❌ Error: Docker is not running. Please start Docker first." + exit 1 + fi + + resolve_compose_cmd + + if ! 
docker network ls --format '{{.Name}}' | grep -qx nexent_network; then + echo "🔗 Creating nexent_network..." + docker network create nexent_network + else + echo "✅ nexent_network already exists" + fi + + load_env_for_start + configure_stack + + local incompatible + incompatible="$(incompatible_containers "$LOCAL_STACK")" + if [ -n "$incompatible" ]; then + # shellcheck disable=SC2086 + remove_containers $incompatible + fi + + echo "🐳 Starting monitoring services with provider: $MONITORING_PROVIDER" + echo " Selected stack: $LOCAL_STACK" + "${COMPOSE_CMD[@]}" -f "$COMPOSE_FILE" --env-file "$MONITORING_DIR/monitoring.env" "${COMPOSE_PROFILES[@]}" up -d --remove-orphans + + echo "⏳ Waiting for services to start..." + sleep 10 + check_stack_health + + echo "" + echo "🎉 Monitoring setup complete!" + print_access_hints + print_backend_hints + echo "" + echo "🔎 Key Trace Data to Inspect:" + echo " • Agent span hierarchy" + echo " • LLM generation spans" + echo " • Retriever and memory spans" + echo " • Tool call spans" + echo " • Error events" + print_uninstall_hints +} + +stop_or_uninstall_stack() { + local remove_data="$1" + + if ! docker info > /dev/null 2>&1; then + echo "❌ Error: Docker is not running. Please start Docker first." + exit 1 + fi + + load_env_if_present + configure_stack + + local containers + containers="$(stack_containers "$LOCAL_STACK")" + echo "🛑 Removing monitoring containers for stack: $LOCAL_STACK" + # shellcheck disable=SC2086 + remove_containers $containers + + if [ "$remove_data" = "true" ]; then + local volumes + volumes="$(stack_data_volumes "$LOCAL_STACK")" + if [ -n "$volumes" ]; then + echo "🧹 Removing data volumes for stack: $LOCAL_STACK" + # shellcheck disable=SC2086 + remove_volumes $volumes + else + echo "ℹ️ Stack '$LOCAL_STACK' has no dedicated local data volumes." + fi + echo "✅ Monitoring stack '$LOCAL_STACK' has been uninstalled." + else + echo "✅ Monitoring stack '$LOCAL_STACK' has been stopped. Data volumes were kept." 
+ fi + + echo "" + echo "ℹ️ The shared Docker network 'nexent_network' is kept because it is also used by Nexent services." +} + +case "$ACTION" in + start) + start_stack + ;; + stop) + stop_or_uninstall_stack false + ;; + uninstall) + stop_or_uninstall_stack true + ;; +esac diff --git a/docker/uninstall.sh b/docker/uninstall.sh old mode 100644 new mode 100755 index a37ec3bf9..801a9f4f7 --- a/docker/uninstall.sh +++ b/docker/uninstall.sh @@ -1,13 +1,240 @@ #!/bin/bash -docker rm -f nexent -docker rm -f nexent-postgresql -docker rm -f nexent-minio -docker rm -f nexent-elasticsearch -docker rm -f nexent-data-process -docker rm -f nexent-web -docker rm -f nexent-redis -docker rm -f supabase-kong-mini -docker rm -f supabase-auth-mini -docker rm -f supabase-db-mini -docker network rm nexent_nexent \ No newline at end of file +if [ -z "$BASH_VERSION" ]; then + echo "❌ This script must be run with bash. Please use: bash uninstall.sh or ./uninstall.sh" + exit 1 +fi + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +DELETE_VOLUMES="" + +print_usage() { + echo "Usage: $0 [delete-all] [options]" + echo "" + echo "Uninstall Docker deployment for Nexent." + echo "" + echo "Options:" + echo " --delete-volumes true|false Control whether persistent data is removed" + echo " --remove-volumes Alias for --delete-volumes true" + echo " --keep-volumes Alias for --delete-volumes false" + echo " --help, -h Show this help message" + echo "" + echo "Examples:" + echo " bash uninstall.sh" + echo " bash uninstall.sh --delete-volumes false" + echo " bash uninstall.sh --delete-volumes true" + echo " bash uninstall.sh delete-all" +} + +sanitize_input() { + local input="$1" + printf "%s" "$input" | tr -d '\r' +} + +parse_bool_option() { + local value + value="$(sanitize_input "${1:-}")" + case "$value" in + true|TRUE|True|yes|YES|Yes|y|Y|1) return 0 ;; + false|FALSE|False|no|NO|No|n|N|0) return 1 ;; + *) + echo "❌ Invalid boolean value: $value. 
Use true or false." + exit 1 + ;; + esac +} + +while [[ $# -gt 0 ]]; do + case "$1" in + delete-all) + DELETE_VOLUMES="true" + shift + ;; + --delete-volumes) + DELETE_VOLUMES="$2" + shift 2 + ;; + --remove-volumes) + DELETE_VOLUMES="true" + shift + ;; + --keep-volumes) + DELETE_VOLUMES="false" + shift + ;; + --help|-h) + print_usage + exit 0 + ;; + *) + echo "❌ Unknown option: $1" + print_usage + exit 1 + ;; + esac +done + +if [ -f ".env" ]; then + set -a + # shellcheck source=/dev/null + source .env + set +a +fi + +if [ -f ".env.generated" ]; then + set -a + # shellcheck source=/dev/null + source .env.generated + set +a +fi + +get_compose_version() { + if command -v docker &> /dev/null; then + local version_output + version_output=$(docker compose version 2>/dev/null) + if [[ $version_output =~ v([0-9]+\.[0-9]+\.[0-9]+) ]]; then + echo "v2 ${BASH_REMATCH[1]}" + return 0 + fi + fi + + if command -v docker-compose &> /dev/null; then + local version_output + version_output=$(docker-compose --version 2>/dev/null) + if [[ $version_output =~ ([0-9]+\.[0-9]+\.[0-9]+) ]]; then + echo "v1 ${BASH_REMATCH[1]}" + return 0 + fi + fi + + echo "unknown" + return 0 +} + +resolve_compose_command() { + local version_info + version_info="$(get_compose_version)" + if [[ $version_info == "unknown" ]]; then + echo "❌ Docker Compose not found or version detection failed" + exit 1 + fi + + local version_type version_number + version_type="$(echo "$version_info" | awk '{print $1}')" + version_number="$(echo "$version_info" | awk '{print $2}')" + + case "$version_type" in + v1) + if [[ $version_number < "1.28.0" ]]; then + echo "❌ Docker Compose V1 version is too old; please upgrade to V1.28.0+ or V2." 
+ exit 1 + fi + docker_compose_command="docker-compose" + ;; + v2) + docker_compose_command="docker compose" + ;; + *) + echo "❌ Unknown Docker Compose version type: $version_type" + exit 1 + ;; + esac +} + +resolve_delete_volumes() { + if [ -n "$DELETE_VOLUMES" ]; then + parse_bool_option "$DELETE_VOLUMES" + return $? + fi + + [ -t 0 ] || return 1 + + echo "" + echo "🧹 Delete Docker volumes and Nexent data directories?" + echo " This removes persistent data under ROOT_DIR, including elasticsearch, postgresql, redis, minio, scripts, and supabase volumes." + local answer + read -r -p " Delete data volumes? [y/N]: " answer + answer="$(sanitize_input "$answer")" + [[ "$answer" =~ ^[Yy]$ ]] +} + +docker_compose_down_file() { + local compose_file="$1" + local use_project_name="$2" + local remove_volumes="$3" + + [ -f "$compose_file" ] || return 0 + + local volume_args=() + if [ "$remove_volumes" = "true" ]; then + volume_args=(-v) + fi + + if [ "$use_project_name" = "true" ]; then + $docker_compose_command -p nexent -f "$compose_file" down --remove-orphans "${volume_args[@]}" || true + else + $docker_compose_command -f "$compose_file" down --remove-orphans "${volume_args[@]}" || true + fi +} + +remove_nexent_data_dirs() { + local root_dir="${ROOT_DIR:-$HOME/nexent-data}" + root_dir="${root_dir%/}" + + if [ -z "$root_dir" ] || [ "$root_dir" = "/" ]; then + echo "❌ Refusing to remove unsafe ROOT_DIR: ${root_dir:-}" + return 1 + fi + + local dirs=( + "$root_dir/elasticsearch" + "$root_dir/postgresql" + "$root_dir/redis" + "$root_dir/minio" + "$root_dir/volumes" + "$root_dir/openssh-server" + "$root_dir/scripts" + ) + + local dir + for dir in "${dirs[@]}"; do + if [ -e "$dir" ]; then + echo "🧹 Removing data directory: $dir" + rm -rf "$dir" + fi + done +} + +main() { + local remove_volumes="false" + if resolve_delete_volumes; then + remove_volumes="true" + fi + + resolve_compose_command + + echo "🛑 Stopping and removing Docker deployment..." 
+ if [ "$remove_volumes" = "true" ]; then + echo "⚠️ Data volumes will be deleted." + else + echo "ℹ️ Data volumes will be preserved." + fi + + docker_compose_down_file "docker-compose-monitoring.yml" false "$remove_volumes" + docker_compose_down_file "docker-compose-supabase.prod.yml" true "$remove_volumes" + docker_compose_down_file "docker-compose-supabase.yml" true "$remove_volumes" + docker_compose_down_file "docker-compose.prod.yml" true "$remove_volumes" + docker_compose_down_file "docker-compose.yml" true "$remove_volumes" + + if [ "$remove_volumes" = "true" ]; then + remove_nexent_data_dirs + fi + + echo "✅ Docker deployment removed." +} + +main diff --git a/frontend/app/[locale]/agents/AgentVersionCard.tsx b/frontend/app/[locale]/agents/AgentVersionCard.tsx index 5eaa0e1e0..4ef6f052e 100644 --- a/frontend/app/[locale]/agents/AgentVersionCard.tsx +++ b/frontend/app/[locale]/agents/AgentVersionCard.tsx @@ -39,11 +39,13 @@ import type { Agent, Tool } from "@/types/agentConfig"; import { useToolList } from "@/hooks/agent/useToolList"; import { useAgentList } from "@/hooks/agent/useAgentList"; import { useAgentVersionList } from "@/hooks/agent/useAgentVersionList"; -import { useAgentInfo } from "@/hooks/agent/useAgentInfo"; import { useAgentVersionDetail } from "@/hooks/agent/useAgentVersionDetail"; import { rollbackVersion, compareVersions, deleteVersion } from "@/services/agentVersionService"; +import { searchAgentInfo } from "@/services/agentConfigService"; +import { useAgentConfigStore } from "@/stores/agentConfigStore"; import { useAuthorizationContext } from "@/components/providers/AuthorizationProvider"; import log from "@/lib/logger"; +import { resolveAgentListTenantKey } from "@/lib/agentListTenant"; import { message } from "antd"; import { useQueryClient } from "@tanstack/react-query"; import AgentVersionCompareModal from "./versions/AgentVersionCompareModal"; @@ -139,7 +141,6 @@ export function VersionCardItem({ // Get invalidate functions for 
refreshing data const { agentVersionList, invalidate: invalidateAgentVersionList } = useAgentVersionList(agentId); - const { invalidate: invalidateAgentInfo } = useAgentInfo(agentId); // Fetch version detail when expanded const { agentVersionDetail } = useAgentVersionDetail( @@ -148,7 +149,7 @@ export function VersionCardItem({ ); const { tools: toolList } = useToolList(); - const { agents: agentList } = useAgentList(user?.tenantId ?? null); + const { agents: agentList } = useAgentList(""); // Get current agent's permission from agent list const currentAgent = useMemo(() => { @@ -246,8 +247,18 @@ export function VersionCardItem({ message.success(t("agent.version.rollbackSuccess")); setCompareModalOpen(false); invalidateAgentVersionList?.(); - invalidateAgentInfo?.(); + queryClient.invalidateQueries({ queryKey: ["agentInfo", agentId] }); queryClient.invalidateQueries({ queryKey: ["agents"] }); + + // Refresh agent detail and sync to Zustand store + const store = useAgentConfigStore.getState(); + if (store.currentAgentId === agentId) { + const agentResult = await searchAgentInfo(agentId); + if (agentResult.success && agentResult.data) { + store.setCurrentAgent(agentResult.data); + store.triggerForceRefresh(); + } + } } else { message.error(result.message || t("agent.version.rollbackError")); } @@ -282,7 +293,7 @@ export function VersionCardItem({ message.success(t("agent.version.deleteSuccess")); setDeleteModalOpen(false); invalidateAgentVersionList?.(); - invalidateAgentInfo?.(); + queryClient.invalidateQueries({ queryKey: ["agentInfo", agentId] }); queryClient.invalidateQueries({ queryKey: ["agents"] }); } else { message.error(result.message || t("agent.version.deleteError")); @@ -579,6 +590,7 @@ export function VersionCardItem({ initialValues={{ version_name: version.version_name, release_note: version.release_note, + is_a2a: version.is_a2a, }} onUpdated={() => { // Refresh version list using the proper invalidate function diff --git 
a/frontend/app/[locale]/agents/components/AgentConfigComp.tsx b/frontend/app/[locale]/agents/components/AgentConfigComp.tsx index 3a60e146d..1e750d5eb 100644 --- a/frontend/app/[locale]/agents/components/AgentConfigComp.tsx +++ b/frontend/app/[locale]/agents/components/AgentConfigComp.tsx @@ -1,6 +1,6 @@ "use client"; -import { useState, useCallback, useEffect } from "react"; +import { useState, useCallback } from "react"; import { useTranslation } from "react-i18next"; import { App, Button, Row, Col, Flex, Tooltip, Badge, Divider } from "antd"; import CollaborativeAgent from "./agentConfig/CollaborativeAgent"; @@ -12,12 +12,12 @@ import { updateToolList } from "@/services/mcpService"; import { useAgentConfigStore } from "@/stores/agentConfigStore"; import { useToolList } from "@/hooks/agent/useToolList"; import { useSkillList } from "@/hooks/agent/useSkillList"; -import { useAgentSkillInstances } from "@/hooks/agent/useAgentSkillInstances"; import { useExternalAgents } from "@/hooks/agent/useExternalAgents"; import McpConfigModal from "./agentConfig/McpConfigModal"; import A2AAgentDiscoveryModal from "./a2a/A2AAgentDiscoveryModal"; import { RefreshCw, Lightbulb, Plug, BlocksIcon, Globe } from "lucide-react"; +import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs"; interface AgentConfigCompProps {} @@ -28,26 +28,21 @@ export default function AgentConfigComp({}: AgentConfigCompProps) { // Get state from store const currentAgentId = useAgentConfigStore((state) => state.currentAgentId); const isCreatingMode = useAgentConfigStore((state) => state.isCreatingMode); + const isReadOnly = useAgentConfigStore((state) => state.isReadOnly()); + const selectedTools = useAgentConfigStore((state) => state.editedAgent.tools); + const selectedSkills = useAgentConfigStore((state) => state.editedAgent.skills); const [isMcpModalOpen, setIsMcpModalOpen] = useState(false); const [isSkillModalOpen, setIsSkillModalOpen] = useState(false); const [isRefreshing, 
setIsRefreshing] = useState(false); const [isRefreshingSkill, setIsRefreshingSkill] = useState(false); const [showA2ADiscovery, setShowA2ADiscovery] = useState(false); + const showLegacyMcpConfig = false; + + // Use tool list hook for data management const { groupedTools, invalidate } = useToolList(); const { groupedSkills, invalidate: invalidateSkills } = useSkillList(); - const { skillInstances, invalidate: invalidateSkillInstances } = useAgentSkillInstances( - currentAgentId ?? null - ); const { invalidate: invalidateExternalAgents } = useExternalAgents(); - const setInitialSkills = useAgentConfigStore((state) => state.setInitialSkills); - - // Load skill instances when agent changes - useEffect(() => { - if (currentAgentId && skillInstances.length > 0) { - setInitialSkills(skillInstances); - } - }, [currentAgentId, skillInstances, setInitialSkills]); const handleRefreshTools = useCallback(async () => { setIsRefreshing(true); @@ -72,21 +67,17 @@ export default function AgentConfigComp({}: AgentConfigCompProps) { setIsRefreshingSkill(true); try { invalidateSkills(); - invalidateSkillInstances(); message.success(t("skillManagement.message.refreshSuccess")); } catch (error) { message.error(t("skillManagement.message.refreshFailed")); } finally { setIsRefreshingSkill(false); } - }, [invalidateSkills, invalidateSkillInstances]); + }, [invalidateSkills]); const handleSkillBuildSuccess = useCallback(() => { invalidateSkills(); - if (currentAgentId) { - invalidateSkillInstances(); - } - }, [invalidateSkills, invalidateSkillInstances, currentAgentId]); + }, [invalidateSkills]); return ( <> @@ -95,15 +86,15 @@ export default function AgentConfigComp({}: AgentConfigCompProps) { - -

{t("businessLogic.config.title")}

+ +

{t("businessLogic.config.title")}

- +

{t("collaborativeAgent.title")}

@@ -116,7 +107,6 @@ export default function AgentConfigComp({}: AgentConfigCompProps) { size="small" icon={} onClick={() => setShowA2ADiscovery(true)} - loading={isRefreshing} className="text-green-500 hover:!text-green-600 hover:!bg-green-50" title={t("toolManagement.refresh.title")} > @@ -126,118 +116,133 @@ export default function AgentConfigComp({}: AgentConfigCompProps) {
- + - - - -

{t("toolPool.title")}

- {t("toolPool.tooltip.functionGuide")}} - color="#ffffff" - styles={{ - root: { - backgroundColor: "#ffffff", - border: "1px solid #e5e7eb", - borderRadius: "6px", - boxShadow: "0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06)", - maxWidth: "800px", - minWidth: "700px", - width: "fit-content", - }, - }} - > - - -
- - - - - - - -
- + {/* Tool/Skill Tabs */} + + + + + {t("toolPool.title")} + {selectedTools.length > 0 && ( + + )} + + {t("toolPool.tooltip.functionGuide")}} + color="#ffffff" + styles={{ + root: { + backgroundColor: "#ffffff", + border: "1px solid #e5e7eb", + borderRadius: "6px", + boxShadow: "0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06)", + maxWidth: "800px", + minWidth: "700px", + width: "fit-content", + }, + }} + > + + + + + + {t("skillPool.title")} + {selectedSkills && selectedSkills.length > 0 && ( + + )} + + + - - - - - + - - - -

{t("skillPool.title")}

-
- - - - - - - -
+ + + + + + + + - + + + + + +
- - - - - + + + + + + + + + + + + + + + + +
setIsMcpModalOpen(false)} /> diff --git a/frontend/app/[locale]/agents/components/AgentInfoComp.tsx b/frontend/app/[locale]/agents/components/AgentInfoComp.tsx index 9a9cd37c0..b49842fb7 100644 --- a/frontend/app/[locale]/agents/components/AgentInfoComp.tsx +++ b/frontend/app/[locale]/agents/components/AgentInfoComp.tsx @@ -16,22 +16,12 @@ import { useAgentVersionDetail } from "@/hooks/agent/useAgentVersionDetail"; import { useAgentInfo } from "@/hooks/agent/useAgentInfo"; import AgentVersionPubulishModal from "../versions/AgentVersionPubulishModal"; -export interface AgentInfoCompProps { - isShowVersionManagePanel: boolean; - openVersionManagePanel: () => void; - closeVersionManagementPanel: () => void; -} - -export default function AgentInfoComp({ - isShowVersionManagePanel, - openVersionManagePanel, - closeVersionManagementPanel, -}: AgentInfoCompProps) { +export default function AgentInfoComp() { const { t } = useTranslation("common"); const isCreatingMode = useAgentConfigStore((state) => state.isCreatingMode); - const currentAgentPermission = useAgentConfigStore((state) => state.currentAgentPermission); const currentAgentId = useAgentConfigStore((state) => state.currentAgentId); + const isGenerating = useAgentConfigStore((state) => state.isGenerating); const isPanelActive = (currentAgentId != null && currentAgentId != undefined) || isCreatingMode; const { agentVersionList, total, invalidate: invalidateAgentVersionList } = useAgentVersionList(currentAgentId); @@ -42,8 +32,7 @@ export default function AgentInfoComp({ currentAgentId, agentInfo?.current_version_no ); - const isReadOnly = isPanelActive && !isCreatingMode && currentAgentPermission === "READ_ONLY"; - const isEditable = isPanelActive && !isReadOnly; + const isReadOnly = useAgentConfigStore((state) => state.isReadOnly()); // Save guard hook const saveGuard = useSaveGuard(); @@ -51,13 +40,14 @@ export default function AgentInfoComp({ // Debug drawer state const [isDebugDrawerOpen, setIsDebugDrawerOpen] 
= useState(false); - // Generation state shared with AgentGenerateDetail - const [isGenerating, setIsGenerating] = useState(false); - const [isPublishModalOpen, setIsPublishModalOpen] = useState(false); const handlePublishClick = () => { - setIsPublishModalOpen(true); + saveGuard.saveWithModal().then((success) => { + if (success) { + setIsPublishModalOpen(true); + } + }); }; const handlePublished = () => { @@ -79,54 +69,21 @@ export default function AgentInfoComp({ className="w-full" > - -

+ +

{t("guide.steps.describeBusinessLogic.title")}

- - {!isCreatingMode && agentInfo?.current_version_no !== 0 && total > 0 && ( - - - - - - - {t("agent.version.currentVersion")} : - - {agentVersionDetail?.version.version_name} - - - {t("agent.version.totalVersions", { count: total ?? 0 })} - - - - - )} - + diff --git a/frontend/app/[locale]/agents/components/AgentManageComp.tsx b/frontend/app/[locale]/agents/components/AgentManageComp.tsx index c636486ab..7dabff4dd 100644 --- a/frontend/app/[locale]/agents/components/AgentManageComp.tsx +++ b/frontend/app/[locale]/agents/components/AgentManageComp.tsx @@ -7,20 +7,22 @@ import { FileInput, Plus, X } from "lucide-react"; import AgentList from "./agentManage/AgentList"; import { useAgentConfigStore } from "@/stores/agentConfigStore"; -import { importAgent } from "@/services/agentConfigService"; -import { useMutation, useQueryClient } from "@tanstack/react-query"; import { useAgentList } from "@/hooks/agent/useAgentList"; import { useAuthorizationContext } from "@/components/providers/AuthorizationProvider"; import log from "@/lib/logger"; import { useState } from "react"; -import { ImportAgentData } from "@/hooks/useAgentImport"; +import { + parseAgentImportFile, + selectFile, + type ImportAgentData, +} from "@/lib/agentImportUtils"; import AgentImportWizard from "@/components/agent/AgentImportWizard"; export default function AgentManageComp() { const { t } = useTranslation("common"); const { message } = App.useApp(); - const { user } = useAuthorizationContext(); + useAuthorizationContext(); // Get state from store const isCreatingMode = useAgentConfigStore((state) => state.isCreatingMode); @@ -32,51 +34,27 @@ export default function AgentManageComp() { const [importWizardData, setImportWizardData] = useState(null); - // Shared agent list via React Query - const { agents: agentList, isLoading: loading, refetch } = useAgentList(user?.tenantId ?? 
null); + // Always resolve tenant from auth on the agent dev page (matches published_list; avoids stale/wrong tenant_id query params) + const { agents: agentList, isLoading: loading, refetch } = useAgentList(""); // Handle import agent for space view - open wizard instead of direct import - const handleImportAgent = () => { - const fileInput = document.createElement("input"); - fileInput.type = "file"; - fileInput.accept = ".json"; - fileInput.onchange = async (event) => { - const file = (event.target as HTMLInputElement).files?.[0]; - if (!file) return; - - if (!file.name.endsWith(".json")) { - message.error(t("businessLogic.config.error.invalidFileType")); - return; - } - - try { - // Read and parse file - const fileContent = await file.text(); - let agentData: ImportAgentData; - - try { - agentData = JSON.parse(fileContent); - } catch (parseError) { - message.error(t("businessLogic.config.error.invalidFileType")); - return; - } - - // Validate structure - if (!agentData.agent_id || !agentData.agent_info) { - message.error(t("businessLogic.config.error.invalidFileType")); - return; - } - - // Open wizard with parsed data - setImportWizardData(agentData); - setImportWizardVisible(true); - } catch (error) { + const handleImportAgent = async () => { + const file = await selectFile(".json"); + if (!file) return; + + const agentData = await parseAgentImportFile(file, { + onParseError: (msgKey) => message.error(t(msgKey)), + onValidationError: (msgKey) => message.error(t(msgKey)), + onGenericError: (error) => { log.error("Failed to read import file:", error); message.error(t("businessLogic.config.error.agentImportFailed")); - } - }; + }, + }); - fileInput.click(); + if (!agentData) return; + + setImportWizardData(agentData); + setImportWizardVisible(true); }; return ( @@ -160,7 +138,7 @@ export default function AgentManageComp() {
void handleImportAgent()} > void; + isShowVersionManagePanel?: boolean; + onCloseVersionManagePanel?: () => void; +} + +export default function AgentSelectorHeader({ + onOpenVersionManage, + isShowVersionManagePanel = false, + onCloseVersionManagePanel, +}: AgentSelectorHeaderProps) { + const { t } = useTranslation("common"); + const { message } = App.useApp(); + const queryClient = useQueryClient(); + const checkUnsavedChanges = useSaveGuard(); + const confirm = useConfirmModal(); + const { token } = theme?.useToken?.() || {}; + const { user } = useAuthorizationContext(); + + // Fetch agent list internally + const { agents } = useAgentList(user?.tenantId ?? null); + + // Store state + const currentAgentId = useAgentConfigStore((state) => state.currentAgentId); + const setCurrentAgent = useAgentConfigStore((state) => state.setCurrentAgent); + const isCreatingMode = useAgentConfigStore((state) => state.isCreatingMode); + const enterCreateMode = useAgentConfigStore((state) => state.enterCreateMode); + const reset = useAgentConfigStore((state) => state.reset); + const hasUnsavedChanges = useAgentConfigStore((state) => state.hasUnsavedChanges); + + const { agentInfo } = useAgentInfo(currentAgentId); + const { agentVersionList, total } = useAgentVersionList(currentAgentId); + const { agentVersionDetail } = useAgentVersionDetail(currentAgentId, agentInfo?.current_version_no); + + // Call relationship modal state + const [callRelationshipModalVisible, setCallRelationshipModalVisible] = useState(false); + const [selectedAgentForRelationship, setSelectedAgentForRelationship] = useState(null); + + // A2A settings modal state + const [showA2ASettings, setShowA2ASettings] = useState(false); + const [selectedAgentForA2A, setSelectedAgentForA2A] = useState(null); + + // Dropdown open state + const [dropdownOpen, setDropdownOpen] = useState(false); + + // Mutations + const updateAgentMutation = useMutation({ + mutationFn: (payload: any) => updateAgentInfo(payload), + }); + + 
const deleteAgentMutation = useMutation({ + mutationFn: (agentId: number) => deleteAgent(agentId), + }); + + // Fetch A2A Server Settings when modal opens + const { data: a2aSettingsData, isLoading: isLoadingA2ASettings } = useQuery({ + queryKey: ["a2aServerSettings", selectedAgentForA2A?.id], + queryFn: () => a2aClientService.getServerSettings(Number(selectedAgentForA2A!.id)), + enabled: showA2ASettings && !!selectedAgentForA2A, + }); + + // Construct a2aAgentCard from supported_interfaces + const constructedA2AAgentCard = (() => { + const data = a2aSettingsData?.data; + if (!data?.supported_interfaces) return undefined; + + const interfaces = data.supported_interfaces; + const endpointId = data.endpoint_id; + const restEndpoints = interfaces.filter( + (iface: any) => iface.protocolBinding.toLowerCase() === "http+json" || iface.protocolBinding.toLowerCase() === "httprest" + ); + const jsonrpcEndpoints = interfaces.filter( + (iface: any) => + iface.protocolBinding.toLowerCase() === "http-json-rpc" || + iface.protocolBinding.toLowerCase() === "jsonrpc" || + iface.protocolBinding.toLowerCase() === "httpjsonrpc" + ); + + return { + endpoint_id: endpointId, + name: data.name || "", + description: data.description, + version: data.version, + streaming: data.streaming, + agent_card_url: `/nb/a2a/${endpointId}/.well-known/agent-card.json`, + rest_endpoints: { + message_send: `${restEndpoints[0]?.url}/message:send`, + message_stream: `${restEndpoints[0]?.url}/message:stream`, + tasks_get: `${restEndpoints[0]?.url}/tasks/{task_id}`, + }, + jsonrpc_url: jsonrpcEndpoints[0]?.url || "", + jsonrpc_methods: ["SendMessage", "SendStreamingMessage", "GetTask"], + }; + })(); + + // Import wizard state + const [importWizardVisible, setImportWizardVisible] = useState(false); + const [importWizardData, setImportWizardData] = useState(null); + + // Get current selected agent + const currentAgent = agents.find( + (agent: Agent) => currentAgentId !== null && String(agent.id) === 
String(currentAgentId) + ); + + // Handle import agent + const handleImportAgent = () => { + const fileInput = document.createElement("input"); + fileInput.type = "file"; + fileInput.accept = ".json"; + fileInput.onchange = async (event) => { + const file = (event.target as HTMLInputElement).files?.[0]; + if (!file) return; + + if (!file.name.endsWith(".json")) { + message.error(t("businessLogic.config.error.invalidFileType")); + return; + } + + try { + const fileContent = await file.text(); + let agentData: ImportAgentData; + + try { + agentData = JSON.parse(fileContent); + } catch (parseError) { + message.error(t("businessLogic.config.error.invalidFileType")); + return; + } + + if (!agentData.agent_id || !agentData.agent_info) { + message.error(t("businessLogic.config.error.invalidFileType")); + return; + } + + setImportWizardData(agentData); + setImportWizardVisible(true); + } catch (error) { + log.error("Failed to read import file:", error); + message.error(t("businessLogic.config.error.agentImportFailed")); + } + }; + + fileInput.click(); + }; + + // Handle view call relationship + const handleViewCallRelationship = (agent: Agent) => { + setSelectedAgentForRelationship(agent); + setCallRelationshipModalVisible(true); + setDropdownOpen(false); + }; + + const handleCloseCallRelationshipModal = () => { + setCallRelationshipModalVisible(false); + setSelectedAgentForRelationship(null); + }; + + // Handle view A2A agent settings + const handleViewA2AAgentSettings = (agent: Agent) => { + setSelectedAgentForA2A(agent); + setShowA2ASettings(true); + setDropdownOpen(false); + }; + + // Handle export agent + const handleExportAgent = async (agent: Agent) => { + try { + const result = await exportAgent(Number(agent.id)); + if (result.success && result.data) { + const blob = new Blob([JSON.stringify(result.data, null, 2)], { + type: "application/json", + }); + const url = URL.createObjectURL(blob); + const link = document.createElement("a"); + link.href = url; + 
link.download = `${agent.name || "agent"}.json`; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + URL.revokeObjectURL(url); + message.success(t("businessLogic.config.message.agentExportSuccess")); + } else { + message.error( + result.message || t("businessLogic.config.error.agentImportFailed") + ); + } + } catch (error) { + message.error(t("businessLogic.config.error.agentExportFailed")); + } + }; + + // Handle copy agent + const handleCopyAgent = async (agent: Agent) => { + try { + const detailResult = await searchAgentInfo(Number(agent.id)); + if (!detailResult.success || !detailResult.data) { + message.error(detailResult.message); + return; + } + const detail = detailResult.data; + + const copyName = `${detail.name || "agent"}_copy`; + const copyDisplayName = `${ + detail.display_name || t("agentConfig.agents.defaultDisplayName") + }${t("agent.copySuffix")}`; + + const tools = Array.isArray(detail.tools) ? detail.tools : []; + const unavailableTools = tools.filter( + (tool: any) => tool && tool.is_available === false + ); + const unavailableToolNames = unavailableTools + .map( + (tool: any) => + tool?.display_name || tool?.name || tool?.tool_name || "" + ) + .filter((name: string) => Boolean(name)); + + const enabledToolIds = tools + .filter((tool: any) => tool && tool.is_available !== false) + .map((tool: any) => Number(tool.id)) + .filter((id: number) => Number.isFinite(id)); + + const subAgentIds = ( + Array.isArray(detail.sub_agent_id_list) ? detail.sub_agent_id_list : [] + ) + .map((id: any) => Number(id)) + .filter((id: number) => Number.isFinite(id)); + + const createResult = await updateAgentMutation.mutateAsync({ + agent_id: undefined, // create + name: copyName, + display_name: copyDisplayName, + description: detail.description, + author: detail.author, + model_name: detail.model, + model_id: detail.model_id ?? 
undefined, + max_steps: detail.max_step, + provide_run_summary: detail.provide_run_summary, + enabled: detail.enabled, + business_description: detail.business_description, + duty_prompt: detail.duty_prompt, + constraint_prompt: detail.constraint_prompt, + few_shots_prompt: detail.few_shots_prompt, + business_logic_model_name: detail.business_logic_model_name ?? undefined, + business_logic_model_id: detail.business_logic_model_id ?? undefined, + enabled_tool_ids: enabledToolIds, + related_agent_ids: subAgentIds, + }); + + if (!createResult.success || !createResult.data?.agent_id) { + message.error( + createResult.message || t("agentConfig.agents.copyFailed") + ); + return; + } + const newAgentId = Number(createResult.data.agent_id); + + // Copy tool configuration + for (const tool of tools) { + if (!tool || tool.is_available === false) continue; + const params = + tool.initParams?.reduce((acc: Record, param: any) => { + acc[param.name] = param.value; + return acc; + }, {}) || {}; + try { + await updateToolConfig(Number(tool.id), newAgentId, params, true); + } catch (error) { + log.error("Failed to copy tool configuration:", error); + message.error(t("agentConfig.agents.copyFailed")); + return; + } + } + + // Refresh agent list + queryClient.invalidateQueries({ queryKey: ["agents"] }); + message.success(t("agentConfig.agents.copySuccess")); + + if (unavailableTools.length > 0) { + const names = + unavailableToolNames.join(", ") || + unavailableTools + .map((tool: any) => Number(tool?.id)) + .filter((id: number) => !Number.isNaN(id)) + .join(", "); + message.warning( + t("agentConfig.agents.copyUnavailableTools", { + count: unavailableTools.length, + names, + }) + ); + } + } catch (error) { + log.error("Failed to copy agent:", error); + message.error(t("agentConfig.agents.copyFailed")); + } + }; + + // Handle copy with confirmation + const handleCopyAgentWithConfirm = (agent: Agent) => { + confirm.confirm({ + title: t("agentConfig.agents.copyConfirmTitle"), + content: 
t("agentConfig.agents.copyConfirmContent", { + name: agent?.display_name || agent?.name || "", + }), + onOk: () => handleCopyAgent(agent), + }); + }; + + // Handle delete agent + const handleDeleteAgent = async (agent: Agent) => { + deleteAgentMutation.mutate(Number(agent.id), { + onSuccess: () => { + message.success( + t("businessLogic.config.error.agentDeleteSuccess", { + name: agent.display_name || agent.name || "", + }) + ); + + // Clear current agent if this was the selected agent + if ( + currentAgentId !== null && + String(currentAgentId) === String(agent.id) + ) { + setCurrentAgent(null); + } + + // Refresh agent list + queryClient.invalidateQueries({ queryKey: ["agents"] }); + }, + onError: () => { + message.error(t("businessLogic.config.error.agentDeleteFailed")); + }, + }); + }; + + // Handle delete with confirmation + const handleDeleteAgentWithConfirm = (agent: Agent) => { + confirm.confirm({ + title: t("businessLogic.config.modal.deleteTitle"), + content: t("businessLogic.config.modal.deleteContent", { + name: agent.display_name || agent.name || "", + }), + onOk: () => handleDeleteAgent(agent), + }); + }; + + // Handle select agent from dropdown + const handleSelectAgent = async (agentId: number | null) => { + if (agentId === null) return; + + const agent = agents.find((a: Agent) => String(a.id) === String(agentId)); + if (!agent) return; + + // Clear NEW mark when agent is selected for editing + if (agent.is_new === true) { + try { + const res = await clearAgentNewMark(agent.id); + if (!res?.success) { + log.warn("Failed to clear NEW mark on select:", res); + queryClient.invalidateQueries({ queryKey: ["agents"] }); + } + } catch (err) { + log.error("Failed to clear NEW mark on select:", err); + } + } + + // Guard unsaved changes + if (currentAgentId !== null || isCreatingMode) { + const canSwitch = await checkUnsavedChanges.saveWithModal(); + if (!canSwitch) return; + } + + // Load and set agent + try { + const result = await 
searchAgentInfo(Number(agent.id)); + if (result.success && result.data) { + setCurrentAgent(result.data); + } else { + message.error(result.message || t("agentConfig.agents.detailsLoadFailed")); + } + } catch (error) { + log.error("Failed to load agent detail:", error); + message.error(t("agentConfig.agents.detailsLoadFailed")); + } + }; + + // Dropdown menu items (only agents) + const agentMenuItems = agents.flatMap((agent: Agent, index: number) => { + const isAvailable = agent.is_available !== false; + const displayName = agent.display_name || ""; + const name = agent.name || ""; + + const agentItem = { + key: `agent-${agent.id}`, + label: ( +
+ + {/* Row 1: Name + Status */} +
+
+ + {!isAvailable && ( + { + const reasons = agent.unavailable_reasons || []; + if (reasons.includes('agent_not_found')) { + return t('subAgentPool.tooltip.unavailableAgent'); + } else if (reasons.includes('tool_unavailable')) { + return t('toolPool.tooltip.unavailableTool'); + } else if (reasons.includes('duplicate_name')) { + return t('agent.error.nameExists', { name }); + } else if (reasons.includes('duplicate_display_name')) { + return t('agent.error.displayNameExists', { displayName }); + } else if (reasons.includes('model_unavailable')) { + return t('agent.error.modelUnavailable'); + } + return t('subAgentPool.tooltip.unavailableAgent'); + })()} + > + + + )} + {agent.is_new && ( + + + {t("space.new", "NEW")} + + + )} + {displayName && ( + {displayName} + )} + +
+ {agent.is_a2a_server && ( + + +
+
+
+ {/* Row 2: Description */} +
+ {agent.description} +
+
+
+ ), + onClick: () => handleSelectAgent(Number(agent.id)), + }; + + // Add divider after each item except the last one + const divider = index < agents.length - 1 + ? { key: `divider-${agent.id}`, type: 'divider' as const } + : null; + + return divider ? [agentItem, divider] : [agentItem]; + }); + + return ( + <> +
+ + {/* Left column: Agent Config */} + + triggerNode.parentNode as HTMLElement} + styles={{ + root: { + width: 'calc(100% - 32px)', + } + }} + > +
+
+ {hasUnsavedChanges && ( + + + + )} + {!hasUnsavedChanges && } +
+
+
+ {isCreatingMode + ? t("agent.action.create") + : currentAgent?.display_name || currentAgent?.name || t("agentConfig.agents.selectAgent")} +
+
+ {isCreatingMode + ? t("agent.action.createOrSelect") + : currentAgent?.description || t("agentConfig.agents.noAgentSelected")} +
+
+ +
+
+ + + + {/* Right column: Agent Info */} + + {currentAgentId != null && agentInfo?.current_version_no !== 0 && total > 0 && ( + + + + + {agentVersionDetail?.version.version_name} + + + / {t("agent.version.totalVersions", { count: total ?? 0 })} + + + )} + {/* Right side: Agent count + Version management button */} + + {/* Create and Import buttons outside dropdown */} + + + + + + + + +
+ +
+ + {/* Import Wizard Modal */} + { + setImportWizardVisible(false); + setImportWizardData(null); + }} + initialData={importWizardData} + onImportComplete={() => { + setImportWizardVisible(false); + setImportWizardData(null); + queryClient.invalidateQueries({ queryKey: ["agents"] }); + }} + /> + + {/* Call Relationship Modal */} + {selectedAgentForRelationship && ( + + )} + + {/* A2A Server Settings Modal */} + { + setShowA2ASettings(false); + setSelectedAgentForA2A(null); + }} + loading={isLoadingA2ASettings} + footer={null} + zIndex={1050} + > + {selectedAgentForA2A && constructedA2AAgentCard ? ( + + ) : ( +
+ {t("a2a.service.getServerSettingsFailed", "Failed to load A2A settings")} +
+ )} +
+ + ); +} diff --git a/frontend/app/[locale]/agents/components/a2a/A2AAgentDiscoveryModal.tsx b/frontend/app/[locale]/agents/components/a2a/A2AAgentDiscoveryModal.tsx index 1988d6a8d..bc9260a29 100644 --- a/frontend/app/[locale]/agents/components/a2a/A2AAgentDiscoveryModal.tsx +++ b/frontend/app/[locale]/agents/components/a2a/A2AAgentDiscoveryModal.tsx @@ -33,8 +33,9 @@ import { Settings, MessageCircle, } from "lucide-react"; -import { a2aClientService, A2AExternalAgent, NacosConfig } from "@/services/a2aService"; +import { a2aClientService, A2AExternalAgent } from "@/services/a2aService"; import A2AChatModal from "./A2AChatModal"; +import NacosDiscoveryPanel from "./NacosDiscoveryPanel"; import log from "@/lib/logger"; const { Text, Title } = Typography; @@ -195,7 +196,7 @@ export default function A2AAgentDiscoveryModal({ const [chatAgent, setChatAgent] = useState(null); // Discovery mode - const [mode, setMode] = useState<"url" | "nacos">("url"); + const [mode, setMode] = useState<"url" | "nacos" | "list">("url"); const [loading, setLoading] = useState(false); const [discoveredAgents, setDiscoveredAgents] = useState([]); @@ -203,47 +204,11 @@ export default function A2AAgentDiscoveryModal({ const [url, setUrl] = useState(""); const [selectedAgent, setSelectedAgent] = useState(null); - // Nacos mode state - Add new config form (toggleable) - const [showAddNacosForm, setShowAddNacosForm] = useState(false); - const [newNacosConfig, setNewNacosConfig] = useState({ - name: "", - nacos_addr: "", - username: "", - password: "", - namespace_id: "public", - }); - const [savingNacosConfig, setSavingNacosConfig] = useState(false); - - // Nacos mode state - Existing configs list - const [nacosConfigs, setNacosConfigs] = useState([]); - const [loadingNacosConfigs, setLoadingNacosConfigs] = useState(false); - const [selectedNacosConfigId, setSelectedNacosConfigId] = useState(null); - - // Nacos scan state - const [agentNames, setAgentNames] = useState([]); - const [scanning, 
setScanning] = useState(false); - // List mode state const [agents, setAgents] = useState([]); const [loadingAgents, setLoadingAgents] = useState(false); const [refreshingId, setRefreshingId] = useState(null); - // Load Nacos configs and existing agents on mount - useEffect(() => { - if (open) { - loadNacosConfigs(); - loadAgents(); - } - }, [open]); - - const loadNacosConfigs = async () => { - setLoadingNacosConfigs(true); - const result = await a2aClientService.listNacosConfigs(); - if (result.success && result.data) { - setNacosConfigs(result.data); - } - setLoadingNacosConfigs(false); - }; const loadAgents = async () => { setLoadingAgents(true); @@ -275,7 +240,6 @@ export default function A2AAgentDiscoveryModal({ if (result.success && result.data) { setSelectedAgent(result.data); setDiscoveredAgents([result.data]); - loadAgents(); if (onDiscoverSuccess) { onDiscoverSuccess(); } @@ -285,90 +249,6 @@ export default function A2AAgentDiscoveryModal({ } }; - // Add new Nacos config - const handleAddNacosConfig = async () => { - if (!newNacosConfig.name.trim()) { - messageApi.error(t("a2a.discovery.nacosNameRequired")); - return; - } - if (!newNacosConfig.nacos_addr.trim()) { - messageApi.error(t("a2a.discovery.nacosAddrRequired")); - return; - } - - setSavingNacosConfig(true); - try { - const result = await a2aClientService.createNacosConfig({ - name: newNacosConfig.name.trim(), - nacos_addr: newNacosConfig.nacos_addr.trim(), - namespace_id: newNacosConfig.namespace_id || "public", - nacos_username: newNacosConfig.username.trim() || undefined, - nacos_password: newNacosConfig.password.trim() || undefined, - }); - - if (result.success && result.data) { - messageApi.success(t("a2a.discovery.addNacosConfigSuccess")); - await loadNacosConfigs(); - setSelectedNacosConfigId(result.data.config_id); - setNewNacosConfig({ name: "", nacos_addr: "", username: "", password: "", namespace_id: "public" }); - } else { - messageApi.error(result.message || 
t("a2a.discovery.addNacosConfigFailed")); - } - } catch (error) { - log.error("Failed to add Nacos config:", error); - messageApi.error(t("a2a.discovery.addNacosConfigFailed")); - } - setSavingNacosConfig(false); - }; - - // Delete Nacos config - const handleDeleteNacosConfig = async (configId: string) => { - const result = await a2aClientService.deleteNacosConfig(configId); - if (result.success) { - messageApi.success(t("a2a.discovery.deleteNacosConfigSuccess")); - if (selectedNacosConfigId === configId) { - setSelectedNacosConfigId(null); - } - await loadNacosConfigs(); - } else { - messageApi.error(result.message || t("a2a.discovery.deleteNacosConfigFailed")); - } - }; - - // Discover from Nacos - const handleDiscoverFromNacos = async () => { - if (!selectedNacosConfigId) { - messageApi.error(t("a2a.discovery.selectNacosConfig")); - return; - } - - if (agentNames.length === 0) { - messageApi.error(t("a2a.discovery.enterAgentNames")); - return; - } - - setScanning(true); - const result = await a2aClientService.discoverFromNacos({ - nacos_config_id: selectedNacosConfigId, - agent_names: agentNames, - namespace: newNacosConfig.namespace_id || "public", - }); - setScanning(false); - - if (result.success && result.data) { - setDiscoveredAgents(result.data); - if (result.data.length === 0) { - messageApi.warning(t("a2a.discovery.noAgentsFound")); - } else { - messageApi.success( - t("a2a.discovery.foundAgents", { count: result.data.length }) - ); - } - } else { - messageApi.error(result.message || t("a2a.discovery.failed")); - } - }; - // Refresh agent card const handleRefresh = async (agentId: string) => { setRefreshingId(agentId); @@ -456,59 +336,6 @@ export default function A2AAgentDiscoveryModal({ ); }; - // Nacos config table columns - const nacosConfigColumns = [ - { - title: t("a2a.discovery.nacosName"), - dataIndex: "name", - key: "name", - width: "30%", - ellipsis: true, - render: (text: string) => {text}, - }, - { - title: t("a2a.discovery.nacosAddr"), - 
dataIndex: "nacos_addr", - key: "nacos_addr", - width: "40%", - ellipsis: true, - render: (text: string) => {text}, - }, - { - title: t("a2a.discovery.namespace"), - dataIndex: "namespace_id", - key: "namespace_id", - width: "15%", - render: (text: string) => {text}, - }, - { - title: t("common.actions"), - key: "action", - width: "15%", - render: (_: any, record: NacosConfig) => ( - - -
), }, - // Nacos Discovery Tab (disabled - feature pending) + // Nacos Discovery Tab { key: "nacos", label: ( {t("a2a.discovery.tab.nacos")} - Coming Soon ), - disabled: true, + disabled: false, children: ( -
- {/* Existing Nacos Configs List */} -
-
- - {t("a2a.discovery.nacosConfigList")} - - - - - -
- - {/* Add Nacos Config Form - Toggleable */} - {showAddNacosForm && ( - -
- - - setNewNacosConfig({ ...newNacosConfig, name: e.target.value }) - } - disabled={savingNacosConfig} - /> - - - - - setNewNacosConfig({ ...newNacosConfig, nacos_addr: e.target.value }) - } - disabled={savingNacosConfig} - /> - - - - - setNewNacosConfig({ ...newNacosConfig, namespace_id: e.target.value }) - } - disabled={savingNacosConfig} - /> - - - - - setNewNacosConfig({ ...newNacosConfig, username: e.target.value }) - } - disabled={savingNacosConfig} - /> - - - - - setNewNacosConfig({ ...newNacosConfig, password: e.target.value }) - } - disabled={savingNacosConfig} - /> - - -
- - -
-
-
- )} - - - record.config_id === selectedNacosConfigId ? "bg-blue-50" : "" - } - onRow={(record) => ({ - onClick: () => setSelectedNacosConfigId(record.config_id), - style: { cursor: "pointer" }, - })} - /> - - - {/* Scan Section - Only show when config is selected */} - {selectedNacosConfigId && ( - -
- - + setNacosConfig({ ...nacosConfig, name: e.target.value }) + } + disabled={savingNacosConfig} + /> + + + + + setNacosConfig({ ...nacosConfig, nacos_addr: e.target.value }) + } + disabled={savingNacosConfig} + /> + + + + + setNacosConfig({ ...nacosConfig, namespace_id: e.target.value }) + } + disabled={savingNacosConfig} + /> + + + + + setNacosConfig({ ...nacosConfig, username: e.target.value }) + } + disabled={savingNacosConfig} + /> + + + + + setNacosConfig({ ...nacosConfig, password: e.target.value }) + } + disabled={savingNacosConfig} + /> + + +
+ + + +
+ +
+ )} + +
+ record.config_id === selectedNacosConfigId ? "bg-blue-50" : "" + } + onRow={(record) => ({ + onClick: () => setSelectedNacosConfigId(record.config_id), + style: { cursor: "pointer" }, + })} + /> + + + {/* Scan Section - Only show when config is selected */} + {selectedNacosConfigId && ( + +
+ + setContainerServiceName(e.target.value)} + style={{ width: 150 }} + maxLength={20} + disabled={actionsLocked} + /> + {t("mcpConfig.addContainer.port")}: @@ -1158,15 +1223,20 @@ export default function McpConfigModal({ style={{ flex: 3 }} /> -
- setOpenApiJson(e.target.value)} - rows={6} - disabled={actionsLocked || importingOpenApi} - /> -
+ setOpenApiHeadersTemplate(e.target.value)} + rows={2} + disabled={actionsLocked || importingOpenApi} + /> + setOpenApiJson(e.target.value)} + rows={6} + disabled={actionsLocked || importingOpenApi} + />
@@ -1253,7 +1322,6 @@ export default function McpConfigModal({ size="small" pagination={false} locale={{ emptyText: t("mcpConfig.containerList.empty") }} - scroll={{ y: 300 }} style={{ width: "100%" }} /> @@ -1277,7 +1345,6 @@ export default function McpConfigModal({ size="small" pagination={false} locale={{ emptyText: t("mcpConfig.openapiService.list.empty") }} - scroll={{ y: 300 }} style={{ width: "100%" }} /> @@ -1304,6 +1371,7 @@ export default function McpConfigModal({ initialName={editingServer?.service_name || ""} initialUrl={editingServer?.mcp_url || ""} initialAuthorizationToken={editingServer?.authorization_token || null} + initialCustomHeaders={editingServer?.custom_headers || null} loading={updatingServer || loadingMcpRecord} /> diff --git a/frontend/app/[locale]/agents/components/agentConfig/SkillBuildModal.tsx b/frontend/app/[locale]/agents/components/agentConfig/SkillBuildModal.tsx index eff41b6d9..8f040d4b3 100644 --- a/frontend/app/[locale]/agents/components/agentConfig/SkillBuildModal.tsx +++ b/frontend/app/[locale]/agents/components/agentConfig/SkillBuildModal.tsx @@ -15,6 +15,7 @@ import { Row, Col, Spin, + Tooltip, } from "antd"; import { Upload as UploadIcon, @@ -23,13 +24,19 @@ import { MessagesSquare, HardDriveUpload, Loader2, + Plus, + X, + Pencil, + Square, } from "lucide-react"; import { extractSkillInfo, extractSkillInfoFromContent } from "@/lib/skillFileUtils"; +import yaml from "js-yaml"; import { MAX_RECENT_SKILLS, THINKING_STEPS_ZH, type SkillFormData, type ChatMessage, + type SkillFileContent, } from "@/types/skill"; import { fetchSkillsList, @@ -37,11 +44,19 @@ import { submitSkillFromFile, findSkillByName, searchSkillsByName as searchSkillsByNameUtil, - createSimpleSkillStream, + createSkillStream, clearChatAndTempFile, + stopSkillCreation, type SkillListItem, + type SkillData, } from "@/services/skillService"; -import { MarkdownRenderer } from "@/components/ui/markdownRenderer"; +import { + fetchSkillFiles, + 
fetchSkillFileContent, + SkillFilesAccessDeniedError, + type SkillFileNode, +} from "@/services/agentConfigService"; +import { MarkdownRenderer } from "@/components/common/markdownRenderer"; import log from "@/lib/logger"; const { TextArea } = Input; @@ -72,19 +87,59 @@ export default function SkillBuildModal({ const [chatMessages, setChatMessages] = useState([]); const [chatInput, setChatInput] = useState(""); const [isChatLoading, setIsChatLoading] = useState(false); - const [thinkingStep, setThinkingStep] = useState(0); const [thinkingDescription, setThinkingDescription] = useState(""); const [isThinkingVisible, setIsThinkingVisible] = useState(false); const [interactiveSkillName, setInteractiveSkillName] = useState(""); const chatContainerRef = useRef(null); - const contentTextAreaId = useRef("skill-content-textarea-" + Date.now()); - // Content input streaming state - const [formStreamingContent, setFormStreamingContent] = useState(""); - const [isContentStreaming, setIsContentStreaming] = useState(false); - const [thinkingStreamingContent, setThinkingStreamingContent] = useState(""); - const [summaryStreamingContent, setSummaryStreamingContent] = useState(""); - const [isSummaryVisible, setIsSummaryVisible] = useState(false); + // Content input streaming state - multi-file tabs + const [skillTabs, setSkillTabs] = useState([ + { path: "SKILL.md", content: "" }, + ]); + const [activeSkillTab, setActiveSkillTab] = useState("SKILL.md"); + const [isStreaming, setIsStreaming] = useState(false); + + // Tab management state + const [editingTabKey, setEditingTabKey] = useState(null); + const [editingTabName, setEditingTabName] = useState(""); + + // Summary content for chat bubble + const [summaryContent, setSummaryContent] = useState(""); + + // Frontmatter buffer for streaming - accumulate and parse at completion + const frontmatterBufferRef = useRef(""); + + // Refs for per-tab scroll state: tracks whether each textarea should auto-scroll + // 
eslint-disable-next-line @typescript-eslint/no-explicit-any + const textareaRefs = useRef>({}); + const shouldAutoScrollRef = useRef>({}); + + // Detect if the textarea is currently near the bottom (within threshold pixels) + const isTextareaAtBottom = (tabPath: string): boolean => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const ref = textareaRefs.current[tabPath] as any; + const textarea = ref?.resizableTextArea?.textArea || ref?.textArea || ref; + if (!textarea) return true; + return textarea.scrollHeight - textarea.scrollTop - textarea.clientHeight < 20; + }; + + // Update shouldAutoScrollRef when user scrolls manually + const handleTextareaScroll = (tabPath: string) => { + shouldAutoScrollRef.current[tabPath] = isTextareaAtBottom(tabPath); + }; + + // Scroll textarea to bottom, respecting user scroll preference and throttled via RAF + const scrollTextareaToBottom = (tabPath: string) => { + if (!shouldAutoScrollRef.current[tabPath]) return; + requestAnimationFrame(() => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const ref = textareaRefs.current[tabPath] as any; + const textarea = ref?.resizableTextArea?.textArea || ref?.textArea || ref; + if (textarea) { + textarea.scrollTop = textarea.scrollHeight; + } + }); + }; // Track if component is mounted to prevent state updates after unmount const isMountedRef = useRef(true); @@ -92,6 +147,29 @@ export default function SkillBuildModal({ // Track if streaming is complete to prevent late onFormContent callbacks from overwriting cleaned content const isStreamingCompleteRef = useRef(false); + // Track current tabs during streaming to avoid stale closure issues + const streamingTabsRef = useRef([{ path: "SKILL.md", content: "" }]); + + // AbortController ref for stopping streaming + const abortControllerRef = useRef(null); + + // Task ID ref for backend stop API + const taskIdRef = useRef(""); + + // Multi-turn conversation state: accumulated skill draft from previous 
turns. + // When the user sends a follow-up message, this draft is passed as existing_skill + // so the backend can refine the skill rather than generating from scratch. + const [accumulatedDraft, setAccumulatedDraft] = useState<{ + name: string; + description: string; + tags: string[]; + content: string; + } | null>(null); + + // Whether the user is in multi-turn refinement mode (has already received a draft). + // Used to switch the placeholder from "创建" to "继续修改" and to pass existing_skill. + const [isMultiTurn, setIsMultiTurn] = useState(false); + // Name input dropdown control const [isNameDropdownOpen, setIsNameDropdownOpen] = useState(false); const [isTagsFocused, setIsTagsFocused] = useState(false); @@ -116,7 +194,9 @@ export default function SkillBuildModal({ let cancelled = false; fetchSkillsList() .then((list) => { - if (!cancelled) setAllSkills(list); + if (!cancelled) { + setAllSkills(list); + } }) .catch((err) => { log.error("Failed to load skills for SkillBuildModal", err); @@ -128,6 +208,13 @@ export default function SkillBuildModal({ useEffect(() => { if (!isOpen) { + // Abort any ongoing streaming request + if (abortControllerRef.current) { + abortControllerRef.current.abort("Modal closed"); + abortControllerRef.current = null; + } + // Reset task ID + taskIdRef.current = ""; form.resetFields(); setActiveTab("interactive"); setSelectedSkillName(""); @@ -141,15 +228,19 @@ export default function SkillBuildModal({ setIsCreateMode(true); setUploadExtractingName(false); setUploadExtractedSkillName(""); - setThinkingStep(0); setThinkingDescription(""); setIsThinkingVisible(false); - setFormStreamingContent(""); - setThinkingStreamingContent(""); - setSummaryStreamingContent(""); - setIsSummaryVisible(false); - setIsContentStreaming(false); + setSkillTabs([{ path: "SKILL.md", content: "" }]); + streamingTabsRef.current = [{ path: "SKILL.md", content: "" }]; + shouldAutoScrollRef.current = {}; + setActiveSkillTab("SKILL.md"); + setIsStreaming(false); + 
setSummaryContent(""); currentAssistantIdRef.current = ""; + setAccumulatedDraft(null); + setIsMultiTurn(false); + setEditingTabKey(null); + setEditingTabName(""); } }, [isOpen, form]); @@ -161,26 +252,19 @@ export default function SkillBuildModal({ }; }, []); - // Sync streaming content to the current assistant chat message for real-time display. - // Show thinking content while thinking is visible, then switch to summary. + // Sync summary content to the current assistant chat message for real-time display. useEffect(() => { if (!currentAssistantIdRef.current) return; - const displayContent = isSummaryVisible ? summaryStreamingContent : thinkingStreamingContent; - if (!displayContent) return; - setChatMessages((prev) => - prev.map((msg) => + if (!summaryContent) return; + setChatMessages((prev) => { + if (!prev.some((m) => m.id === currentAssistantIdRef.current)) return prev; + return prev.map((msg) => msg.id === currentAssistantIdRef.current - ? { ...msg, content: displayContent } + ? 
{ ...msg, content: summaryContent } : msg - ) - ); - }, [thinkingStreamingContent, summaryStreamingContent, isSummaryVisible]); - - // Sync formStreamingContent to the form content field for real-time display - useEffect(() => { - if (!formStreamingContent) return; - form.setFieldValue("content", formStreamingContent); - }, [formStreamingContent, form]); + ); + }); + }, [summaryContent]); // Detect create/update mode when skill name changes useEffect(() => { @@ -190,11 +274,8 @@ export default function SkillBuildModal({ setIsCreateMode(!matchedSkill); if (matchedSkill) { setSelectedSkillName(matchedSkill.name); - form.setFieldsValue({ - description: matchedSkill.description || "", - source: matchedSkill.source || "自定义", - content: matchedSkill.content || "", - }); + // Load all skill data including files + loadSkillData(nameValue); } } else { setIsCreateMode(true); @@ -255,21 +336,32 @@ export default function SkillBuildModal({ setSelectedSkillName(value); setInteractiveSkillName(value); setIsNameDropdownOpen(false); - const skill = allSkills.find((s) => s.name === value); - if (skill) { - form.setFieldsValue({ - name: skill.name, - description: skill.description || "", - source: skill.source || "自定义", - content: skill.content || "", - }); - } + }; + + // Load skill data when name is selected or typed + const loadSkillData = async (skillName: string) => { + const skill = allSkills.find((s) => s.name === skillName); + if (!skill) return; + + const fieldsToSet = { + name: skill.name, + description: skill.description || "", + source: skill.source || "自定义", + tags: skill.tags || [], + content: skill.content || "", + }; + form.setFieldsValue(fieldsToSet); + + await loadSkillFiles(skillName); }; const handleNameChange = (value: string) => { setInteractiveSkillName(value); if (!value || value.trim() === "") { setSelectedSkillName(""); + // Reset skillTabs when input is cleared + setSkillTabs([{ path: "SKILL.md", content: "" }]); + setActiveSkillTab("SKILL.md"); } }; @@ 
-292,8 +384,19 @@ export default function SkillBuildModal({ try { const values = await form.validateFields(); setIsSubmitting(true); + + const skillTab = skillTabs.find(t => t.path === "SKILL.md"); + const content = skillTab?.content || ""; + + const extraFiles = skillTabs + .filter(t => t.path !== "SKILL.md") + .map(t => ({ + path: t.path, + content: t.content || "", + })); + await submitSkillForm( - values, + { ...values, content, files: extraFiles.length > 0 ? extraFiles : undefined } as SkillData, allSkills, onSuccess, onCancel, @@ -332,6 +435,135 @@ export default function SkillBuildModal({ } }; + // Helper function to update tab content + const updateTabContent = (tabPath: string, content: string) => { + setSkillTabs((prev) => { + const newTabs = prev.map((tab) => + tab.path === tabPath ? { ...tab, content: tab.content + content } : tab + ); + streamingTabsRef.current = newTabs; + return newTabs; + }); + // Scroll to bottom after content update during streaming + if (isStreaming) { + setTimeout(() => scrollTextareaToBottom(tabPath), 0); + } + }; + + // Assemble skill files into XML-like format for agent consumption + const assembleSkillContent = (tabs: SkillFileContent[]): string => { + const parts: string[] = []; + + for (const tab of tabs) { + if (tab.path === "SKILL.md") { + parts.push(`\n${tab.content}\n`); + } else { + parts.push(`\n${tab.content}\n`); + } + } + + return parts.join("\n\n"); + }; + + // Load all files for a skill into skillTabs + const loadSkillFiles = async (skillName: string) => { + try { + const files = await fetchSkillFiles(skillName); + if (files.length === 0) { + // Fallback: load SKILL.md content from the skill list item + const skill = allSkills.find((s) => s.name === skillName); + if (skill?.content) { + setSkillTabs([{ path: "SKILL.md", content: skill.content }]); + } + return; + } + + // Flatten file tree and get all file paths. 
+ // The root node's name IS the skill_name — skip the root itself and + // start from its children so paths stay relative (e.g. "SKILL.md", not "skill_name/SKILL.md"). + const flattenFiles = (nodes: SkillFileNode[], prefix = ""): string[] => { + const result: string[] = []; + for (const node of nodes) { + if (node.type === "directory" && node.name === skillName && prefix === "") { + // Root directory — recurse into children without prepending the root name + if (node.children) { + result.push(...flattenFiles(node.children, "")); + } + } else { + const fullPath = prefix ? `${prefix}/${node.name}` : node.name; + if (node.type === "file") { + result.push(fullPath); + } else if (node.children) { + result.push(...flattenFiles(node.children, fullPath)); + } + } + } + return result; + }; + + const filePaths = flattenFiles(files); + + // Load content for each file + const tabsContent: SkillFileContent[] = []; + for (const filePath of filePaths) { + const content = await fetchSkillFileContent(skillName, filePath); + tabsContent.push({ path: filePath, content: content || "" }); + } + + // Sort so SKILL.md is always first + tabsContent.sort((a, b) => { + if (a.path === "SKILL.md") return -1; + if (b.path === "SKILL.md") return 1; + return a.path.localeCompare(b.path); + }); + + setSkillTabs(tabsContent); + setActiveSkillTab("SKILL.md"); + } catch (error) { + log.error("Failed to load skill files:", error); + if (error instanceof SkillFilesAccessDeniedError) { + message.warning(error.message); + return; + } + // Fallback to basic content + const skill = allSkills.find((s) => s.name === skillName); + if (skill?.content) { + setSkillTabs([{ path: "SKILL.md", content: skill.content }]); + setActiveSkillTab("SKILL.md"); + } + } + }; + + // Parse frontmatter YAML and update form fields + const parseAndUpdateFrontmatter = (frontmatterYaml: string) => { + try { + // Parse the frontmatter using js-yaml + const parsed = yaml.load(frontmatterYaml) as Record | null; + if (parsed && 
typeof parsed === "object") { + const name = typeof parsed.name === "string" ? parsed.name.trim() : ""; + const description = typeof parsed.description === "string" ? parsed.description.trim() : ""; + const tags = Array.isArray(parsed.tags) ? parsed.tags.filter((t): t is string => typeof t === "string") : []; + + if (name) { + form.setFieldsValue({ name }); + setInteractiveSkillName(name); + const existingSkill = allSkills.find( + (s) => s.name.toLowerCase() === name.toLowerCase() + ); + setIsCreateMode(!existingSkill); + } + if (description) { + form.setFieldsValue({ description }); + } + if (tags.length > 0) { + form.setFieldsValue({ tags }); + } + } + } catch (e) { + log.warn("Failed to parse frontmatter:", e); + } + }; + // Handle chat send for interactive creation const handleChatSend = async () => { if (!chatInput.trim() || isChatLoading) return; @@ -339,13 +571,17 @@ export default function SkillBuildModal({ const currentInput = chatInput.trim(); setChatInput(""); - // Read current form fields to provide context to the model + // Read current form fields to provide context to the model. const formValues = form.getFieldsValue(); + const draft = accumulatedDraft; + + // Assemble skill content from all tabs + const assembledContent = assembleSkillContent(skillTabs); const formContext = [ formValues.name ? `当前技能名称:${formValues.name}` : "", formValues.description ? `当前技能描述:${formValues.description}` : "", formValues.tags?.length ? `当前标签:${formValues.tags.join(", ")}` : "", - formValues.content ? `当前内容:\n${formValues.content}` : "", + assembledContent ? 
`当前技能文件内容:\n${assembledContent}` : "", ].filter(Boolean).join("\n\n"); const userMessage: ChatMessage = { @@ -357,18 +593,17 @@ export default function SkillBuildModal({ setChatMessages((prev) => [...prev, userMessage]); setIsChatLoading(true); - setThinkingStep(1); - setThinkingDescription(THINKING_STEPS_ZH.find((s) => s.step === 1)?.description || "生成技能内容中 ..."); setIsThinkingVisible(true); + setThinkingDescription(t("skillManagement.generatingSkill") || "生成技能内容中 ..."); - // Clear content input before streaming - form.setFieldValue("content", ""); - setFormStreamingContent(""); - setThinkingStreamingContent(""); - setSummaryStreamingContent(""); - setIsSummaryVisible(false); - setIsContentStreaming(true); - // Reset streaming complete flag + // Clear content input before streaming — start fresh so the streamed content + // reflects the (possibly refined) result of this turn. + setSkillTabs([{ path: "SKILL.md", content: "" }]); + streamingTabsRef.current = [{ path: "SKILL.md", content: "" }]; + shouldAutoScrollRef.current = { "SKILL.md": true }; + setActiveSkillTab("SKILL.md"); + setIsStreaming(true); + setSummaryContent(""); isStreamingCompleteRef.current = false; const assistantId = (Date.now() + 1).toString(); @@ -378,56 +613,143 @@ export default function SkillBuildModal({ { id: assistantId, role: "assistant", content: "", timestamp: new Date() }, ]); - // Track current assistant message ID for streaming updates currentAssistantIdRef.current = assistantId; try { - // Build user prompt with form context + // Create AbortController for this request + abortControllerRef.current = new AbortController(); + + // On first turn, no existing_skill is sent → backend creates from scratch. + // On subsequent turns (accumulatedDraft exists), existing_skill is passed + // → backend follows the modify-workflow template and refines the draft. const userPrompt = formContext ? 
`用户需求:${currentInput}\n\n${formContext}` : `用户需求:${currentInput}`; - await createSimpleSkillStream( + await createSkillStream( { user_request: userPrompt, - existing_skill: !isCreateMode ? { - name: formValues.name || "", - description: formValues.description || "", - tags: formValues.tags || [], - content: formValues.content || "", + existing_skill: draft ? { + name: draft.name || formValues.name || "", + description: draft.description || formValues.description || "", + tags: draft.tags?.length ? draft.tags : (formValues.tags || []), + content: assembledContent, } : undefined, + complexity: "complicated", + language: "zh", }, { + onTaskId: (taskId) => { + taskIdRef.current = taskId; + }, onThinkingUpdate: (step, desc) => { - setThinkingStep(step); - setThinkingDescription(desc || THINKING_STEPS_ZH.find((s) => s.step === step)?.description || ""); + setThinkingDescription(desc || "生成技能内容中 ..."); }, onThinkingVisible: (visible) => { setIsThinkingVisible(visible); }, onStepCount: (step) => { - setThinkingStep(step); setThinkingDescription(THINKING_STEPS_ZH.find((s) => s.step === step)?.description || "生成技能内容中 ..."); }, - onFormContent: (content) => { + onFrontmatter: (content) => { + // Accumulate frontmatter content as it streams in + // Parse frontmatter incrementally as it streams to update form fields + frontmatterBufferRef.current += content; + // Try to parse incrementally for form field updates + try { + const parsed = yaml.load(frontmatterBufferRef.current) as Record | null; + if (parsed && typeof parsed === "object") { + const name = typeof parsed.name === "string" ? parsed.name.trim() : ""; + const description = typeof parsed.description === "string" ? parsed.description.trim() : ""; + const tags = Array.isArray(parsed.tags) ? 
parsed.tags.filter((t): t is string => typeof t === "string") : []; + + if (name) { + form.setFieldsValue({ name }); + setInteractiveSkillName(name); + } + if (description) { + form.setFieldsValue({ description }); + } + if (tags.length > 0) { + form.setFieldsValue({ tags }); + } + } + } catch { + // YAML not complete yet, will parse when skill body starts + } + }, + onSkillBody: (content) => { if (isStreamingCompleteRef.current) return; - setFormStreamingContent((prev) => prev + content); + // Frontmatter is complete when skill_body starts - clear the buffer + frontmatterBufferRef.current = ""; + // Only add body content to textarea (no frontmatter) + updateTabContent("SKILL.md", content); + }, + onFileContent: (path, content, isNewFile) => { + if (isStreamingCompleteRef.current) return; + + if (isNewFile) { + // New file detected, create a new tab + setSkillTabs((prev) => { + const newTabs = prev.find((t) => t.path === path) ? prev : [...prev, { path, content: "" }]; + streamingTabsRef.current = newTabs; + shouldAutoScrollRef.current[path] = true; + return newTabs; + }); + } + + updateTabContent(path, content); + setActiveSkillTab(path); }, - onSummaryContent: (content) => { - setSummaryStreamingContent((prev) => prev + content); - setIsSummaryVisible(true); + onSummary: (content) => { + if (isStreamingCompleteRef.current) return; + setSummaryContent((prev) => prev + content); }, - onDone: (finalResult) => { + onDone: (result) => { if (!isMountedRef.current) return; setIsThinkingVisible(false); - setIsContentStreaming(false); + setIsStreaming(false); currentAssistantIdRef.current = ""; isStreamingCompleteRef.current = true; - const finalFormContent = finalResult.formContent; - if (finalFormContent) { - const skillInfo = extractSkillInfoFromContent(finalFormContent); + // Get SKILL.md content and strip frontmatter for textarea display + const skillTab = result.skillTabs.find(t => t.path === "SKILL.md"); + const fullContent = skillTab?.content || ""; + + if 
(fullContent || result.skillTabs.length > 0) { + // Strip frontmatter from SKILL.md content for textarea display + const skillInfo = extractSkillInfoFromContent(fullContent); + const contentWithoutFrontmatter = skillInfo?.contentWithoutFrontmatter || ""; + + // Use the current tabs from ref (avoids stale closure) + const currentTabs = streamingTabsRef.current; + // Build updated tabs: start with current tabs, update matching ones from backend + const updatedTabs = currentTabs.map((tab) => { + const backendTab = result.skillTabs.find((t) => t.path === tab.path); + if (tab.path === "SKILL.md") { + return { ...tab, content: contentWithoutFrontmatter }; + } + if (backendTab) { + return { ...tab, content: backendTab.content || tab.content }; + } + return tab; + }); + + // Add any new tabs from backend that don't exist in current tabs + const newTabsFromBackend = result.skillTabs.filter((t) => !currentTabs.find((tab) => tab.path === t.path)); + const finalTabs = [...updatedTabs, ...newTabsFromBackend]; + + // Sort so SKILL.md is always first + finalTabs.sort((a, b) => { + if (a.path === "SKILL.md") return -1; + if (b.path === "SKILL.md") return 1; + return a.path.localeCompare(b.path); + }); + + setSkillTabs(finalTabs); + + // Update form fields from parsed skill info if (skillInfo && skillInfo.name) { form.setFieldsValue({ name: skillInfo.name }); setInteractiveSkillName(skillInfo.name); @@ -442,10 +764,21 @@ export default function SkillBuildModal({ if (skillInfo && skillInfo.tags && skillInfo.tags.length > 0) { form.setFieldsValue({ tags: skillInfo.tags }); } - if (skillInfo && skillInfo.contentWithoutFrontmatter) { - form.setFieldsValue({ content: skillInfo.contentWithoutFrontmatter }); - setFormStreamingContent(skillInfo.contentWithoutFrontmatter); - } + + // Update accumulated draft with assembled content for next turn + const assembledDraft = assembleSkillContent(updatedTabs); + const newDraft = { + name: skillInfo?.name || draft?.name || "", + description: 
skillInfo?.description || draft?.description || "", + tags: skillInfo?.tags?.length ? skillInfo.tags : (draft?.tags || []), + content: assembledDraft, + }; + setAccumulatedDraft(newDraft); + setIsMultiTurn(true); + + // Scroll to bottom after content is fully loaded + setTimeout(() => scrollTextareaToBottom("SKILL.md"), 0); + message.success(t("skillManagement.message.skillReadyForSave")); } }, @@ -453,17 +786,28 @@ export default function SkillBuildModal({ log.error("Interactive skill creation error:", errorMsg); message.error(t("skillManagement.message.chatError")); setChatMessages((prev) => prev.filter((m) => m.id !== assistantId)); - setIsContentStreaming(false); + setIsStreaming(false); currentAssistantIdRef.current = ""; }, - } + }, + { signal: abortControllerRef.current.signal } ); } catch (error) { + // Handle AbortError gracefully when user stops the stream + const err = error as Error; + if (err?.name === "AbortError") { + // User stopped - just reset states silently + setIsChatLoading(false); + setIsStreaming(false); + setIsThinkingVisible(false); + return; + } log.error("Interactive skill creation error:", error); message.error(t("skillManagement.message.chatError")); setChatMessages((prev) => prev.filter((m) => m.id !== assistantId)); - setIsContentStreaming(false); + setIsStreaming(false); } finally { + abortControllerRef.current = null; setIsChatLoading(false); } }; @@ -474,10 +818,38 @@ export default function SkillBuildModal({ setChatMessages([]); form.resetFields(["name", "description", "source", "tags", "content"]); setInteractiveSkillName(""); - setFormStreamingContent(""); - setThinkingStreamingContent(""); - setSummaryStreamingContent(""); - setIsSummaryVisible(false); + setSkillTabs([{ path: "SKILL.md", content: "" }]); + streamingTabsRef.current = [{ path: "SKILL.md", content: "" }]; + setActiveSkillTab("SKILL.md"); + setSummaryContent(""); + setAccumulatedDraft(null); + setIsMultiTurn(false); + }; + + // Handle stop - cancel the ongoing 
streaming request + const handleStop = async () => { + // Call backend stop API first + if (taskIdRef.current) { + try { + await stopSkillCreation(taskIdRef.current); + } catch (error) { + log.error("Failed to stop backend task:", error); + } + } + + // Abort frontend fetch + if (abortControllerRef.current) { + abortControllerRef.current.abort("User stopped"); + abortControllerRef.current = null; + } + + // Reset all states + setIsChatLoading(false); + setIsStreaming(false); + setIsThinkingVisible(false); + currentAssistantIdRef.current = ""; + taskIdRef.current = ""; + isStreamingCompleteRef.current = true; }; // Scroll to bottom of chat when new messages arrive @@ -487,16 +859,6 @@ export default function SkillBuildModal({ } }, [chatMessages]); - // Scroll to bottom of content textarea when streaming content updates - useEffect(() => { - if (formStreamingContent) { - const textarea = document.getElementById(contentTextAreaId.current); - if (textarea) { - textarea.scrollTop = textarea.scrollHeight; - } - } - }, [formStreamingContent]); - const renderInteractiveTab = () => { return (
@@ -543,7 +905,7 @@ export default function SkillBuildModal({ : "bg-gray-100 text-gray-800" }`} > - {msg.role === "assistant" && isThinkingVisible && !isSummaryVisible ? ( + {msg.role === "assistant" && msg.id === currentAssistantIdRef.current && isThinkingVisible ? (
{thinkingDescription && ( @@ -555,7 +917,7 @@ export default function SkillBuildModal({ ) : msg.role === "assistant" ? (
@@ -576,126 +938,271 @@ export default function SkillBuildModal({ onPressEnter={(e) => { if (!e.shiftKey) { e.preventDefault(); - handleChatSend(); + if (!isChatLoading && !isStreaming) { + handleChatSend(); + } } }} - placeholder={t("skillManagement.form.chatPlaceholder")} - disabled={isChatLoading} + placeholder={isMultiTurn + ? t("skillManagement.form.multiTurnPlaceholder") + : t("skillManagement.form.chatPlaceholder") + } + disabled={isChatLoading || isStreaming} autoSize={{ minRows: 1, maxRows: 3 }} className="resize-none" /> -
{/* Right side: Form */} -
- - - {t("skillManagement.form.newSkillHint")} - - ) : ( - - {t("skillManagement.form.existingSkillHint")} - - ) - ) : undefined} - validateStatus={interactiveSkillName.trim() ? (isCreateMode ? "success" : "warning") : undefined} - > - 0} - options={dropdownOptions} - onSearch={handleNameSearch} - onSelect={handleNameSelect} - onChange={handleNameChange} - onFocus={handleNameFocus} - onBlur={handleNameBlur} - value={interactiveSkillName} - placeholder={t("skillManagement.form.namePlaceholder")} - allowClear - /> - - - + {/* Form header area */} +
+ -