diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index 5edb7b309fa..00000000000 --- a/.dockerignore +++ /dev/null @@ -1,26 +0,0 @@ -.dockerignore -.devcontainer -.git -.gitignore -.github -.mypy_cache -.pytest_cache -.deployment -.venv - -MindsDB.egg-info -github-actions -assets - -docker/db_images -docker/docker-bake.hcl -docker/*.Dockerfile -docker/docker-compose* -docker/README.md - -build -dist -docs -helm -var -**/__pycache__ diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index b991db19346..e6c0d7d5d74 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,4 +1 @@ /.github/workflows/ @mindsdb/devops -/tests/scripts/ @mindsdb/devops -/docker/docker-bake.hcl @mindsdb/devops -/pyproject.toml @mindsdb/devops diff --git a/.github/workflows/add_to_pr_review.yml b/.github/workflows/add_to_pr_review.yml deleted file mode 100644 index cb55d59e94c..00000000000 --- a/.github/workflows/add_to_pr_review.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: Add Pull Requests to PR review project - -permissions: - contents: read - -on: - pull_request: - types: - - opened - -jobs: - add-to-project: - runs-on: ubuntu-latest - - steps: - - name: Check if actor is an organization member - id: check-membership - run: | - if [[ "${{ github.actor }}" == "mindsdb/"* ]]; then - echo "::set-output name=isOrgMember::true" - else - echo "::set-output name=isOrgMember::false" - fi - shell: bash - - - name: Add issue to project - if: steps.check-membership.outputs.isOrgMember == 'true' - uses: actions/add-to-project@v0.5.0 - with: - project-url: https://github.com/orgs/mindsdb/projects/65 - github-token: ${{ secrets.ADD_TO_PROJECT_PAT }} diff --git a/.github/workflows/add_to_roadmap_project_v2.yml b/.github/workflows/add_to_roadmap_project_v2.yml deleted file mode 100644 index 510d37be567..00000000000 --- a/.github/workflows/add_to_roadmap_project_v2.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: Add issue to roadmap project - -permissions: - contents: read - -on: - 
issues: - types: - - opened -jobs: - add-to-project: - name: Add issue to roadmap project - runs-on: ubuntu-latest - steps: - - uses: actions/add-to-project@v0.4.0 - with: - project-url: https://github.com/orgs/mindsdb/projects/53 - github-token: ${{ secrets.ADD_TO_PROJECT_PAT }} \ No newline at end of file diff --git a/.github/workflows/build_deploy_dev.yml b/.github/workflows/build_deploy_dev.yml deleted file mode 100644 index cc9d25f7edd..00000000000 --- a/.github/workflows/build_deploy_dev.yml +++ /dev/null @@ -1,149 +0,0 @@ -name: Build and deploy to dev - -permissions: - contents: read - pull-requests: write - pages: write - id-token: write - -on: - pull_request: - types: [opened, reopened, synchronize, labeled] - branches: - - 'main' - - 'releases/*' - -# Cancel any existing runs of this workflow on the same branch/pr -# We always want to build/deploy/test a new commit over an older one -concurrency: - group: ${{ github.workflow_ref }} - cancel-in-progress: true - -jobs: - - changes: - name: Filter changed files - runs-on: mdb-dev - outputs: - not-docs: ${{ steps.filter.outputs.not-docs }} - steps: - - uses: dorny/paths-filter@v3 - id: filter - with: - predicate-quantifier: "every" - filters: | - not-docs: - - '!docs/**' - - '!assets/**' - - '!**/*.md' - - '!.github/workflows/build_deploy_dev.yml' - - '!.github/workflows/test_on_deploy.yml' - - '!mindsdb/__about__.py' - - # Start running unit tests early - we want to run them always - # and they don't depend on build or deployment - run_unit_tests: - name: Run Unit Tests - needs: [changes] - if: ${{ needs.changes.outputs.not-docs == 'true' }} - uses: ./.github/workflows/tests_unit.yml - secrets: inherit - - # Looks for labels like "deploy-to-" attached to a PR so we can deploy to those envs - get-deploy-labels: - if: ${{ !github.event.pull_request.head.repo.fork }} - name: Get Deploy Envs - runs-on: mdb-dev - needs: [changes] - outputs: - deploy-envs: ${{ steps.get-labels.outputs.deploy-envs }} - steps: - - 
id: get-labels - uses: mindsdb/github-actions/get-deploy-labels@main - - # Build our docker images based on our bake file - build: - if: ${{ !github.event.pull_request.head.repo.fork && needs.get-deploy-labels.outputs.deploy-envs != '[]' }} - name: Build Docker Images - runs-on: mdb-dev - needs: [get-deploy-labels] - steps: - - uses: actions/checkout@v4 - # Build the bakefile and push - - uses: mindsdb/github-actions/docker-bake@main - with: - git-sha: ${{ github.event.pull_request.head.sha }} - target: cloud-cpu - platforms: linux/amd64 - push-cache: false - - scan-keycloak: - runs-on: mdb-dev - needs: [ build ] - name: Scan cloud-cpu image - steps: - - uses: actions/checkout@v4 - - uses: mindsdb/github-actions/snyk-docker-scan@main - with: - image: 168681354662.dkr.ecr.us-east-1.amazonaws.com/mindsdb:${{ github.event.pull_request.head.sha }}-cloud-cpu - snyk-token: ${{ secrets.SNYK_TOKEN }} - dockerfile: docker/mindsdb.Dockerfile - - # Push cache layers to docker registry - # This is separate to the build step so we can do other stuff in parallel - build-cache: - if: ${{ !github.event.pull_request.head.repo.fork }} - name: Push Docker Cache - runs-on: mdb-dev - needs: [build] - steps: - - uses: actions/checkout@v4 - # Build the bakefile and push - - uses: mindsdb/github-actions/docker-bake@main - with: - git-sha: ${{ github.event.pull_request.head.sha }} - target: cloud-cpu - platforms: linux/amd64 - push-cache: true - cache-only: true - - # This will run the deployment workflow in the base branch, not in the PR. - # So if you change the deploy workflow in your PR, the changes won't be reflected in this run. 
- deploy: - if: ${{ !github.event.pull_request.head.repo.fork && needs.get-deploy-labels.outputs.deploy-envs != '[]' }} - name: Deploy - needs: [build, get-deploy-labels] - uses: ./.github/workflows/deploy.yml - with: - deploy-envs: ${{ needs.get-deploy-labels.outputs.deploy-envs }} - image-tag: ${{ github.event.pull_request.head.sha }} - secrets: inherit - - # Run integration tests against the deployed environment - run_integration_tests: - if: ${{ !github.event.pull_request.head.repo.fork }} - name: Run Integration Tests - needs: [deploy, get-deploy-labels] - strategy: - fail-fast: false - matrix: - deploy-env: ${{ fromJson(needs.get-deploy-labels.outputs.deploy-envs) }} - concurrency: - group: deploy-${{ matrix.deploy-env }} - cancel-in-progress: false - uses: ./.github/workflows/tests_integration.yml - with: - deploy-env: ${{ matrix.deploy-env }} - secrets: inherit - - # This is a collection point for all of the matrix tests above so we can have a single required job - tests_completed: - name: All Tests Succeeded - needs: [run_unit_tests, run_integration_tests, changes] - runs-on: mdb-dev - if: always() - steps: - - name: fail if tests failed or didnt run - if: ${{ (needs.run_unit_tests.result != 'success' || (needs.run_integration_tests.result != 'success' && !github.event.pull_request.head.repo.fork)) && needs.changes.outputs.not-docs == 'true' }} - run: exit 1 - - run: echo "Tests ran successfully" diff --git a/.github/workflows/build_deploy_prod.yml b/.github/workflows/build_deploy_prod.yml deleted file mode 100644 index f4fc723047c..00000000000 --- a/.github/workflows/build_deploy_prod.yml +++ /dev/null @@ -1,206 +0,0 @@ -name: Build and deploy release - -permissions: - contents: read - pull-requests: write - pages: write - id-token: write - -on: - release: - types: [published] - paths-ignore: - - "docs/**" - - "README.md" - -env: - UV_LINK_MODE: "symlink" - -concurrency: - group: release - cancel-in-progress: false - -jobs: - run_unit_tests: - name: Run 
Unit Tests - uses: ./.github/workflows/tests_unit.yml - secrets: inherit - - # Check that the version defined in the github release is valid - check-version: - name: Check Code Version - runs-on: mdb-dev - needs: [run_unit_tests] - if: github.actor != 'mindsdbadmin' - steps: - - uses: actions/checkout@v4 - - uses: FranzDiebold/github-env-vars-action@v2 - - name: Set up Python - uses: actions/setup-python@v5.1.0 - with: - python-version: ${{ vars.CI_PYTHON_VERSION }} - - name: Check Version - run: | - PYTHONPATH=./ python tests/scripts/check_version.py ${{ env.CI_REF_NAME }} ${{ github.event.release.prerelease }} - - # Push a new release to PyPI - deploy_to_pypi: - name: Publish to PyPI - runs-on: mdb-dev - needs: [check-version, run_unit_tests] - if: github.actor != 'mindsdbadmin' - steps: - - uses: actions/checkout@v4 - - name: Setup uv - uses: astral-sh/setup-uv@v5 - with: - cache-local-path: "/home/runner/_work/_tool/uv-local-cache" # Place cache in the tool dir because we mount this in our runnners - prune-cache: false # We want to save all cache because it's in the mount^ - python-version: ${{ vars.CI_PYTHON_VERSION || '3.11' }} # Default to 3.11 where vars aren't available (PRs from forks) - - name: Install dependencies - run: | - uv pip install -r requirements/requirements-dev.txt - - name: Build and publish - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - run: | - # This uses the version string from __about__.py, which we checked matches the git tag above - uv pip install build - python -m build - twine upload dist/* - - # Build our docker images based on our bake file - # This will tag with the release version tag and push to both dockerhub and ECR - build: - name: Build Docker Images - runs-on: mdb-dev - needs: [check-version, run_unit_tests] - if: github.actor != 'mindsdbadmin' - steps: - - uses: actions/checkout@v4 - - name: Docker Login - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKER_USERNAME }} 
- password: ${{ secrets.DOCKER_PASSWORD }} - # Build the bakefile and push - - uses: mindsdb/github-actions/docker-bake@main - with: - push-to-dockerhub: true - push-cache: false - - # Push cache layers to docker registry - # This is separate to the build step so we can do other stuff in parallel - build-cache: - name: Push Docker Cache - runs-on: mdb-dev - needs: [build] - steps: - - uses: actions/checkout@v4 - # Build the bakefile and push - - uses: mindsdb/github-actions/docker-bake@main - with: - push-cache: true - cache-only: true - - # Call our deployment workflow - deploy: - name: Deploy to Prod - needs: [build] - uses: ./.github/workflows/deploy.yml - with: - deploy-envs: '["prod"]' - image-tag: ${{ github.event.release.tag_name }} - prod: true - secrets: inherit - - # Trigger private repo to deploy the docker desktop extension - trigger_dd_extension_release: - name: Deploy Docker Desktop Extension - runs-on: mdb-dev - needs: [build] - if: github.actor != 'mindsdbadmin' - steps: - - uses: FranzDiebold/github-env-vars-action@v2 - - uses: convictional/trigger-workflow-and-wait@v1.6.5 - with: - owner: mindsdb - repo: mindsdb-docker-extension - github_token: ${{ secrets.REPO_DISPATCH_PAT_TOKEN }} - workflow_file_name: bump-mindsdb-version.yml - ref: main - client_payload: '{"image-tag": "${{ env.CI_REF_NAME }}"}' - - # Run integration tests - run_integration_tests: - name: Run Integration Tests - needs: [deploy] - concurrency: - group: deploy-prod - cancel-in-progress: false - uses: ./.github/workflows/tests_integration.yml - with: - git-sha: ${{ github.event.release.tag_name }} - deploy-env: prod - runs-on: mdb-prod - secrets: inherit - - tests_completed: - name: All Tests Succeeded - needs: [run_unit_tests, run_integration_tests] - runs-on: mdb-dev - steps: - - name: fail if tests failed or didnt run - if: ${{ needs.run_unit_tests.result != 'success' || needs.run_integration_tests.result != 'success'}} - run: exit 1 - - run: echo "Tests ran successfully" - - 
slack_message: - if: failure() && !cancelled() - name: Notify Slack - # Every previous job needs to be in here, because failure() will only return true if the job that failed is in 'needs' - needs: [check-version, run_unit_tests, deploy_to_pypi, build, build-cache, deploy, trigger_dd_extension_release, run_integration_tests, tests_completed] - runs-on: mdb-dev - steps: - - name: Notify of failing tests - uses: slackapi/slack-github-action@v1.26.0 - with: - channel-id: ${{ secrets.SLACK_ENG_CHANNEL_ID }} - payload: | - { - "attachments": [ - { - "color": "#FF4444", - "blocks": [ - { - "type": "header", - "text": { - "type": "plain_text", - "text": "TEST RUN FAILED ON RELEASE ${{ github.event.release.tag_name }}", - "emoji": true - } - }, - { - "type": "section", - "text": { - "type": "mrkdwn", - "text": " " - }, - "fields": [ - { - "type": "mrkdwn", - "text": "*Commit*\n<${{ github.server_url }}/${{ github.repository }}/commit/${{ github.sha }}|${{ github.sha }}>" - }, - { - "type": "mrkdwn", - "text": "*Workflow Run*\n<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|${{ github.workflow }}>" - } - ] - } - ] - } - ] - } - env: - SLACK_BOT_TOKEN: ${{ secrets.GH_ACTIONS_SLACK_BOT_TOKEN }} diff --git a/.github/workflows/build_deploy_staging.yml b/.github/workflows/build_deploy_staging.yml deleted file mode 100644 index f580f8baa5c..00000000000 --- a/.github/workflows/build_deploy_staging.yml +++ /dev/null @@ -1,147 +0,0 @@ -name: Build and deploy to staging - -permissions: - contents: read - pull-requests: write - pages: write - id-token: write - -on: - # Using pull_request instead of push on main because we want access to the pull request's details via 'github.event' - # But it means we need to check below if this PR was merged and not just closed - pull_request: - types: - - closed - branches: - - 'main' - - 'releases/*' - -concurrency: - group: ${{ github.workflow_ref }} - cancel-in-progress: true - -jobs: - - run_unit_tests: - 
name: Run Unit Tests - if: github.event.pull_request.merged == true - uses: ./.github/workflows/tests_unit.yml - secrets: inherit - - # Build our docker images based on our bake file - build: - if: github.event.pull_request.merged == true - name: Build Docker Images - runs-on: mdb-dev - steps: - # Check out the merge commit on the base branch - - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.merge_commit_sha }} - # Build the bakefile and push - - uses: mindsdb/github-actions/docker-bake@main - with: - push-cache: false - - # Push cache layers to docker registry - # This is separate to the build step so we can do other stuff in parallel - build-cache: - name: Push Docker Cache - runs-on: mdb-dev - needs: [build] - steps: - # Check out the merge commit on the base branch - - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.merge_commit_sha }} - # Build the bakefile and push - - uses: mindsdb/github-actions/docker-bake@main - with: - push-cache: true - cache-only: true - - # Call our deployment workflow - deploy: - name: Deploy to Staging - needs: [build] - uses: ./.github/workflows/deploy.yml - with: - deploy-envs: '["staging", "dev", "alpha-dev"]' - image-tag: ${{ github.event.pull_request.merge_commit_sha }} - secrets: inherit - - # Run integration tests - run_integration_tests: - if: github.event.pull_request.merged == true - name: Run Integration Tests - needs: [deploy] - concurrency: - group: deploy-staging - cancel-in-progress: false - uses: ./.github/workflows/tests_integration.yml - with: - git-sha: ${{ github.event.pull_request.merge_commit_sha }} - deploy-env: staging - secrets: inherit - - tests_completed: - if: always() && github.event.pull_request.merged == true - name: All Tests Succeeded - needs: [run_unit_tests, run_integration_tests] - runs-on: mdb-dev - steps: - - name: fail if tests failed or didnt run - if: ${{ needs.run_unit_tests.result != 'success' || needs.run_integration_tests.result != 
'success'}} - run: exit 1 - - run: echo "Tests ran successfully" - - slack_message: - if: failure() && !cancelled() && github.event.pull_request.merged == true - name: Notify Slack - # Every previous job needs to be in here, because failure() will only return true if the job that failed is in 'needs' - needs: [run_unit_tests, build, build-cache, deploy, run_integration_tests, tests_completed] - runs-on: mdb-dev - steps: - - name: Notify of failing tests - if: ${{ needs.tests_completed.result != 'success' && needs.tests_completed.result != 'cancelled' }} - uses: slackapi/slack-github-action@v1.26.0 - with: - channel-id: ${{ secrets.SLACK_ENG_CHANNEL_ID }} - payload: | - { - "attachments": [ - { - "color": "#FF4444", - "blocks": [ - { - "type": "header", - "text": { - "type": "plain_text", - "text": "TEST RUN FAILED ON ${{ github.base_ref }}", - "emoji": true - } - }, - { - "type": "section", - "text": { - "type": "mrkdwn", - "text": " " - }, - "fields": [ - { - "type": "mrkdwn", - "text": "*Commit*\n<${{ github.server_url }}/${{ github.repository }}/commit/${{ github.sha }}|${{ github.sha }}>" - }, - { - "type": "mrkdwn", - "text": "*Workflow Run*\n<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|${{ github.workflow }}>" - } - ] - } - ] - } - ] - } - env: - SLACK_BOT_TOKEN: ${{ secrets.GH_ACTIONS_SLACK_BOT_TOKEN }} - diff --git a/.github/workflows/cla.yml b/.github/workflows/cla.yml deleted file mode 100644 index 796fc2c0a28..00000000000 --- a/.github/workflows/cla.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: "MindsDB CLA Assistant" - -permissions: - actions: write - contents: write - pull-requests: write - statuses: write - -on: - issue_comment: - types: [created] - pull_request_target: - types: [opened,closed,synchronize] - -jobs: - CLAssistant: - runs-on: mdb-dev - steps: - - name: "CLA Assistant" - if: (github.event.comment.body == 'recheck' || github.event.comment.body == 'I have read the CLA Document and I hereby sign the CLA') 
|| github.event_name == 'pull_request_target' - uses: contributor-assistant/github-action@v2.6.1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - path-to-signatures: 'assets/contributions-agreement/signatures/cla.json' - path-to-document: 'https://github.com/mindsdb/mindsdb/blob/main/assets/contributions-agreement/individual-contributor.md' - branch: 'cla' - allowlist: bot*, ZoranPandovski, torrmal, Stpmax, mindsdbadmin, ea-rus, tmichaeldb, dusvyat, hamishfagg, MinuraPunchihewa, martyna-mindsdb, dylanketterer, ala12326571 diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 00000000000..c1555839311 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,60 @@ +name: "CodeQL" + +on: + push: + branches: ["main"] + pull_request: + branches: ["main"] + schedule: + - cron: "0 4 * * 0" + +permissions: + contents: read + +jobs: + detect-languages: + name: Detect CodeQL languages + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Build language matrix + id: set-matrix + run: | + include='[{"language":"actions","build-mode":"none"}]' + + if git ls-files '*.py' | grep -q .; then + include='[{"language":"actions","build-mode":"none"},{"language":"python","build-mode":"none"}]' + fi + + echo "matrix={\"include\":$include}" >> "$GITHUB_OUTPUT" + + analyze: + name: Analyze (${{ matrix.language }}) + needs: detect-languages + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + strategy: + fail-fast: false + matrix: ${{ fromJSON(needs.detect-languages.outputs.matrix) }} + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Initialize CodeQL + uses: github/codeql-action/init@v4 + with: + languages: ${{ matrix.language }} + build-mode: ${{ matrix['build-mode'] }} + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v4 
+ with: + category: /language:${{ matrix.language }} diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml deleted file mode 100644 index 93f6326ea21..00000000000 --- a/.github/workflows/deploy.yml +++ /dev/null @@ -1,107 +0,0 @@ -permissions: - contents: read - -on: - workflow_call: - inputs: - deploy-envs: - required: true - type: string - image-tag: - required: true - type: string - prod: - required: false - type: boolean - default: false - secrets: - SLACK_DEPLOYMENTS_CHANNEL_ID: - required: true - REPO_DISPATCH_PAT_TOKEN: - required: true - MINDSDB_DB_URI: - required: true - - -jobs: - migrate: - if: github.actor != 'mindsdbadmin' - runs-on: ${{ matrix.deploy-env == 'prod' && 'mdb-prod' || 'mdb-dev' }} - strategy: - fail-fast: false - matrix: - deploy-env: ${{ fromJson(inputs.deploy-envs) }} - concurrency: - group: deploy-${{ matrix.deploy-env }} - cancel-in-progress: false - environment: - name: ${{ matrix.deploy-env }} - url: ${{ vars.MDB_ENV_URL }} - # We only want to run one deploy job for an env at a time - # Don't cancel in progress jobs because it may be for a different PR - env: - MINDSDB_DB_CON: ${{ secrets.MINDSDB_DB_URI }} - UV_LINK_MODE: "symlink" - steps: - - uses: actions/checkout@v4 - - name: Setup uv - uses: astral-sh/setup-uv@v5 - with: - # Place cache in the tool dir because we mount this in our runnners - cache-local-path: "/home/runner/_work/_tool/uv-local-cache" - prune-cache: false - python-version: ${{ vars.CI_PYTHON_VERSION || '3.11' }} - - name: Install dependencies - run: | - uv pip install -r requirements/requirements.txt - - name: Migrate DB - run: | - cd mindsdb/migrations - env PYTHONPATH=../../ alembic upgrade head - - - # Trigger private repo to deploy - trigger_deploy: - if: github.actor != 'mindsdbadmin' - needs: migrate - runs-on: mdb-dev - strategy: - fail-fast: false - matrix: - deploy-env: ${{ fromJson(inputs.deploy-envs) }} - concurrency: - group: deploy-${{ matrix.deploy-env }} - cancel-in-progress: 
false - environment: - name: ${{ matrix.deploy-env }} - url: ${{ vars.MDB_ENV_URL }} - steps: - - uses: FranzDiebold/github-env-vars-action@v2 - - name: Notify of deployment starting - id: slack - uses: mindsdb/github-actions/slack-deploy-msg@main - with: - channel-id: ${{ secrets.SLACK_DEPLOYMENTS_CHANNEL_ID }} - status: "started" - color: "#0099CC" - env-name: ${{ matrix.deploy-env }} - env-url: ${{ vars.MDB_ENV_URL }} - slack-token: ${{ secrets.GH_ACTIONS_SLACK_BOT_TOKEN }} - - uses: mindsdb/github-actions/dispatch-and-wait@main - with: - owner: mindsdb - repo: INTERNAL-mindsdb-build-deploy-to-kubernetes - token: ${{ secrets.REPO_DISPATCH_PAT_TOKEN }} - workflow: ${{ inputs.prod && 'deploy-prod.yml' || 'deploy-dev.yml' }} - workflow_inputs: '{"image-tag-prefix": "${{ inputs.image-tag }}", "deploy-env": "${{ matrix.deploy-env }}"}' - - name: Notify of deployment finish - uses: mindsdb/github-actions/slack-deploy-msg@main - if: always() - with: - channel-id: ${{ secrets.SLACK_DEPLOYMENTS_CHANNEL_ID }} - status: "${{ job.status == 'success' && 'finished' || 'failed' }}" - color: "${{ job.status == 'success' && '#00C851' || '#FF4444' }}" - env-name: ${{ matrix.deploy-env }} - env-url: ${{ vars.MDB_ENV_URL }} - slack-token: ${{ secrets.GH_ACTIONS_SLACK_BOT_TOKEN }} - update-message-id: ${{ steps.slack.outputs.ts }} \ No newline at end of file diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml new file mode 100644 index 00000000000..9eaa9d89154 --- /dev/null +++ b/.github/workflows/deploy_docs.yml @@ -0,0 +1,37 @@ +name: Deploy Docs to GitHub Pages + +on: + push: + branches: [main] + paths: + - "docs/**" + + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: pages + cancel-in-progress: true + +jobs: + deploy: + runs-on: ubuntu-latest + environment: + name: github-pages + url: ${{ steps.deploy.outputs.page_url }} + + steps: + - uses: actions/checkout@v4 + + - uses: 
actions/configure-pages@v5 + + - uses: actions/upload-pages-artifact@v3 + with: + path: docs/ + + - id: deploy + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/matrix_includes.json b/.github/workflows/matrix_includes.json deleted file mode 100644 index f57f05776d2..00000000000 --- a/.github/workflows/matrix_includes.json +++ /dev/null @@ -1,64 +0,0 @@ -[ - { - "runs_on": "ubuntu-latest", - "python-version": "3.10", - "runOnBranch": "main" - }, - { - "runs_on": "ubuntu-latest", - "python-version": "3.11", - "runOnBranch": "always" - }, - { - "runs_on": "ubuntu-latest", - "python-version": "3.12", - "runOnBranch": "main" - }, - { - "runs_on": "ubuntu-latest", - "python-version": "3.13", - "runOnBranch": "main" - }, - - { - "runs_on": "windows-latest", - "python-version": "3.10", - "runOnBranch": "main" - }, - { - "runs_on": "windows-latest", - "python-version": "3.11", - "runOnBranch": "always" - }, - { - "runs_on": "windows-latest", - "python-version": "3.12", - "runOnBranch": "main" - }, - { - "runs_on": "windows-latest", - "python-version": "3.13", - "runOnBranch": "main" - }, - - { - "runs_on": "macos-latest", - "python-version": "3.10", - "runOnBranch": "main" - }, - { - "runs_on": "macos-latest", - "python-version": "3.11", - "runOnBranch": "always" - }, - { - "runs_on": "macos-latest", - "python-version": "3.12", - "runOnBranch": "main" - }, - { - "runs_on": "macos-latest", - "python-version": "3.13", - "runOnBranch": "main" - } -] diff --git a/.github/workflows/release_notes.yml b/.github/workflows/release_notes.yml deleted file mode 100644 index 07bc736283c..00000000000 --- a/.github/workflows/release_notes.yml +++ /dev/null @@ -1,76 +0,0 @@ -# Uses OpenAI to summarise github's auto-generated release notes, -# Then update the release notes to contain the new summary - - -name: Update Release Notes - -permissions: - contents: write - -on: - release: - types: - - published - -jobs: - get-release-notes: - runs-on: ubuntu-latest - - steps: - - 
name: Checkout repository - uses: actions/checkout@v4 - - - name: Enhance release notes with LLM - id: enhance_notes - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - GH_TOKEN: ${{ github.token }} - RELEASE_BODY: ${{ github.event.release.body }} - run: | - echo "Release body:\n$RELEASE_BODY" - - # Prepare the LLM API payload - payload=$(jq -n \ - --arg model "gpt-4" \ - --arg system_content "Follow the instructions to update the GitHub release notes provided below. The writing style should be simple, clear, and easy to read, yet it should follow these points: 1. Write a section titled \"TL;DR\" that describes everything contained in the \"What's Changed\" section. Provide a description of each change listed under the \"What's Changed\" section including a short description of why is this change beneficial to users and what good it brings. Make this \"TL;DR\" section informative and divide information into sections of related items; these section titles should include \"Changes to SQL\", \"Changes to Integrations\", \"Changes to Knowledge Bases\", \"Bug Fixes and Improvements\", \"Changes to Documentation\", unless a section does not have any items to be listed then do not list it. Do not mention any change authors in the \"TL;DR\" section. 2. The input comes in the Markdown format as below. Generate the output in the Markdown format as well. 3. 
Final format should adhere to the following: - start with the \"TL;DR\" section, which is a summary of the \"What's Changed\" section, - follow it with a new line containing \"---\" that separates the sections, - follow it with the \"What's Changed\" section, and DO NOT change the provided \"What's Changed\" section, - generate a Markdown format output that concatenates these two sections."\ - --arg user_content "$RELEASE_BODY" \ - '{ - model: $model, - messages: [ - { - role: "system", - content: $system_content - }, - { - role: "user", - content: $user_content - } - ] - }') - - echo "Payload sent to OpenAI:\n$payload" - - # Call the OpenAI API - response=$(curl -s -X POST https://api.openai.com/v1/chat/completions \ - -H "Authorization: Bearer $OPENAI_API_KEY" \ - -H "Content-Type: application/json" \ - -d "$payload") - - echo "Raw response from OpenAI:\n$response" - - # Extract enhanced notes from the API response - ENHANCED_NOTES=$(echo "$response" | jq -r '.choices[0].message.content // empty') - - if [ -z "$ENHANCED_NOTES" ]; then - echo "LLM enhancement failed. Exiting." 
- exit 1 - fi - - echo "Enhanced release notes:\n$ENHANCED_NOTES" - - - # Write enhanced notes to a file - echo "$ENHANCED_NOTES" > enhanced_notes.md - - # Update the release with the enhanced notes - gh release edit ${{ github.event.release.tag_name }} --notes-file enhanced_notes.md diff --git a/.github/workflows/tests_integration.yml b/.github/workflows/tests_integration.yml deleted file mode 100644 index 3edd71e1c04..00000000000 --- a/.github/workflows/tests_integration.yml +++ /dev/null @@ -1,74 +0,0 @@ -name: Test on Deploy - -permissions: - contents: read - -on: - workflow_call: - inputs: - git-sha: - required: false - type: string - default: "" - deploy-env: - required: true - type: string - runs-on: - required: false - type: string - default: "mdb-dev" - secrets: - OPENAI_API_KEY: - required: true - workflow_dispatch: - inputs: - git-sha: - required: false - type: string - default: "" - deploy-env: - required: true - type: string - runs-on: - required: false - type: string - default: "mdb-dev" - secrets: - OPENAI_API_KEY: - required: true - -defaults: - run: - shell: bash - -env: - UV_LINK_MODE: "symlink" - -jobs: - # Run our integration tests - test: - environment: - name: ${{ inputs.deploy-env }} - url: ${{ vars.MDB_ENV_URL }} - name: Run integration tests on deploy - runs-on: ${{ inputs.runs-on }} - steps: - - uses: actions/checkout@v4 - with: - ref: ${{ inputs.git-sha }} - - name: Setup uv - uses: astral-sh/setup-uv@v5 - with: - # Place cache in the tool dir because we mount this in our runnners - cache-local-path: "/home/runner/_work/_tool/uv-local-cache" - prune-cache: false - python-version: ${{ vars.CI_PYTHON_VERSION || '3.11' }} - - name: Install dependencies - run: | - uv pip install -r requirements/requirements-test.txt - - name: Run Integration Tests on Deploy - run: | - make integration_tests_slow - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - INTERNAL_URL: ${{ vars.MINDSDB_INTERNAL_URL }} diff --git a/.github/workflows/tests_unit.yml 
b/.github/workflows/tests_unit.yml deleted file mode 100644 index 876972852f7..00000000000 --- a/.github/workflows/tests_unit.yml +++ /dev/null @@ -1,205 +0,0 @@ -name: Unit Tests - -on: - workflow_call: - -defaults: - run: - shell: bash - -permissions: - contents: read # For checkout and comparing commits - pull-requests: write # For creating/updating PR comments - pages: write - id-token: write - - -env: - HANDLERS_TO_INSTALL: | - postgres - mysql - salesforce - snowflake - timescaledb - mssql - oracle - redshift - bigquery - web - databricks - duckdb_faiss - openai - # We measure 80% on this handlers, as they are the verified - HANDLERS_TO_VERIFY: | - mysql - salesforce - postgres - snowflake - timescaledb - mssql - oracle - file - redshift - bigquery - COVERAGE_FAIL_UNDER: "80" - -jobs: - # Run all of our static code checks here - code_checking: - name: Run static code checks - runs-on: mdb-dev - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 # required to grab the history of the PR so pre-commit can work out what's changed - ref: ${{ github.event.pull_request.head.sha }} - - name: Setup uv - uses: astral-sh/setup-uv@v5 - with: - cache-local-path: "/home/runner/_work/_tool/uv-local-cache" # Place cache in the tool dir because we mount this in our runnners - prune-cache: false # We want to save all cache because it's in the mount^ - python-version: ${{ vars.CI_PYTHON_VERSION || '3.11' }} # Default to 3.11 where vars aren't available (PRs from forks) - # Checks the codebase for print() statements and fails if any are found - # We should be using loggers instead - - name: Check for print statements - run: | - # The pyproject file confuses uv: https://github.com/astral-sh/uv/issues/6838 - rm pyproject.toml - uv run tests/scripts/check_print_statements.py - - - name: Install MDB dev requirements - run: | - uv pip install -r requirements/requirements-dev.txt - - # Run pre-commit on all changed files - # See .pre-commit-config.yaml for 
the list of checks - - name: Run pre-commit - run: | - pre-commit run --show-diff-on-failure --color=always --from-ref ${{ github.event.pull_request.base.sha || 'HEAD~1' }} --to-ref ${{ github.event.pull_request.head.sha || 'HEAD' }} - - # Runs a few different checks against our many requirements files - # to make sure they're in order - - name: Check requirements files - run: | - uv run tests/scripts/check_requirements.py - # Creates a matrix of environments to test against using matrix_includes.json - matrix_prep: - name: Prepare matrix - runs-on: mdb-dev - outputs: - matrix: ${{ steps.set-matrix.outputs.matrix }} - steps: - - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - id: set-matrix - uses: JoshuaTheMiller/conditional-build-matrix@v2.0.1 - with: - filter: "[?runOnBranch==`${{ github.ref }}` || runOnBranch==`always`]" - - check_install: - name: Check pip installation - needs: [matrix_prep, code_checking] - strategy: - matrix: ${{ fromJson(needs.matrix_prep.outputs.matrix) }} - runs-on: ${{ matrix.runs_on }} - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Setup uv - uses: astral-sh/setup-uv@v5 - with: - enable-cache: true - python-version: ${{ matrix.python-version }} - - name: Check pip package builds and installs - run: | - uv pip install -r requirements/requirements-dev.txt - # Install from the pip package - # If we install from source, we don't know if the pip package is installable. 
- python setup.py sdist - cd dist - uv pip install *.tar.gz - - unit_tests: - name: Run Unit Tests - needs: [matrix_prep, code_checking] - strategy: - matrix: ${{ fromJson(needs.matrix_prep.outputs.matrix) }} - runs-on: ${{ matrix.runs_on }} - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - - name: Setup uv - uses: astral-sh/setup-uv@v5 - with: - enable-cache: true - python-version: ${{ matrix.python-version }} - - - name: Install dependencies - run: | - INSTALL_HANDLERS=() - while IFS= read -r handler; do - handler=${handler//$'\r'/} - [[ -z "${handler}" || "${handler}" =~ ^[[:space:]]*# ]] && continue - INSTALL_HANDLERS+=("${handler}") - done <<< "${HANDLERS_TO_INSTALL}" - HANDLER_EXTRAS=() - for handler in "${INSTALL_HANDLERS[@]}"; do - HANDLER_EXTRAS+=(".[${handler}]") - done - uv pip install ".[agents,kb]" \ - -r requirements/requirements-test.txt \ - "${HANDLER_EXTRAS[@]}" - git clone --branch v$(uv pip show mindsdb_sql_parser | grep Version | cut -d ' ' -f 2) https://github.com/mindsdb/mindsdb_sql_parser.git parser_tests - - - name: Run unit tests - run: | - make unit_tests_slow - - - name: Handler coverage (report only) - if: ${{ matrix.python-version == '3.11' && matrix.runs_on == 'ubuntu-latest' }} - run: | - uv run tests/scripts/check_handler_coverage.py > pytest-coverage-handlers.txt - env: - COVERAGE_FILE: .coverage.unit - - - name: Generate HTML coverage - if: ${{ matrix.python-version == '3.11' && matrix.runs_on == 'ubuntu-latest' }} - run: | - uv run coverage html -d reports/htmlcov - env: - COVERAGE_FILE: .coverage.unit - - - name: Upload test artifacts - if: ${{ matrix.python-version == '3.11' && matrix.runs_on == 'ubuntu-latest' && github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'publish_artifacts') }} - uses: actions/upload-artifact@v4 - with: - name: unit-test-artifacts - path: | - pytest.xml - coverage.xml - .coverage.unit - 
pytest-coverage.txt - pytest-coverage-handlers.txt - reports/htmlcov/** - - - name: Configure Pages - if: ${{ matrix.python-version == '3.11' && matrix.runs_on == 'ubuntu-latest' && github.event_name == 'pull_request' && (github.event.pull_request.head.repo.fork == false) && contains(github.event.pull_request.labels.*.name, 'publish_artifacts') }} - uses: actions/configure-pages@v5 - - - name: Upload Pages artifact - if: ${{ matrix.python-version == '3.11' && matrix.runs_on == 'ubuntu-latest' && github.event_name == 'pull_request' && (github.event.pull_request.head.repo.fork == false) && contains(github.event.pull_request.labels.*.name, 'publish_artifacts') }} - uses: actions/upload-pages-artifact@v3 - with: - path: reports/htmlcov - - - id: deploy - name: Deploy to GitHub Pages - if: ${{ matrix.python-version == '3.11' && matrix.runs_on == 'ubuntu-latest' && github.event_name == 'pull_request' && (github.event.pull_request.head.repo.fork == false) && contains(github.event.pull_request.labels.*.name, 'publish_artifacts') }} - uses: actions/deploy-pages@v4 diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000000..e53c72eb035 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,12 @@ +[submodule "frontend"] + path = frontend + url = https://github.com/mindsdb/cowork +[submodule "backend/core_api"] + path = backend/core_api + url = https://github.com/mindsdb/cowork-server +[submodule "backend/core_agent"] + path = backend/core_agent + url = https://github.com/mindsdb/anton +[submodule "backend/data-vault"] + path = backend/data-vault + url = https://github.com/mindsdb/data-vault diff --git a/.gitpod.Dockerfile b/.gitpod.Dockerfile deleted file mode 100644 index 06407f3f883..00000000000 --- a/.gitpod.Dockerfile +++ /dev/null @@ -1,12 +0,0 @@ -FROM gitpod/workspace-postgres - -ENV NODE_VERSION=18 -ENV PYTHON_VERSION=3.10 - -RUN pyenv install $PYTHON_VERSION -s \ - && pyenv global $PYTHON_VERSION - -RUN bash -c 'source $HOME/.nvm/nvm.sh && nvm install 
$NODE_VERSION \ - && nvm use $NODE_VERSION && nvm alias default $NODE_VERSION' - -RUN echo "nvm use default &>/dev/null" >> ~/.bashrc.d/51-nvm-fix diff --git a/.gitpod.yml b/.gitpod.yml deleted file mode 100644 index 8b989ba835d..00000000000 --- a/.gitpod.yml +++ /dev/null @@ -1,19 +0,0 @@ -image: - file: .gitpod.Dockerfile - -ports: - # Mindsdb GUI - - port: 47334 - onOpen: open-preview - - # PostgreSQL server - - port: 5432 - onOpen: ignore - -tasks: - - name: mindsdb - init: | - pip install -U pip - pip install -r requirements.txt - command: | - python -m mindsdb diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index a62493b693f..00000000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,19 +0,0 @@ -exclude: "^databricks_src|^data_engineering/" -repos: - - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.11.11 - hooks: - - id: ruff-check - - id: ruff-format - args: [ --check ] - - id: ruff-format - stages: [manual] - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 - hooks: - - id: check-added-large-files - - id: check-toml - - id: check-yaml - exclude: "^helm/" - args: [--unsafe] - - id: check-merge-conflict \ No newline at end of file diff --git a/LICENSE b/LICENSE index d025666443d..3f97e197667 100644 --- a/LICENSE +++ b/LICENSE @@ -1,15 +1,3 @@ -# Understanding the Diverse Licensing Structure of MindsDB's Repo - -1. MindsDB Core: The MindsDB Core component specifically uses the Elastic License 2.0. This is a distinct license that applies to this particular part of the project. - -2. License File in Work's Directory: If there is a LICENSE file located in the same directory as the work, that license will apply to the work: - * `/mindsdb/integrations` directory that contains all integrations is Licensed under MIT License. - -3. Default to Elastic License 2.0: If no specific LICENSE file is found following the above rules, the work defaults to being licensed under the Elastic License 2.0. 
- -For any questions or clarifications regarding licensing, please contact us at (admin@mindsdb.com). - ------------------------------------------------------------------------------------- ## MIT License Copyright (c) 2019 MindsDB, Inc. @@ -31,55 +19,3 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - ------------------------------------------------------------------------------------- - -# Elastic License 2.0 (ELv2) - -### Acceptance - -By using the software, you agree to all of the terms and conditions below. - -### Copyright License - -The licensor grants you a non-exclusive, royalty-free, worldwide, non-sublicensable, non-transferable license to use, copy, distribute, make available, and prepare derivative works of the software, in each case subject to the limitations and conditions below. - -### Limitations - -You may not provide the software to third parties as a hosted or managed service, where the service provides users with access to any substantial set of the features or functionality of the software. -You may not move, change, disable, or circumvent the license key functionality in the software, and you may not remove or obscure any functionality in the software that is protected by the license key. -You may not alter, remove, or obscure any licensing, copyright, or other notices of the licensor in the software. Any use of the licensor’s trademarks is subject to applicable law. - -### Patents -The licensor grants you a license, under any patent claims the licensor can license, or becomes able to license, to make, have made, use, sell, offer for sale, import and have imported the software, in each case subject to the limitations and conditions in this license. 
This license does not cover any patent claims that you cause to be infringed by modifications or additions to the software. If you or your company make any written claim that the software infringes or contributes to infringement of any patent, your patent license for the software granted under these terms ends immediately. If your company makes such a claim, your patent license ends immediately for work on behalf of your company. - -### Notices - -You must ensure that anyone who gets a copy of any part of the software from you also gets a copy of these terms. -If you modify the software, you must include in any modified copies of the software prominent notices stating that you have modified the software. - -### No Other Rights - -These terms do not imply any licenses other than those expressly granted in these terms. - -### Termination - -If you use the software in violation of these terms, such use is not licensed, and your licenses will automatically terminate. If the licensor provides you with a notice of your violation, and you cease all violation of this license no later than 30 days after you receive that notice, your licenses will be reinstated retroactively. However, if you violate these terms after such reinstatement, any additional violation of these terms will cause your licenses to terminate automatically and permanently. - -### No Liability - -As far as the law allows, the software comes as is, without any warranty or condition, and the licensor will not be liable to you for any damages arising out of these terms or the use or nature of the software, under any kind of legal claim. - -### Definitions - -The licensor is the entity offering these terms, and the software is the software the licensor makes available under these terms, including any portion of it. - -**you** refers to the individual or entity agreeing to these terms. 
- -**your company** is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization. control means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise. Control can be direct or indirect. - -**your licenses** are all the licenses granted to you for the software under these terms. - -**use** means anything you do with the software requiring one of your licenses. - -**trademark** means trademarks, service marks, and similar rights. diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 55339934ea1..00000000000 --- a/MANIFEST.in +++ /dev/null @@ -1,4 +0,0 @@ -recursive-include requirements *.txt -include mindsdb/migrations/alembic.ini -recursive-include mindsdb/integrations/utilities/datasets *.csv -recursive-include mindsdb/integrations/handlers *.txt *.png *.svg *.jpg diff --git a/Makefile b/Makefile index 75c05b7c647..322fc36899d 100644 --- a/Makefile +++ b/Makefile @@ -1,94 +1,52 @@ -PYTEST_ARGS = -v -xrs --disable-warnings -n 1 --dist loadfile -PYTEST_ARGS_DEBUG = --runslow -vs -rs -DSI_PYTEST_ARGS = --run-dsi-tests -DSI_REPORT_ARGS = --json-report --json-report-file=reports/report.json -SHELL := /usr/bin/env bash +FRONTEND := frontend +API := backend/core_api +AGENT := backend/core_agent -install_mindsdb: - pip install -e . 
- pip install -r requirements/requirements-dev.txt - pre-commit install +_NPM_STAMP := $(FRONTEND)/node_modules/.package-lock.json +_API_STAMP := $(API)/.venv +_AGENT_STAMP := $(AGENT)/.venv -install_handler: - @if [[ -n "$(HANDLER_NAME)" ]]; then\ - pip install -e .[$(HANDLER_NAME)];\ - else\ - echo 'Please set $$HANDLER_NAME to the handler to install.';\ - fi -precommit: - pre-commit install - pre-commit run --files $$(git diff --cached --name-only) +.PHONY: setup dev dev-web build dist-mac dist-win docker-build docker-up docker-down -format: - pre-commit run --hook-stage manual +$(_NPM_STAMP): $(FRONTEND)/package-lock.json + npm --prefix $(FRONTEND) ci -run_mindsdb: - python -m mindsdb +$(_API_STAMP): $(API)/uv.lock + uv sync --directory $(API) -check: - python tests/scripts/check_requirements.py - python tests/scripts/check_print_statements.py - pre-commit install - pre-commit run --files $$(git diff --cached --name-only) +$(_AGENT_STAMP): $(AGENT)/uv.lock + uv sync --directory $(AGENT) -build_docker: - docker buildx build -t mdb --load -f docker/mindsdb.Dockerfile . 
+setup: $(_NPM_STAMP) $(_API_STAMP) $(_AGENT_STAMP) -run_docker: build_docker - docker run -it -p 47334:47334 mdb +dev: $(_NPM_STAMP) $(_API_STAMP) $(_AGENT_STAMP) + @trap 'kill 0' SIGINT SIGTERM EXIT; \ + uv run --directory $(API) uvicorn cowork.server:app --reload \ + --reload-dir $(CURDIR)/$(API)/cowork \ + --reload-dir $(CURDIR)/$(AGENT)/anton & \ + npm --prefix $(FRONTEND) run dev -integration_tests: - pytest $(PYTEST_ARGS) tests/integration/ -k "not test_auth" - pytest $(PYTEST_ARGS) tests/integration/ -k test_auth # Run this test separately because it alters the auth requirements, which breaks other tests +dev-web: $(_NPM_STAMP) $(_API_STAMP) $(_AGENT_STAMP) + @trap 'kill 0' SIGINT SIGTERM EXIT; \ + uv run --directory $(API) uvicorn cowork.server:app --reload \ + --reload-dir $(CURDIR)/$(API)/cowork \ + --reload-dir $(CURDIR)/$(AGENT)/anton & \ + cd $(FRONTEND) && BUILD_TARGET=web npm run dev:renderer -- --open -integration_tests_slow: - pytest --runslow $(PYTEST_ARGS) tests/integration/ -k "not test_auth" - pytest --runslow $(PYTEST_ARGS) tests/integration/ -k test_auth +build: $(_NPM_STAMP) + npm --prefix $(FRONTEND) run build -integration_tests_debug: - pytest $(PYTEST_ARGS_DEBUG) tests/integration/ +dist-mac: $(_NPM_STAMP) + npm --prefix $(FRONTEND) run dist:mac -datasource_integration_tests: - @echo "--- Running Datasource Integration (DSI) Tests ---" - # Ensure the reports directory exists before running tests - mkdir -p reports - # Added DSI_REPORT_ARGS to generate JSON report - pytest $(PYTEST_ARGS) $(DSI_PYTEST_ARGS) $(DSI_REPORT_ARGS) tests/integration/handlers/ +dist-win: $(_NPM_STAMP) + npm --prefix $(FRONTEND) run dist:win -datasource_integration_tests_debug: - @echo "--- Running Datasource Integration (DSI) Tests (Debug) ---" - mkdir -p reports - pytest $(PYTEST_ARGS_DEBUG) $(DSI_PYTEST_ARGS) $(DSI_REPORT_ARGS) tests/integration/handlers/ +docker-build: + docker compose build -unit_tests: - # We have to run executor tests separately because 
they do weird things that break everything else - env PYTHONPATH=./ pytest $(PYTEST_ARGS) tests/unit/executor/ - @set -o pipefail; \ - mkdir -p reports; \ - COVERAGE_FILE=.coverage.unit PYTHONPATH=./ pytest $(PYTEST_ARGS) --ignore=tests/unit/executor tests/unit/ \ - --junitxml=pytest.xml \ - --cov=mindsdb \ - --cov-report=term \ - --cov-report=xml:coverage.xml \ - --cov-branch | tee pytest-coverage.txt +docker-up: + docker compose up -unit_tests_slow: - env PYTHONPATH=./ pytest --runslow $(PYTEST_ARGS) tests/unit/executor/ # We have to run executor tests separately because they do weird things that break everything else - @set -o pipefail; \ - mkdir -p reports; \ - COVERAGE_FILE=.coverage.unit PYTHONPATH=./ pytest --runslow $(PYTEST_ARGS) --ignore=tests/unit/executor tests/unit/ \ - --junitxml=pytest.xml \ - --cov=mindsdb \ - --cov-report=term \ - --cov-report=xml:coverage.xml \ - --cov-branch | tee pytest-coverage.txt - -unit_tests_debug: - env PYTHONPATH=./ pytest $(PYTEST_ARGS_DEBUG) tests/unit/executor/ - pytest $(PYTEST_ARGS_DEBUG) --ignore=tests/unit/executor tests/unit/ - -.PHONY: tests-artifacts -tests-artifacts: - ./scripts/test-artifacts.sh - -.PHONY: install_mindsdb install_handler precommit format run_mindsdb check build_docker run_docker integration_tests integration_tests_slow integration_tests_debug datasource_integration_tests datasource_integration_tests_debug unit_tests unit_tests_slow unit_tests_debug +docker-down: + docker compose down diff --git a/README.md b/README.md index 0d3fe51b470..f4101db295d 100644 --- a/README.md +++ b/README.md @@ -24,30 +24,58 @@ --- -## MINDS PLATFORM +# MINDS-COWORK PLATFORM -Minds Platform is dedicated to building an open foundation for frontier Artificial Intelligence, designed for developers, businesses, and individuals seeking AI systems they can truly control, extend and deploy anywhere (VPC, on-prem or Cloud). 
+Minds Platform is dedicated to building a general-purpose AI designed for knowledge workers — creators, strategists, and operators — and individuals seeking AI systems they can truly control to help them get work done, with full flexibility to extend and deploy anywhere (VPC, on-prem, or cloud). -We believe useful AI systems require two fundamental capabilities: the ability to perform meaningful actions autonomously **(Automation)** and the ability to find and retrieve the right information **(Semantic Search)**. Our products are designed around these two foundations: -* [Minds Anton](https://github.com/mindsdb/anton) - Self-improving **Automation** Agent that can get any type of work done. Tell it what you need in plain language and it takes it from there - creating reports, organizing data, sending emails, calling APIs, building dashboards, scheduling tasks, etc. -* [Minds Query Engine](https://github.com/mindsdb/engine) - **Semantic Search** query engine, used to index and organize large amounts of data from hundreds of structured and unstructured datasources. +## USE CASES -# USE CASES +**For every knowledge worker** +- **Automate** any repetitive multi-step task that involves reading and writing (reports, monitoring, workflows) +- **Build** internal AI tools/artifacts without engineering and deploy to your team (apps, decks, docs, analyses) -| Use Case | Solution | +--- + +## GET STARTED + +### Desktop App: +The simplest way to get started is the latest prebuilt app, available on the web or as a desktop download: + +- **Web**: Click [here to register or log in](https://mindshub.ai) to the Minds-cowork app, packaged and ready for you in one click. + +- **macOS**: Click [here to download](https://downloads.mindsdb.com/anton/mac/anton-latest.pkg) Minds-cowork for macOS. + +- **Windows**: Click [here to download](https://downloads.mindsdb.com/anton/windows/anton-latest.exe) Minds-cowork for Windows. + + +### Build from source: +**1. 
Clone the repository** +```bash +git clone --recurse-submodules https://github.com/mindsdb/minds-platform.git +cd minds-platform +``` + +**2. Install dependencies** +```bash +make setup +``` + +**3. Run** + +| Mode | Command | |---|---| -| Automated reporting, recurring workflows, and operational task execution | **Anton** | -| Embeddable conversational business intelligence | **Query Engine** | -| Search across large knowledge bases; documents, tickets, etc | **Query Engine** | -| Search and Analyze data across large knowledge bases; documents, tickets, etc | **Anton + Query Engine** | -| AI operations assistants for sales, support, finance, and engineering teams | **Anton** | -| Large scale Enterprise AI systems combining memory, retrieval, reasoning, and execution | **Anton + Query Engine** | +| Desktop app (Electron) with hot reload | `make dev` | +| Web app in browser with hot reload | `make dev-web` | +| Production build | `make build` | +| Package for macOS | `make dist-mac` | +| Package for Windows | `make dist-win` | + --- -# DEPLOY ANYWHERE +## DEPLOY ANYWHERE Minds Platform is designed for flexible deployment across: diff --git a/assets/MindsDB-org-readme-diagram.jpg b/assets/MindsDB-org-readme-diagram.jpg deleted file mode 100644 index 2327c09e541..00000000000 Binary files a/assets/MindsDB-org-readme-diagram.jpg and /dev/null differ diff --git a/assets/mindsdb-header-github.png b/assets/mindsdb-header-github.png deleted file mode 100644 index 784066629d3..00000000000 Binary files a/assets/mindsdb-header-github.png and /dev/null differ diff --git a/assets/mindsdb.png b/assets/mindsdb.png deleted file mode 100644 index 35500d70059..00000000000 Binary files a/assets/mindsdb.png and /dev/null differ diff --git a/assets/mindsdb_demo.gif b/assets/mindsdb_demo.gif deleted file mode 100644 index 4e335544872..00000000000 Binary files a/assets/mindsdb_demo.gif and /dev/null differ diff --git a/assets/swag.png b/assets/swag.png deleted file mode 100644 index 
25e727b3ccc..00000000000 Binary files a/assets/swag.png and /dev/null differ diff --git a/backend/core_agent b/backend/core_agent new file mode 160000 index 00000000000..5861998c1f6 --- /dev/null +++ b/backend/core_agent @@ -0,0 +1 @@ +Subproject commit 5861998c1f63a062b33061288ebeaeb0ce3ad832 diff --git a/backend/core_api b/backend/core_api new file mode 160000 index 00000000000..446289a8232 --- /dev/null +++ b/backend/core_api @@ -0,0 +1 @@ +Subproject commit 446289a8232aa5479c0ef420bfde040d477e9fe5 diff --git a/backend/data-vault b/backend/data-vault new file mode 160000 index 00000000000..37ccd07febc --- /dev/null +++ b/backend/data-vault @@ -0,0 +1 @@ +Subproject commit 37ccd07febc9c4a20bd5c1fcd4f677efac553dc9 diff --git a/docker-compose.yml b/docker-compose.yml index 8d92bb77c9e..2532528e684 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,181 +1,37 @@ services: - - # Auto-restarts containers that are unhealthy - autoheal: - container_name: autoheal - image: willfarrell/autoheal - volumes: - - /var/run/docker.sock:/var/run/docker.sock - <<: &autoRestartOnFailure - deploy: - restart_policy: - condition: on-failure - max_attempts: 10 - - mindsdb: - <<: *autoRestartOnFailure - image: mindsdb/mindsdb:devel - - depends_on: - migrate: - condition: service_completed_successfully -# Uncomment the following lines if you want to use the local version of otel-collector and langfuse -# otel-collector: -# condition: service_started -# langfuse: -# condition: service_started - # If you want to build the image instead: -# build: -# context: . -# dockerfile: docker/mindsdb.Dockerfile -# target: dev # Makes sure dev dependencies are installed - restart: always - ports: - - '47334:47334' - - '47335:47335' + api: + build: + context: . 
+ dockerfile: docker/api.Dockerfile environment: - MINDSDB_DB_CON: "postgresql://postgres:postgres@postgres/mindsdb" - KB_PGVECTOR_URL: "postgresql://postgres:postgres@postgres/kb" # Use pgvector as the default store for knowledge bases - MINDSDB_DOCKER_ENV: "True" - MINDSDB_STORAGE_DIR: "/mindsdb/var" - FLASK_DEBUG: 1 # This will make sure http requests are logged regardless of log level - SENTRY_IO_DSN: "" - SENTRY_IO_ENVIRONMENT: "local" - #MINDSDB_APIS: "http" # Explicitly set the APIs to enable - # SENTRY_IO_FORCE_RUN: "true" # Uncomment me to force-start sentry on local development. Good for profiling, but may annoy other devs on sentry.io with the "noise" - # MINDSDB_LOG_LEVEL: "DEBUG" - # OPENAI_API_KEY: "..." - LANGFUSE_HOST: "http://langfuse:3000" # Define the host for langfuse - LANGFUSE_PUBLIC_KEY: "pk-lf-1234567890" # Define the public key for langfuse - LANGFUSE_SECRET_KEY: "sk-lf-1234567890" # Define the secret key for langfuse - LANGFUSE_ENVIRONMENT: "local" - LANGFUSE_RELEASE: "local" - # LANGFUSE_DEBUG: "True" - LANGFUSE_TIMEOUT: "10" - LANGFUSE_SAMPLE_RATE: "1.0" - # LANGFUSE_FORCE_RUN: "True" - OTEL_EXPORTER_TYPE: "console" # or "console" # Define the exporter type (console/otlp) - # OTEL_EXPORTER_PROTOCOL: "grpc" # or "http" # Define the protocol for the otlp exporter. - # OTEL_OTLP_ENDPOINT: "http://otel-collector:4317" # Define the endpoint for the otlp exporter. - # OTEL_OTLP_LOGGING_ENDPOINT: "http://otel-collector:4317" # Define if log exporter is different from OTEL_OTLP_ENDPOINT. - # OTEL_OTLP_TRACING_ENDPOINT: "http://otel-collector:4317" # Define if tracing exporter is different from OTEL_OTLP_ENDPOINT. - # OTEL_OTLP_METRICS_ENDPOINT: "http://otel-collector:4317" # Define if metrics exporter is different from OTEL_OTLP_ENDPOINT. 
- OTEL_SERVICE_NAME: "mindsdb" # Define the service name - OTEL_SERVICE_INSTANCE_ID: "mindsdb-instance" # Define the service instance id - OTEL_SERVICE_ENVIRONMENT: "local" # Define the service environment - OTEL_SERVICE_RELEASE: "local" # Define the service release - OTEL_TRACE_SAMPLE_RATE: "1.0" # Define the trace sample rate - OTEL_EXTRA_ATTRIBUTES: "" # Define the extra attributes - # OTEL_SDK_DISABLED: "false" # Define if the sdk is disabled - # OTEL_LOGGING_DISABLED: "false" # Define if the logging is disabled. - # OTEL_TRACING_DISABLED: "false" # Define if the tracing is disabled. - # OTEL_METRICS_DISABLED: "false" # Define if the metrics are disabled. - # OTEL_SDK_FORCE_RUN: "true" # Uncomment me to force-start opentelemetry on local development. - + COWORK_SERVER_HOST: "0.0.0.0" + COWORK_SERVER_PORT: "26866" + DATABASE_URI: "sqlite:////home/cowork/.cowork/cowork.db" + # ANTON_ANTHROPIC_API_KEY: "sk-ant-..." volumes: - - type: bind - source: . - target: /mindsdb - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:47334/api/util/ping"] - interval: 30s - timeout: 4s - retries: 100 - - otel-collector: - image: otel/opentelemetry-collector-contrib:0.116.1 - environment: - OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED: "true" - volumes: - - ./otel-collector-config.yaml:/etc/otelcol-contrib/config.yaml - ports: - - 1888:1888 # pprof extension - - 8888:8888 # Prometheus metrics exposed by the Collector - - 8889:8889 # Prometheus exporter metrics - - 13133:13133 # health_check extension - - 4317:4317 # OTLP gRPC receiver - - 4318:4318 # OTLP http receiver - - 55679:55679 # zpages extension - - langfuse: - <<: *autoRestartOnFailure - image: langfuse/langfuse:2.87.0 - restart: always - depends_on: - migrate: - condition: service_completed_successfully + - cowork-data:/home/cowork/.cowork ports: - - "3000:3000" + - "26866:26866" + restart: unless-stopped healthcheck: - test: ["CMD-SHELL", "curl -f http://localhost:3000/api/public/health"] - interval: 3s 
- timeout: 3s - retries: 10 - environment: - - DATABASE_URL=postgresql://postgres:postgres@postgres/langfuse - - NEXTAUTH_SECRET=secret # generate secret with at least 256 entropy using `openssl rand -base64 32`. - - SALT=salt # generate secret with at least 256 entropy using `openssl rand -base64 32`. - - ENCRYPTION_KEY=0000000000000000000000000000000000000000000000000000000000000000 # generate secret with at least 256 bits and 64 characters in hex format using `openssl rand -hex 32`. - - NEXTAUTH_URL=http://localhost:3000 # URL of the frontend - - LANGFUSE_INIT_ORG_ID=mindsdb # Organization ID - - LANGFUSE_INIT_ORG_NAME=MindsDB # Organization name - - LANGFUSE_INIT_PROJECT_ID=23152f1d-9604-4629-a69d-27790d47b7fa # Project ID (UUID) - - LANGFUSE_INIT_PROJECT_NAME=MindsDB # Project name - - LANGFUSE_INIT_PROJECT_PUBLIC_KEY=pk-lf-1234567890 # Project public key - - LANGFUSE_INIT_PROJECT_SECRET_KEY=sk-lf-1234567890 # Project secret key - - LANGFUSE_INIT_USER_EMAIL=admin@langfuse.com # User email - - LANGFUSE_INIT_USER_NAME=User # User name - - LANGFUSE_INIT_USER_PASSWORD=password # User password - - postgres: - <<: *autoRestartOnFailure - image: pgvector/pgvector:pg17 - restart: always - healthcheck: - test: ["CMD-SHELL", "pg_isready -U postgres"] - interval: 3s - timeout: 3s - retries: 10 - environment: - - POSTGRES_USER=postgres - - POSTGRES_PASSWORD=postgres - - POSTGRES_DB=postgres + test: ["CMD", "python", "-c", + "import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://127.0.0.1:26866/health',timeout=3).status==200 else 1)"] + interval: 30s + timeout: 5s + start_period: 20s + retries: 3 + + web: + build: + context: . + dockerfile: docker/web.Dockerfile ports: - - "15432:5432" # Expose the port to the host. 
Use 15432 to avoid conflicts with local postgres installations - volumes: - - data_pg17:/var/lib/postgresql/data - - # Uncomment if you need to upgrade from Postgres 16 to 17 - # Follow the upgrade proceedure in the MindsDB 25.11.1 release notes - # - # postgres16: - # <<: *autoRestartOnFailure - # image: pgvector/pgvector:pg16 - # restart: always - # environment: - # - POSTGRES_USER=postgres - # - POSTGRES_PASSWORD=postgres - # - POSTGRES_DB=postgres - # volumes: - # - db_data:/var/lib/postgresql/data - - # Ensures the required databases and extensions are created before other services start - migrate: - image: pgvector/pgvector:pg17 + - "3000:80" depends_on: - postgres: + api: condition: service_healthy - restart: "no" - environment: - - POSTGRES_USER=postgres - - POSTGRES_PASSWORD=postgres - - POSTGRES_DB=postgres - volumes: - - ./scripts/init-dbs.sh:/init-dbs.sh:ro - entrypoint: ["/bin/bash", "/init-dbs.sh"] + restart: unless-stopped volumes: - db_data: - driver: local - data_pg17: + cowork-data: driver: local diff --git a/docker/README.md b/docker/README.md deleted file mode 100644 index 67f61dd9e64..00000000000 --- a/docker/README.md +++ /dev/null @@ -1,41 +0,0 @@ -### Docker images for MindsDB - -* https://docs.mindsdb.com/setup/self-hosted/docker/ -* https://hub.docker.com/u/mindsdb - -## Building - -Docker images are using only released versions of MindsDB from -https://pypi.org/project/MindsDB/ so no files in parent dir are used. - -To build `release` image using version reported at -https://public.api.mindsdb.com/installer/release/docker___success___None - - docker build -f release --no-cache -t mindsdb/mindsdb . - -To build `release` image with specific MindsDB version. - - docker build -f release --build-arg VERSION=2.57.0 -t mindsdb/mindsdb . - -### `beta` vs `release` - -`release` image pins MindsDB version and builds from fixed PyTorch docker -image. 
`beta` uses latest PyTorch image and updates MindsDB when container -is started to a version set at -https://public.api.mindsdb.com/installer/beta/docker___success___None - -## Releasing - -The `build.py ` script is used in CI to build and push images -on release. - -## Running local docker compose environment (in development) - -Run `docker-compose up` or `docker-compose up -d` (for `detach` mode) to launch mindsdb environment in docker compose - - -## Running local docker compose environment (in old manner development) - - - -1. Run `docker-compose -f docker-compose-old-manner up` or `docker-compose -f docker-compose-old-manner up -d` (for `detach` mode) to launch mindsdb in docker-compose in old school manner (monolithic on 100%) diff --git a/docker/api.Dockerfile b/docker/api.Dockerfile new file mode 100644 index 00000000000..a1f4a6d8fc2 --- /dev/null +++ b/docker/api.Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.12-slim AS builder + +RUN pip install uv --no-cache-dir + +WORKDIR /src +COPY backend/core_agent/ ./core_agent/ +COPY backend/core_api/ ./core_api/ + +RUN python -m venv /opt/venv && \ + /opt/venv/bin/pip install uv --no-cache-dir && \ + /opt/venv/bin/uv pip install ./core_agent ./core_api + +FROM python:3.12-slim AS runtime + +LABEL org.opencontainers.image.title="cowork-api" +LABEL org.opencontainers.image.source="https://github.com/mindsdb/minds-platform" + +RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates \ + && rm -rf /var/lib/apt/lists/* \ + && useradd -m -u 1000 -s /bin/bash cowork + +COPY --from=builder /opt/venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" \ + PYTHONUNBUFFERED=1 \ + COWORK_SERVER_HOST=0.0.0.0 \ + COWORK_SERVER_PORT=26866 + +RUN mkdir -p /home/cowork/.cowork && chown cowork:cowork /home/cowork/.cowork + +USER cowork + +EXPOSE 26866 + +HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 \ + CMD python -c "import urllib.request,sys; \ +sys.exit(0 if 
urllib.request.urlopen('http://127.0.0.1:26866/health',timeout=3).status==200 else 1)" + +CMD ["cowork-server"] diff --git a/docker/db_images/README.md b/docker/db_images/README.md deleted file mode 100644 index 2b168eff572..00000000000 --- a/docker/db_images/README.md +++ /dev/null @@ -1 +0,0 @@ -### This directory contains all data to build docker images for db handlers testing diff --git a/docker/db_images/mariadb/Dockerfile b/docker/db_images/mariadb/Dockerfile deleted file mode 100644 index e48291bb1d5..00000000000 --- a/docker/db_images/mariadb/Dockerfile +++ /dev/null @@ -1,6 +0,0 @@ -FROM mariadb - -ENV MARIADB_DATABASE test - -COPY ./sql-scripts/ /docker-entrypoint-initdb.d/ -COPY home_rentals.csv / diff --git a/docker/db_images/mariadb/home_rentals.csv b/docker/db_images/mariadb/home_rentals.csv deleted file mode 100755 index d74efda921c..00000000000 --- a/docker/db_images/mariadb/home_rentals.csv +++ /dev/null @@ -1,5038 +0,0 @@ -number_of_rooms,number_of_bathrooms,sqft,location,days_on_market,initial_price,neighborhood,rental_price -0,1,484,great,10,2271,south_side,2271 -1,1,674,good,1,2167,downtown,2167 -1,1,554,poor,19,1883,westbrae,1883 -0,1,529,great,3,2431,south_side,2431 -3,2,1219,great,3,5510,south_side,5510 -1,1,398,great,11,2272,south_side,2272 -3,2,1190,poor,58,4463,westbrae,4123.812 -1,1,730,good,0,2224,downtown,2224 -0,1,298,great,9,2104,south_side,2104 -2,1,878,great,8,3861,south_side,3861 -1,1,677,good,14,2041,downtown,2041 -0,1,509,poor,18,1725,westbrae,1725 -0,1,481,poor,49,1388,westbrae,1307.496 -3,2,808,good,1,4677,downtown,4677 -1,1,522,poor,30,1713,westbrae,1678.74 -1,1,533,good,10,1903,downtown,1903 -3,2,937,good,2,4736,downtown,4736 -0,1,258,good,10,1544,downtown,1544 -1,1,630,great,11,2543,south_side,2543 -0,1,397,great,11,2168,south_side,2168 -2,1,932,good,10,3413,downtown,3413 -3,2,1069,good,9,4810,downtown,4810 -0,1,267,poor,32,1302,westbrae,1270.752 -0,1,332,good,6,1697,downtown,1697 -1,1,712,poor,56,1617,westbrae,1500.576 
-3,2,1231,poor,25,4873,westbrae,4824.27 -2,1,818,good,3,3359,downtown,3359 -2,1,805,good,8,3358,downtown,3358 -3,2,1158,poor,44,4601,westbrae,4380.152 -3,2,952,great,7,5207,south_side,5207 -2,1,771,good,8,3305,downtown,3305 -1,1,333,great,6,2284,south_side,2284 -1,1,500,poor,54,1448,westbrae,1349.536 -2,1,690,poor,16,3095,westbrae,3095 -0,1,524,great,13,2317,south_side,2317 -2,1,762,good,1,3323,downtown,3323 -2,1,872,good,14,3375,downtown,3375 -1,1,673,great,7,2604,south_side,2604 -2,1,792,good,5,3390,downtown,3390 -2,1,640,good,8,3153,downtown,3153 -0,1,454,poor,47,1353,westbrae,1279.938 -2,1,932,good,10,3447,downtown,3447 -0,1,340,good,5,1722,downtown,1722 -1,1,595,good,1,2064,downtown,2064 -2,1,558,good,3,3118,downtown,3118 -3,2,823,good,10,4545,downtown,4545 -3,2,1104,poor,16,4750,westbrae,4750 -1,1,543,poor,18,1871,westbrae,1871 -0,1,455,great,13,2205,south_side,2205 -0,1,113,good,13,1378,downtown,1378 -2,1,553,good,8,3073,downtown,3073 -3,2,1030,great,14,5260,south_side,5260 -3,2,1175,great,6,5446,south_side,5446 -1,1,644,great,12,2572,south_side,2572 -1,1,421,great,8,2333,south_side,2333 -2,1,786,poor,63,2716,westbrae,2482.424 -0,1,518,poor,21,1674,westbrae,1670.652 -1,1,532,great,4,2491,south_side,2491 -1,1,533,poor,26,1748,westbrae,1727.024 -1,1,733,poor,56,1671,westbrae,1550.688 -0,1,417,good,9,1687,downtown,1687 -2,1,891,good,3,3504,downtown,3504 -2,1,938,poor,50,3008,westbrae,2827.52 -2,1,688,good,12,3154,downtown,3154 -1,1,504,poor,22,1791,westbrae,1783.836 -1,1,741,good,3,2226,downtown,2226 -2,1,564,good,13,3001,downtown,3001 -2,1,503,good,10,3026,downtown,3026 -2,1,755,good,7,3254,downtown,3254 -0,1,415,poor,39,1472,westbrae,1416.064 -2,1,911,good,6,3492,downtown,3492 -3,2,853,good,4,4710,downtown,4710 -0,1,337,good,1,1775,downtown,1775 -1,1,477,poor,64,1371,westbrae,1250.352 -3,2,1081,great,7,5351,south_side,5351 -0,1,464,poor,21,1645,westbrae,1641.71 -3,2,1219,good,1,5030,downtown,5030 -3,2,1106,good,10,4833,downtown,4833 
-2,1,891,good,9,3371,downtown,3371 -1,1,518,good,6,2005,downtown,2005 -0,1,245,great,4,2094,south_side,2094 -3,2,1216,great,5,5495,south_side,5495 -0,1,381,poor,28,1483,westbrae,1459.272 -2,1,819,great,7,3806,south_side,3806 -2,1,787,good,9,3332,downtown,3332 -3,2,936,good,2,4738,downtown,4738 -2,1,740,good,6,3294,downtown,3294 -3,2,1215,great,13,5467,south_side,5467 -2,1,853,poor,40,3045,westbrae,2923.2 -2,1,942,poor,59,2977,westbrae,2744.794 -3,2,1204,good,7,5016,downtown,5016 -3,2,1098,great,10,5386,south_side,5386 -1,1,741,good,12,2170,downtown,2170 -1,1,603,great,10,2508,south_side,2508 -3,2,1074,good,12,4796,downtown,4796 -1,1,588,good,14,1961,downtown,1961 -0,1,334,poor,48,1243,westbrae,1173.392 -2,1,736,great,2,3854,south_side,3854 -3,2,1056,poor,54,4408,westbrae,4108.256 -1,1,625,great,2,2578,south_side,2578 -2,1,530,poor,47,2613,westbrae,2471.898 -2,1,626,great,4,3693,south_side,3693 -1,1,736,poor,30,1891,westbrae,1853.18 -3,2,882,good,10,4659,downtown,4659 -2,1,646,good,1,3280,downtown,3280 -0,1,231,good,2,1650,downtown,1650 -0,1,157,poor,36,1172,westbrae,1134.496 -1,1,489,good,9,1916,downtown,1916 -0,1,464,good,11,1793,downtown,1793 -1,1,320,great,8,2268,south_side,2268 -0,1,390,great,4,2230,south_side,2230 -2,1,682,great,5,3705,south_side,3705 -2,1,729,great,2,3856,south_side,3856 -3,2,837,poor,23,4439,westbrae,4412.366 -3,2,933,poor,22,4557,westbrae,4538.772 -1,1,606,great,14,2454,south_side,2454 -2,1,663,great,3,3691,south_side,3691 -1,1,407,good,3,1894,downtown,1894 -1,1,692,good,5,2115,downtown,2115 -3,2,975,great,6,5271,south_side,5271 -2,1,726,good,11,3242,downtown,3242 -1,1,329,good,10,1755,downtown,1755 -1,1,418,good,3,1907,downtown,1907 -1,1,627,good,2,2060,downtown,2060 -0,1,435,poor,26,1553,westbrae,1534.364 -2,1,533,great,2,3645,south_side,3645 -3,2,820,poor,23,4430,westbrae,4403.42 -2,1,935,great,13,3880,south_side,3880 -0,1,144,great,4,1979,south_side,1979 -2,1,872,poor,61,2884,westbrae,2647.512 -2,1,646,poor,53,2750,westbrae,2568.5 
-0,1,141,poor,59,940,westbrae,866.68 -3,2,816,poor,25,4427,westbrae,4382.73 -0,1,320,poor,36,1322,westbrae,1279.696 -3,2,1143,poor,18,4846,westbrae,4846 -3,2,1087,good,2,4898,downtown,4898 -0,1,369,poor,43,1349,westbrae,1286.946 -1,1,358,good,3,1866,downtown,1866 -0,1,539,good,3,1899,downtown,1899 -2,1,885,poor,50,2974,westbrae,2795.56 -2,1,938,good,7,3497,downtown,3497 -0,1,488,poor,49,1418,westbrae,1335.756 -3,2,1157,good,10,4862,downtown,4862 -3,2,902,poor,29,4456,westbrae,4375.792 -2,1,623,poor,26,2961,westbrae,2925.468 -2,1,803,good,8,3279,downtown,3279 -3,2,1006,poor,44,4416,westbrae,4204.032 -3,2,916,poor,36,4378,westbrae,4237.904 -3,2,1180,good,5,5021,downtown,5021 -0,1,461,poor,31,1547,westbrae,1512.966 -0,1,390,poor,26,1531,westbrae,1512.628 -0,1,271,good,14,1557,downtown,1557 -2,1,745,poor,37,2951,westbrae,2850.666 -3,2,1080,good,14,4771,downtown,4771 -3,2,963,good,3,4734,downtown,4734 -2,1,901,poor,24,3226,westbrae,3200.192 -0,1,307,great,7,2151,south_side,2151 -3,2,871,good,11,4626,downtown,4626 -1,1,649,poor,46,1678,westbrae,1590.744 -0,1,459,poor,37,1464,westbrae,1414.224 -1,1,624,poor,31,1847,westbrae,1806.366 -1,1,350,good,11,1759,downtown,1759 -1,1,600,good,11,2009,downtown,2009 -0,1,541,good,8,1894,downtown,1894 -3,2,939,poor,29,4484,westbrae,4403.288 -2,1,606,great,4,3675,south_side,3675 -3,2,1139,good,11,4861,downtown,4861 -3,2,1234,good,3,5052,downtown,5052 -3,2,1150,good,2,4982,downtown,4982 -3,2,996,good,13,4758,downtown,4758 -2,1,559,great,1,3657,south_side,3657 -1,1,447,good,12,1784,downtown,1784 -1,1,698,poor,23,1998,westbrae,1986.012 -0,1,371,good,13,1639,downtown,1639 -0,1,425,poor,34,1449,westbrae,1408.428 -3,2,948,great,8,5224,south_side,5224 -0,1,441,poor,19,1613,westbrae,1613 -0,1,112,good,7,1487,downtown,1487 -2,1,876,poor,15,3347,westbrae,3347 -3,2,860,good,4,4631,downtown,4631 -0,1,312,poor,26,1479,westbrae,1461.252 -0,1,233,great,10,2079,south_side,2079 -1,1,743,great,3,2732,south_side,2732 -2,1,564,good,6,3137,downtown,3137 
-0,1,335,good,12,1609,downtown,1609 -2,1,879,great,10,3923,south_side,3923 -3,2,1001,poor,57,4245,westbrae,3930.87 -1,1,721,great,3,2701,south_side,2701 -0,1,547,good,2,1936,downtown,1936 -2,1,871,good,3,3434,downtown,3434 -2,1,904,great,8,3938,south_side,3938 -0,1,150,poor,60,957,westbrae,880.44 -2,1,892,poor,18,3303,westbrae,3303 -2,1,886,good,10,3396,downtown,3396 -2,1,619,great,0,3671,south_side,3671 -2,1,598,poor,16,3049,westbrae,3049 -2,1,883,good,8,3411,downtown,3411 -1,1,443,good,10,1820,downtown,1820 -3,2,969,poor,56,4236,westbrae,3931.008 -0,1,457,good,4,1796,downtown,1796 -3,2,1175,good,0,4999,downtown,4999 -0,1,511,great,1,2431,south_side,2431 -1,1,514,poor,51,1536,westbrae,1440.768 -3,2,916,good,3,4716,downtown,4716 -1,1,625,poor,42,1699,westbrae,1624.244 -0,1,442,good,5,1779,downtown,1779 -0,1,310,poor,20,1465,westbrae,1465 -3,2,865,good,0,4742,downtown,4742 -3,2,819,great,12,5049,south_side,5049 -1,1,598,great,7,2499,south_side,2499 -0,1,383,good,11,1707,downtown,1707 -0,1,145,good,6,1442,downtown,1442 -2,1,878,good,8,3379,downtown,3379 -2,1,557,poor,29,2904,westbrae,2851.728 -0,1,505,good,2,1863,downtown,1863 -2,1,646,great,0,3727,south_side,3727 -0,1,123,great,9,1976,south_side,1976 -0,1,375,poor,17,1620,westbrae,1620 -3,2,1124,great,9,5420,south_side,5420 -1,1,704,good,14,2081,downtown,2081 -0,1,294,great,1,2221,south_side,2221 -2,1,850,good,2,3437,downtown,3437 -0,1,376,good,1,1766,downtown,1766 -0,1,150,poor,43,1083,westbrae,1033.182 -3,2,1054,poor,46,4411,westbrae,4181.628 -0,1,191,good,12,1492,downtown,1492 -0,1,300,good,8,1667,downtown,1667 -0,1,283,good,7,1582,downtown,1582 -0,1,454,good,13,1753,downtown,1753 -2,1,589,good,7,3077,downtown,3077 -2,1,906,good,9,3421,downtown,3421 -0,1,222,poor,58,1036,westbrae,957.264 -3,2,1088,poor,52,4393,westbrae,4111.848 -0,1,410,good,3,1740,downtown,1740 -1,1,597,good,9,1978,downtown,1978 -3,2,1006,poor,27,4580,westbrae,4515.88 -1,1,499,poor,38,1623,westbrae,1564.572 -0,1,149,poor,47,1046,westbrae,989.516 
-0,1,426,good,3,1783,downtown,1783 -0,1,361,great,2,2210,south_side,2210 -3,2,880,good,2,4726,downtown,4726 -2,1,935,good,4,3477,downtown,3477 -1,1,363,good,12,1765,downtown,1765 -3,2,1037,poor,35,4578,westbrae,4440.66 -3,2,1145,good,11,4929,downtown,4929 -1,1,435,good,8,1855,downtown,1855 -3,2,1114,great,9,5363,south_side,5363 -3,2,1055,good,0,4933,downtown,4933 -1,1,503,good,9,1866,downtown,1866 -3,2,816,good,8,4594,downtown,4594 -1,1,435,good,6,1884,downtown,1884 -1,1,609,poor,15,1929,westbrae,1929 -3,2,984,good,0,4855,downtown,4855 -1,1,602,good,2,2043,downtown,2043 -2,1,892,good,3,3435,downtown,3435 -0,1,397,good,11,1698,downtown,1698 -3,2,871,poor,31,4397,westbrae,4300.266 -2,1,653,good,9,3147,downtown,3147 -1,1,748,poor,62,1631,westbrae,1493.996 -0,1,235,great,14,2000,south_side,2000 -3,2,1133,good,9,4874,downtown,4874 -0,1,272,poor,57,1117,westbrae,1034.342 -0,1,532,poor,63,1316,westbrae,1202.824 -2,1,855,good,9,3362,downtown,3362 -3,2,1165,good,9,4902,downtown,4902 -3,2,1085,great,13,5279,south_side,5279 -0,1,195,great,6,2039,south_side,2039 -1,1,409,great,4,2380,south_side,2380 -3,2,1186,poor,62,4437,westbrae,4064.292 -1,1,476,great,8,2370,south_side,2370 -1,1,438,great,6,2332,south_side,2332 -3,2,1182,good,6,4994,downtown,4994 -1,1,305,good,14,1675,downtown,1675 -3,2,1179,good,7,4977,downtown,4977 -3,2,1109,good,9,4906,downtown,4906 -0,1,322,good,13,1573,downtown,1573 -2,1,822,good,8,3374,downtown,3374 -0,1,525,good,0,1875,downtown,1875 -3,2,1022,poor,60,4300,westbrae,3956 -0,1,303,poor,48,1179,westbrae,1112.976 -1,1,656,poor,31,1872,westbrae,1830.816 -3,2,1212,poor,16,4906,westbrae,4906 -1,1,743,poor,44,1756,westbrae,1671.712 -1,1,354,good,0,1859,downtown,1859 -1,1,579,poor,51,1550,westbrae,1453.9 -1,1,327,poor,63,1201,westbrae,1097.714 -2,1,550,good,1,3155,downtown,3155 -3,2,914,good,0,4805,downtown,4805 -3,2,1225,poor,56,4504,westbrae,4179.712 -1,1,714,good,8,2135,downtown,2135 -2,1,679,good,1,3251,downtown,3251 
-1,1,719,poor,64,1596,alcatraz_ave,1455.552 -0,1,461,great,6,2269,berkeley_hills,2269 -3,2,1097,good,7,4832,thowsand_oaks,4832 -2,1,525,good,3,3058,thowsand_oaks,3058 -3,2,850,good,1,4718,thowsand_oaks,4718 -3,2,1183,poor,23,4801,alcatraz_ave,4772.194 -3,2,915,great,1,5240,berkeley_hills,5240 -3,2,972,good,5,4794,thowsand_oaks,4794 -3,2,903,poor,27,4528,alcatraz_ave,4464.608 -2,1,738,good,11,3277,thowsand_oaks,3277 -1,1,608,poor,59,1477,alcatraz_ave,1361.794 -2,1,908,poor,49,3013,alcatraz_ave,2838.246 -3,2,961,good,9,4764,thowsand_oaks,4764 -0,1,225,good,3,1626,thowsand_oaks,1626 -1,1,635,good,1,2111,thowsand_oaks,2111 -1,1,519,good,8,1956,thowsand_oaks,1956 -2,1,917,great,13,3901,berkeley_hills,3901 -3,2,1094,great,12,5323,berkeley_hills,5323 -0,1,296,great,5,2151,berkeley_hills,2151 -2,1,536,poor,43,2751,alcatraz_ave,2624.454 -3,2,863,great,4,5151,berkeley_hills,5151 -3,2,953,good,10,4682,thowsand_oaks,4682 -0,1,451,poor,60,1234,alcatraz_ave,1135.28 -3,2,1236,good,11,4993,thowsand_oaks,4993 -2,1,624,poor,47,2738,alcatraz_ave,2590.148 -3,2,902,poor,42,4379,alcatraz_ave,4186.324 -0,1,310,good,12,1638,thowsand_oaks,1638 -2,1,882,good,13,3322,thowsand_oaks,3322 -2,1,939,good,12,3437,thowsand_oaks,3437 -2,1,659,good,14,3166,thowsand_oaks,3166 -2,1,740,good,1,3329,thowsand_oaks,3329 -2,1,697,good,3,3240,thowsand_oaks,3240 -1,1,580,poor,45,1593,alcatraz_ave,1513.35 -0,1,102,great,8,1895,berkeley_hills,1895 -2,1,921,poor,53,3026,alcatraz_ave,2826.284 -1,1,380,poor,30,1577,alcatraz_ave,1545.46 -3,2,869,great,2,5152,berkeley_hills,5152 -1,1,727,good,11,2078,thowsand_oaks,2078 -0,1,240,poor,45,1152,alcatraz_ave,1094.4 -0,1,535,good,14,1830,thowsand_oaks,1830 -2,1,677,poor,49,2778,alcatraz_ave,2616.876 -0,1,187,good,0,1566,thowsand_oaks,1566 -2,1,919,good,12,3419,thowsand_oaks,3419 -3,2,1171,poor,25,4775,alcatraz_ave,4727.25 -3,2,1121,good,10,4877,thowsand_oaks,4877 -0,1,124,good,6,1441,thowsand_oaks,1441 -1,1,326,good,9,1782,thowsand_oaks,1782 
-0,1,469,good,14,1713,thowsand_oaks,1713 -3,2,1102,poor,64,4297,alcatraz_ave,3918.864 -0,1,379,poor,28,1534,alcatraz_ave,1509.456 -3,2,983,great,14,5145,berkeley_hills,5145 -1,1,686,good,4,2175,thowsand_oaks,2175 -0,1,194,great,10,2042,berkeley_hills,2042 -2,1,890,good,6,3476,thowsand_oaks,3476 -0,1,497,good,4,1877,thowsand_oaks,1877 -3,2,831,good,4,4621,thowsand_oaks,4621 -1,1,472,poor,17,1771,alcatraz_ave,1771 -3,2,1032,poor,51,4394,alcatraz_ave,4121.572 -0,1,398,great,12,2151,berkeley_hills,2151 -3,2,913,great,4,5269,berkeley_hills,5269 -0,1,438,great,8,2273,berkeley_hills,2273 -0,1,220,good,3,1548,thowsand_oaks,1548 -3,2,1009,great,8,5326,berkeley_hills,5326 -1,1,649,great,5,2563,berkeley_hills,2563 -0,1,499,good,5,1839,thowsand_oaks,1839 -2,1,677,good,0,3232,thowsand_oaks,3232 -3,2,926,good,13,4679,thowsand_oaks,4679 -3,2,1005,poor,59,4255,alcatraz_ave,3923.11 -0,1,137,good,9,1479,thowsand_oaks,1479 -3,2,1066,good,13,4774,thowsand_oaks,4774 -0,1,436,good,14,1658,thowsand_oaks,1658 -2,1,773,good,12,3207,thowsand_oaks,3207 -3,2,1038,great,8,5318,berkeley_hills,5318 -0,1,103,good,12,1413,thowsand_oaks,1413 -0,1,498,good,10,1801,thowsand_oaks,1801 -0,1,432,poor,55,1303,alcatraz_ave,1211.79 -0,1,127,poor,61,923,alcatraz_ave,847.314 -1,1,490,great,6,2452,berkeley_hills,2452 -3,2,987,great,6,5298,berkeley_hills,5298 -1,1,435,poor,49,1457,alcatraz_ave,1372.494 -2,1,606,good,11,3068,thowsand_oaks,3068 -1,1,638,poor,41,1774,alcatraz_ave,1699.492 -3,2,944,great,2,5321,berkeley_hills,5321 -1,1,601,poor,47,1637,alcatraz_ave,1548.602 -3,2,1024,poor,17,4745,alcatraz_ave,4745 -1,1,729,poor,63,1554,alcatraz_ave,1420.356 -3,2,1226,great,11,5436,berkeley_hills,5436 -0,1,519,good,12,1760,thowsand_oaks,1760 -0,1,399,good,9,1672,thowsand_oaks,1672 -2,1,665,good,12,3106,thowsand_oaks,3106 -2,1,566,great,9,3569,berkeley_hills,3569 -2,1,549,poor,19,2997,alcatraz_ave,2997 -2,1,934,good,2,3529,thowsand_oaks,3529 -0,1,500,poor,42,1482,alcatraz_ave,1416.792 
-0,1,111,great,9,1878,berkeley_hills,1878 -3,2,848,good,6,4599,thowsand_oaks,4599 -0,1,305,good,3,1687,thowsand_oaks,1687 -0,1,361,poor,56,1245,alcatraz_ave,1155.36 -1,1,565,good,6,2038,thowsand_oaks,2038 -0,1,283,poor,54,1115,alcatraz_ave,1039.18 -2,1,505,great,11,3478,berkeley_hills,3478 -0,1,228,great,10,2067,berkeley_hills,2067 -3,2,1119,poor,54,4405,alcatraz_ave,4105.46 -0,1,267,good,2,1635,thowsand_oaks,1635 -3,2,916,good,0,4727,thowsand_oaks,4727 -2,1,903,poor,58,2923,alcatraz_ave,2700.852 -3,2,1046,good,13,4722,thowsand_oaks,4722 -1,1,513,good,11,1924,thowsand_oaks,1924 -3,2,1211,good,1,5043,thowsand_oaks,5043 -0,1,105,poor,28,1207,alcatraz_ave,1187.688 -2,1,553,good,1,3177,thowsand_oaks,3177 -3,2,1044,good,9,4847,thowsand_oaks,4847 -0,1,492,poor,64,1265,alcatraz_ave,1153.68 -2,1,847,good,7,3419,thowsand_oaks,3419 -0,1,232,poor,62,987,alcatraz_ave,904.092 -2,1,774,great,3,3875,berkeley_hills,3875 -2,1,582,poor,60,2591,alcatraz_ave,2383.72 -2,1,652,good,0,3212,thowsand_oaks,3212 -2,1,538,good,10,2988,thowsand_oaks,2988 -0,1,391,poor,53,1245,alcatraz_ave,1162.83 -1,1,723,great,3,2649,berkeley_hills,2649 -3,2,1043,great,2,5356,berkeley_hills,5356 -3,2,893,poor,38,4391,alcatraz_ave,4232.924 -2,1,608,good,9,3109,thowsand_oaks,3109 -0,1,511,great,12,2242,berkeley_hills,2242 -2,1,791,great,1,3870,berkeley_hills,3870 -2,1,792,poor,33,3014,alcatraz_ave,2935.636 -0,1,110,poor,20,1315,alcatraz_ave,1315 -1,1,616,poor,17,1943,alcatraz_ave,1943 -2,1,505,good,2,3080,thowsand_oaks,3080 -1,1,411,good,8,1810,thowsand_oaks,1810 -2,1,846,good,12,3282,thowsand_oaks,3282 -3,2,965,good,4,4811,thowsand_oaks,4811 -1,1,440,good,6,1847,thowsand_oaks,1847 -1,1,386,poor,56,1345,alcatraz_ave,1248.16 -3,2,1247,good,11,4974,thowsand_oaks,4974 -3,2,1058,great,11,5336,berkeley_hills,5336 -2,1,587,poor,63,2572,alcatraz_ave,2350.808 -0,1,145,great,1,2049,berkeley_hills,2049 -3,2,928,poor,47,4324,alcatraz_ave,4090.504 -2,1,735,good,4,3288,thowsand_oaks,3288 
-1,1,433,poor,51,1430,alcatraz_ave,1341.34 -3,2,1009,good,10,4760,thowsand_oaks,4760 -0,1,369,poor,42,1308,alcatraz_ave,1250.448 -1,1,654,poor,36,1841,alcatraz_ave,1782.088 -0,1,144,good,5,1531,thowsand_oaks,1531 -0,1,425,great,13,2162,berkeley_hills,2162 -1,1,509,poor,20,1765,alcatraz_ave,1765 -2,1,615,good,9,3085,thowsand_oaks,3085 -1,1,519,great,6,2453,berkeley_hills,2453 -3,2,1235,great,1,5602,berkeley_hills,5602 -0,1,434,good,0,1800,thowsand_oaks,1800 -3,2,1028,poor,15,4693,alcatraz_ave,4693 -2,1,852,good,12,3369,thowsand_oaks,3369 -3,2,1039,good,1,4887,thowsand_oaks,4887 -3,2,1020,great,5,5319,berkeley_hills,5319 -0,1,354,great,13,2162,berkeley_hills,2162 -3,2,1034,great,12,5259,berkeley_hills,5259 -3,2,800,good,2,4618,thowsand_oaks,4618 -2,1,815,good,0,3436,thowsand_oaks,3436 -0,1,350,poor,61,1096,alcatraz_ave,1006.128 -0,1,489,great,6,2307,berkeley_hills,2307 -1,1,744,poor,37,1919,alcatraz_ave,1853.754 -3,2,874,good,1,4671,thowsand_oaks,4671 -0,1,392,great,7,2205,berkeley_hills,2205 -0,1,485,great,12,2285,berkeley_hills,2285 -0,1,500,poor,27,1632,alcatraz_ave,1609.152 -3,2,1239,poor,24,4876,alcatraz_ave,4836.992 -1,1,702,good,13,2068,thowsand_oaks,2068 -0,1,467,good,1,1869,thowsand_oaks,1869 -2,1,564,good,7,3119,thowsand_oaks,3119 -1,1,560,great,2,2497,berkeley_hills,2497 -0,1,219,good,8,1529,thowsand_oaks,1529 -3,2,1154,great,11,5396,berkeley_hills,5396 -1,1,309,good,2,1783,thowsand_oaks,1783 -2,1,655,poor,32,2921,alcatraz_ave,2850.896 -1,1,338,good,3,1835,thowsand_oaks,1835 -3,2,908,good,9,4620,thowsand_oaks,4620 -1,1,518,poor,60,1385,alcatraz_ave,1274.2 -0,1,279,good,11,1601,thowsand_oaks,1601 -0,1,267,good,7,1580,thowsand_oaks,1580 -2,1,587,good,14,3062,thowsand_oaks,3062 -1,1,648,poor,62,1557,alcatraz_ave,1426.212 -2,1,845,poor,37,3100,alcatraz_ave,2994.6 -2,1,549,great,6,3575,berkeley_hills,3575 -1,1,660,good,8,2092,thowsand_oaks,2092 -2,1,585,good,10,3060,thowsand_oaks,3060 -0,1,279,good,3,1681,thowsand_oaks,1681 
-0,1,538,poor,43,1490,alcatraz_ave,1421.46 -2,1,932,poor,53,3006,alcatraz_ave,2807.604 -2,1,814,great,5,3868,berkeley_hills,3868 -2,1,709,poor,27,3018,alcatraz_ave,2975.748 -2,1,896,good,5,3483,thowsand_oaks,3483 -3,2,1140,good,10,4853,thowsand_oaks,4853 -3,2,1065,poor,32,4633,alcatraz_ave,4521.808 -2,1,735,poor,33,2980,alcatraz_ave,2902.52 -2,1,613,poor,39,2847,alcatraz_ave,2738.814 -1,1,666,poor,38,1775,alcatraz_ave,1711.1 -1,1,300,good,5,1739,thowsand_oaks,1739 -3,2,959,great,3,5286,berkeley_hills,5286 -0,1,135,great,1,2026,berkeley_hills,2026 -1,1,568,poor,32,1722,alcatraz_ave,1680.672 -2,1,824,poor,47,2962,alcatraz_ave,2802.052 -1,1,493,poor,24,1798,alcatraz_ave,1783.616 -2,1,666,great,10,3671,berkeley_hills,3671 -1,1,432,good,14,1782,thowsand_oaks,1782 -2,1,768,great,7,3795,berkeley_hills,3795 -0,1,223,poor,57,1068,alcatraz_ave,988.968 -1,1,568,poor,15,1885,alcatraz_ave,1885 -0,1,503,good,9,1839,thowsand_oaks,1839 -3,2,872,poor,53,4211,alcatraz_ave,3933.074 -1,1,407,great,3,2338,berkeley_hills,2338 -0,1,150,great,3,2041,berkeley_hills,2041 -1,1,701,good,10,2074,thowsand_oaks,2074 -3,2,811,poor,47,4193,alcatraz_ave,3966.578 -3,2,1086,poor,25,4639,alcatraz_ave,4592.61 -3,2,1002,good,2,4824,thowsand_oaks,4824 -1,1,618,poor,16,1954,alcatraz_ave,1954 -2,1,884,great,9,3855,berkeley_hills,3855 -3,2,1125,good,8,4895,thowsand_oaks,4895 -1,1,320,good,1,1789,thowsand_oaks,1789 -2,1,853,good,12,3336,thowsand_oaks,3336 -1,1,399,great,12,2236,berkeley_hills,2236 -3,2,980,great,8,5239,berkeley_hills,5239 -0,1,400,poor,24,1527,alcatraz_ave,1514.784 -2,1,836,great,5,3927,berkeley_hills,3927 -3,2,1175,poor,57,4465,alcatraz_ave,4134.59 -3,2,1094,good,6,4883,thowsand_oaks,4883 -2,1,925,good,13,3355,thowsand_oaks,3355 -2,1,736,poor,60,2708,alcatraz_ave,2491.36 -2,1,729,good,9,3280,thowsand_oaks,3280 -1,1,540,poor,60,1463,alcatraz_ave,1345.96 -3,2,858,poor,31,4366,alcatraz_ave,4269.948 -1,1,570,great,13,2421,berkeley_hills,2421 -2,1,664,great,0,3749,berkeley_hills,3749 
-0,1,378,good,13,1653,thowsand_oaks,1653 -1,1,670,good,3,2149,thowsand_oaks,2149 -0,1,474,great,4,2382,berkeley_hills,2382 -3,2,1157,great,10,5453,berkeley_hills,5453 -1,1,417,great,11,2276,berkeley_hills,2276 -0,1,197,good,7,1576,thowsand_oaks,1576 -1,1,668,good,0,2191,thowsand_oaks,2191 -0,1,123,good,1,1485,thowsand_oaks,1485 -3,2,875,poor,58,4096,alcatraz_ave,3784.704 -3,2,1131,good,6,4957,thowsand_oaks,4957 -1,1,339,poor,22,1660,alcatraz_ave,1653.36 -2,1,921,good,5,3473,thowsand_oaks,3473 -3,2,894,poor,57,4200,alcatraz_ave,3889.2 -1,1,594,poor,57,1487,alcatraz_ave,1376.962 -2,1,935,great,14,3884,berkeley_hills,3884 -0,1,523,poor,38,1561,alcatraz_ave,1504.804 -0,1,535,poor,45,1451,alcatraz_ave,1378.45 -1,1,585,poor,56,1509,alcatraz_ave,1400.352 -1,1,385,poor,28,1639,alcatraz_ave,1612.776 -2,1,578,poor,16,3065,alcatraz_ave,3065 -1,1,678,good,14,2081,thowsand_oaks,2081 -0,1,493,poor,24,1636,alcatraz_ave,1622.912 -2,1,874,good,8,3359,thowsand_oaks,3359 -1,1,328,great,0,2285,berkeley_hills,2285 -0,1,549,poor,59,1397,alcatraz_ave,1288.034 -1,1,519,good,1,2014,thowsand_oaks,2014 -2,1,554,good,13,2996,thowsand_oaks,2996 -0,1,283,great,10,2072,berkeley_hills,2072 -1,1,557,good,14,1960,thowsand_oaks,1960 -0,1,148,great,6,2028,berkeley_hills,2028 -1,1,560,good,9,1992,thowsand_oaks,1992 -3,2,824,poor,49,4217,alcatraz_ave,3972.414 -0,1,177,good,7,1471,thowsand_oaks,1471 -2,1,776,great,2,3881,berkeley_hills,3881 -1,1,332,poor,15,1703,alcatraz_ave,1703 -1,1,722,great,13,2633,berkeley_hills,2633 -2,1,641,great,3,3751,berkeley_hills,3751 -3,2,829,poor,16,4508,alcatraz_ave,4508 -2,1,647,poor,19,3028,alcatraz_ave,3028 -2,1,619,good,5,3208,thowsand_oaks,3208 -0,1,188,good,7,1477,thowsand_oaks,1477 -3,2,1146,good,14,4836,thowsand_oaks,4836 -1,1,338,good,10,1783,thowsand_oaks,1783 -3,2,1232,good,0,5124,thowsand_oaks,5124 -3,2,1054,great,8,5335,berkeley_hills,5335 -2,1,916,great,3,3974,berkeley_hills,3974 -0,1,169,poor,24,1361,alcatraz_ave,1350.112 
-1,1,431,good,13,1775,thowsand_oaks,1775 -3,2,840,great,9,5130,berkeley_hills,5130 -3,2,926,good,0,4820,thowsand_oaks,4820 -3,2,1245,great,8,5506,berkeley_hills,5506 -2,1,694,poor,41,2874,alcatraz_ave,2753.292 -3,2,881,great,0,5199,berkeley_hills,5199 -3,2,1199,poor,62,4418,alcatraz_ave,4046.888 -2,1,913,great,12,3901,berkeley_hills,3901 -0,1,409,poor,60,1190,alcatraz_ave,1094.8 -1,1,627,great,13,2505,berkeley_hills,2505 -1,1,510,great,10,2438,berkeley_hills,2438 -3,2,1218,poor,20,4906,alcatraz_ave,4906 -2,1,778,poor,34,3057,alcatraz_ave,2971.404 -3,2,891,good,12,4644,thowsand_oaks,4644 -2,1,587,poor,27,2928,alcatraz_ave,2887.008 -3,2,1020,good,4,4825,thowsand_oaks,4825 -3,2,1226,good,11,4936,thowsand_oaks,4936 -2,1,696,good,10,3182,thowsand_oaks,3182 -1,1,486,great,4,2412,berkeley_hills,2412 -1,1,426,good,12,1810,thowsand_oaks,1810 -2,1,845,poor,32,3121,alcatraz_ave,3046.096 -1,1,689,good,5,2089,thowsand_oaks,2089 -2,1,918,great,10,3929,berkeley_hills,3929 -3,2,1104,poor,43,4573,alcatraz_ave,4362.642 -3,2,1182,good,11,4944,thowsand_oaks,4944 -1,1,461,good,2,1965,thowsand_oaks,1965 -0,1,461,good,1,1819,thowsand_oaks,1819 -3,2,1206,good,2,5015,thowsand_oaks,5015 -0,1,237,poor,21,1409,alcatraz_ave,1406.182 -1,1,451,poor,41,1520,alcatraz_ave,1456.16 -2,1,903,poor,26,3193,alcatraz_ave,3154.684 -2,1,560,good,5,3089,thowsand_oaks,3089 -1,1,406,poor,55,1397,alcatraz_ave,1299.21 -0,1,547,poor,29,1641,alcatraz_ave,1611.462 -1,1,533,good,1,2000,thowsand_oaks,2000 -1,1,588,poor,50,1623,alcatraz_ave,1525.62 -1,1,471,poor,53,1450,northwest,1354.3 -3,2,1066,great,8,5301,east_elmwood,5301 -1,1,503,poor,17,1860,northwest,1860 -3,2,1024,good,14,4704,west_welmwood,4704 -0,1,323,poor,59,1127,northwest,1039.094 -1,1,687,poor,61,1616,northwest,1483.488 -3,2,995,good,2,4861,west_welmwood,4861 -0,1,357,good,3,1701,west_welmwood,1701 -2,1,723,poor,15,3170,northwest,3170 -3,2,841,good,11,4609,west_welmwood,4609 -0,1,276,great,4,2104,east_elmwood,2104 
-3,2,1077,poor,62,4344,northwest,3979.104 -0,1,304,good,0,1683,west_welmwood,1683 -3,2,990,good,6,4820,west_welmwood,4820 -3,2,863,good,12,4600,west_welmwood,4600 -0,1,119,poor,25,1264,northwest,1251.36 -0,1,234,good,12,1492,west_welmwood,1492 -0,1,157,good,1,1536,west_welmwood,1536 -0,1,524,great,10,2358,east_elmwood,2358 -0,1,502,poor,47,1436,northwest,1358.456 -1,1,449,poor,51,1472,northwest,1380.736 -2,1,592,good,12,3074,west_welmwood,3074 -1,1,387,good,11,1813,west_welmwood,1813 -2,1,873,good,3,3435,west_welmwood,3435 -3,2,827,good,0,4632,west_welmwood,4632 -1,1,593,poor,63,1505,northwest,1375.57 -1,1,479,good,2,1995,west_welmwood,1995 -2,1,788,good,0,3396,west_welmwood,3396 -1,1,737,good,9,2146,west_welmwood,2146 -1,1,467,good,2,1979,west_welmwood,1979 -0,1,424,good,1,1813,west_welmwood,1813 -1,1,471,great,5,2402,east_elmwood,2402 -1,1,547,good,5,2040,west_welmwood,2040 -1,1,328,poor,35,1512,northwest,1466.64 -0,1,154,good,9,1432,west_welmwood,1432 -2,1,701,poor,31,3003,northwest,2936.934 -3,2,1169,poor,31,4758,northwest,4653.324 -3,2,1024,good,1,4856,west_welmwood,4856 -1,1,633,great,12,2550,east_elmwood,2550 -0,1,501,good,10,1825,west_welmwood,1825 -2,1,771,good,0,3340,west_welmwood,3340 -1,1,730,poor,59,1641,northwest,1513.002 -0,1,545,good,0,1980,west_welmwood,1980 -2,1,667,great,9,3647,east_elmwood,3647 -0,1,513,good,10,1779,west_welmwood,1779 -3,2,1152,good,12,4870,west_welmwood,4870 -0,1,333,good,2,1748,west_welmwood,1748 -2,1,919,good,2,3450,west_welmwood,3450 -1,1,372,poor,64,1261,northwest,1150.032 -1,1,671,poor,35,1823,northwest,1768.31 -0,1,319,good,0,1680,west_welmwood,1680 -3,2,867,poor,63,4060,northwest,3710.84 -0,1,320,good,12,1594,west_welmwood,1594 -1,1,594,great,8,2484,east_elmwood,2484 -2,1,882,good,14,3389,west_welmwood,3389 -3,2,1062,poor,56,4374,northwest,4059.072 -3,2,823,poor,59,4046,northwest,3730.412 -2,1,874,good,6,3450,west_welmwood,3450 -2,1,687,poor,30,2947,northwest,2888.06 -2,1,621,good,3,3150,west_welmwood,3150 
-2,1,705,great,2,3812,east_elmwood,3812 -1,1,658,poor,41,1741,northwest,1667.878 -1,1,314,poor,58,1211,northwest,1118.964 -2,1,831,good,2,3439,west_welmwood,3439 -0,1,290,poor,59,1068,northwest,984.696 -0,1,398,poor,31,1499,northwest,1466.022 -2,1,729,good,12,3195,west_welmwood,3195 -0,1,238,poor,17,1471,northwest,1471 -0,1,538,good,10,1863,west_welmwood,1863 -2,1,631,poor,23,3019,northwest,3000.886 -0,1,542,poor,45,1474,northwest,1400.3 -3,2,883,poor,34,4365,northwest,4242.78 -2,1,703,poor,46,2811,northwest,2664.828 -1,1,441,poor,55,1396,northwest,1298.28 -3,2,1231,poor,62,4425,northwest,4053.3 -1,1,692,great,8,2590,east_elmwood,2590 -0,1,540,good,7,1834,west_welmwood,1834 -1,1,326,good,3,1773,west_welmwood,1773 -2,1,839,great,9,3816,east_elmwood,3816 -3,2,1173,great,4,5445,east_elmwood,5445 -1,1,569,poor,17,1937,northwest,1937 -2,1,622,great,10,3635,east_elmwood,3635 -2,1,716,poor,17,3183,northwest,3183 -1,1,548,good,12,1973,west_welmwood,1973 -2,1,591,poor,28,2954,northwest,2906.736 -3,2,868,poor,22,4453,northwest,4435.188 -2,1,853,poor,27,3213,northwest,3168.018 -1,1,733,great,2,2760,east_elmwood,2760 -2,1,710,good,14,3196,west_welmwood,3196 -0,1,151,good,0,1535,west_welmwood,1535 -3,2,1013,poor,25,4624,northwest,4577.76 -0,1,289,poor,54,1157,northwest,1078.324 -0,1,440,good,12,1746,west_welmwood,1746 -2,1,714,good,11,3202,west_welmwood,3202 -3,2,931,good,1,4762,west_welmwood,4762 -3,2,938,poor,19,4634,northwest,4634 -3,2,1191,great,0,5558,east_elmwood,5558 -1,1,495,good,6,1945,west_welmwood,1945 -3,2,1077,poor,20,4722,northwest,4722 -2,1,782,great,11,3744,east_elmwood,3744 -3,2,1099,good,7,4893,west_welmwood,4893 -0,1,398,great,13,2128,east_elmwood,2128 -1,1,385,good,13,1775,west_welmwood,1775 -0,1,213,good,13,1526,west_welmwood,1526 -0,1,180,poor,37,1199,northwest,1158.234 -3,2,979,poor,34,4456,northwest,4331.232 -2,1,757,good,5,3312,west_welmwood,3312 -1,1,673,great,9,2619,east_elmwood,2619 -1,1,438,good,9,1798,west_welmwood,1798 
-0,1,110,great,2,2037,east_elmwood,2037 -1,1,577,good,5,2017,west_welmwood,2017 -2,1,905,good,4,3459,west_welmwood,3459 -3,2,856,good,4,4712,west_welmwood,4712 -3,2,1082,great,12,5273,east_elmwood,5273 -0,1,174,good,14,1429,west_welmwood,1429 -1,1,435,poor,60,1303,northwest,1198.76 -2,1,765,poor,38,2952,northwest,2845.728 -1,1,628,good,10,2040,west_welmwood,2040 -1,1,735,great,10,2621,east_elmwood,2621 -1,1,568,good,14,1947,west_welmwood,1947 -1,1,625,good,10,2008,west_welmwood,2008 -2,1,846,good,2,3470,west_welmwood,3470 -2,1,518,good,3,3094,west_welmwood,3094 -3,2,1185,poor,53,4487,northwest,4190.858 -0,1,531,good,7,1906,west_welmwood,1906 -3,2,1245,good,3,5057,west_welmwood,5057 -0,1,384,poor,36,1416,northwest,1370.688 -3,2,940,good,9,4741,west_welmwood,4741 -1,1,740,good,1,2265,west_welmwood,2265 -1,1,307,great,0,2348,east_elmwood,2348 -2,1,548,good,10,3086,west_welmwood,3086 -0,1,461,great,1,2390,east_elmwood,2390 -3,2,1145,good,4,4947,west_welmwood,4947 -0,1,522,good,7,1827,west_welmwood,1827 -0,1,324,good,14,1570,west_welmwood,1570 -2,1,528,poor,34,2823,northwest,2743.956 -0,1,498,poor,63,1259,northwest,1150.726 -1,1,725,good,8,2100,west_welmwood,2100 -2,1,612,good,7,3183,west_welmwood,3183 -1,1,496,poor,59,1430,northwest,1318.46 -2,1,769,poor,49,2845,northwest,2679.99 -3,2,1034,good,6,4811,west_welmwood,4811 -2,1,646,poor,57,2718,northwest,2516.868 -3,2,901,good,12,4635,west_welmwood,4635 -3,2,1167,good,11,4895,west_welmwood,4895 -0,1,405,poor,45,1319,northwest,1253.05 -3,2,1242,good,14,4936,west_welmwood,4936 -1,1,599,good,3,2084,west_welmwood,2084 -1,1,587,good,3,2101,west_welmwood,2101 -2,1,810,good,3,3398,west_welmwood,3398 -1,1,709,good,9,2161,west_welmwood,2161 -0,1,497,good,0,1906,west_welmwood,1906 -1,1,305,good,0,1794,west_welmwood,1794 -2,1,667,good,6,3220,west_welmwood,3220 -3,2,895,good,11,4663,west_welmwood,4663 -3,2,1161,good,9,4906,west_welmwood,4906 -0,1,387,poor,29,1501,northwest,1473.982 -1,1,503,good,0,2014,west_welmwood,2014 
-2,1,856,great,4,3881,east_elmwood,3881 -1,1,444,great,11,2327,east_elmwood,2327 -0,1,207,good,3,1563,west_welmwood,1563 -1,1,676,good,1,2204,west_welmwood,2204 -3,2,1059,poor,25,4689,northwest,4642.11 -1,1,573,great,9,2436,east_elmwood,2436 -0,1,285,good,13,1595,west_welmwood,1595 -0,1,490,good,1,1864,west_welmwood,1864 -3,2,824,good,3,4631,west_welmwood,4631 -0,1,234,good,7,1562,west_welmwood,1562 -2,1,920,poor,15,3354,northwest,3354 -3,2,966,good,14,4689,west_welmwood,4689 -0,1,423,good,12,1725,west_welmwood,1725 -2,1,932,great,2,3980,east_elmwood,3980 -0,1,419,poor,28,1506,northwest,1481.904 -2,1,752,good,5,3342,west_welmwood,3342 -2,1,671,poor,51,2775,northwest,2602.95 -2,1,628,poor,51,2671,northwest,2505.398 -3,2,1058,good,12,4808,west_welmwood,4808 -2,1,668,poor,62,2605,northwest,2386.18 -0,1,131,poor,37,1125,northwest,1086.75 -0,1,269,good,9,1565,west_welmwood,1565 -1,1,663,good,9,2083,west_welmwood,2083 -3,2,1219,poor,34,4769,northwest,4635.468 -1,1,536,poor,41,1627,northwest,1558.666 -0,1,215,good,11,1532,west_welmwood,1532 -1,1,558,poor,51,1500,northwest,1407 -3,2,1242,great,1,5577,east_elmwood,5577 -1,1,672,poor,30,1845,northwest,1808.1 -1,1,497,good,4,1940,west_welmwood,1940 -1,1,598,great,2,2549,east_elmwood,2549 -3,2,1134,good,0,4959,west_welmwood,4959 -3,2,1174,great,11,5380,east_elmwood,5380 -1,1,476,poor,30,1634,northwest,1601.32 -0,1,228,great,1,2129,east_elmwood,2129 -0,1,389,great,12,2136,east_elmwood,2136 -0,1,489,great,7,2285,east_elmwood,2285 -1,1,352,poor,48,1357,northwest,1281.008 -0,1,544,good,7,1923,west_welmwood,1923 -1,1,604,poor,29,1776,northwest,1744.032 -1,1,528,good,5,2023,west_welmwood,2023 -3,2,1036,great,12,5305,east_elmwood,5305 -3,2,1060,poor,42,4462,northwest,4265.672 -2,1,872,good,2,3433,west_welmwood,3433 -1,1,477,good,4,1894,west_welmwood,1894 -2,1,933,poor,16,3398,northwest,3398 -1,1,511,good,10,1941,west_welmwood,1941 -0,1,228,poor,19,1443,northwest,1443 -2,1,819,poor,55,2869,northwest,2668.17 
-1,1,556,great,10,2447,east_elmwood,2447 -0,1,318,poor,33,1376,northwest,1340.224 -3,2,814,good,3,4680,west_welmwood,4680 -3,2,1215,poor,48,4631,northwest,4371.664 -0,1,190,poor,44,1195,northwest,1137.64 -2,1,594,good,1,3217,west_welmwood,3217 -0,1,310,poor,39,1358,northwest,1306.396 -2,1,629,great,7,3690,east_elmwood,3690 -3,2,957,good,7,4687,west_welmwood,4687 -3,2,1056,great,10,5328,east_elmwood,5328 -2,1,741,good,0,3370,west_welmwood,3370 -1,1,447,good,10,1889,west_welmwood,1889 -2,1,772,great,0,3839,east_elmwood,3839 -2,1,669,poor,32,2932,northwest,2861.632 -1,1,485,poor,39,1613,northwest,1551.706 -1,1,462,great,5,2387,east_elmwood,2387 -1,1,430,great,6,2332,east_elmwood,2332 -3,2,1183,good,7,4920,west_welmwood,4920 -3,2,875,poor,27,4471,northwest,4408.406 -1,1,496,good,14,1839,west_welmwood,1839 -3,2,993,great,6,5290,east_elmwood,5290 -0,1,446,poor,38,1448,northwest,1395.872 -0,1,431,good,1,1855,west_welmwood,1855 -0,1,300,great,1,2228,east_elmwood,2228 -0,1,304,good,5,1610,west_welmwood,1610 -2,1,748,poor,38,3003,northwest,2894.892 -0,1,182,poor,49,1116,northwest,1051.272 -0,1,198,good,1,1577,west_welmwood,1577 -3,2,1129,poor,52,4475,northwest,4188.6 -3,2,1005,good,2,4807,west_welmwood,4807 -2,1,509,great,14,3502,east_elmwood,3502 -2,1,731,poor,29,3058,northwest,3002.956 -2,1,809,poor,45,2996,northwest,2846.2 -0,1,232,poor,44,1239,northwest,1179.528 -0,1,247,poor,27,1422,northwest,1402.092 -1,1,682,great,4,2689,east_elmwood,2689 -1,1,493,poor,34,1617,northwest,1571.724 -1,1,536,good,2,2043,west_welmwood,2043 -0,1,513,poor,59,1343,northwest,1238.246 -3,2,1218,good,1,5076,west_welmwood,5076 -3,2,1072,good,4,4875,west_welmwood,4875 -2,1,778,good,0,3362,west_welmwood,3362 -2,1,773,good,12,3290,west_welmwood,3290 -0,1,372,great,1,2268,east_elmwood,2268 -3,2,1146,poor,31,4694,northwest,4590.732 -3,2,904,good,7,4670,west_welmwood,4670 -0,1,263,good,6,1621,west_welmwood,1621 -2,1,784,good,5,3295,west_welmwood,3295 -1,1,669,great,13,2555,east_elmwood,2555 
-0,1,488,good,4,1891,west_welmwood,1891 -0,1,231,good,11,1510,west_welmwood,1510 -2,1,849,good,2,3399,west_welmwood,3399 -2,1,621,good,3,3227,west_welmwood,3227 -1,1,483,good,8,1884,west_welmwood,1884 -0,1,243,good,5,1561,west_welmwood,1561 -2,1,840,good,10,3349,west_welmwood,3349 -3,2,1141,good,11,4852,west_welmwood,4852 -0,1,311,poor,40,1295,northwest,1243.2 -3,2,832,great,3,5146,east_elmwood,5146 -2,1,546,good,9,3092,west_welmwood,3092 -0,1,483,good,10,1801,west_welmwood,1801 -1,1,390,poor,63,1301,northwest,1189.114 -1,1,473,poor,37,1636,northwest,1580.376 -3,2,1080,good,3,4884,west_welmwood,4884 -2,1,815,poor,37,3029,northwest,2926.014 -0,1,289,poor,22,1430,northwest,1424.28 -1,1,372,good,11,1797,west_welmwood,1797 -2,1,949,great,10,3939,east_elmwood,3939 -2,1,754,poor,31,3015,northwest,2948.67 -0,1,105,poor,53,926,northwest,864.884 -2,1,848,good,11,3306,west_welmwood,3306 -1,1,737,poor,35,1901,northwest,1843.97 -2,1,856,poor,26,3175,northwest,3136.9 -3,2,953,poor,46,4294,northwest,4070.712 -0,1,449,poor,45,1435,northwest,1363.25 -3,2,1134,good,7,4922,west_welmwood,4922 -2,1,601,good,0,3153,west_welmwood,3153 -2,1,743,good,10,3230,west_welmwood,3230 -1,1,436,good,9,1850,west_welmwood,1850 -0,1,280,good,1,1701,west_welmwood,1701 -2,1,923,good,3,3522,west_welmwood,3522 -0,1,364,great,5,2237,east_elmwood,2237 -0,1,246,good,8,1608,west_welmwood,1608 -0,1,202,poor,23,1399,northwest,1390.606 -0,1,489,good,12,1738,west_welmwood,1738 -1,1,517,good,8,1963,west_welmwood,1963 -0,1,300,good,6,1661,west_welmwood,1661 -2,1,587,good,2,3132,west_welmwood,3132 -1,1,350,good,1,1852,west_welmwood,1852 -2,1,776,good,1,3354,west_welmwood,3354 -0,1,142,good,12,1418,west_welmwood,1418 -2,1,802,great,1,3885,east_elmwood,3885 -0,1,454,poor,17,1662,northwest,1662 -1,1,524,poor,64,1380,northwest,1258.56 -2,1,553,great,14,3485,east_elmwood,3485 -3,2,1139,great,13,5397,east_elmwood,5397 -2,1,820,good,8,3324,west_welmwood,3324 -0,1,505,good,14,1738,west_welmwood,1738 
-0,1,340,poor,57,1190,northwest,1101.94 -3,2,1189,good,8,4956,west_welmwood,4956 -3,2,1165,great,0,5525,east_elmwood,5525 -0,1,400,poor,38,1407,northwest,1356.348 -3,2,1214,great,14,5405,east_elmwood,5405 -1,1,493,good,13,1846,west_welmwood,1846 -0,1,344,poor,64,1078,northwest,983.136 -0,1,367,poor,57,1230,northwest,1138.98 -2,1,864,great,5,3935,east_elmwood,3935 -1,1,384,poor,36,1499,northwest,1451.032 -2,1,948,good,3,3549,west_welmwood,3549 -0,1,390,poor,32,1479,northwest,1443.504 -0,1,435,good,12,1683,west_welmwood,1683 -1,1,725,great,10,2672,east_elmwood,2672 -0,1,456,poor,39,1418,northwest,1364.116 -2,1,788,good,5,3295,west_welmwood,3295 -2,1,810,good,10,3331,west_welmwood,3331 -0,1,456,good,13,1721,west_welmwood,1721 -1,1,349,great,0,2333,east_elmwood,2333 -3,2,1065,good,3,4848,west_welmwood,4848 -2,1,623,poor,36,2911,northwest,2817.848 -1,1,543,good,8,1952,west_welmwood,1952 -3,2,1047,good,8,4816,west_welmwood,4816 -0,1,189,poor,63,967,northwest,883.838 -1,1,323,great,4,2239,east_elmwood,2239 -0,1,221,poor,20,1454,northwest,1454 -3,2,1022,poor,20,4672,northwest,4672 -1,1,539,good,10,1987,west_welmwood,1987 -1,1,619,poor,29,1848,northwest,1814.736 -2,1,742,poor,42,2971,northwest,2840.276 -0,1,481,poor,45,1471,northwest,1397.45 -3,2,1246,great,10,5540,east_elmwood,5540 -1,1,530,great,1,2546,east_elmwood,2546 -2,1,536,poor,37,2790,northwest,2695.14 -3,2,1193,great,13,5424,east_elmwood,5424 -1,1,347,good,3,1806,west_welmwood,1806 -3,2,970,good,14,4640,west_welmwood,4640 -3,2,1081,poor,26,4643,northwest,4587.284 -3,2,885,good,4,4701,west_welmwood,4701 -3,2,803,great,1,5129,east_elmwood,5129 -2,1,785,poor,25,3177,northwest,3145.23 -3,2,1042,poor,36,4498,northwest,4354.064 -2,1,576,good,1,3173,west_welmwood,3173 -0,1,412,poor,41,1365,northwest,1307.67 -3,2,1189,good,8,4996,west_welmwood,4996 -2,1,771,great,2,3826,east_elmwood,3826 -2,1,847,poor,59,2876,northwest,2651.672 -3,2,1114,great,9,5355,east_elmwood,5355 -2,1,618,great,2,3746,east_elmwood,3746 
-0,1,424,good,7,1732,west_welmwood,1732 -0,1,494,good,12,1786,west_welmwood,1786 -3,2,1113,great,3,5447,east_elmwood,5447 -3,2,1155,great,10,5451,east_elmwood,5451 -0,1,123,good,0,1527,west_welmwood,1527 -0,1,264,poor,62,1037,northwest,949.892 -2,1,864,good,3,3456,west_welmwood,3456 -1,1,440,good,3,1894,west_welmwood,1894 -3,2,1023,poor,54,4351,northwest,4055.132 -1,1,600,good,5,2020,west_welmwood,2020 -1,1,671,good,5,2156,west_welmwood,2156 -1,1,400,good,11,1789,west_welmwood,1789 -3,2,813,great,4,5096,east_elmwood,5096 -2,1,563,good,8,3096,west_welmwood,3096 -1,1,304,good,9,1759,west_welmwood,1759 -2,1,783,poor,26,3143,northwest,3105.284 -1,1,698,poor,39,1845,northwest,1774.89 -2,1,935,great,13,3903,east_elmwood,3903 -1,1,711,good,13,2033,west_welmwood,2033 -3,2,926,good,1,4723,west_welmwood,4723 -3,2,901,poor,31,4471,northwest,4372.638 -3,2,1002,poor,39,4485,northwest,4314.57 -1,1,442,good,0,1929,west_welmwood,1929 -0,1,483,poor,55,1337,northwest,1243.41 -0,1,191,poor,51,1077,northwest,1010.226 -3,2,882,poor,29,4466,northwest,4385.612 -1,1,531,poor,28,1739,northwest,1711.176 -1,1,495,good,4,1938,west_welmwood,1938 -3,2,857,poor,53,4161,northwest,3886.374 -1,1,561,poor,47,1543,northwest,1459.678 -0,1,256,poor,21,1399,northwest,1396.202 -2,1,643,good,14,3123,west_welmwood,3123 -3,2,1112,good,14,4782,west_welmwood,4782 -3,2,1153,good,13,4882,west_welmwood,4882 -2,1,532,great,10,3509,east_elmwood,3509 -2,1,519,poor,53,2588,northwest,2417.192 -1,1,661,poor,61,1591,northwest,1460.538 -0,1,226,poor,56,1085,northwest,1006.88 -0,1,286,great,6,2129,east_elmwood,2129 -0,1,205,good,9,1508,west_welmwood,1508 -0,1,231,good,1,1575,west_welmwood,1575 -3,2,1103,good,12,4872,west_welmwood,4872 -1,1,586,poor,48,1577,northwest,1488.688 -2,1,552,good,9,3046,west_welmwood,3046 -2,1,927,poor,26,3240,northwest,3201.12 -1,1,395,poor,60,1325,northwest,1219 -0,1,220,great,10,2010,east_elmwood,2010 -3,2,1054,good,11,4794,west_welmwood,4794 -1,1,611,good,0,2151,west_welmwood,2151 
-0,1,128,good,14,1343,west_welmwood,1343 -3,2,813,great,13,5004,east_elmwood,5004 -3,2,1041,poor,48,4394,northwest,4147.936 -0,1,316,great,9,2138,east_elmwood,2138 -3,2,853,good,14,4546,west_welmwood,4546 -2,1,662,poor,61,2610,northwest,2395.98 -3,2,831,great,9,5102,east_elmwood,5102 -2,1,897,good,3,3432,west_welmwood,3432 -3,2,1066,good,13,4777,west_welmwood,4777 -1,1,663,poor,53,1650,northwest,1541.1 -0,1,463,poor,33,1492,northwest,1453.208 -0,1,442,great,14,2174,east_elmwood,2174 -0,1,226,good,5,1583,west_welmwood,1583 -1,1,519,poor,59,1391,northwest,1282.502 -0,1,438,good,11,1739,west_welmwood,1739 -2,1,663,good,1,3234,west_welmwood,3234 -1,1,323,great,9,2229,east_elmwood,2229 -3,2,898,great,10,5150,east_elmwood,5150 -3,2,1097,poor,56,4358,northwest,4044.224 -3,2,1094,good,7,4862,west_welmwood,4862 -3,2,1245,great,13,5482,east_elmwood,5482 -3,2,967,good,12,4706,west_welmwood,4706 -2,1,866,poor,46,3038,northwest,2880.024 -3,2,972,good,2,4766,west_welmwood,4766 -0,1,531,poor,15,1819,northwest,1819 -1,1,429,good,11,1785,west_welmwood,1785 -0,1,530,good,10,1853,west_welmwood,1853 -1,1,710,good,9,2089,west_welmwood,2089 -3,2,1166,poor,37,4682,northwest,4522.812 -3,2,1184,good,9,4962,west_welmwood,4962 -3,2,872,poor,52,4167,northwest,3900.312 -2,1,606,good,12,3071,west_welmwood,3071 -2,1,724,poor,55,2790,northwest,2594.7 -0,1,176,great,1,2035,east_elmwood,2035 -1,1,578,good,14,1903,west_welmwood,1903 -1,1,665,great,4,2633,east_elmwood,2633 -3,2,813,poor,58,4065,northwest,3756.06 -1,1,499,poor,46,1498,northwest,1420.104 -0,1,225,good,7,1578,west_welmwood,1578 -3,2,1157,great,14,5317,east_elmwood,5317 -0,1,454,poor,60,1301,northwest,1196.92 -1,1,407,good,14,1750,west_welmwood,1750 -0,1,197,poor,58,992,northwest,916.608 -2,1,774,great,3,3821,east_elmwood,3821 -3,2,1027,good,6,4843,west_welmwood,4843 -0,1,293,poor,35,1379,northwest,1337.63 -0,1,446,good,11,1780,west_welmwood,1780 -3,2,1134,poor,59,4438,northwest,4091.836 -0,1,493,good,9,1798,west_welmwood,1798 
-3,2,1098,poor,63,4331,northwest,3958.534 -3,2,1096,poor,27,4639,northwest,4574.054 -2,1,689,good,5,3261,west_welmwood,3261 -2,1,699,great,14,3682,east_elmwood,3682 -1,1,637,great,11,2546,east_elmwood,2546 -1,1,344,good,2,1810,west_welmwood,1810 -0,1,154,good,9,1454,west_welmwood,1454 -1,1,726,good,11,2079,west_welmwood,2079 -1,1,698,good,5,2159,west_welmwood,2159 -3,2,1177,good,4,4958,west_welmwood,4958 -3,2,1058,great,6,5359,east_elmwood,5359 -0,1,489,great,4,2324,east_elmwood,2324 -1,1,347,poor,59,1304,northwest,1202.288 -2,1,901,poor,21,3307,northwest,3300.386 -3,2,1238,poor,53,4517,northwest,4218.878 -3,2,883,poor,64,4064,northwest,3706.368 -0,1,366,good,11,1635,west_welmwood,1635 -2,1,922,great,2,4049,east_elmwood,4049 -3,2,1000,good,13,4694,west_welmwood,4694 -1,1,601,great,13,2520,east_elmwood,2520 -2,1,911,poor,40,3153,northwest,3026.88 -1,1,518,good,13,1922,west_welmwood,1922 -2,1,504,good,12,2990,west_welmwood,2990 -1,1,350,good,6,1800,west_welmwood,1800 -0,1,224,poor,55,1094,northwest,1017.42 -3,2,830,poor,15,4536,northwest,4536 -2,1,659,poor,53,2693,northwest,2515.262 -1,1,525,poor,21,1789,northwest,1785.422 -3,2,1169,poor,61,4381,northwest,4021.758 -1,1,421,poor,27,1634,northwest,1611.124 -0,1,235,poor,21,1436,northwest,1433.128 -1,1,391,poor,19,1710,northwest,1710 -0,1,322,good,0,1713,west_welmwood,1713 -3,2,921,great,8,5141,east_elmwood,5141 -0,1,428,poor,33,1484,northwest,1445.416 -3,2,929,poor,31,4510,northwest,4410.78 -1,1,749,good,7,2148,west_welmwood,2148 -2,1,632,great,2,3692,east_elmwood,3692 -2,1,780,great,1,3856,east_elmwood,3856 -2,1,809,good,13,3238,west_welmwood,3238 -0,1,456,good,1,1850,west_welmwood,1850 -2,1,783,good,1,3379,west_welmwood,3379 -0,1,546,poor,44,1517,northwest,1444.184 -0,1,254,good,1,1662,west_welmwood,1662 -0,1,117,great,14,1879,east_elmwood,1879 -1,1,541,poor,51,1482,northwest,1390.116 -1,1,702,good,6,2182,west_welmwood,2182 -1,1,404,good,6,1880,west_welmwood,1880 -1,1,437,great,6,2392,east_elmwood,2392 
-3,2,1059,poor,62,4335,zaytuna_college,3970.86 -2,1,547,good,7,3028,north_berkeley,3028 -2,1,814,poor,52,2891,zaytuna_college,2705.976 -2,1,647,good,14,3059,north_berkeley,3059 -2,1,846,good,0,3434,north_berkeley,3434 -3,2,1009,good,7,4826,north_berkeley,4826 -3,2,1137,good,3,4949,north_berkeley,4949 -3,2,1054,good,4,4820,north_berkeley,4820 -3,2,818,good,0,4651,north_berkeley,4651 -3,2,1155,good,4,4992,north_berkeley,4992 -1,1,355,good,11,1703,north_berkeley,1703 -0,1,309,great,7,2105,east_elmwood,2105 -2,1,565,great,3,3644,east_elmwood,3644 -2,1,745,good,12,3178,north_berkeley,3178 -2,1,644,poor,36,2924,zaytuna_college,2830.432 -0,1,506,poor,19,1740,zaytuna_college,1740 -1,1,300,great,6,2273,east_elmwood,2273 -1,1,530,poor,20,1853,zaytuna_college,1853 -3,2,911,great,5,5199,east_elmwood,5199 -1,1,616,poor,44,1667,zaytuna_college,1586.984 -0,1,234,poor,49,1125,zaytuna_college,1059.75 -1,1,636,poor,53,1623,zaytuna_college,1515.882 -0,1,397,good,5,1716,north_berkeley,1716 -3,2,1235,good,14,4914,north_berkeley,4914 -3,2,1061,poor,63,4281,zaytuna_college,3912.834 -0,1,270,good,1,1661,north_berkeley,1661 -1,1,620,good,12,1950,north_berkeley,1950 -3,2,875,poor,30,4448,zaytuna_college,4359.04 -1,1,689,good,10,2117,north_berkeley,2117 -1,1,716,good,12,2102,north_berkeley,2102 -3,2,1149,poor,49,4464,zaytuna_college,4205.088 -1,1,403,poor,16,1760,zaytuna_college,1760 -1,1,588,good,10,2023,north_berkeley,2023 -2,1,668,good,4,3183,north_berkeley,3183 -3,2,1211,good,7,4966,north_berkeley,4966 -1,1,467,good,10,1839,north_berkeley,1839 -3,2,987,good,12,4689,north_berkeley,4689 -1,1,617,good,0,2067,north_berkeley,2067 -0,1,171,poor,28,1260,zaytuna_college,1239.84 -0,1,440,good,3,1854,north_berkeley,1854 -1,1,326,good,3,1809,north_berkeley,1809 -3,2,1157,good,10,4922,north_berkeley,4922 -0,1,249,good,5,1561,north_berkeley,1561 -0,1,288,good,0,1696,north_berkeley,1696 -1,1,455,poor,20,1799,zaytuna_college,1799 -2,1,682,poor,46,2781,zaytuna_college,2636.388 
-3,2,1143,poor,63,4406,zaytuna_college,4027.084 -2,1,684,great,6,3740,east_elmwood,3740 -1,1,433,good,8,1840,north_berkeley,1840 -3,2,874,good,7,4698,north_berkeley,4698 -1,1,488,great,10,2379,east_elmwood,2379 -0,1,371,great,7,2243,east_elmwood,2243 -1,1,572,good,0,2057,north_berkeley,2057 -0,1,196,great,1,2045,east_elmwood,2045 -3,2,1229,poor,19,4921,zaytuna_college,4921 -3,2,868,great,9,5112,east_elmwood,5112 -0,1,548,good,1,1923,north_berkeley,1923 -1,1,739,poor,48,1739,zaytuna_college,1641.616 -1,1,429,good,1,1896,north_berkeley,1896 -1,1,559,great,5,2480,east_elmwood,2480 -1,1,571,good,0,2038,north_berkeley,2038 -1,1,684,good,2,2213,north_berkeley,2213 -0,1,469,great,5,2350,east_elmwood,2350 -0,1,424,good,1,1783,north_berkeley,1783 -2,1,550,good,1,3157,north_berkeley,3157 -1,1,384,poor,49,1354,zaytuna_college,1275.468 -3,2,1165,poor,60,4463,zaytuna_college,4105.96 -3,2,1078,poor,24,4709,zaytuna_college,4671.328 -2,1,788,good,8,3319,north_berkeley,3319 -2,1,577,poor,58,2595,zaytuna_college,2397.78 -0,1,185,poor,23,1377,zaytuna_college,1368.738 -2,1,586,good,5,3154,north_berkeley,3154 -3,2,1105,good,8,4847,north_berkeley,4847 -3,2,933,good,2,4792,north_berkeley,4792 -1,1,315,poor,17,1672,zaytuna_college,1672 -1,1,466,good,9,1887,north_berkeley,1887 -1,1,637,good,8,2095,north_berkeley,2095 -2,1,536,good,14,3036,north_berkeley,3036 -0,1,235,great,4,2047,east_elmwood,2047 -3,2,813,good,10,4533,north_berkeley,4533 -1,1,748,poor,39,1809,zaytuna_college,1740.258 -2,1,926,poor,61,2885,zaytuna_college,2648.43 -0,1,348,good,8,1638,north_berkeley,1638 -2,1,777,good,8,3342,north_berkeley,3342 -0,1,245,poor,48,1124,zaytuna_college,1061.056 -1,1,319,great,6,2295,east_elmwood,2295 -3,2,1150,great,11,5416,east_elmwood,5416 -3,2,1198,good,6,4981,north_berkeley,4981 -0,1,155,good,4,1510,north_berkeley,1510 -1,1,562,good,5,2015,north_berkeley,2015 -1,1,608,poor,63,1457,zaytuna_college,1331.698 -3,2,915,great,3,5275,east_elmwood,5275 -2,1,719,good,11,3218,north_berkeley,3218 
-0,1,357,poor,30,1420,zaytuna_college,1391.6 -2,1,759,great,2,3791,east_elmwood,3791 -1,1,659,poor,37,1764,zaytuna_college,1704.024 -2,1,760,good,2,3381,north_berkeley,3381 -2,1,875,great,3,3949,east_elmwood,3949 -2,1,637,great,8,3698,east_elmwood,3698 -3,2,1245,poor,51,4628,zaytuna_college,4341.064 -0,1,542,good,3,1951,north_berkeley,1951 -3,2,1165,great,10,5370,east_elmwood,5370 -2,1,583,great,0,3709,east_elmwood,3709 -1,1,659,great,8,2552,east_elmwood,2552 -1,1,564,great,2,2586,east_elmwood,2586 -1,1,300,poor,27,1557,zaytuna_college,1535.202 -2,1,688,great,14,3697,east_elmwood,3697 -2,1,935,good,12,3444,north_berkeley,3444 -1,1,721,great,10,2589,east_elmwood,2589 -3,2,1164,good,5,4924,north_berkeley,4924 -0,1,455,good,9,1812,north_berkeley,1812 -3,2,935,poor,61,4224,zaytuna_college,3877.632 -1,1,584,poor,41,1644,zaytuna_college,1574.952 -0,1,501,great,3,2366,east_elmwood,2366 -1,1,679,poor,29,1930,zaytuna_college,1895.26 -0,1,439,poor,54,1290,zaytuna_college,1202.28 -1,1,705,poor,56,1641,zaytuna_college,1522.848 -1,1,597,good,8,2061,north_berkeley,2061 -3,2,1127,great,12,5402,east_elmwood,5402 -0,1,493,good,6,1830,north_berkeley,1830 -2,1,619,good,3,3160,north_berkeley,3160 -3,2,1102,poor,50,4418,zaytuna_college,4152.92 -1,1,554,poor,37,1680,zaytuna_college,1622.88 -0,1,359,poor,15,1627,zaytuna_college,1627 -3,2,982,great,9,5243,east_elmwood,5243 -3,2,1084,poor,18,4793,zaytuna_college,4793 -0,1,257,good,13,1518,north_berkeley,1518 -3,2,1111,poor,64,4317,zaytuna_college,3937.104 -1,1,432,great,8,2320,east_elmwood,2320 -3,2,1063,great,10,5292,east_elmwood,5292 -0,1,546,great,12,2286,east_elmwood,2286 -2,1,752,poor,26,3082,zaytuna_college,3045.016 -0,1,478,good,6,1814,north_berkeley,1814 -2,1,739,great,3,3775,east_elmwood,3775 -1,1,506,good,6,1920,north_berkeley,1920 -3,2,1237,good,12,4938,north_berkeley,4938 -0,1,427,poor,55,1293,zaytuna_college,1202.49 -2,1,773,poor,32,3088,zaytuna_college,3013.888 -1,1,386,good,5,1861,north_berkeley,1861 
-3,2,1232,good,11,5015,north_berkeley,5015 -0,1,468,poor,38,1449,zaytuna_college,1396.836 -0,1,235,good,3,1597,north_berkeley,1597 -0,1,135,good,7,1417,north_berkeley,1417 -1,1,537,poor,43,1595,zaytuna_college,1521.63 -2,1,663,good,8,3146,north_berkeley,3146 -1,1,662,great,9,2538,east_elmwood,2538 -3,2,1175,poor,33,4677,zaytuna_college,4555.398 -1,1,452,good,0,1962,north_berkeley,1962 -3,2,1224,good,6,5040,north_berkeley,5040 -2,1,651,great,4,3721,east_elmwood,3721 -1,1,304,great,2,2288,east_elmwood,2288 -3,2,1211,great,14,5418,east_elmwood,5418 -3,2,1123,poor,20,4727,zaytuna_college,4727 -1,1,654,poor,49,1630,zaytuna_college,1535.46 -1,1,630,good,6,2079,north_berkeley,2079 -1,1,631,poor,28,1894,zaytuna_college,1863.696 -0,1,435,good,14,1681,north_berkeley,1681 -0,1,400,good,1,1747,north_berkeley,1747 -3,2,1162,good,14,4824,north_berkeley,4824 -2,1,734,poor,37,2958,zaytuna_college,2857.428 -0,1,444,good,12,1722,north_berkeley,1722 -1,1,470,good,3,1912,north_berkeley,1912 -0,1,248,good,3,1594,north_berkeley,1594 -2,1,724,great,7,3770,east_elmwood,3770 -3,2,1136,good,3,4930,north_berkeley,4930 -3,2,891,poor,39,4333,zaytuna_college,4168.346 -2,1,629,great,12,3623,east_elmwood,3623 -0,1,484,great,6,2346,east_elmwood,2346 -0,1,203,good,8,1554,north_berkeley,1554 -3,2,1031,great,0,5336,east_elmwood,5336 -1,1,306,good,11,1727,north_berkeley,1727 -0,1,170,good,11,1455,north_berkeley,1455 -2,1,749,great,8,3799,east_elmwood,3799 -2,1,781,good,14,3197,north_berkeley,3197 -1,1,550,poor,17,1913,zaytuna_college,1913 -1,1,435,good,8,1826,north_berkeley,1826 -3,2,957,good,4,4751,north_berkeley,4751 -1,1,604,good,0,2082,north_berkeley,2082 -1,1,548,good,1,2014,north_berkeley,2014 -0,1,153,poor,17,1424,zaytuna_college,1424 -2,1,635,great,1,3696,east_elmwood,3696 -0,1,113,good,4,1461,north_berkeley,1461 -0,1,296,great,13,2095,east_elmwood,2095 -2,1,743,great,9,3758,east_elmwood,3758 -0,1,453,good,11,1735,north_berkeley,1735 -0,1,113,poor,31,1182,zaytuna_college,1155.996 
-1,1,663,poor,37,1771,zaytuna_college,1710.786 -1,1,633,good,0,2179,north_berkeley,2179 -2,1,835,poor,33,3092,zaytuna_college,3011.608 -0,1,349,good,4,1687,north_berkeley,1687 -2,1,698,poor,41,2897,zaytuna_college,2775.326 -2,1,504,good,2,3098,north_berkeley,3098 -0,1,121,poor,54,999,frontage_rd,931.068 -1,1,684,good,1,2192,north_berkeley,2192 -2,1,901,good,7,3473,north_berkeley,3473 -0,1,478,great,1,2414,east_elmwood,2414 -2,1,898,poor,41,3082,frontage_rd,2952.556 -1,1,482,good,14,1804,north_berkeley,1804 -0,1,239,poor,49,1168,frontage_rd,1100.256 -1,1,572,great,13,2403,east_elmwood,2403 -2,1,815,good,0,3378,north_berkeley,3378 -1,1,565,poor,61,1470,frontage_rd,1349.46 -3,2,1004,good,11,4706,north_berkeley,4706 -0,1,393,poor,49,1289,frontage_rd,1214.238 -0,1,117,great,3,1939,east_elmwood,1939 -0,1,448,poor,47,1337,frontage_rd,1264.802 -0,1,104,poor,20,1322,frontage_rd,1322 -1,1,391,good,10,1761,north_berkeley,1761 -0,1,542,good,5,1889,north_berkeley,1889 -0,1,365,good,9,1716,north_berkeley,1716 -0,1,510,poor,19,1730,frontage_rd,1730 -1,1,354,good,7,1811,north_berkeley,1811 -1,1,634,good,11,1991,north_berkeley,1991 -1,1,526,good,5,1933,north_berkeley,1933 -0,1,167,good,13,1447,north_berkeley,1447 -3,2,1018,poor,37,4453,frontage_rd,4301.598 -2,1,783,poor,54,2804,frontage_rd,2613.328 -1,1,416,good,3,1862,north_berkeley,1862 -3,2,916,poor,56,4246,frontage_rd,3940.288 -3,2,1223,good,1,5092,north_berkeley,5092 -1,1,359,poor,29,1577,frontage_rd,1548.614 -2,1,917,poor,29,3270,frontage_rd,3211.14 -1,1,338,great,9,2266,east_elmwood,2266 -3,2,926,poor,59,4218,frontage_rd,3888.996 -3,2,1116,great,14,5361,east_elmwood,5361 -3,2,1160,great,5,5470,east_elmwood,5470 -3,2,813,good,11,4568,north_berkeley,4568 -3,2,854,great,14,5103,east_elmwood,5103 -3,2,1068,great,11,5347,east_elmwood,5347 -2,1,758,great,4,3824,east_elmwood,3824 -2,1,741,good,11,3184,north_berkeley,3184 -2,1,761,poor,31,3055,frontage_rd,2987.79 -3,2,811,good,12,4580,north_berkeley,4580 
-3,2,1005,great,9,5305,east_elmwood,5305 -0,1,190,good,6,1550,north_berkeley,1550 -3,2,953,poor,48,4293,frontage_rd,4052.592 -3,2,1206,good,1,5011,north_berkeley,5011 -2,1,637,great,7,3692,east_elmwood,3692 -3,2,980,good,4,4791,north_berkeley,4791 -1,1,318,good,11,1671,north_berkeley,1671 -3,2,1227,poor,32,4784,frontage_rd,4669.184 -0,1,398,poor,60,1166,frontage_rd,1072.72 -1,1,543,great,8,2431,east_elmwood,2431 -3,2,1207,poor,28,4746,frontage_rd,4670.064 -0,1,509,great,6,2333,east_elmwood,2333 -2,1,685,poor,47,2844,frontage_rd,2690.424 -3,2,823,good,6,4589,north_berkeley,4589 -2,1,846,poor,48,2929,frontage_rd,2764.976 -2,1,846,good,3,3437,north_berkeley,3437 -0,1,397,poor,34,1462,frontage_rd,1421.064 -0,1,261,poor,55,1126,frontage_rd,1047.18 -1,1,733,good,13,2056,north_berkeley,2056 -0,1,265,poor,35,1308,frontage_rd,1268.76 -1,1,326,great,6,2314,east_elmwood,2314 -3,2,1020,great,8,5302,east_elmwood,5302 -3,2,1027,poor,44,4451,frontage_rd,4237.352 -2,1,551,poor,63,2502,frontage_rd,2286.828 -3,2,807,poor,55,4133,frontage_rd,3843.69 -3,2,1139,poor,48,4468,frontage_rd,4217.792 -1,1,635,good,9,2017,north_berkeley,2017 -1,1,520,good,13,1851,north_berkeley,1851 -2,1,765,great,14,3693,east_elmwood,3693 -0,1,505,good,6,1804,north_berkeley,1804 -3,2,837,poor,27,4428,frontage_rd,4366.008 -3,2,921,good,5,4744,north_berkeley,4744 -2,1,924,great,12,3902,east_elmwood,3902 -3,2,976,poor,38,4406,frontage_rd,4247.384 -2,1,918,good,0,3524,north_berkeley,3524 -0,1,332,good,0,1714,north_berkeley,1714 -1,1,463,good,7,1885,north_berkeley,1885 -3,2,822,poor,44,4201,frontage_rd,3999.352 -1,1,626,poor,55,1588,frontage_rd,1476.84 -0,1,424,poor,60,1235,frontage_rd,1136.2 -1,1,383,poor,49,1436,frontage_rd,1352.712 -1,1,496,good,4,1917,north_berkeley,1917 -3,2,1166,poor,52,4524,frontage_rd,4234.464 -3,2,1211,good,14,4970,north_berkeley,4970 -3,2,896,poor,25,4498,frontage_rd,4453.02 -0,1,377,good,8,1698,north_berkeley,1698 -1,1,381,good,10,1784,north_berkeley,1784 
-1,1,429,poor,53,1361,frontage_rd,1271.174 -2,1,750,good,9,3217,north_berkeley,3217 -1,1,718,poor,45,1806,frontage_rd,1715.7 -1,1,527,poor,47,1575,frontage_rd,1489.95 -2,1,652,good,4,3196,north_berkeley,3196 -3,2,1067,poor,24,4688,frontage_rd,4650.496 -3,2,1075,good,13,4841,north_berkeley,4841 -3,2,1154,great,1,5459,east_elmwood,5459 -1,1,649,great,1,2592,east_elmwood,2592 -0,1,195,good,9,1500,north_berkeley,1500 -2,1,774,poor,59,2778,frontage_rd,2561.316 -2,1,803,great,6,3822,east_elmwood,3822 -2,1,529,great,3,3633,east_elmwood,3633 -2,1,584,good,12,3042,north_berkeley,3042 -0,1,102,good,6,1472,north_berkeley,1472 -3,2,899,good,2,4696,north_berkeley,4696 -3,2,1237,good,14,4972,north_berkeley,4972 -0,1,228,poor,62,986,frontage_rd,903.176 -2,1,604,poor,40,2835,frontage_rd,2721.6 -0,1,518,poor,35,1607,frontage_rd,1558.79 -2,1,571,good,4,3100,north_berkeley,3100 -0,1,351,good,11,1657,north_berkeley,1657 -0,1,111,poor,29,1203,frontage_rd,1181.346 -1,1,345,poor,59,1273,frontage_rd,1173.706 -1,1,632,good,3,2082,north_berkeley,2082 -2,1,711,good,12,3151,north_berkeley,3151 -1,1,466,good,12,1848,north_berkeley,1848 -1,1,727,good,0,2190,north_berkeley,2190 -1,1,404,poor,63,1235,frontage_rd,1128.79 -1,1,374,poor,34,1498,frontage_rd,1456.056 -3,2,1108,great,13,5284,east_elmwood,5284 -3,2,1236,good,1,5119,north_berkeley,5119 -2,1,870,good,4,3433,north_berkeley,3433 -0,1,144,good,6,1523,north_berkeley,1523 -3,2,1221,good,11,4990,north_berkeley,4990 -3,2,985,good,10,4763,north_berkeley,4763 -1,1,318,great,7,2288,east_elmwood,2288 -2,1,538,good,12,3067,north_berkeley,3067 -1,1,487,great,13,2365,east_elmwood,2365 -2,1,836,good,13,3303,north_berkeley,3303 -3,2,1156,good,3,5025,north_berkeley,5025 -1,1,516,great,12,2439,east_elmwood,2439 -1,1,580,poor,53,1585,frontage_rd,1480.39 -1,1,713,good,9,2124,north_berkeley,2124 -2,1,724,good,2,3305,north_berkeley,3305 -3,2,1226,good,3,5005,north_berkeley,5005 -1,1,674,good,8,2128,north_berkeley,2128 -0,1,532,good,1,1932,north_berkeley,1932 
-3,2,1099,good,12,4789,north_berkeley,4789 -3,2,807,good,7,4608,north_berkeley,4608 -3,2,959,great,6,5244,east_elmwood,5244 -0,1,260,good,5,1591,north_berkeley,1591 -0,1,203,poor,38,1250,frontage_rd,1205 -3,2,1112,poor,29,4693,frontage_rd,4608.526 -2,1,655,good,11,3144,north_berkeley,3144 -0,1,355,poor,61,1130,frontage_rd,1037.34 -3,2,1146,good,5,4995,north_berkeley,4995 -3,2,1098,great,8,5396,east_elmwood,5396 -0,1,117,good,5,1465,north_berkeley,1465 -0,1,474,great,9,2297,east_elmwood,2297 -0,1,463,great,10,2272,east_elmwood,2272 -2,1,770,good,9,3277,north_berkeley,3277 -1,1,642,great,11,2502,east_elmwood,2502 -3,2,1044,poor,26,4599,frontage_rd,4543.812 -0,1,425,poor,45,1335,frontage_rd,1268.25 -2,1,681,poor,38,2928,frontage_rd,2822.592 -2,1,509,good,6,3054,north_berkeley,3054 -1,1,568,great,3,2575,east_elmwood,2575 -1,1,749,poor,21,1990,frontage_rd,1986.02 -2,1,651,great,2,3701,east_elmwood,3701 -0,1,448,good,14,1696,north_berkeley,1696 -0,1,492,poor,20,1696,frontage_rd,1696 -0,1,516,poor,59,1288,frontage_rd,1187.536 -1,1,625,good,0,2162,north_berkeley,2162 -2,1,874,good,13,3320,north_berkeley,3320 -1,1,738,good,2,2253,north_berkeley,2253 -0,1,148,good,4,1537,north_berkeley,1537 -3,2,819,good,3,4646,north_berkeley,4646 -0,1,101,good,5,1483,north_berkeley,1483 -0,1,131,poor,22,1308,frontage_rd,1302.768 -0,1,158,poor,57,1019,frontage_rd,943.594 -1,1,725,poor,44,1744,frontage_rd,1660.288 -2,1,616,great,2,3678,east_elmwood,3678 -0,1,341,great,7,2195,east_elmwood,2195 -1,1,668,good,0,2190,north_berkeley,2190 -0,1,491,good,2,1850,north_berkeley,1850 -3,2,895,good,12,4638,north_berkeley,4638 -2,1,787,good,1,3407,north_berkeley,3407 -2,1,873,poor,15,3317,frontage_rd,3317 -1,1,525,poor,47,1569,frontage_rd,1484.274 -2,1,941,poor,56,2987,frontage_rd,2771.936 -1,1,537,good,9,1905,north_berkeley,1905 -3,2,1070,great,5,5367,east_elmwood,5367 -0,1,440,good,12,1751,north_berkeley,1751 -0,1,255,good,13,1513,north_berkeley,1513 -0,1,464,poor,45,1424,frontage_rd,1352.8 
-3,2,974,good,6,4750,north_berkeley,4750 -1,1,322,poor,40,1452,frontage_rd,1393.92 -1,1,371,poor,47,1416,frontage_rd,1339.536 -1,1,487,poor,34,1685,frontage_rd,1637.82 -3,2,985,great,14,5221,east_elmwood,5221 -0,1,460,good,3,1853,north_berkeley,1853 -0,1,384,good,13,1667,north_berkeley,1667 -2,1,755,good,12,3205,north_berkeley,3205 -2,1,765,good,4,3362,north_berkeley,3362 -2,1,580,great,2,3692,east_elmwood,3692 -1,1,472,great,3,2479,east_elmwood,2479 -0,1,193,poor,33,1277,frontage_rd,1243.798 -1,1,312,poor,31,1509,frontage_rd,1475.802 -0,1,488,great,8,2304,east_elmwood,2304 -0,1,214,poor,33,1268,frontage_rd,1235.032 -3,2,939,good,7,4750,north_berkeley,4750 -0,1,502,poor,17,1743,frontage_rd,1743 -0,1,540,good,10,1835,north_berkeley,1835 -0,1,139,good,10,1487,north_berkeley,1487 -0,1,447,poor,29,1520,frontage_rd,1492.64 -2,1,776,great,4,3828,east_elmwood,3828 -1,1,695,great,6,2604,east_elmwood,2604 -0,1,449,good,7,1820,north_berkeley,1820 -3,2,984,great,6,5312,east_elmwood,5312 -0,1,255,good,9,1542,north_berkeley,1542 -3,2,811,poor,20,4428,frontage_rd,4428 -0,1,371,good,12,1643,north_berkeley,1643 -1,1,444,great,5,2399,east_elmwood,2399 -1,1,410,good,9,1800,north_berkeley,1800 -1,1,315,poor,39,1451,frontage_rd,1395.862 -0,1,368,poor,36,1412,frontage_rd,1366.816 -1,1,738,poor,51,1769,frontage_rd,1659.322 -1,1,372,good,1,1868,north_berkeley,1868 -3,2,899,poor,17,4620,frontage_rd,4620 -2,1,919,good,1,3538,north_berkeley,3538 -3,2,1082,good,7,4878,north_berkeley,4878 -3,2,1114,poor,44,4556,frontage_rd,4337.312 -0,1,163,poor,37,1155,frontage_rd,1115.73 -3,2,1090,poor,59,4367,frontage_rd,4026.374 -3,2,989,good,0,4856,north_berkeley,4856 -0,1,479,great,12,2275,east_elmwood,2275 -2,1,617,good,8,3124,north_berkeley,3124 -3,2,959,poor,61,4195,frontage_rd,3851.01 -1,1,735,good,12,2154,north_berkeley,2154 -3,2,1084,great,14,5272,east_elmwood,5272 -1,1,480,good,4,1956,north_berkeley,1956 -2,1,813,poor,58,2874,frontage_rd,2655.576 -0,1,103,great,1,2023,east_elmwood,2023 
-2,1,695,great,9,3701,east_elmwood,3701 -0,1,186,poor,20,1427,frontage_rd,1427 -0,1,270,poor,44,1231,frontage_rd,1171.912 -2,1,800,poor,51,2845,frontage_rd,2668.61 -2,1,502,great,4,3570,east_elmwood,3570 -2,1,680,poor,60,2697,frontage_rd,2481.24 -3,2,1071,poor,58,4345,frontage_rd,4014.78 -1,1,662,good,10,2060,north_berkeley,2060 -0,1,499,good,13,1818,north_berkeley,1818 -1,1,308,poor,33,1446,frontage_rd,1408.404 -0,1,236,good,13,1507,north_berkeley,1507 -2,1,889,good,7,3458,north_berkeley,3458 -2,1,884,good,6,3381,north_berkeley,3381 -0,1,454,good,11,1699,north_berkeley,1699 -3,2,1036,good,4,4820,north_berkeley,4820 -2,1,637,good,9,3105,north_berkeley,3105 -1,1,339,poor,51,1294,frontage_rd,1213.772 -2,1,587,poor,25,2917,frontage_rd,2887.83 -0,1,489,poor,36,1529,frontage_rd,1480.072 -0,1,440,good,2,1843,north_berkeley,1843 -2,1,573,good,2,3143,north_berkeley,3143 -1,1,575,good,9,1949,north_berkeley,1949 -0,1,207,good,13,1481,north_berkeley,1481 -0,1,407,great,0,2320,east_elmwood,2320 -0,1,295,poor,43,1228,frontage_rd,1171.512 -0,1,434,poor,17,1633,frontage_rd,1633 -0,1,184,great,13,1938,east_elmwood,1938 -2,1,895,poor,58,2881,frontage_rd,2662.044 -0,1,323,great,7,2182,east_elmwood,2182 -2,1,747,great,4,3774,east_elmwood,3774 -3,2,1166,poor,39,4612,frontage_rd,4436.744 -1,1,689,poor,29,1917,frontage_rd,1882.494 -2,1,663,great,8,3636,east_elmwood,3636 -1,1,412,poor,50,1425,frontage_rd,1339.5 -3,2,1131,good,14,4875,north_berkeley,4875 -1,1,300,good,9,1736,north_berkeley,1736 -2,1,786,good,12,3262,north_berkeley,3262 -0,1,402,poor,48,1350,frontage_rd,1274.4 -2,1,535,great,2,3639,east_elmwood,3639 -1,1,360,good,9,1740,north_berkeley,1740 -0,1,276,great,9,2065,east_elmwood,2065 -2,1,647,poor,43,2797,frontage_rd,2668.338 -1,1,611,good,5,2075,north_berkeley,2075 -3,2,956,good,6,4777,north_berkeley,4777 -1,1,426,good,14,1788,north_berkeley,1788 -0,1,155,great,12,1945,east_elmwood,1945 -2,1,643,poor,40,2871,frontage_rd,2756.16 -1,1,593,good,10,1944,north_berkeley,1944 
-3,2,1093,poor,39,4509,frontage_rd,4337.658 -1,1,487,good,12,1854,north_berkeley,1854 -1,1,349,good,10,1768,north_berkeley,1768 -0,1,171,poor,61,983,frontage_rd,902.394 -3,2,1024,good,1,4904,north_berkeley,4904 -0,1,116,good,10,1374,north_berkeley,1374 -2,1,561,poor,49,2708,frontage_rd,2550.936 -1,1,741,great,10,2637,east_elmwood,2637 -2,1,901,great,5,3952,east_elmwood,3952 -1,1,318,poor,57,1230,frontage_rd,1138.98 -1,1,688,great,8,2605,east_elmwood,2605 -3,2,828,good,7,4584,north_berkeley,4584 -2,1,938,great,4,3973,east_elmwood,3973 -2,1,669,poor,25,2970,frontage_rd,2940.3 -3,2,840,poor,64,4024,frontage_rd,3669.888 -2,1,638,poor,50,2712,frontage_rd,2549.28 -2,1,759,great,14,3712,east_elmwood,3712 -2,1,934,great,0,4058,east_elmwood,4058 -3,2,991,great,10,5287,east_elmwood,5287 -1,1,411,great,8,2316,east_elmwood,2316 -2,1,883,poor,35,3173,frontage_rd,3077.81 -3,2,1152,good,3,4986,north_berkeley,4986 -0,1,230,good,11,1490,north_berkeley,1490 -3,2,934,good,9,4733,north_berkeley,4733 -3,2,941,great,5,5274,east_elmwood,5274 -3,2,933,great,9,5156,east_elmwood,5156 -3,2,839,poor,55,4172,frontage_rd,3879.96 -0,1,216,great,1,2092,east_elmwood,2092 -3,2,1235,good,3,5088,north_berkeley,5088 -0,1,132,good,11,1445,north_berkeley,1445 -2,1,832,poor,34,3073,frontage_rd,2986.956 -2,1,900,great,13,3881,east_elmwood,3881 -3,2,897,good,8,4658,north_berkeley,4658 -1,1,698,great,11,2563,east_elmwood,2563 -1,1,368,good,1,1865,north_berkeley,1865 -2,1,664,good,8,3159,north_berkeley,3159 -3,2,1162,good,14,4883,north_berkeley,4883 -2,1,743,good,8,3300,north_berkeley,3300 -1,1,427,poor,64,1282,frontage_rd,1169.184 -1,1,632,good,1,2106,north_berkeley,2106 -2,1,698,poor,16,3173,frontage_rd,3173 -0,1,275,poor,18,1456,frontage_rd,1456 -3,2,1128,good,7,4889,north_berkeley,4889 -1,1,352,great,0,2346,east_elmwood,2346 -3,2,1174,good,3,4956,north_berkeley,4956 -1,1,440,great,5,2420,east_elmwood,2420 -1,1,460,good,4,1949,north_berkeley,1949 -2,1,870,good,7,3435,north_berkeley,3435 
-3,2,1240,great,14,5497,east_elmwood,5497 -0,1,326,poor,45,1259,frontage_rd,1196.05 -0,1,311,good,12,1542,north_berkeley,1542 -2,1,656,poor,24,2990,frontage_rd,2966.08 -1,1,560,good,14,1899,north_berkeley,1899 -0,1,352,poor,59,1211,frontage_rd,1116.542 -2,1,714,good,1,3282,north_berkeley,3282 -3,2,834,poor,32,4396,frontage_rd,4290.496 -0,1,348,good,0,1730,north_berkeley,1730 -0,1,388,good,3,1746,north_berkeley,1746 -0,1,328,great,13,2126,east_elmwood,2126 -2,1,718,good,6,3230,north_berkeley,3230 -0,1,435,poor,64,1177,frontage_rd,1073.424 -3,2,1182,great,6,5483,east_elmwood,5483 -1,1,632,great,11,2503,east_elmwood,2503 -3,2,902,great,2,5186,east_elmwood,5186 -1,1,612,good,6,2034,north_berkeley,2034 -0,1,217,good,11,1516,north_berkeley,1516 -0,1,126,good,11,1428,north_berkeley,1428 -1,1,586,good,7,2018,north_berkeley,2018 -3,2,831,good,1,4659,north_berkeley,4659 -1,1,380,good,12,1758,north_berkeley,1758 -3,2,1224,good,6,4976,north_berkeley,4976 -3,2,1009,poor,60,4283,frontage_rd,3940.36 -2,1,872,great,12,3868,east_elmwood,3868 -0,1,116,great,9,1964,east_elmwood,1964 -3,2,1060,good,8,4862,north_berkeley,4862 -1,1,442,poor,62,1292,frontage_rd,1183.472 -0,1,442,good,14,1737,north_berkeley,1737 -0,1,519,poor,43,1531,frontage_rd,1460.574 -0,1,118,good,1,1489,north_berkeley,1489 -2,1,674,good,13,3110,north_berkeley,3110 -3,2,1182,great,14,5364,east_elmwood,5364 -3,2,1056,poor,20,4663,frontage_rd,4663 -1,1,357,poor,55,1301,frontage_rd,1209.93 -1,1,507,great,7,2401,east_elmwood,2401 -2,1,793,good,4,3303,north_berkeley,3303 -0,1,254,good,12,1578,north_berkeley,1578 -0,1,375,good,4,1767,north_berkeley,1767 -3,2,934,poor,15,4637,frontage_rd,4637 -2,1,677,poor,15,3123,frontage_rd,3123 -2,1,582,good,8,3057,north_berkeley,3057 -2,1,918,poor,33,3210,frontage_rd,3126.54 -3,2,994,good,7,4801,north_berkeley,4801 -0,1,144,poor,50,1045,frontage_rd,982.3 -3,2,997,good,11,4749,north_berkeley,4749 -0,1,430,great,8,2248,east_elmwood,2248 -0,1,443,poor,20,1602,frontage_rd,1602 
-2,1,785,great,7,3832,east_elmwood,3832 -1,1,426,great,9,2344,east_elmwood,2344 -1,1,312,good,13,1645,north_berkeley,1645 -0,1,116,great,7,1929,east_elmwood,1929 -2,1,949,good,0,3513,north_berkeley,3513 -1,1,501,great,8,2390,east_elmwood,2390 -1,1,564,great,3,2538,east_elmwood,2538 -0,1,333,poor,26,1457,frontage_rd,1439.516 -3,2,879,great,9,5169,east_elmwood,5169 -2,1,817,good,4,3379,north_berkeley,3379 -3,2,1123,poor,49,4513,frontage_rd,4251.246 -1,1,"640,56",good,0,2184,north_berkeley,2184 -1,1,707,good,1,2168,north_berkeley,2168 -1,1,347,poor,63,1174,frontage_rd,1073.036 -0,1,245,poor,63,1060,frontage_rd,968.84 -3,2,1116,good,13,4808,north_berkeley,4808 -3,2,1083,great,9,5304,east_elmwood,5304 -3,2,911,great,0,5285,east_elmwood,5285 -1,1,724,good,12,2097,north_berkeley,2097 -1,1,627,good,8,2084,north_berkeley,2084 -0,1,259,great,10,2095,east_elmwood,2095 -0,1,216,poor,50,1079,frontage_rd,1014.26 -0,1,490,good,12,1732,north_berkeley,1732 -3,2,931,good,14,4617,north_berkeley,4617 -0,1,174,poor,43,1129,frontage_rd,1077.066 -3,2,956,great,10,5204,east_elmwood,5204 -2,1,937,poor,61,2962,frontage_rd,2719.116 -3,2,1220,great,11,5459,east_elmwood,5459 -0,1,240,good,12,1514,north_berkeley,1514 -3,2,964,great,2,5270,east_elmwood,5270 -1,1,589,poor,33,1770,frontage_rd,1723.98 -0,1,217,good,14,1447,north_berkeley,1447 -2,1,520,great,4,3540,east_elmwood,3540 -0,1,220,poor,49,1098,frontage_rd,1034.316 -3,2,1176,poor,19,4849,frontage_rd,4849 -3,2,1008,good,9,4794,north_berkeley,4794 -3,2,1202,good,11,4950,north_berkeley,4950 -3,2,1117,poor,57,4387,frontage_rd,4062.362 -3,2,1140,good,0,4996,north_berkeley,4996 -2,1,734,poor,35,2980,frontage_rd,2890.6 -0,1,419,good,6,1758,north_berkeley,1758 -3,2,1117,good,13,4796,north_berkeley,4796 -1,1,516,great,13,2352,east_elmwood,2352 -2,1,555,poor,51,2632,frontage_rd,2468.816 -1,1,407,good,12,1797,north_berkeley,1797 -0,1,356,good,5,1671,north_berkeley,1671 -2,1,860,good,1,3434,north_berkeley,3434 -1,1,637,great,5,2582,east_elmwood,2582 
-3,2,995,poor,39,4491,frontage_rd,4320.342 -1,1,623,poor,38,1766,frontage_rd,1702.424 -3,2,968,good,9,4699,north_berkeley,4699 -3,2,1182,great,8,5404,east_elmwood,5404 -0,1,147,poor,47,1084,frontage_rd,1025.464 -0,1,534,good,0,1963,north_berkeley,1963 -3,2,871,good,11,4648,north_berkeley,4648 -1,1,348,great,3,2363,east_elmwood,2363 -1,1,427,poor,17,1712,frontage_rd,1712 -0,1,140,good,3,1488,north_berkeley,1488 -2,1,898,great,10,3922,east_elmwood,3922 -0,1,532,poor,36,1523,frontage_rd,1474.264 -0,1,432,poor,19,1628,frontage_rd,1628 -3,2,1160,great,10,5360,east_elmwood,5360 -2,1,775,poor,43,2926,frontage_rd,2791.404 -0,1,178,great,14,1898,east_elmwood,1898 -1,1,395,good,1,1858,north_berkeley,1858 -1,1,304,good,0,1777,north_berkeley,1777 -0,1,438,great,12,2258,east_elmwood,2258 -3,2,1037,good,5,4849,north_berkeley,4849 -0,1,246,good,7,1528,north_berkeley,1528 -3,2,948,good,4,4752,north_berkeley,4752 -1,1,505,good,10,1907,north_berkeley,1907 -1,1,562,good,10,1917,north_berkeley,1917 -0,1,248,poor,29,1365,frontage_rd,1340.43 -1,1,737,great,12,2573,east_elmwood,2573 -3,2,1100,good,2,4926,north_berkeley,4926 -3,2,804,good,6,4591,north_berkeley,4591 -0,1,104,great,11,1867,east_elmwood,1867 -1,1,444,good,14,1801,north_berkeley,1801 -0,1,186,poor,30,1324,frontage_rd,1297.52 -0,1,383,great,4,2202,east_elmwood,2202 -2,1,620,good,8,3107,north_berkeley,3107 -3,2,1098,good,14,4856,north_berkeley,4856 -1,1,718,poor,39,1778,frontage_rd,1710.436 -2,1,866,good,3,3418,north_berkeley,3418 -1,1,404,poor,53,1348,frontage_rd,1259.032 -3,2,1022,good,8,4829,north_berkeley,4829 -1,1,508,good,5,1971,north_berkeley,1971 -2,1,854,great,5,3862,east_elmwood,3862 -0,1,542,good,13,1791,north_berkeley,1791 -3,2,1240,great,10,5503,east_elmwood,5503 -0,1,128,good,6,1432,north_berkeley,1432 -0,1,365,good,2,1702,north_berkeley,1702 -0,1,156,poor,62,887,frontage_rd,812.492 -2,1,933,good,4,3449,north_berkeley,3449 -0,1,404,poor,61,1163,frontage_rd,1067.634 -2,1,822,good,8,3309,north_berkeley,3309 
-2,1,744,great,13,3692,east_elmwood,3692 -1,1,507,good,12,1846,north_berkeley,1846 -1,1,505,great,11,2349,east_elmwood,2349 -2,1,946,poor,34,3251,frontage_rd,3159.972 -0,1,146,good,14,1387,north_berkeley,1387 -3,2,808,good,8,4612,north_berkeley,4612 -3,2,936,good,13,4692,north_berkeley,4692 -2,1,569,poor,19,2949,frontage_rd,2949 -2,1,508,great,2,3599,east_elmwood,3599 -3,2,1041,good,7,4801,north_berkeley,4801 -3,2,840,poor,50,4227,frontage_rd,3973.38 -3,2,1246,good,12,5011,north_berkeley,5011 -1,1,481,great,6,2418,east_elmwood,2418 -0,1,503,good,7,1849,north_berkeley,1849 -1,1,340,great,3,2307,east_elmwood,2307 -0,1,397,good,9,1703,north_berkeley,1703 -3,2,808,poor,45,4177,frontage_rd,3968.15 -2,1,794,poor,43,2992,frontage_rd,2854.368 -3,2,1209,good,10,4916,north_berkeley,4916 -1,1,358,poor,37,1471,frontage_rd,1420.986 -1,1,552,great,4,2485,east_elmwood,2485 -3,2,904,good,14,4576,north_berkeley,4576 -2,1,539,poor,27,2887,frontage_rd,2846.582 -2,1,761,good,3,3283,north_berkeley,3283 -1,1,639,great,8,2576,east_elmwood,2576 -0,1,191,poor,23,1375,frontage_rd,1366.75 -1,1,676,great,5,2620,east_elmwood,2620 -2,1,735,poor,31,2999,frontage_rd,2933.022 -3,2,941,good,9,4745,north_berkeley,4745 -3,2,1212,poor,21,4849,frontage_rd,4839.302 -2,1,556,poor,52,2627,frontage_rd,2458.872 -1,1,574,good,8,2008,north_berkeley,2008 -0,1,209,great,2,2042,east_elmwood,2042 -2,1,753,poor,16,3203,frontage_rd,3203 -0,1,276,poor,62,1069,frontage_rd,979.204 -3,2,1234,good,10,5025,north_berkeley,5025 -0,1,505,poor,24,1683,frontage_rd,1669.536 -1,1,661,great,7,2547,east_elmwood,2547 -3,2,885,great,8,5184,east_elmwood,5184 -2,1,865,great,9,3849,east_elmwood,3849 -0,1,150,great,14,1916,east_elmwood,1916 -3,2,838,poor,56,4139,frontage_rd,3840.992 -0,1,288,poor,22,1517,frontage_rd,1510.932 -2,1,868,good,7,3436,north_berkeley,3436 -3,2,1178,good,5,4932,north_berkeley,4932 -3,2,915,great,7,5205,east_elmwood,5205 -1,1,355,poor,64,1263,frontage_rd,1151.856 -0,1,478,good,10,1775,north_berkeley,1775 
-0,1,352,great,11,2174,east_elmwood,2174 -0,1,281,good,10,1567,north_berkeley,1567 -0,1,178,poor,22,1322,frontage_rd,1316.712 -3,2,1124,poor,34,4603,frontage_rd,4474.116 -2,1,858,good,12,3307,north_berkeley,3307 -0,1,143,poor,21,1310,frontage_rd,1307.38 -3,2,1042,good,3,4836,north_berkeley,4836 -1,1,383,good,10,1740,north_berkeley,1740 -1,1,324,poor,50,1324,frontage_rd,1244.56 -2,1,508,great,2,3559,east_elmwood,3559 -1,1,398,good,6,1868,north_berkeley,1868 -1,1,408,good,6,1810,north_berkeley,1810 -2,1,813,poor,34,3057,frontage_rd,2971.404 -0,1,380,good,11,1627,north_berkeley,1627 -2,1,747,poor,16,3219,frontage_rd,3219 -2,1,791,poor,24,3175,frontage_rd,3149.6 -0,1,185,good,0,1578,north_berkeley,1578 -3,2,963,good,9,4736,north_berkeley,4736 -1,1,327,poor,49,1379,frontage_rd,1299.018 -0,1,125,great,10,1930,east_elmwood,1930 -3,2,1140,great,2,5507,east_elmwood,5507 -2,1,654,good,2,3270,north_berkeley,3270 -0,1,405,good,3,1754,north_berkeley,1754 -3,2,1206,good,7,4996,north_berkeley,4996 -0,1,261,good,7,1639,north_berkeley,1639 -2,1,772,poor,16,3224,frontage_rd,3224 -0,1,379,good,11,1649,north_berkeley,1649 -2,1,732,great,6,3730,east_elmwood,3730 -3,2,1187,good,9,4964,north_berkeley,4964 -2,1,712,great,14,3707,east_elmwood,3707 -1,1,662,poor,31,1849,frontage_rd,1808.322 -0,1,339,poor,30,1452,frontage_rd,1422.96 -2,1,880,poor,49,2957,frontage_rd,2785.494 -1,1,359,poor,44,1392,frontage_rd,1325.184 -0,1,163,poor,52,1037,frontage_rd,970.632 -0,1,365,poor,22,1525,frontage_rd,1518.9 -3,2,1091,good,13,4804,north_berkeley,4804 -1,1,697,poor,62,1599,frontage_rd,1464.684 -3,2,1092,poor,45,4470,frontage_rd,4246.5 -1,1,512,poor,54,1493,frontage_rd,1391.476 -2,1,710,poor,40,2891,frontage_rd,2775.36 -2,1,655,poor,62,2629,frontage_rd,2408.164 -3,2,807,good,12,4572,north_berkeley,4572 -0,1,147,good,4,1483,north_berkeley,1483 -2,1,797,good,9,3296,north_berkeley,3296 -0,1,442,good,10,1717,north_berkeley,1717 -0,1,325,great,11,2079,east_elmwood,2079 -2,1,704,great,6,3707,east_elmwood,3707 
-0,1,495,great,13,2226,east_elmwood,2226 -3,2,957,good,11,4708,north_berkeley,4708 -2,1,"944,98",good,0,3581,north_berkeley,3581 -1,1,698,good,10,2132,north_berkeley,2132 -3,2,897,good,3,4677,north_berkeley,4677 -1,1,643,poor,23,1942,frontage_rd,1930.348 -2,1,757,good,9,3271,north_berkeley,3271 -3,2,1189,poor,26,4740,frontage_rd,4683.12 -3,2,932,good,9,4738,north_berkeley,4738 -3,2,948,good,9,4670,north_berkeley,4670 -1,1,488,poor,61,1377,frontage_rd,1264.086 -3,2,1185,poor,41,4578,frontage_rd,4385.724 -3,2,824,good,7,4646,north_berkeley,4646 -1,1,472,good,12,1896,north_berkeley,1896 -0,1,479,good,10,1770,north_berkeley,1770 -0,1,490,poor,17,1733,frontage_rd,1733 -1,1,457,great,6,2441,east_elmwood,2441 -1,1,520,good,7,1926,north_berkeley,1926 -1,1,322,great,5,2296,east_elmwood,2296 -0,1,482,poor,54,1295,frontage_rd,1206.94 -2,1,740,poor,40,2931,frontage_rd,2813.76 -2,1,760,good,0,3367,north_berkeley,3367 -2,1,758,poor,24,3120,frontage_rd,3095.04 -2,1,919,great,14,3917,east_elmwood,3917 -3,2,1011,good,4,4853,north_berkeley,4853 -1,1,665,great,6,2638,east_elmwood,2638 -3,2,1194,great,10,5429,east_elmwood,5429 -1,1,714,great,14,2526,east_elmwood,2526 -2,1,922,poor,29,3251,frontage_rd,3192.482 -1,1,649,great,9,2514,east_elmwood,2514 -1,1,380,poor,25,1651,frontage_rd,1634.49 -2,1,875,good,11,3322,north_berkeley,3322 -3,2,883,good,6,4685,north_berkeley,4685 -2,1,531,great,4,3634,east_elmwood,3634 -2,1,712,good,1,3282,north_berkeley,3282 -2,1,841,good,8,3369,north_berkeley,3369 -1,1,433,poor,60,1354,frontage_rd,1245.68 -2,1,929,poor,62,2956,frontage_rd,2707.696 -0,1,116,poor,50,998,frontage_rd,938.12 -1,1,325,good,13,1651,north_berkeley,1651 -3,2,1218,poor,60,4505,frontage_rd,4144.6 -1,1,335,poor,17,1645,frontage_rd,1645 -1,1,653,poor,28,1899,frontage_rd,1868.616 -3,2,1102,great,8,5390,east_elmwood,5390 -2,1,921,great,11,3888,east_elmwood,3888 -3,2,1207,poor,24,4809,frontage_rd,4770.528 -3,2,1054,good,9,4852,north_berkeley,4852 -3,2,1159,great,10,5383,east_elmwood,5383 
-0,1,192,poor,41,1135,frontage_rd,1087.33 -0,1,469,good,7,1755,north_berkeley,1755 -2,1,687,good,12,3166,north_berkeley,3166 -3,2,829,good,13,4590,north_berkeley,4590 -0,1,445,good,12,1724,north_berkeley,1724 -0,1,226,poor,46,1202,frontage_rd,1139.496 -1,1,466,poor,30,1672,frontage_rd,1638.56 -3,2,1131,good,0,4950,north_berkeley,4950 -1,1,465,poor,15,1787,frontage_rd,1787 -0,1,454,great,13,2206,east_elmwood,2206 -2,1,545,good,3,3150,north_berkeley,3150 -1,1,696,good,11,2113,north_berkeley,2113 -3,2,1064,poor,27,4634,frontage_rd,4569.124 -0,1,193,poor,62,942,frontage_rd,862.872 -0,1,410,poor,46,1389,frontage_rd,1316.772 -3,2,1174,good,9,4919,north_berkeley,4919 -0,1,382,great,7,2204,east_elmwood,2204 -3,2,1002,good,4,4860,north_berkeley,4860 -0,1,542,great,3,2377,east_elmwood,2377 -1,1,621,great,0,2577,east_elmwood,2577 -3,2,1159,good,3,5002,north_berkeley,5002 -2,1,741,poor,16,3158,frontage_rd,3158 -3,2,1101,great,13,5282,east_elmwood,5282 -3,2,857,great,3,5208,east_elmwood,5208 -1,1,336,great,1,2351,east_elmwood,2351 -1,1,429,poor,38,1516,frontage_rd,1461.424 -3,2,1192,great,12,5421,east_elmwood,5421 -2,1,532,poor,20,2909,frontage_rd,2909 -0,1,399,good,5,1784,north_berkeley,1784 -1,1,652,good,12,2020,north_berkeley,2020 -2,1,879,great,2,3912,east_elmwood,3912 -3,2,833,poor,30,4378,frontage_rd,4290.44 -0,1,119,good,4,1485,north_berkeley,1485 -1,1,670,good,3,2179,north_berkeley,2179 -2,1,837,good,11,3301,north_berkeley,3301 -3,2,1094,poor,36,4542,frontage_rd,4396.656 -3,2,1132,good,6,4961,north_berkeley,4961 -3,2,1089,great,8,5328,east_elmwood,5328 -1,1,483,poor,31,1663,frontage_rd,1626.414 -0,1,456,poor,38,1525,frontage_rd,1470.1 -1,1,550,good,14,1952,north_berkeley,1952 -0,1,144,good,0,1580,north_berkeley,1580 -1,1,543,poor,63,1372,frontage_rd,1254.008 -2,1,733,good,6,3281,north_berkeley,3281 -2,1,773,good,8,3293,north_berkeley,3293 -0,1,508,good,1,1928,north_berkeley,1928 -1,1,685,poor,25,1965,frontage_rd,1945.35 -0,1,402,poor,46,1324,frontage_rd,1255.152 
-2,1,571,good,12,3054,north_berkeley,3054 -1,1,336,poor,24,1580,frontage_rd,1567.36 -0,1,406,good,12,1668,north_berkeley,1668 -0,1,499,good,4,1816,north_berkeley,1816 -0,1,313,poor,40,1339,frontage_rd,1285.44 -1,1,681,good,4,2109,north_berkeley,2109 -2,1,721,poor,59,2752,frontage_rd,2537.344 -1,1,305,poor,48,1311,frontage_rd,1237.584 -3,2,923,poor,38,4418,frontage_rd,4258.952 -1,1,379,poor,34,1524,frontage_rd,1481.328 -0,1,421,great,3,2331,east_elmwood,2331 -2,1,594,poor,34,2841,frontage_rd,2761.452 -1,1,658,great,12,2491,east_elmwood,2491 -3,2,978,good,5,4820,north_berkeley,4820 -0,1,247,poor,36,1301,frontage_rd,1259.368 -2,1,812,great,12,3836,east_elmwood,3836 -1,1,326,good,9,1782,north_berkeley,1782 -1,1,661,poor,16,2004,frontage_rd,2004 -0,1,415,good,8,1753,north_berkeley,1753 -0,1,371,poor,63,1101,frontage_rd,1006.314 -3,2,1165,poor,33,4733,frontage_rd,4609.942 -3,2,915,good,1,4740,north_berkeley,4740 -0,1,270,good,8,1598,north_berkeley,1598 -0,1,239,good,7,1574,north_berkeley,1574 -3,2,1183,great,3,5474,east_elmwood,5474 -1,1,332,great,0,2302,east_elmwood,2302 -3,2,853,great,0,5187,east_elmwood,5187 -2,1,852,poor,54,2953,frontage_rd,2752.196 -2,1,764,poor,64,2751,frontage_rd,2508.912 -1,1,338,good,0,1826,north_berkeley,1826 -2,1,905,poor,46,3031,frontage_rd,2873.388 -2,1,711,good,13,3177,north_berkeley,3177 -1,1,580,poor,42,1630,frontage_rd,1558.28 -3,2,1111,poor,62,4325,frontage_rd,3961.7 -2,1,557,good,2,3181,north_berkeley,3181 -3,2,1192,poor,24,4759,frontage_rd,4720.928 -1,1,548,great,6,2463,east_elmwood,2463 -1,1,369,great,11,2278,east_elmwood,2278 -1,1,742,good,8,2120,north_berkeley,2120 -0,1,291,good,12,1546,north_berkeley,1546 -3,2,1096,great,1,5464,east_elmwood,5464 -2,1,936,poor,61,2950,frontage_rd,2708.1 -0,1,181,great,11,1964,east_elmwood,1964 -0,1,490,good,11,1760,north_berkeley,1760 -1,1,516,poor,44,1622,frontage_rd,1544.144 -1,1,344,poor,33,1508,frontage_rd,1468.792 -2,1,821,good,1,3408,north_berkeley,3408 -2,1,702,great,8,3695,east_elmwood,3695 
-3,2,882,poor,49,4218,frontage_rd,3973.356 -2,1,945,poor,21,3367,frontage_rd,3360.266 -3,2,999,good,5,4830,north_berkeley,4830 -1,1,395,poor,30,1578,frontage_rd,1546.44 -0,1,527,great,8,2387,east_elmwood,2387 -3,2,1006,poor,48,4345,frontage_rd,4101.68 -1,1,696,good,10,2103,north_berkeley,2103 -0,1,309,good,0,1697,north_berkeley,1697 -3,2,923,good,1,4774,north_berkeley,4774 -3,2,879,good,7,4674,north_berkeley,4674 -0,1,227,good,7,1593,north_berkeley,1593 -0,1,150,poor,49,1053,frontage_rd,991.926 -2,1,912,good,6,3473,north_berkeley,3473 -1,1,353,poor,54,1293,frontage_rd,1205.076 -0,1,235,poor,52,1160,frontage_rd,1085.76 -2,1,584,poor,26,2922,frontage_rd,2886.936 -0,1,505,good,9,1827,north_berkeley,1827 -1,1,489,great,2,2514,east_elmwood,2514 -1,1,694,poor,28,1931,frontage_rd,1900.104 -2,1,638,good,1,3237,north_berkeley,3237 -0,1,285,good,0,1664,north_berkeley,1664 -3,2,1041,good,8,4829,north_berkeley,4829 -1,1,622,great,6,2604,east_elmwood,2604 -0,1,290,poor,20,1527,frontage_rd,1527 -2,1,823,good,3,3385,north_berkeley,3385 -0,1,330,poor,19,1522,frontage_rd,1522 -2,1,830,great,11,3857,east_elmwood,3857 -2,1,774,good,7,3322,north_berkeley,3322 -1,1,628,poor,52,1644,frontage_rd,1538.784 -3,2,1030,great,7,5351,east_elmwood,5351 -0,1,378,great,10,2200,east_elmwood,2200 -0,1,289,poor,44,1250,frontage_rd,1190 -1,1,519,good,4,1993,north_berkeley,1993 -0,1,144,good,11,1468,north_berkeley,1468 -2,1,585,poor,36,2806,frontage_rd,2716.208 -2,1,500,good,5,3001,north_berkeley,3001 -2,1,850,poor,24,3255,frontage_rd,3228.96 -3,2,970,good,13,4645,north_berkeley,4645 -2,1,728,great,9,3688,east_elmwood,3688 -3,2,1031,great,13,5242,east_elmwood,5242 -0,1,526,great,2,2393,east_elmwood,2393 -0,1,505,good,4,1882,north_berkeley,1882 -0,1,205,good,0,1579,north_berkeley,1579 -0,1,508,poor,49,1393,frontage_rd,1312.206 -1,1,471,good,13,1876,north_berkeley,1876 -1,1,359,poor,24,1628,frontage_rd,1614.976 -1,1,739,poor,64,1617,frontage_rd,1474.704 -3,2,1164,good,6,4945,north_berkeley,4945 
-1,1,509,great,13,2402,east_elmwood,2402 -2,1,804,good,13,3227,north_berkeley,3227 -0,1,502,good,11,1748,north_berkeley,1748 -3,2,1152,poor,52,4524,frontage_rd,4234.464 -2,1,934,great,5,3947,east_elmwood,3947 -3,2,823,great,4,5149,east_elmwood,5149 -2,1,556,great,4,3646,east_elmwood,3646 -0,1,196,poor,41,1226,frontage_rd,1174.508 -2,1,665,poor,26,3048,frontage_rd,3011.424 -1,1,458,good,11,1826,north_berkeley,1826 -1,1,664,good,10,2024,north_berkeley,2024 -3,2,813,poor,26,4417,frontage_rd,4363.996 -3,2,1105,great,5,5374,east_elmwood,5374 -2,1,732,good,13,3152,north_berkeley,3152 -1,1,364,poor,49,1327,frontage_rd,1250.034 -1,1,498,good,2,1949,north_berkeley,1949 -1,1,731,good,13,2137,north_berkeley,2137 -3,2,1004,good,1,4870,north_berkeley,4870 -0,1,417,great,9,2265,east_elmwood,2265 -2,1,771,good,9,3328,north_berkeley,3328 -2,1,503,poor,32,2763,frontage_rd,2696.688 -3,2,1080,great,0,5459,east_elmwood,5459 -1,1,458,great,3,2384,east_elmwood,2384 -3,2,1240,poor,27,4817,frontage_rd,4749.562 -1,1,703,good,3,2174,north_berkeley,2174 -3,2,1100,good,10,4819,north_berkeley,4819 -3,2,1233,good,1,5037,north_berkeley,5037 -2,1,748,poor,25,3113,frontage_rd,3081.87 -3,2,954,poor,22,4632,frontage_rd,4613.472 -1,1,512,poor,49,1552,frontage_rd,1461.984 -0,1,141,great,9,1996,east_elmwood,1996 -2,1,888,great,1,3970,east_elmwood,3970 -2,1,944,poor,54,3019,frontage_rd,2813.708 -3,2,853,poor,50,4219,frontage_rd,3965.86 -2,1,686,good,12,3201,north_berkeley,3201 -2,1,763,poor,48,2850,frontage_rd,2690.4 -3,2,1107,poor,43,4567,frontage_rd,4356.918 -1,1,615,poor,31,1757,frontage_rd,1718.346 -3,2,1132,good,4,4938,north_berkeley,4938 -3,2,1038,poor,26,4655,frontage_rd,4599.14 -1,1,701,great,11,2570,east_elmwood,2570 -2,1,510,good,6,3053,north_berkeley,3053 -3,2,951,poor,57,4229,frontage_rd,3916.054 -2,1,764,poor,51,2878,frontage_rd,2699.564 -3,2,1148,good,0,5024,north_berkeley,5024 -2,1,827,poor,50,2966,frontage_rd,2788.04 -1,1,664,poor,52,1613,frontage_rd,1509.768 
-3,2,1061,good,11,4811,north_berkeley,4811 -0,1,449,good,7,1773,north_berkeley,1773 -0,1,284,good,2,1695,north_berkeley,1695 -3,2,1191,good,7,5020,north_berkeley,5020 -3,2,1039,good,13,4765,north_berkeley,4765 -1,1,365,good,2,1828,north_berkeley,1828 -2,1,781,good,11,3299,north_berkeley,3299 -3,2,889,great,3,5256,east_elmwood,5256 -2,1,827,good,9,3305,north_berkeley,3305 -1,1,414,great,6,2379,east_elmwood,2379 -1,1,454,good,12,1793,north_berkeley,1793 -0,1,201,good,5,1594,north_berkeley,1594 -0,1,320,poor,64,1085,frontage_rd,989.52 -2,1,946,poor,56,3027,frontage_rd,2809.056 -3,2,1076,great,13,5332,east_elmwood,5332 -1,1,643,good,14,2034,north_berkeley,2034 -2,1,791,poor,62,2738,frontage_rd,2508.008 -1,1,580,great,4,2571,east_elmwood,2571 -0,1,104,great,0,2043,east_elmwood,2043 -1,1,584,good,12,1916,north_berkeley,1916 -3,2,1006,good,4,4784,north_berkeley,4784 -1,1,516,good,13,1915,north_berkeley,1915 -0,1,447,poor,15,1730,frontage_rd,1730 -1,1,722,good,8,2169,north_berkeley,2169 -3,2,811,poor,34,4343,frontage_rd,4221.396 -2,1,560,great,12,3517,east_elmwood,3517 -0,1,397,good,13,1694,north_berkeley,1694 -3,2,1061,good,2,4911,north_berkeley,4911 -3,2,888,good,10,4679,north_berkeley,4679 -1,1,358,good,0,1828,north_berkeley,1828 -3,2,982,great,12,5245,east_elmwood,5245 -2,1,864,good,14,3307,north_berkeley,3307 -0,1,188,poor,25,1364,frontage_rd,1350.36 -1,1,523,good,6,1942,north_berkeley,1942 -3,2,865,great,0,5248,east_elmwood,5248 -0,1,353,good,5,1722,north_berkeley,1722 -0,1,407,good,7,1753,north_berkeley,1753 -1,1,306,great,4,2226,east_elmwood,2226 -0,1,388,poor,60,1152,frontage_rd,1059.84 -0,1,110,poor,22,1319,frontage_rd,1313.724 -1,1,553,poor,35,1689,frontage_rd,1638.33 -1,1,339,poor,15,1698,frontage_rd,1698 -3,2,897,poor,17,4543,frontage_rd,4543 -2,1,574,good,9,3036,north_berkeley,3036 -0,1,449,good,10,1731,north_berkeley,1731 -0,1,441,good,5,1799,north_berkeley,1799 -0,1,502,great,4,2330,east_elmwood,2330 -2,1,757,poor,44,2956,frontage_rd,2814.112 
-3,2,962,good,3,4830,north_berkeley,4830 -1,1,649,great,4,2612,east_elmwood,2612 -0,1,256,good,11,1577,north_berkeley,1577 -1,1,747,great,12,2654,east_elmwood,2654 -3,2,833,great,10,5104,east_elmwood,5104 -1,1,687,good,12,2060,north_berkeley,2060 -2,1,552,good,8,3065,north_berkeley,3065 -3,2,1241,great,2,5569,east_elmwood,5569 -0,1,459,poor,61,1298,frontage_rd,1191.564 -3,2,873,poor,26,4444,frontage_rd,4390.672 -0,1,152,poor,22,1356,frontage_rd,1350.576 -3,2,922,good,7,4670,north_berkeley,4670 -1,1,720,good,3,2198,north_berkeley,2198 -2,1,931,good,2,3497,north_berkeley,3497 -0,1,524,good,6,1893,north_berkeley,1893 -2,1,555,poor,54,2575,frontage_rd,2399.9 -0,1,159,good,7,1488,north_berkeley,1488 -1,1,380,good,8,1768,north_berkeley,1768 -1,1,685,good,0,2155,north_berkeley,2155 -2,1,848,good,11,3354,north_berkeley,3354 -1,1,547,poor,27,1751,frontage_rd,1726.486 -2,1,896,good,12,3363,north_berkeley,3363 -0,1,269,good,6,1560,north_berkeley,1560 -3,2,903,good,0,4764,north_berkeley,4764 -2,1,615,great,4,3633,east_elmwood,3633 -2,1,881,poor,35,3161,frontage_rd,3066.17 -0,1,331,good,13,1583,north_berkeley,1583 -0,1,498,poor,64,1238,frontage_rd,1129.056 -0,1,308,poor,28,1387,frontage_rd,1364.808 -0,1,103,great,3,1973,east_elmwood,1973 -3,2,1044,poor,52,4405,frontage_rd,4123.08 -2,1,788,poor,33,3010,frontage_rd,2931.74 -0,1,524,poor,33,1621,northwest,1578.854 -1,1,397,poor,25,1696,northwest,1679.04 -2,1,876,poor,27,3215,northwest,3169.99 -2,1,751,poor,64,2677,northwest,2441.424 -2,1,622,poor,32,2886,northwest,2816.736 -0,1,316,good,10,1596,west_welmwood,1596 -3,2,1171,poor,54,4473,northwest,4168.836 -3,2,1108,good,5,4875,west_welmwood,4875 -2,1,541,good,0,3103,west_welmwood,3103 -3,2,865,good,12,4643,west_welmwood,4643 -3,2,1165,great,3,5491,east_elmwood,5491 -2,1,728,good,2,3280,west_welmwood,3280 -0,1,298,great,13,2071,east_elmwood,2071 -3,2,923,good,12,4683,west_welmwood,4683 -0,1,274,great,5,2130,east_elmwood,2130 -0,1,349,poor,32,1444,northwest,1409.344 
-2,1,796,great,4,3896,east_elmwood,3896 -1,1,726,poor,28,1898,northwest,1867.632 -2,1,501,great,5,3542,east_elmwood,3542 -2,1,714,good,4,3284,west_welmwood,3284 -3,2,988,great,4,5289,east_elmwood,5289 -2,1,546,poor,34,2757,northwest,2679.804 -0,1,484,poor,20,1678,northwest,1678 -3,2,1230,good,5,5059,west_welmwood,5059 -0,1,142,good,5,1492,west_welmwood,1492 -2,1,774,poor,53,2872,northwest,2682.448 -2,1,902,good,11,3375,west_welmwood,3375 -0,1,457,good,11,1763,west_welmwood,1763 -3,2,1034,good,7,4828,west_welmwood,4828 -3,2,838,poor,25,4412,northwest,4367.88 -1,1,503,good,14,1900,west_welmwood,1900 -0,1,445,good,7,1737,west_welmwood,1737 -2,1,914,poor,34,3189,northwest,3099.708 -2,1,867,great,6,3952,east_elmwood,3952 -2,1,583,good,4,3109,west_welmwood,3109 -0,1,161,great,7,2014,east_elmwood,2014 -1,1,707,good,14,2058,west_welmwood,2058 -1,1,343,great,9,2270,east_elmwood,2270 -2,1,746,great,14,3710,east_elmwood,3710 -2,1,706,good,14,3151,west_welmwood,3151 -1,1,610,good,5,2047,west_welmwood,2047 -3,2,1205,good,12,4969,west_welmwood,4969 -3,2,827,good,7,4655,west_welmwood,4655 -3,2,1055,good,0,4930,west_welmwood,4930 -2,1,625,good,14,3083,west_welmwood,3083 -2,1,630,good,14,3053,west_welmwood,3053 -1,1,654,poor,59,1575,northwest,1452.15 -3,2,1195,poor,17,4831,northwest,4831 -3,2,1227,good,11,4951,west_welmwood,4951 -3,2,1242,good,3,5081,west_welmwood,5081 -2,1,881,good,11,3337,west_welmwood,3337 -1,1,689,poor,35,1880,northwest,1823.6 -2,1,501,poor,35,2704,northwest,2622.88 -1,1,412,poor,21,1751,northwest,1747.498 -3,2,1081,poor,15,4793,northwest,4793 -2,1,725,great,2,3807,east_elmwood,3807 -2,1,768,great,11,3790,east_elmwood,3790 -0,1,152,good,2,1530,west_welmwood,1530 -3,2,1074,good,9,4804,west_welmwood,4804 -1,1,559,good,12,1890,west_welmwood,1890 -2,1,706,great,10,3747,east_elmwood,3747 -2,1,738,poor,23,3095,northwest,3076.43 -2,1,893,great,4,3962,east_elmwood,3962 -1,1,468,great,7,2405,east_elmwood,2405 -2,1,652,poor,22,3017,northwest,3004.932 
-1,1,327,good,6,1769,west_welmwood,1769 -2,1,899,great,3,3930,east_elmwood,3930 -3,2,997,good,11,4733,west_welmwood,4733 -3,2,920,poor,20,4602,northwest,4602 -1,1,300,poor,55,1211,northwest,1126.23 -3,2,1174,great,0,5496,east_elmwood,5496 -1,1,679,good,14,2049,west_welmwood,2049 -3,2,887,good,0,4781,west_welmwood,4781 -2,1,856,good,9,3377,west_welmwood,3377 -0,1,216,good,14,1484,west_welmwood,1484 -0,1,476,great,3,2312,east_elmwood,2312 -2,1,592,good,12,3042,west_welmwood,3042 -0,1,325,poor,42,1337,northwest,1278.172 -2,1,790,poor,19,3170,northwest,3170 -1,1,354,good,12,1728,west_welmwood,1728 -1,1,529,good,12,1949,west_welmwood,1949 -1,1,522,great,12,2420,east_elmwood,2420 -0,1,358,good,13,1660,west_welmwood,1660 -0,1,468,poor,47,1447,northwest,1368.862 -0,1,429,great,11,2255,east_elmwood,2255 -3,2,1176,great,0,5535,east_elmwood,5535 -0,1,361,good,12,1656,west_welmwood,1656 -3,2,1214,good,2,5046,west_welmwood,5046 -1,1,738,good,3,2224,west_welmwood,2224 -3,2,852,poor,59,4128,northwest,3806.016 -3,2,1178,good,9,4951,west_welmwood,4951 -3,2,956,good,5,4746,west_welmwood,4746 -1,1,568,good,14,1884,west_welmwood,1884 -1,1,704,great,7,2638,east_elmwood,2638 -1,1,519,poor,64,1386,northwest,1264.032 -1,1,737,good,5,2185,west_welmwood,2185 -1,1,455,poor,47,1510,northwest,1428.46 -0,1,346,poor,61,1118,northwest,1026.324 -0,1,285,good,5,1627,west_welmwood,1627 -3,2,829,good,0,4646,west_welmwood,4646 -2,1,672,good,6,3202,west_welmwood,3202 -1,1,451,good,6,1873,west_welmwood,1873 -1,1,697,good,8,2119,west_welmwood,2119 -2,1,691,poor,34,2913,northwest,2831.436 -1,1,610,poor,49,1598,northwest,1505.316 -0,1,410,great,2,2287,east_elmwood,2287 -2,1,529,great,2,3606,east_elmwood,3606 -0,1,273,good,0,1656,west_welmwood,1656 -0,1,482,poor,55,1299,northwest,1208.07 -0,1,170,good,10,1421,west_welmwood,1421 -1,1,565,good,2,2001,west_welmwood,2001 -1,1,712,good,2,2186,west_welmwood,2186 -0,1,246,poor,16,1465,northwest,1465 -1,1,379,good,12,1725,west_welmwood,1725 
-2,1,575,good,3,3095,west_welmwood,3095 -1,1,477,good,4,1900,west_welmwood,1900 -3,2,1081,good,9,4847,west_welmwood,4847 -2,1,908,good,13,3348,west_welmwood,3348 -3,2,1119,poor,47,4530,northwest,4285.38 -3,2,943,good,13,4693,west_welmwood,4693 -2,1,763,good,2,3339,west_welmwood,3339 -3,2,1192,poor,39,4630,northwest,4454.06 -2,1,600,great,14,3547,east_elmwood,3547 -0,1,361,poor,60,1186,northwest,1091.12 -2,1,847,poor,29,3160,northwest,3103.12 -0,1,206,poor,42,1215,northwest,1161.54 -1,1,300,great,12,2133,east_elmwood,2133 -2,1,927,good,4,3440,west_welmwood,3440 -0,1,524,poor,15,1726,northwest,1726 -1,1,469,poor,38,1637,northwest,1578.068 -0,1,193,poor,40,1165,northwest,1118.4 -2,1,627,poor,29,2924,northwest,2871.368 -2,1,603,good,6,3180,west_welmwood,3180 -0,1,424,poor,21,1639,northwest,1635.722 -2,1,785,poor,28,3149,northwest,3098.616 -3,2,1020,poor,15,4676,northwest,4676 -2,1,526,great,3,3602,east_elmwood,3602 -2,1,555,good,11,3089,west_welmwood,3089 -2,1,730,great,7,3781,east_elmwood,3781 -3,2,1228,good,10,4955,west_welmwood,4955 -0,1,437,poor,30,1585,northwest,1553.3 -1,1,511,poor,31,1746,northwest,1707.588 -3,2,1125,great,7,5430,east_elmwood,5430 -2,1,838,poor,16,3259,northwest,3259 -2,1,736,great,4,3837,east_elmwood,3837 -2,1,591,great,3,3670,east_elmwood,3670 -0,1,537,good,9,1858,west_welmwood,1858 -2,1,690,good,3,3216,west_welmwood,3216 -0,1,475,poor,36,1486,northwest,1438.448 -0,1,245,great,9,2039,east_elmwood,2039 -2,1,568,good,0,3137,west_welmwood,3137 -1,1,466,poor,23,1764,northwest,1753.416 -0,1,489,great,12,2311,east_elmwood,2311 -1,1,727,good,4,2217,west_welmwood,2217 -2,1,541,good,0,3148,west_welmwood,3148 -2,1,531,good,10,3053,west_welmwood,3053 -0,1,548,good,3,1931,west_welmwood,1931 -1,1,548,good,8,1975,west_welmwood,1975 -1,1,394,good,6,1800,west_welmwood,1800 -3,2,1231,good,12,4988,west_welmwood,4988 -2,1,532,great,3,3614,east_elmwood,3614 -0,1,256,good,4,1582,west_welmwood,1582 -2,1,613,good,1,3202,west_welmwood,3202 
-3,2,1186,good,13,4928,west_welmwood,4928 -1,1,432,good,1,1910,west_welmwood,1910 -1,1,351,good,7,1804,west_welmwood,1804 -1,1,300,poor,27,1528,northwest,1506.608 -0,1,325,poor,51,1220,northwest,1144.36 -3,2,1101,great,11,5388,east_elmwood,5388 -3,2,1029,good,0,4831,west_welmwood,4831 -1,1,662,great,14,2517,east_elmwood,2517 -2,1,611,poor,48,2772,northwest,2616.768 -3,2,1155,good,7,4951,west_welmwood,4951 -2,1,683,great,14,3670,east_elmwood,3670 -0,1,361,great,13,2098,east_elmwood,2098 -1,1,589,poor,20,1872,northwest,1872 -2,1,764,poor,40,2915,northwest,2798.4 -1,1,522,good,12,1945,west_welmwood,1945 -0,1,227,poor,52,1120,northwest,1048.32 -2,1,525,good,3,3112,west_welmwood,3112 -2,1,656,good,4,3234,west_welmwood,3234 -1,1,630,poor,61,1505,northwest,1381.59 -3,2,842,great,14,5011,east_elmwood,5011 -0,1,520,poor,33,1552,northwest,1511.648 -3,2,1013,poor,46,4403,northwest,4174.044 -2,1,747,poor,20,3170,northwest,3170 -0,1,217,great,1,2075,east_elmwood,2075 -0,1,134,poor,31,1185,northwest,1158.93 -3,2,813,good,7,4622,west_welmwood,4622 -2,1,722,good,0,3337,west_welmwood,3337 -2,1,710,poor,45,2845,northwest,2702.75 -1,1,377,great,7,2353,east_elmwood,2353 -3,2,1169,poor,57,4441,northwest,4112.366 -3,2,881,good,4,4641,west_welmwood,4641 -0,1,415,great,5,2283,east_elmwood,2283 -1,1,401,poor,17,1705,northwest,1705 -1,1,552,poor,64,1460,northwest,1331.52 -3,2,894,good,9,4639,west_welmwood,4639 -2,1,925,good,8,3414,west_welmwood,3414 -2,1,661,good,10,3135,west_welmwood,3135 -0,1,233,poor,54,1123,northwest,1046.636 -0,1,264,poor,15,1521,northwest,1521 -1,1,445,poor,48,1482,northwest,1399.008 -2,1,849,good,10,3308,west_welmwood,3308 -2,1,852,poor,31,3116,northwest,3047.448 -2,1,901,great,6,3977,east_elmwood,3977 -2,1,934,great,14,3876,east_elmwood,3876 -1,1,691,good,12,2021,west_welmwood,2021 -2,1,722,great,9,3734,east_elmwood,3734 -3,2,1000,good,9,4741,west_welmwood,4741 -2,1,573,great,3,3598,east_elmwood,3598 -1,1,732,poor,42,1858,northwest,1776.248 
-0,1,319,great,13,2068,east_elmwood,2068 -0,1,101,poor,53,944,northwest,881.696 -2,1,838,poor,42,3053,northwest,2918.668 -2,1,518,poor,57,2571,northwest,2380.746 -2,1,876,great,6,3893,east_elmwood,3893 -0,1,482,good,13,1741,west_welmwood,1741 -2,1,789,good,7,3342,west_welmwood,3342 -3,2,955,poor,18,4627,northwest,4627 -0,1,102,great,5,1924,east_elmwood,1924 -0,1,373,poor,50,1299,northwest,1221.06 -3,2,1056,poor,25,4679,northwest,4632.21 -0,1,184,great,1,2098,east_elmwood,2098 -0,1,107,good,14,1373,west_welmwood,1373 -0,1,430,good,3,1813,west_welmwood,1813 -0,1,255,poor,26,1382,northwest,1365.416 -3,2,1242,great,12,5514,east_elmwood,5514 -3,2,1124,good,13,4817,west_welmwood,4817 -2,1,833,great,2,3943,east_elmwood,3943 -0,1,417,good,11,1660,west_welmwood,1660 -0,1,254,good,8,1566,west_welmwood,1566 -1,1,481,good,4,1986,west_welmwood,1986 -2,1,611,good,4,3193,west_welmwood,3193 -1,1,639,poor,20,1948,northwest,1948 -0,1,455,good,7,1799,west_welmwood,1799 -2,1,881,poor,54,2903,northwest,2705.596 -1,1,364,great,12,2225,east_elmwood,2225 -0,1,172,good,8,1444,west_welmwood,1444 -1,1,643,great,1,2637,east_elmwood,2637 -2,1,663,good,4,3269,west_welmwood,3269 -2,1,931,good,5,3524,west_welmwood,3524 -3,2,1143,great,2,5488,east_elmwood,5488 -2,1,681,good,13,3185,west_welmwood,3185 -1,1,426,good,11,1857,west_welmwood,1857 -3,2,1070,great,3,5342,east_elmwood,5342 -1,1,388,good,14,1744,west_welmwood,1744 -1,1,722,good,2,2219,west_welmwood,2219 -2,1,621,great,3,3665,east_elmwood,3665 -3,2,1018,poor,45,4452,northwest,4229.4 -3,2,845,poor,39,4284,northwest,4121.208 -0,1,323,poor,24,1529,northwest,1516.768 -1,1,558,good,11,1917,west_welmwood,1917 -2,1,813,great,10,3819,east_elmwood,3819 -1,1,601,poor,36,1771,northwest,1714.328 -1,1,617,good,8,1999,west_welmwood,1999 -2,1,851,good,4,3414,west_welmwood,3414 -2,1,828,good,14,3248,west_welmwood,3248 -3,2,825,good,0,4711,west_welmwood,4711 -1,1,386,good,13,1713,west_welmwood,1713 -3,2,895,good,2,4769,west_welmwood,4769 
-1,1,443,good,14,1845,west_welmwood,1845 -3,2,846,poor,56,4124,northwest,3827.072 -3,2,896,great,2,5257,east_elmwood,5257 -3,2,1014,poor,40,4482,northwest,4302.72 -1,1,378,good,8,1807,west_welmwood,1807 -3,2,1126,poor,41,4551,northwest,4359.858 -2,1,929,poor,47,3039,northwest,2874.894 -2,1,739,poor,57,2753,northwest,2549.278 -3,2,1145,good,13,4887,west_welmwood,4887 -1,1,667,poor,54,1581,northwest,1473.492 -2,1,836,good,4,3361,west_welmwood,3361 -2,1,758,good,9,3265,west_welmwood,3265 -3,2,824,good,8,4570,west_welmwood,4570 -1,1,506,good,12,1895,west_welmwood,1895 -1,1,544,poor,62,1429,northwest,1308.964 -0,1,121,good,2,1512,west_welmwood,1512 -1,1,638,poor,34,1759,northwest,1709.748 -0,1,135,good,4,1480,west_welmwood,1480 -1,1,385,good,3,1874,west_welmwood,1874 -2,1,589,great,8,3651,east_elmwood,3651 -0,1,511,good,4,1905,west_welmwood,1905 -2,1,663,poor,63,2589,northwest,2366.346 -1,1,739,poor,56,1677,northwest,1556.256 -1,1,610,poor,39,1673,northwest,1609.426 -3,2,818,great,8,5081,east_elmwood,5081 -3,2,1202,good,5,4988,west_welmwood,4988 -0,1,164,good,7,1496,west_welmwood,1496 -0,1,336,poor,60,1153,northwest,1060.76 -3,2,1243,good,10,4950,west_welmwood,4950 -2,1,563,great,11,3503,east_elmwood,3503 -1,1,425,good,10,1818,west_welmwood,1818 -2,1,849,poor,45,2993,northwest,2843.35 -1,1,701,good,2,2170,west_welmwood,2170 -1,1,689,great,11,2557,east_elmwood,2557 -3,2,958,poor,61,4202,northwest,3857.436 -2,1,732,poor,16,3201,northwest,3201 -1,1,318,poor,49,1368,northwest,1288.656 -1,1,471,good,8,1852,west_welmwood,1852 -0,1,378,great,3,2205,east_elmwood,2205 -1,1,582,poor,30,1800,northwest,1764 -1,1,326,great,12,2239,east_elmwood,2239 -1,1,307,good,0,1856,west_welmwood,1856 -3,2,1206,good,10,4955,west_welmwood,4955 -1,1,547,great,13,2367,east_elmwood,2367 -0,1,215,good,10,1530,west_welmwood,1530 -1,1,454,good,2,1889,west_welmwood,1889 -1,1,367,great,5,2319,east_elmwood,2319 -1,1,373,good,4,1831,west_welmwood,1831 -3,2,921,good,12,4672,west_welmwood,4672 
-0,1,230,great,5,2105,east_elmwood,2105 -3,2,1110,poor,47,4482,northwest,4239.972 -3,2,1148,good,9,4891,west_welmwood,4891 -0,1,190,great,14,1922,east_elmwood,1922 -0,1,480,great,7,2324,east_elmwood,2324 -2,1,921,poor,39,3110,northwest,2991.82 -0,1,127,great,9,1948,east_elmwood,1948 -1,1,528,great,5,2450,east_elmwood,2450 -1,1,710,good,5,2193,west_welmwood,2193 -0,1,349,great,12,2117,east_elmwood,2117 -0,1,467,good,13,1692,west_welmwood,1692 -3,2,1137,poor,21,4813,northwest,4803.374 -0,1,394,poor,40,1435,northwest,1377.6 -1,1,659,great,6,2646,east_elmwood,2646 -0,1,410,poor,53,1248,northwest,1165.632 -3,2,1158,poor,54,4464,northwest,4160.448 -0,1,401,poor,52,1312,northwest,1228.032 -1,1,641,poor,46,1680,northwest,1592.64 -1,1,599,great,6,2552,east_elmwood,2552 -1,1,614,good,4,2103,west_welmwood,2103 -0,1,443,good,8,1759,west_welmwood,1759 -0,1,266,poor,44,1196,northwest,1138.592 -1,1,627,good,10,1985,west_welmwood,1985 -0,1,476,poor,30,1530,northwest,1499.4 -1,1,657,good,3,2163,west_welmwood,2163 -2,1,839,poor,35,3073,northwest,2980.81 -3,2,868,poor,28,4447,northwest,4375.848 -1,1,454,good,12,1809,west_welmwood,1809 -0,1,319,poor,63,1088,northwest,994.432 -1,1,537,poor,56,1508,northwest,1399.424 -3,2,1235,great,6,5500,east_elmwood,5500 -0,1,123,good,13,1393,west_welmwood,1393 -0,1,249,great,10,2060,east_elmwood,2060 -2,1,817,great,4,3866,east_elmwood,3866 -2,1,711,poor,43,2882,northwest,2749.428 -1,1,479,good,0,1978,west_welmwood,1978 -0,1,361,poor,27,1540,northwest,1518.44 -3,2,918,poor,45,4329,northwest,4112.55 -2,1,554,good,11,3017,west_welmwood,3017 -1,1,314,poor,23,1543,northwest,1533.742 -2,1,766,good,0,3387,west_welmwood,3387 -3,2,947,good,2,4765,west_welmwood,4765 -0,1,245,great,7,2066,east_elmwood,2066 -1,1,551,good,3,1971,west_welmwood,1971 -3,2,1166,good,3,5008,west_welmwood,5008 -3,2,1064,good,12,4793,west_welmwood,4793 -3,2,1102,poor,26,4711,northwest,4654.468 -0,1,323,good,7,1603,west_welmwood,1603 -3,2,991,great,3,5325,east_elmwood,5325 
-0,1,239,poor,18,1500,northwest,1500 -3,2,1037,good,6,4815,west_welmwood,4815 -2,1,949,good,8,3489,west_welmwood,3489 -1,1,442,good,12,1865,west_welmwood,1865 -3,2,901,great,2,5212,east_elmwood,5212 -3,2,898,good,3,4765,west_welmwood,4765 -3,2,1090,poor,59,4368,northwest,4027.296 -1,1,747,great,0,2754,east_elmwood,2754 -2,1,719,great,2,3842,east_elmwood,3842 -0,1,473,good,0,1915,west_welmwood,1915 -1,1,483,good,2,1957,west_welmwood,1957 -2,1,773,good,10,3303,west_welmwood,3303 -3,2,852,good,6,4659,west_welmwood,4659 -1,1,691,great,8,2610,east_elmwood,2610 -2,1,763,good,1,3397,west_welmwood,3397 -2,1,560,poor,46,2736,northwest,2593.728 -2,1,581,great,5,3616,east_elmwood,3616 -0,1,296,good,10,1599,west_welmwood,1599 -2,1,585,great,6,3634,east_elmwood,3634 -1,1,535,good,12,1928,west_welmwood,1928 -1,1,698,poor,35,1846,northwest,1790.62 -2,1,629,poor,50,2695,northwest,2533.3 -0,1,104,poor,61,943,northwest,865.674 -2,1,904,good,14,3386,west_welmwood,3386 -3,2,1100,great,10,5348,east_elmwood,5348 -0,1,212,poor,15,1414,northwest,1414 -1,1,491,poor,38,1577,northwest,1520.228 -2,1,862,great,5,3869,east_elmwood,3869 -3,2,900,poor,59,4155,northwest,3830.91 -3,2,867,good,5,4617,west_welmwood,4617 -1,1,362,great,12,2216,east_elmwood,2216 -1,1,576,great,14,2422,east_elmwood,2422 -0,1,300,great,5,2181,east_elmwood,2181 -3,2,1017,poor,33,4487,northwest,4370.338 -2,1,546,great,1,3672,east_elmwood,3672 -2,1,589,poor,48,2729,northwest,2576.176 -1,1,517,good,5,1948,west_welmwood,1948 -0,1,289,great,13,2066,east_elmwood,2066 -0,1,272,poor,27,1418,northwest,1398.148 -3,2,1222,good,2,5038,west_welmwood,5038 -0,1,410,good,8,1742,west_welmwood,1742 -3,2,1199,good,1,5084,west_welmwood,5084 -1,1,477,good,2,1948,west_welmwood,1948 -2,1,727,poor,61,2708,northwest,2485.944 -2,1,516,good,10,3018,west_welmwood,3018 -1,1,375,poor,45,1440,northwest,1368 -3,2,959,good,1,4800,west_welmwood,4800 -3,2,1031,great,4,5380,east_elmwood,5380 -1,1,465,good,11,1810,west_welmwood,1810 
-3,2,1181,good,7,4930,west_welmwood,4930 -0,1,307,poor,63,1120,northwest,1023.68 -3,2,1170,good,11,4880,west_welmwood,4880 -3,2,1093,great,7,5356,east_elmwood,5356 -3,2,1219,good,9,4951,west_welmwood,4951 -2,1,539,poor,42,2709,northwest,2589.804 -1,1,462,great,1,2462,east_elmwood,2462 -3,2,1211,good,5,5032,west_welmwood,5032 -0,1,530,good,13,1837,west_welmwood,1837 -0,1,177,good,5,1496,west_welmwood,1496 -0,1,116,good,14,1424,west_welmwood,1424 -0,1,494,poor,45,1493,northwest,1418.35 -1,1,685,good,4,2173,west_welmwood,2173 -0,1,515,great,7,2389,east_elmwood,2389 -1,1,398,good,1,1900,west_welmwood,1900 -0,1,408,good,0,1839,west_welmwood,1839 -1,1,694,good,0,2165,west_welmwood,2165 -0,1,499,great,5,2324,east_elmwood,2324 -1,1,558,good,6,1974,west_welmwood,1974 -2,1,603,poor,63,2610,northwest,2385.54 -0,1,448,great,9,2260,east_elmwood,2260 -2,1,818,good,3,3409,west_welmwood,3409 -1,1,586,great,4,2535,east_elmwood,2535 -2,1,770,poor,30,3114,northwest,3051.72 -1,1,675,good,13,2053,west_welmwood,2053 -3,2,1024,poor,28,4560,northwest,4487.04 -2,1,532,good,12,3019,west_welmwood,3019 -3,2,816,poor,47,4210,northwest,3982.66 -2,1,838,good,8,3341,west_welmwood,3341 -3,2,1059,great,4,5329,east_elmwood,5329 -1,1,637,good,3,2119,west_welmwood,2119 -0,1,464,good,3,1799,west_welmwood,1799 -2,1,767,good,4,3288,west_welmwood,3288 -2,1,577,great,5,3618,east_elmwood,3618 -1,1,403,poor,28,1619,northwest,1593.096 -2,1,751,good,8,3255,west_welmwood,3255 -0,1,304,great,2,2219,east_elmwood,2219 -2,1,868,poor,41,3033,northwest,2905.614 -0,1,440,great,9,2293,east_elmwood,2293 -2,1,729,great,1,3806,east_elmwood,3806 -2,1,772,good,12,3281,west_welmwood,3281 -1,1,551,poor,18,1840,northwest,1840 -1,1,319,poor,44,1411,northwest,1343.272 -1,1,644,poor,63,1484,northwest,1356.376 -1,1,586,poor,23,1862,northwest,1850.828 -3,2,893,poor,31,4418,northwest,4320.804 -1,1,413,great,3,2343,east_elmwood,2343 -1,1,721,good,14,2031,west_welmwood,2031 -2,1,844,good,7,3358,west_welmwood,3358 
-3,2,1247,good,9,4992,west_welmwood,4992 -1,1,686,great,6,2623,east_elmwood,2623 -3,2,975,good,8,4701,west_welmwood,4701 -0,1,487,good,13,1722,west_welmwood,1722 -0,1,115,good,5,1500,west_welmwood,1500 -1,1,555,poor,45,1608,northwest,1527.6 -0,1,194,good,5,1550,west_welmwood,1550 -2,1,683,poor,40,2855,northwest,2740.8 -0,1,498,good,11,1751,west_welmwood,1751 -0,1,540,poor,44,1516,northwest,1443.232 -3,2,1138,good,1,4982,west_welmwood,4982 -1,1,518,great,1,2486,east_elmwood,2486 -2,1,667,poor,37,2933,northwest,2833.278 -2,1,935,good,2,3512,west_welmwood,3512 -3,2,1160,great,4,5431,east_elmwood,5431 -0,1,167,good,13,1401,west_welmwood,1401 -2,1,878,poor,53,2968,northwest,2772.112 -2,1,705,good,5,3267,west_welmwood,3267 -0,1,256,good,3,1657,west_welmwood,1657 -2,1,886,good,11,3369,west_welmwood,3369 -2,1,864,great,0,3929,east_elmwood,3929 -1,1,523,good,0,2046,west_welmwood,2046 -2,1,924,poor,46,3055,northwest,2896.14 -1,1,486,great,13,2383,east_elmwood,2383 -1,1,353,good,6,1744,west_welmwood,1744 -0,1,133,poor,30,1187,northwest,1163.26 -1,1,577,poor,22,1884,northwest,1876.464 -1,1,499,great,6,2486,east_elmwood,2486 -0,1,325,great,1,2170,east_elmwood,2170 -1,1,578,poor,37,1687,northwest,1629.642 -3,2,1032,good,12,4788,west_welmwood,4788 -0,1,229,good,2,1564,west_welmwood,1564 -1,1,539,great,14,2353,east_elmwood,2353 -0,1,538,good,10,1882,west_welmwood,1882 -2,1,564,good,9,3047,west_welmwood,3047 -2,1,697,great,5,3766,east_elmwood,3766 -2,1,629,good,13,3132,west_welmwood,3132 -0,1,530,great,14,2329,east_elmwood,2329 -2,1,688,good,9,3235,west_welmwood,3235 -2,1,925,good,1,3469,west_welmwood,3469 -1,1,537,good,8,1988,west_welmwood,1988 -3,2,1029,great,13,5294,east_elmwood,5294 -1,1,712,good,0,2230,west_welmwood,2230 -3,2,898,good,10,4610,west_welmwood,4610 -2,1,749,good,9,3249,west_welmwood,3249 -1,1,747,good,11,2164,west_welmwood,2164 -0,1,267,poor,33,1296,northwest,1262.304 -3,2,1117,poor,44,4487,northwest,4271.624 -1,1,581,poor,59,1527,northwest,1407.894 
-1,1,687,poor,47,1732,northwest,1638.472 -2,1,683,good,0,3254,west_welmwood,3254 -1,1,587,poor,18,1908,northwest,1908 -0,1,477,poor,44,1467,northwest,1396.584 -3,2,944,good,13,4625,west_welmwood,4625 -0,1,511,great,1,2420,east_elmwood,2420 -3,2,932,poor,36,4444,northwest,4301.792 -0,1,347,good,8,1686,west_welmwood,1686 -0,1,272,good,2,1645,west_welmwood,1645 -2,1,659,great,11,3698,east_elmwood,3698 -1,1,742,great,10,2678,east_elmwood,2678 -2,1,830,great,14,3826,east_elmwood,3826 -2,1,571,great,3,3631,east_elmwood,3631 -3,2,1007,great,5,5339,east_elmwood,5339 -3,2,1059,poor,34,4592,northwest,4463.424 -2,1,823,poor,23,3191,northwest,3171.854 -1,1,705,great,5,2681,east_elmwood,2681 -1,1,381,poor,16,1742,northwest,1742 -1,1,558,good,4,2010,west_welmwood,2010 -1,1,452,poor,35,1637,northwest,1587.89 -1,1,446,good,7,1911,west_welmwood,1911 -3,2,1011,good,11,4704,west_welmwood,4704 -2,1,770,good,5,3285,west_welmwood,3285 -3,2,853,good,11,4592,west_welmwood,4592 -0,1,436,good,2,1838,west_welmwood,1838 -2,1,718,good,11,3197,west_welmwood,3197 -0,1,507,good,5,1835,west_welmwood,1835 -0,1,175,great,11,1985,east_elmwood,1985 -1,1,379,good,9,1785,west_welmwood,1785 -0,1,463,good,4,1859,west_welmwood,1859 -0,1,131,great,10,1917,east_elmwood,1917 -1,1,584,good,7,2039,west_welmwood,2039 -1,1,343,good,2,1866,west_welmwood,1866 -0,1,424,poor,24,1589,northwest,1576.288 -1,1,670,good,3,2153,west_welmwood,2153 -2,1,551,good,8,3103,west_welmwood,3103 -2,1,737,great,14,3733,east_elmwood,3733 -1,1,655,poor,36,1810,northwest,1752.08 -3,2,946,poor,46,4289,northwest,4065.972 -1,1,360,good,1,1841,west_welmwood,1841 -3,2,988,good,2,4846,west_welmwood,4846 -2,1,641,poor,17,3075,northwest,3075 -3,2,1163,good,12,4938,west_welmwood,4938 -3,2,1025,good,14,4745,west_welmwood,4745 -3,2,893,great,0,5228,east_elmwood,5228 -1,1,302,poor,38,1413,northwest,1362.132 -3,2,1183,good,2,4978,west_welmwood,4978 -1,1,721,great,3,2707,east_elmwood,2707 -2,1,670,poor,18,3137,northwest,3137 
-3,2,1138,great,12,5408,east_elmwood,5408 -0,1,144,good,11,1436,west_welmwood,1436 -1,1,543,good,9,1927,west_welmwood,1927 -3,2,838,poor,63,4086,northwest,3734.604 -1,1,478,poor,16,1833,northwest,1833 -2,1,760,good,2,3319,west_welmwood,3319 -3,2,1054,good,9,4775,west_welmwood,4775 -1,1,738,poor,55,1641,northwest,1526.13 -1,1,331,poor,52,1321,northwest,1236.456 -2,1,881,great,6,3923,east_elmwood,3923 -2,1,916,poor,29,3211,northwest,3153.202 -1,1,592,good,12,1923,west_welmwood,1923 -0,1,250,great,5,2088,east_elmwood,2088 -3,2,1233,poor,36,4710,northwest,4559.28 -1,1,672,great,14,2547,east_elmwood,2547 -0,1,522,great,7,2318,east_elmwood,2318 -1,1,523,poor,59,1465,northwest,1350.73 -3,2,1116,great,1,5470,east_elmwood,5470 -0,1,389,good,14,1647,west_welmwood,1647 -3,2,1008,great,13,5198,east_elmwood,5198 -0,1,320,great,9,2173,east_elmwood,2173 -2,1,707,good,1,3301,west_welmwood,3301 -2,1,776,great,5,3870,east_elmwood,3870 -1,1,607,great,6,2582,east_elmwood,2582 -2,1,931,good,1,3475,west_welmwood,3475 -3,2,886,great,2,5224,east_elmwood,5224 -2,1,785,good,14,3277,west_welmwood,3277 -0,1,373,great,4,2199,east_elmwood,2199 -3,2,1243,good,7,4983,west_welmwood,4983 -0,1,549,poor,48,1454,northwest,1372.576 -2,1,620,poor,58,2641,northwest,2440.284 -2,1,921,good,1,3550,west_welmwood,3550 -0,1,106,poor,25,1239,northwest,1226.61 -1,1,623,good,4,2039,west_welmwood,2039 -0,1,328,great,9,2097,east_elmwood,2097 -3,2,1129,poor,37,4581,northwest,4425.246 -3,2,857,poor,42,4237,northwest,4050.572 -3,2,999,poor,32,4567,northwest,4457.392 -3,2,966,good,13,4681,west_welmwood,4681 -3,2,897,great,11,5107,east_elmwood,5107 -1,1,369,good,2,1807,west_welmwood,1807 -3,2,817,good,12,4541,west_welmwood,4541 -1,1,317,good,11,1688,west_welmwood,1688 -3,2,1228,great,8,5521,east_elmwood,5521 -1,1,479,good,14,1800,west_welmwood,1800 -1,1,748,good,12,2082,west_welmwood,2082 -1,1,330,good,11,1680,west_welmwood,1680 -1,1,398,good,14,1723,west_welmwood,1723 -2,1,533,poor,50,2623,northwest,2465.62 
-3,2,830,good,3,4664,west_welmwood,4664 -3,2,1050,good,8,4783,west_welmwood,4783 -3,2,942,poor,64,4201,northwest,3831.312 -2,1,621,poor,42,2841,northwest,2715.996 -3,2,1221,great,9,5522,east_elmwood,5522 -0,1,259,poor,37,1288,northwest,1244.208 -1,1,599,poor,44,1645,northwest,1566.04 -2,1,860,good,3,3461,west_welmwood,3461 -0,1,330,good,2,1735,west_welmwood,1735 -2,1,736,great,8,3773,east_elmwood,3773 -1,1,687,great,14,2532,east_elmwood,2532 -3,2,1040,poor,24,4695,northwest,4657.44 -1,1,718,great,11,2640,east_elmwood,2640 -1,1,313,poor,15,1656,northwest,1656 -3,2,1008,poor,37,4492,northwest,4339.272 -1,1,550,poor,42,1585,northwest,1515.26 -1,1,467,poor,60,1407,northwest,1294.44 -0,1,472,great,9,2243,east_elmwood,2243 -0,1,189,great,4,2002,east_elmwood,2002 -2,1,935,poor,15,3429,northwest,3429 -2,1,638,poor,37,2843,northwest,2746.338 -3,2,803,great,5,5055,east_elmwood,5055 -2,1,734,poor,37,2940,northwest,2840.04 -3,2,815,good,5,4566,west_welmwood,4566 -2,1,618,good,0,3228,west_welmwood,3228 -2,1,519,poor,30,2790,northwest,2734.2 -3,2,1163,great,7,5429,east_elmwood,5429 -0,1,145,good,0,1542,west_welmwood,1542 -2,1,805,good,1,3345,west_welmwood,3345 -0,1,512,great,13,2318,east_elmwood,2318 -0,1,515,great,8,2307,east_elmwood,2307 -0,1,470,great,10,2251,east_elmwood,2251 -2,1,845,good,14,3312,west_welmwood,3312 -2,1,676,great,2,3726,east_elmwood,3726 -0,1,157,good,3,1523,west_welmwood,1523 -3,2,905,good,8,4666,west_welmwood,4666 -1,1,335,poor,34,1461,northwest,1420.092 -3,2,1240,great,7,5555,east_elmwood,5555 -1,1,686,good,9,2093,west_welmwood,2093 -1,1,437,good,2,1948,west_welmwood,1948 -3,2,1217,poor,29,4766,northwest,4680.212 -3,2,854,poor,47,4256,northwest,4026.176 -0,1,511,good,12,1787,west_welmwood,1787 -1,1,494,poor,18,1798,northwest,1798 -3,2,876,good,13,4560,west_welmwood,4560 -1,1,653,good,6,2057,west_welmwood,2057 -2,1,537,good,14,2997,west_welmwood,2997 -2,1,670,good,5,3258,west_welmwood,3258 -1,1,477,good,4,1980,west_welmwood,1980 
-2,1,775,good,8,3265,west_welmwood,3265 -0,1,332,good,8,1670,west_welmwood,1670 -3,2,1214,good,3,5062,west_welmwood,5062 -2,1,631,poor,57,2618,northwest,2424.268 -2,1,801,good,4,3357,west_welmwood,3357 -3,2,847,good,10,4606,west_welmwood,4606 -2,1,672,great,8,3699,east_elmwood,3699 -2,1,903,good,2,3468,west_welmwood,3468 -0,1,513,poor,55,1386,northwest,1288.98 -1,1,642,great,7,2551,east_elmwood,2551 -3,2,973,poor,42,4398,northwest,4204.488 -1,1,452,great,2,2458,east_elmwood,2458 -0,1,119,good,5,1440,west_welmwood,1440 -2,1,763,good,12,3228,west_welmwood,3228 -1,1,365,good,10,1745,west_welmwood,1745 -1,1,548,poor,47,1556,northwest,1471.976 -2,1,920,good,6,3486,west_welmwood,3486 -0,1,303,great,5,2138,east_elmwood,2138 -1,1,664,poor,27,1938,northwest,1910.868 -2,1,773,good,11,3249,west_welmwood,3249 -0,1,143,good,7,1510,west_welmwood,1510 -0,1,286,good,14,1554,west_welmwood,1554 -3,2,850,poor,45,4232,northwest,4020.4 -1,1,670,poor,42,1761,northwest,1683.516 -0,1,165,poor,55,1006,northwest,935.58 -0,1,322,poor,45,1297,northwest,1232.15 -1,1,496,good,3,1988,west_welmwood,1988 -2,1,777,good,4,3358,west_welmwood,3358 -0,1,290,good,7,1597,west_welmwood,1597 -0,1,281,great,14,2027,east_elmwood,2027 -0,1,185,poor,56,1063,northwest,986.464 -1,1,601,poor,23,1915,northwest,1903.51 -0,1,156,poor,33,1264,northwest,1231.136 -3,2,967,poor,18,4644,northwest,4644 -2,1,687,good,7,3253,west_welmwood,3253 -0,1,102,great,13,1852,east_elmwood,1852 -0,1,237,good,3,1577,west_welmwood,1577 -3,2,1076,great,7,5330,east_elmwood,5330 -0,1,308,poor,47,1206,northwest,1140.876 -1,1,715,poor,27,1966,northwest,1938.476 -3,2,1095,poor,38,4598,northwest,4432.472 -3,2,1110,good,14,4784,west_welmwood,4784 -2,1,788,good,2,3352,west_welmwood,3352 -2,1,568,great,4,3629,east_elmwood,3629 -3,2,1097,poor,35,4553,northwest,4416.41 -1,1,540,good,13,1885,west_welmwood,1885 -0,1,507,poor,30,1624,northwest,1591.52 -3,2,919,good,14,4647,west_welmwood,4647 -0,1,314,poor,35,1320,northwest,1280.4 
-3,2,1191,poor,43,4637,northwest,4423.698 -3,2,919,good,0,4734,west_welmwood,4734 -0,1,343,great,4,2199,east_elmwood,2199 -2,1,574,great,3,3633,east_elmwood,3633 -2,1,755,good,5,3343,west_welmwood,3343 -3,2,874,good,7,4629,west_welmwood,4629 -0,1,504,poor,29,1602,northwest,1573.164 -0,1,444,poor,41,1477,northwest,1414.966 -0,1,386,good,9,1741,west_welmwood,1741 -0,1,481,poor,51,1404,northwest,1316.952 -1,1,696,poor,32,1908,northwest,1862.208 -3,2,840,poor,15,4547,northwest,4547 -1,1,697,good,7,2115,west_welmwood,2115 -1,1,676,good,2,2202,west_welmwood,2202 -3,2,1094,great,9,5382,east_elmwood,5382 -2,1,669,poor,28,3004,northwest,2955.936 -1,1,340,good,10,1729,west_welmwood,1729 -1,1,475,good,1,1923,west_welmwood,1923 -0,1,340,good,1,1728,west_welmwood,1728 -1,1,389,good,2,1846,west_welmwood,1846 -1,1,623,poor,31,1787,northwest,1747.686 -0,1,264,good,12,1525,west_welmwood,1525 -1,1,701,poor,60,1578,northwest,1451.76 -3,2,1069,good,12,4760,west_welmwood,4760 -2,1,908,good,1,3465,west_welmwood,3465 -1,1,745,good,0,2253,west_welmwood,2253 -3,2,989,poor,43,4431,northwest,4227.174 -0,1,470,great,9,2315,east_elmwood,2315 -1,1,599,good,6,2035,west_welmwood,2035 -3,2,885,good,11,4622,west_welmwood,4622 -2,1,508,good,11,2950,west_welmwood,2950 -2,1,763,good,2,3372,west_welmwood,3372 -2,1,545,great,10,3587,east_elmwood,3587 -1,1,505,great,1,2538,east_elmwood,2538 -0,1,262,poor,18,1434,northwest,1434 -3,2,812,poor,48,4166,northwest,3932.704 -2,1,592,poor,55,2638,northwest,2453.34 -1,1,356,poor,38,1450,northwest,1397.8 -2,1,519,good,5,3082,west_welmwood,3082 -2,1,877,poor,23,3254,northwest,3234.476 -2,1,644,great,14,3579,east_elmwood,3579 -1,1,745,good,12,2161,west_welmwood,2161 -1,1,747,good,7,2205,west_welmwood,2205 -1,1,380,great,11,2231,east_elmwood,2231 -1,1,459,poor,62,1349,northwest,1235.684 -3,2,1073,good,11,4773,west_welmwood,4773 -0,1,376,good,7,1726,west_welmwood,1726 -2,1,757,poor,38,2969,northwest,2862.116 -1,1,658,poor,33,1807,northwest,1760.018 
-2,1,816,great,11,3848,east_elmwood,3848 -2,1,848,good,11,3371,west_welmwood,3371 -0,1,259,good,2,1634,west_welmwood,1634 -2,1,607,good,4,3142,west_welmwood,3142 -3,2,894,good,6,4719,west_welmwood,4719 -1,1,339,poor,64,1220,northwest,1112.64 -2,1,756,poor,40,2930,northwest,2812.8 -0,1,209,good,9,1518,west_welmwood,1518 -2,1,567,good,5,3070,west_welmwood,3070 -0,1,504,good,0,1877,west_welmwood,1877 -0,1,486,poor,52,1344,northwest,1257.984 -1,1,666,good,11,2086,west_welmwood,2086 -2,1,532,great,6,3558,east_elmwood,3558 -1,1,503,good,12,1840,west_welmwood,1840 -1,1,453,poor,56,1345,northwest,1248.16 -2,1,659,great,5,3716,east_elmwood,3716 -0,1,278,good,0,1661,west_welmwood,1661 -2,1,524,good,5,3083,west_welmwood,3083 -1,1,527,good,11,1904,west_welmwood,1904 -1,1,661,poor,52,1601,northwest,1498.536 -3,2,1025,good,13,4775,west_welmwood,4775 -3,2,810,good,3,4660,west_welmwood,4660 -3,2,1086,good,13,4843,west_welmwood,4843 -3,2,812,good,0,4661,west_welmwood,4661 -3,2,881,good,0,4742,west_welmwood,4742 -1,1,510,great,12,2428,east_elmwood,2428 -1,1,378,good,11,1815,west_welmwood,1815 -2,1,889,great,10,3934,east_elmwood,3934 -1,1,748,poor,18,2038,northwest,2038 -1,1,476,poor,57,1366,northwest,1264.916 -3,2,931,poor,31,4433,northwest,4335.474 -0,1,111,good,9,1468,west_welmwood,1468 -1,1,571,poor,36,1757,northwest,1700.776 -2,1,909,poor,63,2898,northwest,2648.772 -3,2,1131,poor,35,4601,northwest,4462.97 -1,1,552,great,9,2446,east_elmwood,2446 -0,1,419,good,14,1709,west_welmwood,1709 -2,1,551,good,6,3130,west_welmwood,3130 -3,2,1120,good,7,4938,west_welmwood,4938 -1,1,647,poor,58,1536,northwest,1419.264 -0,1,435,good,0,1785,west_welmwood,1785 -0,1,222,good,1,1656,west_welmwood,1656 -0,1,468,good,11,1725,west_welmwood,1725 -2,1,561,poor,47,2693,northwest,2547.578 -1,1,747,poor,59,1699,northwest,1566.478 -1,1,696,great,14,2573,east_elmwood,2573 -1,1,440,good,13,1797,west_welmwood,1797 -1,1,686,great,9,2600,east_elmwood,2600 -2,1,712,great,7,3701,east_elmwood,3701 
-0,1,302,poor,26,1432,northwest,1414.816 -2,1,626,good,14,3074,west_welmwood,3074 -1,1,700,poor,55,1663,northwest,1546.59 -1,1,409,good,12,1808,west_welmwood,1808 -3,2,880,good,4,4678,west_welmwood,4678 -3,2,915,good,5,4713,west_welmwood,4713 -1,1,748,poor,48,1756,northwest,1657.664 -0,1,227,good,9,1487,west_welmwood,1487 -1,1,390,good,8,1840,west_welmwood,1840 -0,1,460,good,0,1902,west_welmwood,1902 -1,1,392,good,8,1782,west_welmwood,1782 -2,1,921,good,14,3383,west_welmwood,3383 -2,1,756,good,3,3336,west_welmwood,3336 -1,1,604,great,1,2583,east_elmwood,2583 -3,2,832,great,3,5137,east_elmwood,5137 -3,2,1022,great,2,5386,east_elmwood,5386 -2,1,517,poor,36,2756,northwest,2667.808 -3,2,1018,good,6,4847,west_welmwood,4847 -0,1,422,good,7,1727,west_welmwood,1727 -2,1,716,good,8,3283,west_welmwood,3283 -0,1,301,great,3,2172,east_elmwood,2172 -1,1,608,poor,29,1829,northwest,1796.078 -2,1,889,poor,17,3277,northwest,3277 -2,1,577,great,9,3636,east_elmwood,3636 -2,1,671,great,0,3765,east_elmwood,3765 -0,1,315,poor,44,1277,northwest,1215.704 -1,1,709,great,1,2739,east_elmwood,2739 -3,2,1204,poor,63,4400,northwest,4021.6 -0,1,187,great,13,1920,east_elmwood,1920 -0,1,543,poor,31,1677,northwest,1640.106 -1,1,351,good,6,1799,west_welmwood,1799 -2,1,816,poor,45,2952,northwest,2804.4 -3,2,994,poor,53,4323,northwest,4037.682 -1,1,612,good,2,2102,west_welmwood,2102 -0,1,447,good,8,1798,west_welmwood,1798 -0,1,457,good,7,1744,west_welmwood,1744 -3,2,1211,good,8,4968,west_welmwood,4968 -0,1,284,poor,60,1041,northwest,957.72 -1,1,741,poor,19,2035,northwest,2035 -0,1,425,great,2,2298,east_elmwood,2298 -1,1,308,poor,36,1491,northwest,1443.288 -1,1,390,good,8,1776,west_welmwood,1776 -1,1,675,great,14,2529,east_elmwood,2529 -0,1,283,poor,44,1262,northwest,1201.424 -1,1,460,good,1,1938,west_welmwood,1938 -1,1,678,great,4,2609,east_elmwood,2609 -2,1,892,great,11,3925,east_elmwood,3925 -3,2,1149,good,7,4978,west_welmwood,4978 -3,2,876,great,2,5207,east_elmwood,5207 
-1,1,588,poor,21,1850,northwest,1846.3 -3,2,1005,good,5,4794,west_welmwood,4794 -2,1,636,good,3,3193,west_welmwood,3193 -3,2,955,good,3,4763,west_welmwood,4763 -1,1,661,poor,37,1741,northwest,1681.806 -1,1,477,great,7,2447,east_elmwood,2447 -3,2,1230,good,4,5070,west_welmwood,5070 -2,1,735,great,13,3698,east_elmwood,3698 -2,1,630,poor,60,2667,northwest,2453.64 -2,1,506,good,12,3017,west_welmwood,3017 -3,2,1191,poor,18,4896,northwest,4896 -3,2,1103,poor,44,4475,northwest,4260.2 -2,1,760,great,9,3780,east_elmwood,3780 -3,2,979,good,14,4693,west_welmwood,4693 -0,1,440,great,7,2231,east_elmwood,2231 -1,1,359,great,12,2286,east_elmwood,2286 -0,1,125,poor,38,1104,northwest,1064.256 -1,1,692,good,0,2147,west_welmwood,2147 -0,1,503,poor,49,1455,northwest,1370.61 -1,1,659,good,12,2025,west_welmwood,2025 -3,2,995,good,12,4682,west_welmwood,4682 -0,1,435,poor,27,1522,northwest,1500.692 -3,2,1028,great,14,5204,east_elmwood,5204 -3,2,880,good,5,4676,west_welmwood,4676 -2,1,796,poor,15,3261,northwest,3261 -0,1,252,great,6,2140,east_elmwood,2140 -1,1,675,good,11,2112,west_welmwood,2112 -0,1,133,good,3,1482,west_welmwood,1482 -0,1,400,poor,18,1622,northwest,1622 -3,2,954,poor,19,4629,northwest,4629 -1,1,332,great,12,2174,east_elmwood,2174 -3,2,1178,poor,53,4479,northwest,4183.386 -0,1,334,good,6,1698,west_welmwood,1698 -0,1,176,good,2,1548,west_welmwood,1548 -2,1,854,poor,58,2909,northwest,2687.916 -2,1,846,good,1,3461,west_welmwood,3461 -2,1,543,good,2,3129,west_welmwood,3129 -2,1,620,good,1,3225,west_welmwood,3225 -3,2,911,poor,31,4419,northwest,4321.782 -0,1,304,great,1,2241,east_elmwood,2241 -0,1,241,great,3,2116,east_elmwood,2116 -0,1,453,great,13,2269,east_elmwood,2269 -2,1,589,great,11,3617,east_elmwood,3617 -2,1,942,poor,15,3419,northwest,3419 -2,1,678,poor,37,2936,northwest,2836.176 -1,1,465,poor,42,1519,northwest,1452.164 -0,1,256,great,0,2140,east_elmwood,2140 -2,1,561,poor,56,2580,northwest,2394.24 -3,2,1032,good,4,4804,west_welmwood,4804 
-0,1,502,good,0,1915,west_welmwood,1915 -1,1,617,poor,35,1739,northwest,1686.83 -3,2,846,good,10,4620,west_welmwood,4620 -2,1,632,poor,37,2874,northwest,2776.284 -0,1,244,poor,59,1087,northwest,1002.214 -1,1,366,good,12,1708,west_welmwood,1708 -3,2,1097,good,4,4865,west_welmwood,4865 -3,2,931,poor,42,4385,northwest,4192.06 -2,1,531,good,3,3142,west_welmwood,3142 -1,1,428,good,13,1796,west_welmwood,1796 -0,1,427,great,1,2283,east_elmwood,2283 -2,1,682,poor,24,3036,northwest,3011.712 -0,1,181,poor,16,1430,northwest,1430 -2,1,672,good,10,3179,west_welmwood,3179 -2,1,548,poor,35,2762,northwest,2679.14 -2,1,721,good,5,3274,west_welmwood,3274 -1,1,379,poor,56,1283,northwest,1190.624 -0,1,369,poor,17,1592,northwest,1592 -2,1,738,good,2,3329,west_welmwood,3329 -0,1,321,great,14,2034,east_elmwood,2034 -2,1,636,great,9,3684,east_elmwood,3684 -1,1,564,poor,60,1452,northwest,1335.84 -2,1,723,good,7,3250,west_welmwood,3250 -1,1,584,poor,16,1950,northwest,1950 -1,1,307,good,12,1690,west_welmwood,1690 -2,1,946,great,11,3975,east_elmwood,3975 -2,1,655,good,2,3226,west_welmwood,3226 -3,2,1101,good,4,4903,west_welmwood,4903 -2,1,779,poor,27,3101,northwest,3057.586 -1,1,497,poor,28,1737,northwest,1709.208 -3,2,806,poor,55,4066,northwest,3781.38 -1,1,746,good,4,2223,west_welmwood,2223 -0,1,358,poor,30,1462,northwest,1432.76 -0,1,181,good,11,1489,west_welmwood,1489 -1,1,553,good,8,1964,west_welmwood,1964 -2,1,864,poor,15,3299,northwest,3299 -3,2,1040,good,11,4785,west_welmwood,4785 -2,1,516,good,8,3037,west_welmwood,3037 -2,1,517,good,13,3005,west_welmwood,3005 -2,1,835,poor,27,3131,northwest,3087.166 -3,2,1163,good,7,4987,west_welmwood,4987 -3,2,1044,poor,64,4281,northwest,3904.272 -3,2,1025,good,3,4887,west_welmwood,4887 -0,1,103,great,6,1970,east_elmwood,1970 -3,2,990,poor,33,4476,northwest,4359.624 -2,1,575,good,7,3143,west_welmwood,3143 -3,2,1014,good,6,4796,west_welmwood,4796 -3,2,1185,poor,16,4897,northwest,4897 -3,2,1058,good,0,4865,west_welmwood,4865 
-1,1,412,great,10,2284,east_elmwood,2284 -0,1,251,good,12,1547,west_welmwood,1547 -2,1,711,good,1,3301,west_welmwood,3301 -2,1,813,good,4,3401,west_welmwood,3401 -2,1,726,poor,33,3016,northwest,2937.584 -0,1,192,poor,19,1434,northwest,1434 -1,1,486,good,11,1922,west_welmwood,1922 -3,2,1198,great,4,5524,east_elmwood,5524 -1,1,316,good,14,1688,west_welmwood,1688 -3,2,1238,great,5,5550,east_elmwood,5550 -3,2,868,good,1,4658,west_welmwood,4658 -1,1,371,great,13,2264,east_elmwood,2264 -2,1,597,good,5,3135,west_welmwood,3135 -0,1,298,good,13,1597,west_welmwood,1597 -3,2,1049,good,6,4831,west_welmwood,4831 -2,1,553,good,11,3063,west_welmwood,3063 -0,1,521,poor,61,1322,northwest,1213.596 -3,2,902,poor,49,4300,northwest,4050.6 -1,1,719,poor,41,1791,northwest,1715.778 -2,1,713,poor,63,2703,northwest,2470.542 -3,2,1196,good,0,5060,west_welmwood,5060 -1,1,426,good,8,1874,west_welmwood,1874 -3,2,1235,poor,44,4671,northwest,4446.792 -2,1,643,great,3,3670,east_elmwood,3670 -0,1,535,poor,26,1692,northwest,1671.696 -2,1,581,poor,29,2913,northwest,2860.566 -0,1,111,poor,62,849,northwest,777.684 -1,1,469,great,6,2432,east_elmwood,2432 -3,2,880,great,11,5103,east_elmwood,5103 -1,1,487,good,8,1909,west_welmwood,1909 -1,1,492,good,14,1834,west_welmwood,1834 -2,1,846,good,4,3394,west_welmwood,3394 -0,1,121,good,1,1522,west_welmwood,1522 -3,2,984,poor,25,4561,northwest,4515.39 -3,2,964,good,8,4705,west_welmwood,4705 -3,2,998,good,3,4818,west_welmwood,4818 -0,1,282,great,3,2119,east_elmwood,2119 -0,1,498,good,12,1811,west_welmwood,1811 -0,1,157,good,9,1511,west_welmwood,1511 -1,1,495,good,4,1969,west_welmwood,1969 -1,1,522,good,4,2029,west_welmwood,2029 -3,2,1036,poor,42,4496,northwest,4298.176 -0,1,441,good,5,1830,west_welmwood,1830 -1,1,361,good,1,1870,west_welmwood,1870 -1,1,452,poor,50,1426,northwest,1340.44 -0,1,441,great,0,2373,east_elmwood,2373 -1,1,396,good,0,1918,west_welmwood,1918 -0,1,220,good,12,1528,west_welmwood,1528 -3,2,894,good,14,4628,west_welmwood,4628 
-0,1,111,good,7,1435,west_welmwood,1435 -2,1,881,great,7,3872,east_elmwood,3872 -1,1,613,great,13,2478,east_elmwood,2478 -1,1,380,poor,58,1296,northwest,1197.504 -3,2,985,good,5,4829,west_welmwood,4829 -2,1,784,great,14,3738,east_elmwood,3738 -1,1,431,poor,19,1787,northwest,1787 -2,1,564,great,3,3664,east_elmwood,3664 -1,1,445,poor,64,1296,northwest,1181.952 -0,1,236,good,3,1631,west_welmwood,1631 -2,1,893,good,6,3407,west_welmwood,3407 -1,1,355,good,3,1824,west_welmwood,1824 -1,1,600,poor,42,1655,northwest,1582.18 -3,2,929,great,7,5162,east_elmwood,5162 -2,1,881,poor,30,3146,northwest,3083.08 -3,2,801,poor,22,4462,northwest,4444.152 -1,1,732,good,9,2132,west_welmwood,2132 -1,1,702,poor,23,2003,northwest,1990.982 -3,2,1033,poor,53,4355,northwest,4067.57 -2,1,700,great,9,3691,east_elmwood,3691 -3,2,875,great,5,5145,east_elmwood,5145 -1,1,330,good,3,1798,west_welmwood,1798 -0,1,530,good,0,1961,west_welmwood,1961 -0,1,364,great,8,2215,east_elmwood,2215 -1,1,359,poor,42,1481,northwest,1415.836 -2,1,564,great,2,3645,east_elmwood,3645 -0,1,122,great,13,1897,east_elmwood,1897 -1,1,590,good,0,2125,west_welmwood,2125 -0,1,224,poor,42,1246,northwest,1191.176 -1,1,515,good,12,1869,west_welmwood,1869 -3,2,852,poor,20,4488,northwest,4488 -0,1,515,poor,57,1374,northwest,1272.324 -0,1,394,poor,45,1335,northwest,1268.25 -2,1,881,good,6,3375,west_welmwood,3375 -0,1,221,poor,56,1022,northwest,948.416 -0,1,462,great,5,2299,east_elmwood,2299 -0,1,406,poor,24,1535,northwest,1522.72 -1,1,595,great,4,2540,east_elmwood,2540 -1,1,474,good,11,1855,west_welmwood,1855 -3,2,1249,good,14,4931,west_welmwood,4931 -3,2,1223,good,10,5008,west_welmwood,5008 -3,2,834,good,0,4704,west_welmwood,4704 -0,1,288,poor,38,1288,northwest,1241.632 -3,2,1035,poor,44,4451,northwest,4237.352 -2,1,567,good,2,3183,west_welmwood,3183 -2,1,884,good,4,3430,west_welmwood,3430 -2,1,888,great,4,3900,east_elmwood,3900 -0,1,227,great,9,2064,east_elmwood,2064 -0,1,204,poor,39,1223,northwest,1176.526 
-3,2,897,good,13,4660,west_welmwood,4660 -1,1,586,good,5,2056,west_welmwood,2056 -0,1,532,great,10,2315,east_elmwood,2315 -1,1,377,good,5,1839,west_welmwood,1839 -0,1,534,great,5,2429,east_elmwood,2429 -3,2,1201,poor,32,4767,northwest,4652.592 -0,1,163,poor,51,1003,northwest,940.814 -2,1,844,great,4,3905,east_elmwood,3905 -3,2,1024,poor,37,4487,northwest,4334.442 -0,1,196,poor,46,1142,northwest,1082.616 -2,1,661,poor,54,2732,northwest,2546.224 -0,1,317,good,10,1616,west_welmwood,1616 -0,1,162,great,13,1930,east_elmwood,1930 -1,1,721,great,10,2596,east_elmwood,2596 -1,1,472,great,10,2340,east_elmwood,2340 -2,1,650,poor,62,2617,northwest,2397.172 -2,1,535,good,9,3017,west_welmwood,3017 -1,1,311,good,8,1764,west_welmwood,1764 -0,1,371,good,7,1720,west_welmwood,1720 -1,1,524,good,11,1907,west_welmwood,1907 -2,1,580,good,5,3176,west_welmwood,3176 -1,1,681,poor,61,1583,northwest,1453.194 -2,1,943,good,14,3374,west_welmwood,3374 -1,1,309,poor,53,1290,northwest,1204.86 -3,2,1233,good,9,4960,west_welmwood,4960 -0,1,198,poor,49,1133,northwest,1067.286 -2,1,897,great,9,3919,east_elmwood,3919 -3,2,1129,poor,15,4828,northwest,4828 -3,2,1014,great,11,5263,east_elmwood,5263 -3,2,1119,good,9,4864,west_welmwood,4864 -3,2,962,poor,59,4199,northwest,3871.478 -1,1,317,poor,17,1655,northwest,1655 -0,1,165,poor,55,1010,northwest,939.3 -1,1,738,great,7,2648,east_elmwood,2648 -3,2,977,great,9,5281,east_elmwood,5281 -1,1,722,great,7,2686,east_elmwood,2686 -2,1,706,good,7,3187,west_welmwood,3187 -1,1,424,good,9,1811,west_welmwood,1811 -1,1,669,great,6,2594,east_elmwood,2594 -1,1,327,great,9,2252,east_elmwood,2252 -1,1,435,good,3,1915,west_welmwood,1915 -2,1,706,poor,31,2990,northwest,2924.22 -3,2,1200,good,10,4920,west_welmwood,4920 -2,1,622,great,0,3703,east_elmwood,3703 -1,1,338,poor,42,1463,northwest,1398.628 -2,1,724,poor,60,2725,northwest,2507 -0,1,255,good,14,1526,west_welmwood,1526 -1,1,501,poor,33,1718,northwest,1673.332 -2,1,643,great,7,3703,east_elmwood,3703 
-3,2,1054,poor,19,4696,northwest,4696 -2,1,817,good,8,3296,west_welmwood,3296 -1,1,301,poor,36,1402,northwest,1357.136 -0,1,261,good,13,1577,west_welmwood,1577 -2,1,925,good,12,3447,west_welmwood,3447 -3,2,939,poor,52,4310,northwest,4034.16 -3,2,1203,poor,22,4848,northwest,4828.608 -0,1,439,great,12,2214,east_elmwood,2214 -1,1,344,poor,28,1603,northwest,1577.352 -1,1,404,poor,28,1602,northwest,1576.368 -2,1,723,good,6,3240,west_welmwood,3240 -1,1,494,great,1,2526,east_elmwood,2526 -2,1,870,great,10,3907,east_elmwood,3907 -1,1,562,good,1,2024,west_welmwood,2024 -1,1,335,poor,46,1347,northwest,1276.956 -3,2,897,good,10,4618,west_welmwood,4618 -2,1,512,good,4,3089,west_welmwood,3089 -1,1,743,good,13,2128,west_welmwood,2128 -3,2,1197,good,8,4984,west_welmwood,4984 -3,2,1221,great,4,5521,east_elmwood,5521 -0,1,327,great,9,2158,east_elmwood,2158 -0,1,199,good,8,1479,west_welmwood,1479 -3,2,942,good,13,4663,west_welmwood,4663 -2,1,616,good,4,3150,west_welmwood,3150 -0,1,380,good,14,1598,west_welmwood,1598 -2,1,707,good,12,3225,west_welmwood,3225 -0,1,498,poor,57,1355,northwest,1254.73 -2,1,566,poor,37,2755,northwest,2661.33 -3,2,1173,good,12,4855,west_welmwood,4855 -0,1,312,poor,28,1385,northwest,1362.84 -0,1,433,great,7,2220,east_elmwood,2220 -3,2,1243,good,0,5140,west_welmwood,5140 -2,1,946,good,12,3467,west_welmwood,3467 -2,1,666,good,1,3245,west_welmwood,3245 -2,1,689,poor,44,2879,northwest,2740.808 -2,1,549,good,11,2999,west_welmwood,2999 -0,1,514,poor,47,1399,northwest,1323.454 -3,2,932,poor,47,4356,northwest,4120.776 -0,1,325,poor,45,1268,northwest,1204.6 -3,2,1245,poor,30,4820,northwest,4723.6 -3,2,817,poor,37,4341,northwest,4193.406 -0,1,347,poor,27,1441,northwest,1420.826 -2,1,701,great,0,3846,east_elmwood,3846 -1,1,534,good,8,2001,west_welmwood,2001 -0,1,543,great,7,2385,east_elmwood,2385 -2,1,893,poor,59,2858,northwest,2635.076 -1,1,402,good,12,1809,west_welmwood,1809 -3,2,1110,great,2,5441,east_elmwood,5441 -0,1,295,poor,17,1566,northwest,1566 
-0,1,277,good,9,1603,west_welmwood,1603 -1,1,552,great,4,2499,east_elmwood,2499 -0,1,110,great,0,2000,east_elmwood,2000 -1,1,347,poor,20,1624,northwest,1624 -3,2,1182,great,6,5497,east_elmwood,5497 -0,1,297,good,3,1657,west_welmwood,1657 -2,1,619,good,10,3152,west_welmwood,3152 -2,1,596,good,12,3080,west_welmwood,3080 -0,1,303,poor,35,1308,northwest,1268.76 -0,1,316,good,8,1681,west_welmwood,1681 -1,1,482,good,5,1888,west_welmwood,1888 -2,1,846,poor,16,3277,northwest,3277 -3,2,831,good,14,4539,west_welmwood,4539 -0,1,251,great,10,2056,east_elmwood,2056 -3,2,1154,poor,53,4515,northwest,4217.01 -2,1,564,good,7,3141,west_welmwood,3141 -3,2,1171,good,10,4928,west_welmwood,4928 -0,1,232,poor,27,1367,northwest,1347.862 -2,1,737,poor,56,2739,northwest,2541.792 -2,1,511,poor,47,2682,northwest,2537.172 -1,1,337,good,0,1855,west_welmwood,1855 -1,1,355,poor,22,1619,northwest,1612.524 -0,1,290,poor,48,1257,northwest,1186.608 -3,2,977,good,1,4809,west_welmwood,4809 -2,1,568,great,13,3566,east_elmwood,3566 -2,1,633,poor,57,2693,northwest,2493.718 -0,1,341,poor,39,1382,northwest,1329.484 -2,1,818,good,4,3394,west_welmwood,3394 -0,1,191,good,0,1578,west_welmwood,1578 -1,1,649,poor,27,1829,northwest,1803.394 -0,1,237,good,2,1613,west_welmwood,1613 -3,2,1028,good,8,4790,west_welmwood,4790 -1,1,459,good,12,1827,west_welmwood,1827 -2,1,786,good,0,3413,west_welmwood,3413 -0,1,454,poor,63,1200,northwest,1096.8 -1,1,539,poor,55,1464,northwest,1361.52 -0,1,220,poor,42,1247,northwest,1192.132 -1,1,747,poor,49,1803,northwest,1698.426 -2,1,542,poor,24,2878,northwest,2854.976 -3,2,913,good,11,4640,west_welmwood,4640 -2,1,949,good,0,3548,west_welmwood,3548 -0,1,212,poor,57,1028,northwest,951.928 -2,1,793,good,3,3378,west_welmwood,3378 -0,1,382,poor,29,1477,northwest,1450.414 -0,1,515,poor,52,1424,northwest,1332.864 -3,2,966,great,6,5240,east_elmwood,5240 -3,2,1034,poor,36,4514,northwest,4369.552 -3,2,1192,good,0,5042,west_welmwood,5042 -2,1,787,great,3,3843,east_elmwood,3843 
-1,1,326,great,3,2268,east_elmwood,2268 -2,1,760,good,2,3291,west_welmwood,3291 -3,2,996,great,7,5321,east_elmwood,5321 -0,1,148,poor,58,953,northwest,880.572 -1,1,662,good,5,2125,west_welmwood,2125 -3,2,1038,poor,62,4311,northwest,3948.876 -1,1,690,great,5,2667,east_elmwood,2667 -3,2,1029,good,14,4699,west_welmwood,4699 -2,1,847,poor,46,2988,northwest,2832.624 -3,2,922,good,12,4642,west_welmwood,4642 -2,1,606,good,2,3183,west_welmwood,3183 -1,1,385,poor,36,1543,northwest,1493.624 -1,1,546,good,6,1938,west_welmwood,1938 -0,1,472,poor,26,1578,northwest,1559.064 -1,1,419,good,1,1911,west_welmwood,1911 -2,1,550,poor,37,2793,northwest,2698.038 -1,1,617,good,0,2077,west_welmwood,2077 -1,1,495,good,6,1920,west_welmwood,1920 -2,1,686,good,8,3186,west_welmwood,3186 -2,1,930,good,6,3484,west_welmwood,3484 -1,1,420,good,1,1933,west_welmwood,1933 -1,1,494,poor,24,1755,northwest,1740.96 -3,2,1012,good,2,4807,west_welmwood,4807 -3,2,835,poor,43,4232,northwest,4037.328 -2,1,539,great,2,3594,east_elmwood,3594 -0,1,153,great,13,1937,east_elmwood,1937 -2,1,776,great,14,3734,east_elmwood,3734 -1,1,563,good,13,1975,west_welmwood,1975 -3,2,1144,poor,51,4445,northwest,4169.41 -1,1,549,great,8,2478,east_elmwood,2478 -3,2,1015,good,9,4782,west_welmwood,4782 -1,1,446,poor,50,1465,northwest,1377.1 -0,1,531,great,0,2408,east_elmwood,2408 -2,1,538,poor,64,2513,northwest,2291.856 -0,1,279,good,14,1542,west_welmwood,1542 -2,1,860,great,2,3923,east_elmwood,3923 -3,2,1153,poor,31,4693,northwest,4589.754 -0,1,288,poor,36,1323,northwest,1280.664 -0,1,366,great,11,2190,east_elmwood,2190 -2,1,539,good,9,3029,west_welmwood,3029 -1,1,482,poor,61,1327,northwest,1218.186 -0,1,148,good,13,1458,west_welmwood,1458 -0,1,433,good,12,1703,west_welmwood,1703 -0,1,147,good,8,1472,west_welmwood,1472 -3,2,825,poor,30,4326,northwest,4239.48 -3,2,824,good,3,4621,west_welmwood,4621 -1,1,418,great,6,2321,east_elmwood,2321 -0,1,512,poor,55,1385,northwest,1288.05 -0,1,456,poor,42,1470,northwest,1405.32 
-1,1,582,good,8,2050,west_welmwood,2050 -0,1,107,good,10,1438,west_welmwood,1438 -1,1,628,great,3,2559,east_elmwood,2559 -2,1,827,poor,41,3053,northwest,2924.774 -0,1,267,poor,30,1378,northwest,1350.44 -0,1,363,good,0,1750,west_welmwood,1750 -2,1,801,great,10,3776,east_elmwood,3776 -1,1,444,good,11,1817,west_welmwood,1817 -0,1,273,poor,29,1379,northwest,1354.178 -3,2,1012,great,9,5268,east_elmwood,5268 -2,1,837,good,10,3374,west_welmwood,3374 -0,1,329,good,12,1585,west_welmwood,1585 -3,2,1119,good,6,4899,west_welmwood,4899 -2,1,611,good,14,3048,west_welmwood,3048 -2,1,541,good,9,3014,west_welmwood,3014 -3,2,848,good,9,4594,west_welmwood,4594 -2,1,730,good,12,3164,west_welmwood,3164 -2,1,602,poor,49,2747,northwest,2587.674 -3,2,1164,poor,43,4604,northwest,4392.216 -1,1,632,good,13,2018,west_welmwood,2018 -2,1,797,good,9,3260,west_welmwood,3260 -1,1,377,great,2,2333,east_elmwood,2333 -2,1,780,great,4,3828,east_elmwood,3828 -3,2,1004,great,13,5216,east_elmwood,5216 -2,1,769,poor,22,3101,northwest,3088.596 -1,1,733,good,0,2230,west_welmwood,2230 -1,1,309,poor,62,1209,northwest,1107.444 -3,2,1178,good,11,4967,west_welmwood,4967 -1,1,464,poor,20,1768,northwest,1768 -2,1,835,good,12,3349,west_welmwood,3349 -3,2,1046,great,7,5363,east_elmwood,5363 -1,1,480,good,3,1923,west_welmwood,1923 -0,1,397,great,11,2173,east_elmwood,2173 -3,2,966,good,6,4768,west_welmwood,4768 -1,1,414,poor,37,1531,northwest,1478.946 -3,2,848,great,0,5242,east_elmwood,5242 -1,1,610,poor,38,1760,northwest,1696.64 -1,1,542,great,7,2480,east_elmwood,2480 -0,1,299,good,3,1669,west_welmwood,1669 -1,1,347,poor,53,1311,northwest,1224.474 -2,1,608,good,9,3107,west_welmwood,3107 -0,1,259,good,11,1594,west_welmwood,1594 -0,1,498,poor,39,1517,northwest,1459.354 -0,1,470,great,11,2308,east_elmwood,2308 -3,2,871,great,0,5265,east_elmwood,5265 -0,1,485,poor,48,1416,northwest,1336.704 -2,1,516,great,7,3546,east_elmwood,3546 -2,1,697,great,1,3787,east_elmwood,3787 -1,1,370,poor,58,1319,northwest,1218.756 
-0,1,406,good,10,1685,west_welmwood,1685 -2,1,737,great,0,3878,east_elmwood,3878 -2,1,610,poor,18,3020,northwest,3020 -0,1,392,poor,26,1557,northwest,1538.316 -0,1,429,good,3,1801,west_welmwood,1801 -1,1,368,poor,29,1626,northwest,1596.732 -3,2,1111,good,7,4918,west_welmwood,4918 -2,1,612,great,10,3598,east_elmwood,3598 -2,1,860,good,6,3387,west_welmwood,3387 -2,1,939,good,3,3519,west_welmwood,3519 -1,1,676,poor,34,1848,northwest,1796.256 -1,1,513,good,8,1915,west_welmwood,1915 -3,2,945,good,7,4725,west_welmwood,4725 -1,1,400,poor,22,1636,northwest,1629.456 -1,1,704,great,2,2723,east_elmwood,2723 -1,1,721,poor,29,1931,northwest,1896.242 -2,1,554,good,12,3073,west_welmwood,3073 -0,1,439,good,3,1790,west_welmwood,1790 -0,1,523,good,3,1902,west_welmwood,1902 -1,1,477,good,3,1996,west_welmwood,1996 -1,1,698,good,4,2133,west_welmwood,2133 -0,1,463,poor,27,1582,northwest,1559.852 -1,1,321,poor,50,1307,northwest,1228.58 -0,1,170,good,5,1563,west_welmwood,1563 -3,2,1179,poor,44,4609,northwest,4387.768 -3,2,1117,good,8,4919,west_welmwood,4919 -1,1,672,good,11,2088,west_welmwood,2088 -2,1,937,good,6,3506,west_welmwood,3506 -2,1,545,poor,61,2489,northwest,2284.902 -2,1,532,great,9,3565,east_elmwood,3565 -0,1,528,good,6,1860,west_welmwood,1860 -2,1,813,great,3,3857,east_elmwood,3857 -2,1,821,poor,27,3118,northwest,3074.348 -3,2,1061,poor,44,4442,northwest,4228.784 -1,1,678,good,2,2125,west_welmwood,2125 -1,1,452,poor,18,1771,northwest,1771 -0,1,150,good,4,1490,west_welmwood,1490 -1,1,742,poor,52,1673,northwest,1565.928 -1,1,408,poor,17,1779,northwest,1779 -2,1,949,poor,56,2983,northwest,2768.224 -3,2,1229,great,3,5583,east_elmwood,5583 -0,1,287,great,14,2093,east_elmwood,2093 -0,1,169,great,9,1932,east_elmwood,1932 -2,1,623,good,12,3083,west_welmwood,3083 -3,2,1148,good,2,4931,west_welmwood,4931 -1,1,484,good,1,1999,west_welmwood,1999 -0,1,111,good,5,1510,west_welmwood,1510 -2,1,855,good,1,3433,west_welmwood,3433 -0,1,427,great,2,2312,east_elmwood,2312 
-1,1,504,great,9,2416,east_elmwood,2416 -2,1,704,good,14,3142,west_welmwood,3142 -1,1,504,great,14,2346,east_elmwood,2346 -3,2,1127,good,10,4896,west_welmwood,4896 -1,1,501,good,6,1924,west_welmwood,1924 -2,1,923,poor,31,3199,northwest,3128.622 -3,2,894,good,2,4699,west_welmwood,4699 -2,1,778,good,6,3336,west_welmwood,3336 -1,1,698,good,7,2155,west_welmwood,2155 -1,1,376,great,4,2339,east_elmwood,2339 -3,2,1188,good,6,4992,west_welmwood,4992 -2,1,905,poor,16,3349,northwest,3349 -0,1,549,poor,47,1481,northwest,1401.026 -2,1,558,good,3,3115,west_welmwood,3115 -1,1,418,good,3,1880,west_welmwood,1880 -3,2,961,poor,43,4349,northwest,4148.946 -3,2,1112,good,5,4913,west_welmwood,4913 -0,1,467,good,2,1879,west_welmwood,1879 -3,2,1111,good,0,4999,west_welmwood,4999 -1,1,590,good,10,1975,west_welmwood,1975 -0,1,518,poor,62,1315,northwest,1204.54 -1,1,416,great,5,2341,east_elmwood,2341 -3,2,1051,poor,51,4382,northwest,4110.316 -2,1,934,good,10,3440,west_welmwood,3440 -0,1,209,good,3,1628,west_welmwood,1628 -2,1,618,great,12,3642,east_elmwood,3642 -1,1,648,great,10,2563,east_elmwood,2563 -1,1,338,good,10,1725,west_welmwood,1725 -3,2,1073,poor,21,4716,northwest,4706.568 -2,1,511,good,14,2926,west_welmwood,2926 -2,1,701,good,13,3156,west_welmwood,3156 -1,1,419,good,14,1753,west_welmwood,1753 -2,1,806,good,2,3364,west_welmwood,3364 -2,1,667,good,3,3278,west_welmwood,3278 -0,1,428,poor,30,1539,northwest,1508.22 -0,1,270,good,3,1664,west_welmwood,1664 -1,1,465,poor,19,1822,northwest,1822 -2,1,724,great,3,3806,east_elmwood,3806 -3,2,1105,poor,55,4384,northwest,4077.12 -2,1,911,great,1,4037,east_elmwood,4037 -0,1,495,poor,33,1574,northwest,1533.076 -2,1,818,great,1,3883,east_elmwood,3883 -0,1,524,good,4,1902,west_welmwood,1902 -3,2,1011,poor,64,4268,northwest,3892.416 -1,1,439,poor,26,1644,northwest,1624.272 -1,1,326,poor,49,1311,northwest,1234.962 -2,1,584,good,5,3141,west_welmwood,3141 -0,1,278,great,1,2140,east_elmwood,2140 -2,1,929,good,8,3455,west_welmwood,3455 
-3,2,903,good,6,4700,west_welmwood,4700 -0,1,307,good,4,1658,west_welmwood,1658 -3,2,945,good,1,4779,west_welmwood,4779 -3,2,871,great,12,5094,east_elmwood,5094 -2,1,742,good,8,3288,west_welmwood,3288 -0,1,400,good,7,1734,west_welmwood,1734 -3,2,1158,great,7,5408,east_elmwood,5408 -3,2,862,poor,28,4473,northwest,4401.432 -1,1,513,poor,52,1540,northwest,1441.44 -0,1,248,good,9,1524,west_welmwood,1524 -3,2,1052,great,9,5345,east_elmwood,5345 -0,1,246,poor,60,1003,northwest,922.76 -3,2,1190,poor,55,4468,northwest,4155.24 -1,1,586,poor,61,1515,northwest,1390.77 -3,2,1219,great,7,5472,east_elmwood,5472 -1,1,371,great,13,2192,east_elmwood,2192 -1,1,729,good,14,2120,west_welmwood,2120 -3,2,986,poor,40,4388,northwest,4212.48 -3,2,1026,good,0,4893,west_welmwood,4893 -2,1,640,good,12,3120,west_welmwood,3120 -2,1,841,poor,26,3139,northwest,3101.332 -3,2,897,poor,35,4427,northwest,4294.19 -0,1,446,good,6,1823,west_welmwood,1823 -2,1,883,poor,22,3278,northwest,3264.888 -3,2,801,good,14,4468,west_welmwood,4468 -1,1,693,good,13,2108,west_welmwood,2108 -0,1,221,poor,40,1250,northwest,1200 -3,2,1116,poor,36,4593,northwest,4446.024 -3,2,1164,good,7,4922,west_welmwood,4922 -3,2,941,good,10,4643,west_welmwood,4643 -2,1,685,good,5,3272,west_welmwood,3272 -0,1,205,great,8,2064,east_elmwood,2064 -0,1,519,good,10,1781,west_welmwood,1781 -3,2,1064,good,13,4750,west_welmwood,4750 -0,1,486,poor,63,1273,northwest,1163.522 -3,2,1235,great,3,5545,east_elmwood,5545 -0,1,297,good,7,1648,west_welmwood,1648 -0,1,121,good,13,1402,west_welmwood,1402 -0,1,140,good,11,1385,west_welmwood,1385 -1,1,540,good,8,1974,west_welmwood,1974 -1,1,407,good,6,1848,west_welmwood,1848 -2,1,557,good,2,3185,west_welmwood,3185 -3,2,1002,poor,60,4225,northwest,3887 -1,1,505,good,11,1885,west_welmwood,1885 -2,1,861,great,12,3801,east_elmwood,3801 -1,1,718,good,14,2072,west_welmwood,2072 -0,1,366,great,6,2209,east_elmwood,2209 -2,1,696,great,10,3728,east_elmwood,3728 -3,2,1123,good,9,4870,west_welmwood,4870 
-2,1,713,great,11,3667,east_elmwood,3667 -2,1,884,poor,23,3232,northwest,3212.608 -0,1,505,great,9,2293,east_elmwood,2293 -2,1,659,poor,55,2716,northwest,2525.88 -0,1,369,great,10,2212,east_elmwood,2212 -3,2,1220,poor,45,4615,northwest,4384.25 -2,1,522,great,1,3574,east_elmwood,3574 -2,1,671,poor,32,2946,northwest,2875.296 -2,1,762,good,5,3346,west_welmwood,3346 -0,1,192,poor,53,1111,northwest,1037.674 -1,1,553,poor,24,1803,northwest,1788.576 -1,1,618,great,12,2480,east_elmwood,2480 -1,1,394,poor,35,1534,northwest,1487.98 -3,2,828,poor,48,4230,northwest,3993.12 -1,1,705,poor,36,1873,northwest,1813.064 -3,2,987,poor,52,4355,northwest,4076.28 -3,2,854,great,10,5072,east_elmwood,5072 -2,1,930,poor,62,2932,northwest,2685.712 -0,1,283,good,7,1595,west_welmwood,1595 -0,1,241,good,5,1567,west_welmwood,1567 -0,1,315,good,1,1752,west_welmwood,1752 -2,1,687,poor,45,2880,northwest,2736 -3,2,1120,good,13,4833,west_welmwood,4833 -1,1,698,great,4,2654,east_elmwood,2654 -3,2,1243,poor,60,4474,northwest,4116.08 -0,1,180,great,12,1963,east_elmwood,1963 -3,2,1030,great,7,5314,east_elmwood,5314 -2,1,791,poor,39,3002,northwest,2887.924 -1,1,361,poor,16,1714,northwest,1714 -0,1,529,poor,23,1684,northwest,1673.896 -3,2,1091,poor,50,4429,northwest,4163.26 -2,1,888,good,14,3347,west_welmwood,3347 -1,1,479,great,10,2417,east_elmwood,2417 -0,1,122,great,5,1934,east_elmwood,1934 -2,1,929,good,5,3497,west_welmwood,3497 -0,1,439,good,7,1755,west_welmwood,1755 -3,2,1185,good,5,4999,west_welmwood,4999 -3,2,837,poor,37,4283,northwest,4137.378 -3,2,897,good,8,4674,west_welmwood,4674 -1,1,354,good,10,1776,west_welmwood,1776 -3,2,1205,poor,16,4914,northwest,4914 -3,2,1063,good,9,4835,west_welmwood,4835 -3,2,966,poor,20,4632,northwest,4632 -3,2,1192,great,6,5480,east_elmwood,5480 -1,1,693,poor,42,1807,northwest,1727.492 -0,1,214,poor,34,1305,northwest,1268.46 -3,2,1075,great,10,5320,east_elmwood,5320 -1,1,616,great,13,2456,east_elmwood,2456 -3,2,805,good,6,4632,west_welmwood,4632 
-1,1,534,good,14,1936,west_welmwood,1936 -0,1,408,good,7,1705,west_welmwood,1705 -2,1,803,good,2,3354,west_welmwood,3354 -1,1,410,great,1,2365,east_elmwood,2365 -0,1,378,poor,21,1536,northwest,1532.928 -2,1,882,great,7,3892,east_elmwood,3892 -1,1,647,good,12,2041,west_welmwood,2041 -1,1,560,good,9,1930,west_welmwood,1930 -0,1,507,poor,30,1630,northwest,1597.4 -3,2,1129,poor,63,4354,northwest,3979.556 -0,1,115,good,9,1427,west_welmwood,1427 -1,1,318,poor,25,1518,northwest,1502.82 -1,1,315,poor,50,1302,northwest,1223.88 -2,1,748,poor,15,3188,northwest,3188 -2,1,685,poor,33,2938,northwest,2861.612 -0,1,154,poor,37,1180,northwest,1139.88 -0,1,515,good,10,1786,west_welmwood,1786 -1,1,355,good,6,1825,west_welmwood,1825 -3,2,1217,poor,19,4867,northwest,4867 -0,1,430,great,7,2292,east_elmwood,2292 -3,2,1227,good,9,4974,west_welmwood,4974 -3,2,1186,poor,63,4418,northwest,4038.052 -1,1,729,poor,41,1861,northwest,1782.838 -2,1,549,good,9,3018,west_welmwood,3018 -1,1,644,great,4,2611,east_elmwood,2611 -3,2,976,great,1,5331,east_elmwood,5331 -2,1,566,good,10,3052,west_welmwood,3052 -0,1,113,poor,26,1238,northwest,1223.144 -0,1,402,great,7,2248,east_elmwood,2248 -3,2,1243,good,2,5070,west_welmwood,5070 -2,1,893,great,5,3923,east_elmwood,3923 -0,1,104,poor,49,1025,northwest,965.55 -2,1,764,great,9,3784,east_elmwood,3784 -2,1,570,poor,64,2544,northwest,2320.128 -0,1,269,good,9,1546,west_welmwood,1546 -1,1,500,poor,17,1865,northwest,1865 -2,1,699,great,10,3685,east_elmwood,3685 -1,1,492,poor,19,1759,northwest,1759 -1,1,494,poor,49,1480,northwest,1394.16 -2,1,628,poor,53,2656,northwest,2480.704 -2,1,871,poor,37,3139,northwest,3032.274 -2,1,897,great,7,3894,east_elmwood,3894 -3,2,1249,good,12,5017,west_welmwood,5017 -1,1,458,good,6,1862,west_welmwood,1862 -2,1,616,great,0,3709,east_elmwood,3709 -2,1,846,good,6,3405,west_welmwood,3405 -1,1,565,good,11,1980,west_welmwood,1980 -3,2,1197,poor,56,4477,northwest,4154.656 -3,2,1177,good,3,4975,west_welmwood,4975 
-2,1,776,good,5,3306,west_welmwood,3306 -3,2,819,good,14,4552,west_welmwood,4552 -0,1,344,good,8,1634,west_welmwood,1634 -0,1,435,good,7,1770,west_welmwood,1770 -1,1,677,great,3,2645,east_elmwood,2645 -2,1,702,poor,46,2831,northwest,2683.788 -1,1,483,good,7,1893,west_welmwood,1893 -1,1,571,good,8,2032,west_welmwood,2032 -1,1,368,good,5,1770,west_welmwood,1770 -2,1,559,poor,35,2786,northwest,2702.42 -0,1,510,poor,15,1781,northwest,1781 -3,2,1054,poor,26,4623,northwest,4567.524 -0,1,459,poor,26,1626,northwest,1606.488 -0,1,325,good,9,1598,west_welmwood,1598 -3,2,878,great,7,5200,east_elmwood,5200 -1,1,467,good,3,1960,west_welmwood,1960 -3,2,1072,good,12,4791,west_welmwood,4791 -1,1,474,poor,40,1607,northwest,1542.72 -0,1,486,good,4,1855,west_welmwood,1855 -0,1,459,good,11,1728,west_welmwood,1728 -1,1,344,great,6,2267,east_elmwood,2267 -2,1,861,poor,43,3040,northwest,2900.16 -3,2,1239,poor,24,4816,northwest,4777.472 -0,1,361,poor,33,1427,northwest,1389.898 -3,2,913,good,1,4794,west_welmwood,4794 -0,1,220,good,5,1619,west_welmwood,1619 -2,1,574,poor,47,2698,northwest,2552.308 -1,1,309,good,2,1832,west_welmwood,1832 -3,2,998,good,12,4758,west_welmwood,4758 -0,1,525,poor,44,1460,northwest,1389.92 -1,1,384,good,7,1776,west_welmwood,1776 -0,1,175,great,13,1952,east_elmwood,1952 -3,2,872,poor,53,4207,northwest,3929.338 -0,1,419,great,7,2209,east_elmwood,2209 -3,2,862,great,13,5060,east_elmwood,5060 -1,1,671,poor,40,1776,northwest,1704.96 -0,1,243,great,11,2075,east_elmwood,2075 -1,1,576,good,12,1992,west_welmwood,1992 -1,1,419,good,13,1777,west_welmwood,1777 -2,1,827,good,10,3303,west_welmwood,3303 -3,2,994,good,1,4871,west_welmwood,4871 -0,1,355,great,14,2076,east_elmwood,2076 -3,2,1024,good,9,4791,west_welmwood,4791 -1,1,340,great,6,2288,east_elmwood,2288 -1,1,433,good,9,1827,west_welmwood,1827 -0,1,473,poor,55,1351,northwest,1256.43 -1,1,579,great,1,2610,east_elmwood,2610 -3,2,1201,good,3,5067,west_welmwood,5067 -1,1,412,poor,64,1248,northwest,1138.176 
-1,1,306,great,13,2225,east_elmwood,2225 -3,2,1218,good,0,5106,west_welmwood,5106 -0,1,386,good,2,1746,west_welmwood,1746 -2,1,634,good,12,3135,west_welmwood,3135 -1,1,505,great,10,2402,east_elmwood,2402 -2,1,847,poor,16,3301,northwest,3301 -0,1,126,good,6,1438,west_welmwood,1438 -3,2,1088,good,7,4873,west_welmwood,4873 -3,2,1171,good,13,4918,west_welmwood,4918 -3,2,1182,good,5,4972,west_welmwood,4972 -3,2,1076,poor,38,4529,northwest,4365.956 -2,1,607,poor,52,2714,northwest,2540.304 -1,1,576,poor,28,1804,northwest,1775.136 -3,2,1102,poor,17,4823,northwest,4823 -3,2,1128,good,0,4987,west_welmwood,4987 -1,1,629,good,6,2080,west_welmwood,2080 -0,1,449,poor,20,1602,northwest,1602 -2,1,744,good,3,3345,west_welmwood,3345 -3,2,1172,good,7,4932,west_welmwood,4932 -0,1,315,great,14,2072,east_elmwood,2072 -0,1,154,great,6,2003,east_elmwood,2003 -2,1,631,good,2,3185,west_welmwood,3185 -2,1,503,good,9,2963,west_welmwood,2963 -0,1,333,good,5,1724,west_welmwood,1724 -3,2,803,good,7,4602,west_welmwood,4602 -3,2,969,good,6,4761,west_welmwood,4761 -2,1,624,good,6,3195,west_welmwood,3195 -3,2,1138,good,1,4989,west_welmwood,4989 -2,1,633,good,8,3167,west_welmwood,3167 -3,2,1170,good,11,4900,west_welmwood,4900 -3,2,1107,good,6,4866,west_welmwood,4866 -0,1,147,great,10,1927,east_elmwood,1927 -2,1,788,poor,38,3017,northwest,2908.388 -3,2,1239,good,13,4991,west_welmwood,4991 -1,1,383,great,7,2356,east_elmwood,2356 -0,1,205,good,0,1587,west_welmwood,1587 -0,1,315,good,5,1654,west_welmwood,1654 -2,1,659,great,10,3684,east_elmwood,3684 -1,1,361,poor,53,1348,northwest,1259.032 -1,1,382,poor,43,1469,northwest,1401.426 -2,1,569,poor,37,2830,northwest,2733.78 -1,1,351,great,9,2283,east_elmwood,2283 -0,1,110,poor,55,961,northwest,893.73 -3,2,1160,great,8,5455,east_elmwood,5455 -1,1,376,good,6,1861,west_welmwood,1861 -0,1,119,poor,62,871,northwest,797.836 -0,1,323,great,13,2098,east_elmwood,2098 -0,1,262,good,12,1584,west_welmwood,1584 -0,1,119,poor,15,1373,northwest,1373 
-2,1,899,good,12,3397,west_welmwood,3397 -3,2,971,good,5,4819,west_welmwood,4819 -2,1,938,good,12,3393,west_welmwood,3393 -2,1,655,good,3,3246,west_welmwood,3246 -0,1,449,good,9,1746,west_welmwood,1746 -1,1,613,poor,32,1777,northwest,1734.352 -0,1,251,great,2,2139,east_elmwood,2139 -1,1,374,good,11,1775,west_welmwood,1775 -3,2,1081,poor,34,4581,northwest,4452.732 -2,1,514,poor,25,2884,northwest,2855.16 -0,1,450,good,6,1773,west_welmwood,1773 -0,1,519,good,6,1882,west_welmwood,1882 -3,2,831,great,3,5122,east_elmwood,5122 -2,1,568,poor,40,2724,northwest,2615.04 -3,2,1194,good,7,4960,west_welmwood,4960 -2,1,620,poor,44,2758,northwest,2625.616 -3,2,990,great,7,5286,east_elmwood,5286 -2,1,513,great,14,3453,east_elmwood,3453 -0,1,196,good,11,1462,west_welmwood,1462 -2,1,594,good,9,3139,west_welmwood,3139 -0,1,167,poor,55,967,northwest,899.31 -3,2,1098,good,6,4855,west_welmwood,4855 -3,2,1072,good,11,4819,west_welmwood,4819 -1,1,397,good,13,1743,west_welmwood,1743 -1,1,383,good,13,1751,west_welmwood,1751 -2,1,546,poor,60,2515,northwest,2313.8 -3,2,1200,good,0,5080,west_welmwood,5080 -2,1,815,poor,43,2999,northwest,2861.046 -1,1,401,good,9,1768,west_welmwood,1768 -2,1,565,good,2,3137,west_welmwood,3137 -1,1,362,good,1,1883,west_welmwood,1883 -3,2,1156,good,5,4997,west_welmwood,4997 -1,1,502,poor,51,1522,northwest,1427.636 -1,1,487,poor,30,1648,northwest,1615.04 -3,2,1179,good,7,4937,west_welmwood,4937 -0,1,412,poor,37,1438,northwest,1389.108 -1,1,455,good,3,1891,west_welmwood,1891 -1,1,677,good,9,2040,west_welmwood,2040 -2,1,633,good,14,3098,west_welmwood,3098 -3,2,949,good,9,4699,west_welmwood,4699 -3,2,1125,great,2,5435,east_elmwood,5435 -1,1,581,poor,40,1711,northwest,1642.56 -2,1,900,poor,56,2957,northwest,2744.096 -3,2,980,great,3,5251,east_elmwood,5251 -0,1,549,good,0,1940,west_welmwood,1940 -1,1,615,poor,21,1870,northwest,1866.26 -0,1,473,good,14,1770,west_welmwood,1770 -1,1,523,good,7,1941,west_welmwood,1941 -2,1,745,poor,26,3105,northwest,3067.74 
-1,1,566,good,7,1980,west_welmwood,1980 -1,1,544,good,7,1998,west_welmwood,1998 -2,1,903,poor,63,2855,northwest,2609.47 -2,1,600,great,6,3601,east_elmwood,3601 -1,1,465,good,0,1994,west_welmwood,1994 -2,1,675,poor,27,3009,northwest,2966.874 -3,2,893,great,7,5126,east_elmwood,5126 -3,2,1111,great,1,5487,east_elmwood,5487 -3,2,1144,poor,20,4783,northwest,4783 -2,1,508,poor,24,2863,northwest,2840.096 -1,1,551,great,12,2456,east_elmwood,2456 -2,1,676,poor,29,2943,northwest,2890.026 -1,1,699,good,7,2090,west_welmwood,2090 -1,1,429,good,7,1843,west_welmwood,1843 -1,1,590,good,4,2030,west_welmwood,2030 -0,1,128,good,4,1496,west_welmwood,1496 -3,2,1150,poor,57,4427,northwest,4099.402 -1,1,392,good,12,1758,west_welmwood,1758 -2,1,806,good,8,3286,west_welmwood,3286 -2,1,938,good,10,3427,west_welmwood,3427 -2,1,552,good,5,3072,west_welmwood,3072 -1,1,316,great,8,2283,east_elmwood,2283 -0,1,319,great,12,2071,east_elmwood,2071 -1,1,483,poor,59,1430,northwest,1318.46 -1,1,400,great,0,2413,east_elmwood,2413 -1,1,586,poor,48,1568,northwest,1480.192 -1,1,484,poor,24,1700,northwest,1686.4 -3,2,859,poor,23,4430,northwest,4403.42 -0,1,534,poor,56,1386,northwest,1286.208 -2,1,936,poor,47,3071,northwest,2905.166 -3,2,911,good,7,4665,west_welmwood,4665 -2,1,900,poor,24,3258,northwest,3231.936 -3,2,933,good,10,4719,west_welmwood,4719 -1,1,464,great,5,2366,east_elmwood,2366 -2,1,773,poor,44,2961,northwest,2818.872 -0,1,203,poor,26,1295,northwest,1279.46 -1,1,321,poor,18,1605,northwest,1605 -1,1,539,good,4,1976,west_welmwood,1976 -1,1,655,poor,45,1723,northwest,1636.85 -3,2,1185,good,4,4991,west_welmwood,4991 -1,1,611,poor,25,1904,northwest,1884.96 -0,1,509,good,11,1833,west_welmwood,1833 -2,1,666,good,8,3165,west_welmwood,3165 -2,1,539,poor,47,2621,northwest,2479.466 -0,1,442,poor,30,1546,northwest,1515.08 -1,1,690,poor,34,1809,northwest,1758.348 -3,2,1062,good,14,4744,west_welmwood,4744 -1,1,428,good,8,1880,west_welmwood,1880 -3,2,814,poor,60,4057,northwest,3732.44 
-2,1,742,poor,38,2989,northwest,2881.396 -1,1,618,poor,62,1495,northwest,1369.42 -1,1,384,great,10,2250,east_elmwood,2250 -2,1,506,great,13,3521,east_elmwood,3521 -0,1,286,poor,35,1344,northwest,1303.68 -0,1,152,great,4,1973,east_elmwood,1973 -0,1,131,good,7,1412,west_welmwood,1412 -3,2,1079,good,9,4866,west_welmwood,4866 -2,1,904,poor,33,3141,northwest,3059.334 -2,1,804,good,6,3327,west_welmwood,3327 -0,1,384,good,7,1736,west_welmwood,1736 -0,1,264,poor,27,1344,northwest,1325.184 -0,1,542,poor,17,1816,northwest,1816 -3,2,1236,poor,55,4549,northwest,4230.57 -0,1,487,good,10,1820,west_welmwood,1820 -0,1,286,poor,55,1091,northwest,1014.63 -0,1,351,good,12,1606,west_welmwood,1606 -0,1,534,great,8,2345,east_elmwood,2345 -0,1,461,good,6,1780,west_welmwood,1780 -2,1,768,good,6,3269,west_welmwood,3269 -2,1,949,great,5,3998,east_elmwood,3998 -1,1,638,poor,31,1802,northwest,1762.356 -0,1,135,good,10,1466,west_welmwood,1466 -1,1,724,poor,44,1760,northwest,1675.52 -0,1,131,good,8,1439,west_welmwood,1439 -3,2,1241,great,2,5615,east_elmwood,5615 -0,1,524,great,5,2348,east_elmwood,2348 -1,1,494,good,10,1875,west_welmwood,1875 -1,1,592,poor,33,1739,northwest,1693.786 -2,1,759,poor,61,2720,northwest,2496.96 -0,1,176,good,3,1572,west_welmwood,1572 -1,1,435,poor,16,1814,northwest,1814 -3,2,966,poor,28,4583,northwest,4509.672 -0,1,466,poor,31,1529,northwest,1495.362 -1,1,507,good,8,1932,west_welmwood,1932 -3,2,1012,poor,35,4527,northwest,4391.19 -0,1,533,good,6,1849,west_welmwood,1849 -2,1,837,great,13,3834,east_elmwood,3834 -1,1,509,good,11,1916,west_welmwood,1916 -0,1,165,good,5,1562,west_welmwood,1562 -3,2,1055,great,0,5400,east_elmwood,5400 -0,1,267,good,2,1634,west_welmwood,1634 -2,1,545,good,9,3067,west_welmwood,3067 -3,2,1085,good,1,4931,west_welmwood,4931 -0,1,273,poor,30,1400,northwest,1372 -2,1,692,poor,31,2948,northwest,2883.144 -2,1,527,great,11,3468,east_elmwood,3468 -3,2,1144,good,3,4994,west_welmwood,4994 -0,1,288,great,0,2151,east_elmwood,2151 
-0,1,313,poor,45,1239,northwest,1177.05 -0,1,124,good,12,1406,west_welmwood,1406 -1,1,332,poor,36,1496,northwest,1448.128 -2,1,724,good,14,3220,west_welmwood,3220 -1,1,429,great,0,2445,east_elmwood,2445 -2,1,752,good,1,3361,west_welmwood,3361 -3,2,825,great,6,5124,east_elmwood,5124 -3,2,1190,good,1,5008,west_welmwood,5008 -2,1,579,poor,58,2575,northwest,2379.3 -3,2,1069,poor,49,4472,northwest,4212.624 -0,1,345,poor,45,1309,northwest,1243.55 -0,1,293,great,8,2124,east_elmwood,2124 -0,1,134,good,4,1490,west_welmwood,1490 -1,1,477,poor,31,1643,northwest,1606.854 -0,1,211,poor,35,1281,northwest,1242.57 -3,2,1106,good,3,4951,west_welmwood,4951 -1,1,429,great,3,2442,east_elmwood,2442 -3,2,1186,poor,45,4551,northwest,4323.45 -0,1,287,good,13,1594,west_welmwood,1594 -2,1,927,great,0,3981,east_elmwood,3981 -3,2,1073,great,4,5379,east_elmwood,5379 -0,1,166,poor,15,1394,northwest,1394 -2,1,598,great,7,3600,east_elmwood,3600 -2,1,908,good,5,3482,west_welmwood,3482 -2,1,788,good,1,3364,west_welmwood,3364 -1,1,330,poor,43,1414,northwest,1348.956 -3,2,1233,good,9,4982,west_welmwood,4982 -1,1,496,good,3,1929,west_welmwood,1929 -3,2,1209,good,6,5039,west_welmwood,5039 -3,2,1224,poor,55,4500,northwest,4185 -1,1,409,good,2,1863,west_welmwood,1863 -2,1,605,good,10,3120,west_welmwood,3120 -2,1,517,poor,17,2916,northwest,2916 -1,1,515,great,0,2558,east_elmwood,2558 -1,1,373,great,4,2363,east_elmwood,2363 -2,1,637,great,8,3702,east_elmwood,3702 -0,1,219,poor,46,1158,northwest,1097.784 -0,1,151,great,0,2053,east_elmwood,2053 -0,1,187,great,3,2058,east_elmwood,2058 -0,1,158,poor,20,1338,northwest,1338 -0,1,146,good,6,1463,west_welmwood,1463 -1,1,493,good,4,1960,west_welmwood,1960 -1,1,348,good,2,1853,west_welmwood,1853 -2,1,603,poor,22,2956,northwest,2944.176 -1,1,546,good,9,1930,west_welmwood,1930 -3,2,1144,good,2,4932,west_welmwood,4932 -0,1,413,good,12,1674,west_welmwood,1674 -0,1,341,great,12,2156,east_elmwood,2156 -2,1,624,great,12,3642,east_elmwood,3642 
-1,1,478,great,5,2434,east_elmwood,2434 -1,1,683,poor,64,1559,northwest,1421.808 -1,1,581,poor,34,1782,northwest,1732.104 -3,2,1210,good,1,5081,west_welmwood,5081 -1,1,658,great,3,2633,east_elmwood,2633 -2,1,758,poor,58,2745,northwest,2536.38 -1,1,611,good,2,2109,west_welmwood,2109 -1,1,522,good,10,1878,west_welmwood,1878 -1,1,612,great,11,2516,east_elmwood,2516 -0,1,408,good,2,1783,west_welmwood,1783 -2,1,789,poor,34,3021,northwest,2936.412 -3,2,963,good,7,4710,west_welmwood,4710 -2,1,718,great,1,3856,east_elmwood,3856 -0,1,403,poor,40,1428,northwest,1370.88 -1,1,598,poor,22,1879,northwest,1871.484 -1,1,689,good,1,2183,west_welmwood,2183 -0,1,418,great,1,2314,east_elmwood,2314 -2,1,727,poor,54,2809,northwest,2617.988 -1,1,738,great,8,2665,east_elmwood,2665 -3,2,1130,great,10,5400,east_elmwood,5400 -3,2,1057,good,9,4773,west_welmwood,4773 -1,1,719,great,0,2669,east_elmwood,2669 -1,1,472,good,1,1971,west_welmwood,1971 -1,1,745,good,6,2170,west_welmwood,2170 -1,1,443,good,5,1934,west_welmwood,1934 -3,2,1075,great,14,5236,east_elmwood,5236 -3,2,1144,good,2,5021,west_welmwood,5021 -1,1,339,great,8,2210,east_elmwood,2210 -1,1,575,good,5,1995,west_welmwood,1995 -2,1,542,good,1,3084,west_welmwood,3084 -0,1,439,good,4,1793,west_welmwood,1793 -2,1,877,great,3,3963,east_elmwood,3963 -0,1,457,poor,46,1407,northwest,1333.836 -0,1,388,poor,36,1380,northwest,1335.84 -0,1,484,poor,21,1677,northwest,1673.646 -0,1,500,poor,21,1687,northwest,1683.626 -3,2,942,good,0,4781,west_welmwood,4781 -3,2,1204,poor,64,4437,northwest,4046.544 -3,2,1102,good,13,4820,west_welmwood,4820 -1,1,603,poor,44,1712,northwest,1629.824 -1,1,499,good,6,1979,west_welmwood,1979 -2,1,582,good,3,3200,west_welmwood,3200 -3,2,874,poor,44,4292,northwest,4085.984 -3,2,967,great,9,5269,east_elmwood,5269 -1,1,668,great,4,2619,east_elmwood,2619 -0,1,530,great,7,2369,east_elmwood,2369 -2,1,750,good,11,3190,west_welmwood,3190 -0,1,263,good,4,1653,west_welmwood,1653 -1,1,467,good,2,1985,west_welmwood,1985 
-3,2,1039,poor,39,4471,northwest,4301.102 -0,1,187,good,6,1489,west_welmwood,1489 -0,1,209,poor,50,1100,northwest,1034 -0,1,398,poor,59,1201,northwest,1107.322 -3,2,945,poor,63,4131,northwest,3775.734 -2,1,657,poor,31,2920,northwest,2855.76 -1,1,537,good,1,2023,west_welmwood,2023 -0,1,334,great,2,2231,east_elmwood,2231 -2,1,660,great,9,3678,east_elmwood,3678 -1,1,479,good,6,1903,west_welmwood,1903 -3,2,917,poor,64,4102,northwest,3741.024 -2,1,898,good,7,3389,west_welmwood,3389 -2,1,822,great,1,3950,east_elmwood,3950 -0,1,270,poor,47,1156,northwest,1093.576 -2,1,625,poor,64,2613,northwest,2383.056 -0,1,268,poor,45,1248,northwest,1185.6 -0,1,218,poor,46,1131,northwest,1072.188 -0,1,422,poor,45,1402,northwest,1331.9 -2,1,766,poor,61,2790,northwest,2561.22 -0,1,224,good,3,1638,west_welmwood,1638 -2,1,762,good,1,3326,west_welmwood,3326 -2,1,782,good,3,3318,west_welmwood,3318 -0,1,518,poor,36,1537,northwest,1487.816 -1,1,548,good,1,2076,west_welmwood,2076 -1,1,665,poor,44,1750,northwest,1666 -2,1,723,good,3,3340,west_welmwood,3340 -0,1,373,good,14,1599,west_welmwood,1599 -0,1,442,poor,63,1205,northwest,1101.37 -2,1,743,poor,49,2861,northwest,2695.062 -2,1,872,poor,35,3072,northwest,2979.84 -1,1,504,good,5,1988,west_welmwood,1988 -2,1,829,great,10,3837,east_elmwood,3837 -1,1,314,good,13,1656,west_welmwood,1656 -0,1,418,great,13,2186,east_elmwood,2186 -1,1,312,poor,29,1509,northwest,1481.838 -2,1,548,poor,17,3012,northwest,3012 -1,1,524,great,7,2475,east_elmwood,2475 -3,2,1142,good,14,4841,west_welmwood,4841 -2,1,929,great,9,3918,east_elmwood,3918 -1,1,505,great,3,2499,east_elmwood,2499 -2,1,665,poor,16,3097,northwest,3097 -0,1,109,great,13,1873,east_elmwood,1873 -0,1,290,poor,37,1274,northwest,1230.684 -3,2,1213,good,10,5001,west_welmwood,5001 -2,1,728,good,12,3185,west_welmwood,3185 -2,1,933,good,5,3520,west_welmwood,3520 -2,1,895,poor,23,3263,northwest,3243.422 -2,1,547,poor,23,2896,northwest,2878.624 -0,1,549,poor,41,1587,northwest,1520.346 
-0,1,217,poor,34,1258,northwest,1222.776 -1,1,492,poor,56,1414,northwest,1312.192 -1,1,421,good,10,1818,west_welmwood,1818 -3,2,1034,poor,15,4684,northwest,4684 -2,1,944,good,10,3477,west_welmwood,3477 -1,1,347,good,12,1681,west_welmwood,1681 -3,2,1208,good,14,4951,west_welmwood,4951 -0,1,449,good,4,1802,west_welmwood,1802 -0,1,464,great,5,2343,east_elmwood,2343 -0,1,205,poor,55,1085,northwest,1009.05 -1,1,435,good,9,1891,west_welmwood,1891 -1,1,682,great,10,2617,east_elmwood,2617 -1,1,317,poor,57,1244,northwest,1151.944 -1,1,730,great,12,2649,east_elmwood,2649 -1,1,668,good,14,2036,west_welmwood,2036 -3,2,1199,good,4,5000,west_welmwood,5000 -0,1,461,great,11,2224,east_elmwood,2224 -2,1,576,poor,43,2727,northwest,2601.558 -3,2,1028,good,3,4845,west_welmwood,4845 -2,1,601,good,5,3154,west_welmwood,3154 -0,1,122,poor,33,1178,northwest,1147.372 -0,1,390,great,5,2241,east_elmwood,2241 -2,1,845,great,7,3891,east_elmwood,3891 -0,1,511,good,1,1881,west_welmwood,1881 -3,2,998,poor,46,4435,northwest,4204.38 -3,2,1042,good,2,4860,west_welmwood,4860 -0,1,195,great,0,2122,east_elmwood,2122 -1,1,305,poor,17,1636,northwest,1636 -2,1,839,great,13,3820,east_elmwood,3820 -3,2,1218,good,1,5045,west_welmwood,5045 -1,1,443,great,6,2401,east_elmwood,2401 -3,2,1203,poor,57,4453,northwest,4123.478 -1,1,429,good,5,1838,west_welmwood,1838 -3,2,1189,good,10,4899,west_welmwood,4899 -1,1,303,good,14,1695,west_welmwood,1695 -0,1,254,poor,64,1061,northwest,967.632 -0,1,208,poor,55,1053,northwest,979.29 -1,1,453,great,8,2397,east_elmwood,2397 -0,1,230,great,14,2012,east_elmwood,2012 -0,1,198,good,9,1470,west_welmwood,1470 -3,2,1132,good,13,4886,west_welmwood,4886 -0,1,314,great,5,2184,east_elmwood,2184 -2,1,882,good,12,3382,west_welmwood,3382 -3,2,1056,good,6,4849,west_welmwood,4849 -3,2,1072,great,7,5385,east_elmwood,5385 -0,1,154,good,1,1536,west_welmwood,1536 -1,1,612,poor,48,1668,northwest,1574.592 -0,1,201,good,7,1539,west_welmwood,1539 -2,1,540,good,10,3068,west_welmwood,3068 
-3,2,1088,good,13,4829,west_welmwood,4829 -3,2,804,great,12,5012,east_elmwood,5012 -3,2,904,good,12,4659,west_welmwood,4659 -1,1,600,good,8,1992,west_welmwood,1992 -2,1,614,great,7,3612,east_elmwood,3612 -0,1,197,good,9,1472,west_welmwood,1472 -1,1,544,poor,24,1838,northwest,1823.296 -2,1,551,good,8,3079,west_welmwood,3079 -1,1,560,good,4,2067,west_welmwood,2067 -2,1,742,great,12,3692,east_elmwood,3692 -0,1,145,good,2,1486,west_welmwood,1486 -2,1,830,great,2,3897,east_elmwood,3897 -1,1,404,great,4,2348,east_elmwood,2348 -0,1,525,poor,57,1321,northwest,1223.246 -0,1,142,great,12,1940,east_elmwood,1940 -3,2,1088,good,11,4836,west_welmwood,4836 -0,1,232,poor,32,1280,northwest,1249.28 -2,1,598,poor,41,2780,northwest,2663.24 -0,1,199,great,4,2096,east_elmwood,2096 -2,1,584,great,6,3607,east_elmwood,3607 -3,2,857,good,9,4606,west_welmwood,4606 -1,1,530,great,14,2405,east_elmwood,2405 -2,1,614,poor,54,2714,northwest,2529.448 -0,1,363,good,11,1690,west_welmwood,1690 -2,1,649,poor,60,2663,northwest,2449.96 -2,1,535,good,9,2996,west_welmwood,2996 -3,2,1041,good,12,4744,west_welmwood,4744 -2,1,844,poor,53,2908,northwest,2716.072 -2,1,576,poor,64,2516,northwest,2294.592 -2,1,872,good,5,3402,west_welmwood,3402 -3,2,1172,good,10,4946,west_welmwood,4946 -3,2,874,great,9,5121,east_elmwood,5121 -3,2,1217,poor,34,4729,northwest,4596.588 -3,2,1220,poor,47,4600,northwest,4351.6 -3,2,1123,poor,46,4519,northwest,4284.012 -0,1,276,great,5,2095,east_elmwood,2095 -2,1,688,good,8,3246,west_welmwood,3246 -1,1,577,great,9,2528,east_elmwood,2528 -3,2,1232,great,6,5518,east_elmwood,5518 -3,2,973,poor,54,4298,northwest,4005.736 -1,1,668,poor,36,1815,northwest,1756.92 -1,1,711,good,11,2083,west_welmwood,2083 -0,1,235,good,14,1478,west_welmwood,1478 -3,2,1114,good,14,4836,west_welmwood,4836 -2,1,741,great,2,3832,east_elmwood,3832 -0,1,210,poor,52,1131,northwest,1058.616 -1,1,699,good,0,2162,west_welmwood,2162 -1,1,612,poor,33,1771,northwest,1724.954 -3,2,978,poor,20,4651,northwest,4651 
-3,2,1160,poor,28,4737,northwest,4661.208 -0,1,103,good,9,1411,west_welmwood,1411 -1,1,739,great,9,2698,east_elmwood,2698 -1,1,589,poor,18,1948,northwest,1948 -0,1,429,great,8,2214,east_elmwood,2214 -0,1,399,great,5,2253,east_elmwood,2253 -2,1,789,great,1,3920,east_elmwood,3920 -3,2,1132,good,14,4820,west_welmwood,4820 -2,1,515,good,2,3078,west_welmwood,3078 -2,1,936,great,11,3934,east_elmwood,3934 -2,1,669,great,14,3633,east_elmwood,3633 -1,1,523,great,0,2527,east_elmwood,2527 -0,1,192,poor,58,1038,northwest,959.112 -0,1,500,poor,50,1448,northwest,1361.12 -1,1,615,good,2,2101,west_welmwood,2101 -1,1,352,poor,17,1649,northwest,1649 -3,2,863,great,11,5108,east_elmwood,5108 -2,1,827,good,11,3304,west_welmwood,3304 -2,1,799,great,4,3887,east_elmwood,3887 -2,1,613,poor,54,2672,northwest,2490.304 -2,1,524,good,10,3052,west_welmwood,3052 -0,1,307,good,3,1664,west_welmwood,1664 -1,1,343,poor,57,1250,northwest,1157.5 -0,1,474,great,10,2319,east_elmwood,2319 -2,1,803,poor,38,3053,northwest,2943.092 -3,2,1168,poor,28,4688,northwest,4612.992 -2,1,704,good,6,3234,west_welmwood,3234 -3,2,1237,good,2,5104,west_welmwood,5104 -3,2,865,good,10,4637,west_welmwood,4637 -0,1,268,good,0,1676,west_welmwood,1676 -1,1,697,good,2,2146,west_welmwood,2146 -1,1,372,good,12,1775,west_welmwood,1775 -3,2,904,poor,51,4256,northwest,3992.128 -2,1,590,poor,61,2554,northwest,2344.572 -1,1,436,good,2,1873,west_welmwood,1873 -3,2,845,good,12,4561,west_welmwood,4561 -3,2,1059,great,3,5336,east_elmwood,5336 -0,1,424,great,14,2177,east_elmwood,2177 -1,1,735,good,1,2181,west_welmwood,2181 -2,1,698,good,5,3214,west_welmwood,3214 -1,1,543,good,10,1984,west_welmwood,1984 -2,1,811,poor,45,2921,northwest,2774.95 -2,1,623,good,0,3211,west_welmwood,3211 -1,1,338,good,14,1709,west_welmwood,1709 -2,1,810,great,7,3875,east_elmwood,3875 -0,1,466,good,13,1715,west_welmwood,1715 -0,1,530,poor,35,1625,northwest,1576.25 -2,1,684,good,12,3164,west_welmwood,3164 -2,1,780,poor,60,2822,northwest,2596.24 
-3,2,1241,good,4,5089,west_welmwood,5089 -3,2,1039,poor,19,4720,northwest,4720 -3,2,866,great,13,5039,east_elmwood,5039 -2,1,606,poor,59,2590,northwest,2387.98 -3,2,848,poor,58,4103,northwest,3791.172 -1,1,463,great,11,2357,east_elmwood,2357 -0,1,355,great,6,2189,east_elmwood,2189 -3,2,875,good,12,4608,west_welmwood,4608 -3,2,1075,good,13,4761,west_welmwood,4761 -3,2,915,good,7,4729,west_welmwood,4729 -1,1,326,poor,19,1617,northwest,1617 -3,2,905,good,1,4709,west_welmwood,4709 -0,1,441,great,8,2275,east_elmwood,2275 -2,1,748,poor,30,3078,northwest,3016.44 -0,1,514,good,3,1835,west_welmwood,1835 -0,1,443,poor,40,1410,northwest,1353.6 -0,1,207,good,10,1524,west_welmwood,1524 -1,1,657,great,9,2579,east_elmwood,2579 -2,1,866,poor,45,2988,northwest,2838.6 -0,1,221,great,12,1992,east_elmwood,1992 -3,2,986,good,1,4785,west_welmwood,4785 -2,1,852,poor,54,2881,northwest,2685.092 -0,1,176,good,8,1514,west_welmwood,1514 -0,1,372,poor,16,1590,northwest,1590 -3,2,936,good,2,4740,west_welmwood,4740 -3,2,1012,good,12,4721,west_welmwood,4721 -0,1,153,poor,28,1260,northwest,1239.84 -1,1,515,great,0,2514,east_elmwood,2514 -1,1,567,poor,47,1549,northwest,1465.354 -1,1,470,great,7,2387,east_elmwood,2387 -2,1,633,good,5,3211,west_welmwood,3211 -0,1,262,good,10,1549,west_welmwood,1549 -0,1,230,great,3,2071,east_elmwood,2071 -2,1,601,poor,29,2891,northwest,2838.962 -2,1,682,good,5,3189,west_welmwood,3189 -1,1,463,great,11,2345,east_elmwood,2345 -3,2,1197,poor,44,4649,northwest,4425.848 -2,1,925,great,1,4037,east_elmwood,4037 -0,1,277,poor,39,1305,northwest,1255.41 -3,2,1033,good,2,4852,west_welmwood,4852 -3,2,1208,poor,59,4443,northwest,4096.446 -0,1,190,good,11,1483,west_welmwood,1483 -0,1,483,good,8,1782,west_welmwood,1782 -3,2,896,great,11,5111,east_elmwood,5111 -3,2,1085,great,3,5399,east_elmwood,5399 -2,1,929,poor,48,3098,northwest,2924.512 -3,2,804,good,11,4496,west_welmwood,4496 -1,1,666,great,2,2652,east_elmwood,2652 -0,1,118,great,5,1953,east_elmwood,1953 
-0,1,230,poor,52,1070,northwest,1001.52 -0,1,298,poor,32,1418,northwest,1383.968 -0,1,214,poor,29,1329,northwest,1305.078 -0,1,525,great,1,2463,east_elmwood,2463 -3,2,1241,good,3,5020,west_welmwood,5020 -1,1,446,good,11,1822,west_welmwood,1822 -2,1,761,great,2,3878,east_elmwood,3878 -2,1,615,poor,18,3053,northwest,3053 -0,1,189,good,7,1500,west_welmwood,1500 -0,1,443,good,9,1738,west_welmwood,1738 -0,1,146,poor,64,878,northwest,800.736 -2,1,787,poor,35,2989,northwest,2899.33 -0,1,356,poor,19,1594,northwest,1594 -3,2,1240,good,9,5011,west_welmwood,5011 -2,1,720,poor,28,3088,northwest,3038.592 -0,1,379,poor,34,1427,northwest,1387.044 -3,2,994,good,13,4675,west_welmwood,4675 -3,2,1213,good,12,4952,west_welmwood,4952 -0,1,338,good,3,1736,west_welmwood,1736 -3,2,919,great,9,5192,east_elmwood,5192 -2,1,501,good,9,3054,west_welmwood,3054 -1,1,476,good,3,1912,west_welmwood,1912 -2,1,577,great,6,3602,east_elmwood,3602 -1,1,400,poor,58,1340,northwest,1238.16 -3,2,1104,poor,59,4363,northwest,4022.686 -2,1,879,poor,16,3359,northwest,3359 -1,1,320,good,4,1808,west_welmwood,1808 -3,2,1038,great,1,5356,east_elmwood,5356 -3,2,884,great,8,5168,east_elmwood,5168 -0,1,445,great,6,2307,east_elmwood,2307 -2,1,841,poor,38,3096,northwest,2984.544 -2,1,704,good,12,3137,west_welmwood,3137 -2,1,571,good,11,3035,west_welmwood,3035 -3,2,1145,poor,59,4396,northwest,4053.112 -2,1,787,good,7,3317,west_welmwood,3317 -3,2,862,good,12,4626,west_welmwood,4626 -2,1,789,poor,52,2838,northwest,2656.368 -0,1,465,good,7,1809,west_welmwood,1809 -0,1,390,poor,30,1451,northwest,1421.98 -0,1,374,good,14,1683,west_welmwood,1683 -2,1,518,poor,43,2723,northwest,2597.742 -0,1,328,great,1,2224,east_elmwood,2224 -3,2,1227,great,12,5497,east_elmwood,5497 -3,2,873,good,6,4672,west_welmwood,4672 -3,2,1236,good,9,5010,west_welmwood,5010 -0,1,294,great,7,2162,east_elmwood,2162 -3,2,801,poor,55,4114,northwest,3826.02 -0,1,522,poor,44,1514,northwest,1441.328 -3,2,940,poor,39,4446,northwest,4277.052 
-1,1,501,great,0,2485,east_elmwood,2485 -3,2,867,poor,49,4232,northwest,3986.544 -3,2,1156,poor,26,4697,northwest,4640.636 -1,1,669,good,8,2065,west_welmwood,2065 -3,2,853,poor,53,4126,northwest,3853.684 -2,1,786,poor,52,2862,northwest,2678.832 -0,1,306,good,5,1687,west_welmwood,1687 -0,1,525,good,12,1781,west_welmwood,1781 -0,1,400,good,3,1732,west_welmwood,1732 -1,1,329,poor,61,1186,northwest,1088.748 -0,1,474,good,11,1723,west_welmwood,1723 -1,1,698,poor,27,1949,northwest,1921.714 -2,1,914,good,5,3414,west_welmwood,3414 -1,1,712,poor,61,1626,northwest,1492.668 -2,1,541,great,3,3614,east_elmwood,3614 -1,1,397,good,13,1798,west_welmwood,1798 -3,2,1137,good,7,4908,west_welmwood,4908 -3,2,859,poor,56,4118,northwest,3821.504 -1,1,532,good,0,1982,west_welmwood,1982 -2,1,525,poor,37,2731,northwest,2638.146 -3,2,870,poor,61,4098,northwest,3761.964 -2,1,677,poor,31,2978,northwest,2912.484 -2,1,873,good,5,3399,west_welmwood,3399 -3,2,824,good,12,4510,west_welmwood,4510 -0,1,347,poor,36,1365,northwest,1321.32 -3,2,1215,great,0,5580,east_elmwood,5580 -0,1,201,good,1,1548,west_welmwood,1548 -2,1,590,poor,18,3026,northwest,3026 -2,1,540,good,12,2970,west_welmwood,2970 -3,2,960,good,11,4696,west_welmwood,4696 -0,1,417,good,7,1794,west_welmwood,1794 -3,2,962,great,0,5270,east_elmwood,5270 -1,1,513,good,14,1858,west_welmwood,1858 -2,1,621,good,8,3169,west_welmwood,3169 -2,1,612,poor,27,2919,northwest,2878.134 -1,1,522,great,11,2368,east_elmwood,2368 -3,2,1178,good,0,5045,west_welmwood,5045 -3,2,935,great,10,5137,east_elmwood,5137 -1,1,692,great,10,2570,east_elmwood,2570 -0,1,504,poor,28,1650,northwest,1623.6 -3,2,1058,poor,56,4393,northwest,4076.704 -0,1,112,good,14,1336,west_welmwood,1336 -0,1,396,great,0,2256,east_elmwood,2256 -1,1,744,great,3,2692,east_elmwood,2692 -2,1,715,poor,50,2783,northwest,2616.02 -3,2,1077,great,4,5369,east_elmwood,5369 -3,2,883,poor,47,4249,northwest,4019.554 -3,2,1056,good,4,4887,west_welmwood,4887 -0,1,425,good,12,1663,west_welmwood,1663 
-1,1,602,good,4,2082,west_welmwood,2082 -1,1,642,great,3,2564,east_elmwood,2564 -1,1,351,poor,51,1365,northwest,1280.37 -1,1,674,poor,39,1758,northwest,1691.196 -3,2,1039,good,7,4858,west_welmwood,4858 -1,1,528,poor,37,1635,northwest,1579.41 -2,1,627,poor,42,2791,northwest,2668.196 -0,1,305,poor,40,1265,northwest,1214.4 -1,1,645,good,0,2097,west_welmwood,2097 -3,2,1074,good,5,4857,west_welmwood,4857 -1,1,536,poor,17,1836,northwest,1836 -0,1,313,good,8,1617,west_welmwood,1617 -1,1,532,great,6,2440,east_elmwood,2440 -3,2,983,good,13,4746,west_welmwood,4746 -3,2,1075,great,5,5398,east_elmwood,5398 -2,1,558,good,1,3175,west_welmwood,3175 -1,1,664,good,6,2145,west_welmwood,2145 -2,1,813,good,2,3399,west_welmwood,3399 -2,1,635,poor,25,3028,northwest,2997.72 -3,2,1029,great,4,5297,east_elmwood,5297 -0,1,137,great,3,2009,east_elmwood,2009 -1,1,372,poor,15,1723,northwest,1723 -0,1,512,poor,58,1371,northwest,1266.804 -3,2,1086,good,7,4880,west_welmwood,4880 -2,1,919,great,4,3951,east_elmwood,3951 -0,1,401,poor,59,1238,northwest,1141.436 -1,1,495,poor,57,1378,northwest,1276.028 -1,1,462,good,10,1910,west_welmwood,1910 -1,1,714,poor,55,1616,northwest,1502.88 -1,1,458,good,6,1879,west_welmwood,1879 -1,1,460,good,2,1922,west_welmwood,1922 -2,1,889,poor,20,3308,northwest,3308 -1,1,658,poor,37,1739,northwest,1679.874 -1,1,696,poor,61,1544,northwest,1417.392 -0,1,357,poor,46,1294,northwest,1226.712 -3,2,1131,poor,18,4839,northwest,4839 -2,1,845,poor,34,3151,northwest,3062.772 -0,1,377,good,12,1686,west_welmwood,1686 -0,1,464,great,12,2259,east_elmwood,2259 -3,2,1131,good,11,4873,west_welmwood,4873 -0,1,297,poor,27,1411,northwest,1391.246 -1,1,578,poor,57,1474,northwest,1364.924 -3,2,1036,good,0,4891,west_welmwood,4891 -3,2,1026,great,13,5220,east_elmwood,5220 -2,1,837,poor,49,2923,northwest,2753.466 -2,1,803,good,5,3384,west_welmwood,3384 -3,2,875,great,5,5213,east_elmwood,5213 -3,2,1000,good,8,4787,west_welmwood,4787 -3,2,842,good,4,4701,west_welmwood,4701 
-3,2,1138,poor,21,4819,northwest,4809.362 -1,1,623,great,14,2476,east_elmwood,2476 -2,1,904,poor,25,3289,northwest,3256.11 -2,1,900,great,2,3976,east_elmwood,3976 -0,1,533,poor,48,1480,northwest,1397.12 -0,1,507,great,3,2426,east_elmwood,2426 -2,1,764,great,12,3706,east_elmwood,3706 -2,1,570,good,13,3066,west_welmwood,3066 -3,2,1218,good,12,4926,west_welmwood,4926 -2,1,686,good,4,3226,west_welmwood,3226 -1,1,708,great,5,2655,east_elmwood,2655 -3,2,985,poor,54,4296,northwest,4003.872 -1,1,346,poor,52,1304,northwest,1220.544 -2,1,743,great,0,3847,east_elmwood,3847 -0,1,144,great,0,2067,east_elmwood,2067 -3,2,921,poor,21,4569,northwest,4559.862 -2,1,520,poor,43,2665,northwest,2542.41 -2,1,762,good,14,3181,west_welmwood,3181 -3,2,1180,poor,47,4574,northwest,4327.004 -3,2,1246,poor,15,4923,northwest,4923 -3,2,1127,good,8,4910,west_welmwood,4910 -1,1,588,good,7,2015,west_welmwood,2015 -2,1,906,good,12,3393,west_welmwood,3393 -2,1,738,poor,57,2759,northwest,2554.834 -2,1,557,good,8,3118,west_welmwood,3118 -2,1,644,good,6,3203,west_welmwood,3203 -0,1,517,great,13,2293,east_elmwood,2293 -3,2,869,great,11,5135,east_elmwood,5135 -1,1,624,poor,41,1727,northwest,1654.466 -0,1,126,poor,21,1348,northwest,1345.304 -2,1,840,good,0,3404,west_welmwood,3404 -1,1,428,good,9,1816,west_welmwood,1816 -2,1,689,poor,28,3040,northwest,2991.36 -1,1,470,good,7,1892,west_welmwood,1892 -0,1,238,great,11,2041,east_elmwood,2041 -1,1,461,good,8,1857,west_welmwood,1857 -2,1,548,great,3,3659,east_elmwood,3659 -1,1,570,poor,48,1635,northwest,1543.44 -0,1,515,poor,37,1528,northwest,1476.048 -1,1,411,good,10,1782,west_welmwood,1782 -0,1,246,poor,57,1066,northwest,987.116 -2,1,859,good,0,3418,west_welmwood,3418 -3,2,971,good,10,4705,west_welmwood,4705 -2,1,549,good,4,3158,west_welmwood,3158 -3,2,1091,great,7,5411,east_elmwood,5411 -0,1,191,poor,56,1008,northwest,935.424 -1,1,377,great,8,2262,east_elmwood,2262 -0,1,250,poor,57,1063,northwest,984.338 -3,2,836,good,3,4697,west_welmwood,4697 
-2,1,702,great,0,3784,east_elmwood,3784 -1,1,394,great,1,2337,east_elmwood,2337 -2,1,719,good,6,3223,west_welmwood,3223 -2,1,682,great,12,3623,east_elmwood,3623 -1,1,318,poor,42,1367,northwest,1306.852 -0,1,332,good,3,1739,west_welmwood,1739 -1,1,498,good,11,1904,west_welmwood,1904 -2,1,672,great,4,3703,east_elmwood,3703 -1,1,575,poor,62,1504,northwest,1377.664 -3,2,1036,good,11,4768,west_welmwood,4768 -2,1,812,good,4,3356,west_welmwood,3356 -1,1,706,poor,40,1770,northwest,1699.2 -2,1,584,good,5,3107,west_welmwood,3107 -1,1,555,poor,59,1419,northwest,1308.318 -1,1,653,good,4,2103,west_welmwood,2103 -1,1,604,good,2,2088,west_welmwood,2088 -2,1,798,poor,26,3098,northwest,3060.824 -3,2,1163,good,2,4995,west_welmwood,4995 -2,1,646,poor,27,2982,northwest,2940.252 -2,1,845,poor,46,3024,northwest,2866.752 -1,1,387,poor,47,1392,northwest,1316.832 -0,1,112,poor,60,948,northwest,872.16 -3,2,1079,poor,46,4445,northwest,4213.86 -3,2,1234,great,14,5394,east_elmwood,5394 -1,1,533,good,3,1992,west_welmwood,1992 -2,1,752,good,10,3204,west_welmwood,3204 -1,1,307,poor,55,1237,northwest,1150.41 -0,1,167,good,1,1517,west_welmwood,1517 -2,1,905,poor,46,3034,northwest,2876.232 -1,1,307,poor,40,1368,northwest,1313.28 -0,1,276,good,10,1586,west_welmwood,1586 -2,1,813,good,1,3364,west_welmwood,3364 -0,1,276,poor,43,1249,northwest,1191.546 -1,1,547,good,12,1953,west_welmwood,1953 -0,1,376,good,12,1695,west_welmwood,1695 -1,1,653,good,14,1999,west_welmwood,1999 -3,2,1017,poor,62,4264,northwest,3905.824 -1,1,582,great,6,2483,east_elmwood,2483 -3,2,1068,good,7,4889,west_welmwood,4889 -0,1,343,great,14,2120,east_elmwood,2120 -2,1,860,good,13,3357,west_welmwood,3357 -2,1,905,great,12,3863,east_elmwood,3863 -2,1,937,poor,18,3336,northwest,3336 -2,1,809,good,7,3295,west_welmwood,3295 -1,1,698,poor,56,1607,northwest,1491.296 -0,1,336,poor,62,1143,northwest,1046.988 -1,1,483,great,8,2417,east_elmwood,2417 -2,1,852,great,2,3975,east_elmwood,3975 -3,2,840,poor,58,4096,northwest,3784.704 
-2,1,936,poor,53,3020,northwest,2820.68 -2,1,556,great,12,3535,east_elmwood,3535 -2,1,572,good,14,3047,west_welmwood,3047 -3,2,879,great,7,5149,east_elmwood,5149 -1,1,700,good,11,2079,west_welmwood,2079 -2,1,564,poor,63,2512,northwest,2295.968 -3,2,1148,great,4,5468,east_elmwood,5468 -0,1,542,poor,49,1420,northwest,1337.64 -2,1,668,good,0,3240,west_welmwood,3240 -2,1,813,great,4,3921,east_elmwood,3921 -3,2,1034,good,10,4782,west_welmwood,4782 -1,1,311,great,13,2185,east_elmwood,2185 -3,2,846,poor,62,4055,northwest,3714.38 -3,2,859,poor,16,4525,northwest,4525 -2,1,672,good,7,3244,west_welmwood,3244 -2,1,695,good,6,3190,west_welmwood,3190 -3,2,1025,good,12,4775,west_welmwood,4775 -0,1,519,poor,64,1270,northwest,1158.24 -2,1,779,poor,40,3022,northwest,2901.12 -3,2,1054,great,4,5342,east_elmwood,5342 -0,1,400,poor,29,1555,northwest,1527.01 -2,1,717,poor,28,3041,northwest,2992.344 -3,2,1163,poor,28,4763,northwest,4686.792 -0,1,408,great,14,2128,east_elmwood,2128 -3,2,1190,great,3,5475,east_elmwood,5475 -2,1,615,poor,41,2806,northwest,2688.148 -2,1,660,good,0,3220,west_welmwood,3220 -1,1,667,poor,22,1927,northwest,1919.292 -1,1,693,good,3,2129,west_welmwood,2129 -0,1,458,poor,36,1464,northwest,1417.152 -0,1,276,good,2,1638,west_welmwood,1638 -1,1,702,poor,44,1787,northwest,1701.224 -0,1,206,good,7,1541,west_welmwood,1541 -2,1,762,good,7,3317,west_welmwood,3317 -1,1,369,poor,26,1559,northwest,1540.292 -3,2,1164,good,11,4944,west_welmwood,4944 -2,1,858,good,9,3374,west_welmwood,3374 -1,1,509,good,11,1919,west_welmwood,1919 -0,1,344,good,14,1554,west_welmwood,1554 -2,1,509,great,6,3567,east_elmwood,3567 -2,1,860,poor,28,3223,northwest,3171.432 -2,1,699,good,8,3249,west_welmwood,3249 -0,1,541,good,14,1833,west_welmwood,1833 -2,1,898,good,3,3457,west_welmwood,3457 -0,1,391,good,13,1618,west_welmwood,1618 -0,1,516,great,13,2272,east_elmwood,2272 -2,1,546,poor,57,2606,northwest,2413.156 -3,2,898,good,3,4767,west_welmwood,4767 -2,1,895,good,0,3476,west_welmwood,3476 
-2,1,877,poor,38,3084,northwest,2972.976 -1,1,706,good,10,2135,west_welmwood,2135 -0,1,542,poor,35,1597,northwest,1549.09 -3,2,1204,poor,54,4545,northwest,4235.94 -2,1,582,poor,48,2710,northwest,2558.24 -3,2,968,good,14,4679,west_welmwood,4679 -2,1,586,good,9,3106,west_welmwood,3106 -1,1,598,poor,28,1818,northwest,1788.912 -1,1,391,great,8,2328,east_elmwood,2328 -2,1,643,good,8,3195,west_welmwood,3195 -3,2,1036,good,3,4890,west_welmwood,4890 -3,2,867,poor,35,4388,northwest,4256.36 -3,2,1096,good,1,4971,west_welmwood,4971 -3,2,815,poor,21,4469,northwest,4460.062 -0,1,295,good,10,1553,west_welmwood,1553 -2,1,902,good,4,3453,west_welmwood,3453 -2,1,749,good,6,3240,west_welmwood,3240 -3,2,847,poor,38,4307,northwest,4151.948 -3,2,1170,poor,38,4655,northwest,4487.42 -1,1,346,good,12,1716,west_welmwood,1716 -2,1,566,good,6,3093,west_welmwood,3093 -2,1,768,poor,50,2823,northwest,2653.62 -1,1,616,poor,38,1727,northwest,1664.828 -2,1,501,good,4,3044,west_welmwood,3044 -2,1,538,poor,22,2927,northwest,2915.292 -3,2,912,poor,28,4463,northwest,4391.592 -2,1,683,poor,33,2923,northwest,2847.002 -3,2,1205,great,7,5532,east_elmwood,5532 -2,1,837,good,0,3457,west_welmwood,3457 -2,1,702,good,14,3190,west_welmwood,3190 -0,1,136,great,2,2008,east_elmwood,2008 -3,2,1075,poor,21,4690,northwest,4680.62 -1,1,432,good,10,1852,west_welmwood,1852 -3,2,941,good,11,4695,west_welmwood,4695 -0,1,220,great,0,2076,east_elmwood,2076 -1,1,671,poor,45,1699,northwest,1614.05 -2,1,682,good,10,3206,west_welmwood,3206 -1,1,700,good,1,2171,west_welmwood,2171 -1,1,411,great,14,2306,east_elmwood,2306 -2,1,552,poor,43,2729,northwest,2603.466 -0,1,497,poor,57,1371,northwest,1269.546 -3,2,951,good,3,4729,west_welmwood,4729 -0,1,430,great,12,2217,east_elmwood,2217 -3,2,1220,poor,32,4714,northwest,4600.864 -1,1,430,good,0,1928,west_welmwood,1928 -3,2,837,good,6,4594,west_welmwood,4594 -0,1,435,good,11,1681,west_welmwood,1681 -3,2,847,great,11,5051,east_elmwood,5051 -0,1,396,good,12,1708,west_welmwood,1708 
-3,2,1119,good,6,4922,west_welmwood,4922 -0,1,386,poor,54,1268,northwest,1181.776 -3,2,1208,good,2,5004,west_welmwood,5004 -0,1,240,good,8,1609,west_welmwood,1609 -1,1,323,good,14,1697,west_welmwood,1697 -2,1,876,great,6,3866,east_elmwood,3866 -2,1,653,great,0,3801,east_elmwood,3801 -2,1,579,good,11,3039,west_welmwood,3039 -1,1,335,poor,24,1635,northwest,1621.92 -3,2,898,poor,62,4129,northwest,3782.164 -1,1,593,good,0,2059,west_welmwood,2059 -1,1,392,great,7,2282,east_elmwood,2282 -1,1,374,good,12,1708,west_welmwood,1708 -1,1,725,great,7,2672,east_elmwood,2672 -3,2,831,poor,27,4424,northwest,4362.064 -2,1,835,good,1,3378,west_welmwood,3378 -0,1,233,good,11,1506,west_welmwood,1506 -2,1,830,great,7,3879,east_elmwood,3879 -1,1,645,good,13,1976,west_welmwood,1976 -3,2,831,poor,36,4354,northwest,4214.672 -1,1,478,good,1,2009,west_welmwood,2009 -2,1,790,good,11,3300,west_welmwood,3300 -0,1,472,poor,27,1650,northwest,1626.9 -3,2,1200,good,11,4941,west_welmwood,4941 -2,1,805,poor,48,2897,northwest,2734.768 -1,1,561,good,6,1990,west_welmwood,1990 -2,1,857,good,8,3357,west_welmwood,3357 -0,1,126,poor,26,1275,northwest,1259.7 -0,1,412,great,9,2197,east_elmwood,2197 -2,1,911,poor,43,3085,northwest,2943.09 -0,1,398,good,10,1740,west_welmwood,1740 -0,1,459,poor,18,1671,northwest,1671 -1,1,407,good,1,1858,west_welmwood,1858 -3,2,928,good,6,4700,west_welmwood,4700 -0,1,232,good,3,1585,west_welmwood,1585 -2,1,703,good,0,3336,west_welmwood,3336 -1,1,628,great,11,2469,east_elmwood,2469 -1,1,643,poor,59,1602,northwest,1477.044 -1,1,359,poor,48,1380,northwest,1302.72 -2,1,755,good,8,3278,west_welmwood,3278 -2,1,613,good,5,3121,west_welmwood,3121 -3,2,1009,good,1,4829,west_welmwood,4829 -0,1,306,great,13,2099,east_elmwood,2099 -3,2,828,poor,57,4157,northwest,3849.382 -3,2,1245,poor,26,4824,northwest,4766.112 -3,2,859,good,9,4654,west_welmwood,4654 -3,2,1196,great,5,5537,east_elmwood,5537 -2,1,698,good,3,3229,west_welmwood,3229 -0,1,162,good,1,1533,west_welmwood,1533 
-0,1,417,great,8,2254,east_elmwood,2254 -0,1,247,good,10,1578,west_welmwood,1578 -3,2,959,good,0,4847,west_welmwood,4847 -1,1,740,good,9,2101,west_welmwood,2101 -0,1,493,good,5,1802,west_welmwood,1802 -3,2,878,good,11,4572,west_welmwood,4572 -1,1,612,good,11,2031,west_welmwood,2031 -2,1,805,poor,40,2978,northwest,2858.88 -0,1,139,good,11,1445,west_welmwood,1445 -2,1,875,poor,26,3194,northwest,3155.672 -2,1,658,good,11,3156,west_welmwood,3156 -1,1,603,good,12,1962,west_welmwood,1962 -0,1,394,poor,61,1148,northwest,1053.864 -2,1,648,great,8,3707,east_elmwood,3707 -3,2,1038,great,11,5299,east_elmwood,5299 -0,1,249,good,8,1528,west_welmwood,1528 -1,1,362,poor,25,1654,northwest,1637.46 -2,1,622,good,0,3257,west_welmwood,3257 -2,1,724,good,13,3208,west_welmwood,3208 -0,1,186,poor,57,1011,northwest,936.186 -2,1,599,poor,22,2933,northwest,2921.268 -3,2,1248,good,10,4948,west_welmwood,4948 -0,1,272,good,9,1631,west_welmwood,1631 -2,1,772,great,9,3796,east_elmwood,3796 -1,1,427,good,0,1917,west_welmwood,1917 -3,2,1189,poor,49,4509,northwest,4247.478 -2,1,590,poor,59,2598,northwest,2395.356 -3,2,925,good,2,4795,west_welmwood,4795 -3,2,1074,poor,43,4525,northwest,4316.85 -2,1,559,good,1,3103,west_welmwood,3103 -3,2,931,poor,34,4416,northwest,4292.352 -1,1,584,poor,51,1585,northwest,1486.73 -3,2,879,good,3,4719,west_welmwood,4719 -1,1,417,poor,58,1377,northwest,1272.348 -2,1,800,poor,18,3177,northwest,3177 -2,1,507,poor,25,2849,northwest,2820.51 -1,1,351,good,14,1729,west_welmwood,1729 -3,2,1062,poor,28,4679,northwest,4604.136 -3,2,937,good,14,4598,west_welmwood,4598 -2,1,921,poor,23,3296,northwest,3276.224 -2,1,942,good,7,3441,west_welmwood,3441 -1,1,701,great,2,2699,east_elmwood,2699 -2,1,922,good,1,3538,west_welmwood,3538 -3,2,826,good,3,4688,west_welmwood,4688 -3,2,1147,good,0,5021,west_welmwood,5021 -1,1,602,poor,63,1431,northwest,1307.934 -2,1,814,poor,32,3072,northwest,2998.272 -2,1,875,good,12,3316,west_welmwood,3316 -1,1,365,poor,47,1373,northwest,1298.858 
-0,1,263,good,9,1580,west_welmwood,1580 -0,1,371,poor,40,1354,northwest,1299.84 -2,1,877,poor,27,3169,northwest,3124.634 -2,1,606,good,12,3071,west_welmwood,3071 -0,1,316,great,12,2103,east_elmwood,2103 -0,1,487,good,10,1813,west_welmwood,1813 -3,2,1169,poor,26,4764,northwest,4706.832 -3,2,1233,good,6,5054,west_welmwood,5054 -3,2,1138,great,3,5414,east_elmwood,5414 -3,2,976,great,11,5255,east_elmwood,5255 -0,1,364,good,10,1615,west_welmwood,1615 -2,1,837,poor,30,3098,northwest,3036.04 -3,2,992,poor,39,4469,northwest,4299.178 -3,2,1151,great,13,5398,east_elmwood,5398 -0,1,528,good,12,1764,west_welmwood,1764 -1,1,453,good,12,1853,west_welmwood,1853 -2,1,578,good,14,3081,west_welmwood,3081 -2,1,910,great,11,3917,east_elmwood,3917 -3,2,1073,good,8,4819,west_welmwood,4819 -1,1,736,great,10,2649,east_elmwood,2649 -2,1,780,good,3,3329,west_welmwood,3329 -0,1,360,great,3,2266,east_elmwood,2266 -0,1,480,poor,21,1694,northwest,1690.612 -2,1,878,poor,59,2889,northwest,2663.658 -0,1,113,great,10,1927,east_elmwood,1927 -3,2,1161,great,3,5449,east_elmwood,5449 -3,2,937,poor,25,4565,northwest,4519.35 -1,1,581,good,10,1986,west_welmwood,1986 -3,2,1204,good,2,5041,west_welmwood,5041 -3,2,947,poor,35,4427,northwest,4294.19 -2,1,536,good,0,3093,west_welmwood,3093 -0,1,385,good,2,1778,west_welmwood,1778 -1,1,533,poor,36,1672,northwest,1618.496 -0,1,115,good,10,1402,west_welmwood,1402 -1,1,416,great,0,2413,east_elmwood,2413 -2,1,566,good,14,3041,west_welmwood,3041 -3,2,979,great,6,5257,east_elmwood,5257 -3,2,1152,good,4,4987,west_welmwood,4987 -1,1,384,good,11,1760,west_welmwood,1760 -2,1,541,good,11,3063,west_welmwood,3063 -2,1,799,poor,18,3174,northwest,3174 -0,1,535,poor,22,1701,northwest,1694.196 -0,1,210,good,3,1602,west_welmwood,1602 -3,2,912,poor,51,4207,northwest,3946.166 -3,2,1221,poor,40,4713,northwest,4524.48 -3,2,1091,good,1,4978,west_welmwood,4978 -3,2,1207,good,5,5055,west_welmwood,5055 -0,1,372,poor,31,1499,northwest,1466.022 -1,1,537,poor,33,1754,northwest,1708.396 
-3,2,806,great,9,5111,east_elmwood,5111 -3,2,1062,great,12,5306,east_elmwood,5306 -1,1,695,good,8,2111,west_welmwood,2111 -0,1,531,great,4,2403,east_elmwood,2403 -1,1,693,poor,31,1909,northwest,1867.002 -1,1,732,good,3,2206,west_welmwood,2206 -1,1,664,poor,43,1744,northwest,1663.776 -3,2,998,great,5,5277,east_elmwood,5277 -2,1,657,poor,62,2598,northwest,2379.768 -3,2,1248,poor,40,4677,northwest,4489.92 -3,2,1112,great,13,5334,east_elmwood,5334 -1,1,451,good,8,1899,west_welmwood,1899 -2,1,562,good,2,3189,west_welmwood,3189 -1,1,671,great,5,2596,east_elmwood,2596 -3,2,1103,good,10,4852,west_welmwood,4852 -0,1,398,poor,62,1182,northwest,1082.712 -1,1,474,poor,16,1794,northwest,1794 -1,1,622,great,5,2566,east_elmwood,2566 -2,1,543,good,13,2991,west_welmwood,2991 -0,1,540,poor,15,1812,northwest,1812 -2,1,671,poor,61,2631,northwest,2415.258 -3,2,1171,poor,57,4437,northwest,4108.662 -0,1,316,great,4,2156,east_elmwood,2156 -3,2,1026,poor,48,4374,northwest,4129.056 -1,1,407,good,3,1898,west_welmwood,1898 -0,1,227,good,14,1462,west_welmwood,1462 -1,1,429,good,8,1866,west_welmwood,1866 -1,1,707,great,0,2752,east_elmwood,2752 -0,1,533,great,10,2371,east_elmwood,2371 -3,2,1106,poor,46,4483,northwest,4249.884 -0,1,414,great,5,2298,east_elmwood,2298 -2,1,745,poor,55,2828,northwest,2630.04 -0,1,334,good,5,1706,west_welmwood,1706 -3,2,1102,poor,32,4675,northwest,4562.8 -0,1,309,poor,43,1241,northwest,1183.914 -0,1,428,good,0,1851,west_welmwood,1851 -2,1,655,great,7,3697,east_elmwood,3697 -2,1,826,poor,17,3240,northwest,3240 -3,2,832,poor,63,4013,northwest,3667.882 -0,1,227,great,12,1977,east_elmwood,1977 -0,1,351,great,4,2251,east_elmwood,2251 -3,2,844,poor,57,4131,northwest,3825.306 -2,1,688,poor,41,2883,northwest,2761.914 -2,1,523,good,14,2998,west_welmwood,2998 -0,1,546,poor,30,1620,northwest,1587.6 -0,1,428,good,1,1865,west_welmwood,1865 -1,1,642,poor,49,1622,northwest,1527.924 -0,1,156,good,4,1474,west_welmwood,1474 -2,1,578,poor,30,2845,northwest,2788.1 
-3,2,940,great,12,5177,east_elmwood,5177 -2,1,748,great,3,3813,east_elmwood,3813 -2,1,699,poor,32,2936,northwest,2865.536 -0,1,349,poor,50,1230,northwest,1156.2 -2,1,849,good,5,3390,west_welmwood,3390 -1,1,374,good,5,1850,west_welmwood,1850 -2,1,611,good,14,3061,west_welmwood,3061 -1,1,497,poor,52,1511,northwest,1414.296 -0,1,356,good,3,1703,west_welmwood,1703 -0,1,308,great,11,2137,east_elmwood,2137 -3,2,983,good,3,4837,west_welmwood,4837 -2,1,549,poor,51,2612,northwest,2450.056 -3,2,871,good,5,4711,west_welmwood,4711 -2,1,788,good,10,3313,west_welmwood,3313 -1,1,465,good,6,1881,west_welmwood,1881 -0,1,259,great,0,2137,east_elmwood,2137 -0,1,312,good,7,1612,west_welmwood,1612 -2,1,691,great,5,3696,east_elmwood,3696 -0,1,471,great,5,2353,east_elmwood,2353 -2,1,869,great,8,3876,east_elmwood,3876 -0,1,387,poor,56,1247,northwest,1157.216 -2,1,683,poor,38,2930,northwest,2824.52 -0,1,440,poor,29,1533,northwest,1505.406 -3,2,827,poor,48,4191,northwest,3956.304 -0,1,157,good,3,1487,west_welmwood,1487 -3,2,937,poor,64,4110,northwest,3748.32 -3,2,1037,poor,16,4721,northwest,4721 -2,1,918,poor,36,3175,northwest,3073.4 -2,1,543,good,0,3101,west_welmwood,3101 -1,1,663,good,1,2133,west_welmwood,2133 -1,1,529,good,0,2056,west_welmwood,2056 -0,1,446,great,5,2250,east_elmwood,2250 -3,2,972,good,12,4670,west_welmwood,4670 -2,1,509,great,1,3624,east_elmwood,3624 -0,1,382,poor,22,1597,northwest,1590.612 -3,2,1114,good,6,4892,west_welmwood,4892 -1,1,444,good,1,1948,west_welmwood,1948 -3,2,1019,great,12,5206,east_elmwood,5206 -2,1,936,great,12,3913,east_elmwood,3913 -0,1,319,poor,42,1261,northwest,1205.516 -0,1,352,good,9,1619,west_welmwood,1619 -3,2,1042,good,0,4915,west_welmwood,4915 -2,1,758,good,6,3271,west_welmwood,3271 -3,2,1156,good,4,5003,west_welmwood,5003 -3,2,849,good,1,4734,west_welmwood,4734 -1,1,350,great,10,2257,east_elmwood,2257 -2,1,793,good,6,3298,west_welmwood,3298 -1,1,650,good,14,2034,west_welmwood,2034 -1,1,526,great,8,2423,east_elmwood,2423 
-3,2,857,good,2,4709,west_welmwood,4709 -1,1,520,good,5,1945,west_welmwood,1945 -1,1,508,great,6,2417,east_elmwood,2417 -3,2,1158,poor,53,4455,northwest,4160.97 -2,1,835,good,5,3383,west_welmwood,3383 -2,1,851,good,10,3310,west_welmwood,3310 -2,1,784,good,5,3287,west_welmwood,3287 -3,2,852,good,13,4550,west_welmwood,4550 -1,1,508,great,5,2484,east_elmwood,2484 -2,1,527,good,11,2990,west_welmwood,2990 -2,1,715,poor,64,2685,northwest,2448.72 -0,1,134,good,7,1513,west_welmwood,1513 -0,1,541,poor,22,1764,northwest,1756.944 -0,1,501,poor,41,1444,northwest,1383.352 -1,1,622,poor,56,1538,northwest,1427.264 -0,1,401,poor,40,1373,northwest,1318.08 -3,2,1167,poor,50,4535,northwest,4262.9 -0,1,196,poor,45,1133,northwest,1076.35 -3,2,916,good,7,4706,west_welmwood,4706 -1,1,536,good,2,1978,west_welmwood,1978 -0,1,174,good,12,1424,west_welmwood,1424 -3,2,1113,good,7,4869,west_welmwood,4869 -1,1,377,poor,62,1266,northwest,1159.656 -0,1,270,great,2,2134,east_elmwood,2134 -3,2,879,good,5,4670,west_welmwood,4670 -3,2,875,poor,22,4490,northwest,4472.04 -1,1,675,good,2,2189,west_welmwood,2189 -3,2,907,poor,25,4498,northwest,4453.02 -0,1,300,poor,35,1392,northwest,1350.24 -2,1,939,great,11,3895,east_elmwood,3895 -1,1,521,good,1,2004,west_welmwood,2004 -1,1,359,poor,45,1430,northwest,1358.5 -3,2,1246,good,7,4980,west_welmwood,4980 -2,1,530,good,1,3138,west_welmwood,3138 -2,1,501,good,11,2952,west_welmwood,2952 -0,1,307,poor,51,1243,northwest,1165.934 -3,2,1026,good,3,4834,west_welmwood,4834 -2,1,869,good,10,3406,west_welmwood,3406 -0,1,108,poor,45,1025,northwest,973.75 -3,2,1010,good,9,4813,west_welmwood,4813 -0,1,197,good,2,1622,west_welmwood,1622 -0,1,282,poor,38,1311,northwest,1263.804 -3,2,1096,poor,26,4685,northwest,4628.78 -0,1,537,good,12,1817,west_welmwood,1817 -2,1,939,poor,39,3118,northwest,2999.516 -0,1,503,great,7,2313,east_elmwood,2313 -2,1,914,good,12,3432,west_welmwood,3432 -2,1,799,poor,49,2874,northwest,2707.308 -1,1,594,poor,16,1886,northwest,1886 
-2,1,911,good,5,3478,west_welmwood,3478 -0,1,286,good,10,1543,west_welmwood,1543 -0,1,433,good,14,1685,west_welmwood,1685 -1,1,392,good,4,1842,west_welmwood,1842 -0,1,222,good,4,1575,west_welmwood,1575 -2,1,939,poor,36,3162,northwest,3060.816 -2,1,507,good,2,3070,west_welmwood,3070 -1,1,410,poor,18,1736,northwest,1736 -0,1,434,poor,21,1636,northwest,1632.728 -1,1,385,good,5,1883,west_welmwood,1883 -2,1,821,poor,46,2942,northwest,2789.016 -1,1,659,good,5,2136,west_welmwood,2136 -3,2,1167,good,7,4938,west_welmwood,4938 -2,1,836,good,9,3335,west_welmwood,3335 -3,2,1236,poor,57,4563,northwest,4225.338 -0,1,456,poor,33,1564,northwest,1523.336 -0,1,421,poor,44,1365,northwest,1299.48 -0,1,461,poor,26,1562,northwest,1543.256 -3,2,1060,good,0,4875,west_welmwood,4875 -0,1,138,good,2,1491,west_welmwood,1491 -1,1,480,good,1,1924,west_welmwood,1924 -0,1,341,poor,17,1559,northwest,1559 -2,1,570,great,1,3670,east_elmwood,3670 -2,1,580,poor,53,2629,northwest,2455.486 -1,1,520,poor,29,1764,northwest,1732.248 -2,1,686,poor,20,3118,northwest,3118 -3,2,1183,great,3,5521,east_elmwood,5521 -0,1,380,good,2,1800,west_welmwood,1800 -2,1,727,poor,23,3098,northwest,3079.412 -3,2,1097,good,0,4910,west_welmwood,4910 -2,1,703,good,5,3255,west_welmwood,3255 -3,2,1046,great,14,5214,east_elmwood,5214 -0,1,336,great,8,2195,east_elmwood,2195 -2,1,500,poor,33,2778,northwest,2705.772 -1,1,578,poor,16,1906,northwest,1906 -1,1,571,great,3,2562,east_elmwood,2562 -2,1,735,good,11,3213,west_welmwood,3213 -0,1,384,poor,35,1394,northwest,1352.18 -0,1,200,great,6,1996,east_elmwood,1996 -2,1,612,good,9,3169,west_welmwood,3169 -1,1,619,good,2,2126,west_welmwood,2126 -2,1,905,poor,24,3282,northwest,3255.744 -0,1,307,good,3,1679,west_welmwood,1679 -3,2,962,good,1,4824,west_welmwood,4824 -1,1,653,good,4,2089,west_welmwood,2089 -2,1,638,good,3,3219,west_welmwood,3219 -2,1,666,good,7,3237,west_welmwood,3237 -0,1,235,poor,40,1186,northwest,1138.56 -0,1,487,poor,39,1483,northwest,1426.646 
-3,2,919,great,1,5254,east_elmwood,5254 -0,1,504,great,3,2369,east_elmwood,2369 -0,1,532,great,2,2409,east_elmwood,2409 -0,1,216,good,6,1547,west_welmwood,1547 -2,1,848,poor,44,3015,northwest,2870.28 -3,2,1222,poor,15,4924,northwest,4924 -3,2,1159,poor,56,4404,northwest,4086.912 -2,1,712,great,14,3632,east_elmwood,3632 -0,1,411,poor,44,1322,northwest,1258.544 -3,2,1147,poor,19,4809,northwest,4809 -2,1,652,poor,57,2728,northwest,2526.128 -1,1,504,great,13,2416,east_elmwood,2416 -1,1,619,poor,20,1872,northwest,1872 -1,1,728,good,1,2254,west_welmwood,2254 -2,1,878,poor,47,3018,northwest,2855.028 -1,1,680,good,5,2133,west_welmwood,2133 -1,1,719,poor,41,1822,northwest,1745.476 -2,1,923,good,2,3458,west_welmwood,3458 -1,1,424,poor,41,1513,northwest,1449.454 -1,1,397,poor,26,1640,northwest,1620.32 -0,1,131,poor,37,1111,northwest,1073.226 -1,1,461,good,6,1863,west_welmwood,1863 -2,1,835,poor,38,3083,northwest,2972.012 -0,1,320,poor,36,1311,northwest,1269.048 -0,1,415,good,4,1814,west_welmwood,1814 -3,2,1034,good,13,4729,west_welmwood,4729 -2,1,786,poor,28,3144,northwest,3093.696 -3,2,807,poor,59,4091,northwest,3771.902 -2,1,724,good,14,3149,west_welmwood,3149 -2,1,512,good,6,3098,west_welmwood,3098 -2,1,753,great,12,3737,east_elmwood,3737 -1,1,521,good,5,1944,west_welmwood,1944 -0,1,147,good,13,1369,west_welmwood,1369 -1,1,627,poor,55,1610,northwest,1497.3 -0,1,174,good,8,1495,west_welmwood,1495 -1,1,590,good,9,1972,west_welmwood,1972 -1,1,655,good,1,2138,west_welmwood,2138 -0,1,506,poor,26,1601,northwest,1581.788 -3,2,1084,poor,16,4724,northwest,4724 diff --git a/docker/db_images/mariadb/sql-scripts/prepare.sql b/docker/db_images/mariadb/sql-scripts/prepare.sql deleted file mode 100644 index 8cc6567ce3d..00000000000 --- a/docker/db_images/mariadb/sql-scripts/prepare.sql +++ /dev/null @@ -1,18 +0,0 @@ -CREATE TABLE rentals ( -number_of_rooms INT, -number_of_bathrooms INT, -sqft varchar(25), -location varchar(25), -days_on_market INT, -initial_price FLOAT, -neighborhood 
varchar(25), -rental_price FLOAT -) -ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_general_ci; - -LOAD DATA INFILE '/home_rentals.csv' INTO TABLE rentals COLUMNS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY '"' LINES TERMINATED BY '\n' IGNORE 1 LINES; - -CREATE USER 'ssl_user'@'172.17.0.1' IDENTIFIED BY 'ssl' REQUIRE SSL; -GRANT ALL ON *.* TO 'ssl_user'@'172.17.0.1'; -FLUSH PRIVILEGES; -ALTER USER 'ssl_user'@'172.17.0.1' REQUIRE X509; diff --git a/docker/db_images/mysql/Dockerfile b/docker/db_images/mysql/Dockerfile deleted file mode 100644 index 08e12815e8a..00000000000 --- a/docker/db_images/mysql/Dockerfile +++ /dev/null @@ -1,6 +0,0 @@ -FROM mysql - -ENV MYSQL_DATABASE test - -COPY ./sql-scripts/ /docker-entrypoint-initdb.d/ -COPY home_rentals.csv / diff --git a/docker/db_images/mysql/home_rentals.csv b/docker/db_images/mysql/home_rentals.csv deleted file mode 100755 index d74efda921c..00000000000 --- a/docker/db_images/mysql/home_rentals.csv +++ /dev/null @@ -1,5038 +0,0 @@ -number_of_rooms,number_of_bathrooms,sqft,location,days_on_market,initial_price,neighborhood,rental_price -0,1,484,great,10,2271,south_side,2271 -1,1,674,good,1,2167,downtown,2167 -1,1,554,poor,19,1883,westbrae,1883 -0,1,529,great,3,2431,south_side,2431 -3,2,1219,great,3,5510,south_side,5510 -1,1,398,great,11,2272,south_side,2272 -3,2,1190,poor,58,4463,westbrae,4123.812 -1,1,730,good,0,2224,downtown,2224 -0,1,298,great,9,2104,south_side,2104 -2,1,878,great,8,3861,south_side,3861 -1,1,677,good,14,2041,downtown,2041 -0,1,509,poor,18,1725,westbrae,1725 -0,1,481,poor,49,1388,westbrae,1307.496 -3,2,808,good,1,4677,downtown,4677 -1,1,522,poor,30,1713,westbrae,1678.74 -1,1,533,good,10,1903,downtown,1903 -3,2,937,good,2,4736,downtown,4736 -0,1,258,good,10,1544,downtown,1544 -1,1,630,great,11,2543,south_side,2543 -0,1,397,great,11,2168,south_side,2168 -2,1,932,good,10,3413,downtown,3413 -3,2,1069,good,9,4810,downtown,4810 -0,1,267,poor,32,1302,westbrae,1270.752 
-0,1,332,good,6,1697,downtown,1697 -1,1,712,poor,56,1617,westbrae,1500.576 -3,2,1231,poor,25,4873,westbrae,4824.27 -2,1,818,good,3,3359,downtown,3359 -2,1,805,good,8,3358,downtown,3358 -3,2,1158,poor,44,4601,westbrae,4380.152 -3,2,952,great,7,5207,south_side,5207 -2,1,771,good,8,3305,downtown,3305 -1,1,333,great,6,2284,south_side,2284 -1,1,500,poor,54,1448,westbrae,1349.536 -2,1,690,poor,16,3095,westbrae,3095 -0,1,524,great,13,2317,south_side,2317 -2,1,762,good,1,3323,downtown,3323 -2,1,872,good,14,3375,downtown,3375 -1,1,673,great,7,2604,south_side,2604 -2,1,792,good,5,3390,downtown,3390 -2,1,640,good,8,3153,downtown,3153 -0,1,454,poor,47,1353,westbrae,1279.938 -2,1,932,good,10,3447,downtown,3447 -0,1,340,good,5,1722,downtown,1722 -1,1,595,good,1,2064,downtown,2064 -2,1,558,good,3,3118,downtown,3118 -3,2,823,good,10,4545,downtown,4545 -3,2,1104,poor,16,4750,westbrae,4750 -1,1,543,poor,18,1871,westbrae,1871 -0,1,455,great,13,2205,south_side,2205 -0,1,113,good,13,1378,downtown,1378 -2,1,553,good,8,3073,downtown,3073 -3,2,1030,great,14,5260,south_side,5260 -3,2,1175,great,6,5446,south_side,5446 -1,1,644,great,12,2572,south_side,2572 -1,1,421,great,8,2333,south_side,2333 -2,1,786,poor,63,2716,westbrae,2482.424 -0,1,518,poor,21,1674,westbrae,1670.652 -1,1,532,great,4,2491,south_side,2491 -1,1,533,poor,26,1748,westbrae,1727.024 -1,1,733,poor,56,1671,westbrae,1550.688 -0,1,417,good,9,1687,downtown,1687 -2,1,891,good,3,3504,downtown,3504 -2,1,938,poor,50,3008,westbrae,2827.52 -2,1,688,good,12,3154,downtown,3154 -1,1,504,poor,22,1791,westbrae,1783.836 -1,1,741,good,3,2226,downtown,2226 -2,1,564,good,13,3001,downtown,3001 -2,1,503,good,10,3026,downtown,3026 -2,1,755,good,7,3254,downtown,3254 -0,1,415,poor,39,1472,westbrae,1416.064 -2,1,911,good,6,3492,downtown,3492 -3,2,853,good,4,4710,downtown,4710 -0,1,337,good,1,1775,downtown,1775 -1,1,477,poor,64,1371,westbrae,1250.352 -3,2,1081,great,7,5351,south_side,5351 -0,1,464,poor,21,1645,westbrae,1641.71 
-3,2,1219,good,1,5030,downtown,5030 -3,2,1106,good,10,4833,downtown,4833 -2,1,891,good,9,3371,downtown,3371 -1,1,518,good,6,2005,downtown,2005 -0,1,245,great,4,2094,south_side,2094 -3,2,1216,great,5,5495,south_side,5495 -0,1,381,poor,28,1483,westbrae,1459.272 -2,1,819,great,7,3806,south_side,3806 -2,1,787,good,9,3332,downtown,3332 -3,2,936,good,2,4738,downtown,4738 -2,1,740,good,6,3294,downtown,3294 -3,2,1215,great,13,5467,south_side,5467 -2,1,853,poor,40,3045,westbrae,2923.2 -2,1,942,poor,59,2977,westbrae,2744.794 -3,2,1204,good,7,5016,downtown,5016 -3,2,1098,great,10,5386,south_side,5386 -1,1,741,good,12,2170,downtown,2170 -1,1,603,great,10,2508,south_side,2508 -3,2,1074,good,12,4796,downtown,4796 -1,1,588,good,14,1961,downtown,1961 -0,1,334,poor,48,1243,westbrae,1173.392 -2,1,736,great,2,3854,south_side,3854 -3,2,1056,poor,54,4408,westbrae,4108.256 -1,1,625,great,2,2578,south_side,2578 -2,1,530,poor,47,2613,westbrae,2471.898 -2,1,626,great,4,3693,south_side,3693 -1,1,736,poor,30,1891,westbrae,1853.18 -3,2,882,good,10,4659,downtown,4659 -2,1,646,good,1,3280,downtown,3280 -0,1,231,good,2,1650,downtown,1650 -0,1,157,poor,36,1172,westbrae,1134.496 -1,1,489,good,9,1916,downtown,1916 -0,1,464,good,11,1793,downtown,1793 -1,1,320,great,8,2268,south_side,2268 -0,1,390,great,4,2230,south_side,2230 -2,1,682,great,5,3705,south_side,3705 -2,1,729,great,2,3856,south_side,3856 -3,2,837,poor,23,4439,westbrae,4412.366 -3,2,933,poor,22,4557,westbrae,4538.772 -1,1,606,great,14,2454,south_side,2454 -2,1,663,great,3,3691,south_side,3691 -1,1,407,good,3,1894,downtown,1894 -1,1,692,good,5,2115,downtown,2115 -3,2,975,great,6,5271,south_side,5271 -2,1,726,good,11,3242,downtown,3242 -1,1,329,good,10,1755,downtown,1755 -1,1,418,good,3,1907,downtown,1907 -1,1,627,good,2,2060,downtown,2060 -0,1,435,poor,26,1553,westbrae,1534.364 -2,1,533,great,2,3645,south_side,3645 -3,2,820,poor,23,4430,westbrae,4403.42 -2,1,935,great,13,3880,south_side,3880 -0,1,144,great,4,1979,south_side,1979 
-2,1,872,poor,61,2884,westbrae,2647.512 -2,1,646,poor,53,2750,westbrae,2568.5 -0,1,141,poor,59,940,westbrae,866.68 -3,2,816,poor,25,4427,westbrae,4382.73 -0,1,320,poor,36,1322,westbrae,1279.696 -3,2,1143,poor,18,4846,westbrae,4846 -3,2,1087,good,2,4898,downtown,4898 -0,1,369,poor,43,1349,westbrae,1286.946 -1,1,358,good,3,1866,downtown,1866 -0,1,539,good,3,1899,downtown,1899 -2,1,885,poor,50,2974,westbrae,2795.56 -2,1,938,good,7,3497,downtown,3497 -0,1,488,poor,49,1418,westbrae,1335.756 -3,2,1157,good,10,4862,downtown,4862 -3,2,902,poor,29,4456,westbrae,4375.792 -2,1,623,poor,26,2961,westbrae,2925.468 -2,1,803,good,8,3279,downtown,3279 -3,2,1006,poor,44,4416,westbrae,4204.032 -3,2,916,poor,36,4378,westbrae,4237.904 -3,2,1180,good,5,5021,downtown,5021 -0,1,461,poor,31,1547,westbrae,1512.966 -0,1,390,poor,26,1531,westbrae,1512.628 -0,1,271,good,14,1557,downtown,1557 -2,1,745,poor,37,2951,westbrae,2850.666 -3,2,1080,good,14,4771,downtown,4771 -3,2,963,good,3,4734,downtown,4734 -2,1,901,poor,24,3226,westbrae,3200.192 -0,1,307,great,7,2151,south_side,2151 -3,2,871,good,11,4626,downtown,4626 -1,1,649,poor,46,1678,westbrae,1590.744 -0,1,459,poor,37,1464,westbrae,1414.224 -1,1,624,poor,31,1847,westbrae,1806.366 -1,1,350,good,11,1759,downtown,1759 -1,1,600,good,11,2009,downtown,2009 -0,1,541,good,8,1894,downtown,1894 -3,2,939,poor,29,4484,westbrae,4403.288 -2,1,606,great,4,3675,south_side,3675 -3,2,1139,good,11,4861,downtown,4861 -3,2,1234,good,3,5052,downtown,5052 -3,2,1150,good,2,4982,downtown,4982 -3,2,996,good,13,4758,downtown,4758 -2,1,559,great,1,3657,south_side,3657 -1,1,447,good,12,1784,downtown,1784 -1,1,698,poor,23,1998,westbrae,1986.012 -0,1,371,good,13,1639,downtown,1639 -0,1,425,poor,34,1449,westbrae,1408.428 -3,2,948,great,8,5224,south_side,5224 -0,1,441,poor,19,1613,westbrae,1613 -0,1,112,good,7,1487,downtown,1487 -2,1,876,poor,15,3347,westbrae,3347 -3,2,860,good,4,4631,downtown,4631 -0,1,312,poor,26,1479,westbrae,1461.252 
-0,1,233,great,10,2079,south_side,2079 -1,1,743,great,3,2732,south_side,2732 -2,1,564,good,6,3137,downtown,3137 -0,1,335,good,12,1609,downtown,1609 -2,1,879,great,10,3923,south_side,3923 -3,2,1001,poor,57,4245,westbrae,3930.87 -1,1,721,great,3,2701,south_side,2701 -0,1,547,good,2,1936,downtown,1936 -2,1,871,good,3,3434,downtown,3434 -2,1,904,great,8,3938,south_side,3938 -0,1,150,poor,60,957,westbrae,880.44 -2,1,892,poor,18,3303,westbrae,3303 -2,1,886,good,10,3396,downtown,3396 -2,1,619,great,0,3671,south_side,3671 -2,1,598,poor,16,3049,westbrae,3049 -2,1,883,good,8,3411,downtown,3411 -1,1,443,good,10,1820,downtown,1820 -3,2,969,poor,56,4236,westbrae,3931.008 -0,1,457,good,4,1796,downtown,1796 -3,2,1175,good,0,4999,downtown,4999 -0,1,511,great,1,2431,south_side,2431 -1,1,514,poor,51,1536,westbrae,1440.768 -3,2,916,good,3,4716,downtown,4716 -1,1,625,poor,42,1699,westbrae,1624.244 -0,1,442,good,5,1779,downtown,1779 -0,1,310,poor,20,1465,westbrae,1465 -3,2,865,good,0,4742,downtown,4742 -3,2,819,great,12,5049,south_side,5049 -1,1,598,great,7,2499,south_side,2499 -0,1,383,good,11,1707,downtown,1707 -0,1,145,good,6,1442,downtown,1442 -2,1,878,good,8,3379,downtown,3379 -2,1,557,poor,29,2904,westbrae,2851.728 -0,1,505,good,2,1863,downtown,1863 -2,1,646,great,0,3727,south_side,3727 -0,1,123,great,9,1976,south_side,1976 -0,1,375,poor,17,1620,westbrae,1620 -3,2,1124,great,9,5420,south_side,5420 -1,1,704,good,14,2081,downtown,2081 -0,1,294,great,1,2221,south_side,2221 -2,1,850,good,2,3437,downtown,3437 -0,1,376,good,1,1766,downtown,1766 -0,1,150,poor,43,1083,westbrae,1033.182 -3,2,1054,poor,46,4411,westbrae,4181.628 -0,1,191,good,12,1492,downtown,1492 -0,1,300,good,8,1667,downtown,1667 -0,1,283,good,7,1582,downtown,1582 -0,1,454,good,13,1753,downtown,1753 -2,1,589,good,7,3077,downtown,3077 -2,1,906,good,9,3421,downtown,3421 -0,1,222,poor,58,1036,westbrae,957.264 -3,2,1088,poor,52,4393,westbrae,4111.848 -0,1,410,good,3,1740,downtown,1740 -1,1,597,good,9,1978,downtown,1978 
-3,2,1006,poor,27,4580,westbrae,4515.88 -1,1,499,poor,38,1623,westbrae,1564.572 -0,1,149,poor,47,1046,westbrae,989.516 -0,1,426,good,3,1783,downtown,1783 -0,1,361,great,2,2210,south_side,2210 -3,2,880,good,2,4726,downtown,4726 -2,1,935,good,4,3477,downtown,3477 -1,1,363,good,12,1765,downtown,1765 -3,2,1037,poor,35,4578,westbrae,4440.66 -3,2,1145,good,11,4929,downtown,4929 -1,1,435,good,8,1855,downtown,1855 -3,2,1114,great,9,5363,south_side,5363 -3,2,1055,good,0,4933,downtown,4933 -1,1,503,good,9,1866,downtown,1866 -3,2,816,good,8,4594,downtown,4594 -1,1,435,good,6,1884,downtown,1884 -1,1,609,poor,15,1929,westbrae,1929 -3,2,984,good,0,4855,downtown,4855 -1,1,602,good,2,2043,downtown,2043 -2,1,892,good,3,3435,downtown,3435 -0,1,397,good,11,1698,downtown,1698 -3,2,871,poor,31,4397,westbrae,4300.266 -2,1,653,good,9,3147,downtown,3147 -1,1,748,poor,62,1631,westbrae,1493.996 -0,1,235,great,14,2000,south_side,2000 -3,2,1133,good,9,4874,downtown,4874 -0,1,272,poor,57,1117,westbrae,1034.342 -0,1,532,poor,63,1316,westbrae,1202.824 -2,1,855,good,9,3362,downtown,3362 -3,2,1165,good,9,4902,downtown,4902 -3,2,1085,great,13,5279,south_side,5279 -0,1,195,great,6,2039,south_side,2039 -1,1,409,great,4,2380,south_side,2380 -3,2,1186,poor,62,4437,westbrae,4064.292 -1,1,476,great,8,2370,south_side,2370 -1,1,438,great,6,2332,south_side,2332 -3,2,1182,good,6,4994,downtown,4994 -1,1,305,good,14,1675,downtown,1675 -3,2,1179,good,7,4977,downtown,4977 -3,2,1109,good,9,4906,downtown,4906 -0,1,322,good,13,1573,downtown,1573 -2,1,822,good,8,3374,downtown,3374 -0,1,525,good,0,1875,downtown,1875 -3,2,1022,poor,60,4300,westbrae,3956 -0,1,303,poor,48,1179,westbrae,1112.976 -1,1,656,poor,31,1872,westbrae,1830.816 -3,2,1212,poor,16,4906,westbrae,4906 -1,1,743,poor,44,1756,westbrae,1671.712 -1,1,354,good,0,1859,downtown,1859 -1,1,579,poor,51,1550,westbrae,1453.9 -1,1,327,poor,63,1201,westbrae,1097.714 -2,1,550,good,1,3155,downtown,3155 -3,2,914,good,0,4805,downtown,4805 
-3,2,1225,poor,56,4504,westbrae,4179.712 -1,1,714,good,8,2135,downtown,2135 -2,1,679,good,1,3251,downtown,3251 -1,1,719,poor,64,1596,alcatraz_ave,1455.552 -0,1,461,great,6,2269,berkeley_hills,2269 -3,2,1097,good,7,4832,thowsand_oaks,4832 -2,1,525,good,3,3058,thowsand_oaks,3058 -3,2,850,good,1,4718,thowsand_oaks,4718 -3,2,1183,poor,23,4801,alcatraz_ave,4772.194 -3,2,915,great,1,5240,berkeley_hills,5240 -3,2,972,good,5,4794,thowsand_oaks,4794 -3,2,903,poor,27,4528,alcatraz_ave,4464.608 -2,1,738,good,11,3277,thowsand_oaks,3277 -1,1,608,poor,59,1477,alcatraz_ave,1361.794 -2,1,908,poor,49,3013,alcatraz_ave,2838.246 -3,2,961,good,9,4764,thowsand_oaks,4764 -0,1,225,good,3,1626,thowsand_oaks,1626 -1,1,635,good,1,2111,thowsand_oaks,2111 -1,1,519,good,8,1956,thowsand_oaks,1956 -2,1,917,great,13,3901,berkeley_hills,3901 -3,2,1094,great,12,5323,berkeley_hills,5323 -0,1,296,great,5,2151,berkeley_hills,2151 -2,1,536,poor,43,2751,alcatraz_ave,2624.454 -3,2,863,great,4,5151,berkeley_hills,5151 -3,2,953,good,10,4682,thowsand_oaks,4682 -0,1,451,poor,60,1234,alcatraz_ave,1135.28 -3,2,1236,good,11,4993,thowsand_oaks,4993 -2,1,624,poor,47,2738,alcatraz_ave,2590.148 -3,2,902,poor,42,4379,alcatraz_ave,4186.324 -0,1,310,good,12,1638,thowsand_oaks,1638 -2,1,882,good,13,3322,thowsand_oaks,3322 -2,1,939,good,12,3437,thowsand_oaks,3437 -2,1,659,good,14,3166,thowsand_oaks,3166 -2,1,740,good,1,3329,thowsand_oaks,3329 -2,1,697,good,3,3240,thowsand_oaks,3240 -1,1,580,poor,45,1593,alcatraz_ave,1513.35 -0,1,102,great,8,1895,berkeley_hills,1895 -2,1,921,poor,53,3026,alcatraz_ave,2826.284 -1,1,380,poor,30,1577,alcatraz_ave,1545.46 -3,2,869,great,2,5152,berkeley_hills,5152 -1,1,727,good,11,2078,thowsand_oaks,2078 -0,1,240,poor,45,1152,alcatraz_ave,1094.4 -0,1,535,good,14,1830,thowsand_oaks,1830 -2,1,677,poor,49,2778,alcatraz_ave,2616.876 -0,1,187,good,0,1566,thowsand_oaks,1566 -2,1,919,good,12,3419,thowsand_oaks,3419 -3,2,1171,poor,25,4775,alcatraz_ave,4727.25 -3,2,1121,good,10,4877,thowsand_oaks,4877 
-0,1,124,good,6,1441,thowsand_oaks,1441 -1,1,326,good,9,1782,thowsand_oaks,1782 -0,1,469,good,14,1713,thowsand_oaks,1713 -3,2,1102,poor,64,4297,alcatraz_ave,3918.864 -0,1,379,poor,28,1534,alcatraz_ave,1509.456 -3,2,983,great,14,5145,berkeley_hills,5145 -1,1,686,good,4,2175,thowsand_oaks,2175 -0,1,194,great,10,2042,berkeley_hills,2042 -2,1,890,good,6,3476,thowsand_oaks,3476 -0,1,497,good,4,1877,thowsand_oaks,1877 -3,2,831,good,4,4621,thowsand_oaks,4621 -1,1,472,poor,17,1771,alcatraz_ave,1771 -3,2,1032,poor,51,4394,alcatraz_ave,4121.572 -0,1,398,great,12,2151,berkeley_hills,2151 -3,2,913,great,4,5269,berkeley_hills,5269 -0,1,438,great,8,2273,berkeley_hills,2273 -0,1,220,good,3,1548,thowsand_oaks,1548 -3,2,1009,great,8,5326,berkeley_hills,5326 -1,1,649,great,5,2563,berkeley_hills,2563 -0,1,499,good,5,1839,thowsand_oaks,1839 -2,1,677,good,0,3232,thowsand_oaks,3232 -3,2,926,good,13,4679,thowsand_oaks,4679 -3,2,1005,poor,59,4255,alcatraz_ave,3923.11 -0,1,137,good,9,1479,thowsand_oaks,1479 -3,2,1066,good,13,4774,thowsand_oaks,4774 -0,1,436,good,14,1658,thowsand_oaks,1658 -2,1,773,good,12,3207,thowsand_oaks,3207 -3,2,1038,great,8,5318,berkeley_hills,5318 -0,1,103,good,12,1413,thowsand_oaks,1413 -0,1,498,good,10,1801,thowsand_oaks,1801 -0,1,432,poor,55,1303,alcatraz_ave,1211.79 -0,1,127,poor,61,923,alcatraz_ave,847.314 -1,1,490,great,6,2452,berkeley_hills,2452 -3,2,987,great,6,5298,berkeley_hills,5298 -1,1,435,poor,49,1457,alcatraz_ave,1372.494 -2,1,606,good,11,3068,thowsand_oaks,3068 -1,1,638,poor,41,1774,alcatraz_ave,1699.492 -3,2,944,great,2,5321,berkeley_hills,5321 -1,1,601,poor,47,1637,alcatraz_ave,1548.602 -3,2,1024,poor,17,4745,alcatraz_ave,4745 -1,1,729,poor,63,1554,alcatraz_ave,1420.356 -3,2,1226,great,11,5436,berkeley_hills,5436 -0,1,519,good,12,1760,thowsand_oaks,1760 -0,1,399,good,9,1672,thowsand_oaks,1672 -2,1,665,good,12,3106,thowsand_oaks,3106 -2,1,566,great,9,3569,berkeley_hills,3569 -2,1,549,poor,19,2997,alcatraz_ave,2997 
-2,1,934,good,2,3529,thowsand_oaks,3529 -0,1,500,poor,42,1482,alcatraz_ave,1416.792 -0,1,111,great,9,1878,berkeley_hills,1878 -3,2,848,good,6,4599,thowsand_oaks,4599 -0,1,305,good,3,1687,thowsand_oaks,1687 -0,1,361,poor,56,1245,alcatraz_ave,1155.36 -1,1,565,good,6,2038,thowsand_oaks,2038 -0,1,283,poor,54,1115,alcatraz_ave,1039.18 -2,1,505,great,11,3478,berkeley_hills,3478 -0,1,228,great,10,2067,berkeley_hills,2067 -3,2,1119,poor,54,4405,alcatraz_ave,4105.46 -0,1,267,good,2,1635,thowsand_oaks,1635 -3,2,916,good,0,4727,thowsand_oaks,4727 -2,1,903,poor,58,2923,alcatraz_ave,2700.852 -3,2,1046,good,13,4722,thowsand_oaks,4722 -1,1,513,good,11,1924,thowsand_oaks,1924 -3,2,1211,good,1,5043,thowsand_oaks,5043 -0,1,105,poor,28,1207,alcatraz_ave,1187.688 -2,1,553,good,1,3177,thowsand_oaks,3177 -3,2,1044,good,9,4847,thowsand_oaks,4847 -0,1,492,poor,64,1265,alcatraz_ave,1153.68 -2,1,847,good,7,3419,thowsand_oaks,3419 -0,1,232,poor,62,987,alcatraz_ave,904.092 -2,1,774,great,3,3875,berkeley_hills,3875 -2,1,582,poor,60,2591,alcatraz_ave,2383.72 -2,1,652,good,0,3212,thowsand_oaks,3212 -2,1,538,good,10,2988,thowsand_oaks,2988 -0,1,391,poor,53,1245,alcatraz_ave,1162.83 -1,1,723,great,3,2649,berkeley_hills,2649 -3,2,1043,great,2,5356,berkeley_hills,5356 -3,2,893,poor,38,4391,alcatraz_ave,4232.924 -2,1,608,good,9,3109,thowsand_oaks,3109 -0,1,511,great,12,2242,berkeley_hills,2242 -2,1,791,great,1,3870,berkeley_hills,3870 -2,1,792,poor,33,3014,alcatraz_ave,2935.636 -0,1,110,poor,20,1315,alcatraz_ave,1315 -1,1,616,poor,17,1943,alcatraz_ave,1943 -2,1,505,good,2,3080,thowsand_oaks,3080 -1,1,411,good,8,1810,thowsand_oaks,1810 -2,1,846,good,12,3282,thowsand_oaks,3282 -3,2,965,good,4,4811,thowsand_oaks,4811 -1,1,440,good,6,1847,thowsand_oaks,1847 -1,1,386,poor,56,1345,alcatraz_ave,1248.16 -3,2,1247,good,11,4974,thowsand_oaks,4974 -3,2,1058,great,11,5336,berkeley_hills,5336 -2,1,587,poor,63,2572,alcatraz_ave,2350.808 -0,1,145,great,1,2049,berkeley_hills,2049 
-3,2,928,poor,47,4324,alcatraz_ave,4090.504 -2,1,735,good,4,3288,thowsand_oaks,3288 -1,1,433,poor,51,1430,alcatraz_ave,1341.34 -3,2,1009,good,10,4760,thowsand_oaks,4760 -0,1,369,poor,42,1308,alcatraz_ave,1250.448 -1,1,654,poor,36,1841,alcatraz_ave,1782.088 -0,1,144,good,5,1531,thowsand_oaks,1531 -0,1,425,great,13,2162,berkeley_hills,2162 -1,1,509,poor,20,1765,alcatraz_ave,1765 -2,1,615,good,9,3085,thowsand_oaks,3085 -1,1,519,great,6,2453,berkeley_hills,2453 -3,2,1235,great,1,5602,berkeley_hills,5602 -0,1,434,good,0,1800,thowsand_oaks,1800 -3,2,1028,poor,15,4693,alcatraz_ave,4693 -2,1,852,good,12,3369,thowsand_oaks,3369 -3,2,1039,good,1,4887,thowsand_oaks,4887 -3,2,1020,great,5,5319,berkeley_hills,5319 -0,1,354,great,13,2162,berkeley_hills,2162 -3,2,1034,great,12,5259,berkeley_hills,5259 -3,2,800,good,2,4618,thowsand_oaks,4618 -2,1,815,good,0,3436,thowsand_oaks,3436 -0,1,350,poor,61,1096,alcatraz_ave,1006.128 -0,1,489,great,6,2307,berkeley_hills,2307 -1,1,744,poor,37,1919,alcatraz_ave,1853.754 -3,2,874,good,1,4671,thowsand_oaks,4671 -0,1,392,great,7,2205,berkeley_hills,2205 -0,1,485,great,12,2285,berkeley_hills,2285 -0,1,500,poor,27,1632,alcatraz_ave,1609.152 -3,2,1239,poor,24,4876,alcatraz_ave,4836.992 -1,1,702,good,13,2068,thowsand_oaks,2068 -0,1,467,good,1,1869,thowsand_oaks,1869 -2,1,564,good,7,3119,thowsand_oaks,3119 -1,1,560,great,2,2497,berkeley_hills,2497 -0,1,219,good,8,1529,thowsand_oaks,1529 -3,2,1154,great,11,5396,berkeley_hills,5396 -1,1,309,good,2,1783,thowsand_oaks,1783 -2,1,655,poor,32,2921,alcatraz_ave,2850.896 -1,1,338,good,3,1835,thowsand_oaks,1835 -3,2,908,good,9,4620,thowsand_oaks,4620 -1,1,518,poor,60,1385,alcatraz_ave,1274.2 -0,1,279,good,11,1601,thowsand_oaks,1601 -0,1,267,good,7,1580,thowsand_oaks,1580 -2,1,587,good,14,3062,thowsand_oaks,3062 -1,1,648,poor,62,1557,alcatraz_ave,1426.212 -2,1,845,poor,37,3100,alcatraz_ave,2994.6 -2,1,549,great,6,3575,berkeley_hills,3575 -1,1,660,good,8,2092,thowsand_oaks,2092 
-2,1,585,good,10,3060,thowsand_oaks,3060 -0,1,279,good,3,1681,thowsand_oaks,1681 -0,1,538,poor,43,1490,alcatraz_ave,1421.46 -2,1,932,poor,53,3006,alcatraz_ave,2807.604 -2,1,814,great,5,3868,berkeley_hills,3868 -2,1,709,poor,27,3018,alcatraz_ave,2975.748 -2,1,896,good,5,3483,thowsand_oaks,3483 -3,2,1140,good,10,4853,thowsand_oaks,4853 -3,2,1065,poor,32,4633,alcatraz_ave,4521.808 -2,1,735,poor,33,2980,alcatraz_ave,2902.52 -2,1,613,poor,39,2847,alcatraz_ave,2738.814 -1,1,666,poor,38,1775,alcatraz_ave,1711.1 -1,1,300,good,5,1739,thowsand_oaks,1739 -3,2,959,great,3,5286,berkeley_hills,5286 -0,1,135,great,1,2026,berkeley_hills,2026 -1,1,568,poor,32,1722,alcatraz_ave,1680.672 -2,1,824,poor,47,2962,alcatraz_ave,2802.052 -1,1,493,poor,24,1798,alcatraz_ave,1783.616 -2,1,666,great,10,3671,berkeley_hills,3671 -1,1,432,good,14,1782,thowsand_oaks,1782 -2,1,768,great,7,3795,berkeley_hills,3795 -0,1,223,poor,57,1068,alcatraz_ave,988.968 -1,1,568,poor,15,1885,alcatraz_ave,1885 -0,1,503,good,9,1839,thowsand_oaks,1839 -3,2,872,poor,53,4211,alcatraz_ave,3933.074 -1,1,407,great,3,2338,berkeley_hills,2338 -0,1,150,great,3,2041,berkeley_hills,2041 -1,1,701,good,10,2074,thowsand_oaks,2074 -3,2,811,poor,47,4193,alcatraz_ave,3966.578 -3,2,1086,poor,25,4639,alcatraz_ave,4592.61 -3,2,1002,good,2,4824,thowsand_oaks,4824 -1,1,618,poor,16,1954,alcatraz_ave,1954 -2,1,884,great,9,3855,berkeley_hills,3855 -3,2,1125,good,8,4895,thowsand_oaks,4895 -1,1,320,good,1,1789,thowsand_oaks,1789 -2,1,853,good,12,3336,thowsand_oaks,3336 -1,1,399,great,12,2236,berkeley_hills,2236 -3,2,980,great,8,5239,berkeley_hills,5239 -0,1,400,poor,24,1527,alcatraz_ave,1514.784 -2,1,836,great,5,3927,berkeley_hills,3927 -3,2,1175,poor,57,4465,alcatraz_ave,4134.59 -3,2,1094,good,6,4883,thowsand_oaks,4883 -2,1,925,good,13,3355,thowsand_oaks,3355 -2,1,736,poor,60,2708,alcatraz_ave,2491.36 -2,1,729,good,9,3280,thowsand_oaks,3280 -1,1,540,poor,60,1463,alcatraz_ave,1345.96 -3,2,858,poor,31,4366,alcatraz_ave,4269.948 
-1,1,570,great,13,2421,berkeley_hills,2421 -2,1,664,great,0,3749,berkeley_hills,3749 -0,1,378,good,13,1653,thowsand_oaks,1653 -1,1,670,good,3,2149,thowsand_oaks,2149 -0,1,474,great,4,2382,berkeley_hills,2382 -3,2,1157,great,10,5453,berkeley_hills,5453 -1,1,417,great,11,2276,berkeley_hills,2276 -0,1,197,good,7,1576,thowsand_oaks,1576 -1,1,668,good,0,2191,thowsand_oaks,2191 -0,1,123,good,1,1485,thowsand_oaks,1485 -3,2,875,poor,58,4096,alcatraz_ave,3784.704 -3,2,1131,good,6,4957,thowsand_oaks,4957 -1,1,339,poor,22,1660,alcatraz_ave,1653.36 -2,1,921,good,5,3473,thowsand_oaks,3473 -3,2,894,poor,57,4200,alcatraz_ave,3889.2 -1,1,594,poor,57,1487,alcatraz_ave,1376.962 -2,1,935,great,14,3884,berkeley_hills,3884 -0,1,523,poor,38,1561,alcatraz_ave,1504.804 -0,1,535,poor,45,1451,alcatraz_ave,1378.45 -1,1,585,poor,56,1509,alcatraz_ave,1400.352 -1,1,385,poor,28,1639,alcatraz_ave,1612.776 -2,1,578,poor,16,3065,alcatraz_ave,3065 -1,1,678,good,14,2081,thowsand_oaks,2081 -0,1,493,poor,24,1636,alcatraz_ave,1622.912 -2,1,874,good,8,3359,thowsand_oaks,3359 -1,1,328,great,0,2285,berkeley_hills,2285 -0,1,549,poor,59,1397,alcatraz_ave,1288.034 -1,1,519,good,1,2014,thowsand_oaks,2014 -2,1,554,good,13,2996,thowsand_oaks,2996 -0,1,283,great,10,2072,berkeley_hills,2072 -1,1,557,good,14,1960,thowsand_oaks,1960 -0,1,148,great,6,2028,berkeley_hills,2028 -1,1,560,good,9,1992,thowsand_oaks,1992 -3,2,824,poor,49,4217,alcatraz_ave,3972.414 -0,1,177,good,7,1471,thowsand_oaks,1471 -2,1,776,great,2,3881,berkeley_hills,3881 -1,1,332,poor,15,1703,alcatraz_ave,1703 -1,1,722,great,13,2633,berkeley_hills,2633 -2,1,641,great,3,3751,berkeley_hills,3751 -3,2,829,poor,16,4508,alcatraz_ave,4508 -2,1,647,poor,19,3028,alcatraz_ave,3028 -2,1,619,good,5,3208,thowsand_oaks,3208 -0,1,188,good,7,1477,thowsand_oaks,1477 -3,2,1146,good,14,4836,thowsand_oaks,4836 -1,1,338,good,10,1783,thowsand_oaks,1783 -3,2,1232,good,0,5124,thowsand_oaks,5124 -3,2,1054,great,8,5335,berkeley_hills,5335 
-2,1,916,great,3,3974,berkeley_hills,3974 -0,1,169,poor,24,1361,alcatraz_ave,1350.112 -1,1,431,good,13,1775,thowsand_oaks,1775 -3,2,840,great,9,5130,berkeley_hills,5130 -3,2,926,good,0,4820,thowsand_oaks,4820 -3,2,1245,great,8,5506,berkeley_hills,5506 -2,1,694,poor,41,2874,alcatraz_ave,2753.292 -3,2,881,great,0,5199,berkeley_hills,5199 -3,2,1199,poor,62,4418,alcatraz_ave,4046.888 -2,1,913,great,12,3901,berkeley_hills,3901 -0,1,409,poor,60,1190,alcatraz_ave,1094.8 -1,1,627,great,13,2505,berkeley_hills,2505 -1,1,510,great,10,2438,berkeley_hills,2438 -3,2,1218,poor,20,4906,alcatraz_ave,4906 -2,1,778,poor,34,3057,alcatraz_ave,2971.404 -3,2,891,good,12,4644,thowsand_oaks,4644 -2,1,587,poor,27,2928,alcatraz_ave,2887.008 -3,2,1020,good,4,4825,thowsand_oaks,4825 -3,2,1226,good,11,4936,thowsand_oaks,4936 -2,1,696,good,10,3182,thowsand_oaks,3182 -1,1,486,great,4,2412,berkeley_hills,2412 -1,1,426,good,12,1810,thowsand_oaks,1810 -2,1,845,poor,32,3121,alcatraz_ave,3046.096 -1,1,689,good,5,2089,thowsand_oaks,2089 -2,1,918,great,10,3929,berkeley_hills,3929 -3,2,1104,poor,43,4573,alcatraz_ave,4362.642 -3,2,1182,good,11,4944,thowsand_oaks,4944 -1,1,461,good,2,1965,thowsand_oaks,1965 -0,1,461,good,1,1819,thowsand_oaks,1819 -3,2,1206,good,2,5015,thowsand_oaks,5015 -0,1,237,poor,21,1409,alcatraz_ave,1406.182 -1,1,451,poor,41,1520,alcatraz_ave,1456.16 -2,1,903,poor,26,3193,alcatraz_ave,3154.684 -2,1,560,good,5,3089,thowsand_oaks,3089 -1,1,406,poor,55,1397,alcatraz_ave,1299.21 -0,1,547,poor,29,1641,alcatraz_ave,1611.462 -1,1,533,good,1,2000,thowsand_oaks,2000 -1,1,588,poor,50,1623,alcatraz_ave,1525.62 -1,1,471,poor,53,1450,northwest,1354.3 -3,2,1066,great,8,5301,east_elmwood,5301 -1,1,503,poor,17,1860,northwest,1860 -3,2,1024,good,14,4704,west_welmwood,4704 -0,1,323,poor,59,1127,northwest,1039.094 -1,1,687,poor,61,1616,northwest,1483.488 -3,2,995,good,2,4861,west_welmwood,4861 -0,1,357,good,3,1701,west_welmwood,1701 -2,1,723,poor,15,3170,northwest,3170 
-3,2,841,good,11,4609,west_welmwood,4609 -0,1,276,great,4,2104,east_elmwood,2104 -3,2,1077,poor,62,4344,northwest,3979.104 -0,1,304,good,0,1683,west_welmwood,1683 -3,2,990,good,6,4820,west_welmwood,4820 -3,2,863,good,12,4600,west_welmwood,4600 -0,1,119,poor,25,1264,northwest,1251.36 -0,1,234,good,12,1492,west_welmwood,1492 -0,1,157,good,1,1536,west_welmwood,1536 -0,1,524,great,10,2358,east_elmwood,2358 -0,1,502,poor,47,1436,northwest,1358.456 -1,1,449,poor,51,1472,northwest,1380.736 -2,1,592,good,12,3074,west_welmwood,3074 -1,1,387,good,11,1813,west_welmwood,1813 -2,1,873,good,3,3435,west_welmwood,3435 -3,2,827,good,0,4632,west_welmwood,4632 -1,1,593,poor,63,1505,northwest,1375.57 -1,1,479,good,2,1995,west_welmwood,1995 -2,1,788,good,0,3396,west_welmwood,3396 -1,1,737,good,9,2146,west_welmwood,2146 -1,1,467,good,2,1979,west_welmwood,1979 -0,1,424,good,1,1813,west_welmwood,1813 -1,1,471,great,5,2402,east_elmwood,2402 -1,1,547,good,5,2040,west_welmwood,2040 -1,1,328,poor,35,1512,northwest,1466.64 -0,1,154,good,9,1432,west_welmwood,1432 -2,1,701,poor,31,3003,northwest,2936.934 -3,2,1169,poor,31,4758,northwest,4653.324 -3,2,1024,good,1,4856,west_welmwood,4856 -1,1,633,great,12,2550,east_elmwood,2550 -0,1,501,good,10,1825,west_welmwood,1825 -2,1,771,good,0,3340,west_welmwood,3340 -1,1,730,poor,59,1641,northwest,1513.002 -0,1,545,good,0,1980,west_welmwood,1980 -2,1,667,great,9,3647,east_elmwood,3647 -0,1,513,good,10,1779,west_welmwood,1779 -3,2,1152,good,12,4870,west_welmwood,4870 -0,1,333,good,2,1748,west_welmwood,1748 -2,1,919,good,2,3450,west_welmwood,3450 -1,1,372,poor,64,1261,northwest,1150.032 -1,1,671,poor,35,1823,northwest,1768.31 -0,1,319,good,0,1680,west_welmwood,1680 -3,2,867,poor,63,4060,northwest,3710.84 -0,1,320,good,12,1594,west_welmwood,1594 -1,1,594,great,8,2484,east_elmwood,2484 -2,1,882,good,14,3389,west_welmwood,3389 -3,2,1062,poor,56,4374,northwest,4059.072 -3,2,823,poor,59,4046,northwest,3730.412 -2,1,874,good,6,3450,west_welmwood,3450 
-2,1,687,poor,30,2947,northwest,2888.06 -2,1,621,good,3,3150,west_welmwood,3150 -2,1,705,great,2,3812,east_elmwood,3812 -1,1,658,poor,41,1741,northwest,1667.878 -1,1,314,poor,58,1211,northwest,1118.964 -2,1,831,good,2,3439,west_welmwood,3439 -0,1,290,poor,59,1068,northwest,984.696 -0,1,398,poor,31,1499,northwest,1466.022 -2,1,729,good,12,3195,west_welmwood,3195 -0,1,238,poor,17,1471,northwest,1471 -0,1,538,good,10,1863,west_welmwood,1863 -2,1,631,poor,23,3019,northwest,3000.886 -0,1,542,poor,45,1474,northwest,1400.3 -3,2,883,poor,34,4365,northwest,4242.78 -2,1,703,poor,46,2811,northwest,2664.828 -1,1,441,poor,55,1396,northwest,1298.28 -3,2,1231,poor,62,4425,northwest,4053.3 -1,1,692,great,8,2590,east_elmwood,2590 -0,1,540,good,7,1834,west_welmwood,1834 -1,1,326,good,3,1773,west_welmwood,1773 -2,1,839,great,9,3816,east_elmwood,3816 -3,2,1173,great,4,5445,east_elmwood,5445 -1,1,569,poor,17,1937,northwest,1937 -2,1,622,great,10,3635,east_elmwood,3635 -2,1,716,poor,17,3183,northwest,3183 -1,1,548,good,12,1973,west_welmwood,1973 -2,1,591,poor,28,2954,northwest,2906.736 -3,2,868,poor,22,4453,northwest,4435.188 -2,1,853,poor,27,3213,northwest,3168.018 -1,1,733,great,2,2760,east_elmwood,2760 -2,1,710,good,14,3196,west_welmwood,3196 -0,1,151,good,0,1535,west_welmwood,1535 -3,2,1013,poor,25,4624,northwest,4577.76 -0,1,289,poor,54,1157,northwest,1078.324 -0,1,440,good,12,1746,west_welmwood,1746 -2,1,714,good,11,3202,west_welmwood,3202 -3,2,931,good,1,4762,west_welmwood,4762 -3,2,938,poor,19,4634,northwest,4634 -3,2,1191,great,0,5558,east_elmwood,5558 -1,1,495,good,6,1945,west_welmwood,1945 -3,2,1077,poor,20,4722,northwest,4722 -2,1,782,great,11,3744,east_elmwood,3744 -3,2,1099,good,7,4893,west_welmwood,4893 -0,1,398,great,13,2128,east_elmwood,2128 -1,1,385,good,13,1775,west_welmwood,1775 -0,1,213,good,13,1526,west_welmwood,1526 -0,1,180,poor,37,1199,northwest,1158.234 -3,2,979,poor,34,4456,northwest,4331.232 -2,1,757,good,5,3312,west_welmwood,3312 
-1,1,673,great,9,2619,east_elmwood,2619 -1,1,438,good,9,1798,west_welmwood,1798 -0,1,110,great,2,2037,east_elmwood,2037 -1,1,577,good,5,2017,west_welmwood,2017 -2,1,905,good,4,3459,west_welmwood,3459 -3,2,856,good,4,4712,west_welmwood,4712 -3,2,1082,great,12,5273,east_elmwood,5273 -0,1,174,good,14,1429,west_welmwood,1429 -1,1,435,poor,60,1303,northwest,1198.76 -2,1,765,poor,38,2952,northwest,2845.728 -1,1,628,good,10,2040,west_welmwood,2040 -1,1,735,great,10,2621,east_elmwood,2621 -1,1,568,good,14,1947,west_welmwood,1947 -1,1,625,good,10,2008,west_welmwood,2008 -2,1,846,good,2,3470,west_welmwood,3470 -2,1,518,good,3,3094,west_welmwood,3094 -3,2,1185,poor,53,4487,northwest,4190.858 -0,1,531,good,7,1906,west_welmwood,1906 -3,2,1245,good,3,5057,west_welmwood,5057 -0,1,384,poor,36,1416,northwest,1370.688 -3,2,940,good,9,4741,west_welmwood,4741 -1,1,740,good,1,2265,west_welmwood,2265 -1,1,307,great,0,2348,east_elmwood,2348 -2,1,548,good,10,3086,west_welmwood,3086 -0,1,461,great,1,2390,east_elmwood,2390 -3,2,1145,good,4,4947,west_welmwood,4947 -0,1,522,good,7,1827,west_welmwood,1827 -0,1,324,good,14,1570,west_welmwood,1570 -2,1,528,poor,34,2823,northwest,2743.956 -0,1,498,poor,63,1259,northwest,1150.726 -1,1,725,good,8,2100,west_welmwood,2100 -2,1,612,good,7,3183,west_welmwood,3183 -1,1,496,poor,59,1430,northwest,1318.46 -2,1,769,poor,49,2845,northwest,2679.99 -3,2,1034,good,6,4811,west_welmwood,4811 -2,1,646,poor,57,2718,northwest,2516.868 -3,2,901,good,12,4635,west_welmwood,4635 -3,2,1167,good,11,4895,west_welmwood,4895 -0,1,405,poor,45,1319,northwest,1253.05 -3,2,1242,good,14,4936,west_welmwood,4936 -1,1,599,good,3,2084,west_welmwood,2084 -1,1,587,good,3,2101,west_welmwood,2101 -2,1,810,good,3,3398,west_welmwood,3398 -1,1,709,good,9,2161,west_welmwood,2161 -0,1,497,good,0,1906,west_welmwood,1906 -1,1,305,good,0,1794,west_welmwood,1794 -2,1,667,good,6,3220,west_welmwood,3220 -3,2,895,good,11,4663,west_welmwood,4663 -3,2,1161,good,9,4906,west_welmwood,4906 
-0,1,387,poor,29,1501,northwest,1473.982 -1,1,503,good,0,2014,west_welmwood,2014 -2,1,856,great,4,3881,east_elmwood,3881 -1,1,444,great,11,2327,east_elmwood,2327 -0,1,207,good,3,1563,west_welmwood,1563 -1,1,676,good,1,2204,west_welmwood,2204 -3,2,1059,poor,25,4689,northwest,4642.11 -1,1,573,great,9,2436,east_elmwood,2436 -0,1,285,good,13,1595,west_welmwood,1595 -0,1,490,good,1,1864,west_welmwood,1864 -3,2,824,good,3,4631,west_welmwood,4631 -0,1,234,good,7,1562,west_welmwood,1562 -2,1,920,poor,15,3354,northwest,3354 -3,2,966,good,14,4689,west_welmwood,4689 -0,1,423,good,12,1725,west_welmwood,1725 -2,1,932,great,2,3980,east_elmwood,3980 -0,1,419,poor,28,1506,northwest,1481.904 -2,1,752,good,5,3342,west_welmwood,3342 -2,1,671,poor,51,2775,northwest,2602.95 -2,1,628,poor,51,2671,northwest,2505.398 -3,2,1058,good,12,4808,west_welmwood,4808 -2,1,668,poor,62,2605,northwest,2386.18 -0,1,131,poor,37,1125,northwest,1086.75 -0,1,269,good,9,1565,west_welmwood,1565 -1,1,663,good,9,2083,west_welmwood,2083 -3,2,1219,poor,34,4769,northwest,4635.468 -1,1,536,poor,41,1627,northwest,1558.666 -0,1,215,good,11,1532,west_welmwood,1532 -1,1,558,poor,51,1500,northwest,1407 -3,2,1242,great,1,5577,east_elmwood,5577 -1,1,672,poor,30,1845,northwest,1808.1 -1,1,497,good,4,1940,west_welmwood,1940 -1,1,598,great,2,2549,east_elmwood,2549 -3,2,1134,good,0,4959,west_welmwood,4959 -3,2,1174,great,11,5380,east_elmwood,5380 -1,1,476,poor,30,1634,northwest,1601.32 -0,1,228,great,1,2129,east_elmwood,2129 -0,1,389,great,12,2136,east_elmwood,2136 -0,1,489,great,7,2285,east_elmwood,2285 -1,1,352,poor,48,1357,northwest,1281.008 -0,1,544,good,7,1923,west_welmwood,1923 -1,1,604,poor,29,1776,northwest,1744.032 -1,1,528,good,5,2023,west_welmwood,2023 -3,2,1036,great,12,5305,east_elmwood,5305 -3,2,1060,poor,42,4462,northwest,4265.672 -2,1,872,good,2,3433,west_welmwood,3433 -1,1,477,good,4,1894,west_welmwood,1894 -2,1,933,poor,16,3398,northwest,3398 -1,1,511,good,10,1941,west_welmwood,1941 
-0,1,228,poor,19,1443,northwest,1443 -2,1,819,poor,55,2869,northwest,2668.17 -1,1,556,great,10,2447,east_elmwood,2447 -0,1,318,poor,33,1376,northwest,1340.224 -3,2,814,good,3,4680,west_welmwood,4680 -3,2,1215,poor,48,4631,northwest,4371.664 -0,1,190,poor,44,1195,northwest,1137.64 -2,1,594,good,1,3217,west_welmwood,3217 -0,1,310,poor,39,1358,northwest,1306.396 -2,1,629,great,7,3690,east_elmwood,3690 -3,2,957,good,7,4687,west_welmwood,4687 -3,2,1056,great,10,5328,east_elmwood,5328 -2,1,741,good,0,3370,west_welmwood,3370 -1,1,447,good,10,1889,west_welmwood,1889 -2,1,772,great,0,3839,east_elmwood,3839 -2,1,669,poor,32,2932,northwest,2861.632 -1,1,485,poor,39,1613,northwest,1551.706 -1,1,462,great,5,2387,east_elmwood,2387 -1,1,430,great,6,2332,east_elmwood,2332 -3,2,1183,good,7,4920,west_welmwood,4920 -3,2,875,poor,27,4471,northwest,4408.406 -1,1,496,good,14,1839,west_welmwood,1839 -3,2,993,great,6,5290,east_elmwood,5290 -0,1,446,poor,38,1448,northwest,1395.872 -0,1,431,good,1,1855,west_welmwood,1855 -0,1,300,great,1,2228,east_elmwood,2228 -0,1,304,good,5,1610,west_welmwood,1610 -2,1,748,poor,38,3003,northwest,2894.892 -0,1,182,poor,49,1116,northwest,1051.272 -0,1,198,good,1,1577,west_welmwood,1577 -3,2,1129,poor,52,4475,northwest,4188.6 -3,2,1005,good,2,4807,west_welmwood,4807 -2,1,509,great,14,3502,east_elmwood,3502 -2,1,731,poor,29,3058,northwest,3002.956 -2,1,809,poor,45,2996,northwest,2846.2 -0,1,232,poor,44,1239,northwest,1179.528 -0,1,247,poor,27,1422,northwest,1402.092 -1,1,682,great,4,2689,east_elmwood,2689 -1,1,493,poor,34,1617,northwest,1571.724 -1,1,536,good,2,2043,west_welmwood,2043 -0,1,513,poor,59,1343,northwest,1238.246 -3,2,1218,good,1,5076,west_welmwood,5076 -3,2,1072,good,4,4875,west_welmwood,4875 -2,1,778,good,0,3362,west_welmwood,3362 -2,1,773,good,12,3290,west_welmwood,3290 -0,1,372,great,1,2268,east_elmwood,2268 -3,2,1146,poor,31,4694,northwest,4590.732 -3,2,904,good,7,4670,west_welmwood,4670 -0,1,263,good,6,1621,west_welmwood,1621 
-2,1,784,good,5,3295,west_welmwood,3295 -1,1,669,great,13,2555,east_elmwood,2555 -0,1,488,good,4,1891,west_welmwood,1891 -0,1,231,good,11,1510,west_welmwood,1510 -2,1,849,good,2,3399,west_welmwood,3399 -2,1,621,good,3,3227,west_welmwood,3227 -1,1,483,good,8,1884,west_welmwood,1884 -0,1,243,good,5,1561,west_welmwood,1561 -2,1,840,good,10,3349,west_welmwood,3349 -3,2,1141,good,11,4852,west_welmwood,4852 -0,1,311,poor,40,1295,northwest,1243.2 -3,2,832,great,3,5146,east_elmwood,5146 -2,1,546,good,9,3092,west_welmwood,3092 -0,1,483,good,10,1801,west_welmwood,1801 -1,1,390,poor,63,1301,northwest,1189.114 -1,1,473,poor,37,1636,northwest,1580.376 -3,2,1080,good,3,4884,west_welmwood,4884 -2,1,815,poor,37,3029,northwest,2926.014 -0,1,289,poor,22,1430,northwest,1424.28 -1,1,372,good,11,1797,west_welmwood,1797 -2,1,949,great,10,3939,east_elmwood,3939 -2,1,754,poor,31,3015,northwest,2948.67 -0,1,105,poor,53,926,northwest,864.884 -2,1,848,good,11,3306,west_welmwood,3306 -1,1,737,poor,35,1901,northwest,1843.97 -2,1,856,poor,26,3175,northwest,3136.9 -3,2,953,poor,46,4294,northwest,4070.712 -0,1,449,poor,45,1435,northwest,1363.25 -3,2,1134,good,7,4922,west_welmwood,4922 -2,1,601,good,0,3153,west_welmwood,3153 -2,1,743,good,10,3230,west_welmwood,3230 -1,1,436,good,9,1850,west_welmwood,1850 -0,1,280,good,1,1701,west_welmwood,1701 -2,1,923,good,3,3522,west_welmwood,3522 -0,1,364,great,5,2237,east_elmwood,2237 -0,1,246,good,8,1608,west_welmwood,1608 -0,1,202,poor,23,1399,northwest,1390.606 -0,1,489,good,12,1738,west_welmwood,1738 -1,1,517,good,8,1963,west_welmwood,1963 -0,1,300,good,6,1661,west_welmwood,1661 -2,1,587,good,2,3132,west_welmwood,3132 -1,1,350,good,1,1852,west_welmwood,1852 -2,1,776,good,1,3354,west_welmwood,3354 -0,1,142,good,12,1418,west_welmwood,1418 -2,1,802,great,1,3885,east_elmwood,3885 -0,1,454,poor,17,1662,northwest,1662 -1,1,524,poor,64,1380,northwest,1258.56 -2,1,553,great,14,3485,east_elmwood,3485 -3,2,1139,great,13,5397,east_elmwood,5397 
-2,1,820,good,8,3324,west_welmwood,3324 -0,1,505,good,14,1738,west_welmwood,1738 -0,1,340,poor,57,1190,northwest,1101.94 -3,2,1189,good,8,4956,west_welmwood,4956 -3,2,1165,great,0,5525,east_elmwood,5525 -0,1,400,poor,38,1407,northwest,1356.348 -3,2,1214,great,14,5405,east_elmwood,5405 -1,1,493,good,13,1846,west_welmwood,1846 -0,1,344,poor,64,1078,northwest,983.136 -0,1,367,poor,57,1230,northwest,1138.98 -2,1,864,great,5,3935,east_elmwood,3935 -1,1,384,poor,36,1499,northwest,1451.032 -2,1,948,good,3,3549,west_welmwood,3549 -0,1,390,poor,32,1479,northwest,1443.504 -0,1,435,good,12,1683,west_welmwood,1683 -1,1,725,great,10,2672,east_elmwood,2672 -0,1,456,poor,39,1418,northwest,1364.116 -2,1,788,good,5,3295,west_welmwood,3295 -2,1,810,good,10,3331,west_welmwood,3331 -0,1,456,good,13,1721,west_welmwood,1721 -1,1,349,great,0,2333,east_elmwood,2333 -3,2,1065,good,3,4848,west_welmwood,4848 -2,1,623,poor,36,2911,northwest,2817.848 -1,1,543,good,8,1952,west_welmwood,1952 -3,2,1047,good,8,4816,west_welmwood,4816 -0,1,189,poor,63,967,northwest,883.838 -1,1,323,great,4,2239,east_elmwood,2239 -0,1,221,poor,20,1454,northwest,1454 -3,2,1022,poor,20,4672,northwest,4672 -1,1,539,good,10,1987,west_welmwood,1987 -1,1,619,poor,29,1848,northwest,1814.736 -2,1,742,poor,42,2971,northwest,2840.276 -0,1,481,poor,45,1471,northwest,1397.45 -3,2,1246,great,10,5540,east_elmwood,5540 -1,1,530,great,1,2546,east_elmwood,2546 -2,1,536,poor,37,2790,northwest,2695.14 -3,2,1193,great,13,5424,east_elmwood,5424 -1,1,347,good,3,1806,west_welmwood,1806 -3,2,970,good,14,4640,west_welmwood,4640 -3,2,1081,poor,26,4643,northwest,4587.284 -3,2,885,good,4,4701,west_welmwood,4701 -3,2,803,great,1,5129,east_elmwood,5129 -2,1,785,poor,25,3177,northwest,3145.23 -3,2,1042,poor,36,4498,northwest,4354.064 -2,1,576,good,1,3173,west_welmwood,3173 -0,1,412,poor,41,1365,northwest,1307.67 -3,2,1189,good,8,4996,west_welmwood,4996 -2,1,771,great,2,3826,east_elmwood,3826 -2,1,847,poor,59,2876,northwest,2651.672 
-3,2,1114,great,9,5355,east_elmwood,5355 -2,1,618,great,2,3746,east_elmwood,3746 -0,1,424,good,7,1732,west_welmwood,1732 -0,1,494,good,12,1786,west_welmwood,1786 -3,2,1113,great,3,5447,east_elmwood,5447 -3,2,1155,great,10,5451,east_elmwood,5451 -0,1,123,good,0,1527,west_welmwood,1527 -0,1,264,poor,62,1037,northwest,949.892 -2,1,864,good,3,3456,west_welmwood,3456 -1,1,440,good,3,1894,west_welmwood,1894 -3,2,1023,poor,54,4351,northwest,4055.132 -1,1,600,good,5,2020,west_welmwood,2020 -1,1,671,good,5,2156,west_welmwood,2156 -1,1,400,good,11,1789,west_welmwood,1789 -3,2,813,great,4,5096,east_elmwood,5096 -2,1,563,good,8,3096,west_welmwood,3096 -1,1,304,good,9,1759,west_welmwood,1759 -2,1,783,poor,26,3143,northwest,3105.284 -1,1,698,poor,39,1845,northwest,1774.89 -2,1,935,great,13,3903,east_elmwood,3903 -1,1,711,good,13,2033,west_welmwood,2033 -3,2,926,good,1,4723,west_welmwood,4723 -3,2,901,poor,31,4471,northwest,4372.638 -3,2,1002,poor,39,4485,northwest,4314.57 -1,1,442,good,0,1929,west_welmwood,1929 -0,1,483,poor,55,1337,northwest,1243.41 -0,1,191,poor,51,1077,northwest,1010.226 -3,2,882,poor,29,4466,northwest,4385.612 -1,1,531,poor,28,1739,northwest,1711.176 -1,1,495,good,4,1938,west_welmwood,1938 -3,2,857,poor,53,4161,northwest,3886.374 -1,1,561,poor,47,1543,northwest,1459.678 -0,1,256,poor,21,1399,northwest,1396.202 -2,1,643,good,14,3123,west_welmwood,3123 -3,2,1112,good,14,4782,west_welmwood,4782 -3,2,1153,good,13,4882,west_welmwood,4882 -2,1,532,great,10,3509,east_elmwood,3509 -2,1,519,poor,53,2588,northwest,2417.192 -1,1,661,poor,61,1591,northwest,1460.538 -0,1,226,poor,56,1085,northwest,1006.88 -0,1,286,great,6,2129,east_elmwood,2129 -0,1,205,good,9,1508,west_welmwood,1508 -0,1,231,good,1,1575,west_welmwood,1575 -3,2,1103,good,12,4872,west_welmwood,4872 -1,1,586,poor,48,1577,northwest,1488.688 -2,1,552,good,9,3046,west_welmwood,3046 -2,1,927,poor,26,3240,northwest,3201.12 -1,1,395,poor,60,1325,northwest,1219 -0,1,220,great,10,2010,east_elmwood,2010 
-3,2,1054,good,11,4794,west_welmwood,4794 -1,1,611,good,0,2151,west_welmwood,2151 -0,1,128,good,14,1343,west_welmwood,1343 -3,2,813,great,13,5004,east_elmwood,5004 -3,2,1041,poor,48,4394,northwest,4147.936 -0,1,316,great,9,2138,east_elmwood,2138 -3,2,853,good,14,4546,west_welmwood,4546 -2,1,662,poor,61,2610,northwest,2395.98 -3,2,831,great,9,5102,east_elmwood,5102 -2,1,897,good,3,3432,west_welmwood,3432 -3,2,1066,good,13,4777,west_welmwood,4777 -1,1,663,poor,53,1650,northwest,1541.1 -0,1,463,poor,33,1492,northwest,1453.208 -0,1,442,great,14,2174,east_elmwood,2174 -0,1,226,good,5,1583,west_welmwood,1583 -1,1,519,poor,59,1391,northwest,1282.502 -0,1,438,good,11,1739,west_welmwood,1739 -2,1,663,good,1,3234,west_welmwood,3234 -1,1,323,great,9,2229,east_elmwood,2229 -3,2,898,great,10,5150,east_elmwood,5150 -3,2,1097,poor,56,4358,northwest,4044.224 -3,2,1094,good,7,4862,west_welmwood,4862 -3,2,1245,great,13,5482,east_elmwood,5482 -3,2,967,good,12,4706,west_welmwood,4706 -2,1,866,poor,46,3038,northwest,2880.024 -3,2,972,good,2,4766,west_welmwood,4766 -0,1,531,poor,15,1819,northwest,1819 -1,1,429,good,11,1785,west_welmwood,1785 -0,1,530,good,10,1853,west_welmwood,1853 -1,1,710,good,9,2089,west_welmwood,2089 -3,2,1166,poor,37,4682,northwest,4522.812 -3,2,1184,good,9,4962,west_welmwood,4962 -3,2,872,poor,52,4167,northwest,3900.312 -2,1,606,good,12,3071,west_welmwood,3071 -2,1,724,poor,55,2790,northwest,2594.7 -0,1,176,great,1,2035,east_elmwood,2035 -1,1,578,good,14,1903,west_welmwood,1903 -1,1,665,great,4,2633,east_elmwood,2633 -3,2,813,poor,58,4065,northwest,3756.06 -1,1,499,poor,46,1498,northwest,1420.104 -0,1,225,good,7,1578,west_welmwood,1578 -3,2,1157,great,14,5317,east_elmwood,5317 -0,1,454,poor,60,1301,northwest,1196.92 -1,1,407,good,14,1750,west_welmwood,1750 -0,1,197,poor,58,992,northwest,916.608 -2,1,774,great,3,3821,east_elmwood,3821 -3,2,1027,good,6,4843,west_welmwood,4843 -0,1,293,poor,35,1379,northwest,1337.63 -0,1,446,good,11,1780,west_welmwood,1780 
-3,2,1134,poor,59,4438,northwest,4091.836 -0,1,493,good,9,1798,west_welmwood,1798 -3,2,1098,poor,63,4331,northwest,3958.534 -3,2,1096,poor,27,4639,northwest,4574.054 -2,1,689,good,5,3261,west_welmwood,3261 -2,1,699,great,14,3682,east_elmwood,3682 -1,1,637,great,11,2546,east_elmwood,2546 -1,1,344,good,2,1810,west_welmwood,1810 -0,1,154,good,9,1454,west_welmwood,1454 -1,1,726,good,11,2079,west_welmwood,2079 -1,1,698,good,5,2159,west_welmwood,2159 -3,2,1177,good,4,4958,west_welmwood,4958 -3,2,1058,great,6,5359,east_elmwood,5359 -0,1,489,great,4,2324,east_elmwood,2324 -1,1,347,poor,59,1304,northwest,1202.288 -2,1,901,poor,21,3307,northwest,3300.386 -3,2,1238,poor,53,4517,northwest,4218.878 -3,2,883,poor,64,4064,northwest,3706.368 -0,1,366,good,11,1635,west_welmwood,1635 -2,1,922,great,2,4049,east_elmwood,4049 -3,2,1000,good,13,4694,west_welmwood,4694 -1,1,601,great,13,2520,east_elmwood,2520 -2,1,911,poor,40,3153,northwest,3026.88 -1,1,518,good,13,1922,west_welmwood,1922 -2,1,504,good,12,2990,west_welmwood,2990 -1,1,350,good,6,1800,west_welmwood,1800 -0,1,224,poor,55,1094,northwest,1017.42 -3,2,830,poor,15,4536,northwest,4536 -2,1,659,poor,53,2693,northwest,2515.262 -1,1,525,poor,21,1789,northwest,1785.422 -3,2,1169,poor,61,4381,northwest,4021.758 -1,1,421,poor,27,1634,northwest,1611.124 -0,1,235,poor,21,1436,northwest,1433.128 -1,1,391,poor,19,1710,northwest,1710 -0,1,322,good,0,1713,west_welmwood,1713 -3,2,921,great,8,5141,east_elmwood,5141 -0,1,428,poor,33,1484,northwest,1445.416 -3,2,929,poor,31,4510,northwest,4410.78 -1,1,749,good,7,2148,west_welmwood,2148 -2,1,632,great,2,3692,east_elmwood,3692 -2,1,780,great,1,3856,east_elmwood,3856 -2,1,809,good,13,3238,west_welmwood,3238 -0,1,456,good,1,1850,west_welmwood,1850 -2,1,783,good,1,3379,west_welmwood,3379 -0,1,546,poor,44,1517,northwest,1444.184 -0,1,254,good,1,1662,west_welmwood,1662 -0,1,117,great,14,1879,east_elmwood,1879 -1,1,541,poor,51,1482,northwest,1390.116 -1,1,702,good,6,2182,west_welmwood,2182 
-1,1,404,good,6,1880,west_welmwood,1880 -1,1,437,great,6,2392,east_elmwood,2392 -3,2,1059,poor,62,4335,zaytuna_college,3970.86 -2,1,547,good,7,3028,north_berkeley,3028 -2,1,814,poor,52,2891,zaytuna_college,2705.976 -2,1,647,good,14,3059,north_berkeley,3059 -2,1,846,good,0,3434,north_berkeley,3434 -3,2,1009,good,7,4826,north_berkeley,4826 -3,2,1137,good,3,4949,north_berkeley,4949 -3,2,1054,good,4,4820,north_berkeley,4820 -3,2,818,good,0,4651,north_berkeley,4651 -3,2,1155,good,4,4992,north_berkeley,4992 -1,1,355,good,11,1703,north_berkeley,1703 -0,1,309,great,7,2105,east_elmwood,2105 -2,1,565,great,3,3644,east_elmwood,3644 -2,1,745,good,12,3178,north_berkeley,3178 -2,1,644,poor,36,2924,zaytuna_college,2830.432 -0,1,506,poor,19,1740,zaytuna_college,1740 -1,1,300,great,6,2273,east_elmwood,2273 -1,1,530,poor,20,1853,zaytuna_college,1853 -3,2,911,great,5,5199,east_elmwood,5199 -1,1,616,poor,44,1667,zaytuna_college,1586.984 -0,1,234,poor,49,1125,zaytuna_college,1059.75 -1,1,636,poor,53,1623,zaytuna_college,1515.882 -0,1,397,good,5,1716,north_berkeley,1716 -3,2,1235,good,14,4914,north_berkeley,4914 -3,2,1061,poor,63,4281,zaytuna_college,3912.834 -0,1,270,good,1,1661,north_berkeley,1661 -1,1,620,good,12,1950,north_berkeley,1950 -3,2,875,poor,30,4448,zaytuna_college,4359.04 -1,1,689,good,10,2117,north_berkeley,2117 -1,1,716,good,12,2102,north_berkeley,2102 -3,2,1149,poor,49,4464,zaytuna_college,4205.088 -1,1,403,poor,16,1760,zaytuna_college,1760 -1,1,588,good,10,2023,north_berkeley,2023 -2,1,668,good,4,3183,north_berkeley,3183 -3,2,1211,good,7,4966,north_berkeley,4966 -1,1,467,good,10,1839,north_berkeley,1839 -3,2,987,good,12,4689,north_berkeley,4689 -1,1,617,good,0,2067,north_berkeley,2067 -0,1,171,poor,28,1260,zaytuna_college,1239.84 -0,1,440,good,3,1854,north_berkeley,1854 -1,1,326,good,3,1809,north_berkeley,1809 -3,2,1157,good,10,4922,north_berkeley,4922 -0,1,249,good,5,1561,north_berkeley,1561 -0,1,288,good,0,1696,north_berkeley,1696 
-1,1,455,poor,20,1799,zaytuna_college,1799 -2,1,682,poor,46,2781,zaytuna_college,2636.388 -3,2,1143,poor,63,4406,zaytuna_college,4027.084 -2,1,684,great,6,3740,east_elmwood,3740 -1,1,433,good,8,1840,north_berkeley,1840 -3,2,874,good,7,4698,north_berkeley,4698 -1,1,488,great,10,2379,east_elmwood,2379 -0,1,371,great,7,2243,east_elmwood,2243 -1,1,572,good,0,2057,north_berkeley,2057 -0,1,196,great,1,2045,east_elmwood,2045 -3,2,1229,poor,19,4921,zaytuna_college,4921 -3,2,868,great,9,5112,east_elmwood,5112 -0,1,548,good,1,1923,north_berkeley,1923 -1,1,739,poor,48,1739,zaytuna_college,1641.616 -1,1,429,good,1,1896,north_berkeley,1896 -1,1,559,great,5,2480,east_elmwood,2480 -1,1,571,good,0,2038,north_berkeley,2038 -1,1,684,good,2,2213,north_berkeley,2213 -0,1,469,great,5,2350,east_elmwood,2350 -0,1,424,good,1,1783,north_berkeley,1783 -2,1,550,good,1,3157,north_berkeley,3157 -1,1,384,poor,49,1354,zaytuna_college,1275.468 -3,2,1165,poor,60,4463,zaytuna_college,4105.96 -3,2,1078,poor,24,4709,zaytuna_college,4671.328 -2,1,788,good,8,3319,north_berkeley,3319 -2,1,577,poor,58,2595,zaytuna_college,2397.78 -0,1,185,poor,23,1377,zaytuna_college,1368.738 -2,1,586,good,5,3154,north_berkeley,3154 -3,2,1105,good,8,4847,north_berkeley,4847 -3,2,933,good,2,4792,north_berkeley,4792 -1,1,315,poor,17,1672,zaytuna_college,1672 -1,1,466,good,9,1887,north_berkeley,1887 -1,1,637,good,8,2095,north_berkeley,2095 -2,1,536,good,14,3036,north_berkeley,3036 -0,1,235,great,4,2047,east_elmwood,2047 -3,2,813,good,10,4533,north_berkeley,4533 -1,1,748,poor,39,1809,zaytuna_college,1740.258 -2,1,926,poor,61,2885,zaytuna_college,2648.43 -0,1,348,good,8,1638,north_berkeley,1638 -2,1,777,good,8,3342,north_berkeley,3342 -0,1,245,poor,48,1124,zaytuna_college,1061.056 -1,1,319,great,6,2295,east_elmwood,2295 -3,2,1150,great,11,5416,east_elmwood,5416 -3,2,1198,good,6,4981,north_berkeley,4981 -0,1,155,good,4,1510,north_berkeley,1510 -1,1,562,good,5,2015,north_berkeley,2015 
-1,1,608,poor,63,1457,zaytuna_college,1331.698 -3,2,915,great,3,5275,east_elmwood,5275 -2,1,719,good,11,3218,north_berkeley,3218 -0,1,357,poor,30,1420,zaytuna_college,1391.6 -2,1,759,great,2,3791,east_elmwood,3791 -1,1,659,poor,37,1764,zaytuna_college,1704.024 -2,1,760,good,2,3381,north_berkeley,3381 -2,1,875,great,3,3949,east_elmwood,3949 -2,1,637,great,8,3698,east_elmwood,3698 -3,2,1245,poor,51,4628,zaytuna_college,4341.064 -0,1,542,good,3,1951,north_berkeley,1951 -3,2,1165,great,10,5370,east_elmwood,5370 -2,1,583,great,0,3709,east_elmwood,3709 -1,1,659,great,8,2552,east_elmwood,2552 -1,1,564,great,2,2586,east_elmwood,2586 -1,1,300,poor,27,1557,zaytuna_college,1535.202 -2,1,688,great,14,3697,east_elmwood,3697 -2,1,935,good,12,3444,north_berkeley,3444 -1,1,721,great,10,2589,east_elmwood,2589 -3,2,1164,good,5,4924,north_berkeley,4924 -0,1,455,good,9,1812,north_berkeley,1812 -3,2,935,poor,61,4224,zaytuna_college,3877.632 -1,1,584,poor,41,1644,zaytuna_college,1574.952 -0,1,501,great,3,2366,east_elmwood,2366 -1,1,679,poor,29,1930,zaytuna_college,1895.26 -0,1,439,poor,54,1290,zaytuna_college,1202.28 -1,1,705,poor,56,1641,zaytuna_college,1522.848 -1,1,597,good,8,2061,north_berkeley,2061 -3,2,1127,great,12,5402,east_elmwood,5402 -0,1,493,good,6,1830,north_berkeley,1830 -2,1,619,good,3,3160,north_berkeley,3160 -3,2,1102,poor,50,4418,zaytuna_college,4152.92 -1,1,554,poor,37,1680,zaytuna_college,1622.88 -0,1,359,poor,15,1627,zaytuna_college,1627 -3,2,982,great,9,5243,east_elmwood,5243 -3,2,1084,poor,18,4793,zaytuna_college,4793 -0,1,257,good,13,1518,north_berkeley,1518 -3,2,1111,poor,64,4317,zaytuna_college,3937.104 -1,1,432,great,8,2320,east_elmwood,2320 -3,2,1063,great,10,5292,east_elmwood,5292 -0,1,546,great,12,2286,east_elmwood,2286 -2,1,752,poor,26,3082,zaytuna_college,3045.016 -0,1,478,good,6,1814,north_berkeley,1814 -2,1,739,great,3,3775,east_elmwood,3775 -1,1,506,good,6,1920,north_berkeley,1920 -3,2,1237,good,12,4938,north_berkeley,4938 
-0,1,427,poor,55,1293,zaytuna_college,1202.49 -2,1,773,poor,32,3088,zaytuna_college,3013.888 -1,1,386,good,5,1861,north_berkeley,1861 -3,2,1232,good,11,5015,north_berkeley,5015 -0,1,468,poor,38,1449,zaytuna_college,1396.836 -0,1,235,good,3,1597,north_berkeley,1597 -0,1,135,good,7,1417,north_berkeley,1417 -1,1,537,poor,43,1595,zaytuna_college,1521.63 -2,1,663,good,8,3146,north_berkeley,3146 -1,1,662,great,9,2538,east_elmwood,2538 -3,2,1175,poor,33,4677,zaytuna_college,4555.398 -1,1,452,good,0,1962,north_berkeley,1962 -3,2,1224,good,6,5040,north_berkeley,5040 -2,1,651,great,4,3721,east_elmwood,3721 -1,1,304,great,2,2288,east_elmwood,2288 -3,2,1211,great,14,5418,east_elmwood,5418 -3,2,1123,poor,20,4727,zaytuna_college,4727 -1,1,654,poor,49,1630,zaytuna_college,1535.46 -1,1,630,good,6,2079,north_berkeley,2079 -1,1,631,poor,28,1894,zaytuna_college,1863.696 -0,1,435,good,14,1681,north_berkeley,1681 -0,1,400,good,1,1747,north_berkeley,1747 -3,2,1162,good,14,4824,north_berkeley,4824 -2,1,734,poor,37,2958,zaytuna_college,2857.428 -0,1,444,good,12,1722,north_berkeley,1722 -1,1,470,good,3,1912,north_berkeley,1912 -0,1,248,good,3,1594,north_berkeley,1594 -2,1,724,great,7,3770,east_elmwood,3770 -3,2,1136,good,3,4930,north_berkeley,4930 -3,2,891,poor,39,4333,zaytuna_college,4168.346 -2,1,629,great,12,3623,east_elmwood,3623 -0,1,484,great,6,2346,east_elmwood,2346 -0,1,203,good,8,1554,north_berkeley,1554 -3,2,1031,great,0,5336,east_elmwood,5336 -1,1,306,good,11,1727,north_berkeley,1727 -0,1,170,good,11,1455,north_berkeley,1455 -2,1,749,great,8,3799,east_elmwood,3799 -2,1,781,good,14,3197,north_berkeley,3197 -1,1,550,poor,17,1913,zaytuna_college,1913 -1,1,435,good,8,1826,north_berkeley,1826 -3,2,957,good,4,4751,north_berkeley,4751 -1,1,604,good,0,2082,north_berkeley,2082 -1,1,548,good,1,2014,north_berkeley,2014 -0,1,153,poor,17,1424,zaytuna_college,1424 -2,1,635,great,1,3696,east_elmwood,3696 -0,1,113,good,4,1461,north_berkeley,1461 -0,1,296,great,13,2095,east_elmwood,2095 
-2,1,743,great,9,3758,east_elmwood,3758 -0,1,453,good,11,1735,north_berkeley,1735 -0,1,113,poor,31,1182,zaytuna_college,1155.996 -1,1,663,poor,37,1771,zaytuna_college,1710.786 -1,1,633,good,0,2179,north_berkeley,2179 -2,1,835,poor,33,3092,zaytuna_college,3011.608 -0,1,349,good,4,1687,north_berkeley,1687 -2,1,698,poor,41,2897,zaytuna_college,2775.326 -2,1,504,good,2,3098,north_berkeley,3098 -0,1,121,poor,54,999,frontage_rd,931.068 -1,1,684,good,1,2192,north_berkeley,2192 -2,1,901,good,7,3473,north_berkeley,3473 -0,1,478,great,1,2414,east_elmwood,2414 -2,1,898,poor,41,3082,frontage_rd,2952.556 -1,1,482,good,14,1804,north_berkeley,1804 -0,1,239,poor,49,1168,frontage_rd,1100.256 -1,1,572,great,13,2403,east_elmwood,2403 -2,1,815,good,0,3378,north_berkeley,3378 -1,1,565,poor,61,1470,frontage_rd,1349.46 -3,2,1004,good,11,4706,north_berkeley,4706 -0,1,393,poor,49,1289,frontage_rd,1214.238 -0,1,117,great,3,1939,east_elmwood,1939 -0,1,448,poor,47,1337,frontage_rd,1264.802 -0,1,104,poor,20,1322,frontage_rd,1322 -1,1,391,good,10,1761,north_berkeley,1761 -0,1,542,good,5,1889,north_berkeley,1889 -0,1,365,good,9,1716,north_berkeley,1716 -0,1,510,poor,19,1730,frontage_rd,1730 -1,1,354,good,7,1811,north_berkeley,1811 -1,1,634,good,11,1991,north_berkeley,1991 -1,1,526,good,5,1933,north_berkeley,1933 -0,1,167,good,13,1447,north_berkeley,1447 -3,2,1018,poor,37,4453,frontage_rd,4301.598 -2,1,783,poor,54,2804,frontage_rd,2613.328 -1,1,416,good,3,1862,north_berkeley,1862 -3,2,916,poor,56,4246,frontage_rd,3940.288 -3,2,1223,good,1,5092,north_berkeley,5092 -1,1,359,poor,29,1577,frontage_rd,1548.614 -2,1,917,poor,29,3270,frontage_rd,3211.14 -1,1,338,great,9,2266,east_elmwood,2266 -3,2,926,poor,59,4218,frontage_rd,3888.996 -3,2,1116,great,14,5361,east_elmwood,5361 -3,2,1160,great,5,5470,east_elmwood,5470 -3,2,813,good,11,4568,north_berkeley,4568 -3,2,854,great,14,5103,east_elmwood,5103 -3,2,1068,great,11,5347,east_elmwood,5347 -2,1,758,great,4,3824,east_elmwood,3824 
-2,1,741,good,11,3184,north_berkeley,3184 -2,1,761,poor,31,3055,frontage_rd,2987.79 -3,2,811,good,12,4580,north_berkeley,4580 -3,2,1005,great,9,5305,east_elmwood,5305 -0,1,190,good,6,1550,north_berkeley,1550 -3,2,953,poor,48,4293,frontage_rd,4052.592 -3,2,1206,good,1,5011,north_berkeley,5011 -2,1,637,great,7,3692,east_elmwood,3692 -3,2,980,good,4,4791,north_berkeley,4791 -1,1,318,good,11,1671,north_berkeley,1671 -3,2,1227,poor,32,4784,frontage_rd,4669.184 -0,1,398,poor,60,1166,frontage_rd,1072.72 -1,1,543,great,8,2431,east_elmwood,2431 -3,2,1207,poor,28,4746,frontage_rd,4670.064 -0,1,509,great,6,2333,east_elmwood,2333 -2,1,685,poor,47,2844,frontage_rd,2690.424 -3,2,823,good,6,4589,north_berkeley,4589 -2,1,846,poor,48,2929,frontage_rd,2764.976 -2,1,846,good,3,3437,north_berkeley,3437 -0,1,397,poor,34,1462,frontage_rd,1421.064 -0,1,261,poor,55,1126,frontage_rd,1047.18 -1,1,733,good,13,2056,north_berkeley,2056 -0,1,265,poor,35,1308,frontage_rd,1268.76 -1,1,326,great,6,2314,east_elmwood,2314 -3,2,1020,great,8,5302,east_elmwood,5302 -3,2,1027,poor,44,4451,frontage_rd,4237.352 -2,1,551,poor,63,2502,frontage_rd,2286.828 -3,2,807,poor,55,4133,frontage_rd,3843.69 -3,2,1139,poor,48,4468,frontage_rd,4217.792 -1,1,635,good,9,2017,north_berkeley,2017 -1,1,520,good,13,1851,north_berkeley,1851 -2,1,765,great,14,3693,east_elmwood,3693 -0,1,505,good,6,1804,north_berkeley,1804 -3,2,837,poor,27,4428,frontage_rd,4366.008 -3,2,921,good,5,4744,north_berkeley,4744 -2,1,924,great,12,3902,east_elmwood,3902 -3,2,976,poor,38,4406,frontage_rd,4247.384 -2,1,918,good,0,3524,north_berkeley,3524 -0,1,332,good,0,1714,north_berkeley,1714 -1,1,463,good,7,1885,north_berkeley,1885 -3,2,822,poor,44,4201,frontage_rd,3999.352 -1,1,626,poor,55,1588,frontage_rd,1476.84 -0,1,424,poor,60,1235,frontage_rd,1136.2 -1,1,383,poor,49,1436,frontage_rd,1352.712 -1,1,496,good,4,1917,north_berkeley,1917 -3,2,1166,poor,52,4524,frontage_rd,4234.464 -3,2,1211,good,14,4970,north_berkeley,4970 
-3,2,896,poor,25,4498,frontage_rd,4453.02 -0,1,377,good,8,1698,north_berkeley,1698 -1,1,381,good,10,1784,north_berkeley,1784 -1,1,429,poor,53,1361,frontage_rd,1271.174 -2,1,750,good,9,3217,north_berkeley,3217 -1,1,718,poor,45,1806,frontage_rd,1715.7 -1,1,527,poor,47,1575,frontage_rd,1489.95 -2,1,652,good,4,3196,north_berkeley,3196 -3,2,1067,poor,24,4688,frontage_rd,4650.496 -3,2,1075,good,13,4841,north_berkeley,4841 -3,2,1154,great,1,5459,east_elmwood,5459 -1,1,649,great,1,2592,east_elmwood,2592 -0,1,195,good,9,1500,north_berkeley,1500 -2,1,774,poor,59,2778,frontage_rd,2561.316 -2,1,803,great,6,3822,east_elmwood,3822 -2,1,529,great,3,3633,east_elmwood,3633 -2,1,584,good,12,3042,north_berkeley,3042 -0,1,102,good,6,1472,north_berkeley,1472 -3,2,899,good,2,4696,north_berkeley,4696 -3,2,1237,good,14,4972,north_berkeley,4972 -0,1,228,poor,62,986,frontage_rd,903.176 -2,1,604,poor,40,2835,frontage_rd,2721.6 -0,1,518,poor,35,1607,frontage_rd,1558.79 -2,1,571,good,4,3100,north_berkeley,3100 -0,1,351,good,11,1657,north_berkeley,1657 -0,1,111,poor,29,1203,frontage_rd,1181.346 -1,1,345,poor,59,1273,frontage_rd,1173.706 -1,1,632,good,3,2082,north_berkeley,2082 -2,1,711,good,12,3151,north_berkeley,3151 -1,1,466,good,12,1848,north_berkeley,1848 -1,1,727,good,0,2190,north_berkeley,2190 -1,1,404,poor,63,1235,frontage_rd,1128.79 -1,1,374,poor,34,1498,frontage_rd,1456.056 -3,2,1108,great,13,5284,east_elmwood,5284 -3,2,1236,good,1,5119,north_berkeley,5119 -2,1,870,good,4,3433,north_berkeley,3433 -0,1,144,good,6,1523,north_berkeley,1523 -3,2,1221,good,11,4990,north_berkeley,4990 -3,2,985,good,10,4763,north_berkeley,4763 -1,1,318,great,7,2288,east_elmwood,2288 -2,1,538,good,12,3067,north_berkeley,3067 -1,1,487,great,13,2365,east_elmwood,2365 -2,1,836,good,13,3303,north_berkeley,3303 -3,2,1156,good,3,5025,north_berkeley,5025 -1,1,516,great,12,2439,east_elmwood,2439 -1,1,580,poor,53,1585,frontage_rd,1480.39 -1,1,713,good,9,2124,north_berkeley,2124 -2,1,724,good,2,3305,north_berkeley,3305 
-3,2,1226,good,3,5005,north_berkeley,5005 -1,1,674,good,8,2128,north_berkeley,2128 -0,1,532,good,1,1932,north_berkeley,1932 -3,2,1099,good,12,4789,north_berkeley,4789 -3,2,807,good,7,4608,north_berkeley,4608 -3,2,959,great,6,5244,east_elmwood,5244 -0,1,260,good,5,1591,north_berkeley,1591 -0,1,203,poor,38,1250,frontage_rd,1205 -3,2,1112,poor,29,4693,frontage_rd,4608.526 -2,1,655,good,11,3144,north_berkeley,3144 -0,1,355,poor,61,1130,frontage_rd,1037.34 -3,2,1146,good,5,4995,north_berkeley,4995 -3,2,1098,great,8,5396,east_elmwood,5396 -0,1,117,good,5,1465,north_berkeley,1465 -0,1,474,great,9,2297,east_elmwood,2297 -0,1,463,great,10,2272,east_elmwood,2272 -2,1,770,good,9,3277,north_berkeley,3277 -1,1,642,great,11,2502,east_elmwood,2502 -3,2,1044,poor,26,4599,frontage_rd,4543.812 -0,1,425,poor,45,1335,frontage_rd,1268.25 -2,1,681,poor,38,2928,frontage_rd,2822.592 -2,1,509,good,6,3054,north_berkeley,3054 -1,1,568,great,3,2575,east_elmwood,2575 -1,1,749,poor,21,1990,frontage_rd,1986.02 -2,1,651,great,2,3701,east_elmwood,3701 -0,1,448,good,14,1696,north_berkeley,1696 -0,1,492,poor,20,1696,frontage_rd,1696 -0,1,516,poor,59,1288,frontage_rd,1187.536 -1,1,625,good,0,2162,north_berkeley,2162 -2,1,874,good,13,3320,north_berkeley,3320 -1,1,738,good,2,2253,north_berkeley,2253 -0,1,148,good,4,1537,north_berkeley,1537 -3,2,819,good,3,4646,north_berkeley,4646 -0,1,101,good,5,1483,north_berkeley,1483 -0,1,131,poor,22,1308,frontage_rd,1302.768 -0,1,158,poor,57,1019,frontage_rd,943.594 -1,1,725,poor,44,1744,frontage_rd,1660.288 -2,1,616,great,2,3678,east_elmwood,3678 -0,1,341,great,7,2195,east_elmwood,2195 -1,1,668,good,0,2190,north_berkeley,2190 -0,1,491,good,2,1850,north_berkeley,1850 -3,2,895,good,12,4638,north_berkeley,4638 -2,1,787,good,1,3407,north_berkeley,3407 -2,1,873,poor,15,3317,frontage_rd,3317 -1,1,525,poor,47,1569,frontage_rd,1484.274 -2,1,941,poor,56,2987,frontage_rd,2771.936 -1,1,537,good,9,1905,north_berkeley,1905 -3,2,1070,great,5,5367,east_elmwood,5367 
-0,1,440,good,12,1751,north_berkeley,1751 -0,1,255,good,13,1513,north_berkeley,1513 -0,1,464,poor,45,1424,frontage_rd,1352.8 -3,2,974,good,6,4750,north_berkeley,4750 -1,1,322,poor,40,1452,frontage_rd,1393.92 -1,1,371,poor,47,1416,frontage_rd,1339.536 -1,1,487,poor,34,1685,frontage_rd,1637.82 -3,2,985,great,14,5221,east_elmwood,5221 -0,1,460,good,3,1853,north_berkeley,1853 -0,1,384,good,13,1667,north_berkeley,1667 -2,1,755,good,12,3205,north_berkeley,3205 -2,1,765,good,4,3362,north_berkeley,3362 -2,1,580,great,2,3692,east_elmwood,3692 -1,1,472,great,3,2479,east_elmwood,2479 -0,1,193,poor,33,1277,frontage_rd,1243.798 -1,1,312,poor,31,1509,frontage_rd,1475.802 -0,1,488,great,8,2304,east_elmwood,2304 -0,1,214,poor,33,1268,frontage_rd,1235.032 -3,2,939,good,7,4750,north_berkeley,4750 -0,1,502,poor,17,1743,frontage_rd,1743 -0,1,540,good,10,1835,north_berkeley,1835 -0,1,139,good,10,1487,north_berkeley,1487 -0,1,447,poor,29,1520,frontage_rd,1492.64 -2,1,776,great,4,3828,east_elmwood,3828 -1,1,695,great,6,2604,east_elmwood,2604 -0,1,449,good,7,1820,north_berkeley,1820 -3,2,984,great,6,5312,east_elmwood,5312 -0,1,255,good,9,1542,north_berkeley,1542 -3,2,811,poor,20,4428,frontage_rd,4428 -0,1,371,good,12,1643,north_berkeley,1643 -1,1,444,great,5,2399,east_elmwood,2399 -1,1,410,good,9,1800,north_berkeley,1800 -1,1,315,poor,39,1451,frontage_rd,1395.862 -0,1,368,poor,36,1412,frontage_rd,1366.816 -1,1,738,poor,51,1769,frontage_rd,1659.322 -1,1,372,good,1,1868,north_berkeley,1868 -3,2,899,poor,17,4620,frontage_rd,4620 -2,1,919,good,1,3538,north_berkeley,3538 -3,2,1082,good,7,4878,north_berkeley,4878 -3,2,1114,poor,44,4556,frontage_rd,4337.312 -0,1,163,poor,37,1155,frontage_rd,1115.73 -3,2,1090,poor,59,4367,frontage_rd,4026.374 -3,2,989,good,0,4856,north_berkeley,4856 -0,1,479,great,12,2275,east_elmwood,2275 -2,1,617,good,8,3124,north_berkeley,3124 -3,2,959,poor,61,4195,frontage_rd,3851.01 -1,1,735,good,12,2154,north_berkeley,2154 -3,2,1084,great,14,5272,east_elmwood,5272 
-1,1,480,good,4,1956,north_berkeley,1956 -2,1,813,poor,58,2874,frontage_rd,2655.576 -0,1,103,great,1,2023,east_elmwood,2023 -2,1,695,great,9,3701,east_elmwood,3701 -0,1,186,poor,20,1427,frontage_rd,1427 -0,1,270,poor,44,1231,frontage_rd,1171.912 -2,1,800,poor,51,2845,frontage_rd,2668.61 -2,1,502,great,4,3570,east_elmwood,3570 -2,1,680,poor,60,2697,frontage_rd,2481.24 -3,2,1071,poor,58,4345,frontage_rd,4014.78 -1,1,662,good,10,2060,north_berkeley,2060 -0,1,499,good,13,1818,north_berkeley,1818 -1,1,308,poor,33,1446,frontage_rd,1408.404 -0,1,236,good,13,1507,north_berkeley,1507 -2,1,889,good,7,3458,north_berkeley,3458 -2,1,884,good,6,3381,north_berkeley,3381 -0,1,454,good,11,1699,north_berkeley,1699 -3,2,1036,good,4,4820,north_berkeley,4820 -2,1,637,good,9,3105,north_berkeley,3105 -1,1,339,poor,51,1294,frontage_rd,1213.772 -2,1,587,poor,25,2917,frontage_rd,2887.83 -0,1,489,poor,36,1529,frontage_rd,1480.072 -0,1,440,good,2,1843,north_berkeley,1843 -2,1,573,good,2,3143,north_berkeley,3143 -1,1,575,good,9,1949,north_berkeley,1949 -0,1,207,good,13,1481,north_berkeley,1481 -0,1,407,great,0,2320,east_elmwood,2320 -0,1,295,poor,43,1228,frontage_rd,1171.512 -0,1,434,poor,17,1633,frontage_rd,1633 -0,1,184,great,13,1938,east_elmwood,1938 -2,1,895,poor,58,2881,frontage_rd,2662.044 -0,1,323,great,7,2182,east_elmwood,2182 -2,1,747,great,4,3774,east_elmwood,3774 -3,2,1166,poor,39,4612,frontage_rd,4436.744 -1,1,689,poor,29,1917,frontage_rd,1882.494 -2,1,663,great,8,3636,east_elmwood,3636 -1,1,412,poor,50,1425,frontage_rd,1339.5 -3,2,1131,good,14,4875,north_berkeley,4875 -1,1,300,good,9,1736,north_berkeley,1736 -2,1,786,good,12,3262,north_berkeley,3262 -0,1,402,poor,48,1350,frontage_rd,1274.4 -2,1,535,great,2,3639,east_elmwood,3639 -1,1,360,good,9,1740,north_berkeley,1740 -0,1,276,great,9,2065,east_elmwood,2065 -2,1,647,poor,43,2797,frontage_rd,2668.338 -1,1,611,good,5,2075,north_berkeley,2075 -3,2,956,good,6,4777,north_berkeley,4777 -1,1,426,good,14,1788,north_berkeley,1788 
-0,1,155,great,12,1945,east_elmwood,1945 -2,1,643,poor,40,2871,frontage_rd,2756.16 -1,1,593,good,10,1944,north_berkeley,1944 -3,2,1093,poor,39,4509,frontage_rd,4337.658 -1,1,487,good,12,1854,north_berkeley,1854 -1,1,349,good,10,1768,north_berkeley,1768 -0,1,171,poor,61,983,frontage_rd,902.394 -3,2,1024,good,1,4904,north_berkeley,4904 -0,1,116,good,10,1374,north_berkeley,1374 -2,1,561,poor,49,2708,frontage_rd,2550.936 -1,1,741,great,10,2637,east_elmwood,2637 -2,1,901,great,5,3952,east_elmwood,3952 -1,1,318,poor,57,1230,frontage_rd,1138.98 -1,1,688,great,8,2605,east_elmwood,2605 -3,2,828,good,7,4584,north_berkeley,4584 -2,1,938,great,4,3973,east_elmwood,3973 -2,1,669,poor,25,2970,frontage_rd,2940.3 -3,2,840,poor,64,4024,frontage_rd,3669.888 -2,1,638,poor,50,2712,frontage_rd,2549.28 -2,1,759,great,14,3712,east_elmwood,3712 -2,1,934,great,0,4058,east_elmwood,4058 -3,2,991,great,10,5287,east_elmwood,5287 -1,1,411,great,8,2316,east_elmwood,2316 -2,1,883,poor,35,3173,frontage_rd,3077.81 -3,2,1152,good,3,4986,north_berkeley,4986 -0,1,230,good,11,1490,north_berkeley,1490 -3,2,934,good,9,4733,north_berkeley,4733 -3,2,941,great,5,5274,east_elmwood,5274 -3,2,933,great,9,5156,east_elmwood,5156 -3,2,839,poor,55,4172,frontage_rd,3879.96 -0,1,216,great,1,2092,east_elmwood,2092 -3,2,1235,good,3,5088,north_berkeley,5088 -0,1,132,good,11,1445,north_berkeley,1445 -2,1,832,poor,34,3073,frontage_rd,2986.956 -2,1,900,great,13,3881,east_elmwood,3881 -3,2,897,good,8,4658,north_berkeley,4658 -1,1,698,great,11,2563,east_elmwood,2563 -1,1,368,good,1,1865,north_berkeley,1865 -2,1,664,good,8,3159,north_berkeley,3159 -3,2,1162,good,14,4883,north_berkeley,4883 -2,1,743,good,8,3300,north_berkeley,3300 -1,1,427,poor,64,1282,frontage_rd,1169.184 -1,1,632,good,1,2106,north_berkeley,2106 -2,1,698,poor,16,3173,frontage_rd,3173 -0,1,275,poor,18,1456,frontage_rd,1456 -3,2,1128,good,7,4889,north_berkeley,4889 -1,1,352,great,0,2346,east_elmwood,2346 -3,2,1174,good,3,4956,north_berkeley,4956 
-1,1,440,great,5,2420,east_elmwood,2420 -1,1,460,good,4,1949,north_berkeley,1949 -2,1,870,good,7,3435,north_berkeley,3435 -3,2,1240,great,14,5497,east_elmwood,5497 -0,1,326,poor,45,1259,frontage_rd,1196.05 -0,1,311,good,12,1542,north_berkeley,1542 -2,1,656,poor,24,2990,frontage_rd,2966.08 -1,1,560,good,14,1899,north_berkeley,1899 -0,1,352,poor,59,1211,frontage_rd,1116.542 -2,1,714,good,1,3282,north_berkeley,3282 -3,2,834,poor,32,4396,frontage_rd,4290.496 -0,1,348,good,0,1730,north_berkeley,1730 -0,1,388,good,3,1746,north_berkeley,1746 -0,1,328,great,13,2126,east_elmwood,2126 -2,1,718,good,6,3230,north_berkeley,3230 -0,1,435,poor,64,1177,frontage_rd,1073.424 -3,2,1182,great,6,5483,east_elmwood,5483 -1,1,632,great,11,2503,east_elmwood,2503 -3,2,902,great,2,5186,east_elmwood,5186 -1,1,612,good,6,2034,north_berkeley,2034 -0,1,217,good,11,1516,north_berkeley,1516 -0,1,126,good,11,1428,north_berkeley,1428 -1,1,586,good,7,2018,north_berkeley,2018 -3,2,831,good,1,4659,north_berkeley,4659 -1,1,380,good,12,1758,north_berkeley,1758 -3,2,1224,good,6,4976,north_berkeley,4976 -3,2,1009,poor,60,4283,frontage_rd,3940.36 -2,1,872,great,12,3868,east_elmwood,3868 -0,1,116,great,9,1964,east_elmwood,1964 -3,2,1060,good,8,4862,north_berkeley,4862 -1,1,442,poor,62,1292,frontage_rd,1183.472 -0,1,442,good,14,1737,north_berkeley,1737 -0,1,519,poor,43,1531,frontage_rd,1460.574 -0,1,118,good,1,1489,north_berkeley,1489 -2,1,674,good,13,3110,north_berkeley,3110 -3,2,1182,great,14,5364,east_elmwood,5364 -3,2,1056,poor,20,4663,frontage_rd,4663 -1,1,357,poor,55,1301,frontage_rd,1209.93 -1,1,507,great,7,2401,east_elmwood,2401 -2,1,793,good,4,3303,north_berkeley,3303 -0,1,254,good,12,1578,north_berkeley,1578 -0,1,375,good,4,1767,north_berkeley,1767 -3,2,934,poor,15,4637,frontage_rd,4637 -2,1,677,poor,15,3123,frontage_rd,3123 -2,1,582,good,8,3057,north_berkeley,3057 -2,1,918,poor,33,3210,frontage_rd,3126.54 -3,2,994,good,7,4801,north_berkeley,4801 -0,1,144,poor,50,1045,frontage_rd,982.3 
-3,2,997,good,11,4749,north_berkeley,4749 -0,1,430,great,8,2248,east_elmwood,2248 -0,1,443,poor,20,1602,frontage_rd,1602 -2,1,785,great,7,3832,east_elmwood,3832 -1,1,426,great,9,2344,east_elmwood,2344 -1,1,312,good,13,1645,north_berkeley,1645 -0,1,116,great,7,1929,east_elmwood,1929 -2,1,949,good,0,3513,north_berkeley,3513 -1,1,501,great,8,2390,east_elmwood,2390 -1,1,564,great,3,2538,east_elmwood,2538 -0,1,333,poor,26,1457,frontage_rd,1439.516 -3,2,879,great,9,5169,east_elmwood,5169 -2,1,817,good,4,3379,north_berkeley,3379 -3,2,1123,poor,49,4513,frontage_rd,4251.246 -1,1,"640,56",good,0,2184,north_berkeley,2184 -1,1,707,good,1,2168,north_berkeley,2168 -1,1,347,poor,63,1174,frontage_rd,1073.036 -0,1,245,poor,63,1060,frontage_rd,968.84 -3,2,1116,good,13,4808,north_berkeley,4808 -3,2,1083,great,9,5304,east_elmwood,5304 -3,2,911,great,0,5285,east_elmwood,5285 -1,1,724,good,12,2097,north_berkeley,2097 -1,1,627,good,8,2084,north_berkeley,2084 -0,1,259,great,10,2095,east_elmwood,2095 -0,1,216,poor,50,1079,frontage_rd,1014.26 -0,1,490,good,12,1732,north_berkeley,1732 -3,2,931,good,14,4617,north_berkeley,4617 -0,1,174,poor,43,1129,frontage_rd,1077.066 -3,2,956,great,10,5204,east_elmwood,5204 -2,1,937,poor,61,2962,frontage_rd,2719.116 -3,2,1220,great,11,5459,east_elmwood,5459 -0,1,240,good,12,1514,north_berkeley,1514 -3,2,964,great,2,5270,east_elmwood,5270 -1,1,589,poor,33,1770,frontage_rd,1723.98 -0,1,217,good,14,1447,north_berkeley,1447 -2,1,520,great,4,3540,east_elmwood,3540 -0,1,220,poor,49,1098,frontage_rd,1034.316 -3,2,1176,poor,19,4849,frontage_rd,4849 -3,2,1008,good,9,4794,north_berkeley,4794 -3,2,1202,good,11,4950,north_berkeley,4950 -3,2,1117,poor,57,4387,frontage_rd,4062.362 -3,2,1140,good,0,4996,north_berkeley,4996 -2,1,734,poor,35,2980,frontage_rd,2890.6 -0,1,419,good,6,1758,north_berkeley,1758 -3,2,1117,good,13,4796,north_berkeley,4796 -1,1,516,great,13,2352,east_elmwood,2352 -2,1,555,poor,51,2632,frontage_rd,2468.816 -1,1,407,good,12,1797,north_berkeley,1797 
-0,1,356,good,5,1671,north_berkeley,1671 -2,1,860,good,1,3434,north_berkeley,3434 -1,1,637,great,5,2582,east_elmwood,2582 -3,2,995,poor,39,4491,frontage_rd,4320.342 -1,1,623,poor,38,1766,frontage_rd,1702.424 -3,2,968,good,9,4699,north_berkeley,4699 -3,2,1182,great,8,5404,east_elmwood,5404 -0,1,147,poor,47,1084,frontage_rd,1025.464 -0,1,534,good,0,1963,north_berkeley,1963 -3,2,871,good,11,4648,north_berkeley,4648 -1,1,348,great,3,2363,east_elmwood,2363 -1,1,427,poor,17,1712,frontage_rd,1712 -0,1,140,good,3,1488,north_berkeley,1488 -2,1,898,great,10,3922,east_elmwood,3922 -0,1,532,poor,36,1523,frontage_rd,1474.264 -0,1,432,poor,19,1628,frontage_rd,1628 -3,2,1160,great,10,5360,east_elmwood,5360 -2,1,775,poor,43,2926,frontage_rd,2791.404 -0,1,178,great,14,1898,east_elmwood,1898 -1,1,395,good,1,1858,north_berkeley,1858 -1,1,304,good,0,1777,north_berkeley,1777 -0,1,438,great,12,2258,east_elmwood,2258 -3,2,1037,good,5,4849,north_berkeley,4849 -0,1,246,good,7,1528,north_berkeley,1528 -3,2,948,good,4,4752,north_berkeley,4752 -1,1,505,good,10,1907,north_berkeley,1907 -1,1,562,good,10,1917,north_berkeley,1917 -0,1,248,poor,29,1365,frontage_rd,1340.43 -1,1,737,great,12,2573,east_elmwood,2573 -3,2,1100,good,2,4926,north_berkeley,4926 -3,2,804,good,6,4591,north_berkeley,4591 -0,1,104,great,11,1867,east_elmwood,1867 -1,1,444,good,14,1801,north_berkeley,1801 -0,1,186,poor,30,1324,frontage_rd,1297.52 -0,1,383,great,4,2202,east_elmwood,2202 -2,1,620,good,8,3107,north_berkeley,3107 -3,2,1098,good,14,4856,north_berkeley,4856 -1,1,718,poor,39,1778,frontage_rd,1710.436 -2,1,866,good,3,3418,north_berkeley,3418 -1,1,404,poor,53,1348,frontage_rd,1259.032 -3,2,1022,good,8,4829,north_berkeley,4829 -1,1,508,good,5,1971,north_berkeley,1971 -2,1,854,great,5,3862,east_elmwood,3862 -0,1,542,good,13,1791,north_berkeley,1791 -3,2,1240,great,10,5503,east_elmwood,5503 -0,1,128,good,6,1432,north_berkeley,1432 -0,1,365,good,2,1702,north_berkeley,1702 -0,1,156,poor,62,887,frontage_rd,812.492 
-2,1,933,good,4,3449,north_berkeley,3449 -0,1,404,poor,61,1163,frontage_rd,1067.634 -2,1,822,good,8,3309,north_berkeley,3309 -2,1,744,great,13,3692,east_elmwood,3692 -1,1,507,good,12,1846,north_berkeley,1846 -1,1,505,great,11,2349,east_elmwood,2349 -2,1,946,poor,34,3251,frontage_rd,3159.972 -0,1,146,good,14,1387,north_berkeley,1387 -3,2,808,good,8,4612,north_berkeley,4612 -3,2,936,good,13,4692,north_berkeley,4692 -2,1,569,poor,19,2949,frontage_rd,2949 -2,1,508,great,2,3599,east_elmwood,3599 -3,2,1041,good,7,4801,north_berkeley,4801 -3,2,840,poor,50,4227,frontage_rd,3973.38 -3,2,1246,good,12,5011,north_berkeley,5011 -1,1,481,great,6,2418,east_elmwood,2418 -0,1,503,good,7,1849,north_berkeley,1849 -1,1,340,great,3,2307,east_elmwood,2307 -0,1,397,good,9,1703,north_berkeley,1703 -3,2,808,poor,45,4177,frontage_rd,3968.15 -2,1,794,poor,43,2992,frontage_rd,2854.368 -3,2,1209,good,10,4916,north_berkeley,4916 -1,1,358,poor,37,1471,frontage_rd,1420.986 -1,1,552,great,4,2485,east_elmwood,2485 -3,2,904,good,14,4576,north_berkeley,4576 -2,1,539,poor,27,2887,frontage_rd,2846.582 -2,1,761,good,3,3283,north_berkeley,3283 -1,1,639,great,8,2576,east_elmwood,2576 -0,1,191,poor,23,1375,frontage_rd,1366.75 -1,1,676,great,5,2620,east_elmwood,2620 -2,1,735,poor,31,2999,frontage_rd,2933.022 -3,2,941,good,9,4745,north_berkeley,4745 -3,2,1212,poor,21,4849,frontage_rd,4839.302 -2,1,556,poor,52,2627,frontage_rd,2458.872 -1,1,574,good,8,2008,north_berkeley,2008 -0,1,209,great,2,2042,east_elmwood,2042 -2,1,753,poor,16,3203,frontage_rd,3203 -0,1,276,poor,62,1069,frontage_rd,979.204 -3,2,1234,good,10,5025,north_berkeley,5025 -0,1,505,poor,24,1683,frontage_rd,1669.536 -1,1,661,great,7,2547,east_elmwood,2547 -3,2,885,great,8,5184,east_elmwood,5184 -2,1,865,great,9,3849,east_elmwood,3849 -0,1,150,great,14,1916,east_elmwood,1916 -3,2,838,poor,56,4139,frontage_rd,3840.992 -0,1,288,poor,22,1517,frontage_rd,1510.932 -2,1,868,good,7,3436,north_berkeley,3436 -3,2,1178,good,5,4932,north_berkeley,4932 
-3,2,915,great,7,5205,east_elmwood,5205 -1,1,355,poor,64,1263,frontage_rd,1151.856 -0,1,478,good,10,1775,north_berkeley,1775 -0,1,352,great,11,2174,east_elmwood,2174 -0,1,281,good,10,1567,north_berkeley,1567 -0,1,178,poor,22,1322,frontage_rd,1316.712 -3,2,1124,poor,34,4603,frontage_rd,4474.116 -2,1,858,good,12,3307,north_berkeley,3307 -0,1,143,poor,21,1310,frontage_rd,1307.38 -3,2,1042,good,3,4836,north_berkeley,4836 -1,1,383,good,10,1740,north_berkeley,1740 -1,1,324,poor,50,1324,frontage_rd,1244.56 -2,1,508,great,2,3559,east_elmwood,3559 -1,1,398,good,6,1868,north_berkeley,1868 -1,1,408,good,6,1810,north_berkeley,1810 -2,1,813,poor,34,3057,frontage_rd,2971.404 -0,1,380,good,11,1627,north_berkeley,1627 -2,1,747,poor,16,3219,frontage_rd,3219 -2,1,791,poor,24,3175,frontage_rd,3149.6 -0,1,185,good,0,1578,north_berkeley,1578 -3,2,963,good,9,4736,north_berkeley,4736 -1,1,327,poor,49,1379,frontage_rd,1299.018 -0,1,125,great,10,1930,east_elmwood,1930 -3,2,1140,great,2,5507,east_elmwood,5507 -2,1,654,good,2,3270,north_berkeley,3270 -0,1,405,good,3,1754,north_berkeley,1754 -3,2,1206,good,7,4996,north_berkeley,4996 -0,1,261,good,7,1639,north_berkeley,1639 -2,1,772,poor,16,3224,frontage_rd,3224 -0,1,379,good,11,1649,north_berkeley,1649 -2,1,732,great,6,3730,east_elmwood,3730 -3,2,1187,good,9,4964,north_berkeley,4964 -2,1,712,great,14,3707,east_elmwood,3707 -1,1,662,poor,31,1849,frontage_rd,1808.322 -0,1,339,poor,30,1452,frontage_rd,1422.96 -2,1,880,poor,49,2957,frontage_rd,2785.494 -1,1,359,poor,44,1392,frontage_rd,1325.184 -0,1,163,poor,52,1037,frontage_rd,970.632 -0,1,365,poor,22,1525,frontage_rd,1518.9 -3,2,1091,good,13,4804,north_berkeley,4804 -1,1,697,poor,62,1599,frontage_rd,1464.684 -3,2,1092,poor,45,4470,frontage_rd,4246.5 -1,1,512,poor,54,1493,frontage_rd,1391.476 -2,1,710,poor,40,2891,frontage_rd,2775.36 -2,1,655,poor,62,2629,frontage_rd,2408.164 -3,2,807,good,12,4572,north_berkeley,4572 -0,1,147,good,4,1483,north_berkeley,1483 
-2,1,797,good,9,3296,north_berkeley,3296 -0,1,442,good,10,1717,north_berkeley,1717 -0,1,325,great,11,2079,east_elmwood,2079 -2,1,704,great,6,3707,east_elmwood,3707 -0,1,495,great,13,2226,east_elmwood,2226 -3,2,957,good,11,4708,north_berkeley,4708 -2,1,"944,98",good,0,3581,north_berkeley,3581 -1,1,698,good,10,2132,north_berkeley,2132 -3,2,897,good,3,4677,north_berkeley,4677 -1,1,643,poor,23,1942,frontage_rd,1930.348 -2,1,757,good,9,3271,north_berkeley,3271 -3,2,1189,poor,26,4740,frontage_rd,4683.12 -3,2,932,good,9,4738,north_berkeley,4738 -3,2,948,good,9,4670,north_berkeley,4670 -1,1,488,poor,61,1377,frontage_rd,1264.086 -3,2,1185,poor,41,4578,frontage_rd,4385.724 -3,2,824,good,7,4646,north_berkeley,4646 -1,1,472,good,12,1896,north_berkeley,1896 -0,1,479,good,10,1770,north_berkeley,1770 -0,1,490,poor,17,1733,frontage_rd,1733 -1,1,457,great,6,2441,east_elmwood,2441 -1,1,520,good,7,1926,north_berkeley,1926 -1,1,322,great,5,2296,east_elmwood,2296 -0,1,482,poor,54,1295,frontage_rd,1206.94 -2,1,740,poor,40,2931,frontage_rd,2813.76 -2,1,760,good,0,3367,north_berkeley,3367 -2,1,758,poor,24,3120,frontage_rd,3095.04 -2,1,919,great,14,3917,east_elmwood,3917 -3,2,1011,good,4,4853,north_berkeley,4853 -1,1,665,great,6,2638,east_elmwood,2638 -3,2,1194,great,10,5429,east_elmwood,5429 -1,1,714,great,14,2526,east_elmwood,2526 -2,1,922,poor,29,3251,frontage_rd,3192.482 -1,1,649,great,9,2514,east_elmwood,2514 -1,1,380,poor,25,1651,frontage_rd,1634.49 -2,1,875,good,11,3322,north_berkeley,3322 -3,2,883,good,6,4685,north_berkeley,4685 -2,1,531,great,4,3634,east_elmwood,3634 -2,1,712,good,1,3282,north_berkeley,3282 -2,1,841,good,8,3369,north_berkeley,3369 -1,1,433,poor,60,1354,frontage_rd,1245.68 -2,1,929,poor,62,2956,frontage_rd,2707.696 -0,1,116,poor,50,998,frontage_rd,938.12 -1,1,325,good,13,1651,north_berkeley,1651 -3,2,1218,poor,60,4505,frontage_rd,4144.6 -1,1,335,poor,17,1645,frontage_rd,1645 -1,1,653,poor,28,1899,frontage_rd,1868.616 -3,2,1102,great,8,5390,east_elmwood,5390 
-2,1,921,great,11,3888,east_elmwood,3888 -3,2,1207,poor,24,4809,frontage_rd,4770.528 -3,2,1054,good,9,4852,north_berkeley,4852 -3,2,1159,great,10,5383,east_elmwood,5383 -0,1,192,poor,41,1135,frontage_rd,1087.33 -0,1,469,good,7,1755,north_berkeley,1755 -2,1,687,good,12,3166,north_berkeley,3166 -3,2,829,good,13,4590,north_berkeley,4590 -0,1,445,good,12,1724,north_berkeley,1724 -0,1,226,poor,46,1202,frontage_rd,1139.496 -1,1,466,poor,30,1672,frontage_rd,1638.56 -3,2,1131,good,0,4950,north_berkeley,4950 -1,1,465,poor,15,1787,frontage_rd,1787 -0,1,454,great,13,2206,east_elmwood,2206 -2,1,545,good,3,3150,north_berkeley,3150 -1,1,696,good,11,2113,north_berkeley,2113 -3,2,1064,poor,27,4634,frontage_rd,4569.124 -0,1,193,poor,62,942,frontage_rd,862.872 -0,1,410,poor,46,1389,frontage_rd,1316.772 -3,2,1174,good,9,4919,north_berkeley,4919 -0,1,382,great,7,2204,east_elmwood,2204 -3,2,1002,good,4,4860,north_berkeley,4860 -0,1,542,great,3,2377,east_elmwood,2377 -1,1,621,great,0,2577,east_elmwood,2577 -3,2,1159,good,3,5002,north_berkeley,5002 -2,1,741,poor,16,3158,frontage_rd,3158 -3,2,1101,great,13,5282,east_elmwood,5282 -3,2,857,great,3,5208,east_elmwood,5208 -1,1,336,great,1,2351,east_elmwood,2351 -1,1,429,poor,38,1516,frontage_rd,1461.424 -3,2,1192,great,12,5421,east_elmwood,5421 -2,1,532,poor,20,2909,frontage_rd,2909 -0,1,399,good,5,1784,north_berkeley,1784 -1,1,652,good,12,2020,north_berkeley,2020 -2,1,879,great,2,3912,east_elmwood,3912 -3,2,833,poor,30,4378,frontage_rd,4290.44 -0,1,119,good,4,1485,north_berkeley,1485 -1,1,670,good,3,2179,north_berkeley,2179 -2,1,837,good,11,3301,north_berkeley,3301 -3,2,1094,poor,36,4542,frontage_rd,4396.656 -3,2,1132,good,6,4961,north_berkeley,4961 -3,2,1089,great,8,5328,east_elmwood,5328 -1,1,483,poor,31,1663,frontage_rd,1626.414 -0,1,456,poor,38,1525,frontage_rd,1470.1 -1,1,550,good,14,1952,north_berkeley,1952 -0,1,144,good,0,1580,north_berkeley,1580 -1,1,543,poor,63,1372,frontage_rd,1254.008 -2,1,733,good,6,3281,north_berkeley,3281 
-2,1,773,good,8,3293,north_berkeley,3293 -0,1,508,good,1,1928,north_berkeley,1928 -1,1,685,poor,25,1965,frontage_rd,1945.35 -0,1,402,poor,46,1324,frontage_rd,1255.152 -2,1,571,good,12,3054,north_berkeley,3054 -1,1,336,poor,24,1580,frontage_rd,1567.36 -0,1,406,good,12,1668,north_berkeley,1668 -0,1,499,good,4,1816,north_berkeley,1816 -0,1,313,poor,40,1339,frontage_rd,1285.44 -1,1,681,good,4,2109,north_berkeley,2109 -2,1,721,poor,59,2752,frontage_rd,2537.344 -1,1,305,poor,48,1311,frontage_rd,1237.584 -3,2,923,poor,38,4418,frontage_rd,4258.952 -1,1,379,poor,34,1524,frontage_rd,1481.328 -0,1,421,great,3,2331,east_elmwood,2331 -2,1,594,poor,34,2841,frontage_rd,2761.452 -1,1,658,great,12,2491,east_elmwood,2491 -3,2,978,good,5,4820,north_berkeley,4820 -0,1,247,poor,36,1301,frontage_rd,1259.368 -2,1,812,great,12,3836,east_elmwood,3836 -1,1,326,good,9,1782,north_berkeley,1782 -1,1,661,poor,16,2004,frontage_rd,2004 -0,1,415,good,8,1753,north_berkeley,1753 -0,1,371,poor,63,1101,frontage_rd,1006.314 -3,2,1165,poor,33,4733,frontage_rd,4609.942 -3,2,915,good,1,4740,north_berkeley,4740 -0,1,270,good,8,1598,north_berkeley,1598 -0,1,239,good,7,1574,north_berkeley,1574 -3,2,1183,great,3,5474,east_elmwood,5474 -1,1,332,great,0,2302,east_elmwood,2302 -3,2,853,great,0,5187,east_elmwood,5187 -2,1,852,poor,54,2953,frontage_rd,2752.196 -2,1,764,poor,64,2751,frontage_rd,2508.912 -1,1,338,good,0,1826,north_berkeley,1826 -2,1,905,poor,46,3031,frontage_rd,2873.388 -2,1,711,good,13,3177,north_berkeley,3177 -1,1,580,poor,42,1630,frontage_rd,1558.28 -3,2,1111,poor,62,4325,frontage_rd,3961.7 -2,1,557,good,2,3181,north_berkeley,3181 -3,2,1192,poor,24,4759,frontage_rd,4720.928 -1,1,548,great,6,2463,east_elmwood,2463 -1,1,369,great,11,2278,east_elmwood,2278 -1,1,742,good,8,2120,north_berkeley,2120 -0,1,291,good,12,1546,north_berkeley,1546 -3,2,1096,great,1,5464,east_elmwood,5464 -2,1,936,poor,61,2950,frontage_rd,2708.1 -0,1,181,great,11,1964,east_elmwood,1964 -0,1,490,good,11,1760,north_berkeley,1760 
-1,1,516,poor,44,1622,frontage_rd,1544.144 -1,1,344,poor,33,1508,frontage_rd,1468.792 -2,1,821,good,1,3408,north_berkeley,3408 -2,1,702,great,8,3695,east_elmwood,3695 -3,2,882,poor,49,4218,frontage_rd,3973.356 -2,1,945,poor,21,3367,frontage_rd,3360.266 -3,2,999,good,5,4830,north_berkeley,4830 -1,1,395,poor,30,1578,frontage_rd,1546.44 -0,1,527,great,8,2387,east_elmwood,2387 -3,2,1006,poor,48,4345,frontage_rd,4101.68 -1,1,696,good,10,2103,north_berkeley,2103 -0,1,309,good,0,1697,north_berkeley,1697 -3,2,923,good,1,4774,north_berkeley,4774 -3,2,879,good,7,4674,north_berkeley,4674 -0,1,227,good,7,1593,north_berkeley,1593 -0,1,150,poor,49,1053,frontage_rd,991.926 -2,1,912,good,6,3473,north_berkeley,3473 -1,1,353,poor,54,1293,frontage_rd,1205.076 -0,1,235,poor,52,1160,frontage_rd,1085.76 -2,1,584,poor,26,2922,frontage_rd,2886.936 -0,1,505,good,9,1827,north_berkeley,1827 -1,1,489,great,2,2514,east_elmwood,2514 -1,1,694,poor,28,1931,frontage_rd,1900.104 -2,1,638,good,1,3237,north_berkeley,3237 -0,1,285,good,0,1664,north_berkeley,1664 -3,2,1041,good,8,4829,north_berkeley,4829 -1,1,622,great,6,2604,east_elmwood,2604 -0,1,290,poor,20,1527,frontage_rd,1527 -2,1,823,good,3,3385,north_berkeley,3385 -0,1,330,poor,19,1522,frontage_rd,1522 -2,1,830,great,11,3857,east_elmwood,3857 -2,1,774,good,7,3322,north_berkeley,3322 -1,1,628,poor,52,1644,frontage_rd,1538.784 -3,2,1030,great,7,5351,east_elmwood,5351 -0,1,378,great,10,2200,east_elmwood,2200 -0,1,289,poor,44,1250,frontage_rd,1190 -1,1,519,good,4,1993,north_berkeley,1993 -0,1,144,good,11,1468,north_berkeley,1468 -2,1,585,poor,36,2806,frontage_rd,2716.208 -2,1,500,good,5,3001,north_berkeley,3001 -2,1,850,poor,24,3255,frontage_rd,3228.96 -3,2,970,good,13,4645,north_berkeley,4645 -2,1,728,great,9,3688,east_elmwood,3688 -3,2,1031,great,13,5242,east_elmwood,5242 -0,1,526,great,2,2393,east_elmwood,2393 -0,1,505,good,4,1882,north_berkeley,1882 -0,1,205,good,0,1579,north_berkeley,1579 -0,1,508,poor,49,1393,frontage_rd,1312.206 
-1,1,471,good,13,1876,north_berkeley,1876 -1,1,359,poor,24,1628,frontage_rd,1614.976 -1,1,739,poor,64,1617,frontage_rd,1474.704 -3,2,1164,good,6,4945,north_berkeley,4945 -1,1,509,great,13,2402,east_elmwood,2402 -2,1,804,good,13,3227,north_berkeley,3227 -0,1,502,good,11,1748,north_berkeley,1748 -3,2,1152,poor,52,4524,frontage_rd,4234.464 -2,1,934,great,5,3947,east_elmwood,3947 -3,2,823,great,4,5149,east_elmwood,5149 -2,1,556,great,4,3646,east_elmwood,3646 -0,1,196,poor,41,1226,frontage_rd,1174.508 -2,1,665,poor,26,3048,frontage_rd,3011.424 -1,1,458,good,11,1826,north_berkeley,1826 -1,1,664,good,10,2024,north_berkeley,2024 -3,2,813,poor,26,4417,frontage_rd,4363.996 -3,2,1105,great,5,5374,east_elmwood,5374 -2,1,732,good,13,3152,north_berkeley,3152 -1,1,364,poor,49,1327,frontage_rd,1250.034 -1,1,498,good,2,1949,north_berkeley,1949 -1,1,731,good,13,2137,north_berkeley,2137 -3,2,1004,good,1,4870,north_berkeley,4870 -0,1,417,great,9,2265,east_elmwood,2265 -2,1,771,good,9,3328,north_berkeley,3328 -2,1,503,poor,32,2763,frontage_rd,2696.688 -3,2,1080,great,0,5459,east_elmwood,5459 -1,1,458,great,3,2384,east_elmwood,2384 -3,2,1240,poor,27,4817,frontage_rd,4749.562 -1,1,703,good,3,2174,north_berkeley,2174 -3,2,1100,good,10,4819,north_berkeley,4819 -3,2,1233,good,1,5037,north_berkeley,5037 -2,1,748,poor,25,3113,frontage_rd,3081.87 -3,2,954,poor,22,4632,frontage_rd,4613.472 -1,1,512,poor,49,1552,frontage_rd,1461.984 -0,1,141,great,9,1996,east_elmwood,1996 -2,1,888,great,1,3970,east_elmwood,3970 -2,1,944,poor,54,3019,frontage_rd,2813.708 -3,2,853,poor,50,4219,frontage_rd,3965.86 -2,1,686,good,12,3201,north_berkeley,3201 -2,1,763,poor,48,2850,frontage_rd,2690.4 -3,2,1107,poor,43,4567,frontage_rd,4356.918 -1,1,615,poor,31,1757,frontage_rd,1718.346 -3,2,1132,good,4,4938,north_berkeley,4938 -3,2,1038,poor,26,4655,frontage_rd,4599.14 -1,1,701,great,11,2570,east_elmwood,2570 -2,1,510,good,6,3053,north_berkeley,3053 -3,2,951,poor,57,4229,frontage_rd,3916.054 
-2,1,764,poor,51,2878,frontage_rd,2699.564 -3,2,1148,good,0,5024,north_berkeley,5024 -2,1,827,poor,50,2966,frontage_rd,2788.04 -1,1,664,poor,52,1613,frontage_rd,1509.768 -3,2,1061,good,11,4811,north_berkeley,4811 -0,1,449,good,7,1773,north_berkeley,1773 -0,1,284,good,2,1695,north_berkeley,1695 -3,2,1191,good,7,5020,north_berkeley,5020 -3,2,1039,good,13,4765,north_berkeley,4765 -1,1,365,good,2,1828,north_berkeley,1828 -2,1,781,good,11,3299,north_berkeley,3299 -3,2,889,great,3,5256,east_elmwood,5256 -2,1,827,good,9,3305,north_berkeley,3305 -1,1,414,great,6,2379,east_elmwood,2379 -1,1,454,good,12,1793,north_berkeley,1793 -0,1,201,good,5,1594,north_berkeley,1594 -0,1,320,poor,64,1085,frontage_rd,989.52 -2,1,946,poor,56,3027,frontage_rd,2809.056 -3,2,1076,great,13,5332,east_elmwood,5332 -1,1,643,good,14,2034,north_berkeley,2034 -2,1,791,poor,62,2738,frontage_rd,2508.008 -1,1,580,great,4,2571,east_elmwood,2571 -0,1,104,great,0,2043,east_elmwood,2043 -1,1,584,good,12,1916,north_berkeley,1916 -3,2,1006,good,4,4784,north_berkeley,4784 -1,1,516,good,13,1915,north_berkeley,1915 -0,1,447,poor,15,1730,frontage_rd,1730 -1,1,722,good,8,2169,north_berkeley,2169 -3,2,811,poor,34,4343,frontage_rd,4221.396 -2,1,560,great,12,3517,east_elmwood,3517 -0,1,397,good,13,1694,north_berkeley,1694 -3,2,1061,good,2,4911,north_berkeley,4911 -3,2,888,good,10,4679,north_berkeley,4679 -1,1,358,good,0,1828,north_berkeley,1828 -3,2,982,great,12,5245,east_elmwood,5245 -2,1,864,good,14,3307,north_berkeley,3307 -0,1,188,poor,25,1364,frontage_rd,1350.36 -1,1,523,good,6,1942,north_berkeley,1942 -3,2,865,great,0,5248,east_elmwood,5248 -0,1,353,good,5,1722,north_berkeley,1722 -0,1,407,good,7,1753,north_berkeley,1753 -1,1,306,great,4,2226,east_elmwood,2226 -0,1,388,poor,60,1152,frontage_rd,1059.84 -0,1,110,poor,22,1319,frontage_rd,1313.724 -1,1,553,poor,35,1689,frontage_rd,1638.33 -1,1,339,poor,15,1698,frontage_rd,1698 -3,2,897,poor,17,4543,frontage_rd,4543 -2,1,574,good,9,3036,north_berkeley,3036 
-0,1,449,good,10,1731,north_berkeley,1731 -0,1,441,good,5,1799,north_berkeley,1799 -0,1,502,great,4,2330,east_elmwood,2330 -2,1,757,poor,44,2956,frontage_rd,2814.112 -3,2,962,good,3,4830,north_berkeley,4830 -1,1,649,great,4,2612,east_elmwood,2612 -0,1,256,good,11,1577,north_berkeley,1577 -1,1,747,great,12,2654,east_elmwood,2654 -3,2,833,great,10,5104,east_elmwood,5104 -1,1,687,good,12,2060,north_berkeley,2060 -2,1,552,good,8,3065,north_berkeley,3065 -3,2,1241,great,2,5569,east_elmwood,5569 -0,1,459,poor,61,1298,frontage_rd,1191.564 -3,2,873,poor,26,4444,frontage_rd,4390.672 -0,1,152,poor,22,1356,frontage_rd,1350.576 -3,2,922,good,7,4670,north_berkeley,4670 -1,1,720,good,3,2198,north_berkeley,2198 -2,1,931,good,2,3497,north_berkeley,3497 -0,1,524,good,6,1893,north_berkeley,1893 -2,1,555,poor,54,2575,frontage_rd,2399.9 -0,1,159,good,7,1488,north_berkeley,1488 -1,1,380,good,8,1768,north_berkeley,1768 -1,1,685,good,0,2155,north_berkeley,2155 -2,1,848,good,11,3354,north_berkeley,3354 -1,1,547,poor,27,1751,frontage_rd,1726.486 -2,1,896,good,12,3363,north_berkeley,3363 -0,1,269,good,6,1560,north_berkeley,1560 -3,2,903,good,0,4764,north_berkeley,4764 -2,1,615,great,4,3633,east_elmwood,3633 -2,1,881,poor,35,3161,frontage_rd,3066.17 -0,1,331,good,13,1583,north_berkeley,1583 -0,1,498,poor,64,1238,frontage_rd,1129.056 -0,1,308,poor,28,1387,frontage_rd,1364.808 -0,1,103,great,3,1973,east_elmwood,1973 -3,2,1044,poor,52,4405,frontage_rd,4123.08 -2,1,788,poor,33,3010,frontage_rd,2931.74 -0,1,524,poor,33,1621,northwest,1578.854 -1,1,397,poor,25,1696,northwest,1679.04 -2,1,876,poor,27,3215,northwest,3169.99 -2,1,751,poor,64,2677,northwest,2441.424 -2,1,622,poor,32,2886,northwest,2816.736 -0,1,316,good,10,1596,west_welmwood,1596 -3,2,1171,poor,54,4473,northwest,4168.836 -3,2,1108,good,5,4875,west_welmwood,4875 -2,1,541,good,0,3103,west_welmwood,3103 -3,2,865,good,12,4643,west_welmwood,4643 -3,2,1165,great,3,5491,east_elmwood,5491 -2,1,728,good,2,3280,west_welmwood,3280 
-0,1,298,great,13,2071,east_elmwood,2071 -3,2,923,good,12,4683,west_welmwood,4683 -0,1,274,great,5,2130,east_elmwood,2130 -0,1,349,poor,32,1444,northwest,1409.344 -2,1,796,great,4,3896,east_elmwood,3896 -1,1,726,poor,28,1898,northwest,1867.632 -2,1,501,great,5,3542,east_elmwood,3542 -2,1,714,good,4,3284,west_welmwood,3284 -3,2,988,great,4,5289,east_elmwood,5289 -2,1,546,poor,34,2757,northwest,2679.804 -0,1,484,poor,20,1678,northwest,1678 -3,2,1230,good,5,5059,west_welmwood,5059 -0,1,142,good,5,1492,west_welmwood,1492 -2,1,774,poor,53,2872,northwest,2682.448 -2,1,902,good,11,3375,west_welmwood,3375 -0,1,457,good,11,1763,west_welmwood,1763 -3,2,1034,good,7,4828,west_welmwood,4828 -3,2,838,poor,25,4412,northwest,4367.88 -1,1,503,good,14,1900,west_welmwood,1900 -0,1,445,good,7,1737,west_welmwood,1737 -2,1,914,poor,34,3189,northwest,3099.708 -2,1,867,great,6,3952,east_elmwood,3952 -2,1,583,good,4,3109,west_welmwood,3109 -0,1,161,great,7,2014,east_elmwood,2014 -1,1,707,good,14,2058,west_welmwood,2058 -1,1,343,great,9,2270,east_elmwood,2270 -2,1,746,great,14,3710,east_elmwood,3710 -2,1,706,good,14,3151,west_welmwood,3151 -1,1,610,good,5,2047,west_welmwood,2047 -3,2,1205,good,12,4969,west_welmwood,4969 -3,2,827,good,7,4655,west_welmwood,4655 -3,2,1055,good,0,4930,west_welmwood,4930 -2,1,625,good,14,3083,west_welmwood,3083 -2,1,630,good,14,3053,west_welmwood,3053 -1,1,654,poor,59,1575,northwest,1452.15 -3,2,1195,poor,17,4831,northwest,4831 -3,2,1227,good,11,4951,west_welmwood,4951 -3,2,1242,good,3,5081,west_welmwood,5081 -2,1,881,good,11,3337,west_welmwood,3337 -1,1,689,poor,35,1880,northwest,1823.6 -2,1,501,poor,35,2704,northwest,2622.88 -1,1,412,poor,21,1751,northwest,1747.498 -3,2,1081,poor,15,4793,northwest,4793 -2,1,725,great,2,3807,east_elmwood,3807 -2,1,768,great,11,3790,east_elmwood,3790 -0,1,152,good,2,1530,west_welmwood,1530 -3,2,1074,good,9,4804,west_welmwood,4804 -1,1,559,good,12,1890,west_welmwood,1890 -2,1,706,great,10,3747,east_elmwood,3747 
-2,1,738,poor,23,3095,northwest,3076.43 -2,1,893,great,4,3962,east_elmwood,3962 -1,1,468,great,7,2405,east_elmwood,2405 -2,1,652,poor,22,3017,northwest,3004.932 -1,1,327,good,6,1769,west_welmwood,1769 -2,1,899,great,3,3930,east_elmwood,3930 -3,2,997,good,11,4733,west_welmwood,4733 -3,2,920,poor,20,4602,northwest,4602 -1,1,300,poor,55,1211,northwest,1126.23 -3,2,1174,great,0,5496,east_elmwood,5496 -1,1,679,good,14,2049,west_welmwood,2049 -3,2,887,good,0,4781,west_welmwood,4781 -2,1,856,good,9,3377,west_welmwood,3377 -0,1,216,good,14,1484,west_welmwood,1484 -0,1,476,great,3,2312,east_elmwood,2312 -2,1,592,good,12,3042,west_welmwood,3042 -0,1,325,poor,42,1337,northwest,1278.172 -2,1,790,poor,19,3170,northwest,3170 -1,1,354,good,12,1728,west_welmwood,1728 -1,1,529,good,12,1949,west_welmwood,1949 -1,1,522,great,12,2420,east_elmwood,2420 -0,1,358,good,13,1660,west_welmwood,1660 -0,1,468,poor,47,1447,northwest,1368.862 -0,1,429,great,11,2255,east_elmwood,2255 -3,2,1176,great,0,5535,east_elmwood,5535 -0,1,361,good,12,1656,west_welmwood,1656 -3,2,1214,good,2,5046,west_welmwood,5046 -1,1,738,good,3,2224,west_welmwood,2224 -3,2,852,poor,59,4128,northwest,3806.016 -3,2,1178,good,9,4951,west_welmwood,4951 -3,2,956,good,5,4746,west_welmwood,4746 -1,1,568,good,14,1884,west_welmwood,1884 -1,1,704,great,7,2638,east_elmwood,2638 -1,1,519,poor,64,1386,northwest,1264.032 -1,1,737,good,5,2185,west_welmwood,2185 -1,1,455,poor,47,1510,northwest,1428.46 -0,1,346,poor,61,1118,northwest,1026.324 -0,1,285,good,5,1627,west_welmwood,1627 -3,2,829,good,0,4646,west_welmwood,4646 -2,1,672,good,6,3202,west_welmwood,3202 -1,1,451,good,6,1873,west_welmwood,1873 -1,1,697,good,8,2119,west_welmwood,2119 -2,1,691,poor,34,2913,northwest,2831.436 -1,1,610,poor,49,1598,northwest,1505.316 -0,1,410,great,2,2287,east_elmwood,2287 -2,1,529,great,2,3606,east_elmwood,3606 -0,1,273,good,0,1656,west_welmwood,1656 -0,1,482,poor,55,1299,northwest,1208.07 -0,1,170,good,10,1421,west_welmwood,1421 
-1,1,565,good,2,2001,west_welmwood,2001 -1,1,712,good,2,2186,west_welmwood,2186 -0,1,246,poor,16,1465,northwest,1465 -1,1,379,good,12,1725,west_welmwood,1725 -2,1,575,good,3,3095,west_welmwood,3095 -1,1,477,good,4,1900,west_welmwood,1900 -3,2,1081,good,9,4847,west_welmwood,4847 -2,1,908,good,13,3348,west_welmwood,3348 -3,2,1119,poor,47,4530,northwest,4285.38 -3,2,943,good,13,4693,west_welmwood,4693 -2,1,763,good,2,3339,west_welmwood,3339 -3,2,1192,poor,39,4630,northwest,4454.06 -2,1,600,great,14,3547,east_elmwood,3547 -0,1,361,poor,60,1186,northwest,1091.12 -2,1,847,poor,29,3160,northwest,3103.12 -0,1,206,poor,42,1215,northwest,1161.54 -1,1,300,great,12,2133,east_elmwood,2133 -2,1,927,good,4,3440,west_welmwood,3440 -0,1,524,poor,15,1726,northwest,1726 -1,1,469,poor,38,1637,northwest,1578.068 -0,1,193,poor,40,1165,northwest,1118.4 -2,1,627,poor,29,2924,northwest,2871.368 -2,1,603,good,6,3180,west_welmwood,3180 -0,1,424,poor,21,1639,northwest,1635.722 -2,1,785,poor,28,3149,northwest,3098.616 -3,2,1020,poor,15,4676,northwest,4676 -2,1,526,great,3,3602,east_elmwood,3602 -2,1,555,good,11,3089,west_welmwood,3089 -2,1,730,great,7,3781,east_elmwood,3781 -3,2,1228,good,10,4955,west_welmwood,4955 -0,1,437,poor,30,1585,northwest,1553.3 -1,1,511,poor,31,1746,northwest,1707.588 -3,2,1125,great,7,5430,east_elmwood,5430 -2,1,838,poor,16,3259,northwest,3259 -2,1,736,great,4,3837,east_elmwood,3837 -2,1,591,great,3,3670,east_elmwood,3670 -0,1,537,good,9,1858,west_welmwood,1858 -2,1,690,good,3,3216,west_welmwood,3216 -0,1,475,poor,36,1486,northwest,1438.448 -0,1,245,great,9,2039,east_elmwood,2039 -2,1,568,good,0,3137,west_welmwood,3137 -1,1,466,poor,23,1764,northwest,1753.416 -0,1,489,great,12,2311,east_elmwood,2311 -1,1,727,good,4,2217,west_welmwood,2217 -2,1,541,good,0,3148,west_welmwood,3148 -2,1,531,good,10,3053,west_welmwood,3053 -0,1,548,good,3,1931,west_welmwood,1931 -1,1,548,good,8,1975,west_welmwood,1975 -1,1,394,good,6,1800,west_welmwood,1800 
-3,2,1231,good,12,4988,west_welmwood,4988 -2,1,532,great,3,3614,east_elmwood,3614 -0,1,256,good,4,1582,west_welmwood,1582 -2,1,613,good,1,3202,west_welmwood,3202 -3,2,1186,good,13,4928,west_welmwood,4928 -1,1,432,good,1,1910,west_welmwood,1910 -1,1,351,good,7,1804,west_welmwood,1804 -1,1,300,poor,27,1528,northwest,1506.608 -0,1,325,poor,51,1220,northwest,1144.36 -3,2,1101,great,11,5388,east_elmwood,5388 -3,2,1029,good,0,4831,west_welmwood,4831 -1,1,662,great,14,2517,east_elmwood,2517 -2,1,611,poor,48,2772,northwest,2616.768 -3,2,1155,good,7,4951,west_welmwood,4951 -2,1,683,great,14,3670,east_elmwood,3670 -0,1,361,great,13,2098,east_elmwood,2098 -1,1,589,poor,20,1872,northwest,1872 -2,1,764,poor,40,2915,northwest,2798.4 -1,1,522,good,12,1945,west_welmwood,1945 -0,1,227,poor,52,1120,northwest,1048.32 -2,1,525,good,3,3112,west_welmwood,3112 -2,1,656,good,4,3234,west_welmwood,3234 -1,1,630,poor,61,1505,northwest,1381.59 -3,2,842,great,14,5011,east_elmwood,5011 -0,1,520,poor,33,1552,northwest,1511.648 -3,2,1013,poor,46,4403,northwest,4174.044 -2,1,747,poor,20,3170,northwest,3170 -0,1,217,great,1,2075,east_elmwood,2075 -0,1,134,poor,31,1185,northwest,1158.93 -3,2,813,good,7,4622,west_welmwood,4622 -2,1,722,good,0,3337,west_welmwood,3337 -2,1,710,poor,45,2845,northwest,2702.75 -1,1,377,great,7,2353,east_elmwood,2353 -3,2,1169,poor,57,4441,northwest,4112.366 -3,2,881,good,4,4641,west_welmwood,4641 -0,1,415,great,5,2283,east_elmwood,2283 -1,1,401,poor,17,1705,northwest,1705 -1,1,552,poor,64,1460,northwest,1331.52 -3,2,894,good,9,4639,west_welmwood,4639 -2,1,925,good,8,3414,west_welmwood,3414 -2,1,661,good,10,3135,west_welmwood,3135 -0,1,233,poor,54,1123,northwest,1046.636 -0,1,264,poor,15,1521,northwest,1521 -1,1,445,poor,48,1482,northwest,1399.008 -2,1,849,good,10,3308,west_welmwood,3308 -2,1,852,poor,31,3116,northwest,3047.448 -2,1,901,great,6,3977,east_elmwood,3977 -2,1,934,great,14,3876,east_elmwood,3876 -1,1,691,good,12,2021,west_welmwood,2021 
-2,1,722,great,9,3734,east_elmwood,3734 -3,2,1000,good,9,4741,west_welmwood,4741 -2,1,573,great,3,3598,east_elmwood,3598 -1,1,732,poor,42,1858,northwest,1776.248 -0,1,319,great,13,2068,east_elmwood,2068 -0,1,101,poor,53,944,northwest,881.696 -2,1,838,poor,42,3053,northwest,2918.668 -2,1,518,poor,57,2571,northwest,2380.746 -2,1,876,great,6,3893,east_elmwood,3893 -0,1,482,good,13,1741,west_welmwood,1741 -2,1,789,good,7,3342,west_welmwood,3342 -3,2,955,poor,18,4627,northwest,4627 -0,1,102,great,5,1924,east_elmwood,1924 -0,1,373,poor,50,1299,northwest,1221.06 -3,2,1056,poor,25,4679,northwest,4632.21 -0,1,184,great,1,2098,east_elmwood,2098 -0,1,107,good,14,1373,west_welmwood,1373 -0,1,430,good,3,1813,west_welmwood,1813 -0,1,255,poor,26,1382,northwest,1365.416 -3,2,1242,great,12,5514,east_elmwood,5514 -3,2,1124,good,13,4817,west_welmwood,4817 -2,1,833,great,2,3943,east_elmwood,3943 -0,1,417,good,11,1660,west_welmwood,1660 -0,1,254,good,8,1566,west_welmwood,1566 -1,1,481,good,4,1986,west_welmwood,1986 -2,1,611,good,4,3193,west_welmwood,3193 -1,1,639,poor,20,1948,northwest,1948 -0,1,455,good,7,1799,west_welmwood,1799 -2,1,881,poor,54,2903,northwest,2705.596 -1,1,364,great,12,2225,east_elmwood,2225 -0,1,172,good,8,1444,west_welmwood,1444 -1,1,643,great,1,2637,east_elmwood,2637 -2,1,663,good,4,3269,west_welmwood,3269 -2,1,931,good,5,3524,west_welmwood,3524 -3,2,1143,great,2,5488,east_elmwood,5488 -2,1,681,good,13,3185,west_welmwood,3185 -1,1,426,good,11,1857,west_welmwood,1857 -3,2,1070,great,3,5342,east_elmwood,5342 -1,1,388,good,14,1744,west_welmwood,1744 -1,1,722,good,2,2219,west_welmwood,2219 -2,1,621,great,3,3665,east_elmwood,3665 -3,2,1018,poor,45,4452,northwest,4229.4 -3,2,845,poor,39,4284,northwest,4121.208 -0,1,323,poor,24,1529,northwest,1516.768 -1,1,558,good,11,1917,west_welmwood,1917 -2,1,813,great,10,3819,east_elmwood,3819 -1,1,601,poor,36,1771,northwest,1714.328 -1,1,617,good,8,1999,west_welmwood,1999 -2,1,851,good,4,3414,west_welmwood,3414 
-2,1,828,good,14,3248,west_welmwood,3248 -3,2,825,good,0,4711,west_welmwood,4711 -1,1,386,good,13,1713,west_welmwood,1713 -3,2,895,good,2,4769,west_welmwood,4769 -1,1,443,good,14,1845,west_welmwood,1845 -3,2,846,poor,56,4124,northwest,3827.072 -3,2,896,great,2,5257,east_elmwood,5257 -3,2,1014,poor,40,4482,northwest,4302.72 -1,1,378,good,8,1807,west_welmwood,1807 -3,2,1126,poor,41,4551,northwest,4359.858 -2,1,929,poor,47,3039,northwest,2874.894 -2,1,739,poor,57,2753,northwest,2549.278 -3,2,1145,good,13,4887,west_welmwood,4887 -1,1,667,poor,54,1581,northwest,1473.492 -2,1,836,good,4,3361,west_welmwood,3361 -2,1,758,good,9,3265,west_welmwood,3265 -3,2,824,good,8,4570,west_welmwood,4570 -1,1,506,good,12,1895,west_welmwood,1895 -1,1,544,poor,62,1429,northwest,1308.964 -0,1,121,good,2,1512,west_welmwood,1512 -1,1,638,poor,34,1759,northwest,1709.748 -0,1,135,good,4,1480,west_welmwood,1480 -1,1,385,good,3,1874,west_welmwood,1874 -2,1,589,great,8,3651,east_elmwood,3651 -0,1,511,good,4,1905,west_welmwood,1905 -2,1,663,poor,63,2589,northwest,2366.346 -1,1,739,poor,56,1677,northwest,1556.256 -1,1,610,poor,39,1673,northwest,1609.426 -3,2,818,great,8,5081,east_elmwood,5081 -3,2,1202,good,5,4988,west_welmwood,4988 -0,1,164,good,7,1496,west_welmwood,1496 -0,1,336,poor,60,1153,northwest,1060.76 -3,2,1243,good,10,4950,west_welmwood,4950 -2,1,563,great,11,3503,east_elmwood,3503 -1,1,425,good,10,1818,west_welmwood,1818 -2,1,849,poor,45,2993,northwest,2843.35 -1,1,701,good,2,2170,west_welmwood,2170 -1,1,689,great,11,2557,east_elmwood,2557 -3,2,958,poor,61,4202,northwest,3857.436 -2,1,732,poor,16,3201,northwest,3201 -1,1,318,poor,49,1368,northwest,1288.656 -1,1,471,good,8,1852,west_welmwood,1852 -0,1,378,great,3,2205,east_elmwood,2205 -1,1,582,poor,30,1800,northwest,1764 -1,1,326,great,12,2239,east_elmwood,2239 -1,1,307,good,0,1856,west_welmwood,1856 -3,2,1206,good,10,4955,west_welmwood,4955 -1,1,547,great,13,2367,east_elmwood,2367 -0,1,215,good,10,1530,west_welmwood,1530 
-1,1,454,good,2,1889,west_welmwood,1889 -1,1,367,great,5,2319,east_elmwood,2319 -1,1,373,good,4,1831,west_welmwood,1831 -3,2,921,good,12,4672,west_welmwood,4672 -0,1,230,great,5,2105,east_elmwood,2105 -3,2,1110,poor,47,4482,northwest,4239.972 -3,2,1148,good,9,4891,west_welmwood,4891 -0,1,190,great,14,1922,east_elmwood,1922 -0,1,480,great,7,2324,east_elmwood,2324 -2,1,921,poor,39,3110,northwest,2991.82 -0,1,127,great,9,1948,east_elmwood,1948 -1,1,528,great,5,2450,east_elmwood,2450 -1,1,710,good,5,2193,west_welmwood,2193 -0,1,349,great,12,2117,east_elmwood,2117 -0,1,467,good,13,1692,west_welmwood,1692 -3,2,1137,poor,21,4813,northwest,4803.374 -0,1,394,poor,40,1435,northwest,1377.6 -1,1,659,great,6,2646,east_elmwood,2646 -0,1,410,poor,53,1248,northwest,1165.632 -3,2,1158,poor,54,4464,northwest,4160.448 -0,1,401,poor,52,1312,northwest,1228.032 -1,1,641,poor,46,1680,northwest,1592.64 -1,1,599,great,6,2552,east_elmwood,2552 -1,1,614,good,4,2103,west_welmwood,2103 -0,1,443,good,8,1759,west_welmwood,1759 -0,1,266,poor,44,1196,northwest,1138.592 -1,1,627,good,10,1985,west_welmwood,1985 -0,1,476,poor,30,1530,northwest,1499.4 -1,1,657,good,3,2163,west_welmwood,2163 -2,1,839,poor,35,3073,northwest,2980.81 -3,2,868,poor,28,4447,northwest,4375.848 -1,1,454,good,12,1809,west_welmwood,1809 -0,1,319,poor,63,1088,northwest,994.432 -1,1,537,poor,56,1508,northwest,1399.424 -3,2,1235,great,6,5500,east_elmwood,5500 -0,1,123,good,13,1393,west_welmwood,1393 -0,1,249,great,10,2060,east_elmwood,2060 -2,1,817,great,4,3866,east_elmwood,3866 -2,1,711,poor,43,2882,northwest,2749.428 -1,1,479,good,0,1978,west_welmwood,1978 -0,1,361,poor,27,1540,northwest,1518.44 -3,2,918,poor,45,4329,northwest,4112.55 -2,1,554,good,11,3017,west_welmwood,3017 -1,1,314,poor,23,1543,northwest,1533.742 -2,1,766,good,0,3387,west_welmwood,3387 -3,2,947,good,2,4765,west_welmwood,4765 -0,1,245,great,7,2066,east_elmwood,2066 -1,1,551,good,3,1971,west_welmwood,1971 -3,2,1166,good,3,5008,west_welmwood,5008 
-3,2,1064,good,12,4793,west_welmwood,4793 -3,2,1102,poor,26,4711,northwest,4654.468 -0,1,323,good,7,1603,west_welmwood,1603 -3,2,991,great,3,5325,east_elmwood,5325 -0,1,239,poor,18,1500,northwest,1500 -3,2,1037,good,6,4815,west_welmwood,4815 -2,1,949,good,8,3489,west_welmwood,3489 -1,1,442,good,12,1865,west_welmwood,1865 -3,2,901,great,2,5212,east_elmwood,5212 -3,2,898,good,3,4765,west_welmwood,4765 -3,2,1090,poor,59,4368,northwest,4027.296 -1,1,747,great,0,2754,east_elmwood,2754 -2,1,719,great,2,3842,east_elmwood,3842 -0,1,473,good,0,1915,west_welmwood,1915 -1,1,483,good,2,1957,west_welmwood,1957 -2,1,773,good,10,3303,west_welmwood,3303 -3,2,852,good,6,4659,west_welmwood,4659 -1,1,691,great,8,2610,east_elmwood,2610 -2,1,763,good,1,3397,west_welmwood,3397 -2,1,560,poor,46,2736,northwest,2593.728 -2,1,581,great,5,3616,east_elmwood,3616 -0,1,296,good,10,1599,west_welmwood,1599 -2,1,585,great,6,3634,east_elmwood,3634 -1,1,535,good,12,1928,west_welmwood,1928 -1,1,698,poor,35,1846,northwest,1790.62 -2,1,629,poor,50,2695,northwest,2533.3 -0,1,104,poor,61,943,northwest,865.674 -2,1,904,good,14,3386,west_welmwood,3386 -3,2,1100,great,10,5348,east_elmwood,5348 -0,1,212,poor,15,1414,northwest,1414 -1,1,491,poor,38,1577,northwest,1520.228 -2,1,862,great,5,3869,east_elmwood,3869 -3,2,900,poor,59,4155,northwest,3830.91 -3,2,867,good,5,4617,west_welmwood,4617 -1,1,362,great,12,2216,east_elmwood,2216 -1,1,576,great,14,2422,east_elmwood,2422 -0,1,300,great,5,2181,east_elmwood,2181 -3,2,1017,poor,33,4487,northwest,4370.338 -2,1,546,great,1,3672,east_elmwood,3672 -2,1,589,poor,48,2729,northwest,2576.176 -1,1,517,good,5,1948,west_welmwood,1948 -0,1,289,great,13,2066,east_elmwood,2066 -0,1,272,poor,27,1418,northwest,1398.148 -3,2,1222,good,2,5038,west_welmwood,5038 -0,1,410,good,8,1742,west_welmwood,1742 -3,2,1199,good,1,5084,west_welmwood,5084 -1,1,477,good,2,1948,west_welmwood,1948 -2,1,727,poor,61,2708,northwest,2485.944 -2,1,516,good,10,3018,west_welmwood,3018 
-1,1,375,poor,45,1440,northwest,1368 -3,2,959,good,1,4800,west_welmwood,4800 -3,2,1031,great,4,5380,east_elmwood,5380 -1,1,465,good,11,1810,west_welmwood,1810 -3,2,1181,good,7,4930,west_welmwood,4930 -0,1,307,poor,63,1120,northwest,1023.68 -3,2,1170,good,11,4880,west_welmwood,4880 -3,2,1093,great,7,5356,east_elmwood,5356 -3,2,1219,good,9,4951,west_welmwood,4951 -2,1,539,poor,42,2709,northwest,2589.804 -1,1,462,great,1,2462,east_elmwood,2462 -3,2,1211,good,5,5032,west_welmwood,5032 -0,1,530,good,13,1837,west_welmwood,1837 -0,1,177,good,5,1496,west_welmwood,1496 -0,1,116,good,14,1424,west_welmwood,1424 -0,1,494,poor,45,1493,northwest,1418.35 -1,1,685,good,4,2173,west_welmwood,2173 -0,1,515,great,7,2389,east_elmwood,2389 -1,1,398,good,1,1900,west_welmwood,1900 -0,1,408,good,0,1839,west_welmwood,1839 -1,1,694,good,0,2165,west_welmwood,2165 -0,1,499,great,5,2324,east_elmwood,2324 -1,1,558,good,6,1974,west_welmwood,1974 -2,1,603,poor,63,2610,northwest,2385.54 -0,1,448,great,9,2260,east_elmwood,2260 -2,1,818,good,3,3409,west_welmwood,3409 -1,1,586,great,4,2535,east_elmwood,2535 -2,1,770,poor,30,3114,northwest,3051.72 -1,1,675,good,13,2053,west_welmwood,2053 -3,2,1024,poor,28,4560,northwest,4487.04 -2,1,532,good,12,3019,west_welmwood,3019 -3,2,816,poor,47,4210,northwest,3982.66 -2,1,838,good,8,3341,west_welmwood,3341 -3,2,1059,great,4,5329,east_elmwood,5329 -1,1,637,good,3,2119,west_welmwood,2119 -0,1,464,good,3,1799,west_welmwood,1799 -2,1,767,good,4,3288,west_welmwood,3288 -2,1,577,great,5,3618,east_elmwood,3618 -1,1,403,poor,28,1619,northwest,1593.096 -2,1,751,good,8,3255,west_welmwood,3255 -0,1,304,great,2,2219,east_elmwood,2219 -2,1,868,poor,41,3033,northwest,2905.614 -0,1,440,great,9,2293,east_elmwood,2293 -2,1,729,great,1,3806,east_elmwood,3806 -2,1,772,good,12,3281,west_welmwood,3281 -1,1,551,poor,18,1840,northwest,1840 -1,1,319,poor,44,1411,northwest,1343.272 -1,1,644,poor,63,1484,northwest,1356.376 -1,1,586,poor,23,1862,northwest,1850.828 
-3,2,893,poor,31,4418,northwest,4320.804 -1,1,413,great,3,2343,east_elmwood,2343 -1,1,721,good,14,2031,west_welmwood,2031 -2,1,844,good,7,3358,west_welmwood,3358 -3,2,1247,good,9,4992,west_welmwood,4992 -1,1,686,great,6,2623,east_elmwood,2623 -3,2,975,good,8,4701,west_welmwood,4701 -0,1,487,good,13,1722,west_welmwood,1722 -0,1,115,good,5,1500,west_welmwood,1500 -1,1,555,poor,45,1608,northwest,1527.6 -0,1,194,good,5,1550,west_welmwood,1550 -2,1,683,poor,40,2855,northwest,2740.8 -0,1,498,good,11,1751,west_welmwood,1751 -0,1,540,poor,44,1516,northwest,1443.232 -3,2,1138,good,1,4982,west_welmwood,4982 -1,1,518,great,1,2486,east_elmwood,2486 -2,1,667,poor,37,2933,northwest,2833.278 -2,1,935,good,2,3512,west_welmwood,3512 -3,2,1160,great,4,5431,east_elmwood,5431 -0,1,167,good,13,1401,west_welmwood,1401 -2,1,878,poor,53,2968,northwest,2772.112 -2,1,705,good,5,3267,west_welmwood,3267 -0,1,256,good,3,1657,west_welmwood,1657 -2,1,886,good,11,3369,west_welmwood,3369 -2,1,864,great,0,3929,east_elmwood,3929 -1,1,523,good,0,2046,west_welmwood,2046 -2,1,924,poor,46,3055,northwest,2896.14 -1,1,486,great,13,2383,east_elmwood,2383 -1,1,353,good,6,1744,west_welmwood,1744 -0,1,133,poor,30,1187,northwest,1163.26 -1,1,577,poor,22,1884,northwest,1876.464 -1,1,499,great,6,2486,east_elmwood,2486 -0,1,325,great,1,2170,east_elmwood,2170 -1,1,578,poor,37,1687,northwest,1629.642 -3,2,1032,good,12,4788,west_welmwood,4788 -0,1,229,good,2,1564,west_welmwood,1564 -1,1,539,great,14,2353,east_elmwood,2353 -0,1,538,good,10,1882,west_welmwood,1882 -2,1,564,good,9,3047,west_welmwood,3047 -2,1,697,great,5,3766,east_elmwood,3766 -2,1,629,good,13,3132,west_welmwood,3132 -0,1,530,great,14,2329,east_elmwood,2329 -2,1,688,good,9,3235,west_welmwood,3235 -2,1,925,good,1,3469,west_welmwood,3469 -1,1,537,good,8,1988,west_welmwood,1988 -3,2,1029,great,13,5294,east_elmwood,5294 -1,1,712,good,0,2230,west_welmwood,2230 -3,2,898,good,10,4610,west_welmwood,4610 -2,1,749,good,9,3249,west_welmwood,3249 
-1,1,747,good,11,2164,west_welmwood,2164 -0,1,267,poor,33,1296,northwest,1262.304 -3,2,1117,poor,44,4487,northwest,4271.624 -1,1,581,poor,59,1527,northwest,1407.894 -1,1,687,poor,47,1732,northwest,1638.472 -2,1,683,good,0,3254,west_welmwood,3254 -1,1,587,poor,18,1908,northwest,1908 -0,1,477,poor,44,1467,northwest,1396.584 -3,2,944,good,13,4625,west_welmwood,4625 -0,1,511,great,1,2420,east_elmwood,2420 -3,2,932,poor,36,4444,northwest,4301.792 -0,1,347,good,8,1686,west_welmwood,1686 -0,1,272,good,2,1645,west_welmwood,1645 -2,1,659,great,11,3698,east_elmwood,3698 -1,1,742,great,10,2678,east_elmwood,2678 -2,1,830,great,14,3826,east_elmwood,3826 -2,1,571,great,3,3631,east_elmwood,3631 -3,2,1007,great,5,5339,east_elmwood,5339 -3,2,1059,poor,34,4592,northwest,4463.424 -2,1,823,poor,23,3191,northwest,3171.854 -1,1,705,great,5,2681,east_elmwood,2681 -1,1,381,poor,16,1742,northwest,1742 -1,1,558,good,4,2010,west_welmwood,2010 -1,1,452,poor,35,1637,northwest,1587.89 -1,1,446,good,7,1911,west_welmwood,1911 -3,2,1011,good,11,4704,west_welmwood,4704 -2,1,770,good,5,3285,west_welmwood,3285 -3,2,853,good,11,4592,west_welmwood,4592 -0,1,436,good,2,1838,west_welmwood,1838 -2,1,718,good,11,3197,west_welmwood,3197 -0,1,507,good,5,1835,west_welmwood,1835 -0,1,175,great,11,1985,east_elmwood,1985 -1,1,379,good,9,1785,west_welmwood,1785 -0,1,463,good,4,1859,west_welmwood,1859 -0,1,131,great,10,1917,east_elmwood,1917 -1,1,584,good,7,2039,west_welmwood,2039 -1,1,343,good,2,1866,west_welmwood,1866 -0,1,424,poor,24,1589,northwest,1576.288 -1,1,670,good,3,2153,west_welmwood,2153 -2,1,551,good,8,3103,west_welmwood,3103 -2,1,737,great,14,3733,east_elmwood,3733 -1,1,655,poor,36,1810,northwest,1752.08 -3,2,946,poor,46,4289,northwest,4065.972 -1,1,360,good,1,1841,west_welmwood,1841 -3,2,988,good,2,4846,west_welmwood,4846 -2,1,641,poor,17,3075,northwest,3075 -3,2,1163,good,12,4938,west_welmwood,4938 -3,2,1025,good,14,4745,west_welmwood,4745 -3,2,893,great,0,5228,east_elmwood,5228 
-1,1,302,poor,38,1413,northwest,1362.132 -3,2,1183,good,2,4978,west_welmwood,4978 -1,1,721,great,3,2707,east_elmwood,2707 -2,1,670,poor,18,3137,northwest,3137 -3,2,1138,great,12,5408,east_elmwood,5408 -0,1,144,good,11,1436,west_welmwood,1436 -1,1,543,good,9,1927,west_welmwood,1927 -3,2,838,poor,63,4086,northwest,3734.604 -1,1,478,poor,16,1833,northwest,1833 -2,1,760,good,2,3319,west_welmwood,3319 -3,2,1054,good,9,4775,west_welmwood,4775 -1,1,738,poor,55,1641,northwest,1526.13 -1,1,331,poor,52,1321,northwest,1236.456 -2,1,881,great,6,3923,east_elmwood,3923 -2,1,916,poor,29,3211,northwest,3153.202 -1,1,592,good,12,1923,west_welmwood,1923 -0,1,250,great,5,2088,east_elmwood,2088 -3,2,1233,poor,36,4710,northwest,4559.28 -1,1,672,great,14,2547,east_elmwood,2547 -0,1,522,great,7,2318,east_elmwood,2318 -1,1,523,poor,59,1465,northwest,1350.73 -3,2,1116,great,1,5470,east_elmwood,5470 -0,1,389,good,14,1647,west_welmwood,1647 -3,2,1008,great,13,5198,east_elmwood,5198 -0,1,320,great,9,2173,east_elmwood,2173 -2,1,707,good,1,3301,west_welmwood,3301 -2,1,776,great,5,3870,east_elmwood,3870 -1,1,607,great,6,2582,east_elmwood,2582 -2,1,931,good,1,3475,west_welmwood,3475 -3,2,886,great,2,5224,east_elmwood,5224 -2,1,785,good,14,3277,west_welmwood,3277 -0,1,373,great,4,2199,east_elmwood,2199 -3,2,1243,good,7,4983,west_welmwood,4983 -0,1,549,poor,48,1454,northwest,1372.576 -2,1,620,poor,58,2641,northwest,2440.284 -2,1,921,good,1,3550,west_welmwood,3550 -0,1,106,poor,25,1239,northwest,1226.61 -1,1,623,good,4,2039,west_welmwood,2039 -0,1,328,great,9,2097,east_elmwood,2097 -3,2,1129,poor,37,4581,northwest,4425.246 -3,2,857,poor,42,4237,northwest,4050.572 -3,2,999,poor,32,4567,northwest,4457.392 -3,2,966,good,13,4681,west_welmwood,4681 -3,2,897,great,11,5107,east_elmwood,5107 -1,1,369,good,2,1807,west_welmwood,1807 -3,2,817,good,12,4541,west_welmwood,4541 -1,1,317,good,11,1688,west_welmwood,1688 -3,2,1228,great,8,5521,east_elmwood,5521 -1,1,479,good,14,1800,west_welmwood,1800 
-1,1,748,good,12,2082,west_welmwood,2082 -1,1,330,good,11,1680,west_welmwood,1680 -1,1,398,good,14,1723,west_welmwood,1723 -2,1,533,poor,50,2623,northwest,2465.62 -3,2,830,good,3,4664,west_welmwood,4664 -3,2,1050,good,8,4783,west_welmwood,4783 -3,2,942,poor,64,4201,northwest,3831.312 -2,1,621,poor,42,2841,northwest,2715.996 -3,2,1221,great,9,5522,east_elmwood,5522 -0,1,259,poor,37,1288,northwest,1244.208 -1,1,599,poor,44,1645,northwest,1566.04 -2,1,860,good,3,3461,west_welmwood,3461 -0,1,330,good,2,1735,west_welmwood,1735 -2,1,736,great,8,3773,east_elmwood,3773 -1,1,687,great,14,2532,east_elmwood,2532 -3,2,1040,poor,24,4695,northwest,4657.44 -1,1,718,great,11,2640,east_elmwood,2640 -1,1,313,poor,15,1656,northwest,1656 -3,2,1008,poor,37,4492,northwest,4339.272 -1,1,550,poor,42,1585,northwest,1515.26 -1,1,467,poor,60,1407,northwest,1294.44 -0,1,472,great,9,2243,east_elmwood,2243 -0,1,189,great,4,2002,east_elmwood,2002 -2,1,935,poor,15,3429,northwest,3429 -2,1,638,poor,37,2843,northwest,2746.338 -3,2,803,great,5,5055,east_elmwood,5055 -2,1,734,poor,37,2940,northwest,2840.04 -3,2,815,good,5,4566,west_welmwood,4566 -2,1,618,good,0,3228,west_welmwood,3228 -2,1,519,poor,30,2790,northwest,2734.2 -3,2,1163,great,7,5429,east_elmwood,5429 -0,1,145,good,0,1542,west_welmwood,1542 -2,1,805,good,1,3345,west_welmwood,3345 -0,1,512,great,13,2318,east_elmwood,2318 -0,1,515,great,8,2307,east_elmwood,2307 -0,1,470,great,10,2251,east_elmwood,2251 -2,1,845,good,14,3312,west_welmwood,3312 -2,1,676,great,2,3726,east_elmwood,3726 -0,1,157,good,3,1523,west_welmwood,1523 -3,2,905,good,8,4666,west_welmwood,4666 -1,1,335,poor,34,1461,northwest,1420.092 -3,2,1240,great,7,5555,east_elmwood,5555 -1,1,686,good,9,2093,west_welmwood,2093 -1,1,437,good,2,1948,west_welmwood,1948 -3,2,1217,poor,29,4766,northwest,4680.212 -3,2,854,poor,47,4256,northwest,4026.176 -0,1,511,good,12,1787,west_welmwood,1787 -1,1,494,poor,18,1798,northwest,1798 -3,2,876,good,13,4560,west_welmwood,4560 
-1,1,653,good,6,2057,west_welmwood,2057 -2,1,537,good,14,2997,west_welmwood,2997 -2,1,670,good,5,3258,west_welmwood,3258 -1,1,477,good,4,1980,west_welmwood,1980 -2,1,775,good,8,3265,west_welmwood,3265 -0,1,332,good,8,1670,west_welmwood,1670 -3,2,1214,good,3,5062,west_welmwood,5062 -2,1,631,poor,57,2618,northwest,2424.268 -2,1,801,good,4,3357,west_welmwood,3357 -3,2,847,good,10,4606,west_welmwood,4606 -2,1,672,great,8,3699,east_elmwood,3699 -2,1,903,good,2,3468,west_welmwood,3468 -0,1,513,poor,55,1386,northwest,1288.98 -1,1,642,great,7,2551,east_elmwood,2551 -3,2,973,poor,42,4398,northwest,4204.488 -1,1,452,great,2,2458,east_elmwood,2458 -0,1,119,good,5,1440,west_welmwood,1440 -2,1,763,good,12,3228,west_welmwood,3228 -1,1,365,good,10,1745,west_welmwood,1745 -1,1,548,poor,47,1556,northwest,1471.976 -2,1,920,good,6,3486,west_welmwood,3486 -0,1,303,great,5,2138,east_elmwood,2138 -1,1,664,poor,27,1938,northwest,1910.868 -2,1,773,good,11,3249,west_welmwood,3249 -0,1,143,good,7,1510,west_welmwood,1510 -0,1,286,good,14,1554,west_welmwood,1554 -3,2,850,poor,45,4232,northwest,4020.4 -1,1,670,poor,42,1761,northwest,1683.516 -0,1,165,poor,55,1006,northwest,935.58 -0,1,322,poor,45,1297,northwest,1232.15 -1,1,496,good,3,1988,west_welmwood,1988 -2,1,777,good,4,3358,west_welmwood,3358 -0,1,290,good,7,1597,west_welmwood,1597 -0,1,281,great,14,2027,east_elmwood,2027 -0,1,185,poor,56,1063,northwest,986.464 -1,1,601,poor,23,1915,northwest,1903.51 -0,1,156,poor,33,1264,northwest,1231.136 -3,2,967,poor,18,4644,northwest,4644 -2,1,687,good,7,3253,west_welmwood,3253 -0,1,102,great,13,1852,east_elmwood,1852 -0,1,237,good,3,1577,west_welmwood,1577 -3,2,1076,great,7,5330,east_elmwood,5330 -0,1,308,poor,47,1206,northwest,1140.876 -1,1,715,poor,27,1966,northwest,1938.476 -3,2,1095,poor,38,4598,northwest,4432.472 -3,2,1110,good,14,4784,west_welmwood,4784 -2,1,788,good,2,3352,west_welmwood,3352 -2,1,568,great,4,3629,east_elmwood,3629 -3,2,1097,poor,35,4553,northwest,4416.41 
-1,1,540,good,13,1885,west_welmwood,1885 -0,1,507,poor,30,1624,northwest,1591.52 -3,2,919,good,14,4647,west_welmwood,4647 -0,1,314,poor,35,1320,northwest,1280.4 -3,2,1191,poor,43,4637,northwest,4423.698 -3,2,919,good,0,4734,west_welmwood,4734 -0,1,343,great,4,2199,east_elmwood,2199 -2,1,574,great,3,3633,east_elmwood,3633 -2,1,755,good,5,3343,west_welmwood,3343 -3,2,874,good,7,4629,west_welmwood,4629 -0,1,504,poor,29,1602,northwest,1573.164 -0,1,444,poor,41,1477,northwest,1414.966 -0,1,386,good,9,1741,west_welmwood,1741 -0,1,481,poor,51,1404,northwest,1316.952 -1,1,696,poor,32,1908,northwest,1862.208 -3,2,840,poor,15,4547,northwest,4547 -1,1,697,good,7,2115,west_welmwood,2115 -1,1,676,good,2,2202,west_welmwood,2202 -3,2,1094,great,9,5382,east_elmwood,5382 -2,1,669,poor,28,3004,northwest,2955.936 -1,1,340,good,10,1729,west_welmwood,1729 -1,1,475,good,1,1923,west_welmwood,1923 -0,1,340,good,1,1728,west_welmwood,1728 -1,1,389,good,2,1846,west_welmwood,1846 -1,1,623,poor,31,1787,northwest,1747.686 -0,1,264,good,12,1525,west_welmwood,1525 -1,1,701,poor,60,1578,northwest,1451.76 -3,2,1069,good,12,4760,west_welmwood,4760 -2,1,908,good,1,3465,west_welmwood,3465 -1,1,745,good,0,2253,west_welmwood,2253 -3,2,989,poor,43,4431,northwest,4227.174 -0,1,470,great,9,2315,east_elmwood,2315 -1,1,599,good,6,2035,west_welmwood,2035 -3,2,885,good,11,4622,west_welmwood,4622 -2,1,508,good,11,2950,west_welmwood,2950 -2,1,763,good,2,3372,west_welmwood,3372 -2,1,545,great,10,3587,east_elmwood,3587 -1,1,505,great,1,2538,east_elmwood,2538 -0,1,262,poor,18,1434,northwest,1434 -3,2,812,poor,48,4166,northwest,3932.704 -2,1,592,poor,55,2638,northwest,2453.34 -1,1,356,poor,38,1450,northwest,1397.8 -2,1,519,good,5,3082,west_welmwood,3082 -2,1,877,poor,23,3254,northwest,3234.476 -2,1,644,great,14,3579,east_elmwood,3579 -1,1,745,good,12,2161,west_welmwood,2161 -1,1,747,good,7,2205,west_welmwood,2205 -1,1,380,great,11,2231,east_elmwood,2231 -1,1,459,poor,62,1349,northwest,1235.684 
-3,2,1073,good,11,4773,west_welmwood,4773 -0,1,376,good,7,1726,west_welmwood,1726 -2,1,757,poor,38,2969,northwest,2862.116 -1,1,658,poor,33,1807,northwest,1760.018 -2,1,816,great,11,3848,east_elmwood,3848 -2,1,848,good,11,3371,west_welmwood,3371 -0,1,259,good,2,1634,west_welmwood,1634 -2,1,607,good,4,3142,west_welmwood,3142 -3,2,894,good,6,4719,west_welmwood,4719 -1,1,339,poor,64,1220,northwest,1112.64 -2,1,756,poor,40,2930,northwest,2812.8 -0,1,209,good,9,1518,west_welmwood,1518 -2,1,567,good,5,3070,west_welmwood,3070 -0,1,504,good,0,1877,west_welmwood,1877 -0,1,486,poor,52,1344,northwest,1257.984 -1,1,666,good,11,2086,west_welmwood,2086 -2,1,532,great,6,3558,east_elmwood,3558 -1,1,503,good,12,1840,west_welmwood,1840 -1,1,453,poor,56,1345,northwest,1248.16 -2,1,659,great,5,3716,east_elmwood,3716 -0,1,278,good,0,1661,west_welmwood,1661 -2,1,524,good,5,3083,west_welmwood,3083 -1,1,527,good,11,1904,west_welmwood,1904 -1,1,661,poor,52,1601,northwest,1498.536 -3,2,1025,good,13,4775,west_welmwood,4775 -3,2,810,good,3,4660,west_welmwood,4660 -3,2,1086,good,13,4843,west_welmwood,4843 -3,2,812,good,0,4661,west_welmwood,4661 -3,2,881,good,0,4742,west_welmwood,4742 -1,1,510,great,12,2428,east_elmwood,2428 -1,1,378,good,11,1815,west_welmwood,1815 -2,1,889,great,10,3934,east_elmwood,3934 -1,1,748,poor,18,2038,northwest,2038 -1,1,476,poor,57,1366,northwest,1264.916 -3,2,931,poor,31,4433,northwest,4335.474 -0,1,111,good,9,1468,west_welmwood,1468 -1,1,571,poor,36,1757,northwest,1700.776 -2,1,909,poor,63,2898,northwest,2648.772 -3,2,1131,poor,35,4601,northwest,4462.97 -1,1,552,great,9,2446,east_elmwood,2446 -0,1,419,good,14,1709,west_welmwood,1709 -2,1,551,good,6,3130,west_welmwood,3130 -3,2,1120,good,7,4938,west_welmwood,4938 -1,1,647,poor,58,1536,northwest,1419.264 -0,1,435,good,0,1785,west_welmwood,1785 -0,1,222,good,1,1656,west_welmwood,1656 -0,1,468,good,11,1725,west_welmwood,1725 -2,1,561,poor,47,2693,northwest,2547.578 -1,1,747,poor,59,1699,northwest,1566.478 
-1,1,696,great,14,2573,east_elmwood,2573 -1,1,440,good,13,1797,west_welmwood,1797 -1,1,686,great,9,2600,east_elmwood,2600 -2,1,712,great,7,3701,east_elmwood,3701 -0,1,302,poor,26,1432,northwest,1414.816 -2,1,626,good,14,3074,west_welmwood,3074 -1,1,700,poor,55,1663,northwest,1546.59 -1,1,409,good,12,1808,west_welmwood,1808 -3,2,880,good,4,4678,west_welmwood,4678 -3,2,915,good,5,4713,west_welmwood,4713 -1,1,748,poor,48,1756,northwest,1657.664 -0,1,227,good,9,1487,west_welmwood,1487 -1,1,390,good,8,1840,west_welmwood,1840 -0,1,460,good,0,1902,west_welmwood,1902 -1,1,392,good,8,1782,west_welmwood,1782 -2,1,921,good,14,3383,west_welmwood,3383 -2,1,756,good,3,3336,west_welmwood,3336 -1,1,604,great,1,2583,east_elmwood,2583 -3,2,832,great,3,5137,east_elmwood,5137 -3,2,1022,great,2,5386,east_elmwood,5386 -2,1,517,poor,36,2756,northwest,2667.808 -3,2,1018,good,6,4847,west_welmwood,4847 -0,1,422,good,7,1727,west_welmwood,1727 -2,1,716,good,8,3283,west_welmwood,3283 -0,1,301,great,3,2172,east_elmwood,2172 -1,1,608,poor,29,1829,northwest,1796.078 -2,1,889,poor,17,3277,northwest,3277 -2,1,577,great,9,3636,east_elmwood,3636 -2,1,671,great,0,3765,east_elmwood,3765 -0,1,315,poor,44,1277,northwest,1215.704 -1,1,709,great,1,2739,east_elmwood,2739 -3,2,1204,poor,63,4400,northwest,4021.6 -0,1,187,great,13,1920,east_elmwood,1920 -0,1,543,poor,31,1677,northwest,1640.106 -1,1,351,good,6,1799,west_welmwood,1799 -2,1,816,poor,45,2952,northwest,2804.4 -3,2,994,poor,53,4323,northwest,4037.682 -1,1,612,good,2,2102,west_welmwood,2102 -0,1,447,good,8,1798,west_welmwood,1798 -0,1,457,good,7,1744,west_welmwood,1744 -3,2,1211,good,8,4968,west_welmwood,4968 -0,1,284,poor,60,1041,northwest,957.72 -1,1,741,poor,19,2035,northwest,2035 -0,1,425,great,2,2298,east_elmwood,2298 -1,1,308,poor,36,1491,northwest,1443.288 -1,1,390,good,8,1776,west_welmwood,1776 -1,1,675,great,14,2529,east_elmwood,2529 -0,1,283,poor,44,1262,northwest,1201.424 -1,1,460,good,1,1938,west_welmwood,1938 
-1,1,678,great,4,2609,east_elmwood,2609 -2,1,892,great,11,3925,east_elmwood,3925 -3,2,1149,good,7,4978,west_welmwood,4978 -3,2,876,great,2,5207,east_elmwood,5207 -1,1,588,poor,21,1850,northwest,1846.3 -3,2,1005,good,5,4794,west_welmwood,4794 -2,1,636,good,3,3193,west_welmwood,3193 -3,2,955,good,3,4763,west_welmwood,4763 -1,1,661,poor,37,1741,northwest,1681.806 -1,1,477,great,7,2447,east_elmwood,2447 -3,2,1230,good,4,5070,west_welmwood,5070 -2,1,735,great,13,3698,east_elmwood,3698 -2,1,630,poor,60,2667,northwest,2453.64 -2,1,506,good,12,3017,west_welmwood,3017 -3,2,1191,poor,18,4896,northwest,4896 -3,2,1103,poor,44,4475,northwest,4260.2 -2,1,760,great,9,3780,east_elmwood,3780 -3,2,979,good,14,4693,west_welmwood,4693 -0,1,440,great,7,2231,east_elmwood,2231 -1,1,359,great,12,2286,east_elmwood,2286 -0,1,125,poor,38,1104,northwest,1064.256 -1,1,692,good,0,2147,west_welmwood,2147 -0,1,503,poor,49,1455,northwest,1370.61 -1,1,659,good,12,2025,west_welmwood,2025 -3,2,995,good,12,4682,west_welmwood,4682 -0,1,435,poor,27,1522,northwest,1500.692 -3,2,1028,great,14,5204,east_elmwood,5204 -3,2,880,good,5,4676,west_welmwood,4676 -2,1,796,poor,15,3261,northwest,3261 -0,1,252,great,6,2140,east_elmwood,2140 -1,1,675,good,11,2112,west_welmwood,2112 -0,1,133,good,3,1482,west_welmwood,1482 -0,1,400,poor,18,1622,northwest,1622 -3,2,954,poor,19,4629,northwest,4629 -1,1,332,great,12,2174,east_elmwood,2174 -3,2,1178,poor,53,4479,northwest,4183.386 -0,1,334,good,6,1698,west_welmwood,1698 -0,1,176,good,2,1548,west_welmwood,1548 -2,1,854,poor,58,2909,northwest,2687.916 -2,1,846,good,1,3461,west_welmwood,3461 -2,1,543,good,2,3129,west_welmwood,3129 -2,1,620,good,1,3225,west_welmwood,3225 -3,2,911,poor,31,4419,northwest,4321.782 -0,1,304,great,1,2241,east_elmwood,2241 -0,1,241,great,3,2116,east_elmwood,2116 -0,1,453,great,13,2269,east_elmwood,2269 -2,1,589,great,11,3617,east_elmwood,3617 -2,1,942,poor,15,3419,northwest,3419 -2,1,678,poor,37,2936,northwest,2836.176 
-1,1,465,poor,42,1519,northwest,1452.164 -0,1,256,great,0,2140,east_elmwood,2140 -2,1,561,poor,56,2580,northwest,2394.24 -3,2,1032,good,4,4804,west_welmwood,4804 -0,1,502,good,0,1915,west_welmwood,1915 -1,1,617,poor,35,1739,northwest,1686.83 -3,2,846,good,10,4620,west_welmwood,4620 -2,1,632,poor,37,2874,northwest,2776.284 -0,1,244,poor,59,1087,northwest,1002.214 -1,1,366,good,12,1708,west_welmwood,1708 -3,2,1097,good,4,4865,west_welmwood,4865 -3,2,931,poor,42,4385,northwest,4192.06 -2,1,531,good,3,3142,west_welmwood,3142 -1,1,428,good,13,1796,west_welmwood,1796 -0,1,427,great,1,2283,east_elmwood,2283 -2,1,682,poor,24,3036,northwest,3011.712 -0,1,181,poor,16,1430,northwest,1430 -2,1,672,good,10,3179,west_welmwood,3179 -2,1,548,poor,35,2762,northwest,2679.14 -2,1,721,good,5,3274,west_welmwood,3274 -1,1,379,poor,56,1283,northwest,1190.624 -0,1,369,poor,17,1592,northwest,1592 -2,1,738,good,2,3329,west_welmwood,3329 -0,1,321,great,14,2034,east_elmwood,2034 -2,1,636,great,9,3684,east_elmwood,3684 -1,1,564,poor,60,1452,northwest,1335.84 -2,1,723,good,7,3250,west_welmwood,3250 -1,1,584,poor,16,1950,northwest,1950 -1,1,307,good,12,1690,west_welmwood,1690 -2,1,946,great,11,3975,east_elmwood,3975 -2,1,655,good,2,3226,west_welmwood,3226 -3,2,1101,good,4,4903,west_welmwood,4903 -2,1,779,poor,27,3101,northwest,3057.586 -1,1,497,poor,28,1737,northwest,1709.208 -3,2,806,poor,55,4066,northwest,3781.38 -1,1,746,good,4,2223,west_welmwood,2223 -0,1,358,poor,30,1462,northwest,1432.76 -0,1,181,good,11,1489,west_welmwood,1489 -1,1,553,good,8,1964,west_welmwood,1964 -2,1,864,poor,15,3299,northwest,3299 -3,2,1040,good,11,4785,west_welmwood,4785 -2,1,516,good,8,3037,west_welmwood,3037 -2,1,517,good,13,3005,west_welmwood,3005 -2,1,835,poor,27,3131,northwest,3087.166 -3,2,1163,good,7,4987,west_welmwood,4987 -3,2,1044,poor,64,4281,northwest,3904.272 -3,2,1025,good,3,4887,west_welmwood,4887 -0,1,103,great,6,1970,east_elmwood,1970 -3,2,990,poor,33,4476,northwest,4359.624 
-2,1,575,good,7,3143,west_welmwood,3143 -3,2,1014,good,6,4796,west_welmwood,4796 -3,2,1185,poor,16,4897,northwest,4897 -3,2,1058,good,0,4865,west_welmwood,4865 -1,1,412,great,10,2284,east_elmwood,2284 -0,1,251,good,12,1547,west_welmwood,1547 -2,1,711,good,1,3301,west_welmwood,3301 -2,1,813,good,4,3401,west_welmwood,3401 -2,1,726,poor,33,3016,northwest,2937.584 -0,1,192,poor,19,1434,northwest,1434 -1,1,486,good,11,1922,west_welmwood,1922 -3,2,1198,great,4,5524,east_elmwood,5524 -1,1,316,good,14,1688,west_welmwood,1688 -3,2,1238,great,5,5550,east_elmwood,5550 -3,2,868,good,1,4658,west_welmwood,4658 -1,1,371,great,13,2264,east_elmwood,2264 -2,1,597,good,5,3135,west_welmwood,3135 -0,1,298,good,13,1597,west_welmwood,1597 -3,2,1049,good,6,4831,west_welmwood,4831 -2,1,553,good,11,3063,west_welmwood,3063 -0,1,521,poor,61,1322,northwest,1213.596 -3,2,902,poor,49,4300,northwest,4050.6 -1,1,719,poor,41,1791,northwest,1715.778 -2,1,713,poor,63,2703,northwest,2470.542 -3,2,1196,good,0,5060,west_welmwood,5060 -1,1,426,good,8,1874,west_welmwood,1874 -3,2,1235,poor,44,4671,northwest,4446.792 -2,1,643,great,3,3670,east_elmwood,3670 -0,1,535,poor,26,1692,northwest,1671.696 -2,1,581,poor,29,2913,northwest,2860.566 -0,1,111,poor,62,849,northwest,777.684 -1,1,469,great,6,2432,east_elmwood,2432 -3,2,880,great,11,5103,east_elmwood,5103 -1,1,487,good,8,1909,west_welmwood,1909 -1,1,492,good,14,1834,west_welmwood,1834 -2,1,846,good,4,3394,west_welmwood,3394 -0,1,121,good,1,1522,west_welmwood,1522 -3,2,984,poor,25,4561,northwest,4515.39 -3,2,964,good,8,4705,west_welmwood,4705 -3,2,998,good,3,4818,west_welmwood,4818 -0,1,282,great,3,2119,east_elmwood,2119 -0,1,498,good,12,1811,west_welmwood,1811 -0,1,157,good,9,1511,west_welmwood,1511 -1,1,495,good,4,1969,west_welmwood,1969 -1,1,522,good,4,2029,west_welmwood,2029 -3,2,1036,poor,42,4496,northwest,4298.176 -0,1,441,good,5,1830,west_welmwood,1830 -1,1,361,good,1,1870,west_welmwood,1870 -1,1,452,poor,50,1426,northwest,1340.44 
-0,1,441,great,0,2373,east_elmwood,2373 -1,1,396,good,0,1918,west_welmwood,1918 -0,1,220,good,12,1528,west_welmwood,1528 -3,2,894,good,14,4628,west_welmwood,4628 -0,1,111,good,7,1435,west_welmwood,1435 -2,1,881,great,7,3872,east_elmwood,3872 -1,1,613,great,13,2478,east_elmwood,2478 -1,1,380,poor,58,1296,northwest,1197.504 -3,2,985,good,5,4829,west_welmwood,4829 -2,1,784,great,14,3738,east_elmwood,3738 -1,1,431,poor,19,1787,northwest,1787 -2,1,564,great,3,3664,east_elmwood,3664 -1,1,445,poor,64,1296,northwest,1181.952 -0,1,236,good,3,1631,west_welmwood,1631 -2,1,893,good,6,3407,west_welmwood,3407 -1,1,355,good,3,1824,west_welmwood,1824 -1,1,600,poor,42,1655,northwest,1582.18 -3,2,929,great,7,5162,east_elmwood,5162 -2,1,881,poor,30,3146,northwest,3083.08 -3,2,801,poor,22,4462,northwest,4444.152 -1,1,732,good,9,2132,west_welmwood,2132 -1,1,702,poor,23,2003,northwest,1990.982 -3,2,1033,poor,53,4355,northwest,4067.57 -2,1,700,great,9,3691,east_elmwood,3691 -3,2,875,great,5,5145,east_elmwood,5145 -1,1,330,good,3,1798,west_welmwood,1798 -0,1,530,good,0,1961,west_welmwood,1961 -0,1,364,great,8,2215,east_elmwood,2215 -1,1,359,poor,42,1481,northwest,1415.836 -2,1,564,great,2,3645,east_elmwood,3645 -0,1,122,great,13,1897,east_elmwood,1897 -1,1,590,good,0,2125,west_welmwood,2125 -0,1,224,poor,42,1246,northwest,1191.176 -1,1,515,good,12,1869,west_welmwood,1869 -3,2,852,poor,20,4488,northwest,4488 -0,1,515,poor,57,1374,northwest,1272.324 -0,1,394,poor,45,1335,northwest,1268.25 -2,1,881,good,6,3375,west_welmwood,3375 -0,1,221,poor,56,1022,northwest,948.416 -0,1,462,great,5,2299,east_elmwood,2299 -0,1,406,poor,24,1535,northwest,1522.72 -1,1,595,great,4,2540,east_elmwood,2540 -1,1,474,good,11,1855,west_welmwood,1855 -3,2,1249,good,14,4931,west_welmwood,4931 -3,2,1223,good,10,5008,west_welmwood,5008 -3,2,834,good,0,4704,west_welmwood,4704 -0,1,288,poor,38,1288,northwest,1241.632 -3,2,1035,poor,44,4451,northwest,4237.352 -2,1,567,good,2,3183,west_welmwood,3183 
-2,1,884,good,4,3430,west_welmwood,3430 -2,1,888,great,4,3900,east_elmwood,3900 -0,1,227,great,9,2064,east_elmwood,2064 -0,1,204,poor,39,1223,northwest,1176.526 -3,2,897,good,13,4660,west_welmwood,4660 -1,1,586,good,5,2056,west_welmwood,2056 -0,1,532,great,10,2315,east_elmwood,2315 -1,1,377,good,5,1839,west_welmwood,1839 -0,1,534,great,5,2429,east_elmwood,2429 -3,2,1201,poor,32,4767,northwest,4652.592 -0,1,163,poor,51,1003,northwest,940.814 -2,1,844,great,4,3905,east_elmwood,3905 -3,2,1024,poor,37,4487,northwest,4334.442 -0,1,196,poor,46,1142,northwest,1082.616 -2,1,661,poor,54,2732,northwest,2546.224 -0,1,317,good,10,1616,west_welmwood,1616 -0,1,162,great,13,1930,east_elmwood,1930 -1,1,721,great,10,2596,east_elmwood,2596 -1,1,472,great,10,2340,east_elmwood,2340 -2,1,650,poor,62,2617,northwest,2397.172 -2,1,535,good,9,3017,west_welmwood,3017 -1,1,311,good,8,1764,west_welmwood,1764 -0,1,371,good,7,1720,west_welmwood,1720 -1,1,524,good,11,1907,west_welmwood,1907 -2,1,580,good,5,3176,west_welmwood,3176 -1,1,681,poor,61,1583,northwest,1453.194 -2,1,943,good,14,3374,west_welmwood,3374 -1,1,309,poor,53,1290,northwest,1204.86 -3,2,1233,good,9,4960,west_welmwood,4960 -0,1,198,poor,49,1133,northwest,1067.286 -2,1,897,great,9,3919,east_elmwood,3919 -3,2,1129,poor,15,4828,northwest,4828 -3,2,1014,great,11,5263,east_elmwood,5263 -3,2,1119,good,9,4864,west_welmwood,4864 -3,2,962,poor,59,4199,northwest,3871.478 -1,1,317,poor,17,1655,northwest,1655 -0,1,165,poor,55,1010,northwest,939.3 -1,1,738,great,7,2648,east_elmwood,2648 -3,2,977,great,9,5281,east_elmwood,5281 -1,1,722,great,7,2686,east_elmwood,2686 -2,1,706,good,7,3187,west_welmwood,3187 -1,1,424,good,9,1811,west_welmwood,1811 -1,1,669,great,6,2594,east_elmwood,2594 -1,1,327,great,9,2252,east_elmwood,2252 -1,1,435,good,3,1915,west_welmwood,1915 -2,1,706,poor,31,2990,northwest,2924.22 -3,2,1200,good,10,4920,west_welmwood,4920 -2,1,622,great,0,3703,east_elmwood,3703 -1,1,338,poor,42,1463,northwest,1398.628 
-2,1,724,poor,60,2725,northwest,2507 -0,1,255,good,14,1526,west_welmwood,1526 -1,1,501,poor,33,1718,northwest,1673.332 -2,1,643,great,7,3703,east_elmwood,3703 -3,2,1054,poor,19,4696,northwest,4696 -2,1,817,good,8,3296,west_welmwood,3296 -1,1,301,poor,36,1402,northwest,1357.136 -0,1,261,good,13,1577,west_welmwood,1577 -2,1,925,good,12,3447,west_welmwood,3447 -3,2,939,poor,52,4310,northwest,4034.16 -3,2,1203,poor,22,4848,northwest,4828.608 -0,1,439,great,12,2214,east_elmwood,2214 -1,1,344,poor,28,1603,northwest,1577.352 -1,1,404,poor,28,1602,northwest,1576.368 -2,1,723,good,6,3240,west_welmwood,3240 -1,1,494,great,1,2526,east_elmwood,2526 -2,1,870,great,10,3907,east_elmwood,3907 -1,1,562,good,1,2024,west_welmwood,2024 -1,1,335,poor,46,1347,northwest,1276.956 -3,2,897,good,10,4618,west_welmwood,4618 -2,1,512,good,4,3089,west_welmwood,3089 -1,1,743,good,13,2128,west_welmwood,2128 -3,2,1197,good,8,4984,west_welmwood,4984 -3,2,1221,great,4,5521,east_elmwood,5521 -0,1,327,great,9,2158,east_elmwood,2158 -0,1,199,good,8,1479,west_welmwood,1479 -3,2,942,good,13,4663,west_welmwood,4663 -2,1,616,good,4,3150,west_welmwood,3150 -0,1,380,good,14,1598,west_welmwood,1598 -2,1,707,good,12,3225,west_welmwood,3225 -0,1,498,poor,57,1355,northwest,1254.73 -2,1,566,poor,37,2755,northwest,2661.33 -3,2,1173,good,12,4855,west_welmwood,4855 -0,1,312,poor,28,1385,northwest,1362.84 -0,1,433,great,7,2220,east_elmwood,2220 -3,2,1243,good,0,5140,west_welmwood,5140 -2,1,946,good,12,3467,west_welmwood,3467 -2,1,666,good,1,3245,west_welmwood,3245 -2,1,689,poor,44,2879,northwest,2740.808 -2,1,549,good,11,2999,west_welmwood,2999 -0,1,514,poor,47,1399,northwest,1323.454 -3,2,932,poor,47,4356,northwest,4120.776 -0,1,325,poor,45,1268,northwest,1204.6 -3,2,1245,poor,30,4820,northwest,4723.6 -3,2,817,poor,37,4341,northwest,4193.406 -0,1,347,poor,27,1441,northwest,1420.826 -2,1,701,great,0,3846,east_elmwood,3846 -1,1,534,good,8,2001,west_welmwood,2001 -0,1,543,great,7,2385,east_elmwood,2385 
-2,1,893,poor,59,2858,northwest,2635.076 -1,1,402,good,12,1809,west_welmwood,1809 -3,2,1110,great,2,5441,east_elmwood,5441 -0,1,295,poor,17,1566,northwest,1566 -0,1,277,good,9,1603,west_welmwood,1603 -1,1,552,great,4,2499,east_elmwood,2499 -0,1,110,great,0,2000,east_elmwood,2000 -1,1,347,poor,20,1624,northwest,1624 -3,2,1182,great,6,5497,east_elmwood,5497 -0,1,297,good,3,1657,west_welmwood,1657 -2,1,619,good,10,3152,west_welmwood,3152 -2,1,596,good,12,3080,west_welmwood,3080 -0,1,303,poor,35,1308,northwest,1268.76 -0,1,316,good,8,1681,west_welmwood,1681 -1,1,482,good,5,1888,west_welmwood,1888 -2,1,846,poor,16,3277,northwest,3277 -3,2,831,good,14,4539,west_welmwood,4539 -0,1,251,great,10,2056,east_elmwood,2056 -3,2,1154,poor,53,4515,northwest,4217.01 -2,1,564,good,7,3141,west_welmwood,3141 -3,2,1171,good,10,4928,west_welmwood,4928 -0,1,232,poor,27,1367,northwest,1347.862 -2,1,737,poor,56,2739,northwest,2541.792 -2,1,511,poor,47,2682,northwest,2537.172 -1,1,337,good,0,1855,west_welmwood,1855 -1,1,355,poor,22,1619,northwest,1612.524 -0,1,290,poor,48,1257,northwest,1186.608 -3,2,977,good,1,4809,west_welmwood,4809 -2,1,568,great,13,3566,east_elmwood,3566 -2,1,633,poor,57,2693,northwest,2493.718 -0,1,341,poor,39,1382,northwest,1329.484 -2,1,818,good,4,3394,west_welmwood,3394 -0,1,191,good,0,1578,west_welmwood,1578 -1,1,649,poor,27,1829,northwest,1803.394 -0,1,237,good,2,1613,west_welmwood,1613 -3,2,1028,good,8,4790,west_welmwood,4790 -1,1,459,good,12,1827,west_welmwood,1827 -2,1,786,good,0,3413,west_welmwood,3413 -0,1,454,poor,63,1200,northwest,1096.8 -1,1,539,poor,55,1464,northwest,1361.52 -0,1,220,poor,42,1247,northwest,1192.132 -1,1,747,poor,49,1803,northwest,1698.426 -2,1,542,poor,24,2878,northwest,2854.976 -3,2,913,good,11,4640,west_welmwood,4640 -2,1,949,good,0,3548,west_welmwood,3548 -0,1,212,poor,57,1028,northwest,951.928 -2,1,793,good,3,3378,west_welmwood,3378 -0,1,382,poor,29,1477,northwest,1450.414 -0,1,515,poor,52,1424,northwest,1332.864 
-3,2,966,great,6,5240,east_elmwood,5240 -3,2,1034,poor,36,4514,northwest,4369.552 -3,2,1192,good,0,5042,west_welmwood,5042 -2,1,787,great,3,3843,east_elmwood,3843 -1,1,326,great,3,2268,east_elmwood,2268 -2,1,760,good,2,3291,west_welmwood,3291 -3,2,996,great,7,5321,east_elmwood,5321 -0,1,148,poor,58,953,northwest,880.572 -1,1,662,good,5,2125,west_welmwood,2125 -3,2,1038,poor,62,4311,northwest,3948.876 -1,1,690,great,5,2667,east_elmwood,2667 -3,2,1029,good,14,4699,west_welmwood,4699 -2,1,847,poor,46,2988,northwest,2832.624 -3,2,922,good,12,4642,west_welmwood,4642 -2,1,606,good,2,3183,west_welmwood,3183 -1,1,385,poor,36,1543,northwest,1493.624 -1,1,546,good,6,1938,west_welmwood,1938 -0,1,472,poor,26,1578,northwest,1559.064 -1,1,419,good,1,1911,west_welmwood,1911 -2,1,550,poor,37,2793,northwest,2698.038 -1,1,617,good,0,2077,west_welmwood,2077 -1,1,495,good,6,1920,west_welmwood,1920 -2,1,686,good,8,3186,west_welmwood,3186 -2,1,930,good,6,3484,west_welmwood,3484 -1,1,420,good,1,1933,west_welmwood,1933 -1,1,494,poor,24,1755,northwest,1740.96 -3,2,1012,good,2,4807,west_welmwood,4807 -3,2,835,poor,43,4232,northwest,4037.328 -2,1,539,great,2,3594,east_elmwood,3594 -0,1,153,great,13,1937,east_elmwood,1937 -2,1,776,great,14,3734,east_elmwood,3734 -1,1,563,good,13,1975,west_welmwood,1975 -3,2,1144,poor,51,4445,northwest,4169.41 -1,1,549,great,8,2478,east_elmwood,2478 -3,2,1015,good,9,4782,west_welmwood,4782 -1,1,446,poor,50,1465,northwest,1377.1 -0,1,531,great,0,2408,east_elmwood,2408 -2,1,538,poor,64,2513,northwest,2291.856 -0,1,279,good,14,1542,west_welmwood,1542 -2,1,860,great,2,3923,east_elmwood,3923 -3,2,1153,poor,31,4693,northwest,4589.754 -0,1,288,poor,36,1323,northwest,1280.664 -0,1,366,great,11,2190,east_elmwood,2190 -2,1,539,good,9,3029,west_welmwood,3029 -1,1,482,poor,61,1327,northwest,1218.186 -0,1,148,good,13,1458,west_welmwood,1458 -0,1,433,good,12,1703,west_welmwood,1703 -0,1,147,good,8,1472,west_welmwood,1472 -3,2,825,poor,30,4326,northwest,4239.48 
-3,2,824,good,3,4621,west_welmwood,4621 -1,1,418,great,6,2321,east_elmwood,2321 -0,1,512,poor,55,1385,northwest,1288.05 -0,1,456,poor,42,1470,northwest,1405.32 -1,1,582,good,8,2050,west_welmwood,2050 -0,1,107,good,10,1438,west_welmwood,1438 -1,1,628,great,3,2559,east_elmwood,2559 -2,1,827,poor,41,3053,northwest,2924.774 -0,1,267,poor,30,1378,northwest,1350.44 -0,1,363,good,0,1750,west_welmwood,1750 -2,1,801,great,10,3776,east_elmwood,3776 -1,1,444,good,11,1817,west_welmwood,1817 -0,1,273,poor,29,1379,northwest,1354.178 -3,2,1012,great,9,5268,east_elmwood,5268 -2,1,837,good,10,3374,west_welmwood,3374 -0,1,329,good,12,1585,west_welmwood,1585 -3,2,1119,good,6,4899,west_welmwood,4899 -2,1,611,good,14,3048,west_welmwood,3048 -2,1,541,good,9,3014,west_welmwood,3014 -3,2,848,good,9,4594,west_welmwood,4594 -2,1,730,good,12,3164,west_welmwood,3164 -2,1,602,poor,49,2747,northwest,2587.674 -3,2,1164,poor,43,4604,northwest,4392.216 -1,1,632,good,13,2018,west_welmwood,2018 -2,1,797,good,9,3260,west_welmwood,3260 -1,1,377,great,2,2333,east_elmwood,2333 -2,1,780,great,4,3828,east_elmwood,3828 -3,2,1004,great,13,5216,east_elmwood,5216 -2,1,769,poor,22,3101,northwest,3088.596 -1,1,733,good,0,2230,west_welmwood,2230 -1,1,309,poor,62,1209,northwest,1107.444 -3,2,1178,good,11,4967,west_welmwood,4967 -1,1,464,poor,20,1768,northwest,1768 -2,1,835,good,12,3349,west_welmwood,3349 -3,2,1046,great,7,5363,east_elmwood,5363 -1,1,480,good,3,1923,west_welmwood,1923 -0,1,397,great,11,2173,east_elmwood,2173 -3,2,966,good,6,4768,west_welmwood,4768 -1,1,414,poor,37,1531,northwest,1478.946 -3,2,848,great,0,5242,east_elmwood,5242 -1,1,610,poor,38,1760,northwest,1696.64 -1,1,542,great,7,2480,east_elmwood,2480 -0,1,299,good,3,1669,west_welmwood,1669 -1,1,347,poor,53,1311,northwest,1224.474 -2,1,608,good,9,3107,west_welmwood,3107 -0,1,259,good,11,1594,west_welmwood,1594 -0,1,498,poor,39,1517,northwest,1459.354 -0,1,470,great,11,2308,east_elmwood,2308 -3,2,871,great,0,5265,east_elmwood,5265 
-0,1,485,poor,48,1416,northwest,1336.704 -2,1,516,great,7,3546,east_elmwood,3546 -2,1,697,great,1,3787,east_elmwood,3787 -1,1,370,poor,58,1319,northwest,1218.756 -0,1,406,good,10,1685,west_welmwood,1685 -2,1,737,great,0,3878,east_elmwood,3878 -2,1,610,poor,18,3020,northwest,3020 -0,1,392,poor,26,1557,northwest,1538.316 -0,1,429,good,3,1801,west_welmwood,1801 -1,1,368,poor,29,1626,northwest,1596.732 -3,2,1111,good,7,4918,west_welmwood,4918 -2,1,612,great,10,3598,east_elmwood,3598 -2,1,860,good,6,3387,west_welmwood,3387 -2,1,939,good,3,3519,west_welmwood,3519 -1,1,676,poor,34,1848,northwest,1796.256 -1,1,513,good,8,1915,west_welmwood,1915 -3,2,945,good,7,4725,west_welmwood,4725 -1,1,400,poor,22,1636,northwest,1629.456 -1,1,704,great,2,2723,east_elmwood,2723 -1,1,721,poor,29,1931,northwest,1896.242 -2,1,554,good,12,3073,west_welmwood,3073 -0,1,439,good,3,1790,west_welmwood,1790 -0,1,523,good,3,1902,west_welmwood,1902 -1,1,477,good,3,1996,west_welmwood,1996 -1,1,698,good,4,2133,west_welmwood,2133 -0,1,463,poor,27,1582,northwest,1559.852 -1,1,321,poor,50,1307,northwest,1228.58 -0,1,170,good,5,1563,west_welmwood,1563 -3,2,1179,poor,44,4609,northwest,4387.768 -3,2,1117,good,8,4919,west_welmwood,4919 -1,1,672,good,11,2088,west_welmwood,2088 -2,1,937,good,6,3506,west_welmwood,3506 -2,1,545,poor,61,2489,northwest,2284.902 -2,1,532,great,9,3565,east_elmwood,3565 -0,1,528,good,6,1860,west_welmwood,1860 -2,1,813,great,3,3857,east_elmwood,3857 -2,1,821,poor,27,3118,northwest,3074.348 -3,2,1061,poor,44,4442,northwest,4228.784 -1,1,678,good,2,2125,west_welmwood,2125 -1,1,452,poor,18,1771,northwest,1771 -0,1,150,good,4,1490,west_welmwood,1490 -1,1,742,poor,52,1673,northwest,1565.928 -1,1,408,poor,17,1779,northwest,1779 -2,1,949,poor,56,2983,northwest,2768.224 -3,2,1229,great,3,5583,east_elmwood,5583 -0,1,287,great,14,2093,east_elmwood,2093 -0,1,169,great,9,1932,east_elmwood,1932 -2,1,623,good,12,3083,west_welmwood,3083 -3,2,1148,good,2,4931,west_welmwood,4931 
-1,1,484,good,1,1999,west_welmwood,1999 -0,1,111,good,5,1510,west_welmwood,1510 -2,1,855,good,1,3433,west_welmwood,3433 -0,1,427,great,2,2312,east_elmwood,2312 -1,1,504,great,9,2416,east_elmwood,2416 -2,1,704,good,14,3142,west_welmwood,3142 -1,1,504,great,14,2346,east_elmwood,2346 -3,2,1127,good,10,4896,west_welmwood,4896 -1,1,501,good,6,1924,west_welmwood,1924 -2,1,923,poor,31,3199,northwest,3128.622 -3,2,894,good,2,4699,west_welmwood,4699 -2,1,778,good,6,3336,west_welmwood,3336 -1,1,698,good,7,2155,west_welmwood,2155 -1,1,376,great,4,2339,east_elmwood,2339 -3,2,1188,good,6,4992,west_welmwood,4992 -2,1,905,poor,16,3349,northwest,3349 -0,1,549,poor,47,1481,northwest,1401.026 -2,1,558,good,3,3115,west_welmwood,3115 -1,1,418,good,3,1880,west_welmwood,1880 -3,2,961,poor,43,4349,northwest,4148.946 -3,2,1112,good,5,4913,west_welmwood,4913 -0,1,467,good,2,1879,west_welmwood,1879 -3,2,1111,good,0,4999,west_welmwood,4999 -1,1,590,good,10,1975,west_welmwood,1975 -0,1,518,poor,62,1315,northwest,1204.54 -1,1,416,great,5,2341,east_elmwood,2341 -3,2,1051,poor,51,4382,northwest,4110.316 -2,1,934,good,10,3440,west_welmwood,3440 -0,1,209,good,3,1628,west_welmwood,1628 -2,1,618,great,12,3642,east_elmwood,3642 -1,1,648,great,10,2563,east_elmwood,2563 -1,1,338,good,10,1725,west_welmwood,1725 -3,2,1073,poor,21,4716,northwest,4706.568 -2,1,511,good,14,2926,west_welmwood,2926 -2,1,701,good,13,3156,west_welmwood,3156 -1,1,419,good,14,1753,west_welmwood,1753 -2,1,806,good,2,3364,west_welmwood,3364 -2,1,667,good,3,3278,west_welmwood,3278 -0,1,428,poor,30,1539,northwest,1508.22 -0,1,270,good,3,1664,west_welmwood,1664 -1,1,465,poor,19,1822,northwest,1822 -2,1,724,great,3,3806,east_elmwood,3806 -3,2,1105,poor,55,4384,northwest,4077.12 -2,1,911,great,1,4037,east_elmwood,4037 -0,1,495,poor,33,1574,northwest,1533.076 -2,1,818,great,1,3883,east_elmwood,3883 -0,1,524,good,4,1902,west_welmwood,1902 -3,2,1011,poor,64,4268,northwest,3892.416 -1,1,439,poor,26,1644,northwest,1624.272 
-1,1,326,poor,49,1311,northwest,1234.962 -2,1,584,good,5,3141,west_welmwood,3141 -0,1,278,great,1,2140,east_elmwood,2140 -2,1,929,good,8,3455,west_welmwood,3455 -3,2,903,good,6,4700,west_welmwood,4700 -0,1,307,good,4,1658,west_welmwood,1658 -3,2,945,good,1,4779,west_welmwood,4779 -3,2,871,great,12,5094,east_elmwood,5094 -2,1,742,good,8,3288,west_welmwood,3288 -0,1,400,good,7,1734,west_welmwood,1734 -3,2,1158,great,7,5408,east_elmwood,5408 -3,2,862,poor,28,4473,northwest,4401.432 -1,1,513,poor,52,1540,northwest,1441.44 -0,1,248,good,9,1524,west_welmwood,1524 -3,2,1052,great,9,5345,east_elmwood,5345 -0,1,246,poor,60,1003,northwest,922.76 -3,2,1190,poor,55,4468,northwest,4155.24 -1,1,586,poor,61,1515,northwest,1390.77 -3,2,1219,great,7,5472,east_elmwood,5472 -1,1,371,great,13,2192,east_elmwood,2192 -1,1,729,good,14,2120,west_welmwood,2120 -3,2,986,poor,40,4388,northwest,4212.48 -3,2,1026,good,0,4893,west_welmwood,4893 -2,1,640,good,12,3120,west_welmwood,3120 -2,1,841,poor,26,3139,northwest,3101.332 -3,2,897,poor,35,4427,northwest,4294.19 -0,1,446,good,6,1823,west_welmwood,1823 -2,1,883,poor,22,3278,northwest,3264.888 -3,2,801,good,14,4468,west_welmwood,4468 -1,1,693,good,13,2108,west_welmwood,2108 -0,1,221,poor,40,1250,northwest,1200 -3,2,1116,poor,36,4593,northwest,4446.024 -3,2,1164,good,7,4922,west_welmwood,4922 -3,2,941,good,10,4643,west_welmwood,4643 -2,1,685,good,5,3272,west_welmwood,3272 -0,1,205,great,8,2064,east_elmwood,2064 -0,1,519,good,10,1781,west_welmwood,1781 -3,2,1064,good,13,4750,west_welmwood,4750 -0,1,486,poor,63,1273,northwest,1163.522 -3,2,1235,great,3,5545,east_elmwood,5545 -0,1,297,good,7,1648,west_welmwood,1648 -0,1,121,good,13,1402,west_welmwood,1402 -0,1,140,good,11,1385,west_welmwood,1385 -1,1,540,good,8,1974,west_welmwood,1974 -1,1,407,good,6,1848,west_welmwood,1848 -2,1,557,good,2,3185,west_welmwood,3185 -3,2,1002,poor,60,4225,northwest,3887 -1,1,505,good,11,1885,west_welmwood,1885 -2,1,861,great,12,3801,east_elmwood,3801 
-1,1,718,good,14,2072,west_welmwood,2072 -0,1,366,great,6,2209,east_elmwood,2209 -2,1,696,great,10,3728,east_elmwood,3728 -3,2,1123,good,9,4870,west_welmwood,4870 -2,1,713,great,11,3667,east_elmwood,3667 -2,1,884,poor,23,3232,northwest,3212.608 -0,1,505,great,9,2293,east_elmwood,2293 -2,1,659,poor,55,2716,northwest,2525.88 -0,1,369,great,10,2212,east_elmwood,2212 -3,2,1220,poor,45,4615,northwest,4384.25 -2,1,522,great,1,3574,east_elmwood,3574 -2,1,671,poor,32,2946,northwest,2875.296 -2,1,762,good,5,3346,west_welmwood,3346 -0,1,192,poor,53,1111,northwest,1037.674 -1,1,553,poor,24,1803,northwest,1788.576 -1,1,618,great,12,2480,east_elmwood,2480 -1,1,394,poor,35,1534,northwest,1487.98 -3,2,828,poor,48,4230,northwest,3993.12 -1,1,705,poor,36,1873,northwest,1813.064 -3,2,987,poor,52,4355,northwest,4076.28 -3,2,854,great,10,5072,east_elmwood,5072 -2,1,930,poor,62,2932,northwest,2685.712 -0,1,283,good,7,1595,west_welmwood,1595 -0,1,241,good,5,1567,west_welmwood,1567 -0,1,315,good,1,1752,west_welmwood,1752 -2,1,687,poor,45,2880,northwest,2736 -3,2,1120,good,13,4833,west_welmwood,4833 -1,1,698,great,4,2654,east_elmwood,2654 -3,2,1243,poor,60,4474,northwest,4116.08 -0,1,180,great,12,1963,east_elmwood,1963 -3,2,1030,great,7,5314,east_elmwood,5314 -2,1,791,poor,39,3002,northwest,2887.924 -1,1,361,poor,16,1714,northwest,1714 -0,1,529,poor,23,1684,northwest,1673.896 -3,2,1091,poor,50,4429,northwest,4163.26 -2,1,888,good,14,3347,west_welmwood,3347 -1,1,479,great,10,2417,east_elmwood,2417 -0,1,122,great,5,1934,east_elmwood,1934 -2,1,929,good,5,3497,west_welmwood,3497 -0,1,439,good,7,1755,west_welmwood,1755 -3,2,1185,good,5,4999,west_welmwood,4999 -3,2,837,poor,37,4283,northwest,4137.378 -3,2,897,good,8,4674,west_welmwood,4674 -1,1,354,good,10,1776,west_welmwood,1776 -3,2,1205,poor,16,4914,northwest,4914 -3,2,1063,good,9,4835,west_welmwood,4835 -3,2,966,poor,20,4632,northwest,4632 -3,2,1192,great,6,5480,east_elmwood,5480 -1,1,693,poor,42,1807,northwest,1727.492 
-0,1,214,poor,34,1305,northwest,1268.46 -3,2,1075,great,10,5320,east_elmwood,5320 -1,1,616,great,13,2456,east_elmwood,2456 -3,2,805,good,6,4632,west_welmwood,4632 -1,1,534,good,14,1936,west_welmwood,1936 -0,1,408,good,7,1705,west_welmwood,1705 -2,1,803,good,2,3354,west_welmwood,3354 -1,1,410,great,1,2365,east_elmwood,2365 -0,1,378,poor,21,1536,northwest,1532.928 -2,1,882,great,7,3892,east_elmwood,3892 -1,1,647,good,12,2041,west_welmwood,2041 -1,1,560,good,9,1930,west_welmwood,1930 -0,1,507,poor,30,1630,northwest,1597.4 -3,2,1129,poor,63,4354,northwest,3979.556 -0,1,115,good,9,1427,west_welmwood,1427 -1,1,318,poor,25,1518,northwest,1502.82 -1,1,315,poor,50,1302,northwest,1223.88 -2,1,748,poor,15,3188,northwest,3188 -2,1,685,poor,33,2938,northwest,2861.612 -0,1,154,poor,37,1180,northwest,1139.88 -0,1,515,good,10,1786,west_welmwood,1786 -1,1,355,good,6,1825,west_welmwood,1825 -3,2,1217,poor,19,4867,northwest,4867 -0,1,430,great,7,2292,east_elmwood,2292 -3,2,1227,good,9,4974,west_welmwood,4974 -3,2,1186,poor,63,4418,northwest,4038.052 -1,1,729,poor,41,1861,northwest,1782.838 -2,1,549,good,9,3018,west_welmwood,3018 -1,1,644,great,4,2611,east_elmwood,2611 -3,2,976,great,1,5331,east_elmwood,5331 -2,1,566,good,10,3052,west_welmwood,3052 -0,1,113,poor,26,1238,northwest,1223.144 -0,1,402,great,7,2248,east_elmwood,2248 -3,2,1243,good,2,5070,west_welmwood,5070 -2,1,893,great,5,3923,east_elmwood,3923 -0,1,104,poor,49,1025,northwest,965.55 -2,1,764,great,9,3784,east_elmwood,3784 -2,1,570,poor,64,2544,northwest,2320.128 -0,1,269,good,9,1546,west_welmwood,1546 -1,1,500,poor,17,1865,northwest,1865 -2,1,699,great,10,3685,east_elmwood,3685 -1,1,492,poor,19,1759,northwest,1759 -1,1,494,poor,49,1480,northwest,1394.16 -2,1,628,poor,53,2656,northwest,2480.704 -2,1,871,poor,37,3139,northwest,3032.274 -2,1,897,great,7,3894,east_elmwood,3894 -3,2,1249,good,12,5017,west_welmwood,5017 -1,1,458,good,6,1862,west_welmwood,1862 -2,1,616,great,0,3709,east_elmwood,3709 
-2,1,846,good,6,3405,west_welmwood,3405 -1,1,565,good,11,1980,west_welmwood,1980 -3,2,1197,poor,56,4477,northwest,4154.656 -3,2,1177,good,3,4975,west_welmwood,4975 -2,1,776,good,5,3306,west_welmwood,3306 -3,2,819,good,14,4552,west_welmwood,4552 -0,1,344,good,8,1634,west_welmwood,1634 -0,1,435,good,7,1770,west_welmwood,1770 -1,1,677,great,3,2645,east_elmwood,2645 -2,1,702,poor,46,2831,northwest,2683.788 -1,1,483,good,7,1893,west_welmwood,1893 -1,1,571,good,8,2032,west_welmwood,2032 -1,1,368,good,5,1770,west_welmwood,1770 -2,1,559,poor,35,2786,northwest,2702.42 -0,1,510,poor,15,1781,northwest,1781 -3,2,1054,poor,26,4623,northwest,4567.524 -0,1,459,poor,26,1626,northwest,1606.488 -0,1,325,good,9,1598,west_welmwood,1598 -3,2,878,great,7,5200,east_elmwood,5200 -1,1,467,good,3,1960,west_welmwood,1960 -3,2,1072,good,12,4791,west_welmwood,4791 -1,1,474,poor,40,1607,northwest,1542.72 -0,1,486,good,4,1855,west_welmwood,1855 -0,1,459,good,11,1728,west_welmwood,1728 -1,1,344,great,6,2267,east_elmwood,2267 -2,1,861,poor,43,3040,northwest,2900.16 -3,2,1239,poor,24,4816,northwest,4777.472 -0,1,361,poor,33,1427,northwest,1389.898 -3,2,913,good,1,4794,west_welmwood,4794 -0,1,220,good,5,1619,west_welmwood,1619 -2,1,574,poor,47,2698,northwest,2552.308 -1,1,309,good,2,1832,west_welmwood,1832 -3,2,998,good,12,4758,west_welmwood,4758 -0,1,525,poor,44,1460,northwest,1389.92 -1,1,384,good,7,1776,west_welmwood,1776 -0,1,175,great,13,1952,east_elmwood,1952 -3,2,872,poor,53,4207,northwest,3929.338 -0,1,419,great,7,2209,east_elmwood,2209 -3,2,862,great,13,5060,east_elmwood,5060 -1,1,671,poor,40,1776,northwest,1704.96 -0,1,243,great,11,2075,east_elmwood,2075 -1,1,576,good,12,1992,west_welmwood,1992 -1,1,419,good,13,1777,west_welmwood,1777 -2,1,827,good,10,3303,west_welmwood,3303 -3,2,994,good,1,4871,west_welmwood,4871 -0,1,355,great,14,2076,east_elmwood,2076 -3,2,1024,good,9,4791,west_welmwood,4791 -1,1,340,great,6,2288,east_elmwood,2288 -1,1,433,good,9,1827,west_welmwood,1827 
-0,1,473,poor,55,1351,northwest,1256.43 -1,1,579,great,1,2610,east_elmwood,2610 -3,2,1201,good,3,5067,west_welmwood,5067 -1,1,412,poor,64,1248,northwest,1138.176 -1,1,306,great,13,2225,east_elmwood,2225 -3,2,1218,good,0,5106,west_welmwood,5106 -0,1,386,good,2,1746,west_welmwood,1746 -2,1,634,good,12,3135,west_welmwood,3135 -1,1,505,great,10,2402,east_elmwood,2402 -2,1,847,poor,16,3301,northwest,3301 -0,1,126,good,6,1438,west_welmwood,1438 -3,2,1088,good,7,4873,west_welmwood,4873 -3,2,1171,good,13,4918,west_welmwood,4918 -3,2,1182,good,5,4972,west_welmwood,4972 -3,2,1076,poor,38,4529,northwest,4365.956 -2,1,607,poor,52,2714,northwest,2540.304 -1,1,576,poor,28,1804,northwest,1775.136 -3,2,1102,poor,17,4823,northwest,4823 -3,2,1128,good,0,4987,west_welmwood,4987 -1,1,629,good,6,2080,west_welmwood,2080 -0,1,449,poor,20,1602,northwest,1602 -2,1,744,good,3,3345,west_welmwood,3345 -3,2,1172,good,7,4932,west_welmwood,4932 -0,1,315,great,14,2072,east_elmwood,2072 -0,1,154,great,6,2003,east_elmwood,2003 -2,1,631,good,2,3185,west_welmwood,3185 -2,1,503,good,9,2963,west_welmwood,2963 -0,1,333,good,5,1724,west_welmwood,1724 -3,2,803,good,7,4602,west_welmwood,4602 -3,2,969,good,6,4761,west_welmwood,4761 -2,1,624,good,6,3195,west_welmwood,3195 -3,2,1138,good,1,4989,west_welmwood,4989 -2,1,633,good,8,3167,west_welmwood,3167 -3,2,1170,good,11,4900,west_welmwood,4900 -3,2,1107,good,6,4866,west_welmwood,4866 -0,1,147,great,10,1927,east_elmwood,1927 -2,1,788,poor,38,3017,northwest,2908.388 -3,2,1239,good,13,4991,west_welmwood,4991 -1,1,383,great,7,2356,east_elmwood,2356 -0,1,205,good,0,1587,west_welmwood,1587 -0,1,315,good,5,1654,west_welmwood,1654 -2,1,659,great,10,3684,east_elmwood,3684 -1,1,361,poor,53,1348,northwest,1259.032 -1,1,382,poor,43,1469,northwest,1401.426 -2,1,569,poor,37,2830,northwest,2733.78 -1,1,351,great,9,2283,east_elmwood,2283 -0,1,110,poor,55,961,northwest,893.73 -3,2,1160,great,8,5455,east_elmwood,5455 -1,1,376,good,6,1861,west_welmwood,1861 
-0,1,119,poor,62,871,northwest,797.836 -0,1,323,great,13,2098,east_elmwood,2098 -0,1,262,good,12,1584,west_welmwood,1584 -0,1,119,poor,15,1373,northwest,1373 -2,1,899,good,12,3397,west_welmwood,3397 -3,2,971,good,5,4819,west_welmwood,4819 -2,1,938,good,12,3393,west_welmwood,3393 -2,1,655,good,3,3246,west_welmwood,3246 -0,1,449,good,9,1746,west_welmwood,1746 -1,1,613,poor,32,1777,northwest,1734.352 -0,1,251,great,2,2139,east_elmwood,2139 -1,1,374,good,11,1775,west_welmwood,1775 -3,2,1081,poor,34,4581,northwest,4452.732 -2,1,514,poor,25,2884,northwest,2855.16 -0,1,450,good,6,1773,west_welmwood,1773 -0,1,519,good,6,1882,west_welmwood,1882 -3,2,831,great,3,5122,east_elmwood,5122 -2,1,568,poor,40,2724,northwest,2615.04 -3,2,1194,good,7,4960,west_welmwood,4960 -2,1,620,poor,44,2758,northwest,2625.616 -3,2,990,great,7,5286,east_elmwood,5286 -2,1,513,great,14,3453,east_elmwood,3453 -0,1,196,good,11,1462,west_welmwood,1462 -2,1,594,good,9,3139,west_welmwood,3139 -0,1,167,poor,55,967,northwest,899.31 -3,2,1098,good,6,4855,west_welmwood,4855 -3,2,1072,good,11,4819,west_welmwood,4819 -1,1,397,good,13,1743,west_welmwood,1743 -1,1,383,good,13,1751,west_welmwood,1751 -2,1,546,poor,60,2515,northwest,2313.8 -3,2,1200,good,0,5080,west_welmwood,5080 -2,1,815,poor,43,2999,northwest,2861.046 -1,1,401,good,9,1768,west_welmwood,1768 -2,1,565,good,2,3137,west_welmwood,3137 -1,1,362,good,1,1883,west_welmwood,1883 -3,2,1156,good,5,4997,west_welmwood,4997 -1,1,502,poor,51,1522,northwest,1427.636 -1,1,487,poor,30,1648,northwest,1615.04 -3,2,1179,good,7,4937,west_welmwood,4937 -0,1,412,poor,37,1438,northwest,1389.108 -1,1,455,good,3,1891,west_welmwood,1891 -1,1,677,good,9,2040,west_welmwood,2040 -2,1,633,good,14,3098,west_welmwood,3098 -3,2,949,good,9,4699,west_welmwood,4699 -3,2,1125,great,2,5435,east_elmwood,5435 -1,1,581,poor,40,1711,northwest,1642.56 -2,1,900,poor,56,2957,northwest,2744.096 -3,2,980,great,3,5251,east_elmwood,5251 -0,1,549,good,0,1940,west_welmwood,1940 
-1,1,615,poor,21,1870,northwest,1866.26 -0,1,473,good,14,1770,west_welmwood,1770 -1,1,523,good,7,1941,west_welmwood,1941 -2,1,745,poor,26,3105,northwest,3067.74 -1,1,566,good,7,1980,west_welmwood,1980 -1,1,544,good,7,1998,west_welmwood,1998 -2,1,903,poor,63,2855,northwest,2609.47 -2,1,600,great,6,3601,east_elmwood,3601 -1,1,465,good,0,1994,west_welmwood,1994 -2,1,675,poor,27,3009,northwest,2966.874 -3,2,893,great,7,5126,east_elmwood,5126 -3,2,1111,great,1,5487,east_elmwood,5487 -3,2,1144,poor,20,4783,northwest,4783 -2,1,508,poor,24,2863,northwest,2840.096 -1,1,551,great,12,2456,east_elmwood,2456 -2,1,676,poor,29,2943,northwest,2890.026 -1,1,699,good,7,2090,west_welmwood,2090 -1,1,429,good,7,1843,west_welmwood,1843 -1,1,590,good,4,2030,west_welmwood,2030 -0,1,128,good,4,1496,west_welmwood,1496 -3,2,1150,poor,57,4427,northwest,4099.402 -1,1,392,good,12,1758,west_welmwood,1758 -2,1,806,good,8,3286,west_welmwood,3286 -2,1,938,good,10,3427,west_welmwood,3427 -2,1,552,good,5,3072,west_welmwood,3072 -1,1,316,great,8,2283,east_elmwood,2283 -0,1,319,great,12,2071,east_elmwood,2071 -1,1,483,poor,59,1430,northwest,1318.46 -1,1,400,great,0,2413,east_elmwood,2413 -1,1,586,poor,48,1568,northwest,1480.192 -1,1,484,poor,24,1700,northwest,1686.4 -3,2,859,poor,23,4430,northwest,4403.42 -0,1,534,poor,56,1386,northwest,1286.208 -2,1,936,poor,47,3071,northwest,2905.166 -3,2,911,good,7,4665,west_welmwood,4665 -2,1,900,poor,24,3258,northwest,3231.936 -3,2,933,good,10,4719,west_welmwood,4719 -1,1,464,great,5,2366,east_elmwood,2366 -2,1,773,poor,44,2961,northwest,2818.872 -0,1,203,poor,26,1295,northwest,1279.46 -1,1,321,poor,18,1605,northwest,1605 -1,1,539,good,4,1976,west_welmwood,1976 -1,1,655,poor,45,1723,northwest,1636.85 -3,2,1185,good,4,4991,west_welmwood,4991 -1,1,611,poor,25,1904,northwest,1884.96 -0,1,509,good,11,1833,west_welmwood,1833 -2,1,666,good,8,3165,west_welmwood,3165 -2,1,539,poor,47,2621,northwest,2479.466 -0,1,442,poor,30,1546,northwest,1515.08 
-1,1,690,poor,34,1809,northwest,1758.348 -3,2,1062,good,14,4744,west_welmwood,4744 -1,1,428,good,8,1880,west_welmwood,1880 -3,2,814,poor,60,4057,northwest,3732.44 -2,1,742,poor,38,2989,northwest,2881.396 -1,1,618,poor,62,1495,northwest,1369.42 -1,1,384,great,10,2250,east_elmwood,2250 -2,1,506,great,13,3521,east_elmwood,3521 -0,1,286,poor,35,1344,northwest,1303.68 -0,1,152,great,4,1973,east_elmwood,1973 -0,1,131,good,7,1412,west_welmwood,1412 -3,2,1079,good,9,4866,west_welmwood,4866 -2,1,904,poor,33,3141,northwest,3059.334 -2,1,804,good,6,3327,west_welmwood,3327 -0,1,384,good,7,1736,west_welmwood,1736 -0,1,264,poor,27,1344,northwest,1325.184 -0,1,542,poor,17,1816,northwest,1816 -3,2,1236,poor,55,4549,northwest,4230.57 -0,1,487,good,10,1820,west_welmwood,1820 -0,1,286,poor,55,1091,northwest,1014.63 -0,1,351,good,12,1606,west_welmwood,1606 -0,1,534,great,8,2345,east_elmwood,2345 -0,1,461,good,6,1780,west_welmwood,1780 -2,1,768,good,6,3269,west_welmwood,3269 -2,1,949,great,5,3998,east_elmwood,3998 -1,1,638,poor,31,1802,northwest,1762.356 -0,1,135,good,10,1466,west_welmwood,1466 -1,1,724,poor,44,1760,northwest,1675.52 -0,1,131,good,8,1439,west_welmwood,1439 -3,2,1241,great,2,5615,east_elmwood,5615 -0,1,524,great,5,2348,east_elmwood,2348 -1,1,494,good,10,1875,west_welmwood,1875 -1,1,592,poor,33,1739,northwest,1693.786 -2,1,759,poor,61,2720,northwest,2496.96 -0,1,176,good,3,1572,west_welmwood,1572 -1,1,435,poor,16,1814,northwest,1814 -3,2,966,poor,28,4583,northwest,4509.672 -0,1,466,poor,31,1529,northwest,1495.362 -1,1,507,good,8,1932,west_welmwood,1932 -3,2,1012,poor,35,4527,northwest,4391.19 -0,1,533,good,6,1849,west_welmwood,1849 -2,1,837,great,13,3834,east_elmwood,3834 -1,1,509,good,11,1916,west_welmwood,1916 -0,1,165,good,5,1562,west_welmwood,1562 -3,2,1055,great,0,5400,east_elmwood,5400 -0,1,267,good,2,1634,west_welmwood,1634 -2,1,545,good,9,3067,west_welmwood,3067 -3,2,1085,good,1,4931,west_welmwood,4931 -0,1,273,poor,30,1400,northwest,1372 
-2,1,692,poor,31,2948,northwest,2883.144 -2,1,527,great,11,3468,east_elmwood,3468 -3,2,1144,good,3,4994,west_welmwood,4994 -0,1,288,great,0,2151,east_elmwood,2151 -0,1,313,poor,45,1239,northwest,1177.05 -0,1,124,good,12,1406,west_welmwood,1406 -1,1,332,poor,36,1496,northwest,1448.128 -2,1,724,good,14,3220,west_welmwood,3220 -1,1,429,great,0,2445,east_elmwood,2445 -2,1,752,good,1,3361,west_welmwood,3361 -3,2,825,great,6,5124,east_elmwood,5124 -3,2,1190,good,1,5008,west_welmwood,5008 -2,1,579,poor,58,2575,northwest,2379.3 -3,2,1069,poor,49,4472,northwest,4212.624 -0,1,345,poor,45,1309,northwest,1243.55 -0,1,293,great,8,2124,east_elmwood,2124 -0,1,134,good,4,1490,west_welmwood,1490 -1,1,477,poor,31,1643,northwest,1606.854 -0,1,211,poor,35,1281,northwest,1242.57 -3,2,1106,good,3,4951,west_welmwood,4951 -1,1,429,great,3,2442,east_elmwood,2442 -3,2,1186,poor,45,4551,northwest,4323.45 -0,1,287,good,13,1594,west_welmwood,1594 -2,1,927,great,0,3981,east_elmwood,3981 -3,2,1073,great,4,5379,east_elmwood,5379 -0,1,166,poor,15,1394,northwest,1394 -2,1,598,great,7,3600,east_elmwood,3600 -2,1,908,good,5,3482,west_welmwood,3482 -2,1,788,good,1,3364,west_welmwood,3364 -1,1,330,poor,43,1414,northwest,1348.956 -3,2,1233,good,9,4982,west_welmwood,4982 -1,1,496,good,3,1929,west_welmwood,1929 -3,2,1209,good,6,5039,west_welmwood,5039 -3,2,1224,poor,55,4500,northwest,4185 -1,1,409,good,2,1863,west_welmwood,1863 -2,1,605,good,10,3120,west_welmwood,3120 -2,1,517,poor,17,2916,northwest,2916 -1,1,515,great,0,2558,east_elmwood,2558 -1,1,373,great,4,2363,east_elmwood,2363 -2,1,637,great,8,3702,east_elmwood,3702 -0,1,219,poor,46,1158,northwest,1097.784 -0,1,151,great,0,2053,east_elmwood,2053 -0,1,187,great,3,2058,east_elmwood,2058 -0,1,158,poor,20,1338,northwest,1338 -0,1,146,good,6,1463,west_welmwood,1463 -1,1,493,good,4,1960,west_welmwood,1960 -1,1,348,good,2,1853,west_welmwood,1853 -2,1,603,poor,22,2956,northwest,2944.176 -1,1,546,good,9,1930,west_welmwood,1930 
-3,2,1144,good,2,4932,west_welmwood,4932 -0,1,413,good,12,1674,west_welmwood,1674 -0,1,341,great,12,2156,east_elmwood,2156 -2,1,624,great,12,3642,east_elmwood,3642 -1,1,478,great,5,2434,east_elmwood,2434 -1,1,683,poor,64,1559,northwest,1421.808 -1,1,581,poor,34,1782,northwest,1732.104 -3,2,1210,good,1,5081,west_welmwood,5081 -1,1,658,great,3,2633,east_elmwood,2633 -2,1,758,poor,58,2745,northwest,2536.38 -1,1,611,good,2,2109,west_welmwood,2109 -1,1,522,good,10,1878,west_welmwood,1878 -1,1,612,great,11,2516,east_elmwood,2516 -0,1,408,good,2,1783,west_welmwood,1783 -2,1,789,poor,34,3021,northwest,2936.412 -3,2,963,good,7,4710,west_welmwood,4710 -2,1,718,great,1,3856,east_elmwood,3856 -0,1,403,poor,40,1428,northwest,1370.88 -1,1,598,poor,22,1879,northwest,1871.484 -1,1,689,good,1,2183,west_welmwood,2183 -0,1,418,great,1,2314,east_elmwood,2314 -2,1,727,poor,54,2809,northwest,2617.988 -1,1,738,great,8,2665,east_elmwood,2665 -3,2,1130,great,10,5400,east_elmwood,5400 -3,2,1057,good,9,4773,west_welmwood,4773 -1,1,719,great,0,2669,east_elmwood,2669 -1,1,472,good,1,1971,west_welmwood,1971 -1,1,745,good,6,2170,west_welmwood,2170 -1,1,443,good,5,1934,west_welmwood,1934 -3,2,1075,great,14,5236,east_elmwood,5236 -3,2,1144,good,2,5021,west_welmwood,5021 -1,1,339,great,8,2210,east_elmwood,2210 -1,1,575,good,5,1995,west_welmwood,1995 -2,1,542,good,1,3084,west_welmwood,3084 -0,1,439,good,4,1793,west_welmwood,1793 -2,1,877,great,3,3963,east_elmwood,3963 -0,1,457,poor,46,1407,northwest,1333.836 -0,1,388,poor,36,1380,northwest,1335.84 -0,1,484,poor,21,1677,northwest,1673.646 -0,1,500,poor,21,1687,northwest,1683.626 -3,2,942,good,0,4781,west_welmwood,4781 -3,2,1204,poor,64,4437,northwest,4046.544 -3,2,1102,good,13,4820,west_welmwood,4820 -1,1,603,poor,44,1712,northwest,1629.824 -1,1,499,good,6,1979,west_welmwood,1979 -2,1,582,good,3,3200,west_welmwood,3200 -3,2,874,poor,44,4292,northwest,4085.984 -3,2,967,great,9,5269,east_elmwood,5269 -1,1,668,great,4,2619,east_elmwood,2619 
-0,1,530,great,7,2369,east_elmwood,2369 -2,1,750,good,11,3190,west_welmwood,3190 -0,1,263,good,4,1653,west_welmwood,1653 -1,1,467,good,2,1985,west_welmwood,1985 -3,2,1039,poor,39,4471,northwest,4301.102 -0,1,187,good,6,1489,west_welmwood,1489 -0,1,209,poor,50,1100,northwest,1034 -0,1,398,poor,59,1201,northwest,1107.322 -3,2,945,poor,63,4131,northwest,3775.734 -2,1,657,poor,31,2920,northwest,2855.76 -1,1,537,good,1,2023,west_welmwood,2023 -0,1,334,great,2,2231,east_elmwood,2231 -2,1,660,great,9,3678,east_elmwood,3678 -1,1,479,good,6,1903,west_welmwood,1903 -3,2,917,poor,64,4102,northwest,3741.024 -2,1,898,good,7,3389,west_welmwood,3389 -2,1,822,great,1,3950,east_elmwood,3950 -0,1,270,poor,47,1156,northwest,1093.576 -2,1,625,poor,64,2613,northwest,2383.056 -0,1,268,poor,45,1248,northwest,1185.6 -0,1,218,poor,46,1131,northwest,1072.188 -0,1,422,poor,45,1402,northwest,1331.9 -2,1,766,poor,61,2790,northwest,2561.22 -0,1,224,good,3,1638,west_welmwood,1638 -2,1,762,good,1,3326,west_welmwood,3326 -2,1,782,good,3,3318,west_welmwood,3318 -0,1,518,poor,36,1537,northwest,1487.816 -1,1,548,good,1,2076,west_welmwood,2076 -1,1,665,poor,44,1750,northwest,1666 -2,1,723,good,3,3340,west_welmwood,3340 -0,1,373,good,14,1599,west_welmwood,1599 -0,1,442,poor,63,1205,northwest,1101.37 -2,1,743,poor,49,2861,northwest,2695.062 -2,1,872,poor,35,3072,northwest,2979.84 -1,1,504,good,5,1988,west_welmwood,1988 -2,1,829,great,10,3837,east_elmwood,3837 -1,1,314,good,13,1656,west_welmwood,1656 -0,1,418,great,13,2186,east_elmwood,2186 -1,1,312,poor,29,1509,northwest,1481.838 -2,1,548,poor,17,3012,northwest,3012 -1,1,524,great,7,2475,east_elmwood,2475 -3,2,1142,good,14,4841,west_welmwood,4841 -2,1,929,great,9,3918,east_elmwood,3918 -1,1,505,great,3,2499,east_elmwood,2499 -2,1,665,poor,16,3097,northwest,3097 -0,1,109,great,13,1873,east_elmwood,1873 -0,1,290,poor,37,1274,northwest,1230.684 -3,2,1213,good,10,5001,west_welmwood,5001 -2,1,728,good,12,3185,west_welmwood,3185 
-2,1,933,good,5,3520,west_welmwood,3520 -2,1,895,poor,23,3263,northwest,3243.422 -2,1,547,poor,23,2896,northwest,2878.624 -0,1,549,poor,41,1587,northwest,1520.346 -0,1,217,poor,34,1258,northwest,1222.776 -1,1,492,poor,56,1414,northwest,1312.192 -1,1,421,good,10,1818,west_welmwood,1818 -3,2,1034,poor,15,4684,northwest,4684 -2,1,944,good,10,3477,west_welmwood,3477 -1,1,347,good,12,1681,west_welmwood,1681 -3,2,1208,good,14,4951,west_welmwood,4951 -0,1,449,good,4,1802,west_welmwood,1802 -0,1,464,great,5,2343,east_elmwood,2343 -0,1,205,poor,55,1085,northwest,1009.05 -1,1,435,good,9,1891,west_welmwood,1891 -1,1,682,great,10,2617,east_elmwood,2617 -1,1,317,poor,57,1244,northwest,1151.944 -1,1,730,great,12,2649,east_elmwood,2649 -1,1,668,good,14,2036,west_welmwood,2036 -3,2,1199,good,4,5000,west_welmwood,5000 -0,1,461,great,11,2224,east_elmwood,2224 -2,1,576,poor,43,2727,northwest,2601.558 -3,2,1028,good,3,4845,west_welmwood,4845 -2,1,601,good,5,3154,west_welmwood,3154 -0,1,122,poor,33,1178,northwest,1147.372 -0,1,390,great,5,2241,east_elmwood,2241 -2,1,845,great,7,3891,east_elmwood,3891 -0,1,511,good,1,1881,west_welmwood,1881 -3,2,998,poor,46,4435,northwest,4204.38 -3,2,1042,good,2,4860,west_welmwood,4860 -0,1,195,great,0,2122,east_elmwood,2122 -1,1,305,poor,17,1636,northwest,1636 -2,1,839,great,13,3820,east_elmwood,3820 -3,2,1218,good,1,5045,west_welmwood,5045 -1,1,443,great,6,2401,east_elmwood,2401 -3,2,1203,poor,57,4453,northwest,4123.478 -1,1,429,good,5,1838,west_welmwood,1838 -3,2,1189,good,10,4899,west_welmwood,4899 -1,1,303,good,14,1695,west_welmwood,1695 -0,1,254,poor,64,1061,northwest,967.632 -0,1,208,poor,55,1053,northwest,979.29 -1,1,453,great,8,2397,east_elmwood,2397 -0,1,230,great,14,2012,east_elmwood,2012 -0,1,198,good,9,1470,west_welmwood,1470 -3,2,1132,good,13,4886,west_welmwood,4886 -0,1,314,great,5,2184,east_elmwood,2184 -2,1,882,good,12,3382,west_welmwood,3382 -3,2,1056,good,6,4849,west_welmwood,4849 -3,2,1072,great,7,5385,east_elmwood,5385 
-0,1,154,good,1,1536,west_welmwood,1536 -1,1,612,poor,48,1668,northwest,1574.592 -0,1,201,good,7,1539,west_welmwood,1539 -2,1,540,good,10,3068,west_welmwood,3068 -3,2,1088,good,13,4829,west_welmwood,4829 -3,2,804,great,12,5012,east_elmwood,5012 -3,2,904,good,12,4659,west_welmwood,4659 -1,1,600,good,8,1992,west_welmwood,1992 -2,1,614,great,7,3612,east_elmwood,3612 -0,1,197,good,9,1472,west_welmwood,1472 -1,1,544,poor,24,1838,northwest,1823.296 -2,1,551,good,8,3079,west_welmwood,3079 -1,1,560,good,4,2067,west_welmwood,2067 -2,1,742,great,12,3692,east_elmwood,3692 -0,1,145,good,2,1486,west_welmwood,1486 -2,1,830,great,2,3897,east_elmwood,3897 -1,1,404,great,4,2348,east_elmwood,2348 -0,1,525,poor,57,1321,northwest,1223.246 -0,1,142,great,12,1940,east_elmwood,1940 -3,2,1088,good,11,4836,west_welmwood,4836 -0,1,232,poor,32,1280,northwest,1249.28 -2,1,598,poor,41,2780,northwest,2663.24 -0,1,199,great,4,2096,east_elmwood,2096 -2,1,584,great,6,3607,east_elmwood,3607 -3,2,857,good,9,4606,west_welmwood,4606 -1,1,530,great,14,2405,east_elmwood,2405 -2,1,614,poor,54,2714,northwest,2529.448 -0,1,363,good,11,1690,west_welmwood,1690 -2,1,649,poor,60,2663,northwest,2449.96 -2,1,535,good,9,2996,west_welmwood,2996 -3,2,1041,good,12,4744,west_welmwood,4744 -2,1,844,poor,53,2908,northwest,2716.072 -2,1,576,poor,64,2516,northwest,2294.592 -2,1,872,good,5,3402,west_welmwood,3402 -3,2,1172,good,10,4946,west_welmwood,4946 -3,2,874,great,9,5121,east_elmwood,5121 -3,2,1217,poor,34,4729,northwest,4596.588 -3,2,1220,poor,47,4600,northwest,4351.6 -3,2,1123,poor,46,4519,northwest,4284.012 -0,1,276,great,5,2095,east_elmwood,2095 -2,1,688,good,8,3246,west_welmwood,3246 -1,1,577,great,9,2528,east_elmwood,2528 -3,2,1232,great,6,5518,east_elmwood,5518 -3,2,973,poor,54,4298,northwest,4005.736 -1,1,668,poor,36,1815,northwest,1756.92 -1,1,711,good,11,2083,west_welmwood,2083 -0,1,235,good,14,1478,west_welmwood,1478 -3,2,1114,good,14,4836,west_welmwood,4836 -2,1,741,great,2,3832,east_elmwood,3832 
-0,1,210,poor,52,1131,northwest,1058.616 -1,1,699,good,0,2162,west_welmwood,2162 -1,1,612,poor,33,1771,northwest,1724.954 -3,2,978,poor,20,4651,northwest,4651 -3,2,1160,poor,28,4737,northwest,4661.208 -0,1,103,good,9,1411,west_welmwood,1411 -1,1,739,great,9,2698,east_elmwood,2698 -1,1,589,poor,18,1948,northwest,1948 -0,1,429,great,8,2214,east_elmwood,2214 -0,1,399,great,5,2253,east_elmwood,2253 -2,1,789,great,1,3920,east_elmwood,3920 -3,2,1132,good,14,4820,west_welmwood,4820 -2,1,515,good,2,3078,west_welmwood,3078 -2,1,936,great,11,3934,east_elmwood,3934 -2,1,669,great,14,3633,east_elmwood,3633 -1,1,523,great,0,2527,east_elmwood,2527 -0,1,192,poor,58,1038,northwest,959.112 -0,1,500,poor,50,1448,northwest,1361.12 -1,1,615,good,2,2101,west_welmwood,2101 -1,1,352,poor,17,1649,northwest,1649 -3,2,863,great,11,5108,east_elmwood,5108 -2,1,827,good,11,3304,west_welmwood,3304 -2,1,799,great,4,3887,east_elmwood,3887 -2,1,613,poor,54,2672,northwest,2490.304 -2,1,524,good,10,3052,west_welmwood,3052 -0,1,307,good,3,1664,west_welmwood,1664 -1,1,343,poor,57,1250,northwest,1157.5 -0,1,474,great,10,2319,east_elmwood,2319 -2,1,803,poor,38,3053,northwest,2943.092 -3,2,1168,poor,28,4688,northwest,4612.992 -2,1,704,good,6,3234,west_welmwood,3234 -3,2,1237,good,2,5104,west_welmwood,5104 -3,2,865,good,10,4637,west_welmwood,4637 -0,1,268,good,0,1676,west_welmwood,1676 -1,1,697,good,2,2146,west_welmwood,2146 -1,1,372,good,12,1775,west_welmwood,1775 -3,2,904,poor,51,4256,northwest,3992.128 -2,1,590,poor,61,2554,northwest,2344.572 -1,1,436,good,2,1873,west_welmwood,1873 -3,2,845,good,12,4561,west_welmwood,4561 -3,2,1059,great,3,5336,east_elmwood,5336 -0,1,424,great,14,2177,east_elmwood,2177 -1,1,735,good,1,2181,west_welmwood,2181 -2,1,698,good,5,3214,west_welmwood,3214 -1,1,543,good,10,1984,west_welmwood,1984 -2,1,811,poor,45,2921,northwest,2774.95 -2,1,623,good,0,3211,west_welmwood,3211 -1,1,338,good,14,1709,west_welmwood,1709 -2,1,810,great,7,3875,east_elmwood,3875 
-0,1,466,good,13,1715,west_welmwood,1715 -0,1,530,poor,35,1625,northwest,1576.25 -2,1,684,good,12,3164,west_welmwood,3164 -2,1,780,poor,60,2822,northwest,2596.24 -3,2,1241,good,4,5089,west_welmwood,5089 -3,2,1039,poor,19,4720,northwest,4720 -3,2,866,great,13,5039,east_elmwood,5039 -2,1,606,poor,59,2590,northwest,2387.98 -3,2,848,poor,58,4103,northwest,3791.172 -1,1,463,great,11,2357,east_elmwood,2357 -0,1,355,great,6,2189,east_elmwood,2189 -3,2,875,good,12,4608,west_welmwood,4608 -3,2,1075,good,13,4761,west_welmwood,4761 -3,2,915,good,7,4729,west_welmwood,4729 -1,1,326,poor,19,1617,northwest,1617 -3,2,905,good,1,4709,west_welmwood,4709 -0,1,441,great,8,2275,east_elmwood,2275 -2,1,748,poor,30,3078,northwest,3016.44 -0,1,514,good,3,1835,west_welmwood,1835 -0,1,443,poor,40,1410,northwest,1353.6 -0,1,207,good,10,1524,west_welmwood,1524 -1,1,657,great,9,2579,east_elmwood,2579 -2,1,866,poor,45,2988,northwest,2838.6 -0,1,221,great,12,1992,east_elmwood,1992 -3,2,986,good,1,4785,west_welmwood,4785 -2,1,852,poor,54,2881,northwest,2685.092 -0,1,176,good,8,1514,west_welmwood,1514 -0,1,372,poor,16,1590,northwest,1590 -3,2,936,good,2,4740,west_welmwood,4740 -3,2,1012,good,12,4721,west_welmwood,4721 -0,1,153,poor,28,1260,northwest,1239.84 -1,1,515,great,0,2514,east_elmwood,2514 -1,1,567,poor,47,1549,northwest,1465.354 -1,1,470,great,7,2387,east_elmwood,2387 -2,1,633,good,5,3211,west_welmwood,3211 -0,1,262,good,10,1549,west_welmwood,1549 -0,1,230,great,3,2071,east_elmwood,2071 -2,1,601,poor,29,2891,northwest,2838.962 -2,1,682,good,5,3189,west_welmwood,3189 -1,1,463,great,11,2345,east_elmwood,2345 -3,2,1197,poor,44,4649,northwest,4425.848 -2,1,925,great,1,4037,east_elmwood,4037 -0,1,277,poor,39,1305,northwest,1255.41 -3,2,1033,good,2,4852,west_welmwood,4852 -3,2,1208,poor,59,4443,northwest,4096.446 -0,1,190,good,11,1483,west_welmwood,1483 -0,1,483,good,8,1782,west_welmwood,1782 -3,2,896,great,11,5111,east_elmwood,5111 -3,2,1085,great,3,5399,east_elmwood,5399 
-2,1,929,poor,48,3098,northwest,2924.512 -3,2,804,good,11,4496,west_welmwood,4496 -1,1,666,great,2,2652,east_elmwood,2652 -0,1,118,great,5,1953,east_elmwood,1953 -0,1,230,poor,52,1070,northwest,1001.52 -0,1,298,poor,32,1418,northwest,1383.968 -0,1,214,poor,29,1329,northwest,1305.078 -0,1,525,great,1,2463,east_elmwood,2463 -3,2,1241,good,3,5020,west_welmwood,5020 -1,1,446,good,11,1822,west_welmwood,1822 -2,1,761,great,2,3878,east_elmwood,3878 -2,1,615,poor,18,3053,northwest,3053 -0,1,189,good,7,1500,west_welmwood,1500 -0,1,443,good,9,1738,west_welmwood,1738 -0,1,146,poor,64,878,northwest,800.736 -2,1,787,poor,35,2989,northwest,2899.33 -0,1,356,poor,19,1594,northwest,1594 -3,2,1240,good,9,5011,west_welmwood,5011 -2,1,720,poor,28,3088,northwest,3038.592 -0,1,379,poor,34,1427,northwest,1387.044 -3,2,994,good,13,4675,west_welmwood,4675 -3,2,1213,good,12,4952,west_welmwood,4952 -0,1,338,good,3,1736,west_welmwood,1736 -3,2,919,great,9,5192,east_elmwood,5192 -2,1,501,good,9,3054,west_welmwood,3054 -1,1,476,good,3,1912,west_welmwood,1912 -2,1,577,great,6,3602,east_elmwood,3602 -1,1,400,poor,58,1340,northwest,1238.16 -3,2,1104,poor,59,4363,northwest,4022.686 -2,1,879,poor,16,3359,northwest,3359 -1,1,320,good,4,1808,west_welmwood,1808 -3,2,1038,great,1,5356,east_elmwood,5356 -3,2,884,great,8,5168,east_elmwood,5168 -0,1,445,great,6,2307,east_elmwood,2307 -2,1,841,poor,38,3096,northwest,2984.544 -2,1,704,good,12,3137,west_welmwood,3137 -2,1,571,good,11,3035,west_welmwood,3035 -3,2,1145,poor,59,4396,northwest,4053.112 -2,1,787,good,7,3317,west_welmwood,3317 -3,2,862,good,12,4626,west_welmwood,4626 -2,1,789,poor,52,2838,northwest,2656.368 -0,1,465,good,7,1809,west_welmwood,1809 -0,1,390,poor,30,1451,northwest,1421.98 -0,1,374,good,14,1683,west_welmwood,1683 -2,1,518,poor,43,2723,northwest,2597.742 -0,1,328,great,1,2224,east_elmwood,2224 -3,2,1227,great,12,5497,east_elmwood,5497 -3,2,873,good,6,4672,west_welmwood,4672 -3,2,1236,good,9,5010,west_welmwood,5010 
-0,1,294,great,7,2162,east_elmwood,2162 -3,2,801,poor,55,4114,northwest,3826.02 -0,1,522,poor,44,1514,northwest,1441.328 -3,2,940,poor,39,4446,northwest,4277.052 -1,1,501,great,0,2485,east_elmwood,2485 -3,2,867,poor,49,4232,northwest,3986.544 -3,2,1156,poor,26,4697,northwest,4640.636 -1,1,669,good,8,2065,west_welmwood,2065 -3,2,853,poor,53,4126,northwest,3853.684 -2,1,786,poor,52,2862,northwest,2678.832 -0,1,306,good,5,1687,west_welmwood,1687 -0,1,525,good,12,1781,west_welmwood,1781 -0,1,400,good,3,1732,west_welmwood,1732 -1,1,329,poor,61,1186,northwest,1088.748 -0,1,474,good,11,1723,west_welmwood,1723 -1,1,698,poor,27,1949,northwest,1921.714 -2,1,914,good,5,3414,west_welmwood,3414 -1,1,712,poor,61,1626,northwest,1492.668 -2,1,541,great,3,3614,east_elmwood,3614 -1,1,397,good,13,1798,west_welmwood,1798 -3,2,1137,good,7,4908,west_welmwood,4908 -3,2,859,poor,56,4118,northwest,3821.504 -1,1,532,good,0,1982,west_welmwood,1982 -2,1,525,poor,37,2731,northwest,2638.146 -3,2,870,poor,61,4098,northwest,3761.964 -2,1,677,poor,31,2978,northwest,2912.484 -2,1,873,good,5,3399,west_welmwood,3399 -3,2,824,good,12,4510,west_welmwood,4510 -0,1,347,poor,36,1365,northwest,1321.32 -3,2,1215,great,0,5580,east_elmwood,5580 -0,1,201,good,1,1548,west_welmwood,1548 -2,1,590,poor,18,3026,northwest,3026 -2,1,540,good,12,2970,west_welmwood,2970 -3,2,960,good,11,4696,west_welmwood,4696 -0,1,417,good,7,1794,west_welmwood,1794 -3,2,962,great,0,5270,east_elmwood,5270 -1,1,513,good,14,1858,west_welmwood,1858 -2,1,621,good,8,3169,west_welmwood,3169 -2,1,612,poor,27,2919,northwest,2878.134 -1,1,522,great,11,2368,east_elmwood,2368 -3,2,1178,good,0,5045,west_welmwood,5045 -3,2,935,great,10,5137,east_elmwood,5137 -1,1,692,great,10,2570,east_elmwood,2570 -0,1,504,poor,28,1650,northwest,1623.6 -3,2,1058,poor,56,4393,northwest,4076.704 -0,1,112,good,14,1336,west_welmwood,1336 -0,1,396,great,0,2256,east_elmwood,2256 -1,1,744,great,3,2692,east_elmwood,2692 -2,1,715,poor,50,2783,northwest,2616.02 
-3,2,1077,great,4,5369,east_elmwood,5369 -3,2,883,poor,47,4249,northwest,4019.554 -3,2,1056,good,4,4887,west_welmwood,4887 -0,1,425,good,12,1663,west_welmwood,1663 -1,1,602,good,4,2082,west_welmwood,2082 -1,1,642,great,3,2564,east_elmwood,2564 -1,1,351,poor,51,1365,northwest,1280.37 -1,1,674,poor,39,1758,northwest,1691.196 -3,2,1039,good,7,4858,west_welmwood,4858 -1,1,528,poor,37,1635,northwest,1579.41 -2,1,627,poor,42,2791,northwest,2668.196 -0,1,305,poor,40,1265,northwest,1214.4 -1,1,645,good,0,2097,west_welmwood,2097 -3,2,1074,good,5,4857,west_welmwood,4857 -1,1,536,poor,17,1836,northwest,1836 -0,1,313,good,8,1617,west_welmwood,1617 -1,1,532,great,6,2440,east_elmwood,2440 -3,2,983,good,13,4746,west_welmwood,4746 -3,2,1075,great,5,5398,east_elmwood,5398 -2,1,558,good,1,3175,west_welmwood,3175 -1,1,664,good,6,2145,west_welmwood,2145 -2,1,813,good,2,3399,west_welmwood,3399 -2,1,635,poor,25,3028,northwest,2997.72 -3,2,1029,great,4,5297,east_elmwood,5297 -0,1,137,great,3,2009,east_elmwood,2009 -1,1,372,poor,15,1723,northwest,1723 -0,1,512,poor,58,1371,northwest,1266.804 -3,2,1086,good,7,4880,west_welmwood,4880 -2,1,919,great,4,3951,east_elmwood,3951 -0,1,401,poor,59,1238,northwest,1141.436 -1,1,495,poor,57,1378,northwest,1276.028 -1,1,462,good,10,1910,west_welmwood,1910 -1,1,714,poor,55,1616,northwest,1502.88 -1,1,458,good,6,1879,west_welmwood,1879 -1,1,460,good,2,1922,west_welmwood,1922 -2,1,889,poor,20,3308,northwest,3308 -1,1,658,poor,37,1739,northwest,1679.874 -1,1,696,poor,61,1544,northwest,1417.392 -0,1,357,poor,46,1294,northwest,1226.712 -3,2,1131,poor,18,4839,northwest,4839 -2,1,845,poor,34,3151,northwest,3062.772 -0,1,377,good,12,1686,west_welmwood,1686 -0,1,464,great,12,2259,east_elmwood,2259 -3,2,1131,good,11,4873,west_welmwood,4873 -0,1,297,poor,27,1411,northwest,1391.246 -1,1,578,poor,57,1474,northwest,1364.924 -3,2,1036,good,0,4891,west_welmwood,4891 -3,2,1026,great,13,5220,east_elmwood,5220 -2,1,837,poor,49,2923,northwest,2753.466 
-2,1,803,good,5,3384,west_welmwood,3384 -3,2,875,great,5,5213,east_elmwood,5213 -3,2,1000,good,8,4787,west_welmwood,4787 -3,2,842,good,4,4701,west_welmwood,4701 -3,2,1138,poor,21,4819,northwest,4809.362 -1,1,623,great,14,2476,east_elmwood,2476 -2,1,904,poor,25,3289,northwest,3256.11 -2,1,900,great,2,3976,east_elmwood,3976 -0,1,533,poor,48,1480,northwest,1397.12 -0,1,507,great,3,2426,east_elmwood,2426 -2,1,764,great,12,3706,east_elmwood,3706 -2,1,570,good,13,3066,west_welmwood,3066 -3,2,1218,good,12,4926,west_welmwood,4926 -2,1,686,good,4,3226,west_welmwood,3226 -1,1,708,great,5,2655,east_elmwood,2655 -3,2,985,poor,54,4296,northwest,4003.872 -1,1,346,poor,52,1304,northwest,1220.544 -2,1,743,great,0,3847,east_elmwood,3847 -0,1,144,great,0,2067,east_elmwood,2067 -3,2,921,poor,21,4569,northwest,4559.862 -2,1,520,poor,43,2665,northwest,2542.41 -2,1,762,good,14,3181,west_welmwood,3181 -3,2,1180,poor,47,4574,northwest,4327.004 -3,2,1246,poor,15,4923,northwest,4923 -3,2,1127,good,8,4910,west_welmwood,4910 -1,1,588,good,7,2015,west_welmwood,2015 -2,1,906,good,12,3393,west_welmwood,3393 -2,1,738,poor,57,2759,northwest,2554.834 -2,1,557,good,8,3118,west_welmwood,3118 -2,1,644,good,6,3203,west_welmwood,3203 -0,1,517,great,13,2293,east_elmwood,2293 -3,2,869,great,11,5135,east_elmwood,5135 -1,1,624,poor,41,1727,northwest,1654.466 -0,1,126,poor,21,1348,northwest,1345.304 -2,1,840,good,0,3404,west_welmwood,3404 -1,1,428,good,9,1816,west_welmwood,1816 -2,1,689,poor,28,3040,northwest,2991.36 -1,1,470,good,7,1892,west_welmwood,1892 -0,1,238,great,11,2041,east_elmwood,2041 -1,1,461,good,8,1857,west_welmwood,1857 -2,1,548,great,3,3659,east_elmwood,3659 -1,1,570,poor,48,1635,northwest,1543.44 -0,1,515,poor,37,1528,northwest,1476.048 -1,1,411,good,10,1782,west_welmwood,1782 -0,1,246,poor,57,1066,northwest,987.116 -2,1,859,good,0,3418,west_welmwood,3418 -3,2,971,good,10,4705,west_welmwood,4705 -2,1,549,good,4,3158,west_welmwood,3158 -3,2,1091,great,7,5411,east_elmwood,5411 
-0,1,191,poor,56,1008,northwest,935.424 -1,1,377,great,8,2262,east_elmwood,2262 -0,1,250,poor,57,1063,northwest,984.338 -3,2,836,good,3,4697,west_welmwood,4697 -2,1,702,great,0,3784,east_elmwood,3784 -1,1,394,great,1,2337,east_elmwood,2337 -2,1,719,good,6,3223,west_welmwood,3223 -2,1,682,great,12,3623,east_elmwood,3623 -1,1,318,poor,42,1367,northwest,1306.852 -0,1,332,good,3,1739,west_welmwood,1739 -1,1,498,good,11,1904,west_welmwood,1904 -2,1,672,great,4,3703,east_elmwood,3703 -1,1,575,poor,62,1504,northwest,1377.664 -3,2,1036,good,11,4768,west_welmwood,4768 -2,1,812,good,4,3356,west_welmwood,3356 -1,1,706,poor,40,1770,northwest,1699.2 -2,1,584,good,5,3107,west_welmwood,3107 -1,1,555,poor,59,1419,northwest,1308.318 -1,1,653,good,4,2103,west_welmwood,2103 -1,1,604,good,2,2088,west_welmwood,2088 -2,1,798,poor,26,3098,northwest,3060.824 -3,2,1163,good,2,4995,west_welmwood,4995 -2,1,646,poor,27,2982,northwest,2940.252 -2,1,845,poor,46,3024,northwest,2866.752 -1,1,387,poor,47,1392,northwest,1316.832 -0,1,112,poor,60,948,northwest,872.16 -3,2,1079,poor,46,4445,northwest,4213.86 -3,2,1234,great,14,5394,east_elmwood,5394 -1,1,533,good,3,1992,west_welmwood,1992 -2,1,752,good,10,3204,west_welmwood,3204 -1,1,307,poor,55,1237,northwest,1150.41 -0,1,167,good,1,1517,west_welmwood,1517 -2,1,905,poor,46,3034,northwest,2876.232 -1,1,307,poor,40,1368,northwest,1313.28 -0,1,276,good,10,1586,west_welmwood,1586 -2,1,813,good,1,3364,west_welmwood,3364 -0,1,276,poor,43,1249,northwest,1191.546 -1,1,547,good,12,1953,west_welmwood,1953 -0,1,376,good,12,1695,west_welmwood,1695 -1,1,653,good,14,1999,west_welmwood,1999 -3,2,1017,poor,62,4264,northwest,3905.824 -1,1,582,great,6,2483,east_elmwood,2483 -3,2,1068,good,7,4889,west_welmwood,4889 -0,1,343,great,14,2120,east_elmwood,2120 -2,1,860,good,13,3357,west_welmwood,3357 -2,1,905,great,12,3863,east_elmwood,3863 -2,1,937,poor,18,3336,northwest,3336 -2,1,809,good,7,3295,west_welmwood,3295 -1,1,698,poor,56,1607,northwest,1491.296 
-0,1,336,poor,62,1143,northwest,1046.988 -1,1,483,great,8,2417,east_elmwood,2417 -2,1,852,great,2,3975,east_elmwood,3975 -3,2,840,poor,58,4096,northwest,3784.704 -2,1,936,poor,53,3020,northwest,2820.68 -2,1,556,great,12,3535,east_elmwood,3535 -2,1,572,good,14,3047,west_welmwood,3047 -3,2,879,great,7,5149,east_elmwood,5149 -1,1,700,good,11,2079,west_welmwood,2079 -2,1,564,poor,63,2512,northwest,2295.968 -3,2,1148,great,4,5468,east_elmwood,5468 -0,1,542,poor,49,1420,northwest,1337.64 -2,1,668,good,0,3240,west_welmwood,3240 -2,1,813,great,4,3921,east_elmwood,3921 -3,2,1034,good,10,4782,west_welmwood,4782 -1,1,311,great,13,2185,east_elmwood,2185 -3,2,846,poor,62,4055,northwest,3714.38 -3,2,859,poor,16,4525,northwest,4525 -2,1,672,good,7,3244,west_welmwood,3244 -2,1,695,good,6,3190,west_welmwood,3190 -3,2,1025,good,12,4775,west_welmwood,4775 -0,1,519,poor,64,1270,northwest,1158.24 -2,1,779,poor,40,3022,northwest,2901.12 -3,2,1054,great,4,5342,east_elmwood,5342 -0,1,400,poor,29,1555,northwest,1527.01 -2,1,717,poor,28,3041,northwest,2992.344 -3,2,1163,poor,28,4763,northwest,4686.792 -0,1,408,great,14,2128,east_elmwood,2128 -3,2,1190,great,3,5475,east_elmwood,5475 -2,1,615,poor,41,2806,northwest,2688.148 -2,1,660,good,0,3220,west_welmwood,3220 -1,1,667,poor,22,1927,northwest,1919.292 -1,1,693,good,3,2129,west_welmwood,2129 -0,1,458,poor,36,1464,northwest,1417.152 -0,1,276,good,2,1638,west_welmwood,1638 -1,1,702,poor,44,1787,northwest,1701.224 -0,1,206,good,7,1541,west_welmwood,1541 -2,1,762,good,7,3317,west_welmwood,3317 -1,1,369,poor,26,1559,northwest,1540.292 -3,2,1164,good,11,4944,west_welmwood,4944 -2,1,858,good,9,3374,west_welmwood,3374 -1,1,509,good,11,1919,west_welmwood,1919 -0,1,344,good,14,1554,west_welmwood,1554 -2,1,509,great,6,3567,east_elmwood,3567 -2,1,860,poor,28,3223,northwest,3171.432 -2,1,699,good,8,3249,west_welmwood,3249 -0,1,541,good,14,1833,west_welmwood,1833 -2,1,898,good,3,3457,west_welmwood,3457 -0,1,391,good,13,1618,west_welmwood,1618 
-0,1,516,great,13,2272,east_elmwood,2272 -2,1,546,poor,57,2606,northwest,2413.156 -3,2,898,good,3,4767,west_welmwood,4767 -2,1,895,good,0,3476,west_welmwood,3476 -2,1,877,poor,38,3084,northwest,2972.976 -1,1,706,good,10,2135,west_welmwood,2135 -0,1,542,poor,35,1597,northwest,1549.09 -3,2,1204,poor,54,4545,northwest,4235.94 -2,1,582,poor,48,2710,northwest,2558.24 -3,2,968,good,14,4679,west_welmwood,4679 -2,1,586,good,9,3106,west_welmwood,3106 -1,1,598,poor,28,1818,northwest,1788.912 -1,1,391,great,8,2328,east_elmwood,2328 -2,1,643,good,8,3195,west_welmwood,3195 -3,2,1036,good,3,4890,west_welmwood,4890 -3,2,867,poor,35,4388,northwest,4256.36 -3,2,1096,good,1,4971,west_welmwood,4971 -3,2,815,poor,21,4469,northwest,4460.062 -0,1,295,good,10,1553,west_welmwood,1553 -2,1,902,good,4,3453,west_welmwood,3453 -2,1,749,good,6,3240,west_welmwood,3240 -3,2,847,poor,38,4307,northwest,4151.948 -3,2,1170,poor,38,4655,northwest,4487.42 -1,1,346,good,12,1716,west_welmwood,1716 -2,1,566,good,6,3093,west_welmwood,3093 -2,1,768,poor,50,2823,northwest,2653.62 -1,1,616,poor,38,1727,northwest,1664.828 -2,1,501,good,4,3044,west_welmwood,3044 -2,1,538,poor,22,2927,northwest,2915.292 -3,2,912,poor,28,4463,northwest,4391.592 -2,1,683,poor,33,2923,northwest,2847.002 -3,2,1205,great,7,5532,east_elmwood,5532 -2,1,837,good,0,3457,west_welmwood,3457 -2,1,702,good,14,3190,west_welmwood,3190 -0,1,136,great,2,2008,east_elmwood,2008 -3,2,1075,poor,21,4690,northwest,4680.62 -1,1,432,good,10,1852,west_welmwood,1852 -3,2,941,good,11,4695,west_welmwood,4695 -0,1,220,great,0,2076,east_elmwood,2076 -1,1,671,poor,45,1699,northwest,1614.05 -2,1,682,good,10,3206,west_welmwood,3206 -1,1,700,good,1,2171,west_welmwood,2171 -1,1,411,great,14,2306,east_elmwood,2306 -2,1,552,poor,43,2729,northwest,2603.466 -0,1,497,poor,57,1371,northwest,1269.546 -3,2,951,good,3,4729,west_welmwood,4729 -0,1,430,great,12,2217,east_elmwood,2217 -3,2,1220,poor,32,4714,northwest,4600.864 -1,1,430,good,0,1928,west_welmwood,1928 
-3,2,837,good,6,4594,west_welmwood,4594 -0,1,435,good,11,1681,west_welmwood,1681 -3,2,847,great,11,5051,east_elmwood,5051 -0,1,396,good,12,1708,west_welmwood,1708 -3,2,1119,good,6,4922,west_welmwood,4922 -0,1,386,poor,54,1268,northwest,1181.776 -3,2,1208,good,2,5004,west_welmwood,5004 -0,1,240,good,8,1609,west_welmwood,1609 -1,1,323,good,14,1697,west_welmwood,1697 -2,1,876,great,6,3866,east_elmwood,3866 -2,1,653,great,0,3801,east_elmwood,3801 -2,1,579,good,11,3039,west_welmwood,3039 -1,1,335,poor,24,1635,northwest,1621.92 -3,2,898,poor,62,4129,northwest,3782.164 -1,1,593,good,0,2059,west_welmwood,2059 -1,1,392,great,7,2282,east_elmwood,2282 -1,1,374,good,12,1708,west_welmwood,1708 -1,1,725,great,7,2672,east_elmwood,2672 -3,2,831,poor,27,4424,northwest,4362.064 -2,1,835,good,1,3378,west_welmwood,3378 -0,1,233,good,11,1506,west_welmwood,1506 -2,1,830,great,7,3879,east_elmwood,3879 -1,1,645,good,13,1976,west_welmwood,1976 -3,2,831,poor,36,4354,northwest,4214.672 -1,1,478,good,1,2009,west_welmwood,2009 -2,1,790,good,11,3300,west_welmwood,3300 -0,1,472,poor,27,1650,northwest,1626.9 -3,2,1200,good,11,4941,west_welmwood,4941 -2,1,805,poor,48,2897,northwest,2734.768 -1,1,561,good,6,1990,west_welmwood,1990 -2,1,857,good,8,3357,west_welmwood,3357 -0,1,126,poor,26,1275,northwest,1259.7 -0,1,412,great,9,2197,east_elmwood,2197 -2,1,911,poor,43,3085,northwest,2943.09 -0,1,398,good,10,1740,west_welmwood,1740 -0,1,459,poor,18,1671,northwest,1671 -1,1,407,good,1,1858,west_welmwood,1858 -3,2,928,good,6,4700,west_welmwood,4700 -0,1,232,good,3,1585,west_welmwood,1585 -2,1,703,good,0,3336,west_welmwood,3336 -1,1,628,great,11,2469,east_elmwood,2469 -1,1,643,poor,59,1602,northwest,1477.044 -1,1,359,poor,48,1380,northwest,1302.72 -2,1,755,good,8,3278,west_welmwood,3278 -2,1,613,good,5,3121,west_welmwood,3121 -3,2,1009,good,1,4829,west_welmwood,4829 -0,1,306,great,13,2099,east_elmwood,2099 -3,2,828,poor,57,4157,northwest,3849.382 -3,2,1245,poor,26,4824,northwest,4766.112 
-3,2,859,good,9,4654,west_welmwood,4654 -3,2,1196,great,5,5537,east_elmwood,5537 -2,1,698,good,3,3229,west_welmwood,3229 -0,1,162,good,1,1533,west_welmwood,1533 -0,1,417,great,8,2254,east_elmwood,2254 -0,1,247,good,10,1578,west_welmwood,1578 -3,2,959,good,0,4847,west_welmwood,4847 -1,1,740,good,9,2101,west_welmwood,2101 -0,1,493,good,5,1802,west_welmwood,1802 -3,2,878,good,11,4572,west_welmwood,4572 -1,1,612,good,11,2031,west_welmwood,2031 -2,1,805,poor,40,2978,northwest,2858.88 -0,1,139,good,11,1445,west_welmwood,1445 -2,1,875,poor,26,3194,northwest,3155.672 -2,1,658,good,11,3156,west_welmwood,3156 -1,1,603,good,12,1962,west_welmwood,1962 -0,1,394,poor,61,1148,northwest,1053.864 -2,1,648,great,8,3707,east_elmwood,3707 -3,2,1038,great,11,5299,east_elmwood,5299 -0,1,249,good,8,1528,west_welmwood,1528 -1,1,362,poor,25,1654,northwest,1637.46 -2,1,622,good,0,3257,west_welmwood,3257 -2,1,724,good,13,3208,west_welmwood,3208 -0,1,186,poor,57,1011,northwest,936.186 -2,1,599,poor,22,2933,northwest,2921.268 -3,2,1248,good,10,4948,west_welmwood,4948 -0,1,272,good,9,1631,west_welmwood,1631 -2,1,772,great,9,3796,east_elmwood,3796 -1,1,427,good,0,1917,west_welmwood,1917 -3,2,1189,poor,49,4509,northwest,4247.478 -2,1,590,poor,59,2598,northwest,2395.356 -3,2,925,good,2,4795,west_welmwood,4795 -3,2,1074,poor,43,4525,northwest,4316.85 -2,1,559,good,1,3103,west_welmwood,3103 -3,2,931,poor,34,4416,northwest,4292.352 -1,1,584,poor,51,1585,northwest,1486.73 -3,2,879,good,3,4719,west_welmwood,4719 -1,1,417,poor,58,1377,northwest,1272.348 -2,1,800,poor,18,3177,northwest,3177 -2,1,507,poor,25,2849,northwest,2820.51 -1,1,351,good,14,1729,west_welmwood,1729 -3,2,1062,poor,28,4679,northwest,4604.136 -3,2,937,good,14,4598,west_welmwood,4598 -2,1,921,poor,23,3296,northwest,3276.224 -2,1,942,good,7,3441,west_welmwood,3441 -1,1,701,great,2,2699,east_elmwood,2699 -2,1,922,good,1,3538,west_welmwood,3538 -3,2,826,good,3,4688,west_welmwood,4688 -3,2,1147,good,0,5021,west_welmwood,5021 
-1,1,602,poor,63,1431,northwest,1307.934 -2,1,814,poor,32,3072,northwest,2998.272 -2,1,875,good,12,3316,west_welmwood,3316 -1,1,365,poor,47,1373,northwest,1298.858 -0,1,263,good,9,1580,west_welmwood,1580 -0,1,371,poor,40,1354,northwest,1299.84 -2,1,877,poor,27,3169,northwest,3124.634 -2,1,606,good,12,3071,west_welmwood,3071 -0,1,316,great,12,2103,east_elmwood,2103 -0,1,487,good,10,1813,west_welmwood,1813 -3,2,1169,poor,26,4764,northwest,4706.832 -3,2,1233,good,6,5054,west_welmwood,5054 -3,2,1138,great,3,5414,east_elmwood,5414 -3,2,976,great,11,5255,east_elmwood,5255 -0,1,364,good,10,1615,west_welmwood,1615 -2,1,837,poor,30,3098,northwest,3036.04 -3,2,992,poor,39,4469,northwest,4299.178 -3,2,1151,great,13,5398,east_elmwood,5398 -0,1,528,good,12,1764,west_welmwood,1764 -1,1,453,good,12,1853,west_welmwood,1853 -2,1,578,good,14,3081,west_welmwood,3081 -2,1,910,great,11,3917,east_elmwood,3917 -3,2,1073,good,8,4819,west_welmwood,4819 -1,1,736,great,10,2649,east_elmwood,2649 -2,1,780,good,3,3329,west_welmwood,3329 -0,1,360,great,3,2266,east_elmwood,2266 -0,1,480,poor,21,1694,northwest,1690.612 -2,1,878,poor,59,2889,northwest,2663.658 -0,1,113,great,10,1927,east_elmwood,1927 -3,2,1161,great,3,5449,east_elmwood,5449 -3,2,937,poor,25,4565,northwest,4519.35 -1,1,581,good,10,1986,west_welmwood,1986 -3,2,1204,good,2,5041,west_welmwood,5041 -3,2,947,poor,35,4427,northwest,4294.19 -2,1,536,good,0,3093,west_welmwood,3093 -0,1,385,good,2,1778,west_welmwood,1778 -1,1,533,poor,36,1672,northwest,1618.496 -0,1,115,good,10,1402,west_welmwood,1402 -1,1,416,great,0,2413,east_elmwood,2413 -2,1,566,good,14,3041,west_welmwood,3041 -3,2,979,great,6,5257,east_elmwood,5257 -3,2,1152,good,4,4987,west_welmwood,4987 -1,1,384,good,11,1760,west_welmwood,1760 -2,1,541,good,11,3063,west_welmwood,3063 -2,1,799,poor,18,3174,northwest,3174 -0,1,535,poor,22,1701,northwest,1694.196 -0,1,210,good,3,1602,west_welmwood,1602 -3,2,912,poor,51,4207,northwest,3946.166 -3,2,1221,poor,40,4713,northwest,4524.48 
-3,2,1091,good,1,4978,west_welmwood,4978 -3,2,1207,good,5,5055,west_welmwood,5055 -0,1,372,poor,31,1499,northwest,1466.022 -1,1,537,poor,33,1754,northwest,1708.396 -3,2,806,great,9,5111,east_elmwood,5111 -3,2,1062,great,12,5306,east_elmwood,5306 -1,1,695,good,8,2111,west_welmwood,2111 -0,1,531,great,4,2403,east_elmwood,2403 -1,1,693,poor,31,1909,northwest,1867.002 -1,1,732,good,3,2206,west_welmwood,2206 -1,1,664,poor,43,1744,northwest,1663.776 -3,2,998,great,5,5277,east_elmwood,5277 -2,1,657,poor,62,2598,northwest,2379.768 -3,2,1248,poor,40,4677,northwest,4489.92 -3,2,1112,great,13,5334,east_elmwood,5334 -1,1,451,good,8,1899,west_welmwood,1899 -2,1,562,good,2,3189,west_welmwood,3189 -1,1,671,great,5,2596,east_elmwood,2596 -3,2,1103,good,10,4852,west_welmwood,4852 -0,1,398,poor,62,1182,northwest,1082.712 -1,1,474,poor,16,1794,northwest,1794 -1,1,622,great,5,2566,east_elmwood,2566 -2,1,543,good,13,2991,west_welmwood,2991 -0,1,540,poor,15,1812,northwest,1812 -2,1,671,poor,61,2631,northwest,2415.258 -3,2,1171,poor,57,4437,northwest,4108.662 -0,1,316,great,4,2156,east_elmwood,2156 -3,2,1026,poor,48,4374,northwest,4129.056 -1,1,407,good,3,1898,west_welmwood,1898 -0,1,227,good,14,1462,west_welmwood,1462 -1,1,429,good,8,1866,west_welmwood,1866 -1,1,707,great,0,2752,east_elmwood,2752 -0,1,533,great,10,2371,east_elmwood,2371 -3,2,1106,poor,46,4483,northwest,4249.884 -0,1,414,great,5,2298,east_elmwood,2298 -2,1,745,poor,55,2828,northwest,2630.04 -0,1,334,good,5,1706,west_welmwood,1706 -3,2,1102,poor,32,4675,northwest,4562.8 -0,1,309,poor,43,1241,northwest,1183.914 -0,1,428,good,0,1851,west_welmwood,1851 -2,1,655,great,7,3697,east_elmwood,3697 -2,1,826,poor,17,3240,northwest,3240 -3,2,832,poor,63,4013,northwest,3667.882 -0,1,227,great,12,1977,east_elmwood,1977 -0,1,351,great,4,2251,east_elmwood,2251 -3,2,844,poor,57,4131,northwest,3825.306 -2,1,688,poor,41,2883,northwest,2761.914 -2,1,523,good,14,2998,west_welmwood,2998 -0,1,546,poor,30,1620,northwest,1587.6 
-0,1,428,good,1,1865,west_welmwood,1865 -1,1,642,poor,49,1622,northwest,1527.924 -0,1,156,good,4,1474,west_welmwood,1474 -2,1,578,poor,30,2845,northwest,2788.1 -3,2,940,great,12,5177,east_elmwood,5177 -2,1,748,great,3,3813,east_elmwood,3813 -2,1,699,poor,32,2936,northwest,2865.536 -0,1,349,poor,50,1230,northwest,1156.2 -2,1,849,good,5,3390,west_welmwood,3390 -1,1,374,good,5,1850,west_welmwood,1850 -2,1,611,good,14,3061,west_welmwood,3061 -1,1,497,poor,52,1511,northwest,1414.296 -0,1,356,good,3,1703,west_welmwood,1703 -0,1,308,great,11,2137,east_elmwood,2137 -3,2,983,good,3,4837,west_welmwood,4837 -2,1,549,poor,51,2612,northwest,2450.056 -3,2,871,good,5,4711,west_welmwood,4711 -2,1,788,good,10,3313,west_welmwood,3313 -1,1,465,good,6,1881,west_welmwood,1881 -0,1,259,great,0,2137,east_elmwood,2137 -0,1,312,good,7,1612,west_welmwood,1612 -2,1,691,great,5,3696,east_elmwood,3696 -0,1,471,great,5,2353,east_elmwood,2353 -2,1,869,great,8,3876,east_elmwood,3876 -0,1,387,poor,56,1247,northwest,1157.216 -2,1,683,poor,38,2930,northwest,2824.52 -0,1,440,poor,29,1533,northwest,1505.406 -3,2,827,poor,48,4191,northwest,3956.304 -0,1,157,good,3,1487,west_welmwood,1487 -3,2,937,poor,64,4110,northwest,3748.32 -3,2,1037,poor,16,4721,northwest,4721 -2,1,918,poor,36,3175,northwest,3073.4 -2,1,543,good,0,3101,west_welmwood,3101 -1,1,663,good,1,2133,west_welmwood,2133 -1,1,529,good,0,2056,west_welmwood,2056 -0,1,446,great,5,2250,east_elmwood,2250 -3,2,972,good,12,4670,west_welmwood,4670 -2,1,509,great,1,3624,east_elmwood,3624 -0,1,382,poor,22,1597,northwest,1590.612 -3,2,1114,good,6,4892,west_welmwood,4892 -1,1,444,good,1,1948,west_welmwood,1948 -3,2,1019,great,12,5206,east_elmwood,5206 -2,1,936,great,12,3913,east_elmwood,3913 -0,1,319,poor,42,1261,northwest,1205.516 -0,1,352,good,9,1619,west_welmwood,1619 -3,2,1042,good,0,4915,west_welmwood,4915 -2,1,758,good,6,3271,west_welmwood,3271 -3,2,1156,good,4,5003,west_welmwood,5003 -3,2,849,good,1,4734,west_welmwood,4734 
-1,1,350,great,10,2257,east_elmwood,2257 -2,1,793,good,6,3298,west_welmwood,3298 -1,1,650,good,14,2034,west_welmwood,2034 -1,1,526,great,8,2423,east_elmwood,2423 -3,2,857,good,2,4709,west_welmwood,4709 -1,1,520,good,5,1945,west_welmwood,1945 -1,1,508,great,6,2417,east_elmwood,2417 -3,2,1158,poor,53,4455,northwest,4160.97 -2,1,835,good,5,3383,west_welmwood,3383 -2,1,851,good,10,3310,west_welmwood,3310 -2,1,784,good,5,3287,west_welmwood,3287 -3,2,852,good,13,4550,west_welmwood,4550 -1,1,508,great,5,2484,east_elmwood,2484 -2,1,527,good,11,2990,west_welmwood,2990 -2,1,715,poor,64,2685,northwest,2448.72 -0,1,134,good,7,1513,west_welmwood,1513 -0,1,541,poor,22,1764,northwest,1756.944 -0,1,501,poor,41,1444,northwest,1383.352 -1,1,622,poor,56,1538,northwest,1427.264 -0,1,401,poor,40,1373,northwest,1318.08 -3,2,1167,poor,50,4535,northwest,4262.9 -0,1,196,poor,45,1133,northwest,1076.35 -3,2,916,good,7,4706,west_welmwood,4706 -1,1,536,good,2,1978,west_welmwood,1978 -0,1,174,good,12,1424,west_welmwood,1424 -3,2,1113,good,7,4869,west_welmwood,4869 -1,1,377,poor,62,1266,northwest,1159.656 -0,1,270,great,2,2134,east_elmwood,2134 -3,2,879,good,5,4670,west_welmwood,4670 -3,2,875,poor,22,4490,northwest,4472.04 -1,1,675,good,2,2189,west_welmwood,2189 -3,2,907,poor,25,4498,northwest,4453.02 -0,1,300,poor,35,1392,northwest,1350.24 -2,1,939,great,11,3895,east_elmwood,3895 -1,1,521,good,1,2004,west_welmwood,2004 -1,1,359,poor,45,1430,northwest,1358.5 -3,2,1246,good,7,4980,west_welmwood,4980 -2,1,530,good,1,3138,west_welmwood,3138 -2,1,501,good,11,2952,west_welmwood,2952 -0,1,307,poor,51,1243,northwest,1165.934 -3,2,1026,good,3,4834,west_welmwood,4834 -2,1,869,good,10,3406,west_welmwood,3406 -0,1,108,poor,45,1025,northwest,973.75 -3,2,1010,good,9,4813,west_welmwood,4813 -0,1,197,good,2,1622,west_welmwood,1622 -0,1,282,poor,38,1311,northwest,1263.804 -3,2,1096,poor,26,4685,northwest,4628.78 -0,1,537,good,12,1817,west_welmwood,1817 -2,1,939,poor,39,3118,northwest,2999.516 
-0,1,503,great,7,2313,east_elmwood,2313 -2,1,914,good,12,3432,west_welmwood,3432 -2,1,799,poor,49,2874,northwest,2707.308 -1,1,594,poor,16,1886,northwest,1886 -2,1,911,good,5,3478,west_welmwood,3478 -0,1,286,good,10,1543,west_welmwood,1543 -0,1,433,good,14,1685,west_welmwood,1685 -1,1,392,good,4,1842,west_welmwood,1842 -0,1,222,good,4,1575,west_welmwood,1575 -2,1,939,poor,36,3162,northwest,3060.816 -2,1,507,good,2,3070,west_welmwood,3070 -1,1,410,poor,18,1736,northwest,1736 -0,1,434,poor,21,1636,northwest,1632.728 -1,1,385,good,5,1883,west_welmwood,1883 -2,1,821,poor,46,2942,northwest,2789.016 -1,1,659,good,5,2136,west_welmwood,2136 -3,2,1167,good,7,4938,west_welmwood,4938 -2,1,836,good,9,3335,west_welmwood,3335 -3,2,1236,poor,57,4563,northwest,4225.338 -0,1,456,poor,33,1564,northwest,1523.336 -0,1,421,poor,44,1365,northwest,1299.48 -0,1,461,poor,26,1562,northwest,1543.256 -3,2,1060,good,0,4875,west_welmwood,4875 -0,1,138,good,2,1491,west_welmwood,1491 -1,1,480,good,1,1924,west_welmwood,1924 -0,1,341,poor,17,1559,northwest,1559 -2,1,570,great,1,3670,east_elmwood,3670 -2,1,580,poor,53,2629,northwest,2455.486 -1,1,520,poor,29,1764,northwest,1732.248 -2,1,686,poor,20,3118,northwest,3118 -3,2,1183,great,3,5521,east_elmwood,5521 -0,1,380,good,2,1800,west_welmwood,1800 -2,1,727,poor,23,3098,northwest,3079.412 -3,2,1097,good,0,4910,west_welmwood,4910 -2,1,703,good,5,3255,west_welmwood,3255 -3,2,1046,great,14,5214,east_elmwood,5214 -0,1,336,great,8,2195,east_elmwood,2195 -2,1,500,poor,33,2778,northwest,2705.772 -1,1,578,poor,16,1906,northwest,1906 -1,1,571,great,3,2562,east_elmwood,2562 -2,1,735,good,11,3213,west_welmwood,3213 -0,1,384,poor,35,1394,northwest,1352.18 -0,1,200,great,6,1996,east_elmwood,1996 -2,1,612,good,9,3169,west_welmwood,3169 -1,1,619,good,2,2126,west_welmwood,2126 -2,1,905,poor,24,3282,northwest,3255.744 -0,1,307,good,3,1679,west_welmwood,1679 -3,2,962,good,1,4824,west_welmwood,4824 -1,1,653,good,4,2089,west_welmwood,2089 
-2,1,638,good,3,3219,west_welmwood,3219 -2,1,666,good,7,3237,west_welmwood,3237 -0,1,235,poor,40,1186,northwest,1138.56 -0,1,487,poor,39,1483,northwest,1426.646 -3,2,919,great,1,5254,east_elmwood,5254 -0,1,504,great,3,2369,east_elmwood,2369 -0,1,532,great,2,2409,east_elmwood,2409 -0,1,216,good,6,1547,west_welmwood,1547 -2,1,848,poor,44,3015,northwest,2870.28 -3,2,1222,poor,15,4924,northwest,4924 -3,2,1159,poor,56,4404,northwest,4086.912 -2,1,712,great,14,3632,east_elmwood,3632 -0,1,411,poor,44,1322,northwest,1258.544 -3,2,1147,poor,19,4809,northwest,4809 -2,1,652,poor,57,2728,northwest,2526.128 -1,1,504,great,13,2416,east_elmwood,2416 -1,1,619,poor,20,1872,northwest,1872 -1,1,728,good,1,2254,west_welmwood,2254 -2,1,878,poor,47,3018,northwest,2855.028 -1,1,680,good,5,2133,west_welmwood,2133 -1,1,719,poor,41,1822,northwest,1745.476 -2,1,923,good,2,3458,west_welmwood,3458 -1,1,424,poor,41,1513,northwest,1449.454 -1,1,397,poor,26,1640,northwest,1620.32 -0,1,131,poor,37,1111,northwest,1073.226 -1,1,461,good,6,1863,west_welmwood,1863 -2,1,835,poor,38,3083,northwest,2972.012 -0,1,320,poor,36,1311,northwest,1269.048 -0,1,415,good,4,1814,west_welmwood,1814 -3,2,1034,good,13,4729,west_welmwood,4729 -2,1,786,poor,28,3144,northwest,3093.696 -3,2,807,poor,59,4091,northwest,3771.902 -2,1,724,good,14,3149,west_welmwood,3149 -2,1,512,good,6,3098,west_welmwood,3098 -2,1,753,great,12,3737,east_elmwood,3737 -1,1,521,good,5,1944,west_welmwood,1944 -0,1,147,good,13,1369,west_welmwood,1369 -1,1,627,poor,55,1610,northwest,1497.3 -0,1,174,good,8,1495,west_welmwood,1495 -1,1,590,good,9,1972,west_welmwood,1972 -1,1,655,good,1,2138,west_welmwood,2138 -0,1,506,poor,26,1601,northwest,1581.788 -3,2,1084,poor,16,4724,northwest,4724 diff --git a/docker/db_images/mysql/sql-scripts/prepare.sql b/docker/db_images/mysql/sql-scripts/prepare.sql deleted file mode 100644 index 6991251982a..00000000000 --- a/docker/db_images/mysql/sql-scripts/prepare.sql +++ /dev/null @@ -1,17 +0,0 @@ -CREATE TABLE 
rentals ( -number_of_rooms INT, -number_of_bathrooms INT, -sqft varchar(25), -location varchar(25), -days_on_market INT, -initial_price FLOAT, -neighborhood varchar(25), -rental_price FLOAT -); - -CREATE USER 'ssl_user'@'172.17.0.1' IDENTIFIED BY 'ssl' REQUIRE SSL; -GRANT ALL ON *.* TO 'ssl_user'@'172.17.0.1'; -FLUSH PRIVILEGES; -ALTER USER 'ssl_user'@'172.17.0.1' REQUIRE X509; - -LOAD DATA INFILE '/home_rentals.csv' INTO TABLE rentals COLUMNS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY '"' LINES TERMINATED BY '\n' IGNORE 1 LINES; diff --git a/docker/db_images/postgres/Dockerfile b/docker/db_images/postgres/Dockerfile deleted file mode 100644 index e59714e2cee..00000000000 --- a/docker/db_images/postgres/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM postgres - -ENV POSTGRES_DB test - -COPY ./sql-scripts/ /docker-entrypoint-initdb.d/ -COPY ./certs/* /var/lib/.postgresql/ -# COPY ./postgresql.conf /var/lib/postgresql/data/ -RUN chown postgres:postgres /var/lib/.postgresql/server.* -COPY home_rentals.csv / diff --git a/docker/db_images/postgres/certs/root.crt b/docker/db_images/postgres/certs/root.crt deleted file mode 100644 index af4b26d70a8..00000000000 --- a/docker/db_images/postgres/certs/root.crt +++ /dev/null @@ -1,77 +0,0 @@ -Certificate: - Data: - Version: 3 (0x2) - Serial Number: - 2d:70:fc:05:14:70:04:d4:bf:22:ec:9d:80:50:fe:2a:91:ce:ca:10 - Signature Algorithm: sha256WithRSAEncryption - Issuer: CN = 127.0.0.1 - Validity - Not Before: Oct 12 17:09:09 2022 GMT - Not After : Oct 9 17:09:09 2032 GMT - Subject: CN = 127.0.0.1 - Subject Public Key Info: - Public Key Algorithm: rsaEncryption - RSA Public-Key: (2048 bit) - Modulus: - 00:ca:38:cf:e9:f5:69:15:40:ec:ad:63:46:c2:fb: - 07:b2:a2:58:3d:ef:ba:15:8c:8d:e2:00:e0:7e:6f: - 8f:30:03:8c:c7:bd:25:59:43:52:a8:2e:b4:9a:1a: - 6f:df:e8:85:a1:77:5e:c9:2b:92:2a:f8:6d:27:37: - 5a:91:bd:4c:23:71:dd:26:a9:89:75:ee:a3:f0:e4: - c7:05:19:d0:73:af:4d:08:dc:f4:39:27:07:61:9a: - 
d9:86:9e:31:6b:a2:f6:79:90:63:d5:d8:33:ff:2a: - da:52:81:33:f4:2f:17:47:21:4f:86:f8:36:48:70: - ba:6a:5e:26:c0:c7:cf:63:0d:37:ea:63:f3:06:f8: - 38:bc:d1:d7:88:0f:dc:78:e7:49:ee:e9:6b:04:a9: - 6a:2d:21:f4:c0:ac:a6:4d:42:9f:2f:10:ca:cb:fe: - 3f:98:13:73:29:90:f5:45:a6:37:7c:6d:ae:64:e2: - 4e:0c:6e:66:f3:38:14:34:88:19:c3:f7:7c:27:d7: - 84:84:01:0b:d9:6e:cf:a4:5d:fd:62:91:fc:9c:eb: - b1:89:8a:cf:a7:ac:7a:16:f0:4a:7a:aa:5f:0d:ec: - 07:70:dd:8d:3a:c5:92:3a:7e:37:77:4f:d5:b4:d1: - 11:86:b6:eb:bd:f4:1f:b6:49:f4:fd:dd:00:0d:66: - 56:b7 - Exponent: 65537 (0x10001) - X509v3 extensions: - X509v3 Subject Key Identifier: - E8:3B:85:AF:0C:42:7B:1C:48:C0:0F:CA:D1:61:52:5B:AC:57:6D:3C - X509v3 Authority Key Identifier: - keyid:E8:3B:85:AF:0C:42:7B:1C:48:C0:0F:CA:D1:61:52:5B:AC:57:6D:3C - - X509v3 Basic Constraints: critical - CA:TRUE - Signature Algorithm: sha256WithRSAEncryption - 5d:da:14:0b:70:07:87:6e:71:5f:49:a3:fd:55:37:ae:d0:c3: - 2e:0e:fc:f4:75:f8:36:ff:72:18:1a:cf:2f:82:30:df:80:5d: - b9:c8:bf:d4:3b:8d:41:75:0b:8e:08:9a:76:fc:91:dd:5c:55: - 1d:18:43:aa:50:9a:c2:44:a3:7d:1c:26:f4:5a:38:f1:24:82: - 59:ab:d2:4a:7b:f2:d3:83:ff:79:98:5b:7d:e2:26:7c:9e:ca: - 7a:b6:a2:3a:58:00:dd:40:0c:df:f1:43:c5:fe:71:e5:60:84: - 97:c6:fa:7a:36:00:5d:8d:09:68:0f:b1:73:0b:3c:78:3a:ee: - b4:52:da:8e:c4:89:02:32:31:56:50:0c:a1:1c:bc:97:2d:67: - ce:6a:8a:13:22:b7:7f:09:59:a3:1a:52:88:5f:0f:34:2e:dd: - 20:cd:96:74:8f:6e:b1:f7:84:5b:a6:52:1c:72:be:14:1b:4e: - f8:15:6c:48:d1:8d:ff:95:dd:82:bf:4e:b2:c9:79:ec:08:94: - 8c:07:19:73:32:9a:d7:45:4b:18:81:17:6e:c4:78:b6:ea:cb: - 26:f6:17:b9:1e:9d:a5:d7:fa:94:8d:71:6c:a3:6f:25:7f:c2: - 2e:3a:3d:b9:93:60:97:d5:59:0c:38:ca:1d:61:3d:95:31:0e: - 12:d3:73:8a ------BEGIN CERTIFICATE----- -MIIDCTCCAfGgAwIBAgIULXD8BRRwBNS/IuydgFD+KpHOyhAwDQYJKoZIhvcNAQEL -BQAwFDESMBAGA1UEAwwJMTI3LjAuMC4xMB4XDTIyMTAxMjE3MDkwOVoXDTMyMTAw -OTE3MDkwOVowFDESMBAGA1UEAwwJMTI3LjAuMC4xMIIBIjANBgkqhkiG9w0BAQEF -AAOCAQ8AMIIBCgKCAQEAyjjP6fVpFUDsrWNGwvsHsqJYPe+6FYyN4gDgfm+PMAOM 
-x70lWUNSqC60mhpv3+iFoXdeySuSKvhtJzdakb1MI3HdJqmJde6j8OTHBRnQc69N -CNz0OScHYZrZhp4xa6L2eZBj1dgz/yraUoEz9C8XRyFPhvg2SHC6al4mwMfPYw03 -6mPzBvg4vNHXiA/ceOdJ7ulrBKlqLSH0wKymTUKfLxDKy/4/mBNzKZD1RaY3fG2u -ZOJODG5m8zgUNIgZw/d8J9eEhAEL2W7PpF39YpH8nOuxiYrPp6x6FvBKeqpfDewH -cN2NOsWSOn43d0/VtNERhrbrvfQftkn0/d0ADWZWtwIDAQABo1MwUTAdBgNVHQ4E -FgQU6DuFrwxCexxIwA/K0WFSW6xXbTwwHwYDVR0jBBgwFoAU6DuFrwxCexxIwA/K -0WFSW6xXbTwwDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAXdoU -C3AHh25xX0mj/VU3rtDDLg789HX4Nv9yGBrPL4Iw34Bduci/1DuNQXULjgiadvyR -3VxVHRhDqlCawkSjfRwm9Fo48SSCWavSSnvy04P/eZhbfeImfJ7KeraiOlgA3UAM -3/FDxf5x5WCEl8b6ejYAXY0JaA+xcws8eDrutFLajsSJAjIxVlAMoRy8ly1nzmqK -EyK3fwlZoxpSiF8PNC7dIM2WdI9usfeEW6ZSHHK+FBtO+BVsSNGN/5Xdgr9Ossl5 -7AiUjAcZczKa10VLGIEXbsR4turLJvYXuR6dpdf6lI1xbKNvJX/CLjo9uZNgl9VZ -DDjKHWE9lTEOEtNzig== ------END CERTIFICATE----- diff --git a/docker/db_images/postgres/certs/server.crt b/docker/db_images/postgres/certs/server.crt deleted file mode 100644 index af4b26d70a8..00000000000 --- a/docker/db_images/postgres/certs/server.crt +++ /dev/null @@ -1,77 +0,0 @@ -Certificate: - Data: - Version: 3 (0x2) - Serial Number: - 2d:70:fc:05:14:70:04:d4:bf:22:ec:9d:80:50:fe:2a:91:ce:ca:10 - Signature Algorithm: sha256WithRSAEncryption - Issuer: CN = 127.0.0.1 - Validity - Not Before: Oct 12 17:09:09 2022 GMT - Not After : Oct 9 17:09:09 2032 GMT - Subject: CN = 127.0.0.1 - Subject Public Key Info: - Public Key Algorithm: rsaEncryption - RSA Public-Key: (2048 bit) - Modulus: - 00:ca:38:cf:e9:f5:69:15:40:ec:ad:63:46:c2:fb: - 07:b2:a2:58:3d:ef:ba:15:8c:8d:e2:00:e0:7e:6f: - 8f:30:03:8c:c7:bd:25:59:43:52:a8:2e:b4:9a:1a: - 6f:df:e8:85:a1:77:5e:c9:2b:92:2a:f8:6d:27:37: - 5a:91:bd:4c:23:71:dd:26:a9:89:75:ee:a3:f0:e4: - c7:05:19:d0:73:af:4d:08:dc:f4:39:27:07:61:9a: - d9:86:9e:31:6b:a2:f6:79:90:63:d5:d8:33:ff:2a: - da:52:81:33:f4:2f:17:47:21:4f:86:f8:36:48:70: - ba:6a:5e:26:c0:c7:cf:63:0d:37:ea:63:f3:06:f8: - 38:bc:d1:d7:88:0f:dc:78:e7:49:ee:e9:6b:04:a9: - 
6a:2d:21:f4:c0:ac:a6:4d:42:9f:2f:10:ca:cb:fe: - 3f:98:13:73:29:90:f5:45:a6:37:7c:6d:ae:64:e2: - 4e:0c:6e:66:f3:38:14:34:88:19:c3:f7:7c:27:d7: - 84:84:01:0b:d9:6e:cf:a4:5d:fd:62:91:fc:9c:eb: - b1:89:8a:cf:a7:ac:7a:16:f0:4a:7a:aa:5f:0d:ec: - 07:70:dd:8d:3a:c5:92:3a:7e:37:77:4f:d5:b4:d1: - 11:86:b6:eb:bd:f4:1f:b6:49:f4:fd:dd:00:0d:66: - 56:b7 - Exponent: 65537 (0x10001) - X509v3 extensions: - X509v3 Subject Key Identifier: - E8:3B:85:AF:0C:42:7B:1C:48:C0:0F:CA:D1:61:52:5B:AC:57:6D:3C - X509v3 Authority Key Identifier: - keyid:E8:3B:85:AF:0C:42:7B:1C:48:C0:0F:CA:D1:61:52:5B:AC:57:6D:3C - - X509v3 Basic Constraints: critical - CA:TRUE - Signature Algorithm: sha256WithRSAEncryption - 5d:da:14:0b:70:07:87:6e:71:5f:49:a3:fd:55:37:ae:d0:c3: - 2e:0e:fc:f4:75:f8:36:ff:72:18:1a:cf:2f:82:30:df:80:5d: - b9:c8:bf:d4:3b:8d:41:75:0b:8e:08:9a:76:fc:91:dd:5c:55: - 1d:18:43:aa:50:9a:c2:44:a3:7d:1c:26:f4:5a:38:f1:24:82: - 59:ab:d2:4a:7b:f2:d3:83:ff:79:98:5b:7d:e2:26:7c:9e:ca: - 7a:b6:a2:3a:58:00:dd:40:0c:df:f1:43:c5:fe:71:e5:60:84: - 97:c6:fa:7a:36:00:5d:8d:09:68:0f:b1:73:0b:3c:78:3a:ee: - b4:52:da:8e:c4:89:02:32:31:56:50:0c:a1:1c:bc:97:2d:67: - ce:6a:8a:13:22:b7:7f:09:59:a3:1a:52:88:5f:0f:34:2e:dd: - 20:cd:96:74:8f:6e:b1:f7:84:5b:a6:52:1c:72:be:14:1b:4e: - f8:15:6c:48:d1:8d:ff:95:dd:82:bf:4e:b2:c9:79:ec:08:94: - 8c:07:19:73:32:9a:d7:45:4b:18:81:17:6e:c4:78:b6:ea:cb: - 26:f6:17:b9:1e:9d:a5:d7:fa:94:8d:71:6c:a3:6f:25:7f:c2: - 2e:3a:3d:b9:93:60:97:d5:59:0c:38:ca:1d:61:3d:95:31:0e: - 12:d3:73:8a ------BEGIN CERTIFICATE----- -MIIDCTCCAfGgAwIBAgIULXD8BRRwBNS/IuydgFD+KpHOyhAwDQYJKoZIhvcNAQEL -BQAwFDESMBAGA1UEAwwJMTI3LjAuMC4xMB4XDTIyMTAxMjE3MDkwOVoXDTMyMTAw -OTE3MDkwOVowFDESMBAGA1UEAwwJMTI3LjAuMC4xMIIBIjANBgkqhkiG9w0BAQEF -AAOCAQ8AMIIBCgKCAQEAyjjP6fVpFUDsrWNGwvsHsqJYPe+6FYyN4gDgfm+PMAOM -x70lWUNSqC60mhpv3+iFoXdeySuSKvhtJzdakb1MI3HdJqmJde6j8OTHBRnQc69N -CNz0OScHYZrZhp4xa6L2eZBj1dgz/yraUoEz9C8XRyFPhvg2SHC6al4mwMfPYw03 -6mPzBvg4vNHXiA/ceOdJ7ulrBKlqLSH0wKymTUKfLxDKy/4/mBNzKZD1RaY3fG2u 
-ZOJODG5m8zgUNIgZw/d8J9eEhAEL2W7PpF39YpH8nOuxiYrPp6x6FvBKeqpfDewH -cN2NOsWSOn43d0/VtNERhrbrvfQftkn0/d0ADWZWtwIDAQABo1MwUTAdBgNVHQ4E -FgQU6DuFrwxCexxIwA/K0WFSW6xXbTwwHwYDVR0jBBgwFoAU6DuFrwxCexxIwA/K -0WFSW6xXbTwwDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAXdoU -C3AHh25xX0mj/VU3rtDDLg789HX4Nv9yGBrPL4Iw34Bduci/1DuNQXULjgiadvyR -3VxVHRhDqlCawkSjfRwm9Fo48SSCWavSSnvy04P/eZhbfeImfJ7KeraiOlgA3UAM -3/FDxf5x5WCEl8b6ejYAXY0JaA+xcws8eDrutFLajsSJAjIxVlAMoRy8ly1nzmqK -EyK3fwlZoxpSiF8PNC7dIM2WdI9usfeEW6ZSHHK+FBtO+BVsSNGN/5Xdgr9Ossl5 -7AiUjAcZczKa10VLGIEXbsR4turLJvYXuR6dpdf6lI1xbKNvJX/CLjo9uZNgl9VZ -DDjKHWE9lTEOEtNzig== ------END CERTIFICATE----- diff --git a/docker/db_images/postgres/certs/server.key b/docker/db_images/postgres/certs/server.key deleted file mode 100644 index f099eab07fc..00000000000 --- a/docker/db_images/postgres/certs/server.key +++ /dev/null @@ -1,28 +0,0 @@ ------BEGIN PRIVATE KEY----- -MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQDKOM/p9WkVQOyt -Y0bC+weyolg977oVjI3iAOB+b48wA4zHvSVZQ1KoLrSaGm/f6IWhd17JK5Iq+G0n -N1qRvUwjcd0mqYl17qPw5McFGdBzr00I3PQ5JwdhmtmGnjFrovZ5kGPV2DP/KtpS -gTP0LxdHIU+G+DZIcLpqXibAx89jDTfqY/MG+Di80deID9x450nu6WsEqWotIfTA -rKZNQp8vEMrL/j+YE3MpkPVFpjd8ba5k4k4MbmbzOBQ0iBnD93wn14SEAQvZbs+k -Xf1ikfyc67GJis+nrHoW8Ep6ql8N7Adw3Y06xZI6fjd3T9W00RGGtuu99B+2SfT9 -3QANZla3AgMBAAECggEBAKvAYOZGpz+D6wDApuMHrlwMAoJoT1XqEL95GzKHsceH -3FPw0/3ibhulv4J7HbXAmW6wIfMebIDOinaaTvQlILuiA8DIkZV2YAtDY71zVVZL -91Olxk+cVGqTyFjLlxHiURCikAiaHl+El/UnA2dLb6vcIN1zhzczd0DMQoHNsthe -aN1jzYTtPBgc7tLKowInbdtjXcGFUc2X/JPt8eRax/YmrS6bqhcO1DtBlTHnTq/h -AACh9n5f/kSM1clej3v7zMSQps4ltE1YFqUKVaQQubHSIHsVu16rP2t8eUTMdxpb -rzBtGdnMoWkU0JouUPEPHp5RSVB1y9i5ENR4QH2kO4ECgYEA7WhKAN4ATcmsKBqS -YsxqhKjFPDOZtNgAo5IPvcDjO7C9kml1+g8m0hIF4RfZpSgxKhPPz9E5kn9dUqh/ -nH+hbJqS7l0ET4AJJJ07aojICfskWB57Boy6k8hW/c3kAsjAgoddOXp6dW1UY/jW -KTgmGPuGACf2tiojc/3d6lgp7GECgYEA2g8ZpbyP5dkCiLHRcrwMjOS03jnAmKEg -ysHlGRrtCsxl4Kwm984lpzxwVko8+RjYCyXumIl900NylTgWUdt4Aiz7Y4VQcrYd 
-buulU0539F4wmtvsvNeLqyFoXrBMjlVKh2w6Ldcq5OwwP4GIvE1HDOb9x2tG123K -pEvZt3GIWhcCgYEAkIe/FcwRb0nprviOSeucpWqBuObJGpmtuecAAktdi0dMAciG -dFepEJa8UY7Nh4hydeVdzGzZmeBEFkBSgBoLUizAarDiIOwXpNy8AV2IOttUAvl7 -eD9QjwuWOimQOEmzOUPjc0ynJLoMeFhTOQDf4LGvoISz466/qqvQ4AwQacECgYEA -l3a5p+esRElSe0SYaYu5Y/xQrbOL1fjF4FCCI8gSMtf7Q+4QOzyT5vAST73NUHEz -T7sPWiDWp3JIGuIUjtNVLp20GFK3qJZbJ3rDIXR12eW6+TYd4KYPu9S+vFbVNmCE -HkDMwJ6eitiMQvBk2/cJjSq9yFD5qhX3Fdwh5EwWHA0CgYAQvdraw3ITVex6Eqo2 -5TNS1GK8AFUIFlpAlY6jQDZvI07CgjzxEcYxqrpCGZW9v+j7AltPwwRtQnLkTtBM -gI5j8SoLTqB2gAMfKe35HNQUfp3PDqJO7ud/0boAbH0TzI7FaDCVAitdMwMpQDup -LbP+EhQTfqMxDyXDtv/jjhCrug== ------END PRIVATE KEY----- diff --git a/docker/db_images/postgres/home_rentals.csv b/docker/db_images/postgres/home_rentals.csv deleted file mode 100755 index d74efda921c..00000000000 --- a/docker/db_images/postgres/home_rentals.csv +++ /dev/null @@ -1,5038 +0,0 @@ -number_of_rooms,number_of_bathrooms,sqft,location,days_on_market,initial_price,neighborhood,rental_price -0,1,484,great,10,2271,south_side,2271 -1,1,674,good,1,2167,downtown,2167 -1,1,554,poor,19,1883,westbrae,1883 -0,1,529,great,3,2431,south_side,2431 -3,2,1219,great,3,5510,south_side,5510 -1,1,398,great,11,2272,south_side,2272 -3,2,1190,poor,58,4463,westbrae,4123.812 -1,1,730,good,0,2224,downtown,2224 -0,1,298,great,9,2104,south_side,2104 -2,1,878,great,8,3861,south_side,3861 -1,1,677,good,14,2041,downtown,2041 -0,1,509,poor,18,1725,westbrae,1725 -0,1,481,poor,49,1388,westbrae,1307.496 -3,2,808,good,1,4677,downtown,4677 -1,1,522,poor,30,1713,westbrae,1678.74 -1,1,533,good,10,1903,downtown,1903 -3,2,937,good,2,4736,downtown,4736 -0,1,258,good,10,1544,downtown,1544 -1,1,630,great,11,2543,south_side,2543 -0,1,397,great,11,2168,south_side,2168 -2,1,932,good,10,3413,downtown,3413 -3,2,1069,good,9,4810,downtown,4810 -0,1,267,poor,32,1302,westbrae,1270.752 -0,1,332,good,6,1697,downtown,1697 -1,1,712,poor,56,1617,westbrae,1500.576 -3,2,1231,poor,25,4873,westbrae,4824.27 
-2,1,818,good,3,3359,downtown,3359 -2,1,805,good,8,3358,downtown,3358 -3,2,1158,poor,44,4601,westbrae,4380.152 -3,2,952,great,7,5207,south_side,5207 -2,1,771,good,8,3305,downtown,3305 -1,1,333,great,6,2284,south_side,2284 -1,1,500,poor,54,1448,westbrae,1349.536 -2,1,690,poor,16,3095,westbrae,3095 -0,1,524,great,13,2317,south_side,2317 -2,1,762,good,1,3323,downtown,3323 -2,1,872,good,14,3375,downtown,3375 -1,1,673,great,7,2604,south_side,2604 -2,1,792,good,5,3390,downtown,3390 -2,1,640,good,8,3153,downtown,3153 -0,1,454,poor,47,1353,westbrae,1279.938 -2,1,932,good,10,3447,downtown,3447 -0,1,340,good,5,1722,downtown,1722 -1,1,595,good,1,2064,downtown,2064 -2,1,558,good,3,3118,downtown,3118 -3,2,823,good,10,4545,downtown,4545 -3,2,1104,poor,16,4750,westbrae,4750 -1,1,543,poor,18,1871,westbrae,1871 -0,1,455,great,13,2205,south_side,2205 -0,1,113,good,13,1378,downtown,1378 -2,1,553,good,8,3073,downtown,3073 -3,2,1030,great,14,5260,south_side,5260 -3,2,1175,great,6,5446,south_side,5446 -1,1,644,great,12,2572,south_side,2572 -1,1,421,great,8,2333,south_side,2333 -2,1,786,poor,63,2716,westbrae,2482.424 -0,1,518,poor,21,1674,westbrae,1670.652 -1,1,532,great,4,2491,south_side,2491 -1,1,533,poor,26,1748,westbrae,1727.024 -1,1,733,poor,56,1671,westbrae,1550.688 -0,1,417,good,9,1687,downtown,1687 -2,1,891,good,3,3504,downtown,3504 -2,1,938,poor,50,3008,westbrae,2827.52 -2,1,688,good,12,3154,downtown,3154 -1,1,504,poor,22,1791,westbrae,1783.836 -1,1,741,good,3,2226,downtown,2226 -2,1,564,good,13,3001,downtown,3001 -2,1,503,good,10,3026,downtown,3026 -2,1,755,good,7,3254,downtown,3254 -0,1,415,poor,39,1472,westbrae,1416.064 -2,1,911,good,6,3492,downtown,3492 -3,2,853,good,4,4710,downtown,4710 -0,1,337,good,1,1775,downtown,1775 -1,1,477,poor,64,1371,westbrae,1250.352 -3,2,1081,great,7,5351,south_side,5351 -0,1,464,poor,21,1645,westbrae,1641.71 -3,2,1219,good,1,5030,downtown,5030 -3,2,1106,good,10,4833,downtown,4833 -2,1,891,good,9,3371,downtown,3371 
-1,1,518,good,6,2005,downtown,2005 -0,1,245,great,4,2094,south_side,2094 -3,2,1216,great,5,5495,south_side,5495 -0,1,381,poor,28,1483,westbrae,1459.272 -2,1,819,great,7,3806,south_side,3806 -2,1,787,good,9,3332,downtown,3332 -3,2,936,good,2,4738,downtown,4738 -2,1,740,good,6,3294,downtown,3294 -3,2,1215,great,13,5467,south_side,5467 -2,1,853,poor,40,3045,westbrae,2923.2 -2,1,942,poor,59,2977,westbrae,2744.794 -3,2,1204,good,7,5016,downtown,5016 -3,2,1098,great,10,5386,south_side,5386 -1,1,741,good,12,2170,downtown,2170 -1,1,603,great,10,2508,south_side,2508 -3,2,1074,good,12,4796,downtown,4796 -1,1,588,good,14,1961,downtown,1961 -0,1,334,poor,48,1243,westbrae,1173.392 -2,1,736,great,2,3854,south_side,3854 -3,2,1056,poor,54,4408,westbrae,4108.256 -1,1,625,great,2,2578,south_side,2578 -2,1,530,poor,47,2613,westbrae,2471.898 -2,1,626,great,4,3693,south_side,3693 -1,1,736,poor,30,1891,westbrae,1853.18 -3,2,882,good,10,4659,downtown,4659 -2,1,646,good,1,3280,downtown,3280 -0,1,231,good,2,1650,downtown,1650 -0,1,157,poor,36,1172,westbrae,1134.496 -1,1,489,good,9,1916,downtown,1916 -0,1,464,good,11,1793,downtown,1793 -1,1,320,great,8,2268,south_side,2268 -0,1,390,great,4,2230,south_side,2230 -2,1,682,great,5,3705,south_side,3705 -2,1,729,great,2,3856,south_side,3856 -3,2,837,poor,23,4439,westbrae,4412.366 -3,2,933,poor,22,4557,westbrae,4538.772 -1,1,606,great,14,2454,south_side,2454 -2,1,663,great,3,3691,south_side,3691 -1,1,407,good,3,1894,downtown,1894 -1,1,692,good,5,2115,downtown,2115 -3,2,975,great,6,5271,south_side,5271 -2,1,726,good,11,3242,downtown,3242 -1,1,329,good,10,1755,downtown,1755 -1,1,418,good,3,1907,downtown,1907 -1,1,627,good,2,2060,downtown,2060 -0,1,435,poor,26,1553,westbrae,1534.364 -2,1,533,great,2,3645,south_side,3645 -3,2,820,poor,23,4430,westbrae,4403.42 -2,1,935,great,13,3880,south_side,3880 -0,1,144,great,4,1979,south_side,1979 -2,1,872,poor,61,2884,westbrae,2647.512 -2,1,646,poor,53,2750,westbrae,2568.5 -0,1,141,poor,59,940,westbrae,866.68 
-3,2,816,poor,25,4427,westbrae,4382.73 -0,1,320,poor,36,1322,westbrae,1279.696 -3,2,1143,poor,18,4846,westbrae,4846 -3,2,1087,good,2,4898,downtown,4898 -0,1,369,poor,43,1349,westbrae,1286.946 -1,1,358,good,3,1866,downtown,1866 -0,1,539,good,3,1899,downtown,1899 -2,1,885,poor,50,2974,westbrae,2795.56 -2,1,938,good,7,3497,downtown,3497 -0,1,488,poor,49,1418,westbrae,1335.756 -3,2,1157,good,10,4862,downtown,4862 -3,2,902,poor,29,4456,westbrae,4375.792 -2,1,623,poor,26,2961,westbrae,2925.468 -2,1,803,good,8,3279,downtown,3279 -3,2,1006,poor,44,4416,westbrae,4204.032 -3,2,916,poor,36,4378,westbrae,4237.904 -3,2,1180,good,5,5021,downtown,5021 -0,1,461,poor,31,1547,westbrae,1512.966 -0,1,390,poor,26,1531,westbrae,1512.628 -0,1,271,good,14,1557,downtown,1557 -2,1,745,poor,37,2951,westbrae,2850.666 -3,2,1080,good,14,4771,downtown,4771 -3,2,963,good,3,4734,downtown,4734 -2,1,901,poor,24,3226,westbrae,3200.192 -0,1,307,great,7,2151,south_side,2151 -3,2,871,good,11,4626,downtown,4626 -1,1,649,poor,46,1678,westbrae,1590.744 -0,1,459,poor,37,1464,westbrae,1414.224 -1,1,624,poor,31,1847,westbrae,1806.366 -1,1,350,good,11,1759,downtown,1759 -1,1,600,good,11,2009,downtown,2009 -0,1,541,good,8,1894,downtown,1894 -3,2,939,poor,29,4484,westbrae,4403.288 -2,1,606,great,4,3675,south_side,3675 -3,2,1139,good,11,4861,downtown,4861 -3,2,1234,good,3,5052,downtown,5052 -3,2,1150,good,2,4982,downtown,4982 -3,2,996,good,13,4758,downtown,4758 -2,1,559,great,1,3657,south_side,3657 -1,1,447,good,12,1784,downtown,1784 -1,1,698,poor,23,1998,westbrae,1986.012 -0,1,371,good,13,1639,downtown,1639 -0,1,425,poor,34,1449,westbrae,1408.428 -3,2,948,great,8,5224,south_side,5224 -0,1,441,poor,19,1613,westbrae,1613 -0,1,112,good,7,1487,downtown,1487 -2,1,876,poor,15,3347,westbrae,3347 -3,2,860,good,4,4631,downtown,4631 -0,1,312,poor,26,1479,westbrae,1461.252 -0,1,233,great,10,2079,south_side,2079 -1,1,743,great,3,2732,south_side,2732 -2,1,564,good,6,3137,downtown,3137 -0,1,335,good,12,1609,downtown,1609 
-2,1,879,great,10,3923,south_side,3923 -3,2,1001,poor,57,4245,westbrae,3930.87 -1,1,721,great,3,2701,south_side,2701 -0,1,547,good,2,1936,downtown,1936 -2,1,871,good,3,3434,downtown,3434 -2,1,904,great,8,3938,south_side,3938 -0,1,150,poor,60,957,westbrae,880.44 -2,1,892,poor,18,3303,westbrae,3303 -2,1,886,good,10,3396,downtown,3396 -2,1,619,great,0,3671,south_side,3671 -2,1,598,poor,16,3049,westbrae,3049 -2,1,883,good,8,3411,downtown,3411 -1,1,443,good,10,1820,downtown,1820 -3,2,969,poor,56,4236,westbrae,3931.008 -0,1,457,good,4,1796,downtown,1796 -3,2,1175,good,0,4999,downtown,4999 -0,1,511,great,1,2431,south_side,2431 -1,1,514,poor,51,1536,westbrae,1440.768 -3,2,916,good,3,4716,downtown,4716 -1,1,625,poor,42,1699,westbrae,1624.244 -0,1,442,good,5,1779,downtown,1779 -0,1,310,poor,20,1465,westbrae,1465 -3,2,865,good,0,4742,downtown,4742 -3,2,819,great,12,5049,south_side,5049 -1,1,598,great,7,2499,south_side,2499 -0,1,383,good,11,1707,downtown,1707 -0,1,145,good,6,1442,downtown,1442 -2,1,878,good,8,3379,downtown,3379 -2,1,557,poor,29,2904,westbrae,2851.728 -0,1,505,good,2,1863,downtown,1863 -2,1,646,great,0,3727,south_side,3727 -0,1,123,great,9,1976,south_side,1976 -0,1,375,poor,17,1620,westbrae,1620 -3,2,1124,great,9,5420,south_side,5420 -1,1,704,good,14,2081,downtown,2081 -0,1,294,great,1,2221,south_side,2221 -2,1,850,good,2,3437,downtown,3437 -0,1,376,good,1,1766,downtown,1766 -0,1,150,poor,43,1083,westbrae,1033.182 -3,2,1054,poor,46,4411,westbrae,4181.628 -0,1,191,good,12,1492,downtown,1492 -0,1,300,good,8,1667,downtown,1667 -0,1,283,good,7,1582,downtown,1582 -0,1,454,good,13,1753,downtown,1753 -2,1,589,good,7,3077,downtown,3077 -2,1,906,good,9,3421,downtown,3421 -0,1,222,poor,58,1036,westbrae,957.264 -3,2,1088,poor,52,4393,westbrae,4111.848 -0,1,410,good,3,1740,downtown,1740 -1,1,597,good,9,1978,downtown,1978 -3,2,1006,poor,27,4580,westbrae,4515.88 -1,1,499,poor,38,1623,westbrae,1564.572 -0,1,149,poor,47,1046,westbrae,989.516 -0,1,426,good,3,1783,downtown,1783 
-0,1,361,great,2,2210,south_side,2210 -3,2,880,good,2,4726,downtown,4726 -2,1,935,good,4,3477,downtown,3477 -1,1,363,good,12,1765,downtown,1765 -3,2,1037,poor,35,4578,westbrae,4440.66 -3,2,1145,good,11,4929,downtown,4929 -1,1,435,good,8,1855,downtown,1855 -3,2,1114,great,9,5363,south_side,5363 -3,2,1055,good,0,4933,downtown,4933 -1,1,503,good,9,1866,downtown,1866 -3,2,816,good,8,4594,downtown,4594 -1,1,435,good,6,1884,downtown,1884 -1,1,609,poor,15,1929,westbrae,1929 -3,2,984,good,0,4855,downtown,4855 -1,1,602,good,2,2043,downtown,2043 -2,1,892,good,3,3435,downtown,3435 -0,1,397,good,11,1698,downtown,1698 -3,2,871,poor,31,4397,westbrae,4300.266 -2,1,653,good,9,3147,downtown,3147 -1,1,748,poor,62,1631,westbrae,1493.996 -0,1,235,great,14,2000,south_side,2000 -3,2,1133,good,9,4874,downtown,4874 -0,1,272,poor,57,1117,westbrae,1034.342 -0,1,532,poor,63,1316,westbrae,1202.824 -2,1,855,good,9,3362,downtown,3362 -3,2,1165,good,9,4902,downtown,4902 -3,2,1085,great,13,5279,south_side,5279 -0,1,195,great,6,2039,south_side,2039 -1,1,409,great,4,2380,south_side,2380 -3,2,1186,poor,62,4437,westbrae,4064.292 -1,1,476,great,8,2370,south_side,2370 -1,1,438,great,6,2332,south_side,2332 -3,2,1182,good,6,4994,downtown,4994 -1,1,305,good,14,1675,downtown,1675 -3,2,1179,good,7,4977,downtown,4977 -3,2,1109,good,9,4906,downtown,4906 -0,1,322,good,13,1573,downtown,1573 -2,1,822,good,8,3374,downtown,3374 -0,1,525,good,0,1875,downtown,1875 -3,2,1022,poor,60,4300,westbrae,3956 -0,1,303,poor,48,1179,westbrae,1112.976 -1,1,656,poor,31,1872,westbrae,1830.816 -3,2,1212,poor,16,4906,westbrae,4906 -1,1,743,poor,44,1756,westbrae,1671.712 -1,1,354,good,0,1859,downtown,1859 -1,1,579,poor,51,1550,westbrae,1453.9 -1,1,327,poor,63,1201,westbrae,1097.714 -2,1,550,good,1,3155,downtown,3155 -3,2,914,good,0,4805,downtown,4805 -3,2,1225,poor,56,4504,westbrae,4179.712 -1,1,714,good,8,2135,downtown,2135 -2,1,679,good,1,3251,downtown,3251 -1,1,719,poor,64,1596,alcatraz_ave,1455.552 
-0,1,461,great,6,2269,berkeley_hills,2269 -3,2,1097,good,7,4832,thowsand_oaks,4832 -2,1,525,good,3,3058,thowsand_oaks,3058 -3,2,850,good,1,4718,thowsand_oaks,4718 -3,2,1183,poor,23,4801,alcatraz_ave,4772.194 -3,2,915,great,1,5240,berkeley_hills,5240 -3,2,972,good,5,4794,thowsand_oaks,4794 -3,2,903,poor,27,4528,alcatraz_ave,4464.608 -2,1,738,good,11,3277,thowsand_oaks,3277 -1,1,608,poor,59,1477,alcatraz_ave,1361.794 -2,1,908,poor,49,3013,alcatraz_ave,2838.246 -3,2,961,good,9,4764,thowsand_oaks,4764 -0,1,225,good,3,1626,thowsand_oaks,1626 -1,1,635,good,1,2111,thowsand_oaks,2111 -1,1,519,good,8,1956,thowsand_oaks,1956 -2,1,917,great,13,3901,berkeley_hills,3901 -3,2,1094,great,12,5323,berkeley_hills,5323 -0,1,296,great,5,2151,berkeley_hills,2151 -2,1,536,poor,43,2751,alcatraz_ave,2624.454 -3,2,863,great,4,5151,berkeley_hills,5151 -3,2,953,good,10,4682,thowsand_oaks,4682 -0,1,451,poor,60,1234,alcatraz_ave,1135.28 -3,2,1236,good,11,4993,thowsand_oaks,4993 -2,1,624,poor,47,2738,alcatraz_ave,2590.148 -3,2,902,poor,42,4379,alcatraz_ave,4186.324 -0,1,310,good,12,1638,thowsand_oaks,1638 -2,1,882,good,13,3322,thowsand_oaks,3322 -2,1,939,good,12,3437,thowsand_oaks,3437 -2,1,659,good,14,3166,thowsand_oaks,3166 -2,1,740,good,1,3329,thowsand_oaks,3329 -2,1,697,good,3,3240,thowsand_oaks,3240 -1,1,580,poor,45,1593,alcatraz_ave,1513.35 -0,1,102,great,8,1895,berkeley_hills,1895 -2,1,921,poor,53,3026,alcatraz_ave,2826.284 -1,1,380,poor,30,1577,alcatraz_ave,1545.46 -3,2,869,great,2,5152,berkeley_hills,5152 -1,1,727,good,11,2078,thowsand_oaks,2078 -0,1,240,poor,45,1152,alcatraz_ave,1094.4 -0,1,535,good,14,1830,thowsand_oaks,1830 -2,1,677,poor,49,2778,alcatraz_ave,2616.876 -0,1,187,good,0,1566,thowsand_oaks,1566 -2,1,919,good,12,3419,thowsand_oaks,3419 -3,2,1171,poor,25,4775,alcatraz_ave,4727.25 -3,2,1121,good,10,4877,thowsand_oaks,4877 -0,1,124,good,6,1441,thowsand_oaks,1441 -1,1,326,good,9,1782,thowsand_oaks,1782 -0,1,469,good,14,1713,thowsand_oaks,1713 
-3,2,1102,poor,64,4297,alcatraz_ave,3918.864 -0,1,379,poor,28,1534,alcatraz_ave,1509.456 -3,2,983,great,14,5145,berkeley_hills,5145 -1,1,686,good,4,2175,thowsand_oaks,2175 -0,1,194,great,10,2042,berkeley_hills,2042 -2,1,890,good,6,3476,thowsand_oaks,3476 -0,1,497,good,4,1877,thowsand_oaks,1877 -3,2,831,good,4,4621,thowsand_oaks,4621 -1,1,472,poor,17,1771,alcatraz_ave,1771 -3,2,1032,poor,51,4394,alcatraz_ave,4121.572 -0,1,398,great,12,2151,berkeley_hills,2151 -3,2,913,great,4,5269,berkeley_hills,5269 -0,1,438,great,8,2273,berkeley_hills,2273 -0,1,220,good,3,1548,thowsand_oaks,1548 -3,2,1009,great,8,5326,berkeley_hills,5326 -1,1,649,great,5,2563,berkeley_hills,2563 -0,1,499,good,5,1839,thowsand_oaks,1839 -2,1,677,good,0,3232,thowsand_oaks,3232 -3,2,926,good,13,4679,thowsand_oaks,4679 -3,2,1005,poor,59,4255,alcatraz_ave,3923.11 -0,1,137,good,9,1479,thowsand_oaks,1479 -3,2,1066,good,13,4774,thowsand_oaks,4774 -0,1,436,good,14,1658,thowsand_oaks,1658 -2,1,773,good,12,3207,thowsand_oaks,3207 -3,2,1038,great,8,5318,berkeley_hills,5318 -0,1,103,good,12,1413,thowsand_oaks,1413 -0,1,498,good,10,1801,thowsand_oaks,1801 -0,1,432,poor,55,1303,alcatraz_ave,1211.79 -0,1,127,poor,61,923,alcatraz_ave,847.314 -1,1,490,great,6,2452,berkeley_hills,2452 -3,2,987,great,6,5298,berkeley_hills,5298 -1,1,435,poor,49,1457,alcatraz_ave,1372.494 -2,1,606,good,11,3068,thowsand_oaks,3068 -1,1,638,poor,41,1774,alcatraz_ave,1699.492 -3,2,944,great,2,5321,berkeley_hills,5321 -1,1,601,poor,47,1637,alcatraz_ave,1548.602 -3,2,1024,poor,17,4745,alcatraz_ave,4745 -1,1,729,poor,63,1554,alcatraz_ave,1420.356 -3,2,1226,great,11,5436,berkeley_hills,5436 -0,1,519,good,12,1760,thowsand_oaks,1760 -0,1,399,good,9,1672,thowsand_oaks,1672 -2,1,665,good,12,3106,thowsand_oaks,3106 -2,1,566,great,9,3569,berkeley_hills,3569 -2,1,549,poor,19,2997,alcatraz_ave,2997 -2,1,934,good,2,3529,thowsand_oaks,3529 -0,1,500,poor,42,1482,alcatraz_ave,1416.792 -0,1,111,great,9,1878,berkeley_hills,1878 
-3,2,848,good,6,4599,thowsand_oaks,4599 -0,1,305,good,3,1687,thowsand_oaks,1687 -0,1,361,poor,56,1245,alcatraz_ave,1155.36 -1,1,565,good,6,2038,thowsand_oaks,2038 -0,1,283,poor,54,1115,alcatraz_ave,1039.18 -2,1,505,great,11,3478,berkeley_hills,3478 -0,1,228,great,10,2067,berkeley_hills,2067 -3,2,1119,poor,54,4405,alcatraz_ave,4105.46 -0,1,267,good,2,1635,thowsand_oaks,1635 -3,2,916,good,0,4727,thowsand_oaks,4727 -2,1,903,poor,58,2923,alcatraz_ave,2700.852 -3,2,1046,good,13,4722,thowsand_oaks,4722 -1,1,513,good,11,1924,thowsand_oaks,1924 -3,2,1211,good,1,5043,thowsand_oaks,5043 -0,1,105,poor,28,1207,alcatraz_ave,1187.688 -2,1,553,good,1,3177,thowsand_oaks,3177 -3,2,1044,good,9,4847,thowsand_oaks,4847 -0,1,492,poor,64,1265,alcatraz_ave,1153.68 -2,1,847,good,7,3419,thowsand_oaks,3419 -0,1,232,poor,62,987,alcatraz_ave,904.092 -2,1,774,great,3,3875,berkeley_hills,3875 -2,1,582,poor,60,2591,alcatraz_ave,2383.72 -2,1,652,good,0,3212,thowsand_oaks,3212 -2,1,538,good,10,2988,thowsand_oaks,2988 -0,1,391,poor,53,1245,alcatraz_ave,1162.83 -1,1,723,great,3,2649,berkeley_hills,2649 -3,2,1043,great,2,5356,berkeley_hills,5356 -3,2,893,poor,38,4391,alcatraz_ave,4232.924 -2,1,608,good,9,3109,thowsand_oaks,3109 -0,1,511,great,12,2242,berkeley_hills,2242 -2,1,791,great,1,3870,berkeley_hills,3870 -2,1,792,poor,33,3014,alcatraz_ave,2935.636 -0,1,110,poor,20,1315,alcatraz_ave,1315 -1,1,616,poor,17,1943,alcatraz_ave,1943 -2,1,505,good,2,3080,thowsand_oaks,3080 -1,1,411,good,8,1810,thowsand_oaks,1810 -2,1,846,good,12,3282,thowsand_oaks,3282 -3,2,965,good,4,4811,thowsand_oaks,4811 -1,1,440,good,6,1847,thowsand_oaks,1847 -1,1,386,poor,56,1345,alcatraz_ave,1248.16 -3,2,1247,good,11,4974,thowsand_oaks,4974 -3,2,1058,great,11,5336,berkeley_hills,5336 -2,1,587,poor,63,2572,alcatraz_ave,2350.808 -0,1,145,great,1,2049,berkeley_hills,2049 -3,2,928,poor,47,4324,alcatraz_ave,4090.504 -2,1,735,good,4,3288,thowsand_oaks,3288 -1,1,433,poor,51,1430,alcatraz_ave,1341.34 
-3,2,1009,good,10,4760,thowsand_oaks,4760 -0,1,369,poor,42,1308,alcatraz_ave,1250.448 -1,1,654,poor,36,1841,alcatraz_ave,1782.088 -0,1,144,good,5,1531,thowsand_oaks,1531 -0,1,425,great,13,2162,berkeley_hills,2162 -1,1,509,poor,20,1765,alcatraz_ave,1765 -2,1,615,good,9,3085,thowsand_oaks,3085 -1,1,519,great,6,2453,berkeley_hills,2453 -3,2,1235,great,1,5602,berkeley_hills,5602 -0,1,434,good,0,1800,thowsand_oaks,1800 -3,2,1028,poor,15,4693,alcatraz_ave,4693 -2,1,852,good,12,3369,thowsand_oaks,3369 -3,2,1039,good,1,4887,thowsand_oaks,4887 -3,2,1020,great,5,5319,berkeley_hills,5319 -0,1,354,great,13,2162,berkeley_hills,2162 -3,2,1034,great,12,5259,berkeley_hills,5259 -3,2,800,good,2,4618,thowsand_oaks,4618 -2,1,815,good,0,3436,thowsand_oaks,3436 -0,1,350,poor,61,1096,alcatraz_ave,1006.128 -0,1,489,great,6,2307,berkeley_hills,2307 -1,1,744,poor,37,1919,alcatraz_ave,1853.754 -3,2,874,good,1,4671,thowsand_oaks,4671 -0,1,392,great,7,2205,berkeley_hills,2205 -0,1,485,great,12,2285,berkeley_hills,2285 -0,1,500,poor,27,1632,alcatraz_ave,1609.152 -3,2,1239,poor,24,4876,alcatraz_ave,4836.992 -1,1,702,good,13,2068,thowsand_oaks,2068 -0,1,467,good,1,1869,thowsand_oaks,1869 -2,1,564,good,7,3119,thowsand_oaks,3119 -1,1,560,great,2,2497,berkeley_hills,2497 -0,1,219,good,8,1529,thowsand_oaks,1529 -3,2,1154,great,11,5396,berkeley_hills,5396 -1,1,309,good,2,1783,thowsand_oaks,1783 -2,1,655,poor,32,2921,alcatraz_ave,2850.896 -1,1,338,good,3,1835,thowsand_oaks,1835 -3,2,908,good,9,4620,thowsand_oaks,4620 -1,1,518,poor,60,1385,alcatraz_ave,1274.2 -0,1,279,good,11,1601,thowsand_oaks,1601 -0,1,267,good,7,1580,thowsand_oaks,1580 -2,1,587,good,14,3062,thowsand_oaks,3062 -1,1,648,poor,62,1557,alcatraz_ave,1426.212 -2,1,845,poor,37,3100,alcatraz_ave,2994.6 -2,1,549,great,6,3575,berkeley_hills,3575 -1,1,660,good,8,2092,thowsand_oaks,2092 -2,1,585,good,10,3060,thowsand_oaks,3060 -0,1,279,good,3,1681,thowsand_oaks,1681 -0,1,538,poor,43,1490,alcatraz_ave,1421.46 
-2,1,932,poor,53,3006,alcatraz_ave,2807.604 -2,1,814,great,5,3868,berkeley_hills,3868 -2,1,709,poor,27,3018,alcatraz_ave,2975.748 -2,1,896,good,5,3483,thowsand_oaks,3483 -3,2,1140,good,10,4853,thowsand_oaks,4853 -3,2,1065,poor,32,4633,alcatraz_ave,4521.808 -2,1,735,poor,33,2980,alcatraz_ave,2902.52 -2,1,613,poor,39,2847,alcatraz_ave,2738.814 -1,1,666,poor,38,1775,alcatraz_ave,1711.1 -1,1,300,good,5,1739,thowsand_oaks,1739 -3,2,959,great,3,5286,berkeley_hills,5286 -0,1,135,great,1,2026,berkeley_hills,2026 -1,1,568,poor,32,1722,alcatraz_ave,1680.672 -2,1,824,poor,47,2962,alcatraz_ave,2802.052 -1,1,493,poor,24,1798,alcatraz_ave,1783.616 -2,1,666,great,10,3671,berkeley_hills,3671 -1,1,432,good,14,1782,thowsand_oaks,1782 -2,1,768,great,7,3795,berkeley_hills,3795 -0,1,223,poor,57,1068,alcatraz_ave,988.968 -1,1,568,poor,15,1885,alcatraz_ave,1885 -0,1,503,good,9,1839,thowsand_oaks,1839 -3,2,872,poor,53,4211,alcatraz_ave,3933.074 -1,1,407,great,3,2338,berkeley_hills,2338 -0,1,150,great,3,2041,berkeley_hills,2041 -1,1,701,good,10,2074,thowsand_oaks,2074 -3,2,811,poor,47,4193,alcatraz_ave,3966.578 -3,2,1086,poor,25,4639,alcatraz_ave,4592.61 -3,2,1002,good,2,4824,thowsand_oaks,4824 -1,1,618,poor,16,1954,alcatraz_ave,1954 -2,1,884,great,9,3855,berkeley_hills,3855 -3,2,1125,good,8,4895,thowsand_oaks,4895 -1,1,320,good,1,1789,thowsand_oaks,1789 -2,1,853,good,12,3336,thowsand_oaks,3336 -1,1,399,great,12,2236,berkeley_hills,2236 -3,2,980,great,8,5239,berkeley_hills,5239 -0,1,400,poor,24,1527,alcatraz_ave,1514.784 -2,1,836,great,5,3927,berkeley_hills,3927 -3,2,1175,poor,57,4465,alcatraz_ave,4134.59 -3,2,1094,good,6,4883,thowsand_oaks,4883 -2,1,925,good,13,3355,thowsand_oaks,3355 -2,1,736,poor,60,2708,alcatraz_ave,2491.36 -2,1,729,good,9,3280,thowsand_oaks,3280 -1,1,540,poor,60,1463,alcatraz_ave,1345.96 -3,2,858,poor,31,4366,alcatraz_ave,4269.948 -1,1,570,great,13,2421,berkeley_hills,2421 -2,1,664,great,0,3749,berkeley_hills,3749 -0,1,378,good,13,1653,thowsand_oaks,1653 
-1,1,670,good,3,2149,thowsand_oaks,2149 -0,1,474,great,4,2382,berkeley_hills,2382 -3,2,1157,great,10,5453,berkeley_hills,5453 -1,1,417,great,11,2276,berkeley_hills,2276 -0,1,197,good,7,1576,thowsand_oaks,1576 -1,1,668,good,0,2191,thowsand_oaks,2191 -0,1,123,good,1,1485,thowsand_oaks,1485 -3,2,875,poor,58,4096,alcatraz_ave,3784.704 -3,2,1131,good,6,4957,thowsand_oaks,4957 -1,1,339,poor,22,1660,alcatraz_ave,1653.36 -2,1,921,good,5,3473,thowsand_oaks,3473 -3,2,894,poor,57,4200,alcatraz_ave,3889.2 -1,1,594,poor,57,1487,alcatraz_ave,1376.962 -2,1,935,great,14,3884,berkeley_hills,3884 -0,1,523,poor,38,1561,alcatraz_ave,1504.804 -0,1,535,poor,45,1451,alcatraz_ave,1378.45 -1,1,585,poor,56,1509,alcatraz_ave,1400.352 -1,1,385,poor,28,1639,alcatraz_ave,1612.776 -2,1,578,poor,16,3065,alcatraz_ave,3065 -1,1,678,good,14,2081,thowsand_oaks,2081 -0,1,493,poor,24,1636,alcatraz_ave,1622.912 -2,1,874,good,8,3359,thowsand_oaks,3359 -1,1,328,great,0,2285,berkeley_hills,2285 -0,1,549,poor,59,1397,alcatraz_ave,1288.034 -1,1,519,good,1,2014,thowsand_oaks,2014 -2,1,554,good,13,2996,thowsand_oaks,2996 -0,1,283,great,10,2072,berkeley_hills,2072 -1,1,557,good,14,1960,thowsand_oaks,1960 -0,1,148,great,6,2028,berkeley_hills,2028 -1,1,560,good,9,1992,thowsand_oaks,1992 -3,2,824,poor,49,4217,alcatraz_ave,3972.414 -0,1,177,good,7,1471,thowsand_oaks,1471 -2,1,776,great,2,3881,berkeley_hills,3881 -1,1,332,poor,15,1703,alcatraz_ave,1703 -1,1,722,great,13,2633,berkeley_hills,2633 -2,1,641,great,3,3751,berkeley_hills,3751 -3,2,829,poor,16,4508,alcatraz_ave,4508 -2,1,647,poor,19,3028,alcatraz_ave,3028 -2,1,619,good,5,3208,thowsand_oaks,3208 -0,1,188,good,7,1477,thowsand_oaks,1477 -3,2,1146,good,14,4836,thowsand_oaks,4836 -1,1,338,good,10,1783,thowsand_oaks,1783 -3,2,1232,good,0,5124,thowsand_oaks,5124 -3,2,1054,great,8,5335,berkeley_hills,5335 -2,1,916,great,3,3974,berkeley_hills,3974 -0,1,169,poor,24,1361,alcatraz_ave,1350.112 -1,1,431,good,13,1775,thowsand_oaks,1775 
-3,2,840,great,9,5130,berkeley_hills,5130 -3,2,926,good,0,4820,thowsand_oaks,4820 -3,2,1245,great,8,5506,berkeley_hills,5506 -2,1,694,poor,41,2874,alcatraz_ave,2753.292 -3,2,881,great,0,5199,berkeley_hills,5199 -3,2,1199,poor,62,4418,alcatraz_ave,4046.888 -2,1,913,great,12,3901,berkeley_hills,3901 -0,1,409,poor,60,1190,alcatraz_ave,1094.8 -1,1,627,great,13,2505,berkeley_hills,2505 -1,1,510,great,10,2438,berkeley_hills,2438 -3,2,1218,poor,20,4906,alcatraz_ave,4906 -2,1,778,poor,34,3057,alcatraz_ave,2971.404 -3,2,891,good,12,4644,thowsand_oaks,4644 -2,1,587,poor,27,2928,alcatraz_ave,2887.008 -3,2,1020,good,4,4825,thowsand_oaks,4825 -3,2,1226,good,11,4936,thowsand_oaks,4936 -2,1,696,good,10,3182,thowsand_oaks,3182 -1,1,486,great,4,2412,berkeley_hills,2412 -1,1,426,good,12,1810,thowsand_oaks,1810 -2,1,845,poor,32,3121,alcatraz_ave,3046.096 -1,1,689,good,5,2089,thowsand_oaks,2089 -2,1,918,great,10,3929,berkeley_hills,3929 -3,2,1104,poor,43,4573,alcatraz_ave,4362.642 -3,2,1182,good,11,4944,thowsand_oaks,4944 -1,1,461,good,2,1965,thowsand_oaks,1965 -0,1,461,good,1,1819,thowsand_oaks,1819 -3,2,1206,good,2,5015,thowsand_oaks,5015 -0,1,237,poor,21,1409,alcatraz_ave,1406.182 -1,1,451,poor,41,1520,alcatraz_ave,1456.16 -2,1,903,poor,26,3193,alcatraz_ave,3154.684 -2,1,560,good,5,3089,thowsand_oaks,3089 -1,1,406,poor,55,1397,alcatraz_ave,1299.21 -0,1,547,poor,29,1641,alcatraz_ave,1611.462 -1,1,533,good,1,2000,thowsand_oaks,2000 -1,1,588,poor,50,1623,alcatraz_ave,1525.62 -1,1,471,poor,53,1450,northwest,1354.3 -3,2,1066,great,8,5301,east_elmwood,5301 -1,1,503,poor,17,1860,northwest,1860 -3,2,1024,good,14,4704,west_welmwood,4704 -0,1,323,poor,59,1127,northwest,1039.094 -1,1,687,poor,61,1616,northwest,1483.488 -3,2,995,good,2,4861,west_welmwood,4861 -0,1,357,good,3,1701,west_welmwood,1701 -2,1,723,poor,15,3170,northwest,3170 -3,2,841,good,11,4609,west_welmwood,4609 -0,1,276,great,4,2104,east_elmwood,2104 -3,2,1077,poor,62,4344,northwest,3979.104 
-0,1,304,good,0,1683,west_welmwood,1683 -3,2,990,good,6,4820,west_welmwood,4820 -3,2,863,good,12,4600,west_welmwood,4600 -0,1,119,poor,25,1264,northwest,1251.36 -0,1,234,good,12,1492,west_welmwood,1492 -0,1,157,good,1,1536,west_welmwood,1536 -0,1,524,great,10,2358,east_elmwood,2358 -0,1,502,poor,47,1436,northwest,1358.456 -1,1,449,poor,51,1472,northwest,1380.736 -2,1,592,good,12,3074,west_welmwood,3074 -1,1,387,good,11,1813,west_welmwood,1813 -2,1,873,good,3,3435,west_welmwood,3435 -3,2,827,good,0,4632,west_welmwood,4632 -1,1,593,poor,63,1505,northwest,1375.57 -1,1,479,good,2,1995,west_welmwood,1995 -2,1,788,good,0,3396,west_welmwood,3396 -1,1,737,good,9,2146,west_welmwood,2146 -1,1,467,good,2,1979,west_welmwood,1979 -0,1,424,good,1,1813,west_welmwood,1813 -1,1,471,great,5,2402,east_elmwood,2402 -1,1,547,good,5,2040,west_welmwood,2040 -1,1,328,poor,35,1512,northwest,1466.64 -0,1,154,good,9,1432,west_welmwood,1432 -2,1,701,poor,31,3003,northwest,2936.934 -3,2,1169,poor,31,4758,northwest,4653.324 -3,2,1024,good,1,4856,west_welmwood,4856 -1,1,633,great,12,2550,east_elmwood,2550 -0,1,501,good,10,1825,west_welmwood,1825 -2,1,771,good,0,3340,west_welmwood,3340 -1,1,730,poor,59,1641,northwest,1513.002 -0,1,545,good,0,1980,west_welmwood,1980 -2,1,667,great,9,3647,east_elmwood,3647 -0,1,513,good,10,1779,west_welmwood,1779 -3,2,1152,good,12,4870,west_welmwood,4870 -0,1,333,good,2,1748,west_welmwood,1748 -2,1,919,good,2,3450,west_welmwood,3450 -1,1,372,poor,64,1261,northwest,1150.032 -1,1,671,poor,35,1823,northwest,1768.31 -0,1,319,good,0,1680,west_welmwood,1680 -3,2,867,poor,63,4060,northwest,3710.84 -0,1,320,good,12,1594,west_welmwood,1594 -1,1,594,great,8,2484,east_elmwood,2484 -2,1,882,good,14,3389,west_welmwood,3389 -3,2,1062,poor,56,4374,northwest,4059.072 -3,2,823,poor,59,4046,northwest,3730.412 -2,1,874,good,6,3450,west_welmwood,3450 -2,1,687,poor,30,2947,northwest,2888.06 -2,1,621,good,3,3150,west_welmwood,3150 -2,1,705,great,2,3812,east_elmwood,3812 
-1,1,658,poor,41,1741,northwest,1667.878 -1,1,314,poor,58,1211,northwest,1118.964 -2,1,831,good,2,3439,west_welmwood,3439 -0,1,290,poor,59,1068,northwest,984.696 -0,1,398,poor,31,1499,northwest,1466.022 -2,1,729,good,12,3195,west_welmwood,3195 -0,1,238,poor,17,1471,northwest,1471 -0,1,538,good,10,1863,west_welmwood,1863 -2,1,631,poor,23,3019,northwest,3000.886 -0,1,542,poor,45,1474,northwest,1400.3 -3,2,883,poor,34,4365,northwest,4242.78 -2,1,703,poor,46,2811,northwest,2664.828 -1,1,441,poor,55,1396,northwest,1298.28 -3,2,1231,poor,62,4425,northwest,4053.3 -1,1,692,great,8,2590,east_elmwood,2590 -0,1,540,good,7,1834,west_welmwood,1834 -1,1,326,good,3,1773,west_welmwood,1773 -2,1,839,great,9,3816,east_elmwood,3816 -3,2,1173,great,4,5445,east_elmwood,5445 -1,1,569,poor,17,1937,northwest,1937 -2,1,622,great,10,3635,east_elmwood,3635 -2,1,716,poor,17,3183,northwest,3183 -1,1,548,good,12,1973,west_welmwood,1973 -2,1,591,poor,28,2954,northwest,2906.736 -3,2,868,poor,22,4453,northwest,4435.188 -2,1,853,poor,27,3213,northwest,3168.018 -1,1,733,great,2,2760,east_elmwood,2760 -2,1,710,good,14,3196,west_welmwood,3196 -0,1,151,good,0,1535,west_welmwood,1535 -3,2,1013,poor,25,4624,northwest,4577.76 -0,1,289,poor,54,1157,northwest,1078.324 -0,1,440,good,12,1746,west_welmwood,1746 -2,1,714,good,11,3202,west_welmwood,3202 -3,2,931,good,1,4762,west_welmwood,4762 -3,2,938,poor,19,4634,northwest,4634 -3,2,1191,great,0,5558,east_elmwood,5558 -1,1,495,good,6,1945,west_welmwood,1945 -3,2,1077,poor,20,4722,northwest,4722 -2,1,782,great,11,3744,east_elmwood,3744 -3,2,1099,good,7,4893,west_welmwood,4893 -0,1,398,great,13,2128,east_elmwood,2128 -1,1,385,good,13,1775,west_welmwood,1775 -0,1,213,good,13,1526,west_welmwood,1526 -0,1,180,poor,37,1199,northwest,1158.234 -3,2,979,poor,34,4456,northwest,4331.232 -2,1,757,good,5,3312,west_welmwood,3312 -1,1,673,great,9,2619,east_elmwood,2619 -1,1,438,good,9,1798,west_welmwood,1798 -0,1,110,great,2,2037,east_elmwood,2037 
-1,1,577,good,5,2017,west_welmwood,2017 -2,1,905,good,4,3459,west_welmwood,3459 -3,2,856,good,4,4712,west_welmwood,4712 -3,2,1082,great,12,5273,east_elmwood,5273 -0,1,174,good,14,1429,west_welmwood,1429 -1,1,435,poor,60,1303,northwest,1198.76 -2,1,765,poor,38,2952,northwest,2845.728 -1,1,628,good,10,2040,west_welmwood,2040 -1,1,735,great,10,2621,east_elmwood,2621 -1,1,568,good,14,1947,west_welmwood,1947 -1,1,625,good,10,2008,west_welmwood,2008 -2,1,846,good,2,3470,west_welmwood,3470 -2,1,518,good,3,3094,west_welmwood,3094 -3,2,1185,poor,53,4487,northwest,4190.858 -0,1,531,good,7,1906,west_welmwood,1906 -3,2,1245,good,3,5057,west_welmwood,5057 -0,1,384,poor,36,1416,northwest,1370.688 -3,2,940,good,9,4741,west_welmwood,4741 -1,1,740,good,1,2265,west_welmwood,2265 -1,1,307,great,0,2348,east_elmwood,2348 -2,1,548,good,10,3086,west_welmwood,3086 -0,1,461,great,1,2390,east_elmwood,2390 -3,2,1145,good,4,4947,west_welmwood,4947 -0,1,522,good,7,1827,west_welmwood,1827 -0,1,324,good,14,1570,west_welmwood,1570 -2,1,528,poor,34,2823,northwest,2743.956 -0,1,498,poor,63,1259,northwest,1150.726 -1,1,725,good,8,2100,west_welmwood,2100 -2,1,612,good,7,3183,west_welmwood,3183 -1,1,496,poor,59,1430,northwest,1318.46 -2,1,769,poor,49,2845,northwest,2679.99 -3,2,1034,good,6,4811,west_welmwood,4811 -2,1,646,poor,57,2718,northwest,2516.868 -3,2,901,good,12,4635,west_welmwood,4635 -3,2,1167,good,11,4895,west_welmwood,4895 -0,1,405,poor,45,1319,northwest,1253.05 -3,2,1242,good,14,4936,west_welmwood,4936 -1,1,599,good,3,2084,west_welmwood,2084 -1,1,587,good,3,2101,west_welmwood,2101 -2,1,810,good,3,3398,west_welmwood,3398 -1,1,709,good,9,2161,west_welmwood,2161 -0,1,497,good,0,1906,west_welmwood,1906 -1,1,305,good,0,1794,west_welmwood,1794 -2,1,667,good,6,3220,west_welmwood,3220 -3,2,895,good,11,4663,west_welmwood,4663 -3,2,1161,good,9,4906,west_welmwood,4906 -0,1,387,poor,29,1501,northwest,1473.982 -1,1,503,good,0,2014,west_welmwood,2014 -2,1,856,great,4,3881,east_elmwood,3881 
-1,1,444,great,11,2327,east_elmwood,2327 -0,1,207,good,3,1563,west_welmwood,1563 -1,1,676,good,1,2204,west_welmwood,2204 -3,2,1059,poor,25,4689,northwest,4642.11 -1,1,573,great,9,2436,east_elmwood,2436 -0,1,285,good,13,1595,west_welmwood,1595 -0,1,490,good,1,1864,west_welmwood,1864 -3,2,824,good,3,4631,west_welmwood,4631 -0,1,234,good,7,1562,west_welmwood,1562 -2,1,920,poor,15,3354,northwest,3354 -3,2,966,good,14,4689,west_welmwood,4689 -0,1,423,good,12,1725,west_welmwood,1725 -2,1,932,great,2,3980,east_elmwood,3980 -0,1,419,poor,28,1506,northwest,1481.904 -2,1,752,good,5,3342,west_welmwood,3342 -2,1,671,poor,51,2775,northwest,2602.95 -2,1,628,poor,51,2671,northwest,2505.398 -3,2,1058,good,12,4808,west_welmwood,4808 -2,1,668,poor,62,2605,northwest,2386.18 -0,1,131,poor,37,1125,northwest,1086.75 -0,1,269,good,9,1565,west_welmwood,1565 -1,1,663,good,9,2083,west_welmwood,2083 -3,2,1219,poor,34,4769,northwest,4635.468 -1,1,536,poor,41,1627,northwest,1558.666 -0,1,215,good,11,1532,west_welmwood,1532 -1,1,558,poor,51,1500,northwest,1407 -3,2,1242,great,1,5577,east_elmwood,5577 -1,1,672,poor,30,1845,northwest,1808.1 -1,1,497,good,4,1940,west_welmwood,1940 -1,1,598,great,2,2549,east_elmwood,2549 -3,2,1134,good,0,4959,west_welmwood,4959 -3,2,1174,great,11,5380,east_elmwood,5380 -1,1,476,poor,30,1634,northwest,1601.32 -0,1,228,great,1,2129,east_elmwood,2129 -0,1,389,great,12,2136,east_elmwood,2136 -0,1,489,great,7,2285,east_elmwood,2285 -1,1,352,poor,48,1357,northwest,1281.008 -0,1,544,good,7,1923,west_welmwood,1923 -1,1,604,poor,29,1776,northwest,1744.032 -1,1,528,good,5,2023,west_welmwood,2023 -3,2,1036,great,12,5305,east_elmwood,5305 -3,2,1060,poor,42,4462,northwest,4265.672 -2,1,872,good,2,3433,west_welmwood,3433 -1,1,477,good,4,1894,west_welmwood,1894 -2,1,933,poor,16,3398,northwest,3398 -1,1,511,good,10,1941,west_welmwood,1941 -0,1,228,poor,19,1443,northwest,1443 -2,1,819,poor,55,2869,northwest,2668.17 -1,1,556,great,10,2447,east_elmwood,2447 
-0,1,318,poor,33,1376,northwest,1340.224 -3,2,814,good,3,4680,west_welmwood,4680 -3,2,1215,poor,48,4631,northwest,4371.664 -0,1,190,poor,44,1195,northwest,1137.64 -2,1,594,good,1,3217,west_welmwood,3217 -0,1,310,poor,39,1358,northwest,1306.396 -2,1,629,great,7,3690,east_elmwood,3690 -3,2,957,good,7,4687,west_welmwood,4687 -3,2,1056,great,10,5328,east_elmwood,5328 -2,1,741,good,0,3370,west_welmwood,3370 -1,1,447,good,10,1889,west_welmwood,1889 -2,1,772,great,0,3839,east_elmwood,3839 -2,1,669,poor,32,2932,northwest,2861.632 -1,1,485,poor,39,1613,northwest,1551.706 -1,1,462,great,5,2387,east_elmwood,2387 -1,1,430,great,6,2332,east_elmwood,2332 -3,2,1183,good,7,4920,west_welmwood,4920 -3,2,875,poor,27,4471,northwest,4408.406 -1,1,496,good,14,1839,west_welmwood,1839 -3,2,993,great,6,5290,east_elmwood,5290 -0,1,446,poor,38,1448,northwest,1395.872 -0,1,431,good,1,1855,west_welmwood,1855 -0,1,300,great,1,2228,east_elmwood,2228 -0,1,304,good,5,1610,west_welmwood,1610 -2,1,748,poor,38,3003,northwest,2894.892 -0,1,182,poor,49,1116,northwest,1051.272 -0,1,198,good,1,1577,west_welmwood,1577 -3,2,1129,poor,52,4475,northwest,4188.6 -3,2,1005,good,2,4807,west_welmwood,4807 -2,1,509,great,14,3502,east_elmwood,3502 -2,1,731,poor,29,3058,northwest,3002.956 -2,1,809,poor,45,2996,northwest,2846.2 -0,1,232,poor,44,1239,northwest,1179.528 -0,1,247,poor,27,1422,northwest,1402.092 -1,1,682,great,4,2689,east_elmwood,2689 -1,1,493,poor,34,1617,northwest,1571.724 -1,1,536,good,2,2043,west_welmwood,2043 -0,1,513,poor,59,1343,northwest,1238.246 -3,2,1218,good,1,5076,west_welmwood,5076 -3,2,1072,good,4,4875,west_welmwood,4875 -2,1,778,good,0,3362,west_welmwood,3362 -2,1,773,good,12,3290,west_welmwood,3290 -0,1,372,great,1,2268,east_elmwood,2268 -3,2,1146,poor,31,4694,northwest,4590.732 -3,2,904,good,7,4670,west_welmwood,4670 -0,1,263,good,6,1621,west_welmwood,1621 -2,1,784,good,5,3295,west_welmwood,3295 -1,1,669,great,13,2555,east_elmwood,2555 -0,1,488,good,4,1891,west_welmwood,1891 
-0,1,231,good,11,1510,west_welmwood,1510 -2,1,849,good,2,3399,west_welmwood,3399 -2,1,621,good,3,3227,west_welmwood,3227 -1,1,483,good,8,1884,west_welmwood,1884 -0,1,243,good,5,1561,west_welmwood,1561 -2,1,840,good,10,3349,west_welmwood,3349 -3,2,1141,good,11,4852,west_welmwood,4852 -0,1,311,poor,40,1295,northwest,1243.2 -3,2,832,great,3,5146,east_elmwood,5146 -2,1,546,good,9,3092,west_welmwood,3092 -0,1,483,good,10,1801,west_welmwood,1801 -1,1,390,poor,63,1301,northwest,1189.114 -1,1,473,poor,37,1636,northwest,1580.376 -3,2,1080,good,3,4884,west_welmwood,4884 -2,1,815,poor,37,3029,northwest,2926.014 -0,1,289,poor,22,1430,northwest,1424.28 -1,1,372,good,11,1797,west_welmwood,1797 -2,1,949,great,10,3939,east_elmwood,3939 -2,1,754,poor,31,3015,northwest,2948.67 -0,1,105,poor,53,926,northwest,864.884 -2,1,848,good,11,3306,west_welmwood,3306 -1,1,737,poor,35,1901,northwest,1843.97 -2,1,856,poor,26,3175,northwest,3136.9 -3,2,953,poor,46,4294,northwest,4070.712 -0,1,449,poor,45,1435,northwest,1363.25 -3,2,1134,good,7,4922,west_welmwood,4922 -2,1,601,good,0,3153,west_welmwood,3153 -2,1,743,good,10,3230,west_welmwood,3230 -1,1,436,good,9,1850,west_welmwood,1850 -0,1,280,good,1,1701,west_welmwood,1701 -2,1,923,good,3,3522,west_welmwood,3522 -0,1,364,great,5,2237,east_elmwood,2237 -0,1,246,good,8,1608,west_welmwood,1608 -0,1,202,poor,23,1399,northwest,1390.606 -0,1,489,good,12,1738,west_welmwood,1738 -1,1,517,good,8,1963,west_welmwood,1963 -0,1,300,good,6,1661,west_welmwood,1661 -2,1,587,good,2,3132,west_welmwood,3132 -1,1,350,good,1,1852,west_welmwood,1852 -2,1,776,good,1,3354,west_welmwood,3354 -0,1,142,good,12,1418,west_welmwood,1418 -2,1,802,great,1,3885,east_elmwood,3885 -0,1,454,poor,17,1662,northwest,1662 -1,1,524,poor,64,1380,northwest,1258.56 -2,1,553,great,14,3485,east_elmwood,3485 -3,2,1139,great,13,5397,east_elmwood,5397 -2,1,820,good,8,3324,west_welmwood,3324 -0,1,505,good,14,1738,west_welmwood,1738 -0,1,340,poor,57,1190,northwest,1101.94 
-3,2,1189,good,8,4956,west_welmwood,4956 -3,2,1165,great,0,5525,east_elmwood,5525 -0,1,400,poor,38,1407,northwest,1356.348 -3,2,1214,great,14,5405,east_elmwood,5405 -1,1,493,good,13,1846,west_welmwood,1846 -0,1,344,poor,64,1078,northwest,983.136 -0,1,367,poor,57,1230,northwest,1138.98 -2,1,864,great,5,3935,east_elmwood,3935 -1,1,384,poor,36,1499,northwest,1451.032 -2,1,948,good,3,3549,west_welmwood,3549 -0,1,390,poor,32,1479,northwest,1443.504 -0,1,435,good,12,1683,west_welmwood,1683 -1,1,725,great,10,2672,east_elmwood,2672 -0,1,456,poor,39,1418,northwest,1364.116 -2,1,788,good,5,3295,west_welmwood,3295 -2,1,810,good,10,3331,west_welmwood,3331 -0,1,456,good,13,1721,west_welmwood,1721 -1,1,349,great,0,2333,east_elmwood,2333 -3,2,1065,good,3,4848,west_welmwood,4848 -2,1,623,poor,36,2911,northwest,2817.848 -1,1,543,good,8,1952,west_welmwood,1952 -3,2,1047,good,8,4816,west_welmwood,4816 -0,1,189,poor,63,967,northwest,883.838 -1,1,323,great,4,2239,east_elmwood,2239 -0,1,221,poor,20,1454,northwest,1454 -3,2,1022,poor,20,4672,northwest,4672 -1,1,539,good,10,1987,west_welmwood,1987 -1,1,619,poor,29,1848,northwest,1814.736 -2,1,742,poor,42,2971,northwest,2840.276 -0,1,481,poor,45,1471,northwest,1397.45 -3,2,1246,great,10,5540,east_elmwood,5540 -1,1,530,great,1,2546,east_elmwood,2546 -2,1,536,poor,37,2790,northwest,2695.14 -3,2,1193,great,13,5424,east_elmwood,5424 -1,1,347,good,3,1806,west_welmwood,1806 -3,2,970,good,14,4640,west_welmwood,4640 -3,2,1081,poor,26,4643,northwest,4587.284 -3,2,885,good,4,4701,west_welmwood,4701 -3,2,803,great,1,5129,east_elmwood,5129 -2,1,785,poor,25,3177,northwest,3145.23 -3,2,1042,poor,36,4498,northwest,4354.064 -2,1,576,good,1,3173,west_welmwood,3173 -0,1,412,poor,41,1365,northwest,1307.67 -3,2,1189,good,8,4996,west_welmwood,4996 -2,1,771,great,2,3826,east_elmwood,3826 -2,1,847,poor,59,2876,northwest,2651.672 -3,2,1114,great,9,5355,east_elmwood,5355 -2,1,618,great,2,3746,east_elmwood,3746 -0,1,424,good,7,1732,west_welmwood,1732 
-0,1,494,good,12,1786,west_welmwood,1786 -3,2,1113,great,3,5447,east_elmwood,5447 -3,2,1155,great,10,5451,east_elmwood,5451 -0,1,123,good,0,1527,west_welmwood,1527 -0,1,264,poor,62,1037,northwest,949.892 -2,1,864,good,3,3456,west_welmwood,3456 -1,1,440,good,3,1894,west_welmwood,1894 -3,2,1023,poor,54,4351,northwest,4055.132 -1,1,600,good,5,2020,west_welmwood,2020 -1,1,671,good,5,2156,west_welmwood,2156 -1,1,400,good,11,1789,west_welmwood,1789 -3,2,813,great,4,5096,east_elmwood,5096 -2,1,563,good,8,3096,west_welmwood,3096 -1,1,304,good,9,1759,west_welmwood,1759 -2,1,783,poor,26,3143,northwest,3105.284 -1,1,698,poor,39,1845,northwest,1774.89 -2,1,935,great,13,3903,east_elmwood,3903 -1,1,711,good,13,2033,west_welmwood,2033 -3,2,926,good,1,4723,west_welmwood,4723 -3,2,901,poor,31,4471,northwest,4372.638 -3,2,1002,poor,39,4485,northwest,4314.57 -1,1,442,good,0,1929,west_welmwood,1929 -0,1,483,poor,55,1337,northwest,1243.41 -0,1,191,poor,51,1077,northwest,1010.226 -3,2,882,poor,29,4466,northwest,4385.612 -1,1,531,poor,28,1739,northwest,1711.176 -1,1,495,good,4,1938,west_welmwood,1938 -3,2,857,poor,53,4161,northwest,3886.374 -1,1,561,poor,47,1543,northwest,1459.678 -0,1,256,poor,21,1399,northwest,1396.202 -2,1,643,good,14,3123,west_welmwood,3123 -3,2,1112,good,14,4782,west_welmwood,4782 -3,2,1153,good,13,4882,west_welmwood,4882 -2,1,532,great,10,3509,east_elmwood,3509 -2,1,519,poor,53,2588,northwest,2417.192 -1,1,661,poor,61,1591,northwest,1460.538 -0,1,226,poor,56,1085,northwest,1006.88 -0,1,286,great,6,2129,east_elmwood,2129 -0,1,205,good,9,1508,west_welmwood,1508 -0,1,231,good,1,1575,west_welmwood,1575 -3,2,1103,good,12,4872,west_welmwood,4872 -1,1,586,poor,48,1577,northwest,1488.688 -2,1,552,good,9,3046,west_welmwood,3046 -2,1,927,poor,26,3240,northwest,3201.12 -1,1,395,poor,60,1325,northwest,1219 -0,1,220,great,10,2010,east_elmwood,2010 -3,2,1054,good,11,4794,west_welmwood,4794 -1,1,611,good,0,2151,west_welmwood,2151 -0,1,128,good,14,1343,west_welmwood,1343 
-3,2,813,great,13,5004,east_elmwood,5004 -3,2,1041,poor,48,4394,northwest,4147.936 -0,1,316,great,9,2138,east_elmwood,2138 -3,2,853,good,14,4546,west_welmwood,4546 -2,1,662,poor,61,2610,northwest,2395.98 -3,2,831,great,9,5102,east_elmwood,5102 -2,1,897,good,3,3432,west_welmwood,3432 -3,2,1066,good,13,4777,west_welmwood,4777 -1,1,663,poor,53,1650,northwest,1541.1 -0,1,463,poor,33,1492,northwest,1453.208 -0,1,442,great,14,2174,east_elmwood,2174 -0,1,226,good,5,1583,west_welmwood,1583 -1,1,519,poor,59,1391,northwest,1282.502 -0,1,438,good,11,1739,west_welmwood,1739 -2,1,663,good,1,3234,west_welmwood,3234 -1,1,323,great,9,2229,east_elmwood,2229 -3,2,898,great,10,5150,east_elmwood,5150 -3,2,1097,poor,56,4358,northwest,4044.224 -3,2,1094,good,7,4862,west_welmwood,4862 -3,2,1245,great,13,5482,east_elmwood,5482 -3,2,967,good,12,4706,west_welmwood,4706 -2,1,866,poor,46,3038,northwest,2880.024 -3,2,972,good,2,4766,west_welmwood,4766 -0,1,531,poor,15,1819,northwest,1819 -1,1,429,good,11,1785,west_welmwood,1785 -0,1,530,good,10,1853,west_welmwood,1853 -1,1,710,good,9,2089,west_welmwood,2089 -3,2,1166,poor,37,4682,northwest,4522.812 -3,2,1184,good,9,4962,west_welmwood,4962 -3,2,872,poor,52,4167,northwest,3900.312 -2,1,606,good,12,3071,west_welmwood,3071 -2,1,724,poor,55,2790,northwest,2594.7 -0,1,176,great,1,2035,east_elmwood,2035 -1,1,578,good,14,1903,west_welmwood,1903 -1,1,665,great,4,2633,east_elmwood,2633 -3,2,813,poor,58,4065,northwest,3756.06 -1,1,499,poor,46,1498,northwest,1420.104 -0,1,225,good,7,1578,west_welmwood,1578 -3,2,1157,great,14,5317,east_elmwood,5317 -0,1,454,poor,60,1301,northwest,1196.92 -1,1,407,good,14,1750,west_welmwood,1750 -0,1,197,poor,58,992,northwest,916.608 -2,1,774,great,3,3821,east_elmwood,3821 -3,2,1027,good,6,4843,west_welmwood,4843 -0,1,293,poor,35,1379,northwest,1337.63 -0,1,446,good,11,1780,west_welmwood,1780 -3,2,1134,poor,59,4438,northwest,4091.836 -0,1,493,good,9,1798,west_welmwood,1798 -3,2,1098,poor,63,4331,northwest,3958.534 
-3,2,1096,poor,27,4639,northwest,4574.054 -2,1,689,good,5,3261,west_welmwood,3261 -2,1,699,great,14,3682,east_elmwood,3682 -1,1,637,great,11,2546,east_elmwood,2546 -1,1,344,good,2,1810,west_welmwood,1810 -0,1,154,good,9,1454,west_welmwood,1454 -1,1,726,good,11,2079,west_welmwood,2079 -1,1,698,good,5,2159,west_welmwood,2159 -3,2,1177,good,4,4958,west_welmwood,4958 -3,2,1058,great,6,5359,east_elmwood,5359 -0,1,489,great,4,2324,east_elmwood,2324 -1,1,347,poor,59,1304,northwest,1202.288 -2,1,901,poor,21,3307,northwest,3300.386 -3,2,1238,poor,53,4517,northwest,4218.878 -3,2,883,poor,64,4064,northwest,3706.368 -0,1,366,good,11,1635,west_welmwood,1635 -2,1,922,great,2,4049,east_elmwood,4049 -3,2,1000,good,13,4694,west_welmwood,4694 -1,1,601,great,13,2520,east_elmwood,2520 -2,1,911,poor,40,3153,northwest,3026.88 -1,1,518,good,13,1922,west_welmwood,1922 -2,1,504,good,12,2990,west_welmwood,2990 -1,1,350,good,6,1800,west_welmwood,1800 -0,1,224,poor,55,1094,northwest,1017.42 -3,2,830,poor,15,4536,northwest,4536 -2,1,659,poor,53,2693,northwest,2515.262 -1,1,525,poor,21,1789,northwest,1785.422 -3,2,1169,poor,61,4381,northwest,4021.758 -1,1,421,poor,27,1634,northwest,1611.124 -0,1,235,poor,21,1436,northwest,1433.128 -1,1,391,poor,19,1710,northwest,1710 -0,1,322,good,0,1713,west_welmwood,1713 -3,2,921,great,8,5141,east_elmwood,5141 -0,1,428,poor,33,1484,northwest,1445.416 -3,2,929,poor,31,4510,northwest,4410.78 -1,1,749,good,7,2148,west_welmwood,2148 -2,1,632,great,2,3692,east_elmwood,3692 -2,1,780,great,1,3856,east_elmwood,3856 -2,1,809,good,13,3238,west_welmwood,3238 -0,1,456,good,1,1850,west_welmwood,1850 -2,1,783,good,1,3379,west_welmwood,3379 -0,1,546,poor,44,1517,northwest,1444.184 -0,1,254,good,1,1662,west_welmwood,1662 -0,1,117,great,14,1879,east_elmwood,1879 -1,1,541,poor,51,1482,northwest,1390.116 -1,1,702,good,6,2182,west_welmwood,2182 -1,1,404,good,6,1880,west_welmwood,1880 -1,1,437,great,6,2392,east_elmwood,2392 -3,2,1059,poor,62,4335,zaytuna_college,3970.86 
-2,1,547,good,7,3028,north_berkeley,3028 -2,1,814,poor,52,2891,zaytuna_college,2705.976 -2,1,647,good,14,3059,north_berkeley,3059 -2,1,846,good,0,3434,north_berkeley,3434 -3,2,1009,good,7,4826,north_berkeley,4826 -3,2,1137,good,3,4949,north_berkeley,4949 -3,2,1054,good,4,4820,north_berkeley,4820 -3,2,818,good,0,4651,north_berkeley,4651 -3,2,1155,good,4,4992,north_berkeley,4992 -1,1,355,good,11,1703,north_berkeley,1703 -0,1,309,great,7,2105,east_elmwood,2105 -2,1,565,great,3,3644,east_elmwood,3644 -2,1,745,good,12,3178,north_berkeley,3178 -2,1,644,poor,36,2924,zaytuna_college,2830.432 -0,1,506,poor,19,1740,zaytuna_college,1740 -1,1,300,great,6,2273,east_elmwood,2273 -1,1,530,poor,20,1853,zaytuna_college,1853 -3,2,911,great,5,5199,east_elmwood,5199 -1,1,616,poor,44,1667,zaytuna_college,1586.984 -0,1,234,poor,49,1125,zaytuna_college,1059.75 -1,1,636,poor,53,1623,zaytuna_college,1515.882 -0,1,397,good,5,1716,north_berkeley,1716 -3,2,1235,good,14,4914,north_berkeley,4914 -3,2,1061,poor,63,4281,zaytuna_college,3912.834 -0,1,270,good,1,1661,north_berkeley,1661 -1,1,620,good,12,1950,north_berkeley,1950 -3,2,875,poor,30,4448,zaytuna_college,4359.04 -1,1,689,good,10,2117,north_berkeley,2117 -1,1,716,good,12,2102,north_berkeley,2102 -3,2,1149,poor,49,4464,zaytuna_college,4205.088 -1,1,403,poor,16,1760,zaytuna_college,1760 -1,1,588,good,10,2023,north_berkeley,2023 -2,1,668,good,4,3183,north_berkeley,3183 -3,2,1211,good,7,4966,north_berkeley,4966 -1,1,467,good,10,1839,north_berkeley,1839 -3,2,987,good,12,4689,north_berkeley,4689 -1,1,617,good,0,2067,north_berkeley,2067 -0,1,171,poor,28,1260,zaytuna_college,1239.84 -0,1,440,good,3,1854,north_berkeley,1854 -1,1,326,good,3,1809,north_berkeley,1809 -3,2,1157,good,10,4922,north_berkeley,4922 -0,1,249,good,5,1561,north_berkeley,1561 -0,1,288,good,0,1696,north_berkeley,1696 -1,1,455,poor,20,1799,zaytuna_college,1799 -2,1,682,poor,46,2781,zaytuna_college,2636.388 -3,2,1143,poor,63,4406,zaytuna_college,4027.084 
-2,1,684,great,6,3740,east_elmwood,3740 -1,1,433,good,8,1840,north_berkeley,1840 -3,2,874,good,7,4698,north_berkeley,4698 -1,1,488,great,10,2379,east_elmwood,2379 -0,1,371,great,7,2243,east_elmwood,2243 -1,1,572,good,0,2057,north_berkeley,2057 -0,1,196,great,1,2045,east_elmwood,2045 -3,2,1229,poor,19,4921,zaytuna_college,4921 -3,2,868,great,9,5112,east_elmwood,5112 -0,1,548,good,1,1923,north_berkeley,1923 -1,1,739,poor,48,1739,zaytuna_college,1641.616 -1,1,429,good,1,1896,north_berkeley,1896 -1,1,559,great,5,2480,east_elmwood,2480 -1,1,571,good,0,2038,north_berkeley,2038 -1,1,684,good,2,2213,north_berkeley,2213 -0,1,469,great,5,2350,east_elmwood,2350 -0,1,424,good,1,1783,north_berkeley,1783 -2,1,550,good,1,3157,north_berkeley,3157 -1,1,384,poor,49,1354,zaytuna_college,1275.468 -3,2,1165,poor,60,4463,zaytuna_college,4105.96 -3,2,1078,poor,24,4709,zaytuna_college,4671.328 -2,1,788,good,8,3319,north_berkeley,3319 -2,1,577,poor,58,2595,zaytuna_college,2397.78 -0,1,185,poor,23,1377,zaytuna_college,1368.738 -2,1,586,good,5,3154,north_berkeley,3154 -3,2,1105,good,8,4847,north_berkeley,4847 -3,2,933,good,2,4792,north_berkeley,4792 -1,1,315,poor,17,1672,zaytuna_college,1672 -1,1,466,good,9,1887,north_berkeley,1887 -1,1,637,good,8,2095,north_berkeley,2095 -2,1,536,good,14,3036,north_berkeley,3036 -0,1,235,great,4,2047,east_elmwood,2047 -3,2,813,good,10,4533,north_berkeley,4533 -1,1,748,poor,39,1809,zaytuna_college,1740.258 -2,1,926,poor,61,2885,zaytuna_college,2648.43 -0,1,348,good,8,1638,north_berkeley,1638 -2,1,777,good,8,3342,north_berkeley,3342 -0,1,245,poor,48,1124,zaytuna_college,1061.056 -1,1,319,great,6,2295,east_elmwood,2295 -3,2,1150,great,11,5416,east_elmwood,5416 -3,2,1198,good,6,4981,north_berkeley,4981 -0,1,155,good,4,1510,north_berkeley,1510 -1,1,562,good,5,2015,north_berkeley,2015 -1,1,608,poor,63,1457,zaytuna_college,1331.698 -3,2,915,great,3,5275,east_elmwood,5275 -2,1,719,good,11,3218,north_berkeley,3218 -0,1,357,poor,30,1420,zaytuna_college,1391.6 
-2,1,759,great,2,3791,east_elmwood,3791 -1,1,659,poor,37,1764,zaytuna_college,1704.024 -2,1,760,good,2,3381,north_berkeley,3381 -2,1,875,great,3,3949,east_elmwood,3949 -2,1,637,great,8,3698,east_elmwood,3698 -3,2,1245,poor,51,4628,zaytuna_college,4341.064 -0,1,542,good,3,1951,north_berkeley,1951 -3,2,1165,great,10,5370,east_elmwood,5370 -2,1,583,great,0,3709,east_elmwood,3709 -1,1,659,great,8,2552,east_elmwood,2552 -1,1,564,great,2,2586,east_elmwood,2586 -1,1,300,poor,27,1557,zaytuna_college,1535.202 -2,1,688,great,14,3697,east_elmwood,3697 -2,1,935,good,12,3444,north_berkeley,3444 -1,1,721,great,10,2589,east_elmwood,2589 -3,2,1164,good,5,4924,north_berkeley,4924 -0,1,455,good,9,1812,north_berkeley,1812 -3,2,935,poor,61,4224,zaytuna_college,3877.632 -1,1,584,poor,41,1644,zaytuna_college,1574.952 -0,1,501,great,3,2366,east_elmwood,2366 -1,1,679,poor,29,1930,zaytuna_college,1895.26 -0,1,439,poor,54,1290,zaytuna_college,1202.28 -1,1,705,poor,56,1641,zaytuna_college,1522.848 -1,1,597,good,8,2061,north_berkeley,2061 -3,2,1127,great,12,5402,east_elmwood,5402 -0,1,493,good,6,1830,north_berkeley,1830 -2,1,619,good,3,3160,north_berkeley,3160 -3,2,1102,poor,50,4418,zaytuna_college,4152.92 -1,1,554,poor,37,1680,zaytuna_college,1622.88 -0,1,359,poor,15,1627,zaytuna_college,1627 -3,2,982,great,9,5243,east_elmwood,5243 -3,2,1084,poor,18,4793,zaytuna_college,4793 -0,1,257,good,13,1518,north_berkeley,1518 -3,2,1111,poor,64,4317,zaytuna_college,3937.104 -1,1,432,great,8,2320,east_elmwood,2320 -3,2,1063,great,10,5292,east_elmwood,5292 -0,1,546,great,12,2286,east_elmwood,2286 -2,1,752,poor,26,3082,zaytuna_college,3045.016 -0,1,478,good,6,1814,north_berkeley,1814 -2,1,739,great,3,3775,east_elmwood,3775 -1,1,506,good,6,1920,north_berkeley,1920 -3,2,1237,good,12,4938,north_berkeley,4938 -0,1,427,poor,55,1293,zaytuna_college,1202.49 -2,1,773,poor,32,3088,zaytuna_college,3013.888 -1,1,386,good,5,1861,north_berkeley,1861 -3,2,1232,good,11,5015,north_berkeley,5015 
-0,1,468,poor,38,1449,zaytuna_college,1396.836 -0,1,235,good,3,1597,north_berkeley,1597 -0,1,135,good,7,1417,north_berkeley,1417 -1,1,537,poor,43,1595,zaytuna_college,1521.63 -2,1,663,good,8,3146,north_berkeley,3146 -1,1,662,great,9,2538,east_elmwood,2538 -3,2,1175,poor,33,4677,zaytuna_college,4555.398 -1,1,452,good,0,1962,north_berkeley,1962 -3,2,1224,good,6,5040,north_berkeley,5040 -2,1,651,great,4,3721,east_elmwood,3721 -1,1,304,great,2,2288,east_elmwood,2288 -3,2,1211,great,14,5418,east_elmwood,5418 -3,2,1123,poor,20,4727,zaytuna_college,4727 -1,1,654,poor,49,1630,zaytuna_college,1535.46 -1,1,630,good,6,2079,north_berkeley,2079 -1,1,631,poor,28,1894,zaytuna_college,1863.696 -0,1,435,good,14,1681,north_berkeley,1681 -0,1,400,good,1,1747,north_berkeley,1747 -3,2,1162,good,14,4824,north_berkeley,4824 -2,1,734,poor,37,2958,zaytuna_college,2857.428 -0,1,444,good,12,1722,north_berkeley,1722 -1,1,470,good,3,1912,north_berkeley,1912 -0,1,248,good,3,1594,north_berkeley,1594 -2,1,724,great,7,3770,east_elmwood,3770 -3,2,1136,good,3,4930,north_berkeley,4930 -3,2,891,poor,39,4333,zaytuna_college,4168.346 -2,1,629,great,12,3623,east_elmwood,3623 -0,1,484,great,6,2346,east_elmwood,2346 -0,1,203,good,8,1554,north_berkeley,1554 -3,2,1031,great,0,5336,east_elmwood,5336 -1,1,306,good,11,1727,north_berkeley,1727 -0,1,170,good,11,1455,north_berkeley,1455 -2,1,749,great,8,3799,east_elmwood,3799 -2,1,781,good,14,3197,north_berkeley,3197 -1,1,550,poor,17,1913,zaytuna_college,1913 -1,1,435,good,8,1826,north_berkeley,1826 -3,2,957,good,4,4751,north_berkeley,4751 -1,1,604,good,0,2082,north_berkeley,2082 -1,1,548,good,1,2014,north_berkeley,2014 -0,1,153,poor,17,1424,zaytuna_college,1424 -2,1,635,great,1,3696,east_elmwood,3696 -0,1,113,good,4,1461,north_berkeley,1461 -0,1,296,great,13,2095,east_elmwood,2095 -2,1,743,great,9,3758,east_elmwood,3758 -0,1,453,good,11,1735,north_berkeley,1735 -0,1,113,poor,31,1182,zaytuna_college,1155.996 -1,1,663,poor,37,1771,zaytuna_college,1710.786 
-1,1,633,good,0,2179,north_berkeley,2179 -2,1,835,poor,33,3092,zaytuna_college,3011.608 -0,1,349,good,4,1687,north_berkeley,1687 -2,1,698,poor,41,2897,zaytuna_college,2775.326 -2,1,504,good,2,3098,north_berkeley,3098 -0,1,121,poor,54,999,frontage_rd,931.068 -1,1,684,good,1,2192,north_berkeley,2192 -2,1,901,good,7,3473,north_berkeley,3473 -0,1,478,great,1,2414,east_elmwood,2414 -2,1,898,poor,41,3082,frontage_rd,2952.556 -1,1,482,good,14,1804,north_berkeley,1804 -0,1,239,poor,49,1168,frontage_rd,1100.256 -1,1,572,great,13,2403,east_elmwood,2403 -2,1,815,good,0,3378,north_berkeley,3378 -1,1,565,poor,61,1470,frontage_rd,1349.46 -3,2,1004,good,11,4706,north_berkeley,4706 -0,1,393,poor,49,1289,frontage_rd,1214.238 -0,1,117,great,3,1939,east_elmwood,1939 -0,1,448,poor,47,1337,frontage_rd,1264.802 -0,1,104,poor,20,1322,frontage_rd,1322 -1,1,391,good,10,1761,north_berkeley,1761 -0,1,542,good,5,1889,north_berkeley,1889 -0,1,365,good,9,1716,north_berkeley,1716 -0,1,510,poor,19,1730,frontage_rd,1730 -1,1,354,good,7,1811,north_berkeley,1811 -1,1,634,good,11,1991,north_berkeley,1991 -1,1,526,good,5,1933,north_berkeley,1933 -0,1,167,good,13,1447,north_berkeley,1447 -3,2,1018,poor,37,4453,frontage_rd,4301.598 -2,1,783,poor,54,2804,frontage_rd,2613.328 -1,1,416,good,3,1862,north_berkeley,1862 -3,2,916,poor,56,4246,frontage_rd,3940.288 -3,2,1223,good,1,5092,north_berkeley,5092 -1,1,359,poor,29,1577,frontage_rd,1548.614 -2,1,917,poor,29,3270,frontage_rd,3211.14 -1,1,338,great,9,2266,east_elmwood,2266 -3,2,926,poor,59,4218,frontage_rd,3888.996 -3,2,1116,great,14,5361,east_elmwood,5361 -3,2,1160,great,5,5470,east_elmwood,5470 -3,2,813,good,11,4568,north_berkeley,4568 -3,2,854,great,14,5103,east_elmwood,5103 -3,2,1068,great,11,5347,east_elmwood,5347 -2,1,758,great,4,3824,east_elmwood,3824 -2,1,741,good,11,3184,north_berkeley,3184 -2,1,761,poor,31,3055,frontage_rd,2987.79 -3,2,811,good,12,4580,north_berkeley,4580 -3,2,1005,great,9,5305,east_elmwood,5305 
-0,1,190,good,6,1550,north_berkeley,1550 -3,2,953,poor,48,4293,frontage_rd,4052.592 -3,2,1206,good,1,5011,north_berkeley,5011 -2,1,637,great,7,3692,east_elmwood,3692 -3,2,980,good,4,4791,north_berkeley,4791 -1,1,318,good,11,1671,north_berkeley,1671 -3,2,1227,poor,32,4784,frontage_rd,4669.184 -0,1,398,poor,60,1166,frontage_rd,1072.72 -1,1,543,great,8,2431,east_elmwood,2431 -3,2,1207,poor,28,4746,frontage_rd,4670.064 -0,1,509,great,6,2333,east_elmwood,2333 -2,1,685,poor,47,2844,frontage_rd,2690.424 -3,2,823,good,6,4589,north_berkeley,4589 -2,1,846,poor,48,2929,frontage_rd,2764.976 -2,1,846,good,3,3437,north_berkeley,3437 -0,1,397,poor,34,1462,frontage_rd,1421.064 -0,1,261,poor,55,1126,frontage_rd,1047.18 -1,1,733,good,13,2056,north_berkeley,2056 -0,1,265,poor,35,1308,frontage_rd,1268.76 -1,1,326,great,6,2314,east_elmwood,2314 -3,2,1020,great,8,5302,east_elmwood,5302 -3,2,1027,poor,44,4451,frontage_rd,4237.352 -2,1,551,poor,63,2502,frontage_rd,2286.828 -3,2,807,poor,55,4133,frontage_rd,3843.69 -3,2,1139,poor,48,4468,frontage_rd,4217.792 -1,1,635,good,9,2017,north_berkeley,2017 -1,1,520,good,13,1851,north_berkeley,1851 -2,1,765,great,14,3693,east_elmwood,3693 -0,1,505,good,6,1804,north_berkeley,1804 -3,2,837,poor,27,4428,frontage_rd,4366.008 -3,2,921,good,5,4744,north_berkeley,4744 -2,1,924,great,12,3902,east_elmwood,3902 -3,2,976,poor,38,4406,frontage_rd,4247.384 -2,1,918,good,0,3524,north_berkeley,3524 -0,1,332,good,0,1714,north_berkeley,1714 -1,1,463,good,7,1885,north_berkeley,1885 -3,2,822,poor,44,4201,frontage_rd,3999.352 -1,1,626,poor,55,1588,frontage_rd,1476.84 -0,1,424,poor,60,1235,frontage_rd,1136.2 -1,1,383,poor,49,1436,frontage_rd,1352.712 -1,1,496,good,4,1917,north_berkeley,1917 -3,2,1166,poor,52,4524,frontage_rd,4234.464 -3,2,1211,good,14,4970,north_berkeley,4970 -3,2,896,poor,25,4498,frontage_rd,4453.02 -0,1,377,good,8,1698,north_berkeley,1698 -1,1,381,good,10,1784,north_berkeley,1784 -1,1,429,poor,53,1361,frontage_rd,1271.174 
-2,1,750,good,9,3217,north_berkeley,3217 -1,1,718,poor,45,1806,frontage_rd,1715.7 -1,1,527,poor,47,1575,frontage_rd,1489.95 -2,1,652,good,4,3196,north_berkeley,3196 -3,2,1067,poor,24,4688,frontage_rd,4650.496 -3,2,1075,good,13,4841,north_berkeley,4841 -3,2,1154,great,1,5459,east_elmwood,5459 -1,1,649,great,1,2592,east_elmwood,2592 -0,1,195,good,9,1500,north_berkeley,1500 -2,1,774,poor,59,2778,frontage_rd,2561.316 -2,1,803,great,6,3822,east_elmwood,3822 -2,1,529,great,3,3633,east_elmwood,3633 -2,1,584,good,12,3042,north_berkeley,3042 -0,1,102,good,6,1472,north_berkeley,1472 -3,2,899,good,2,4696,north_berkeley,4696 -3,2,1237,good,14,4972,north_berkeley,4972 -0,1,228,poor,62,986,frontage_rd,903.176 -2,1,604,poor,40,2835,frontage_rd,2721.6 -0,1,518,poor,35,1607,frontage_rd,1558.79 -2,1,571,good,4,3100,north_berkeley,3100 -0,1,351,good,11,1657,north_berkeley,1657 -0,1,111,poor,29,1203,frontage_rd,1181.346 -1,1,345,poor,59,1273,frontage_rd,1173.706 -1,1,632,good,3,2082,north_berkeley,2082 -2,1,711,good,12,3151,north_berkeley,3151 -1,1,466,good,12,1848,north_berkeley,1848 -1,1,727,good,0,2190,north_berkeley,2190 -1,1,404,poor,63,1235,frontage_rd,1128.79 -1,1,374,poor,34,1498,frontage_rd,1456.056 -3,2,1108,great,13,5284,east_elmwood,5284 -3,2,1236,good,1,5119,north_berkeley,5119 -2,1,870,good,4,3433,north_berkeley,3433 -0,1,144,good,6,1523,north_berkeley,1523 -3,2,1221,good,11,4990,north_berkeley,4990 -3,2,985,good,10,4763,north_berkeley,4763 -1,1,318,great,7,2288,east_elmwood,2288 -2,1,538,good,12,3067,north_berkeley,3067 -1,1,487,great,13,2365,east_elmwood,2365 -2,1,836,good,13,3303,north_berkeley,3303 -3,2,1156,good,3,5025,north_berkeley,5025 -1,1,516,great,12,2439,east_elmwood,2439 -1,1,580,poor,53,1585,frontage_rd,1480.39 -1,1,713,good,9,2124,north_berkeley,2124 -2,1,724,good,2,3305,north_berkeley,3305 -3,2,1226,good,3,5005,north_berkeley,5005 -1,1,674,good,8,2128,north_berkeley,2128 -0,1,532,good,1,1932,north_berkeley,1932 -3,2,1099,good,12,4789,north_berkeley,4789 
-3,2,807,good,7,4608,north_berkeley,4608 -3,2,959,great,6,5244,east_elmwood,5244 -0,1,260,good,5,1591,north_berkeley,1591 -0,1,203,poor,38,1250,frontage_rd,1205 -3,2,1112,poor,29,4693,frontage_rd,4608.526 -2,1,655,good,11,3144,north_berkeley,3144 -0,1,355,poor,61,1130,frontage_rd,1037.34 -3,2,1146,good,5,4995,north_berkeley,4995 -3,2,1098,great,8,5396,east_elmwood,5396 -0,1,117,good,5,1465,north_berkeley,1465 -0,1,474,great,9,2297,east_elmwood,2297 -0,1,463,great,10,2272,east_elmwood,2272 -2,1,770,good,9,3277,north_berkeley,3277 -1,1,642,great,11,2502,east_elmwood,2502 -3,2,1044,poor,26,4599,frontage_rd,4543.812 -0,1,425,poor,45,1335,frontage_rd,1268.25 -2,1,681,poor,38,2928,frontage_rd,2822.592 -2,1,509,good,6,3054,north_berkeley,3054 -1,1,568,great,3,2575,east_elmwood,2575 -1,1,749,poor,21,1990,frontage_rd,1986.02 -2,1,651,great,2,3701,east_elmwood,3701 -0,1,448,good,14,1696,north_berkeley,1696 -0,1,492,poor,20,1696,frontage_rd,1696 -0,1,516,poor,59,1288,frontage_rd,1187.536 -1,1,625,good,0,2162,north_berkeley,2162 -2,1,874,good,13,3320,north_berkeley,3320 -1,1,738,good,2,2253,north_berkeley,2253 -0,1,148,good,4,1537,north_berkeley,1537 -3,2,819,good,3,4646,north_berkeley,4646 -0,1,101,good,5,1483,north_berkeley,1483 -0,1,131,poor,22,1308,frontage_rd,1302.768 -0,1,158,poor,57,1019,frontage_rd,943.594 -1,1,725,poor,44,1744,frontage_rd,1660.288 -2,1,616,great,2,3678,east_elmwood,3678 -0,1,341,great,7,2195,east_elmwood,2195 -1,1,668,good,0,2190,north_berkeley,2190 -0,1,491,good,2,1850,north_berkeley,1850 -3,2,895,good,12,4638,north_berkeley,4638 -2,1,787,good,1,3407,north_berkeley,3407 -2,1,873,poor,15,3317,frontage_rd,3317 -1,1,525,poor,47,1569,frontage_rd,1484.274 -2,1,941,poor,56,2987,frontage_rd,2771.936 -1,1,537,good,9,1905,north_berkeley,1905 -3,2,1070,great,5,5367,east_elmwood,5367 -0,1,440,good,12,1751,north_berkeley,1751 -0,1,255,good,13,1513,north_berkeley,1513 -0,1,464,poor,45,1424,frontage_rd,1352.8 -3,2,974,good,6,4750,north_berkeley,4750 
-1,1,322,poor,40,1452,frontage_rd,1393.92 -1,1,371,poor,47,1416,frontage_rd,1339.536 -1,1,487,poor,34,1685,frontage_rd,1637.82 -3,2,985,great,14,5221,east_elmwood,5221 -0,1,460,good,3,1853,north_berkeley,1853 -0,1,384,good,13,1667,north_berkeley,1667 -2,1,755,good,12,3205,north_berkeley,3205 -2,1,765,good,4,3362,north_berkeley,3362 -2,1,580,great,2,3692,east_elmwood,3692 -1,1,472,great,3,2479,east_elmwood,2479 -0,1,193,poor,33,1277,frontage_rd,1243.798 -1,1,312,poor,31,1509,frontage_rd,1475.802 -0,1,488,great,8,2304,east_elmwood,2304 -0,1,214,poor,33,1268,frontage_rd,1235.032 -3,2,939,good,7,4750,north_berkeley,4750 -0,1,502,poor,17,1743,frontage_rd,1743 -0,1,540,good,10,1835,north_berkeley,1835 -0,1,139,good,10,1487,north_berkeley,1487 -0,1,447,poor,29,1520,frontage_rd,1492.64 -2,1,776,great,4,3828,east_elmwood,3828 -1,1,695,great,6,2604,east_elmwood,2604 -0,1,449,good,7,1820,north_berkeley,1820 -3,2,984,great,6,5312,east_elmwood,5312 -0,1,255,good,9,1542,north_berkeley,1542 -3,2,811,poor,20,4428,frontage_rd,4428 -0,1,371,good,12,1643,north_berkeley,1643 -1,1,444,great,5,2399,east_elmwood,2399 -1,1,410,good,9,1800,north_berkeley,1800 -1,1,315,poor,39,1451,frontage_rd,1395.862 -0,1,368,poor,36,1412,frontage_rd,1366.816 -1,1,738,poor,51,1769,frontage_rd,1659.322 -1,1,372,good,1,1868,north_berkeley,1868 -3,2,899,poor,17,4620,frontage_rd,4620 -2,1,919,good,1,3538,north_berkeley,3538 -3,2,1082,good,7,4878,north_berkeley,4878 -3,2,1114,poor,44,4556,frontage_rd,4337.312 -0,1,163,poor,37,1155,frontage_rd,1115.73 -3,2,1090,poor,59,4367,frontage_rd,4026.374 -3,2,989,good,0,4856,north_berkeley,4856 -0,1,479,great,12,2275,east_elmwood,2275 -2,1,617,good,8,3124,north_berkeley,3124 -3,2,959,poor,61,4195,frontage_rd,3851.01 -1,1,735,good,12,2154,north_berkeley,2154 -3,2,1084,great,14,5272,east_elmwood,5272 -1,1,480,good,4,1956,north_berkeley,1956 -2,1,813,poor,58,2874,frontage_rd,2655.576 -0,1,103,great,1,2023,east_elmwood,2023 -2,1,695,great,9,3701,east_elmwood,3701 
-0,1,186,poor,20,1427,frontage_rd,1427 -0,1,270,poor,44,1231,frontage_rd,1171.912 -2,1,800,poor,51,2845,frontage_rd,2668.61 -2,1,502,great,4,3570,east_elmwood,3570 -2,1,680,poor,60,2697,frontage_rd,2481.24 -3,2,1071,poor,58,4345,frontage_rd,4014.78 -1,1,662,good,10,2060,north_berkeley,2060 -0,1,499,good,13,1818,north_berkeley,1818 -1,1,308,poor,33,1446,frontage_rd,1408.404 -0,1,236,good,13,1507,north_berkeley,1507 -2,1,889,good,7,3458,north_berkeley,3458 -2,1,884,good,6,3381,north_berkeley,3381 -0,1,454,good,11,1699,north_berkeley,1699 -3,2,1036,good,4,4820,north_berkeley,4820 -2,1,637,good,9,3105,north_berkeley,3105 -1,1,339,poor,51,1294,frontage_rd,1213.772 -2,1,587,poor,25,2917,frontage_rd,2887.83 -0,1,489,poor,36,1529,frontage_rd,1480.072 -0,1,440,good,2,1843,north_berkeley,1843 -2,1,573,good,2,3143,north_berkeley,3143 -1,1,575,good,9,1949,north_berkeley,1949 -0,1,207,good,13,1481,north_berkeley,1481 -0,1,407,great,0,2320,east_elmwood,2320 -0,1,295,poor,43,1228,frontage_rd,1171.512 -0,1,434,poor,17,1633,frontage_rd,1633 -0,1,184,great,13,1938,east_elmwood,1938 -2,1,895,poor,58,2881,frontage_rd,2662.044 -0,1,323,great,7,2182,east_elmwood,2182 -2,1,747,great,4,3774,east_elmwood,3774 -3,2,1166,poor,39,4612,frontage_rd,4436.744 -1,1,689,poor,29,1917,frontage_rd,1882.494 -2,1,663,great,8,3636,east_elmwood,3636 -1,1,412,poor,50,1425,frontage_rd,1339.5 -3,2,1131,good,14,4875,north_berkeley,4875 -1,1,300,good,9,1736,north_berkeley,1736 -2,1,786,good,12,3262,north_berkeley,3262 -0,1,402,poor,48,1350,frontage_rd,1274.4 -2,1,535,great,2,3639,east_elmwood,3639 -1,1,360,good,9,1740,north_berkeley,1740 -0,1,276,great,9,2065,east_elmwood,2065 -2,1,647,poor,43,2797,frontage_rd,2668.338 -1,1,611,good,5,2075,north_berkeley,2075 -3,2,956,good,6,4777,north_berkeley,4777 -1,1,426,good,14,1788,north_berkeley,1788 -0,1,155,great,12,1945,east_elmwood,1945 -2,1,643,poor,40,2871,frontage_rd,2756.16 -1,1,593,good,10,1944,north_berkeley,1944 -3,2,1093,poor,39,4509,frontage_rd,4337.658 
-1,1,487,good,12,1854,north_berkeley,1854 -1,1,349,good,10,1768,north_berkeley,1768 -0,1,171,poor,61,983,frontage_rd,902.394 -3,2,1024,good,1,4904,north_berkeley,4904 -0,1,116,good,10,1374,north_berkeley,1374 -2,1,561,poor,49,2708,frontage_rd,2550.936 -1,1,741,great,10,2637,east_elmwood,2637 -2,1,901,great,5,3952,east_elmwood,3952 -1,1,318,poor,57,1230,frontage_rd,1138.98 -1,1,688,great,8,2605,east_elmwood,2605 -3,2,828,good,7,4584,north_berkeley,4584 -2,1,938,great,4,3973,east_elmwood,3973 -2,1,669,poor,25,2970,frontage_rd,2940.3 -3,2,840,poor,64,4024,frontage_rd,3669.888 -2,1,638,poor,50,2712,frontage_rd,2549.28 -2,1,759,great,14,3712,east_elmwood,3712 -2,1,934,great,0,4058,east_elmwood,4058 -3,2,991,great,10,5287,east_elmwood,5287 -1,1,411,great,8,2316,east_elmwood,2316 -2,1,883,poor,35,3173,frontage_rd,3077.81 -3,2,1152,good,3,4986,north_berkeley,4986 -0,1,230,good,11,1490,north_berkeley,1490 -3,2,934,good,9,4733,north_berkeley,4733 -3,2,941,great,5,5274,east_elmwood,5274 -3,2,933,great,9,5156,east_elmwood,5156 -3,2,839,poor,55,4172,frontage_rd,3879.96 -0,1,216,great,1,2092,east_elmwood,2092 -3,2,1235,good,3,5088,north_berkeley,5088 -0,1,132,good,11,1445,north_berkeley,1445 -2,1,832,poor,34,3073,frontage_rd,2986.956 -2,1,900,great,13,3881,east_elmwood,3881 -3,2,897,good,8,4658,north_berkeley,4658 -1,1,698,great,11,2563,east_elmwood,2563 -1,1,368,good,1,1865,north_berkeley,1865 -2,1,664,good,8,3159,north_berkeley,3159 -3,2,1162,good,14,4883,north_berkeley,4883 -2,1,743,good,8,3300,north_berkeley,3300 -1,1,427,poor,64,1282,frontage_rd,1169.184 -1,1,632,good,1,2106,north_berkeley,2106 -2,1,698,poor,16,3173,frontage_rd,3173 -0,1,275,poor,18,1456,frontage_rd,1456 -3,2,1128,good,7,4889,north_berkeley,4889 -1,1,352,great,0,2346,east_elmwood,2346 -3,2,1174,good,3,4956,north_berkeley,4956 -1,1,440,great,5,2420,east_elmwood,2420 -1,1,460,good,4,1949,north_berkeley,1949 -2,1,870,good,7,3435,north_berkeley,3435 -3,2,1240,great,14,5497,east_elmwood,5497 
-0,1,326,poor,45,1259,frontage_rd,1196.05 -0,1,311,good,12,1542,north_berkeley,1542 -2,1,656,poor,24,2990,frontage_rd,2966.08 -1,1,560,good,14,1899,north_berkeley,1899 -0,1,352,poor,59,1211,frontage_rd,1116.542 -2,1,714,good,1,3282,north_berkeley,3282 -3,2,834,poor,32,4396,frontage_rd,4290.496 -0,1,348,good,0,1730,north_berkeley,1730 -0,1,388,good,3,1746,north_berkeley,1746 -0,1,328,great,13,2126,east_elmwood,2126 -2,1,718,good,6,3230,north_berkeley,3230 -0,1,435,poor,64,1177,frontage_rd,1073.424 -3,2,1182,great,6,5483,east_elmwood,5483 -1,1,632,great,11,2503,east_elmwood,2503 -3,2,902,great,2,5186,east_elmwood,5186 -1,1,612,good,6,2034,north_berkeley,2034 -0,1,217,good,11,1516,north_berkeley,1516 -0,1,126,good,11,1428,north_berkeley,1428 -1,1,586,good,7,2018,north_berkeley,2018 -3,2,831,good,1,4659,north_berkeley,4659 -1,1,380,good,12,1758,north_berkeley,1758 -3,2,1224,good,6,4976,north_berkeley,4976 -3,2,1009,poor,60,4283,frontage_rd,3940.36 -2,1,872,great,12,3868,east_elmwood,3868 -0,1,116,great,9,1964,east_elmwood,1964 -3,2,1060,good,8,4862,north_berkeley,4862 -1,1,442,poor,62,1292,frontage_rd,1183.472 -0,1,442,good,14,1737,north_berkeley,1737 -0,1,519,poor,43,1531,frontage_rd,1460.574 -0,1,118,good,1,1489,north_berkeley,1489 -2,1,674,good,13,3110,north_berkeley,3110 -3,2,1182,great,14,5364,east_elmwood,5364 -3,2,1056,poor,20,4663,frontage_rd,4663 -1,1,357,poor,55,1301,frontage_rd,1209.93 -1,1,507,great,7,2401,east_elmwood,2401 -2,1,793,good,4,3303,north_berkeley,3303 -0,1,254,good,12,1578,north_berkeley,1578 -0,1,375,good,4,1767,north_berkeley,1767 -3,2,934,poor,15,4637,frontage_rd,4637 -2,1,677,poor,15,3123,frontage_rd,3123 -2,1,582,good,8,3057,north_berkeley,3057 -2,1,918,poor,33,3210,frontage_rd,3126.54 -3,2,994,good,7,4801,north_berkeley,4801 -0,1,144,poor,50,1045,frontage_rd,982.3 -3,2,997,good,11,4749,north_berkeley,4749 -0,1,430,great,8,2248,east_elmwood,2248 -0,1,443,poor,20,1602,frontage_rd,1602 -2,1,785,great,7,3832,east_elmwood,3832 
-1,1,426,great,9,2344,east_elmwood,2344 -1,1,312,good,13,1645,north_berkeley,1645 -0,1,116,great,7,1929,east_elmwood,1929 -2,1,949,good,0,3513,north_berkeley,3513 -1,1,501,great,8,2390,east_elmwood,2390 -1,1,564,great,3,2538,east_elmwood,2538 -0,1,333,poor,26,1457,frontage_rd,1439.516 -3,2,879,great,9,5169,east_elmwood,5169 -2,1,817,good,4,3379,north_berkeley,3379 -3,2,1123,poor,49,4513,frontage_rd,4251.246 -1,1,"640,56",good,0,2184,north_berkeley,2184 -1,1,707,good,1,2168,north_berkeley,2168 -1,1,347,poor,63,1174,frontage_rd,1073.036 -0,1,245,poor,63,1060,frontage_rd,968.84 -3,2,1116,good,13,4808,north_berkeley,4808 -3,2,1083,great,9,5304,east_elmwood,5304 -3,2,911,great,0,5285,east_elmwood,5285 -1,1,724,good,12,2097,north_berkeley,2097 -1,1,627,good,8,2084,north_berkeley,2084 -0,1,259,great,10,2095,east_elmwood,2095 -0,1,216,poor,50,1079,frontage_rd,1014.26 -0,1,490,good,12,1732,north_berkeley,1732 -3,2,931,good,14,4617,north_berkeley,4617 -0,1,174,poor,43,1129,frontage_rd,1077.066 -3,2,956,great,10,5204,east_elmwood,5204 -2,1,937,poor,61,2962,frontage_rd,2719.116 -3,2,1220,great,11,5459,east_elmwood,5459 -0,1,240,good,12,1514,north_berkeley,1514 -3,2,964,great,2,5270,east_elmwood,5270 -1,1,589,poor,33,1770,frontage_rd,1723.98 -0,1,217,good,14,1447,north_berkeley,1447 -2,1,520,great,4,3540,east_elmwood,3540 -0,1,220,poor,49,1098,frontage_rd,1034.316 -3,2,1176,poor,19,4849,frontage_rd,4849 -3,2,1008,good,9,4794,north_berkeley,4794 -3,2,1202,good,11,4950,north_berkeley,4950 -3,2,1117,poor,57,4387,frontage_rd,4062.362 -3,2,1140,good,0,4996,north_berkeley,4996 -2,1,734,poor,35,2980,frontage_rd,2890.6 -0,1,419,good,6,1758,north_berkeley,1758 -3,2,1117,good,13,4796,north_berkeley,4796 -1,1,516,great,13,2352,east_elmwood,2352 -2,1,555,poor,51,2632,frontage_rd,2468.816 -1,1,407,good,12,1797,north_berkeley,1797 -0,1,356,good,5,1671,north_berkeley,1671 -2,1,860,good,1,3434,north_berkeley,3434 -1,1,637,great,5,2582,east_elmwood,2582 
-3,2,995,poor,39,4491,frontage_rd,4320.342 -1,1,623,poor,38,1766,frontage_rd,1702.424 -3,2,968,good,9,4699,north_berkeley,4699 -3,2,1182,great,8,5404,east_elmwood,5404 -0,1,147,poor,47,1084,frontage_rd,1025.464 -0,1,534,good,0,1963,north_berkeley,1963 -3,2,871,good,11,4648,north_berkeley,4648 -1,1,348,great,3,2363,east_elmwood,2363 -1,1,427,poor,17,1712,frontage_rd,1712 -0,1,140,good,3,1488,north_berkeley,1488 -2,1,898,great,10,3922,east_elmwood,3922 -0,1,532,poor,36,1523,frontage_rd,1474.264 -0,1,432,poor,19,1628,frontage_rd,1628 -3,2,1160,great,10,5360,east_elmwood,5360 -2,1,775,poor,43,2926,frontage_rd,2791.404 -0,1,178,great,14,1898,east_elmwood,1898 -1,1,395,good,1,1858,north_berkeley,1858 -1,1,304,good,0,1777,north_berkeley,1777 -0,1,438,great,12,2258,east_elmwood,2258 -3,2,1037,good,5,4849,north_berkeley,4849 -0,1,246,good,7,1528,north_berkeley,1528 -3,2,948,good,4,4752,north_berkeley,4752 -1,1,505,good,10,1907,north_berkeley,1907 -1,1,562,good,10,1917,north_berkeley,1917 -0,1,248,poor,29,1365,frontage_rd,1340.43 -1,1,737,great,12,2573,east_elmwood,2573 -3,2,1100,good,2,4926,north_berkeley,4926 -3,2,804,good,6,4591,north_berkeley,4591 -0,1,104,great,11,1867,east_elmwood,1867 -1,1,444,good,14,1801,north_berkeley,1801 -0,1,186,poor,30,1324,frontage_rd,1297.52 -0,1,383,great,4,2202,east_elmwood,2202 -2,1,620,good,8,3107,north_berkeley,3107 -3,2,1098,good,14,4856,north_berkeley,4856 -1,1,718,poor,39,1778,frontage_rd,1710.436 -2,1,866,good,3,3418,north_berkeley,3418 -1,1,404,poor,53,1348,frontage_rd,1259.032 -3,2,1022,good,8,4829,north_berkeley,4829 -1,1,508,good,5,1971,north_berkeley,1971 -2,1,854,great,5,3862,east_elmwood,3862 -0,1,542,good,13,1791,north_berkeley,1791 -3,2,1240,great,10,5503,east_elmwood,5503 -0,1,128,good,6,1432,north_berkeley,1432 -0,1,365,good,2,1702,north_berkeley,1702 -0,1,156,poor,62,887,frontage_rd,812.492 -2,1,933,good,4,3449,north_berkeley,3449 -0,1,404,poor,61,1163,frontage_rd,1067.634 -2,1,822,good,8,3309,north_berkeley,3309 
-2,1,744,great,13,3692,east_elmwood,3692 -1,1,507,good,12,1846,north_berkeley,1846 -1,1,505,great,11,2349,east_elmwood,2349 -2,1,946,poor,34,3251,frontage_rd,3159.972 -0,1,146,good,14,1387,north_berkeley,1387 -3,2,808,good,8,4612,north_berkeley,4612 -3,2,936,good,13,4692,north_berkeley,4692 -2,1,569,poor,19,2949,frontage_rd,2949 -2,1,508,great,2,3599,east_elmwood,3599 -3,2,1041,good,7,4801,north_berkeley,4801 -3,2,840,poor,50,4227,frontage_rd,3973.38 -3,2,1246,good,12,5011,north_berkeley,5011 -1,1,481,great,6,2418,east_elmwood,2418 -0,1,503,good,7,1849,north_berkeley,1849 -1,1,340,great,3,2307,east_elmwood,2307 -0,1,397,good,9,1703,north_berkeley,1703 -3,2,808,poor,45,4177,frontage_rd,3968.15 -2,1,794,poor,43,2992,frontage_rd,2854.368 -3,2,1209,good,10,4916,north_berkeley,4916 -1,1,358,poor,37,1471,frontage_rd,1420.986 -1,1,552,great,4,2485,east_elmwood,2485 -3,2,904,good,14,4576,north_berkeley,4576 -2,1,539,poor,27,2887,frontage_rd,2846.582 -2,1,761,good,3,3283,north_berkeley,3283 -1,1,639,great,8,2576,east_elmwood,2576 -0,1,191,poor,23,1375,frontage_rd,1366.75 -1,1,676,great,5,2620,east_elmwood,2620 -2,1,735,poor,31,2999,frontage_rd,2933.022 -3,2,941,good,9,4745,north_berkeley,4745 -3,2,1212,poor,21,4849,frontage_rd,4839.302 -2,1,556,poor,52,2627,frontage_rd,2458.872 -1,1,574,good,8,2008,north_berkeley,2008 -0,1,209,great,2,2042,east_elmwood,2042 -2,1,753,poor,16,3203,frontage_rd,3203 -0,1,276,poor,62,1069,frontage_rd,979.204 -3,2,1234,good,10,5025,north_berkeley,5025 -0,1,505,poor,24,1683,frontage_rd,1669.536 -1,1,661,great,7,2547,east_elmwood,2547 -3,2,885,great,8,5184,east_elmwood,5184 -2,1,865,great,9,3849,east_elmwood,3849 -0,1,150,great,14,1916,east_elmwood,1916 -3,2,838,poor,56,4139,frontage_rd,3840.992 -0,1,288,poor,22,1517,frontage_rd,1510.932 -2,1,868,good,7,3436,north_berkeley,3436 -3,2,1178,good,5,4932,north_berkeley,4932 -3,2,915,great,7,5205,east_elmwood,5205 -1,1,355,poor,64,1263,frontage_rd,1151.856 -0,1,478,good,10,1775,north_berkeley,1775 
-0,1,352,great,11,2174,east_elmwood,2174 -0,1,281,good,10,1567,north_berkeley,1567 -0,1,178,poor,22,1322,frontage_rd,1316.712 -3,2,1124,poor,34,4603,frontage_rd,4474.116 -2,1,858,good,12,3307,north_berkeley,3307 -0,1,143,poor,21,1310,frontage_rd,1307.38 -3,2,1042,good,3,4836,north_berkeley,4836 -1,1,383,good,10,1740,north_berkeley,1740 -1,1,324,poor,50,1324,frontage_rd,1244.56 -2,1,508,great,2,3559,east_elmwood,3559 -1,1,398,good,6,1868,north_berkeley,1868 -1,1,408,good,6,1810,north_berkeley,1810 -2,1,813,poor,34,3057,frontage_rd,2971.404 -0,1,380,good,11,1627,north_berkeley,1627 -2,1,747,poor,16,3219,frontage_rd,3219 -2,1,791,poor,24,3175,frontage_rd,3149.6 -0,1,185,good,0,1578,north_berkeley,1578 -3,2,963,good,9,4736,north_berkeley,4736 -1,1,327,poor,49,1379,frontage_rd,1299.018 -0,1,125,great,10,1930,east_elmwood,1930 -3,2,1140,great,2,5507,east_elmwood,5507 -2,1,654,good,2,3270,north_berkeley,3270 -0,1,405,good,3,1754,north_berkeley,1754 -3,2,1206,good,7,4996,north_berkeley,4996 -0,1,261,good,7,1639,north_berkeley,1639 -2,1,772,poor,16,3224,frontage_rd,3224 -0,1,379,good,11,1649,north_berkeley,1649 -2,1,732,great,6,3730,east_elmwood,3730 -3,2,1187,good,9,4964,north_berkeley,4964 -2,1,712,great,14,3707,east_elmwood,3707 -1,1,662,poor,31,1849,frontage_rd,1808.322 -0,1,339,poor,30,1452,frontage_rd,1422.96 -2,1,880,poor,49,2957,frontage_rd,2785.494 -1,1,359,poor,44,1392,frontage_rd,1325.184 -0,1,163,poor,52,1037,frontage_rd,970.632 -0,1,365,poor,22,1525,frontage_rd,1518.9 -3,2,1091,good,13,4804,north_berkeley,4804 -1,1,697,poor,62,1599,frontage_rd,1464.684 -3,2,1092,poor,45,4470,frontage_rd,4246.5 -1,1,512,poor,54,1493,frontage_rd,1391.476 -2,1,710,poor,40,2891,frontage_rd,2775.36 -2,1,655,poor,62,2629,frontage_rd,2408.164 -3,2,807,good,12,4572,north_berkeley,4572 -0,1,147,good,4,1483,north_berkeley,1483 -2,1,797,good,9,3296,north_berkeley,3296 -0,1,442,good,10,1717,north_berkeley,1717 -0,1,325,great,11,2079,east_elmwood,2079 -2,1,704,great,6,3707,east_elmwood,3707 
-0,1,495,great,13,2226,east_elmwood,2226 -3,2,957,good,11,4708,north_berkeley,4708 -2,1,"944,98",good,0,3581,north_berkeley,3581 -1,1,698,good,10,2132,north_berkeley,2132 -3,2,897,good,3,4677,north_berkeley,4677 -1,1,643,poor,23,1942,frontage_rd,1930.348 -2,1,757,good,9,3271,north_berkeley,3271 -3,2,1189,poor,26,4740,frontage_rd,4683.12 -3,2,932,good,9,4738,north_berkeley,4738 -3,2,948,good,9,4670,north_berkeley,4670 -1,1,488,poor,61,1377,frontage_rd,1264.086 -3,2,1185,poor,41,4578,frontage_rd,4385.724 -3,2,824,good,7,4646,north_berkeley,4646 -1,1,472,good,12,1896,north_berkeley,1896 -0,1,479,good,10,1770,north_berkeley,1770 -0,1,490,poor,17,1733,frontage_rd,1733 -1,1,457,great,6,2441,east_elmwood,2441 -1,1,520,good,7,1926,north_berkeley,1926 -1,1,322,great,5,2296,east_elmwood,2296 -0,1,482,poor,54,1295,frontage_rd,1206.94 -2,1,740,poor,40,2931,frontage_rd,2813.76 -2,1,760,good,0,3367,north_berkeley,3367 -2,1,758,poor,24,3120,frontage_rd,3095.04 -2,1,919,great,14,3917,east_elmwood,3917 -3,2,1011,good,4,4853,north_berkeley,4853 -1,1,665,great,6,2638,east_elmwood,2638 -3,2,1194,great,10,5429,east_elmwood,5429 -1,1,714,great,14,2526,east_elmwood,2526 -2,1,922,poor,29,3251,frontage_rd,3192.482 -1,1,649,great,9,2514,east_elmwood,2514 -1,1,380,poor,25,1651,frontage_rd,1634.49 -2,1,875,good,11,3322,north_berkeley,3322 -3,2,883,good,6,4685,north_berkeley,4685 -2,1,531,great,4,3634,east_elmwood,3634 -2,1,712,good,1,3282,north_berkeley,3282 -2,1,841,good,8,3369,north_berkeley,3369 -1,1,433,poor,60,1354,frontage_rd,1245.68 -2,1,929,poor,62,2956,frontage_rd,2707.696 -0,1,116,poor,50,998,frontage_rd,938.12 -1,1,325,good,13,1651,north_berkeley,1651 -3,2,1218,poor,60,4505,frontage_rd,4144.6 -1,1,335,poor,17,1645,frontage_rd,1645 -1,1,653,poor,28,1899,frontage_rd,1868.616 -3,2,1102,great,8,5390,east_elmwood,5390 -2,1,921,great,11,3888,east_elmwood,3888 -3,2,1207,poor,24,4809,frontage_rd,4770.528 -3,2,1054,good,9,4852,north_berkeley,4852 -3,2,1159,great,10,5383,east_elmwood,5383 
-0,1,192,poor,41,1135,frontage_rd,1087.33 -0,1,469,good,7,1755,north_berkeley,1755 -2,1,687,good,12,3166,north_berkeley,3166 -3,2,829,good,13,4590,north_berkeley,4590 -0,1,445,good,12,1724,north_berkeley,1724 -0,1,226,poor,46,1202,frontage_rd,1139.496 -1,1,466,poor,30,1672,frontage_rd,1638.56 -3,2,1131,good,0,4950,north_berkeley,4950 -1,1,465,poor,15,1787,frontage_rd,1787 -0,1,454,great,13,2206,east_elmwood,2206 -2,1,545,good,3,3150,north_berkeley,3150 -1,1,696,good,11,2113,north_berkeley,2113 -3,2,1064,poor,27,4634,frontage_rd,4569.124 -0,1,193,poor,62,942,frontage_rd,862.872 -0,1,410,poor,46,1389,frontage_rd,1316.772 -3,2,1174,good,9,4919,north_berkeley,4919 -0,1,382,great,7,2204,east_elmwood,2204 -3,2,1002,good,4,4860,north_berkeley,4860 -0,1,542,great,3,2377,east_elmwood,2377 -1,1,621,great,0,2577,east_elmwood,2577 -3,2,1159,good,3,5002,north_berkeley,5002 -2,1,741,poor,16,3158,frontage_rd,3158 -3,2,1101,great,13,5282,east_elmwood,5282 -3,2,857,great,3,5208,east_elmwood,5208 -1,1,336,great,1,2351,east_elmwood,2351 -1,1,429,poor,38,1516,frontage_rd,1461.424 -3,2,1192,great,12,5421,east_elmwood,5421 -2,1,532,poor,20,2909,frontage_rd,2909 -0,1,399,good,5,1784,north_berkeley,1784 -1,1,652,good,12,2020,north_berkeley,2020 -2,1,879,great,2,3912,east_elmwood,3912 -3,2,833,poor,30,4378,frontage_rd,4290.44 -0,1,119,good,4,1485,north_berkeley,1485 -1,1,670,good,3,2179,north_berkeley,2179 -2,1,837,good,11,3301,north_berkeley,3301 -3,2,1094,poor,36,4542,frontage_rd,4396.656 -3,2,1132,good,6,4961,north_berkeley,4961 -3,2,1089,great,8,5328,east_elmwood,5328 -1,1,483,poor,31,1663,frontage_rd,1626.414 -0,1,456,poor,38,1525,frontage_rd,1470.1 -1,1,550,good,14,1952,north_berkeley,1952 -0,1,144,good,0,1580,north_berkeley,1580 -1,1,543,poor,63,1372,frontage_rd,1254.008 -2,1,733,good,6,3281,north_berkeley,3281 -2,1,773,good,8,3293,north_berkeley,3293 -0,1,508,good,1,1928,north_berkeley,1928 -1,1,685,poor,25,1965,frontage_rd,1945.35 -0,1,402,poor,46,1324,frontage_rd,1255.152 
-2,1,571,good,12,3054,north_berkeley,3054 -1,1,336,poor,24,1580,frontage_rd,1567.36 -0,1,406,good,12,1668,north_berkeley,1668 -0,1,499,good,4,1816,north_berkeley,1816 -0,1,313,poor,40,1339,frontage_rd,1285.44 -1,1,681,good,4,2109,north_berkeley,2109 -2,1,721,poor,59,2752,frontage_rd,2537.344 -1,1,305,poor,48,1311,frontage_rd,1237.584 -3,2,923,poor,38,4418,frontage_rd,4258.952 -1,1,379,poor,34,1524,frontage_rd,1481.328 -0,1,421,great,3,2331,east_elmwood,2331 -2,1,594,poor,34,2841,frontage_rd,2761.452 -1,1,658,great,12,2491,east_elmwood,2491 -3,2,978,good,5,4820,north_berkeley,4820 -0,1,247,poor,36,1301,frontage_rd,1259.368 -2,1,812,great,12,3836,east_elmwood,3836 -1,1,326,good,9,1782,north_berkeley,1782 -1,1,661,poor,16,2004,frontage_rd,2004 -0,1,415,good,8,1753,north_berkeley,1753 -0,1,371,poor,63,1101,frontage_rd,1006.314 -3,2,1165,poor,33,4733,frontage_rd,4609.942 -3,2,915,good,1,4740,north_berkeley,4740 -0,1,270,good,8,1598,north_berkeley,1598 -0,1,239,good,7,1574,north_berkeley,1574 -3,2,1183,great,3,5474,east_elmwood,5474 -1,1,332,great,0,2302,east_elmwood,2302 -3,2,853,great,0,5187,east_elmwood,5187 -2,1,852,poor,54,2953,frontage_rd,2752.196 -2,1,764,poor,64,2751,frontage_rd,2508.912 -1,1,338,good,0,1826,north_berkeley,1826 -2,1,905,poor,46,3031,frontage_rd,2873.388 -2,1,711,good,13,3177,north_berkeley,3177 -1,1,580,poor,42,1630,frontage_rd,1558.28 -3,2,1111,poor,62,4325,frontage_rd,3961.7 -2,1,557,good,2,3181,north_berkeley,3181 -3,2,1192,poor,24,4759,frontage_rd,4720.928 -1,1,548,great,6,2463,east_elmwood,2463 -1,1,369,great,11,2278,east_elmwood,2278 -1,1,742,good,8,2120,north_berkeley,2120 -0,1,291,good,12,1546,north_berkeley,1546 -3,2,1096,great,1,5464,east_elmwood,5464 -2,1,936,poor,61,2950,frontage_rd,2708.1 -0,1,181,great,11,1964,east_elmwood,1964 -0,1,490,good,11,1760,north_berkeley,1760 -1,1,516,poor,44,1622,frontage_rd,1544.144 -1,1,344,poor,33,1508,frontage_rd,1468.792 -2,1,821,good,1,3408,north_berkeley,3408 -2,1,702,great,8,3695,east_elmwood,3695 
-3,2,882,poor,49,4218,frontage_rd,3973.356 -2,1,945,poor,21,3367,frontage_rd,3360.266 -3,2,999,good,5,4830,north_berkeley,4830 -1,1,395,poor,30,1578,frontage_rd,1546.44 -0,1,527,great,8,2387,east_elmwood,2387 -3,2,1006,poor,48,4345,frontage_rd,4101.68 -1,1,696,good,10,2103,north_berkeley,2103 -0,1,309,good,0,1697,north_berkeley,1697 -3,2,923,good,1,4774,north_berkeley,4774 -3,2,879,good,7,4674,north_berkeley,4674 -0,1,227,good,7,1593,north_berkeley,1593 -0,1,150,poor,49,1053,frontage_rd,991.926 -2,1,912,good,6,3473,north_berkeley,3473 -1,1,353,poor,54,1293,frontage_rd,1205.076 -0,1,235,poor,52,1160,frontage_rd,1085.76 -2,1,584,poor,26,2922,frontage_rd,2886.936 -0,1,505,good,9,1827,north_berkeley,1827 -1,1,489,great,2,2514,east_elmwood,2514 -1,1,694,poor,28,1931,frontage_rd,1900.104 -2,1,638,good,1,3237,north_berkeley,3237 -0,1,285,good,0,1664,north_berkeley,1664 -3,2,1041,good,8,4829,north_berkeley,4829 -1,1,622,great,6,2604,east_elmwood,2604 -0,1,290,poor,20,1527,frontage_rd,1527 -2,1,823,good,3,3385,north_berkeley,3385 -0,1,330,poor,19,1522,frontage_rd,1522 -2,1,830,great,11,3857,east_elmwood,3857 -2,1,774,good,7,3322,north_berkeley,3322 -1,1,628,poor,52,1644,frontage_rd,1538.784 -3,2,1030,great,7,5351,east_elmwood,5351 -0,1,378,great,10,2200,east_elmwood,2200 -0,1,289,poor,44,1250,frontage_rd,1190 -1,1,519,good,4,1993,north_berkeley,1993 -0,1,144,good,11,1468,north_berkeley,1468 -2,1,585,poor,36,2806,frontage_rd,2716.208 -2,1,500,good,5,3001,north_berkeley,3001 -2,1,850,poor,24,3255,frontage_rd,3228.96 -3,2,970,good,13,4645,north_berkeley,4645 -2,1,728,great,9,3688,east_elmwood,3688 -3,2,1031,great,13,5242,east_elmwood,5242 -0,1,526,great,2,2393,east_elmwood,2393 -0,1,505,good,4,1882,north_berkeley,1882 -0,1,205,good,0,1579,north_berkeley,1579 -0,1,508,poor,49,1393,frontage_rd,1312.206 -1,1,471,good,13,1876,north_berkeley,1876 -1,1,359,poor,24,1628,frontage_rd,1614.976 -1,1,739,poor,64,1617,frontage_rd,1474.704 -3,2,1164,good,6,4945,north_berkeley,4945 
-1,1,509,great,13,2402,east_elmwood,2402 -2,1,804,good,13,3227,north_berkeley,3227 -0,1,502,good,11,1748,north_berkeley,1748 -3,2,1152,poor,52,4524,frontage_rd,4234.464 -2,1,934,great,5,3947,east_elmwood,3947 -3,2,823,great,4,5149,east_elmwood,5149 -2,1,556,great,4,3646,east_elmwood,3646 -0,1,196,poor,41,1226,frontage_rd,1174.508 -2,1,665,poor,26,3048,frontage_rd,3011.424 -1,1,458,good,11,1826,north_berkeley,1826 -1,1,664,good,10,2024,north_berkeley,2024 -3,2,813,poor,26,4417,frontage_rd,4363.996 -3,2,1105,great,5,5374,east_elmwood,5374 -2,1,732,good,13,3152,north_berkeley,3152 -1,1,364,poor,49,1327,frontage_rd,1250.034 -1,1,498,good,2,1949,north_berkeley,1949 -1,1,731,good,13,2137,north_berkeley,2137 -3,2,1004,good,1,4870,north_berkeley,4870 -0,1,417,great,9,2265,east_elmwood,2265 -2,1,771,good,9,3328,north_berkeley,3328 -2,1,503,poor,32,2763,frontage_rd,2696.688 -3,2,1080,great,0,5459,east_elmwood,5459 -1,1,458,great,3,2384,east_elmwood,2384 -3,2,1240,poor,27,4817,frontage_rd,4749.562 -1,1,703,good,3,2174,north_berkeley,2174 -3,2,1100,good,10,4819,north_berkeley,4819 -3,2,1233,good,1,5037,north_berkeley,5037 -2,1,748,poor,25,3113,frontage_rd,3081.87 -3,2,954,poor,22,4632,frontage_rd,4613.472 -1,1,512,poor,49,1552,frontage_rd,1461.984 -0,1,141,great,9,1996,east_elmwood,1996 -2,1,888,great,1,3970,east_elmwood,3970 -2,1,944,poor,54,3019,frontage_rd,2813.708 -3,2,853,poor,50,4219,frontage_rd,3965.86 -2,1,686,good,12,3201,north_berkeley,3201 -2,1,763,poor,48,2850,frontage_rd,2690.4 -3,2,1107,poor,43,4567,frontage_rd,4356.918 -1,1,615,poor,31,1757,frontage_rd,1718.346 -3,2,1132,good,4,4938,north_berkeley,4938 -3,2,1038,poor,26,4655,frontage_rd,4599.14 -1,1,701,great,11,2570,east_elmwood,2570 -2,1,510,good,6,3053,north_berkeley,3053 -3,2,951,poor,57,4229,frontage_rd,3916.054 -2,1,764,poor,51,2878,frontage_rd,2699.564 -3,2,1148,good,0,5024,north_berkeley,5024 -2,1,827,poor,50,2966,frontage_rd,2788.04 -1,1,664,poor,52,1613,frontage_rd,1509.768 
-3,2,1061,good,11,4811,north_berkeley,4811 -0,1,449,good,7,1773,north_berkeley,1773 -0,1,284,good,2,1695,north_berkeley,1695 -3,2,1191,good,7,5020,north_berkeley,5020 -3,2,1039,good,13,4765,north_berkeley,4765 -1,1,365,good,2,1828,north_berkeley,1828 -2,1,781,good,11,3299,north_berkeley,3299 -3,2,889,great,3,5256,east_elmwood,5256 -2,1,827,good,9,3305,north_berkeley,3305 -1,1,414,great,6,2379,east_elmwood,2379 -1,1,454,good,12,1793,north_berkeley,1793 -0,1,201,good,5,1594,north_berkeley,1594 -0,1,320,poor,64,1085,frontage_rd,989.52 -2,1,946,poor,56,3027,frontage_rd,2809.056 -3,2,1076,great,13,5332,east_elmwood,5332 -1,1,643,good,14,2034,north_berkeley,2034 -2,1,791,poor,62,2738,frontage_rd,2508.008 -1,1,580,great,4,2571,east_elmwood,2571 -0,1,104,great,0,2043,east_elmwood,2043 -1,1,584,good,12,1916,north_berkeley,1916 -3,2,1006,good,4,4784,north_berkeley,4784 -1,1,516,good,13,1915,north_berkeley,1915 -0,1,447,poor,15,1730,frontage_rd,1730 -1,1,722,good,8,2169,north_berkeley,2169 -3,2,811,poor,34,4343,frontage_rd,4221.396 -2,1,560,great,12,3517,east_elmwood,3517 -0,1,397,good,13,1694,north_berkeley,1694 -3,2,1061,good,2,4911,north_berkeley,4911 -3,2,888,good,10,4679,north_berkeley,4679 -1,1,358,good,0,1828,north_berkeley,1828 -3,2,982,great,12,5245,east_elmwood,5245 -2,1,864,good,14,3307,north_berkeley,3307 -0,1,188,poor,25,1364,frontage_rd,1350.36 -1,1,523,good,6,1942,north_berkeley,1942 -3,2,865,great,0,5248,east_elmwood,5248 -0,1,353,good,5,1722,north_berkeley,1722 -0,1,407,good,7,1753,north_berkeley,1753 -1,1,306,great,4,2226,east_elmwood,2226 -0,1,388,poor,60,1152,frontage_rd,1059.84 -0,1,110,poor,22,1319,frontage_rd,1313.724 -1,1,553,poor,35,1689,frontage_rd,1638.33 -1,1,339,poor,15,1698,frontage_rd,1698 -3,2,897,poor,17,4543,frontage_rd,4543 -2,1,574,good,9,3036,north_berkeley,3036 -0,1,449,good,10,1731,north_berkeley,1731 -0,1,441,good,5,1799,north_berkeley,1799 -0,1,502,great,4,2330,east_elmwood,2330 -2,1,757,poor,44,2956,frontage_rd,2814.112 
-3,2,962,good,3,4830,north_berkeley,4830 -1,1,649,great,4,2612,east_elmwood,2612 -0,1,256,good,11,1577,north_berkeley,1577 -1,1,747,great,12,2654,east_elmwood,2654 -3,2,833,great,10,5104,east_elmwood,5104 -1,1,687,good,12,2060,north_berkeley,2060 -2,1,552,good,8,3065,north_berkeley,3065 -3,2,1241,great,2,5569,east_elmwood,5569 -0,1,459,poor,61,1298,frontage_rd,1191.564 -3,2,873,poor,26,4444,frontage_rd,4390.672 -0,1,152,poor,22,1356,frontage_rd,1350.576 -3,2,922,good,7,4670,north_berkeley,4670 -1,1,720,good,3,2198,north_berkeley,2198 -2,1,931,good,2,3497,north_berkeley,3497 -0,1,524,good,6,1893,north_berkeley,1893 -2,1,555,poor,54,2575,frontage_rd,2399.9 -0,1,159,good,7,1488,north_berkeley,1488 -1,1,380,good,8,1768,north_berkeley,1768 -1,1,685,good,0,2155,north_berkeley,2155 -2,1,848,good,11,3354,north_berkeley,3354 -1,1,547,poor,27,1751,frontage_rd,1726.486 -2,1,896,good,12,3363,north_berkeley,3363 -0,1,269,good,6,1560,north_berkeley,1560 -3,2,903,good,0,4764,north_berkeley,4764 -2,1,615,great,4,3633,east_elmwood,3633 -2,1,881,poor,35,3161,frontage_rd,3066.17 -0,1,331,good,13,1583,north_berkeley,1583 -0,1,498,poor,64,1238,frontage_rd,1129.056 -0,1,308,poor,28,1387,frontage_rd,1364.808 -0,1,103,great,3,1973,east_elmwood,1973 -3,2,1044,poor,52,4405,frontage_rd,4123.08 -2,1,788,poor,33,3010,frontage_rd,2931.74 -0,1,524,poor,33,1621,northwest,1578.854 -1,1,397,poor,25,1696,northwest,1679.04 -2,1,876,poor,27,3215,northwest,3169.99 -2,1,751,poor,64,2677,northwest,2441.424 -2,1,622,poor,32,2886,northwest,2816.736 -0,1,316,good,10,1596,west_welmwood,1596 -3,2,1171,poor,54,4473,northwest,4168.836 -3,2,1108,good,5,4875,west_welmwood,4875 -2,1,541,good,0,3103,west_welmwood,3103 -3,2,865,good,12,4643,west_welmwood,4643 -3,2,1165,great,3,5491,east_elmwood,5491 -2,1,728,good,2,3280,west_welmwood,3280 -0,1,298,great,13,2071,east_elmwood,2071 -3,2,923,good,12,4683,west_welmwood,4683 -0,1,274,great,5,2130,east_elmwood,2130 -0,1,349,poor,32,1444,northwest,1409.344 
-2,1,796,great,4,3896,east_elmwood,3896 -1,1,726,poor,28,1898,northwest,1867.632 -2,1,501,great,5,3542,east_elmwood,3542 -2,1,714,good,4,3284,west_welmwood,3284 -3,2,988,great,4,5289,east_elmwood,5289 -2,1,546,poor,34,2757,northwest,2679.804 -0,1,484,poor,20,1678,northwest,1678 -3,2,1230,good,5,5059,west_welmwood,5059 -0,1,142,good,5,1492,west_welmwood,1492 -2,1,774,poor,53,2872,northwest,2682.448 -2,1,902,good,11,3375,west_welmwood,3375 -0,1,457,good,11,1763,west_welmwood,1763 -3,2,1034,good,7,4828,west_welmwood,4828 -3,2,838,poor,25,4412,northwest,4367.88 -1,1,503,good,14,1900,west_welmwood,1900 -0,1,445,good,7,1737,west_welmwood,1737 -2,1,914,poor,34,3189,northwest,3099.708 -2,1,867,great,6,3952,east_elmwood,3952 -2,1,583,good,4,3109,west_welmwood,3109 -0,1,161,great,7,2014,east_elmwood,2014 -1,1,707,good,14,2058,west_welmwood,2058 -1,1,343,great,9,2270,east_elmwood,2270 -2,1,746,great,14,3710,east_elmwood,3710 -2,1,706,good,14,3151,west_welmwood,3151 -1,1,610,good,5,2047,west_welmwood,2047 -3,2,1205,good,12,4969,west_welmwood,4969 -3,2,827,good,7,4655,west_welmwood,4655 -3,2,1055,good,0,4930,west_welmwood,4930 -2,1,625,good,14,3083,west_welmwood,3083 -2,1,630,good,14,3053,west_welmwood,3053 -1,1,654,poor,59,1575,northwest,1452.15 -3,2,1195,poor,17,4831,northwest,4831 -3,2,1227,good,11,4951,west_welmwood,4951 -3,2,1242,good,3,5081,west_welmwood,5081 -2,1,881,good,11,3337,west_welmwood,3337 -1,1,689,poor,35,1880,northwest,1823.6 -2,1,501,poor,35,2704,northwest,2622.88 -1,1,412,poor,21,1751,northwest,1747.498 -3,2,1081,poor,15,4793,northwest,4793 -2,1,725,great,2,3807,east_elmwood,3807 -2,1,768,great,11,3790,east_elmwood,3790 -0,1,152,good,2,1530,west_welmwood,1530 -3,2,1074,good,9,4804,west_welmwood,4804 -1,1,559,good,12,1890,west_welmwood,1890 -2,1,706,great,10,3747,east_elmwood,3747 -2,1,738,poor,23,3095,northwest,3076.43 -2,1,893,great,4,3962,east_elmwood,3962 -1,1,468,great,7,2405,east_elmwood,2405 -2,1,652,poor,22,3017,northwest,3004.932 
-1,1,327,good,6,1769,west_welmwood,1769 -2,1,899,great,3,3930,east_elmwood,3930 -3,2,997,good,11,4733,west_welmwood,4733 -3,2,920,poor,20,4602,northwest,4602 -1,1,300,poor,55,1211,northwest,1126.23 -3,2,1174,great,0,5496,east_elmwood,5496 -1,1,679,good,14,2049,west_welmwood,2049 -3,2,887,good,0,4781,west_welmwood,4781 -2,1,856,good,9,3377,west_welmwood,3377 -0,1,216,good,14,1484,west_welmwood,1484 -0,1,476,great,3,2312,east_elmwood,2312 -2,1,592,good,12,3042,west_welmwood,3042 -0,1,325,poor,42,1337,northwest,1278.172 -2,1,790,poor,19,3170,northwest,3170 -1,1,354,good,12,1728,west_welmwood,1728 -1,1,529,good,12,1949,west_welmwood,1949 -1,1,522,great,12,2420,east_elmwood,2420 -0,1,358,good,13,1660,west_welmwood,1660 -0,1,468,poor,47,1447,northwest,1368.862 -0,1,429,great,11,2255,east_elmwood,2255 -3,2,1176,great,0,5535,east_elmwood,5535 -0,1,361,good,12,1656,west_welmwood,1656 -3,2,1214,good,2,5046,west_welmwood,5046 -1,1,738,good,3,2224,west_welmwood,2224 -3,2,852,poor,59,4128,northwest,3806.016 -3,2,1178,good,9,4951,west_welmwood,4951 -3,2,956,good,5,4746,west_welmwood,4746 -1,1,568,good,14,1884,west_welmwood,1884 -1,1,704,great,7,2638,east_elmwood,2638 -1,1,519,poor,64,1386,northwest,1264.032 -1,1,737,good,5,2185,west_welmwood,2185 -1,1,455,poor,47,1510,northwest,1428.46 -0,1,346,poor,61,1118,northwest,1026.324 -0,1,285,good,5,1627,west_welmwood,1627 -3,2,829,good,0,4646,west_welmwood,4646 -2,1,672,good,6,3202,west_welmwood,3202 -1,1,451,good,6,1873,west_welmwood,1873 -1,1,697,good,8,2119,west_welmwood,2119 -2,1,691,poor,34,2913,northwest,2831.436 -1,1,610,poor,49,1598,northwest,1505.316 -0,1,410,great,2,2287,east_elmwood,2287 -2,1,529,great,2,3606,east_elmwood,3606 -0,1,273,good,0,1656,west_welmwood,1656 -0,1,482,poor,55,1299,northwest,1208.07 -0,1,170,good,10,1421,west_welmwood,1421 -1,1,565,good,2,2001,west_welmwood,2001 -1,1,712,good,2,2186,west_welmwood,2186 -0,1,246,poor,16,1465,northwest,1465 -1,1,379,good,12,1725,west_welmwood,1725 
-2,1,575,good,3,3095,west_welmwood,3095 -1,1,477,good,4,1900,west_welmwood,1900 -3,2,1081,good,9,4847,west_welmwood,4847 -2,1,908,good,13,3348,west_welmwood,3348 -3,2,1119,poor,47,4530,northwest,4285.38 -3,2,943,good,13,4693,west_welmwood,4693 -2,1,763,good,2,3339,west_welmwood,3339 -3,2,1192,poor,39,4630,northwest,4454.06 -2,1,600,great,14,3547,east_elmwood,3547 -0,1,361,poor,60,1186,northwest,1091.12 -2,1,847,poor,29,3160,northwest,3103.12 -0,1,206,poor,42,1215,northwest,1161.54 -1,1,300,great,12,2133,east_elmwood,2133 -2,1,927,good,4,3440,west_welmwood,3440 -0,1,524,poor,15,1726,northwest,1726 -1,1,469,poor,38,1637,northwest,1578.068 -0,1,193,poor,40,1165,northwest,1118.4 -2,1,627,poor,29,2924,northwest,2871.368 -2,1,603,good,6,3180,west_welmwood,3180 -0,1,424,poor,21,1639,northwest,1635.722 -2,1,785,poor,28,3149,northwest,3098.616 -3,2,1020,poor,15,4676,northwest,4676 -2,1,526,great,3,3602,east_elmwood,3602 -2,1,555,good,11,3089,west_welmwood,3089 -2,1,730,great,7,3781,east_elmwood,3781 -3,2,1228,good,10,4955,west_welmwood,4955 -0,1,437,poor,30,1585,northwest,1553.3 -1,1,511,poor,31,1746,northwest,1707.588 -3,2,1125,great,7,5430,east_elmwood,5430 -2,1,838,poor,16,3259,northwest,3259 -2,1,736,great,4,3837,east_elmwood,3837 -2,1,591,great,3,3670,east_elmwood,3670 -0,1,537,good,9,1858,west_welmwood,1858 -2,1,690,good,3,3216,west_welmwood,3216 -0,1,475,poor,36,1486,northwest,1438.448 -0,1,245,great,9,2039,east_elmwood,2039 -2,1,568,good,0,3137,west_welmwood,3137 -1,1,466,poor,23,1764,northwest,1753.416 -0,1,489,great,12,2311,east_elmwood,2311 -1,1,727,good,4,2217,west_welmwood,2217 -2,1,541,good,0,3148,west_welmwood,3148 -2,1,531,good,10,3053,west_welmwood,3053 -0,1,548,good,3,1931,west_welmwood,1931 -1,1,548,good,8,1975,west_welmwood,1975 -1,1,394,good,6,1800,west_welmwood,1800 -3,2,1231,good,12,4988,west_welmwood,4988 -2,1,532,great,3,3614,east_elmwood,3614 -0,1,256,good,4,1582,west_welmwood,1582 -2,1,613,good,1,3202,west_welmwood,3202 
-3,2,1186,good,13,4928,west_welmwood,4928 -1,1,432,good,1,1910,west_welmwood,1910 -1,1,351,good,7,1804,west_welmwood,1804 -1,1,300,poor,27,1528,northwest,1506.608 -0,1,325,poor,51,1220,northwest,1144.36 -3,2,1101,great,11,5388,east_elmwood,5388 -3,2,1029,good,0,4831,west_welmwood,4831 -1,1,662,great,14,2517,east_elmwood,2517 -2,1,611,poor,48,2772,northwest,2616.768 -3,2,1155,good,7,4951,west_welmwood,4951 -2,1,683,great,14,3670,east_elmwood,3670 -0,1,361,great,13,2098,east_elmwood,2098 -1,1,589,poor,20,1872,northwest,1872 -2,1,764,poor,40,2915,northwest,2798.4 -1,1,522,good,12,1945,west_welmwood,1945 -0,1,227,poor,52,1120,northwest,1048.32 -2,1,525,good,3,3112,west_welmwood,3112 -2,1,656,good,4,3234,west_welmwood,3234 -1,1,630,poor,61,1505,northwest,1381.59 -3,2,842,great,14,5011,east_elmwood,5011 -0,1,520,poor,33,1552,northwest,1511.648 -3,2,1013,poor,46,4403,northwest,4174.044 -2,1,747,poor,20,3170,northwest,3170 -0,1,217,great,1,2075,east_elmwood,2075 -0,1,134,poor,31,1185,northwest,1158.93 -3,2,813,good,7,4622,west_welmwood,4622 -2,1,722,good,0,3337,west_welmwood,3337 -2,1,710,poor,45,2845,northwest,2702.75 -1,1,377,great,7,2353,east_elmwood,2353 -3,2,1169,poor,57,4441,northwest,4112.366 -3,2,881,good,4,4641,west_welmwood,4641 -0,1,415,great,5,2283,east_elmwood,2283 -1,1,401,poor,17,1705,northwest,1705 -1,1,552,poor,64,1460,northwest,1331.52 -3,2,894,good,9,4639,west_welmwood,4639 -2,1,925,good,8,3414,west_welmwood,3414 -2,1,661,good,10,3135,west_welmwood,3135 -0,1,233,poor,54,1123,northwest,1046.636 -0,1,264,poor,15,1521,northwest,1521 -1,1,445,poor,48,1482,northwest,1399.008 -2,1,849,good,10,3308,west_welmwood,3308 -2,1,852,poor,31,3116,northwest,3047.448 -2,1,901,great,6,3977,east_elmwood,3977 -2,1,934,great,14,3876,east_elmwood,3876 -1,1,691,good,12,2021,west_welmwood,2021 -2,1,722,great,9,3734,east_elmwood,3734 -3,2,1000,good,9,4741,west_welmwood,4741 -2,1,573,great,3,3598,east_elmwood,3598 -1,1,732,poor,42,1858,northwest,1776.248 
-0,1,319,great,13,2068,east_elmwood,2068 -0,1,101,poor,53,944,northwest,881.696 -2,1,838,poor,42,3053,northwest,2918.668 -2,1,518,poor,57,2571,northwest,2380.746 -2,1,876,great,6,3893,east_elmwood,3893 -0,1,482,good,13,1741,west_welmwood,1741 -2,1,789,good,7,3342,west_welmwood,3342 -3,2,955,poor,18,4627,northwest,4627 -0,1,102,great,5,1924,east_elmwood,1924 -0,1,373,poor,50,1299,northwest,1221.06 -3,2,1056,poor,25,4679,northwest,4632.21 -0,1,184,great,1,2098,east_elmwood,2098 -0,1,107,good,14,1373,west_welmwood,1373 -0,1,430,good,3,1813,west_welmwood,1813 -0,1,255,poor,26,1382,northwest,1365.416 -3,2,1242,great,12,5514,east_elmwood,5514 -3,2,1124,good,13,4817,west_welmwood,4817 -2,1,833,great,2,3943,east_elmwood,3943 -0,1,417,good,11,1660,west_welmwood,1660 -0,1,254,good,8,1566,west_welmwood,1566 -1,1,481,good,4,1986,west_welmwood,1986 -2,1,611,good,4,3193,west_welmwood,3193 -1,1,639,poor,20,1948,northwest,1948 -0,1,455,good,7,1799,west_welmwood,1799 -2,1,881,poor,54,2903,northwest,2705.596 -1,1,364,great,12,2225,east_elmwood,2225 -0,1,172,good,8,1444,west_welmwood,1444 -1,1,643,great,1,2637,east_elmwood,2637 -2,1,663,good,4,3269,west_welmwood,3269 -2,1,931,good,5,3524,west_welmwood,3524 -3,2,1143,great,2,5488,east_elmwood,5488 -2,1,681,good,13,3185,west_welmwood,3185 -1,1,426,good,11,1857,west_welmwood,1857 -3,2,1070,great,3,5342,east_elmwood,5342 -1,1,388,good,14,1744,west_welmwood,1744 -1,1,722,good,2,2219,west_welmwood,2219 -2,1,621,great,3,3665,east_elmwood,3665 -3,2,1018,poor,45,4452,northwest,4229.4 -3,2,845,poor,39,4284,northwest,4121.208 -0,1,323,poor,24,1529,northwest,1516.768 -1,1,558,good,11,1917,west_welmwood,1917 -2,1,813,great,10,3819,east_elmwood,3819 -1,1,601,poor,36,1771,northwest,1714.328 -1,1,617,good,8,1999,west_welmwood,1999 -2,1,851,good,4,3414,west_welmwood,3414 -2,1,828,good,14,3248,west_welmwood,3248 -3,2,825,good,0,4711,west_welmwood,4711 -1,1,386,good,13,1713,west_welmwood,1713 -3,2,895,good,2,4769,west_welmwood,4769 
-1,1,443,good,14,1845,west_welmwood,1845 -3,2,846,poor,56,4124,northwest,3827.072 -3,2,896,great,2,5257,east_elmwood,5257 -3,2,1014,poor,40,4482,northwest,4302.72 -1,1,378,good,8,1807,west_welmwood,1807 -3,2,1126,poor,41,4551,northwest,4359.858 -2,1,929,poor,47,3039,northwest,2874.894 -2,1,739,poor,57,2753,northwest,2549.278 -3,2,1145,good,13,4887,west_welmwood,4887 -1,1,667,poor,54,1581,northwest,1473.492 -2,1,836,good,4,3361,west_welmwood,3361 -2,1,758,good,9,3265,west_welmwood,3265 -3,2,824,good,8,4570,west_welmwood,4570 -1,1,506,good,12,1895,west_welmwood,1895 -1,1,544,poor,62,1429,northwest,1308.964 -0,1,121,good,2,1512,west_welmwood,1512 -1,1,638,poor,34,1759,northwest,1709.748 -0,1,135,good,4,1480,west_welmwood,1480 -1,1,385,good,3,1874,west_welmwood,1874 -2,1,589,great,8,3651,east_elmwood,3651 -0,1,511,good,4,1905,west_welmwood,1905 -2,1,663,poor,63,2589,northwest,2366.346 -1,1,739,poor,56,1677,northwest,1556.256 -1,1,610,poor,39,1673,northwest,1609.426 -3,2,818,great,8,5081,east_elmwood,5081 -3,2,1202,good,5,4988,west_welmwood,4988 -0,1,164,good,7,1496,west_welmwood,1496 -0,1,336,poor,60,1153,northwest,1060.76 -3,2,1243,good,10,4950,west_welmwood,4950 -2,1,563,great,11,3503,east_elmwood,3503 -1,1,425,good,10,1818,west_welmwood,1818 -2,1,849,poor,45,2993,northwest,2843.35 -1,1,701,good,2,2170,west_welmwood,2170 -1,1,689,great,11,2557,east_elmwood,2557 -3,2,958,poor,61,4202,northwest,3857.436 -2,1,732,poor,16,3201,northwest,3201 -1,1,318,poor,49,1368,northwest,1288.656 -1,1,471,good,8,1852,west_welmwood,1852 -0,1,378,great,3,2205,east_elmwood,2205 -1,1,582,poor,30,1800,northwest,1764 -1,1,326,great,12,2239,east_elmwood,2239 -1,1,307,good,0,1856,west_welmwood,1856 -3,2,1206,good,10,4955,west_welmwood,4955 -1,1,547,great,13,2367,east_elmwood,2367 -0,1,215,good,10,1530,west_welmwood,1530 -1,1,454,good,2,1889,west_welmwood,1889 -1,1,367,great,5,2319,east_elmwood,2319 -1,1,373,good,4,1831,west_welmwood,1831 -3,2,921,good,12,4672,west_welmwood,4672 
-0,1,230,great,5,2105,east_elmwood,2105 -3,2,1110,poor,47,4482,northwest,4239.972 -3,2,1148,good,9,4891,west_welmwood,4891 -0,1,190,great,14,1922,east_elmwood,1922 -0,1,480,great,7,2324,east_elmwood,2324 -2,1,921,poor,39,3110,northwest,2991.82 -0,1,127,great,9,1948,east_elmwood,1948 -1,1,528,great,5,2450,east_elmwood,2450 -1,1,710,good,5,2193,west_welmwood,2193 -0,1,349,great,12,2117,east_elmwood,2117 -0,1,467,good,13,1692,west_welmwood,1692 -3,2,1137,poor,21,4813,northwest,4803.374 -0,1,394,poor,40,1435,northwest,1377.6 -1,1,659,great,6,2646,east_elmwood,2646 -0,1,410,poor,53,1248,northwest,1165.632 -3,2,1158,poor,54,4464,northwest,4160.448 -0,1,401,poor,52,1312,northwest,1228.032 -1,1,641,poor,46,1680,northwest,1592.64 -1,1,599,great,6,2552,east_elmwood,2552 -1,1,614,good,4,2103,west_welmwood,2103 -0,1,443,good,8,1759,west_welmwood,1759 -0,1,266,poor,44,1196,northwest,1138.592 -1,1,627,good,10,1985,west_welmwood,1985 -0,1,476,poor,30,1530,northwest,1499.4 -1,1,657,good,3,2163,west_welmwood,2163 -2,1,839,poor,35,3073,northwest,2980.81 -3,2,868,poor,28,4447,northwest,4375.848 -1,1,454,good,12,1809,west_welmwood,1809 -0,1,319,poor,63,1088,northwest,994.432 -1,1,537,poor,56,1508,northwest,1399.424 -3,2,1235,great,6,5500,east_elmwood,5500 -0,1,123,good,13,1393,west_welmwood,1393 -0,1,249,great,10,2060,east_elmwood,2060 -2,1,817,great,4,3866,east_elmwood,3866 -2,1,711,poor,43,2882,northwest,2749.428 -1,1,479,good,0,1978,west_welmwood,1978 -0,1,361,poor,27,1540,northwest,1518.44 -3,2,918,poor,45,4329,northwest,4112.55 -2,1,554,good,11,3017,west_welmwood,3017 -1,1,314,poor,23,1543,northwest,1533.742 -2,1,766,good,0,3387,west_welmwood,3387 -3,2,947,good,2,4765,west_welmwood,4765 -0,1,245,great,7,2066,east_elmwood,2066 -1,1,551,good,3,1971,west_welmwood,1971 -3,2,1166,good,3,5008,west_welmwood,5008 -3,2,1064,good,12,4793,west_welmwood,4793 -3,2,1102,poor,26,4711,northwest,4654.468 -0,1,323,good,7,1603,west_welmwood,1603 -3,2,991,great,3,5325,east_elmwood,5325 
-0,1,239,poor,18,1500,northwest,1500 -3,2,1037,good,6,4815,west_welmwood,4815 -2,1,949,good,8,3489,west_welmwood,3489 -1,1,442,good,12,1865,west_welmwood,1865 -3,2,901,great,2,5212,east_elmwood,5212 -3,2,898,good,3,4765,west_welmwood,4765 -3,2,1090,poor,59,4368,northwest,4027.296 -1,1,747,great,0,2754,east_elmwood,2754 -2,1,719,great,2,3842,east_elmwood,3842 -0,1,473,good,0,1915,west_welmwood,1915 -1,1,483,good,2,1957,west_welmwood,1957 -2,1,773,good,10,3303,west_welmwood,3303 -3,2,852,good,6,4659,west_welmwood,4659 -1,1,691,great,8,2610,east_elmwood,2610 -2,1,763,good,1,3397,west_welmwood,3397 -2,1,560,poor,46,2736,northwest,2593.728 -2,1,581,great,5,3616,east_elmwood,3616 -0,1,296,good,10,1599,west_welmwood,1599 -2,1,585,great,6,3634,east_elmwood,3634 -1,1,535,good,12,1928,west_welmwood,1928 -1,1,698,poor,35,1846,northwest,1790.62 -2,1,629,poor,50,2695,northwest,2533.3 -0,1,104,poor,61,943,northwest,865.674 -2,1,904,good,14,3386,west_welmwood,3386 -3,2,1100,great,10,5348,east_elmwood,5348 -0,1,212,poor,15,1414,northwest,1414 -1,1,491,poor,38,1577,northwest,1520.228 -2,1,862,great,5,3869,east_elmwood,3869 -3,2,900,poor,59,4155,northwest,3830.91 -3,2,867,good,5,4617,west_welmwood,4617 -1,1,362,great,12,2216,east_elmwood,2216 -1,1,576,great,14,2422,east_elmwood,2422 -0,1,300,great,5,2181,east_elmwood,2181 -3,2,1017,poor,33,4487,northwest,4370.338 -2,1,546,great,1,3672,east_elmwood,3672 -2,1,589,poor,48,2729,northwest,2576.176 -1,1,517,good,5,1948,west_welmwood,1948 -0,1,289,great,13,2066,east_elmwood,2066 -0,1,272,poor,27,1418,northwest,1398.148 -3,2,1222,good,2,5038,west_welmwood,5038 -0,1,410,good,8,1742,west_welmwood,1742 -3,2,1199,good,1,5084,west_welmwood,5084 -1,1,477,good,2,1948,west_welmwood,1948 -2,1,727,poor,61,2708,northwest,2485.944 -2,1,516,good,10,3018,west_welmwood,3018 -1,1,375,poor,45,1440,northwest,1368 -3,2,959,good,1,4800,west_welmwood,4800 -3,2,1031,great,4,5380,east_elmwood,5380 -1,1,465,good,11,1810,west_welmwood,1810 
-3,2,1181,good,7,4930,west_welmwood,4930 -0,1,307,poor,63,1120,northwest,1023.68 -3,2,1170,good,11,4880,west_welmwood,4880 -3,2,1093,great,7,5356,east_elmwood,5356 -3,2,1219,good,9,4951,west_welmwood,4951 -2,1,539,poor,42,2709,northwest,2589.804 -1,1,462,great,1,2462,east_elmwood,2462 -3,2,1211,good,5,5032,west_welmwood,5032 -0,1,530,good,13,1837,west_welmwood,1837 -0,1,177,good,5,1496,west_welmwood,1496 -0,1,116,good,14,1424,west_welmwood,1424 -0,1,494,poor,45,1493,northwest,1418.35 -1,1,685,good,4,2173,west_welmwood,2173 -0,1,515,great,7,2389,east_elmwood,2389 -1,1,398,good,1,1900,west_welmwood,1900 -0,1,408,good,0,1839,west_welmwood,1839 -1,1,694,good,0,2165,west_welmwood,2165 -0,1,499,great,5,2324,east_elmwood,2324 -1,1,558,good,6,1974,west_welmwood,1974 -2,1,603,poor,63,2610,northwest,2385.54 -0,1,448,great,9,2260,east_elmwood,2260 -2,1,818,good,3,3409,west_welmwood,3409 -1,1,586,great,4,2535,east_elmwood,2535 -2,1,770,poor,30,3114,northwest,3051.72 -1,1,675,good,13,2053,west_welmwood,2053 -3,2,1024,poor,28,4560,northwest,4487.04 -2,1,532,good,12,3019,west_welmwood,3019 -3,2,816,poor,47,4210,northwest,3982.66 -2,1,838,good,8,3341,west_welmwood,3341 -3,2,1059,great,4,5329,east_elmwood,5329 -1,1,637,good,3,2119,west_welmwood,2119 -0,1,464,good,3,1799,west_welmwood,1799 -2,1,767,good,4,3288,west_welmwood,3288 -2,1,577,great,5,3618,east_elmwood,3618 -1,1,403,poor,28,1619,northwest,1593.096 -2,1,751,good,8,3255,west_welmwood,3255 -0,1,304,great,2,2219,east_elmwood,2219 -2,1,868,poor,41,3033,northwest,2905.614 -0,1,440,great,9,2293,east_elmwood,2293 -2,1,729,great,1,3806,east_elmwood,3806 -2,1,772,good,12,3281,west_welmwood,3281 -1,1,551,poor,18,1840,northwest,1840 -1,1,319,poor,44,1411,northwest,1343.272 -1,1,644,poor,63,1484,northwest,1356.376 -1,1,586,poor,23,1862,northwest,1850.828 -3,2,893,poor,31,4418,northwest,4320.804 -1,1,413,great,3,2343,east_elmwood,2343 -1,1,721,good,14,2031,west_welmwood,2031 -2,1,844,good,7,3358,west_welmwood,3358 
-3,2,1247,good,9,4992,west_welmwood,4992 -1,1,686,great,6,2623,east_elmwood,2623 -3,2,975,good,8,4701,west_welmwood,4701 -0,1,487,good,13,1722,west_welmwood,1722 -0,1,115,good,5,1500,west_welmwood,1500 -1,1,555,poor,45,1608,northwest,1527.6 -0,1,194,good,5,1550,west_welmwood,1550 -2,1,683,poor,40,2855,northwest,2740.8 -0,1,498,good,11,1751,west_welmwood,1751 -0,1,540,poor,44,1516,northwest,1443.232 -3,2,1138,good,1,4982,west_welmwood,4982 -1,1,518,great,1,2486,east_elmwood,2486 -2,1,667,poor,37,2933,northwest,2833.278 -2,1,935,good,2,3512,west_welmwood,3512 -3,2,1160,great,4,5431,east_elmwood,5431 -0,1,167,good,13,1401,west_welmwood,1401 -2,1,878,poor,53,2968,northwest,2772.112 -2,1,705,good,5,3267,west_welmwood,3267 -0,1,256,good,3,1657,west_welmwood,1657 -2,1,886,good,11,3369,west_welmwood,3369 -2,1,864,great,0,3929,east_elmwood,3929 -1,1,523,good,0,2046,west_welmwood,2046 -2,1,924,poor,46,3055,northwest,2896.14 -1,1,486,great,13,2383,east_elmwood,2383 -1,1,353,good,6,1744,west_welmwood,1744 -0,1,133,poor,30,1187,northwest,1163.26 -1,1,577,poor,22,1884,northwest,1876.464 -1,1,499,great,6,2486,east_elmwood,2486 -0,1,325,great,1,2170,east_elmwood,2170 -1,1,578,poor,37,1687,northwest,1629.642 -3,2,1032,good,12,4788,west_welmwood,4788 -0,1,229,good,2,1564,west_welmwood,1564 -1,1,539,great,14,2353,east_elmwood,2353 -0,1,538,good,10,1882,west_welmwood,1882 -2,1,564,good,9,3047,west_welmwood,3047 -2,1,697,great,5,3766,east_elmwood,3766 -2,1,629,good,13,3132,west_welmwood,3132 -0,1,530,great,14,2329,east_elmwood,2329 -2,1,688,good,9,3235,west_welmwood,3235 -2,1,925,good,1,3469,west_welmwood,3469 -1,1,537,good,8,1988,west_welmwood,1988 -3,2,1029,great,13,5294,east_elmwood,5294 -1,1,712,good,0,2230,west_welmwood,2230 -3,2,898,good,10,4610,west_welmwood,4610 -2,1,749,good,9,3249,west_welmwood,3249 -1,1,747,good,11,2164,west_welmwood,2164 -0,1,267,poor,33,1296,northwest,1262.304 -3,2,1117,poor,44,4487,northwest,4271.624 -1,1,581,poor,59,1527,northwest,1407.894 
-1,1,687,poor,47,1732,northwest,1638.472 -2,1,683,good,0,3254,west_welmwood,3254 -1,1,587,poor,18,1908,northwest,1908 -0,1,477,poor,44,1467,northwest,1396.584 -3,2,944,good,13,4625,west_welmwood,4625 -0,1,511,great,1,2420,east_elmwood,2420 -3,2,932,poor,36,4444,northwest,4301.792 -0,1,347,good,8,1686,west_welmwood,1686 -0,1,272,good,2,1645,west_welmwood,1645 -2,1,659,great,11,3698,east_elmwood,3698 -1,1,742,great,10,2678,east_elmwood,2678 -2,1,830,great,14,3826,east_elmwood,3826 -2,1,571,great,3,3631,east_elmwood,3631 -3,2,1007,great,5,5339,east_elmwood,5339 -3,2,1059,poor,34,4592,northwest,4463.424 -2,1,823,poor,23,3191,northwest,3171.854 -1,1,705,great,5,2681,east_elmwood,2681 -1,1,381,poor,16,1742,northwest,1742 -1,1,558,good,4,2010,west_welmwood,2010 -1,1,452,poor,35,1637,northwest,1587.89 -1,1,446,good,7,1911,west_welmwood,1911 -3,2,1011,good,11,4704,west_welmwood,4704 -2,1,770,good,5,3285,west_welmwood,3285 -3,2,853,good,11,4592,west_welmwood,4592 -0,1,436,good,2,1838,west_welmwood,1838 -2,1,718,good,11,3197,west_welmwood,3197 -0,1,507,good,5,1835,west_welmwood,1835 -0,1,175,great,11,1985,east_elmwood,1985 -1,1,379,good,9,1785,west_welmwood,1785 -0,1,463,good,4,1859,west_welmwood,1859 -0,1,131,great,10,1917,east_elmwood,1917 -1,1,584,good,7,2039,west_welmwood,2039 -1,1,343,good,2,1866,west_welmwood,1866 -0,1,424,poor,24,1589,northwest,1576.288 -1,1,670,good,3,2153,west_welmwood,2153 -2,1,551,good,8,3103,west_welmwood,3103 -2,1,737,great,14,3733,east_elmwood,3733 -1,1,655,poor,36,1810,northwest,1752.08 -3,2,946,poor,46,4289,northwest,4065.972 -1,1,360,good,1,1841,west_welmwood,1841 -3,2,988,good,2,4846,west_welmwood,4846 -2,1,641,poor,17,3075,northwest,3075 -3,2,1163,good,12,4938,west_welmwood,4938 -3,2,1025,good,14,4745,west_welmwood,4745 -3,2,893,great,0,5228,east_elmwood,5228 -1,1,302,poor,38,1413,northwest,1362.132 -3,2,1183,good,2,4978,west_welmwood,4978 -1,1,721,great,3,2707,east_elmwood,2707 -2,1,670,poor,18,3137,northwest,3137 
-3,2,1138,great,12,5408,east_elmwood,5408 -0,1,144,good,11,1436,west_welmwood,1436 -1,1,543,good,9,1927,west_welmwood,1927 -3,2,838,poor,63,4086,northwest,3734.604 -1,1,478,poor,16,1833,northwest,1833 -2,1,760,good,2,3319,west_welmwood,3319 -3,2,1054,good,9,4775,west_welmwood,4775 -1,1,738,poor,55,1641,northwest,1526.13 -1,1,331,poor,52,1321,northwest,1236.456 -2,1,881,great,6,3923,east_elmwood,3923 -2,1,916,poor,29,3211,northwest,3153.202 -1,1,592,good,12,1923,west_welmwood,1923 -0,1,250,great,5,2088,east_elmwood,2088 -3,2,1233,poor,36,4710,northwest,4559.28 -1,1,672,great,14,2547,east_elmwood,2547 -0,1,522,great,7,2318,east_elmwood,2318 -1,1,523,poor,59,1465,northwest,1350.73 -3,2,1116,great,1,5470,east_elmwood,5470 -0,1,389,good,14,1647,west_welmwood,1647 -3,2,1008,great,13,5198,east_elmwood,5198 -0,1,320,great,9,2173,east_elmwood,2173 -2,1,707,good,1,3301,west_welmwood,3301 -2,1,776,great,5,3870,east_elmwood,3870 -1,1,607,great,6,2582,east_elmwood,2582 -2,1,931,good,1,3475,west_welmwood,3475 -3,2,886,great,2,5224,east_elmwood,5224 -2,1,785,good,14,3277,west_welmwood,3277 -0,1,373,great,4,2199,east_elmwood,2199 -3,2,1243,good,7,4983,west_welmwood,4983 -0,1,549,poor,48,1454,northwest,1372.576 -2,1,620,poor,58,2641,northwest,2440.284 -2,1,921,good,1,3550,west_welmwood,3550 -0,1,106,poor,25,1239,northwest,1226.61 -1,1,623,good,4,2039,west_welmwood,2039 -0,1,328,great,9,2097,east_elmwood,2097 -3,2,1129,poor,37,4581,northwest,4425.246 -3,2,857,poor,42,4237,northwest,4050.572 -3,2,999,poor,32,4567,northwest,4457.392 -3,2,966,good,13,4681,west_welmwood,4681 -3,2,897,great,11,5107,east_elmwood,5107 -1,1,369,good,2,1807,west_welmwood,1807 -3,2,817,good,12,4541,west_welmwood,4541 -1,1,317,good,11,1688,west_welmwood,1688 -3,2,1228,great,8,5521,east_elmwood,5521 -1,1,479,good,14,1800,west_welmwood,1800 -1,1,748,good,12,2082,west_welmwood,2082 -1,1,330,good,11,1680,west_welmwood,1680 -1,1,398,good,14,1723,west_welmwood,1723 -2,1,533,poor,50,2623,northwest,2465.62 
-3,2,830,good,3,4664,west_welmwood,4664 -3,2,1050,good,8,4783,west_welmwood,4783 -3,2,942,poor,64,4201,northwest,3831.312 -2,1,621,poor,42,2841,northwest,2715.996 -3,2,1221,great,9,5522,east_elmwood,5522 -0,1,259,poor,37,1288,northwest,1244.208 -1,1,599,poor,44,1645,northwest,1566.04 -2,1,860,good,3,3461,west_welmwood,3461 -0,1,330,good,2,1735,west_welmwood,1735 -2,1,736,great,8,3773,east_elmwood,3773 -1,1,687,great,14,2532,east_elmwood,2532 -3,2,1040,poor,24,4695,northwest,4657.44 -1,1,718,great,11,2640,east_elmwood,2640 -1,1,313,poor,15,1656,northwest,1656 -3,2,1008,poor,37,4492,northwest,4339.272 -1,1,550,poor,42,1585,northwest,1515.26 -1,1,467,poor,60,1407,northwest,1294.44 -0,1,472,great,9,2243,east_elmwood,2243 -0,1,189,great,4,2002,east_elmwood,2002 -2,1,935,poor,15,3429,northwest,3429 -2,1,638,poor,37,2843,northwest,2746.338 -3,2,803,great,5,5055,east_elmwood,5055 -2,1,734,poor,37,2940,northwest,2840.04 -3,2,815,good,5,4566,west_welmwood,4566 -2,1,618,good,0,3228,west_welmwood,3228 -2,1,519,poor,30,2790,northwest,2734.2 -3,2,1163,great,7,5429,east_elmwood,5429 -0,1,145,good,0,1542,west_welmwood,1542 -2,1,805,good,1,3345,west_welmwood,3345 -0,1,512,great,13,2318,east_elmwood,2318 -0,1,515,great,8,2307,east_elmwood,2307 -0,1,470,great,10,2251,east_elmwood,2251 -2,1,845,good,14,3312,west_welmwood,3312 -2,1,676,great,2,3726,east_elmwood,3726 -0,1,157,good,3,1523,west_welmwood,1523 -3,2,905,good,8,4666,west_welmwood,4666 -1,1,335,poor,34,1461,northwest,1420.092 -3,2,1240,great,7,5555,east_elmwood,5555 -1,1,686,good,9,2093,west_welmwood,2093 -1,1,437,good,2,1948,west_welmwood,1948 -3,2,1217,poor,29,4766,northwest,4680.212 -3,2,854,poor,47,4256,northwest,4026.176 -0,1,511,good,12,1787,west_welmwood,1787 -1,1,494,poor,18,1798,northwest,1798 -3,2,876,good,13,4560,west_welmwood,4560 -1,1,653,good,6,2057,west_welmwood,2057 -2,1,537,good,14,2997,west_welmwood,2997 -2,1,670,good,5,3258,west_welmwood,3258 -1,1,477,good,4,1980,west_welmwood,1980 
-2,1,775,good,8,3265,west_welmwood,3265 -0,1,332,good,8,1670,west_welmwood,1670 -3,2,1214,good,3,5062,west_welmwood,5062 -2,1,631,poor,57,2618,northwest,2424.268 -2,1,801,good,4,3357,west_welmwood,3357 -3,2,847,good,10,4606,west_welmwood,4606 -2,1,672,great,8,3699,east_elmwood,3699 -2,1,903,good,2,3468,west_welmwood,3468 -0,1,513,poor,55,1386,northwest,1288.98 -1,1,642,great,7,2551,east_elmwood,2551 -3,2,973,poor,42,4398,northwest,4204.488 -1,1,452,great,2,2458,east_elmwood,2458 -0,1,119,good,5,1440,west_welmwood,1440 -2,1,763,good,12,3228,west_welmwood,3228 -1,1,365,good,10,1745,west_welmwood,1745 -1,1,548,poor,47,1556,northwest,1471.976 -2,1,920,good,6,3486,west_welmwood,3486 -0,1,303,great,5,2138,east_elmwood,2138 -1,1,664,poor,27,1938,northwest,1910.868 -2,1,773,good,11,3249,west_welmwood,3249 -0,1,143,good,7,1510,west_welmwood,1510 -0,1,286,good,14,1554,west_welmwood,1554 -3,2,850,poor,45,4232,northwest,4020.4 -1,1,670,poor,42,1761,northwest,1683.516 -0,1,165,poor,55,1006,northwest,935.58 -0,1,322,poor,45,1297,northwest,1232.15 -1,1,496,good,3,1988,west_welmwood,1988 -2,1,777,good,4,3358,west_welmwood,3358 -0,1,290,good,7,1597,west_welmwood,1597 -0,1,281,great,14,2027,east_elmwood,2027 -0,1,185,poor,56,1063,northwest,986.464 -1,1,601,poor,23,1915,northwest,1903.51 -0,1,156,poor,33,1264,northwest,1231.136 -3,2,967,poor,18,4644,northwest,4644 -2,1,687,good,7,3253,west_welmwood,3253 -0,1,102,great,13,1852,east_elmwood,1852 -0,1,237,good,3,1577,west_welmwood,1577 -3,2,1076,great,7,5330,east_elmwood,5330 -0,1,308,poor,47,1206,northwest,1140.876 -1,1,715,poor,27,1966,northwest,1938.476 -3,2,1095,poor,38,4598,northwest,4432.472 -3,2,1110,good,14,4784,west_welmwood,4784 -2,1,788,good,2,3352,west_welmwood,3352 -2,1,568,great,4,3629,east_elmwood,3629 -3,2,1097,poor,35,4553,northwest,4416.41 -1,1,540,good,13,1885,west_welmwood,1885 -0,1,507,poor,30,1624,northwest,1591.52 -3,2,919,good,14,4647,west_welmwood,4647 -0,1,314,poor,35,1320,northwest,1280.4 
-3,2,1191,poor,43,4637,northwest,4423.698 -3,2,919,good,0,4734,west_welmwood,4734 -0,1,343,great,4,2199,east_elmwood,2199 -2,1,574,great,3,3633,east_elmwood,3633 -2,1,755,good,5,3343,west_welmwood,3343 -3,2,874,good,7,4629,west_welmwood,4629 -0,1,504,poor,29,1602,northwest,1573.164 -0,1,444,poor,41,1477,northwest,1414.966 -0,1,386,good,9,1741,west_welmwood,1741 -0,1,481,poor,51,1404,northwest,1316.952 -1,1,696,poor,32,1908,northwest,1862.208 -3,2,840,poor,15,4547,northwest,4547 -1,1,697,good,7,2115,west_welmwood,2115 -1,1,676,good,2,2202,west_welmwood,2202 -3,2,1094,great,9,5382,east_elmwood,5382 -2,1,669,poor,28,3004,northwest,2955.936 -1,1,340,good,10,1729,west_welmwood,1729 -1,1,475,good,1,1923,west_welmwood,1923 -0,1,340,good,1,1728,west_welmwood,1728 -1,1,389,good,2,1846,west_welmwood,1846 -1,1,623,poor,31,1787,northwest,1747.686 -0,1,264,good,12,1525,west_welmwood,1525 -1,1,701,poor,60,1578,northwest,1451.76 -3,2,1069,good,12,4760,west_welmwood,4760 -2,1,908,good,1,3465,west_welmwood,3465 -1,1,745,good,0,2253,west_welmwood,2253 -3,2,989,poor,43,4431,northwest,4227.174 -0,1,470,great,9,2315,east_elmwood,2315 -1,1,599,good,6,2035,west_welmwood,2035 -3,2,885,good,11,4622,west_welmwood,4622 -2,1,508,good,11,2950,west_welmwood,2950 -2,1,763,good,2,3372,west_welmwood,3372 -2,1,545,great,10,3587,east_elmwood,3587 -1,1,505,great,1,2538,east_elmwood,2538 -0,1,262,poor,18,1434,northwest,1434 -3,2,812,poor,48,4166,northwest,3932.704 -2,1,592,poor,55,2638,northwest,2453.34 -1,1,356,poor,38,1450,northwest,1397.8 -2,1,519,good,5,3082,west_welmwood,3082 -2,1,877,poor,23,3254,northwest,3234.476 -2,1,644,great,14,3579,east_elmwood,3579 -1,1,745,good,12,2161,west_welmwood,2161 -1,1,747,good,7,2205,west_welmwood,2205 -1,1,380,great,11,2231,east_elmwood,2231 -1,1,459,poor,62,1349,northwest,1235.684 -3,2,1073,good,11,4773,west_welmwood,4773 -0,1,376,good,7,1726,west_welmwood,1726 -2,1,757,poor,38,2969,northwest,2862.116 -1,1,658,poor,33,1807,northwest,1760.018 
-2,1,816,great,11,3848,east_elmwood,3848 -2,1,848,good,11,3371,west_welmwood,3371 -0,1,259,good,2,1634,west_welmwood,1634 -2,1,607,good,4,3142,west_welmwood,3142 -3,2,894,good,6,4719,west_welmwood,4719 -1,1,339,poor,64,1220,northwest,1112.64 -2,1,756,poor,40,2930,northwest,2812.8 -0,1,209,good,9,1518,west_welmwood,1518 -2,1,567,good,5,3070,west_welmwood,3070 -0,1,504,good,0,1877,west_welmwood,1877 -0,1,486,poor,52,1344,northwest,1257.984 -1,1,666,good,11,2086,west_welmwood,2086 -2,1,532,great,6,3558,east_elmwood,3558 -1,1,503,good,12,1840,west_welmwood,1840 -1,1,453,poor,56,1345,northwest,1248.16 -2,1,659,great,5,3716,east_elmwood,3716 -0,1,278,good,0,1661,west_welmwood,1661 -2,1,524,good,5,3083,west_welmwood,3083 -1,1,527,good,11,1904,west_welmwood,1904 -1,1,661,poor,52,1601,northwest,1498.536 -3,2,1025,good,13,4775,west_welmwood,4775 -3,2,810,good,3,4660,west_welmwood,4660 -3,2,1086,good,13,4843,west_welmwood,4843 -3,2,812,good,0,4661,west_welmwood,4661 -3,2,881,good,0,4742,west_welmwood,4742 -1,1,510,great,12,2428,east_elmwood,2428 -1,1,378,good,11,1815,west_welmwood,1815 -2,1,889,great,10,3934,east_elmwood,3934 -1,1,748,poor,18,2038,northwest,2038 -1,1,476,poor,57,1366,northwest,1264.916 -3,2,931,poor,31,4433,northwest,4335.474 -0,1,111,good,9,1468,west_welmwood,1468 -1,1,571,poor,36,1757,northwest,1700.776 -2,1,909,poor,63,2898,northwest,2648.772 -3,2,1131,poor,35,4601,northwest,4462.97 -1,1,552,great,9,2446,east_elmwood,2446 -0,1,419,good,14,1709,west_welmwood,1709 -2,1,551,good,6,3130,west_welmwood,3130 -3,2,1120,good,7,4938,west_welmwood,4938 -1,1,647,poor,58,1536,northwest,1419.264 -0,1,435,good,0,1785,west_welmwood,1785 -0,1,222,good,1,1656,west_welmwood,1656 -0,1,468,good,11,1725,west_welmwood,1725 -2,1,561,poor,47,2693,northwest,2547.578 -1,1,747,poor,59,1699,northwest,1566.478 -1,1,696,great,14,2573,east_elmwood,2573 -1,1,440,good,13,1797,west_welmwood,1797 -1,1,686,great,9,2600,east_elmwood,2600 -2,1,712,great,7,3701,east_elmwood,3701 
-0,1,302,poor,26,1432,northwest,1414.816 -2,1,626,good,14,3074,west_welmwood,3074 -1,1,700,poor,55,1663,northwest,1546.59 -1,1,409,good,12,1808,west_welmwood,1808 -3,2,880,good,4,4678,west_welmwood,4678 -3,2,915,good,5,4713,west_welmwood,4713 -1,1,748,poor,48,1756,northwest,1657.664 -0,1,227,good,9,1487,west_welmwood,1487 -1,1,390,good,8,1840,west_welmwood,1840 -0,1,460,good,0,1902,west_welmwood,1902 -1,1,392,good,8,1782,west_welmwood,1782 -2,1,921,good,14,3383,west_welmwood,3383 -2,1,756,good,3,3336,west_welmwood,3336 -1,1,604,great,1,2583,east_elmwood,2583 -3,2,832,great,3,5137,east_elmwood,5137 -3,2,1022,great,2,5386,east_elmwood,5386 -2,1,517,poor,36,2756,northwest,2667.808 -3,2,1018,good,6,4847,west_welmwood,4847 -0,1,422,good,7,1727,west_welmwood,1727 -2,1,716,good,8,3283,west_welmwood,3283 -0,1,301,great,3,2172,east_elmwood,2172 -1,1,608,poor,29,1829,northwest,1796.078 -2,1,889,poor,17,3277,northwest,3277 -2,1,577,great,9,3636,east_elmwood,3636 -2,1,671,great,0,3765,east_elmwood,3765 -0,1,315,poor,44,1277,northwest,1215.704 -1,1,709,great,1,2739,east_elmwood,2739 -3,2,1204,poor,63,4400,northwest,4021.6 -0,1,187,great,13,1920,east_elmwood,1920 -0,1,543,poor,31,1677,northwest,1640.106 -1,1,351,good,6,1799,west_welmwood,1799 -2,1,816,poor,45,2952,northwest,2804.4 -3,2,994,poor,53,4323,northwest,4037.682 -1,1,612,good,2,2102,west_welmwood,2102 -0,1,447,good,8,1798,west_welmwood,1798 -0,1,457,good,7,1744,west_welmwood,1744 -3,2,1211,good,8,4968,west_welmwood,4968 -0,1,284,poor,60,1041,northwest,957.72 -1,1,741,poor,19,2035,northwest,2035 -0,1,425,great,2,2298,east_elmwood,2298 -1,1,308,poor,36,1491,northwest,1443.288 -1,1,390,good,8,1776,west_welmwood,1776 -1,1,675,great,14,2529,east_elmwood,2529 -0,1,283,poor,44,1262,northwest,1201.424 -1,1,460,good,1,1938,west_welmwood,1938 -1,1,678,great,4,2609,east_elmwood,2609 -2,1,892,great,11,3925,east_elmwood,3925 -3,2,1149,good,7,4978,west_welmwood,4978 -3,2,876,great,2,5207,east_elmwood,5207 
-1,1,588,poor,21,1850,northwest,1846.3 -3,2,1005,good,5,4794,west_welmwood,4794 -2,1,636,good,3,3193,west_welmwood,3193 -3,2,955,good,3,4763,west_welmwood,4763 -1,1,661,poor,37,1741,northwest,1681.806 -1,1,477,great,7,2447,east_elmwood,2447 -3,2,1230,good,4,5070,west_welmwood,5070 -2,1,735,great,13,3698,east_elmwood,3698 -2,1,630,poor,60,2667,northwest,2453.64 -2,1,506,good,12,3017,west_welmwood,3017 -3,2,1191,poor,18,4896,northwest,4896 -3,2,1103,poor,44,4475,northwest,4260.2 -2,1,760,great,9,3780,east_elmwood,3780 -3,2,979,good,14,4693,west_welmwood,4693 -0,1,440,great,7,2231,east_elmwood,2231 -1,1,359,great,12,2286,east_elmwood,2286 -0,1,125,poor,38,1104,northwest,1064.256 -1,1,692,good,0,2147,west_welmwood,2147 -0,1,503,poor,49,1455,northwest,1370.61 -1,1,659,good,12,2025,west_welmwood,2025 -3,2,995,good,12,4682,west_welmwood,4682 -0,1,435,poor,27,1522,northwest,1500.692 -3,2,1028,great,14,5204,east_elmwood,5204 -3,2,880,good,5,4676,west_welmwood,4676 -2,1,796,poor,15,3261,northwest,3261 -0,1,252,great,6,2140,east_elmwood,2140 -1,1,675,good,11,2112,west_welmwood,2112 -0,1,133,good,3,1482,west_welmwood,1482 -0,1,400,poor,18,1622,northwest,1622 -3,2,954,poor,19,4629,northwest,4629 -1,1,332,great,12,2174,east_elmwood,2174 -3,2,1178,poor,53,4479,northwest,4183.386 -0,1,334,good,6,1698,west_welmwood,1698 -0,1,176,good,2,1548,west_welmwood,1548 -2,1,854,poor,58,2909,northwest,2687.916 -2,1,846,good,1,3461,west_welmwood,3461 -2,1,543,good,2,3129,west_welmwood,3129 -2,1,620,good,1,3225,west_welmwood,3225 -3,2,911,poor,31,4419,northwest,4321.782 -0,1,304,great,1,2241,east_elmwood,2241 -0,1,241,great,3,2116,east_elmwood,2116 -0,1,453,great,13,2269,east_elmwood,2269 -2,1,589,great,11,3617,east_elmwood,3617 -2,1,942,poor,15,3419,northwest,3419 -2,1,678,poor,37,2936,northwest,2836.176 -1,1,465,poor,42,1519,northwest,1452.164 -0,1,256,great,0,2140,east_elmwood,2140 -2,1,561,poor,56,2580,northwest,2394.24 -3,2,1032,good,4,4804,west_welmwood,4804 
-0,1,502,good,0,1915,west_welmwood,1915 -1,1,617,poor,35,1739,northwest,1686.83 -3,2,846,good,10,4620,west_welmwood,4620 -2,1,632,poor,37,2874,northwest,2776.284 -0,1,244,poor,59,1087,northwest,1002.214 -1,1,366,good,12,1708,west_welmwood,1708 -3,2,1097,good,4,4865,west_welmwood,4865 -3,2,931,poor,42,4385,northwest,4192.06 -2,1,531,good,3,3142,west_welmwood,3142 -1,1,428,good,13,1796,west_welmwood,1796 -0,1,427,great,1,2283,east_elmwood,2283 -2,1,682,poor,24,3036,northwest,3011.712 -0,1,181,poor,16,1430,northwest,1430 -2,1,672,good,10,3179,west_welmwood,3179 -2,1,548,poor,35,2762,northwest,2679.14 -2,1,721,good,5,3274,west_welmwood,3274 -1,1,379,poor,56,1283,northwest,1190.624 -0,1,369,poor,17,1592,northwest,1592 -2,1,738,good,2,3329,west_welmwood,3329 -0,1,321,great,14,2034,east_elmwood,2034 -2,1,636,great,9,3684,east_elmwood,3684 -1,1,564,poor,60,1452,northwest,1335.84 -2,1,723,good,7,3250,west_welmwood,3250 -1,1,584,poor,16,1950,northwest,1950 -1,1,307,good,12,1690,west_welmwood,1690 -2,1,946,great,11,3975,east_elmwood,3975 -2,1,655,good,2,3226,west_welmwood,3226 -3,2,1101,good,4,4903,west_welmwood,4903 -2,1,779,poor,27,3101,northwest,3057.586 -1,1,497,poor,28,1737,northwest,1709.208 -3,2,806,poor,55,4066,northwest,3781.38 -1,1,746,good,4,2223,west_welmwood,2223 -0,1,358,poor,30,1462,northwest,1432.76 -0,1,181,good,11,1489,west_welmwood,1489 -1,1,553,good,8,1964,west_welmwood,1964 -2,1,864,poor,15,3299,northwest,3299 -3,2,1040,good,11,4785,west_welmwood,4785 -2,1,516,good,8,3037,west_welmwood,3037 -2,1,517,good,13,3005,west_welmwood,3005 -2,1,835,poor,27,3131,northwest,3087.166 -3,2,1163,good,7,4987,west_welmwood,4987 -3,2,1044,poor,64,4281,northwest,3904.272 -3,2,1025,good,3,4887,west_welmwood,4887 -0,1,103,great,6,1970,east_elmwood,1970 -3,2,990,poor,33,4476,northwest,4359.624 -2,1,575,good,7,3143,west_welmwood,3143 -3,2,1014,good,6,4796,west_welmwood,4796 -3,2,1185,poor,16,4897,northwest,4897 -3,2,1058,good,0,4865,west_welmwood,4865 
-1,1,412,great,10,2284,east_elmwood,2284 -0,1,251,good,12,1547,west_welmwood,1547 -2,1,711,good,1,3301,west_welmwood,3301 -2,1,813,good,4,3401,west_welmwood,3401 -2,1,726,poor,33,3016,northwest,2937.584 -0,1,192,poor,19,1434,northwest,1434 -1,1,486,good,11,1922,west_welmwood,1922 -3,2,1198,great,4,5524,east_elmwood,5524 -1,1,316,good,14,1688,west_welmwood,1688 -3,2,1238,great,5,5550,east_elmwood,5550 -3,2,868,good,1,4658,west_welmwood,4658 -1,1,371,great,13,2264,east_elmwood,2264 -2,1,597,good,5,3135,west_welmwood,3135 -0,1,298,good,13,1597,west_welmwood,1597 -3,2,1049,good,6,4831,west_welmwood,4831 -2,1,553,good,11,3063,west_welmwood,3063 -0,1,521,poor,61,1322,northwest,1213.596 -3,2,902,poor,49,4300,northwest,4050.6 -1,1,719,poor,41,1791,northwest,1715.778 -2,1,713,poor,63,2703,northwest,2470.542 -3,2,1196,good,0,5060,west_welmwood,5060 -1,1,426,good,8,1874,west_welmwood,1874 -3,2,1235,poor,44,4671,northwest,4446.792 -2,1,643,great,3,3670,east_elmwood,3670 -0,1,535,poor,26,1692,northwest,1671.696 -2,1,581,poor,29,2913,northwest,2860.566 -0,1,111,poor,62,849,northwest,777.684 -1,1,469,great,6,2432,east_elmwood,2432 -3,2,880,great,11,5103,east_elmwood,5103 -1,1,487,good,8,1909,west_welmwood,1909 -1,1,492,good,14,1834,west_welmwood,1834 -2,1,846,good,4,3394,west_welmwood,3394 -0,1,121,good,1,1522,west_welmwood,1522 -3,2,984,poor,25,4561,northwest,4515.39 -3,2,964,good,8,4705,west_welmwood,4705 -3,2,998,good,3,4818,west_welmwood,4818 -0,1,282,great,3,2119,east_elmwood,2119 -0,1,498,good,12,1811,west_welmwood,1811 -0,1,157,good,9,1511,west_welmwood,1511 -1,1,495,good,4,1969,west_welmwood,1969 -1,1,522,good,4,2029,west_welmwood,2029 -3,2,1036,poor,42,4496,northwest,4298.176 -0,1,441,good,5,1830,west_welmwood,1830 -1,1,361,good,1,1870,west_welmwood,1870 -1,1,452,poor,50,1426,northwest,1340.44 -0,1,441,great,0,2373,east_elmwood,2373 -1,1,396,good,0,1918,west_welmwood,1918 -0,1,220,good,12,1528,west_welmwood,1528 -3,2,894,good,14,4628,west_welmwood,4628 
-0,1,111,good,7,1435,west_welmwood,1435 -2,1,881,great,7,3872,east_elmwood,3872 -1,1,613,great,13,2478,east_elmwood,2478 -1,1,380,poor,58,1296,northwest,1197.504 -3,2,985,good,5,4829,west_welmwood,4829 -2,1,784,great,14,3738,east_elmwood,3738 -1,1,431,poor,19,1787,northwest,1787 -2,1,564,great,3,3664,east_elmwood,3664 -1,1,445,poor,64,1296,northwest,1181.952 -0,1,236,good,3,1631,west_welmwood,1631 -2,1,893,good,6,3407,west_welmwood,3407 -1,1,355,good,3,1824,west_welmwood,1824 -1,1,600,poor,42,1655,northwest,1582.18 -3,2,929,great,7,5162,east_elmwood,5162 -2,1,881,poor,30,3146,northwest,3083.08 -3,2,801,poor,22,4462,northwest,4444.152 -1,1,732,good,9,2132,west_welmwood,2132 -1,1,702,poor,23,2003,northwest,1990.982 -3,2,1033,poor,53,4355,northwest,4067.57 -2,1,700,great,9,3691,east_elmwood,3691 -3,2,875,great,5,5145,east_elmwood,5145 -1,1,330,good,3,1798,west_welmwood,1798 -0,1,530,good,0,1961,west_welmwood,1961 -0,1,364,great,8,2215,east_elmwood,2215 -1,1,359,poor,42,1481,northwest,1415.836 -2,1,564,great,2,3645,east_elmwood,3645 -0,1,122,great,13,1897,east_elmwood,1897 -1,1,590,good,0,2125,west_welmwood,2125 -0,1,224,poor,42,1246,northwest,1191.176 -1,1,515,good,12,1869,west_welmwood,1869 -3,2,852,poor,20,4488,northwest,4488 -0,1,515,poor,57,1374,northwest,1272.324 -0,1,394,poor,45,1335,northwest,1268.25 -2,1,881,good,6,3375,west_welmwood,3375 -0,1,221,poor,56,1022,northwest,948.416 -0,1,462,great,5,2299,east_elmwood,2299 -0,1,406,poor,24,1535,northwest,1522.72 -1,1,595,great,4,2540,east_elmwood,2540 -1,1,474,good,11,1855,west_welmwood,1855 -3,2,1249,good,14,4931,west_welmwood,4931 -3,2,1223,good,10,5008,west_welmwood,5008 -3,2,834,good,0,4704,west_welmwood,4704 -0,1,288,poor,38,1288,northwest,1241.632 -3,2,1035,poor,44,4451,northwest,4237.352 -2,1,567,good,2,3183,west_welmwood,3183 -2,1,884,good,4,3430,west_welmwood,3430 -2,1,888,great,4,3900,east_elmwood,3900 -0,1,227,great,9,2064,east_elmwood,2064 -0,1,204,poor,39,1223,northwest,1176.526 
-3,2,897,good,13,4660,west_welmwood,4660 -1,1,586,good,5,2056,west_welmwood,2056 -0,1,532,great,10,2315,east_elmwood,2315 -1,1,377,good,5,1839,west_welmwood,1839 -0,1,534,great,5,2429,east_elmwood,2429 -3,2,1201,poor,32,4767,northwest,4652.592 -0,1,163,poor,51,1003,northwest,940.814 -2,1,844,great,4,3905,east_elmwood,3905 -3,2,1024,poor,37,4487,northwest,4334.442 -0,1,196,poor,46,1142,northwest,1082.616 -2,1,661,poor,54,2732,northwest,2546.224 -0,1,317,good,10,1616,west_welmwood,1616 -0,1,162,great,13,1930,east_elmwood,1930 -1,1,721,great,10,2596,east_elmwood,2596 -1,1,472,great,10,2340,east_elmwood,2340 -2,1,650,poor,62,2617,northwest,2397.172 -2,1,535,good,9,3017,west_welmwood,3017 -1,1,311,good,8,1764,west_welmwood,1764 -0,1,371,good,7,1720,west_welmwood,1720 -1,1,524,good,11,1907,west_welmwood,1907 -2,1,580,good,5,3176,west_welmwood,3176 -1,1,681,poor,61,1583,northwest,1453.194 -2,1,943,good,14,3374,west_welmwood,3374 -1,1,309,poor,53,1290,northwest,1204.86 -3,2,1233,good,9,4960,west_welmwood,4960 -0,1,198,poor,49,1133,northwest,1067.286 -2,1,897,great,9,3919,east_elmwood,3919 -3,2,1129,poor,15,4828,northwest,4828 -3,2,1014,great,11,5263,east_elmwood,5263 -3,2,1119,good,9,4864,west_welmwood,4864 -3,2,962,poor,59,4199,northwest,3871.478 -1,1,317,poor,17,1655,northwest,1655 -0,1,165,poor,55,1010,northwest,939.3 -1,1,738,great,7,2648,east_elmwood,2648 -3,2,977,great,9,5281,east_elmwood,5281 -1,1,722,great,7,2686,east_elmwood,2686 -2,1,706,good,7,3187,west_welmwood,3187 -1,1,424,good,9,1811,west_welmwood,1811 -1,1,669,great,6,2594,east_elmwood,2594 -1,1,327,great,9,2252,east_elmwood,2252 -1,1,435,good,3,1915,west_welmwood,1915 -2,1,706,poor,31,2990,northwest,2924.22 -3,2,1200,good,10,4920,west_welmwood,4920 -2,1,622,great,0,3703,east_elmwood,3703 -1,1,338,poor,42,1463,northwest,1398.628 -2,1,724,poor,60,2725,northwest,2507 -0,1,255,good,14,1526,west_welmwood,1526 -1,1,501,poor,33,1718,northwest,1673.332 -2,1,643,great,7,3703,east_elmwood,3703 
-3,2,1054,poor,19,4696,northwest,4696 -2,1,817,good,8,3296,west_welmwood,3296 -1,1,301,poor,36,1402,northwest,1357.136 -0,1,261,good,13,1577,west_welmwood,1577 -2,1,925,good,12,3447,west_welmwood,3447 -3,2,939,poor,52,4310,northwest,4034.16 -3,2,1203,poor,22,4848,northwest,4828.608 -0,1,439,great,12,2214,east_elmwood,2214 -1,1,344,poor,28,1603,northwest,1577.352 -1,1,404,poor,28,1602,northwest,1576.368 -2,1,723,good,6,3240,west_welmwood,3240 -1,1,494,great,1,2526,east_elmwood,2526 -2,1,870,great,10,3907,east_elmwood,3907 -1,1,562,good,1,2024,west_welmwood,2024 -1,1,335,poor,46,1347,northwest,1276.956 -3,2,897,good,10,4618,west_welmwood,4618 -2,1,512,good,4,3089,west_welmwood,3089 -1,1,743,good,13,2128,west_welmwood,2128 -3,2,1197,good,8,4984,west_welmwood,4984 -3,2,1221,great,4,5521,east_elmwood,5521 -0,1,327,great,9,2158,east_elmwood,2158 -0,1,199,good,8,1479,west_welmwood,1479 -3,2,942,good,13,4663,west_welmwood,4663 -2,1,616,good,4,3150,west_welmwood,3150 -0,1,380,good,14,1598,west_welmwood,1598 -2,1,707,good,12,3225,west_welmwood,3225 -0,1,498,poor,57,1355,northwest,1254.73 -2,1,566,poor,37,2755,northwest,2661.33 -3,2,1173,good,12,4855,west_welmwood,4855 -0,1,312,poor,28,1385,northwest,1362.84 -0,1,433,great,7,2220,east_elmwood,2220 -3,2,1243,good,0,5140,west_welmwood,5140 -2,1,946,good,12,3467,west_welmwood,3467 -2,1,666,good,1,3245,west_welmwood,3245 -2,1,689,poor,44,2879,northwest,2740.808 -2,1,549,good,11,2999,west_welmwood,2999 -0,1,514,poor,47,1399,northwest,1323.454 -3,2,932,poor,47,4356,northwest,4120.776 -0,1,325,poor,45,1268,northwest,1204.6 -3,2,1245,poor,30,4820,northwest,4723.6 -3,2,817,poor,37,4341,northwest,4193.406 -0,1,347,poor,27,1441,northwest,1420.826 -2,1,701,great,0,3846,east_elmwood,3846 -1,1,534,good,8,2001,west_welmwood,2001 -0,1,543,great,7,2385,east_elmwood,2385 -2,1,893,poor,59,2858,northwest,2635.076 -1,1,402,good,12,1809,west_welmwood,1809 -3,2,1110,great,2,5441,east_elmwood,5441 -0,1,295,poor,17,1566,northwest,1566 
-0,1,277,good,9,1603,west_welmwood,1603 -1,1,552,great,4,2499,east_elmwood,2499 -0,1,110,great,0,2000,east_elmwood,2000 -1,1,347,poor,20,1624,northwest,1624 -3,2,1182,great,6,5497,east_elmwood,5497 -0,1,297,good,3,1657,west_welmwood,1657 -2,1,619,good,10,3152,west_welmwood,3152 -2,1,596,good,12,3080,west_welmwood,3080 -0,1,303,poor,35,1308,northwest,1268.76 -0,1,316,good,8,1681,west_welmwood,1681 -1,1,482,good,5,1888,west_welmwood,1888 -2,1,846,poor,16,3277,northwest,3277 -3,2,831,good,14,4539,west_welmwood,4539 -0,1,251,great,10,2056,east_elmwood,2056 -3,2,1154,poor,53,4515,northwest,4217.01 -2,1,564,good,7,3141,west_welmwood,3141 -3,2,1171,good,10,4928,west_welmwood,4928 -0,1,232,poor,27,1367,northwest,1347.862 -2,1,737,poor,56,2739,northwest,2541.792 -2,1,511,poor,47,2682,northwest,2537.172 -1,1,337,good,0,1855,west_welmwood,1855 -1,1,355,poor,22,1619,northwest,1612.524 -0,1,290,poor,48,1257,northwest,1186.608 -3,2,977,good,1,4809,west_welmwood,4809 -2,1,568,great,13,3566,east_elmwood,3566 -2,1,633,poor,57,2693,northwest,2493.718 -0,1,341,poor,39,1382,northwest,1329.484 -2,1,818,good,4,3394,west_welmwood,3394 -0,1,191,good,0,1578,west_welmwood,1578 -1,1,649,poor,27,1829,northwest,1803.394 -0,1,237,good,2,1613,west_welmwood,1613 -3,2,1028,good,8,4790,west_welmwood,4790 -1,1,459,good,12,1827,west_welmwood,1827 -2,1,786,good,0,3413,west_welmwood,3413 -0,1,454,poor,63,1200,northwest,1096.8 -1,1,539,poor,55,1464,northwest,1361.52 -0,1,220,poor,42,1247,northwest,1192.132 -1,1,747,poor,49,1803,northwest,1698.426 -2,1,542,poor,24,2878,northwest,2854.976 -3,2,913,good,11,4640,west_welmwood,4640 -2,1,949,good,0,3548,west_welmwood,3548 -0,1,212,poor,57,1028,northwest,951.928 -2,1,793,good,3,3378,west_welmwood,3378 -0,1,382,poor,29,1477,northwest,1450.414 -0,1,515,poor,52,1424,northwest,1332.864 -3,2,966,great,6,5240,east_elmwood,5240 -3,2,1034,poor,36,4514,northwest,4369.552 -3,2,1192,good,0,5042,west_welmwood,5042 -2,1,787,great,3,3843,east_elmwood,3843 
-1,1,326,great,3,2268,east_elmwood,2268 -2,1,760,good,2,3291,west_welmwood,3291 -3,2,996,great,7,5321,east_elmwood,5321 -0,1,148,poor,58,953,northwest,880.572 -1,1,662,good,5,2125,west_welmwood,2125 -3,2,1038,poor,62,4311,northwest,3948.876 -1,1,690,great,5,2667,east_elmwood,2667 -3,2,1029,good,14,4699,west_welmwood,4699 -2,1,847,poor,46,2988,northwest,2832.624 -3,2,922,good,12,4642,west_welmwood,4642 -2,1,606,good,2,3183,west_welmwood,3183 -1,1,385,poor,36,1543,northwest,1493.624 -1,1,546,good,6,1938,west_welmwood,1938 -0,1,472,poor,26,1578,northwest,1559.064 -1,1,419,good,1,1911,west_welmwood,1911 -2,1,550,poor,37,2793,northwest,2698.038 -1,1,617,good,0,2077,west_welmwood,2077 -1,1,495,good,6,1920,west_welmwood,1920 -2,1,686,good,8,3186,west_welmwood,3186 -2,1,930,good,6,3484,west_welmwood,3484 -1,1,420,good,1,1933,west_welmwood,1933 -1,1,494,poor,24,1755,northwest,1740.96 -3,2,1012,good,2,4807,west_welmwood,4807 -3,2,835,poor,43,4232,northwest,4037.328 -2,1,539,great,2,3594,east_elmwood,3594 -0,1,153,great,13,1937,east_elmwood,1937 -2,1,776,great,14,3734,east_elmwood,3734 -1,1,563,good,13,1975,west_welmwood,1975 -3,2,1144,poor,51,4445,northwest,4169.41 -1,1,549,great,8,2478,east_elmwood,2478 -3,2,1015,good,9,4782,west_welmwood,4782 -1,1,446,poor,50,1465,northwest,1377.1 -0,1,531,great,0,2408,east_elmwood,2408 -2,1,538,poor,64,2513,northwest,2291.856 -0,1,279,good,14,1542,west_welmwood,1542 -2,1,860,great,2,3923,east_elmwood,3923 -3,2,1153,poor,31,4693,northwest,4589.754 -0,1,288,poor,36,1323,northwest,1280.664 -0,1,366,great,11,2190,east_elmwood,2190 -2,1,539,good,9,3029,west_welmwood,3029 -1,1,482,poor,61,1327,northwest,1218.186 -0,1,148,good,13,1458,west_welmwood,1458 -0,1,433,good,12,1703,west_welmwood,1703 -0,1,147,good,8,1472,west_welmwood,1472 -3,2,825,poor,30,4326,northwest,4239.48 -3,2,824,good,3,4621,west_welmwood,4621 -1,1,418,great,6,2321,east_elmwood,2321 -0,1,512,poor,55,1385,northwest,1288.05 -0,1,456,poor,42,1470,northwest,1405.32 
-1,1,582,good,8,2050,west_welmwood,2050 -0,1,107,good,10,1438,west_welmwood,1438 -1,1,628,great,3,2559,east_elmwood,2559 -2,1,827,poor,41,3053,northwest,2924.774 -0,1,267,poor,30,1378,northwest,1350.44 -0,1,363,good,0,1750,west_welmwood,1750 -2,1,801,great,10,3776,east_elmwood,3776 -1,1,444,good,11,1817,west_welmwood,1817 -0,1,273,poor,29,1379,northwest,1354.178 -3,2,1012,great,9,5268,east_elmwood,5268 -2,1,837,good,10,3374,west_welmwood,3374 -0,1,329,good,12,1585,west_welmwood,1585 -3,2,1119,good,6,4899,west_welmwood,4899 -2,1,611,good,14,3048,west_welmwood,3048 -2,1,541,good,9,3014,west_welmwood,3014 -3,2,848,good,9,4594,west_welmwood,4594 -2,1,730,good,12,3164,west_welmwood,3164 -2,1,602,poor,49,2747,northwest,2587.674 -3,2,1164,poor,43,4604,northwest,4392.216 -1,1,632,good,13,2018,west_welmwood,2018 -2,1,797,good,9,3260,west_welmwood,3260 -1,1,377,great,2,2333,east_elmwood,2333 -2,1,780,great,4,3828,east_elmwood,3828 -3,2,1004,great,13,5216,east_elmwood,5216 -2,1,769,poor,22,3101,northwest,3088.596 -1,1,733,good,0,2230,west_welmwood,2230 -1,1,309,poor,62,1209,northwest,1107.444 -3,2,1178,good,11,4967,west_welmwood,4967 -1,1,464,poor,20,1768,northwest,1768 -2,1,835,good,12,3349,west_welmwood,3349 -3,2,1046,great,7,5363,east_elmwood,5363 -1,1,480,good,3,1923,west_welmwood,1923 -0,1,397,great,11,2173,east_elmwood,2173 -3,2,966,good,6,4768,west_welmwood,4768 -1,1,414,poor,37,1531,northwest,1478.946 -3,2,848,great,0,5242,east_elmwood,5242 -1,1,610,poor,38,1760,northwest,1696.64 -1,1,542,great,7,2480,east_elmwood,2480 -0,1,299,good,3,1669,west_welmwood,1669 -1,1,347,poor,53,1311,northwest,1224.474 -2,1,608,good,9,3107,west_welmwood,3107 -0,1,259,good,11,1594,west_welmwood,1594 -0,1,498,poor,39,1517,northwest,1459.354 -0,1,470,great,11,2308,east_elmwood,2308 -3,2,871,great,0,5265,east_elmwood,5265 -0,1,485,poor,48,1416,northwest,1336.704 -2,1,516,great,7,3546,east_elmwood,3546 -2,1,697,great,1,3787,east_elmwood,3787 -1,1,370,poor,58,1319,northwest,1218.756 
-0,1,406,good,10,1685,west_welmwood,1685 -2,1,737,great,0,3878,east_elmwood,3878 -2,1,610,poor,18,3020,northwest,3020 -0,1,392,poor,26,1557,northwest,1538.316 -0,1,429,good,3,1801,west_welmwood,1801 -1,1,368,poor,29,1626,northwest,1596.732 -3,2,1111,good,7,4918,west_welmwood,4918 -2,1,612,great,10,3598,east_elmwood,3598 -2,1,860,good,6,3387,west_welmwood,3387 -2,1,939,good,3,3519,west_welmwood,3519 -1,1,676,poor,34,1848,northwest,1796.256 -1,1,513,good,8,1915,west_welmwood,1915 -3,2,945,good,7,4725,west_welmwood,4725 -1,1,400,poor,22,1636,northwest,1629.456 -1,1,704,great,2,2723,east_elmwood,2723 -1,1,721,poor,29,1931,northwest,1896.242 -2,1,554,good,12,3073,west_welmwood,3073 -0,1,439,good,3,1790,west_welmwood,1790 -0,1,523,good,3,1902,west_welmwood,1902 -1,1,477,good,3,1996,west_welmwood,1996 -1,1,698,good,4,2133,west_welmwood,2133 -0,1,463,poor,27,1582,northwest,1559.852 -1,1,321,poor,50,1307,northwest,1228.58 -0,1,170,good,5,1563,west_welmwood,1563 -3,2,1179,poor,44,4609,northwest,4387.768 -3,2,1117,good,8,4919,west_welmwood,4919 -1,1,672,good,11,2088,west_welmwood,2088 -2,1,937,good,6,3506,west_welmwood,3506 -2,1,545,poor,61,2489,northwest,2284.902 -2,1,532,great,9,3565,east_elmwood,3565 -0,1,528,good,6,1860,west_welmwood,1860 -2,1,813,great,3,3857,east_elmwood,3857 -2,1,821,poor,27,3118,northwest,3074.348 -3,2,1061,poor,44,4442,northwest,4228.784 -1,1,678,good,2,2125,west_welmwood,2125 -1,1,452,poor,18,1771,northwest,1771 -0,1,150,good,4,1490,west_welmwood,1490 -1,1,742,poor,52,1673,northwest,1565.928 -1,1,408,poor,17,1779,northwest,1779 -2,1,949,poor,56,2983,northwest,2768.224 -3,2,1229,great,3,5583,east_elmwood,5583 -0,1,287,great,14,2093,east_elmwood,2093 -0,1,169,great,9,1932,east_elmwood,1932 -2,1,623,good,12,3083,west_welmwood,3083 -3,2,1148,good,2,4931,west_welmwood,4931 -1,1,484,good,1,1999,west_welmwood,1999 -0,1,111,good,5,1510,west_welmwood,1510 -2,1,855,good,1,3433,west_welmwood,3433 -0,1,427,great,2,2312,east_elmwood,2312 
-1,1,504,great,9,2416,east_elmwood,2416 -2,1,704,good,14,3142,west_welmwood,3142 -1,1,504,great,14,2346,east_elmwood,2346 -3,2,1127,good,10,4896,west_welmwood,4896 -1,1,501,good,6,1924,west_welmwood,1924 -2,1,923,poor,31,3199,northwest,3128.622 -3,2,894,good,2,4699,west_welmwood,4699 -2,1,778,good,6,3336,west_welmwood,3336 -1,1,698,good,7,2155,west_welmwood,2155 -1,1,376,great,4,2339,east_elmwood,2339 -3,2,1188,good,6,4992,west_welmwood,4992 -2,1,905,poor,16,3349,northwest,3349 -0,1,549,poor,47,1481,northwest,1401.026 -2,1,558,good,3,3115,west_welmwood,3115 -1,1,418,good,3,1880,west_welmwood,1880 -3,2,961,poor,43,4349,northwest,4148.946 -3,2,1112,good,5,4913,west_welmwood,4913 -0,1,467,good,2,1879,west_welmwood,1879 -3,2,1111,good,0,4999,west_welmwood,4999 -1,1,590,good,10,1975,west_welmwood,1975 -0,1,518,poor,62,1315,northwest,1204.54 -1,1,416,great,5,2341,east_elmwood,2341 -3,2,1051,poor,51,4382,northwest,4110.316 -2,1,934,good,10,3440,west_welmwood,3440 -0,1,209,good,3,1628,west_welmwood,1628 -2,1,618,great,12,3642,east_elmwood,3642 -1,1,648,great,10,2563,east_elmwood,2563 -1,1,338,good,10,1725,west_welmwood,1725 -3,2,1073,poor,21,4716,northwest,4706.568 -2,1,511,good,14,2926,west_welmwood,2926 -2,1,701,good,13,3156,west_welmwood,3156 -1,1,419,good,14,1753,west_welmwood,1753 -2,1,806,good,2,3364,west_welmwood,3364 -2,1,667,good,3,3278,west_welmwood,3278 -0,1,428,poor,30,1539,northwest,1508.22 -0,1,270,good,3,1664,west_welmwood,1664 -1,1,465,poor,19,1822,northwest,1822 -2,1,724,great,3,3806,east_elmwood,3806 -3,2,1105,poor,55,4384,northwest,4077.12 -2,1,911,great,1,4037,east_elmwood,4037 -0,1,495,poor,33,1574,northwest,1533.076 -2,1,818,great,1,3883,east_elmwood,3883 -0,1,524,good,4,1902,west_welmwood,1902 -3,2,1011,poor,64,4268,northwest,3892.416 -1,1,439,poor,26,1644,northwest,1624.272 -1,1,326,poor,49,1311,northwest,1234.962 -2,1,584,good,5,3141,west_welmwood,3141 -0,1,278,great,1,2140,east_elmwood,2140 -2,1,929,good,8,3455,west_welmwood,3455 
-3,2,903,good,6,4700,west_welmwood,4700 -0,1,307,good,4,1658,west_welmwood,1658 -3,2,945,good,1,4779,west_welmwood,4779 -3,2,871,great,12,5094,east_elmwood,5094 -2,1,742,good,8,3288,west_welmwood,3288 -0,1,400,good,7,1734,west_welmwood,1734 -3,2,1158,great,7,5408,east_elmwood,5408 -3,2,862,poor,28,4473,northwest,4401.432 -1,1,513,poor,52,1540,northwest,1441.44 -0,1,248,good,9,1524,west_welmwood,1524 -3,2,1052,great,9,5345,east_elmwood,5345 -0,1,246,poor,60,1003,northwest,922.76 -3,2,1190,poor,55,4468,northwest,4155.24 -1,1,586,poor,61,1515,northwest,1390.77 -3,2,1219,great,7,5472,east_elmwood,5472 -1,1,371,great,13,2192,east_elmwood,2192 -1,1,729,good,14,2120,west_welmwood,2120 -3,2,986,poor,40,4388,northwest,4212.48 -3,2,1026,good,0,4893,west_welmwood,4893 -2,1,640,good,12,3120,west_welmwood,3120 -2,1,841,poor,26,3139,northwest,3101.332 -3,2,897,poor,35,4427,northwest,4294.19 -0,1,446,good,6,1823,west_welmwood,1823 -2,1,883,poor,22,3278,northwest,3264.888 -3,2,801,good,14,4468,west_welmwood,4468 -1,1,693,good,13,2108,west_welmwood,2108 -0,1,221,poor,40,1250,northwest,1200 -3,2,1116,poor,36,4593,northwest,4446.024 -3,2,1164,good,7,4922,west_welmwood,4922 -3,2,941,good,10,4643,west_welmwood,4643 -2,1,685,good,5,3272,west_welmwood,3272 -0,1,205,great,8,2064,east_elmwood,2064 -0,1,519,good,10,1781,west_welmwood,1781 -3,2,1064,good,13,4750,west_welmwood,4750 -0,1,486,poor,63,1273,northwest,1163.522 -3,2,1235,great,3,5545,east_elmwood,5545 -0,1,297,good,7,1648,west_welmwood,1648 -0,1,121,good,13,1402,west_welmwood,1402 -0,1,140,good,11,1385,west_welmwood,1385 -1,1,540,good,8,1974,west_welmwood,1974 -1,1,407,good,6,1848,west_welmwood,1848 -2,1,557,good,2,3185,west_welmwood,3185 -3,2,1002,poor,60,4225,northwest,3887 -1,1,505,good,11,1885,west_welmwood,1885 -2,1,861,great,12,3801,east_elmwood,3801 -1,1,718,good,14,2072,west_welmwood,2072 -0,1,366,great,6,2209,east_elmwood,2209 -2,1,696,great,10,3728,east_elmwood,3728 -3,2,1123,good,9,4870,west_welmwood,4870 
-2,1,713,great,11,3667,east_elmwood,3667 -2,1,884,poor,23,3232,northwest,3212.608 -0,1,505,great,9,2293,east_elmwood,2293 -2,1,659,poor,55,2716,northwest,2525.88 -0,1,369,great,10,2212,east_elmwood,2212 -3,2,1220,poor,45,4615,northwest,4384.25 -2,1,522,great,1,3574,east_elmwood,3574 -2,1,671,poor,32,2946,northwest,2875.296 -2,1,762,good,5,3346,west_welmwood,3346 -0,1,192,poor,53,1111,northwest,1037.674 -1,1,553,poor,24,1803,northwest,1788.576 -1,1,618,great,12,2480,east_elmwood,2480 -1,1,394,poor,35,1534,northwest,1487.98 -3,2,828,poor,48,4230,northwest,3993.12 -1,1,705,poor,36,1873,northwest,1813.064 -3,2,987,poor,52,4355,northwest,4076.28 -3,2,854,great,10,5072,east_elmwood,5072 -2,1,930,poor,62,2932,northwest,2685.712 -0,1,283,good,7,1595,west_welmwood,1595 -0,1,241,good,5,1567,west_welmwood,1567 -0,1,315,good,1,1752,west_welmwood,1752 -2,1,687,poor,45,2880,northwest,2736 -3,2,1120,good,13,4833,west_welmwood,4833 -1,1,698,great,4,2654,east_elmwood,2654 -3,2,1243,poor,60,4474,northwest,4116.08 -0,1,180,great,12,1963,east_elmwood,1963 -3,2,1030,great,7,5314,east_elmwood,5314 -2,1,791,poor,39,3002,northwest,2887.924 -1,1,361,poor,16,1714,northwest,1714 -0,1,529,poor,23,1684,northwest,1673.896 -3,2,1091,poor,50,4429,northwest,4163.26 -2,1,888,good,14,3347,west_welmwood,3347 -1,1,479,great,10,2417,east_elmwood,2417 -0,1,122,great,5,1934,east_elmwood,1934 -2,1,929,good,5,3497,west_welmwood,3497 -0,1,439,good,7,1755,west_welmwood,1755 -3,2,1185,good,5,4999,west_welmwood,4999 -3,2,837,poor,37,4283,northwest,4137.378 -3,2,897,good,8,4674,west_welmwood,4674 -1,1,354,good,10,1776,west_welmwood,1776 -3,2,1205,poor,16,4914,northwest,4914 -3,2,1063,good,9,4835,west_welmwood,4835 -3,2,966,poor,20,4632,northwest,4632 -3,2,1192,great,6,5480,east_elmwood,5480 -1,1,693,poor,42,1807,northwest,1727.492 -0,1,214,poor,34,1305,northwest,1268.46 -3,2,1075,great,10,5320,east_elmwood,5320 -1,1,616,great,13,2456,east_elmwood,2456 -3,2,805,good,6,4632,west_welmwood,4632 
-1,1,534,good,14,1936,west_welmwood,1936 -0,1,408,good,7,1705,west_welmwood,1705 -2,1,803,good,2,3354,west_welmwood,3354 -1,1,410,great,1,2365,east_elmwood,2365 -0,1,378,poor,21,1536,northwest,1532.928 -2,1,882,great,7,3892,east_elmwood,3892 -1,1,647,good,12,2041,west_welmwood,2041 -1,1,560,good,9,1930,west_welmwood,1930 -0,1,507,poor,30,1630,northwest,1597.4 -3,2,1129,poor,63,4354,northwest,3979.556 -0,1,115,good,9,1427,west_welmwood,1427 -1,1,318,poor,25,1518,northwest,1502.82 -1,1,315,poor,50,1302,northwest,1223.88 -2,1,748,poor,15,3188,northwest,3188 -2,1,685,poor,33,2938,northwest,2861.612 -0,1,154,poor,37,1180,northwest,1139.88 -0,1,515,good,10,1786,west_welmwood,1786 -1,1,355,good,6,1825,west_welmwood,1825 -3,2,1217,poor,19,4867,northwest,4867 -0,1,430,great,7,2292,east_elmwood,2292 -3,2,1227,good,9,4974,west_welmwood,4974 -3,2,1186,poor,63,4418,northwest,4038.052 -1,1,729,poor,41,1861,northwest,1782.838 -2,1,549,good,9,3018,west_welmwood,3018 -1,1,644,great,4,2611,east_elmwood,2611 -3,2,976,great,1,5331,east_elmwood,5331 -2,1,566,good,10,3052,west_welmwood,3052 -0,1,113,poor,26,1238,northwest,1223.144 -0,1,402,great,7,2248,east_elmwood,2248 -3,2,1243,good,2,5070,west_welmwood,5070 -2,1,893,great,5,3923,east_elmwood,3923 -0,1,104,poor,49,1025,northwest,965.55 -2,1,764,great,9,3784,east_elmwood,3784 -2,1,570,poor,64,2544,northwest,2320.128 -0,1,269,good,9,1546,west_welmwood,1546 -1,1,500,poor,17,1865,northwest,1865 -2,1,699,great,10,3685,east_elmwood,3685 -1,1,492,poor,19,1759,northwest,1759 -1,1,494,poor,49,1480,northwest,1394.16 -2,1,628,poor,53,2656,northwest,2480.704 -2,1,871,poor,37,3139,northwest,3032.274 -2,1,897,great,7,3894,east_elmwood,3894 -3,2,1249,good,12,5017,west_welmwood,5017 -1,1,458,good,6,1862,west_welmwood,1862 -2,1,616,great,0,3709,east_elmwood,3709 -2,1,846,good,6,3405,west_welmwood,3405 -1,1,565,good,11,1980,west_welmwood,1980 -3,2,1197,poor,56,4477,northwest,4154.656 -3,2,1177,good,3,4975,west_welmwood,4975 
-2,1,776,good,5,3306,west_welmwood,3306 -3,2,819,good,14,4552,west_welmwood,4552 -0,1,344,good,8,1634,west_welmwood,1634 -0,1,435,good,7,1770,west_welmwood,1770 -1,1,677,great,3,2645,east_elmwood,2645 -2,1,702,poor,46,2831,northwest,2683.788 -1,1,483,good,7,1893,west_welmwood,1893 -1,1,571,good,8,2032,west_welmwood,2032 -1,1,368,good,5,1770,west_welmwood,1770 -2,1,559,poor,35,2786,northwest,2702.42 -0,1,510,poor,15,1781,northwest,1781 -3,2,1054,poor,26,4623,northwest,4567.524 -0,1,459,poor,26,1626,northwest,1606.488 -0,1,325,good,9,1598,west_welmwood,1598 -3,2,878,great,7,5200,east_elmwood,5200 -1,1,467,good,3,1960,west_welmwood,1960 -3,2,1072,good,12,4791,west_welmwood,4791 -1,1,474,poor,40,1607,northwest,1542.72 -0,1,486,good,4,1855,west_welmwood,1855 -0,1,459,good,11,1728,west_welmwood,1728 -1,1,344,great,6,2267,east_elmwood,2267 -2,1,861,poor,43,3040,northwest,2900.16 -3,2,1239,poor,24,4816,northwest,4777.472 -0,1,361,poor,33,1427,northwest,1389.898 -3,2,913,good,1,4794,west_welmwood,4794 -0,1,220,good,5,1619,west_welmwood,1619 -2,1,574,poor,47,2698,northwest,2552.308 -1,1,309,good,2,1832,west_welmwood,1832 -3,2,998,good,12,4758,west_welmwood,4758 -0,1,525,poor,44,1460,northwest,1389.92 -1,1,384,good,7,1776,west_welmwood,1776 -0,1,175,great,13,1952,east_elmwood,1952 -3,2,872,poor,53,4207,northwest,3929.338 -0,1,419,great,7,2209,east_elmwood,2209 -3,2,862,great,13,5060,east_elmwood,5060 -1,1,671,poor,40,1776,northwest,1704.96 -0,1,243,great,11,2075,east_elmwood,2075 -1,1,576,good,12,1992,west_welmwood,1992 -1,1,419,good,13,1777,west_welmwood,1777 -2,1,827,good,10,3303,west_welmwood,3303 -3,2,994,good,1,4871,west_welmwood,4871 -0,1,355,great,14,2076,east_elmwood,2076 -3,2,1024,good,9,4791,west_welmwood,4791 -1,1,340,great,6,2288,east_elmwood,2288 -1,1,433,good,9,1827,west_welmwood,1827 -0,1,473,poor,55,1351,northwest,1256.43 -1,1,579,great,1,2610,east_elmwood,2610 -3,2,1201,good,3,5067,west_welmwood,5067 -1,1,412,poor,64,1248,northwest,1138.176 
-1,1,306,great,13,2225,east_elmwood,2225 -3,2,1218,good,0,5106,west_welmwood,5106 -0,1,386,good,2,1746,west_welmwood,1746 -2,1,634,good,12,3135,west_welmwood,3135 -1,1,505,great,10,2402,east_elmwood,2402 -2,1,847,poor,16,3301,northwest,3301 -0,1,126,good,6,1438,west_welmwood,1438 -3,2,1088,good,7,4873,west_welmwood,4873 -3,2,1171,good,13,4918,west_welmwood,4918 -3,2,1182,good,5,4972,west_welmwood,4972 -3,2,1076,poor,38,4529,northwest,4365.956 -2,1,607,poor,52,2714,northwest,2540.304 -1,1,576,poor,28,1804,northwest,1775.136 -3,2,1102,poor,17,4823,northwest,4823 -3,2,1128,good,0,4987,west_welmwood,4987 -1,1,629,good,6,2080,west_welmwood,2080 -0,1,449,poor,20,1602,northwest,1602 -2,1,744,good,3,3345,west_welmwood,3345 -3,2,1172,good,7,4932,west_welmwood,4932 -0,1,315,great,14,2072,east_elmwood,2072 -0,1,154,great,6,2003,east_elmwood,2003 -2,1,631,good,2,3185,west_welmwood,3185 -2,1,503,good,9,2963,west_welmwood,2963 -0,1,333,good,5,1724,west_welmwood,1724 -3,2,803,good,7,4602,west_welmwood,4602 -3,2,969,good,6,4761,west_welmwood,4761 -2,1,624,good,6,3195,west_welmwood,3195 -3,2,1138,good,1,4989,west_welmwood,4989 -2,1,633,good,8,3167,west_welmwood,3167 -3,2,1170,good,11,4900,west_welmwood,4900 -3,2,1107,good,6,4866,west_welmwood,4866 -0,1,147,great,10,1927,east_elmwood,1927 -2,1,788,poor,38,3017,northwest,2908.388 -3,2,1239,good,13,4991,west_welmwood,4991 -1,1,383,great,7,2356,east_elmwood,2356 -0,1,205,good,0,1587,west_welmwood,1587 -0,1,315,good,5,1654,west_welmwood,1654 -2,1,659,great,10,3684,east_elmwood,3684 -1,1,361,poor,53,1348,northwest,1259.032 -1,1,382,poor,43,1469,northwest,1401.426 -2,1,569,poor,37,2830,northwest,2733.78 -1,1,351,great,9,2283,east_elmwood,2283 -0,1,110,poor,55,961,northwest,893.73 -3,2,1160,great,8,5455,east_elmwood,5455 -1,1,376,good,6,1861,west_welmwood,1861 -0,1,119,poor,62,871,northwest,797.836 -0,1,323,great,13,2098,east_elmwood,2098 -0,1,262,good,12,1584,west_welmwood,1584 -0,1,119,poor,15,1373,northwest,1373 
-2,1,899,good,12,3397,west_welmwood,3397 -3,2,971,good,5,4819,west_welmwood,4819 -2,1,938,good,12,3393,west_welmwood,3393 -2,1,655,good,3,3246,west_welmwood,3246 -0,1,449,good,9,1746,west_welmwood,1746 -1,1,613,poor,32,1777,northwest,1734.352 -0,1,251,great,2,2139,east_elmwood,2139 -1,1,374,good,11,1775,west_welmwood,1775 -3,2,1081,poor,34,4581,northwest,4452.732 -2,1,514,poor,25,2884,northwest,2855.16 -0,1,450,good,6,1773,west_welmwood,1773 -0,1,519,good,6,1882,west_welmwood,1882 -3,2,831,great,3,5122,east_elmwood,5122 -2,1,568,poor,40,2724,northwest,2615.04 -3,2,1194,good,7,4960,west_welmwood,4960 -2,1,620,poor,44,2758,northwest,2625.616 -3,2,990,great,7,5286,east_elmwood,5286 -2,1,513,great,14,3453,east_elmwood,3453 -0,1,196,good,11,1462,west_welmwood,1462 -2,1,594,good,9,3139,west_welmwood,3139 -0,1,167,poor,55,967,northwest,899.31 -3,2,1098,good,6,4855,west_welmwood,4855 -3,2,1072,good,11,4819,west_welmwood,4819 -1,1,397,good,13,1743,west_welmwood,1743 -1,1,383,good,13,1751,west_welmwood,1751 -2,1,546,poor,60,2515,northwest,2313.8 -3,2,1200,good,0,5080,west_welmwood,5080 -2,1,815,poor,43,2999,northwest,2861.046 -1,1,401,good,9,1768,west_welmwood,1768 -2,1,565,good,2,3137,west_welmwood,3137 -1,1,362,good,1,1883,west_welmwood,1883 -3,2,1156,good,5,4997,west_welmwood,4997 -1,1,502,poor,51,1522,northwest,1427.636 -1,1,487,poor,30,1648,northwest,1615.04 -3,2,1179,good,7,4937,west_welmwood,4937 -0,1,412,poor,37,1438,northwest,1389.108 -1,1,455,good,3,1891,west_welmwood,1891 -1,1,677,good,9,2040,west_welmwood,2040 -2,1,633,good,14,3098,west_welmwood,3098 -3,2,949,good,9,4699,west_welmwood,4699 -3,2,1125,great,2,5435,east_elmwood,5435 -1,1,581,poor,40,1711,northwest,1642.56 -2,1,900,poor,56,2957,northwest,2744.096 -3,2,980,great,3,5251,east_elmwood,5251 -0,1,549,good,0,1940,west_welmwood,1940 -1,1,615,poor,21,1870,northwest,1866.26 -0,1,473,good,14,1770,west_welmwood,1770 -1,1,523,good,7,1941,west_welmwood,1941 -2,1,745,poor,26,3105,northwest,3067.74 
-1,1,566,good,7,1980,west_welmwood,1980 -1,1,544,good,7,1998,west_welmwood,1998 -2,1,903,poor,63,2855,northwest,2609.47 -2,1,600,great,6,3601,east_elmwood,3601 -1,1,465,good,0,1994,west_welmwood,1994 -2,1,675,poor,27,3009,northwest,2966.874 -3,2,893,great,7,5126,east_elmwood,5126 -3,2,1111,great,1,5487,east_elmwood,5487 -3,2,1144,poor,20,4783,northwest,4783 -2,1,508,poor,24,2863,northwest,2840.096 -1,1,551,great,12,2456,east_elmwood,2456 -2,1,676,poor,29,2943,northwest,2890.026 -1,1,699,good,7,2090,west_welmwood,2090 -1,1,429,good,7,1843,west_welmwood,1843 -1,1,590,good,4,2030,west_welmwood,2030 -0,1,128,good,4,1496,west_welmwood,1496 -3,2,1150,poor,57,4427,northwest,4099.402 -1,1,392,good,12,1758,west_welmwood,1758 -2,1,806,good,8,3286,west_welmwood,3286 -2,1,938,good,10,3427,west_welmwood,3427 -2,1,552,good,5,3072,west_welmwood,3072 -1,1,316,great,8,2283,east_elmwood,2283 -0,1,319,great,12,2071,east_elmwood,2071 -1,1,483,poor,59,1430,northwest,1318.46 -1,1,400,great,0,2413,east_elmwood,2413 -1,1,586,poor,48,1568,northwest,1480.192 -1,1,484,poor,24,1700,northwest,1686.4 -3,2,859,poor,23,4430,northwest,4403.42 -0,1,534,poor,56,1386,northwest,1286.208 -2,1,936,poor,47,3071,northwest,2905.166 -3,2,911,good,7,4665,west_welmwood,4665 -2,1,900,poor,24,3258,northwest,3231.936 -3,2,933,good,10,4719,west_welmwood,4719 -1,1,464,great,5,2366,east_elmwood,2366 -2,1,773,poor,44,2961,northwest,2818.872 -0,1,203,poor,26,1295,northwest,1279.46 -1,1,321,poor,18,1605,northwest,1605 -1,1,539,good,4,1976,west_welmwood,1976 -1,1,655,poor,45,1723,northwest,1636.85 -3,2,1185,good,4,4991,west_welmwood,4991 -1,1,611,poor,25,1904,northwest,1884.96 -0,1,509,good,11,1833,west_welmwood,1833 -2,1,666,good,8,3165,west_welmwood,3165 -2,1,539,poor,47,2621,northwest,2479.466 -0,1,442,poor,30,1546,northwest,1515.08 -1,1,690,poor,34,1809,northwest,1758.348 -3,2,1062,good,14,4744,west_welmwood,4744 -1,1,428,good,8,1880,west_welmwood,1880 -3,2,814,poor,60,4057,northwest,3732.44 
-2,1,742,poor,38,2989,northwest,2881.396 -1,1,618,poor,62,1495,northwest,1369.42 -1,1,384,great,10,2250,east_elmwood,2250 -2,1,506,great,13,3521,east_elmwood,3521 -0,1,286,poor,35,1344,northwest,1303.68 -0,1,152,great,4,1973,east_elmwood,1973 -0,1,131,good,7,1412,west_welmwood,1412 -3,2,1079,good,9,4866,west_welmwood,4866 -2,1,904,poor,33,3141,northwest,3059.334 -2,1,804,good,6,3327,west_welmwood,3327 -0,1,384,good,7,1736,west_welmwood,1736 -0,1,264,poor,27,1344,northwest,1325.184 -0,1,542,poor,17,1816,northwest,1816 -3,2,1236,poor,55,4549,northwest,4230.57 -0,1,487,good,10,1820,west_welmwood,1820 -0,1,286,poor,55,1091,northwest,1014.63 -0,1,351,good,12,1606,west_welmwood,1606 -0,1,534,great,8,2345,east_elmwood,2345 -0,1,461,good,6,1780,west_welmwood,1780 -2,1,768,good,6,3269,west_welmwood,3269 -2,1,949,great,5,3998,east_elmwood,3998 -1,1,638,poor,31,1802,northwest,1762.356 -0,1,135,good,10,1466,west_welmwood,1466 -1,1,724,poor,44,1760,northwest,1675.52 -0,1,131,good,8,1439,west_welmwood,1439 -3,2,1241,great,2,5615,east_elmwood,5615 -0,1,524,great,5,2348,east_elmwood,2348 -1,1,494,good,10,1875,west_welmwood,1875 -1,1,592,poor,33,1739,northwest,1693.786 -2,1,759,poor,61,2720,northwest,2496.96 -0,1,176,good,3,1572,west_welmwood,1572 -1,1,435,poor,16,1814,northwest,1814 -3,2,966,poor,28,4583,northwest,4509.672 -0,1,466,poor,31,1529,northwest,1495.362 -1,1,507,good,8,1932,west_welmwood,1932 -3,2,1012,poor,35,4527,northwest,4391.19 -0,1,533,good,6,1849,west_welmwood,1849 -2,1,837,great,13,3834,east_elmwood,3834 -1,1,509,good,11,1916,west_welmwood,1916 -0,1,165,good,5,1562,west_welmwood,1562 -3,2,1055,great,0,5400,east_elmwood,5400 -0,1,267,good,2,1634,west_welmwood,1634 -2,1,545,good,9,3067,west_welmwood,3067 -3,2,1085,good,1,4931,west_welmwood,4931 -0,1,273,poor,30,1400,northwest,1372 -2,1,692,poor,31,2948,northwest,2883.144 -2,1,527,great,11,3468,east_elmwood,3468 -3,2,1144,good,3,4994,west_welmwood,4994 -0,1,288,great,0,2151,east_elmwood,2151 
-0,1,313,poor,45,1239,northwest,1177.05 -0,1,124,good,12,1406,west_welmwood,1406 -1,1,332,poor,36,1496,northwest,1448.128 -2,1,724,good,14,3220,west_welmwood,3220 -1,1,429,great,0,2445,east_elmwood,2445 -2,1,752,good,1,3361,west_welmwood,3361 -3,2,825,great,6,5124,east_elmwood,5124 -3,2,1190,good,1,5008,west_welmwood,5008 -2,1,579,poor,58,2575,northwest,2379.3 -3,2,1069,poor,49,4472,northwest,4212.624 -0,1,345,poor,45,1309,northwest,1243.55 -0,1,293,great,8,2124,east_elmwood,2124 -0,1,134,good,4,1490,west_welmwood,1490 -1,1,477,poor,31,1643,northwest,1606.854 -0,1,211,poor,35,1281,northwest,1242.57 -3,2,1106,good,3,4951,west_welmwood,4951 -1,1,429,great,3,2442,east_elmwood,2442 -3,2,1186,poor,45,4551,northwest,4323.45 -0,1,287,good,13,1594,west_welmwood,1594 -2,1,927,great,0,3981,east_elmwood,3981 -3,2,1073,great,4,5379,east_elmwood,5379 -0,1,166,poor,15,1394,northwest,1394 -2,1,598,great,7,3600,east_elmwood,3600 -2,1,908,good,5,3482,west_welmwood,3482 -2,1,788,good,1,3364,west_welmwood,3364 -1,1,330,poor,43,1414,northwest,1348.956 -3,2,1233,good,9,4982,west_welmwood,4982 -1,1,496,good,3,1929,west_welmwood,1929 -3,2,1209,good,6,5039,west_welmwood,5039 -3,2,1224,poor,55,4500,northwest,4185 -1,1,409,good,2,1863,west_welmwood,1863 -2,1,605,good,10,3120,west_welmwood,3120 -2,1,517,poor,17,2916,northwest,2916 -1,1,515,great,0,2558,east_elmwood,2558 -1,1,373,great,4,2363,east_elmwood,2363 -2,1,637,great,8,3702,east_elmwood,3702 -0,1,219,poor,46,1158,northwest,1097.784 -0,1,151,great,0,2053,east_elmwood,2053 -0,1,187,great,3,2058,east_elmwood,2058 -0,1,158,poor,20,1338,northwest,1338 -0,1,146,good,6,1463,west_welmwood,1463 -1,1,493,good,4,1960,west_welmwood,1960 -1,1,348,good,2,1853,west_welmwood,1853 -2,1,603,poor,22,2956,northwest,2944.176 -1,1,546,good,9,1930,west_welmwood,1930 -3,2,1144,good,2,4932,west_welmwood,4932 -0,1,413,good,12,1674,west_welmwood,1674 -0,1,341,great,12,2156,east_elmwood,2156 -2,1,624,great,12,3642,east_elmwood,3642 
-1,1,478,great,5,2434,east_elmwood,2434 -1,1,683,poor,64,1559,northwest,1421.808 -1,1,581,poor,34,1782,northwest,1732.104 -3,2,1210,good,1,5081,west_welmwood,5081 -1,1,658,great,3,2633,east_elmwood,2633 -2,1,758,poor,58,2745,northwest,2536.38 -1,1,611,good,2,2109,west_welmwood,2109 -1,1,522,good,10,1878,west_welmwood,1878 -1,1,612,great,11,2516,east_elmwood,2516 -0,1,408,good,2,1783,west_welmwood,1783 -2,1,789,poor,34,3021,northwest,2936.412 -3,2,963,good,7,4710,west_welmwood,4710 -2,1,718,great,1,3856,east_elmwood,3856 -0,1,403,poor,40,1428,northwest,1370.88 -1,1,598,poor,22,1879,northwest,1871.484 -1,1,689,good,1,2183,west_welmwood,2183 -0,1,418,great,1,2314,east_elmwood,2314 -2,1,727,poor,54,2809,northwest,2617.988 -1,1,738,great,8,2665,east_elmwood,2665 -3,2,1130,great,10,5400,east_elmwood,5400 -3,2,1057,good,9,4773,west_welmwood,4773 -1,1,719,great,0,2669,east_elmwood,2669 -1,1,472,good,1,1971,west_welmwood,1971 -1,1,745,good,6,2170,west_welmwood,2170 -1,1,443,good,5,1934,west_welmwood,1934 -3,2,1075,great,14,5236,east_elmwood,5236 -3,2,1144,good,2,5021,west_welmwood,5021 -1,1,339,great,8,2210,east_elmwood,2210 -1,1,575,good,5,1995,west_welmwood,1995 -2,1,542,good,1,3084,west_welmwood,3084 -0,1,439,good,4,1793,west_welmwood,1793 -2,1,877,great,3,3963,east_elmwood,3963 -0,1,457,poor,46,1407,northwest,1333.836 -0,1,388,poor,36,1380,northwest,1335.84 -0,1,484,poor,21,1677,northwest,1673.646 -0,1,500,poor,21,1687,northwest,1683.626 -3,2,942,good,0,4781,west_welmwood,4781 -3,2,1204,poor,64,4437,northwest,4046.544 -3,2,1102,good,13,4820,west_welmwood,4820 -1,1,603,poor,44,1712,northwest,1629.824 -1,1,499,good,6,1979,west_welmwood,1979 -2,1,582,good,3,3200,west_welmwood,3200 -3,2,874,poor,44,4292,northwest,4085.984 -3,2,967,great,9,5269,east_elmwood,5269 -1,1,668,great,4,2619,east_elmwood,2619 -0,1,530,great,7,2369,east_elmwood,2369 -2,1,750,good,11,3190,west_welmwood,3190 -0,1,263,good,4,1653,west_welmwood,1653 -1,1,467,good,2,1985,west_welmwood,1985 
-3,2,1039,poor,39,4471,northwest,4301.102 -0,1,187,good,6,1489,west_welmwood,1489 -0,1,209,poor,50,1100,northwest,1034 -0,1,398,poor,59,1201,northwest,1107.322 -3,2,945,poor,63,4131,northwest,3775.734 -2,1,657,poor,31,2920,northwest,2855.76 -1,1,537,good,1,2023,west_welmwood,2023 -0,1,334,great,2,2231,east_elmwood,2231 -2,1,660,great,9,3678,east_elmwood,3678 -1,1,479,good,6,1903,west_welmwood,1903 -3,2,917,poor,64,4102,northwest,3741.024 -2,1,898,good,7,3389,west_welmwood,3389 -2,1,822,great,1,3950,east_elmwood,3950 -0,1,270,poor,47,1156,northwest,1093.576 -2,1,625,poor,64,2613,northwest,2383.056 -0,1,268,poor,45,1248,northwest,1185.6 -0,1,218,poor,46,1131,northwest,1072.188 -0,1,422,poor,45,1402,northwest,1331.9 -2,1,766,poor,61,2790,northwest,2561.22 -0,1,224,good,3,1638,west_welmwood,1638 -2,1,762,good,1,3326,west_welmwood,3326 -2,1,782,good,3,3318,west_welmwood,3318 -0,1,518,poor,36,1537,northwest,1487.816 -1,1,548,good,1,2076,west_welmwood,2076 -1,1,665,poor,44,1750,northwest,1666 -2,1,723,good,3,3340,west_welmwood,3340 -0,1,373,good,14,1599,west_welmwood,1599 -0,1,442,poor,63,1205,northwest,1101.37 -2,1,743,poor,49,2861,northwest,2695.062 -2,1,872,poor,35,3072,northwest,2979.84 -1,1,504,good,5,1988,west_welmwood,1988 -2,1,829,great,10,3837,east_elmwood,3837 -1,1,314,good,13,1656,west_welmwood,1656 -0,1,418,great,13,2186,east_elmwood,2186 -1,1,312,poor,29,1509,northwest,1481.838 -2,1,548,poor,17,3012,northwest,3012 -1,1,524,great,7,2475,east_elmwood,2475 -3,2,1142,good,14,4841,west_welmwood,4841 -2,1,929,great,9,3918,east_elmwood,3918 -1,1,505,great,3,2499,east_elmwood,2499 -2,1,665,poor,16,3097,northwest,3097 -0,1,109,great,13,1873,east_elmwood,1873 -0,1,290,poor,37,1274,northwest,1230.684 -3,2,1213,good,10,5001,west_welmwood,5001 -2,1,728,good,12,3185,west_welmwood,3185 -2,1,933,good,5,3520,west_welmwood,3520 -2,1,895,poor,23,3263,northwest,3243.422 -2,1,547,poor,23,2896,northwest,2878.624 -0,1,549,poor,41,1587,northwest,1520.346 
-0,1,217,poor,34,1258,northwest,1222.776 -1,1,492,poor,56,1414,northwest,1312.192 -1,1,421,good,10,1818,west_welmwood,1818 -3,2,1034,poor,15,4684,northwest,4684 -2,1,944,good,10,3477,west_welmwood,3477 -1,1,347,good,12,1681,west_welmwood,1681 -3,2,1208,good,14,4951,west_welmwood,4951 -0,1,449,good,4,1802,west_welmwood,1802 -0,1,464,great,5,2343,east_elmwood,2343 -0,1,205,poor,55,1085,northwest,1009.05 -1,1,435,good,9,1891,west_welmwood,1891 -1,1,682,great,10,2617,east_elmwood,2617 -1,1,317,poor,57,1244,northwest,1151.944 -1,1,730,great,12,2649,east_elmwood,2649 -1,1,668,good,14,2036,west_welmwood,2036 -3,2,1199,good,4,5000,west_welmwood,5000 -0,1,461,great,11,2224,east_elmwood,2224 -2,1,576,poor,43,2727,northwest,2601.558 -3,2,1028,good,3,4845,west_welmwood,4845 -2,1,601,good,5,3154,west_welmwood,3154 -0,1,122,poor,33,1178,northwest,1147.372 -0,1,390,great,5,2241,east_elmwood,2241 -2,1,845,great,7,3891,east_elmwood,3891 -0,1,511,good,1,1881,west_welmwood,1881 -3,2,998,poor,46,4435,northwest,4204.38 -3,2,1042,good,2,4860,west_welmwood,4860 -0,1,195,great,0,2122,east_elmwood,2122 -1,1,305,poor,17,1636,northwest,1636 -2,1,839,great,13,3820,east_elmwood,3820 -3,2,1218,good,1,5045,west_welmwood,5045 -1,1,443,great,6,2401,east_elmwood,2401 -3,2,1203,poor,57,4453,northwest,4123.478 -1,1,429,good,5,1838,west_welmwood,1838 -3,2,1189,good,10,4899,west_welmwood,4899 -1,1,303,good,14,1695,west_welmwood,1695 -0,1,254,poor,64,1061,northwest,967.632 -0,1,208,poor,55,1053,northwest,979.29 -1,1,453,great,8,2397,east_elmwood,2397 -0,1,230,great,14,2012,east_elmwood,2012 -0,1,198,good,9,1470,west_welmwood,1470 -3,2,1132,good,13,4886,west_welmwood,4886 -0,1,314,great,5,2184,east_elmwood,2184 -2,1,882,good,12,3382,west_welmwood,3382 -3,2,1056,good,6,4849,west_welmwood,4849 -3,2,1072,great,7,5385,east_elmwood,5385 -0,1,154,good,1,1536,west_welmwood,1536 -1,1,612,poor,48,1668,northwest,1574.592 -0,1,201,good,7,1539,west_welmwood,1539 -2,1,540,good,10,3068,west_welmwood,3068 
-3,2,1088,good,13,4829,west_welmwood,4829 -3,2,804,great,12,5012,east_elmwood,5012 -3,2,904,good,12,4659,west_welmwood,4659 -1,1,600,good,8,1992,west_welmwood,1992 -2,1,614,great,7,3612,east_elmwood,3612 -0,1,197,good,9,1472,west_welmwood,1472 -1,1,544,poor,24,1838,northwest,1823.296 -2,1,551,good,8,3079,west_welmwood,3079 -1,1,560,good,4,2067,west_welmwood,2067 -2,1,742,great,12,3692,east_elmwood,3692 -0,1,145,good,2,1486,west_welmwood,1486 -2,1,830,great,2,3897,east_elmwood,3897 -1,1,404,great,4,2348,east_elmwood,2348 -0,1,525,poor,57,1321,northwest,1223.246 -0,1,142,great,12,1940,east_elmwood,1940 -3,2,1088,good,11,4836,west_welmwood,4836 -0,1,232,poor,32,1280,northwest,1249.28 -2,1,598,poor,41,2780,northwest,2663.24 -0,1,199,great,4,2096,east_elmwood,2096 -2,1,584,great,6,3607,east_elmwood,3607 -3,2,857,good,9,4606,west_welmwood,4606 -1,1,530,great,14,2405,east_elmwood,2405 -2,1,614,poor,54,2714,northwest,2529.448 -0,1,363,good,11,1690,west_welmwood,1690 -2,1,649,poor,60,2663,northwest,2449.96 -2,1,535,good,9,2996,west_welmwood,2996 -3,2,1041,good,12,4744,west_welmwood,4744 -2,1,844,poor,53,2908,northwest,2716.072 -2,1,576,poor,64,2516,northwest,2294.592 -2,1,872,good,5,3402,west_welmwood,3402 -3,2,1172,good,10,4946,west_welmwood,4946 -3,2,874,great,9,5121,east_elmwood,5121 -3,2,1217,poor,34,4729,northwest,4596.588 -3,2,1220,poor,47,4600,northwest,4351.6 -3,2,1123,poor,46,4519,northwest,4284.012 -0,1,276,great,5,2095,east_elmwood,2095 -2,1,688,good,8,3246,west_welmwood,3246 -1,1,577,great,9,2528,east_elmwood,2528 -3,2,1232,great,6,5518,east_elmwood,5518 -3,2,973,poor,54,4298,northwest,4005.736 -1,1,668,poor,36,1815,northwest,1756.92 -1,1,711,good,11,2083,west_welmwood,2083 -0,1,235,good,14,1478,west_welmwood,1478 -3,2,1114,good,14,4836,west_welmwood,4836 -2,1,741,great,2,3832,east_elmwood,3832 -0,1,210,poor,52,1131,northwest,1058.616 -1,1,699,good,0,2162,west_welmwood,2162 -1,1,612,poor,33,1771,northwest,1724.954 -3,2,978,poor,20,4651,northwest,4651 
-3,2,1160,poor,28,4737,northwest,4661.208 -0,1,103,good,9,1411,west_welmwood,1411 -1,1,739,great,9,2698,east_elmwood,2698 -1,1,589,poor,18,1948,northwest,1948 -0,1,429,great,8,2214,east_elmwood,2214 -0,1,399,great,5,2253,east_elmwood,2253 -2,1,789,great,1,3920,east_elmwood,3920 -3,2,1132,good,14,4820,west_welmwood,4820 -2,1,515,good,2,3078,west_welmwood,3078 -2,1,936,great,11,3934,east_elmwood,3934 -2,1,669,great,14,3633,east_elmwood,3633 -1,1,523,great,0,2527,east_elmwood,2527 -0,1,192,poor,58,1038,northwest,959.112 -0,1,500,poor,50,1448,northwest,1361.12 -1,1,615,good,2,2101,west_welmwood,2101 -1,1,352,poor,17,1649,northwest,1649 -3,2,863,great,11,5108,east_elmwood,5108 -2,1,827,good,11,3304,west_welmwood,3304 -2,1,799,great,4,3887,east_elmwood,3887 -2,1,613,poor,54,2672,northwest,2490.304 -2,1,524,good,10,3052,west_welmwood,3052 -0,1,307,good,3,1664,west_welmwood,1664 -1,1,343,poor,57,1250,northwest,1157.5 -0,1,474,great,10,2319,east_elmwood,2319 -2,1,803,poor,38,3053,northwest,2943.092 -3,2,1168,poor,28,4688,northwest,4612.992 -2,1,704,good,6,3234,west_welmwood,3234 -3,2,1237,good,2,5104,west_welmwood,5104 -3,2,865,good,10,4637,west_welmwood,4637 -0,1,268,good,0,1676,west_welmwood,1676 -1,1,697,good,2,2146,west_welmwood,2146 -1,1,372,good,12,1775,west_welmwood,1775 -3,2,904,poor,51,4256,northwest,3992.128 -2,1,590,poor,61,2554,northwest,2344.572 -1,1,436,good,2,1873,west_welmwood,1873 -3,2,845,good,12,4561,west_welmwood,4561 -3,2,1059,great,3,5336,east_elmwood,5336 -0,1,424,great,14,2177,east_elmwood,2177 -1,1,735,good,1,2181,west_welmwood,2181 -2,1,698,good,5,3214,west_welmwood,3214 -1,1,543,good,10,1984,west_welmwood,1984 -2,1,811,poor,45,2921,northwest,2774.95 -2,1,623,good,0,3211,west_welmwood,3211 -1,1,338,good,14,1709,west_welmwood,1709 -2,1,810,great,7,3875,east_elmwood,3875 -0,1,466,good,13,1715,west_welmwood,1715 -0,1,530,poor,35,1625,northwest,1576.25 -2,1,684,good,12,3164,west_welmwood,3164 -2,1,780,poor,60,2822,northwest,2596.24 
-3,2,1241,good,4,5089,west_welmwood,5089 -3,2,1039,poor,19,4720,northwest,4720 -3,2,866,great,13,5039,east_elmwood,5039 -2,1,606,poor,59,2590,northwest,2387.98 -3,2,848,poor,58,4103,northwest,3791.172 -1,1,463,great,11,2357,east_elmwood,2357 -0,1,355,great,6,2189,east_elmwood,2189 -3,2,875,good,12,4608,west_welmwood,4608 -3,2,1075,good,13,4761,west_welmwood,4761 -3,2,915,good,7,4729,west_welmwood,4729 -1,1,326,poor,19,1617,northwest,1617 -3,2,905,good,1,4709,west_welmwood,4709 -0,1,441,great,8,2275,east_elmwood,2275 -2,1,748,poor,30,3078,northwest,3016.44 -0,1,514,good,3,1835,west_welmwood,1835 -0,1,443,poor,40,1410,northwest,1353.6 -0,1,207,good,10,1524,west_welmwood,1524 -1,1,657,great,9,2579,east_elmwood,2579 -2,1,866,poor,45,2988,northwest,2838.6 -0,1,221,great,12,1992,east_elmwood,1992 -3,2,986,good,1,4785,west_welmwood,4785 -2,1,852,poor,54,2881,northwest,2685.092 -0,1,176,good,8,1514,west_welmwood,1514 -0,1,372,poor,16,1590,northwest,1590 -3,2,936,good,2,4740,west_welmwood,4740 -3,2,1012,good,12,4721,west_welmwood,4721 -0,1,153,poor,28,1260,northwest,1239.84 -1,1,515,great,0,2514,east_elmwood,2514 -1,1,567,poor,47,1549,northwest,1465.354 -1,1,470,great,7,2387,east_elmwood,2387 -2,1,633,good,5,3211,west_welmwood,3211 -0,1,262,good,10,1549,west_welmwood,1549 -0,1,230,great,3,2071,east_elmwood,2071 -2,1,601,poor,29,2891,northwest,2838.962 -2,1,682,good,5,3189,west_welmwood,3189 -1,1,463,great,11,2345,east_elmwood,2345 -3,2,1197,poor,44,4649,northwest,4425.848 -2,1,925,great,1,4037,east_elmwood,4037 -0,1,277,poor,39,1305,northwest,1255.41 -3,2,1033,good,2,4852,west_welmwood,4852 -3,2,1208,poor,59,4443,northwest,4096.446 -0,1,190,good,11,1483,west_welmwood,1483 -0,1,483,good,8,1782,west_welmwood,1782 -3,2,896,great,11,5111,east_elmwood,5111 -3,2,1085,great,3,5399,east_elmwood,5399 -2,1,929,poor,48,3098,northwest,2924.512 -3,2,804,good,11,4496,west_welmwood,4496 -1,1,666,great,2,2652,east_elmwood,2652 -0,1,118,great,5,1953,east_elmwood,1953 
-0,1,230,poor,52,1070,northwest,1001.52 -0,1,298,poor,32,1418,northwest,1383.968 -0,1,214,poor,29,1329,northwest,1305.078 -0,1,525,great,1,2463,east_elmwood,2463 -3,2,1241,good,3,5020,west_welmwood,5020 -1,1,446,good,11,1822,west_welmwood,1822 -2,1,761,great,2,3878,east_elmwood,3878 -2,1,615,poor,18,3053,northwest,3053 -0,1,189,good,7,1500,west_welmwood,1500 -0,1,443,good,9,1738,west_welmwood,1738 -0,1,146,poor,64,878,northwest,800.736 -2,1,787,poor,35,2989,northwest,2899.33 -0,1,356,poor,19,1594,northwest,1594 -3,2,1240,good,9,5011,west_welmwood,5011 -2,1,720,poor,28,3088,northwest,3038.592 -0,1,379,poor,34,1427,northwest,1387.044 -3,2,994,good,13,4675,west_welmwood,4675 -3,2,1213,good,12,4952,west_welmwood,4952 -0,1,338,good,3,1736,west_welmwood,1736 -3,2,919,great,9,5192,east_elmwood,5192 -2,1,501,good,9,3054,west_welmwood,3054 -1,1,476,good,3,1912,west_welmwood,1912 -2,1,577,great,6,3602,east_elmwood,3602 -1,1,400,poor,58,1340,northwest,1238.16 -3,2,1104,poor,59,4363,northwest,4022.686 -2,1,879,poor,16,3359,northwest,3359 -1,1,320,good,4,1808,west_welmwood,1808 -3,2,1038,great,1,5356,east_elmwood,5356 -3,2,884,great,8,5168,east_elmwood,5168 -0,1,445,great,6,2307,east_elmwood,2307 -2,1,841,poor,38,3096,northwest,2984.544 -2,1,704,good,12,3137,west_welmwood,3137 -2,1,571,good,11,3035,west_welmwood,3035 -3,2,1145,poor,59,4396,northwest,4053.112 -2,1,787,good,7,3317,west_welmwood,3317 -3,2,862,good,12,4626,west_welmwood,4626 -2,1,789,poor,52,2838,northwest,2656.368 -0,1,465,good,7,1809,west_welmwood,1809 -0,1,390,poor,30,1451,northwest,1421.98 -0,1,374,good,14,1683,west_welmwood,1683 -2,1,518,poor,43,2723,northwest,2597.742 -0,1,328,great,1,2224,east_elmwood,2224 -3,2,1227,great,12,5497,east_elmwood,5497 -3,2,873,good,6,4672,west_welmwood,4672 -3,2,1236,good,9,5010,west_welmwood,5010 -0,1,294,great,7,2162,east_elmwood,2162 -3,2,801,poor,55,4114,northwest,3826.02 -0,1,522,poor,44,1514,northwest,1441.328 -3,2,940,poor,39,4446,northwest,4277.052 
-1,1,501,great,0,2485,east_elmwood,2485 -3,2,867,poor,49,4232,northwest,3986.544 -3,2,1156,poor,26,4697,northwest,4640.636 -1,1,669,good,8,2065,west_welmwood,2065 -3,2,853,poor,53,4126,northwest,3853.684 -2,1,786,poor,52,2862,northwest,2678.832 -0,1,306,good,5,1687,west_welmwood,1687 -0,1,525,good,12,1781,west_welmwood,1781 -0,1,400,good,3,1732,west_welmwood,1732 -1,1,329,poor,61,1186,northwest,1088.748 -0,1,474,good,11,1723,west_welmwood,1723 -1,1,698,poor,27,1949,northwest,1921.714 -2,1,914,good,5,3414,west_welmwood,3414 -1,1,712,poor,61,1626,northwest,1492.668 -2,1,541,great,3,3614,east_elmwood,3614 -1,1,397,good,13,1798,west_welmwood,1798 -3,2,1137,good,7,4908,west_welmwood,4908 -3,2,859,poor,56,4118,northwest,3821.504 -1,1,532,good,0,1982,west_welmwood,1982 -2,1,525,poor,37,2731,northwest,2638.146 -3,2,870,poor,61,4098,northwest,3761.964 -2,1,677,poor,31,2978,northwest,2912.484 -2,1,873,good,5,3399,west_welmwood,3399 -3,2,824,good,12,4510,west_welmwood,4510 -0,1,347,poor,36,1365,northwest,1321.32 -3,2,1215,great,0,5580,east_elmwood,5580 -0,1,201,good,1,1548,west_welmwood,1548 -2,1,590,poor,18,3026,northwest,3026 -2,1,540,good,12,2970,west_welmwood,2970 -3,2,960,good,11,4696,west_welmwood,4696 -0,1,417,good,7,1794,west_welmwood,1794 -3,2,962,great,0,5270,east_elmwood,5270 -1,1,513,good,14,1858,west_welmwood,1858 -2,1,621,good,8,3169,west_welmwood,3169 -2,1,612,poor,27,2919,northwest,2878.134 -1,1,522,great,11,2368,east_elmwood,2368 -3,2,1178,good,0,5045,west_welmwood,5045 -3,2,935,great,10,5137,east_elmwood,5137 -1,1,692,great,10,2570,east_elmwood,2570 -0,1,504,poor,28,1650,northwest,1623.6 -3,2,1058,poor,56,4393,northwest,4076.704 -0,1,112,good,14,1336,west_welmwood,1336 -0,1,396,great,0,2256,east_elmwood,2256 -1,1,744,great,3,2692,east_elmwood,2692 -2,1,715,poor,50,2783,northwest,2616.02 -3,2,1077,great,4,5369,east_elmwood,5369 -3,2,883,poor,47,4249,northwest,4019.554 -3,2,1056,good,4,4887,west_welmwood,4887 -0,1,425,good,12,1663,west_welmwood,1663 
-1,1,602,good,4,2082,west_welmwood,2082 -1,1,642,great,3,2564,east_elmwood,2564 -1,1,351,poor,51,1365,northwest,1280.37 -1,1,674,poor,39,1758,northwest,1691.196 -3,2,1039,good,7,4858,west_welmwood,4858 -1,1,528,poor,37,1635,northwest,1579.41 -2,1,627,poor,42,2791,northwest,2668.196 -0,1,305,poor,40,1265,northwest,1214.4 -1,1,645,good,0,2097,west_welmwood,2097 -3,2,1074,good,5,4857,west_welmwood,4857 -1,1,536,poor,17,1836,northwest,1836 -0,1,313,good,8,1617,west_welmwood,1617 -1,1,532,great,6,2440,east_elmwood,2440 -3,2,983,good,13,4746,west_welmwood,4746 -3,2,1075,great,5,5398,east_elmwood,5398 -2,1,558,good,1,3175,west_welmwood,3175 -1,1,664,good,6,2145,west_welmwood,2145 -2,1,813,good,2,3399,west_welmwood,3399 -2,1,635,poor,25,3028,northwest,2997.72 -3,2,1029,great,4,5297,east_elmwood,5297 -0,1,137,great,3,2009,east_elmwood,2009 -1,1,372,poor,15,1723,northwest,1723 -0,1,512,poor,58,1371,northwest,1266.804 -3,2,1086,good,7,4880,west_welmwood,4880 -2,1,919,great,4,3951,east_elmwood,3951 -0,1,401,poor,59,1238,northwest,1141.436 -1,1,495,poor,57,1378,northwest,1276.028 -1,1,462,good,10,1910,west_welmwood,1910 -1,1,714,poor,55,1616,northwest,1502.88 -1,1,458,good,6,1879,west_welmwood,1879 -1,1,460,good,2,1922,west_welmwood,1922 -2,1,889,poor,20,3308,northwest,3308 -1,1,658,poor,37,1739,northwest,1679.874 -1,1,696,poor,61,1544,northwest,1417.392 -0,1,357,poor,46,1294,northwest,1226.712 -3,2,1131,poor,18,4839,northwest,4839 -2,1,845,poor,34,3151,northwest,3062.772 -0,1,377,good,12,1686,west_welmwood,1686 -0,1,464,great,12,2259,east_elmwood,2259 -3,2,1131,good,11,4873,west_welmwood,4873 -0,1,297,poor,27,1411,northwest,1391.246 -1,1,578,poor,57,1474,northwest,1364.924 -3,2,1036,good,0,4891,west_welmwood,4891 -3,2,1026,great,13,5220,east_elmwood,5220 -2,1,837,poor,49,2923,northwest,2753.466 -2,1,803,good,5,3384,west_welmwood,3384 -3,2,875,great,5,5213,east_elmwood,5213 -3,2,1000,good,8,4787,west_welmwood,4787 -3,2,842,good,4,4701,west_welmwood,4701 
-3,2,1138,poor,21,4819,northwest,4809.362 -1,1,623,great,14,2476,east_elmwood,2476 -2,1,904,poor,25,3289,northwest,3256.11 -2,1,900,great,2,3976,east_elmwood,3976 -0,1,533,poor,48,1480,northwest,1397.12 -0,1,507,great,3,2426,east_elmwood,2426 -2,1,764,great,12,3706,east_elmwood,3706 -2,1,570,good,13,3066,west_welmwood,3066 -3,2,1218,good,12,4926,west_welmwood,4926 -2,1,686,good,4,3226,west_welmwood,3226 -1,1,708,great,5,2655,east_elmwood,2655 -3,2,985,poor,54,4296,northwest,4003.872 -1,1,346,poor,52,1304,northwest,1220.544 -2,1,743,great,0,3847,east_elmwood,3847 -0,1,144,great,0,2067,east_elmwood,2067 -3,2,921,poor,21,4569,northwest,4559.862 -2,1,520,poor,43,2665,northwest,2542.41 -2,1,762,good,14,3181,west_welmwood,3181 -3,2,1180,poor,47,4574,northwest,4327.004 -3,2,1246,poor,15,4923,northwest,4923 -3,2,1127,good,8,4910,west_welmwood,4910 -1,1,588,good,7,2015,west_welmwood,2015 -2,1,906,good,12,3393,west_welmwood,3393 -2,1,738,poor,57,2759,northwest,2554.834 -2,1,557,good,8,3118,west_welmwood,3118 -2,1,644,good,6,3203,west_welmwood,3203 -0,1,517,great,13,2293,east_elmwood,2293 -3,2,869,great,11,5135,east_elmwood,5135 -1,1,624,poor,41,1727,northwest,1654.466 -0,1,126,poor,21,1348,northwest,1345.304 -2,1,840,good,0,3404,west_welmwood,3404 -1,1,428,good,9,1816,west_welmwood,1816 -2,1,689,poor,28,3040,northwest,2991.36 -1,1,470,good,7,1892,west_welmwood,1892 -0,1,238,great,11,2041,east_elmwood,2041 -1,1,461,good,8,1857,west_welmwood,1857 -2,1,548,great,3,3659,east_elmwood,3659 -1,1,570,poor,48,1635,northwest,1543.44 -0,1,515,poor,37,1528,northwest,1476.048 -1,1,411,good,10,1782,west_welmwood,1782 -0,1,246,poor,57,1066,northwest,987.116 -2,1,859,good,0,3418,west_welmwood,3418 -3,2,971,good,10,4705,west_welmwood,4705 -2,1,549,good,4,3158,west_welmwood,3158 -3,2,1091,great,7,5411,east_elmwood,5411 -0,1,191,poor,56,1008,northwest,935.424 -1,1,377,great,8,2262,east_elmwood,2262 -0,1,250,poor,57,1063,northwest,984.338 -3,2,836,good,3,4697,west_welmwood,4697 
-2,1,702,great,0,3784,east_elmwood,3784 -1,1,394,great,1,2337,east_elmwood,2337 -2,1,719,good,6,3223,west_welmwood,3223 -2,1,682,great,12,3623,east_elmwood,3623 -1,1,318,poor,42,1367,northwest,1306.852 -0,1,332,good,3,1739,west_welmwood,1739 -1,1,498,good,11,1904,west_welmwood,1904 -2,1,672,great,4,3703,east_elmwood,3703 -1,1,575,poor,62,1504,northwest,1377.664 -3,2,1036,good,11,4768,west_welmwood,4768 -2,1,812,good,4,3356,west_welmwood,3356 -1,1,706,poor,40,1770,northwest,1699.2 -2,1,584,good,5,3107,west_welmwood,3107 -1,1,555,poor,59,1419,northwest,1308.318 -1,1,653,good,4,2103,west_welmwood,2103 -1,1,604,good,2,2088,west_welmwood,2088 -2,1,798,poor,26,3098,northwest,3060.824 -3,2,1163,good,2,4995,west_welmwood,4995 -2,1,646,poor,27,2982,northwest,2940.252 -2,1,845,poor,46,3024,northwest,2866.752 -1,1,387,poor,47,1392,northwest,1316.832 -0,1,112,poor,60,948,northwest,872.16 -3,2,1079,poor,46,4445,northwest,4213.86 -3,2,1234,great,14,5394,east_elmwood,5394 -1,1,533,good,3,1992,west_welmwood,1992 -2,1,752,good,10,3204,west_welmwood,3204 -1,1,307,poor,55,1237,northwest,1150.41 -0,1,167,good,1,1517,west_welmwood,1517 -2,1,905,poor,46,3034,northwest,2876.232 -1,1,307,poor,40,1368,northwest,1313.28 -0,1,276,good,10,1586,west_welmwood,1586 -2,1,813,good,1,3364,west_welmwood,3364 -0,1,276,poor,43,1249,northwest,1191.546 -1,1,547,good,12,1953,west_welmwood,1953 -0,1,376,good,12,1695,west_welmwood,1695 -1,1,653,good,14,1999,west_welmwood,1999 -3,2,1017,poor,62,4264,northwest,3905.824 -1,1,582,great,6,2483,east_elmwood,2483 -3,2,1068,good,7,4889,west_welmwood,4889 -0,1,343,great,14,2120,east_elmwood,2120 -2,1,860,good,13,3357,west_welmwood,3357 -2,1,905,great,12,3863,east_elmwood,3863 -2,1,937,poor,18,3336,northwest,3336 -2,1,809,good,7,3295,west_welmwood,3295 -1,1,698,poor,56,1607,northwest,1491.296 -0,1,336,poor,62,1143,northwest,1046.988 -1,1,483,great,8,2417,east_elmwood,2417 -2,1,852,great,2,3975,east_elmwood,3975 -3,2,840,poor,58,4096,northwest,3784.704 
-2,1,936,poor,53,3020,northwest,2820.68 -2,1,556,great,12,3535,east_elmwood,3535 -2,1,572,good,14,3047,west_welmwood,3047 -3,2,879,great,7,5149,east_elmwood,5149 -1,1,700,good,11,2079,west_welmwood,2079 -2,1,564,poor,63,2512,northwest,2295.968 -3,2,1148,great,4,5468,east_elmwood,5468 -0,1,542,poor,49,1420,northwest,1337.64 -2,1,668,good,0,3240,west_welmwood,3240 -2,1,813,great,4,3921,east_elmwood,3921 -3,2,1034,good,10,4782,west_welmwood,4782 -1,1,311,great,13,2185,east_elmwood,2185 -3,2,846,poor,62,4055,northwest,3714.38 -3,2,859,poor,16,4525,northwest,4525 -2,1,672,good,7,3244,west_welmwood,3244 -2,1,695,good,6,3190,west_welmwood,3190 -3,2,1025,good,12,4775,west_welmwood,4775 -0,1,519,poor,64,1270,northwest,1158.24 -2,1,779,poor,40,3022,northwest,2901.12 -3,2,1054,great,4,5342,east_elmwood,5342 -0,1,400,poor,29,1555,northwest,1527.01 -2,1,717,poor,28,3041,northwest,2992.344 -3,2,1163,poor,28,4763,northwest,4686.792 -0,1,408,great,14,2128,east_elmwood,2128 -3,2,1190,great,3,5475,east_elmwood,5475 -2,1,615,poor,41,2806,northwest,2688.148 -2,1,660,good,0,3220,west_welmwood,3220 -1,1,667,poor,22,1927,northwest,1919.292 -1,1,693,good,3,2129,west_welmwood,2129 -0,1,458,poor,36,1464,northwest,1417.152 -0,1,276,good,2,1638,west_welmwood,1638 -1,1,702,poor,44,1787,northwest,1701.224 -0,1,206,good,7,1541,west_welmwood,1541 -2,1,762,good,7,3317,west_welmwood,3317 -1,1,369,poor,26,1559,northwest,1540.292 -3,2,1164,good,11,4944,west_welmwood,4944 -2,1,858,good,9,3374,west_welmwood,3374 -1,1,509,good,11,1919,west_welmwood,1919 -0,1,344,good,14,1554,west_welmwood,1554 -2,1,509,great,6,3567,east_elmwood,3567 -2,1,860,poor,28,3223,northwest,3171.432 -2,1,699,good,8,3249,west_welmwood,3249 -0,1,541,good,14,1833,west_welmwood,1833 -2,1,898,good,3,3457,west_welmwood,3457 -0,1,391,good,13,1618,west_welmwood,1618 -0,1,516,great,13,2272,east_elmwood,2272 -2,1,546,poor,57,2606,northwest,2413.156 -3,2,898,good,3,4767,west_welmwood,4767 -2,1,895,good,0,3476,west_welmwood,3476 
-2,1,877,poor,38,3084,northwest,2972.976 -1,1,706,good,10,2135,west_welmwood,2135 -0,1,542,poor,35,1597,northwest,1549.09 -3,2,1204,poor,54,4545,northwest,4235.94 -2,1,582,poor,48,2710,northwest,2558.24 -3,2,968,good,14,4679,west_welmwood,4679 -2,1,586,good,9,3106,west_welmwood,3106 -1,1,598,poor,28,1818,northwest,1788.912 -1,1,391,great,8,2328,east_elmwood,2328 -2,1,643,good,8,3195,west_welmwood,3195 -3,2,1036,good,3,4890,west_welmwood,4890 -3,2,867,poor,35,4388,northwest,4256.36 -3,2,1096,good,1,4971,west_welmwood,4971 -3,2,815,poor,21,4469,northwest,4460.062 -0,1,295,good,10,1553,west_welmwood,1553 -2,1,902,good,4,3453,west_welmwood,3453 -2,1,749,good,6,3240,west_welmwood,3240 -3,2,847,poor,38,4307,northwest,4151.948 -3,2,1170,poor,38,4655,northwest,4487.42 -1,1,346,good,12,1716,west_welmwood,1716 -2,1,566,good,6,3093,west_welmwood,3093 -2,1,768,poor,50,2823,northwest,2653.62 -1,1,616,poor,38,1727,northwest,1664.828 -2,1,501,good,4,3044,west_welmwood,3044 -2,1,538,poor,22,2927,northwest,2915.292 -3,2,912,poor,28,4463,northwest,4391.592 -2,1,683,poor,33,2923,northwest,2847.002 -3,2,1205,great,7,5532,east_elmwood,5532 -2,1,837,good,0,3457,west_welmwood,3457 -2,1,702,good,14,3190,west_welmwood,3190 -0,1,136,great,2,2008,east_elmwood,2008 -3,2,1075,poor,21,4690,northwest,4680.62 -1,1,432,good,10,1852,west_welmwood,1852 -3,2,941,good,11,4695,west_welmwood,4695 -0,1,220,great,0,2076,east_elmwood,2076 -1,1,671,poor,45,1699,northwest,1614.05 -2,1,682,good,10,3206,west_welmwood,3206 -1,1,700,good,1,2171,west_welmwood,2171 -1,1,411,great,14,2306,east_elmwood,2306 -2,1,552,poor,43,2729,northwest,2603.466 -0,1,497,poor,57,1371,northwest,1269.546 -3,2,951,good,3,4729,west_welmwood,4729 -0,1,430,great,12,2217,east_elmwood,2217 -3,2,1220,poor,32,4714,northwest,4600.864 -1,1,430,good,0,1928,west_welmwood,1928 -3,2,837,good,6,4594,west_welmwood,4594 -0,1,435,good,11,1681,west_welmwood,1681 -3,2,847,great,11,5051,east_elmwood,5051 -0,1,396,good,12,1708,west_welmwood,1708 
-3,2,1119,good,6,4922,west_welmwood,4922 -0,1,386,poor,54,1268,northwest,1181.776 -3,2,1208,good,2,5004,west_welmwood,5004 -0,1,240,good,8,1609,west_welmwood,1609 -1,1,323,good,14,1697,west_welmwood,1697 -2,1,876,great,6,3866,east_elmwood,3866 -2,1,653,great,0,3801,east_elmwood,3801 -2,1,579,good,11,3039,west_welmwood,3039 -1,1,335,poor,24,1635,northwest,1621.92 -3,2,898,poor,62,4129,northwest,3782.164 -1,1,593,good,0,2059,west_welmwood,2059 -1,1,392,great,7,2282,east_elmwood,2282 -1,1,374,good,12,1708,west_welmwood,1708 -1,1,725,great,7,2672,east_elmwood,2672 -3,2,831,poor,27,4424,northwest,4362.064 -2,1,835,good,1,3378,west_welmwood,3378 -0,1,233,good,11,1506,west_welmwood,1506 -2,1,830,great,7,3879,east_elmwood,3879 -1,1,645,good,13,1976,west_welmwood,1976 -3,2,831,poor,36,4354,northwest,4214.672 -1,1,478,good,1,2009,west_welmwood,2009 -2,1,790,good,11,3300,west_welmwood,3300 -0,1,472,poor,27,1650,northwest,1626.9 -3,2,1200,good,11,4941,west_welmwood,4941 -2,1,805,poor,48,2897,northwest,2734.768 -1,1,561,good,6,1990,west_welmwood,1990 -2,1,857,good,8,3357,west_welmwood,3357 -0,1,126,poor,26,1275,northwest,1259.7 -0,1,412,great,9,2197,east_elmwood,2197 -2,1,911,poor,43,3085,northwest,2943.09 -0,1,398,good,10,1740,west_welmwood,1740 -0,1,459,poor,18,1671,northwest,1671 -1,1,407,good,1,1858,west_welmwood,1858 -3,2,928,good,6,4700,west_welmwood,4700 -0,1,232,good,3,1585,west_welmwood,1585 -2,1,703,good,0,3336,west_welmwood,3336 -1,1,628,great,11,2469,east_elmwood,2469 -1,1,643,poor,59,1602,northwest,1477.044 -1,1,359,poor,48,1380,northwest,1302.72 -2,1,755,good,8,3278,west_welmwood,3278 -2,1,613,good,5,3121,west_welmwood,3121 -3,2,1009,good,1,4829,west_welmwood,4829 -0,1,306,great,13,2099,east_elmwood,2099 -3,2,828,poor,57,4157,northwest,3849.382 -3,2,1245,poor,26,4824,northwest,4766.112 -3,2,859,good,9,4654,west_welmwood,4654 -3,2,1196,great,5,5537,east_elmwood,5537 -2,1,698,good,3,3229,west_welmwood,3229 -0,1,162,good,1,1533,west_welmwood,1533 
-0,1,417,great,8,2254,east_elmwood,2254 -0,1,247,good,10,1578,west_welmwood,1578 -3,2,959,good,0,4847,west_welmwood,4847 -1,1,740,good,9,2101,west_welmwood,2101 -0,1,493,good,5,1802,west_welmwood,1802 -3,2,878,good,11,4572,west_welmwood,4572 -1,1,612,good,11,2031,west_welmwood,2031 -2,1,805,poor,40,2978,northwest,2858.88 -0,1,139,good,11,1445,west_welmwood,1445 -2,1,875,poor,26,3194,northwest,3155.672 -2,1,658,good,11,3156,west_welmwood,3156 -1,1,603,good,12,1962,west_welmwood,1962 -0,1,394,poor,61,1148,northwest,1053.864 -2,1,648,great,8,3707,east_elmwood,3707 -3,2,1038,great,11,5299,east_elmwood,5299 -0,1,249,good,8,1528,west_welmwood,1528 -1,1,362,poor,25,1654,northwest,1637.46 -2,1,622,good,0,3257,west_welmwood,3257 -2,1,724,good,13,3208,west_welmwood,3208 -0,1,186,poor,57,1011,northwest,936.186 -2,1,599,poor,22,2933,northwest,2921.268 -3,2,1248,good,10,4948,west_welmwood,4948 -0,1,272,good,9,1631,west_welmwood,1631 -2,1,772,great,9,3796,east_elmwood,3796 -1,1,427,good,0,1917,west_welmwood,1917 -3,2,1189,poor,49,4509,northwest,4247.478 -2,1,590,poor,59,2598,northwest,2395.356 -3,2,925,good,2,4795,west_welmwood,4795 -3,2,1074,poor,43,4525,northwest,4316.85 -2,1,559,good,1,3103,west_welmwood,3103 -3,2,931,poor,34,4416,northwest,4292.352 -1,1,584,poor,51,1585,northwest,1486.73 -3,2,879,good,3,4719,west_welmwood,4719 -1,1,417,poor,58,1377,northwest,1272.348 -2,1,800,poor,18,3177,northwest,3177 -2,1,507,poor,25,2849,northwest,2820.51 -1,1,351,good,14,1729,west_welmwood,1729 -3,2,1062,poor,28,4679,northwest,4604.136 -3,2,937,good,14,4598,west_welmwood,4598 -2,1,921,poor,23,3296,northwest,3276.224 -2,1,942,good,7,3441,west_welmwood,3441 -1,1,701,great,2,2699,east_elmwood,2699 -2,1,922,good,1,3538,west_welmwood,3538 -3,2,826,good,3,4688,west_welmwood,4688 -3,2,1147,good,0,5021,west_welmwood,5021 -1,1,602,poor,63,1431,northwest,1307.934 -2,1,814,poor,32,3072,northwest,2998.272 -2,1,875,good,12,3316,west_welmwood,3316 -1,1,365,poor,47,1373,northwest,1298.858 
-0,1,263,good,9,1580,west_welmwood,1580 -0,1,371,poor,40,1354,northwest,1299.84 -2,1,877,poor,27,3169,northwest,3124.634 -2,1,606,good,12,3071,west_welmwood,3071 -0,1,316,great,12,2103,east_elmwood,2103 -0,1,487,good,10,1813,west_welmwood,1813 -3,2,1169,poor,26,4764,northwest,4706.832 -3,2,1233,good,6,5054,west_welmwood,5054 -3,2,1138,great,3,5414,east_elmwood,5414 -3,2,976,great,11,5255,east_elmwood,5255 -0,1,364,good,10,1615,west_welmwood,1615 -2,1,837,poor,30,3098,northwest,3036.04 -3,2,992,poor,39,4469,northwest,4299.178 -3,2,1151,great,13,5398,east_elmwood,5398 -0,1,528,good,12,1764,west_welmwood,1764 -1,1,453,good,12,1853,west_welmwood,1853 -2,1,578,good,14,3081,west_welmwood,3081 -2,1,910,great,11,3917,east_elmwood,3917 -3,2,1073,good,8,4819,west_welmwood,4819 -1,1,736,great,10,2649,east_elmwood,2649 -2,1,780,good,3,3329,west_welmwood,3329 -0,1,360,great,3,2266,east_elmwood,2266 -0,1,480,poor,21,1694,northwest,1690.612 -2,1,878,poor,59,2889,northwest,2663.658 -0,1,113,great,10,1927,east_elmwood,1927 -3,2,1161,great,3,5449,east_elmwood,5449 -3,2,937,poor,25,4565,northwest,4519.35 -1,1,581,good,10,1986,west_welmwood,1986 -3,2,1204,good,2,5041,west_welmwood,5041 -3,2,947,poor,35,4427,northwest,4294.19 -2,1,536,good,0,3093,west_welmwood,3093 -0,1,385,good,2,1778,west_welmwood,1778 -1,1,533,poor,36,1672,northwest,1618.496 -0,1,115,good,10,1402,west_welmwood,1402 -1,1,416,great,0,2413,east_elmwood,2413 -2,1,566,good,14,3041,west_welmwood,3041 -3,2,979,great,6,5257,east_elmwood,5257 -3,2,1152,good,4,4987,west_welmwood,4987 -1,1,384,good,11,1760,west_welmwood,1760 -2,1,541,good,11,3063,west_welmwood,3063 -2,1,799,poor,18,3174,northwest,3174 -0,1,535,poor,22,1701,northwest,1694.196 -0,1,210,good,3,1602,west_welmwood,1602 -3,2,912,poor,51,4207,northwest,3946.166 -3,2,1221,poor,40,4713,northwest,4524.48 -3,2,1091,good,1,4978,west_welmwood,4978 -3,2,1207,good,5,5055,west_welmwood,5055 -0,1,372,poor,31,1499,northwest,1466.022 -1,1,537,poor,33,1754,northwest,1708.396 
-3,2,806,great,9,5111,east_elmwood,5111 -3,2,1062,great,12,5306,east_elmwood,5306 -1,1,695,good,8,2111,west_welmwood,2111 -0,1,531,great,4,2403,east_elmwood,2403 -1,1,693,poor,31,1909,northwest,1867.002 -1,1,732,good,3,2206,west_welmwood,2206 -1,1,664,poor,43,1744,northwest,1663.776 -3,2,998,great,5,5277,east_elmwood,5277 -2,1,657,poor,62,2598,northwest,2379.768 -3,2,1248,poor,40,4677,northwest,4489.92 -3,2,1112,great,13,5334,east_elmwood,5334 -1,1,451,good,8,1899,west_welmwood,1899 -2,1,562,good,2,3189,west_welmwood,3189 -1,1,671,great,5,2596,east_elmwood,2596 -3,2,1103,good,10,4852,west_welmwood,4852 -0,1,398,poor,62,1182,northwest,1082.712 -1,1,474,poor,16,1794,northwest,1794 -1,1,622,great,5,2566,east_elmwood,2566 -2,1,543,good,13,2991,west_welmwood,2991 -0,1,540,poor,15,1812,northwest,1812 -2,1,671,poor,61,2631,northwest,2415.258 -3,2,1171,poor,57,4437,northwest,4108.662 -0,1,316,great,4,2156,east_elmwood,2156 -3,2,1026,poor,48,4374,northwest,4129.056 -1,1,407,good,3,1898,west_welmwood,1898 -0,1,227,good,14,1462,west_welmwood,1462 -1,1,429,good,8,1866,west_welmwood,1866 -1,1,707,great,0,2752,east_elmwood,2752 -0,1,533,great,10,2371,east_elmwood,2371 -3,2,1106,poor,46,4483,northwest,4249.884 -0,1,414,great,5,2298,east_elmwood,2298 -2,1,745,poor,55,2828,northwest,2630.04 -0,1,334,good,5,1706,west_welmwood,1706 -3,2,1102,poor,32,4675,northwest,4562.8 -0,1,309,poor,43,1241,northwest,1183.914 -0,1,428,good,0,1851,west_welmwood,1851 -2,1,655,great,7,3697,east_elmwood,3697 -2,1,826,poor,17,3240,northwest,3240 -3,2,832,poor,63,4013,northwest,3667.882 -0,1,227,great,12,1977,east_elmwood,1977 -0,1,351,great,4,2251,east_elmwood,2251 -3,2,844,poor,57,4131,northwest,3825.306 -2,1,688,poor,41,2883,northwest,2761.914 -2,1,523,good,14,2998,west_welmwood,2998 -0,1,546,poor,30,1620,northwest,1587.6 -0,1,428,good,1,1865,west_welmwood,1865 -1,1,642,poor,49,1622,northwest,1527.924 -0,1,156,good,4,1474,west_welmwood,1474 -2,1,578,poor,30,2845,northwest,2788.1 
-3,2,940,great,12,5177,east_elmwood,5177 -2,1,748,great,3,3813,east_elmwood,3813 -2,1,699,poor,32,2936,northwest,2865.536 -0,1,349,poor,50,1230,northwest,1156.2 -2,1,849,good,5,3390,west_welmwood,3390 -1,1,374,good,5,1850,west_welmwood,1850 -2,1,611,good,14,3061,west_welmwood,3061 -1,1,497,poor,52,1511,northwest,1414.296 -0,1,356,good,3,1703,west_welmwood,1703 -0,1,308,great,11,2137,east_elmwood,2137 -3,2,983,good,3,4837,west_welmwood,4837 -2,1,549,poor,51,2612,northwest,2450.056 -3,2,871,good,5,4711,west_welmwood,4711 -2,1,788,good,10,3313,west_welmwood,3313 -1,1,465,good,6,1881,west_welmwood,1881 -0,1,259,great,0,2137,east_elmwood,2137 -0,1,312,good,7,1612,west_welmwood,1612 -2,1,691,great,5,3696,east_elmwood,3696 -0,1,471,great,5,2353,east_elmwood,2353 -2,1,869,great,8,3876,east_elmwood,3876 -0,1,387,poor,56,1247,northwest,1157.216 -2,1,683,poor,38,2930,northwest,2824.52 -0,1,440,poor,29,1533,northwest,1505.406 -3,2,827,poor,48,4191,northwest,3956.304 -0,1,157,good,3,1487,west_welmwood,1487 -3,2,937,poor,64,4110,northwest,3748.32 -3,2,1037,poor,16,4721,northwest,4721 -2,1,918,poor,36,3175,northwest,3073.4 -2,1,543,good,0,3101,west_welmwood,3101 -1,1,663,good,1,2133,west_welmwood,2133 -1,1,529,good,0,2056,west_welmwood,2056 -0,1,446,great,5,2250,east_elmwood,2250 -3,2,972,good,12,4670,west_welmwood,4670 -2,1,509,great,1,3624,east_elmwood,3624 -0,1,382,poor,22,1597,northwest,1590.612 -3,2,1114,good,6,4892,west_welmwood,4892 -1,1,444,good,1,1948,west_welmwood,1948 -3,2,1019,great,12,5206,east_elmwood,5206 -2,1,936,great,12,3913,east_elmwood,3913 -0,1,319,poor,42,1261,northwest,1205.516 -0,1,352,good,9,1619,west_welmwood,1619 -3,2,1042,good,0,4915,west_welmwood,4915 -2,1,758,good,6,3271,west_welmwood,3271 -3,2,1156,good,4,5003,west_welmwood,5003 -3,2,849,good,1,4734,west_welmwood,4734 -1,1,350,great,10,2257,east_elmwood,2257 -2,1,793,good,6,3298,west_welmwood,3298 -1,1,650,good,14,2034,west_welmwood,2034 -1,1,526,great,8,2423,east_elmwood,2423 
-3,2,857,good,2,4709,west_welmwood,4709 -1,1,520,good,5,1945,west_welmwood,1945 -1,1,508,great,6,2417,east_elmwood,2417 -3,2,1158,poor,53,4455,northwest,4160.97 -2,1,835,good,5,3383,west_welmwood,3383 -2,1,851,good,10,3310,west_welmwood,3310 -2,1,784,good,5,3287,west_welmwood,3287 -3,2,852,good,13,4550,west_welmwood,4550 -1,1,508,great,5,2484,east_elmwood,2484 -2,1,527,good,11,2990,west_welmwood,2990 -2,1,715,poor,64,2685,northwest,2448.72 -0,1,134,good,7,1513,west_welmwood,1513 -0,1,541,poor,22,1764,northwest,1756.944 -0,1,501,poor,41,1444,northwest,1383.352 -1,1,622,poor,56,1538,northwest,1427.264 -0,1,401,poor,40,1373,northwest,1318.08 -3,2,1167,poor,50,4535,northwest,4262.9 -0,1,196,poor,45,1133,northwest,1076.35 -3,2,916,good,7,4706,west_welmwood,4706 -1,1,536,good,2,1978,west_welmwood,1978 -0,1,174,good,12,1424,west_welmwood,1424 -3,2,1113,good,7,4869,west_welmwood,4869 -1,1,377,poor,62,1266,northwest,1159.656 -0,1,270,great,2,2134,east_elmwood,2134 -3,2,879,good,5,4670,west_welmwood,4670 -3,2,875,poor,22,4490,northwest,4472.04 -1,1,675,good,2,2189,west_welmwood,2189 -3,2,907,poor,25,4498,northwest,4453.02 -0,1,300,poor,35,1392,northwest,1350.24 -2,1,939,great,11,3895,east_elmwood,3895 -1,1,521,good,1,2004,west_welmwood,2004 -1,1,359,poor,45,1430,northwest,1358.5 -3,2,1246,good,7,4980,west_welmwood,4980 -2,1,530,good,1,3138,west_welmwood,3138 -2,1,501,good,11,2952,west_welmwood,2952 -0,1,307,poor,51,1243,northwest,1165.934 -3,2,1026,good,3,4834,west_welmwood,4834 -2,1,869,good,10,3406,west_welmwood,3406 -0,1,108,poor,45,1025,northwest,973.75 -3,2,1010,good,9,4813,west_welmwood,4813 -0,1,197,good,2,1622,west_welmwood,1622 -0,1,282,poor,38,1311,northwest,1263.804 -3,2,1096,poor,26,4685,northwest,4628.78 -0,1,537,good,12,1817,west_welmwood,1817 -2,1,939,poor,39,3118,northwest,2999.516 -0,1,503,great,7,2313,east_elmwood,2313 -2,1,914,good,12,3432,west_welmwood,3432 -2,1,799,poor,49,2874,northwest,2707.308 -1,1,594,poor,16,1886,northwest,1886 
-2,1,911,good,5,3478,west_welmwood,3478 -0,1,286,good,10,1543,west_welmwood,1543 -0,1,433,good,14,1685,west_welmwood,1685 -1,1,392,good,4,1842,west_welmwood,1842 -0,1,222,good,4,1575,west_welmwood,1575 -2,1,939,poor,36,3162,northwest,3060.816 -2,1,507,good,2,3070,west_welmwood,3070 -1,1,410,poor,18,1736,northwest,1736 -0,1,434,poor,21,1636,northwest,1632.728 -1,1,385,good,5,1883,west_welmwood,1883 -2,1,821,poor,46,2942,northwest,2789.016 -1,1,659,good,5,2136,west_welmwood,2136 -3,2,1167,good,7,4938,west_welmwood,4938 -2,1,836,good,9,3335,west_welmwood,3335 -3,2,1236,poor,57,4563,northwest,4225.338 -0,1,456,poor,33,1564,northwest,1523.336 -0,1,421,poor,44,1365,northwest,1299.48 -0,1,461,poor,26,1562,northwest,1543.256 -3,2,1060,good,0,4875,west_welmwood,4875 -0,1,138,good,2,1491,west_welmwood,1491 -1,1,480,good,1,1924,west_welmwood,1924 -0,1,341,poor,17,1559,northwest,1559 -2,1,570,great,1,3670,east_elmwood,3670 -2,1,580,poor,53,2629,northwest,2455.486 -1,1,520,poor,29,1764,northwest,1732.248 -2,1,686,poor,20,3118,northwest,3118 -3,2,1183,great,3,5521,east_elmwood,5521 -0,1,380,good,2,1800,west_welmwood,1800 -2,1,727,poor,23,3098,northwest,3079.412 -3,2,1097,good,0,4910,west_welmwood,4910 -2,1,703,good,5,3255,west_welmwood,3255 -3,2,1046,great,14,5214,east_elmwood,5214 -0,1,336,great,8,2195,east_elmwood,2195 -2,1,500,poor,33,2778,northwest,2705.772 -1,1,578,poor,16,1906,northwest,1906 -1,1,571,great,3,2562,east_elmwood,2562 -2,1,735,good,11,3213,west_welmwood,3213 -0,1,384,poor,35,1394,northwest,1352.18 -0,1,200,great,6,1996,east_elmwood,1996 -2,1,612,good,9,3169,west_welmwood,3169 -1,1,619,good,2,2126,west_welmwood,2126 -2,1,905,poor,24,3282,northwest,3255.744 -0,1,307,good,3,1679,west_welmwood,1679 -3,2,962,good,1,4824,west_welmwood,4824 -1,1,653,good,4,2089,west_welmwood,2089 -2,1,638,good,3,3219,west_welmwood,3219 -2,1,666,good,7,3237,west_welmwood,3237 -0,1,235,poor,40,1186,northwest,1138.56 -0,1,487,poor,39,1483,northwest,1426.646 
-3,2,919,great,1,5254,east_elmwood,5254 -0,1,504,great,3,2369,east_elmwood,2369 -0,1,532,great,2,2409,east_elmwood,2409 -0,1,216,good,6,1547,west_welmwood,1547 -2,1,848,poor,44,3015,northwest,2870.28 -3,2,1222,poor,15,4924,northwest,4924 -3,2,1159,poor,56,4404,northwest,4086.912 -2,1,712,great,14,3632,east_elmwood,3632 -0,1,411,poor,44,1322,northwest,1258.544 -3,2,1147,poor,19,4809,northwest,4809 -2,1,652,poor,57,2728,northwest,2526.128 -1,1,504,great,13,2416,east_elmwood,2416 -1,1,619,poor,20,1872,northwest,1872 -1,1,728,good,1,2254,west_welmwood,2254 -2,1,878,poor,47,3018,northwest,2855.028 -1,1,680,good,5,2133,west_welmwood,2133 -1,1,719,poor,41,1822,northwest,1745.476 -2,1,923,good,2,3458,west_welmwood,3458 -1,1,424,poor,41,1513,northwest,1449.454 -1,1,397,poor,26,1640,northwest,1620.32 -0,1,131,poor,37,1111,northwest,1073.226 -1,1,461,good,6,1863,west_welmwood,1863 -2,1,835,poor,38,3083,northwest,2972.012 -0,1,320,poor,36,1311,northwest,1269.048 -0,1,415,good,4,1814,west_welmwood,1814 -3,2,1034,good,13,4729,west_welmwood,4729 -2,1,786,poor,28,3144,northwest,3093.696 -3,2,807,poor,59,4091,northwest,3771.902 -2,1,724,good,14,3149,west_welmwood,3149 -2,1,512,good,6,3098,west_welmwood,3098 -2,1,753,great,12,3737,east_elmwood,3737 -1,1,521,good,5,1944,west_welmwood,1944 -0,1,147,good,13,1369,west_welmwood,1369 -1,1,627,poor,55,1610,northwest,1497.3 -0,1,174,good,8,1495,west_welmwood,1495 -1,1,590,good,9,1972,west_welmwood,1972 -1,1,655,good,1,2138,west_welmwood,2138 -0,1,506,poor,26,1601,northwest,1581.788 -3,2,1084,poor,16,4724,northwest,4724 diff --git a/docker/db_images/postgres/sql-scripts/prepare.sql b/docker/db_images/postgres/sql-scripts/prepare.sql deleted file mode 100644 index 16ac31c7b48..00000000000 --- a/docker/db_images/postgres/sql-scripts/prepare.sql +++ /dev/null @@ -1,12 +0,0 @@ -CREATE TABLE rentals ( -number_of_rooms INT, -number_of_bathrooms INT, -sqft varchar(25), -location varchar(25), -days_on_market INT, -initial_price FLOAT, -neighborhood 
varchar(25), -rental_price FLOAT -); - -COPY rentals FROM '/home_rentals.csv' DELIMITER ',' CSV HEADER; diff --git a/docker/docker-bake.hcl b/docker/docker-bake.hcl deleted file mode 100644 index 852a82f7839..00000000000 --- a/docker/docker-bake.hcl +++ /dev/null @@ -1,141 +0,0 @@ -# The default targets to be built if none are specified -group "default" { - targets = ["bare", "devel", "cloud", "cloud-cpu"] -} - -variable "PUSH_TO_DOCKERHUB" { - default = false -} -variable "IMAGE" { - default = "mindsdb" -} -# This is a semver for releases but otherwise is a github sha -variable "VERSION" { - default = "unknown" -} -variable "PLATFORMS" { - default = "linux/amd64,linux/arm64" -} -variable PLATFORM_LIST { - default = split(",", PLATFORMS) -} -variable "BRANCH" { - default = "main" -} -variable "ECR_REPO" { - default = "168681354662.dkr.ecr.us-east-1.amazonaws.com" -} -variable "PUSH_CACHE" { - default = true -} -variable "CACHE_ONLY" { - default = false -} -variable "PRERELEASE" { - default = can(regex("v?[0-9]+\\.[0-9]+\\.[0-9]+(a|b|rc)[0-9]+", VERSION)) -} -variable "RELEASE_CANDIDATE" { - default = can(regex("v?[0-9]+\\.[0-9]+\\.[0-9]+rc[0-9]+", VERSION)) -} - -function "get_cache_to" { - params = [image] - result = PUSH_CACHE ? 
[ - "type=registry,image-manifest=true,oci-mediatypes=true,mode=max,ref=${ECR_REPO}/${IMAGE}-cache:${replace("${BRANCH}", "/", "-")}-${image}" - ] : [] -} -function "get_cache_from" { - params = [image] - result = flatten([for p in PLATFORM_LIST: - split("\n", < /etc/apt/apt.conf.d/keep-cache -# Install system dependencies, with caching for faster builds -RUN --mount=target=/var/lib/apt,type=cache,sharing=locked \ - --mount=target=/var/cache/apt,type=cache,sharing=locked \ - apt update -qy \ - && apt-get upgrade -qy \ - && apt-get install -qy \ - -o APT::Install-Recommends=false \ - -o APT::Install-Suggests=false \ - freetds-dev freetds-bin libpq5 curl unixodbc unixodbc-dev gnupg # freetds-dev required to build pymssql on arm64 for mssql_handler. Can be removed when we are on python3.11+ - -# Install Microsoft ODBC Driver 18 for SQL Server -# Use Debian 12 (bookworm) repo as it's the latest stable version supported by Microsoft -RUN --mount=target=/var/lib/apt,type=cache,sharing=locked \ - --mount=target=/var/cache/apt,type=cache,sharing=locked \ - curl -fsSL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor -o /usr/share/keyrings/microsoft-prod.gpg \ - && echo "deb [arch=amd64,arm64 signed-by=/usr/share/keyrings/microsoft-prod.gpg] https://packages.microsoft.com/debian/12/prod bookworm main" > /etc/apt/sources.list.d/mssql-release.list \ - && apt-get update \ - && ACCEPT_EULA=Y apt-get install -y msodbcsql18 - -# Use a specific tag so the file doesn't change -COPY --from=ghcr.io/astral-sh/uv:0.8.11 /uv /usr/local/bin/uv -# Copy requirements files from the first stage -COPY --from=deps /mindsdb . - -# - Silence uv complaining about not being able to use hard links, -# - prevent uv from accidentally downloading isolated Python builds, -# - pick a Python, -# - and finally declare `/mindsdb` as the target dir. 
-ENV UV_LINK_MODE=copy \ - UV_PYTHON_DOWNLOADS=never \ - UV_PYTHON=python3.10.20 \ - UV_PROJECT_ENVIRONMENT=/mindsdb \ - VIRTUAL_ENV=/venv \ - PATH=/venv/bin:$PATH - -# Install all requirements for mindsdb and all the default handlers -# Installs everything into a venv in /mindsdb so that everything is isolated -RUN --mount=type=cache,target=/root/.cache \ - uv venv /venv \ - && uv pip install pip "." - - - - -FROM build AS extras - -# Apply latest security patches so the final image picks up fixes -# even when the build stage layers are cached -RUN --mount=target=/var/lib/apt,type=cache,sharing=locked \ - --mount=target=/var/cache/apt,type=cache,sharing=locked \ - apt-get update -qy && apt-get upgrade -qy - -ARG EXTRAS -# Install extras on top of the bare mindsdb -# The torch index is provided for "-cpu" images which install the cpu-only version of torch -RUN --mount=type=cache,target=/root/.cache \ - if [ -n "$EXTRAS" ]; then uv pip install --index-strategy unsafe-first-match --index https://pypi.org/simple --index https://download.pytorch.org/whl/ $EXTRAS; fi - -# Copy all of the mindsdb code over finally -# Here is where we invalidate the cache again if ANY file has changed -COPY . . -# Install the "mindsdb" package now that we have the code for it -RUN --mount=type=cache,target=/root/.cache uv pip install --no-deps "." - -COPY docker/mindsdb_config.release.json /root/mindsdb_config.json - -ENV PYTHONUNBUFFERED=1 -ENV MINDSDB_DOCKER_ENV=1 -ENV VIRTUAL_ENV=/venv -ENV PATH=/venv/bin:$PATH - -EXPOSE 47334/tcp -EXPOSE 47335/tcp - -HEALTHCHECK --interval=30s --timeout=10s --retries=5 --start-period=60s CMD curl -fsS "http://localhost:47334/api/status" - -# Pre-load web GUI -RUN python -m mindsdb --config=/root/mindsdb_config.json --update-gui - -# Same as extras image, but with dev dependencies installed. 
-# This image is used in our docker-compose -FROM extras AS dev -WORKDIR /mindsdb - -# Configure apt to retain downloaded packages so we can store them in a cache mount -RUN rm -f /etc/apt/apt.conf.d/docker-clean; echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache -# Install system dependencies, with caching for faster builds -RUN --mount=target=/var/lib/apt,type=cache,sharing=locked \ - --mount=target=/var/cache/apt,type=cache,sharing=locked \ - export DEBIAN_FRONTEND=noninteractive ACCEPT_EULA=Y && \ - apt update -qy \ - && apt-get upgrade -qy \ - && apt-get install -qy \ - -o APT::Install-Recommends=false \ - -o APT::Install-Suggests=false \ - libpq5 freetds-bin curl - -# Install dev requirements and install 'mindsdb' as an editable package -RUN --mount=type=cache,target=/root/.cache uv pip install -r requirements/requirements-dev.txt \ - && uv pip install --no-deps -e "." - -COPY docker/mindsdb_config.release.json /root/mindsdb_config.json - -ENTRYPOINT [ "bash", "-c", "watchfiles --filter python 'python -Im mindsdb --config=/root/mindsdb_config.json --api=http,mysql' mindsdb" ] - - - - -# Make sure the regular image is the default -FROM extras - -ENTRYPOINT [ "bash", "-c", "python -Im mindsdb --config=/root/mindsdb_config.json --api=http,mysql" ] diff --git a/docker/mindsdb_config.release.json b/docker/mindsdb_config.release.json deleted file mode 100644 index 35d9c9ffaa6..00000000000 --- a/docker/mindsdb_config.release.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "config_version": "1.4", - "paths": { - "root": "/root/mdb_storage" - }, - "debug": false, - "integrations": {}, - "api": { - "http": { - "host": "0.0.0.0", - "port": "47334" - }, - "mysql": { - "host": "0.0.0.0", - "password": "", - "port": "47335", - "user": "mindsdb", - "database": "mindsdb", - "ssl": true - } - } -} diff --git a/docker/nginx.conf b/docker/nginx.conf new file mode 100644 index 00000000000..4b5895bd83f --- /dev/null +++ b/docker/nginx.conf @@ 
-0,0 +1,18 @@ +server { + listen 80; + root /usr/share/nginx/html; + index index.html; + + location /v1/ { + proxy_pass http://api:26866; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_read_timeout 120s; + } + + location / { + try_files $uri $uri/ /index.html; + } +} diff --git a/docker/web.Dockerfile b/docker/web.Dockerfile new file mode 100644 index 00000000000..d07667cb954 --- /dev/null +++ b/docker/web.Dockerfile @@ -0,0 +1,17 @@ +FROM node:22-slim AS builder + +WORKDIR /build +COPY frontend/package.json frontend/package-lock.json ./ +RUN npm ci --ignore-scripts +COPY frontend/ ./ +RUN npm run build:web + +FROM nginx:alpine AS runtime + +LABEL org.opencontainers.image.title="cowork-web" +LABEL org.opencontainers.image.source="https://github.com/mindsdb/minds-platform" + +COPY --from=builder /build/dist/renderer-web/ /usr/share/nginx/html/ +COPY docker/nginx.conf /etc/nginx/conf.d/default.conf + +EXPOSE 80 diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index 5dfe70871f6..00000000000 --- a/docs/README.md +++ /dev/null @@ -1,23 +0,0 @@ -# MindsDB Documentation MindsDB Docs - -## Running the docs locally - -First install `mintlify`: - -``` -npm i -g mintlify -``` -Then, start the server: - -``` -mintlify dev -``` - -The documentation website will be available at `http://127.0.0.1:3000` - - - -## How can you help us? [![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/mindsdb/mindsdb-docs/issues) - -* [How to Contribute docs](https://docs.mindsdb.com/contribute) -* [Writing Documentation](https://docs.mindsdb.com/contribute/docs) diff --git a/docs/api.html b/docs/api.html new file mode 100644 index 00000000000..4b3a815a5cf --- /dev/null +++ b/docs/api.html @@ -0,0 +1,497 @@ + + + + +API Reference — Minds Cowork Docs + + + + + + + + + + +
+ + +
+
API Reference
+

REST API

+

+ The Cowork server exposes a versioned REST API at /api/v1. Endpoints return JSON unless noted otherwise; for example, POST /responses streams its output as server-sent events. The interactive docs are available at /docs when the server is running. +

+ +
+ Base URL + http://localhost:26866/api/v1 +
+ + +
+

Responses

+

Stream AI responses from the agent. Compatible with the OpenAI Responses API shape.

+ +
+
+ POST + /responses + Create a streaming response +
+
+

Sends a message and streams back the agent's response as server-sent events.

+
+
curl
+
curl -X POST http://localhost:26866/api/v1/responses \
+  -H "Content-Type: application/json" \
+  -d '{"conversation_id": "...", "input": "Summarise last week'\''s sales data"}'
+
+
+
+ +
+
+ GET + /responses/in-flight-list + List active streams +
+
+ +
+
+ GET + /responses/in-flight + Check a specific stream +
+
+
+ + +
+

Conversations

+

Manage conversation threads. Each conversation holds an ordered list of messages.

+ +
+
+ GET + /conversations + List conversations +
+
+
+
+ POST + /conversations + Create a conversation +
+
+
+
curl
+
curl -X POST http://localhost:26866/api/v1/conversations \
+  -H "Content-Type: application/json" \
+  -d '{"project_id": "...", "title": "Q2 analysis"}'
+
+
+
+
+
+ GET + /conversations/{id} + Get a conversation +
+
+
+
+ PATCH + /conversations/{id} + Update title or metadata +
+
+
+
+ GET + /conversations/{id}/items + Get messages in a conversation +
+
+
+
+ DELETE + /conversations/{id} + Delete a conversation +
+
+
+ + +
+

Projects

+

Projects are workspaces that group conversations, files, and artifacts.

+ +
+
+ GET + /projects + List all projects +
+
+
+
+ POST + /projects + Create a project +
+
+
+
curl
+
curl -X POST http://localhost:26866/api/v1/projects \
+  -H "Content-Type: application/json" \
+  -d '{"name": "Sales Automation", "description": "Weekly reporting flows"}'
+
+
+
+
+
+ PATCH + /projects/{id} + Update a project +
+
+
+
+ DELETE + /projects/{id} + Delete a project +
+
+
+ + +
+

Artifacts

+

Artifacts are the outputs the agent creates — apps, reports, dashboards, documents. Each artifact belongs to a project.

+ +
+
+ GET + /artifacts + List artifacts (optionally filter by project) +
+
+
+
+ GET + /artifacts/preview + Preview an artifact by path +
+
+
+
+ POST + /artifacts/open + Open an artifact in the desktop shell +
+
+
+
+ GET + /artifacts/serve/{project}/{path} + Serve an artifact file +
+
+
+ + +
+

Schedules

+

Run tasks on a cron schedule. Schedules belong to a project and trigger a conversation or workflow automatically.

+ +
+
+ GET + /schedules + List schedules +
+
+
+
+ POST + /schedules + Create a schedule +
+
+
+
curl
+
curl -X POST http://localhost:26866/api/v1/schedules \
+  -H "Content-Type: application/json" \
+  -d '{"project_id": "...", "cron": "0 9 * * 1", "prompt": "Generate weekly sales digest"}'
+
+
+
+
+
+ PATCH + /schedules/{id} + Update a schedule +
+
+
+
+ POST + /schedules/{id}/pause + Pause a schedule +
+
+
+
+ DELETE + /schedules/{id} + Delete a schedule +
+
+
+ + +
+

Files

+

Upload and manage files that the agent can read, search, and reference during conversations.

+ +
+
+ GET + /files + List uploaded files +
+
+
+
+ POST + /files + Upload a file +
+
+
+
+ DELETE + /files/{id} + Delete a file +
+
+
+ + +
+

Connectors

+

Integrate external data sources. Connectors are defined by specs and instantiated as connections with encrypted credentials.

+ +
+
+ GET + /connectors/specs + List available connector specs +
+
+
+
+ GET + /connectors/connections + List active connections +
+
+
+
+ POST + /connectors/connections + Create a connection +
+
+
+
+ GET + /connectors/oauth/{spec}/start + Start OAuth flow for a connector +
+
+
+ + + + +
+
+ + + + + diff --git a/docs/assets/SLBot-Hero-Whizfizz.png b/docs/assets/SLBot-Hero-Whizfizz.png deleted file mode 100644 index a0ed1d3afdf..00000000000 Binary files a/docs/assets/SLBot-Hero-Whizfizz.png and /dev/null differ diff --git a/docs/assets/SLBot-response1.png b/docs/assets/SLBot-response1.png deleted file mode 100644 index 54ee538ff48..00000000000 Binary files a/docs/assets/SLBot-response1.png and /dev/null differ diff --git a/docs/assets/SLBot-response2.png b/docs/assets/SLBot-response2.png deleted file mode 100644 index cb987cd280f..00000000000 Binary files a/docs/assets/SLBot-response2.png and /dev/null differ diff --git a/docs/assets/SLBot-response3.png b/docs/assets/SLBot-response3.png deleted file mode 100644 index 786a3bc730c..00000000000 Binary files a/docs/assets/SLBot-response3.png and /dev/null differ diff --git a/docs/assets/SLBot-response4.png b/docs/assets/SLBot-response4.png deleted file mode 100644 index 67fb418bb58..00000000000 Binary files a/docs/assets/SLBot-response4.png and /dev/null differ diff --git a/docs/assets/ai-integrations.png b/docs/assets/ai-integrations.png deleted file mode 100644 index c6c0c2c29bf..00000000000 Binary files a/docs/assets/ai-integrations.png and /dev/null differ diff --git a/docs/assets/automation.png b/docs/assets/automation.png deleted file mode 100644 index ba3b210a8f7..00000000000 Binary files a/docs/assets/automation.png and /dev/null differ diff --git a/docs/assets/byom_diagram.png b/docs/assets/byom_diagram.png deleted file mode 100644 index 95120aedf57..00000000000 Binary files a/docs/assets/byom_diagram.png and /dev/null differ diff --git a/docs/assets/byom_empty_form.png b/docs/assets/byom_empty_form.png deleted file mode 100644 index fcc9a7e7f85..00000000000 Binary files a/docs/assets/byom_empty_form.png and /dev/null differ diff --git a/docs/assets/byom_form.png b/docs/assets/byom_form.png deleted file mode 100644 index 0c0f6e62233..00000000000 Binary files a/docs/assets/byom_form.png and 
/dev/null differ diff --git a/docs/assets/byom_upload_custom_model.png b/docs/assets/byom_upload_custom_model.png deleted file mode 100644 index 83dadde703e..00000000000 Binary files a/docs/assets/byom_upload_custom_model.png and /dev/null differ diff --git a/docs/assets/chatbot_diagram.png b/docs/assets/chatbot_diagram.png deleted file mode 100644 index 472d11de0e9..00000000000 Binary files a/docs/assets/chatbot_diagram.png and /dev/null differ diff --git a/docs/assets/cloud/gui_query.png b/docs/assets/cloud/gui_query.png deleted file mode 100644 index bec1d41134f..00000000000 Binary files a/docs/assets/cloud/gui_query.png and /dev/null differ diff --git a/docs/assets/cloud/main_mdb.png b/docs/assets/cloud/main_mdb.png deleted file mode 100644 index c58471981eb..00000000000 Binary files a/docs/assets/cloud/main_mdb.png and /dev/null differ diff --git a/docs/assets/connect_tableau.png b/docs/assets/connect_tableau.png deleted file mode 100644 index 65fc3563b3c..00000000000 Binary files a/docs/assets/connect_tableau.png and /dev/null differ diff --git a/docs/assets/connect_tableau_2.png b/docs/assets/connect_tableau_2.png deleted file mode 100644 index bdcd1442d96..00000000000 Binary files a/docs/assets/connect_tableau_2.png and /dev/null differ diff --git a/docs/assets/connect_tableau_3.png b/docs/assets/connect_tableau_3.png deleted file mode 100644 index b657e22584b..00000000000 Binary files a/docs/assets/connect_tableau_3.png and /dev/null differ diff --git a/docs/assets/connect_tableau_4.png b/docs/assets/connect_tableau_4.png deleted file mode 100644 index 6b9c5bc86dc..00000000000 Binary files a/docs/assets/connect_tableau_4.png and /dev/null differ diff --git a/docs/assets/connect_tableau_5.png b/docs/assets/connect_tableau_5.png deleted file mode 100644 index bbad251e02d..00000000000 Binary files a/docs/assets/connect_tableau_5.png and /dev/null differ diff --git a/docs/assets/connect_tableau_6.png b/docs/assets/connect_tableau_6.png deleted file mode 100644 
index 37ae544ee78..00000000000 Binary files a/docs/assets/connect_tableau_6.png and /dev/null differ diff --git a/docs/assets/connect_tableau_7.png b/docs/assets/connect_tableau_7.png deleted file mode 100644 index 4780aa03b1e..00000000000 Binary files a/docs/assets/connect_tableau_7.png and /dev/null differ diff --git a/docs/assets/docker/docker_desktop/containers-running-extension.png b/docs/assets/docker/docker_desktop/containers-running-extension.png deleted file mode 100644 index 4ec19256c47..00000000000 Binary files a/docs/assets/docker/docker_desktop/containers-running-extension.png and /dev/null differ diff --git a/docs/assets/docker/docker_desktop/enable-extension-containers.png b/docs/assets/docker/docker_desktop/enable-extension-containers.png deleted file mode 100644 index 17e800072b4..00000000000 Binary files a/docs/assets/docker/docker_desktop/enable-extension-containers.png and /dev/null differ diff --git a/docs/assets/docker/docker_desktop/enable-win-dev-mode.png b/docs/assets/docker/docker_desktop/enable-win-dev-mode.png deleted file mode 100644 index 0c81a60a633..00000000000 Binary files a/docs/assets/docker/docker_desktop/enable-win-dev-mode.png and /dev/null differ diff --git a/docs/assets/docker/docker_desktop/mindsdb-container-logs.png b/docs/assets/docker/docker_desktop/mindsdb-container-logs.png deleted file mode 100644 index 759456b50cd..00000000000 Binary files a/docs/assets/docker/docker_desktop/mindsdb-container-logs.png and /dev/null differ diff --git a/docs/assets/docker/docker_desktop/mindsdb_docker_desktop.png b/docs/assets/docker/docker_desktop/mindsdb_docker_desktop.png deleted file mode 100644 index dcad7339d67..00000000000 Binary files a/docs/assets/docker/docker_desktop/mindsdb_docker_desktop.png and /dev/null differ diff --git a/docs/assets/docker/docker_desktop/pull-latest-image.png b/docs/assets/docker/docker_desktop/pull-latest-image.png deleted file mode 100644 index 5454057aa95..00000000000 Binary files 
a/docs/assets/docker/docker_desktop/pull-latest-image.png and /dev/null differ diff --git a/docs/assets/faqs_download.csv.png b/docs/assets/faqs_download.csv.png deleted file mode 100644 index a821cc663b0..00000000000 Binary files a/docs/assets/faqs_download.csv.png and /dev/null differ diff --git a/docs/assets/files/upload_file.png b/docs/assets/files/upload_file.png deleted file mode 100644 index 2ec12adc4f5..00000000000 Binary files a/docs/assets/files/upload_file.png and /dev/null differ diff --git a/docs/assets/files/upload_file_from_computer.png b/docs/assets/files/upload_file_from_computer.png deleted file mode 100644 index c727c9cb2f7..00000000000 Binary files a/docs/assets/files/upload_file_from_computer.png and /dev/null differ diff --git a/docs/assets/files/upload_file_from_url.png b/docs/assets/files/upload_file_from_url.png deleted file mode 100644 index b595bc444cc..00000000000 Binary files a/docs/assets/files/upload_file_from_url.png and /dev/null differ diff --git a/docs/assets/install-dependencies-gui.png b/docs/assets/install-dependencies-gui.png deleted file mode 100644 index cbed8cb5745..00000000000 Binary files a/docs/assets/install-dependencies-gui.png and /dev/null differ diff --git a/docs/assets/integrations/Arjuna.png b/docs/assets/integrations/Arjuna.png deleted file mode 100644 index f56b9a7e057..00000000000 Binary files a/docs/assets/integrations/Arjuna.png and /dev/null differ diff --git a/docs/assets/jssdk_install_output.png b/docs/assets/jssdk_install_output.png deleted file mode 100644 index d7ae8a80d04..00000000000 Binary files a/docs/assets/jssdk_install_output.png and /dev/null differ diff --git a/docs/assets/kb_data_insertion.png b/docs/assets/kb_data_insertion.png deleted file mode 100644 index fe55364b323..00000000000 Binary files a/docs/assets/kb_data_insertion.png and /dev/null differ diff --git a/docs/assets/kb_hybrid_search.jpg b/docs/assets/kb_hybrid_search.jpg deleted file mode 100644 index 9055e5a8872..00000000000 Binary 
files a/docs/assets/kb_hybrid_search.jpg and /dev/null differ diff --git a/docs/assets/mcp.png b/docs/assets/mcp.png deleted file mode 100644 index 11645bf12c9..00000000000 Binary files a/docs/assets/mcp.png and /dev/null differ diff --git a/docs/assets/mcp_cursor_chat.png b/docs/assets/mcp_cursor_chat.png deleted file mode 100644 index 3538218bd81..00000000000 Binary files a/docs/assets/mcp_cursor_chat.png and /dev/null differ diff --git a/docs/assets/mcp_cursor_chat_mode.png b/docs/assets/mcp_cursor_chat_mode.png deleted file mode 100644 index 578dbdaf426..00000000000 Binary files a/docs/assets/mcp_cursor_chat_mode.png and /dev/null differ diff --git a/docs/assets/mcp_cursor_chat_tool.png b/docs/assets/mcp_cursor_chat_tool.png deleted file mode 100644 index 04bc299a56e..00000000000 Binary files a/docs/assets/mcp_cursor_chat_tool.png and /dev/null differ diff --git a/docs/assets/mcp_cursor_mcp_server.png b/docs/assets/mcp_cursor_mcp_server.png deleted file mode 100644 index 850f6e608ff..00000000000 Binary files a/docs/assets/mcp_cursor_mcp_server.png and /dev/null differ diff --git a/docs/assets/mcp_cursor_settings.png b/docs/assets/mcp_cursor_settings.png deleted file mode 100644 index 8d55b5bcd94..00000000000 Binary files a/docs/assets/mcp_cursor_settings.png and /dev/null differ diff --git a/docs/assets/metabase_add_database.png b/docs/assets/metabase_add_database.png deleted file mode 100644 index b6d456b2655..00000000000 Binary files a/docs/assets/metabase_add_database.png and /dev/null differ diff --git a/docs/assets/metabase_connected.png b/docs/assets/metabase_connected.png deleted file mode 100644 index 683ee859892..00000000000 Binary files a/docs/assets/metabase_connected.png and /dev/null differ diff --git a/docs/assets/metabase_run_query_failure.png b/docs/assets/metabase_run_query_failure.png deleted file mode 100644 index 04e9598858a..00000000000 Binary files a/docs/assets/metabase_run_query_failure.png and /dev/null differ diff --git 
a/docs/assets/metabase_run_query_home_rentals.png b/docs/assets/metabase_run_query_home_rentals.png deleted file mode 100644 index 075f314f5a7..00000000000 Binary files a/docs/assets/metabase_run_query_home_rentals.png and /dev/null differ diff --git a/docs/assets/metabase_run_query_show_tables.png b/docs/assets/metabase_run_query_show_tables.png deleted file mode 100644 index 3135f4c5425..00000000000 Binary files a/docs/assets/metabase_run_query_show_tables.png and /dev/null differ diff --git a/docs/assets/minds/Dashboard_Minds.png b/docs/assets/minds/Dashboard_Minds.png deleted file mode 100644 index 36488d03c12..00000000000 Binary files a/docs/assets/minds/Dashboard_Minds.png and /dev/null differ diff --git a/docs/assets/minds/DatasourcesConn_Minds.png b/docs/assets/minds/DatasourcesConn_Minds.png deleted file mode 100644 index 3d7abbaa358..00000000000 Binary files a/docs/assets/minds/DatasourcesConn_Minds.png and /dev/null differ diff --git a/docs/assets/minds/DatasourcesTab_Minds.png b/docs/assets/minds/DatasourcesTab_Minds.png deleted file mode 100644 index 3c35f7a3ea1..00000000000 Binary files a/docs/assets/minds/DatasourcesTab_Minds.png and /dev/null differ diff --git a/docs/assets/minds/DatasourcesType_Minds.png b/docs/assets/minds/DatasourcesType_Minds.png deleted file mode 100644 index 13b2ee3ddd0..00000000000 Binary files a/docs/assets/minds/DatasourcesType_Minds.png and /dev/null differ diff --git a/docs/assets/minds/MindChat_Minds.png b/docs/assets/minds/MindChat_Minds.png deleted file mode 100644 index 28e579ba30e..00000000000 Binary files a/docs/assets/minds/MindChat_Minds.png and /dev/null differ diff --git a/docs/assets/minds/MindsTab_Minds.png b/docs/assets/minds/MindsTab_Minds.png deleted file mode 100644 index 87f6ce71ae3..00000000000 Binary files a/docs/assets/minds/MindsTab_Minds.png and /dev/null differ diff --git a/docs/assets/minds/MindsWorkflow.png b/docs/assets/minds/MindsWorkflow.png deleted file mode 100644 index 
6926c8b61a4..00000000000 Binary files a/docs/assets/minds/MindsWorkflow.png and /dev/null differ diff --git a/docs/assets/minds/NewMind_Minds.png b/docs/assets/minds/NewMind_Minds.png deleted file mode 100644 index 2ceda04d016..00000000000 Binary files a/docs/assets/minds/NewMind_Minds.png and /dev/null differ diff --git a/docs/assets/minds/Playground_Mind.png b/docs/assets/minds/Playground_Mind.png deleted file mode 100644 index 0b54f895a70..00000000000 Binary files a/docs/assets/minds/Playground_Mind.png and /dev/null differ diff --git a/docs/assets/minds/PreviewData_Minds.png b/docs/assets/minds/PreviewData_Minds.png deleted file mode 100644 index 9660517687b..00000000000 Binary files a/docs/assets/minds/PreviewData_Minds.png and /dev/null differ diff --git a/docs/assets/mindsdb-editor.png b/docs/assets/mindsdb-editor.png deleted file mode 100644 index fca25ac9de6..00000000000 Binary files a/docs/assets/mindsdb-editor.png and /dev/null differ diff --git a/docs/assets/mindsdb-fqe.png b/docs/assets/mindsdb-fqe.png deleted file mode 100644 index fe475bde6e5..00000000000 Binary files a/docs/assets/mindsdb-fqe.png and /dev/null differ diff --git a/docs/assets/mindsdb_gui_editor/create_model_1.png b/docs/assets/mindsdb_gui_editor/create_model_1.png deleted file mode 100644 index b779296ae06..00000000000 Binary files a/docs/assets/mindsdb_gui_editor/create_model_1.png and /dev/null differ diff --git a/docs/assets/mindsdb_gui_editor/create_model_2.png b/docs/assets/mindsdb_gui_editor/create_model_2.png deleted file mode 100644 index 01acb34f92d..00000000000 Binary files a/docs/assets/mindsdb_gui_editor/create_model_2.png and /dev/null differ diff --git a/docs/assets/mindsdb_gui_editor/mindsdb_editor.png b/docs/assets/mindsdb_gui_editor/mindsdb_editor.png deleted file mode 100644 index 415aa97e5f0..00000000000 Binary files a/docs/assets/mindsdb_gui_editor/mindsdb_editor.png and /dev/null differ diff --git a/docs/assets/mindsdb_gui_editor/multiple_query_editor.png 
b/docs/assets/mindsdb_gui_editor/multiple_query_editor.png deleted file mode 100644 index d4491144667..00000000000 Binary files a/docs/assets/mindsdb_gui_editor/multiple_query_editor.png and /dev/null differ diff --git a/docs/assets/mindsdb_gui_editor/object_explorer.png b/docs/assets/mindsdb_gui_editor/object_explorer.png deleted file mode 100644 index 063a0103dd9..00000000000 Binary files a/docs/assets/mindsdb_gui_editor/object_explorer.png and /dev/null differ diff --git a/docs/assets/mindsdb_gui_editor/object_explorer_query.png b/docs/assets/mindsdb_gui_editor/object_explorer_query.png deleted file mode 100644 index 1f805f68dc2..00000000000 Binary files a/docs/assets/mindsdb_gui_editor/object_explorer_query.png and /dev/null differ diff --git a/docs/assets/mindsdb_gui_editor/query_editor.png b/docs/assets/mindsdb_gui_editor/query_editor.png deleted file mode 100644 index fe7b3571d91..00000000000 Binary files a/docs/assets/mindsdb_gui_editor/query_editor.png and /dev/null differ diff --git a/docs/assets/mindsdb_gui_editor/results_viewer.png b/docs/assets/mindsdb_gui_editor/results_viewer.png deleted file mode 100644 index 628ec7b8cef..00000000000 Binary files a/docs/assets/mindsdb_gui_editor/results_viewer.png and /dev/null differ diff --git a/docs/assets/mindsdb_gui_respond.png b/docs/assets/mindsdb_gui_respond.png deleted file mode 100644 index c4ff5a99bcb..00000000000 Binary files a/docs/assets/mindsdb_gui_respond.png and /dev/null differ diff --git a/docs/assets/mindsdb_gui_respond_agents.png b/docs/assets/mindsdb_gui_respond_agents.png deleted file mode 100644 index 4a274da37d1..00000000000 Binary files a/docs/assets/mindsdb_gui_respond_agents.png and /dev/null differ diff --git a/docs/assets/mindsdb_gui_respond_chat.png b/docs/assets/mindsdb_gui_respond_chat.png deleted file mode 100644 index a45422092b0..00000000000 Binary files a/docs/assets/mindsdb_gui_respond_chat.png and /dev/null differ diff --git a/docs/assets/model-management.png 
b/docs/assets/model-management.png deleted file mode 100644 index 01ad73a1dd5..00000000000 Binary files a/docs/assets/model-management.png and /dev/null differ diff --git a/docs/assets/pythonsdk_install_output.png b/docs/assets/pythonsdk_install_output.png deleted file mode 100644 index b6d59b67891..00000000000 Binary files a/docs/assets/pythonsdk_install_output.png and /dev/null differ diff --git a/docs/assets/sql/data-insights-1.png b/docs/assets/sql/data-insights-1.png deleted file mode 100644 index 4dd107ef74c..00000000000 Binary files a/docs/assets/sql/data-insights-1.png and /dev/null differ diff --git a/docs/assets/sql/data-insights-2.png b/docs/assets/sql/data-insights-2.png deleted file mode 100644 index 072209b3fb4..00000000000 Binary files a/docs/assets/sql/data-insights-2.png and /dev/null differ diff --git a/docs/assets/sql/data-insights-3.png b/docs/assets/sql/data-insights-3.png deleted file mode 100644 index f5d1336c6f5..00000000000 Binary files a/docs/assets/sql/data-insights-3.png and /dev/null differ diff --git a/docs/assets/sql/data-insights-4.png b/docs/assets/sql/data-insights-4.png deleted file mode 100644 index 9949fdc177f..00000000000 Binary files a/docs/assets/sql/data-insights-4.png and /dev/null differ diff --git a/docs/assets/sql/data-insights-5.png b/docs/assets/sql/data-insights-5.png deleted file mode 100644 index 526e3baa1f5..00000000000 Binary files a/docs/assets/sql/data-insights-5.png and /dev/null differ diff --git a/docs/assets/sql/data-insights-6.png b/docs/assets/sql/data-insights-6.png deleted file mode 100644 index 3bde03e24a3..00000000000 Binary files a/docs/assets/sql/data-insights-6.png and /dev/null differ diff --git a/docs/assets/sql/dbeaver_1.png b/docs/assets/sql/dbeaver_1.png deleted file mode 100644 index f66ab7acf3a..00000000000 Binary files a/docs/assets/sql/dbeaver_1.png and /dev/null differ diff --git a/docs/assets/sql/dbeaver_2.png b/docs/assets/sql/dbeaver_2.png deleted file mode 100644 index 
b902566a48d..00000000000 Binary files a/docs/assets/sql/dbeaver_2.png and /dev/null differ diff --git a/docs/assets/sql/dbeaver_3.png b/docs/assets/sql/dbeaver_3.png deleted file mode 100644 index 3988c861daf..00000000000 Binary files a/docs/assets/sql/dbeaver_3.png and /dev/null differ diff --git a/docs/assets/sql/dbeaver_4.png b/docs/assets/sql/dbeaver_4.png deleted file mode 100644 index 6d37ac65a5e..00000000000 Binary files a/docs/assets/sql/dbeaver_4.png and /dev/null differ diff --git a/docs/assets/sql/dbeaver_5.png b/docs/assets/sql/dbeaver_5.png deleted file mode 100644 index 17315fa0285..00000000000 Binary files a/docs/assets/sql/dbeaver_5.png and /dev/null differ diff --git a/docs/assets/sql/grafana_1.png b/docs/assets/sql/grafana_1.png deleted file mode 100644 index bd6bb978afc..00000000000 Binary files a/docs/assets/sql/grafana_1.png and /dev/null differ diff --git a/docs/assets/sql/grafana_2.png b/docs/assets/sql/grafana_2.png deleted file mode 100644 index 0439d425b95..00000000000 Binary files a/docs/assets/sql/grafana_2.png and /dev/null differ diff --git a/docs/assets/sql/grafana_3.png b/docs/assets/sql/grafana_3.png deleted file mode 100644 index 8f7e2e1f915..00000000000 Binary files a/docs/assets/sql/grafana_3.png and /dev/null differ diff --git a/docs/assets/sql/grafana_4.png b/docs/assets/sql/grafana_4.png deleted file mode 100644 index 7dbfc3d20ba..00000000000 Binary files a/docs/assets/sql/grafana_4.png and /dev/null differ diff --git a/docs/assets/sql/grafana_5.png b/docs/assets/sql/grafana_5.png deleted file mode 100644 index 8cd3e000f84..00000000000 Binary files a/docs/assets/sql/grafana_5.png and /dev/null differ diff --git a/docs/assets/sql/income_vs_debt.png b/docs/assets/sql/income_vs_debt.png deleted file mode 100644 index 26f024d66ce..00000000000 Binary files a/docs/assets/sql/income_vs_debt.png and /dev/null differ diff --git a/docs/assets/sql/income_vs_debt_known_value.png b/docs/assets/sql/income_vs_debt_known_value.png deleted 
file mode 100644 index dc5d30aec55..00000000000 Binary files a/docs/assets/sql/income_vs_debt_known_value.png and /dev/null differ diff --git a/docs/assets/sql/income_vs_debt_prediction.png b/docs/assets/sql/income_vs_debt_prediction.png deleted file mode 100644 index 97c68b03b8a..00000000000 Binary files a/docs/assets/sql/income_vs_debt_prediction.png and /dev/null differ diff --git a/docs/assets/sql/income_vs_debt_predictor.png b/docs/assets/sql/income_vs_debt_predictor.png deleted file mode 100644 index 1f313ee2398..00000000000 Binary files a/docs/assets/sql/income_vs_debt_predictor.png and /dev/null differ diff --git a/docs/assets/sql/income_vs_debt_unknown_value.png b/docs/assets/sql/income_vs_debt_unknown_value.png deleted file mode 100644 index f038008570b..00000000000 Binary files a/docs/assets/sql/income_vs_debt_unknown_value.png and /dev/null differ diff --git a/docs/assets/sql/kb_retrieval_example1.png b/docs/assets/sql/kb_retrieval_example1.png deleted file mode 100644 index 2a45988648c..00000000000 Binary files a/docs/assets/sql/kb_retrieval_example1.png and /dev/null differ diff --git a/docs/assets/sql/kb_retrieval_example2.png b/docs/assets/sql/kb_retrieval_example2.png deleted file mode 100644 index 85f209c3cd5..00000000000 Binary files a/docs/assets/sql/kb_retrieval_example2.png and /dev/null differ diff --git a/docs/assets/sql/kb_retrieval_example3.png b/docs/assets/sql/kb_retrieval_example3.png deleted file mode 100644 index ce2d8331937..00000000000 Binary files a/docs/assets/sql/kb_retrieval_example3.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/customer_churn/.gitkeep b/docs/assets/sql/tutorials/customer_churn/.gitkeep deleted file mode 100644 index 8b137891791..00000000000 --- a/docs/assets/sql/tutorials/customer_churn/.gitkeep +++ /dev/null @@ -1 +0,0 @@ - diff --git a/docs/assets/sql/tutorials/generating_images_1.png b/docs/assets/sql/tutorials/generating_images_1.png deleted file mode 100644 index 0546c328dd1..00000000000 
Binary files a/docs/assets/sql/tutorials/generating_images_1.png and /dev/null differ diff --git a/docs/assets/sql/tutorials/generating_images_2.png b/docs/assets/sql/tutorials/generating_images_2.png deleted file mode 100644 index 12b55953b52..00000000000 Binary files a/docs/assets/sql/tutorials/generating_images_2.png and /dev/null differ diff --git a/docs/assets/sql/upload_file1.png b/docs/assets/sql/upload_file1.png deleted file mode 100644 index ee0ca6fd133..00000000000 Binary files a/docs/assets/sql/upload_file1.png and /dev/null differ diff --git a/docs/assets/sql/upload_file2.png b/docs/assets/sql/upload_file2.png deleted file mode 100644 index 2e94e283d49..00000000000 Binary files a/docs/assets/sql/upload_file2.png and /dev/null differ diff --git a/docs/assets/sql/use.png b/docs/assets/sql/use.png deleted file mode 100644 index a3456745e80..00000000000 Binary files a/docs/assets/sql/use.png and /dev/null differ diff --git a/docs/assets/supported_integrations.png b/docs/assets/supported_integrations.png deleted file mode 100644 index f55b5e1cad9..00000000000 Binary files a/docs/assets/supported_integrations.png and /dev/null differ diff --git a/docs/assets/tutorials/crops/.gitkeep b/docs/assets/tutorials/crops/.gitkeep deleted file mode 100644 index 8b137891791..00000000000 --- a/docs/assets/tutorials/crops/.gitkeep +++ /dev/null @@ -1 +0,0 @@ - diff --git a/docs/assets/tutorials/llm-chatbot-ui/chat.png b/docs/assets/tutorials/llm-chatbot-ui/chat.png deleted file mode 100644 index 6aaf2f1ec3a..00000000000 Binary files a/docs/assets/tutorials/llm-chatbot-ui/chat.png and /dev/null differ diff --git a/docs/assets/tutorials/llm-chatbot-ui/prompt.png b/docs/assets/tutorials/llm-chatbot-ui/prompt.png deleted file mode 100644 index fbc63db0898..00000000000 Binary files a/docs/assets/tutorials/llm-chatbot-ui/prompt.png and /dev/null differ diff --git a/docs/assets/tutorials/llm-chatbot-ui/publish.png b/docs/assets/tutorials/llm-chatbot-ui/publish.png deleted file 
mode 100644 index 30af7e47e28..00000000000 Binary files a/docs/assets/tutorials/llm-chatbot-ui/publish.png and /dev/null differ diff --git a/docs/assets/tutorials/llm-chatbot-ui/settings.png b/docs/assets/tutorials/llm-chatbot-ui/settings.png deleted file mode 100644 index 341872a9060..00000000000 Binary files a/docs/assets/tutorials/llm-chatbot-ui/settings.png and /dev/null differ diff --git a/docs/assets/tutorials/llm-chatbot-ui/settings2.png b/docs/assets/tutorials/llm-chatbot-ui/settings2.png deleted file mode 100644 index 40c18fc03a6..00000000000 Binary files a/docs/assets/tutorials/llm-chatbot-ui/settings2.png and /dev/null differ diff --git a/docs/assets/tutorials/llm-chatbot-ui/slack-chat.png b/docs/assets/tutorials/llm-chatbot-ui/slack-chat.png deleted file mode 100644 index 032399426f7..00000000000 Binary files a/docs/assets/tutorials/llm-chatbot-ui/slack-chat.png and /dev/null differ diff --git a/docs/assets/tutorials/llm-chatbot-ui/slack.png b/docs/assets/tutorials/llm-chatbot-ui/slack.png deleted file mode 100644 index b44b203346d..00000000000 Binary files a/docs/assets/tutorials/llm-chatbot-ui/slack.png and /dev/null differ diff --git a/docs/assets/tutorials/llm-chatbot-ui/welcome.png b/docs/assets/tutorials/llm-chatbot-ui/welcome.png deleted file mode 100644 index f778d521c1f..00000000000 Binary files a/docs/assets/tutorials/llm-chatbot-ui/welcome.png and /dev/null differ diff --git a/docs/assets/tutorials/monkeylearn/.gitkeep b/docs/assets/tutorials/monkeylearn/.gitkeep deleted file mode 100644 index 8b137891791..00000000000 --- a/docs/assets/tutorials/monkeylearn/.gitkeep +++ /dev/null @@ -1 +0,0 @@ - diff --git a/docs/assets/tutorials/monkeylearn/1.create_ml.png b/docs/assets/tutorials/monkeylearn/1.create_ml.png deleted file mode 100644 index b77b25d9eb4..00000000000 Binary files a/docs/assets/tutorials/monkeylearn/1.create_ml.png and /dev/null differ diff --git a/docs/assets/tutorials/monkeylearn/10.select_prediction.png 
b/docs/assets/tutorials/monkeylearn/10.select_prediction.png deleted file mode 100644 index f4ae4a47983..00000000000 Binary files a/docs/assets/tutorials/monkeylearn/10.select_prediction.png and /dev/null differ diff --git a/docs/assets/tutorials/monkeylearn/4.describe.png b/docs/assets/tutorials/monkeylearn/4.describe.png deleted file mode 100644 index e4e8afa07ec..00000000000 Binary files a/docs/assets/tutorials/monkeylearn/4.describe.png and /dev/null differ diff --git a/docs/assets/tutorials/monkeylearn/5.select_prediction.png b/docs/assets/tutorials/monkeylearn/5.select_prediction.png deleted file mode 100644 index b55d3f0cbd6..00000000000 Binary files a/docs/assets/tutorials/monkeylearn/5.select_prediction.png and /dev/null differ diff --git a/docs/assets/tutorials/monkeylearn/createmodel1.png b/docs/assets/tutorials/monkeylearn/createmodel1.png deleted file mode 100644 index 5dee4139f75..00000000000 Binary files a/docs/assets/tutorials/monkeylearn/createmodel1.png and /dev/null differ diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console-new-DS.png b/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console-new-DS.png deleted file mode 100644 index 64234227e24..00000000000 Binary files a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console-new-DS.png and /dev/null differ diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console.png b/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console.png deleted file mode 100644 index bd5f977403c..00000000000 Binary files a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-console.png and /dev/null differ diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-CS-SQL.png b/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-CS-SQL.png deleted file mode 100644 index c18d5639869..00000000000 Binary files 
a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-CS-SQL.png and /dev/null differ diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-IP-allow.png b/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-IP-allow.png deleted file mode 100644 index 7bef6e5e043..00000000000 Binary files a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-IP-allow.png and /dev/null differ diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-sql.png b/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-sql.png deleted file mode 100644 index 6a51c6246ea..00000000000 Binary files a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-mariadb-sql.png and /dev/null differ diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-new-DS-SQL.png b/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-new-DS-SQL.png deleted file mode 100644 index 12c1305b46c..00000000000 Binary files a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-new-DS-SQL.png and /dev/null differ diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-sky-allowlist.png b/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-sky-allowlist.png deleted file mode 100644 index 1ecd9357fda..00000000000 Binary files a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-sky-allowlist.png and /dev/null differ diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-testing-live-twitter.png b/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-testing-live-twitter.png deleted file mode 100644 index 95fee576c58..00000000000 Binary files a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-mindsdb-testing-live-twitter.png and /dev/null differ diff --git a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-snoopstien.png 
b/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-snoopstien.png deleted file mode 100644 index 2fda20329b1..00000000000 Binary files a/docs/assets/tutorials/twitter_chatbot/Twitter-chatbot-snoopstien.png and /dev/null differ diff --git a/docs/assets/tutorials/twitter_chatbot/animated-gif-skysql-service-create.gif b/docs/assets/tutorials/twitter_chatbot/animated-gif-skysql-service-create.gif deleted file mode 100644 index 95ef4fce238..00000000000 Binary files a/docs/assets/tutorials/twitter_chatbot/animated-gif-skysql-service-create.gif and /dev/null differ diff --git a/docs/assets/tutorials/twitter_chatbot/mariadb-sky-connect.gif b/docs/assets/tutorials/twitter_chatbot/mariadb-sky-connect.gif deleted file mode 100644 index 0b34c95d1ec..00000000000 Binary files a/docs/assets/tutorials/twitter_chatbot/mariadb-sky-connect.gif and /dev/null differ diff --git a/docs/assets/twilio-chatbot-diagram.png b/docs/assets/twilio-chatbot-diagram.png deleted file mode 100644 index 4a846a87c42..00000000000 Binary files a/docs/assets/twilio-chatbot-diagram.png and /dev/null differ diff --git a/docs/assets/twilio-chatbot-response.png b/docs/assets/twilio-chatbot-response.png deleted file mode 100644 index 63c90e5d64a..00000000000 Binary files a/docs/assets/twilio-chatbot-response.png and /dev/null differ diff --git a/docs/assets/twilio-image-model-image.png b/docs/assets/twilio-image-model-image.png deleted file mode 100644 index e32268e1b99..00000000000 Binary files a/docs/assets/twilio-image-model-image.png and /dev/null differ diff --git a/docs/assets/twilio-image-model-response.png b/docs/assets/twilio-image-model-response.png deleted file mode 100644 index 3dc4085fe4b..00000000000 Binary files a/docs/assets/twilio-image-model-response.png and /dev/null differ diff --git a/docs/assets/twilio-text-model-response.png b/docs/assets/twilio-text-model-response.png deleted file mode 100644 index 7e1807550f1..00000000000 Binary files a/docs/assets/twilio-text-model-response.png 
and /dev/null differ diff --git a/docs/assets/upload_custom_function.png b/docs/assets/upload_custom_function.png deleted file mode 100644 index da63aab5697..00000000000 Binary files a/docs/assets/upload_custom_function.png and /dev/null differ diff --git a/docs/assets/upload_custom_function2.png b/docs/assets/upload_custom_function2.png deleted file mode 100644 index 423fe8026cd..00000000000 Binary files a/docs/assets/upload_custom_function2.png and /dev/null differ diff --git a/docs/assets/upload_custom_function_empty_form.png b/docs/assets/upload_custom_function_empty_form.png deleted file mode 100644 index 80108be6b80..00000000000 Binary files a/docs/assets/upload_custom_function_empty_form.png and /dev/null differ diff --git a/docs/assets/use_cases/ai_agents.jpg b/docs/assets/use_cases/ai_agents.jpg deleted file mode 100644 index 9b816a97b3f..00000000000 Binary files a/docs/assets/use_cases/ai_agents.jpg and /dev/null differ diff --git a/docs/assets/use_cases/ai_workflow_automation.jpg b/docs/assets/use_cases/ai_workflow_automation.jpg deleted file mode 100644 index b1b10036248..00000000000 Binary files a/docs/assets/use_cases/ai_workflow_automation.jpg and /dev/null differ diff --git a/docs/assets/use_cases/aipowered_data_retrieval.jpg b/docs/assets/use_cases/aipowered_data_retrieval.jpg deleted file mode 100644 index f98c6eb0a35..00000000000 Binary files a/docs/assets/use_cases/aipowered_data_retrieval.jpg and /dev/null differ diff --git a/docs/assets/use_cases/automated_finetuning.jpg b/docs/assets/use_cases/automated_finetuning.jpg deleted file mode 100644 index 56683bd2e13..00000000000 Binary files a/docs/assets/use_cases/automated_finetuning.jpg and /dev/null differ diff --git a/docs/assets/use_cases/data_enrichment.jpg b/docs/assets/use_cases/data_enrichment.jpg deleted file mode 100644 index 31487e53fbb..00000000000 Binary files a/docs/assets/use_cases/data_enrichment.jpg and /dev/null differ diff --git a/docs/assets/use_cases/indatabase_ml.jpg 
b/docs/assets/use_cases/indatabase_ml.jpg deleted file mode 100644 index a6e42ee7628..00000000000 Binary files a/docs/assets/use_cases/indatabase_ml.jpg and /dev/null differ diff --git a/docs/assets/use_cases/predictive_analytics.jpg b/docs/assets/use_cases/predictive_analytics.jpg deleted file mode 100644 index 7026079d216..00000000000 Binary files a/docs/assets/use_cases/predictive_analytics.jpg and /dev/null differ diff --git a/docs/contribute/app-handlers.mdx b/docs/contribute/app-handlers.mdx deleted file mode 100644 index 040d3e2bc37..00000000000 --- a/docs/contribute/app-handlers.mdx +++ /dev/null @@ -1,268 +0,0 @@ ---- -title: Build an Application Handler -sidebarTitle: Build an Application Handler -icon: "gear" ---- - -In this section, you'll find how to add new application integrations to MindsDB. - - - **Prerequisite** - - You should have the latest version of the MindsDB repository installed locally. Follow [this guide](/contribute/install/) to learn how to install MindsDB for development. - - -## What are API Handlers? - -Application handlers act as a bridge between MindsDB and any application that provides APIs. You use application handlers to create databases using the [`CREATE DATABASE`](/sql/create/databases/) statement. So you can reach data from any application that has its handler implemented within MindsDB. - - - **Database Handlers** - - To learn more about handlers and how to implement a database handler, visit our [doc page here](/contribute/data-handlers/). - - -## Creating an Application Handler - -You can create your own application handler within MindsDB by inheriting from the [`APIHandler`](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/api_handler.py#L150) class. - -By providing the implementation for some or all of the methods contained in the [`APIHandler`](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/api_handler.py#L150) class, you can interact with the application APIs. 
- -### Core Methods - -Apart from the `__init__()` method, there are five core methods that must be implemented. We recommend checking actual examples in the codebase to get an idea of what goes into each of these methods, as they can change a bit depending on the nature of the system being integrated. - -Let's review the purpose of each method. - -| Method | Purpose | -|------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [`_register_table()`](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/api_handler.py#L164) | It registers the data resource in memory. For example, if you are using Twitter API it registers the `tweets` resource from `/api/v2/tweets`. | -| [`connect()`](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/base.py#L23) | It performs the necessary steps to connect/authenticate to the underlying system. | -| [`check_connection()`](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/base.py#L39) | It evaluates if the connection is alive and healthy. | -| [`native_query()`](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/base.py#L47) | It parses any *native* statement string and acts upon it (for example, raw syntax commands). | -| `call_application_api()` | It calls the application API and maps the data to pandas DataFrame. This method handles the pagination and data mapping. | - -Authors can opt for adding private methods, new files and folders, or any combination of these to structure all the necessary work that will enable the core methods to work as intended. - - - **Other Common Methods** - - Under the [`mindsdb.integrations.utilities`](main/mindsdb/integrations/utilities) library, contributors can find various methods that may be useful while implementing new handlers. 
- - -### API Table - -Once the data returned from the API call is registered using the [`_register_table()`](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/api_handler.py#L164) method, you can use it to map to the [`APITable`](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/api_handler.py#L93) class. -The [`APITable`](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/api_handler.py#L93) class provides CRUD methods. - -| Method | Purpose | -|------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `select()` | It implements the mappings from the ast.Select and calls the actual API through the `call_application_api`. | -| `insert()` | It implements the mappings from the ast.Insert and calls the actual API through the `call_application_api`. | -| `update()` | It implements the mappings from the ast.Update and calls the actual API through the `call_application_api`. | -| `delete()` | It implements the mappings from the ast.Delete and calls the actual API through the `call_application_api`. | -| `add()` | Adds new rows to the data dictionary. | -| `list()` | Lists data based on certain conditions by providing FilterCondition, limits, sorting and target fields. | -| `get_columns()` | It maps the data columns returned by the API. | - - -### Implementation - -Each application handler should inherit from the [`APIHandler`](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/api_handler.py#L150) class. - -Here is a step-by-step guide: - -* Implementing the [`__init__()`](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/api_handler.py#L155) method: - - This method initializes the handler. 
- - ```py - def __init__(self, name: str): - super().__init__(name) - """ constructor - Args: - name (str): the handler name - """ - - self._tables = {} - ``` - -* Implementing the [`connect()`](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/base.py#L23) method: - - The `connect()` method sets up the connection. - - ```py - def connect(self) -> HandlerStatusResponse: - """ Set up any connections required by the handler - Should return output of check_connection() method after attempting - connection. Should switch self.is_connected. - Returns: - HandlerStatusResponse - """ - ``` - -* Implementing the [`check_connection()`](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/base.py#L39) method: - - The `check_connection()` method performs the health check for the connection. - - ```py - def check_connection(self) -> HandlerStatusResponse: - """ Check connection to the handler - Returns: - HandlerStatusResponse - """ - ``` - -* Implementing the [`native_query()`](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/base.py#L47) method: - - The `native_query()` method runs commands of the native API syntax. - - ```py - def native_query(self, query: Any) -> TableResponse | OkResponse | ErrorResponse: - """Receive raw query and act upon it somehow. - Args: - query (Any): query in native format (str for sql databases, - api's json etc) - Returns: - TableResponse | OkResponse | ErrorResponse - """ - ``` - -* Implementing the `call_application_api()` method: - - This method makes the API calls. It is **not mandatory** to implement this method, but it can help make the code more reliable and readable. - - ```py - def call_application_api(self, method_name:str = None, params:dict = None) -> DataFrame: - """Call the application API and return the result as a DataFrame. - Args: - method_name (str): name of the API method to call. 
- params (dict): parameters to pass to the API method. - Returns: - DataFrame - """ - ``` - -### Exporting the `connection_args` Dictionary - -The `connection_args` dictionary contains all of the arguments used to establish the connection along with their descriptions, types, labels, and whether they are required or not. - -The `connection_args` dictionary should be stored in the `connection_args.py` file inside the handler folder. - - -The `connection_args` dictionary is stored in a separate file in order to be able to hide sensitive information such as passwords or API keys. - -By default, when querying for `connection_data` from the `information_schema.databases` table, all sensitive information is hidden. To unhide it, use this command: - -```sql -set show_secrets=true; -``` - - -Here is an example of the `connection_args.py` file from the [GitHub handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/github_handler) where the API key value is set to hidden with `"secret": True`. - -```py -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - repository={ - "type": ARG_TYPE.STR, - "description": " GitHub repository name.", - "required": True, - "label": "Repository", - }, - api_key={ - "type": ARG_TYPE.PWD, - "description": "Optional GitHub API key to use for authentication.", - "required": False, - "label": "Api key", - "secret": True - }, - github_url={ - "type": ARG_TYPE.STR, - "description": "Optional GitHub URL to connect to a GitHub Enterprise instance.", - "required": False, - "label": "Github url", - }, -) - -connection_args_example = OrderedDict( - repository="mindsdb/mindsdb", - api_key="ghp_xxx", - github_url="https://github.com/mindsdb/mindsdb" -) -``` - -### Exporting All Required Variables - -The following should be exported in the `__init__.py` file of the handler: - -- The `Handler` class. 
-- The `version` of the handler. -- The `name` of the handler. -- The `type` of the handler, either `DATA` handler or `ML` handler. -- The `icon_path` to the file with the database icon. -- The `title` of the handler or a short description. -- The `description` of the handler. -- The `connection_args` dictionary with the connection arguments. -- The `connection_args_example` dictionary with an example of the connection arguments. -- The `import_error` message that is used if the import of the `Handler` class fails. - -A few of these variables are defined in another file called `__about__.py`. This file is imported into the `__init__.py` file. - -Here is an example of the `__init__.py` file for the [GitHub handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/github_handler). - -```py -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .github_handler import ( - GithubHandler as Handler, - connection_args_example, - connection_args, - ) - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "GitHub" -name = "github" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", "version", "name", "type", "title", "description", - "import_error", "icon_path", "connection_args_example", "connection_args", -] -``` - -The `__about__.py` file for the same [GitHub handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/github_handler) contains the following variables: - -```py -__title__ = "MindsDB GitHub handler" -__package_name__ = "mindsdb_github_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for GitHub" -__author__ = "Artem Veremey" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = 
"Copyright 2023 - mindsdb" - -``` - -## Check out our Application Handlers! - -To see some integration handlers that are currently in use, we encourage you to check out the following handlers inside the MindsDB repository: - -* [GitHub handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/github_handler) -* [TwitterHandler](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/twitter_handler) - -And here are [all the handlers available in the MindsDB repository](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers). diff --git a/docs/contribute/community.mdx b/docs/contribute/community.mdx deleted file mode 100644 index 9424aec601b..00000000000 --- a/docs/contribute/community.mdx +++ /dev/null @@ -1,15 +0,0 @@ ---- -title: Join our Community -sidebarTitle: Join our Community -icon: "users" ---- - -If you have questions or you want to chat with the MindsDB core team or other community members, you can join our [Slack workspace](https://mindsdb.com/joincommunity) - -## MindsDB Newsletter - -To get updates on MindsDB's latest announcements, releases, and events, [sign up for our newsletter](https://mindsdb.com/newsletter/). - -## Contact Us - -If you are interested in MindsDB for large-scale projects, contact us by submitting [this form](https://mindsdb.com/contact-us/). diff --git a/docs/contribute/contribute.mdx b/docs/contribute/contribute.mdx deleted file mode 100644 index 76e8747daee..00000000000 --- a/docs/contribute/contribute.mdx +++ /dev/null @@ -1,36 +0,0 @@ ---- -title: How to Contribute to MindsDB -sidebarTitle: Contribute to MindsDB -icon: "handshake" ---- - -Thank you for your interest in contributing to MindsDB. MindsDB is free, open-source software and all types of contributions are welcome, whether they’re documentation changes, bug reports, bug fixes, or new source code changes. 
- -## Contributing to MindsDB - -Contributors should fork the MindsDB repository and create a PR targeting the `main` branch of the MindsDB repository. Subsequently, the MindsDB team reviews the contributors' PRs and targets them at the appropriate release branch. - - -To learn more about the MindsDB release process, follow [this link](/releases). - - -## Contributor Testing Requirements - -As a contributor, you are responsible for writing the code according to the [Python Coding Standards](/contribute/python-coding-standards) and thoroughly testing all features or fixes that you implement before they are merged into the `develop` branch. - -### Feature Branch Testing - -Before merging your changes, the following types of testing must be completed to validate your work in isolation: - -* Unit Tests -Verify that individual components or functions behave as expected during development. - -* Integration Tests -Ensure that your new code works correctly with existing functionality and doesn't introduce regressions. - -### Post-Release Testing - -After a release that includes your features or fixes is published, contributors are encouraged to: - -* Test their changes in the released environment, and -* Report any issues or unexpected behavior that may arise. diff --git a/docs/contribute/data-handlers.mdx b/docs/contribute/data-handlers.mdx deleted file mode 100644 index cb13aa0621d..00000000000 --- a/docs/contribute/data-handlers.mdx +++ /dev/null @@ -1,475 +0,0 @@ ---- -title: Build a Database Handler -sidebarTitle: Build a Database Handler -icon: "gear" ---- - -In this section, you'll learn how to add new integrations/databases to MindsDB. - - - **Prerequisite** - - You should have the latest version of the MindsDB repository installed locally. Follow [this guide](/contribute/install/) to learn how to install MindsDB for development. - - -## What are Database Handlers? - -Database handlers act as a bridge to any database. 
You use database handlers to create databases using [the CREATE DATABASE command](/sql/create/databases/). So you can reach data from any database that has its handler implemented within MindsDB. - -## Creating a Database Handler - -You can create your own database handler within MindsDB by inheriting from the [`DatabaseHandler`](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/base.py#L102) class. - -By providing the implementation for some or all of the methods contained in the `DatabaseHandler` class, you can connect with the database of your choice. - -### Core Methods - -Apart from the `__init__()` method, there are seven core methods that must be implemented. We recommend checking actual examples in the codebase to get an idea of what goes into each of these methods, as they can change a bit depending on the nature of the system being integrated. - -Let's review the purpose of each method. - -| Method | Purpose | -|------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `connect()` | It performs the necessary steps to connect to the underlying system. | -| `disconnect()` | It gracefully closes connections established in the `connect()` method. | -| `check_connection()` | It evaluates if the connection is alive and healthy. This method is called frequently. | -| `native_query()` | It parses any *native* statement string and acts upon it (for example, raw SQL commands). | -| `query()` | It takes a parsed SQL command in the form of an abstract syntax tree and executes it. | -| `get_tables()` | It lists and returns all the available tables. Each handler decides what a *table* means for the underlying system when interacting with it from the data layer. Typically, these are actual tables. 
| -| `get_columns()` | It returns columns of a table registered in the handler with the respective data type. | - -Authors can opt for adding private methods, new files and folders, or any combination of these to structure all the necessary work that will enable the core methods to work as intended. - - - **Other Common Methods** - - Under the `mindsdb.integrations.libs.utils` library, contributors can find various methods that may be useful while implementing new handlers. - - For response formatting, use the following classes from `mindsdb.integrations.libs.response`: - - [TableResponse](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py) - for queries returning data (SELECT, SHOW, etc.) - - [OkResponse](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py) - for successful operations without data (CREATE, DROP, INSERT, etc.) - - [ErrorResponse](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py) - for error cases - - [HandlerStatusResponse](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/response.py) - for connection status checks - - - The legacy `HandlerResponse` class is deprecated. Use `TableResponse`, `OkResponse`, or `ErrorResponse` instead. - - - -### Implementation - -Each database handler should inherit from the [`DatabaseHandler`](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/libs/base.py#L102) class. - -Here is a step-by-step guide: - -* Setting the `name` class property: - - MindsDB uses it internally as the name of the handler. - - For example, the `CREATE DATABASE` statement uses the handler's name. - - ```sql - CREATE DATABASE integration_name - WITH ENGINE = 'postgres', --- here, the handler's name is `postgres` - PARAMETERS = { - 'host': '127.0.0.1', - 'user': 'root', - 'password': 'password' - }; - ``` - -* Implementing the `__init__()` method: - - This method initializes the handler. 
The `connection_data` argument contains the `PARAMETERS` from the `CREATE DATABASE` statement, such as `user`, `password`, etc. - - ```py - def __init__(self, name: str, connection_data: Optional[dict]): - """ constructor - Args: - name (str): the handler name - """ - ``` - -* Implementing the `connect()` method: - - The `connect()` method sets up the connection. - - ```py - def connect(self) -> HandlerStatusResponse: - """ Set up any connections required by the handler - Should return the output of check_connection() method after attempting - connection. Should switch self.is_connected. - Returns: - HandlerStatusResponse - """ - ``` - -* Implementing the `disconnect()` method: - - The `disconnect()` method closes the existing connection. - - ```py - def disconnect(self): - """ Close any existing connections - Should switch self.is_connected. - """ - ``` - -* Implementing the `check_connection()` method: - - The `check_connection()` method performs the health check for the connection. - - ```py - def check_connection(self) -> HandlerStatusResponse: - """ Check connection to the handler - Returns: - HandlerStatusResponse - """ - ``` - -* Implementing the `native_query()` method: - - The `native_query()` method runs commands of the native database language. - - ```py - def native_query(self, query: Any) -> TableResponse | OkResponse | ErrorResponse: - """Receive raw query and act upon it somehow. - Args: - query (Any): query in native format (str for sql databases, - etc) - Returns: - TableResponse | OkResponse | ErrorResponse - """ - ``` - -* Implementing the `query()` method: - - The query method runs parsed SQL commands. - - ```py - def query(self, query: ASTNode) -> TableResponse | OkResponse | ErrorResponse: - """Receive query as AST (abstract syntax tree) and act upon it somehow. - Args: - query (ASTNode): sql query represented as AST. 
May be any kind - of query: SELECT, INSERT, DELETE, etc - Returns: - TableResponse | OkResponse | ErrorResponse - """ - ``` - -* Implementing the `get_tables()` method: - - The `get_tables()` method lists all the available tables. - - ```py - def get_tables(self) -> TableResponse | ErrorResponse: - """ Return list of entities - Return a list of entities that will be accessible as tables. - Returns: - TableResponse | ErrorResponse: should have the same columns as information_schema.tables - (https://dev.mysql.com/doc/refman/8.0/en/information-schema-tables-table.html) - Column 'TABLE_NAME' is mandatory, others are optional. - """ - ``` - -* Implementing the `get_columns()` method: - - The `get_columns()` method lists all columns of a specified table. - - ```py - def get_columns(self, table_name: str) -> TableResponse | ErrorResponse: - """ Returns a list of entity columns - Args: - table_name (str): name of one of the tables returned by self.get_tables() - Returns: - TableResponse | ErrorResponse: data should have the same columns as information_schema.columns - (https://dev.mysql.com/doc/refman/8.0/en/information-schema-columns-table.html) - Column 'COLUMN_NAME' is mandatory, others are optional. It is highly - recommended to also define 'DATA_TYPE': it should be one of - python data types (by default it is str). - """ - ``` - -### Response Classes - -The data-returning methods (`native_query()`, `query()`, `get_tables()`, `get_columns()`) should return one of the following response classes from `mindsdb.integrations.libs.response`: - -| Response Class | Use Case | Key Attributes | -|---------------|----------|----------------| -| `TableResponse` | Queries that return data (SELECT, SHOW, etc.) 
| `data`, `data_generator`, `columns`, `affected_rows` | -| `OkResponse` | Successful operations without data (CREATE, DROP, INSERT, UPDATE, DELETE) | `affected_rows` | -| `ErrorResponse` | Error cases | `error_code`, `error_message`, `is_expected_error` | - -#### TableResponse - -`TableResponse` is used when returning data from queries. It supports two modes of data delivery: - -1. **Immediate data**: Pass all data at once via the `data` parameter (pandas DataFrame) -2. **Streaming data**: Pass a generator via the `data_generator` parameter for lazy loading - -```py -from mindsdb.integrations.libs.response import TableResponse, OkResponse, ErrorResponse - -# Immediate data response -def native_query(self, query: str) -> TableResponse: - result = self.execute_query(query) - df = pd.DataFrame(result) - return TableResponse(data=df) - -# Streaming data response (for large datasets) -def native_query(self, query: str) -> TableResponse: - def data_generator(): - cursor = self.connection.cursor() - cursor.execute(query) - while batch := cursor.fetchmany(size=1000): - yield pd.DataFrame(batch) - - return TableResponse(data_generator=data_generator()) -``` - -#### OkResponse - -`OkResponse` is used for operations that don't return data: - -```py -def native_query(self, query: str) -> OkResponse: - cursor = self.connection.cursor() - cursor.execute(query) - self.connection.commit() - return OkResponse(affected_rows=cursor.rowcount) -``` - -#### ErrorResponse - -`ErrorResponse` is used to report errors: - -```py -def native_query(self, query: str) -> ErrorResponse: - try: - # ... execute query - except DatabaseError as e: - return ErrorResponse( - error_code=e.code, - error_message=str(e), - is_expected_error=True # Set to True for user errors (syntax, permissions, etc.) - ) -``` - -### Streaming Support - -For handlers that deal with large datasets, implementing streaming support is recommended. 
This allows data to be returned in chunks rather than loading everything into memory at once. - -To enable streaming: - -1. Set the `stream_response` class attribute to `True`: - - ```py - class MyDatabaseHandler(DatabaseHandler): - name = "mydatabase" - stream_response = True # Indicates that handler can return data as a generator - ``` - -2. Implement `native_query()` to return a `TableResponse` with a `data_generator`: - - ```py - def native_query(self, query: str, stream: bool = True) -> TableResponse | OkResponse | ErrorResponse: - if stream: - return self._execute_streaming(query) - else: - return self._execute_immediate(query) - - def _execute_streaming(self, query: str) -> TableResponse: - """Execute query and return results as a stream.""" - cursor = self.connection.cursor(name="server_side_cursor") - cursor.execute(query) - - columns = [Column(name=col.name, type=col.type) for col in cursor.description] - - def generate_data(): - while batch := cursor.fetchmany(size=1000): - yield pd.DataFrame(batch, columns=[c.name for c in columns]) - - return TableResponse(columns=columns, data_generator=generate_data()) - ``` - - -For a complete example of streaming implementation, see the [PostgreSQL handler](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/postgres_handler/postgres_handler.py). - - -### Exporting the `connection_args` Dictionary - -The `connection_args` dictionary contains all of the arguments used to establish the connection along with their descriptions, types, labels, and whether they are required or not. - -The `connection_args` dictionary should be stored in the `connection_args.py` file inside the handler folder. - - -The `connection_args` dictionary is stored in a separate file in order to be able to hide sensitive information such as passwords or API keys. - -By default, when querying for `connection_data` from the `information_schema.databases` table, all sensitive information is hidden. 
To unhide it, use this command: - -```sql -set show_secrets=true; -``` - - -Here is an example of the `connection_args.py` file from the [MySQL handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/mysql_handler) where the password value is set to hidden with `'secret': True`. - -```py -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - url={ - 'type': ARG_TYPE.STR, - 'description': 'The URI-Like connection string to the MySQL server. If provided, it will override the other connection arguments.', - 'required': False, - 'label': 'URL' - }, - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the MySQL server.', - 'required': True, - 'label': 'User' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the MySQL server.', - 'required': True, - 'label': 'Password', - 'secret': True - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the MySQL server.', - 'required': True, - 'label': 'Database' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the MySQL server. NOTE: use \'127.0.0.1\' instead of \'localhost\' to connect to local server.', - 'required': True, - 'label': 'Host' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the MySQL server. 
Must be an integer.', - 'required': True, - 'label': 'Port' - }, - ssl={ - 'type': ARG_TYPE.BOOL, - 'description': 'Set it to True to enable ssl.', - 'required': False, - 'label': 'ssl' - }, - ssl_ca={ - 'type': ARG_TYPE.PATH, - 'description': 'Path or URL of the Certificate Authority (CA) certificate file', - 'required': False, - 'label': 'ssl_ca' - }, - ssl_cert={ - 'type': ARG_TYPE.PATH, - 'description': 'Path name or URL of the server public key certificate file', - 'required': False, - 'label': 'ssl_cert' - }, - ssl_key={ - 'type': ARG_TYPE.PATH, - 'description': 'The path name or URL of the server private key file', - 'required': False, - 'label': 'ssl_key', - } -) - -connection_args_example = OrderedDict( - host='127.0.0.1', - port=3306, - user='root', - password='password', - database='database' -) -``` - -### Exporting All Required Variables - -The following should be exported in the `__init__.py` file of the handler: - -- The `Handler` class. -- The `version` of the handler. -- The `name` of the handler. -- The `type` of the handler, either `DATA` handler or `ML` handler. -- The `icon_path` to the file with the database icon. -- The `title` of the handler or a short description. -- The `description` of the handler. -- The `connection_args` dictionary with the connection arguments. -- The `connection_args_example` dictionary with an example of the connection arguments. -- The `import_error` message that is used if the import of the `Handler` class fails. - -A few of these variables are defined in another file called `__about__.py`. This file is imported into the `__init__.py` file. - -Here is an example of the `__init__.py` file for the [MySQL handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/mysql_handler). 
- -```py -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example -try: - from .mysql_handler import ( - MySQLHandler as Handler, - connection_args_example, - connection_args - ) - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'MySQL' -name = 'mysql' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'connection_args', 'connection_args_example', 'import_error', 'icon_path' -] -``` - -The `__about__.py` file for the same [MySQL handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/mysql_handler) contains the following variables: - -```py -__title__ = 'MindsDB MySQL handler' -__package_name__ = 'mindsdb_mysql_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for MySQL" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' - -``` - -### Exporting Requirements - -If the integration requires other packages to function correctly, list them in the `requirements.txt` file. - -Create a text file named `requirements.txt` that stores all packages required for using the integration. Here is an example: - -``` -mysql-connector-python==9.1.0 -... -``` - -## Check out our Database Handlers! 
- -To see some integration handlers that are currently in use, we encourage you to check out the following handlers inside the MindsDB repository: - -* [MySQL](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/mysql_handler) -* [Postgres](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/postgres_handler) - -And here are [all the handlers available in the MindsDB repository](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers). diff --git a/docs/contribute/docs.mdx b/docs/contribute/docs.mdx deleted file mode 100644 index 3032e5daca2..00000000000 --- a/docs/contribute/docs.mdx +++ /dev/null @@ -1,58 +0,0 @@ ---- -title: How to Write MindsDB Documentation -sidebarTitle: Write Documentation -icon: "pen" ---- - -This section gets you started on how to contribute to the MindsDB documentation. - -MindsDB's documentation is run using Mintlify. If you want to contribute to our docs, please follow the steps below to set up the environment locally. - -## Running the Docs Locally - - - **Prerequisite** - You should have installed Git (version 2.30.1 or higher) and Node.js (version 18.10.0 or higher). - - -Step 1. Clone the MindsDB Git repository: - -```console -git clone https://github.com/mindsdb/mindsdb.git -``` - -Step 2. Install Mintlify on your OS: - -```console -npm i mintlify -g -``` - -Step 3. Go to the `docs` folder inside the cloned MindsDB Git repository and start Mintlify there: - -```console -mintlify dev -``` - -The documentation website is now available at `http://localhost:3000`. - - - **Getting an Error?** - If you use the Windows operating system, you may get an error saying `no such file or directory: C:/Users/Username/.mintlify/mint/client`. Here are the steps to troubleshoot it: - - Go to the `C:/Users/Username/.mintlify/` directory. - - Remove the `mint` folder. - - Open the Git Bash in this location and run `git clone https://github.com/mintlify/mint.git`. - - Repeat step 3. 
- - -## MindsDB Repository Structure - -Here is the structure of the MindsDB docs repository: - -``` -docs # All documentation source files -|__assets/ # Images and icons used throughout the docs -│ ├─ ... -│__folders_with_mdx_files/ # All remaining folders that store the .mdx files -|__mdx_files # Some of the .mdx files are stored in the docs directory -|__mintlify.json # This JSON file stores navigation and page setup -``` diff --git a/docs/contribute/install.mdx b/docs/contribute/install.mdx deleted file mode 100644 index 8437d27eb8f..00000000000 --- a/docs/contribute/install.mdx +++ /dev/null @@ -1,129 +0,0 @@ ---- -title: MindsDB Installation for Development -sidebarTitle: Installation for Development -icon: "gears" ---- - -If you want to contribute to the development of MindsDB, you need to install from source. - - -If you do not want to contribute to the development of MindsDB but simply install and use it, then [install MindsDB via Docker](/setup/self-hosted/docker). - - -## Install MindsDB for Development - -Here are the steps to install MindsDB from source. You can either -follow the steps below or visit the provided link. - - -Before installing MindsDB from source, ensure that you use one of the following Python versions: `3.10.x`, `3.11.x`, `3.12.x`, `3.13.x`. - - -1. Fork the [MindsDB repository from GitHub](https://github.com/mindsdb/mindsdb). - -2. Clone the fork locally: - - ```bash - git clone https://github.com//mindsdb.git - ``` - -3. Create a virtual environment: - - ```bash - python -m venv mindsdb-venv - ``` - -4. Activate the virtual environment: - - Windows: - - ```bash - .\mindsdb-venv\Scripts\activate - ``` - - macOS/Linux: - - ```bash - source mindsdb-venv/bin/activate - ``` - -5. Install MindsDB with its local development dependencies: - - Install dependencies: - - ```bash - cd mindsdb - pip install -e . - ``` - -6. Start MindsDB: - - ```bash - python -m mindsdb - ``` - - - By default, MindsDB starts the `http` and `mysql` APIs. 
You can define which APIs to start using the `api` flag as below. - - ```bash - python -m mindsdb --api http,mysql - ``` - - If you want to start MindsDB without the graphical user interface (GUI), use the `--no_studio` flag as below. - - ```bash - python -m mindsdb --no_studio - ``` - - - -Alternatively, you can use a makefile to install dependencies and start MindsDB: - - ```bash - make install_mindsdb - make run_mindsdb - ``` - - -Now you should see the following message in the console: - -``` -... - -mindsdb.api.http.initialize: - GUI available at http://127.0.0.1:47334/ -mindsdb.api.mysql.mysql_proxy.mysql_proxy: Starting MindsDB Mysql proxy server on tcp://127.0.0.1:47335 -mindsdb.api.mysql.mysql_proxy.mysql_proxy: Waiting for incoming connections... -mindsdb: mysql API: started on 47335 -mindsdb: http API: started on 47334 -``` - -You can access the MindsDB Editor at `localhost:47334`. - - -## Install dependencies - -The core installation includes everything needed to run the Federated Query Engine and essential database capabilities. -The dependencies for many of the data or ML integrations are not installed by default. - -If you need additional features — such as Agents, the Knowledge Base, MCP or A2A protocol — you can enable them through extras, rather than installing everything by default. - -### Install Features via Extras - -Optional integrations and features can be installed as needed using extras. - -| Feature | Install command | -|---------|----------------| -| Agents / LLMs | `pip install ".[agents]"` | -| Knowledge Base | `pip install ".[kb]"` | -| Multiple features at once | `pip install ".[agents,knowledgebase]"` | -| Integrations | `pip install .[integration_name]` | - - -You can find all available [handlers here](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers). - - -## What's Next? 
- -Now that you have installed and started MindsDB locally, go ahead and find out how to create and train a model using the [`CREATE MODEL`](/sql/create/model) statement. - -Check out the [Use Cases](/use-cases/overview) section to follow tutorials that cover Large Language Models, Chatbots, Time Series, Classification, and Regression models, Semantic Search, and more. diff --git a/docs/contribute/integrations-readme.mdx b/docs/contribute/integrations-readme.mdx deleted file mode 100644 index 0be4d1b92a1..00000000000 --- a/docs/contribute/integrations-readme.mdx +++ /dev/null @@ -1,48 +0,0 @@ ---- -title: How to Write Handlers README -sidebarTitle: Write Handlers README -icon: "pen" ---- - -The README file is a crucial document that guides users in understanding, using, and contributing to the MindsDB integration. It serves as the first point of contact for anyone interacting with the integration, hence the need for it to be comprehensive, clear, and user-friendly. - -## Sections to Include - -### Table of Contents - -A well-organized table of contents is provided for easy navigation through the document, allowing users to quickly find the information they need. - -### About - -Explain what specific database, application, or framework the integration targets. Provide a concise overview of the integration’s purpose, highlighting its key features and benefits. - -### Handler Implementation - - * Setup - * Detail the installation and initial setup process, including any prerequisites. - * Connection - * Describe the steps to establish and manage connections, with clear instructions. - * Include SQL examples for better clarity. - * Required Parameters - * List and describe all essential parameters necessary for the operation of the integration. - * Optional Parameters - * Detail additional, non-mandatory parameters that can enhance the integration's functionality. 
- -### Example Usage - - * Practical Examples: Offer detailed examples showing how to use the integration effectively. - * Coverage: Ensure examples encompass a range of functionalities, from basic to advanced operations. - * SQL Examples: Include SQL statements and their expected outputs to illustrate use cases. - -### Supported Tables/Tasks - -Clearly enumerate the tables, tasks, or operations that the integration supports, possibly in a list or table format. - -### Limitations - -Transparently outline any limitations or constraints known in the integration. - -### TODO - - * Future Developments: Highlight areas for future enhancements or improvements. - * GitHub Issues: Link to open GitHub issues tagged as enhancements, indicating ongoing or planned feature additions. diff --git a/docs/contribute/python-coding-standards.mdx b/docs/contribute/python-coding-standards.mdx deleted file mode 100644 index 0e3d85ea13d..00000000000 --- a/docs/contribute/python-coding-standards.mdx +++ /dev/null @@ -1,77 +0,0 @@ ---- -title: Python Coding Standards -sidebarTitle: Python Coding Standards -icon: "python" ---- - -# PEP8 - - Strict adherence to [PEP8](https://peps.python.org/pep-0008/) standards is mandatory for all code contributions to MindsDB. - - -**Why PEP8?** -[PEP8](https://peps.python.org/pep-0008/) provides an extensive set of guidelines for Python code styling, promoting readability and a uniform coding standard. By aligning with PEP8, we ensure our codebase remains clean, maintainable, and easily understandable for Python developers at any level. - -#### Automated Checks - - * Upon submission of a Pull Request (PR), an automated process checks the code for PEP8 compliance. - * Non-compliance with PEP8 can result in the failure of the build process. Adherence to PEP8 is not just a best practice but a necessity to ensure smooth integration of new code into the codebase. 
- * If a PR fails due to PEP8 violations, the contributor is required to review the automated feedback provided. - * Pay special attention to common PEP8 compliance issues such as proper indentation, appropriate line length, correct use of whitespace, and following the recommended naming conventions. - * Contributors are encouraged to iteratively improve their code based on the feedback until full compliance is achieved. - - -# Logging - -Always instantiate a logger using the MindsDB utilities module. This practice ensures a uniform approach to logging across different parts of the application. -Example of Logger Creation: - - -```python -from mindsdb.utilities import log -logger = log.getLogger(__name__) -``` -### Setting Logging - - * Environment Variable: Use `MINDSDB_LOG_LEVEL` to set the desired logging level. This approach allows for dynamic adjustment of log verbosity without needing code modifications. - * Log Levels: Available levels include: - * `DEBUG`: Detailed information, typically of interest only when diagnosing problems. - * `INFO`: Confirmation that things are working as expected. - * `WARNING`: An indication that something unexpected happened, or indicative of some problem in the near future. - * `ERROR`: Due to a more serious problem, the software has not been able to perform some function. - * `CRITICAL`: A serious error, indicating that the program itself may be unable to continue running. - * Avoid print() statements. They lack the flexibility and control offered by logging mechanisms, particularly in terms of output redirection and level-based filtering. - * The logger name should be `__name__` to automatically reflect the module's name. This convention is crucial for pinpointing the origin of log messages. - -# Docstrings - - Docstrings are essential for documenting Python code. They provide a clear explanation of the functionality of classes, functions, modules, etc., making the codebase easier to understand and maintain. 
- - A well-written docstring should include: - * Function's Purpose: Describe what the function/class/module does. - * Parameters: List and explain the parameters it takes. - * Return Value: Describe what the function returns. - * Exceptions: Mention any exceptions that the function might raise. - -```python -def example_function(param1, param2): - """This is an example docstring. - Args: - param1 (type): Description of param1. - param2 (type): Description of param2. - - Returns: - type: Description of the return value. - - Raises: - ExceptionType: Description of the exception. - """ - # function body... -``` - -# Exception Handling - -Implementing robust error handling strategies is essential to maintain the stability and reliability of MindsDB. Proper exception management ensures that the application behaves predictably under error conditions, providing clear feedback and preventing unexpected crashes or behavior. - -* Utilizing MindsDB Exceptions: To ensure uniformity and clarity in error reporting, always use predefined exceptions from the MindsDB exceptions library. -* Adding New Exceptions: If during development you encounter a scenario where none of the existing exceptions adequately represent the error, consider defining a new, specific exception. \ No newline at end of file diff --git a/docs/faqs/benefits.mdx b/docs/faqs/benefits.mdx deleted file mode 100644 index b53cb9a6328..00000000000 --- a/docs/faqs/benefits.mdx +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: Benefits of MindsDB -sidebarTitle: Benefits of MindsDB ---- - -MindsDB facilitates development of AI-powered apps by bridging the gap between data and AI. Thanks to its numerous integrations with data sources (including databases, vector stores, and applications) and AI frameworks (including LLMs and AutoML), you can mix and match between the available integrations to create custom AI workflows with MindsDB. - -Here are some prominent benefits of using MindsDB: - -1. 
**Unified AI Deployment and Management**

- MindsDB integrates directly with the database, warehouse, or stream. This eliminates the need to build and maintain custom, complex data pipelines or separate systems for AI/ML deployment. - -2. **Automated AI Workflows**

- MindsDB automates the entire AI workflow to execute on time-based or event-based triggers. No need to build custom automation logic to get predictions, move data, or (re)train models. - -3. **Turn every developer into an AI Engineer**

- MindsDB enables developers to leverage their existing SQL skills, accelerating the adoption of AI across teams and departments, turning every developer into an AI Engineer. - -4. **Enhanced Scalability and Performance**

- Whether in your private cloud or using MindsDB’s managed service, MindsDB enables you to handle large-scale AI/ML workloads efficiently. MindsDB can scale to meet the demands of your use case, ensuring optimal performance and responsiveness. diff --git a/docs/faqs/disposable-email-doman-and-openai.mdx b/docs/faqs/disposable-email-doman-and-openai.mdx deleted file mode 100644 index 5703f73971f..00000000000 --- a/docs/faqs/disposable-email-doman-and-openai.mdx +++ /dev/null @@ -1,8 +0,0 @@ ---- -title: Disposable Email Domains and OpenAI -sidebarTitle: Disposable Email Domains and OpenAI ---- - -Disposable email domains can't make use of OpenAI, so users with such email addresses will encounter errors when using MindsDB's integration with OpenAI. - -To check if your email domain is disposable, you can verify it on [QuickEmailVerification](https://quickemailverification.com/tools/disposable-email-address-detector) or [VerifyMail.io](https://verifymail.io/domain/ipnuc.com). diff --git a/docs/faqs/mindsdb-with-php.mdx b/docs/faqs/mindsdb-with-php.mdx deleted file mode 100644 index 50d7f18aa27..00000000000 --- a/docs/faqs/mindsdb-with-php.mdx +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: How to Interact with MindsDB from PHP -sidebarTitle: MindsDB and PHP ---- - -To get started with MindsDB, you need to install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). - -There are a few ways you can interact with MindsDB from the PHP code. - -1. You can connect to MindsDB using the [PHP Data Objects](https://www.php.net/manual/en/book.pdo.php) and execute statements directly on MindsDB with the `PDO::query` method. - -2. You can use the [REST API](/rest/overview) endpoints to interact with MindsDB directly from PHP. 
diff --git a/docs/faqs/missing-required-cpu-features.mdx b/docs/faqs/missing-required-cpu-features.mdx deleted file mode 100644 index f3797649af0..00000000000 --- a/docs/faqs/missing-required-cpu-features.mdx +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Missing required CPU features -sidebarTitle: Missing required CPU features ---- - -Depending on the operating system and its setup, you may encounter this runtime warning when starting MindsDB: - -```bash -RuntimeWarning: Missing required CPU features. -The following required CPU features were not detected: -avx2, fma, bmi1, bmi2, lzcnt -``` - -The solution is to install the `polars-lts-cpu` package in the environment where MindsDB runs. - -If you are on an Apple ARM machine (e.g. M1), this warning is likely due to running Python under Rosetta. To troubleshoot it, install a native version of Python that does not run under Rosetta x86-64 emulation. diff --git a/docs/faqs/persist-predictions.mdx b/docs/faqs/persist-predictions.mdx deleted file mode 100644 index f7bd67c48a4..00000000000 --- a/docs/faqs/persist-predictions.mdx +++ /dev/null @@ -1,78 +0,0 @@ ---- -title: How to Persist Predictions -sidebarTitle: Persisting Predictions ---- - -MindsDB provides a range of options for persisting predictions and forecasts. Let's explore all possibilities to save the prediction results. - - -**Reasons to Save Predictions** - -Every time you want to get predictions, you need to query the model, usually joined with an input data table, like this: - -```sql -SELECT input.product_name, input.review, output.sentiment -FROM mysql_demo_db.amazon_reviews AS input -JOIN sentiment_classifier AS output; -``` - -However, querying the model returns the result set that is not persistent by default. For future use, it is recommended to persist the result set instead of querying the model again with the same data. - -MindsDB enables you to save predictions into a view or a table or download as a CSV file. 
- - -## Creating a View - -After creating the model, you can save the prediction results into a view. - -```sql -CREATE VIEW review_sentiment ( - - -- querying for predictions - SELECT input.product_name, input.review, output.sentiment - FROM mysql_demo_db.amazon_reviews AS input - JOIN sentiment_classifier AS output - LIMIT 10 - -); -``` - -Now the `review_sentiment` view stores sentiment predictions made for all customer reviews. - - -Here is a [comprehensive tutorial](/nlp/sentiment-analysis-inside-mysql-with-openai) on how to predict sentiment of customer reviews using OpenAI. - - -## Creating a Table - -After creating the model, you can save predictions into a database table. - -```sql -CREATE TABLE local_postgres.question_answers ( - - -- querying for predictions - SELECT input.article_title, input.question, output.answer - FROM mysql_demo_db.questions AS input - JOIN question_answering_model AS output - LIMIT 10 - -); -``` - -Here, the `local_postgres` database is a PostgreSQL database connected to MindsDB with a user that has the write access. - -Now the `question_answers` table stores all prediction results. - - -Here is a [comprehensive tutorial](/nlp/question-answering-inside-mysql-with-openai) on how to answer questions using OpenAI. - - -## Downloading a CSV File - -After executing the `SELECT` statement, you can download the output as a CSV file. - -

- -

- -Click the `Export` button and choose the `CSV` option. diff --git a/docs/homepage.mdx b/docs/homepage.mdx deleted file mode 100644 index 460cc0a0d91..00000000000 --- a/docs/homepage.mdx +++ /dev/null @@ -1,29 +0,0 @@ ---- -mode: "custom" ---- - -
-

- Documentation -

-

- Everything you need to get up and running. Choose a path below to get started. -

-
- -
-
- - Build AI-powered applications on top of your data sources using SQL, APIs, SDKs, and native AI and data integrations. -



- Visit the documentation. -
-
-
- - Launch multiple AI agent harnesses like OpenClaw, NanoClaw, Hermes, and Anton Cowork, and use them side by side in a unified workspace. -



- Visit the webpage. -
-
-
diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 00000000000..d6fc53b4777 --- /dev/null +++ b/docs/index.html @@ -0,0 +1,290 @@ + + + + +Minds Cowork — Docs + + + + + + + + + + +
+
Documentation
+

Build and automate
with Minds Cowork

+

+ A general-purpose AI platform for knowledge workers. Automate recurring tasks, + create artifacts, and deploy anywhere — on your terms. +

+ +
+
+
+
Guide
+

Get Started

+

Clone, install dependencies, and run the desktop or web app in under five minutes.

+ Setup guide → +
+
+
+
Reference
+

API

+

REST endpoints for conversations, projects, artifacts, schedules, connectors, and more.

+ API reference → +
+
+
+
Examples
+

Use Cases

+

See what creators, operators, and strategists are building with the platform.

+ Explore examples → +
+
+
+ +
+ +
+
+
frontend
+
Electron + Vite desktop app. Also ships as a web SPA via make dev-web.
+
github.com/mindsdb/cowork
+
+
+
backend / core_api
+
FastAPI server. Conversations, projects, artifacts, schedules, connectors.
+
github.com/mindsdb/cowork-server
+
+
+
backend / core_agent
+
Anton — the autonomous coding and reasoning agent powering task execution.
+
github.com/mindsdb/anton
+
+
+
+ + + + + diff --git a/docs/integrations/ai-engines/byom.mdx b/docs/integrations/ai-engines/byom.mdx deleted file mode 100644 index ffdc564ff57..00000000000 --- a/docs/integrations/ai-engines/byom.mdx +++ /dev/null @@ -1,213 +0,0 @@ ---- -title: Bring Your Own Model -sidebarTitle: BYOM ---- - -The Bring Your Own Model (BYOM) feature lets you upload your own models in the form of Python code and use them within MindsDB. - -## How It Works - -You can upload your custom model via the MindsDB editor by clicking `Add` and `Upload custom model`, like this: - -

- -

- -Here is the form that needs to be filled out in order to bring your model to MindsDB: - -

- -

- -Let's briefly go over the files that need to be uploaded: - -* The Python file stores an implementation of your model. It should contain the class with the implementation for the `train` and `predict` methods. Here is the sample format: - - ```py - class CustomPredictor(): - - def train(self, df, target_col, args=None): - - return '' - - def predict(self, df): - - return df - ``` - - - ```py - import os - import pandas as pd - - from sklearn.cross_decomposition import PLSRegression - from sklearn import preprocessing - - class CustomPredictor(): - - def train(self, df, target_col, args=None): - print(args, '1111') - - self.target_col = target_col - y = df[self.target_col] - x = df.drop(columns=self.target_col) - x_cols = list(x.columns) - - x_scaler = preprocessing.StandardScaler().fit(x) - y_scaler = preprocessing.StandardScaler().fit(y.values.reshape(-1, 1)) - - xs = x_scaler.transform(x) - ys = y_scaler.transform(y.values.reshape(-1, 1)) - - pls = PLSRegression(n_components=1) - pls.fit(xs, ys) - - self.pls = pls - self.y_scaler = y_scaler - - T = pls.x_scores_ - W = pls.x_weights_ - P = pls.x_loadings_ - R = pls.x_rotations_ - - self.x_cols = x_cols - self.x_scaler = x_scaler - self.P = P - - def calc_limit(df): - res = None - for column in df.columns: - if column == self.target_col: continue - tbl = df.groupby(self.target_col).agg({column: ['mean', 'min', 'max', 'std']}) - tbl.columns = tbl.columns.get_level_values(1) - tbl['name'] = column - tbl['std'] = tbl['std'].fillna(0) - tbl['lower'] = tbl['mean'] - 3 * tbl['std'] - tbl['upper'] = tbl['mean'] + 3 * tbl['std'] - tbl['lower'] = tbl[["lower", "min"]].max(axis=1) # lower >= min - tbl['upper'] = tbl[["upper", "max"]].min(axis=1) # upper <= max - tbl = tbl[['name', 'lower', 'mean', 'upper']] - try: - res = pd.concat([res, tbl]) - except: - res = tbl - return res - - trdf = pd.DataFrame() - trdf[self.target_col] = y.values - trdf['T1'] = T.squeeze() - limit = calc_limit(trdf).reset_index() - - self.limit = 
limit - - return "Trained predictor ready to be stored" - - def predict(self, df): - - - xt = df[self.x_cols] - - xt = self.x_scaler.transform(xt) - - excess_cols = list(set(df.columns) - set(self.x_cols)) - - pred_df = df[excess_cols].copy() - - ys_pred = self.pls.predict(xt) - y_pred = self.y_scaler.inverse_transform(ys_pred).ravel() - pred_df[self.target_col] = y_pred - - pred_df['T1'] = (xt @ self.P).squeeze() - return pred_df - ``` - - -* The optional requirements file, or `requirements.txt`, stores all dependencies along with their versions. Here is the sample format: - - ```sql - dependency_package_1 == version - dependency_package_2 >= version - dependency_package_3 >= version, < version - ... - ``` - - - ```sql - pandas - scikit-learn - ``` - - -Once you upload the above files, please provide an engine name. - -Please note that your custom model is uploaded to MindsDB as an engine. Then you can use this engine to create a model. - -

- -

- -## Configuration - -The BYOM feature can be configured with the following environment variables: - -* `MINDSDB_BYOM_ENABLED` - - This environment variable defines whether the BYOM feature is enabled (`MINDSDB_BYOM_ENABLED=true`) or disabled (`MINDSDB_BYOM_ENABLED=false`). Note that by default, it is disabled. - - Alternatively, you can enable it in the MindsDB configuration file: - ```json - { - "byom": { - "enabled": true - } - } - ``` - -* `MINDSDB_BYOM_DEFAULT_TYPE` - - This environment variable defines the modes of operation of the BYOM feature. - - - `MINDSDB_BYOM_DEFAULT_TYPE=venv`

- When using the `venv` mode, MindsDB creates a virtual environment and installs in it the packages listed in the `requirements.txt` file. This virtual environment is dedicated for the custom model. Note that when running MindsDB locally, it is the default mode. - - - `MINDSDB_BYOM_DEFAULT_TYPE=inhouse`

- When using the `inhouse` mode, there is no dedicated virtual environment for the custom model. It uses the environment of MindsDB, therefore, the `requirements.txt` file is not used with this mode. - -* `MINDSDB_BYOM_INHOUSE_ENABLED` - - This environment variable defines whether the `inhouse` mode is enabled (`MINDSDB_BYOM_INHOUSE_ENABLED=true`) or disabled (`MINDSDB_BYOM_INHOUSE_ENABLED=false`). Note that when running MindsDB locally, it is enabled by default. - -## Example - -We upload the custom model, as below: - -

- -

- -Here we upload the `model.py` file that stores an implementation of the model and the `requirements.txt` file that stores all the dependencies. - -Once the model is uploaded, it becomes an ML engine within MindsDB. Now we use this `custom_model_engine` to create a model as follows: - -```sql -CREATE MODEL custom_model -FROM my_integration - (SELECT * FROM my_table) -PREDICT target -USING - ENGINE = 'custom_model_engine'; -``` - -Let's query for predictions by joining the custom model with the data table. Please note that when querying for predictions, do not include the target column in the `input` data selection. - -```sql -SELECT - input.feature_column, - model.target AS predicted_target -FROM my_integration.my_table AS input -JOIN custom_model AS model; -``` - - -Check out the [BYOM handler folder](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/byom_handler) to see the implementation details. - diff --git a/docs/integrations/ai-engines/mlflow.mdx b/docs/integrations/ai-engines/mlflow.mdx deleted file mode 100644 index 0869352ff4e..00000000000 --- a/docs/integrations/ai-engines/mlflow.mdx +++ /dev/null @@ -1,75 +0,0 @@ ---- -title: MindsDB and MLflow -sidebarTitle: MLflow ---- - -MLflow allows you to create, train, and serve machine learning models, apart from other features, such as organizing experiments, tracking metrics, and more. - -## How to Use MLflow Models in MindsDB - -Here are the prerequisites for using MLflow-served models in MindsDB: - -1. Train a model via a wrapper class that inherits from the `mlflow.pyfunc.PythonModel` class. It should expose the `predict()` method that returns the predicted output for some input data when called. - - - Please ensure that the Python version specified for Conda environment matches the one used to train the model. - - -2. 
Start the MLflow server: - - ```bash - mlflow server -p 5001 --backend-store-uri sqlite:////path/to/mlflow.db --default-artifact-root ./artifacts --host 0.0.0.0 - ``` - -3. Serve the trained model: - - ```bash - mlflow models serve --model-uri ./model_folder_name - ``` - -## Example - -Let's create a model that registers an MLflow-served model as an AI Table: - -```sql -CREATE MODEL mindsdb.mlflow_model -PREDICT target -USING - engine = 'mlflow', - model_name = 'model_folder_name', -- replace the model_folder_name variable with a real value - mlflow_server_url = 'http://0.0.0.0:5001/', -- match the port number with the MLflow server (point 2 in the previous section) - mlflow_server_path = 'sqlite:////path/to/mlflow.db', -- replace the path with a real value (here we use the sqlite database) - predict_url = 'http://localhost:5000/invocations'; -- match the port number that serves the trained model (point 3 in the previous section) -``` - -Here is how to check the model's status: - -```sql -DESCRIBE mlflow_model; -``` - -Once the status is `complete`, we can query for predictions. - -One way is to query for a single prediction using synthetic data in the `WHERE` clause. - -```sql -SELECT target -FROM mindsdb.mlflow_model -WHERE text = 'The tsunami is coming, seek high ground'; -``` - -Another way is to query for batch predictions by joining the model with the data table. - -```sql -SELECT t.text, m.predict -FROM mindsdb.mlflow_model AS m -JOIN files.some_text as t; -``` - -Here, the data table comes from the `files` integration. It is joined with the model and predictions are made for all the records at once. - - -**Get More Insights** - -Check out the article on [How to bring your own machine learning model to databases](https://medium.com/mindsdb/how-to-bring-your-own-machine-learning-model-to-databases-47a188d6db00) by [Patricio Cerda Mardini](https://medium.com/@paxcema) to learn more. 
- diff --git a/docs/integrations/app-integrations/binance.mdx b/docs/integrations/app-integrations/binance.mdx deleted file mode 100644 index 596e8a32531..00000000000 --- a/docs/integrations/app-integrations/binance.mdx +++ /dev/null @@ -1,71 +0,0 @@ ---- -title: Binance -sidebarTitle: Binance ---- - -In this section, we present how to connect Binance to MindsDB. - -[Binance](https://www.binance.com/en) is one of the world's largest cryptocurrency exchanges. It's an online platform where you can buy, sell, and trade a wide variety of cryptocurrencies. Binance offers a range of services beyond just trading, including staking, lending, and various financial products related to cryptocurrencies. - -Binance provides real-time trade data that can be utilized within MindsDB to make real-time forecasts. - -## Connection - -This handler integrates with the [Binance API](https://binance-docs.github.io/apidocs/spot/en/#change-log) to make aggregate trade (kline) data available to use for model training and predictions. - -Since there are no parameters required to connect to Binance using MindsDB, you can use the below statement: - -```sql -CREATE DATABASE my_binance -WITH - ENGINE = 'binance'; -``` - -## Usage - -### Select Data -By default, aggregate data (klines) from the latest 1000 trading intervals with a length of one minute (1m) each will be returned. 
- -```sql -SELECT * -FROM my_binance.aggregated_trade_data -WHERE symbol = 'BTCUSDT'; -``` - - Here is the sample output data: - - ``` - | symbol | open_time | open_price | high_price | low_price | close_price | volume | close_time | quote_asset_volume | number_of_trades | taker_buy_base_asset_volume | taker_buy_quote_asset_volume | - | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ------------------ | ---------------- | --------------------------- | ---------------------------- | - | BTCUSDT | 1678338600 | 21752.65000 | 21761.33000 | 21751.53000 | 21756.7000 | 103.8614100 | 1678338659.999| 2259656.20520700 | 3655 | 55.25763000 | 1202219.60971860 | - ``` - where: - * `symbol` - Trading pair (BTC to USDT in the above example) - * `open_time` - Start time of interval in seconds since the Unix epoch (default interval is 1m) - * `open_price` - Price of a base asset at the beginning of a trading interval - * `high_price` - The highest price of a base asset during trading interval - * `low_price` - Lowest price of a base asset during a trading interval - * `close_price` - Price of a base asset at the end of a trading interval - * `volume` - Total amount of base asset traded during an interval - * `close_time` - End time of interval in seconds since the Unix epoch - * `quote_asset_volume` - Total amount of quote asset (USDT in the above case) traded during an interval - * `number_of_trades` - Total number of trades made during an interval - * `taker_buy_base_asset_volume` - How much of the base asset volume is contributed by taker buy orders - * `taker_buy_quote_asset_volume` - How much of the quote asset volume is contributed by taker buy orders - - -To get a customized response we can pass open_time, close_time, and interval: - -```sql -SELECT * -FROM my_binance.aggregated_trade_data -WHERE symbol = 'BTCUSDT' -AND open_time > '2023-01-01' -AND close_time < '2023-01-03 08:00:00' -AND interval = '1s' -LIMIT 
10000; -``` - - -Supported intervals are [listed here](https://binance-docs.github.io/apidocs/spot/en/#kline-candlestick-data) - diff --git a/docs/integrations/app-integrations/confluence.mdx b/docs/integrations/app-integrations/confluence.mdx deleted file mode 100644 index 1037828a358..00000000000 --- a/docs/integrations/app-integrations/confluence.mdx +++ /dev/null @@ -1,61 +0,0 @@ ---- -title: Confluence -sidebarTitle: Confluence ---- - -This documentation describes the integration of MindsDB with [Confluence](https://www.atlassian.com/software/confluence), a popular collaboration and documentation tool developed by Atlassian. -The integration allows MindsDB to access data from Confluence and enhance it with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). - -## Connection - -Establish a connection to Confluence from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/confluence_handler) as an engine. - -```sql -CREATE DATABASE confluence_datasource -WITH - ENGINE = 'confluence', - PARAMETERS = { - "api_base": "https://example.atlassian.net", - "username": "john.doe@example.com", - "password": "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" - }; -``` - -Required connection parameters include the following: - -* `api_base`: The base URL for your Confluence instance/server. -* `username`: The email address associated with your Confluence account. -* `password`: The API token generated for your Confluence account. - - -Refer to this [guide](https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/) for instructions on how to create API tokens for your account. 
- - -## Usage - -Retrieve data from a specified table by providing the integration and table names: - -```sql -SELECT * -FROM confluence_datasource.table_name -LIMIT 10; -``` - - -The above example utilizes `confluence_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Supported Tables - -* `spaces`: The table containing information about the spaces in Confluence. -* `pages`: The table containing information about the pages in Confluence. -* `blogposts`: The table containing information about the blog posts in Confluence. -* `whiteboards`: The table containing information about the whiteboards in Confluence. -* `databases`: The table containing information about the databases in Confluence. -* `tasks`: The table containing information about the tasks in Confluence. \ No newline at end of file diff --git a/docs/integrations/app-integrations/dockerhub.mdx b/docs/integrations/app-integrations/dockerhub.mdx deleted file mode 100644 index fadba90e2d0..00000000000 --- a/docs/integrations/app-integrations/dockerhub.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: Docker Hub -sidebarTitle: Docker Hub ---- - -In this section, we present how to connect Docker Hub repository to MindsDB. - -[Docker Hub](https://hub.docker.com/) is the world's easiest way to create, manage, and deliver your team's container applications. - -Data from Docker Hub can be utilized within MindsDB to train models and make predictions about Docker Hub repositories. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Docker Hub to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Docker Hub. 
- -## Connection - -This handler is implemented using the `requests` library that makes http calls to https://docs.docker.com/docker-hub/api/latest/#tag/resources. - -The required arguments to establish a connection are as follows: - -* `username`: Username used to login to DockerHub. -* `password`: Password used to login to DockerHub. - - -Read about creating an account [here](https://hub.docker.com/). - - -Here is how to connect to Docker Hub using MindsDB: - -```sql -CREATE DATABASE dockerhub_datasource -WITH ENGINE = 'dockerhub', -PARAMETERS = { - "username": "username", - "password": "password" -}; -``` - -## Usage - -Now, you can query Docker Hub as follows: - -```sql -SELECT * FROM dockerhub_datasource.repo_images_summary WHERE namespace="docker" AND repository="trusted-registry-nginx"; -``` - - -Both the `namespace` and `repository` parameters are required in the WHERE clause. - diff --git a/docs/integrations/app-integrations/email.mdx b/docs/integrations/app-integrations/email.mdx deleted file mode 100644 index 6976497c68c..00000000000 --- a/docs/integrations/app-integrations/email.mdx +++ /dev/null @@ -1,94 +0,0 @@ ---- -title: Email -sidebarTitle: Email ---- - -In this section, we present how to connect Email accounts to MindsDB. - -By connecting your email account to MindsDB, you can utilize various AI models available within MindsDB to summarize emails, detect spam, or even automate email replies. - - -Please note that currently you can connect Gmail and Outlook accounts using this integration. - - -## Connection - -This handler was implemented using standard Python libraries: `email`, `imaplib`, and `smtplib`. - -The Email handler is initialized with the following required parameters: - -* `email` stores an email address used for authentication. -* `password` stores a password used for authentication. - -Additionally, the following optional parameters can be passed: - -* `smtp_server` used to send emails. Defaults to `smtp.gmail.com`. 
-* `smtp_port` used to send emails. Defaults to `587`. -* `imap_server` used to receive emails. Defaults to `imap.gmail.com`. - - -At the moment, the handler has been tested with Gmail and Outlook accounts. - -To use the handler on a Gmail account, you must create an app password following [this instruction](https://support.google.com/accounts/answer/185833?hl=en) and use its value for the `password` parameter. - -By default, the Email handler connects to Gmail. If you want to use another email provider, such as Outlook, add the values for the `imap_server` and `smtp_server` parameters. - - -### Gmail - -To connect your Gmail account to MindsDB, use the below `CREATE DATABASE` statement: - -```sql -CREATE DATABASE email_datasource -WITH ENGINE = 'email', -PARAMETERS = { - "email": "youremail@gmail.com", - "password": "yourpassword" -}; -``` - -It creates a database that comes with the `emails` table. - -### Outlook - -To connect your Outlook account to MindsDB, use the below `CREATE DATABASE` statement: - -```sql -CREATE DATABASE email_datasource -WITH ENGINE = 'email', -PARAMETERS = { - "email": "youremail@outlook.com", - "password": "yourpassword", - "smtp_server": "smtp.office365.com", - "smtp_port": "587", - "imap_server": "outlook.office365.com" -}; -``` - -It creates a database that comes with the `emails` table. 
- -## Usage - -Now you can query for emails like this: - -```sql -SELECT * -FROM email_datasource.emails; -``` - -And you can apply filters like this: - -```sql -SELECT id, body, subject, to_field, from_field, datetime -FROM email_datasource.emails -WHERE subject = 'MindsDB' -ORDER BY id -LIMIT 5; -``` - -Or, write emails like this: - -```sql -INSERT INTO email_datasource.emails(to_field, subject, body) -VALUES ("toemail@outlook.com", "MindsDB", "Hello from MindsDB!"); -``` diff --git a/docs/integrations/app-integrations/github.mdx b/docs/integrations/app-integrations/github.mdx deleted file mode 100644 index e8647118c4d..00000000000 --- a/docs/integrations/app-integrations/github.mdx +++ /dev/null @@ -1,77 +0,0 @@ ---- -title: GitHub -sidebarTitle: GitHub ---- - -In this section, we present how to connect GitHub repository to MindsDB. - -[GitHub](https://github.com/) is a web-based platform and service that is primarily used for version control and collaborative software development. It provides a platform for developers and teams to host, review, and manage source code for software projects. - -Data from GitHub, including issues and PRs, can be utilized within MindsDB to make relevant predictions or automate the issue/PR creation. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect GitHub to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to GitHub. - -## Connection - -This handler is implemented using the `pygithub` library, a Python library that wraps GitHub API v3. - -The required arguments to establish a connection are as follows: - -* `repository` is the GitHub repository name. -* `api_key` is an optional GitHub API key to use for authentication. 
-* `github_url` is an optional GitHub URL to connect to a GitHub Enterprise instance. - - -Check out [this guide](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens) on how to create the GitHub API key. - -It is recommended to use the API key to avoid the `API rate limit exceeded` error. - - -Here is how to connect the MindsDB GitHub repository: - -```sql -CREATE DATABASE mindsdb_github -WITH ENGINE = 'github', -PARAMETERS = { - "repository": "mindsdb/mindsdb" -}; -``` - -## Usage - -The `mindsdb_github` connection contains two tables: `issues` and `pull_requests`. - -Here is how to query for all issues: - -```sql -SELECT * -FROM mindsdb_github.issues; -``` - -You can run more advanced queries to fetch specific issues in a defined order: - -```sql -SELECT number, state, creator, assignees, title, labels -FROM mindsdb_github.issues -WHERE state = 'open' -LIMIT 10; -``` - -And the same goes for pull requests: - -```sql -SELECT number, state, title, creator, head, commits -FROM mindsdb_github.pull_requests -WHERE state = 'open' -LIMIT 10; -``` - - -For more information about available actions and development plans, visit [this page](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/github_handler/README.md). - diff --git a/docs/integrations/app-integrations/gitlab.mdx b/docs/integrations/app-integrations/gitlab.mdx deleted file mode 100644 index 4708f5c3e58..00000000000 --- a/docs/integrations/app-integrations/gitlab.mdx +++ /dev/null @@ -1,70 +0,0 @@ ---- -title: GitLab -sidebarTitle: GitLab ---- - -In this section, we present how to connect GitLab repository to MindsDB. -[GitLab](https://about.gitlab.com/) is a DevSecOps Platform. Data from GitLab, including issues and MRs, can be utilized within MindsDB to make relevant predictions or automate the issue/MR creation. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. 
Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect GitLab to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to GitLab. - -## Connection - -This handler was implemented using the [python-gitlab](https://github.com/python-gitlab/python-gitlab) library. -python-gitlab is a Python library that wraps GitLab API. - -The GitLab handler is initialized with the following parameters: - -* `repository`: a required name of a GitLab repository to connect to. -* `api_key`: an optional GitLab API key to use for authentication. - -Here is how to connect MindsDB to a GitLab repository: - -```sql -CREATE DATABASE mindsdb_gitlab -WITH ENGINE = 'gitlab', -PARAMETERS = { - "repository": "gitlab-org/gitlab", - "api_key": "api_key", -- optional GitLab API key -}; -``` - -## Usage - -The `mindsdb_gitlab` connection contains two tables: `issues` and `merge_requests`. - -Now, you can use this established connection to query this table as: - -```sql -SELECT * FROM mindsdb_gitlab.issues; -``` - -You can run more advanced queries to fetch specific issues in a defined order: - -```sql -SELECT number, state, creator, assignee, title, created, labels - FROM mindsdb_gitlab.issues - WHERE state="opened" - ORDER BY created ASC, creator DESC - LIMIT 10; -``` - -And the same goes for merge requests: - -```sql -SELECT number, state, creator, reviewers, title, created, has_conflicts - FROM mindsdb_gitlab.merge_requests - WHERE state="merged" - ORDER BY created ASC, creator DESC - LIMIT 10; -``` - - -For more information about available actions and development plans, visit [this page](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/gitlab_handler/README.md). 
- diff --git a/docs/integrations/app-integrations/gmail.mdx b/docs/integrations/app-integrations/gmail.mdx deleted file mode 100644 index 0825a020a82..00000000000 --- a/docs/integrations/app-integrations/gmail.mdx +++ /dev/null @@ -1,186 +0,0 @@ ---- -title: Gmail -sidebarTitle: Gmail ---- - -In this section, we present how to connect Gmail accounts to MindsDB. - -[Gmail](https://gmail.com/) is a widely used and popular email service developed by Google. - -By connecting your Gmail account to MindsDB, you can utilize various AI models available within MindsDB to summarize emails, detect spam, or even automate email replies. - - -Please note that currently you can connect your Gmail account to local MindsDB installation by providing a path to the credentials file stored locally. - -If you want to connect your Gmail account to MindsDB Cloud, you can upload the credentials file, for instance, to your S3 bucket and provide a link to it as a parameter. - - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Gmail to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Gmail. - -## Connection - -The required arguments to establish a connection are as follows: - -* `credentials_file` local path to the credentials.json or `credentials_url` in case your file is uploaded to s3. Follow the instructions below to generate the credentials file. -* `scopes` define the level of access granted. It is optional and by default it uses 'https://.../gmail.compose' and 'https://.../gmail.readonly' scopes. 
- -In order to make use of this handler and connect the Gmail app to MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE mindsdb_gmail -WITH ENGINE = 'gmail', -parameters = { - "credentials_file": "mindsdb/integrations/handlers/gmail_handler/credentials.json", - "scopes": ['https://.../gmail.compose', 'https://.../gmail.readonly', ...] -}; -``` - -Or, you can also connect by giving the credentials file from an s3 [pre signed url](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ShareObjectPreSignedURL.html). To do this you need to pass in the `credentials_url` parameter as a [pre signed url](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ShareObjectPreSignedURL.html). For example: - -```sql -CREATE DATABASE mindsdb_gmail -WITH ENGINE = 'gmail', -parameters = { - "credentials_url": "https://s3.amazonaws.com/your_bucket/credentials.json?response-content-disposition=inline&X-Amz-Security-Token=12312...", - -- "scopes": ['SCOPE_1', 'SCOPE_2', ...] -- Optional scopes. By default 'https://.../gmail.compose' & 'https://.../gmail.readonly' scopes are used -}; -``` - - -You need a Google account in order to use this integration. Here is how to get the credentials file: - -1. Create a Google Cloud Platform (GCP) Project: - - 1.1 Go to the GCP Console (https://console.cloud.google.com/). - - 1.2 If you haven't created a project before, you'll be prompted to do so now. - - 1.3 Give your new project a name. - - 1.4 Click `Create` to create the new project. - -2. Enable the Gmail API: - - 2.1 In the GCP Console, select your project. - - 2.2 Navigate to `APIs & Services` > `Library`. - - 2.3 In the search bar, search for `Gmail`. - - 2.4 Click on `Gmail API`, then click `Enable`. - -3. Create credentials for the Gmail API: - - 3.1 Navigate to `APIs & Services` > `Credentials`. - - 3.2 Click on the `Create Credentials` button and choose `OAuth client ID`. 
- - 3.3 If you haven't configured the OAuth consent screen before, you'll be prompted to do so now. Make sure to choose `External` for User Type, and select the necessary scopes. Make sure to save the changes. - - Now, create the OAuth client ID. Choose `Web application` for the Application Type and give it a name. - - 3.4 Add the following MindsDB URL to `Authorized redirect URIs`: - - For local installation, add `http://localhost/verify-auth` - - For Cloud, add `http://cloud.mindsdb.com/verify-auth`. - - 3.5 Click `Create`. - -4. Download the JSON file: - - 4.1 After creating your credentials, click the download button (an icon of an arrow pointing down) on the right side of your client ID. This will download a JSON file, so you will use the location to it in the `credentials_file` param. - - -## Usage - -This creates a database called mindsdb_gmail. This database ships with a table called emails that we can use to search for emails as well as to write emails. - -Now you can use your Gmail data, like this: - -* searching for email: - - ```sql - SELECT * - FROM mindsdb_gmail.emails - WHERE query = 'alert from:*@google.com' - AND label_ids = "INBOX,UNREAD" - LIMIT 20; - ``` - -* writing emails: - - ```sql - INSERT INTO mindsdb_gmail.emails (thread_id, message_id, to_email, subject, body) - VALUES ('187cbdd861350934d', '8e54ccfd-abd0-756b-a12e-f7bc95ebc75b@Spark', 'test@example2.com', 'Trying out MindsDB', - 'This seems awesome. You must try it out whenever you can.'); - ``` - -## Example 1: Automating Email Replies - -Now that we know how to pull emails into our database and write emails, we can make use of OpenAI engine to write email replies. 
- -First, create an OpenAI engine, passing your OpenAI API key: - -```sql -CREATE ML_ENGINE openai_engine -FROM openai -USING - openai_api_key = 'your-openai-api-key'; -``` - -Then, create a model using this engine: - -```sql -CREATE MODEL mindsdb.gpt_model -PREDICT response -USING - engine = 'openai_engine', - max_tokens = 500, - api_key = 'your_api_key', - model_name = 'gpt-3.5-turbo', - prompt_template = 'From input message: {{body}}\ - by from_user: {{sender}}\ - In less than 500 characters, write an email response to {{sender}} in the following format:\ - Start with proper salutation and respond with a short message in a casual tone, and sign the email with my name mindsdb'; -``` - -## Example 2: Detecting Spam Emails - -You can check if an email is spam by using one of the Hugging Face pre-trained models. - -```sql -CREATE MODEL mindsdb.spam_classifier -PREDICT PRED -USING - engine = 'huggingface', - task = 'text-classification', - model_name = 'mrm8488/bert-tiny-finetuned-sms-spam-detection', - input_column = 'text_spammy', - labels = ['ham', 'spam']; -``` - -Then, create a view that contains the snippet or the body of the email. - -```sql -CREATE VIEW mindsdb.emails_text AS( - SELECT snippet AS text_spammy - FROM mindsdb_gmail.emails -); -``` - -Finally, you can use the model to classify emails into spam or ham: - -```sql -SELECT h.PRED, h.PRED_explain, t.text_spammy AS input_text -FROM mindsdb.emails_text AS t -JOIN mindsdb.spam_classifier AS h; -``` - - -For more information about available actions and development plans, visit [this page](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/gmail_handler/README.md). 
- diff --git a/docs/integrations/app-integrations/gong.mdx b/docs/integrations/app-integrations/gong.mdx deleted file mode 100644 index 9ed78726226..00000000000 --- a/docs/integrations/app-integrations/gong.mdx +++ /dev/null @@ -1,245 +0,0 @@ ---- -title: Gong -sidebarTitle: Gong ---- - -This documentation describes the integration of MindsDB with [Gong](https://www.gong.io/), a conversation intelligence platform that captures, analyzes, and provides insights from customer conversations. -The integration allows MindsDB to access call recordings, transcripts, analytics, and other conversation data from Gong and enhance it with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect Gong to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). -3. Obtain a Gong API key from your [Gong API settings page](https://app.gong.io/settings/api-keys). - -## Connection - -Establish a connection to Gong from MindsDB by executing the following SQL command and providing its handler name as an engine. 
- -### Using Bearer Token (Recommended) - -```sql -CREATE DATABASE gong_datasource -WITH - ENGINE = 'gong', - PARAMETERS = { - "api_key": "your_gong_api_key_here" - }; -``` - -### Using Basic Authentication - -```sql -CREATE DATABASE gong_datasource -WITH - ENGINE = 'gong', - PARAMETERS = { - "access_key": "your_access_key", - "secret_key": "your_secret_key" - }; -``` - -Required connection parameters include the following: - -**Authentication (choose one method):** - -* `api_key`: Bearer token for authentication (recommended) -* `access_key` + `secret_key`: Basic authentication credentials (alternative method) - -Optional connection parameters include the following: - -* `base_url`: Gong API base URL. This parameter defaults to `https://api.gong.io`. -* `timeout`: Request timeout in seconds. This parameter defaults to `30`. - - -If both authentication methods are provided, basic auth (`access_key` + `secret_key`) takes precedence. - - -## Usage - -The following usage examples utilize `gong_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. 
- -### Available Tables - -The Gong handler provides access to the following tables: - -* `calls` - Access call recordings and metadata -* `users` - Get user information and permissions -* `analytics` - Access AI-generated conversation insights -* `transcripts` - Get full conversation transcripts - -### Basic Queries - -Retrieve recent calls with date filters (recommended for best performance): - -```sql -SELECT * -FROM gong_datasource.calls -WHERE date >= '2024-01-01' AND date < '2024-02-01' -ORDER BY date DESC -LIMIT 20; -``` - -Get all users in your organization: - -```sql -SELECT user_id, name, email, role, status -FROM gong_datasource.users -LIMIT 100; -``` - -Get analytics for calls with high sentiment scores: - -```sql -SELECT call_id, sentiment_score, key_phrases, topics -FROM gong_datasource.analytics -WHERE sentiment_score > 0.7 - AND date >= '2024-01-01' -LIMIT 50; -``` - -Get transcripts for a specific call: - -```sql -SELECT speaker, timestamp, text -FROM gong_datasource.transcripts -WHERE call_id = '12345' -ORDER BY timestamp; -``` - -### Advanced Queries with JOINs - -Get calls with their sentiment analysis: - -```sql -SELECT - c.title, - c.date, - c.duration, - a.sentiment_score, - a.key_phrases -FROM gong_datasource.calls c -JOIN gong_datasource.analytics a ON c.call_id = a.call_id -WHERE c.date >= '2024-01-01' AND c.date < '2024-02-01' -ORDER BY a.sentiment_score DESC -LIMIT 25; -``` - -Find calls where specific keywords were mentioned: - -```sql -SELECT - c.title, - c.date, - t.speaker, - t.text -FROM gong_datasource.calls c -JOIN gong_datasource.transcripts t ON c.call_id = t.call_id -WHERE c.date >= '2024-01-01' - AND t.text LIKE '%pricing%' -LIMIT 50; -``` - -Get user performance with call sentiment: - -```sql -SELECT - u.name, - u.email, - c.call_id, - c.title, - a.sentiment_score -FROM gong_datasource.users u -JOIN gong_datasource.calls c ON u.user_id = c.user_id -JOIN gong_datasource.analytics a ON c.call_id = a.call_id -WHERE c.date >= 
'2024-01-01' - AND a.sentiment_score > 0.8 -LIMIT 100; -``` - -## Data Schema - -### calls Table - -| Column | Description | -|--------|-------------| -| `call_id` | Unique identifier for the call (Primary Key) | -| `title` | Call title or description | -| `date` | Call date and time (ISO-8601 format) | -| `duration` | Call duration in seconds | -| `recording_url` | URL to the call recording | -| `call_type` | Type of call (e.g., "sales", "demo") | -| `user_id` | ID of the user who made the call | -| `participants` | Comma-separated list of participants | -| `status` | Call status | - -### users Table - -| Column | Description | -|--------|-------------| -| `user_id` | Unique identifier for the user (Primary Key) | -| `name` | User's full name | -| `email` | User's email address | -| `role` | User's role in the organization | -| `permissions` | Comma-separated list of user permissions | -| `status` | User status | - -### analytics Table - -| Column | Description | -|--------|-------------| -| `call_id` | Reference to the call (Primary Key, Foreign Key to calls.call_id) | -| `sentiment_score` | Sentiment analysis score | -| `topic_score` | Topic detection score | -| `key_phrases` | Comma-separated list of key phrases | -| `topics` | Comma-separated list of detected topics | -| `emotions` | Comma-separated list of detected emotions | -| `confidence_score` | Confidence score for the analysis | - -### transcripts Table - -| Column | Description | -|--------|-------------| -| `segment_id` | Unique identifier for the transcript segment (Primary Key) | -| `call_id` | Reference to the call (Foreign Key to calls.call_id) | -| `speaker` | Name of the speaker | -| `timestamp` | Timestamp of the transcript segment (ISO-8601 format) | -| `text` | Transcribed text | -| `confidence` | Confidence score for the transcription | - -## Troubleshooting - - -`Authentication Error` - -* **Symptoms**: Failure to connect MindsDB with Gong. -* **Checklist**: - 1. 
Verify that your Gong API key is valid and not expired. - 2. Ensure you have the necessary permissions in Gong to access the API. - 3. Check that your API key has access to the specific data you're querying. - 4. If using basic authentication, verify both `access_key` and `secret_key` are correct. - - - -`Empty Results or Missing Data` - -* **Symptoms**: Queries return no results or incomplete data. -* **Checklist**: - 1. Verify that date filters are included in your query (required for calls, analytics, transcripts). - 2. Check that the date range includes data (analytics and transcripts have ~1 hour lag). - 3. Ensure call_id exists when querying transcripts for a specific call. - 4. Verify that your Gong account has data for the requested time period. - - - -`Slow Query Performance` - -* **Symptoms**: Queries take a long time to execute. -* **Checklist**: - 1. Add date filters to limit the data range (essential for large datasets). - 2. Use LIMIT to restrict the number of results. - 3. Filter by call_id when querying transcripts. - 4. Avoid querying transcripts without filters (can return thousands of rows per call). - diff --git a/docs/integrations/app-integrations/google-analytics.mdx b/docs/integrations/app-integrations/google-analytics.mdx deleted file mode 100644 index 98fce05fa8b..00000000000 --- a/docs/integrations/app-integrations/google-analytics.mdx +++ /dev/null @@ -1,144 +0,0 @@ ---- -title: Google Analytics -sidebarTitle: Google Analytics ---- - -In this section, we present how to connect Google Analytics to MindsDB. - -[Google Analytics](https://analytics.google.com/) is a web analytics service offered by Google that tracks and reports website traffic and also the mobile app traffic & events. - -Data from Google Analytics can be utilized within MindsDB to train AI models, make predictions, and automate user engagement and events with AI. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. 
Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Google Analytics to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Google Analytics. - -## Connection - -The required arguments to establish a connection are as follows: - -* `credentials_file` optional, is a path to the JSON file that stores credentials to the Google account. -* `credentials_json`: optional, is the content of the JSON file that stores credentials to the Google account. -* `property_id` required, is the property id of your Google Analytics website. [Here](https://developers.google.com/analytics/devguides/reporting/data/v1/property-id) is some information on how to get the property id. -> ⚠️ One of credentials_file or credentials_json has to be chosen. - - -Please note that a Google account with enabled Google Analytics Admin API is required. You can find more information [here](https://developers.google.com/analytics/devguides/config/admin/v1/quickstart-client-libraries).

-Also, an active website connected to Google Analytics is required. You can find more information [here](https://support.google.com/analytics/answer/9304153?hl=en). -
- -To make use of this handler and connect the Google Analytics app to MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE my_ga -WITH - ENGINE = 'google_analytics', - PARAMETERS = { - 'credentials_file': '\path-to-your-file\credentials.json', - 'property_id': '' - }; -``` - - -You need a Google account in order to use this integration. Here is how to get the credentials file: - -1. Create a Google Cloud Platform (GCP) Project: - - 1.1 Go to the GCP Console (https://console.cloud.google.com/). - - 1.2 If you haven't created a project before, you'll be prompted to do so now. - - 1.3 Give your new project a name. - - 1.4 Click `Create` to create the new project. - -2. Enable the Google Analytics Admin API: - - 2.1 In the GCP Console, select your project. - - 2.2 Navigate to `APIs & Services` > `Library`. - - 2.3 In the search bar, search for `Google Analytics Admin API`. - - 2.4 Click on ` Google Analytics Admin API `, then click `Enable`. - -3. Create credentials for the Google Analytics Admin API : - - 3.1 Navigate to `APIs & Services` > `Credentials`. - - 3.2 Click on the `Create Credentials` button and choose `Service account`. - - 3.3 Enter a unique `Service account ID` . - - 3.4 Click `Done`. - - 3.5 Copy the service account you created. Find it under `Service Accounts`. - - 3.6 Now click on the service account you created, and navigate `KEYS` - - 3.7 Click `ADD KEY` > `Create new key`. - - 3.8 Choose `JSON` then click `CREATE` - - 3.9 After this the credentials file will be downloaded directly. Locate the file and use the location to it in the `credentials_file` param. - -4. Add Service account to Google Analytics Property: - - 4.1 In the Google Analytics Admin Console, select the Account or Property to which you want to grant access. - - 4.2 Navigate to the `Admin` panel. - - 4.3 Navigate `Account` > `Account Access Management`. - - 4.4 Click on the "+" icon to add a new user. 
- - 4.5 Enter the service account you copied in step 3.5 as the email address. - - 4.6 Assign the appropriate permissions to the service account. At a minimum, you'll need to grant it `Edit` permissions. - - 4.7 Click on the `Add` button to add the service account as a user with the specified permissions. - - -## Usage - -This creates a database that comes with the `conversion_events` table. - -Now you can use your Google Analytics data like this: - -* searching for conversion events: - - ```sql - SELECT event_name, custom, countingMethod - FROM my_ga.conversion_events; - ``` - -* creating conversion event: - - ```sql - INSERT INTO my_ga.conversion_events (event_name, countingMethod) - VALUES ('mindsdb_event', 2); - ``` - -* updating one conversion event: - - ```sql - UPDATE my_ga.conversion_events - SET countingMethod = 1, - WHERE name = ''; - ``` - -* deleting one conversion event: - - ```sql - DELETE - FROM my_ga.conversion_events - WHERE name = ''; - ``` - - -For more information about available actions and development plans, visit [this page](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/google_analytics_handler). - diff --git a/docs/integrations/app-integrations/google-calendar.mdx b/docs/integrations/app-integrations/google-calendar.mdx deleted file mode 100644 index b4ab35b2262..00000000000 --- a/docs/integrations/app-integrations/google-calendar.mdx +++ /dev/null @@ -1,124 +0,0 @@ ---- -title: Google Calendar -sidebarTitle: Google Calendar ---- - -In this section, we present how to connect Google Calendar to MindsDB. - -[Google Calendar](https://calendar.google.com/calendar/) is an online calendar service and application developed by Google. It allows users to create, manage, and share events and appointments, as well as schedule and organize their personal, work, or team activities. - -Data from Google Calendar can be utilized within MindsDB to train AI models, make predictions, and automate time management with AI. 
- -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Google Calendar to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Google Calendar. - -## Connection - -The required arguments to establish a connection are as follows: - -* `credentials_file` is a path to the JSON file that stores credentials to the Google account. - - -Please note that a Google account with enabled Google Calendar is required. You can find more information [here](https://developers.google.com/calendar/api/quickstart/python). - - -In order to make use of this handler and connect the Google Calendar app to MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE my_calendar -WITH - ENGINE = 'google_calendar', - PARAMETERS = { - 'credentials_file': '\path-to-your-file\credentials.json' - }; -``` - - -You need a Google account in order to use this integration. Here is how to get the credentials file: - -1. Create a Google Cloud Platform (GCP) Project: - - 1.1 Go to the GCP Console (https://console.cloud.google.com/). - - 1.2 If you haven't created a project before, you'll be prompted to do so now. - - 1.3 Give your new project a name. - - 1.4 Click `Create` to create the new project. - -2. Enable the Google Calendar API: - - 2.1 In the GCP Console, select your project. - - 2.2 Navigate to `APIs & Services` > `Library`. - - 2.3 In the search bar, search for `Google Calendar API`. - - 2.4 Click on ` Google Calendar API `, then click `Enable`. - -3. Create credentials for the Google Calendar API : - - 3.1 Navigate to `APIs & Services` > `Credentials`. - - 3.2 Click on the `Create Credentials` button and choose `OAuth client ID`. 
- - 3.3 If you haven't configured the OAuth consent screen before, you'll be prompted to do so now. Make sure to choose `External` for User Type, and add all the necessary scopes. Make sure to save the changes. - Now, create the OAuth client ID. Choose `Desktop app` for the Application Type and give it a name. - - 3.4 Click `Create`. - -4. Download the JSON file: - - 4.1 After creating your credentials, click the download button (an icon of an arrow pointing down) on the right side of your client ID. This will download a JSON file, so you will use the location to it in the `credentials_file` param. - - -## Usage - -This creates a database that comes with the `calendar` table. - -Now you can use your Google Calendar data, like this: - -* searching for events: - - ```sql - SELECT id, created_at, author_username, text - FROM my_calendar.events - WHERE start_time = '2023-02-16' - AND end_time = '2023-04-09' LIMIT 20; - ``` - -* creating events: - - ```sql - INSERT INTO my_calendar.events(start_time, end_time, summary, description, location, attendees, reminders, timeZone) - VALUES ('2023-02-16 10:00:00', '2023-02-16 11:00:00', 'MindsDB Meeting', 'Discussing the future of MindsDB', 'MindsDB HQ', '', 'Europe/Athens'); - ``` - -* updating one or more events: - - ```sql - UPDATE my_calendar.events - SET summary = 'MindsDB Meeting', - description = 'Discussing the future of MindsDB', - location = 'MindsDB HQ', - attendees = '', - reminders = '' - WHERE event_id > 1 AND event_id < 10; -- used to update events in a given range - ``` - -* deleting one or more events: - - ```sql - DELETE - FROM my_calendar.events - WHERE id = '1'; - ``` - - -For more information about available actions and development plans, visit [this page](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/google_calendar_handler/README.md). 
- diff --git a/docs/integrations/app-integrations/hackernews.mdx b/docs/integrations/app-integrations/hackernews.mdx deleted file mode 100644 index 2b0d3fa0262..00000000000 --- a/docs/integrations/app-integrations/hackernews.mdx +++ /dev/null @@ -1,44 +0,0 @@ ---- -title: Hacker News -sidebarTitle: Hacker News ---- - -In this section, we present how to connect Hacker News to MindsDB. - -[Hacker News](https://news.ycombinator.com/) is an online platform and community for discussions related to technology, startups, computer science, entrepreneurship, and a wide range of other topics of interest to the tech and hacker communities. It was created by Y Combinator, a well-known startup accelerator. - -Data from Hacker News, including articles and user comments, can be utilized within MindsDB to train AI models and chatbots with the knowledge and discussions shared at Hacker News. - -## Connection - -This handler is implemented using the official Hacker News API. It provides a simple and easy-to-use interface to access the Hacker News API. - -There are no connection arguments required. - -In order to make use of this handler and connect Hacker News to MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE my_hackernews -WITH ENGINE = 'hackernews'; -``` - -It creates a database that comes with the `stories` and `comments` tables. 
- -## Usage - -Now you can query the articles, like this: - -```sql -SELECT * -FROM my_hackernews.stories -LIMIT 2; -``` - -And here is how to fetch comments for a specific article: - -```sql -SELECT * -FROM my_hackernews.comments -WHERE item_id=35662571 -LIMIT 1; -``` diff --git a/docs/integrations/app-integrations/hubspot.mdx b/docs/integrations/app-integrations/hubspot.mdx deleted file mode 100644 index c29bba13436..00000000000 --- a/docs/integrations/app-integrations/hubspot.mdx +++ /dev/null @@ -1,158 +0,0 @@ ---- -title: HubSpot -sidebarTitle: HubSpot ---- - -This documentation describes the integration of MindsDB with HubSpot. The HubSpot handler for MindsDB provides interfaces to connect to HubSpot via APIs and pull store data into MindsDB. HubSpot is a comprehensive CRM platform providing marketing, sales, content management, and customer service tools. This integration exposes HubSpot CRM data through MindsDB's SQL interface. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect HubSpot to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -## Authentication - -The handler supports two authentication methods: - -### Personal Access Token Authentication - -Recommended for server-to-server integrations and production environments. - -**Steps to obtain an access token:** -1. Navigate to your HubSpot account settings -2. Go to Integrations -> Private Apps -3. Create a new private app or select an existing one -4. Configure required scopes for the tables you plan to access -5. Copy the generated access token - -### OAuth Authentication - -Recommended for applications requiring user consent and dynamic scope management. 
- -**Required OAuth Parameters:** -- `client_id`: Your app's client identifier -- `client_secret`: Your app's client secret (store securely) - -OAuth token exchange and refresh are handled externally. - -## Supported Tables - -### Core CRM and Engagement Tables - -These tables support `SELECT`, `INSERT`, `UPDATE`, and `DELETE` operations. - -| Table Name | Description | Reference | -|------------|-------------|-------------| -| `companies` | Company records from HubSpot CRM | https://developers.hubspot.com/docs/api-reference/crm-companies-v3/guide | -| `contacts` | Contact records from HubSpot CRM | https://developers.hubspot.com/docs/api-reference/crm-contacts-v3/guide | -| `deals` | Deal records from HubSpot CRM | https://developers.hubspot.com/docs/api-reference/crm-deals-v3/guide | -| `tickets` | Support ticket records | https://developers.hubspot.com/docs/api-reference/crm-tickets-v3/guide | -| `tasks` | Task and follow-up records | https://developers.hubspot.com/docs/api-reference/crm-tasks-v3/guide | -| `calls` | Call log records | https://developers.hubspot.com/docs/api-reference/crm-calls-v3/guide | -| `emails` | Email log records | https://developers.hubspot.com/docs/api-reference/crm-emails-v3/guide | -| `meetings` | Meeting records | https://developers.hubspot.com/docs/api-reference/crm-meetings-v3/guide | -| `notes` | Timeline notes | https://developers.hubspot.com/docs/api-reference/crm-notes-v3/guide | - -### Metadata Tables - -These tables are read-only and support `SELECT` only. - -| Table Name | Description | Reference | -|------------|-------------|-------------| -| `owners` | HubSpot owners with names and emails | https://developers.hubspot.com/docs/api-reference/crm-owners-v3/guide | -| `pipelines` | Deal pipelines with names and stages | https://developers.hubspot.com/docs/api-reference/crm-pipelines-v3/guide | - -### Association Tables - -Association tables are read-only and support `SELECT` only. 
They expose relationships between objects and include `association_type` and `association_label` columns. - - Reference: https://developers.hubspot.com/docs/api-reference/crm-associations-v4/guide - -| Table Name | Description | -|------------|-------------| -| `company_contacts` | Company to contact associations | -| `company_deals` | Company to deal associations | -| `company_tickets` | Company to ticket associations | -| `contact_companies` | Contact to company associations | -| `contact_deals` | Contact to deal associations | -| `contact_tickets` | Contact to ticket associations | -| `deal_companies` | Deal to company associations | -| `deal_contacts` | Deal to contact associations | -| `ticket_companies` | Ticket to company associations | -| `ticket_contacts` | Ticket to contact associations | -| `ticket_deals` | Ticket to deal associations | - -## Connection - -**Using Access Token:** -```sql -CREATE DATABASE hubspot_datasource -WITH ENGINE = 'hubspot', -PARAMETERS = { - "access_token": "pat-na1-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -}; -``` - -**Using OAuth (Advanced):** -```sql -CREATE DATABASE hubspot_datasource -WITH ENGINE = 'hubspot', -PARAMETERS = { - "client_id": "your-client-id", - "client_secret": "your-client-secret" -}; -``` - -## Usage - -**Basic Data Retrieval:** -```sql -SELECT * FROM hubspot_datasource.companies LIMIT 10; -SELECT * FROM hubspot_datasource.contacts LIMIT 10; -SELECT * FROM hubspot_datasource.deals LIMIT 10; -``` - -**Date Filters (Supported Functions):** -```sql -SELECT * FROM hubspot_datasource.deals -WHERE closedate >= DATE_SUB(CURRENT_DATE, INTERVAL 2 YEAR); -``` - -**Creating Records:** -```sql -INSERT INTO hubspot_datasource.companies (name, domain, industry, city, state) -VALUES ('Acme Corp', 'acme.com', 'COMPUTER_SOFTWARE', 'New York', 'NY'); - -INSERT INTO hubspot_datasource.contacts (email, firstname, phone) -VALUES ('john.doe@example.com', 'John', '+1234567890'); - -INSERT INTO hubspot_datasource.tasks 
(hs_task_subject, hs_task_status) -VALUES ('Follow up with Acme', 'WAITING'); -``` - -**Updating Records:** -```sql -UPDATE hubspot_datasource.companies -SET industry = 'COMPUTER_SOFTWARE', city = 'Austin' -WHERE name = 'Acme Corp'; - -UPDATE hubspot_datasource.deals -SET dealstage = '110382973', amount = '75000' -WHERE dealname = 'New Deal'; -``` - -**Deleting Records:** -```sql -DELETE FROM hubspot_datasource.deals -WHERE dealstage = 'closedlost' - AND createdate < '2023-01-01'; -``` - -## Notes on Filters and Limits - -- Supported filter operators include `=`, `!=`, `<`, `<=`, `>`, `>=`, `IN`, and `NOT IN`. -- Date helpers supported in filters include `CURDATE()`/`CURRENT_DATE`, `NOW()`/`CURRENT_TIMESTAMP`, `DATE_SUB`, and `DATE_ADD`. -- Updates and deletes evaluate conditions against a sample of up to 200 records before applying changes. -- Unsupported filters or order-by expressions are skipped rather than raising errors. diff --git a/docs/integrations/app-integrations/instatus.mdx b/docs/integrations/app-integrations/instatus.mdx deleted file mode 100644 index 65f97e68fc1..00000000000 --- a/docs/integrations/app-integrations/instatus.mdx +++ /dev/null @@ -1,74 +0,0 @@ ---- -title: Instatus -sidebarTitle: Instatus ---- - -In this section, we present how to connect Instatus to MindsDB. - -[Instatus](https://instatus.com/) is a cloud-based status page software that enables users to communicate status information using incidents and maintenances. It serves as a SaaS platform for creating status pages for services. - -The Instatus Handler for MindsDB offers an interface to connect with Instatus via APIs and retrieve status pages. - -## Connection - -Initialize the Instatus handler with the following parameter: - -- `api_key`: Instatus API key for authentication. Obtain it from [Instatus Developer Dashboard](https://dashboard.instatus.com/developer). 
- -Start by creating a database with the new instatus engine using the following SQL command: - -```sql -CREATE DATABASE mindsdb_instatus --- Display name for the database. -WITH - ENGINE = 'instatus', --- Name of the MindsDB handler. - PARAMETERS = { - "api_key": "" --- Instatus API key to use for authentication. - }; -``` - -## Usage - -To get a status page, use the `SELECT` statement: - -```sql -SELECT id, name, status, subdomain -FROM mindsdb_instatus.status_pages -WHERE id = '' -LIMIT 10; -``` - -To create a new status page, use the `INSERT` statement: - -```sql -INSERT INTO mindsdb_instatus.status_pages (email, name, subdomain, components, logoUrl, faviconUrl, websiteUrl, language, useLargeHeader, brandColor, okColor, disruptedColor, degradedColor, downColor, noticeColor, unknownColor, googleAnalytics, subscribeBySms, smsService, twilioSid, twilioToken, twilioSender, nexmoKey, nexmoSecret, nexmoSender, htmlInMeta, htmlAboveHeader, htmlBelowHeader, htmlAboveFooter, htmlBelowFooter, htmlBelowSummary, cssGlobal, launchDate, dateFormat, dateFormatShort, timeFormat) -VALUES ('yourname@gmail.com', 'mindsdb', 'mindsdb-instatus', '["Website", "App", "API"]', 'https://instatus.com/sample.png', 'https://instatus.com/favicon-32x32.png', 'https://instatus.com', 'en', 'true', '#111', '#33B17E', '#FF8C03', '#ECC94B', '#DC123D', '#70808F', '#DFE0E1', 'UA-00000000-1', 'true', 'twilio', 'YOUR_TWILIO_SID', 'YOUR_TWILIO_TOKEN', 'YOUR_TWILIO_SENDER', null, null, null, null, null, null, null, null, null, null, null, 'MMMMMM d, yyyy', 'MMM yyyy', 'p'); -``` - - -The following fields are required when inserting new status pages: - -- `email` (e.g. 'yourname@gmail.com') -- `name` (e.g 'mindsdb') -- `subdomain` (e.g. 'mindsdb-docs') -- `components` (e.g. '["Website", "App", "API"]') - -The other fields are optional. 
- - -To update an existing status page, use the `UPDATE` statement: - -```sql -UPDATE mindsdb_instatus.status_pages -SET name = 'mindsdb', - status = 'UP', - logoUrl = 'https://instatus.com/sample.png', - faviconUrl = 'https://instatus.com/favicon-32x32.png', - websiteUrl = 'https://instatus.com', - language = 'en', - translations = '{ - "name": { - "fr": "nasa" - } - }' -WHERE id = ''; -``` \ No newline at end of file diff --git a/docs/integrations/app-integrations/intercom.mdx b/docs/integrations/app-integrations/intercom.mdx deleted file mode 100644 index 0ec3bbb0793..00000000000 --- a/docs/integrations/app-integrations/intercom.mdx +++ /dev/null @@ -1,68 +0,0 @@ ---- -title: Intercom -sidebarTitle: Intercom ---- - -[Intercom](https://intercom.com) is a software company that provides customer messaging and engagement tools for businesses. They offer products and services for customer support, marketing, and sales, allowing companies to communicate with their customers through various channels like chat, email, and more. - -## Connection - -To get started with the Intercom API, you need to initialize the API handler with the required access token for authentication. You can do this as follows: - -- `access_token`: Your Intercom access token for authentication. - - -Check out [this guide](https://developers.intercom.com/docs/build-an-integration/learn-more/authentication/) on how to get the intercom access token in order to access Intercom data. - - -To create a database using the Intercom engine, you can use a SQL-like syntax as shown below: - -```sql -CREATE DATABASE myintercom -WITH - ENGINE = 'intercom', - PARAMETERS = { - "access_token" : "your-intercom-access-token" - }; -``` - -## Usage - -You can retrieve data from Intercom using a `SELECT` statement. For example: - -```sql -SELECT * -FROM myintercom.articles; -``` - -You can filter data based on specific criteria using a `WHERE` clause. 
Here's an example: - -```sql -SELECT * -FROM myintercom.articles -WHERE id = ; -``` - -To create a new article in Intercom, you can use the `INSERT` statement. Here's an example: - -```sql -INSERT INTO myintercom.articles (title, description, body, author_id, state, parent_id, parent_type) -VALUES ( - 'Thanks for everything', - 'Description of the Article', - 'Body of the Article', - 6840572, - 'published', - 6801839, - 'collection' -); -``` - -You can update existing records in Intercom using the `UPDATE` statement. For instance: - -```sql -UPDATE myintercom.articles -SET title = 'Christmas is here!', - body = '

New gifts in store for the jolly season

' -WHERE id = ; -``` \ No newline at end of file diff --git a/docs/integrations/app-integrations/jira.mdx b/docs/integrations/app-integrations/jira.mdx deleted file mode 100644 index 3671aa31af4..00000000000 --- a/docs/integrations/app-integrations/jira.mdx +++ /dev/null @@ -1,100 +0,0 @@ ---- -title: Jira -sidebarTitle: Jira ---- - -This documentation describes the integration of MindsDB with [Jira](https://www.atlassian.com/software/jira/guides/getting-started/introduction), the #1 agile project management tool used by teams to plan, track, release and support world-class software with confidence. -The integration allows MindsDB to access data from Jira and enhance it with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect Jira to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to Jira from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/jira_handler) as an engine. - -```sql -CREATE DATABASE jira_datasource -WITH - ENGINE = 'jira', - PARAMETERS = { - "jira_url": "https://example.atlassian.net", - "jira_username": "john.doe@example.com", - "jira_api_token": "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6", - "cloud": true - }; -``` - -Required connection parameters include the following: - -- `jira_url`: The base URL for your Jira instance/server. -- `cloud` (optional): Set `true` for Jira Cloud or `false` for Jira Server. Defaults to `true`. 
-- Jira Cloud credentials: - - `jira_username` - - `jira_api_token` -- Jira Server credentials (set `cloud: false`): - - Either `jira_personal_access_token`, **or** - - `jira_username` and `jira_password` - - -Refer to this [guide](https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/) for instructions on how to create API tokens for your account. - - -## Usage - -Retrieve data from a specified table by providing the integration and table names: - -```sql -SELECT * -FROM jira_datasource.table_name -LIMIT 10; -``` - - -The above example utilizes `jira_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Available tables - -The handler registers the following tables: - -- `projects`: Basic project metadata. -- `issues`: Normalized issue fields (project, summary, description, priority, status, labels, components, creator/reporter/assignee, timestamps). -- `attachments`: Attachments derived from issues. -- `comments`: Comments derived from issues. -- `users`: Users available to the current Jira context. Column set depends on `cloud`: - - Cloud columns: `accountId, accountType, emailAddress, displayName, active, timeZone, locale, applicationRoles, avatarUrls, groups` - - Server columns: `key, name, emailAddress, displayName, active, timeZone, locale, lastLoginTime, applicationRoles, avatarUrls, groups, deleted, expand` -- `groups`: User groups (`groupId, name, html`). - -Attachments and comments are fetched by first loading issues. Use `LIMIT` whenever possible to reduce API calls. 
- -## Query examples - -List projects: - -```sql -SELECT id, key, name -FROM jira_datasource.projects; -``` - -Fetch recent issues for a project: - -```sql -SELECT key, summary, status, assignee, created -FROM jira_datasource.issues -WHERE project_key = 'ENG' -LIMIT 50; -``` - -Retrieve comments for a specific issue: - -```sql -SELECT body, author, created -FROM jira_datasource.comments -WHERE issue_key = 'ENG-123'; -``` diff --git a/docs/integrations/app-integrations/mediawiki.mdx b/docs/integrations/app-integrations/mediawiki.mdx deleted file mode 100644 index 8a6d7125d92..00000000000 --- a/docs/integrations/app-integrations/mediawiki.mdx +++ /dev/null @@ -1,49 +0,0 @@ ---- -title: MediaWiki -sidebarTitle: MediaWiki ---- - -In this section, we present how to connect MediaWiki to MindsDB. - -[MediaWiki](https://www.mediawiki.org/wiki/MediaWiki) is a free and open-source wiki software platform that is designed to enable the creation and management of wikis. It was originally developed for and continues to power Wikipedia. MediaWiki is highly customizable and can be used to create a wide range of collaborative websites and knowledge bases. - -Data from MediaWiki can be utilized within MindsDB to train AI models and chatbots using the wide range of available information. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect MediaWiki to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to MediaWiki. - -## Connection - -This handler was implemented using [MediaWikiAPI](https://github.com/lehinevych/MediaWikiAPI), the Python wrapper for the MediaWiki API. - -There are no connection arguments required to initialize the handler. 
- -To connect the MediaWiki API to MindsDB, the following CREATE DATABASE statement can be used: - -```sql -CREATE DATABASE mediawiki_datasource -WITH ENGINE = 'mediawiki' -``` - -## Usage - -Now, you can query the MediaWiki API as follows: - -```sql -SELECT * FROM mediawiki_datasource.pages -``` - -You can run more advanced queries to fetch specific pages in a defined order: - -```sql -SELECT * -FROM mediawiki_datasource.pages -WHERE title = 'Barack' -ORDER BY pageid -LIMIT 5 -``` \ No newline at end of file diff --git a/docs/integrations/app-integrations/microsoft-onedrive.mdx b/docs/integrations/app-integrations/microsoft-onedrive.mdx deleted file mode 100644 index 647cf45ee18..00000000000 --- a/docs/integrations/app-integrations/microsoft-onedrive.mdx +++ /dev/null @@ -1,114 +0,0 @@ ---- -title: Microsoft One Drive -sidebarTitle: Microsoft One Drive ---- - -This documentation describes the integration of MindsDB with [Microsoft OneDrive](https://www.microsoft.com/en-us/microsoft-365/onedrive/online-cloud-storage), a cloud storage service that lets you back up, access, edit, share, and sync your files from any device. - -## Prerequisites - -1. Before proceeding, ensure that MindsDB is installed locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. Register an application in the [Azure portal](https://portal.azure.com/). - - Navigate to the [Azure Portal](https://portal.azure.com/#home) and sign in with your Microsoft account. - - Locate the **Microsoft Entra ID** service and click on it. - - Click on **App registrations** and then click on **New registration**. - - Enter a name for your application and select the `Accounts in this organizational directory only` option for the **Supported account types** field. - - Keep the **Redirect URI** field empty and click on **Register**. - - Click on **API permissions** and then click on **Add a permission**. 
- - Select **Microsoft Graph** and then click on **Delegated permissions**. - - Search for the `Files.Read` permission and select it. - - Click on **Add permissions**. - - Request an administrator to grant consent for the above permissions. If you are the administrator, click on **Grant admin consent for [your organization]** and then click on **Yes**. - - Copy the **Application (client) ID** and record it as the `client_id` parameter, and copy the **Directory (tenant) ID** and record it as the `tenant_id` parameter. - - Click on **Certificates & secrets** and then click on **New client secret**. - - Enter a description for your client secret and select an expiration period. - - Click on **Add** and copy the generated client secret and record it as the `client_secret` parameter. - - Click on **Authentication** and then click on **Add a platform**. - - Select **Web** and enter URL where MindsDB has been deployed followed by `/verify-auth` in the **Redirect URIs** field. For example, if you are running MindsDB locally (on `https://localhost:47334`), enter `https://localhost:47334/verify-auth` in the **Redirect URIs** field. - -## Connection - -Establish a connection to Microsoft OneDrive from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE one_drive_datasource -WITH - engine = 'one_drive', - parameters = { - "client_id": "12345678-90ab-cdef-1234-567890abcdef", - "client_secret": "abcd1234efgh5678ijkl9012mnop3456qrst7890uvwx", - "tenant_id": "abcdef12-3456-7890-abcd-ef1234567890", - }; -``` - - -Note that sample parameter values are provided here for reference, and you should replace them with your connection parameters. - - -Required connection parameters include the following: - -* `client_id`: The client ID of the registered application. -* `client_secret`: The client secret of the registered application. -* `tenant_id`: The tenant ID of the registered application. 
- -## Usage - -Retrieve data from a specified file in Microsoft OneDrive by providing the integration name and the file name: - -```sql -SELECT * -FROM one_drive_datasource.`my-file.csv` -LIMIT 10; -``` - - -Wrap the object key in backticks (\`) to avoid any issues parsing the SQL statements provided. This is especially important when the file name contains spaces, special characters or prefixes, such as `my-folder/my-file.csv`. - -At the moment, the supported file formats are CSV, TSV, JSON, and Parquet. - - - -The above examples utilize `one_drive_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -The special `files` table can be used to list the files available in Microsoft OneDrive: - -```sql -SELECT * -FROM one_drive_datasource.files LIMIT 10 -``` - -The content of files can also be retrieved by explicitly requesting the `content` column. This column is empty by default to avoid unnecessary data transfer: - -```sql -SELECT path, content -FROM one_drive_datasource.files LIMIT 10 -``` - - -This table will return all objects regardless of the file format; however, only the supported file formats mentioned above can be queried. - - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with Microsoft OneDrive. -* **Checklist**: - 1. Ensure the `client_id`, `client_secret` and `tenant_id` parameters are correctly provided. - 2. Ensure the registered application has the required permissions. - 3. Ensure the generated client secret is not expired. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing object names containing spaces, special characters or prefixes. -* **Checklist**: - 1. Ensure object names with spaces, special characters or prefixes are enclosed in backticks. - 2. 
Examples: - * Incorrect: SELECT * FROM integration.travel/travel_data.csv - * Incorrect: SELECT * FROM integration.'travel/travel_data.csv' - * Correct: SELECT * FROM integration.\`travel/travel_data.csv\` - \ No newline at end of file diff --git a/docs/integrations/app-integrations/microsoft-teams.mdx b/docs/integrations/app-integrations/microsoft-teams.mdx deleted file mode 100644 index ba8c68b292c..00000000000 --- a/docs/integrations/app-integrations/microsoft-teams.mdx +++ /dev/null @@ -1,105 +0,0 @@ ---- -title: Microsoft Teams -sidebarTitle: Microsoft Teams ---- - -This documentation describes the integration of MindsDB with [Microsoft Teams](https://www.microsoft.com/en-us/microsoft-teams/group-chat-software), the ultimate messaging app for your organization. -The integration allows MindsDB to access data from Microsoft Teams and enhance it with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Microsoft Teams to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to Microsoft Teams from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/ms_teams_handler) as an engine. - -```sql -CREATE DATABASE teams_datasource -WITH ENGINE = 'teams', -PARAMETERS = { - "client_id": "12345678-90ab-cdef-1234-567890abcdef", - "client_secret": "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6", - "tenant_id": "abcdef12-3456-7890-abcd-ef1234567890" -}; -``` - -Required connection parameters include the following: - -* `client_id`: The client ID of the registered Microsoft Entra ID application. -* `client_secret`: The client secret of the registered Microsoft Entra ID application. 
-* `tenant_id`: The tenant ID of the Microsoft Entra ID directory. - -Optional connection parameters include the following: - -* `permission_mode`: The type of permissions used to access data in Microsoft Teams. Can be either `delegated` (default) or `application`. - - -The `delegated` permission mode requires user sign-in and allows the app to access data on behalf of the signed-in user. The `application` permission mode does not require user sign-in and allows the app to access data without a user context. You can learn more about permission types in the [Microsoft Graph permissions documentation](https://learn.microsoft.com/en-us/graph/auth/auth-concepts#delegated-and-application-permissions). -Note that application permissions generally require higher privileges and admin consent compared to delegated permissions, as they allow broader access to organizational data without user context. - - - -Microsoft Entra ID was previously known as Azure Active Directory (Azure AD). - - -### How to set up the Microsoft Entra ID app registration - -Follow the instructions below to set up the Microsoft Teams app that will be used to connect with MindsDB. - - - - - Navigate to Microsoft Entra ID in the Azure portal, click on *Add* and then on *App registration*. - - Click on *New registration* and fill out the *Name* and select the `Accounts in any organizational directory (Any Azure AD directory - Multitenant)` option under *Supported account types*. - - If you chose the `application` permission mode you may skip this step, but if you are using `delegated` permissions, select `Web` as the platform and enter URL where MindsDB has been deployed followed by /verify-auth under *Redirect URI*. For example, if you are running MindsDB locally (on https://localhost:47334), enter https://localhost:47334/verify-auth in the Redirect URIs field. - - Click on *Register*. 
**Save the *Application (client) ID* and *Directory (tenant) ID* for later use.** - - Click on *API Permissions* and then click on *Add a permission*. - - Select *Microsoft Graph* and then click on either *Delegated permissions* or *Application permissions* based on the permission mode you have chosen. - - Search for the following permissions and select them: - - `delegated` permission mode: - - Team.ReadBasic.All - - Channel.ReadBasic.All - - ChannelMessage.Read.All - - Chat.Read - - `application` permission mode: - - Group.Read.All - - ChannelMessage.Read.All - - Chat.Read.All - - Click on **Add permissions**. - - Request an administrator to grant consent for the above permissions. If you are the administrator, click on **Grant admin consent for [your organization]** and then click on **Yes**. - - Click on *Certificates & secrets* under *Manage*. - - Click on *New client secret* and fill out the *Description* and select an appropriate *Expires* period, and click on *Add*. - - Copy and **save the client secret in a secure location.** - - If you already have an existing app registration, you can use it instead of creating a new one and skip the above steps. - - - - - Open the MindsDB editor and create a connection to Microsoft Teams using the client ID, client secret and tenant ID obtained in the previous steps. Use the `CREATE DATABASE` statement as shown above. - - - -## Usage - -Retrieve data from a specified table by providing the integration and table names: - -```sql -SELECT * -FROM teams_datasource.table_name -LIMIT 10; -``` - - -The above example utilizes `teams_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Supported Tables - -* `teams`: The table containing information about the teams in Microsoft Teams. -* `channels`: The table containing information about the channels in Microsoft Teams. -* `channel_messages`: The table containing information about messages from channels in Microsoft Teams.
-* `chats`: The table containing information about the chats in Microsoft Teams. -* `chat_messages`: The table containing information about messages from chats in Microsoft Teams. diff --git a/docs/integrations/app-integrations/netsuite.mdx b/docs/integrations/app-integrations/netsuite.mdx deleted file mode 100644 index 4afb636acb4..00000000000 --- a/docs/integrations/app-integrations/netsuite.mdx +++ /dev/null @@ -1,93 +0,0 @@ ---- -title: Oracle NetSuite -sidebarTitle: Oracle NetSuite ---- - -This documentation describes the integration of MindsDB with Oracle NetSuite using the REST Record API and SuiteQL. -It lets you query NetSuite data in SQL and run SuiteQL directly when you need full control over filtering and joins. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. Enable Token-Based Authentication (TBA) and REST Web Services in NetSuite: - - Setup > Company > Enable Features > SuiteCloud tab - - Check "Token-Based Authentication" and "REST Web Services" - -## Connection - -Establish a connection to NetSuite from MindsDB by executing the following SQL command and providing its handler name as an engine. - -```sql -CREATE DATABASE netsuite_datasource -WITH - ENGINE = 'netsuite', - PARAMETERS = { - "account_id": "123456_SB1", - "consumer_key": "ck_...", - "consumer_secret": "cs_...", - "token_id": "token_...", - "token_secret": "token_secret_..." - }; -``` - -Required connection parameters include the following: - -- `account_id`: NetSuite account/realm ID (e.g. 
`123456_SB1`) -- `consumer_key`: Integration consumer key -- `consumer_secret`: Integration consumer secret -- `token_id`: Access token ID -- `token_secret`: Access token secret - -Optional connection parameters include the following: - -- `rest_domain`: Override REST domain from Company Information (REST Web Services URL) -- `record_types`: Comma-separated record types to expose as tables (defaults to [this list]()) - -## Token-Based Authentication setup - -To create the required credentials in NetSuite: - -1. Create an Integration record: Setup > Integrations > Manage Integrations > New. Enable Token-Based Authentication. -2. Create/choose a role for the integration and grant: - - Setup > REST Web Services (Full) - - Setup > User Access Tokens (Full) - - Record-level permissions you will query (e.g., Transactions > Sales Order, Lists > Customers). -3. Assign that role to the user. -4. Generate an Access Token: Setup > Users/Roles > Access Tokens > New. -5. Copy the Consumer Key/Secret and Token ID/Secret. - -## Usage - -Retrieve data from a record table: - -```sql -SELECT * -FROM netsuite_datasource.salesOrder -WHERE id = 48; -``` - -REST record tables: -- Use `WHERE id = ...` (or `internalId`) to fetch a full record directly. -- Other filters are pushed down as `q` where possible; remaining filters are applied locally. - -Run SuiteQL directly using the native query syntax (recommended for complex filters): - -```sql -SELECT * FROM netsuite_datasource ( - SELECT id, tranid, total - FROM transaction - WHERE type = 'SalesOrd' - FETCH NEXT 5 ROWS ONLY -); -``` - - -Use the `rest_domain` parameter if your account uses a REST domain that differs from the default derived from `account_id`. - - - -Access to both REST record tables and SuiteQL depends on the NetSuite role tied to your access token. 
-If a query fails with 403/permission errors, ensure the role includes REST Web Services, User Access Tokens, and record-specific permissions for the tables you are querying (plus SuiteAnalytics permissions for SuiteQL). - diff --git a/docs/integrations/app-integrations/newsapi.mdx b/docs/integrations/app-integrations/newsapi.mdx deleted file mode 100644 index 1518c7e2ecd..00000000000 --- a/docs/integrations/app-integrations/newsapi.mdx +++ /dev/null @@ -1,67 +0,0 @@ ---- -title: News API -sidebarTitle: News API ---- - -In this section, we present how to connect News API to MindsDB. - -[News API](https://newsapi.org/) is a simple HTTP REST API for searching and retrieving live articles from all over the web. - -Data from News API can be utilized within MindsDB for model training and predictions. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect News API to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to News API. - -## Connection - -This handler is implemented using the [newsapi-python](https://newsapi.org/docs/client-libraries/python) library. - -The required arguments to establish a connection are as follows: - -* `api_key` News API key to use for authentication. - - -Check out [this guide](https://newsapi.org/docs/authentication) on how to create the API key. - -It is recommended to use the API key to avoid the `API rate limit exceeded` error. 
- - -Here is how to connect News API to MindsDB: - -```sql -CREATE DATABASE newsAPI -WITH ENGINE = 'newsapi' -PARAMETERS = { - "api_key": "Your api key" -}; -``` - -## Usage - -Simple Search for recent articles: - -```sql -SELECT * -FROM newsAPI.article -WHERE query = 'Python'; -``` - -Advanced search for recent articles per specific sources between dates: - -```sql -SELECT * -FROM newsAPI.article -WHERE query = 'Python' -AND sources="bbc-news" -AND publishedAt >= "2021-03-23" AND publishedAt <= "2023-04-23" -LIMIT 4; -``` - - -For more information about available actions and development plans, visit [this page](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/newsapi_handler/README.md). - diff --git a/docs/integrations/app-integrations/paypal.mdx b/docs/integrations/app-integrations/paypal.mdx deleted file mode 100644 index ad2305ce86b..00000000000 --- a/docs/integrations/app-integrations/paypal.mdx +++ /dev/null @@ -1,120 +0,0 @@ ---- -title: PayPal -sidebarTitle: PayPal ---- - -In this section, we present how to connect PayPal to MindsDB. - -[PayPal](https://www.bankrate.com/finance/credit-cards/guide-to-using-paypal/) is an online payment system that makes paying for things online and sending and receiving money safe and secure. - -Data from PayPal can be utilized within MindsDB to train models and make predictions about your transactions. - -## Connection - -This handler is implemented using [PayPal-Python-SDK](https://github.com/paypal/PayPal-Python-SDK), the Python SDK for PayPal RESTful APIs. - -The required arguments to establish a connection are as follows: - -* `mode`: The mode of the PayPal API. Can be `sandbox` or `live`. -* `client_id`: The client ID of the PayPal API. -* `client_secret`: The client secret of the PayPal API. 
- -To connect to PayPal using MindsDB, the following CREATE DATABASE statement can be used: - -```sql -CREATE DATABASE paypal_datasource -WITH ENGINE = 'paypal', -PARAMETERS = { - "mode": "your-paypal-mode", - "client_id": "your-paypal-client-id", - "client_secret": "your-paypal-client-secret" -}; -``` - - -Check out [this guide](https://developer.paypal.com/api/rest/) on how to create client credentials for PayPal. - - -## Usage - -Now, you can query PayPal as follows: - -Payments: -```sql -SELECT * FROM paypal_datasource.payments -``` - -Invoices: -```sql -SELECT * FROM paypal_datasource.invoices -``` - -Subscriptions: -```sql -SELECT * FROM paypal_datasource.subscriptions -``` - -Orders: -```sql -SELECT * FROM paypal_datasource.orders -``` - -Payouts: -```sql -SELECT * FROM paypal_datasource.payouts -``` - -You can also run more advanced queries on your data: - -Payments: -```sql -SELECT intent, cart -FROM paypal_datasource.payments -WHERE state = 'approved' -ORDER BY id -LIMIT 5 -``` - -Invoices: -```sql -SELECT invoice_number, total_amount -FROM paypal_datasource.invoices -WHERE status = 'PAID' -ORDER BY total_amount DESC -LIMIT 5 -``` - -Subscriptions: -```sql -SELECT id, state, name -FROM paypal_datasource.subscriptions -WHERE state ="CREATED" -LIMIT 5 -``` - -Orders: -```sql -SELECT id, state, amount -FROM paypal_datasource.orders -WHERE state = 'APPROVED' -ORDER BY total_amount DESC -LIMIT 5 -``` - -Payouts: -```sql -SELECT payout_batch_id, amount_currency, amount_value -FROM paypal_datasource.payouts -ORDER BY amount_value DESC -LIMIT 5 -``` - -## Supported Tables - -The following tables are supported by the PayPal handler: - -* `payments`: payments made. -* `invoices`: invoices created. -* `subscriptions`: subscriptions created. -* `orders`: orders created. -* `payouts`: payouts made. 
\ No newline at end of file diff --git a/docs/integrations/app-integrations/plaid.mdx b/docs/integrations/app-integrations/plaid.mdx deleted file mode 100644 index 24e9ac912b5..00000000000 --- a/docs/integrations/app-integrations/plaid.mdx +++ /dev/null @@ -1,75 +0,0 @@ ---- -title: Plaid -sidebarTitle: Plaid ---- - -In this section, we present how to connect Plaid to MindsDB. - -[Plaid](https://plaid.com/) is a financial technology company that offers a platform and a set of APIs that facilitate the integration of financial services and data into applications and websites. Its services primarily focus on enabling developers to connect with and access financial accounts and data from various financial institutions. - -Data from Plaid can be utilized within MindsDB to train AI models and make financial forecasts. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Plaid to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Plaid. - -## Connection - -The required arguments to establish a connection are as follows: - -* `client_id` -* `secret` -* `access_token` -* `plaid_env` - - -You can get the `client_id`, `secret`, and `access_token` values [here](https://dashboard.plaid.com/team/keys) once you sign in to your Plaid account. -And [here](https://plaid.com/docs/api/items/#itempublic_tokenexchange) is how you generate the `access_token` value. 
- - -In order to make use of this handler and connect the Plaid app to MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE my_plaid -WITH - ENGINE = 'plaid', - PARAMETERS = { - "client_id": "YOUR_CLIENT_ID", - "secret": "YOUR_SECRET", - "access_token": "YOUR_PUBLIC_KEY", - "plaid_env": "ENV" - }; -``` - -It creates a database that comes with two tables: `transactions` and `balance`. - -## Usage - -Now you can query your data, like this: - -```sql -SELECT id, merchant_name, authorized_date, amount, payment_channel -FROM my_plaid.transactions -WHERE start_date = '2022-01-01' -AND end_date = '2023-04-11' -LIMIT 20; -``` - -And if you want to use functions provided by the Plaid API, you can use the native queries syntax, like this: - -```sql -SELECT * FROM my_plaid ( - get_transactions( - start_date = '2022-01-01', - end_date = '2022-02-01' - ) -); -``` - - -For more information about available actions and development plans, visit [this page](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/plaid_handler/README.md). - diff --git a/docs/integrations/app-integrations/pypi.mdx b/docs/integrations/app-integrations/pypi.mdx deleted file mode 100644 index 08c3f6a501b..00000000000 --- a/docs/integrations/app-integrations/pypi.mdx +++ /dev/null @@ -1,73 +0,0 @@ ---- -title: PyPI -sidebarTitle: PyPI ---- - -In this section, we present how to connect PyPI to MindsDB. - -[PyPI](https://pypi.org) is a host for maintaining and storing Python packages. It's a good place for publishing your Python packages in different versions and releases. - -Data from PyPI can be utilized within MindsDB to train models and make predictions about your Python packages. - -## Connection - -This handler is implemented using the standard Python `requests` library. It is used to connect to the RESTful service that [pypistats.org](https://pypistats.org) is serving. - -There are no connection arguments required to initialize the handler. 
- -To connect to PyPI using MindsDB, the following CREATE DATABASE statement can be used: - -```sql -CREATE DATABASE pypi_datasource -WITH ENGINE = 'pypi' -``` - -## Usage - -Now, you can use the following queries to view the statistics for Python packages (MindsDB, for example): - -Overall downloads, including mirrors: -```sql -SELECT * -FROM pypi_datasource.overall WHERE package="mindsdb" AND mirrors=true; -``` - -Overall downloads on CPython==2.7: -```sql -SELECT * -FROM pypi_datasource.python_minor WHERE package="mindsdb" AND version="2.7"; -``` - -Recent downloads: -```sql -SELECT * -FROM pypi_datasource.recent WHERE package="mindsdb"; -``` - -Recent downloads in the last day: -```sql -SELECT * -FROM pypi_datasource.recent WHERE package="mindsdb" AND period="day"; -``` - -All downloads on Linux-based distributions: -```sql -SELECT date, downloads -FROM pypi_datasource.system WHERE package="mindsdb" AND os="Linux"; -``` - - -Each table takes a required `package` argument in the WHERE clause, which is the name of the package you want to query. - - - -## Supported Tables - -The following tables are supported by the PyPI handler: - -* `overall`: daily download quantities for packages. -* `recent`: recent download quantities for packages. -* `python_major`: daily download quantities for packages, grouped by Python major version. -* `python_minor`: daily download quantities for packages, grouped by Python minor version. -* `system`: daily download quantities for packages, grouped by operating system. - diff --git a/docs/integrations/app-integrations/reddit.mdx b/docs/integrations/app-integrations/reddit.mdx deleted file mode 100644 index b2254aae455..00000000000 --- a/docs/integrations/app-integrations/reddit.mdx +++ /dev/null @@ -1,76 +0,0 @@ ---- -title: Reddit -sidebarTitle: Reddit ---- - -In this section, we present how to connect Reddit to MindsDB. 
- -[Reddit](https://www.reddit.com/) is a social media platform and online community where registered users can engage in discussions, share content, and participate in various communities called subreddits. - -Data from Reddit can be utilized within MindsDB to train AI models and chatbots. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Reddit to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Reddit. - -## Connection - -This handler is implemented using the [PRAW (Python Reddit API Wrapper)](https://praw.readthedocs.io/en/latest/) library, which is a Python package that provides a simple and easy-to-use interface to access the Reddit API. - -The required arguments to establish a connection are as follows: - -* `client_id` is a Reddit API client ID. -* `client_secret` is a Reddit API client secret. -* `user_agent` is a user agent string to identify your application. - - -Here is how to get your Reddit credentials: - -1. Go to Reddit App Preferences at https://www.reddit.com/prefs/apps or https://old.reddit.com/prefs/apps/ -2. Scroll down to the bottom of the page and click *Create another app...* -3. Fill out the form with the name, description, and redirect URL for your app, then click *Create app* -4. Now you should be able to see the personal user script, secret, and name of your app. Store those as environment variables: `CLIENT_ID`, `CLIENT_SECRET`, and `USER_AGENT`, respectively. 
- - -In order to make use of this handler and connect the Reddit app to MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE my_reddit -WITH - ENGINE = 'reddit', - PARAMETERS = { - "client_id": "YOUR_CLIENT_ID", - "client_secret": "YOUR_CLIENT_SECRET", - "user_agent": "YOUR_USER_AGENT" - }; -``` - -It creates a database that comes with two tables: `submission` and `comment`. - -## Usage - -Now you can fetch data from Reddit, like this: - -```sql -SELECT * -FROM my_reddit.submission -WHERE subreddit = 'MachineLearning' -AND sort_type = 'top' -- specifies the sorting type for the subreddit (possible values include 'hot', 'new', 'top', 'controversial', 'gilded', 'wiki', 'mod', 'rising') -AND items = 5; -- specifies the number of items to fetch from the subreddit -``` - -You can also fetch comments for a particular post/submission, like this: - -```sql -SELECT * -FROM my_reddit.comment -WHERE submission_id = '12gls93' -``` - - -For more information about available actions and development plans, visit [this page](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/reddit_handler/README.md). - diff --git a/docs/integrations/app-integrations/rest-api.mdx b/docs/integrations/app-integrations/rest-api.mdx deleted file mode 100644 index 1ead2f519ae..00000000000 --- a/docs/integrations/app-integrations/rest-api.mdx +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: REST API -sidebarTitle: REST API ---- - -In this section, we present how to connect any REST API to MindsDB using bearer-token authentication. - -The REST API handler is a generic integration that lets you forward HTTP requests to any API through MindsDB using stored credentials. Unlike named integrations (HubSpot, Shopify, etc.), it requires no handler-specific knowledge — just a base URL and a bearer token. - -This is useful for APIs that MindsDB doesn't have a dedicated handler for, or when you only need direct HTTP access without SQL table mapping. 
- -## Connection - -The required arguments to establish a connection are as follows: - -- `base_url`: the base URL of the REST API (e.g. `https://api.example.com`). All request paths are appended to this URL. -- `bearer_token`: the token used for authentication. Injected as `Authorization: Bearer {bearer_token}` on every request. - -Optional arguments: - -- `default_headers`: a JSON object of static headers added to every request (e.g. `{"Accept": "application/json"}`). -- `allowed_hosts`: a list of allowed hostnames for requests. Defaults to the hostname of `base_url`. Use `["*"]` to disable host containment. -- `test_path`: the path used by the test endpoint to verify connectivity. Defaults to `/`. - -To connect a REST API to MindsDB, create a new database: - -```sql -CREATE DATABASE my_api -WITH ENGINE = 'rest_api', -PARAMETERS = { - "base_url": "https://api.example.com", - "bearer_token": "your_token_here" -}; -``` - -### Example: Connect to HubSpot - -```sql -CREATE DATABASE my_hubspot -WITH ENGINE = 'rest_api', -PARAMETERS = { - "base_url": "https://api.hubapi.com", - "bearer_token": "pat-eu1-..." -}; -``` - -### Example: Connect with default headers and a custom test path - -```sql -CREATE DATABASE my_internal_api -WITH ENGINE = 'rest_api', -PARAMETERS = { - "base_url": "https://internal.example.com/api/v2", - "bearer_token": "sk-...", - "default_headers": {"Accept": "application/json"}, - "test_path": "/health" -}; -``` - -### Example: Multiple allowed hosts - -```sql -CREATE DATABASE my_multi_region_api -WITH ENGINE = 'rest_api', -PARAMETERS = { - "base_url": "https://api.example.com", - "bearer_token": "your_token", - "allowed_hosts": ["api.example.com", "api.eu.example.com"] -}; -``` - -## Usage - -This handler is **passthrough-only** — it does not expose SQL tables. All interaction is through the REST passthrough endpoint.
- -### Sending requests - -Forward HTTP requests to the upstream API: - -``` -POST /api/integrations/my_api/passthrough -``` - -```json -{ - "method": "GET", - "path": "/v1/users", - "query": {"limit": "10"}, - "headers": {"Accept": "application/json"} -} -``` - -The response wraps the upstream HTTP response: - -```json -{ - "status_code": 200, - "headers": {"content-type": "application/json"}, - "body": {"results": [...]}, - "content_type": "application/json" -} -``` - -Supported HTTP methods: `GET`, `POST`, `PUT`, `PATCH`, `DELETE`. - -### Testing the connection - -Verify that the base URL, token, and host allowlist are configured correctly: - -``` -POST /api/integrations/my_api/passthrough/test -``` - -A successful response: - -```json -{"ok": true, "status_code": 200, "host": "api.example.com", "latency_ms": 140} -``` - -A failed response: - -```json -{"ok": false, "error_code": "auth_failed", "message": "upstream rejected credentials; base URL and allowlist look correct"} -``` - -## Security - -- Credentials are stored in MindsDB and never exposed to the caller. -- Requests are restricted to hostnames in the allowlist. Private and loopback IP addresses are rejected by default. -- Callers cannot override `Authorization`, `Host`, `Cookie`, or `Proxy-*` headers. -- If the upstream API echoes the token in responses, it is replaced with `[REDACTED_API_KEY]`. -- Request bodies are capped at 1 MB, response bodies at 10 MB. - - -**`host 'X' is not in the datasource allowlist`** - -The request path resolved to a different hostname than `base_url`. Add the hostname to `allowed_hosts`, or use `["*"]` to disable host containment (not recommended for production). - - - -**`upstream rejected credentials (401/403)`** - -The token is invalid, expired, or missing required scopes. Verify the token with the upstream API provider. 
- - - -For more information about available actions and development plans, visit [this page](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/rest_api_handler/README.md). - diff --git a/docs/integrations/app-integrations/salesforce.mdx b/docs/integrations/app-integrations/salesforce.mdx deleted file mode 100644 index 0d942c46193..00000000000 --- a/docs/integrations/app-integrations/salesforce.mdx +++ /dev/null @@ -1,101 +0,0 @@ ---- -title: Salesforce -sidebarTitle: Salesforce ---- - -This documentation describes the integration of MindsDB with [Salesforce](https://www.salesforce.com/), the world’s most trusted customer relationship management (CRM) platform. -The integration allows MindsDB to access data from Salesforce and enhance it with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect Salesforce to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to Salesforce from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/salesforce_handler) as an engine. - -```sql -CREATE DATABASE salesforce_datasource -WITH - ENGINE = 'salesforce', - PARAMETERS = { - "username": "demo@example.com", - "password": "demo_password", - "client_id": "3MVG9lKcPoNINVBIPJjdw1J9LLM82HnZz9Yh7ZJnY", - "client_secret": "5A52C1A1E21DF9012IODC9ISNXXAADDA9" - }; -``` - -Required connection parameters include the following: - -* `username`: The username for the Salesforce account. -* `password`: The password for the Salesforce account. 
-* `client_id`: The client ID (consumer key) from a connected app in Salesforce. -* `client_secret`: The client secret (consumer secret) from a connected app in Salesforce. - -Optional connection parameters include the following: - -* `is_sandbox`: The setting to indicate whether to connect to a Salesforce sandbox environment (`true`) or production environment (`false`). This parameter defaults to `false`. - - -To create a connected app in Salesforce and obtain the client ID and client secret, follow the steps given below: -1. Log in to your Salesforce account. -2. Go to `Settings` > `Open Advanced Setup` > `Apps` > `App Manager`. -3. Click `New Connected App`, select `Create a Connected App` and click `Continue`. -4. Fill in the required details, i.e., `Connected App Name`, `API Name` and `Contact Phone`. -5. Select the `Enable OAuth Settings` checkbox, set the `Callback URL` to wherever MindsDB is deployed followed by `/verify-auth` (e.g., `http://localhost:47334/verify-auth`), and choose the following OAuth scopes: - - Manage user data via APIs (api) - - Perform requests at any time (refresh_token, offline_access) -6. Click `Save` and then `Continue`. -7. Click on `Manage Consumer Details` under `API (Enable OAuth Settings)`, and copy the Consumer Key (client ID) and Consumer Secret (client secret). -8. Click on `Back to Manage Connected Apps` and then `Manage`. -9. Click `Edit Policies`. -10. Under `OAuth Policies`, configure the `Permitted Users` and `IP Relaxation` settings according to your security policies. For example, to enable all users to access the app without enforcing any IP restrictions, select `All users may self-authorize` and `Relax IP restrictions` respectively. Leave the `Refresh Token Policy` set to `Refresh token is valid until revoked`. -11. Click `Save`. -12. Go to `Identity` > `OAuth and OpenID Connect Settings`. -13. Ensure that the `Allow OAuth Username-Password Flows` checkbox is checked. 
- - -## Usage - -Retrieve data from a specified table by providing the integration and table names: - -```sql -SELECT * -FROM salesforce_datasource.table_name -LIMIT 10; -``` - -Run [SOQL](https://developer.salesforce.com/docs/atlas.en-us.soql_sosl.meta/soql_sosl/sforce_api_calls_soql.htm) queries directly on the connected Salesforce account: - -```sql -SELECT * FROM salesforce_datasource ( - - --Native Query Goes Here - SELECT Name, Account.Name, Account.Industry - FROM Contact - WHERE Account.Industry = 'Technology' - LIMIT 5 - -); -``` - - -The above examples utilize `salesforce_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Salesforce Table Filtering - -We have implemented a filtering logic to exclude tables that are generally not useful for direct business queries, which fall into the following categories: - -* System and Auditing Tables: We exclude tables that track field history, record sharing rules, and data change events (e.g., objects ending in History, Share, or ChangeEvent). These are important for system administration but not for typical business analysis. -* Configuration and Metadata: We remove tables that define the structure and configuration of Salesforce itself. This includes objects related to user permissions, internal rules, platform settings, and data definitions (e.g., FieldDefinition, PermissionSet, AssignmentRule). -* Feature-Specific Technical Objects: Tables that support specific backend Salesforce features are excluded. This includes objects related to: - * AI and Einstein: (AI...) - * Developer Components: (Apex..., Aura...) - * Data Privacy and Consent: (objects ending in Consent or containing Policy) - * Chatter and Collaboration Feeds: (...Feed, Collaboration...) -* Archived or Legacy Objects: Older objects that have been replaced by modern equivalents, such as ContentWorkspace, are also excluded to simplify the list. 
diff --git a/docs/integrations/app-integrations/sendinblue.mdx b/docs/integrations/app-integrations/sendinblue.mdx deleted file mode 100644 index 1d192e1cfdb..00000000000 --- a/docs/integrations/app-integrations/sendinblue.mdx +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: Sendinblue -sidebarTitle: Sendinblue ---- - -In this section, we present how to connect Sendinblue to MindsDB. - -[Brevo (formerly Sendinblue)](https://www.brevo.com/) is an all-in-one platform to automate your marketing campaigns over Email, SMS, WhatsApp or chat. - -Data from Sendinblue can be used to understand the impact of email marketing. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Sendinblue to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Sendinblue. - -## Connection - -This handler is implemented using the [sib-api-v3-sdk](https://github.com/sendinblue/APIv3-python-library) library, a Python library that wraps Sendinblue APIs. - -The required arguments to establish a connection are as follows: - -* `api_key`: a required Sendinblue API key to use for authentication - - -Check out [this guide](https://developers.brevo.com/docs) on how to create the Sendinblue API key. - -It is recommended to use the API key to avoid the `API rate limit exceeded` error. - - -Here is how to connect the SendinBlue to MindsDB: - -```sql -CREATE DATABASE sib_datasource -WITH ENGINE = 'sendinblue', -PARAMETERS = { - "api_key": "xkeysib-..." 
-}; -``` - -## Usage - -Use the established connection to query your database: - -```sql -SELECT * FROM sib_datasource.email_campaigns -``` - -Run more advanced queries: - -```sql -SELECT id, name -FROM sib_datasource.email_campaigns -WHERE status = 'sent' -ORDER BY name -LIMIT 5 -``` - - - -For more information about available actions and development plans, visit [this page](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/sendinblue_handler/README.md). - diff --git a/docs/integrations/app-integrations/shopify.mdx b/docs/integrations/app-integrations/shopify.mdx deleted file mode 100644 index 147564d1a3b..00000000000 --- a/docs/integrations/app-integrations/shopify.mdx +++ /dev/null @@ -1,182 +0,0 @@ ---- -title: Shopify -sidebarTitle: Shopify ---- - -In this section, we present how to connect Shopify to MindsDB. - -[Shopify](https://www.shopify.com/) is an e-commerce platform that enables businesses to create and manage online stores. It is one of the leading e-commerce solutions, providing a wide range of tools and services to help entrepreneurs and businesses sell products and services online. - -Data from Shopify can be utilized within MindsDB to train AI models and chatbots using Products, Customers and Orders data, and make predictions relevant for businesses. - -## Connection - -The required arguments to establish a connection are as follows: - -- `shop_url`: a required URL to your Shopify store. -- `access_token`: a required access token to use for authentication. - - -Here is how you can [create a Shopify access token](https://www.youtube.com/watch?v=4f_aiC5oTNc&t=302s). - - -Optionally, if you want to access customer reviews, provide the following parameters: - -- `yotpo_app_key`: a token needed to access customer reviews via the Yotpo Product Reviews app. -- `yotpo_access_token`: a token needed to access customer reviews via the Yotpo Product Reviews app. 
- - -If you want to query customer reviews, use the [Yotpo Product Reviews](https://apps.shopify.com/yotpo-social-reviews) app available in Shopify. Here are the steps to follow: -1. Install the [Yotpo Product Reviews](https://apps.shopify.com/yotpo-social-reviews) app for your Shopify store. -2. Generate `yotpo_app_key` following [this instruction](https://support.yotpo.com/docs/finding-your-yotpo-app-key-and-secret-key) for retrieving your app key. Learn more about [Yotpo authentication here](https://apidocs.yotpo.com/reference/yotpo-authentication). -3. Generate `yotpo_access_token` following [this instruction](https://develop.yotpo.com/reference/generate-a-token). - - -To connect your Shopify account to MindsDB, you must first create a new handler instance. You can do it by the following query: - -```sql -CREATE DATABASE shopify_datasource -WITH ENGINE = 'shopify', -PARAMETERS = { - "shop_url": "your-shop-name.myshopify.com", - "access_token": "shppa_..." -}; -``` - -## Usage - -Once you have created the database, you can query the following tables: - -- Products table -- Customers table -- Orders table -- CustomerReviews table (requires the [Yotpo Product Reviews](https://apps.shopify.com/yotpo-social-reviews) app to be installed in your Shopify account) -- InventoryLevel table -- Location table -- CarrierService table -- ShippingZone table -- SalesChannel table - -### Products table - -You can query this table as below: - -```sql -SELECT * -FROM shopify_datasource.products; -``` - -Also, you can run more advanced queries and filter products by status, like this: - -```sql -SELECT id, title -FROM shopify_datasource.products -WHERE status = 'active' -ORDER BY id -LIMIT 5; -``` - -To insert new data, run the `INSERT INTO` statement, providing the following values: `title`, `body_html`, `vendor`, `product_type`, `tags`, `status`. - -To update existing data, run the `UPDATE` statement. - -To delete data, run the `DELETE` statement. 
- -### Customers table - -You can query this table as below: - -```sql -SELECT * -FROM shopify_datasource.customers; -``` - -To insert new data, run this statement: - -```sql -INSERT INTO shopify_datasource.customers(first_name, last_name, email, phone) -VALUES ('John', 'Doe', 'john.doe@example.com', '+10001112222'); -``` - -To update existing data, run the `UPDATE` statement. - -To delete data, run the `DELETE` statement. - -### Orders table - -You can query this table as below: - -```sql -SELECT * -FROM shopify_datasource.orders; -``` - -To insert new data, run the `INSERT INTO` statement. - -To update existing data, run the `UPDATE` statement. - -To delete data, run the `DELETE` statement. - -### CustomerReviews table - -You can query this table as below: - -```sql -SELECT * -FROM shopify_datasource.customer_reviews; -``` - -### InventoryLevel table - -You can query this table as below: - -```sql -SELECT * -FROM shopify_datasource.inventory_level; -``` - -### Location table - -You can query this table as below: - -```sql -SELECT * -FROM shopify_datasource.locations; -``` - -### CarrierService table - -You can query this table as below: - -```sql -SELECT * -FROM shopify_datasource.carrier_service; -``` - -To insert new data, run the `INSERT INTO` statement, providing the following values: `name`, `callback_url`, `service_discovery`. - -To update existing data, run the `UPDATE` statement. - -To delete data, run the `DELETE` statement. - -### ShippingZone table - -You can query this table as below: - -```sql -SELECT * -FROM shopify_datasource.shipping_zone; -``` - -### SalesChannel table - -You can query this table as below: - -```sql -SELECT * -FROM shopify_datasource.sales_channel; -``` - - -For more information about available actions and development plans, visit [this page](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/shopify_handler/README.md). 
- diff --git a/docs/integrations/app-integrations/slack.mdx b/docs/integrations/app-integrations/slack.mdx deleted file mode 100644 index 6638c2b2ef7..00000000000 --- a/docs/integrations/app-integrations/slack.mdx +++ /dev/null @@ -1,268 +0,0 @@ ---- -title: Slack -sidebarTitle: Slack ---- - -This documentation describes the integration of MindsDB with [Slack](https://slack.com/), a cloud-based collaboration platform. -The integration allows MindsDB to access data from Slack and enhance Slack with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Slack to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Slack. - -## Connection - -Establish a connection to Slack from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/slack_handler) as an engine. - -```sql -CREATE DATABASE slack_datasource -WITH ENGINE = 'slack', -PARAMETERS = { - "token": "values", -- required parameter - "app_token": "values" -- optional parameter -}; -``` - -The Slack handler is initialized with the following parameters: - -* `token` is a Slack bot token to use for authentication. -* `app_token` is a Slack app token to use for authentication. - - -Please note that `app_token` is an optional parameter. Without providing it, you need to integrate an app into a Slack channel. - - -### Method 1: Chatbot responds in direct messages to a Slack app - -One way to connect Slack is to use both bot and app tokens. By following the instructions below, you'll set up the Slack app and be able to message this Slack app directly to chat with the bot. 
- - -If you want to use Slack in the [`CREATE CHATBOT`](/agents/chatbot) syntax, use this method of connecting Slack to MindsDB. - - - -Here is how to set up a Slack app and generate both a Slack bot token and a Slack app token: - - 1. Follow [this link](https://api.slack.com/apps) and sign in with your Slack account. - 2. Create a new app `From scratch` or select an existing app. - - Please note that the following instructions support apps created `From scratch`. - - For apps created `From an app manifest`, please follow the [Slack docs here](https://api.slack.com/reference/manifests). - 3. Go to *Basic Information* under *Settings*. - - Under *App-Level Tokens*, click on *Generate Token and Scopes*. - - Name the token `socket` and add the `connections:write` scope. - - **Copy and save the `xapp-...` token - you'll need it to publish the chatbot.** - 4. Go to *Socket Mode* under *Settings* and toggle the button to *Enable Socket Mode*. - 5. Go to *OAuth & Permissions* under *Features*. - - Add the following *Bot Token Scopes*: - - app_mentions:read - - channels:history - - channels:read - - chat:write - - groups:history - - groups:read (optional) - - im:history - - im:read - - im:write - - mpim:read (optional) - - users.profile:read - - users:read (optional) - - In the *OAuth Tokens for Your Workspace* section, click on *Install to Workspace* and then *Allow*. - - **Copy and save the `xoxb-...` token - you'll need it to publish the chatbot.** - 6. Go to *App Home* under *Features* and click on the checkbox to *Allow users to send Slash commands and messages from the messages tab*. - 7. Go to *Event Subscriptions* under *Features*. - - Toggle the button to *Enable Events*. - - Under *Subscribe to bot events*, click on *Add Bot User Event* and add `app_mention` and `message.im`. - - Click on *Save Changes*. - 8. Now you can use tokens from points 3 and 5 to initialize the Slack handler in MindsDB. 
- - - -This connection method enables you to chat directly with an app via Slack. - -Alternatively, you can connect an app to the Slack channel: - - Go to the channel where you want to use the bot. - - Right-click on the channel and select *View Channel Details*. - - Select *Integrations*. - - Click on *Add an App*. - - -Here is how to connect Slack to MindsDB: - -```sql -CREATE DATABASE slack_datasource -WITH - ENGINE = 'slack', - PARAMETERS = { - "token": "xoxb-...", - "app_token": "xapp-..." - }; -``` - -It comes with the `conversations` and `messages` tables. - -### Method 2: Chatbot responds on a defined Slack channel - -Another way to connect to Slack is to use the bot token only. By following the instructions below, you'll set up the Slack app and integrate it into one of the channels from which you can directly chat with the bot. - - -Here is how to set up a Slack app and generate a Slack bot token: - - 1. Follow [this link](https://api.slack.com/apps) and sign in with your Slack account. - 2. Create a new app `From scratch` or select an existing app. - - Please note that the following instructions support apps created `From scratch`. - - For apps created `From an app manifest`, please follow the [Slack docs here](https://api.slack.com/reference/manifests). - 3. Go to the *OAuth & Permissions* section. - 4. Under the *Scopes* section, add the *Bot Token Scopes* necessary for your application. You can add more later as well. - - channels:history - - channels:read - - chat:write - - groups:read - - im:read - - mpim:read - - users:read - 5. Install the bot in your workspace. - 6. Under the *OAuth Tokens for Your Workspace* section, copy the *Bot User OAuth Token* value. - 7. Open your Slack application and add the App/Bot to one of the channels: - - Go to the channel where you want to use the bot. - - Right-click on the channel and select *View Channel Details*. - - Select *Integrations*. - - Click on *Add an App*. - 8. 
Now you can use the token from step 6 to initialize the Slack handler in MindsDB and use the channel name to query and write messages. - - -Here is how to connect Slack to MindsDB: - -```sql -CREATE DATABASE slack_datasource -WITH - ENGINE = 'slack', - PARAMETERS = { - "token": "xoxb-..." - }; -``` - -## Usage - - -The following usage applies when **Connection Method 2** was used to connect Slack. - -See the usage for **Connection Method 1** [via the `CREATE CHATBOT` syntax](/sql/tutorials/create-chatbot). - - -Retrieve data from a specified table by providing the integration and table names: - -```sql -SELECT * -FROM slack_datasource.table_name -LIMIT 10; -``` - -## Supported Tables - -The Slack integration supports the following tables: - -### `conversations` Table - -The `conversations` virtual table is used to query conversations (channels, DMs, and groups) in the connected Slack workspace. - -```sql --- Retrieve all conversations in the workspace -SELECT * -FROM slack_datasource.conversations; - --- Retrieve a specific conversation using its ID -SELECT * -FROM slack_datasource.conversations -WHERE id = ""; - --- Retrieve a specific conversation using its name -SELECT * -FROM slack_datasource.conversations -WHERE name = ""; -``` - -### `messages` Table - -The `messages` virtual table is used to query, post, update, and delete messages in specific conversations within the connected Slack workspace. 
- -```sql --- Retrieve all messages from a specific conversation --- channel_id is a required parameter and can be found in the conversations table -SELECT * -FROM slack_datasource.messages -WHERE channel_id = ""; - --- Post a new message --- channel_id and text are required parameters -INSERT INTO slack_datasource.messages (channel_id, text) -VALUES("", "Hello from SQL!"); - --- Update a bot-posted message --- channel_id, ts, and text are required parameters -UPDATE slack_datasource.messages -SET text = "Updated message content" -WHERE channel_id = "" AND ts = ""; - --- Delete a bot-posted message --- channel_id and ts are required parameters -DELETE FROM slack_datasource.messages -WHERE channel_id = "" AND ts = ""; -``` - - -You can also find the channel ID by right-clicking on the conversation in Slack, selecting 'View conversation details' or 'View channel details,' and copying the channel ID from the bottom of the 'About' tab. - - -### `threads` Table - -The `threads` virtual table is used to query and post messages in threads within the connected Slack workspace. - -```sql --- Retrieve all messages in a specific thread --- channel_id and thread_ts are required parameters --- thread_ts is the timestamp of the parent message and can be found in the messages table -SELECT * -FROM slack_datasource.threads -WHERE channel_id = "" AND thread_ts = ""; - --- Post a message to a thread -INSERT INTO slack_datasource.threads (channel_id, thread_ts, text) -VALUES("", "", "Replying to the thread!"); -``` - -### `users` Table - -The `users` virtual table is used to query user information in the connected Slack workspace. - -```sql --- Retrieve all users in the workspace -SELECT * -FROM slack_datasource.users; - --- Retrieve a specific user by name -SELECT * -FROM slack_datasource.users -WHERE name = "John Doe"; -``` - -## Rate Limit Considerations - -The Slack API enforces rate limits on data retrieval. 
Therefore, when querying the above tables, by default, the first 1000 (999 for `messages`) records are returned. - -To retrieve more records, use the `LIMIT` clause in your SQL queries. For example: - -```sql -SELECT * -FROM slack_datasource.conversations -LIMIT 2000; -``` - -When using the LIMIT clause to query additional records, you may encounter Slack API rate limits. - -## Next Steps - -Follow [this tutorial](/use-cases/ai_agents/build_ai_agents) to build an AI agent with MindsDB. diff --git a/docs/integrations/app-integrations/strapi.mdx b/docs/integrations/app-integrations/strapi.mdx deleted file mode 100644 index e92d560d632..00000000000 --- a/docs/integrations/app-integrations/strapi.mdx +++ /dev/null @@ -1,73 +0,0 @@ ---- -title: Strapi -sidebarTitle: Strapi ---- - -[Strapi](https://strapi.io/) is a popular open-source Headless Content Management System (CMS) that empowers developers to work with their preferred tools and frameworks, while providing content editors with a user-friendly interface to manage and distribute content across various platforms. - -The Strapi Handler is a MindsDB handler that enables SQL-based querying of Strapi collections. This documentation provides a brief overview of its features, initialization parameters, and example usage. - -## Connection - -To use the Strapi Handler, initialize it with the following parameters: - -- `host`: Strapi server host. -- `port`: Strapi server port (typically 1337). -- `api_token`: Strapi server API token for authentication. -- `endpoints`: List of collection endpoints. - -To get started, create a Strapi engine database with the following SQL command: - -```sql -CREATE DATABASE myshop --- Display name for the database. -WITH ENGINE = 'strapi', --- Name of the MindsDB handler. -PARAMETERS = { - "host" : "", --- Host (can be an IP address or URL). - "port" : "", --- Common port is 1337. - "api_token": "", --- API token of the Strapi server. - "endpoints" : [""] --- Collection endpoints. 
-}; -``` - -## Usage - -Retrieve data from a collection: - -```sql -SELECT * -FROM myshop.; -``` - -Filter data based on specific criteria: - -```sql -SELECT * -FROM myshop. -WHERE documentId = ''; -``` - -Insert new data into a collection: - -```sql -INSERT INTO myshop. (, , ...) -VALUES (, , ...); -``` - - - Note: You are only able to insert data into the collection which has `create` - permission. - - - -Modify existing data in a collection: - -```sql -UPDATE myshop. -SET = , = , ... -WHERE documentId = ''; -``` - - - Note: You are only able to update data in the collection which has `update` - permission. - diff --git a/docs/integrations/app-integrations/stripe.mdx b/docs/integrations/app-integrations/stripe.mdx deleted file mode 100644 index 6af28f1f5b7..00000000000 --- a/docs/integrations/app-integrations/stripe.mdx +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: Stripe -sidebarTitle: Stripe ---- - -In this section, we present how to connect Stripe to MindsDB. - -[Stripe](https://stripe.com/) is a financial technology company that provides a set of software and payment processing solutions for businesses and individuals to accept payments over the internet. Stripe is one of the leading payment gateway and online payment processing platforms. - -Data from Stripe can be utilized within MindsDB to train AI models and chatbots based on customers, products, and payment intents, and make relevant predictions and forecasts. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Stripe to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Stripe. - -## Connection - -This handler was implemented using [stripe-python](https://github.com/stripe/stripe-python), the Python library for the Stripe API. 
- -There is only one parameter required to set up the connection with Stripe: -- `api_key`: a Stripe API key. - - -You can find your API keys in the Stripe Dashboard. [Read more](https://stripe.com/docs/keys). - - -To connect to Stripe using MindsDB, the following CREATE DATABASE statement can be used: - -```sql -CREATE DATABASE stripe_datasource -WITH ENGINE = 'stripe', -PARAMETERS = { - "api_key": "sk_..." -}; -``` - -## Usage - -Now, you can query the data in your Stripe account (customers, for example) as follows: - -```sql -SELECT * FROM stripe_datasource.customers -``` - -You can run more advanced queries to fetch specific customers in a defined order: - -```sql -SELECT name, email -FROM stripe_datasource.customers -WHERE currency = 'inr' -ORDER BY name -LIMIT 5 -``` - -### Supported tables - -The following tables are supported by the Stripe handler: - -- `customers` -- `products` -- `payment_intents` diff --git a/docs/integrations/app-integrations/symbl.mdx b/docs/integrations/app-integrations/symbl.mdx deleted file mode 100644 index 07df1498102..00000000000 --- a/docs/integrations/app-integrations/symbl.mdx +++ /dev/null @@ -1,84 +0,0 @@ ---- -title: Symbl -sidebarTitle: Symbl ---- - -This documentation describes the integration of MindsDB with [Symbl](https://symbl.ai/), a platform with state-of-the-art and task-specific LLMs that enables businesses to analyze multi-party conversations at scale. -This integration allows MindsDB to process conversation data and extract insights from it. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Symbl to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). 
- - -Please note that in order to successfully install the dependencies for Symbl, it is necessary to install `portaudio` and a few other Linux packages in the Docker container first. To do this, run the following commands: - -1. Start an interactive shell in the container: -```bash -docker exec -it mindsdb_container sh -``` -If you haven't specified a name when spinning up the MindsDB container with `docker run`, you can find it by running `docker ps`. - - -If you are using Docker Desktop, you can navigate to 'Containers', locate the multi-container application running the extension, click on the `mindsdb_service` container and then click on the 'Exec' tab to start an interactive shell. - - -2. Install the required packages: -```bash -apt-get update && apt-get install -y \ - libportaudio2 libportaudiocpp0 portaudio19-dev \ - python3-dev \ - build-essential \ - && rm -rf /var/lib/apt/lists/* -``` - - -## Connection - -Establish a connection to your Symbl from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE mindsdb_symbl -WITH ENGINE = 'symbl', -PARAMETERS = { - "app_id": "app_id", - "app_secret":"app_secret" -}; -``` - -Required connection parameters include the following: - -* `app_id`: The Symbl app identifier. -* `app_secret`: The Symbl app secret. - -## Usage - -First, process the conversation data and get the conversation ID via the `get_conversation_id` table: - -```sql -SELECT * -FROM mindsdb_symbl.get_conversation_id -WHERE audio_url="https://symbltestdata.s3.us-east-2.amazonaws.com/newPhonecall.mp3"; -``` - -Next, use the conversation ID to get the results of the above from the other supported tables: - -```sql -SELECT * -FROM mindsdb_symbl.get_messages -WHERE conversation_id="5682305049034752"; -``` - -Other supported tables include: - -* `get_topics` -* `get_questions` -* `get_analytics` -* `get_action_items` - - -The above examples utilize `mindsdb_symbl` as the datasource name, which is defined in the `CREATE DATABASE` command. 
- \ No newline at end of file diff --git a/docs/integrations/app-integrations/twitter.mdx b/docs/integrations/app-integrations/twitter.mdx deleted file mode 100644 index 25f9d9e208f..00000000000 --- a/docs/integrations/app-integrations/twitter.mdx +++ /dev/null @@ -1,163 +0,0 @@ ---- -title: Twitter -sidebarTitle: Twitter ---- - -In this section, we present how to connect Twitter accounts to MindsDB. - -[Twitter](https://twitter.com/) is a widely recognized social media platform and microblogging service that allows users to share short messages called tweets. - -The Twitter handler enables you to fetch tweets and create replies utilizing AI models within MindsDB. Furthermore, you can automate the process of fetching tweets, preparing replies, and sending replies to Twitter. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Twitter to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Twitter. - -## Connection - -To connect a Twitter account to MindsDB, you need a Twitter developer account. - - -Please note that it requires a paid developer account. - -We recommend you use the [Elevated access](https://developer.twitter.com/en/support/twitter-api/developer-account) allowing you to pull 2m tweets and to avoid _parameters or authentication issue_ error you might get sometimes. You can check [this step-by-step guide](https://medium.com/@skillcate/set-up-twitter-api-to-pull-2m-tweets-month-44d004c6f7ce) describing how to apply for the Elevated access. - - -If you don't already have a Twitter developer account, follow the steps in the video below to apply for one. 
- - - - [Begin here to apply for a Twitter developer account](https://developer.twitter.com/apply-for-access) - - Watch this [step-by-step video](https://www.youtube.com/watch?v=qVe7PeC0sUQ) explaining the process. - - When presented with questions under *How will you use the Twitter API or Twitter Data?*, use answers similar to the ones below (tweak to fit your exact use case). The more thorough your answers are, the more likely it is your account will get approved. - - **Intended Usage (In Your Words)** - - *I have a blog and want to educate users how to use the Twitter API with MindsDB.* - - *I will read tweets that mention me and use them with MindsDB machine learning to generate responses. I plan to post tweets 2-3 times a day and keep using Twitter like I normally would.* - - **Are you planning to analyze Twitter data?** - - *I plan to build machine learning algorithms based on Twitter data. I am interested in doing sentiment analysis and topic analysis.* - - *I will potentially extract:* - * *Tweet text* - * *Favorite count and retweet count* - * *Hashtags and mentions* - - **Will your app use Tweet, Retweet, Like, Follow, or Direct Message functionality?** - - *I will use the Twitter API to post responses to tweets that mention me.* - - *I will have word filters to make sure that I never share offensive or potentially controversial subjects.* - - **Do you plan to display Tweets or aggregate data about Twitter content outside Twitter?** - - *I plan to share aggregate data as examples for users of my upcoming blog. I don't intend to create an automated dashboard that consumes a lot of Twitter API calls.* - - *Every API call will be done locally, or automated on a simple web server. Aggregate of data will be for educational purposes only.* - - **Will your product, service, or analysis make Twitter content or derived information available to a government entity?** - - Answer NO to this one. 
- - -If you already have a Twitter developer account, you need to generate API keys following the instructions below or heading to the [Twitter developer website](https://developer.twitter.com/en). - - - * Create an application with Read/Write permissions activated: - * Open [developer portal](https://developer.twitter.com/en/portal/projects-and-apps). - * Select the `Add app` button to create a new app. - * Select the `Create new` button. - * Select `Production` and give it a name. - * Copy and populate the following in the below `CREATE DATABASE` statement: - * `Bearer Token` as a value of the `bearer_token` parameter. - * `API Key` as a value of the `consumer_key` parameter. - * `API Key Secret` as a value of the `consumer_secret` parameter. - * Setup user authentication settings: - * Click `Setup` under `User authentication settings`: - * On `Permissions`, select `Read and Write`. - * On `Type of app`, select `Web App`, `Automated App or Bot`. - * On `App info`, provide any URL for the callback URL and website URL (you can use the URL of this page). - * Click `Save`. - * Generate access tokens: - * Once you are back in the app settings, click `Keys and Tokens`: - * Generate `Access Token` and `Access Token Secret` and populate it in the below `CREATE DATABASE` statement: - * `Access Token` as a value of the `access_token` parameter. - * `Access Token Secret` as a value of the `access_token_secret` parameter. - - -Once you have all the tokens and keys, here is how to connect your Twitter account to MindsDB: - -```sql -CREATE DATABASE my_twitter -WITH - ENGINE = 'twitter', - PARAMETERS = { - "bearer_token": "twitter bearer token", - "consumer_key": "twitter consumer key", - "consumer_secret": "twitter consumer key secret", - "access_token": "twitter access token", - "access_token_secret": "twitter access token secret" - }; -``` - -## Usage - -The `my_twitter` database contains a table called `tweets` by default. 
- -Here is how to search tweets containing `mindsdb` keyword: - -```sql -SELECT id, created_at, author_username, text -FROM my_twitter.tweets -WHERE query = '(mindsdb OR #mindsdb) -is:retweet -is:reply' -AND created_at > '2023-02-16' -LIMIT 20; -``` - - -Please note that we can see only recent tweets from the past seven days. The `created_at` column condition is skipped if the provided date is earlier than seven days. - - -Alternatively, you can use a Twitter native query, as below: - -```sql -SELECT * FROM my_twitter ( - search_recent_tweets( - query = '(mindsdb OR #mindsdb) -is:retweet -is:reply', - start_time = '2023-03-16T00:00:00.000Z', - max_results = 2 - ) -); -``` - - -To learn more about native queries in MindsDB, visit our docs [here](/sql/native-queries). - - -Here is how to write tweets: - -```sql -INSERT INTO my_twitter.tweets (reply_to_tweet_id, text) -VALUES - (1626198053446369280, 'MindsDB is great! now its super simple to build ML powered apps'), - (1626198053446369280, 'Holy!! MindsDB is the best thing they have invented for developers doing ML'); -``` - - -For more information about available actions and development plans, visit [this page](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/twitter_handler/README.md). - - - -**What's next?** - -Check out the [tutorial on how to create a Twitter chatbot](/sql/tutorials/twitter-chatbot) to see one of the interesting applications of this integration. - diff --git a/docs/integrations/app-integrations/web-crawler.mdx b/docs/integrations/app-integrations/web-crawler.mdx deleted file mode 100644 index 57bfe0a947a..00000000000 --- a/docs/integrations/app-integrations/web-crawler.mdx +++ /dev/null @@ -1,162 +0,0 @@ ---- -title: Web Crawler -sidebarTitle: Web Crawler ---- - -In this section, we present how to use a web crawler within MindsDB. - -A web crawler is an automated script designed to systematically browse and index content on the internet. 
Within MindsDB, you can utilize a web crawler to efficiently collect data from various websites. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To use Web Crawler with MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - -## Connection - -This handler does not require any connection parameters. - -Here is how to initialize a web crawler: - -```sql -CREATE DATABASE my_web -WITH ENGINE = 'web'; -``` - -The above query creates a database called `my_web`. This database by default has a table called `crawler` that stores data from a given URL or multiple URLs. - - -## Usage - -### Parameters - -#### Crawl Depth - -The `crawl_depth` parameter defines how deep the crawler should navigate through linked pages: - -- `crawl_depth = 0`: Crawls only the specified page. -- `crawl_depth = 1`: Crawls the specified page and all linked pages on it. -- Higher values continue the pattern. - -#### Page Limits - -There are multiple ways to limit the number of pages returned: - -- The `LIMIT` clause defines the maximum number of pages returned globally. -- The `per_url_limit` parameter limits the number of pages returned for each specific URL, if more than one URL is provided. - -### Crawling a Single URL - -The following example retrieves data from a single webpage: - -```sql -SELECT * -FROM my_web.crawler -WHERE url = 'https://docs.mindsdb.com/'; -``` - -Returns **1 row** by default. - -To retrieve more pages from the same URL, specify the `LIMIT`: - -```sql -SELECT * -FROM my_web.crawler -WHERE url = 'https://docs.mindsdb.com/' -LIMIT 30; -``` - -Returns up to **30 rows**. 
- -### Crawling Multiple URLs - -To crawl multiple URLs at once: - -```sql -SELECT * -FROM my_web.crawler -WHERE url IN ('https://docs.mindsdb.com/', 'https://dev.mysql.com/doc/', 'https://mindsdb.com/'); -``` - -Returns **3 rows** by default (1 row per URL). - -To apply a per-URL limit: - -```sql -SELECT * -FROM my_web.crawler -WHERE url IN ('https://docs.mindsdb.com/', 'https://dev.mysql.com/doc/') -AND per_url_limit = 2; -``` - -Returns **4 rows** (2 rows per URL). - -### Crawling with Depth - -To crawl all pages linked within a website: - -```sql -SELECT * -FROM my_web.crawler -WHERE url = 'https://docs.mindsdb.com/' -AND crawl_depth = 1; -``` - -Returns **1 + x rows**, where `x` is the number of linked webpages. - -For multiple URLs with crawl depth: - -```sql -SELECT * -FROM my_web.crawler -WHERE url IN ('https://docs.mindsdb.com/', 'https://dev.mysql.com/doc/') -AND crawl_depth = 1; -``` - -Returns **2 + x + y rows**, where `x` and `y` are the number of linked pages from each URL. - -### Get PDF Content - -MindsDB accepts [file uploads](/sql/create/file) of `csv`, `xlsx`, `xls`, `sheet`, `json`, and `parquet`. However, you can also configure the web crawler to fetch data from PDF files accessible via URLs. - -```sql -SELECT * -FROM my_web.crawler -WHERE url = '' -LIMIT 1; -``` -### Configuring Web Handler for Specific Domains - -The Web Handler can be configured to interact only with specific domains by using the `web_crawling_allowed_sites` setting in the `config.json` file. -This feature allows you to restrict the handler to crawl and process content only from the domains you specify, enhancing security and control over web interactions. - -To configure this, simply list the allowed domains under the `web_crawling_allowed_sites` key in `config.json`. 
For example: - -```json -"web_crawling_allowed_sites": [ - "https://docs.mindsdb.com", - "https://another-allowed-site.com" -] -``` - -## Troubleshooting - - -`Web crawler encounters character encoding issues` - -* **Symptoms**: Extracted text appears garbled or contains strange characters instead of the expected text. -* **Checklist**: - 1. Open a GitHub Issue: If you encounter a bug or a repeatable error with encoding, - report it on the [MindsDB GitHub](https://github.com/mindsdb/mindsdb/issues) repository by opening an issue. - - - - -`Web crawler times out while trying to fetch content` - -* **Symptoms**: The crawler fails to retrieve data from a website, resulting in timeout errors. -* **Checklist**: - 1. Check the network connection to ensure the target site is reachable. - diff --git a/docs/integrations/app-integrations/youtube.mdx b/docs/integrations/app-integrations/youtube.mdx deleted file mode 100644 index a87b1554c15..00000000000 --- a/docs/integrations/app-integrations/youtube.mdx +++ /dev/null @@ -1,151 +0,0 @@ ---- -title: YouTube -sidebarTitle: YouTube ---- - -In this section, we present how to connect YouTube to MindsDB. - -[YouTube](https://www.youtube.com/) is a popular online video-sharing platform and social media website where users -can upload, view, share, and interact with videos created by individuals and organizations from around the world. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - - 1. Install MindsDB on your system or obtain access to cloud options. - 2. To use YouTube with MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - -## Connection - -There are two ways you can connect YouTube to MindsDB: - -1. Limited permissions: This option provides MindsDB with read-only access to YouTube, including viewing comments data. -2. 
Elevated permissions: This option provides MindsDB with full access to YouTube, including viewing comments data and posting replies to comments. - -### Option 1: Limited permissions - -Establish a connection to YouTube from MindsDB by executing the below SQL command and following the Google authorization link provided as output: - -```sql -CREATE DATABASE mindsdb_youtube -WITH ENGINE = 'youtube', -PARAMETERS = { - "youtube_api_token": "" -}; -``` - - -Alternatively, you can connect YouTube to MindsDB via the form. - -To do that, click on the `Add` button, choose `New Datasource`, search for `YouTube`, and follow the instructions in the form. After providing the connection name and the YouTube API token, click on the `Test Connection` button. Once the connection is established, click on the `Save and Continue` button. - - -Required connection parameters include the following: - -* `youtube_api_token`: It is a Google API key used for authentication. Check out [this guide](https://blog.hubspot.com/website/how-to-get-youtube-api-key) on how to create the API key to access YouTube data. - -### Option 2: Elevated permissions - -Establish a connection to YouTube from MindsDB by executing the below SQL command and following the Google authorization link provided as output: - -```sql -CREATE DATABASE mindsdb_youtube -WITH ENGINE = 'youtube', -PARAMETERS = { - "credentials_file": "path-to-credentials-json-file" - -- alternatively, use the credentials_url parameter -}; -``` - - -Alternatively, you can connect YouTube to MindsDB via the form. - -To do that, click on the `Add` button, choose `New Datasource`, search for `YouTube`, and follow the instructions in the form. After providing the connection name and the credentials file or URL, click on the `Test Connection` button and complete the authorization process in the pop-up window. Once the connection is established, click on the `Save and Continue` button. 
- - -Required connection parameters include one of the following: - -* `credentials_file`: It is a path to a file generated from the Google Cloud Console, as described below. -* `credentials_url`: It is a URL to a file generated from the Google Cloud Console, as described below. - - - - 1. Open the Google Cloud Console. - - 2. Create a new project. - - 3. Inside this project, go to APIs & Services: - - - Go to Enabled APIs & services: - - Click on ENABLE APIS AND SERVICES from the top bar. - - Search for YouTube Data API v3 and enable it. - - - Go to OAuth consent screen: - - Click on GET STARTED. - - Provide app name and support email. - - Choose Audience based on who will be using the app. - - Add the Contact Information (email address) of the developer. - - Agree to the terms and click on CONTINUE. - - Click on Create. - - Click on Audience on the left sidebar and under Test users, add the email addresses of the users who will be testing the app. When you are ready to publish the app, you can come back here and click on PUBLISH APP and this app will become available to either the organization or the public based on the audience you have chosen. - - - Go to Credentials: - - Click on CREATE CREDENTIALS from the top bar and choose OAuth client ID. - - Choose type as `Web application` and provide a name. Under Authorized redirect URIs, enter URL where MindsDB has been deployed followed by `/verify-auth`. For example, if you are running MindsDB locally (on `https://localhost:47334`), enter `https://localhost:47334/verify-auth`. - - Click on CREATE. - - Download the JSON file that is required to connect YouTube to MindsDB. - - -## Usage - -Use the established connection to query the `comments` table. 
- -You can query for one video's comments: - -```sql -SELECT * -FROM mindsdb_youtube.comments -WHERE video_id = "raWFGQ20OfA"; -``` - -Or for one channel's comments: - -```sql -SELECT * -FROM mindsdb_youtube.comments -WHERE channel_id="UC-..."; -``` - -You can include ordering and limiting the output data: - -```sql -SELECT * FROM mindsdb_youtube.comments -WHERE video_id = "raWFGQ20OfA" -ORDER BY display_name ASC -LIMIT 5; -``` - -Use the established connection to query the `channels` table. - -```sql -SELECT * FROM mindsdb_youtube.channels -WHERE channel_id="UC-..."; -``` - -Here, the `channel_id` column is mandatory in the `WHERE` clause. - -Use the established connection to query the `videos` table. - -```sql -SELECT * FROM mindsdb_youtube.videos -WHERE video_id="id"; -``` - -Here, the `video_id` column is mandatory in the `WHERE` clause. - -With the connection option 2, you can insert replies to comments: - -```sql -INSERT INTO mindsdb_youtube.comments (comment_id, reply) -VALUES ("comment_id", "reply message"); -``` diff --git a/docs/integrations/data-integrations/airtable.mdx b/docs/integrations/data-integrations/airtable.mdx deleted file mode 100644 index bb27308973b..00000000000 --- a/docs/integrations/data-integrations/airtable.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: Airtable -sidebarTitle: Airtable ---- - -This is the implementation of the Airtable data handler for MindsDB. - -[Airtable](https://www.airtable.com/lp/campaign/database) is a platform that makes it easy to build powerful, custom applications. These tools can streamline just about any process, workflow, or project. And best of all, you can build them without ever learning to write a single line of code. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. 
To connect Airtable to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Airtable. - -## Implementation - -This handler is implemented using `duckdb`, a library that allows SQL queries to be executed on `pandas` DataFrames. - -In essence, when querying a particular table, the entire table is first pulled into a `pandas` DataFrame using the [Airtable API](https://airtable.com/api). Once this is done, SQL queries can be run on the DataFrame using `duckdb`. - -The required arguments to establish a connection are as follows: - -* `base_id` is the Airtable base ID. -* `table_name` is the Airtable table name. -* `api_key` is the API key for the Airtable API. - -## Usage - -In order to make use of this handler and connect to the Airtable database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE airtable_datasource -WITH - engine = 'airtable', - parameters = { - "base_id": "dqweqweqrwwqq", - "table_name": "iris", - "api_key": "knlsndlknslk" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM airtable_datasource.example_tbl; -``` - - -At the moment, only the `SELECT` statement is allowed to be executed through `duckdb`. This, however, has no restriction on running machine learning algorithms against your data in Airtable using the `CREATE MODEL` statement. - diff --git a/docs/integrations/data-integrations/amazon-aurora.mdx b/docs/integrations/data-integrations/amazon-aurora.mdx deleted file mode 100644 index 56c5e41049b..00000000000 --- a/docs/integrations/data-integrations/amazon-aurora.mdx +++ /dev/null @@ -1,85 +0,0 @@ ---- -title: Amazon Aurora -sidebarTitle: Amazon Aurora ---- - -This is the implementation of the Amazon Aurora handler for MindsDB. 
- -[Amazon Aurora](https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/CHAP_AuroraOverview.html) is a fully managed relational database engine that's compatible with MySQL and PostgreSQL. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Amazon Aurora to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Amazon Aurora. - -## Implementation - -This handler was implemented using the existing MindsDB handlers for MySQL and PostgreSQL. - -The required arguments to establish a connection are as follows: - -* `host`: the host name or IP address of the Amazon Aurora DB cluster. -* `port`: the TCP/IP port of the Amazon Aurora DB cluster. -* `user`: the username used to authenticate with the Amazon Aurora DB cluster. -* `password`: the password to authenticate the user with the Amazon Aurora DB cluster. -* `database`: the database name to use when connecting with the Amazon Aurora DB cluster. - -The optional arguments that can be used are as follows: - -* `db_engine`: the database engine of the Amazon Aurora DB cluster. This can take one of two values: 'mysql' or 'postgresql'. This parameter is optional, but if it is not provided, `aws_access_key_id` and `aws_secret_access_key` parameters must be provided. -* `aws_access_key_id`: the access key for the AWS account. This parameter is optional and is only required to be provided if the `db_engine` parameter is not provided. -* `aws_secret_access_key`: the secret key for the AWS account. This parameter is optional and is only required to be provided if the `db_engine` parameter is not provided. 
- -## Usage - -In order to make use of this handler and connect to an Amazon Aurora MySQL DB Cluster in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE aurora_mysql_datasource -WITH - engine = 'aurora', - parameters = { - "db_engine": "mysql", - "host": "mysqlcluster.cluster-123456789012.us-east-1.rds.amazonaws.com", - "port": 3306, - "user": "admin", - "password": "password", - "database": "example_db" - }; -``` - -Now, you can use this established connection to query your database as follows: - -```sql -SELECT * -FROM aurora_mysql_datasource.example_table; -``` - -Similar commands can be used to establish a connection and query an Amazon Aurora PostgreSQL DB Cluster: - -~~~~sql -CREATE DATABASE aurora_postgres_datasource -WITH - engine = 'aurora', - parameters = { - "db_engine": "postgresql", - "host": "postgresmycluster.cluster-123456789012.us-east-1.rds.amazonaws.com", - "port": 5432, - "user": "postgres", - "password": "password", - "database": "example_db" - }; - -SELECT * FROM aurora_postgres_datasource.example_table; -~~~~ - - -If you want to switch to a different database, you can include it in your query as: -```sql -SELECT * -FROM aurora_datasource.new_database.example_table; -``` - \ No newline at end of file diff --git a/docs/integrations/data-integrations/amazon-dynamodb.mdx b/docs/integrations/data-integrations/amazon-dynamodb.mdx deleted file mode 100644 index 6c11ff6c119..00000000000 --- a/docs/integrations/data-integrations/amazon-dynamodb.mdx +++ /dev/null @@ -1,103 +0,0 @@ ---- -title: Amazon DynamoDB -sidebarTitle: Amazon DynamoDB ---- - -This documentation describes the integration of MindsDB with [Amazon DynamoDB](https://aws.amazon.com/dynamodb/), a serverless, NoSQL database service that enables you to develop modern applications at any scale. - - -This data source integration is thread-safe, utilizing a connection pool where each thread is assigned its own connection. 
When handling requests in parallel, threads retrieve connections from the pool as needed. - - -## Prerequisites - -Before proceeding, ensure that MindsDB is installed locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). - -## Connection - -Establish a connection to your Amazon DynamoDB from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE dynamodb_datasource -WITH - engine = 'dynamodb', - parameters = { - "aws_access_key_id": "PCAQ2LJDOSWLNSQKOCPW", - "aws_secret_access_key": "U/VjewPlNopsDmmwItl34r2neyC6WhZpUiip57i", - "region_name": "us-east-1" - }; -``` - -Required connection parameters include the following: - -* `aws_access_key_id`: The AWS access key that identifies the user or IAM role. -* `aws_secret_access_key`: The AWS secret access key that identifies the user or IAM role. -* `region_name`: The AWS region to connect to. - -Optional connection parameters include the following: - -* `aws_session_token`: The AWS session token that identifies the user or IAM role. This becomes necessary when using temporary security credentials. - -## Usage - -Retrieve data from a specified table by providing the integration name and the table name: - -```sql -SELECT * -FROM dynamodb_datasource.table_name -LIMIT 10; -``` - -Indexes can also be queried by adding a third-level namespace: - -```sql -SELECT * -FROM dynamodb_datasource.table_name.index_name -LIMIT 10; -``` - - -The queries issued to Amazon DynamoDB are in PartiQL, a SQL-compatible query language for Amazon DynamoDB. For more information, refer to the [PartiQL documentation](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/ql-reference.html). - -There are a few limitations to keep in mind when querying data from Amazon DynamoDB (some of which are specific to PartiQL): -- The `LIMIT`, `GROUP BY` and `HAVING` clauses are not supported in PartiQL `SELECT` statements. Furthermore, subqueries and joins are not supported either. 
Refer to the [PartiQL documentation for SELECT statements](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/ql-reference.select.html) for more information. -- `INSERT` statements are not supported by this integration. However, this can be overcome by issuing a 'native query' via an established connection. An example of this is provided below. - - -Run PartiQL queries directly on Amazon DynamoDB: - -```sql -SELECT * FROM dynamodb_datasource ( - - --Native Query Goes Here - INSERT INTO "Music" value {'Artist' : 'Acme Band1','SongTitle' : 'PartiQL Rocks'} - -); -``` - - -The above examples utilize `dynamodb_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with Amazon DynamoDB. -* **Checklist**: - 1. Confirm that the provided AWS credentials are correct. Try making a direct connection to Amazon DynamoDB using the AWS CLI. - 2. Ensure a stable network between MindsDB and AWS. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing special characters. -* **Checklist**: - 1. Ensure table names with special characters are enclosed in backticks. - 2. 
Examples: - * Incorrect: SELECT * FROM integration.travel-data - * Incorrect: SELECT * FROM integration.'travel-data' - * Correct: SELECT * FROM integration.\`travel-data\` - \ No newline at end of file diff --git a/docs/integrations/data-integrations/amazon-redshift.mdx b/docs/integrations/data-integrations/amazon-redshift.mdx deleted file mode 100644 index 1b63cac786c..00000000000 --- a/docs/integrations/data-integrations/amazon-redshift.mdx +++ /dev/null @@ -1,96 +0,0 @@ ---- -title: Amazon Redshift -sidebarTitle: Amazon Redshift ---- - -This documentation describes the integration of MindsDB with [Amazon Redshift](https://docs.aws.amazon.com/redshift/latest/mgmt/welcome.html), a fully managed, petabyte-scale data warehouse service in the cloud. You can start with just a few hundred gigabytes of data and scale to a petabyte or more, enabling you to use your data to acquire new insights for your business and customers. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Redshift to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to your Redshift database from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE redshift_datasource -WITH - engine = 'redshift', - parameters = { - "host": "examplecluster.abc123xyz789.us-west-1.redshift.amazonaws.com", - "port": 5439, - "database": "example_db", - "user": "awsuser", - "password": "my_password" - }; -``` - -Required connection parameters include the following: - -* `host`: The host name or IP address of the Redshift cluster. -* `port`: The port to use when connecting with the Redshift cluster. -* `database`: The database name to use when connecting with the Redshift cluster. 
-* `user`: The username to authenticate the user with the Redshift cluster. -* `password`: The password to authenticate the user with the Redshift cluster. - -Optional connection parameters include the following: - -* `schema`: The database schema to use. Default is public. -* `sslmode`: The SSL mode for the connection. - -## Usage - -Retrieve data from a specified table by providing the integration name, schema, and table name: - -```sql -SELECT * -FROM redshift_datasource.schema_name.table_name -LIMIT 10; -``` - -Run Amazon Redshift SQL queries directly on the connected Redshift database: - -```sql -SELECT * FROM redshift_datasource ( - - --Native Query Goes Here - WITH VENUECOPY AS (SELECT * FROM VENUE) - SELECT * FROM VENUECOPY ORDER BY 1 LIMIT 10; - -); -``` - - -The above examples utilize `redshift_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the Amazon Redshift cluster. -* **Checklist**: - 1. Make sure the Redshift cluster is active. - 2. Confirm that host, port, user, password and database are correct. Try a direct Redshift connection using a client like DBeaver. - 3. Ensure that the security settings of the Redshift cluster allow connections from MindsDB. - 4. Ensure a stable network between MindsDB and Redshift. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - 2. 
Examples: - * Incorrect: SELECT * FROM integration.travel data - * Incorrect: SELECT * FROM integration.'travel data' - * Correct: SELECT * FROM integration.\`travel data\` - - -This [troubleshooting guide](https://docs.aws.amazon.com/redshift/latest/mgmt/troubleshooting-connections.html) provided by AWS might also be helpful. \ No newline at end of file diff --git a/docs/integrations/data-integrations/amazon-s3.mdx b/docs/integrations/data-integrations/amazon-s3.mdx deleted file mode 100644 index 33ec87f05c6..00000000000 --- a/docs/integrations/data-integrations/amazon-s3.mdx +++ /dev/null @@ -1,107 +0,0 @@ ---- -title: Amazon S3 -sidebarTitle: Amazon S3 ---- - -This documentation describes the integration of MindsDB with [Amazon S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/Welcome.html), an object storage service that offers industry-leading scalability, data availability, security, and performance. - - -This data source integration is thread-safe, utilizing a connection pool where each thread is assigned its own connection. When handling requests in parallel, threads retrieve connections from the pool as needed. - - -## Prerequisites - -Before proceeding, ensure that MindsDB is installed locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). - -## Connection - -Establish a connection to your Amazon S3 bucket from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE s3_datasource -WITH - engine = 's3', - parameters = { - "aws_access_key_id": "AQAXEQK89OX07YS34OP", - "aws_secret_access_key": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", - "bucket": "my-bucket" - }; -``` - - -Note that sample parameter values are provided here for reference, and you should replace them with your connection parameters. - - -Required connection parameters include the following: - -* `aws_access_key_id`: The AWS access key that identifies the user or IAM role. 
-* `aws_secret_access_key`: The AWS secret access key that identifies the user or IAM role. - -Optional connection parameters include the following: - -* `aws_session_token`: The AWS session token that identifies the user or IAM role. This becomes necessary when using temporary security credentials. -* `bucket`: The name of the Amazon S3 bucket. If not provided, all available buckets can be queried, however, this can affect performance, especially when listing all of the available objects. - -## Usage - -Retrieve data from a specified object (file) in an S3 bucket by providing the integration name and the object key: - -```sql -SELECT * -FROM s3_datasource.`my-file.csv` -LIMIT 10; -``` - - -If a bucket name is provided in the `CREATE DATABASE` command, querying will be limited to that bucket and the bucket name can be omitted from the object key as shown in the example above. However, if the bucket name is not provided, the object key must include the bucket name, such as ``s3_datasource.`my-bucket/my-folder/my-file.csv` ``. - -Wrap the object key in backticks (\`) to avoid any issues parsing the SQL statements provided. This is especially important when the object key contains spaces, special characters or prefixes, such as `my-folder/my-file.csv`. - -At the moment, the supported file formats are CSV, TSV, JSON, and Parquet. - - - -The above examples utilize `s3_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -The special `files` table can be used to list all objects available in the specified bucket or all buckets if the bucket name is not provided: - -```sql -SELECT * -FROM s3_datasource.files LIMIT 10 -``` - -The content of files can also be retrieved by explicitly requesting the `content` column. 
This column is empty by default to avoid unnecessary data transfer: - -```sql -SELECT path, content -FROM s3_datasource.files LIMIT 10 -``` - - -This table will return all objects regardless of the file format, however, only the supported file formats mentioned above can be queried. - - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the Amazon S3 bucket. -* **Checklist**: - 1. Make sure the Amazon S3 bucket exists. - 2. Confirm that provided AWS credentials are correct. Try making a direct connection to the S3 bucket using the AWS CLI. - 3. Ensure a stable network between MindsDB and AWS. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing object names containing spaces, special characters or prefixes. -* **Checklist**: - 1. Ensure object names with spaces, special characters or prefixes are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel/travel_data.csv - * Incorrect: SELECT * FROM integration.'travel/travel_data.csv' - * Correct: SELECT * FROM integration.\`travel/travel_data.csv\` - \ No newline at end of file diff --git a/docs/integrations/data-integrations/apache-cassandra.mdx b/docs/integrations/data-integrations/apache-cassandra.mdx deleted file mode 100644 index faf51c0231e..00000000000 --- a/docs/integrations/data-integrations/apache-cassandra.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: Apache Cassandra -sidebarTitle: Apache Cassandra ---- - -This is the implementation of the Cassandra data handler for MindsDB. - -[Cassandra](https://cassandra.apache.org/_/index.html) is a free and open-source, distributed, wide-column store, NoSQL database management system designed to handle large amounts of data across many commodity servers, providing high availability with no single point of failure. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. 
Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Apache Cassandra to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Apache Cassandra. - -## Implementation - -As ScyllaDB is API-compatible with Apache Cassandra, the Cassandra data handler extends the ScyllaDB handler and uses the `scylla-driver` Python library. - -The required arguments to establish a connection are as follows: - -* `host` is the host name or IP address of the Cassandra database. -* `port` is the port to use when connecting. -* `user` is the user to authenticate. -* `password` is the password to authenticate the user. -* `keyspace` is the keyspace to connect to, the top-level container for tables. -* `protocol_version` is not required and defaults to 4. - -## Usage - -In order to make use of this handler and connect to the Cassandra server in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE cassandra_datasource -WITH - engine = "cassandra", - parameters = { - "host": "127.0.0.1", - "port": "9043", - "user": "user", - "password": "pass", - "keyspace": "test_data", - "protocol_version": 4 - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM cassandra_datasource.example_table LIMIT 10; -``` diff --git a/docs/integrations/data-integrations/apache-druid.mdx b/docs/integrations/data-integrations/apache-druid.mdx deleted file mode 100644 index 5fbb160ef53..00000000000 --- a/docs/integrations/data-integrations/apache-druid.mdx +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: Apache Druid -sidebarTitle: Apache Druid ---- - -This is the implementation of the Druid data handler for MindsDB. - -[Apache Druid](https://druid.apache.org/docs/latest/design) is a real-time analytics database designed for fast slice-and-dice analytics (_OLAP_ queries) on large data sets. 
Most often, Druid powers use cases where real-time ingestion, fast query performance, and high uptime are important. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Apache Druid to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Apache Druid. - -## Implementation - -This handler was implemented using the `pydruid` library, the Python API for Apache Druid. - -The required arguments to establish a connection are as follows: - -* `host` is the host name or IP address of the Apache Druid database. -* `port` is the port that Apache Druid is running on. -* `path` is the query path. -* `scheme` is the URI schema. This parameter is optional and defaults to `http`. -* `user` is the username used to authenticate with Apache Druid. This parameter is optional. -* `password` is the password used to authenticate with Apache Druid. This parameter is optional. 
- -## Usage - -In order to make use of this handler and connect to Apache Druid in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE druid_datasource -WITH - engine = 'druid', - parameters = { - "host": "localhost", - "port": 8888, - "path": "/druid/v2/sql/", - "scheme": "http" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM druid_datasource.example_tbl; -``` diff --git a/docs/integrations/data-integrations/apache-hive.mdx b/docs/integrations/data-integrations/apache-hive.mdx deleted file mode 100644 index cbac1914208..00000000000 --- a/docs/integrations/data-integrations/apache-hive.mdx +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: Apache Hive -sidebarTitle: Apache Hive ---- - -This documentation describes the integration of MindsDB with [Apache Hive](https://hive.apache.org/), a data warehouse software project built on top of Apache Hadoop for providing data query and analysis. Hive gives an SQL-like interface to query data stored in various databases and file systems that integrate with Hadoop. -The integration allows MindsDB to access data from Apache Hive and enhance Apache Hive with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect Apache Hive to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to Apache Hive from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/hive_handler) as an engine. 
- -```sql -CREATE DATABASE hive_datasource -WITH - engine = 'hive', - parameters = { - "username": "demo_user", - "password": "demo_password", - "host": "127.0.0.1", - "database": "default" - }; -``` - -Required connection parameters include the following: - -* `host`: The hostname, IP address, or URL of the Apache Hive server. -* `database`: The name of the Apache Hive database to connect to. - -Optional connection parameters include the following: - -* `username`: The username for the Apache Hive database. -* `password`: The password for the Apache Hive database. -* `port`: The port number for connecting to the Apache Hive server. Default is `10000`. -* `auth`: The authentication mechanism to use. Default is `CUSTOM`. Other options are `NONE`, `NOSASL`, `KERBEROS` and `LDAP`. - -## Usage - -Retrieve data from a specified table by providing the integration and table names: - -```sql -SELECT * -FROM hive_datasource.table_name -LIMIT 10; -``` - -Run HiveQL queries directly on the connected Apache Hive database: - -```sql -SELECT * FROM hive_datasource ( - - --Native Query Goes Here - FROM (FROM (FROM src - SELECT TRANSFORM(value) - USING 'mapper' - AS value, count) mapped - SELECT cast(value as double) AS value, cast(count as int) AS count - SORT BY value, count) sorted - SELECT TRANSFORM(value, count) - USING 'reducer' - AS whatever - -); -``` - - -The above examples utilize `hive_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Troubleshooting - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the Apache Hive database. -* **Checklist**: - 1. Ensure that the Apache Hive server is running and accessible - 2. Confirm that host, port, user, and password are correct. Try a direct Apache Hive connection using a client like DBeaver. - 3. Test the network connection between the MindsDB host and the Apache Hive server. 
- diff --git a/docs/integrations/data-integrations/apache-ignite.mdx b/docs/integrations/data-integrations/apache-ignite.mdx deleted file mode 100644 index 343d0c139ca..00000000000 --- a/docs/integrations/data-integrations/apache-ignite.mdx +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Apache Ignite -sidebarTitle: Apache Ignite ---- - -This is the implementation of the Apache Ignite data handler for MindsDB. - -[Apache Ignite](https://ignite.apache.org/docs/latest/) is a distributed database for high-performance computing with in-memory speed. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Apache Ignite to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Apache Ignite. - -## Implementation - -This handler is implemented using the `pyignite` library, the Apache Ignite thin (binary protocol) client for Python. - -The required arguments to establish a connection are as follows: - -* `host` is the host name or IP address of the Apache Ignite cluster's node. -* `port` is the TCP/IP port of the Apache Ignite cluster's node. Must be an integer. - -There are several optional arguments that can be used as well, - -* `username` is the username used to authenticate with the Apache Ignite cluster. This parameter is optional. Default: None. -* `password` is the password to authenticate the user with the Apache Ignite cluster. This parameter is optional. Default: None. -* `schema` is the schema to use for the connection to the Apache Ignite cluster. This parameter is optional. Default: PUBLIC. 
- -## Usage - -In order to make use of this handler and connect to an Apache Ignite database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE ignite_datasource -WITH - ENGINE = 'ignite', - PARAMETERS = { - "host": "127.0.0.1", - "port": 10800, - "username": "admin", - "password": "password", - "schema": "example_schema" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM ignite_datasource.demo_table -LIMIT 10; -``` - - -Currently, a connection can be established only to a single node in the cluster. In the future, we'll configure the client to automatically fail over to another node if the connection to the current node fails or times out by providing the hosts and ports for many nodes as explained [here](https://ignite.apache.org/docs/latest/thin-clients/python-thin-client). - \ No newline at end of file diff --git a/docs/integrations/data-integrations/apache-impala.mdx b/docs/integrations/data-integrations/apache-impala.mdx deleted file mode 100644 index 7fd8e8eee62..00000000000 --- a/docs/integrations/data-integrations/apache-impala.mdx +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: Apache Impala -sidebarTitle: Apache Impala ---- - -This is the implementation of the Impala data handler for MindsDB. - -[Apache Impala](https://impala.apache.org/) is an MPP (Massive Parallel Processing) SQL query engine for processing huge volumes of data that is stored in the Apache Hadoop cluster. It is an open source software written in C++ and Java. It provides high performance and low latency compared to other SQL engines for Hadoop. In other words, Impala is the highest performing SQL engine (giving RDBMS-like experience) that provides the fastest way to access data stored in Hadoop Distributed File System. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. 
Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Apache Impala to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Apache Impala. - -## Implementation - -This handler is implemented using `impyla`, a Python library that allows you to use Python code to run SQL commands on Impala. - -The required arguments to establish a connection are: - -* `user` is the username associated with the database. -* `password` is the password to authenticate your access. -* `host` is the server IP address or hostname. -* `port` is the port through which TCP/IP connection is to be made. -* `database` is the database name to be connected. - -## Usage - -In order to make use of this handler and connect to the Impala database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE impala_datasource -WITH - engine = 'impala', - parameters = { - "user":"root", - "password":"p@55w0rd", - "host":"127.0.0.1", - "port":21050, - "database":"Db_NamE" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM impala_datasource.TEST; -``` diff --git a/docs/integrations/data-integrations/apache-pinot.mdx b/docs/integrations/data-integrations/apache-pinot.mdx deleted file mode 100644 index a5f9717f52b..00000000000 --- a/docs/integrations/data-integrations/apache-pinot.mdx +++ /dev/null @@ -1,51 +0,0 @@ ---- -title: Apache Pinot -sidebarTitle: Apache Pinot ---- - -This is the implementation of the Pinot data handler for MindsDB. - -[Apache Pinot](https://pinot.apache.org/) is a real-time distributed OLAP database designed for low-latency query execution even at extremely high throughput. Apache Pinot can ingest directly from streaming sources like Apache Kafka and make events available for querying immediately. 
- -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Apache Pinot to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Apache Pinot. - -## Implementation - -This handler was implemented using the `pinotdb` library, the Python DB-API and SQLAlchemy dialect for Pinot. - -The required arguments to establish a connection are as follows: - -* `host` is the host name or IP address of the Apache Pinot cluster. -* `broker_port` is the port that the Broker of the Apache Pinot cluster is running on. -* `controller_port` is the port that the Controller of the Apache Pinot cluster is running on. -* `path` is the query path. - -## Usage - -In order to make use of this handler and connect to the Pinot cluster in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE pinot_datasource -WITH - engine = 'pinot', - parameters = { - "host":"localhost", - "broker_port": 8000, - "controller_port": 9000, - "path": "/query/sql", - "scheme": "http" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM pinot_datasource.example_tbl; -``` diff --git a/docs/integrations/data-integrations/apache-solr.mdx b/docs/integrations/data-integrations/apache-solr.mdx deleted file mode 100644 index 29a90d1bf56..00000000000 --- a/docs/integrations/data-integrations/apache-solr.mdx +++ /dev/null @@ -1,73 +0,0 @@ ---- -title: Apache Solr -sidebarTitle: Apache Solr ---- - -This is the implementation of the Solr data handler for MindsDB. 
- -[Apache Solr](https://solr.apache.org/) is highly reliable, scalable, and fault tolerant, providing distributed indexing, replication and load-balanced querying, automated failover and recovery, centralized configuration, and more. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Apache Solr to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Apache Solr. - -## Implementation - -This handler is implemented using the `sqlalchemy-solr` library, which provides a Python/SQLAlchemy interface. - -The required arguments to establish a connection are as follows: - -* `username` is the username used to authenticate with the Solr server. This parameter is optional. -* `password` is the password to authenticate the user with the Solr server. This parameter is optional. -* `host` is the host name or IP address of the Solr server. -* `port` is the port number of the Solr server. -* `server_path` defaults to `solr` if not provided. -* `collection` is the Solr Collection name. -* `use_ssl` defaults to `false` if not provided. - - -Further reference: [https://pypi.org/project/sqlalchemy-solr/](https://pypi.org/project/sqlalchemy-solr/).
- -## Usage - -In order to make use of this handler and connect to the Solr database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE solr_datasource -WITH - engine = 'solr', - parameters = { - "username": "demo_user", - "password": "demo_password", - "host": "127.0.0.1", - "port": "8981", - "server_path": "solr", - "collection": "gettingstarted", - "use_ssl": "false" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM solr_datasource.gettingstarted -LIMIT 10000; -``` - - -**Requirements** - -A running Solr instance with Parallel SQL support. - -There are certain limitations that need to be taken into account when issuing queries to Solr. Refer to [https://solr.apache.org/guide/solr/latest/query-guide/sql-query.html#parallel-sql-queries](https://solr.apache.org/guide/solr/latest/query-guide/sql-query.html#parallel-sql-queries). - - - -Don't forget to put a `LIMIT` clause at the end of the SQL statement. - diff --git a/docs/integrations/data-integrations/ckan.mdx b/docs/integrations/data-integrations/ckan.mdx deleted file mode 100644 index 3cf4e346171..00000000000 --- a/docs/integrations/data-integrations/ckan.mdx +++ /dev/null @@ -1,78 +0,0 @@ -## CKAN Integration handler - -This handler facilitates integration with [CKAN](https://ckan.org/), -an open-source data catalog platform for managing and publishing open data. CKAN organizes datasets and stores data in its [DataStore](http://docs.ckan.org/en/2.11/maintaining/datastore.html). To retrieve data from CKAN, the [CKANAPI](https://github.com/ckan/ckanapi) must be used. - -# Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2.
To connect CKAN to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -The CKAN handler is included with MindsDB by default, so no additional installation is required. - -## Configuration - -To use the CKAN handler, you need to provide the URL of the CKAN instance you want to connect to. You can do this by setting the `CKAN_URL` environment variable. For example: - -```sql -CREATE DATABASE ckan_datasource -WITH ENGINE = 'ckan', -PARAMETERS = { - "url": "https://your-ckan-instance-url.com", - "api_key": "your-api-key-if-required" -}; -``` - -> **_NOTE:_** Some CKAN instances will require you to provide an API Token. You can create one in the CKAN user panel. - -## Usage - -The CKAN handler provides three main tables: - -- `datasets`: Lists all datasets in the CKAN instance. -- `resources`: Lists all resource metadata across all packages. -- `datastore`: Allows querying individual datastore resources. - -## Example Queries - -1. List all datasets: - - ```sql - SELECT * FROM `your-datasource`.datasets; - ``` - -2. List all resources: - - ```sql - SELECT * FROM `your-datasource`.resources ; - ``` - -3. Query a specific datastore resource: - - ```sql - SELECT * FROM `your-datasource`.datastore WHERE resource_id = 'your-resource-id'; - ``` - -Replace `your-resource-id` with the actual resource ID you want to query. - -## Querying Large Resources - -The CKAN handler supports automatic pagination when querying datastore resources. This allows you to retrieve large datasets without worrying about API limits. - -You can still use the `LIMIT` clause to limit the number of rows returned by the query. For example: - -```sql -SELECT * FROM ckan_datasource.datastore -WHERE resource_id = 'your-resource-id-here' -LIMIT 1000; -``` - -## Limitations - -- The handler currently supports read operations only. Write operations are not supported.
-- Performance may vary depending on the size of the CKAN instance and the complexity of your queries. -- The handler may not work with all CKAN instances, especially those with custom configurations. -- The handler does not support all CKAN API features. Some advanced features may not be available. -- The datastore search will return limited records up to 32000. Please refer to the [CKAN API](https://docs.ckan.org/en/2.11/maintaining/datastore.html#ckanext.datastore.logic.action.datastore_search_sql) documentation for more information. diff --git a/docs/integrations/data-integrations/clickhouse.mdx b/docs/integrations/data-integrations/clickhouse.mdx deleted file mode 100644 index 0fb1971722b..00000000000 --- a/docs/integrations/data-integrations/clickhouse.mdx +++ /dev/null @@ -1,85 +0,0 @@ ---- -title: ClickHouse -sidebarTitle: ClickHouse ---- - -This documentation describes the integration of MindsDB with [ClickHouse](https://clickhouse.com/docs/en/intro), a high-performance, column-oriented SQL database management system (DBMS) for online analytical processing (OLAP). -The integration allows MindsDB to access data from ClickHouse and enhance ClickHouse with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect ClickHouse to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to ClickHouse from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/clickhouse_handler) as an engine. 
- -```sql -CREATE DATABASE clickhouse_conn -WITH ENGINE = 'clickhouse', -PARAMETERS = { - "host": "127.0.0.1", - "port": "8443", - "user": "root", - "password": "mypass", - "database": "test_data", - "protocol" : "https" - } -``` - -Required connection parameters include the following: - -* `host`: is the hostname or IP address of the ClickHouse server. -* `port`: is the TCP/IP port of the ClickHouse server. -* `user`: is the username used to authenticate with the ClickHouse server. -* `password`: is the password to authenticate the user with the ClickHouse server. -* `database`: defaults to `default`. It is the database name to use when connecting with the ClickHouse server. -* `protocol`: defaults to `native`. It is an optional parameter. Its supported values are `native`, `http` and `https`. - -## Usage - -The following usage examples utilize the connection to ClickHouse made via the `CREATE DATABASE` statement and named `clickhouse_conn`. - -Retrieve data from a specified table by providing the integration and table name. - -```sql -SELECT * -FROM clickhouse_conn.table_name -LIMIT 10; -``` - -## Troubleshooting - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the ClickHouse database. -* **Checklist**: - 1. Ensure that the ClickHouse server is running and accessible. - 2. Confirm that host, port, user, and password are correct. Try a direct ClickHouse connection. - 3. Test the network connection between the MindsDB host and the ClickHouse server. - - - -`Slow Connection Initialization` - -* **Symptoms**: Connecting to the ClickHouse server takes an exceptionally long time, or connections hang without completing. -* **Checklist**: - 1. Ensure that you are using the appropriate protocol (http, https, or native) for your ClickHouse setup. Misconfigurations here can lead to significant delays. - 2.
Ensure that firewalls or security groups (in cloud environments) are properly configured to allow traffic on the necessary ports (as 8123 for HTTP or 9000 for native). - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces, reserved words or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel data - * Incorrect: SELECT * FROM integration.'travel data' - * Correct: SELECT * FROM integration.\`travel data\` - diff --git a/docs/integrations/data-integrations/cloud-spanner.mdx b/docs/integrations/data-integrations/cloud-spanner.mdx deleted file mode 100644 index 8a2afaecedc..00000000000 --- a/docs/integrations/data-integrations/cloud-spanner.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: Cloud Spanner -sidebarTitle: Cloud Spanner ---- - -This is the implementation of the Cloud Spanner data handler for MindsDB. - -[Cloud Spanner](https://cloud.google.com/spanner) is a fully managed, mission-critical, relational database service that offers transactional consistency at global scale, automatic, synchronous replication for high availability. It supports two SQL dialects: GoogleSQL (ANSI 2011 with extensions) and PostgreSQL. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Cloud Spanner to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Cloud Spanner. - -## Implementation - -This handler was implemented using the `google-cloud-spanner` Python client library. - -The required arguments to establish a connection are as follows: - -* `instance_id` is the instance identifier. 
-* `database_id` is the database identifier. -* `project` is the identifier of the project that owns the resources. -* `credentials` is a stringified GCP service account key JSON. - -## Usage - -In order to make use of this handler and connect to the Cloud Spanner database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE cloud_spanner_datasource -WITH - engine = 'cloud_spanner', - parameters = { - "instance_id": "my-instance", - "database_id": "example-id", - "project": "my-project", - "credentials": "{...}" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM cloud_spanner_datasource.my_table; -``` - - -Cloud Spanner supports both PostgreSQL and GoogleSQL dialects. However, not all PostgreSQL features are supported. - diff --git a/docs/integrations/data-integrations/cockroachdb.mdx b/docs/integrations/data-integrations/cockroachdb.mdx deleted file mode 100644 index d241cb72334..00000000000 --- a/docs/integrations/data-integrations/cockroachdb.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: CockroachDB -sidebarTitle: CockroachDB ---- - -This is the implementation of the CockroachDB data handler for MindsDB. - -[CockroachDB](https://www.cockroachlabs.com/docs/) was architected for complex, high-performance distributed writes and delivers scale-out read capability. CockroachDB delivers simple relational SQL transactions and obscures complexity away from developers. It is wire-compatible with PostgreSQL and provides a familiar and easy interface for developers. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect CockroachDB to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to CockroachDB.
- -## Implementation - -CockroachDB is wire-compatible with PostgreSQL. Therefore, its implementation extends the PostgreSQL handler. - -The required arguments to establish a connection are as follows: - -* `host` is the host name or IP address of the CockroachDB. -* `database` is the name of the database to connect to. -* `user` is the user to authenticate with the CockroachDB. -* `port` is the port to use when connecting. -* `password` is the password to authenticate the user. - -In order to make use of this handler and connect to the CockroachDB server in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE cockroachdb -WITH - engine = 'cockroachdb', - parameters = { - "host": "localhost", - "database": "dbname", - "user": "admin", - "password": "password", - "port": "5432" - }; -``` - -## Usage - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM cockroachdb.public.db; -``` - - diff --git a/docs/integrations/data-integrations/couchbase.mdx b/docs/integrations/data-integrations/couchbase.mdx deleted file mode 100644 index f4e8a224c26..00000000000 --- a/docs/integrations/data-integrations/couchbase.mdx +++ /dev/null @@ -1,57 +0,0 @@ ---- -title: Couchbase -sidebarTitle: Couchbase ---- - -This is the implementation of the Couchbase data handler for MindsDB. - -[Couchbase](https://www.couchbase.com/) is an open-source, distributed multi-model NoSQL document-oriented database software package optimized for interactive applications. These applications may serve many concurrent users by creating, storing, retrieving, aggregating, manipulating, and presenting data. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. 
To connect Couchbase to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Couchbase. - -## Implementation - -This handler is implemented using the `couchbase` library, the Python driver for Couchbase. - -The required arguments to establish a connection are as follows: -* `connection_string`: the connection string for the endpoint of the Couchbase server -* `bucket`: the bucket name to use when connecting with the Couchbase server -* `user`: the user to authenticate with the Couchbase server -* `password`: the password to authenticate the user with the Couchbase server -* `scope`: scopes are a level of data organization within a bucket. If omitted, will default to `_default` - -Note: The connection string expects either the couchbases:// or couchbase:// protocol. - - -If you are using Couchbase Capella, you can find the `connection_string` under the Connect tab. -It will also be required to whitelist the machine(s) that will be running MindsDB and database credentials will need to be created for the user. These steps can also be taken under the Connect tab. - - -In order to make use of this handler and connect to a Couchbase server in MindsDB, the following syntax can be used. Note that the example uses the default `travel-sample` bucket, which can be enabled from the Couchbase UI with pre-defined scope and documents.
- -```sql -CREATE DATABASE couchbase_datasource -WITH -engine='couchbase', -parameters={ - "connection_string": "couchbase://localhost", - "bucket": "travel-sample", - "user": "admin", - "password": "password", - "scope": "inventory" -}; -``` - -## Usage - -Now, you can use this established connection to query your database as follows: - -```sql -SELECT * FROM couchbase_datasource.airport -``` diff --git a/docs/integrations/data-integrations/cratedb.mdx b/docs/integrations/data-integrations/cratedb.mdx deleted file mode 100644 index 1d93e4dab69..00000000000 --- a/docs/integrations/data-integrations/cratedb.mdx +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: CrateDB -sidebarTitle: CrateDB ---- - -This is the implementation of the CrateDB data handler for MindsDB. - -[CrateDB](https://crate.io/) is a distributed SQL database management system that integrates a fully searchable document-oriented data store. It is open-source, written in Java, based on a shared-nothing architecture, and designed for high scalability. CrateDB includes components from Lucene, Elasticsearch and Netty. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect CrateDB to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to CrateDB. - -## Implementation - -This handler is implemented using `crate`, a Python library that allows you to use Python code to run SQL commands on CrateDB. - -The required arguments to establish a connection are as follows: - -* `user` is the username associated with the database. -* `password` is the password to authenticate your access. -* `host` is the hostname or IP address of the server. -* `port` is the port through which the connection is to be made. -* `schema_name` is the schema name to get tables from.
Defaults to `doc`. - -## Usage - -In order to make use of this handler and connect to the CrateDB database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE crate_datasource -WITH - engine = 'crate', - parameters = { - "user": "crate", - "password": "", - "host": "127.0.0.1", - "port": 4200, - "schema_name": "doc" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM crate_datasource.demo; -``` diff --git a/docs/integrations/data-integrations/d0lt.mdx b/docs/integrations/data-integrations/d0lt.mdx deleted file mode 100644 index 00be7236072..00000000000 --- a/docs/integrations/data-integrations/d0lt.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: D0lt -sidebarTitle: D0lt ---- - -This is the implementation of the D0lt data handler for MindsDB. - -[D0lt](https://docs.dolthub.com/introduction/what-is-dolt) is a single-node and embedded DBMS that incorporates Git-style versioning as a first-class entity. D0lt behaves like Git - it is a content-addressable local database where the main objects are tables instead of files. In D0lt, a user creates a database locally. The database contains tables that can be read and updated using SQL. Similar to Git, writes are staged until the user issues a commit. Upon commit, the writes are appended to permanent storage. - -Branch and merge semantics are supported allowing for the tables to evolve at a different pace for multiple users. This allows for loose collaboration on data as well as multiple views on the same core data. Merge conflicts are detected for schema and data conflicts. Data conflicts are cell-based, not line-based. Remote repositories allow for cooperation among repository instances. Clone, push, and pull semantics are all available. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. 
Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect D0lt to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to D0lt. - -## Implementation - -This handler is implemented using `mysql-connector`, a Python library that allows you to use Python code to run SQL commands on the D0lt database. - -The required arguments to establish a connection are as follows: - -* `user` is the username associated with the database. -* `password` is the password to authenticate your access. -* `host` is the hostname or IP address of the server. -* `port` is the port through which a TCP/IP connection is to be made. -* `database` is the database name to be connected. - -## Usage - -In order to make use of this handler and connect to the D0lt database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE d0lt_datasource -WITH - engine = 'd0lt', - parameters = { - "user": "root", - "password": "", - "host": "127.0.0.1", - "port": 3306, - "database": "information_schema" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM D0lt_datasource.TEST; -``` diff --git a/docs/integrations/data-integrations/databend.mdx b/docs/integrations/data-integrations/databend.mdx deleted file mode 100644 index f7aa87f0bec..00000000000 --- a/docs/integrations/data-integrations/databend.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: Databend -sidebarTitle: Databend ---- - -This is the implementation of the Databend data handler for MindsDB. - -[Databend](https://databend.rs/) is a modern cloud data warehouse that empowers your object storage for real-time analytics. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. 
Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Databend to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Databend. - -## Implementation - -This handler is implemented by extending the ClickHouse handler. - -The required arguments to establish a connection are as follows: - -* `protocol` is the protocol to query Databend. Supported values include `native`, `http`, `https`. It defaults to `native` if not provided. -* `host` is the host name or IP address of the Databend warehouse. -* `port` is the TCP/IP port of the Databend warehouse. -* `user` is the username used to authenticate with the Databend warehouse. -* `password` is the password to authenticate the user with the Databend warehouse. -* `database` is the database name to use when connecting with the Databend warehouse. - -## Usage - -In order to make use of this handler and connect to the Databend database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE databend_datasource -WITH - engine = 'databend', - parameters = { - "protocol": "https", - "user": "root", - "port": 443, - "password": "password", - "host": "some-url.aws-us-east-2.default.databend.com", - "database": "test_db" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM databend_datasource.example_tbl; -``` diff --git a/docs/integrations/data-integrations/databricks.mdx b/docs/integrations/data-integrations/databricks.mdx deleted file mode 100644 index 4d0909a62fe..00000000000 --- a/docs/integrations/data-integrations/databricks.mdx +++ /dev/null @@ -1,123 +0,0 @@ ---- -title: Databricks -sidebarTitle: Databricks ---- -This documentation describes the integration of MindsDB with [Databricks](https://www.databricks.com/), the world's first data intelligence platform powered by 
generative AI. -The integration allows MindsDB to access data stored in a Databricks workspace and enhance it with AI capabilities. - - -This data source integration is thread-safe, utilizing a connection pool where each thread is assigned its own connection. When handling requests in parallel, threads retrieve connections from the pool as needed. - - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Databricks to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - - -If the Databricks cluster you are attempting to connect to is terminated, executing the queries given below will attempt to start the cluster and therefore, the first query may take a few minutes to execute. - -To avoid any delays, ensure that the Databricks cluster is running before executing the queries. - - -## Connection - -Establish a connection to your Databricks workspace from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE databricks_datasource -WITH - engine = 'databricks', - parameters = { - "server_hostname": "adb-1234567890123456.7.azuredatabricks.net", - "http_path": "sql/protocolv1/o/1234567890123456/1234-567890-test123", - "access_token": "dapi1234567890ab1cde2f3ab456c7d89efa", - "schema": "example_db" - }; -``` - -Required connection parameters include the following: - -* `server_hostname`: The server hostname for the cluster or SQL warehouse. -* `http_path`: The HTTP path of the cluster or SQL warehouse. -* `access_token`: A Databricks personal access token for the workspace. 
- - -Refer to the instructions at https://docs.databricks.com/en/integrations/compute-details.html and https://docs.databricks.com/en/dev-tools/python-sql-connector.html#authentication to find the connection parameters mentioned above for your compute resource. - - -Optional connection parameters include the following: - -* `session_configuration`: Additional (key, value) pairs to set as Spark session configuration parameters. This should be provided as a JSON string. -* `http_headers`: Additional (key, value) pairs to set in HTTP headers on every RPC request the client makes. This should be provided as `"http_headers": [['Header-1', 'value1'], ['Header-2', 'value2']]`. -* `catalog`: The catalog to use for the connection. Default is `hive_metastore`. -* `schema`: The schema (database) to use for the connection. Default is `default`. - -## Usage - -Retrieve data from a specified table by providing the integration name, catalog, schema, and table name: - -```sql -SELECT * -FROM databricks_datasource.catalog_name.schema_name.table_name -LIMIT 10; -``` - - -The catalog and schema names only need to be provided if the table to be queried is not in the specified (or default) catalog and schema. - - -Run Databricks SQL queries directly on the connected Databricks workspace: - -```sql -SELECT * FROM databricks_datasource ( - - --Native Query Goes Here - SELECT - city, - car_model, - RANK() OVER (PARTITION BY car_model ORDER BY quantity) AS rank - FROM dealer - QUALIFY rank = 1; -); - -``` - - -The above examples utilize `databricks_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the Databricks workspace. -* **Checklist**: - 1. Make sure the Databricks workspace is active. - 2. Confirm that server hostname, HTTP path, and access token are correctly provided.
If the catalog and schema are provided, ensure they are correct as well. - 3. Ensure a stable network between MindsDB and Databricks workspace. - - - -SQL statements running against tables (of reasonable size) are taking longer than expected. - -* **Symptoms**: SQL queries taking longer than expected to execute. -* **Checklist**: - 1. Ensure the Databricks cluster is running before executing the queries. - 2. Check the network connection between MindsDB and Databricks workspace. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing special characters. -* **Checklist**: - 1. Ensure table names with special characters are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel-data - * Incorrect: SELECT * FROM integration.'travel-data' - * Correct: SELECT * FROM integration.\`travel-data\` - diff --git a/docs/integrations/data-integrations/datastax.mdx b/docs/integrations/data-integrations/datastax.mdx deleted file mode 100644 index a0e99915c17..00000000000 --- a/docs/integrations/data-integrations/datastax.mdx +++ /dev/null @@ -1,61 +0,0 @@ ---- -title: DataStax -sidebarTitle: DataStax ---- - -This is the implementation of the DataStax data handler for MindsDB. - -[DataStax Astra DB](https://docs.datastax.com/en/astra-db-serverless/index.html) is a cloud database-as-a-service based on Apache Cassandra. DataStax also offers on-premises solutions, DataStax Enterprise (DSE) and Hyper-Converged Database (HCD), as well as Astra Streaming, a messaging and event streaming cloud service based on Apache Pulsar. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect DataStax to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies).
-3. Create an [Astra DB database](https://docs.datastax.com/en/astra-db-serverless/databases/create-database.html). - -## Implementation - -DataStax Astra DB is API-compatible with Apache Cassandra and ScyllaDB. Therefore, its implementation extends the ScyllaDB handler and is using the `scylla-driver` Python library. - -The required arguments to establish a connection are as follows: - -* `user`: The literal string `token` -* `password`: An [Astra application token](https://docs.datastax.com/en/astra-db-serverless/administration/manage-application-tokens.html) -* `secure_connect_bundle`: The path to your database's [Secure Connect Bundle](https://docs.datastax.com/en/astra-db-serverless/databases/secure-connect-bundle.html) zip file - -## Usage - -In order to make use of this handler and connect to the Astra DB database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE astra_connection -WITH - engine = "astra", - parameters = { - "user": "token", - "password": "application_token", - "secure_connect_bundle": "/home/Downloads/file.zip" - }; -``` - -or, reference the bundle from Datastax s3 as: - -```sql -CREATE DATABASE astra_connection -WITH ENGINE = "astra", -PARAMETERS = { - "user": "token", - "password": "application_token", - "secure_connect_bundle": "https://datastax-cluster-config-prod.s3.us-east-2.amazonaws.com/32312-b9eb-4e09-a641-213eaesa12-1/secure-connect-demo.zip?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AK..." 
-} -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM astra_connection.keystore.example_table -LIMIT 10; -``` diff --git a/docs/integrations/data-integrations/duckdb.mdx b/docs/integrations/data-integrations/duckdb.mdx deleted file mode 100644 index db2698bdd28..00000000000 --- a/docs/integrations/data-integrations/duckdb.mdx +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: DuckDB -sidebarTitle: DuckDB ---- - -This is the implementation of the DuckDB data handler for MindsDB. - -[DuckDB](https://duckdb.org/) is an open-source analytical database system. It is designed for fast execution of analytical queries. There are no external dependencies and the DBMS runs completely embedded within a host process, similar to SQLite. DuckDB provides a rich SQL dialect with support for complex queries with transactional guarantees (ACID). - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect DuckDB to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to DuckDB. - -## Implementation - -This handler is implemented using the `duckdb` Python client library. - - -The DuckDB handler is currently using the `0.7.1.dev187` pre-release version of the Python client library. In case of issues, make sure your DuckDB database is compatible with this version. See the [`requirements.txt`](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/duckdb_handler/requirements.txt) for details. - - -The required arguments to establish a connection are as follows: - -* `database` is the name of the DuckDB database file. It can be set to `:memory:` to create an in-memory database.
- -The optional arguments are as follows: - -* `read_only` is a flag that specifies whether the connection is in the read-only mode. This is required if multiple processes want to access the same database file at the same time. - -## Usage - -In order to make use of this handler and connect to the DuckDB database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE duckdb_datasource -WITH - engine = 'duckdb', - parameters = { - "database": "db.duckdb" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM duckdb_datasource.my_table; -``` diff --git a/docs/integrations/data-integrations/edgelessdb.mdx b/docs/integrations/data-integrations/edgelessdb.mdx deleted file mode 100644 index a1c9b8ef69a..00000000000 --- a/docs/integrations/data-integrations/edgelessdb.mdx +++ /dev/null @@ -1,72 +0,0 @@ ---- -title: EdgelessDB -sidebarTitle: EdgelessDB ---- - -This is the implementation of the EdgelessDB data handler for MindsDB. - -[Edgeless](https://edgeless.systems/) is a full SQL database, tailor-made for confidential computing. It seamlessly integrates with your existing tools and workflows to help you unlock the full potential of your data. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect EdgelessDB to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to EdgelessDB. - -## Implementation - -This handler is implemented by extending the MySQL connector. 
-The required arguments to establish a connection are as follows: - -* `host`: the host name of the EdgelessDB connection -* `port`: the port to use when connecting -* `user`: the user to authenticate -* `password`: the password to authenticate the user -* `database`: database name - -To use the full potential of EdgelessDB, you can also specify the following arguments: - -* `ssl`: whether to use SSL or not -* `ssl_ca`: path or url to the CA certificate -* `ssl_cert`: path or url to the client certificate -* `ssl_key`: path or url to the client key - -## Usage - -In order to use EdgelessDB as a data source in MindsDB, you need to use the following syntax: - -```sql -CREATE DATABASE edgelessdb_datasource - WITH ENGINE = "EdgelessDB", - PARAMETERS = { - "user": "root", - "password": "test123@!Aabvhj", - "host": "localhost", - "port": 3306, - "database": "test_schema" - } -``` -Or you can use the following syntax: - -```sql - -CREATE DATABASE edgelessdb_datasource2 - WITH ENGINE = "EdgelessDB", - PARAMETERS = { - "user": "root", - "password": "test123@!Aabvhj", - "host": "localhost", - "port": 3306, - "database": "test_schema", - "ssl_cert": "/home/marios/demo/cert.pem", - "ssl_key": "/home/marios/demo/key.pem" - } -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * FROM edgelessdb_datasource.table_name -``` diff --git a/docs/integrations/data-integrations/elasticsearch.mdx b/docs/integrations/data-integrations/elasticsearch.mdx deleted file mode 100644 index b672ad22b99..00000000000 --- a/docs/integrations/data-integrations/elasticsearch.mdx +++ /dev/null @@ -1,118 +0,0 @@ ---- -title: ElasticSearch -sidebarTitle: ElasticSearch ---- - -This documentation describes the integration of MindsDB with [ElasticSearch](https://www.elastic.co/), a distributed, multitenant-capable full-text search engine with an HTTP web interface and schema-free JSON documents.
-The integration allows MindsDB to access data from ElasticSearch and enhance ElasticSearch with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect ElasticSearch to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to ElasticSearch. - -## Connection - -Establish a connection to ElasticSearch from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/elasticsearch_handler) as an engine. - -```sql -CREATE DATABASE elasticsearch_datasource -WITH ENGINE = 'elasticsearch', -PARAMETERS={ - 'cloud_id': 'xyz', -- optional, if hosts are provided - 'hosts': 'https://xyz.xyz.gcp.cloud.es.io:123', -- optional, if cloud_id is provided - 'api_key': 'xyz', -- optional, if user and password are provided - 'user': 'elastic', -- optional, if api_key is provided - 'password': 'xyz' -- optional, if api_key is provided -}; -``` - -The connection parameters include the following: - -* `cloud_id`: The Cloud ID provided with the ElasticSearch deployment. Required only when `hosts` is not provided. -* `hosts`: The ElasticSearch endpoint provided with the ElasticSearch deployment. Required only when `cloud_id` is not provided. -* `api_key`: The API key that you generated for the ElasticSearch deployment. Required only when `user` and `password` are not provided. -* `user` and `password`: The user and password used to authenticate. Required only when `api_key` is not provided. 
- - -If you want to connect to the local instance of ElasticSearch, use the below statement: - -```sql -CREATE DATABASE elasticsearch_datasource -WITH ENGINE = 'elasticsearch', -PARAMETERS = { - "hosts": "127.0.0.1:9200", - "user": "user", - "password": "password" -}; -``` - -Required connection parameters include the following (at least one of these parameters should be provided): - -* `hosts`: The IP address and port where ElasticSearch is deployed. -* `user`: The user used to authenticate access. -* `password`: The password used to authenticate access. - - -## Usage - -Retrieve data from a specified index by providing the integration name and index name: - -```sql -SELECT * -FROM elasticsearch_datasource.my_index -LIMIT 10; -``` - - -The above examples utilize `elasticsearch_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - - -At the moment, the Elasticsearch SQL API has certain limitations that have an impact on the queries that can be issued via MindsDB. The most notable of these limitations are listed below: -1. Only `SELECT` queries are supported at the moment. -2. Array fields are not supported. -3. Nested fields cannot be queried directly. However, they can be accessed using the `.` operator. - -For a detailed guide on the limitations of the Elasticsearch SQL API, refer to the [official documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/sql-limitations.html). - - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the Elasticsearch server. -* **Checklist**: - 1. Make sure the Elasticsearch server is active. - 2. Confirm that server, cloud ID and credentials are correct. - 3. Ensure a stable network between MindsDB and Elasticsearch. - - - -`Transport Error` or `Request Error` - -* **Symptoms**: Errors related to the issuing of unsupported queries to Elasticsearch. -* **Checklist**: - 1. Ensure the query is a `SELECT` query. - 2.
Avoid querying array fields. - 3. Access nested fields using the `.` operator. - 4. Refer to the [official documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/sql-limitations.html) for more information if needed. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing index names containing special characters. -* **Checklist**: - 1. Ensure table names with special characters are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel-data - * Incorrect: SELECT * FROM integration.'travel-data' - * Correct: SELECT * FROM integration.\`travel-data\` - - -This [troubleshooting guide](https://www.elastic.co/guide/en/elasticsearch/reference/current/troubleshooting.html) provided by Elasticsearch might also be helpful. diff --git a/docs/integrations/data-integrations/firebird.mdx b/docs/integrations/data-integrations/firebird.mdx deleted file mode 100644 index 4028e49dcf4..00000000000 --- a/docs/integrations/data-integrations/firebird.mdx +++ /dev/null @@ -1,50 +0,0 @@ ---- -title: Firebird -sidebarTitle: Firebird ---- - -This is the implementation of the Firebird data handler for MindsDB. - -[Firebird](https://firebirdsql.org/en/about-firebird/) is a relational database offering many ANSI SQL standard features that runs on Linux, Windows, and a variety of Unix platforms. Firebird offers excellent concurrency, high performance, and powerful language support for stored procedures and triggers. It has been used in production systems, under a variety of names, since 1981. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Firebird to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. 
Install or ensure access to Firebird. - -## Implementation - -This handler is implemented using the `fdb` library, the Python driver for Firebird. - -The required arguments to establish a connection are as follows: - -* `host` is the host name or IP address of the Firebird server. -* `database` is the path to the Firebird database file to connect to. -* `user` is the username to authenticate the user with the Firebird server. -* `password` is the password to authenticate the user with the Firebird server. - -## Usage - -In order to make use of this handler and connect to the Firebird server in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE firebird_datasource -WITH - engine = 'firebird', - parameters = { - "host": "localhost", - "database": "C:\Users\minura\Documents\mindsdb\example.fdb", - "user": "sysdba", - "password": "password" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM firebird_datasource.example_tbl; -``` diff --git a/docs/integrations/data-integrations/google-bigquery.mdx b/docs/integrations/data-integrations/google-bigquery.mdx deleted file mode 100644 index 69f3fd43d02..00000000000 --- a/docs/integrations/data-integrations/google-bigquery.mdx +++ /dev/null @@ -1,105 +0,0 @@ ---- -title: Google BigQuery -sidebarTitle: Google BigQuery ---- - -This documentation describes the integration of MindsDB with [Google BigQuery](https://cloud.google.com/bigquery?hl=en), a fully managed, AI-ready data analytics platform that helps you maximize value from your data. -The integration allows MindsDB to access data stored in the BigQuery warehouse and enhance it with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2.
To connect BigQuery to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to your BigQuery warehouse from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE bigquery_datasource -WITH - engine = "bigquery", - parameters = { - "project_id": "bgtest-1111", - "dataset": "mydataset", - "service_account_keys": "/tmp/keys.json" - }; -``` - -Required connection parameters include the following: - -- `project_id`: The globally unique identifier for your project in Google Cloud where BigQuery is located. -- `dataset`: The default dataset to connect to. - -Optional connection parameters include the following: - -- `service_account_keys`: The full path to the service account key file. -- `service_account_json`: The content of a JSON file defined by the `service_account_keys` parameter. - - - One of `service_account_keys` or `service_account_json` has to be provided to - establish a connection to BigQuery. - - -## Usage - -Retrieve data from a specified table in the default dataset by providing the integration name and table name: - -```sql -SELECT * -FROM bigquery_datasource.table_name -LIMIT 10; -``` - -Retrieve data from a specified table in a different dataset by providing the integration name, dataset name and table name: - -```sql -SELECT * -FROM bigquery_datasource.dataset_name.table_name -LIMIT 10; -``` - -Run SQL in any supported BigQuery dialect directly on the connected BigQuery database: - -```sql -SELECT * FROM bigquery_datasource ( - - --Native Query Goes Here - SELECT * - FROM t1 - WHERE t1.a IN (SELECT t2.a - FROM t2 FOR SYSTEM_TIME AS OF t1.timestamp_column); - -); -``` - - - The above examples utilize `bigquery_datasource` as the datasource name, which - is defined in the `CREATE DATABASE` command. 
- - -## Troubleshooting Guide - - -`Database Connection Error` - -- **Symptoms**: Failure to connect MindsDB with the BigQuery warehouse. -- **Checklist**: - 1. Make sure that the Google Cloud account is active and the Google BigQuery service is enabled. - 2. Confirm that the project ID, dataset and service account credentials are correct. Try a direct BigQuery connection using a client like DBeaver. - 3. Ensure a stable network between MindsDB and Google BigQuery. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -- **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters. -- **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - Examples: - * Incorrect: SELECT * FROM integration.travel data - * Incorrect: SELECT * FROM integration.'travel data' - * Correct: SELECT * FROM integration.\`travel data\` - diff --git a/docs/integrations/data-integrations/google-cloud-sql.mdx b/docs/integrations/data-integrations/google-cloud-sql.mdx deleted file mode 100644 index bc14e499c0f..00000000000 --- a/docs/integrations/data-integrations/google-cloud-sql.mdx +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: Google Cloud SQL -sidebarTitle: Google Cloud SQL ---- - -This is the implementation of the Google Cloud SQL data handler for MindsDB. - -[Cloud SQL](https://cloud.google.com/sql) is a fully-managed database service that makes it easy to set up, maintain, manage, and administer your relational PostgreSQL, MySQL, and SQL Server databases in the cloud. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Google Cloud SQL to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Google Cloud SQL.
- -## Implementation - -This handler was implemented using the existing MindsDB handlers for MySQL, PostgreSQL and SQL Server. - -The required arguments to establish a connection are as follows: -* `host`: the host name or IP address of the Google Cloud SQL instance. -* `port`: the TCP/IP port of the Google Cloud SQL instance. -* `user`: the username used to authenticate with the Google Cloud SQL instance. -* `password`: the password to authenticate the user with the Google Cloud SQL instance. -* `database`: the database name to use when connecting with the Google Cloud SQL instance. -* `db_engine`: the database engine of the Google Cloud SQL instance. This can take one of three values: 'mysql', 'postgresql' or 'mssql'. - -## Usage - -In order to make use of this handler and connect to the Google Cloud SQL instance, you need to create a datasource with the following syntax: - -```sql -CREATE DATABASE cloud_sql_mysql_datasource -WITH ENGINE = 'cloud_sql', -PARAMETERS = { - "db_engine": "mysql", - "host": "53.170.61.16", - "port": 3306, - "user": "admin", - "password": "password", - "database": "example_db" -}; -``` -To successfully connect to the Google Cloud SQL instance you have to make sure that the IP address of the machine you are using to connect is added to the authorized networks of the Google Cloud SQL instance. You can do this by following the steps below: - -1. Go to the [Cloud SQL Instances](https://console.cloud.google.com/sql/instances) page. -2. Click on the instance you want to add authorized networks to. -3. Click on the **Connections** tab. -4. Click on **Networking** tab. -5. Click on **Add network**. -6. Enter the IP address of the machine you want to connect from.
- -If you are using MindsDB cloud version you can use the following IP address: ` -18.220.205.95 -3.19.152.46 -52.14.91.162 -` - -You can use this established connection to query your table as follows: - -```sql -SELECT * FROM cloud_sql_mysql_datasource.example_tbl; -``` diff --git a/docs/integrations/data-integrations/google-sheets.mdx b/docs/integrations/data-integrations/google-sheets.mdx deleted file mode 100644 index 3803cbcea63..00000000000 --- a/docs/integrations/data-integrations/google-sheets.mdx +++ /dev/null @@ -1,62 +0,0 @@ ---- -title: Google Sheets -sidebarTitle: Google Sheets ---- - -This is the implementation of the Google Sheets data handler for MindsDB. - -[Google Sheets](https://www.google.com/sheets/about/) is a spreadsheet program included as a part of the free, web-based Google Docs Editors suite offered by Google. - - -Please note that the integration of MindsDB with Google Sheets works for public sheets only. - - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Google Sheets to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Google Sheets. - -## Implementation - -This handler is implemented using `duckdb`, a library that allows SQL queries to be executed on `pandas` DataFrames. - -In essence, when querying a particular sheet, the entire sheet is first pulled into a `pandas` DataFrame using the [Google Visualization API](https://developers.google.com/chart/interactive/docs/reference). Once this is done, SQL queries can be run on the DataFrame using `duckdb`. - - -Since the entire sheet needs to be pulled into memory first (DataFrame), it is recommended to be somewhat careful when querying large datasets so as not to overload your machine. 
- - -The required arguments to establish a connection are as follows: - -* `spreadsheet_id` is the unique ID of the Google Sheet. -* `sheet_name` is the name of the sheet within the Google Sheet. - -## Usage - -In order to make use of this handler and connect to a Google Sheet in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE sheets_datasource -WITH - engine = 'sheets', - parameters = { - "spreadsheet_id": "12wgS-1KJ9ymUM-6VYzQ0nJYGitONxay7cMKLnEE2_d0", - "sheet_name": "iris" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM sheets_datasource.example_tbl; -``` - -The name of the table will be the name of the relevant sheet, provided as an input to the `sheet_name` parameter. - - -At the moment, only the `SELECT` statement is allowed to be executed through `duckdb`. This, however, has no restriction on running machine learning algorithms against your data in Google Sheets using the `CREATE MODEL` statement. - diff --git a/docs/integrations/data-integrations/greptimedb.mdx b/docs/integrations/data-integrations/greptimedb.mdx deleted file mode 100644 index 41438d4fdf8..00000000000 --- a/docs/integrations/data-integrations/greptimedb.mdx +++ /dev/null @@ -1,51 +0,0 @@ ---- -title: GreptimeDB -sidebarTitle: GreptimeDB ---- - -This is the implementation of the GreptimeDB data handler for MindsDB. - -[GreptimeDB](https://greptime.com/) is an open-source, cloud-native time series database featuring analytical capabilities, scalability, and open protocol support. - -## Implementation - -This handler is implemented by extending the MySQLHandler. - -Connect GreptimeDB to MindsDB by providing the following parameters: - -* `host` is the host name, IP address, or URL. -* `port` is the port used to make TCP/IP connection. -* `database` is the database name. -* `user` is the database user. -* `password` is the database password. - -There are several optional parameters that can be used as well.
- -* `ssl` is the `ssl` parameter value that indicates whether SSL is enabled (`True`) or disabled (`False`). -* `ssl_ca` is the SSL Certificate Authority. -* `ssl_cert` stores SSL certificates. -* `ssl_key` stores SSL keys. - -## Usage - -In order to make use of this handler and connect to the GreptimeDB database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE greptimedb_datasource -WITH - engine = 'greptimedb', - parameters = { - "host": "127.0.0.1", - "port": 4002, - "database": "public", - "user": "username", - "password": "password" - }; -``` - -You can use this established connection to query your table as follows. - -```sql -SELECT * -FROM greptimedb_datasource.example_table; -``` diff --git a/docs/integrations/data-integrations/ibm-db2.mdx b/docs/integrations/data-integrations/ibm-db2.mdx deleted file mode 100644 index 0b29be1b38d..00000000000 --- a/docs/integrations/data-integrations/ibm-db2.mdx +++ /dev/null @@ -1,105 +0,0 @@ ---- -title: IBM Db2 -sidebarTitle: IBM Db2 ---- - -This documentation describes the integration of MindsDB with [IBM Db2](https://www.ibm.com/db2), the cloud-native database built to power low-latency transactions, real-time analytics and AI applications at scale. -The integration allows MindsDB to access data stored in the IBM Db2 database and enhance it with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect IBM Db2 to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). 
- -## Connection - -Establish a connection to your IBM Db2 database from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE db2_datasource -WITH - engine = 'db2', - parameters = { - "host": "127.0.0.1", - "user": "db2inst1", - "password": "password", - "database": "example_db" - }; -``` - -Required connection parameters include the following: - -* `host`: The hostname, IP address, or URL of the IBM Db2 database. -* `user`: The username for the IBM Db2 database. -* `password`: The password for the IBM Db2 database. -* `database`: The name of the IBM Db2 database to connect to. - -Optional connection parameters include the following: - -* `port`: The port number for connecting to the IBM Db2 database. Default is `50000`. -* `schema`: The database schema to use within the IBM Db2 database. - -## Usage - -Retrieve data from a specified table by providing the integration name, schema, and table name: - -```sql -SELECT * -FROM db2_datasource.schema_name.table_name -LIMIT 10; -``` - -Run IBM Db2 native queries directly on the connected database: - -```sql -SELECT * FROM db2_datasource ( - - --Native Query Goes Here - WITH - DINFO (DEPTNO, AVGSALARY, EMPCOUNT) AS - (SELECT OTHERS.WORKDEPT, AVG(OTHERS.SALARY), COUNT(*) - FROM EMPLOYEE OTHERS - GROUP BY OTHERS.WORKDEPT - ), - DINFOMAX AS - (SELECT MAX(AVGSALARY) AS AVGMAX FROM DINFO) - SELECT THIS_EMP.EMPNO, THIS_EMP.SALARY, - DINFO.AVGSALARY, DINFO.EMPCOUNT, DINFOMAX.AVGMAX - FROM EMPLOYEE THIS_EMP, DINFO, DINFOMAX - WHERE THIS_EMP.JOB = 'SALESREP' - AND THIS_EMP.WORKDEPT = DINFO.DEPTNO - -); -``` - - -The above examples utilize `db2_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the IBM Db2 database. -* **Checklist**: - 1. Make sure the IBM Db2 database is active. - 2. Confirm that host, user, password and database are correct. 
Try a direct connection using a client like DBeaver. - 3. Ensure a stable network between MindsDB and the IBM Db2 database. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel-data - * Incorrect: SELECT * FROM integration.'travel-data' - * Correct: SELECT * FROM integration.\`travel-data\` - - -This [guide](https://www.ibm.com/docs/en/db2/11.5?topic=connect-common-db2-problems) to common Db2 connection issues provided by IBM might also be helpful. diff --git a/docs/integrations/data-integrations/ibm-informix.mdx b/docs/integrations/data-integrations/ibm-informix.mdx deleted file mode 100644 index 321a604ff0b..00000000000 --- a/docs/integrations/data-integrations/ibm-informix.mdx +++ /dev/null @@ -1,139 +0,0 @@ ---- -title: IBM Informix -sidebarTitle: IBM Informix ---- - -This is the implementation of the IBM Informix data handler for MindsDB. - -[IBM Informix](https://www.ibm.com/products/informix) is a product family within IBM's Information Management division that is centered on several relational database management system (RDBMS) offerings. The Informix server supports object–relational models and (through extensions) data types that are not a part of the SQL standard. The most widely used of these are the JSON, BSON, time series, and spatial extensions, which provide both data type support and language extensions that permit high-performance domain-specific queries and efficient storage for data sets based on semi-structured, time series, and spatial data. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2.
To connect IBM Informix to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to IBM Informix. - -## Implementation - -This handler is implemented using `IfxPy/IfxPyDbi`, a Python library that allows you to use Python code to run SQL commands on the Informix database. - -The required arguments to establish a connection are as follows: - -* `user` is the username associated with the database. -* `password` is the password to authenticate your access. -* `host` is the hostname or IP address of the server. -* `port` is the port through which TCP/IP connection is to be made. -* `database` is the database name to be connected. -* `schema_name` is the schema name to get tables. -* `server` is the name of the server you want to connect to. -* `logging_enabled` defines whether logging is enabled or not. Defaults to `True` if not provided. - -## Usage - -In order to make use of this handler and connect to the Informix database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE informix_datasource -WITH - engine='informix', - parameters={ - "server": "server", - "host": "127.0.0.1", - "port": 9091, - "user": "informix", - "password": "in4mix", - "database": "stores_demo", - "schema_name": "love", - "logging_enabled": False - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM informix_datasource.items; -``` - - -This integration uses `IfxPy`. As it is in the development stage, it can be installed using `pip install IfxPy`. However, it doesn't work with higher versions of Python; therefore, you have to build it from source. - - - - - -1.
This code downloads and extracts the `onedb-ODBC` driver used to make connection: -```bash -cd $HOME -mkdir Informix -cd Informix -mkdir -p home/informix/cli -wget https://hcl-onedb.github.io/odbc/OneDB-Linux64-ODBC-Driver.tar -sudo tar xvf OneDB-Linux64-ODBC-Driver.tar -C ./home/informix/cli -rm OneDB-Linux64-ODBC-Driver.tar -``` - -2. Add environment variables in the `.bashrc` file: -```bash -export INFORMIXDIR=$HOME/Informix/home/informix/cli/onedb-odbc-driver -export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}${INFORMIXDIR}/lib:${INFORMIXDIR}/lib/esql:${INFORMIXDIR}/lib/cli -``` - -3. This code clones the `IfxPy` repo, builds a wheel, and installs it: -```bash -pip install wheel -mkdir Temp -cd Temp -git clone https://github.com/OpenInformix/IfxPy.git -cd IfxPy/IfxPy -python setup.py bdist_wheel -pip install --find-links=./dist IfxPy -cd .. -cd .. -cd .. -rm -rf Temp -``` - - - - - -1. This code downloads and extracts the `onedb-ODBC` driver used to make connection: -```bash -cd $HOME -mkdir Informix -cd Informix -mkdir /home/informix/cli -wget https://hcl-onedb.github.io/odbc/OneDB-Win64-ODBC-Driver.zip -tar xvf OneDB-Win64-ODBC-Driver.zip -C ./home/informix/cli -del OneDB-Win64-ODBC-Driver.zip -``` - -2. Add an environment variable: -```bash -set INFORMIXDIR=$HOME/Informix/home/informix/cli/onedb-odbc-driver -``` - -3. Add `%INFORMIXDIR%\bin` to the PATH environment variable. - -4. This code clones the `IfxPy` repo, builds a wheel, and installs it: -```bash -pip install wheel -mkdir Temp -cd Temp -git clone https://github.com/OpenInformix/IfxPy.git -cd IfxPy/IfxPy -python setup.py bdist_wheel -pip install --find-links=./dist IfxPy -cd .. -cd .. -cd ..
-rmdir Temp -``` - - - - - diff --git a/docs/integrations/data-integrations/influxdb.mdx b/docs/integrations/data-integrations/influxdb.mdx deleted file mode 100644 index a3f85bd9c73..00000000000 --- a/docs/integrations/data-integrations/influxdb.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: InfluxDB -sidebarTitle: InfluxDB ---- - -This is the implementation of the InfluxDB data handler for MindsDB. - -[InfluxDB](https://www.influxdata.com/) is a time series database that can be used to collect data and monitor the system and devices, especially Edge devices. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect InfluxDB to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to InfluxDB. - -## Implementation - -The required arguments to establish a connection are as follows: - -* `influxdb_url` is the hosted URL of InfluxDB Cloud. -* `influxdb_token` is the authentication token for the hosted InfluxDB Cloud instance. -* `influxdb_db_name` is the database name of the InfluxDB Cloud instance. -* `influxdb_table_name` is the table name of the InfluxDB Cloud instance. - - -Please follow [this link](https://docs.influxdata.com/influxdb/cloud/security/tokens/create-token/#create-a-token-in-the-influxdb-ui) to generate token for accessing InfluxDB API. - - -## Usage - -In order to make use of this handler and connect to the InfluxDB database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE influxdb_source -WITH - ENGINE = 'influxdb', - PARAMETERS = { - "influxdb_url": "", - "influxdb_token": "", - "influxdb_table_name": "" - }; -``` - -You can use this established connection to query your table as follows. 
- -```sql -SELECT name, time, sensor_id, temperature -FROM influxdb_source.tables -ORDER BY temperature DESC -LIMIT 65; -``` diff --git a/docs/integrations/data-integrations/mariadb.mdx b/docs/integrations/data-integrations/mariadb.mdx deleted file mode 100644 index 046aec3c4d7..00000000000 --- a/docs/integrations/data-integrations/mariadb.mdx +++ /dev/null @@ -1,97 +0,0 @@ ---- -title: MariaDB -sidebarTitle: MariaDB ---- - -This documentation describes the integration of MindsDB with [MariaDB](https://mariadb.org/), one of the most popular open source relational databases. -The integration allows MindsDB to access data from MariaDB and enhance MariaDB with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect MariaDB to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to MariaDB from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/mariadb_handler) as an engine. - -```sql -CREATE DATABASE mariadb_conn -WITH ENGINE = 'mariadb', -PARAMETERS = { - "host": "host-name", - "port": 3307, - "database": "db-name", - "user": "user-name", - "password": "password" -}; -``` - -Or: - -```sql -CREATE DATABASE mariadb_conn -WITH - ENGINE = 'mariadb', - PARAMETERS = { - "url": "mariadb://user-name@host-name:3307" - }; -``` - -Required connection parameters include the following: - -* `user`: The username for the MariaDB database. -* `password`: The password for the MariaDB database. -* `host`: The hostname, IP address, or URL of the MariaDB server. 
-* `port`: The port number for connecting to the MariaDB server. -* `database`: The name of the MariaDB database to connect to. - -Or: - -* `url`: You can specify a connection to MariaDB Server using a URI-like string, as an alternative connection option. You can also use `mysql://` as the protocol prefix - -Optional connection parameters include the following: - - * `ssl`: Boolean parameter that indicates whether SSL encryption is enabled for the connection. Set to True to enable SSL and enhance connection security, or set to False to use the default non-encrypted connection. - * `ssl_ca`: Specifies the path to the Certificate Authority (CA) file in PEM format. - * `ssl_cert`: Specifies the path to the SSL certificate file. This certificate should be signed by a trusted CA specified in the `ssl_ca` file or be a self-signed certificate trusted by the server. - * `ssl_key`: Specifies the path to the private key file (in PEM format). - * `use_pure` (`True` by default): Whether to use pure Python or C Extension. If `use_pure=False` and the C Extension is not available, then Connector/Python will automatically fall back to the pure Python implementation. - -## Usage - -The following usage examples utilize the connection to MariaDB made via the `CREATE DATABASE` statement and named `mariadb_conn`. - -Retrieve data from a specified table by providing the integration and table name. - -```sql -SELECT * -FROM mariadb_conn.table_name -LIMIT 10; -``` - -## Troubleshooting - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the MariaDB database. -* **Checklist**: - 1. Ensure that the MariaDB server is running and accessible - 2. Confirm that host, port, user, and password are correct. Try a direct MySQL connection. - 3. Test the network connection between the MindsDB host and the MariaDB server. 
- - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces, reserved words or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel data - * Incorrect: SELECT * FROM integration.'travel data' - * Correct: SELECT * FROM integration.\`travel data\` - diff --git a/docs/integrations/data-integrations/matrixone.mdx b/docs/integrations/data-integrations/matrixone.mdx deleted file mode 100644 index 9accaf58af5..00000000000 --- a/docs/integrations/data-integrations/matrixone.mdx +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: MatrixOne -sidebarTitle: MatrixOne ---- - -This is the implementation of the MatrixOne data handler for MindsDB. - -[MatrixOne](https://github.com/matrixorigin/matrixone) is a future-oriented hyper-converged cloud and edge native DBMS that supports transactional, analytical, and streaming workloads with a simplified and distributed database engine, across multiple data centers, clouds, edges, and other heterogeneous infrastructures. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect MatrixOne to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to MatrixOne. - -## Implementation - -This handler is implemented using `PyMySQL`, a Python library that allows you to use Python code to run SQL commands on the MatrixOne database. - -The required arguments to establish a connection are as follows: - -* `user` is the username associated with the database. -* `password` is the password to authenticate your access. -* `host` is the hostname or IP address of the database. 
-* `port` is the port through which TCP/IP connection is to be made. -* `database` is the database name to be connected. - -There are several optional arguments that can be used as well. - -* `ssl` indicates whether SSL is enabled (`True`) or disabled (`False`). -* `ssl_ca` is the SSL Certificate Authority. -* `ssl_cert` stores the SSL certificates. -* `ssl_key` stores the SSL keys. - -## Usage - -In order to make use of this handler and connect to the MatrixOne database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE matrixone_datasource -WITH - engine = 'matrixone', - parameters = { - "user": "dump", - "password": "111", - "host": "127.0.0.1", - "port": 6001, - "database": "mo_catalog" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM matrixone_datasource.demo; -``` diff --git a/docs/integrations/data-integrations/microsoft-access.mdx b/docs/integrations/data-integrations/microsoft-access.mdx deleted file mode 100644 index dabc8e476b1..00000000000 --- a/docs/integrations/data-integrations/microsoft-access.mdx +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: Microsoft Access -sidebarTitle: Microsoft Access ---- - -This is the implementation of the Microsoft Access data handler for MindsDB. - -[Microsoft Access](https://www.microsoft.com/en-us/microsoft-365/access) is a pseudo-relational database engine from Microsoft. It is part of the Microsoft Office suite of applications that also includes Word, Outlook, and Excel, among others. Access is also available for purchase as a stand-alone product. It uses the Jet Database Engine for data storage. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2.
To connect Microsoft Access to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Microsoft Access. - -## Implementation - -This handler is implemented using `pyodbc`, the Python ODBC bridge. - -The only required argument to establish a connection is `db_file` that points to a database file to be queried. - -## Usage - -In order to make use of this handler and connect to the Access database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE access_datasource -WITH - engine = 'access', - parameters = { - "db_file":"C:\\Users\\minurap\\Documents\\example_db.accdb" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM access_datasource.example_tbl; -``` diff --git a/docs/integrations/data-integrations/microsoft-sql-server.mdx b/docs/integrations/data-integrations/microsoft-sql-server.mdx deleted file mode 100644 index 7c323478b79..00000000000 --- a/docs/integrations/data-integrations/microsoft-sql-server.mdx +++ /dev/null @@ -1,243 +0,0 @@ ---- -title: Microsoft SQL Server -sidebarTitle: Microsoft SQL Server ---- - -This documentation describes the integration of MindsDB with Microsoft SQL Server, a relational database management system developed by Microsoft. -The integration allows for advanced SQL functionalities, extending Microsoft SQL Server's capabilities with MindsDB's features. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB [locally via Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or use [MindsDB Cloud](https://cloud.mindsdb.com/). -2. To connect Microsoft SQL Server to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). 
- -### Installation - -The MSSQL handler supports two connection methods: - -#### Option 1: Standard Connection (pymssql - Recommended) - -```bash -pip install mindsdb[mssql] -``` - -This installs `pymssql`, which provides native FreeTDS-based connections. Works on all platforms. - -#### Option 2: ODBC Connection (pyodbc) - -```bash -pip install mindsdb[mssql-odbc] -``` - -This installs both `pymssql` and `pyodbc` for ODBC driver support. - -**Additional requirements for ODBC:** -- **System ODBC libraries**: On Linux, install `unixodbc` and `unixodbc-dev` - ```bash - sudo apt-get install unixodbc unixodbc-dev - ``` -- **Microsoft ODBC Driver for SQL Server**: - - **Linux**: - ```bash - # Add Microsoft repository - curl https://packages.microsoft.com/keys/microsoft.asc | sudo tee /etc/apt/trusted.gpg.d/microsoft.asc - curl https://packages.microsoft.com/config/ubuntu/$(lsb_release -rs)/prod.list | sudo tee /etc/apt/sources.list.d/mssql-release.list - - # Install ODBC Driver 18 - sudo apt-get update - sudo ACCEPT_EULA=Y apt-get install -y msodbcsql18 - ``` - - **macOS**: `brew install msodbcsql18` - - **Windows**: Download from [Microsoft](https://learn.microsoft.com/en-us/sql/connect/odbc/download-odbc-driver-for-sql-server) - -To verify installed drivers: - -```bash -odbcinst -q -d -``` - -## Connection - -Establish a connection to your Microsoft SQL Server database from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE mssql_datasource -WITH ENGINE = 'mssql', -PARAMETERS = { - "host": "127.0.0.1", - "port": 1433, - "user": "sa", - "password": "password", - "database": "master" -}; -``` - -Required connection parameters include the following: - -* `user`: The username for the Microsoft SQL Server. -* `password`: The password for the Microsoft SQL Server. -* `host` The hostname, IP address, or URL of the Microsoft SQL Server. -* `database` The name of the Microsoft SQL Server database to connect to. 
- -Optional connection parameters include the following: - -* `port`: The port number for connecting to the Microsoft SQL Server. Default is 1433. -* `server`: The server name to connect to. Typically only used with named instances or Azure SQL Database. - -### ODBC Connection - -The handler also supports ODBC connections via `pyodbc` for advanced scenarios like Windows Authentication or specific driver requirements. - - -#### Setup - -1. Install: `pip install mindsdb[mssql-odbc]` -2. Install system ODBC driver (see Installation section above) - -Basic ODBC Connection: - -```sql -CREATE DATABASE mssql_odbc_datasource -WITH ENGINE = 'mssql', -PARAMETERS = { - "host": "127.0.0.1", - "port": 1433, - "user": "sa", - "password": "password", - "database": "master", - "driver": "ODBC Driver 18 for SQL Server" -- Specifying driver enables ODBC -}; -``` -ODBC-specific Parameters: - -* `driver`: The ODBC driver name (e.g., "ODBC Driver 18 for SQL Server"). When specified, enables ODBC mode. -* `use_odbc`: Set to `true` to explicitly use ODBC. Optional if `driver` is specified. If this is true default driver is set as `ODBC Driver 17 for SQL Server`. -* `encrypt`: Connection encryption: `"yes"` or `"no"`. Driver 18 defaults to `"yes"`. -* `trust_server_certificate`: Whether to trust self-signed certificates: `"yes"` or `"no"`. -* `connection_string_args`: Additional connection string arguments. 
- -#### Example: Azure SQL Database with Encryption: - -```sql -CREATE DATABASE azure_sql_datasource -WITH ENGINE = 'mssql', -PARAMETERS = { - "host": "myserver.database.windows.net", - "port": 1433, - "user": "adminuser", - "password": "SecurePass123!", - "database": "mydb", - "driver": "ODBC Driver 18 for SQL Server", - "encrypt": "yes", - "trust_server_certificate": "no" -}; -``` - -#### Example: Local Development (Self-Signed Certificate): - -```sql -CREATE DATABASE local_mssql -WITH ENGINE = 'mssql', -PARAMETERS = { - "host": "localhost", - "port": 1433, - "user": "sa", - "password": "YourStrong@Passw0rd", - "database": "testdb", - "driver": "ODBC Driver 18 for SQL Server", - "encrypt": "yes", - "trust_server_certificate": "yes" -- Allow self-signed certs -}; -``` - -## Usage - -Retrieve data from a specified table by providing the integration name, schema, and table name: - -```sql -SELECT * -FROM mssql_datasource.schema_name.table_name -LIMIT 10; -``` - -Run T-SQL queries directly on the connected Microsoft SQL Server database: - -```sql -SELECT * FROM mssql_datasource ( - - --Native Query Goes Here - SELECT - SUM(orderqty) total - FROM Product p JOIN SalesOrderDetail sd ON p.productid = sd.productid - JOIN SalesOrderHeader sh ON sd.salesorderid = sh.salesorderid - JOIN Customer c ON sh.customerid = c.customerid - WHERE (Name = 'Racing Socks, L') AND (companyname = 'Riding Cycles'); - -); -``` - - -The above examples utilize `mssql_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -### Performance Optimization for Large Datasets - -The handler is optimized for efficient data processing, but for very large result sets (millions of rows): - -1. **Use SQL Server's filtering**: Apply `WHERE` clauses to filter data on the server side -2. **Use pagination**: Use `TOP`/`OFFSET-FETCH` in SQL Server or `LIMIT` in MindsDB queries -3. **Aggregate when possible**: Use `GROUP BY`, `COUNT()`, `AVG()`, etc. to reduce data volume -4. 
**Index your tables**: Ensure proper indexes on SQL Server for query performance - -**Example - Paginated Query:** -```sql -SELECT * FROM mssql_datasource ( - SELECT TOP 100000 * - FROM large_table - ORDER BY id - OFFSET 0 ROWS -); -``` - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the Microsoft SQL Server database. -* **Checklist**: - 1. Make sure the Microsoft SQL Server is active. - 2. Confirm that host, port, user, and password are correct. Try a direct Microsoft SQL Server connection using a client like SQL Server Management Studio or DBeaver. - 3. Ensure a stable network between MindsDB and Microsoft SQL Server. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel data - * Incorrect: SELECT * FROM integration.'travel data' - * Correct: SELECT * FROM integration.\`travel data\` - - - -`ODBC Driver Connection Error` - -* **Symptoms**: Errors like "Driver not found", "Can't open lib 'ODBC Driver 17 for SQL Server'", or "pyodbc is not installed". -* **Checklist**: - 1. **Verify pyodbc is installed**: `pip list | grep pyodbc` - 2. **Check system ODBC libraries**: `ldconfig -p | grep odbc` (Linux) should show libodbc.so - 3. **Verify ODBC drivers**: Run `odbcinst -q -d` to list installed drivers - 4. **Match driver name exactly**: Use the exact name from `odbcinst -q -d` (case-sensitive) - 5. **For Driver 18 encryption errors**: Add `"encrypt": "yes", "trust_server_certificate": "yes"` for local/dev servers - 6. 
**Test connection manually**: - ```python - import pyodbc - print(pyodbc.drivers()) # Should list available drivers - ``` - \ No newline at end of file diff --git a/docs/integrations/data-integrations/monetdb.mdx b/docs/integrations/data-integrations/monetdb.mdx deleted file mode 100644 index d53e77f8e02..00000000000 --- a/docs/integrations/data-integrations/monetdb.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: MonetDB -sidebarTitle: MonetDB ---- - -This is the implementation of the MonetDB data handler for MindsDB. - -[MonetDB](https://www.monetdb.org/) is an open-source column-oriented relational database management system originally developed at the Centrum Wiskunde & Informatica in the Netherlands. It is designed to provide high performance on complex queries against large databases, such as combining tables with hundreds of columns and millions of rows. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect MonetDB to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to MonetDB. - -## Implementation - -This handler is implemented using `pymonetdb`, a Python library that allows you to use Python code to run SQL commands on the MonetDB database. - -The required arguments to establish a connection are as follows: - -* `user` is the username associated with the database. -* `password` is the password to authenticate your access. -* `host` is the host name or IP address. -* `port` is the port through which TCP/IP connection is to be made. -* `database` is the database name to be connected. -* `schema_name` is the schema name to get tables. It is optional and defaults to the current schema if not provided. 
- -## Usage - -In order to make use of this handler and connect to the MonetDB database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE monetdb_datasource -WITH - engine = 'monetdb', - parameters = { - "user": "monetdb", - "password": "monetdb", - "host": "127.0.0.1", - "port": 50000, - "schema_name": "sys", - "database": "demo" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM monetdb_datasource.demo; -``` diff --git a/docs/integrations/data-integrations/mongodb.mdx b/docs/integrations/data-integrations/mongodb.mdx deleted file mode 100644 index 57420ce78ac..00000000000 --- a/docs/integrations/data-integrations/mongodb.mdx +++ /dev/null @@ -1,90 +0,0 @@ ---- -title: MongoDB -sidebarTitle: MongoDB ---- - -This documentation describes the integration of MindsDB with [MongoDB](https://www.mongodb.com/company/what-is-mongodb), a document database with the scalability and flexibility that you want with the querying and indexing that you need. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). - -## Connection - -Establish a connection to MongoDB from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE mongodb_datasource -WITH - ENGINE = 'mongodb', - PARAMETERS = { - "host": "mongodb+srv://admin:admin_pass@demo.mongodb.net/public" - }; -``` - -Use the following parameters to establish the connection: - -* `host`: The connection string of the MongoDB server that includes user (`admin`), password (`admin_pass`), host and port (`demo.mongodb.net`), and database (`public`). -* `database`: If the connection string does not include the `/database` path, provide it in this parameter. 
- -Alternatively, the following set of connection parameters can be used: - -* `username`: The username associated with the database. -* `password`: The password to authenticate your access. -* `host`: The host of the MongoDB server. -* `port`: The port through which TCP/IP connection is to be made. -* `database`: The database name to be connected. - -## Usage - -Retrieve data from a specified collection by providing the integration name and collection name: - -```sql -SELECT * -FROM mongodb_datasource.my_collection -LIMIT 10; -``` - - -The above examples utilize `mongodb_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - - -At the moment, this integration only supports `SELECT` and `UPDATE` queries. - - - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the MongoDB server. -* **Checklist**: - 1. Make sure the MongoDB server is active. - 2. Confirm that host and credentials provided are correct. Try a direct MongoDB connection using a client like MongoDB Compass. - 3. Ensure a stable network between MindsDB and MongoDB. For example, if you are using MongoDB Atlas, ensure that the IP address of the machine running MindsDB is whitelisted. - - - -`Unknown statement` - -* **Symptoms**: Errors related to the issuing of unsupported queries to MongoDB via the integration. -* **Checklist**: - 1. Ensure the query is a `SELECT` or `UPDATE` query. - - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing collection names containing special characters. -* **Checklist**: - 1. Ensure table names with special characters are enclosed in backticks. - 2. 
Examples: - * Incorrect: SELECT * FROM integration.travel-data - * Incorrect: SELECT * FROM integration.'travel-data' - * Correct: SELECT * FROM integration.\`travel-data\` - \ No newline at end of file diff --git a/docs/integrations/data-integrations/mysql.mdx b/docs/integrations/data-integrations/mysql.mdx deleted file mode 100644 index 223677b30b7..00000000000 --- a/docs/integrations/data-integrations/mysql.mdx +++ /dev/null @@ -1,103 +0,0 @@ ---- -title: MySQL -sidebarTitle: MySQL ---- - -This documentation describes the integration of MindsDB with [MySQL](https://www.mysql.com/), a fast, reliable, and scalable open-source database. -The integration allows MindsDB to access data from MySQL and enhance MySQL with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect MySQL to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to MySQL from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/mysql_handler) as an engine. - -```sql -CREATE DATABASE mysql_conn -WITH ENGINE = 'mysql', -PARAMETERS = { - "host": "host-name", - "port": 3306, - "database": "db-name", - "user": "user-name", - "password": "password" -}; -``` - -Or: - -```sql -CREATE DATABASE mysql_datasource -WITH - ENGINE = 'mysql', - PARAMETERS = { - "url": "mysql://user-name@host-name:3306" - }; -``` - -Required connection parameters include the following: - -* `user`: The username for the MySQL database. -* `password`: The password for the MySQL database. -* `host`: The hostname, IP address, or URL of the MySQL server. 
-* `port`: The port number for connecting to the MySQL server. -* `database`: The name of the MySQL database to connect to. - -Or: - -* `url`: You can specify a connection to MySQL Server using a URI-like string, as an alternative connection option. - -Optional connection parameters include the following: - - * `ssl`: Boolean parameter that indicates whether SSL encryption is enabled for the connection. Set to True to enable SSL and enhance connection security, or set to False to use the default non-encrypted connection. - * `ssl_ca`: Specifies the path to the Certificate Authority (CA) file in PEM format. - * `ssl_cert`: Specifies the path to the SSL certificate file. This certificate should be signed by a trusted CA specified in the `ssl_ca` file or be a self-signed certificate trusted by the server. - * `ssl_key`: Specifies the path to the private key file (in PEM format). - * `use_pure` (`True` by default): Whether to use pure Python or C Extension. If `use_pure=False` and the C Extension is not available, then Connector/Python will automatically fall back to the pure Python implementation. - -## Usage - -The following usage examples utilize the connection to MySQL made via the `CREATE DATABASE` statement and named `mysql_conn`. - -Retrieve data from a specified table by providing the integration and table name. - -```sql -SELECT * -FROM mysql_conn.table_name -LIMIT 10; -``` - - -**Next Steps** - -Follow [this tutorial](https://docs.mindsdb.com/use-cases/data_enrichment/text-summarization-inside-mysql-with-openai) to see more use case examples. - - -## Troubleshooting - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the MySQL database. -* **Checklist**: - 1. Ensure that the MySQL server is running and accessible - 2. Confirm that host, port, user, and password are correct. Try a direct MySQL connection. - 3. Test the network connection between the MindsDB host and the MySQL server. 
- - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces, reserved words or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel data - * Incorrect: SELECT * FROM integration.'travel data' - * Correct: SELECT * FROM integration.\`travel data\` - diff --git a/docs/integrations/data-integrations/oceanbase.mdx b/docs/integrations/data-integrations/oceanbase.mdx deleted file mode 100644 index a27effe0c4f..00000000000 --- a/docs/integrations/data-integrations/oceanbase.mdx +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: OceanBase -sidebarTitle: OceanBase ---- - -This is the implementation of the OceanBase data handler for MindsDB. - -OceanBase is a distributed relational database. It is the only distributed database in the world that has broken both TPC-C and TPC-H records. OceanBase adopts an independently developed integrated architecture, which encompasses both the scalability of a distributed architecture and the performance advantage of a centralized architecture. It supports hybrid transaction/analytical processing (HTAP) with one engine. Its features include strong data consistency, high availability, high performance, online scalability, high compatibility with SQL and mainstream relational databases, transparency to applications, and a high cost/performance ratio. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect OceanBase to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to OceanBase. - -## Implementation - -This handler is implemented by extending the MySQL data handler. 
- -The required arguments to establish a connection are as follows: - -* `user` is the database user. -* `password` is the database password. -* `host` is the host name, IP address, or URL. -* `port` is the port used to make TCP/IP connection. -* `database` is the database name. - -## Usage - -In order to make use of this handler and connect to the OceanBase server in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE oceanbase_datasource -WITH - ENGINE = 'oceanbase', - PARAMETERS = { - "host": "127.0.0.1", - "user": "oceanbase_user", - "password": "password", - "port": 2881, - "database": "oceanbase_db" - }; -``` - -Now, you can use this established connection to query your database as follows: - -```sql -SELECT * -FROM oceanbase_datasource.demo_table -LIMIT 10; -``` diff --git a/docs/integrations/data-integrations/opengauss.mdx b/docs/integrations/data-integrations/opengauss.mdx deleted file mode 100644 index 3aca678e925..00000000000 --- a/docs/integrations/data-integrations/opengauss.mdx +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: OpenGauss -sidebarTitle: OpenGauss ---- - -This is the implementation of the OpenGauss data handler for MindsDB. - -[OpenGauss](https://opengauss.org/en/) is an open-source relational database management system released with the Mulan PSL v2 and the kernel built on Huawei's years of experience in the database field. It continuously provides competitive features tailored to enterprise-grade scenarios. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect OpenGauss to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to OpenGauss. - -## Implementation - -This handler is implemented by extending the PostgreSQL data handler. 
- -The required arguments to establish a connection are as follows: - -* `user` is the database user. -* `password` is the database password. -* `host` is the host name, IP address, or URL. -* `port` is the port used to make TCP/IP connection. -* `database` is the database name. - -## Usage - -In order to make use of this handler and connect to the OpenGauss database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE opengauss_datasource -WITH - ENGINE = 'opengauss', - PARAMETERS = { - "host": "127.0.0.1", - "port": 5432, - "database": "opengauss", - "user": "mindsdb", - "password": "password" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM opengauss_datasource.demo_table -LIMIT 10; -``` diff --git a/docs/integrations/data-integrations/oracle.mdx b/docs/integrations/data-integrations/oracle.mdx deleted file mode 100644 index bedf9816630..00000000000 --- a/docs/integrations/data-integrations/oracle.mdx +++ /dev/null @@ -1,101 +0,0 @@ ---- -title: Oracle -sidebarTitle: Oracle ---- - -This documentation describes the integration of MindsDB with [Oracle](https://www.techopedia.com/definition/8711/oracle-database), one of the most trusted and widely used relational database engines for storing, organizing and retrieving data by type while still maintaining relationships between the various types. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Oracle to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). 
- -## Connection - -Establish a connection to your Oracle database from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE oracle_datasource -WITH - ENGINE = 'oracle', - PARAMETERS = { - "host": "localhost", - "service_name": "FREEPDB1", - "user": "SYSTEM", - "password": "password" - }; -``` - -Required connection parameters include the following: - -* `user`: The username for the Oracle database. -* `password`: The password for the Oracle database. - -* `dsn`: The data source name (DSN) for the Oracle database. -OR -* `host`: The hostname, IP address, or URL of the Oracle server. -AND -* `sid`: The system identifier (SID) of the Oracle database. -OR -* `service_name`: The service name of the Oracle database. - -Optional connection parameters include the following: - -* `port`: The port number for connecting to the Oracle database. Default is 1521. -* `disable_oob`: The boolean parameter to disable out-of-band breaks. Default is `false`. -* `auth_mode`: The authorization mode to use. -* `thick_mode`: Set to `true` to use thick mode for the connection. Thin mode is used by default. -* `oracle_client_lib_dir`: The directory path where Oracle Client libraries are located. Required if `thick_mode` is set to `true`. - -## Usage - -Retrieve data from a specified table by providing the integration name, schema, and table name: - -```sql -SELECT * -FROM oracle_datasource.schema_name.table_name -LIMIT 10; -``` - -Run PL/SQL queries directly on the connected Oracle database: - -```sql -SELECT * FROM oracle_datasource ( - - --Native Query Goes Here - SELECT employee_id, first_name, last_name, email, hire_date - FROM oracle_datasource.hr.employees - WHERE department_id = 10 - ORDER BY hire_date DESC; - -); -``` - - -The above examples utilize `oracle_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. 
- - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the Oracle database. -* **Checklist**: - 1. Make sure the Oracle database is active. - 2. Confirm that the connection parameters provided (DSN, host, SID, service_name) and the credentials (user, password) are correct. - 3. Ensure a stable network between MindsDB and Oracle. -* **Symptoms**: Connection timeout errors. -* **Checklist**: - 1. Verify that the Oracle database is reachable from the MindsDB server. - 2. Check for any firewall or network restrictions that might be causing delays. -* **Symptoms**: Can't connect to db: Failed to initialize Oracle client: DPI-1047: Cannot locate a 64-bit Oracle Client library: -* **Checklist**: - 1. Ensure that the Oracle Client libraries are installed on the MindsDB server. - 2. Verify that the `oracle_client_lib_dir` parameter is set correctly in the connection configuration. - 3. Check that the installed Oracle Client libraries match the architecture (64-bit) of the MindsDB server. - - -This [troubleshooting guide](https://docs.oracle.com/en/database/oracle/oracle-database/19/ntqrf/database-connection-issues.html) provided by Oracle might also be helpful. diff --git a/docs/integrations/data-integrations/orioledb.mdx b/docs/integrations/data-integrations/orioledb.mdx deleted file mode 100644 index 7227273e774..00000000000 --- a/docs/integrations/data-integrations/orioledb.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: OrioleDB -sidebarTitle: OrioleDB ---- - -This is the implementation of the OrioleDB data handler for MindsDB. - -[OrioleDB](https://www.orioledata.com/) is a new storage engine for PostgreSQL, bringing a modern approach to database capacity, capabilities, and performance to the world's most-loved database platform. It consists of an extension, building on the innovative table access method framework and other standard Postgres extension interfaces. 
By extending and enhancing the current table access methods, OrioleDB opens the door to a future of more powerful storage models that are optimized for cloud and modern hardware architectures. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect OrioleDB to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to OrioleDB. - -## Implementation - -This handler is implemented by extending the PostgreSQL data handler. - -The required arguments to establish a connection are as follows: - -* `user` is the database user. -* `password` is the database password. -* `host` is the host name, IP address, or URL. -* `port` is the port used to make TCP/IP connection. -* `server` is the OrioleDB server. -* `database` is the database name. - -## Usage - -In order to make use of this handler and connect to the OrioleDB server in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE orioledb_datasource -WITH ENGINE = 'orioledb', -PARAMETERS = { - "user": "orioledb_user", - "password": "password", - "host": "127.0.0.1", - "port": 55505, - "server": "server_name", - "database": "oriole_db" -}; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM orioledb_datasource.demo_table -LIMIT 10; -``` diff --git a/docs/integrations/data-integrations/planetscale.mdx b/docs/integrations/data-integrations/planetscale.mdx deleted file mode 100644 index 40b24fae4c1..00000000000 --- a/docs/integrations/data-integrations/planetscale.mdx +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: PlanetScale -sidebarTitle: PlanetScale ---- - -This is the implementation of the PlanetScale data handler for MindsDB. 
- -[PlanetScale](https://planetscale.com/) is a MySQL-compatible, serverless database platform. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect PlanetScale to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to PlanetScale. - -## Implementation - -This handler is implemented by extending the MySQL data handler. - -The required arguments to establish a connection are as follows: - -* `user` is the database user. -* `password` is the database password. -* `host` is the host name, IP address, or URL. -* `port` is the port used to make TCP/IP connection. -* `database` is the database name. - -## Usage - -In order to make use of this handler and connect to the PlanetScale database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE planetscale_datasource -WITH - ENGINE = 'planet_scale', - PARAMETERS = { - "host": "127.0.0.1", - "port": 3306, - "user": "planetscale_user", - "password": "password", - "database": "planetscale_db" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM planetscale_datasource.my_table; -``` diff --git a/docs/integrations/data-integrations/postgresql.mdx b/docs/integrations/data-integrations/postgresql.mdx deleted file mode 100644 index b94225ba593..00000000000 --- a/docs/integrations/data-integrations/postgresql.mdx +++ /dev/null @@ -1,110 +0,0 @@ ---- -title: PostgreSQL -sidebarTitle: PostgreSQL ---- - -This documentation describes the integration of MindsDB with [PostgreSQL](https://www.postgresql.org/), a powerful, open-source, object-relational database system. -The integration allows MindsDB to access data stored in the PostgreSQL database and enhance PostgreSQL with AI capabilities. 
- - -This data source integration is thread-safe, utilizing a connection pool where each thread is assigned its own connection. When handling requests in parallel, threads retrieve connections from the pool as needed. - - -### Prerequisites - -Before proceeding, ensure the following prerequisites are met: - - 1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). - 2. To connect PostgreSQL to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to your PostgreSQL database from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE postgresql_conn -WITH ENGINE = 'postgres', -PARAMETERS = { - "host": "127.0.0.1", - "port": 5432, - "database": "postgres", - "user": "postgres", - "schema": "data", - "password": "password" -}; -``` - -Required connection parameters include the following: - -* `user`: The username for the PostgreSQL database. -* `password`: The password for the PostgreSQL database. -* `host`: The hostname, IP address, or URL of the PostgreSQL server. -* `port`: The port number for connecting to the PostgreSQL server. -* `database`: The name of the PostgreSQL database to connect to. - -Optional connection parameters include the following: - -* `schema`: The database schema to use. Default is public. -* `sslmode`: The SSL mode for the connection. 
-* `connection_parameters`: allows passing any PostgreSQL libpq parameters, such as: - * SSL settings: sslrootcert, sslcert, sslkey, sslcrl, sslpassword - * Network and reliability options: connect_timeout, keepalives, keepalives_idle, keepalives_interval, keepalives_count - * Session options: application_name, options, client_encoding - * Any other libpq-supported parameter - -## Usage - -The following usage examples utilize the connection to PostgreSQL made via the `CREATE DATABASE` statement and named `postgresql_conn`. - -Retrieve data from a specified table by providing the integration name, schema, and table name: - -```sql -SELECT * -FROM postgresql_conn.table_name -LIMIT 10; -``` - -Run PostgreSQL-native queries directly on the connected PostgreSQL database: - -```sql -SELECT * FROM postgresql_conn ( - - --Native Query Goes Here - SELECT - model, - COUNT(*) OVER (PARTITION BY model, year) AS units_to_sell, - ROUND((CAST(tax AS decimal) / price), 3) AS tax_div_price - FROM used_car_price - -); -``` - - -**Next Steps** - -Follow [this tutorial](https://docs.mindsdb.com/use-cases/predictive_analytics/house-sales-forecasting) to see more use case examples. - - -## Troubleshooting - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the PostgreSQL database. -* **Checklist**: - 1. Make sure the PostgreSQL server is active. - 2. Confirm that host, port, user, schema, and password are correct. Try a direct PostgreSQL connection. - 3. Ensure a stable network between MindsDB and PostgreSQL. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - 2. 
Examples: - * Incorrect: SELECT * FROM integration.travel data - * Incorrect: SELECT * FROM integration.'travel data' - * Correct: SELECT * FROM integration.\`travel data\` - diff --git a/docs/integrations/data-integrations/questdb.mdx b/docs/integrations/data-integrations/questdb.mdx deleted file mode 100644 index 8b600480b09..00000000000 --- a/docs/integrations/data-integrations/questdb.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: QuestDB -sidebarTitle: QuestDB ---- - -This is the implementation of the QuestDB data handler for MindsDB. - -[QuestDB](https://questdb.io/) is a columnar time-series database with high performance ingestion and SQL analytics. It is open-source and available on the cloud. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect QuestDB to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to QuestDB. - -## Implementation - -This handler is implemented by extending the PostgreSQL data handler. - -The required arguments to establish a connection are as follows: - -* `user` is the database user. -* `password` is the database password. -* `host` is the host name, IP address, or URL. -* `port` is the port used to make TCP/IP connection. -* `database` is the database name. -* `public` stores a value of `True` or `False`. Defaults to `True` if left blank. 
- -## Usage - -In order to make use of this handler and connect to the QuestDB server in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE questdb_datasource -WITH - ENGINE = 'questdb', - PARAMETERS = { - "host": "127.0.0.1", - "port": 8812, - "database": "qdb", - "user": "admin", - "password": "password" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM questdb_datasource.demo_table -LIMIT 10; -``` diff --git a/docs/integrations/data-integrations/sap-hana.mdx b/docs/integrations/data-integrations/sap-hana.mdx deleted file mode 100644 index cea583db9a2..00000000000 --- a/docs/integrations/data-integrations/sap-hana.mdx +++ /dev/null @@ -1,106 +0,0 @@ ---- -title: SAP HANA -sidebarTitle: SAP HANA ---- - -This documentation describes the integration of MindsDB with [SAP HANA](https://www.sap.com/products/technology-platform/hana/what-is-sap-hana.html), a multi-model database with a column-oriented in-memory design that stores data in its memory instead of keeping it on a disk. -The integration allows MindsDB to access data from SAP HANA and enhance SAP HANA with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect SAP HANA to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to SAP HANA from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/hana_handler) as an engine. 
- -```sql -CREATE DATABASE sap_hana_datasource -WITH - ENGINE = 'hana', - PARAMETERS = { - "address": "123e4567-e89b-12d3-a456-426614174000.hana.trial-us10.hanacloud.ondemand.com", - "port": "443", - "user": "demo_user", - "password": "demo_password", - "encrypt": true - }; -``` - -Required connection parameters include the following: - -* `address`: The hostname, IP address, or URL of the SAP HANA database. -* `port`: The port number for connecting to the SAP HANA database. -* `user`: The username for the SAP HANA database. -* `password`: The password for the SAP HANA database. - -Optional connection parameters include the following: - -* `database`: The name of the database to connect to. This parameter is not used for SAP HANA Cloud. -* `schema`: The database schema to use. Defaults to the user's default schema. -* `encrypt`: The setting to enable or disable encryption. Defaults to `True`. - -## Usage - -Retrieve data from a specified table by providing the integration, schema and table names: - -```sql -SELECT * -FROM sap_hana_datasource.schema_name.table_name -LIMIT 10; -``` - -Run SAP HANA SQL queries directly on the connected SAP HANA database: - -```sql -SELECT * FROM sap_hana_datasource ( - - --Native Query Goes Here - SELECT customer, year, SUM(sales) - FROM t1 - GROUP BY ROLLUP(customer, year); - - SELECT customer, year, SUM(sales) - FROM t1 - GROUP BY GROUPING SETS - ( - (customer, year), - (customer) - ) - UNION ALL - SELECT NULL, NULL, SUM(sales) - FROM t1; - -); -``` - - -The above examples utilize `sap_hana_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Troubleshooting - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the SAP HANA database. -* **Checklist**: - 1. Make sure the SAP HANA database is active. - 2. Confirm that address, port, user and password are correct. Try a direct connection using a client like DBeaver. - 3. 
Ensure a stable network between MindsDB and SAP HANA. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel-data - * Incorrect: SELECT * FROM integration.'travel-data' - * Correct: SELECT * FROM integration.\`travel-data\` - \ No newline at end of file diff --git a/docs/integrations/data-integrations/sap-sql-anywhere.mdx b/docs/integrations/data-integrations/sap-sql-anywhere.mdx deleted file mode 100644 index 48f4f8e0afd..00000000000 --- a/docs/integrations/data-integrations/sap-sql-anywhere.mdx +++ /dev/null @@ -1,81 +0,0 @@ ---- -title: SAP SQL Anywhere -sidebarTitle: SAP SQL Anywhere ---- - -This is the implementation of the SAP SQL Anywhere data handler for MindsDB. - -[SAP SQL Anywhere](https://www.sap.com/products/technology-platform/sql-anywhere.html) is an embedded database for application software that enables secure and reliable data management for servers where no DBA is available and synchronization for tens of thousands of mobile devices, Internet of Things (IoT) systems, and remote environments. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect SAP SQL Anywhere to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to SAP SQL Anywhere. - -## Implementation - -This handler is implemented using `sqlanydb`, the Python driver for SAP SQL Anywhere. - -The required arguments to establish a connection are as follows: - -* `host` is the host name or IP address of the SAP SQL Anywhere instance. 
-* `port` is the port number of the SAP SQL Anywhere instance. -* `user` specifies the user name. -* `password` specifies the password for the user. -* `database` sets the current database. -* `server` sets the current server. - -## Usage - -You can use the below SQL statements to create a table in SAP SQL Anywhere called `TEST`. - -```sql -CREATE TABLE TEST -( - ID INTEGER NOT NULL, - NAME NVARCHAR(1), - DESCRIPTION NVARCHAR(1) -); - -CREATE UNIQUE INDEX TEST_ID_INDEX - ON TEST (ID); - -ALTER TABLE TEST - ADD CONSTRAINT TEST_PK - PRIMARY KEY (ID); - -INSERT INTO TEST -VALUES (1, 'h', 'w'); -``` - -In order to make use of this handler and connect to the SAP SQL Anywhere database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE sap_sqlany_trial -WITH - ENGINE = 'sqlany', - PARAMETERS = { - "user": "DBADMIN", - "password": "password", - "host": "localhost", - "port": "55505", - "server": "TestMe", - "database": "MINDSDB" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM sap_sqlany_trial.test; -``` - -On execution, we get: - -| ID | NAME | DESCRIPTION | -|----|------|-------------| -| 1 | h | w | diff --git a/docs/integrations/data-integrations/scylladb.mdx b/docs/integrations/data-integrations/scylladb.mdx deleted file mode 100644 index adbb762f73c..00000000000 --- a/docs/integrations/data-integrations/scylladb.mdx +++ /dev/null @@ -1,58 +0,0 @@ ---- -title: ScyllaDB -sidebarTitle: ScyllaDB ---- - -This is the implementation of the ScyllaDB data handler for MindsDB. - -[ScyllaDB](https://www.scylladb.com/) is an open-source distributed NoSQL wide-column data store. It was purposefully designed to offer compatibility with Apache Cassandra while outperforming it with higher throughputs and reduced latencies. For a comprehensive understanding of ScyllaDB, visit ScyllaDB's official website. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. 
Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect ScyllaDB to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to ScyllaDB. - -### Implementation - -The ScyllaDB handler for MindsDB was developed using the scylla-driver library for Python. -The required arguments to establish a connection are as follows: - -- `host`: Host name or IP address of ScyllaDB. -- `port`: Connection port. -- `user`: Authentication username. Optional; required only if authentication is enabled. -- `password`: Authentication password. Optional; required only if authentication is enabled. -- `keyspace`: The specific keyspace (top-level container for tables) to connect to. -- `protocol_version`: Optional. Defaults to 4. -- `secure_connect_bundle`: Optional. Needed only for connections to DataStax Astra. - -## Usage - -To set up a connection between MindsDB and a Scylla server, utilize the following SQL syntax: - -```sql -CREATE DATABASE scylladb_datasource -WITH - ENGINE = 'scylladb', - PARAMETERS = { - "user": "user@mindsdb.com", - "password": "pass", - "host": "127.0.0.1", - "port": "9042", - "keyspace": "test_data" - }; -``` - - - The protocol version is set to 4 by default. Should you wish to modify it, - simply include "protocol_version": 5 within the PARAMETERS dictionary in the - query above. 
- - -With the connection established, you can execute queries on your keyspace as demonstrated below: - -```sql -SELECT * FROM scylladb_datasource.keystore.example_table LIMIT 10; -``` diff --git a/docs/integrations/data-integrations/singlestore.mdx b/docs/integrations/data-integrations/singlestore.mdx deleted file mode 100644 index f73892fac36..00000000000 --- a/docs/integrations/data-integrations/singlestore.mdx +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: SingleStore -sidebarTitle: SingleStore ---- - -This is the implementation of the SingleStore data handler for MindsDB. - -[SingleStore](https://www.singlestore.com/) is a proprietary, cloud-native database designed for data-intensive applications. A distributed, relational, SQL database management system that features ANSI SQL support. It is known for speed in data ingest, transaction processing, and query processing. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect SingleStore to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to SingleStore. - -## Implementation - -This handler is implemented by extending the MySQL data handler. - -The required arguments to establish a connection are as follows: - -* `user` is the database user. -* `password` is the database password. -* `host` is the host name, IP address, or URL. -* `port` is the port used to make TCP/IP connection. -* `database` is the database name. - -There are several optional arguments that can be used as well. - -* `ssl` is the `ssl` parameter value that indicates whether SSL is enabled (`True`) or disabled (`False`). -* `ssl_ca` is the SSL Certificate Authority. -* `ssl_cert` stores SSL certificates. -* `ssl_key` stores SSL keys. 
- -## Usage - -In order to make use of this handler and connect to the SingleStore database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE singlestore_datasource -WITH - ENGINE = 'singlestore', - PARAMETERS = { - "host": "127.0.0.1", - "port": 3306, - "database": "singlestore", - "user": "root", - "password": "password" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM singlestore_datasource.example_table; -``` diff --git a/docs/integrations/data-integrations/snowflake.mdx b/docs/integrations/data-integrations/snowflake.mdx deleted file mode 100644 index f1cf39ebffc..00000000000 --- a/docs/integrations/data-integrations/snowflake.mdx +++ /dev/null @@ -1,202 +0,0 @@ ---- -title: Snowflake -sidebarTitle: Snowflake ---- - -This documentation describes the integration of MindsDB with [Snowflake](https://www.snowflake.com/en/), a cloud data warehouse used to store and analyze data. -The integration allows MindsDB to access data stored in the Snowflake database and enhance it with AI capabilities. - - -**Important!** - -When querying data from Snowflake, MindsDB automatically converts column names to lower-case. To prevent this, users can provide an alias name as shown below. - -**This update is introduced with the MindsDB version 25.3.4.1. It is not backward-compatible and has the following implications:** - -1. Queries to Snowflake will return column names in lower-case from now on. -2. The models created with Snowflake as a data source must be recreated. - -**How it works** - -The below query presents how Snowflake columns are output when queried from MindsDB. 
- -```sql -SELECT - CC_NAME, -- converted to lower-case - CC_CLASS AS `CC_CLASS`, -- provided alias name in upper-case - CC_EMPLOYEES, - cc_employees -FROM snowflake_data.TPCDS_SF100TCL.CALL_CENTER; -``` - -Here is the output: - -```sql -+--------------+----------+--------------+--------------+ -| cc_name | CC_CLASS | cc_employees | cc_employees | -+--------------+----------+--------------+--------------+ -| NY Metro | large | 597159671 | 597159671 | -| Mid Atlantic | medium | 944879074 | 944879074 | -+--------------+----------+--------------+--------------+ -``` - - - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Snowflake to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - -## Connection - -The Snowflake handler supports two authentication methods: - -### 1. Password Authentication (Legacy) - -Establish a connection using username and password: - -```sql -CREATE DATABASE snowflake_datasource -WITH - ENGINE = 'snowflake', - PARAMETERS = { - "account": "tvuibdy-vm85921", - "user": "your_username", - "password": "your_password", - "database": "test_db", - "auth_type": "password" - }; -``` - -### 2. 
Key Pair Authentication (Recommended) - -Key pair authentication is more secure and is the recommended method by Snowflake: - -```sql -CREATE DATABASE snowflake_datasource -WITH - ENGINE = 'snowflake', - PARAMETERS = { - "account": "tvuibdy-vm85921", - "user": "your_username", - "private_key_path": "/path/to/your/private_key.pem", - "database": "test_db", - "auth_type": "key_pair" - }; -``` - -If the private key cannot be accesed from disk (for example when running MindsDB on Cloud), provide the PEM content directly: - -```sql -CREATE DATABASE snowflake_datasource -WITH - ENGINE = 'snowflake', - PARAMETERS = { - "account": "tvuibdy-vm85921", - "user": "your_username", - "private_key": "-----BEGIN PRIVATE KEY-----\\n...\\n-----END PRIVATE KEY-----", - "database": "test_db", - "auth_type": "key_pair" - }; -``` - - -With encrypted private key (passphrase protected): - -```sql -CREATE DATABASE snowflake_datasource -WITH - ENGINE = 'snowflake', - PARAMETERS = { - "account": "tvuibdy-vm85921", - "user": "your_username", - "private_key_path": "/path/to/your/private_key.pem", - "private_key_passphrase": "your_passphrase", - "database": "test_db", - "auth_type": "key_pair" - }; -``` - -### Connection Parameters - -Required parameters: - -* `account`: The Snowflake account identifier. This [guide](https://docs.snowflake.com/en/user-guide/admin-account-identifier) will help you find your account identifier. -* `user`: The username for the Snowflake account. -* `database`: The name of the Snowflake database to connect to. -* `auth_type`: The authentication type to use. Options: `"password"` or `"key_pair"`. - -Authentication parameters (one method required): - -* `password`: The password for the Snowflake account (password authentication). -* `private_key_path`: Path to the private key file for key pair authentication. -* `private_key`: PEM-formatted private key content for key pair authentication. 
-* `private_key_passphrase`: Optional passphrase for encrypted private key (key pair authentication). - -Optional parameters: - -* `warehouse`: The Snowflake warehouse to use for running queries. -* `schema`: The database schema to use within the Snowflake database. Default is `PUBLIC`. -* `role`: The Snowflake role to use. - - -For detailed instructions on setting up key pair authentication, please refer to [AUTHENTICATION.md](AUTHENTICATION.md) or the [Snowflake Key Pair Authentication documentation](https://docs.snowflake.com/en/user-guide/key-pair-auth.html). - - -## Usage - -Retrieve data from a specified table by providing the integration name, schema, and table name: - -```sql -SELECT * -FROM snowflake_datasource.schema_name.table_name -LIMIT 10; -``` - -Run Snowflake SQL queries directly on the connected Snowflake database: - -```sql -SELECT * FROM snowflake_datasource ( - - --Native Query Goes Here - SELECT - employee_table.* EXCLUDE department_id, - department_table.* RENAME department_name AS department - FROM employee_table INNER JOIN department_table - ON employee_table.department_id = department_table.department_id - ORDER BY department, last_name, first_name; - -); -``` - - -The above examples utilize `snowflake_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the Snowflake account. -* **Checklist**: - 1. Make sure the Snowflake is active. - 2. Confirm that account, user, password and database are correct. Try a direct Snowflake connection using a client like DBeaver. - 3. Ensure a stable network between MindsDB and Snowflake. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - 2. 
Examples: - * Incorrect: SELECT * FROM integration.travel data - * Incorrect: SELECT * FROM integration.'travel data' - * Correct: SELECT * FROM integration.\`travel data\` - - -This [troubleshooting guide](https://community.snowflake.com/s/article/Snowflake-Client-Connectivity-Troubleshooting) provided by Snowflake might also be helpful. diff --git a/docs/integrations/data-integrations/sqlite.mdx b/docs/integrations/data-integrations/sqlite.mdx deleted file mode 100644 index f91bf433d42..00000000000 --- a/docs/integrations/data-integrations/sqlite.mdx +++ /dev/null @@ -1,44 +0,0 @@ ---- -title: SQLite -sidebarTitle: SQLite ---- - -This is the implementation of the SQLite data handler for MindsDB. - -[SQLite](https://www.sqlite.org/about.html) is an in-process library that implements a self-contained, serverless, zero-configuration, transactional SQL database engine. The code for SQLite is in the public domain and is thus free to use for either commercial or private purpose. SQLite is the most widely deployed database in the world with more applications than we can count, including several high-profile projects. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect SQLite to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to SQLite. - -## Implementation - -This handler is implemented using the standard `sqlite3` library that comes with Python. - -The only required argument to establish a connection is `db_file` that points to the database file that the connection is to be made to. - -Optionally, this may also be set to `:memory:` to create an in-memory database. 
- -## Usage - -In order to make use of this handler and connect to the SQLite database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE sqlite_datasource -WITH - engine = 'sqlite', - parameters = { - "db_file": "example.db" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM sqlite_datasource.example_tbl; -``` diff --git a/docs/integrations/data-integrations/starrocks.mdx b/docs/integrations/data-integrations/starrocks.mdx deleted file mode 100644 index 62aa7f83016..00000000000 --- a/docs/integrations/data-integrations/starrocks.mdx +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: StarRocks -sidebarTitle: StarRocks ---- - -This is the implementation of the StarRocks data handler for MindsDB. - -[StarRocks](https://www.starrocks.io/) is the next-generation data platform designed to make data-intensive real-time analytics fast and easy. It delivers query speeds 5 to 10 times faster than other popular solutions. StarRocks can perform real-time analytics well while updating historical records. It can also enhance real-time analytics with historical data from data lakes easily. With StarRocks, you can get rid of the de-normalized tables and get the best performance and flexibility. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect StarRocks to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to StarRocks. - -## Implementation - -This handler is implemented by extending the MySQL data handler. - -The required arguments to establish a connection are as follows: - -* `user` is the database user. -* `password` is the database password. -* `host` is the host name, IP address, or URL. 
-* `port` is the port used to make TCP/IP connection. -* `database` is the database name. - -## Usage - -In order to make use of this handler and connect to the StarRocks server in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE starrocks_datasource -WITH - ENGINE = 'starrocks', - PARAMETERS = { - "host": "127.0.0.1", - "user": "starrocks_user", - "password": "password", - "port": 8030, - "database": "starrocks_db" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM starrocks_datasource.demo_table -LIMIT 10; -``` diff --git a/docs/integrations/data-integrations/supabase.mdx b/docs/integrations/data-integrations/supabase.mdx deleted file mode 100644 index 0727095132f..00000000000 --- a/docs/integrations/data-integrations/supabase.mdx +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: Supabase -sidebarTitle: Supabase ---- - -This is the implementation of the Supabase data handler for MindsDB. - -[Supabase](https://supabase.com/) is an open-source Firebase alternative. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Supabase to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Supabase. - -## Implementation - -This handler is implemented by extending the PostgreSQL data handler. - -The required arguments to establish a connection are as follows: - -* `user` is the database user. -* `password` is the database password. -* `host` is the host name, IP address, or URL. -* `port` is the port used to make TCP/IP connection. -* `database` is the database name. 
- -## Usage - -In order to make use of this handler and connect to the Supabase server in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE supabase_datasource -WITH ENGINE = 'supabase', -PARAMETERS = { - "host": "127.0.0.1", - "port": 54321, - "database": "test", - "user": "supabase", - "password": "password" -}; -``` - -You can use this established connection to query your database as follows: - -```sql -SELECT * -FROM supabase_datasource.public.rentals -LIMIT 10; -``` diff --git a/docs/integrations/data-integrations/surrealdb.mdx b/docs/integrations/data-integrations/surrealdb.mdx deleted file mode 100644 index 2e57d6613e6..00000000000 --- a/docs/integrations/data-integrations/surrealdb.mdx +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: SurrealDB -sidebarTitle: SurrealDB ---- - -This is the implementation of the SurrealDB data handler for MindsDB. - -[SurrealDB](https://surrealdb.com/) is an innovative NewSQL cloud database, suitable for serverless applications, jamstack applications, single-page applications, and traditional applications. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect SurrealDB to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to SurrealDB. - -## Implementation - -This handler was implemented by using the python library `pysurrealdb`. 
- -The required arguments to establish a connection are: - -* `host`: the host name of the Surrealdb connection -* `port`: the port to use when connecting -* `user`: the user to authenticate -* `password`: the password to authenticate the user -* `database`: database name to be connected -* `namespace`: namespace name to be connected - -## Usage - -To establish a connection with our SurrealDB server which is running locally with the public cloud instance. We are going to use `ngrok tunneling` to connect cloud instance to the local SurrealDB server. You can follow this [guide](https://docs.mindsdb.com/sql/create/database#making-your-local-database-available-to-mindsdb) for that. - -Let's make the connection with the MindsDB public cloud - -```sql -CREATE DATABASE exampledb -WITH ENGINE = 'surrealdb', -PARAMETERS = { - "host": "6.tcp.ngrok.io", - "port": "17141", - "user": "root", - "password": "root", - "database": "testdb", - "namespace": "testns" -}; -``` - -Please change the `host` and `port` properties in the `PARAMETERS` clause based on the values which you got. - -We can also query the `dev` table which we created with -```sql -SELECT * FROM exampledb.dev; -``` \ No newline at end of file diff --git a/docs/integrations/data-integrations/tdengine.mdx b/docs/integrations/data-integrations/tdengine.mdx deleted file mode 100644 index df771233bfc..00000000000 --- a/docs/integrations/data-integrations/tdengine.mdx +++ /dev/null @@ -1,56 +0,0 @@ ---- -title: TDengine -sidebarTitle: TDengine ---- - -This is the implementation of the TDEngine data handler for MindsDB. - -[TDengine](https://tdengine.com/) is an open source, high-performance, cloud native time-series database optimized for Internet of Things (IoT), Connected Cars, and Industrial IoT. It enables efficient, real-time data ingestion, processing, and monitoring of TB and even PB scale data per day, generated by billions of sensors and data collectors. 
TDengine differentiates itself from other time-series databases with numerous advantages, such as high performance, simplified solution, cloud-native, ease of use, easy data analytics, and open-source. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect TDengine to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to TDengine. - -## Implementation - -This handler is implemented using `taos/taosrest`, a Python library that allows you to use Python code to run SQL commands on the TDEngine server. - -The required arguments to establish a connection are as follows: - -* `user` is the username associated with the server. -* `password` is the password to authenticate your access. -* `url` is the URL to the TDEngine server. For local server, the URL is `localhost:6041` by default. -* `token` is the unique token provided while using TDEngine Cloud. -* `database` is the database name to be connected. - -## Usage - -In order to make use of this handler and connect to the TDEngine database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE tdengine_datasource -WITH - ENGINE = 'tdengine', - PARAMETERS = { - "user": "tdengine_user", - "password": "password", - "url": "localhost:6041", - "token": "token", - "database": "tdengine_db" - }; -``` - - -You can specify `token` instead of `user` and `password` while using TDEngine. 
- - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM tdengine_datasource.demo_table; -``` diff --git a/docs/integrations/data-integrations/teradata.mdx b/docs/integrations/data-integrations/teradata.mdx deleted file mode 100644 index aaa707b8ffb..00000000000 --- a/docs/integrations/data-integrations/teradata.mdx +++ /dev/null @@ -1,101 +0,0 @@ ---- -title: Teradata -sidebarTitle: Teradata ---- - -This documentation describes the integration of MindsDB with [Teradata](https://www.teradata.com/why-teradata), the complete cloud analytics and data platform for Trusted AI. -The integration allows MindsDB to access data from Teradata and enhance Teradata with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect Teradata to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to Teradata from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/teradata_handler) as an engine. - -```sql -CREATE DATABASE teradata_datasource -WITH - ENGINE = 'teradata', - PARAMETERS = { - "host": "192.168.0.41", - "user": "demo_user", - "password": "demo_password", - "database": "example_db" - }; -``` - -Required connection parameters include the following: - -* `host`: The hostname, IP address, or URL of the Teradata server. -* `user`: The username for the Teradata database. -* `password`: The password for the Teradata database. - -Optional connection parameters include the following: - -* `database`: The name of the Teradata database to connect to. 
Defaults to the user's default database.
- - \ No newline at end of file diff --git a/docs/integrations/data-integrations/tidb.mdx b/docs/integrations/data-integrations/tidb.mdx deleted file mode 100644 index 9bd1cc357c9..00000000000 --- a/docs/integrations/data-integrations/tidb.mdx +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: TiDB -sidebarTitle: TiDB ---- - -This is the implementation of the TiDB data handler for MindsDB. - -[TiDB](https://www.pingcap.com/tidb/) is an open-source NewSQL database that supports Hybrid Transactional and Analytical Processing workloads. It is MySQL-compatible and can provide horizontal scalability, strong consistency, and high availability. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect TiDB to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to TiDB. - -## Implementation - -This handler is implemented by extending the MySQL data handler. - -The required arguments to establish a connection are as follows: - -* `user` is the database user. -* `password` is the database password. -* `host` is the host name, IP address, or URL. -* `port` is the port used to make TCP/IP connection. -* `database` is the database name. 
- -## Usage - -In order to make use of this handler and connect to the TiDB database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE tidb_datasource -WITH - ENGINE = 'tidb', - PARAMETERS = { - "host": "127.0.0.1", - "port": 4000, - "database": "tidb", - "user": "root", - "password": "password" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM tidb_datasource.demo_table; -``` diff --git a/docs/integrations/data-integrations/timescaledb.mdx b/docs/integrations/data-integrations/timescaledb.mdx deleted file mode 100644 index 005dce0e845..00000000000 --- a/docs/integrations/data-integrations/timescaledb.mdx +++ /dev/null @@ -1,99 +0,0 @@ ---- -title: TimescaleDB -sidebarTitle: TimescaleDB ---- - -This documentation describes the integration of MindsDB with [TimescaleDB](https://docs.timescale.com). - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect TimescaleDB to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to TimescaleDB from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/timescaledb_handler) as an engine. - -```sql -CREATE DATABASE timescaledb_datasource -WITH - engine = 'timescaledb', - parameters = { - "host": "examplehost.timescaledb.com", - "port": 5432, - "user": "example_user", - "password": "my_password", - "database": "tsdb" - }; -``` - - -Required connection parameters include the following: - -* `user`: The username for the TimescaleDB database. -* `password`: The password for the TimescaleDB database. -* `host`: The hostname, IP address, or URL of the TimescaleDB server. 
-* `port`: The port number for connecting to the TimescaleDB server. -* `database`: The name of the TimescaleDB database to connect to. - -Optional connection parameters include the following: - -* `schema`: The database schema to use. Default is public. - - -## Usage - -Before attempting to connect to a TimescaleDB server using MindsDB, ensure that it accepts incoming connections using [this guide](https://docs.timescale.com/latest/getting-started/setup/remote-connections/). - -The following usage examples utilize the connection to TimescaleDB made via the `CREATE DATABASE` statement and named `timescaledb_datasource`. - -Retrieve data from a specified table by providing the integration and table name. - - -You can use this established connection to query your table as follows, - -```sql -SELECT * -FROM timescaledb_datasource.sensor; -``` - -Run PostgreSQL-native queries directly on the connected TimescaleDB database: - -```sql -SELECT * FROM timescaledb_datasource ( - - --Native Query Goes Here - SELECT - model, - COUNT(*) OVER (PARTITION BY model, year) AS units_to_sell, - ROUND((CAST(tax AS decimal) / price), 3) AS tax_div_price - FROM used_car_price - -); -``` - -## Troubleshooting - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the TimescaleDB database. -* **Checklist**: - 1. Make sure the TimescaleDB server is active. - 2. Confirm that host, port, user, schema, and password are correct. Try a direct TimescaleDB connection. - 3. Ensure a stable network between MindsDB and TimescaleDB. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - 2. 
Examples: - * Incorrect: SELECT * FROM integration.travel data - * Incorrect: SELECT * FROM integration.'travel data' - * Correct: SELECT * FROM integration.\`travel data\` - \ No newline at end of file diff --git a/docs/integrations/data-integrations/trino.mdx b/docs/integrations/data-integrations/trino.mdx deleted file mode 100644 index 37ff047b809..00000000000 --- a/docs/integrations/data-integrations/trino.mdx +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: Trino -sidebarTitle: Trino ---- - -This is the implementation of the Trino data handler for MindsDB. - -[Trino](https://trino.io/) is an open-source distributed SQL query engine designed to query large data sets distributed over one or more heterogeneous data sources. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Trino to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Trino. - -## Implementation - -This handler is implemented using `pyhive`, a collection of Python DB-API and SQLAlchemy interfaces for Presto and Hive. - -The required arguments to establish a connection are as follows: - -* `user` is the database user. -* `password` is the database password. -* `host` is the host name, IP address, or URL. -* `port` is the port used to make TCP/IP connection. - -There are some optional arguments as follows: - -* `auth` is the authentication method. Currently, only `basic` is supported. -* `http_scheme` takes the value of `http`by default. It can be set to `https` as well. -* `catalog` is the catalog. -* `schema` is the schema name. -* `with` defines default WITH-clause (properties) for ALL tables. This parameter is experimental and might be changed or removed in future release. 
- -## Usage - -In order to make use of this handler and connect to the Trino database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE trino_datasource -WITH - ENGINE = 'trino', - PARAMETERS = { - "host": "127.0.0.1", - "port": 443, - "auth": "basic", - "http_scheme": "https", - "user": "trino", - "password": "password", - "catalog": "default", - "schema": "test", - "with": "with (transactional = true)" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM trino_datasource.demo_table; -``` diff --git a/docs/integrations/data-integrations/vertica.mdx b/docs/integrations/data-integrations/vertica.mdx deleted file mode 100644 index 71119d592fa..00000000000 --- a/docs/integrations/data-integrations/vertica.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: Vertica -sidebarTitle: Vertica ---- - -This is the implementation of the Vertica data handler for MindsDB. - -The column-oriented [Vertica Analytics Platform](https://www.vertica.com/overview/) was designed to manage large, fast-growing volumes of data and with fast query performance for data warehouses and other query-intensive applications. The product claims to greatly improve query performance over traditional relational database systems, and to provide high availability and exabyte scalability on commodity enterprise servers. Vertica runs on multiple cloud computing systems as well as on Hadoop nodes. Vertica's Eon Mode separates compute from storage, using S3 object storage and dynamic allocation of compute notes. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Vertica to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Vertica. 
- -## Implementation - -This handler is implemented using `vertica-python`, a Python library that allows you to use Python code to run SQL commands on the Vertica database. - -The required arguments to establish a connection are as follows: - -* `user` is the username asscociated with the database. -* `password` is the password to authenticate your access. -* `host` is the host name or IP address of the server. -* `port` is the port through which TCP/IP connection is to be made. -* `database` is the database name to be connected. -* `schema` is the schema name to get tables from. - -## Usage - -In order to make use of this handler and connect to the Vertica database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE vertica_datasource -WITH - engine = 'vertica', - parameters = { - "user": "dbadmin", - "password": "password", - "host": "127.0.0.1", - "port": 5433, - "schema_name": "public", - "database": "VMart" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM vertica_datasource.TEST; -``` diff --git a/docs/integrations/data-integrations/vitess.mdx b/docs/integrations/data-integrations/vitess.mdx deleted file mode 100644 index 24c80ad2ff9..00000000000 --- a/docs/integrations/data-integrations/vitess.mdx +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: Vitess -sidebarTitle: Vitess ---- - -This is the implementation of the Vitess data handler for MindsDB. - -[Vitess](https://vitess.io/) is a database solution for deploying, scaling, and managing large clusters of open-source database instances. It currently supports MySQL and Percona Server for MySQL. It's architected to run as effectively in a public or private cloud architecture as it does on dedicated hardware. It combines and extends many important SQL features with the scalability of a NoSQL database. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. 
Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Vitess to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Vitess. - -## Implementation - -This handler is implemented by extending the MySQL data handler. - -The required arguments to establish a connection are as follows: - -* `user` is the database user. -* `password` is the database password. -* `host` is the host name, IP address, or URL. -* `port` is the port used to make TCP/IP connection. -* `database` is the database name. - -## Usage - -In order to make use of this handler and connect to the Vitess server in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE vitess_datasource -WITH - ENGINE = "vitess", - PARAMETERS = { - "user": "root", - "password": "", - "host": "localhost", - "port": 33577, - "database": "commerce" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM vitess_datasource.product -LIMIT 10; -``` diff --git a/docs/integrations/data-integrations/yugabytedb.mdx b/docs/integrations/data-integrations/yugabytedb.mdx deleted file mode 100644 index 126b500bdb7..00000000000 --- a/docs/integrations/data-integrations/yugabytedb.mdx +++ /dev/null @@ -1,64 +0,0 @@ ---- -title: YugabyteDB -sidebarTitle: YugabyteDB ---- - -This is the implementation of the YugabyteDB data handler for MindsDB. - -[YugabyteDB](https://www.yugabyte.com/) is a high-performance, cloud-native distributed SQL database that aims to support all PostgreSQL features. It is best fit for cloud-native OLTP (i.e. real-time, business-critical) applications that need absolute data correctness and require at least one of the following: scalability, high tolerance to failures, or globally-distributed deployments. 
- -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect YugabyteDB to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to YugabyteDB. - -## Implementation - -This handler is implemented using `psycopg2`, a Python library that allows you to use Python code to run SQL commands on the YugabyteDB database. - -The required arguments to establish a connection are as follows: - -- `user` is the database user. -- `password` is the database password. -- `host` is the host name, IP address, or URL. -- `port` is the port used to make TCP/IP connection. -- `database` is the database name. -- `schema` is the schema to which your table belongs. - -## Usage - -In order to make use of this handler and connect to the YugabyteDB database in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE yugabyte_datasource -WITH - engine = 'yugabyte', - parameters = { - "user": "admin", - "password": "1234", - "host": "127.0.0.1", - "port": 5433, - "database": "yugabyte", - "schema": "your_schema_name" - }; -``` - -You can use this established connection to query your table as follows: - -```sql -SELECT * -FROM yugabyte_datasource.demo; -``` - - -NOTE : If you are using YugabyteDB Cloud with MindsDB Cloud website you need to add below 3 static IPs of MindsDB Cloud to `allow IP list` for accessing it publicly. 
-``` -18.220.205.95 -3.19.152.46 -52.14.91.162 -``` -![public](https://github-production-user-asset-6210df.s3.amazonaws.com/75653580/238903548-1b054591-f5db-4a6d-a3d0-d048671e4cfa.png) - diff --git a/docs/integrations/data-overview.mdx b/docs/integrations/data-overview.mdx deleted file mode 100644 index 48b5b6e73b0..00000000000 --- a/docs/integrations/data-overview.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: Data Integrations -sidebarTitle: Overview -icon: "database" ---- - -MindsDB integrates with numerous data sources, including databases, vector stores, and applications, making data available to AI agents and apps by connecting data sources to MindsDB. - -MindsDB supports the Model Context Protocol (MCP) where MindsDB is an MCP server that enables your MCP applications to answer questions over large-scale federated data. [Learn more here](/mcp/overview). - - -Note that MindsDB doesn't store or copy your data. Instead, it fetches data directly from your connected sources each time you make a query, ensuring that any changes to the data are instantly reflected. This means your data remains in its original location, and MindsDB always works with the most up-to-date information. - - -## Officially Supported Integrations - -Integrations built, tested, and maintained by the MindsDB team. These are fully supported and recommended for production use. - - - - - - - - - - - - - - - - - - -## Community Integrations - -Integrations developed by the MindsDB community. They may offer valuable functionality but are not officially supported and may have limited feature coverage or known issues. - -All remaining integrations available in the documentation sections below and in the [handlers folder](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers) are considered community integrations. 
- - -If you don't find a data source of your interest, you can [request a feature here](https://github.com/mindsdb/mindsdb/issues/new?assignees=&labels=enhancement&projects=&template=feature_request_v2.yaml) or build a handler following [this instruction for data handlers](/contribute/data-handlers) and [this instruction for applications](/contribute/app-handlers). - - - -Here is how you can query for all the available data handlers used to connect data sources to MindsDB. - - ```sql - SELECT * FROM information_schema.handlers WHERE type = 'data'; - --or - SHOW HANDLERS WHERE type = 'data'; - ``` - diff --git a/docs/integrations/files/csv-xlsx-xls.mdx b/docs/integrations/files/csv-xlsx-xls.mdx deleted file mode 100644 index c39227be671..00000000000 --- a/docs/integrations/files/csv-xlsx-xls.mdx +++ /dev/null @@ -1,48 +0,0 @@ ---- -title: Upload CSV, XLSX, XLS files to MindsDB -sidebarTitle: CSV, XLSX, XLS ---- - -You can upload CSV, XLSX, and XLS files of any size to MindsDB that runs locally via [Docker](/setup/self-hosted/docker) or [pip](/contribute/install). - -CSV, XLSX, XLS files are stored in the form of a table inside MindsDB. - -## Upload files - -Follow the steps below to upload a file: - -1. Click on the `Add` dropdown and choose `Upload file`. - -

- -

- -2. Upload a file and provide a name used to access it within MindsDB. - -

- -

- -3. Alternatively, upload a file as a link and provide a name used to access it within MindsDB. - -

- -

- -## Query files - -The CSV, XLSX, and XLS files may contain one or more sheets. Here is how to query data within MindsDB. - -Query for the list of available sheets in the file uploaded under the name `my_file`. - -```sql -SELECT * -FROM files.my_file; -``` - -Query for the content of one of the sheets listed with the command above. - -```sql -SELECT * -FROM files.my_file.my_sheet; -``` diff --git a/docs/integrations/files/json.mdx b/docs/integrations/files/json.mdx deleted file mode 100644 index ef9b21218d6..00000000000 --- a/docs/integrations/files/json.mdx +++ /dev/null @@ -1,95 +0,0 @@ ---- -title: Upload JSON files to MindsDB -sidebarTitle: JSON ---- - -You can upload JSON files of any size to MindsDB that runs locally via [Docker](/setup/self-hosted/docker) or [pip](/contribute/install). - -JSON files are converted into a table, if the JSON file structure allows for it. Otherwise, JSON files are stored similarly to text files. - - -Here is the sample format of a JSON file that can be uploaded to MindsDB: - -``` -[ - { - "id": 1, - "name": "Alice", - "contact": { - "email": "alice@example.com", - "phone": "123-456-7890" - }, - "address": { - "street": "123 Maple Street", - "city": "Wonderland", - "zip": "12345" - } - }, - { - "id": 2, - "name": "Bob", - "contact": { - "email": "bob@example.com", - "phone": "987-654-3210" - }, - "address": { - "street": "456 Oak Avenue", - "city": "Builderland", - "zip": "67890" - } - } -] -``` - -MindsDB converts it into a table where each row stores the high-level object. 
- -```sql -| id | name | contact | address | -| --- | ----- | ---------------------------------------------------- | --------------------------------------------------------------- | -| 1 | Alice | {"email":"alice@example.com","phone":"123-456-7890"} | {"city":"Wonderland","street":"123 Maple Street","zip":"12345"} | -| 2 | Bob | {"email":"bob@example.com","phone":"987-654-3210"} | {"city":"Builderland","street":"456 Oak Avenue","zip":"67890"} | -``` - -You can extract the JSON fields from `contact` and `address` columns with the `json_extract` function. - -```sql -SELECT id, - name, - json_extract(contact, '$.email') AS email, - json_extract(address, '$.city') AS city -FROM files.json_file_name; -``` - - -## Upload files - -Follow the steps below to upload a file: - -1. Click on the `Add` dropdown and choose `Upload file`. - -

- -

- -2. Upload a file and provide a name used to access it within MindsDB. - -

- -

- -3. Alternatively, upload a file as a link and provide a name used to access it within MindsDB. - -

- -

- -## Query files - -Here is how to query data within MindsDB. - -Query for the content of the file uploaded under the name `my_file`. - -```sql -SELECT * -FROM files.my_file; -``` diff --git a/docs/integrations/files/parquet.mdx b/docs/integrations/files/parquet.mdx deleted file mode 100644 index b15a71c56e1..00000000000 --- a/docs/integrations/files/parquet.mdx +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: Upload Parquet files to MindsDB -sidebarTitle: Parquet ---- - -You can upload Parquet files of any size to MindsDB that runs locally via [Docker](/setup/self-hosted/docker) or [pip](/contribute/install). - -Parquet files are stored in the form of a table inside MindsDB. - -## Upload files - -Follow the steps below to upload a file: - -1. Click on the `Add` dropdown and choose `Upload file`. - -

- -

- -2. Upload a file and provide a name used to access it within MindsDB. - -

- -

- -3. Alternatively, upload a file as a link and provide a name used to access it within MindsDB. - -

- -

- -## Query files - -Here is how to query data within MindsDB. - -Query for the content of the file uploaded under the name `my_file`. - -```sql -SELECT * -FROM files.my_file; -``` diff --git a/docs/integrations/files/pdf.mdx b/docs/integrations/files/pdf.mdx deleted file mode 100644 index 475952edb5c..00000000000 --- a/docs/integrations/files/pdf.mdx +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: Upload PDF files to MindsDB -sidebarTitle: PDF ---- - -You can upload PDF files of any size to MindsDB that runs locally via [Docker](/setup/self-hosted/docker) or [pip](/contribute/install). - -Note that MindsDB supports only searchable PDFs, as opposed to scanned PDFs. These are stored in the form of a table inside MindsDB. - -## Upload files - -Follow the steps below to upload a file: - -1. Click on the `Add` dropdown and choose `Upload file`. - -

- -

- -2. Upload a file and provide a name used to access it within MindsDB. - -

- -

- -## Query files - -Here is how to query data within MindsDB. - -Query for the content of the file uploaded under the name `my_file`. - -```sql -SELECT * -FROM files.my_file; -``` diff --git a/docs/integrations/files/txt.mdx b/docs/integrations/files/txt.mdx deleted file mode 100644 index ab4cb9a6e2b..00000000000 --- a/docs/integrations/files/txt.mdx +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: Upload TXT files to MindsDB -sidebarTitle: TXT ---- - -You can upload TXT files of any size to MindsDB that runs locally via [Docker](/setup/self-hosted/docker) or [pip](/contribute/install). - -TXT files are divided into chunks and stored in multiple table cells. MindsDB uses the [TextLoader from LangChain](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.text.TextLoader.html) to load TXT files. - -## Upload files - -Follow the steps below to upload a file: - -1. Click on the `Add` dropdown and choose `Upload file`. - -

- -

- -2. Upload a file and provide a name used to access it within MindsDB. - -

- -

- -## Query files - -Here is how to query data within MindsDB. - -Query for the content of the file uploaded under the name `my_file`. - -```sql -SELECT * -FROM files.my_file; -``` diff --git a/docs/integrations/sample-database.mdx b/docs/integrations/sample-database.mdx deleted file mode 100644 index 76835adb219..00000000000 --- a/docs/integrations/sample-database.mdx +++ /dev/null @@ -1,103 +0,0 @@ ---- -title: Sample Database -sidebarTitle: Sample Database -icon: "book" ---- - -MindsDB provides a read-only PostgreSQL database pre-loaded with various datasets. These datasets are curated to cover a wide range of scenarios and use cases, allowing you to experiment with different features of MindsDB. - -Our publicly accessible PostgreSQL database is designed for testing and playground purposes. By using these datasets, you can quickly get started with MindsDB, understand how it works, and see how it can be applied to real-world problems. - -## Connection - -To connect to our read-only PostgreSQL database and access the example datasets, use the following connection parameters: - -```sql -CREATE DATABASE postgresql_conn -WITH ENGINE = 'postgres', -PARAMETERS = { - "user": "demo_user", - "password": "demo_password", - "host": "samples.mindsdb.com", - "port": "5432", - "database": "demo", - "schema": "demo" -}; -``` - -Below is the list of all available datasets as tables. - -## Data Tables - -Here are the tables converted into Markdown format: - -### Fraud Detection Dataset - -This `fraud_detection` table contains data on mobile money transactions where each step represents an hour of simulation. 
- -| step | type | amount | nameOrig | oldbalanceOrg | newbalanceOrig | nameDest | oldbalanceDest | newbalanceDest | isFraud | isFlaggedFraud | -|------|-----------|---------|--------------|---------------|----------------|--------------|----------------|----------------|---------|----------------| -| 1 | PAYMENT | 9839.64 | C1231006815 | 170136.0 | 160296.36 | M1979787155 | 0.0 | 0.0 | 0 | 0 | -| 1 | PAYMENT | 1864.28 | C1666544295 | 21249.0 | 19384.72 | M2044282225 | 0.0 | 0.0 | 0 | 0 | -| 1 | TRANSFER | 181.0 | C1305486145 | 181.0 | 0.0 | C553264065 | 0.0 | 0.0 | 1 | 0 | - -To learn more about this dataset, please visit [this link](https://www.kaggle.com/datasets/ealaxi/paysim1). - -### Customer Churn Dataset - -This `customer_churn` table contains data of a U.S. bank customer churn. - -| RowNumber | CustomerId | Surname | CreditScore | Geography | Gender | Age | Tenure | Balance | NumOfProducts | HasCrCard | IsActiveMember | EstimatedSalary | Exited | -|-----------|------------|------------|-------------|-----------|--------|-----|--------|-----------|---------------|-----------|----------------|-----------------|--------| -| 1 | 15634602 | Hargrave | 619 | France | Female | 42 | 2 | 0 | 1 | 1 | 1 | 101348.88 | 1 | -| 2 | 15647311 | Hill | 608 | Spain | Female | 41 | 1 | 83807.86 | 1 | 0 | 1 | 112542.58 | 0 | -| 3 | 15619304 | Onio | 502 | France | Female | 42 | 8 | 159660.8 | 3 | 1 | 0 | 113931.57 | 1 | - -To learn more about this dataset, please visit [this link](https://www.kaggle.com/datasets/shantanudhakadd/bank-customer-churn-prediction). - -### Customer Support Chat Dataset - -This `customer_support_chat` table contains data of chat with customer support, which can be used to train Large Language Models (LLMs) for Fine Tuning and Domain Adaptation. 
- -| flags | instruction | category | intent | response | -|-------|-------------------------------------------|----------|--------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------| -| B | question about cancelling order Order Number | ORDER | cancel_order | I've understood you have a question regarding canceling order Order Number, and I'm here to provide you with the information you need. Please go ahead and ask your question, and I'll do my best to assist you. | -| BQZ | i have a question about cancelling order Order Number | ORDER | cancel_order | I've been informed that you have a question about canceling order Order Number. I'm here to assist you! Please go ahead and let me know what specific question you have, and I'll provide you with all the information and guidance you need. Your satisfaction is my top priority. | - -To learn more about this dataset, please visit [this link](https://huggingface.co/datasets/bitext/Bitext-customer-support-llm-chatbot-training-dataset). - -### Bank Customer Transactions Dataset - -This `bank_customer_transactions` table contains data of customer transactions with demographic and shopping behavior information. - -| Customer ID | Name | Surname | Gender | Birthdate | Transaction Amount | Date | Merchant Name | Category | -|-------------|-----------|-----------|--------|-------------|--------------------|------------|-------------------------|-----------| -| 752858 | Sean | Rodriguez | F | 2002-10-20 | 35.47 | 2023-04-03 | Smith-Russell | Cosmetic | -| 26381 | Michelle | Phelps | | 1985-10-24 | 2552.72 | 2023-07-17 | Peck, Spence and Young | Travel | -| 305449 | Jacob | Williams | M | 1981-10-25 | 115.97 | 2023-09-20 | Steele Inc | Clothing | - -To learn more about this dataset, please visit [this link](https://www.kaggle.com/datasets/bkcoban/customer-transactions). 
- -### Telecom Customer Churn Dataset - -This `telecom_customer_churn` table contains data on customer activities, preferences, and behaviors. - -| age | gender | security_no | region_category | membership_category | joining_date | joined_through_referral | referral_id | preferred_offer_types | medium_of_operation | internet_option | last_visit_time | days_since_last_login | avg_time_spent | avg_transaction_value | avg_frequency_login_days | points_in_wallet | used_special_discount | offer_application_preference | past_complaint | complaint_status | feedback | churn_risk_score | -|-----|--------|-------------|-----------------|---------------------|--------------|-------------------------|-------------|------------------------|---------------------|-----------------|-----------------|------------------------|----------------|-----------------------|---------------------------|------------------|------------------------|---------------------------|-----------------|------------------------|--------------------------|------------------| -| 18 | F | XW0DQ7H | Village | Platinum Membership | 17-08-2017 | No | xxxxxxxx | Gift Vouchers/Coupons | ? | Wi-Fi | 16:08:02 | 17 | 300.63 | 53005.25 | 17 | 781.75 | Yes | Yes | No | Not Applicable | Products always in Stock | 0 | -| 32 | F | 5K0N3X1 | City | Premium Membership | 28-08-2017 | ? | CID21329 | Gift Vouchers/Coupons | Desktop | Mobile_Data | 12:38:13 | 16 | 306.34 | 12838.38 | 10 | | Yes | No | Yes | Solved | Quality Customer Care | 0 | -| 44 | F | 1F2TCL3 | Town | No Membership | 11-11-2016 | Yes | CID12313 | Gift Vouchers/Coupons | Desktop | Wi-Fi | 22:53:21 | 14 | 516.16 | 21027 | 22 | 500.69 | No | Yes | Yes | Solved in Follow-up | Poor Website | 1 | - -To learn more about this dataset, please visit [this link](https://huggingface.co/datasets/d0r1h/customer_churn). - -### House Sales Dataset - -This `house_sales` table contains data on houses sold throughout the years. 
- -| saledate | ma | type | bedrooms | created_at | -|------------|---------|-------|---------|----------------------------| -| 2007-09-30 | 441854 | house | 2 | 2007-02-02 15:41:51.922127 | -| 2007-12-31 | 441854 | house | 2 | 2007-02-23 22:36:08.540248 | -| 2008-03-31 | 441854 | house | 2 | 2007-02-25 19:23:52.585358 | - -To learn more about this dataset, please visit [this link](https://www.kaggle.com/datasets/). diff --git a/docs/integrations/vector-db-integrations/chromadb.mdx b/docs/integrations/vector-db-integrations/chromadb.mdx deleted file mode 100644 index 9aede209eb6..00000000000 --- a/docs/integrations/vector-db-integrations/chromadb.mdx +++ /dev/null @@ -1,96 +0,0 @@ ---- -title: ChromaDB -sidebarTitle: ChromaDB ---- - -In this section, we present how to connect ChromaDB to MindsDB. - -[ChromaDB](https://www.trychroma.com/) is the open-source embedding database. Chroma makes it easy to build LLM apps by making knowledge, facts, and skills pluggable for LLMs. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect ChromaDB to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to ChromaDB. - -## Connection - -This handler is implemented using the `chromadb` Python library. - -To connect to a remote ChromaDB instance, use the following statement: - -```sql -CREATE DATABASE chromadb_datasource -WITH ENGINE = 'chromadb' -PARAMETERS = { - "host": "YOUR_HOST", - "port": YOUR_PORT, - "distance": "l2/cosine/ip" -- optional, default is cosine -} -``` - -The required parameters are: - -* `host`: The host name or IP address of the ChromaDB instance. -* `port`: The TCP/IP port of the ChromaDB instance. -* `distance`: It defines how the distance between vectors is calculated. 
Available methods include l2, cosine, and ip, as [explained here](https://docs.trychroma.com/docs/collections/configure). - -To connect to an in-memory ChromaDB instance, use the following statement: - -```sql -CREATE DATABASE chromadb_datasource -WITH ENGINE = "chromadb", -PARAMETERS = { - "persist_directory": "YOUR_PERSIST_DIRECTORY", - "distance": "l2/cosine/ip" -- optional -} -``` - -The required parameters are: - -* `persist_directory`: The directory to use for persisting data. -* `distance`: It defines how the distance between vectors is calculated. Available methods include l2, cosine, and ip, as [explained here](https://docs.trychroma.com/docs/collections/configure). - -## Usage - -Now, you can use the established connection to create a collection (or table in the context of MindsDB) in ChromaDB and insert data into it: - -```sql -CREATE TABLE chromadb_datasource.test_embeddings ( - SELECT embeddings,'{"source": "fda"}' as metadata - FROM mysql_datasource.test_embeddings -); -``` - - -`mysql_datasource` is another MindsDB data source that has been created by connecting to a MySQL database. The `test_embeddings` table in the `mysql_datasource` data source contains the embeddings that we want to store in ChromaDB. 
- - -You can query your collection (table) as shown below: - -```sql -SELECT * -FROM chromadb_datasource.test_embeddings; -``` - -To filter the data in your collection (table) by metadata, you can use the following query: - -```sql -SELECT * -FROM chromadb_datasource.test_embeddings -WHERE `metadata.source` = "fda"; - -``` - -To conduct a similarity search, the following query can be used: - -```sql -SELECT * -FROM chromadb_datasource.test_embeddings -WHERE search_vector = ( - SELECT embeddings - FROM mysql_datasource.test_embeddings - LIMIT 1 -); diff --git a/docs/integrations/vector-db-integrations/couchbase.mdx b/docs/integrations/vector-db-integrations/couchbase.mdx deleted file mode 100644 index 132239ff795..00000000000 --- a/docs/integrations/vector-db-integrations/couchbase.mdx +++ /dev/null @@ -1,116 +0,0 @@ ---- -title: Couchbase -sidebarTitle: Couchbase ---- - -This is the implementation of the Couchbase Vector store data handler for MindsDB. - -[Couchbase](https://www.couchbase.com/) is an open-source, distributed multi-model NoSQL document-oriented database software package optimized for interactive applications. These applications may serve many concurrent users by creating, storing, retrieving, aggregating, manipulating, and presenting data. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Couchbase to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Couchbase. - -## Implementation - -In order to make use of this handler and connect to a Couchbase server in MindsDB, the following syntax can be used. Note, that the example uses the default `travel-sample` bucket which can be enabled from the couchbase UI with pre-defined scope and documents. 
- -```sql -CREATE DATABASE couchbase_vectorsource -WITH -engine='couchbasevector', -parameters={ - "connection_string": "couchbase://localhost", - "bucket": "travel-sample", - "user": "admin", - "password": "password", - "scope": "inventory" -}; -``` - -This handler is implemented using the `couchbase` library, the Python driver for Couchbase. - -The required arguments to establish a connection are as follows: -* `connection_string`: the connection string for the endpoint of the Couchbase server -* `bucket`: the bucket name to use when connecting with the Couchbase server -* `user`: the user to authenticate with the Couchbase server -* `password`: the password to authenticate the user with the Couchbase server -* `scope`: scopes are a level of data organization within a bucket. If omitted, will default to `_default` - -Note: The connection string expects either the couchbases:// or couchbase:// protocol. - - -If you are using Couchbase Capella, you can find the connection_string under the Connect tab. -It will also be required to whitelist the machine(s) that will be running MindsDB and database credentials will need to be created for the user. These steps can also be taken under the Connect tab. - - -## Usage - -Now, you can use the established connection to create a collection (or table in the context of MindsDB) in Couchbase and insert data into it: - -### Creating tables - -Now, you can use the established connection to create a collection (or table in the context of MindsDB) in Couchbase and insert data into it: - -```sql -CREATE TABLE couchbase_vectorsource.test_embeddings ( - SELECT embeddings - FROM mysql_datasource.test_embeddings -); -``` - - -`mysql_datasource` is another MindsDB data source that has been created by connecting to a MySQL database. The `test_embeddings` table in the `mysql_datasource` data source contains the embeddings that we want to store in Couchbase. 
- - -### Querying and searching - -You can query your collection (table) as shown below: - -```sql -SELECT * -FROM couchbase_vectorsource.test_embeddings; -``` - -To filter the data in your collection (table) by metadata, you can use the following query: - -```sql -SELECT * -FROM couchbase_vectorsource.test_embeddings -WHERE id = "some_id"; - -``` - -To perform a vector search, the following query can be used: - -```sql -SELECT * -FROM couchbase_vectorsource.test_embeddings -WHERE embeddings = ( - SELECT embeddings - FROM mysql_datasource.test_embeddings - LIMIT 1 -); -``` - -### Deleting records - -You can delete documents using `DELETE` just like in SQL. - - -```sql -DELETE FROM couchbase_vectorsource.test_embeddings -WHERE `metadata.test` = 'test1'; -``` - -### Dropping connection - -To drop the connection, use this command - -```sql -DROP DATABASE couchbase_vectorsource; -``` \ No newline at end of file diff --git a/docs/integrations/vector-db-integrations/milvus.mdx b/docs/integrations/vector-db-integrations/milvus.mdx deleted file mode 100644 index 46a95f2255e..00000000000 --- a/docs/integrations/vector-db-integrations/milvus.mdx +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: Milvus -sidebarTitle: Milvus ---- - -This is the implementation of the Milvus handler for MindsDB. - -Milvus is an open-source and blazing fast vector database built for scalable similarity search. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Milvus to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - -## Connection and Usage - -Visit the [Milvus page for details](https://milvus.io/docs/integration_with_mindsdb.md). 
diff --git a/docs/integrations/vector-db-integrations/pgvector.mdx b/docs/integrations/vector-db-integrations/pgvector.mdx deleted file mode 100644 index 5ac7c54c238..00000000000 --- a/docs/integrations/vector-db-integrations/pgvector.mdx +++ /dev/null @@ -1,122 +0,0 @@ ---- -title: PGVector -sidebarTitle: PGVector ---- - -This is the implementation of the PGVector for MindsDB. - -PGVector is an open-source vector similarity search for Postgres. It supports the following: - -- exact and approximate nearest neighbor search, -- L2 distance, inner product, and cosine distance, -- any language with a Postgres client, -- ACID compliance, point-in-time recovery, JOINs, and all of the other great features of Postgres. - -## Connection - -This handler uses `pgvector` Python library. - -To connect to a PGVector instance, use the following statement: - -```sql -CREATE DATABASE pvec -WITH - ENGINE = 'pgvector', - PARAMETERS = { - "host": "127.0.0.1", - "port": 5432, - "database": "postgres", - "user": "user", - "password": "password", - "distance": "cosine" - }; -``` - -The required arguments to establish a connection are the following: - -* `host`: The host name or IP address of the postgres instance. -* `port`: The port to use when connecting. -* `database`: The database to connect to. -* `user`: The user to connect as. -* `password`: The password to use when connecting. -* `distance`: It defines how the distance between vectors is calculated. Available methods include cosine (default), l1, l2, ip, hamming, jaccard. [Learn more here](https://github.com/pgvector/pgvector/blob/master/README.md). 
- -## Usage - -### Installing the pgvector extension - -On the machine where Postgres is installed, run the following commands to install the pgvector extension: - -`cd /tmp -git clone --branch v0.4.4 https://github.com/pgvector/pgvector.git -cd pgvector -make -make install` - -### Installing the pgvector python library -Ensure you install all dependencies from requirements.txt in the pgvector_handler folder. - -### Creating a database connection in MindsDB - -You can create a database connection like you would for a regular Postgres database; the only difference is that you need to specify the engine as `pgvector`: - -```sql -CREATE DATABASE pvec -WITH - ENGINE = 'pgvector', - PARAMETERS = { - "host": "127.0.0.1", - "port": 5432, - "database": "postgres", - "user": "user", - "password": "password" - }; -``` - -You can insert data into a new collection like so: - -```sql -CREATE TABLE pvec.embed - (SELECT embeddings FROM mysql_demo_db.test_embeddings -); - -CREATE ML_ENGINE openai -FROM openai -USING - api_key = 'your-openai-api-key'; - -CREATE MODEL openai_emb -PREDICT embedding -USING - engine = 'openai', - model_name='text-embedding-ada-002', - mode = 'embedding', - question_column = 'review'; - -create table pvec.itemstest ( -SELECT m.embedding AS embeddings, t.review content FROM mysql_demo_db.amazon_reviews t - join openai_emb m -); - -``` - -You can query a collection within your PGVector as follows: - -```sql -SELECT * -FROM pvec.embed -Limit 5; - -SELECT * -FROM pvec.itemstest -Limit 5; -``` - - -You can query on semantic search like so: - -```sql -SELECT * -FROM pvec3.items_test -WHERE embeddings = (select * from mindsdb.embedding) LIMIT 5; -``` diff --git a/docs/integrations/vector-db-integrations/pinecone.mdx b/docs/integrations/vector-db-integrations/pinecone.mdx deleted file mode 100644 index 40328e3dbdf..00000000000 --- a/docs/integrations/vector-db-integrations/pinecone.mdx +++ /dev/null @@ -1,115 +0,0 @@ ---- -title: Pinecone 
-sidebarTitle: Pinecone ---- - -This is the 
implementation of the Pinecone for MindsDB. - -Pinecone is a vector database which is fully-managed, developer-friendly, and easily scalable. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Pinecone to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Pinecone. - -## Implementation - -This handler uses the `pinecone-client` Python library to connect to a Pinecone environment. - -The required arguments to establish a connection are: - -* `api_key`: the API key that can be found in your pinecone account - -These optional arguments are used with `CREATE TABLE` statements: - -* `dimension`: dimensions of the vectors to be stored in the index (default=8) -* `metric`: distance metric to be used for similarity search (default='cosine') -* `spec`: the spec of the index to be created. This is a dictionary that can contain the following keys: - * `cloud`: the cloud provider to use (default='aws') - * `region`: the region to use (default='us-east-1') - - -Only the creation of serverless indexes is supported at the moment when running `CREATE TABLE` statements. - - -## Limitations - -- [ ] `DROP TABLE` support -- [ ] Support for [namespaces](https://docs.pinecone.io/docs/namespaces) -- [ ] Display score/distance -- [ ] Support for creating/reading sparse values -- [ ] `content` column is not supported since it does not exist in Pinecone - -## Usage - -In order to make use of this handler and connect to an environment, use the following syntax: - -```sql -CREATE DATABASE pinecone_dev -WITH ENGINE = "pinecone", -PARAMETERS = { - "api_key": "..." 
-}; -``` - -You can query pinecone indexes (`temp` in the following examples) based on `id` or `search_vector`, but not both: - -```sql -SELECT * from pinecone_dev.temp -WHERE id = "abc" -LIMIT 1 -``` - -```sql -SELECT * from pinecone_dev.temp -WHERE search_vector = "[1,2,3,4,5,6,7,8]" -``` - -If you are using subqueries, make sure that the result is only a single row since the use of multiple search vectors is not allowed - -```sql -SELECT * from pinecone_database.temp -WHERE search_vector = ( - SELECT embeddings FROM sqlitetesterdb.test WHERE id = 10 -) -``` - -Optionally, you can filter based on metadata too: - -```sql -SELECT * from pinecone_dev.temp -WHERE id = "abc" AND metadata.hello < 100 -``` - -You can delete records using `id` or `metadata` like so: - -```sql -DELETE FROM pinecone_dev.temp -WHERE id = "abc" -``` - -Note that deletion through metadata is not supported in starter tier - -```sql -DELETE FROM pinecone_dev.temp -WHERE metadata.tbd = true -``` - -You can insert data into a new collection like so: - -```sql -CREATE TABLE pinecone_dev.temp ( -SELECT * FROM mysql_demo_db.temp LIMIT 10); -``` - -To update records, you can use insert statement. When there is a conflicting ID in pinecone index, the record is updated with new values. It might take a bit to see it reflected. - -```sql -INSERT INTO pinecone_test.testtable (id,content,metadata,embeddings) -VALUES ( - 'id1', 'this is a test', '{"test": "test"}', '[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]' -); -``` diff --git a/docs/integrations/vector-db-integrations/weaviate.mdx b/docs/integrations/vector-db-integrations/weaviate.mdx deleted file mode 100644 index 135941d8a73..00000000000 --- a/docs/integrations/vector-db-integrations/weaviate.mdx +++ /dev/null @@ -1,124 +0,0 @@ ---- -title: Weaviate -sidebarTitle: Weaviate ---- - -This is the implementation of the Weaviate for MindsDB. - -Weaviate is an open-source vector database. 
It allows you to store data objects and vector embeddings from your favorite ML-models, and scale seamlessly into billions of data objects. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Weaviate to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to Weaviate. - -## Implementation - -This handler uses the `weaviate-client` Python library to connect to a Weaviate instance. - -The required arguments to establish a connection are: - -* `weaviate_url`: url of the weaviate database -* `weaviate_api_key`: API key to authenticate with weaviate (in case of cloud instance). -* `persistence_directory`: directory to be used in case of local storage - - -### Creating connection - -In order to make use of this handler and connect to a Weaviate server in MindsDB, the following syntax can be used: - -```sql -CREATE DATABASE weaviate_datasource - WITH ENGINE = "weaviate", - PARAMETERS = { - "weaviate_url" : "https://sample.weaviate.network", - "weaviate_api_key": "api-key" -}; -``` - -```sql -CREATE DATABASE weaviate_datasource - WITH ENGINE = "weaviate", - PARAMETERS = { - "weaviate_url" : "https://localhost:8080", -}; -``` - -```sql -CREATE DATABASE weaviate_datasource - WITH ENGINE = "weaviate", - PARAMETERS = { - "persistence_directory" : "db_path", -}; -``` - -### Dropping connection - -To drop the connection, use this command - -```sql -DROP DATABASE weaviate_datasource; -``` - -### Creating tables - -To insert data from a pre-existing table, use `CREATE` - -```sql -CREATE TABLE weaviate_datasource.test -(SELECT * FROM sqlitedb.test); -``` -As weaviate currently doesn't support json field. 
-So, this creates another table for the "metadata" field and a reference is created in the original table which points to -its metadata entry. - -Weaviate follows GraphQL conventions where classes (which are table schemas) start with a capital letter and -properties start with a lowercase letter. - -So whenever we create a table, the table's name gets capitalized. - -### Dropping collections - -To drop a Weaviate table use this command - -```sql -DROP TABLE weaviate_datasource.tablename; -``` - -### Querying and selecting - -To query database using a search vector, you can use `search_vector` or `embeddings` in `WHERE` clause - -```sql -SELECT * from weaviate_datasource.test -WHERE search_vector = '[3.0, 1.0, 2.0, 4.5]' -LIMIT 10; -``` - -Basic query - -```sql -SELECT * from weaviate_datasource.test -``` - -You can use `WHERE` clause on dynamic fields like normal SQL - -```sql -SELECT * FROM weaviate_datasource.createtest -WHERE category = "science"; -``` - -### Deleting records - -You can delete entries using `DELETE` just like in SQL. - - -```sql -DELETE FROM weaviate_datasource.test -WHERE id IN (1, 2, 3); -``` - -Update is not supported by mindsdb vector database diff --git a/docs/logo/dark.svg b/docs/logo/dark.svg deleted file mode 100644 index 20d302da3a8..00000000000 --- a/docs/logo/dark.svg +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - diff --git a/docs/logo/light.svg b/docs/logo/light.svg deleted file mode 100644 index 9504b73851e..00000000000 --- a/docs/logo/light.svg +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - diff --git a/docs/mindsdb-connect.mdx b/docs/mindsdb-connect.mdx deleted file mode 100644 index 7910f9d53ef..00000000000 --- a/docs/mindsdb-connect.mdx +++ /dev/null @@ -1,25 +0,0 @@ ---- -title: Connect -sidebarTitle: Introduction -icon: "house" ---- - -MindsDB enables connecting data from various data sources and operating on data without moving it from its source. 
Granting MindsDB access to data is the foundation for all other capabilities. - -* **Broad integration support**

-Seamlessly connect to databases, applications, and more. -* **Real-time data access**

-Work with the most up-to-date data without delays from batch processing. -* **No data movement required**

-Operate directly on data at the source. No copying, syncing, or ETL needed. - -This documentation includes the following content. - - - - These are all the data sources that can be connected to MindsDB. - Use MindsDB's SQL Editor or connect MindsDB to any SQL client. - - Use SQL to connect data to MindsDB. - - diff --git a/docs/mindsdb-fqe.mdx b/docs/mindsdb-fqe.mdx deleted file mode 100644 index 7fa2ff78041..00000000000 --- a/docs/mindsdb-fqe.mdx +++ /dev/null @@ -1,51 +0,0 @@ ---- -title: MindsDB as a Federated Query Engine -sidebarTitle: MindsDB Query Engine -icon: "engine" ---- - -MindsDB supports federated querying, enabling users to access and analyze data across a wide variety of structured and unstructured data sources using SQL. - -

- -

- -## How Query Pushdown Works in MindsDB - -MindsDB acts as a federated query engine by translating and pushing down SQL queries to the native engines of connected data sources. Rather than retrieving data and processing queries within MindsDB, it delegates computation to the underlying data sources. This “pushdown” approach ensures: - -* High performance: Queries leverage the indexing and processing capabilities of the native engines. - -* Low resource usage: MindsDB avoids executing resource-heavy and high-latency operations within the query engine, preventing bottlenecks in CPU, memory, or network. - -## Query Translation Limits - -Each connected data source has its own SQL dialect, features, and constraints. While MindsDB SQL provides a unified interface, not all SQL expressions or data types can be translated across every database engine. In cases where a native data type or expression is not supported by the underlying engine: - -* The query is passed from MindsDB to the data source in its current form, with unsupported data types handled as strings. - -* If the data source does not support the syntax, it may return an error. - -* Errors originating from the underlying data source are passed through to the user to provide the most accurate context. - -## Cross-Database Join Limits - -MindsDB allows joining tables across disparate data sources. However, cross-database joins introduce complexity: - -* Pushdown can occur partially, not for all joined data sources. - -* Join conditions for a particular data source must be executable by its underlying database engine. - -## Recap - -MindsDB’s federated query engine enables seamless integration with diverse data systems, but effective use requires understanding the limitations of SQL translation and pushdown: - -* Pushdown is preferred to optimize performance and avoid resource strain. - -* Not all SQL constructs are translatable, especially for vector stores or non-relational systems. 
- -* Errors may occur when a connected data source cannot parse the generated query. - -* Workarounds include query decomposition, using simpler expressions, and avoiding unsupported joins or vector logic. - -Understanding these nuances helps users debug query errors more effectively and make full use of MindsDB’s federated query capabilities. diff --git a/docs/mindsdb-gui.mdx b/docs/mindsdb-gui.mdx deleted file mode 100644 index 385736aad17..00000000000 --- a/docs/mindsdb-gui.mdx +++ /dev/null @@ -1,97 +0,0 @@ ---- -title: Navigating the MindsDB GUI -sidebarTitle: MindsDB GUI Overview ---- - -MindsDB offers a user-friendly graphical interface that allows users to execute SQL commands, view their outputs, and easily navigate connected data sources, projects, and their contents. - -Let's explore the features and usage of the MindsDB editor. - -## Accessing the MindsDB GUI Editor - -Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). - -

- -

- -## Exploring the MindsDB GUI Editor - -### Query Editor - -This is the primary component where users can input SQL commands and queries. It provides a code editor environment where users can write, edit, and execute SQL statements. - -It is located in the top center of the MindsDB GUI. - -

- -

- -You can open multiple query editor tabs by clicking the plus button next to the current tab, like this: - -

- -

- -### Results Viewer - -Once a query is executed, the results viewer displays the output of the query. It presents the results in a tabular format, showing rows and columns of data. - -It is located in the bottom center of the MindsDB GUI. - -

- -

- -MindsDB supports additional features such as the following: - -1. The [Data Insights](/sql/data-insights) feature provides useful data visualization charts. -2. The Export feature lets you export the query output as a CSV or Markdown file. - -### Object Explorer - -The object explorer provides an overview of the projects, models, views, connected data sources, and tables. - -

- -

- -Users can navigate through the available objects by expanding the tree structure items. Upon hovering over the tables, you can query their content using the provided `SELECT` statement, as below. - -

- -

- -### Model Progress Bar - -MindsDB provides a custom SQL statement to create and deploy models as virtual tables. Upon executing the [`CREATE MODEL`](/sql/create/model) statement, you can monitor the training progress at the bottom-left corner below the object explorer. - -

- -

- -Once the model is ready, its status updates to complete. - -

- -

- -### Add New Data Sources - -You can connect a data source to MindsDB by clicking the `Add` button and choosing `New Datasource`. It takes you to a page that lists all available data sources, including, databases, data warehouses, applications, and more. Here, you can search for a data source you want to connect to and follow the instructions. - -For more information, visit the **Data Sources** section of the docs. - -### Upload Files - -You can upload a file to MindsDB by clicking the `Add` button and choosing `Upload File`. It takes you to a form where you can upload a file and give it a name. - -For more information, visit [our docs here](/sql/create/file). - -### Upload Custom Models - -MindsDB offers a way to upload your custom model in the form of Python code and incorporate it into the MindsDB ecosystem. You can do that by clicking the `Add` button and choosing `Upload custom model`. - -For more information, visit [our docs here](/custom-model/byom). - - diff --git a/docs/mindsdb-objects.mdx b/docs/mindsdb-objects.mdx deleted file mode 100644 index 859f770d9fe..00000000000 --- a/docs/mindsdb-objects.mdx +++ /dev/null @@ -1,199 +0,0 @@ ---- -title: Naming Standards for MindsDB Objects -sidebarTitle: MindsDB Objects -icon: "puzzle-piece-simple" ---- - -MindsDB allows you to create and manage a variety of entities within its ecosystem. All MindsDB objects follow the same naming conventions to ensure consistency and compatibility across the platform. 
- -## MindsDB Entities - -The following entities can be created in MindsDB: - -* Databases → [CREATE DATABASE](https://docs.mindsdb.com/mindsdb_sql/sql/create/database) - -* Knowledge Bases (KBs) → [CREATE KNOWLEDGE_BASE](https://docs.mindsdb.com/mindsdb_sql/knowledge_bases/create) - -* Tables → [CREATE TABLE](https://docs.mindsdb.com/mindsdb_sql/sql/create/table) - -* Views → [CREATE VIEW](https://docs.mindsdb.com/mindsdb_sql/sql/create/view) - -* Projects → [CREATE PROJECT](https://docs.mindsdb.com/mindsdb_sql/sql/create/project) - -* Jobs → [CREATE JOB](https://docs.mindsdb.com/mindsdb_sql/sql/create/jobs) - -* Triggers → [CREATE TRIGGER](https://docs.mindsdb.com/mindsdb_sql/sql/create/trigger) - -* Agents → [CREATE AGENT](https://docs.mindsdb.com/mindsdb_sql/agents/agent_syntax) - -## General Naming Rules - -When creating these entities, the following conventions apply: - -* **Case-insensitive names** - - Object names are not sensitive to letter casing. For example: - - ```sql - CREATE VIEW my_view (...); -- creates "my_view" - CREATE VIEW My_View (...); -- also creates "my_view" - CREATE VIEW MY_VIEW (...); -- also creates "my_view" - ``` - - All names are automatically converted to lowercase. - -* **Allowed characters** - - Lowercase letters (`a–z`) - Numbers (`0–9`) - Underscores (`_`) - - Example: - - ```sql - CREATE AGENT my_agent345 (...); -- creates "my_agent345" - ``` - -* **Special characters** - - If you need special characters or spaces in object names, enclose them in backticks. - - ```sql - CREATE VIEW `my view` (...); -- creates “my view” - CREATE VIEW `my-view!` (...); -- creates “my-view!” - ``` - - However, names inside backticks must be lowercase. Using uppercase letters will result in an error because all object names must be in lowercase letters. 
- - ```sql - CREATE VIEW `My View` (...); -- error - ``` - - -When working with entities from a data source connected to MindsDB, their original names are preserved and are not subject to MindsDB naming rules. - -For example, if you connect a Snowflake data source that contains a table named `ANALYTICS_101` with a column named `Date_Time`, you must reference them exactly as they appear in the source, utilizing backticks, as shown below: - -```sql -SELECT `Date_Time` -FROM snowflake_data.`ANALYTICS_101`; -``` - - -## Backward Compatibility - -Older objects created with uppercase letters are still supported for backward compatibility. To reference them, wrap the name in backticks. - -```sql -SELECT * FROM `MyView`; -- selects from “MyView” -DROP VIEW `MyView`; -- deletes “MyView” -``` - -You cannot create new objects with uppercase letters. For example: - -```sql -CREATE VIEW `MyView` (...); -- error -``` - -## Examples - -Here are some practical examples: - -### Databases - -Note that when enclosing the object name in backticks, it preserves the case-sensitivity and special characters included in the name. Otherwise, the upper-case letters are automatically converted to lower-case letters. - -See the usage examples below. 
- -```sql -CREATE DATABASE my_database WITH …; -- creates my_database -SELECT * FROM my_database.table_name; -- selects from my_database -DROP DATABASE my_database; -- drops my_database - -CREATE DATABASE MY_DATABASE WITH …; -- creates my_database (note that upper-case letters are converted to lower-case letters) -SELECT * FROM my_database.table_name; -- selects from my_database -SELECT * FROM MY_DATABASE.table_name; -- selects from my_database -DROP DATABASE MY_DATABASE; -- drops my_database - -CREATE DATABASE `My-database` WITH …; -- creates My-database (note that the name must be enclosed in backticks because it contains a special character) -SELECT * FROM `My-database`.table_name; -- selects from My-database -DROP DATABASE `My-database`; -- drops My-database -``` - -```sql --- this works -CREATE DATABASE demodata WITH …; -SELECT * FROM demodata.table_name; -SELECT * FROM `demodata`.table_name; -DROP DATABASE demodata; - --- this works and converts all letters to lower-case -CREATE DATABASE demoData WITH …; -SELECT * FROM demoData ... -DROP DATABASE demoData; - --- this works and keeps upper/lower-case letters because the name is enclosed in backticks -CREATE DATABASE `DemoData` WITH …; -SELECT * FROM `DemoData` ... -DROP DATABASE `DemoData` ... 
-``` - -```sql -CREATE DATABASE DemoData WITH …; -- creates demodata -CREATE DATABASE `DemoData` WITH …; -- cannot create DemoData because demodata already exists -DROP DATABASE `DemoData`; -- cannot drop DemoData because DemoData does not exist -DROP DATABASE DemoData; -- drops demodata - -CREATE DATABASE `DemoData` WITH …; -- creates DemoData -CREATE DATABASE demodata WITH …; -- cannot create demodata because DemoData already exists -DROP DATABASE demodata; -- cannot drop demodata because demodata does not exist -DROP DATABASE `DemoData`; -- drops DemoData -``` - -```sql -CREATE DATABASE demodata WITH …; -- creates demodata -SELECT * FROM DEMODATA.table_name; -- selects from demodata, because DEMODATA is converted to demodata -DROP DATABASE demodata; -- drops demodata - -CREATE DATABASE `DemoData` WITH …; -- creates DemoData -SELECT * FROM demodata.table_name; -- cannot select from demodata -SELECT * FROM `DemoData`.table_name; -- selects from DemoData -DROP DATABASE demodata; -- cannot drop demodata because demodata does not exist -DROP DATABASE `DemoData`; -- drops DemoData - -CREATE DATABASE `Dèmo data 2` WITH …; -SELECT * FROM `Dèmo data 2`.table_name; -DROP DATABASE `Dèmo data 2`; -``` - -### Views - -```sql -CREATE VIEW my_view (...); -- creates "my_view" -CREATE VIEW My_View (...); -- also creates "my_view" -CREATE VIEW `my view` (...); -- creates "my view" -CREATE VIEW `My_View` (...); -- error -``` - -If an older object named `My_View` exists, you can still use it: - -```sql -SELECT * FROM `My_View`; -- selects from “My_View” -DROP VIEW `My_View`; -- deletes “My_View” -``` - -### Agents - -```sql -CREATE AGENT my_agent USING ...; -- creates "my_agent" -CREATE AGENT My_Agent USING ...; -- also creates "my_agent" -CREATE AGENT `my agent 1` USING ...; -- creates "my agent 1" -CREATE AGENT `My agent 1` USING ...; -- error -``` - -If an older object named `My agent 1` exists, you can still use it: - -```sql -SELECT * FROM `My agent 1`; -- selects from “My 
agent 1” -DROP AGENT `My agent 1`; -- deletes “My agent 1” -``` diff --git a/docs/mindsdb-respond.mdx b/docs/mindsdb-respond.mdx deleted file mode 100644 index 9ea3ae8e7d3..00000000000 --- a/docs/mindsdb-respond.mdx +++ /dev/null @@ -1,25 +0,0 @@ ---- -title: Respond -sidebarTitle: Introduction -icon: "house" ---- - -MindsDB enables generating insightful and accurate responses from unified data using natural language. Whether answering questions, powering applications, or enabling automations, responses are context-aware and grounded in real-time data. - -* **Natural language data queries**

-Ask questions in natural language and receive precise answers. - -* **AI-powered insights**

-Leverage integrated models to analyze, predict, and explain data in context. - -* **Actionable responses**

-Drive decisions and automations directly from query results. - -This documentation includes the following content. - - - - Deploy agents specialized in answering questions over connected and unified data. - Connect to MindsDB through MCP (Model Context Protocol) for seamless interaction. - - diff --git a/docs/mindsdb-unify.mdx b/docs/mindsdb-unify.mdx deleted file mode 100644 index 8eb2d07a9d6..00000000000 --- a/docs/mindsdb-unify.mdx +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: Unify -sidebarTitle: Introduction -icon: "house" ---- - -MindsDB enables unifying data from structured and unstructured data sources into a single, queryable interface. This unified view allows seamless querying and model-building across all data without consolidation into one system. - -* **Federated query engine**

-Query across multiple data sources as if they were a single database. - -* **Structured and unstructured data support**

-Unify relational data, documents, vector data, and more in one place. - -* **No data transformation required**

-Use data in its native format without the need for preprocessing. - -This documentation includes the following content. - - - - Index and organize unstructured data for efficient retrieval. - Simplify data access by creating unified views across different sources. - - Organize views, knowledge bases, and models into projects. - Operate on data using functions. - - Schedule tasks with jobs. - Set up triggering events on data. - - diff --git a/docs/mindsdb.mdx b/docs/mindsdb.mdx deleted file mode 100644 index 6c91df7ac52..00000000000 --- a/docs/mindsdb.mdx +++ /dev/null @@ -1,43 +0,0 @@ ---- -title: MindsDB, an AI Data Solution -sidebarTitle: Introduction -icon: "house" ---- - -MindsDB enables humans, AI, agents, and applications to get highly accurate answers across sprawled and large scale data sources. - -

- -

- -## Core Philosophy - -MindsDB is built around three fundamental capabilities that form the foundation of MindsDB, enabling seamless integration, organization, and utilization of data. - - - - Connect data from [hundreds of data sources](/integrations/data-overview) that integrate with MindsDB, including databases, data warehouses, applications, and vector databases. - - Learn more [here](/mindsdb-connect). - - - Unify and organize data from one or multiple (structured and unstructured) data sources, by creating - [knowledge bases](/mindsdb_sql/knowledge_bases/overview), [views](/mindsdb_sql/sql/create/view) and [jobs](/mindsdb_sql/sql/create/jobs). - - Learn more [here](/mindsdb-unify). - - - Generate accurate, context-aware responses from unified data using [agents](/mindsdb_sql/agents/agent) or [MCP API](/mcp/overview), making insights easily accessible across applications and teams. - - Learn more [here](/mindsdb-respond). - - - -## Install MindsDB - -MindsDB is an open-source server that can be deployed anywhere, including local machines and clouds, and customized to fit the purpose. - - * Use [MindsDB via Docker Desktop](/setup/self-hosted/docker-desktop). This is the fastest and recommended way to get started. - * Use [MindsDB via Docker](/setup/self-hosted/docker). This provides greater flexibility in customizing the MindsDB instance by rebuilding Docker images. - * Use [MindsDB via AWS Marketplace](/setup/cloud/aws-marketplace). This enables running MindsDB in cloud. - * Use [MindsDB via PyPI](/contribute/install). This option enables contributions to MindsDB. 
diff --git a/docs/mindsdb_sql/agents/agent.mdx b/docs/mindsdb_sql/agents/agent.mdx deleted file mode 100644 index d2e23033c3b..00000000000 --- a/docs/mindsdb_sql/agents/agent.mdx +++ /dev/null @@ -1,202 +0,0 @@ ---- -title: MindsDB Agents -sidebarTitle: Overview ---- - -**MindsDB Agents** are part of the open-source MindsDB product and enable users to ask natural language questions over connected data sources. They provide intelligent querying capabilities across databases and knowledge bases directly within MindsDB. - -For organizations with advanced requirements, **Minds** is the enterprise counterpart to MindsDB Agents. Minds extends the same core concept with enhanced capabilities, scalability, context management, and production-grade features. - -## How MindsDB Agents Work - -MindsDB Agents are built on the Pydantic framework and follow a structured workflow to interpret and answer user questions over the connected data. - - - - -When a query is received, the agent: -* Builds a real-time data catalog generated dynamically and based on a sample of 5 rows from each connected data object -* Extracts prompts and user messages -* Prepares structured input for reasoning -This lightweight catalog enables the agent to understand available schemas and data types. - - - -Using the processed input, the agent: -* Determines which connected data sources are relevant -* Plans query execution steps -* Prepares SQL queries as needed -This stage ensures that the agent selects appropriate tables and avoids unnecessary exploration. - - - -The agent enters an iterative execution cycle: -* Executes queries against connected tables or knowledge bases -* Collects and evaluates results -* Adjusts queries if needed -This loop continues until sufficient relevant data is collected, up to a maximum of 20 queries. 
- - - -If execution errors occur: -* Errors are analyzed -* The agent attempts corrective adjustments -* Up to three accumulated errors are retained for context -This iterative correction improves answer accuracy within session constraints. - - - -Finally, the agent: -* Aggregates collected data -* Synthesizes a natural language or structured response -* Returns the answer based on the query format - - - - -## Recommended Usage - -To ensure optimal performance and accuracy, follow these guidelines. - -1. **Data Preparation** - - High-quality input significantly improves agent performance. - - Clean your data: - - * Remove irrelevant columns - * Filter unnecessary rows - * Normalize inconsistent formats - - Create views to simplify the agent's reasoning: - - * Filtered views: Keep only relevant tables, columns, and rows - * Aggregated views: Provide summary tables for frequent analytical queries - * Joined views: Combine tables that are commonly queried together - - Well-designed views reduce cognitive load on the agent. - -2. **Agent Data Setup** - - To maintain performance: - - * Limit connected objects (tables + knowledge bases) to 10 or fewer - * Ensure objects are relevant to the use case - * Provide descriptive context in the prompt - - Example of helpful context: - - * Describe data stored in connected tables or knowledge bases - * Clarify relationships between available data objects - * Specify expected output format - - The clearer the setup, the more accurate the agent's reasoning. - -3. **Querying an Agent** - - Agent outputs are non-deterministic, due to the nature of large language models. However, MindsDB provides mechanisms for limited output control. - - To receive a natural language answer: - - ```sql - SELECT answer - FROM agent_name - WHERE question = "What is the capital of France?"; - ``` - - This ensures the agent returns a human-readable response contained in the `answer` column. 
- - To enforce a specific output structure: - - ```sql - SELECT product_name, monthly_sales_count - FROM agent_name - WHERE question = "How many sales were there per product last month?"; - ``` - - The agent will format its response to match the defined columns. - - This enables: - - * Programmatic consumption - * Dashboard integration - * Downstream automation workflows - -## Usage Example - -Agents enable conversation with data, including structured and unstructured data connected to MindsDB. - -Connect your data to MindsDB by [connecting databases or applications](/integrations/data-overview) or [uploading files](/mindsdb_sql/sql/create/file). Users can opt for using [knowledge bases](/mindsdb_sql/knowledge_bases/overview) to store and retrieve data efficiently. - -Create an agent, passing the connected data and defining the underlying model. - -```sql -CREATE AGENT my_agent -USING - model = { - "provider": "openai", - "model_name" : "gpt-4o", - "api_key": "sk-abc123" - }, - data = { - "knowledge_bases": ["mindsdb.sales_kb", "mindsdb.orders_kb"], - "tables": ["postgres_conn.customers", "mysql_conn.products"] - }, - prompt_template=' - mindsdb.sales_kb stores sales analytics data - mindsdb.orders_kb stores order data - postgres_conn.customers stores customers data - mysql_conn.products stores products data - '; -``` - -Query an agent and ask question over the connected data. - -```sql -SELECT answer -FROM my_agent -WHERE question = 'What is the average number of orders per customers?'; - --- or - -SELECT customer_name, avg_number_of_orders -FROM my_agent -WHERE question = 'What is the average number of orders per customers?'; -``` - - -Follow [this doc page to learn more about the usage of agents](/mindsdb_sql/agents/agent_syntax). - - -## MindsDB Agents vs Minds: Feature Comparison - -Both MindsDB Agents (open-source) and Minds (enterprise) are designed to answer questions over data connected to MindsDB. 
- -The key difference lies in scope, scale, and advanced functionality: - -| Feature | MindsDB Agents (Open-Source) | Minds (Enterprise) | -|--------------------|-----------------------------------------------------------|-----------------------------------------------------| -| Data catalog | Built dynamically from sample data (5 rows per object) | Full data catalog with complete metadata | -| Context window | Limited | Extended | -| Error memory | Up to 3 accumulated errors | Extended memory and learning | -| Message history | Cleared with thread reset | Persistent across threads | -| Production controls| Basic | Advanced governance and controls | -| Scalability | Recommended ≤10 connected objects | Designed for complex, large-scale environments | - -MindsDB Agents enable querying over connected data sources that is good for: - -* Prototyping -* Small-to-medium complexity use cases -* Controlled data environments - -As complexity grows, Minds offers a seamless enterprise-grade upgrade path with expanded capabilities, memory, and performance. - -Consider upgrading to Minds if: - -* You require full data catalog visibility -* You need persistent conversation memory -* You are connecting many tables or knowledge bases -* Your workflows involve complex multi-step reasoning -* You require production-level governance and control - -Minds extends the same fundamental concept as MindsDB Agents, but is designed for enterprise-scale intelligence workflows. diff --git a/docs/mindsdb_sql/agents/agent_gui.mdx b/docs/mindsdb_sql/agents/agent_gui.mdx deleted file mode 100644 index f504790de42..00000000000 --- a/docs/mindsdb_sql/agents/agent_gui.mdx +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: How to Chat with Agents -sidebarTitle: Chat Interface ---- - -Agents enable conversation with data, including structured and unstructured data connected to MindsDB. - -MindsDB provides a chat interface that enables users to chat with their data. - -

- -

- -Select an agent from the list of existing agents, or create one if none exists yet. - -

- -

- -Now the chat interface is connected to this agent via [Agent2Agent Protocol](https://google.github.io/A2A/) and users can chat with the data connected to this agent. - -

- -

diff --git a/docs/mindsdb_sql/agents/agent_syntax.mdx b/docs/mindsdb_sql/agents/agent_syntax.mdx deleted file mode 100644 index 42597689170..00000000000 --- a/docs/mindsdb_sql/agents/agent_syntax.mdx +++ /dev/null @@ -1,433 +0,0 @@ ---- -title: How to Use Agents -sidebarTitle: Syntax ---- - -Agents enable conversation with data, including structured and unstructured data connected to MindsDB. - -## `CREATE AGENT` Syntax - -Here is the syntax for creating an agent: - -```sql -CREATE AGENT my_agent -USING - model = { - "provider": "openai", - "model_name" : "gpt-4o", - "api_key": "sk-abc123", - "base_url": "http://example.com", - "api_version": "2024-02-01" - }, - data = { - "knowledge_bases": ["project_name.kb_name", ...], - "tables": ["datasource_conn_name.table_name", ...] - }, - prompt_template='describe data', - timeout=10, - mode='text'; -``` - -It creates an agent that uses the defined model and has access to the connected data. - -```sql -SHOW AGENTS -WHERE name = 'my_agent'; -``` - - -Note that you can insert all tables from a connected data source and all knowledge bases from a project using the `*` syntax. - -```sql - ... - data = { - "knowledge_bases": ["project_name.*", ...], - "tables": ["datasource_conn_name.*", ...] - }, - ... -``` - - -### `model` - -This parameter defines the underlying language model, including: - -* `provider` -It is a required parameter. It defines the model provider from the list below. - -* `model_name` -It is a required parameter. It defines the model name from the list below. - -* `api_key` -It is an optional parameter (applicable to selected providers), which stores the API key to access the model. Users can provide it either in this `api_key` parameter, or using [environment variables](/mindsdb_sql/functions/from_env). - -* `base_url` -It is an optional parameter (applicable to selected providers), which stores the base URL for accessing the model. It is the root URL used to send API requests. 
- -* `api_version` -It is an optional parameter (applicable to selected providers), which defines the API version. - -The available models and providers include the following. - - - - -Available models: -- claude-3-opus-20240229 -- claude-3-sonnet-20240229 -- claude-3-haiku-20240307 -- claude-2.1 -- claude-2.0 -- claude-instant-1.2 - - - -Available models include all models accessible from Bedrock. - -Note that in order to use Bedrock as a model provider, you should ensure the following packages are installed: `langchain_aws` and `transformers`. - -The following parameters are specific to this provider: - -* `aws_region_name` is a required parameter. -* `aws_access_key_id` is a required parameter. -* `aws_secret_access_key` is a required parameter. -* `aws_session_token` is an optional parameter. It may be required depending on the AWS permissions setup. - - - -Available models: -- gemini-2.5-pro-preview-03-25 -- gemini-2.0-flash -- gemini-2.0-flash-lite -- gemini-1.5-flash -- gemini-1.5-flash-8b -- gemini-1.5-pro - - - -Available models: -- gemma -- llama2 -- mistral -- mixtral -- llava -- neural-chat -- codellama -- dolphin-mixtral -- qwen -- llama2-uncensored -- mistral-openorca -- deepseek-coder -- nous-hermes2 -- phi -- orca-mini -- dolphin-mistral -- wizard-vicuna-uncensored -- vicuna -- tinydolphin -- llama2-chinese -- openhermes -- zephyr -- nomic-embed-text -- tinyllama -- openchat -- wizardcoder -- phind-codellama -- starcoder -- yi -- orca2 -- falcon -- starcoder2 -- wizard-math -- dolphin-phi -- nous-hermes -- starling-lm -- stable-code -- medllama2 -- bakllava -- codeup -- wizardlm-uncensored -- solar -- everythinglm -- sqlcoder -- nous-hermes2-mixtral -- stable-beluga -- yarn-mistral -- samantha-mistral -- stablelm2 -- meditron -- stablelm-zephyr -- magicoder -- yarn-llama2 -- wizard-vicuna -- llama-pro -- deepseek-llm -- codebooga -- mistrallite -- dolphincoder -- nexusraven -- open-orca-platypus2 -- all-minilm -- goliath -- notux -- alfred -- 
megadolphin -- xwinlm -- wizardlm -- duckdb-nsql -- notus - - - -Available models: -- gpt-3.5-turbo -- gpt-3.5-turbo-16k -- gpt-3.5-turbo-instruct -- gpt-4 -- gpt-4-32k -- gpt-4-1106-preview -- gpt-4-0125-preview -- gpt-4.1 -- gpt-4.1-mini -- gpt-4o -- o4-mini -- o3-mini -- o1-mini - - - -Available models: -- microsoft/phi-3-mini-4k-instruct -- mistralai/mistral-7b-instruct-v0.2 -- writer/palmyra-med-70b -- mistralai/mistral-large -- mistralai/codestral-22b-instruct-v0.1 -- nvidia/llama3-chatqa-1.5-70b -- upstage/solar-10.7b-instruct -- google/gemma-2-9b-it -- adept/fuyu-8b -- google/gemma-2b -- databricks/dbrx-instruct -- meta/llama-3_1-8b-instruct -- microsoft/phi-3-medium-128k-instruct -- 01-ai/yi-large -- nvidia/neva-22b -- meta/llama-3_1-70b-instruct -- google/codegemma-7b -- google/recurrentgemma-2b -- google/gemma-2-27b-it -- deepseek-ai/deepseek-coder-6.7b-instruct -- mediatek/breeze-7b-instruct -- microsoft/kosmos-2 -- microsoft/phi-3-mini-128k-instruct -- nvidia/llama3-chatqa-1.5-8b -- writer/palmyra-med-70b-32k -- google/deplot -- meta/llama-3_1-405b-instruct -- aisingapore/sea-lion-7b-instruct -- liuhaotian/llava-v1.6-mistral-7b -- microsoft/phi-3-small-8k-instruct -- meta/codellama-70b -- liuhaotian/llava-v1.6-34b -- nv-mistralai/mistral-nemo-12b-instruct -- microsoft/phi-3-medium-4k-instruct -- seallms/seallm-7b-v2.5 -- mistralai/mixtral-8x7b-instruct-v0.1 -- mistralai/mistral-7b-instruct-v0.3 -- google/paligemma -- google/gemma-7b -- mistralai/mixtral-8x22b-instruct-v0.1 -- google/codegemma-1.1-7b -- nvidia/nemotron-4-340b-instruct -- meta/llama3-70b-instruct -- microsoft/phi-3-small-128k-instruct -- ibm/granite-8b-code-instruct -- meta/llama3-8b-instruct -- snowflake/arctic -- microsoft/phi-3-vision-128k-instruct -- meta/llama2-70b -- ibm/granite-34b-code-instruct - - - -Available models: -- palmyra-x5 -- palmyra-x4 - - - - -Users can define the model for the agent choosing one of the following options. 
- -**Option 1.** Use the `model` parameter to define the specification. - -```sql -CREATE AGENT my_agent -USING - model = { - "provider": "openai", - "model_name" : "gpt-4o", - "api_key": "sk-abc123", - "base_url": "https://example.com/", - "api_version": "2024-02-01" - }, - ... -``` - -**Option 2.** Define the default model in the [MindsDB configuration file](/setup/custom-config). - -If you define `default_llm` in the configuration file, you do not need to provide the `model` parameter when creating an agent. If you provide both, then the values from the `model` parameter are used. - - -You can define the default models in the Settings of the MindsDB Editor GUI. - - -```bash -"default_llm": { - - "provider": "openai", - "model_name" : "gpt-4o", - "api_key": "sk-abc123", - "base_url": "https://example.com/", - "api_version": "2024-02-01" - -} -``` - -### `data` - -This parameter stores data connected to the agent, including knowledge bases and data sources connected to MindsDB. - -The following parameters store the list of connected data. - -* `knowledge_bases` stores the list of [knowledge bases](/mindsdb_sql/knowledge_bases/overview) to be used by the agent. - -* `tables` stores the list of tables from data sources connected to MindsDB. - -### `prompt_template` - -This parameter stores instructions for the agent. - -It is recommended to provide a description of the data sources listed in the `knowledge_bases` and `tables` parameters to help the agent locate relevant data for answering questions. - -### `timeout` - -This parameter defines the time the agent can take to come back with an answer. - -For example, when the `timeout` parameter is set to 10, the agent has 10 seconds to return an answer. If the agent takes longer than 10 seconds, it aborts the process and comes back with an answer indicating its failure to return an answer within the defined time interval. 
- -### `mode` - -This parameter defines the agent's response style, allowing users to partially control the output format. Supported values include `text` and `sql`. - -When set, the agent will tailor its responses to match the specified format. Note that the agent may still adapt its output when necessary to ensure clarity or correctness. - -## `SELECT FROM AGENT` Syntax - -Query an agent to generate responses to questions. - -```sql -SELECT answer -FROM my_agent -WHERE question = 'What is the average number of orders per customers?'; - --- or - -SELECT customer_name, avg_number_of_orders -FROM my_agent -WHERE question = 'What is the average number of orders per customers?'; -``` - -You can redefine the agent's parameters at query time as below. - -```sql -SELECT answer -FROM my_agent -WHERE question = 'What is the average number of orders per customers?' -USING - model = { - "provider": "openai", - "model_name" : "gpt-4.1", - "api_key": "sk-abc123" - }, - data = { - "knowledge_bases": ["project_name.kb_name", ...], - "tables": ["datasource_conn_name.table_name", ...] - }, - prompt_template='describe data', - timeout=10; -``` - -The `USING` clause may contain any combination of parameters from the `CREATE AGENT` command, depending on which parameters users want to update for the query. - -For example, users may want to check the performance of other models to decide which model works better for their use case. - -```sql -SELECT answer -FROM my_agent -WHERE question = 'What is the average number of orders per customers?' -USING - model = { - "provider": "google", - "model_name" : "gemini-2.5-flash", - "api_key": "ABc123" - }; -``` - -## `ALTER AGENT` Syntax - -Update existing agents with new data, model, or prompt. 
- -```sql -ALTER AGENT my_agent -USING - model = { - "provider": "openai", - "model_name" : "gpt-4.1", - "api_key": "sk-abc123", - "base_url": "http://example.com", - "api_version": "2024-02-01" - }, - data = { - "knowledge_bases": ["project_name.kb_name", ...], - "tables": ["datasource_conn_name.table_name", ...] - }, - prompt_template='describe data'; -``` - -Note that all parameters are optional. Users can update any combination of parameters. - - -See detailed descriptions of parameters in the [`CREATE AGENT` section](/mindsdb_sql/agents/agent_syntax#create-agent-syntax). - - -Here is how to connect new data to an agent. - -```sql -ALTER AGENT my_agent -USING - data = { - "knowledge_bases": ["mindsdb.sales_kb"], - "tables": ["mysql_db.car_sales", "mysql_db.car_info"] - }; -``` - -And here is how to update a model used by the agent. - -```sql -ALTER AGENT my_agent -USING - model = { - "provider": "openai", - "model_name" : "gpt-4.1", - "api_key": "sk-abc123" - }; -``` - -## `DROP AGENT` Syntax - -Here is the syntax for deleting an agent: - -```sql -DROP AGENT my_agent; -``` diff --git a/docs/mindsdb_sql/connect/connect-mariadb-skysql.mdx b/docs/mindsdb_sql/connect/connect-mariadb-skysql.mdx deleted file mode 100644 index 46e864c73df..00000000000 --- a/docs/mindsdb_sql/connect/connect-mariadb-skysql.mdx +++ /dev/null @@ -1,95 +0,0 @@ ---- -title: MariaDB SkySQL Setup Guide with MindsDB -sidebarTitle: MariaDB SkySQL ---- - -Find more information on MariaDB Sky SQL [here](https://cloud.MariaDB.com/) - -## 1. Select your service for MindsDB - -If you haven't already, identify the service to be enabled with MindsDB and make -sure it is running. Otherwise, skip to step 2. - -## 2. Add MindsDB to your service Allowlist - -Access to MariaDB SkySQL services is [restricted on a per-service basis](https://mariadb.com/products/skysql/docs/security/firewalls/ip-allowlist-services/). 
Add the following IP addresses to allow MindsDB to connect to your MariaDB service. To do this, click on the cog icon and navigate to Security Access. In the dialog, input as prompted – one by one – the following IPs: - -``` -18.220.205.95 -3.19.152.46 -52.14.91.162 -``` - -## 3. Download your service .pem file - -A [certificate authority chain](https://mariadb.com/products/skysql/docs/connect/connection-parameters-portal/#certificate-authority-chain) (.pem file) must be provided for proper TLS certificate validation. - -From your selected service, click on the world globe icon (Connect to service). In the Login Credentials section, click Download. The `aws_skysql_chain.pem` -file will download onto your machine. - -## 4. Publicly Expose your service .pem File - -Select secure storage for the `aws_skysql_chain.pem` file that allows a working public URL or local path. For example, you can store it in an S3 bucket. - -## 5. Link MindsDB to your MariaDB SkySQL Service - -To print the query template, go to MindsDB Editor and add a new data source from the Connect tab, choose MariaDB SkySQL from the list. Fill in the values and run a query to complete the setup. 
- -Here is the code: - - - - ```sql Template - CREATE DATABASE maria_datasource --- display name for the database - WITH ENGINE = 'MariaDB', --- name of the MindsDB handler - PARAMETERS = { - "host": " ", --- host IP address or URL - "port": , --- port used to make TCP/IP connection - "database": " ", --- database name - "user": " ", --- database user - "password": " ", --- database password - "ssl": True/False, --- optional, the `ssl` parameter value indicates whether SSL is enabled (`True`) or disabled (`False`) - "ssl_ca": { --- optional, SSL Certificate Authority - "path": " " --- either "path" or "url" - }, - "ssl_cert": { --- optional, SSL certificates - "url": " " --- either "path" or "url" - }, - "ssl_key": { --- optional, SSL keys - "path": " " --- either "path" or "url" - } - }; - ``` - - ```sql Example for MariaDB SkySQL Service - CREATE DATABASE skysql_datasource - WITH ENGINE = 'MariaDB', - PARAMETERS = { - "host": "mindsdbtest.mdb0002956.db1.skysql.net", - "port": 5001, - "database": "mindsdb_data", - "user": "DB00007539", - "password": "password", - --- here, the SSL certificate is required - "ssl_ca": { - "url": "https://mindsdb-web-builds.s3.amazonaws.com/aws_skysql_chain.pem" - } - }; - ``` - - - -

- -## What's Next? - -Now that you are all set, we recommend you check out our **Tutorials** and -**Community Tutorials** sections, where you'll find various examples of -regression, classification, and time series predictions with MindsDB. - -To learn more about MindsDB itself, follow the guide on -[MindsDB database structure](/sql/table-structure/). Also, don't miss out on the -remaining pages from the **SQL API** section, as they explain a common SQL -syntax with examples. - -Have fun! diff --git a/docs/mindsdb_sql/connect/dbeaver.mdx b/docs/mindsdb_sql/connect/dbeaver.mdx deleted file mode 100644 index 3f7bb40341f..00000000000 --- a/docs/mindsdb_sql/connect/dbeaver.mdx +++ /dev/null @@ -1,96 +0,0 @@ ---- -title: MindsDB and DBeaver -sidebarTitle: DBeaver ---- - -DBeaver is a database tool that allows you to connect to and work with various database engines. You can download it [here](https://dbeaver.io/). - -## Data Setup - -First, create a new database connection in DBeaver by clicking the icon, as shown below. - -

- -

- -Next, choose the MySQL database engine and click the _Next_ button. - - -If you have multiple `MySQL` options, choose the `Driver for MySQL8 and later`. - - -

- -

- -Now it's time to fill in the connection details. - -

- -

- -Use the following parameters: - -* `127.0.0.1` or `localhost` for the host name. If you run MindsDB in cloud, specify the host name accordingly. - -* `47335` for the port, which is the port of the MySQL API exposed by MindsDB. Learn more about [available APIs here](/setup/environment-vars#mindsdb-apis). - -* `mindsdb` for the database name. - -* `mindsdb` for the user name, unless specified differently in the [`config.json` file](/setup/custom-config#auth). - -* `` for the password, unless specified differently in the [`config.json` file](/setup/custom-config#auth). - -Now we are ready to test the connection. - -## Testing the Connection - -Click on the `Test Connection...` button to check if all the provided data allows you to connect to MindsDB. - -On success, you should see the message, as below. - -

- -

- -## Let's Run Some Queries - -To finally make sure that our MindsDB database connection works, let's run some queries. - -```sql -SHOW FULL DATABASES; -``` - -On execution, we get: - -```sql -+----------------------+---------+--------+ -| Database | TYPE | ENGINE | -+----------------------+---------+--------+ -| information_schema | system | [NULL] | -| mindsdb | project | [NULL] | -| files | data | files | -+----------------------+---------+--------+ -``` - -Here is how it looks in DBeaver: - -

- -

- - -How to [whitelist MindsDB Cloud IP address](/faqs/whitelist-ips)? - - -## What's Next? - -Now that you are all set, we recommend you to check out our [Tutorials](/sql/tutorials/house-sales-forecasting) section where you'll find various examples of -regression, classification, and time series predictions with MindsDB or [Community Tutorials](/tutorials) list. - -To learn more about MindsDB itself, follow the guide on -[MindsDB database structure](/sql/table-structure/). Also, don't miss out on the -remaining pages from the **SQL API** section, as they explain a common SQL -syntax with examples. - -Have fun! diff --git a/docs/mindsdb_sql/connect/deepnote.mdx b/docs/mindsdb_sql/connect/deepnote.mdx deleted file mode 100644 index 06840ebc2d8..00000000000 --- a/docs/mindsdb_sql/connect/deepnote.mdx +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: MindsDB and Deepnote -sidebarTitle: Deepnote ---- - - - -We have worked with the team at Deepnote, and built native integration to Deepnote notebooks. -Please check: - -- [Deepnote Demo Guide](https://deepnote.com/project/Machine-Learning-With-SQL-8GDF7bc7SzKlhBLorqoIcw/%2Fmindsdb_demo.ipynb) -- [Deepnote Integration Docs](https://docs.deepnote.com/integrations/mindsdb) - - - -## What's Next? - -Now that you are all set, we recommend you check out our **Tutorials** and -**Community Tutorials** sections, where you'll find various examples of -regression, classification, and time series predictions with MindsDB. - -To learn more about MindsDB itself, follow the guide on -[MindsDB database structure](/sql/table-structure/). Also, don't miss out on the -remaining pages from the **SQL API** section, as they explain a common SQL -syntax with examples. - -Have fun! 
diff --git a/docs/mindsdb_sql/connect/grafana.mdx b/docs/mindsdb_sql/connect/grafana.mdx deleted file mode 100644 index a07a72982f1..00000000000 --- a/docs/mindsdb_sql/connect/grafana.mdx +++ /dev/null @@ -1,111 +0,0 @@ ---- -title: MindsDB and Grafana -sidebarTitle: Grafana ---- - -[Grafana](https://grafana.com/) is an open-source analytics and interactive visualization web application -that allows users to ingest data from various sources, query this data, and display it on customizable charts for easy analysis. - -## How to Connect - -To begin, set up Grafana by following one of the methods outlined in the [Grafana Installation Documentation](https://grafana.com/docs/grafana/latest/setup-grafana/installation/#supported-operating-systems). - -Once Grafana is successfully set up in your environment, navigate to the Connections section, click on Add new connection, and select the MySQL plugin -, as shown below. - -

- -

- -Now it's time to fill in the connection details. - -

- -

- -There are three options, as below. - - - - You can connect to your local MindsDB. To do that, please use the connection details below: - - ``` - Host: `127.0.0.1:47335` - Username: `mindsdb` - Password: - Database: - ``` - - - -Now we are ready to Save & test the connection. - -## Testing the Connection - -Click on the `Save & test` button to check if all the provided data -allows you to connect to MindsDB. - -On success, you should see the message, as below. - -

- -

- -## Examples - -### Querying - -To verify the functionality of our MindsDB database connection, -you can query data in the Explore view. Use the text edit mode to compose your queries. - -```sql -SHOW FULL DATABASES; -``` - -On execution, we get: - -

- -

- -### Visual Query Builder - -Now you can build a dashboard with a MindsDB database connection. -Example query : - -```sql -CREATE DATABASE mysql_demo_db -WITH ENGINE = "mysql", -PARAMETERS = { - "user": "user", - "password": "MindsDBUser123!", - "host": "samples.mindsdb.com", - "port": "3306", - "database": "public" - }; - -SELECT * FROM mysql_demo_db.air_passengers; -``` - -On execution, we get: - -

- -

- - -How to [whitelist MindsDB Cloud IP address](/faqs/whitelist-ips)? - - -## What's Next? - -Now that you are all set, we recommend you check out our **Tutorials** and -**Community Tutorials** sections, where you'll find various examples of -regression, classification, and time series predictions with MindsDB. - -To learn more about MindsDB itself, follow the guide on -[MindsDB database structure](/sql/table-structure/). Also, don't miss out on the -remaining pages from the **SQL API** section, as they explain a common SQL -syntax with examples. - -Have fun! diff --git a/docs/mindsdb_sql/connect/jupysql.mdx b/docs/mindsdb_sql/connect/jupysql.mdx deleted file mode 100644 index 32e3f302562..00000000000 --- a/docs/mindsdb_sql/connect/jupysql.mdx +++ /dev/null @@ -1,95 +0,0 @@ ---- -title: MindsDB and Jupyter Notebooks -sidebarTitle: Jupyter Notebooks ---- - -Jupysql - full SQL client on Jupyter. It allows you to run SQL and plot large -datasets in Jupyter via a %sql and %%sql magics. It also allows users to plot -the data directly from the DB ( via %sqlplot magics). - -Jupysql facilitates working with databases and Jupyter. You can download it -[here](https://github.com/ploomber/jupysql) or run a `pip install jupysql`. - - -You can consider an option to interact with MindsDB directly from [MySQL CLI](/connect/mysql-client/) or [Postgres CLI](/connect/postgres-client/). - - -## How to Connect - -#### Pre-requisite: -- Make sure you have *jupysql* installed: To install it, run `pip install jupysql` -- Make sure you have *pymysql* installed: To install it, run `pip install pymysql` - - - You can easily verify the installation of jupysql by running this code: - - ```python - %load_ext sql - ``` - - This command loads the package and allows you to run cell magics on top of Jupyter. - - And for pymysql, validate by running this command: - - ```python - import pymysql - ``` - - -Please follow the instructions below to connect to your MindsDB via Jupysql and Jupyter. 
- - - - You can use the Python code below to connect your Jupyter notebook (or lab) to Local MindsDB database (via Jupysql). - Load the extension: - - ```python - %load_ext sql - ``` - - Connect to your DB: - - ```python - %sql mysql+pymysql://mindsdb:@127.0.0.1:47335/mindsdb - ``` - - Testing connection by listing the existing tables (pure SQL): - - ```python - %sql show tables - ``` - - Please note that we use the following connection details: - - - Username is `mindsdb` - - Password is left empty - - Host is `127.0.0.1` - - Port is `47335` - - Database name is `mindsdb` - - *Docker* - connecting to docker might have a different port. - - - - -Create a database connection and execute the code above. On success, only the last command which lists the tables will output. The expected output is: - -```bash -* mysql+pymysql://mindsdb:***@127.0.0.1:47335/mindsdb -2 rows affected. -Tables_in_mindsdb -models -``` - -## What's Next? - -Now that you are all set, we recommend you check out our **Tutorials** and -**Community Tutorials** sections, where you'll find various examples of -regression, classification, and time series predictions with MindsDB. - -To learn more about MindsDB itself, follow the guide on -[MindsDB database structure](/sql/table-structure/). Also, don't miss out on the -remaining pages from the **SQL API** section, as they explain a common SQL -syntax with examples. - -Have fun! diff --git a/docs/mindsdb_sql/connect/metabase.mdx b/docs/mindsdb_sql/connect/metabase.mdx deleted file mode 100644 index 8dfadc96b98..00000000000 --- a/docs/mindsdb_sql/connect/metabase.mdx +++ /dev/null @@ -1,138 +0,0 @@ ---- -title: MindsDB and Metabase -sidebarTitle: Metabase ---- - -Metabase is open-source software that facilitates data analysis. It lets you visualize your data easily and intuitively. Now that MindsDB supports the MySQL binary protocol, you can connect it to Metabase and see the forecasts by creating and training the models. 
- -For more information, visit [Metabase](https://www.metabase.com/). - -## Setup - -### MindsDB - -Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). - -### Metabase - -Now, let's set up the Metabase by following one of the approaches presented on -[the Metabase Open Source Edition page](https://www.metabase.com/start/oss/). - -Here, we use the -[.jar approach](https://www.metabase.com/docs/latest/installation-and-operation/running-the-metabase-jar-file.html) -for Metabase. - -## How to Connect - -Follow the steps below to connect your MindsDB to Metabase. - -1. Open your Metabase and navigate to the _Admin settings_ by clicking the cog - in the bottom left corner. -2. Once there, click on _Databases_ in the top navigation bar. -3. Click on _Add database_ in the top right corner. -4. Fill in the form using the following data: - - - - ```text - Database type: `MySQL` - Display name: `MindsDB` - Host: `localhost` - Port: `47335` - Database name: `mindsdb` - Username: `mindsdb` - Password: *leave it empty* - ``` - - - - -

- -

- -5. Click on _Save_. - -Now you're connected! - -

- -

- -## Example - -Now that the connection between MindsDB and Metabase is established, let's do -some examples. - -Most of the SQL statements that you usually run in your -[MindsDB SQL Editor](/connect/mindsdb_editor/) can be run in Metabase as well. -Let's start with something easy. - -On your Metabase's home page, click on _New > SQL query_ in the top right corner -and then, select your MindsDB database. - -Let's execute the following command in the editor. - -```sql -SHOW TABLES; -``` - -On execution, we get: - -

- -

- -Please note that creating a -[database connection](/sql/tutorials/home-rentals/#connecting-the-data) using -the `CREATE DATABASE` statement fails because of the curly braces (`{}`) being -used by JDBC as the escape sequences. - -```sql -CREATE DATABASE example_db - WITH ENGINE = "postgres", - PARAMETERS = { - "user": "demo_user", - "password": "demo_password", - "host": "samples.mindsdb.com", - "port": "5432", - "database": "demo" -}; -``` - -On execution, we get: - -

- -

- -You can overcome this issue using the -[MindsDB SQL Editor](/connect/mindsdb_editor/) to create a database. - -Now, getting back to the Metabase, let's run some queries on the database -created with the help of the [MindsDB SQL Editor](/connect/mindsdb_editor/). - -```sql -SELECT * -FROM example_db.demo_data.home_rentals -LIMIT 10; -``` - -On execution, we get: - -

- -

- -## What's Next? - -Now that you are all set, we recommend you check out our **Tutorials** and -**Community Tutorials** sections, where you'll find various examples of -regression, classification, and time series predictions with MindsDB. - -To learn more about MindsDB itself, follow the guide on -[MindsDB database structure](/sql/table-structure/). Also, don't miss out on the -remaining pages from the **SQL API** section, as they explain a common SQL -syntax with examples. - -Have fun! diff --git a/docs/mindsdb_sql/connect/mindsdb_editor.mdx b/docs/mindsdb_sql/connect/mindsdb_editor.mdx deleted file mode 100644 index f6168383512..00000000000 --- a/docs/mindsdb_sql/connect/mindsdb_editor.mdx +++ /dev/null @@ -1,56 +0,0 @@ ---- -title: MindsDB SQL Editor -sidebarTitle: MindsDB SQL Editor ---- - -MindsDB provides a SQL Editor, so you don't need to download additional SQL clients to connect to MindsDB. - -## How to Use the MindsDB SQL Editor - -There are two ways you can use the Editor, as below. - - - - - After setting up MindsDB using [Docker](/setup/self-hosted/docker), or pip - on - [Linux](/setup/self-hosted/pip/linux)/[Windows](/setup/self-hosted/pip/windows)/[MacOS](/setup/self-hosted/pip/macos), - or pip via [source code](/setup/self-hosted/pip/source), go to your terminal and - execute the following: - - ```bash - python -m mindsdb - ``` - - On execution, we get: - - ```bash - ... - 2022-05-06 14:07:04,599 - INFO - - GUI available at http://127.0.0.1:47334/ - ... - ``` - - Immediately after, your browser automatically opens the MindsDB SQL Editor. In - case it doesn't, visit the URL - [`http://127.0.0.1:47334/`](http://127.0.0.1:47334/) in your browser of - preference. - - - - - -Here is a sneak peek of the MindsDB SQL Editor: -![GUI](/assets/cloud/gui_query.png) - -## What's Next? 
- -Now that you are all set, we recommend you check out our **Tutorials** and -**Community Tutorials** sections, where you'll find various examples of -regression, classification, and time series predictions with MindsDB. - -To learn more about MindsDB itself, follow the guide on -[MindsDB database structure](/sql/table-structure/). Also, don't miss out on the -remaining pages from the **SQL API** section, as they explain a common SQL -syntax with examples. - -Have fun! diff --git a/docs/mindsdb_sql/connect/mysql-client.mdx b/docs/mindsdb_sql/connect/mysql-client.mdx deleted file mode 100644 index c0952063e1d..00000000000 --- a/docs/mindsdb_sql/connect/mysql-client.mdx +++ /dev/null @@ -1,58 +0,0 @@ ---- -title: MindsDB and MySQL CLI -sidebarTitle: MySQL CLI ---- - -MindsDB provides a powerful MySQL API that allows users to connect to it -using the MySQL Command Line Client. - -Please note that connecting to MindsDB's MySQL API is the same as connecting to -a MySQL database. Find more information on MySQL CLI -[here](https://dev.mysql.com/doc/refman/8.0/en/mysql.html). - - -By default, MindsDB starts the `http` and `mysql` APIs. You can define which APIs to start using the `api` flag as below. - -```bash -python -m mindsdb --api http,mysql,postgres -``` - -If you want to start MindsDB without the graphical user interface (GUI), use the `--no_studio` flag as below. - -```bash -python -m mindsdb --no_studio -``` - - -## How to Connect - -To connect MindsDB in MySQL, use the `mysql` client program: - -```bash -mysql -h [hostname] --port [TCP/IP port number] -u [user] -p [password] -``` - -Here is the command that allows you to connect to MindsDB. - -```bash -mysql -h 127.0.0.1 --port 47335 -u mindsdb -``` - -On execution, we get: - -```bash -Welcome to the MariaDB monitor. Commands end with ";" or "\g". -Server version: 5.7.1-MindsDB-1.0 (MindsDB) - -Type 'help;' or '\h' for help. Type '\c' to clear the current input statement. 
- -MySQL [(none)]> -``` - -## What's Next? - -Now that you are all set, we recommend you check out our [Use Cases](/use-cases/overview) section, where you'll find various examples of regression, classification, time series, and NLP predictions with MindsDB. - -To learn more about MindsDB itself, follow the guide on [MindsDB database structure](/sql/table-structure/). Also, don't miss out on the remaining pages from the **MindsDB SQL** section, as they explain a common SQL syntax with examples. - -Have fun! diff --git a/docs/mindsdb_sql/connect/sql-alchemy.mdx b/docs/mindsdb_sql/connect/sql-alchemy.mdx deleted file mode 100644 index 138d114358b..00000000000 --- a/docs/mindsdb_sql/connect/sql-alchemy.mdx +++ /dev/null @@ -1,84 +0,0 @@ ---- -title: MindsDB and SQL Alchemy -sidebarTitle: SQL Alchemy ---- - -SQL Alchemy is a Python SQL toolkit, that provides object-relational mapping features for the Python programming language. - -SQL Alchemy facilitates working with databases and Python. You can download it [here](https://www.sqlalchemy.org/) or run a `pip install sqlalchemy`. - - -You can consider an option to interact with MindsDB directly from [MySQL CLI](/connect/mysql-client/) or [Postgres CLI](/connect/postgres-client/). - - -## How to Connect - -Please follow the instructions below to connect your MindsDB to SQL Alchemy. - - - - You can use the Python code below to connect your MindsDB database to SQL Alchemy. - - Make sure you have the *pymysql* module installed before executing the Python code. To install it, run the `pip install pymysql` command. 
- - ```python - from sqlalchemy import create_engine - - user = 'mindsdb' - password = '' - host = '127.0.0.1' - port = 47335 - database = '' - - def get_connection(): - return create_engine( - url="mysql+pymysql://{0}:{1}@{2}:{3}/{4}".format(user, password, host, port, database) - ) - - if __name__ == '__main__': - try: - engine = get_connection() - engine.connect() - print(f"Connection to the {host} for user {user} created successfully.") - except Exception as ex: - print("Connection could not be made due to the following error: \n", ex) - ``` - - Please note that we use the following connection details: - - - Username is `mindsdb` - - Password is left empty - - Host is `127.0.0.1` - - Port is `47335` - - Database name is left empty - - To create a database connection, execute the code above. On success, the following output is expected: - - ```bash - Connection to the 127.0.0.1 for user mindsdb created successfully. - ``` - - - - -
- - - - The SQLAlchemy `create_engine` is lazy. This means any mistake made when - entering the connection details goes undetected until an action becomes - necessary, such as when calling the `execute` method to execute SQL commands. - - -## What's Next? - -Now that you are all set, we recommend you check out our **Tutorials** and -**Community Tutorials** sections, where you'll find various examples of -regression, classification, and time series predictions with MindsDB. - -To learn more about MindsDB itself, follow the guide on -[MindsDB database structure](/sql/table-structure/). Also, don't miss out on the -remaining pages from the **SQL API** section, as they explain a common SQL -syntax with examples. - -Have fun! diff --git a/docs/mindsdb_sql/connect/tableau.mdx b/docs/mindsdb_sql/connect/tableau.mdx deleted file mode 100644 index 57d43c357ff..00000000000 --- a/docs/mindsdb_sql/connect/tableau.mdx +++ /dev/null @@ -1,128 +0,0 @@ ---- -title: MindsDB and Tableau -sidebarTitle: Tableau ---- - -Tableau lets you visualize your data easily and intuitively. Now that MindsDB -supports the MySQL binary protocol, you can connect it to Tableau and see the -forecasts. - -## How to Connect - -Follow the steps below to connect your MindsDB to Tableau. - -First, create a new workbook in Tableau and open the _Connectors_ tab in the -_Connect to Data_ window. - -

- -

- -Next, choose _MySQL_ and provide the details of your MindsDB connection, such as -the IP, port, and database name. Optionally, you can provide a username and -password. Then, click _Sign In_. - -

- -

- -Here are the connection parameters: - -```text -Host: `localhost` -Port: `47335` -Database name: `mindsdb` -Username: `mindsdb` -Password: *leave it empty* -``` - - -You can [set up the authentication with user and password in the config file](/setup/custom-config#auth). - - -Now you're connected! - -## Overview of MindsDB in Tableau - -The content of your MindsDB is visible in the right-side pane. - -

- -

- -All the predictors are listed under the _Table_ section. You can also switch -between the integrations, such as _mindsdb_ or _files_, in the _Database_ -section using the drop-down. - -

- -

- -Now, let's run some examples! - -## Examples - -### Example 1 - -Previewing one of the tables from the _mysql_ integration: - -

- -

- -### Example 2 - -There is one technical limitation. Namely, we cannot join tables from different -databases/integrations in Tableau. To overcome this challenge, you can use -either views or custom SQL queries. - -- Previewing a view that joins a data table with a predictor table: - -

- -

- -- Using a custom SQL query by clicking the _New Custom SQL_ button in the - right-side pane: - -

- -

- -## What's Next? - -Now that you are all set, we recommend you check out our **Tutorials** and -**Community Tutorials** sections, where you'll find various examples of -regression, classification, and time series predictions with MindsDB. - -To learn more about MindsDB itself, follow the guide on -[MindsDB database structure](/sql/table-structure/). Also, don't miss out on the -remaining pages from the **SQL API** section, as they explain a common SQL -syntax with examples. - - -**From Our Community** - -Check out the articles and video guides created by our community: - -- Article on [Predicting & Visualizing Hourly Electricity Demand in the US with MindsDB and Tableau](https://teslimodus.medium.com/predicting-visualizing-hourly-electricity-demand-in-the-us-with-mindsdb-and-tableau-126d1c74d860) - by [Teslim Odumuyiwa](https://teslimodus.medium.com/) - -- Article on [Predicting & Visualizing Petroleum Production with MindsDB and Tableau](https://dev.to/tesprogram/predicting-visualizing-petroleum-production-with-mindsdb-and-tableau-373f) - by [Teslim Odumuyiwa](https://github.com/Tes-program) - -- Article on [Predicting & Visualizing Gas Prices with MindsDB and Tableau](https://dev.to/tesprogram/predicting-visualizing-gas-prices-with-mindsdb-and-tableau-d1p) - by [Teslim Odumuyiwa](https://github.com/Tes-program) - -- Article on [How To Visualize MindsDB Predictions with Tableau](https://dev.to/ephraimx/how-to-visualize-mindsdb-predictions-with-tableau-2bpd) - by [Ephraimx](https://dev.to/ephraimx) - -- Video guide on [Connecting MindsDB to Tableau](https://www.youtube.com/watch?v=eUiBVrm85v4) - by [Alissa Troiano](https://github.com/alissatroiano) - -- Video guide on [Visualizing prediction result in Tableau](https://youtu.be/4aio-8kNbOo) by - [Teslim Odumuyiwa](https://github.com/Tes-program) - - -Have fun! 
- diff --git a/docs/mindsdb_sql/functions/custom_functions.mdx b/docs/mindsdb_sql/functions/custom_functions.mdx deleted file mode 100644 index c8bd4f19e92..00000000000 --- a/docs/mindsdb_sql/functions/custom_functions.mdx +++ /dev/null @@ -1,93 +0,0 @@ ---- -title: Bring Your Own Function -sidebarTitle: BYOF ---- - -Custom functions provide advanced means of manipulating data. Users can upload custom functions written in Python to MindsDB and apply them to data. - -## How It Works - -You can upload your custom functions via the MindsDB editor by clicking `Add` and `Upload custom functions`, like this: - -

- -

- -Here is the form that needs to be filled out in order to bring your custom functions to MindsDB: - -

- -

- -Let's briefly go over the files that need to be uploaded: - -* The Python file stores an implementation of your custom functions. Here is the sample format: - - ```py - def function_name_1(a:type, b:type) -> type: - - return x - - def function_name_2(a:type, b:type, c:type) -> type: - - return x - ``` - - - Note that if the input and output types are not set, then `str` is used by default. - - - - ```py - def add_integers(a:int, b:int) -> int: - return a+b - ``` - - -* The optional requirements file, or `requirements.txt`, stores all dependencies along with their versions. Here is the sample format: - - ```sql - dependency_package_1 == version - dependency_package_2 >= version - dependency_package_3 >= version, < version - ... - ``` - - - ```sql - pandas - scikit-learn - ``` - - -Once you upload the above files, please provide the name for a storage collection. - -Let's look at an example. - -## Example - -We upload the custom functions, as below: - -

- -

- -Here we upload the `functions.py` file that stores an implementation of the functions and the `requirements.txt` file that stores all the dependencies. We named the storage collection as `custom_functions`. - -Now we can use the functions as below: - -```sql -SELECT functions.add_integers(sqft, 1) AS added_one, sqft -FROM example_db.home_rentals -LIMIT 1; -``` - -Here is the output: - -```sql -+-----------+------+ -| added_one | sqft | -+-----------+------+ -| 918 | 917 | -+-----------+------+ -``` diff --git a/docs/mindsdb_sql/functions/from_env.mdx b/docs/mindsdb_sql/functions/from_env.mdx deleted file mode 100644 index 1cdd88a0aa2..00000000000 --- a/docs/mindsdb_sql/functions/from_env.mdx +++ /dev/null @@ -1,18 +0,0 @@ ---- -title: The FROM_ENV() Function -sidebarTitle: FROM_ENV() ---- - -MindsDB provides the `FROM_ENV()` function that lets users pull values from the environment variables into MindsDB. - -## Usage - -Here is how to use the `FROM_ENV()` function. - -```sql -FROM_ENV("MDB_MY_ENV_VAR") -``` - -Note that due to security concerns, **only the environment variables with name starting with `MDB_` can be extracted with the `from_env()` function**. - -Learn more about [MindsDB variables here](/mindsdb_sql/functions/variables). diff --git a/docs/mindsdb_sql/functions/llm_function.mdx b/docs/mindsdb_sql/functions/llm_function.mdx deleted file mode 100644 index 1a6aacc0dd8..00000000000 --- a/docs/mindsdb_sql/functions/llm_function.mdx +++ /dev/null @@ -1,166 +0,0 @@ ---- -title: The LLM() Function -sidebarTitle: LLM() ---- - -MindsDB provides the `LLM()` function that lets users incorporate the LLM-generated output directly into the data queries. - -## Prerequisites - -The `LLM()` function requires a large language model, which can be defined in the following ways: - -- By setting the `default_llm` parameter in the [MindsDB configuration file](/setup/custom-config#default-llm). -- By saving the default model in the MindsDB Editor under Settings. 
-- By defining the environment variables as below, choosing one of the available model providers. - - - - - Here are the environment variables for the OpenAI provider: - - ``` - LLM_FUNCTION_MODEL_NAME - LLM_FUNCTION_TEMPERATURE - LLM_FUNCTION_MAX_RETRIES - LLM_FUNCTION_MAX_TOKENS - LLM_FUNCTION_BASE_URL - OPENAI_API_KEY - LLM_FUNCTION_API_ORGANIZATION - LLM_FUNCTION_REQUEST_TIMEOUT - ``` - - Note that the values stored in the environment variables are specific for each provider. - - - - Here are the environment variables for the Anthropic provider: - - ``` - LLM_FUNCTION_MODEL_NAME - LLM_FUNCTION_TEMPERATURE - LLM_FUNCTION_MAX_TOKENS - LLM_FUNCTION_TOP_P - LLM_FUNCTION_TOP_K - LLM_FUNCTION_DEFAULT_REQUEST_TIMEOUT - LLM_FUNCTION_API_KEY - LLM_FUNCTION_BASE_URL - ``` - - Note that the values stored in the environment variables are specific for each provider. - - - - Here are the environment variables for the LiteLLM provider: - - ``` - LLM_FUNCTION_MODEL_NAME - LLM_FUNCTION_TEMPERATURE - LLM_FUNCTION_API_BASE - LLM_FUNCTION_MAX_RETRIES - LLM_FUNCTION_MAX_TOKENS - LLM_FUNCTION_TOP_P - LLM_FUNCTION_TOP_K - ``` - - Note that the values stored in the environment variables are specific for each provider. - - - - Here are the environment variables for the Ollama provider: - - ``` - LLM_FUNCTION_BASE_URL - LLM_FUNCTION_MODEL_NAME - LLM_FUNCTION_TEMPERATURE - LLM_FUNCTION_TOP_P - LLM_FUNCTION_TOP_K - LLM_FUNCTION_REQUEST_TIMEOUT - LLM_FUNCTION_FORMAT - LLM_FUNCTION_HEADERS - LLM_FUNCTION_NUM_PREDICT - LLM_FUNCTION_NUM_CTX - LLM_FUNCTION_NUM_GPU - LLM_FUNCTION_REPEAT_PENALTY - LLM_FUNCTION_STOP - LLM_FUNCTION_TEMPLATE - ``` - - Note that the values stored in the environment variables are specific for each provider. 
- - - - Here are the environment variables for the Nvidia NIMs provider: - - ``` - LLM_FUNCTION_BASE_URL - LLM_FUNCTION_MODEL_NAME - LLM_FUNCTION_TEMPERATURE - LLM_FUNCTION_TOP_P - LLM_FUNCTION_REQUEST_TIMEOUT - LLM_FUNCTION_FORMAT - LLM_FUNCTION_HEADERS - LLM_FUNCTION_NUM_PREDICT - LLM_FUNCTION_NUM_CTX - LLM_FUNCTION_NUM_GPU - LLM_FUNCTION_REPEAT_PENALTY - LLM_FUNCTION_STOP - LLM_FUNCTION_TEMPLATE - LLM_FUNCTION_NVIDIA_API_KEY - ``` - - Note that the values stored in the environment variables are specific for each provider. - - - - -**OpenAI-compatible model providers** can be used like OpenAI models. - -There is a number of OpenAI-compatible model providers including OpenRouter or vLLM. To use models via these providers, users need to define the base URL and the API key of the provider. - -Here is an example of using OpenRouter. - -``` -LLM_FUNCTION_MODEL_NAME = "mistralai/devstral-small-2505" -LLM_FUNCTION_BASE_URL = "https://openrouter.ai/api/v1" -OPENAI_API_KEY = "openrouter-api-key" -``` - - -## Usage - -You can use the `LLM()` function to simply ask a question and get an answer. - -```sql -SELECT LLM('How many planets are there in the solar system?'); -``` - -Here is the output: - -```sql -+------------------------------------------+ -| llm | -+------------------------------------------+ -| There are 8 planets in the solar system. | -+------------------------------------------+ -``` - -Moreover, you can use the `LLM()` function with your data to swiftly complete tasks such as text generation or summarization. 
- -```sql -SELECT - comment, - LLM('Describe the comment''s category in one word: ' || comment) AS category -FROM example_db.user_comments; -``` - -Here is the output: - -```sql -+--------------------------+----------+ -| comment | category | -+--------------------------+----------+ -| I hate tacos | Dislike | -| I want to dance | Desire | -| Baking is not a big deal | Opinion | -+--------------------------+----------+ -``` diff --git a/docs/mindsdb_sql/functions/standard-functions.mdx b/docs/mindsdb_sql/functions/standard-functions.mdx deleted file mode 100644 index 2109e429c01..00000000000 --- a/docs/mindsdb_sql/functions/standard-functions.mdx +++ /dev/null @@ -1,75 +0,0 @@ ---- -title: Standard Functions -sidebarTitle: Standard Functions ---- - -MindsDB supports standard SQL functions via DuckDB and MySQL engines. - -## DuckDB Functions - -MindsDB executes functions on the underlying DuckDB engine. Therefore, [all DuckDB functions](https://duckdb.org/docs/stable/sql/functions/overview) are supported within MindsDB out of the box. 
- -* [Aggregate Functions](https://duckdb.org/docs/stable/sql/functions/aggregates) -* [Array Functions](https://duckdb.org/docs/stable/sql/functions/array) -* [Bitstring Functions](https://duckdb.org/docs/stable/sql/functions/bitstring) -* [Blob Functions](https://duckdb.org/docs/stable/sql/functions/blob) -* [Date Format Functions](https://duckdb.org/docs/stable/sql/functions/dateformat) -* [Date Functions](https://duckdb.org/docs/stable/sql/functions/date) -* [Date Part Functions](https://duckdb.org/docs/stable/sql/functions/datepart) -* [Enum Functions](https://duckdb.org/docs/stable/sql/functions/enum) -* [Interval Functions](https://duckdb.org/docs/stable/sql/functions/interval) -* [Lambda Functions](https://duckdb.org/docs/stable/sql/functions/lambda) -* [List Functions](https://duckdb.org/docs/stable/sql/functions/list) -* [Map Functions](https://duckdb.org/docs/stable/sql/functions/map) -* [Nested Functions](https://duckdb.org/docs/stable/sql/functions/nested) -* [Numeric Functions](https://duckdb.org/docs/stable/sql/functions/numeric) -* [Pattern Matching](https://duckdb.org/docs/stable/sql/functions/pattern_matching) -* [Regular Expressions](https://duckdb.org/docs/stable/sql/functions/regular_expressions) -* [Struct Functions](https://duckdb.org/docs/stable/sql/functions/struct) -* [Text Functions](https://duckdb.org/docs/stable/sql/functions/text) -* [Time Functions](https://duckdb.org/docs/stable/sql/functions/time) -* [Timestamp Functions](https://duckdb.org/docs/stable/sql/functions/timestamp) -* [Timestamp with Time Zone Functions](https://duckdb.org/docs/stable/sql/functions/timestamptz) -* [Union Functions](https://duckdb.org/docs/stable/sql/functions/union) -* [Utility Functions](https://duckdb.org/docs/stable/sql/functions/utility) -* [Window Functions](https://duckdb.org/docs/stable/sql/functions/window_functions) - -## MySQL Functions - -MindsDB executes MySQL-style functions on the underlying DuckDB engine. 
The following functions have been adapted to MySQL-style functions. - -String functions: - -* [`CHAR`](https://dev.mysql.com/doc/refman/8.4/en/string-functions.html#function_char) -* [`FORMAT`](https://dev.mysql.com/doc/refman/8.4/en/string-functions.html#function_format) -* [`INSTR`](https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_instr) -* [`LENGTH`](https://dev.mysql.com/doc/refman/8.4/en/string-functions.html#function_length) -* [`LOCATE`](https://dev.mysql.com/doc/refman/8.4/en/string-functions.html#function_locate) -* [`SUBSTRING_INDEX`](https://dev.mysql.com/doc/refman/8.4/en/string-functions.html#function_substring-index) -* [`UNHEX`](https://dev.mysql.com/doc/refman/8.4/en/string-functions.html#function_unhex) - -Date and time functions: - -* [`ADDDATE`](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_adddate) -* [`ADDTIME`](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_addtime) -* [`CONVERT_TZ`](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_convert-tz) -* [`CURDATE`](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_curdate) -* [`CURTIME`](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_curtime) -* [`DATE_ADD`](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_date-add) -* [`DATE_FORMAT`](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_date-format) -* [`DATE_SUB`](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_date-sub) -* [`DATEDIFF`](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_datediff) -* [`DAYNAME`](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_dayname) -* [`DAYOFMONTH`](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_dayofmonth) -* 
[`DAYOFWEEK`](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_dayofweek) -* [`DAYOFYEAR`](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_dayofyear) -* [`EXTRACT`](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_extract) -* [`FROM_DAYS`](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_from-days) -* [`FROM_UNIXTIME`](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_from-unixtime) -* [`GET_FORMAT`](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_get-format) -* [`TIMESTAMPDIFF`](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_timestampdiff) - -Other functions: - -* [`REGEXP_SUBSTR`](https://dev.mysql.com/doc/refman/8.4/en/regexp.html#function_regexp-substr) -* [`SHA2`](https://dev.mysql.com/doc/refman/8.4/en/encryption-functions.html#function_sha2) diff --git a/docs/mindsdb_sql/functions/to_markdown_function.mdx b/docs/mindsdb_sql/functions/to_markdown_function.mdx deleted file mode 100644 index 079cc791b94..00000000000 --- a/docs/mindsdb_sql/functions/to_markdown_function.mdx +++ /dev/null @@ -1,180 +0,0 @@ ---- -title: The TO_MARKDOWN() Function -sidebarTitle: TO_MARKDOWN() ---- - -MindsDB provides the `TO_MARKDOWN()` function that lets users extract the content of their documents in markdown by simply specifying the document path or URL. This function is especially useful for passing the extracted content of documents through LLMs or for storing them in a [Knowledge Base](/mindsdb_sql/agents/knowledge-bases). - -## Configuration - -The `TO_MARKDOWN()` function supports different file formats and methods of passing documents into it, as well as an LLM required for processing documents. - -### Supported File Formats - -The `TO_MARKDOWN()` function supports PDF, XML, and Nessus file formats. 
The documents can be provided from URLs, file storage, or Amazon S3 storage. - -### Supported LLMs - -The `TO_MARKDOWN()` function requires an LLM to process the document content into the Markdown format. - -The supported LLM providers include: - -- OpenAI -- Azure OpenAI -- Google - - -The model you select must support multi-modal inputs, that is, both images and text. For example, OpenAI’s gpt-4o is a supported multi-modal model. - - -Users can provide an LLM using one of the methods below: - -1. Set the default model in the Settings of MindsDB Editor. -2. Set the default model in the [MindsDB configuration file](/setup/custom-config#default-llm). -3. Use environment variables defined below to set an LLM specifically for the `TO_MARKDOWN()` function. - - The `TO_MARKDOWN_FUNCTION_PROVIDER` environment variable defines the selected provider, which is one of `openai`, `azure_openai`, or `google`. - - - - Here are the environment variables for the OpenAI provider: - - ``` - TO_MARKDOWN_FUNCTION_API_KEY (required) - TO_MARKDOWN_FUNCTION_MODEL_NAME - TO_MARKDOWN_FUNCTION_TEMPERATURE - TO_MARKDOWN_FUNCTION_MAX_RETRIES - TO_MARKDOWN_FUNCTION_MAX_TOKENS - TO_MARKDOWN_FUNCTION_BASE_URL - TO_MARKDOWN_FUNCTION_API_ORGANIZATION - TO_MARKDOWN_FUNCTION_REQUEST_TIMEOUT - ``` - - - - - Here are the environment variables for the Azure OpenAI provider: - - ``` - TO_MARKDOWN_FUNCTION_API_KEY (required) - TO_MARKDOWN_FUNCTION_BASE_URL (required) - TO_MARKDOWN_FUNCTION_API_VERSION (required) - TO_MARKDOWN_FUNCTION_MODEL_NAME - TO_MARKDOWN_FUNCTION_TEMPERATURE - TO_MARKDOWN_FUNCTION_MAX_RETRIES - TO_MARKDOWN_FUNCTION_MAX_TOKENS - TO_MARKDOWN_FUNCTION_API_ORGANIZATION - TO_MARKDOWN_FUNCTION_REQUEST_TIMEOUT - ``` - - - - Here are the environment variables for the Google provider: - - ``` - TO_MARKDOWN_FUNCTION_API_KEY - TO_MARKDOWN_FUNCTION_MODEL_NAME - TO_MARKDOWN_FUNCTION_TEMPERATURE - TO_MARKDOWN_FUNCTION_MAX_TOKENS - TO_MARKDOWN_FUNCTION_REQUEST_TIMEOUT - ``` - - - -## Usage - -You 
can use the `TO_MARKDOWN()` function to extract the content of your documents in markdown format. The arguments for this function are: - -- `file_path_or_url`: The path or URL of the document you want to extract content from. - - - - - The following example shows how to use the `TO_MARKDOWN()` function with a PDF document from [Amazon S3 storage connected to MindsDB](/integrations/data-integrations/amazon-s3). - - ```sql - SELECT TO_MARKDOWN(public_url) FROM s3_datasource.files; - ``` - - Here are the steps for passing files from Amazon S3 into TO_MARKDOWN(). - - 1. Connect Amazon S3 to MindsDB following [this instruction](/integrations/data-integrations/amazon-s3). - 2. The `public_url` of the file is generated in the `s3_datasource.files` table upon connecting the Amazon S3 data source to MindsDB. - 3. Upon running the above query, the `public_url` of the file is selected from the `s3_datasource.files` table. - - - - - - The following example shows how to use the `TO_MARKDOWN()` function with a PDF document from URL. - - ```sql - SELECT TO_MARKDOWN('https://www.princexml.com/howcome/2016/samples/invoice/index.pdf'); - ``` - - Here is the output: - - ```sql - +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | to_markdown | - +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ```markdown | - | # Invoice | - | | - | YesLogic Pty. Ltd. 
| - | 7 / 39 Bouverie St | - | Carlton VIC 3053 | - | Australia | - | | - | www.yeslogic.com | - | ABN 32 101 193 560 | - | | - | Customer Name | - | Street | - | Postcode City | - | Country | - | | - | Invoice date: | Nov 26, 2016 | - | --- | --- | - | Invoice number: | 161126 | - | Payment due: | 30 days after invoice date | - | | - | | Description | From | Until | Amount | | - | |---------------------------|-------------|-------------|------------| | - | | Prince Upgrades & Support | Nov 26, 2016 | Nov 26, 2017 | USD $950.00 | | - | | Total | | | USD $950.00 | | - | | - | Please transfer amount to: | - | | - | Bank account name: | Yes Logic Pty Ltd | - | --- | --- | - | Name of Bank: | Commonwealth Bank of Australia (CBA) | - | Bank State Branch (BSB): | 063010 | - | Bank State Branch (BSB): | 063010 | - | Bank State Branch (BSB): | 063019 | - | Bank account number: | 13201652 | - | Bank SWIFT code: | CTBAAU2S | - | Bank address: | 231 Swanston St, Melbourne, VIC 3000, Australia | - | | - | The BSB number identifies a branch of a financial institution in Australia. When transferring money to Australia, the BSB number is used together with the bank account number and the SWIFT code. Australian banks do not use IBAN numbers. | - | | - | www.yeslogic.com | - | ``` | - +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - ``` - - - - -The content of each PDF page is intelligently extracted by first assessing how visually complex the page is. Based on this assessment, the system decides whether traditional text parsing is sufficient or if the page should be processed using an LLM. - -### Usage with Knowledge Bases - -You can also use the `TO_MARKDOWN()` function to extract content from documents and store it in a [Knowledge Base](/mindsdb_sql/agents/knowledge-bases). 
This is particularly useful for creating a Knowledge Base from a collection of documents. - -```sql -INSERT INTO my_kb ( - SELECT - HASH('https://www.princexml.com/howcome/2016/samples/invoice/index.pdf') as id, - TO_MARKDOWN('https://www.princexml.com/howcome/2016/samples/invoice/index.pdf') as content -) diff --git a/docs/mindsdb_sql/functions/variables.mdx b/docs/mindsdb_sql/functions/variables.mdx deleted file mode 100644 index 7405261920b..00000000000 --- a/docs/mindsdb_sql/functions/variables.mdx +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: Variables -sidebarTitle: Variables ---- - -MindsDB supports the usage of variables. Users can save values of API keys or other frequently used values and pass them as variables when creating knowledge bases, agents, or other MindsDB object. - -## Usage - -Here is how to create variables in MindsDB. - -* Create variables using `SET` and save values either using the [`from_env()` function](/mindsdb_sql/functions/from_env) or directly. - -```sql -SET @my_env_var = from_env("MDB_MY_ENV_VAR") - -SET @my_value = "123456" -``` - -* Use variables to pass parameters when creating objects in MindsDB. - -Here is an example for [knowledge bases](/mindsdb_sql/knowledge_bases/overview). - -```sql -CREATE KNOWLEDGE_BASE my_kb -USING - embedding_model = { - "provider": "openai", - "model_name" : "text-embedding-3-large", - "api_key": @my_env_var - }, - ...; -``` diff --git a/docs/mindsdb_sql/knowledge_bases/alter.mdx b/docs/mindsdb_sql/knowledge_bases/alter.mdx deleted file mode 100644 index 4484ab49e3e..00000000000 --- a/docs/mindsdb_sql/knowledge_bases/alter.mdx +++ /dev/null @@ -1,102 +0,0 @@ ---- -title: How to Alter Existing Knowledge Bases -sidebarTitle: Alter KB ---- - -The `ALTER KNOWLEDGE_BASE` command enables users to modify the configuration of the existing knowledge base without the need to recreate it. - -This document lists parameters that can be altered, explains the process and the effect on the existing knowledge base. 
- -## `ALTER KNOWLEDGE_BASE` Syntax - -Here is the syntax used to alter the existing knowledge base. - -```sql -ALTER KNOWLEDGE_BASE <kb_name> -USING - <param_name> = <param_value>, - ...; -``` - -The following parameters can be altered: - -* `embedding_model` - - Users can alter only the API key of the provider used for the embedding model, while users cannot alter the provider and the model itself because it would be incompatible with the already embedded content that is stored in a knowledge base. - - ```sql - ALTER KNOWLEDGE_BASE my_kb - USING - embedding_model = { 'api_key': 'new-api-key' }; - ``` - - Upon altering the API key of the embedding model’s provider, ensure that the new API key has access to the same embedding model so that the knowledge base can continue to function without issues. - -* `reranking_model` - - Users can turn off reranking by setting `reranking_model = false`, or change the provider, API key, and model used for reranking. - - ```sql - ALTER KNOWLEDGE_BASE my_kb - USING - reranking_model = { 'provider': 'new_provider', 'model_name': 'new_model', 'api_key': 'new-api-key' }; - - ALTER KNOWLEDGE_BASE my_kb - USING - reranking_model = false; - ``` - - Upon updating the reranking model, the knowledge base will use the newly defined reranking model when reranking results, provided that reranking is turned on. - -* `content_columns` - - Users can change the content columns. - - ```sql - ALTER KNOWLEDGE_BASE my_kb - USING - content_columns=['content_col1', 'content_col2', ...]; - ``` - - Upon changing the content columns, all the previously inserted content stays unchanged. Now the knowledge base will be embedding content from columns defined in the most recent call to `ALTER KNOWLEDGE_BASE`. - -* `metadata_columns` - - Users can change the metadata columns, overriding the existing metadata columns. 
- - ```sql - ALTER KNOWLEDGE_BASE my_kb - USING - metadata_columns=['metadata_col1', 'metadata_col2', ...]; - ``` - - Upon changing the metadata columns: - - All metadata fields are stored in the knowledge base. No data is removed. - - Users can filter only by metadata fields defined in the most recent call to `ALTER KNOWLEDGE_BASE`. - - To be able to filter by all metadata fields, include them in the list as below. - - ```sql - ALTER KNOWLEDGE_BASE my_kb - USING - metadata_columns=['existing_metadata_fields', ..., 'new_metadata_fields', ...]; - ``` - -* `id_column` - - Users can change the ID column. - - ```sql - ALTER KNOWLEDGE_BASE my_kb - USING - id_column='my_id'; - ``` - - Upon changing the ID column, users must keep in mind that inserting data with an already existing ID value will update the existing row and not create a new one. - -* `storage` - - Users cannot update the underlying vector database of the existing knowledge base. - -* `preprocessing` - - Users can modify the [`preprocessing` parameters as defined here](/mindsdb_sql/knowledge_bases/insert_data#chunking-data). diff --git a/docs/mindsdb_sql/knowledge_bases/create.mdx b/docs/mindsdb_sql/knowledge_bases/create.mdx deleted file mode 100644 index f2fd9ede060..00000000000 --- a/docs/mindsdb_sql/knowledge_bases/create.mdx +++ /dev/null @@ -1,569 +0,0 @@ ---- -title: How to Create Knowledge Bases -sidebarTitle: Create KB ---- - -A knowledge base is an advanced system that organizes information based on semantic meaning rather than simple keyword matching. It integrates embedding models, reranking models, and vector stores to enable context-aware data retrieval. - -## `CREATE KNOWLEDGE_BASE` Syntax - -Here is the syntax for creating a knowledge base: - -```sql -CREATE KNOWLEDGE_BASE my_kb -USING - embedding_model = { - "provider": "openai", - "model_name" : "text-embedding-3-large", - "api_key": "sk-..." - }, - reranking_model = { - "provider": "openai", - "model_name": "gpt-4o", - "api_key": "sk-..." 
- }, - storage = my_vector_store.storage_table, - metadata_columns = ['date', 'creator', ...], - content_columns = ['review', 'content', ...], - id_column = 'id'; -``` - -Upon execution, it registers `my_kb` and associates the specified models and storage. `my_kb` is a unique identifier of the knowledge base within MindsDB. - -Here is how to list all knowledge bases: - -```sql -SHOW KNOWLEDGE_BASES; -``` - - -Users can use the variables and the [`from_env()` function](/mindsdb_sql/functions/from_env) to pass parameters when creating knowledge bases. - - - -As MindsDB stores objects, such as models or knowledge bases, inside [projects](/mindsdb_sql/sql/create/project), you can create a knowledge base inside a custom project. - -```sql -CREATE PROJECT my_project; - -CREATE KNOWLEDGE_BASE my_project.my_kb -USING - ... -``` - - -### Supported LLMs - -Below is the list of all language models supported for the `embedding_model` and `reranking_model` parameters. - -#### `provider = 'openai'` - -This provider is supported for both `embedding_model` and `reranking_model`. - - -Users can define the default embedding and reranking models from OpenAI in Settings of the MindsDB GUI. - -Furthermore, users can select `Custom OpenAI API` from the dropdown and use models from any OpenAI-compatible API. - - -When choosing `openai` as the model provider, users should define the following model parameters. - -* `model_name` stores the name of the OpenAI model to be used. -* `api_key` stores the OpenAI API key. - -Learn more about the [OpenAI integration with MindsDB here](/integrations/ai-engines/openai). - -#### `provider = 'openai_azure'` - -This provider is supported for both `embedding_model` and `reranking_model`. - - -Users can define the default embedding and reranking models from Azure OpenAI in Settings of the MindsDB GUI. - - -When choosing `openai_azure` as the model provider, users should define the following model parameters. 
- -* `model_name` stores the name of the OpenAI model to be used. -* `api_key` stores the OpenAI API key. -* `base_url` stores the base URL of the Azure instance. -* `api_version` stores the version of the Azure instance. - - -Users need to log in to their Azure OpenAI instance to retrieve all relevant parameter values. Next, click on `Explore Azure AI Foundry portal` and go to `Models + endpoints`. Select the model and copy the parameter values. - - -#### `provider = 'google'` - -This provider is supported for both `embedding_model` and `reranking_model`. - - -Users can define the default embedding and reranking models from Google in Settings of the MindsDB GUI. - - -When choosing `google` as the model provider, users should define the following model parameters. - -* `model_name` stores the name of the Google model to be used. -* `api_key` stores the Google API key. - -Learn more about the [Google Gemini integration with MindsDB here](/integrations/ai-engines/google_gemini). - -#### `provider = 'bedrock'` - -This provider is supported for both `embedding_model` and `reranking_model`. - -When choosing `bedrock` as the model provider, users should define the following model parameters. - -* `model_name` stores the name of the model available via Amazon Bedrock. -* `aws_access_key_id` stores a unique identifier associated with your AWS account, used to identify the user or application making requests to AWS. -* `aws_region_name` stores the name of the AWS region you want to send your requests to (e.g., `"us-west-2"`). -* `aws_secret_access_key` stores the secret key associated with your AWS access key ID. It is used to sign your requests securely. -* `aws_session_token` is an optional parameter that stores a temporary token used for short-term security credentials when using AWS Identity and Access Management (IAM) roles or temporary credentials. - -#### `provider = 'snowflake'` - -This provider is supported for both `embedding_model` and `reranking_model`. 
- -When choosing `snowflake` as the model provider, users should choose one of the available models from [Snowflake Cortex AI](https://www.snowflake.com/en/product/features/cortex/) and define the following model parameters. - -* `model_name` stores the name of the model available via Snowflake Cortex AI. -* `api_key` stores the Snowflake Cortex AI API key. -* `account_id` stores the Snowflake account ID. - - - -Follow the below steps to generate the API key. - -1. Generate a key pair according to [this instruction](https://docs.snowflake.com/en/user-guide/key-pair-auth) as below. - - * Execute these commands in the console: - - ```bash - # generate private key - openssl genrsa 2048 | openssl pkcs8 -topk8 -inform PEM -out rsa_key.p8 -nocrypt - # generate public key - openssl rsa -in rsa_key.p8 -pubout -out rsa_key.pub - ``` - - * Save the public key, that is, the content of rsa_key.pub, into your database user: - - ```sql - ALTER USER my_user SET RSA_PUBLIC_KEY = "" - ``` - -2. Verify the key pair with the database user. - - * Install `snowsql` following [this instruction](https://docs.snowflake.com/en/user-guide/snowsql-install-config). - - * Execute this command in the console: - - ```bash - snowsql -a -u my_user --private-key-path rsa_key.p8 - ``` - -3. Generate JWT token. - - * Download the Python script from [Snowflake's Developer Guide for Authentication](https://docs.snowflake.com/en/developer-guide/sql-api/authenticating). Here is a [direct download link](https://docs.snowflake.com/en/_downloads/aeb84cdfe91dcfbd889465403b875515/sql-api-generate-jwt.py). - - * Ensure to have the PyJWT module installed that is required for running the script. - - * Run the script using this command: - - ```bash - sql-api-generate-jwt.py --account --user my_user --private_key_file_path rsa_key.p8 - ``` - - This command returns the JWT token, which is used in the `api_key` parameter for the `snowflake` provider. 
- - - -#### `provider = 'ollama'` - -This provider is supported for both `embedding_model` and `reranking_model`. - - -Users can define the default embedding and reranking models from Ollama in Settings of the MindsDB GUI. - - -When choosing `ollama` as the model provider, users should define the following model parameters. - -* `model_name` stores the name of the model to be used. -* `base_url` stores the base URL of the Ollama instance. - -### `embedding_model` - -The embedding model is a required component of the knowledge base. It stores specifications of the embedding model to be used. - -Users can define the embedding model choosing one of the following options. - -**Option 1.** Use the `embedding_model` parameter to define the specification. - -```sql -CREATE KNOWLEDGE_BASE my_kb -USING - ... - embedding_model = { - - "provider": "azure_openai", - "model_name" : "text-embedding-3-large", - "api_key": "sk-abc123", - "base_url": "https://ai-6689.openai.azure.com/", - "api_version": "2024-02-01" - - }, - ... -``` - -**Option 2.** Define the default embedding model in the [MindsDB configuration file](/setup/custom-config). - - -You can define the default models in the Settings of the MindsDB Editor GUI. - - - -Note that if you define [`default_embedding_model` in the configuration file](/setup/custom-config#default_embedding_model), you do not need to provide the `embedding_model` parameter when creating a knowledge base. If provide both, then the values from the `embedding_model` parameter are used. - -When using `default_embedding_model` from the configuration file, the knowledge base saves this model internally. Therefore, when changing `default_embedding_model` in the configuration file to a different one after the knowledge base is created, it does not affect the already created knowledge bases. 
- - -```bash -"default_embedding_model": { - - "provider": "azure_openai", - "model_name" : "text-embedding-3-large", - "api_key": "sk-abc123", - "base_url": "https://ai-6689.openai.azure.com/", - "api_version": "2024-02-01" - -} -``` - -The embedding model specification includes: - -* `provider` -It is a required parameter. It defines the model provider. - -* `model_name` -It is a required parameter. It defines the embedding model name as specified by the provider. - -* `api_key` -The API key is required to access the embedding model assigned to a knowledge base. Users can provide it either in this `api_key` parameter, or in the `OPENAI_API_KEY` environment variable for `"provider": "openai"` and `AZURE_OPENAI_API_KEY` environment variable for `"provider": "azure_openai"`. - -* `base_url` -It is an optional parameter, which defaults to `https://api.openai.com/v1/`. It is a required parameter when using the `azure_openai` provider. It is the root URL used to send API requests. - -* `api_version` -It is an optional parameter. It is a required parameter when using the `azure_openai` provider. It defines the API version. - -### `reranking_model` - -The reranking model is an optional component of the knowledge base. It stores specifications of the reranking model to be used. - -Users can disable reranking features of knowledge bases by setting this parameter to `false`. - -```sql -CREATE KNOWLEDGE_BASE my_kb -USING - ... - reranking_model = false, - ... -``` - -Users can enable reranking features of knowledge bases by defining the reranking model choosing one of the following options. - -**Option 1.** Use the `reranking_model` parameter to define the specification. - -```sql -CREATE KNOWLEDGE_BASE my_kb -USING - ... - reranking_model = { - - "provider": "azure_openai", - "model_name" : "gpt-4o", - "api_key": "sk-abc123", - "base_url": "https://ai-6689.openai.azure.com/", - "api_version": "2024-02-01", - "method": "multi-class" - - }, - ... 
-``` - -**Option 2.** Define the default reranking model in the [MindsDB configuration file](/setup/custom-config). - - -You can define the default models in the Settings of the MindsDB Editor GUI. - - - -Note that if you define [`default_reranking_model` in the configuration file](/setup/custom-config#default-reranking-model), you do not need to provide the `reranking_model` parameter when creating a knowledge base. If provide both, then the values from the `reranking_model` parameter are used. - -When using `default_reranking_model` from the configuration file, the knowledge base saves this model internally. Therefore, when changing `default_reranking_model` in the configuration file to a different one after the knowledge base is created, it does not affect the already created knowledge bases. - - -```bash -"default_reranking_model": { - - "provider": "azure_openai", - "model_name" : "gpt-4o", - "api_key": "sk-abc123", - "base_url": "https://ai-6689.openai.azure.com/", - "api_version": "2024-02-01", - "method": "multi-class" - -} -``` - -The reranking model specification includes: - -* `provider` -It is a required parameter. It defines the model provider as listed in [supported LLMs](/mindsdb_sql/knowledge_bases/create#supported-llms). - -* `model_name` -It is a required parameter. It defines the embedding model name as specified by the provider. - -* `api_key` -The API key is required to access the embedding model assigned to a knowledge base. Users can provide it either in this `api_key` parameter, or in the `OPENAI_API_KEY` environment variable for `"provider": "openai"` and `AZURE_OPENAI_API_KEY` environment variable for `"provider": "azure_openai"`. - -* `base_url` -It is an optional parameter, which defaults to `https://api.openai.com/v1/`. It is a required parameter when using the `azure_openai` provider. It is the root URL used to send API requests. - -* `api_version` -It is an optional parameter. 
It is a required parameter when using the `azure_openai` provider. It defines the API version. - -* `method` -It is an optional parameter. It defines the method used to calculate the relevance of the output rows. The available options include `multi-class` and `binary`. It defaults to `multi-class`. - - -**Reranking Method** - -The `multi-class` reranking method classifies each document chunk (that meets any specified metadata filtering conditions) into one of four relevance classes: - -1. Not relevant with class weight of 0.25. -2. Slightly relevant with class weight of 0.5. -3. Moderately relevant with class weight of 0.75. -4. Highly relevant with class weight of 1. - -The overall `relevance_score` of a document is calculated as the sum of each chunk’s class weight multiplied by its class probability (from model logprob output). - -The `binary` reranking method simplifies classification by determining whether a document is relevant or not, without intermediate relevance levels. With this method, the overall `relevance_score` of a document is calculated based on the model log probability. - - -### `storage` - -The vector store is a required component of the knowledge base. It stores data in the form of embeddings. - - -When using [MindsDB via Docker Desktop Extension](/setup/self-hosted/docker-desktop), the `storage` parameter is optional. The default storage is PGVector that is integrated into the MindsDB Docker Desktop Extension. - -When using [MindsDB via PyPI](/contribute/install) or [MindsDB via Docker image](/setup/self-hosted/docker), the `storage` parameter is required. Users need to connect their vector storage, such as PGVector, using the `CREATE DATABASE` command and then use it for the storage of the knowledge bases. - - -The recommended vector store for knowledge bases is [PGVector](/integrations/vector-db-integrations/pgvector), specifically, version 0.8.0 or higher for a better performance. 
- -In order to provide the storage vector database, it is required to connect it to MindsDB beforehand. - -Here is an example for [PGVector](/integrations/vector-db-integrations/pgvector). - -```sql -CREATE DATABASE my_pgvector -WITH ENGINE = 'pgvector', -PARAMETERS = { - "host": "127.0.0.1", - "port": 5432, - "database": "postgres", - "user": "user", - "password": "password", - "distance": "cosine" -}; - -CREATE KNOWLEDGE_BASE my_kb -USING - ... - storage = my_pgvector.storage_table, - ... -``` - - -Note that you do not need to have the `storage_table` created as it is created when creating a knowledge base. - - -### `metadata_columns` - -The data inserted into the knowledge base can be classified as metadata, which enables users to filter the search results using defined data fields. - - -Note that source data column(s) included in `metadata_columns` cannot be used in `content_columns`, and vice versa. - - -This parameter is an array of strings that lists column names from the source data to be used as metadata. If not provided, then all inserted columns (except for columns defined as `id_column` and `content_columns`) are considered metadata columns. - -Here is an example of usage. A user wants to store the following data in a knowledge base. - -```sql -+----------+-------------------+------------------------+ -| order_id | product | notes | -+----------+-------------------+------------------------+ -| A1B | Wireless Mouse | Request color: black | -| 3XZ | Bluetooth Speaker | Gift wrap requested | -| Q7P | Laptop Stand | Prefer aluminum finish | -+----------+-------------------+------------------------+ -``` - - -Go to the *Complete Example* section below to find out how to access this sample data. - - -The `product` column can be used as metadata to enable metadata filtering. - -```sql -CREATE KNOWLEDGE_BASE my_kb -USING - ... - metadata_columns = ['product'], - ... 
-``` - -### `content_columns` - -The data inserted into the knowledge base can be classified as content, which is embedded by the embedding model and stored in the underlying vector store. - - -Note that source data column(s) included in `content_columns` cannot be used in `metadata_columns`, and vice versa. - - -This parameter is an array of strings that lists column names from the source data to be used as content and processed into embeddings. If not provided, the `content` column is expected by default when inserting data into the knowledge base. - -Here is an example of usage. A user wants to store the following data in a knowledge base. - -```sql -+----------+-------------------+------------------------+ -| order_id | product | notes | -+----------+-------------------+------------------------+ -| A1B | Wireless Mouse | Request color: black | -| 3XZ | Bluetooth Speaker | Gift wrap requested | -| Q7P | Laptop Stand | Prefer aluminum finish | -+----------+-------------------+------------------------+ -``` - - -Go to the *Complete Example* section below to find out how to access this sample data. - - -The `notes` column can be used as content. - -```sql -CREATE KNOWLEDGE_BASE my_kb -USING - ... - content_columns = ['notes'], - ... -``` - -### `id_column` - -The ID column uniquely identifies each source data row in the knowledge base. - -It is an optional parameter. If provided, this parameter is a string that contains the source data ID column name. If not provided, it is generated from the hash of the content columns. - -Here is an example of usage. A user wants to store the following data in a knowledge base. 
- -```sql -+----------+-------------------+------------------------+ -| order_id | product | notes | -+----------+-------------------+------------------------+ -| A1B | Wireless Mouse | Request color: black | -| 3XZ | Bluetooth Speaker | Gift wrap requested | -| Q7P | Laptop Stand | Prefer aluminum finish | -+----------+-------------------+------------------------+ -``` - - -Go to the *Complete Example* section below to find out how to access this sample data. - - -The `order_id` column can be used as ID. - -```sql -CREATE KNOWLEDGE_BASE my_kb -USING - ... - id_column = 'order_id', - ... -``` - - -Note that if the source data row is chunked into multiple chunks by the knowledge base (that is, to optimize the storage), then these rows in the knowledge base have the same ID value that identifies chunks from one source data row. - - - -**Available options for the ID column values** - -* User-Defined ID Column:

-When users define the `id_column` parameter, the values from the provided source data column are used to identify source data rows within the knowledge base. - -* User-Generated ID Column:

-When users do not have a column that uniquely identifies each row in their source data, they can generate the ID column values when inserting data into the knowledge base using functions like `HASH()` or `ROW_NUMBER()`. - -```sql -INSERT INTO my_kb ( - SELECT ROW_NUMBER() OVER (ORDER BY order_id) AS id, * - FROM sample_data.orders -); -``` - -* Default ID Column:

-If the `id_column` parameter is not defined, its default values are built from the hash of the content columns and follow the format: ``. -
- -### Example - -Here is a sample knowledge base that will be used for examples in the following content. - -```sql -CREATE KNOWLEDGE_BASE my_kb -USING - embedding_model = { - "provider": "openai", - "model_name" : "text-embedding-3-large", - "api_key": "sk-abc123" - }, - reranking_model = { - "provider": "openai", - "model_name": "gpt-4o", - "api_key": "sk-abc123" - }, - metadata_columns = ['product'], - content_columns = ['notes'], - id_column = 'order_id'; -``` - -## `DESCRIBE KNOWLEDGE_BASE` Syntax - -Users can get details about the knowledge base using the `DESCRIBE KNOWLEDGE_BASE` command. - -```sql -DESCRIBE KNOWLEDGE_BASE my_kb; -``` - -Here is the sample output: - -```sql -+---------+---------+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------+--------------------+----------------+-------+----------+ -| NAME | PROJECT | MODEL | STORAGE | PARAMS | INSERT_STARTED_AT | INSERT_FINISHED_AT | PROCESSED_ROWS | ERROR | QUERY_ID | -+---------+---------+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------+--------------------+----------------+-------+----------+ -| my_kb | mindsdb | [NULL] | my_kb_chromadb.default_collection | {"embedding_model": {"provider": "openai", "model_name": "text-embedding-ada-002", "api_key": "sk-xxx"}, "reranking_model": {"provider": "openai", "model_name": "gpt-4o", "api_key": "sk-xxx"}, "default_vector_storage": "my_kb_chromadb"} | [NULL] | [NULL] | [NULL] | [NULL]| [NULL] | 
-+---------+---------+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------+--------------------+----------------+-------+----------+ -``` - -## `DROP KNOWLEDGE_BASE` Syntax - -Here is the syntax for deleting a knowledge base: - -```sql -DROP KNOWLEDGE_BASE my_kb; -``` - -Upon execution, it removes the knowledge base with its content. - -Upon execution, it identifies matching records based on the user-defined condition and removes all associated data (metadata, content, chunks, embeddings) for matching records from the KB's storage. diff --git a/docs/mindsdb_sql/knowledge_bases/evaluate.mdx b/docs/mindsdb_sql/knowledge_bases/evaluate.mdx deleted file mode 100644 index 54546ea3c94..00000000000 --- a/docs/mindsdb_sql/knowledge_bases/evaluate.mdx +++ /dev/null @@ -1,131 +0,0 @@ ---- -title: How to Evaluate Knowledge Bases -sidebarTitle: Evaluate KB ---- - -Evaluating knowledge bases verifies how accurate and relevant is the data returned by the knowledge base. - -## `EVALUATE KNOWLEDGE_BASE` Syntax - -With the `EVALUATE KNOWLEDGE_BASE` command, users can evaluate the relevancy and accuracy of the documents and data returned by the knowledge base. - -Below is the complete syntax that includes both required and optional parameters. 
- -```sql -EVALUATE KNOWLEDGE_BASE my_kb -USING - test_table = my_datasource.my_test_table, - version = 'doc_id', - generate_data = { - 'from_sql': 'SELECT id, content FROM my_datasource.my_table', - 'count': 100 - }, - evaluate = false, - llm = { - 'provider': 'openai', - 'api_key':'sk-xxx', - 'model_name':'gpt-4' - }, - save_to = my_datasource.my_result_table; -``` - -### `test_table` - -This is a required parameter that stores the name of the table from one of the data sources connected to MindsDB. For example, `test_table = my_datasource.my_test_table` defines a table named `my_test_table` from a data source named `my_datasource`. - -This test table stores test data commonly in form of questions and answers. Its content depends on the `version` parameter defined below. - -Users can provide their own test data or have the test data generated by the `EVALUATE KNOWLEDGE_BASE` command, which is performed when setting the `generate_data` parameter defined below. - -### `version` - -This is an optional parameter that defines the version of the evaluator. If not defined, its default value is `doc_id`. - -* `version = 'doc_id'` - The evaluator checks whether the document ID returned by the knowledge base matched the expected document ID as defined in the test table. - -* `version = 'llm_relevancy'` - The evaluator uses a language model to rank and evaluate responses from the knowledge base. - -### `generate_data` - -This is an optional parameter used to configure the test data generation, which is saved into the table defined in the `test_table` parameter. If not defined, its default value is `false`, meaning that no test data is generated. - -Available values are as follows: - -* A dictionary containing the following values: - * `from_sql` defines the SQL query that fetches the test data. For example, `'from_sql': 'SELECT id, content FROM my_datasource.my_table'`. 
If not defined, it fetches test data from the knowledge base on which the `EVALUATE` command is executed: `SELECT chunk_content, id FROM my_kb`. - * `count` defines the size of the test dataset. For example, `'count': 100`. Its default value is 20. - - - When providing the `from_sql` parameter, it requires specific column names as follows: - - * With `version = 'doc_id'`, the `from_sql` parameter should contain a query that returns the `id` and `content` columns, like this: `'from_sql': 'SELECT id_column_name AS id, content_column_names AS content FROM my_datasource.my_table'` - - * With `version = 'llm_relevancy'`, the `from_sql` parameter should contain a query that returns the `content` column, like this: `'from_sql': 'SELECT content_column_names AS content FROM my_datasource.my_table'` - - -* A value of `true`, such as `generate_data = true`, which implies that default values for `from_sql` and `count` will be used. - -### `evaluate` - -This is an optional parameter that defines whether to evaluate the knowledge base. If not defined, its default value is `true`. - -Users can opt for setting it to false, `evaluate = false`, in order to generate test data into the test table without running the evaluator. - -### `llm` - -This is an optional parameter that defines a language model to be used for evaluations, if `version` is set to `llm_relevancy`. - -If not defined, its default value is the [`reranking_model` defined with the knowledge base](/mindsdb_sql/knowledge_bases/create#reranking-model). - -Users can define it with the `EVALUATE KNOWLEDGE_BASE` command in the same manner. - -```sql -EVALUATE KNOWLEDGE_BASE my_kb -USING - ... - llm = { - "provider": "azure_openai", - "model_name" : "gpt-4o", - "api_key": "sk-abc123", - "base_url": "https://ai-6689.openai.azure.com/", - "api_version": "2024-02-01", - "method": "multi-class" - }, - ... 
-``` - -### `save_to` - -This is an optional parameter that stores the name of the table from one of the data sources connected to MindsDB. For example, `save_to = my_datasource.my_result_table` defines a table named `my_result_table` from the data source named `my_datasource`. If not defined, the results are not saved into a table. - -This table is used to save the evaluation results. - -By default, evaluation results are returned after executing the `EVALUATE KNOWLEDGE_BASE` statement. - -### Evaluation Results - -When using `version = 'doc_id'`, the following columns are included in the evaluation results: - -- `total` stores the total number of questions. -- `total_found` stores the number of questions to which the knowledge bases provided correct answers. -- `retrieved_in_top_10` stores the number of top 10 questions to which the knowledge bases provided correct answers. -- `cumulative_recall` stores data that can be used to create a chart. -- `avg_query_time` stores the execution time of a search query of the knowledge base. -- `name` stores the knowledge base name. -- `created_at` stores the timestamp when the evaluation was created. - -When using `version = 'llm_relevancy'`, the following columns are included in the evaluation results: - -- `avg_relevancy` stores the average relevancy. -- `avg_relevance_score_by_k` stores the average relevancy at k. -- `avg_first_relevant_position` stores the average first relevant position. -- `mean_mrr` stores the Mean Reciprocal Rank (MRR). -- `hit_at_k` stores the Hit@k value. -- `bin_precision_at_k` stores the Binary Precision@k. -- `avg_entropy` stores the average relevance score entropy. -- `avg_ndcg` stores the average nDCG. -- `avg_query_time` stores the execution time of a search query of the knowledge base. -- `name` stores the knowledge base name. -- `created_at` stores the timestamp when the evaluation was created. 
diff --git a/docs/mindsdb_sql/knowledge_bases/examples.mdx b/docs/mindsdb_sql/knowledge_bases/examples.mdx deleted file mode 100644 index 95564e71087..00000000000 --- a/docs/mindsdb_sql/knowledge_bases/examples.mdx +++ /dev/null @@ -1,171 +0,0 @@ ---- -title: How to Use Knowledge Bases -sidebarTitle: Examples ---- - -This section contains examples of usage of knowledge bases. - -### Sales Data - -Here is the data that will be inserted into the knowledge base. - -```sql -+----------+-------------------+------------------------+ -| order_id | product | notes | -+----------+-------------------+------------------------+ -| A1B | Wireless Mouse | Request color: black | -| 3XZ | Bluetooth Speaker | Gift wrap requested | -| Q7P | Laptop Stand | Prefer aluminum finish | -+----------+-------------------+------------------------+ -``` - -You can access this sample data as below: - -```sql -CREATE DATABASE sample_data -WITH ENGINE = 'postgres', -PARAMETERS = { - "user": "demo_user", - "password": "demo_password", - "host": "samples.mindsdb.com", - "port": "5432", - "database": "demo", - "schema": "demo_data" -}; - -SELECT * FROM sample_data.orders; -``` - -Here is how to create a knowledge base specifically for the data. - -```sql -CREATE KNOWLEDGE_BASE my_kb -USING - embedding_model = { - "provider": "openai", - "model_name" : "text-embedding-3-large", - "api_key": "sk-abc123" - }, - reranking_model = { - "provider": "openai", - "model_name": "gpt-4o", - "api_key": "sk-abc123" - }, - metadata_columns = ['product'], - content_columns = ['notes'], - id_column = 'order_id'; -``` - -Here is how to insert the data. - -```sql -INSERT INTO my_kb -SELECT order_id, product, notes -FROM sample_data.orders; -``` - -Here is how to query the knowledge base. 
- -```sql -SELECT * -FROM my_kb -WHERE product = 'Wireless Mouse' -AND content = 'color' -AND relevance > 0.5; -``` - -### Financial Data - -You can access the sample data as below: - -```sql -CREATE DATABASE sample_data -WITH ENGINE = 'postgres', -PARAMETERS = { - "user": "demo_user", - "password": "demo_password", - "host": "samples.mindsdb.com", - "port": "5432", - "database": "demo", - "schema": "demo_data" -}; - -SELECT * FROM sample_data.financial_headlines; -``` - -Here is how to create a knowledge base specifically for the data. - -```sql -CREATE KNOWLEDGE_BASE my_kb -USING - embedding_model = { - "provider": "openai", - "model_name" : "text-embedding-3-large", - "api_key": "sk-xxx" - }, - reranking_model = { - "provider": "openai", - "model_name": "gpt-4o", - "api_key": "sk-xxx" - }, - metadata_columns = ['sentiment_labelled'], - content_columns = ['headline']; -``` - -Here is how to insert the data. - -```sql -INSERT INTO my_kb -SELECT * -FROM sample_data.financial_headlines -USING - batch_size = 500, - threads = 10; -``` - -Here is how to query the knowledge base. - -* Query without defined `LIMIT` - -```sql -SELECT * -FROM my_kb -WHERE content = 'investors'; -``` - -This query returns 10 rows, as the default `LIMIT` is set to 10. - -

- -

- -* Query with defined `LIMIT` - -```sql -SELECT * -FROM my_kb -WHERE content = 'investors' -LIMIT 20; -``` - -This query returns 20 rows, as the user-defined `LIMIT` is set to 20. - -

- -

- -* Query with defined `LIMIT` and `relevance` - -```sql -SELECT * -FROM my_kb -WHERE content = 'investors' -AND relevance >= 0.8 -LIMIT 20; -``` - -This query may return 20 or fewer rows, depending on whether the relevance scores of the rows match the user-defined condition. - -

- -

diff --git a/docs/mindsdb_sql/knowledge_bases/hybrid_search.mdx b/docs/mindsdb_sql/knowledge_bases/hybrid_search.mdx deleted file mode 100644 index d3ade014522..00000000000 --- a/docs/mindsdb_sql/knowledge_bases/hybrid_search.mdx +++ /dev/null @@ -1,132 +0,0 @@ ---- -title: How to Hybrid Search Knowledge Bases -sidebarTitle: Hybrid Search ---- - -Knowledge bases support two primary search methods: [semantic search](/mindsdb_sql/knowledge_bases/query#semantic-search) and [metadata/keyword search](/mindsdb_sql/knowledge_bases/query#metadata-filtering). Each method has its strengths and ideal use cases. - -Semantic similarity search uses vector embeddings to retrieve content that is semantically related to a given query. This is especially powerful when users are searching for concepts, ideas, or questions expressed in natural language. - -However, semantic search may fall short when users are looking for specific keywords, such as acronyms, internal terminology, or custom identifiers. These types of terms are often not well-represented in the embedding model's training data. As a result, embedding-based semantic search might entirely miss results that do contain the exact keyword. - -To address this gap, knowledge bases offer hybrid search, which combines the best of both worlds: semantic similarity and exact keyword matching. Hybrid search ensures that results relevant by meaning and results matching specific terms are both considered and ranked appropriately. - -## Enabling Hybrid Search - -To use hybrid search, you first need to [create a knowledge base](/mindsdb_sql/knowledge_bases/create) and [insert data into it](/mindsdb_sql/knowledge_bases/insert_data). - -Hybrid search can be enabled at the time of querying the knowledge base by specifying the appropriate configuration options, as shown below. 
- -```sql -SELECT * from my_kb -WHERE - content = 'ACME-213' -AND hybrid_search_alpha = 0.8; -``` - -The `hybrid_search_alpha` parameter enables hybrid search functionality and allows you to control the balance between semantic and keyword relevance, with values varying between 0 (more importance on keyword relevance) and 1 (more importance on semantic relevance) and the default value of 0.5. - -Alternatively, you can use the `hybrid_search` parameter and set it to `true` in order to enable hybrid search with default `hybrid_search_alpha = 0.5`. - - -Note that hybrid search works only on knowledge bases that use PGVector as a [storage](/mindsdb_sql/knowledge_bases/create#storage). Ensure to [install the PGVector handler to connect it to MindsDB](/integrations/vector-db-integrations/pgvector#usage). - - -Knowledge bases provide optional [reranking features](/mindsdb_sql/knowledge_bases/create#reranking-model) that users can decide to use in specific use cases. When the reranker is available, it is used to rerank results from both the full-text index search and the embedding-based semantic search. It estimates the relevance of each document and orders them from most to least relevant. - -However, users can disable the reranker using `reranking = false`, which might be desirable for performance reasons or specific use cases. When reranking is disabled, the system still needs to combine the two search result sets. In this case, the final ranking of each document is computed as a weighted average of the embedding similarity score and the [BM25](https://en.wikipedia.org/wiki/Okapi_BM25) keyword relevance score from the full-text search. - - -**Relevance-Based Document Selection for Reranking** - -When retrieving documents from the full-text index, there is a practical limit on how many documents can be passed to the reranker, since reranking is typically computationally expensive.
To ensure that only the most promising candidates are selected for reranking, we apply relevance heuristics during the keyword search stage. - -One widely used heuristic is BM25, a ranking function that scores documents based on their keyword relevance to the user query. BM25 considers both the frequency of a keyword within a document and how common that keyword is across the entire corpus. - -By scoring documents using BM25, the system can prioritize more relevant matches and limit reranker input to a smaller, high-quality subset of documents. This helps achieve a balance between performance and retrieval accuracy in hybrid search. - - -This is the so-called alpha reranking. - -## Implementation of Hybrid Search - -Hybrid search in knowledge bases combines semantic similarity and keyword-based search methods into a unified search mechanism. - -The diagram below illustrates the hybrid search process. - -

- -

- -When a user submits a query, it is simultaneously routed through two parallel search mechanisms: an embedding-based semantic search (left) and a full-text keyword search (right). - -Below is a breakdown of how hybrid search works under the hood: - -* **Semantic Search** (path on the left) - - It takes place in parallel with the keyword search. Semantic search starts by embedding the search query and searching against the content of the knowledge base. This results in a set of relevant documents found. - -* **Keyword Search** (path on the right) - - It takes place in parallel with the semantic search. The system performs a keyword-based search, using one or more keywords provided in the search query, over the content of the knowledge base. To ensure performance, especially at scale, when dealing with millions of documents, we rely on a full-text indexing system. - - This index is typically built as an inverted index, mapping keywords to the documents in which they appear. It allows for efficient lookups and rapid retrieval of all entries that contain the given terms. - - - Storage of Full-Text Index - - Just as embeddings are stored to support semantic similarity search, a full-text index must also be stored to enable efficient keyword-based retrieval. This index serves as the foundation for fast and scalable full-text search and is tightly integrated with the knowledge base. - - Each knowledge base maintains its own dedicated full-text index, built and updated as documents are ingested or modified. Maintaining this index alongside the stored embeddings ensures that both semantic and keyword search capabilities are always available and performant, forming the backbone of hybrid search. - - - This step ensures that exact matches, like specific acronyms, ticket numbers, or product identifiers, can be found quickly, even if the semantic model wouldn’t have surfaced them. - -* **Combining Results** - - At this step, results from both searches are merged. 
Semantic search returned documents similar in meaning to the user’s query using embeddings, while keyword search returned documents containing the keywords extracted from the user’s query. This complete result set is passed to the reranker. - -* **Reranking** - - The results are reranked, considering relevance scores from both search types, and ordered accordingly. - - There are two mechanisms for reranking the results: - - * Using the reranking model of the knowledge base - - If the knowledge base was created with the reranking model provided, the hybrid search uses it to rerank the result set. - - ```sql - SELECT * from my_kb - WHERE - content = 'ACME-213' - AND hybrid_search = true; -- here, hybrid_search_alpha = 0.5 - ``` - - In this query, the hybrid search uses the reranking features enabled with the knowledge base. - - * Using the alpha reranking that can be further customized for hybrid search - - Users can opt for using the alpha reranking that can be customized specifically for hybrid search. By setting the `hybrid_search_alpha` parameter to any value between 0 and 1, users can give importance to results from the keyword search (if the value is closer to 0) or the semantic search (if the value is closer to 1). - - ```sql - SELECT * from my_kb - WHERE - content = 'ACME-213' - AND hybrid_search_alpha = 0.4 - AND reranking = false; - ``` - - This query uses hybrid search with emphasis on results from the keyword search. - - - Relevance-Based Document Selection for Reranking - - When retrieving documents from the full-text index, there is a practical limit on how many documents can be passed to the reranker, since reranking is typically computationally expensive. To ensure that only the most promising candidates are selected for reranking, we apply relevance heuristics during the keyword search stage. - - One widely used heuristic is BM25, a ranking function that scores documents based on their keyword relevance to the user query.
BM25 considers both the frequency of a keyword within a document and how common that keyword is across the entire corpus. - - By scoring documents using BM25, the system can prioritize more relevant matches and limit reranker input to a smaller, high-quality subset of documents. This helps achieve a balance between performance and retrieval accuracy in hybrid search. - - - Overall, the reranker ensures that highly relevant keyword matches appear alongside semantically similar results, offering users a balanced and accurate response. diff --git a/docs/mindsdb_sql/knowledge_bases/insert_data.mdx b/docs/mindsdb_sql/knowledge_bases/insert_data.mdx deleted file mode 100644 index 941204f70e1..00000000000 --- a/docs/mindsdb_sql/knowledge_bases/insert_data.mdx +++ /dev/null @@ -1,360 +0,0 @@ ---- -title: How to Insert Data into Knowledge Bases -sidebarTitle: Insert Data into KB ---- - -Knowledge Bases (KBs) organize data across data sources, including databases, files, documents, webpages, enabling efficient search capabilities. - -Here is what happens to data when it is inserted into the knowledge base. - -

- -

- -Upon inserting data into the knowledge base, it is split into chunks, transformed into the embedding representation to enhance the search capabilities, and stored in a vector database. - -## `INSERT INTO` Syntax - -Here is the syntax for inserting data into a knowledge base: - -```sql -INSERT INTO my_kb -SELECT order_id, product, notes -FROM sample_data.orders; -``` - -Upon execution, it inserts data into a knowledge base, using the embedding model to embed it into vectors before inserting into an underlying vector database. - - -The status of the `INSERT INTO` statement is logged in the `information_schema.queries` table with the timestamp when it was run, and can be queried as follows: - -```sql -SELECT * -FROM information_schema.queries; -``` - - - -**Default Batch Inserts** -The batch inserts into knowledge bases (see "Insert Data using Partitions") are enabled by default for all vector stores except PGVector. Note that in order for batch inserts to work by default, users must provide the `id_column` when creating the knowledge base. - -To enable default batch inserts for PGVector, set the `DISABLE_PGVECTOR_AUTOBATCH` environment variable or the `knowledge_bases.disable_pgvector_autobatch` configuration variable to `false` (it is set to `true` by default). - -To disable default batch inserts, set the `DISABLE_AUTOBATCH` environment variable or the `knowledge_bases.disable_autobatch` configuration variable to `true` (it is set to `false` by default). - - - -To speed up data insertion, you can use these performance optimization flags: - -**Skip duplicate checking (kb_no_upsert)** -```sql -INSERT INTO my_kb -SELECT * -FROM table_name -USING kb_no_upsert = true; -``` -This skips all duplicate checking and directly inserts data. Use only when the knowledge base is empty (initial data load).
- -**Skip existing items (kb_skip_existing)** -```sql -INSERT INTO my_kb -SELECT * -FROM table_name -USING kb_skip_existing = true; -``` -This checks for existing items and skips them entirely, including avoiding embedding calculation for existing content. More efficient than upsert when you only want to insert new items. - - - -**Handling duplicate data while inserting into the knowledge base** - -Knowledge bases uniquely identify data rows using an ID column, which prevents from inserting duplicate data, as follows. - -* **Case 1: Inserting data into the knowledge base without the `id_column` defined.** - - When users do not define the `id_column` during the creation of a knowledge base, MindsDB generates the ID for each row using a hash of the content columns, as [explained here](/mindsdb_sql/knowledge_bases/create#id-column). - - **Example:** - - If two rows have exactly the same content in the content columns, their hash (and thus their generated ID) will be the same. - - Note that duplicate rows are skipped and not inserted. - - Since both rows in the below table have the same content, only one row will be inserted. - - | name | age | - |-------|-----| - | Alice | 25 | - | Alice | 25 | - -* **Case 2: Inserting data into the knowledge base with the `id_column` defined.** - - When users define the `id_column` during the creation of a knowledge base, then the knowledge base uses that column's values as the row ID. - - **Example:** - - If the `id_column` has duplicate values, the knowledge base skips the duplicate row(s) during the insert. - - The second row in the below table has the same `id` as the first row, so only one of these rows is inserted. - - | id | name | age | - |-----|-------|-----| - | 1 | Alice | 25 | - | 1 | Bob | 30 | - -**Best practice** - -Ensure the `id_column` uniquely identifies each row to avoid unintentional data loss due to duplicate ID skipping. 
- -**Performance optimization for duplicate handling** - -For better performance when handling duplicates, you can use: -- `kb_skip_existing = true`: Checks for existing IDs and skips them completely (no embedding calculation, more efficient) -- `kb_no_upsert = true`: Skips duplicate checking entirely (fastest, use only for initial load into empty KB) - - - -### Update Existing Data - -In order to update existing data in the knowledge base, insert data with the column ID that you want to update and the updated content. - -Here is an example of usage. A knowledge base stores the following data. - -```sql -+----------+-------------------+------------------------+ -| order_id | product | notes | -+----------+-------------------+------------------------+ -| A1B | Wireless Mouse | Request color: black | -| 3XZ | Bluetooth Speaker | Gift wrap requested | -| Q7P | Laptop Stand | Prefer aluminum finish | -+----------+-------------------+------------------------+ -``` - -A user updated `Laptop Stand` to `Aluminum Laptop Stand`. - -```sql -+----------+-----------------------+------------------------+ -| order_id | product | notes | -+----------+-----------------------+------------------------+ -| A1B | Wireless Mouse | Request color: black | -| 3XZ | Bluetooth Speaker | Gift wrap requested | -| Q7P | Aluminum Laptop Stand | Prefer aluminum finish | -+----------+-----------------------+------------------------+ -``` - - -Go to the *Complete Example* section below to find out how to access this sample data. - - -Here is how to propagate this change into the knowledge base. - -```sql -INSERT INTO my_kb -SELECT order_id, product, notes -FROM sample_data.orders -WHERE order_id = 'Q7P'; -``` - -The knowledge base matches the ID value to the existing one and updates the data if required. - -### Insert Data using Partitions - -In order to optimize the performance of data insertion into the knowledge base, users can set up partitions and threads to insert batches of data in parallel. 
This also enables tracking the progress of the data insertion process, including cancelling and resuming it if required. - -Here is an example. - -```sql -INSERT INTO my_kb -SELECT order_id, product, notes -FROM sample_data.orders -USING - batch_size = 200, - track_column = order_id, - threads = 10, - error = 'skip'; -``` - -The parameters include the following: - -* `batch_size` defines the number of rows fetched per iteration to optimize data extraction from the source. It defaults to 1000. - -* `threads` defines threads for running partitions. Note that if the [ML task queue](/setup/custom-config#overview-of-config-parameters) is enabled, threads are used automatically. The available values for `threads` are: - - a number of threads to be used, for example, `threads = 10`, - - a boolean value that defines whether to enable threads, setting `threads = true`, or disable threads, setting `threads = false`. - -* `track_column` defines the column used for sorting data before partitioning. - -* `error` defines the error processing options. The available values include `raise`, used to raise errors as they come, or `skip`, used to suppress errors. It defaults to `raise` if not provided. - -After executing the `INSERT INTO` statement with the above parameters, users can view the data insertion progress by querying the `information_schema.queries` table. - -```sql -SELECT * FROM information_schema.queries; -``` - -Users can cancel the data insertion process using the process ID from the `information_schema.queries` table. - -```sql -SELECT query_cancel(1); -``` - -Note that canceling the query will not remove the already inserted data. - -Users can resume the data insertion process using the process ID from the `information_schema.queries` table. - -```sql -SELECT query_resume(1); -``` - -### Chunking Data - -Upon inserting data into the knowledge base, the data chunking is performed in order to optimize the storage and search of data.
- -Each chunk is identified by its chunk ID of the following format: `:of:to`. - -#### Text - -Users can opt for defining the chunking parameters when creating a knowledge base. - -```sql -CREATE KNOWLEDGE_BASE my_kb -USING - ... - preprocessing = { - "text_chunking_config" : { - "chunk_size": 2000, - "chunk_overlap": 200 - } - }, - ...; -``` - -The `chunk_size` parameter defines the size of the chunk as the number of characters. And the `chunk_overlap` parameter defines the number of characters that should overlap between subsequent chunks. - -#### JSON - -Users can opt for defining the chunking parameters specifically for JSON data. - -```sql -CREATE KNOWLEDGE_BASE my_kb -USING - ... - preprocessing = { - "type": "json_chunking", - "json_chunking_config" : { - ... - } - }, - ...; -``` - -When the `type` of chunking is set to `json_chunking`, users can configure it by setting the following parameter values in the `json_chunking_config` parameter: - -* `flatten_nested` -It is of the `bool` data type with the default value of `True`. -It defines whether to flatten nested JSON structures. - -* `include_metadata` -It is of the `bool` data type with the default value of `True`. -It defines whether to include original metadata in chunks. - -* `chunk_by_object` -It is of the `bool` data type with the default value of `True`. -It defines whether to chunk by top-level objects (`True`) or create a single document (`False`). - -* `exclude_fields` -It is of the `List[str]` data type with the default value of an empty list. -It defines the list of fields to exclude from chunking. - -* `include_fields` -It is of the `List[str]` data type with the default value of an empty list. -It defines the list of fields to include in chunking (if empty, all fields except excluded ones are included). - -* `metadata_fields` -It is of the `List[str]` data type with the default value of an empty list. 
-It defines the list of fields to extract into metadata for filtering (can include nested fields using dot notation). If empty, all primitive fields will be extracted (top-level fields if available, otherwise all primitive fields in the flattened structure). - -* `extract_all_primitives` -It is of the `bool` data type with the default value of `False`. -It defines whether to extract all primitive values (strings, numbers, booleans) into metadata. - -* `nested_delimiter` -It is of the `str` data type with the default value of `"."`. -It defines the delimiter for flattened nested field names. - -* `content_column` -It is of the `str` data type with the default value of `"content"`. -It defines the name of the content column for chunk ID generation. - -### Underlying Vector Store - -Each knowledge base has its underlying vector store that stores data inserted into the knowledge base in the form of embeddings. - -Users can query the underlying vector store as follows. - -* KB with the default ChromaDB vector store: - -```sql -SELECT id, content, metadata, embeddings -FROM _chromadb.storage_table; -``` - -* KB with user-defined vector store (either [PGVector](/integrations/vector-db-integrations/pgvector) or [ChromaDB](/integrations/vector-db-integrations/chromadb)): - -```sql -SELECT id, content, metadata, embeddings -FROM .; -``` - -### Example - -Here a sample knowledge base created in the previous **Example** section is inserted into. - -```sql -INSERT INTO my_kb -SELECT order_id, product, notes -FROM sample_data.orders; -``` - - -When inserting into a knowledge base where the `content_columns` parameter was not specified, the column storing content must be aliased `AS content` as below. - -```sql -CREATE KNOWLEDGE_BASE my_kb -USING - ... - id_column = 'order_id', - ... 
-``` - -```sql -INSERT INTO my_kb -SELECT order_id, notes AS content -FROM sample_data.orders; -``` - - -## `DELETE FROM` Syntax - -Here is the syntax for deleting from a knowledge base: - -```sql -DELETE FROM my_kb -WHERE id = 'A1B'; -``` - -## `CREATE INDEX ON KNOWLEDGE_BASE` Syntax - -Users can create an index on the knowledge base to speed up the search operations. - -```sql -CREATE INDEX ON KNOWLEDGE_BASE my_kb; -``` - - -Note that this feature works only when PGVector is used as the [storage of the knowledge base](/mindsdb_sql/knowledge_bases/create#storage), as ChromaDB provides the index features by default. - - -Upon executing this statement, an index is created on the knowledge base's underlying vector store. This is essentially a database index created on the vector database. - -Note that having an index on the knowledge base may reduce the speed of the insert operations. Therefore, it is recommended to insert bulk data into the knowledge base before creating an index. The index improves performance of querying the knowledge base, while it may slow down subsequent data inserts. diff --git a/docs/mindsdb_sql/knowledge_bases/overview.mdx b/docs/mindsdb_sql/knowledge_bases/overview.mdx deleted file mode 100644 index 8eb446bf699..00000000000 --- a/docs/mindsdb_sql/knowledge_bases/overview.mdx +++ /dev/null @@ -1,130 +0,0 @@ ---- -title: How Knowledge Bases Work -sidebarTitle: Overview ---- - -A knowledge base is an advanced AI-table that organizes information based on semantic meaning rather than simple keyword matching. It integrates embedding models, reranking models, and vector stores to enable context-aware data retrieval. - -By performing semantic reasoning across multiple data points, a knowledge base delivers deeper insights and more accurate responses, making it a powerful tool for intelligent data access. - - - -Before diving into the syntax, here is a quick walkthrough showing how knowledge bases work in MindsDB.
- -We start by creating a knowledge base and inserting data. Next we can run semantic search queries with metadata filtering. - - - - Use the `CREATE KNOWLEDGE_BASE` command to create a knowledge base, specifying all its components. - - ```sql - CREATE KNOWLEDGE_BASE my_kb - USING - embedding_model = { - "provider": "openai", - "model_name" : "text-embedding-3-large", - "api_key": "sk-abc123" - }, - reranking_model = { - "provider": "openai", - "model_name": "gpt-4o", - "api_key": "sk-abc123" - }, - metadata_columns = ['product'], - content_columns = ['notes'], - id_column = 'order_id'; - ``` - - - In this example, we use a simple dataset containing customer notes for product orders which will be inserted into the knowledge base. - - ```sql - +----------+-----------------------+------------------------+ - | order_id | product | notes | - +----------+-----------------------+------------------------+ - | A1B | Wireless Mouse | Request color: black | - | 3XZ | Bluetooth Speaker | Gift wrap requested | - | Q7P | Aluminum Laptop Stand | Prefer aluminum finish | - +----------+-----------------------+------------------------+ - ``` - - Use the `INSERT INTO` command to ingest data into the knowledge base. - - ```sql - INSERT INTO my_kb - SELECT order_id, product, notes - FROM sample_data.orders; - ``` - - - Query the knowledge base using semantic search. 
- - ```sql - SELECT * - FROM my_kb - WHERE content = 'color preference' - ``` - - This query returns: - - ```sql - +-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ - | id | chunk_id | chunk_content | metadata | product | distance | relevance | - +-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ - | A1B | A1B_notes:1of1:0to20 | Request color: black | {"chunk_index":0,"content_column":"notes","end_char":20,"original_doc_id":"A1B_notes","original_row_id":"A1B","product":"Wireless Mouse","source":"TextChunkingPreprocessor","start_char":0} | Wireless Mouse | 0.5743341242061104 | 0.5093188026135379 | - | Q7P | Q7P_notes:1of1:0to22 | Prefer aluminum finish | {"chunk_index":0,"content_column":"notes","end_char":22,"original_doc_id":"Q7P_notes","original_row_id":"Q7P","product":"Aluminum Laptop Stand","source":"TextChunkingPreprocessor","start_char":0} | Aluminum Laptop Stand | 0.7744703514692067 | 0.2502580835880018 | - | 3XZ | 3XZ_notes:1of1:0to19 | Gift wrap requested | {"chunk_index":0,"content_column":"notes","end_char":19,"original_doc_id":"3XZ_notes","original_row_id":"3XZ","product":"Bluetooth Speaker","source":"TextChunkingPreprocessor","start_char":0} | Bluetooth Speaker | 0.8010851611432231 | 0.2500003885558766 | - 
+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ - ``` - - - Query the knowledge base using semantic search and define the `relevance` parameter to receive only the best matching data for your use case. - - ```sql - SELECT * - FROM my_kb - WHERE content = 'color' - AND relevance >= 0.2502; - ``` - - This query returns: - - ```sql - +-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ - | id | chunk_id | chunk_content | metadata | product | distance | relevance | - +-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ - | A1B | A1B_notes:1of1:0to20 | Request color: black | {"chunk_index":0,"content_column":"notes","end_char":20,"original_doc_id":"A1B_notes","original_row_id":"A1B","product":"Wireless Mouse","source":"TextChunkingPreprocessor","start_char":0} | Wireless Mouse | 0.5743341242061104 | 0.5093188026135379 | - | Q7P | Q7P_notes:1of1:0to22 | Prefer aluminum finish | {"chunk_index":0,"content_column":"notes","end_char":22,"original_doc_id":"Q7P_notes","original_row_id":"Q7P","product":"Aluminum Laptop Stand","source":"TextChunkingPreprocessor","start_char":0} | Aluminum Laptop Stand | 0.7744703514692067 | 0.2502580835880018 | - 
+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ - ``` - - - Add metadata filtering to focus your search. - - ```sql - SELECT * - FROM my_kb - WHERE product = 'Wireless Mouse' - AND content = 'color' - AND relevance >= 0.2502; - ``` - - This query returns: - - ```sql - +-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+--------------------+-------------------+ - | id | chunk_id | chunk_content | metadata | product | distance | relevance | - +-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+--------------------+-------------------+ - | A1B | A1B_notes:1of1:0to20 | Request color: black | {"chunk_index":0,"content_column":"notes","end_char":20,"original_doc_id":"A1B_notes","original_row_id":"A1B","product":"Wireless Mouse","source":"TextChunkingPreprocessor","start_char":0} | Wireless Mouse | 0.5743341242061104 | 0.504396172197583 | - +-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+--------------------+-------------------+ - ``` - - - -The following sections explain the syntax and other features of knowledge bases. 
diff --git a/docs/mindsdb_sql/knowledge_bases/query.mdx b/docs/mindsdb_sql/knowledge_bases/query.mdx deleted file mode 100644 index 0059afa12cd..00000000000 --- a/docs/mindsdb_sql/knowledge_bases/query.mdx +++ /dev/null @@ -1,381 +0,0 @@ ---- -title: How to Query Knowledge Bases -sidebarTitle: Query KB ---- - -Knowledge Bases support two primary querying approaches: semantic search and metadata filtering, each of which offers different filtering capabilities, including filtering by the relevance score to ensure only data most relevant to the query is returned. - -* **Semantic Search** - - Semantic search enables users to query Knowledge Bases using natural language. When searching semantically, you reference the content column in your SQL statement. MindsDB will interpret the input as a semantic query and use vector-based similarity to find relevant results. - - ```sql - SELECT * FROM my_kb - WHERE content = 'what document types store reviews?'; - ``` - - - Only specific operators are allowed when filtering semantically using the content column. - - * Standard vector search: `content = ‘xxx’`, `content LIKE ‘xxx’` - * Exclusions from search: `id != xxx`, `id <> xxx`, `content NOT LIKE ‘zzz’` - * Nested queries: `id NOT IN (SELECT DISTINCT id FROM my_kb WHERE content = ‘xxx’)` - * Multiple queries: `content IN (‘xxx’, ‘yyy’)` which is equivalent to `content = ‘xxx’ OR content = ‘yyy’`, `content NOT IN (‘zzz’, ‘aaa’)` - * Logical operators: `content = ‘xxx’ OR content = ‘yyy’` which is a union of results for both conditions, `content = ‘xxx’ AND content = ‘yyy’` which is an intersection of results for both conditions - - -* **Metadata Filtering** - - It allows users to query Knowledge Bases based on the available metadata fields. These fields can be used in the `WHERE` clause of a SQL statement. 
- - ```sql - SELECT * FROM my_kb - WHERE document_type = 'cover letter' - AND document_author = 'bot'; - ``` - - - You can apply a variety of filtering conditions to metadata columns, such as equality checks, range filters, or pattern matches. - - * Equality checks: `=`, `<>`, `!=` - * Range filters: `>`, `<`, `>=`, `<=`, `BETWEEN ... AND ...` - * Pattern matching: `LIKE`, `NOT LIKE`, `IN`, `NOT IN` - * Logical operators: `AND`, `OR`, `NOT` - - -* **Relevance Filtering** - - Every semantic search result is assigned a relevance score, which indicates how closely a given entry matches your query. You can filter results by this score to ensure only the most relevant entries are returned. - - - Here is how to fine-tune the filtering of data. - - * Start by querying the knowledge base without a WHERE clause on the relevance column. This will show you a range of relevance scores returned by your query. - - * Determine a cutoff relevance value that fits your use case. For example, `relevance > 0.75`. - - * Re-run your query with the condition on `relevance` to restrict results to those above your chosen threshold. The result set contains only data with relevance greater than 0.75. - - ```sql - SELECT * FROM my_kb - WHERE content = 'what document types store reviews?' - AND relevance > 0.75; - ``` - - - -See more [examples here](/mindsdb_sql/knowledge_bases/query#examples). - - -## `SELECT FROM KB` Syntax - -Knowledge bases provide an abstraction that enables users to see the stored data. - -Note that here a sample knowledge base created and inserted into in the previous **Example** sections is searched.
- -```sql -SELECT * -FROM my_kb; -``` - -Here is the sample output: - -```sql -+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ -| id | chunk_id | chunk_content | metadata | product | distance | relevance | -+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ -| A1B | A1B_notes:1of1:0to20 | Request color: black | {"chunk_index":0,"content_column":"notes","end_char":20,"original_doc_id":"A1B_notes","original_row_id":"A1B","product":"Wireless Mouse","source":"TextChunkingPreprocessor","start_char":0} | Wireless Mouse | 0.5743341242061104 | 0.5093188026135379 | -| Q7P | Q7P_notes:1of1:0to22 | Prefer aluminum finish | {"chunk_index":0,"content_column":"notes","end_char":22,"original_doc_id":"Q7P_notes","original_row_id":"Q7P","product":"Aluminum Laptop Stand","source":"TextChunkingPreprocessor","start_char":0} | Aluminum Laptop Stand | 0.7744703514692067 | 0.2502580835880018 | -| 3XZ | 3XZ_notes:1of1:0to19 | Gift wrap requested | {"chunk_index":0,"content_column":"notes","end_char":19,"original_doc_id":"3XZ_notes","original_row_id":"3XZ","product":"Bluetooth Speaker","source":"TextChunkingPreprocessor","start_char":0} | Bluetooth Speaker | 0.8010851611432231 | 0.2500003885558766 | 
-+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ -``` - -The following columns are stored in the knowledge base. - -* `id` -It stores values from the column defined in the `id_column` parameter when creating the knowledge base. These are the source data IDs. - -* `chunk_id` -Knowledge bases chunk the inserted data in order to fit the defined chunk size. If the chunking is performed, the following chunk ID format is used: `:of:to`. - -* `chunk_content` -It stores values from the column(s) defined in the `content_columns` parameter when creating the knowledge base. - -* `metadata` -It stores the general metadata and the metadata defined in the `metadata_columns` parameter when creating the knowledge base. - -* `distance` -It stores the calculated distance between the chunk's content and the search phrase. - -* `relevance` -It stores the calculated relevance of the chunk as compared to the search phrase. Its values are between 0 and 1. - - -Note that the calculation method of `relevance` differs as follows: - -- When the ranking model is provided, the default `relevance` is equal or greater than 0, unless defined otherwise in the `WHERE` clause. -- When the reranking model is not provided and the `relevance` is not defined in the query, then no relevance filtering is applied and the output includes all rows matched based on the similarity and metadata search. -- When the reranking model is not provided but the `relevance` is defined in the query, then the relevance is calculated based on the `distance` column (`1/(1+ distance)`) and the `relevance` value is compared with this relevance value to filter the output. 
- - -### Semantic Search - -Users can query a knowledge base using semantic search by providing the search phrase (called `content`) to be searched for. - -```sql -SELECT * -FROM my_kb -WHERE content = 'color' -``` - -Alternatively, users can filter by the `chunk_content` column of the knowledge base. - -```sql -SELECT * -FROM my_kb -WHERE chunk_content LIKE '%color%' -``` - -Here is the output: - -```sql -+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ -| id | chunk_id | chunk_content | metadata | product | distance | relevance | -+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ -| A1B | A1B_notes:1of1:0to20 | Request color: black | {"chunk_index":0,"content_column":"notes","end_char":20,"original_doc_id":"A1B_notes","original_row_id":"A1B","product":"Wireless Mouse","source":"TextChunkingPreprocessor","start_char":0} | Wireless Mouse | 0.5743341242061104 | 0.5093188026135379 | -| Q7P | Q7P_notes:1of1:0to22 | Prefer aluminum finish | {"chunk_index":0,"content_column":"notes","end_char":22,"original_doc_id":"Q7P_notes","original_row_id":"Q7P","product":"Aluminum Laptop Stand","source":"TextChunkingPreprocessor","start_char":0} | Aluminum Laptop Stand | 0.7744703514692067 | 0.2502580835880018 | -| 3XZ | 3XZ_notes:1of1:0to19 | Gift wrap requested | {"chunk_index":0,"content_column":"notes","end_char":19,"original_doc_id":"3XZ_notes","original_row_id":"3XZ","product":"Bluetooth 
Speaker","source":"TextChunkingPreprocessor","start_char":0} | Bluetooth Speaker | 0.8010851611432231 | 0.2500003885558766 | -+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ -``` - - -When querying a knowledge base, the default values include the following: - -* `relevance`

-If not provided, its default value is equal to or greater than 0, ensuring there is no filtering of rows based on their relevance. - -* `LIMIT`

-If not provided, its default value is 10, returning a maximum of 10 rows. -
- - -Note that when specifying both `relevance` and `LIMIT` as follows: - -```sql -SELECT * -FROM my_kb -WHERE content = 'color' -AND relevance >= 0.5 -LIMIT 20; -``` - -The query extracts 20 rows (as defined in the `LIMIT` clause) that match the defined `content`. Next, these set of rows is filtered out to match the defined `relevance`. - - -Users can limit the `relevance` in order to get only the most relevant results. - -```sql -SELECT * -FROM my_kb -WHERE content = 'color' -AND relevance >= 0.5; -``` - -Here is the output: - -```sql -+-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+--------------------+--------------------+ -| id | chunk_id | chunk_content | metadata | product | distance | relevance | -+-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+--------------------+--------------------+ -| A1B | A1B_notes:1of1:0to20 | Request color: black | {"chunk_index":0,"content_column":"notes","end_char":20,"original_doc_id":"A1B_notes","original_row_id":"A1B","product":"Wireless Mouse","source":"TextChunkingPreprocessor","start_char":0} | Wireless Mouse | 0.5743341242061104 | 0.5103766499957533 | -+-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+--------------------+--------------------+ -``` - -By providing the `relevance` filter, the output is limited to only data with relevance score of the provided value. 
The available values of `relevance` are between 0 and 1, and its default value covers all available relevance values ensuring no filtering based on the relevance score. - -Users can limit the number of rows returned. - -```sql -SELECT * -FROM my_kb -WHERE content = 'color' -LIMIT 2; -``` - -Here is the output: - -```sql -+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ -| id | chunk_id | chunk_content | metadata | product | distance | relevance | -+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ -| A1B | A1B_notes:1of1:0to20 | Request color: black | {"chunk_index":0,"content_column":"notes","end_char":20,"original_doc_id":"A1B_notes","original_row_id":"A1B","product":"Wireless Mouse","source":"TextChunkingPreprocessor","start_char":0} | Wireless Mouse | 0.5743341242061104 | 0.5093188026135379 | -| Q7P | Q7P_notes:1of1:0to22 | Prefer aluminum finish | {"chunk_index":0,"content_column":"notes","end_char":22,"original_doc_id":"Q7P_notes","original_row_id":"Q7P","product":"Aluminum Laptop Stand","source":"TextChunkingPreprocessor","start_char":0} | Aluminum Laptop Stand | 0.7744703514692067 | 0.2502580835880018 | -+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ -``` - 
-### Metadata Filtering - -Besides semantic search features, knowledge bases enable users to filter the result set by the defined metadata. - -```sql -SELECT * -FROM my_kb -WHERE product = 'Wireless Mouse'; -``` - -Here is the output: - -```sql -+-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+-----------+----------+ -| id | chunk_id | chunk_content | metadata | product | relevance | distance | -+-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+-----------+----------+ -| A1B | A1B_notes:1of1:0to20 | Request color: black | {"chunk_index":0,"content_column":"notes","end_char":20,"original_doc_id":"A1B_notes","original_row_id":"A1B","product":"Wireless Mouse","source":"TextChunkingPreprocessor","start_char":0} | Wireless Mouse | [NULL] | [NULL] | -+-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+-----------+----------+ -``` - -Note that when searching by metadata alone, the `relevance` column values are not calculated. - -Users can do both, filter by metadata and search by content. 
- -```sql -SELECT * -FROM my_kb -WHERE product = 'Wireless Mouse' -AND content = 'color' -AND relevance >= 0.5; -``` - -Here is the output: - -```sql -+-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+--------------------+-------------------+ -| id | chunk_id | chunk_content | metadata | product | distance | relevance | -+-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+--------------------+-------------------+ -| A1B | A1B_notes:1of1:0to20 | Request color: black | {"chunk_index":0,"content_column":"notes","end_char":20,"original_doc_id":"A1B_notes","original_row_id":"A1B","product":"Wireless Mouse","source":"TextChunkingPreprocessor","start_char":0} | Wireless Mouse | 0.5743341242061104 | 0.504396172197583 | -+-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+--------------------+-------------------+ -``` - -## `JOIN` Syntax - -Knowledge bases can be used in the standard SQL JOIN statements. 
- -```sql -SELECT t.order_id, t.product, t.notes, kb.chunk_content, kb.relevance -FROM local_postgres.orders AS t -JOIN my_kb AS kb -ON t.order_id = kb.id -WHERE t.order_id = 'A1B' -AND kb.content = 'color' -AND kb.product = 'Wireless Mouse'; -``` - -Here is the output: - -```sql -+----------+------------------+------------------------+------------------------+--------------------+ -| order_id | product | notes | chunk_content | relevance | -+----------+------------------+------------------------+------------------------+--------------------+ -| A1B | Wireless Mouse | Request color: black | Request color: black | 0.5106591666649376 | -+----------+------------------+------------------------+------------------------+--------------------+ -``` - -## Examples - -We have a knowledge base that stores data about movies. - -```sql -+----------+-----------------------------------+-------------------------------------------------------------------------+ -| id | content | metadata | -+----------+-----------------------------------+-------------------------------------------------------------------------+ -| movie_id | "A bank security expert plots..." | {"genre":"Crime","rating":6.3,"expanded_genres":"Comedy, Crime, Drama"} | -+----------+-----------------------------------+-------------------------------------------------------------------------+ -``` - -It uses the `movie_id` column to uniquely identify each entry. The `content` column stores the description of the movie, and the metadata includes `genre`, `rating`, and `expanded_genres` columns. - -Let's see the query examples. - -* Selecting high-rated action movies with heist themes and no romance. 
- - ```sql - SELECT * FROM movies_kb - WHERE content LIKE 'heist bank robbery space alien planet' - AND genre != 'Romance' - AND expanded_genres NOT LIKE '%Romance%' - AND rating > 7.0; - ``` - - This query includes a semantic search filtering condition - `content LIKE 'heist bank robbery space alien planet'` - and multiple metadata filtering conditions - `genre != 'Romance' AND expanded_genres NOT LIKE '%Romance%' AND rating > 7.0`. - -* Selecting action-comedies with car chase scenes. - - ```sql - SELECT * FROM movies_kb - WHERE content LIKE 'car chase driving speed race' - AND expanded_genres LIKE '%Action%' - AND expanded_genres LIKE '%Comedy%' - AND rating > 6.5; - ``` - - This query includes a semantic search filtering condition - `content LIKE 'car chase driving speed race'` - and multiple metadata filtering conditions - `expanded_genres LIKE '%Action%' AND expanded_genres LIKE '%Comedy%' AND rating > 6.5`. - -* Selecting historical dramas without war themes. - - ```sql - SELECT * FROM movies_kb - WHERE content LIKE 'historical period past century era' - AND content NOT LIKE 'war battle soldier military' - AND content NOT LIKE 'fight combat weapon' - AND expanded_genres LIKE '%Drama%' - AND rating > 3.5; - ``` - - This query includes multiple semantic search filtering conditions - `content LIKE 'historical period past century era' AND content NOT LIKE 'war battle soldier military' AND content NOT LIKE 'fight combat weapon'` - and multiple metadata filtering conditions - `expanded_genres LIKE '%Drama%' AND rating > 3.5`. - -* Selecting multi-genre movies with different ratings. 
- - ```sql - SELECT * FROM movies_kb - WHERE (content LIKE 'detective mystery investigation' AND (genre = 'Mystery' OR expanded_genres LIKE '%Thriller%')) - OR (content LIKE 'romance love relationship' AND (genre = 'Romance' OR expanded_genres LIKE '%Romance%')) - AND rating > 7.0; - ``` - - This query includes nested semantic search filtering conditions - `(content LIKE 'detective mystery investigation' AND (genre = 'Mystery' OR expanded_genres LIKE '%Thriller%'))` - and a metadata filtering condition - `rating > 7.0`. - -* Selecting adventure movies excluding some genres. - - ```sql - SELECT * FROM movies_kb - WHERE content LIKE 'adventure journey quest treasure' - AND genre NOT IN ('Horror', 'Romance', 'Family') - AND rating > 6.5; - ``` - - This query includes a semantic search filtering condition - `content LIKE 'adventure journey quest treasure'` - and multiple metadata filtering conditions - `genre NOT IN ('Horror', 'Romance', 'Family') AND rating > 6.5`. - -* Selecting comedy movies in a specific rating range. - - ```sql - SELECT * FROM movies_kb - WHERE content LIKE 'comedy funny humor laugh' - AND rating BETWEEN 7.0 AND 9.0 - AND expanded_genres LIKE '%Comedy%'; - ``` - - This query includes a semantic search filtering condition - `content LIKE 'comedy funny humor laugh'` - and multiple metadata filtering conditions - `rating BETWEEN 7.0 AND 9.0 AND expanded_genres LIKE '%Comedy%'`. - -* Selecting different thriller subgenres. - - ```sql - SELECT * FROM movies_kb - WHERE content LIKE 'detective investigation mystery' AND rating > 7.0 - UNION - SELECT * FROM movies_kb - WHERE content LIKE 'heist robbery theft steal' AND rating > 7.0 - UNION - SELECT * FROM movies_kb - WHERE content LIKE 'spy secret agent undercover' AND rating > 7.0; - ``` - - This query combines the results of three queries using the `UNION` operator. 
diff --git a/docs/mindsdb_sql/overview.mdx b/docs/mindsdb_sql/overview.mdx deleted file mode 100644 index 92db4439d02..00000000000 --- a/docs/mindsdb_sql/overview.mdx +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: SQL API -sidebarTitle: Overview -icon: "database" ---- - -MindsDB enhances standard SQL by providing AI building blocks. - -This section introduces custom SQL syntax provided by MindsDB to bring data and AI together. - -Follow these steps to get started: - - - - Use [CREATE DATABASE](/mindsdb_sql/sql/create/database) to connect your data source to MindsDB.

- Explore all available [data sources here](/integrations/data-overview). -
- - Use [CREATE ML_ENGINE](/mindsdb_sql/sql/create/ml-engine) to configure an engine of your choice.

- Explore all available [AI engines here](/integrations/ai-overview). -
- - Use [CREATE MODEL](/mindsdb_sql/sql/create/model) to create, train, and deploy AI/ML models within MindsDB. - - - Query for a [single prediction](/mindsdb_sql/sql/get-single-prediction) or [batch predictions](/mindsdb_sql/sql/get-batch-predictions) by joining data with models. - - - Use [JOB](/mindsdb_sql/sql/create/jobs), [TRIGGER](/mindsdb_sql/sql/create/trigger), or [AGENT](/mindsdb_sql/agents/agent) to automate workflows. - -
diff --git a/docs/mindsdb_sql/sql/api/alter-view.mdx b/docs/mindsdb_sql/sql/api/alter-view.mdx deleted file mode 100644 index 3889a13bb1b..00000000000 --- a/docs/mindsdb_sql/sql/api/alter-view.mdx +++ /dev/null @@ -1,25 +0,0 @@ ---- -title: Alter a View -sidebarTitle: Alter a View ---- - -## Description - -The `ALTER VIEW` statement updates the query assigned to a view created with the [`CREATE VIEW` command](/mindsdb_sql/sql/create/view). - -## Syntax - -Here is the syntax: - -```sql -ALTER VIEW view_name [AS] ( - SELECT * FROM integration_name.table_name -); - ---or - -ALTER VIEW name -FROM integration_name ( - SELECT * FROM table_name -); -``` diff --git a/docs/mindsdb_sql/sql/api/delete.mdx b/docs/mindsdb_sql/sql/api/delete.mdx deleted file mode 100644 index 415c4ae6d33..00000000000 --- a/docs/mindsdb_sql/sql/api/delete.mdx +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: Delete From a Table -sidebarTitle: Delete From a Table ---- - -## Description - -The `DELETE` statement removes rows that fulfill the `WHERE` clause criteria. - -## Syntax - -Here is the syntax: - -```sql -DELETE FROM integration_name.table_name -WHERE column_name = column_value_to_be_removed; -``` - -This statement removes all rows from the `table_name` table (that belongs to the `integration_name` integration) wherever the `column_name` column value is equal to `column_value_to_be_removed`. - -And here is another way to filter the rows using a subquery: - -```sql -DELETE FROM integration_name.table_name -WHERE column_name IN - ( - SELECT column_value_to_be_removed - FROM some_integration.some_table - WHERE some_column = some_value - ); -``` - -This statement removes all rows from the `table_name` table (that belongs to the `integration_name` integration) wherever the `column_name` column value is equal to one of the values returned by the subquery. 
diff --git a/docs/mindsdb_sql/sql/api/insert.mdx b/docs/mindsdb_sql/sql/api/insert.mdx deleted file mode 100644 index c99a66419ed..00000000000 --- a/docs/mindsdb_sql/sql/api/insert.mdx +++ /dev/null @@ -1,74 +0,0 @@ ---- -title: Insert Into a Table -sidebarTitle: Insert Into a Table ---- - -## Description - -The `INSERT INTO` statement inserts data into a table. The data comes from a subselect query. It is commonly used to input prediction results into a database table. - -## Syntax - -Here is the syntax: - -```sql -INSERT INTO integration_name.table_name - (SELECT ...); -``` - -Please note that the destination table (`integration_name.table_name`) must -exist and contain all the columns where the data is to be inserted. - -And the steps followed by the syntax: - -- It executes a subselect query to get the output dataset. -- It uses the `INSERT INTO` statement to insert the output of the - `(SELECT ...)` query into the `integration_name.table_name` table. - -On execution, we get: - -```sql -Query OK, 0 row(s) updated - x.xxxs -``` - -### Example - -We want to save the prediction results into the `int1.tbl1` table. - -Here is the schema structure used throughout this example: - -```bash -int1 -└── tbl1 -mindsdb -└── predictor_name -int2 -└── tbl2 -``` - -Where: - -| Name | Description | -| ---------------- | ------------------------------------------------------------------------------------- | -| `int1` | Integration where the table that stores prediction results resides. | -| `tbl1` | Table that stores prediction results. | -| `predictor_name` | Name of the model. | -| `int2` | Integration where the data source table used in the inner `SELECT` statement resides. | -| `tbl2` | Data source table used in the inner `SELECT` statement. | - -Let's execute the query. 
- -```sql -INSERT INTO int1.tbl1 ( - SELECT * - FROM int2.tbl2 AS ta - JOIN mindsdb.predictor_name AS tb - WHERE ta.date > '2015-12-31' -); -``` - -On execution, we get: - -```sql -Query OK, 0 row(s) updated - x.xxxs -``` diff --git a/docs/mindsdb_sql/sql/api/join-on.mdx b/docs/mindsdb_sql/sql/api/join-on.mdx deleted file mode 100644 index 7bd3b0b7a07..00000000000 --- a/docs/mindsdb_sql/sql/api/join-on.mdx +++ /dev/null @@ -1,248 +0,0 @@ ---- -title: Join Tables On -sidebarTitle: Join Tables On ---- - -## Description - -The `JOIN` statement combines two or more tables based `ON` a specified column(s). It functions as a standard `JOIN` in SQL while offering the added capability of **combining data from multiple data sources**, allowing users to join data from one or more data sources seamlessly. - -## Syntax - -Here is the syntax: - -```sql -SELECT t1.column_name, t2.column_name, t3.column_name -FROM datasource1.table1 [AS] t1 -JOIN datasource2.table2 [AS] t2 -ON t1.column_name = t2.column_name -JOIN datasource3.table3 [AS] t3 -ON t1.column_name = t3.column_name; -``` - -This query joins data from three different datasources - `datasource1`, `datasource2`, and `datasource3` - allowing users to execute federated queries across multiple data sources. - - - -**Nested `JOINs`** - -MindsDB provides you with two categories of `JOINs`. One is [the `JOIN` statement which combines the data table with the model table](/mindsdb_sql/sql/api/join) in order to fetch bulk predictions. Another is the regular `JOIN` used throughout SQL, which requires the `ON` clause. - -You can nest these types of `JOINs` as follows: - -```sql -SELECT * FROM ( - SELECT * - FROM project_name.model_table AS m - JOIN datasource_name.data_table AS d; -) AS t1 -JOIN ( - SELECT * - FROM project_name.model_table AS m - JOIN datasource_name.data_table AS d; -) AS t2 -ON t1.column_name = t2.column_name; -``` - - - -## Example 1 - -Let's use the following data to see how the different types of `JOINs` work. 
- -The `pets` table that stores pets: - -```sql -+------+-------+ -|pet_id|name | -+------+-------+ -|1 |Moon | -|2 |Ripley | -|3 |Bonkers| -|4 |Star | -|5 |Luna | -|6 |Lake | -+------+-------+ -``` - -And the `owners` table that stores pets' owners: - -```sql -+--------+-------+------+ -|owner_id|name |pet_id| -+--------+-------+------+ -|1 |Amy |4 | -|2 |Bob |1 | -|3 |Harry |5 | -|4 |Julia |2 | -|5 |Larry |3 | -|6 |Henry |0 | -+--------+-------+------+ -``` - -### `JOIN` or `INNER JOIN` - -The `JOIN` or `INNER JOIN` command joins the rows of the `owners` and `pets` tables wherever there is a match. For example, a pet named Lake does not have an owner, so it'll be left out. - -```sql -SELECT * -FROM files.owners o -[INNER] JOIN files.pets p -ON o.pet_id = p.pet_id; -``` - -On execution, we get: - -```sql -+--------+-------+------+------+-------+ -|owner_id|name |pet_id|pet_id|name | -+--------+-------+------+------+-------+ -|1 |Amy |4 |4 |Star | -|2 |Bob |1 |1 |Moon | -|3 |Harry |5 |5 |Luna | -|4 |Julia |2 |2 |Ripley | -|5 |Larry |3 |3 |Bonkers| -+--------+-------+------+------+-------+ -``` - -As in standard SQL, you can use the `WHERE` clause to filter the output data. - -```sql -SELECT * -FROM files.owners o -[INNER] JOIN files.pets p -ON o.pet_id = p.pet_id -WHERE o.name = 'Amy' -OR o.name = 'Bob'; -``` - -On execution, we get: - -```sql -+--------+-------+------+------+-------+ -|owner_id|name |pet_id|pet_id|name | -+--------+-------+------+------+-------+ -|1 |Amy |4 |4 |Star | -|2 |Bob |1 |1 |Moon | -+--------+-------+------+------+-------+ -``` - -### `LEFT JOIN` - -The `LEFT JOIN` command joins the rows of two tables such that all rows from the left table, even the ones with no match, show up. Here, the left table is the `owners` table. 
- -```sql -SELECT * -FROM files.owners o -LEFT JOIN files.pets p -ON o.pet_id = p.pet_id; -``` - -On execution, we get: - -```sql -+--------+-------+------+------+-------+ -|owner_id|name |pet_id|pet_id|name | -+--------+-------+------+------+-------+ -|1 |Amy |4 |4 |Star | -|2 |Bob |1 |1 |Moon | -|3 |Harry |5 |5 |Luna | -|4 |Julia |2 |2 |Ripley | -|5 |Larry |3 |3 |Bonkers| -|6 |Henry |0 |[NULL]|[NULL] | -+--------+-------+------+------+-------+ -``` - -### `RIGHT JOIN` - -The `RIGHT JOIN` command joins the rows of two tables such that all rows from the right table, even the ones with no match, show up. Here, the right table is the `pets` table. - -```sql -SELECT * -FROM files.owners o -RIGHT JOIN files.pets p -ON o.pet_id = p.pet_id; -``` - -On execution, we get: - -```sql -+--------+-------+------+------+-------+ -|owner_id|name |pet_id|pet_id|name | -+--------+-------+------+------+-------+ -|2 |Bob |1 |1 |Moon | -|4 |Julia |2 |2 |Ripley | -|5 |Larry |3 |3 |Bonkers| -|1 |Amy |4 |4 |Star | -|3 |Harry |5 |5 |Luna | -|[NULL] |[NULL] |[NULL]|6 |Lake | -+--------+-------+------+------+-------+ -``` - -### `FULL JOIN` or `FULL OUTER JOIN` - -The `FULL [OUTER] JOIN` command joins the rows of two tables such that all rows from both tables, even the ones with no match, show up. - -```sql -SELECT * -FROM files.owners o -FULL [OUTER] JOIN files.pets p -ON o.pet_id = p.pet_id; -``` - -On execution, we get: - -```sql -+--------+------+------+------+-------+---------+ -|owner_id|name |pet_id|pet_id|name |animal_id| -+--------+------+------+------+-------+---------+ -|1 |Amy |4 |4 |Star |2 | -|2 |Bob |1 |1 |Moon |1 | -|3 |Harry |5 |5 |Luna |2 | -|4 |Julia |2 |2 |Ripley |1 | -|5 |Larry |3 |3 |Bonkers|3 | -|6 |Henry |0 |[NULL]|[NULL] |[NULL] | -|[NULL] |[NULL]|[NULL]|6 |Lake |4 | -+--------+------+------+------+-------+---------+ -``` - -## Example 2 - -More than two tables can be joined subsequently. 
- -Let's use another table called `animals`: - -```sql -+---------+-------+ -|animal_id|name | -+---------+-------+ -|1 |Dog | -|2 |Cat | -|3 |Hamster| -|4 |Fish | -+---------+-------+ -``` - -Now we can join all three tables. - -```sql -SELECT * -FROM files.owners o -RIGHT JOIN files.pets p ON o.pet_id = p.pet_id -JOIN files.animals a ON p.animal_id = a.animal_id; -``` - -On execution, we get: - -```sql -+--------+-------+------+------+-------+---------+---------+-------+ -|owner_id|name |pet_id|pet_id|name |animal_id|animal_id|name | -+--------+-------+------+------+-------+---------+---------+-------+ -|2 |Bob |1 |1 |Moon |1 |1 |Dog | -|4 |Julia |2 |2 |Ripley |1 |1 |Dog | -|5 |Larry |3 |3 |Bonkers|3 |3 |Hamster| -|1 |Amy |4 |4 |Star |2 |2 |Cat | -|3 |Harry |5 |5 |Luna |2 |2 |Cat | -|[NULL] |[NULL] |[NULL]|6 |Lake |4 |4 |Fish | -+--------+-------+------+------+-------+---------+---------+-------+ -``` diff --git a/docs/mindsdb_sql/sql/api/select-files.mdx b/docs/mindsdb_sql/sql/api/select-files.mdx deleted file mode 100644 index 82f1824be70..00000000000 --- a/docs/mindsdb_sql/sql/api/select-files.mdx +++ /dev/null @@ -1,84 +0,0 @@ ---- -title: Query a File -sidebarTitle: Query a File ---- - -## Description - -The `SELECT * FROM files.file_name` statement is used to select data from a file. - -First, you upload a file to the MindsDB Editor by following -[this guide](/sql/create/file/). And then, you can -[`CREATE MODEL`](/sql/create/model) from the uploaded file. 
- -## Syntax - -Here is the syntax: - -```sql -SELECT * -FROM files.file_name; -``` - -On execution, we get: - -```sql -+--------+--------+--------+--------+ -| column | column | column | column | -+--------+--------+--------+--------+ -| value | value | value | value | -+--------+--------+--------+--------+ -``` - -Where: - -| Name | Description | -| ------------- | --------------------------------------------------------------------------------------------- | -| `file_name` | Name of the file uploaded to the MindsDB Editor by following [this guide](/sql/create/file/). | -| `column` | Name of the column from the file. | - -## Example - -Once you uploaded your file by following [this guide](/sql/create/file/), you -can query it like a table. - -```sql -SELECT * -FROM files.home_rentals -LIMIT 10; -``` - -On execution, we get: - -```sql -+-----------------+---------------------+-------+----------+----------------+---------------+--------------+--------------+ -| number_of_rooms | number_of_bathrooms | sqft | location | days_on_market | initial_price | neighborhood | rental_price | -+-----------------+---------------------+-------+----------+----------------+---------------+--------------+--------------+ -| 0 | 1 | 484,8 | great | 10 | 2271 | south_side | 2271 | -| 1 | 1 | 674 | good | 1 | 2167 | downtown | 2167 | -| 1 | 1 | 554 | poor | 19 | 1883 | westbrae | 1883 | -| 0 | 1 | 529 | great | 3 | 2431 | south_side | 2431 | -| 3 | 2 | 1219 | great | 3 | 5510 | south_side | 5510 | -| 1 | 1 | 398 | great | 11 | 2272 | south_side | 2272 | -| 3 | 2 | 1190 | poor | 58 | 4463 | westbrae | 4123.812 | -| 1 | 1 | 730 | good | 0 | 2224 | downtown | 2224 | -| 0 | 1 | 298 | great | 9 | 2104 | south_side | 2104 | -| 2 | 1 | 878 | great | 8 | 3861 | south_side | 3861 | -+-----------------+---------------------+-------+----------+----------------+---------------+--------------+--------------+ -``` - -Now let's create a predictor using the uploaded file. 
You can learn more about -the [`CREATE MODEL` command here](/sql/create/model). - -```sql -CREATE MODEL mindsdb.home_rentals_model -FROM files - (SELECT * from home_rentals) -PREDICT rental_price; -``` - -On execution, we get: - -```sql -Query OK, 0 rows affected (x.xxx sec) -``` diff --git a/docs/mindsdb_sql/sql/api/select-view.mdx b/docs/mindsdb_sql/sql/api/select-view.mdx deleted file mode 100644 index be3c5eeecb5..00000000000 --- a/docs/mindsdb_sql/sql/api/select-view.mdx +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Query a View -sidebarTitle: Query a View ---- - -## Description - -The `SELECT` statement fetches data from a view that resides inside a project. - -## Syntax - -Here is the syntax: - -```sql -SELECT * -FROM project_name.view_name; -``` diff --git a/docs/mindsdb_sql/sql/api/select.mdx b/docs/mindsdb_sql/sql/api/select.mdx deleted file mode 100644 index 7b4d56e340a..00000000000 --- a/docs/mindsdb_sql/sql/api/select.mdx +++ /dev/null @@ -1,68 +0,0 @@ ---- -title: Query a Table -sidebarTitle: Query a Table ---- - -## Description - -The `SELECT` statement fetches data from a table and predictions from a model. - -Here we go over examples of selecting data from tables of connected data sources. To learn how to select predictions from a model, visit [this page](/sql/api/select-predictions). - -## Syntax - -## Simple SELECT FROM an integration - -In this example, the query contains only tables from one integration. This query will be executed on this integration's database (where the integration name will be cut from the table name). - -```sql -SELECT location, max(sqft) -FROM example_db.demo_data.home_rentals -GROUP BY location -LIMIT 5; -``` - -## Raw SELECT FROM an integration - -It is also possible to send [native queries](/sql/native-queries) to an integration, using syntax native to that integration. It is useful when a query cannot be parsed as SQL. - -```sql -SELECT ... 
FROM integration_name ( native query goes here ); -``` - -Here is an example of selecting from a Mongo integration using Mongo-QL syntax: - -```sql -SELECT * FROM mongo ( - db.house_sales2.find().limit(1) -); -``` - -## Complex queries - -1. Subselect on data from integration. - -It can be useful in cases when the integration engine doesn't support some functions, for example, grouping, as shown below. In this case, all data from the raw select is passed to MindsDB, and then the subselect performs operations on it inside MindsDB. - -```sql -SELECT type, max(bedrooms), last(MA) -FROM mongo ( - db.house_sales2.find().limit(300) -) GROUP BY 1 -``` - -2. Unions - -It is possible to use `UNION` and `UNION ALL` operators. In this case, every subselect from the union is fetched and merged into one result set on the MindsDB side. - -```sql - SELECT data.time as date, data.target - FROM datasource.table_name as data - -UNION ALL - - SELECT model.time as date, model.target as target - FROM mindsdb.model as model - JOIN datasource.table_name as t - WHERE t.time > LATEST AND t.group = 'value'; -``` diff --git a/docs/mindsdb_sql/sql/api/update.mdx b/docs/mindsdb_sql/sql/api/update.mdx deleted file mode 100644 index e865d33d60a..00000000000 --- a/docs/mindsdb_sql/sql/api/update.mdx +++ /dev/null @@ -1,74 +0,0 @@ ---- -title: Update a Table -sidebarTitle: Update a Table ---- - -## Description - -MindsDB provides two ways of using the `UPDATE` statement: - -1. The regular `UPDATE` statement updates specific column values in an existing table. - -2. The `UPDATE FROM SELECT` statement updates data in an existing table from a subselect query. It can be used as an alternative to `CREATE TABLE` or `INSERT INTO` to store predictions. 
- -## Syntax - -Here is an example of the regular `UPDATE` statement: - -```sql -UPDATE integration_name.table_name -SET column_name = new_value -WHERE column_name = old_value -``` - - -Please replace the placeholders as follows: - -- `integration_name` is the name of the connected data source. -- `table_name` is the table name within that data source. -- `column_name` is the column name within that table. - - -And here is an example of the `UPDATE FROM SELECT` statement that updates a table with predictions made within MindsDB: - -```sql -UPDATE - integration_to_be_updated.table_to_be_updated -SET - column_to_be_updated = prediction_data.predicted_value_column, -FROM - ( - SELECT p.predicted_value_column, p.column1, p.column2 - FROM integration_name.table_name as t - JOIN model_name as p - ) AS prediction_data -WHERE - column1 = prediction_data.column1 - AND column2 = prediction_data.column2 -``` - -Below is an alternative for the `UPDATE FROM SELECT` statement that updates a table with predictions. This syntax is easier to write. - -```sql -UPDATE - integration_to_be_updated.table_to_be_updated -ON - column1, column2 -FROM - ( - SELECT p.predicted_value_column as column_to_be_updated, p.column1, p.column2 - FROM integration_name.table_name as t - JOIN model_name as p - ) -``` - - -The steps followed by the syntax: - -- It executes query from the `FROM` clause to get the output data. In our example, we query for predictions, but it could be a simple select from another table. Please note that it is aliased as `prediction_data`. -- It updates all rows from the `table_to_be_updated` table (that belongs to the `integration_to_be_updated` integration) that match the `WHERE` clause criteria. The rows are updated with values as defined in the `SET` clause. - - - -It is recommended to use the primary key column(s) in the WHERE clause (here, `column1` and `column2`), as the primary key column(s) uniquely identify each row. 
Otherwise, the `UPDATE` statement may lead to unexpected results by altering rows that you didn't want to affect. - diff --git a/docs/mindsdb_sql/sql/api/use.mdx b/docs/mindsdb_sql/sql/api/use.mdx deleted file mode 100644 index e46b31924d4..00000000000 --- a/docs/mindsdb_sql/sql/api/use.mdx +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: Use a Data Source -sidebarTitle: Use a Data Source ---- - -## Description - -The `USE integration_name` statement provides an option to use the connected -datasources and `SELECT` from the database tables. Even if you are -connecting to MindsDB as MySQL database, you will still be able to `SELECT` from your database. - -## Syntax - -To connect to your database `USE` the created datasource: - -```sql -USE integration_name; -``` - -Then, simply `SELECT` from the tables: - -```sql -SELECT * FROM table_name; -``` - -![Use datasource](/assets/sql/use.png) diff --git a/docs/mindsdb_sql/sql/create/database.mdx b/docs/mindsdb_sql/sql/create/database.mdx deleted file mode 100644 index d30b93e6802..00000000000 --- a/docs/mindsdb_sql/sql/create/database.mdx +++ /dev/null @@ -1,224 +0,0 @@ ---- -title: Connect a Data Source -sidebarTitle: Connect a Data Source ---- - -## Description - -MindsDB lets you connect to your favorite databases, data warehouses, data lakes, etc., via the `CREATE DATABASE` command. - -The MindsDB SQL API supports creating connections to integrations by passing the -connection parameters specific per integration. You can find more in the -[Supported Integrations](#supported-integrations) chapter. - - -MindsDB doesn't store or copy your data. Instead, it fetches data directly from your connected sources each time you make a query, ensuring that any changes to the data are instantly reflected. This means your data remains in its original location, and MindsDB always works with the most up-to-date information. - - -## Syntax - -Let's review the syntax for the `CREATE DATABASE` command. 
- -```sql -CREATE DATABASE [IF NOT EXISTS] datasource_name -[WITH] [ENGINE [=] engine_name] [,] -[PARAMETERS [=] { - "key": "value", - ... -}]; -``` - -On execution, we get: - -```sql -Query OK, 0 rows affected (x.xxx sec) -``` - -Where: - -| Name | Description | -| ------------------- | --------------------------------------------------------------------------------- | -| `datasource_name` | Identifier for the data source to be created. | -| `engine_name` | Engine to be selected depending on the database connection. | -| `PARAMETERS` | `{"key": "value"}` object with the connection parameters specific for each engine. | - - -**SQL Commands Resulting in the Same Output** Please note that the -keywords/statements enclosed within square brackets are optional. Also, by -default, the engine is `mindsdb` if not provided otherwise. That yields the -following SQL commands to result in the same output. - -```sql -CREATE DATABASE db; -CREATE DATABASE db ENGINE 'mindsdb'; -CREATE DATABASE db ENGINE = 'mindsdb'; -CREATE DATABASE db WITH ENGINE 'mindsdb'; -CREATE DATABASE db USING ENGINE = 'mindsdb'; -``` - - - - -### What's available on your installation - - -Here is how you can query for all the available data handlers used to create database connections. - - ```sql - SELECT * - FROM information_schema.handlers - WHERE type = 'data'; - ``` - - Or, alternatively: - - ```sql - SHOW HANDLERS - WHERE type = 'data'; - ``` - -And here is how you can query for all the connected databases: - - ```sql - SELECT * - FROM information_schema.databases; - ``` - - Or, alternatively: - - ```sql - SHOW DATABASES; - - SHOW FULL DATABASES; - ``` - - - -## Example - -### Connecting a Data Source - -Here is an example of how to connect to a MySQL database. 
- -```sql -CREATE DATABASE mysql_datasource -WITH ENGINE = 'mariadb', -PARAMETERS = { - "user": "root", - "port": 3307, - "password": "password", - "host": "127.0.0.1", - "database": "my_database" -}; -``` - -On execution, we get: - -```sql -Query OK, 0 rows affected (8.878 sec) -``` - -### Listing Linked Databases - -You can list all the linked databases using the command below. - -```sql -SHOW DATABASES; -``` - -On execution, we get: - -```sql -+--------------------+ -| Database | -+--------------------+ -| information_schema | -| mindsdb | -| files | -| mysql_datasource | -+--------------------+ -``` - -## Making your Local Database Available to MindsDB - -When connecting your local database to MindsDB Cloud, you should expose the -local database server to be publicly accessible. It is easy to accomplish using -[Ngrok Tunnel](https://ngrok.com). The free tier offers all you need to get -started. - -The installation instructions are easy to follow. Head over to the -[downloads page](https://ngrok.com/download) and choose your operating system. -Follow the instructions for installation. - -Then [create a free account at Ngrok](https://dashboard.ngrok.com/signup) to get -an auth token that you can use to configure your Ngrok instance. - -Once installed and configured, run the following command to obtain the host and -port for your localhost at `port-number`. - -```bash -ngrok tcp port-number -``` - -Here is an example. Assuming that you run a PostgreSQL database at -`localhost:5432`, use the following command: - -```bash -ngrok tcp 5432 -``` - -On execution, we get: - -```bash -Session Status online -Account myaccount (Plan: Free) -Version 2.3.40 -Region United States (us) -Web Interface http://127.0.0.1:4040 -Forwarding tcp://4.tcp.ngrok.io:15093 -> localhost 5432 -``` - -Now you can access your local database at `4.tcp.ngrok.io:15093` instead of -`localhost:5432`. - -So to connect your local database to the MindsDB GUI, use the `Forwarding` -information. 
The host is `4.tcp.ngrok.io`, and the port is `15093`. - -Proceed to create a database connection in the MindsDB GUI by executing the -`CREATE DATABASE` statement with the host and port number obtained from -Ngrok. - -```sql -CREATE DATABASE psql_datasource -WITH ENGINE = 'postgres', -PARAMETERS = { - "user": "postgres", - "port": 15093, - "password": "password", - "host": "4.tcp.ngrok.io", - "database": "postgres" -}; -``` - -Please note that the Ngrok tunnel loses connection when stopped or canceled. To -reconnect your local database to MindsDB, you should create an Ngrok tunnel -again. In the free tier, Ngrok changes the host and port values each time you -launch the program, so you need to reconnect your database in the MindsDB Cloud -by passing the new host and port values obtained from Ngrok. - -Before resetting the database connection, drop the previously connected data -source using the `DROP DATABASE` statement. - -```sql -DROP DATABASE psql_datasource; -``` - -After dropping the data source and reconnecting your local database, you can use -the predictors that you trained using the previously connected data source. -However, if you have to `RETRAIN` your predictors, please ensure the database -connection has the same name you used when creating the predictor to avoid -failing to retrain. - -## Supported Integrations - -The list of databases supported by MindsDB keeps growing. Check out all our [database integrations here](/data-integrations/all-data-integrations). diff --git a/docs/mindsdb_sql/sql/create/file.mdx b/docs/mindsdb_sql/sql/create/file.mdx deleted file mode 100644 index ef245d15858..00000000000 --- a/docs/mindsdb_sql/sql/create/file.mdx +++ /dev/null @@ -1,51 +0,0 @@ ---- -title: Upload a File -sidebarTitle: Upload a File ---- - -Follow the steps below to upload a file to MindsDB. - - -Note that the trailing whitespaces on column names are erased upon uploading a file to MindsDB. - - - -1. Access the MindsDB Editor. -2. 
Open the `Add` menu and choose `Upload file`. - -

- -

- -3. Select a file, provide its name, and click on `Save & Continue`. - -

- -

- -4. Now you can query the file. - - ```sql - SELECT * FROM files.file_name; - ``` - -Here is how to list all files: - -```sql -SHOW TABLES FROM files; -``` - -This command is the same as the command for listing tables because files uploaded to MindsDB become tables within the MindsDB ecosystem and are stored in the `files` database. - -### Configuring URL File Upload for Specific Domains - -The File Uploader can be configured to interact only with specific domains by using the [`url_file_upload` key in the `config.json` file](/setup/custom-config#url-file-upload). -This feature allows you to restrict the handler to upload and process files only from the domains you specify, enhancing security and control over web interactions. - -To configure this, simply list the allowed domains under the [`url_file_upload` key in the `config.json` file](/setup/custom-config#url-file-upload). - -## What's Next? - -Now, you are ready to create a predictor from a file. Make sure to check out -[this guide](/sql/create/model/) -on how to do that. diff --git a/docs/mindsdb_sql/sql/create/jobs.mdx b/docs/mindsdb_sql/sql/create/jobs.mdx deleted file mode 100644 index 06523235b00..00000000000 --- a/docs/mindsdb_sql/sql/create/jobs.mdx +++ /dev/null @@ -1,308 +0,0 @@ ---- -title: JOBS -sidebarTitle: Create a Job ---- - -MindsDB enables you to automate any pipeline using JOBS, which grant you the power to schedule any query at any frequency. Additionally, it introduces the keyword [LAST](#last), offering the capability for a JOB to act solely on new data, essentially treating any data source as a stream. -

- -

- - -## Description - -The `CREATE JOB` statement lets you schedule the execution of queries by providing relevant parameters, such as start date, end date, or repetition frequency. - -## Syntax - -### `CREATE JOB` - -Here is the syntax: - -```sql -CREATE JOB [IF NOT EXISTS] [project_name.]job_name [AS] ( - [; ][; ...] -) -[START ] -[END ] -[EVERY [number] ] -[IF ([; ][; ...])]; -``` - -Where: - -| Expression | Description | -| --------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `[project_name.]job_name` | Name of the job preceded by an optional project name where the job is to be created. If you do not provide the `project_name` value, then the job is created in the default `mindsdb` project. | -| `[; ][; ...]` | One or more statements separated by `;` to be executed by the job. | -| `[START ]` | Optional. The date when the job starts its periodical or one-time execution. If not set, it is the current system date. | -| `[END ]` | Optional. The date when the job ends its periodical or one-time execution. If it is not set (and the repetition rules are set), then the job repeats forever. | -| `[EVERY [number] ]` | Optional. The repetition rules for the job. If not set, the job runs once, not considering the end date value. If the `number` value is not set, it defaults to 1. | -| `[IF ([; ][; ...])]` | Optional. If the last statement returns one or more rows, only then the job will execute. | - - -**Available `` formats** - -Here are the supported `` formats: -- `'%Y-%m-%d %H:%M:%S'` -- `'%Y-%m-%d'` - -Please note that the default time zone is UTC. 
- - - -**Available `` values** - -And the supported `` values: -- `minute` / `minutes` / `min` -- `hour` / `hours` -- `day` / `days` -- `week` / `weeks` -- `month` / `months` - - -Further, you can query all jobs and their execution history like this: - -```sql -SHOW JOBS; -SELECT * FROM [project_name.]jobs WHERE name = 'job_name'; -SELECT * FROM log.jobs_history WHERE project = 'mindsdb' AND name = 'job_name'; -``` - -### `LAST` - -MindsDB provides a custom `LAST` keyword that enables you to fetch data inserted after the last time you queried for it. It is a convenient way to select only the newly added data rows when running a job. - -Imagine you have the `fruit_data` table that contains the following: - -```sql -+-------+-----------+ -| id | name | -+-------+-----------+ -| 1 | apple | -| 2 | orange | -+-------+-----------+ -``` - -When you run the `SELECT` query with the `LAST` keyword for the first time, it'll give an empty output. - -```sql -SELECT id, name -FROM fruit_data -WHERE id > LAST; -``` - -This query returns: - -```sql -+-------+-----------+ -| id | name | -+-------+-----------+ -| null | null | -+-------+-----------+ -``` - - -If you want to specify a concrete value for `LAST` in the first execution of such a query, use the `COALESCE(LAST, )` function. - -```sql -SELECT id, name -FROM fruit_data -WHERE id > COALESCE(LAST, 1); -``` - -It will result in executing the following query in the first run: - -```sql -SELECT id, name -FROM fruit_data -WHERE id > 1; -``` - -And the below query at each subsequent run: - -```sql -SELECT id, name -FROM fruit_data -WHERE id > LAST; -``` - - -Now imagine you inserted a new record into the `fruit_data` table: - -```sql -+-------+-----------+ -| id | name | -+-------+-----------+ -| 1 | apple | -| 2 | orange | -| 3 | pear | -+-------+-----------+ -``` - -When you run the `SELECT` query with the `LAST` keyword again, you'll get only the newly added record as output. 
- -```sql -SELECT id, name -FROM fruit_data -WHERE id > LAST; -``` - -This query returns: - -```sql -+-------+-----------+ -| id | name | -+-------+-----------+ -| 3 | pear | -+-------+-----------+ -``` - -From this point on, whenever you add new records into the `fruit_data` table, it'll be returned by the next run of the `SELECT` query with the `LAST` keyword. And, if you do not add any new records between the query runs, then the output will be null. - -If you want to clear context for the `LAST` keyword in the editor, then run `set context = 0` or `set context = null`. - -### Conditional Jobs - -Here is how you can create a conditional job that will execute periodically only if there is new data available: - -```sql -CREATE JOB conditional_job ( - - FINETUNE MODEL model_name - FROM ( - SELECT * - FROM datasource.table_name - WHERE incremental_column > LAST - ) -) -EVERY 1 min -IF ( - SELECT * - FROM datasource.table_name - WHERE incremental_column > LAST -); -``` - -The above job will be triggered every minute, but it will execute its task (i.e. finetuning the model) only if there is new data available. - -## Examples - -### Example 1: Retrain a Model - -In this example, we create a job in the current project to retrain the `home_rentals_model` model and insert predictions into the `rentals` table. - -```sql -CREATE JOB retrain_model_and_save_predictions ( - - RETRAIN mindsdb.home_rentals_model - USING - join_learn_process = true; - - INSERT INTO my_integration.rentals ( - SELECT m.rental_price, m.rental_price_explain - FROM mindsdb.home_rentals_model AS m - JOIN example_db.demo_data.home_rentals AS d - ) -) -END '2023-04-01 00:00:00' -EVERY 2 days; -``` - - -Please note that the `join_learn_process` parameter in the `USING` clause of the [`RETRAIN`](/sql/api/retrain) statement ensures that the retraining process completes before inserting predictions into a table. 
In general, this parameter is used to prevent several retrain processes from running simultaneously. - - -The `retrain_model_and_save_predictions` job starts its execution on the current system date and ends on the 1st of April 2023. The job is executed every 2 days. - -### Example 2: Save Predictions - -In this example, the job creates a table named `result_{{START_DATETIME}}` and inserts predictions into it. - -```sql -CREATE JOB save_predictions ( - - CREATE TABLE my_integration.`result_{{START_DATETIME}}` ( - SELECT m.rental_price, m.rental_price_explain - FROM mindsdb.home_rentals_model AS m - JOIN example_db.demo_data.home_rentals AS d - ) -) -EVERY hour; -``` - - -Please note that the uniqueness of the created table name is ensured here by using the `{{START_DATETIME}}` variable that is replaced at runtime by the date and time of the current run. - -You can use the following variables for this purpose: -- `PREVIOUS_START_DATETIME` is replaced by date and time of the previous run of this job. -- `START_DATETIME` is replaced by date and time of the current job run. -- `START_DATE` is replaced by date of the current job run. - - -The `save_predictions` job starts its execution on the current system date and repeats every hour until it is manually disabled. - -### Example 3: Drop a Model - -In this example, we create a job to drop the `home_rentals_model` model scheduled on the 1st of April 2023. - -```sql -CREATE JOB drop_model ( - - DROP MODEL mindsdb.home_rentals_model -) -START '2023-04-01'; -``` - -This job runs once on the 1st of April 2023. - -### Example 4: Twitter Chatbot - -You can easily create a chatbot to respond to tweets using jobs. But before you get to it, you should connect your Twitter account to MindsDB following the instructions [here](/integrations/app-integrations/twitter). - - -Follow the [tutorial on how to create a Twitter chatbot](/sql/tutorials/twitter-chatbot) to learn the details. 
- - -Let's create a job that runs every hour, checks for new tweets, and responds using the OpenAI model. - -```sql -CREATE JOB mindsdb.gpt4_twitter_job AS ( - - -- insert into tweets the output of joining model and new tweets - INSERT INTO my_twitter_v2.tweets (in_reply_to_tweet_id, text) - SELECT - t.id AS in_reply_to_tweet_id, - r.response AS text - FROM my_twitter.tweets t - JOIN mindsdb.snoopstein_model r - WHERE - t.query = '(@snoopstein OR @snoop_stein OR #snoopstein OR #snoop_stein) -is:retweet -from:snoop_stein' - AND t.created_at > LAST - LIMIT 10 -) -EVERY hour; -``` - -The [`SELECT`](/sql/api/select) statement joins the data table with the model table to get responses for newly posted tweets, thanks to the `LAST` keyword. Then, the [`INSERT INTO`](/sql/api/insert) statement writes these responses to the `tweets` table of the `my_twitter_v2` integration. - - -To learn more about OpenAI integration with MindsDB, visit our docs [here](/nlp/nlp-mindsdb-openai). - - -## Additional Configuration - -Here is the template of the `config.json` file that you can pass as a parameter when starting your local MindsDB instance: - -```bash -"jobs": { - "disable": true, - "check_interval": 30 - } -``` - -The `disable` parameter defines whether the scheduler is disabled (`true`) or active (`false`). By default, in the MindsDB Editor, the scheduler is active. - -The `check_interval` parameter defines the interval in seconds between consecutive checks of the scheduler table. By default, in the MindsDB Editor, it is 30 seconds. - -You can modify the default configuration in your local MindsDB installation by creating a `config.json` file and starting MindsDB with this file as a parameter. You can find detailed instructions [here](/setup/custom-config#starting-mindsdb-with-extended-configuration). 
diff --git a/docs/mindsdb_sql/sql/create/project.mdx b/docs/mindsdb_sql/sql/create/project.mdx deleted file mode 100644 index 57ec8759a76..00000000000 --- a/docs/mindsdb_sql/sql/create/project.mdx +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Create a Project -sidebarTitle: Create a Project ---- - -## Description - -MindsDB introduces projects that are a natural way to keep artifacts, such as models or views, separate according to what predictive task they solve. You can learn more about MindsDB projects [here](/sql/project). - -## Syntax - -Here is the syntax for creating a project: - -```sql -CREATE PROJECT [IF NOT EXISTS] project_name; -``` diff --git a/docs/mindsdb_sql/sql/create/table.mdx b/docs/mindsdb_sql/sql/create/table.mdx deleted file mode 100644 index 1b2f9b56800..00000000000 --- a/docs/mindsdb_sql/sql/create/table.mdx +++ /dev/null @@ -1,90 +0,0 @@ ---- -title: Create a Table -sidebarTitle: Create a Table ---- - -## Description - -The `CREATE TABLE` statement creates a table and optionally fills it with data from provided query. It may be used to materialize prediction results as tables. - -## Syntax - -You can use the `CREATE TABLE` statement to create an empty table: - -```sql -CREATE TABLE integration_name.table_name ( - column_name data_type, - ... -); -``` - -You can use the `CREATE TABLE` statement to create a table and fill it with data: - -```sql -CREATE TABLE integration_name.table_name - (SELECT ...); -``` - -Or the `CREATE OR REPLACE TABLE` statement: - -```sql -CREATE OR REPLACE TABLE integration_name.table_name - (SELECT ...); -``` - -Here is how to list tables from a connected data source: - -```sql -SHOW TABLES FROM data_source_name; -``` - - -Note that the `integration_name` connection must be created with the [`CREATE DATABASE`](/mindsdb_sql/sql/create/database) statement and the user with write access. - - -Here are the steps followed by the syntax: - -- It executes a subselect query to get the output data. 
-- In the case of the `CREATE OR REPLACE TABLE` statement, the - `integration_name.table_name` table is dropped before recreating it. -- It (re)creates the `integration_name.table_name` table inside the - `integration_name` integration. -- It uses the [`INSERT INTO`](/sql/api/insert/) statement to insert the - output of the `(SELECT ...)` query into the - `integration_name.table_name`. - -## Example - -We want to save the prediction results into the `int1.tbl1` table. - -Here is the schema structure used throughout this example: - -```bash -int1 -└── tbl1 -mindsdb -└── predictor_name -int2 -└── tbl2 -``` - -Where: - -| Name | Description | -| ---------------- | ------------------------------------------------------------------------------------- | -| `int1` | Integration where the table that stores prediction results resides. | -| `tbl1` | Table that stores prediction results. | -| `predictor_name` | Name of the model. | -| `int2` | Integration where the data source table used in the inner `SELECT` statement resides. | -| `tbl2` | Data source table used in the inner `SELECT` statement. | - -Let's execute the query. - -```sql -CREATE OR REPLACE TABLE int1.tbl1 ( - SELECT * - FROM int2.tbl2 AS ta - JOIN mindsdb.predictor_name AS tb - WHERE ta.date > '2015-12-31' -); -``` diff --git a/docs/mindsdb_sql/sql/create/trigger.mdx b/docs/mindsdb_sql/sql/create/trigger.mdx deleted file mode 100644 index de333c0f2b1..00000000000 --- a/docs/mindsdb_sql/sql/create/trigger.mdx +++ /dev/null @@ -1,127 +0,0 @@ ---- -title: Create a Trigger -sidebarTitle: Create a Trigger ---- - -## Description - -Triggers enable users to define event-based actions. For example, if a table is updated, then run a query to update predictions. 
- - -Currently, you can create triggers on the following data sources: - -- [MongoDB](/integrations/data-integrations/mongodb) (available for MongoDB Atlas Database), -- [Slack](/integrations/app-integrations/slack), -- [Solace](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/solace_handler), -- [PostgreSQL](/integrations/data-integrations/postgresql) (requires write access). - - -## Syntax - -Here is the syntax for creating a trigger: - -```sql -CREATE TRIGGER trigger_name -ON integration_name.table_name -[COLUMNS column_name1, column_name2, ...] -( - sql_code -) -``` - -By creating a trigger on a data source, every time this data source is updated or new data is inserted, the `sql_code` provided in the statement will be executed. - -You can create a trigger either on a table... - -```sql -CREATE TRIGGER trigger_name -ON integration_name.table_name -( - sql_code -) -``` - -...or on one or more columns of a table. - -```sql -CREATE TRIGGER trigger_name -ON integration_name.table_name -COLUMNS column_name1, column_name2 -( - sql_code -) -``` - -Here is how to list all triggers: - -```sql -SHOW TRIGGERS; -``` - -## Example - -Firstly, connect Slack to MindsDB following [this instruction](/integrations/app-integrations/slack#set-up-a-slack-app-and-generate-tokens) and connect the Slack app to a channel. - -```sql -CREATE DATABASE mindsdb_slack -WITH - ENGINE = 'slack', - PARAMETERS = { - "token": "xoxb-...", - "app_token": "xapp-..." - }; -``` - -Create a model that will be used to answer chat questions every time new messages arrive. Here we use the [OpenAI engine](/integrations/ai-engines/openai), but you can use any [other LLM](/integrations/ai-overview#large-language-models). 
- -```sql -CREATE MODEL chatbot_model -PREDICT answer -USING - engine = 'openai_engine', - prompt_template = 'answer the question: {{text}}'; -``` - -Here is how to generate answers to Slack messages using the model: - -```sql -SELECT s.text AS question, m.answer -FROM chatbot_model m -JOIN mindsdb_slack.messages s -WHERE s.channel_id = 'slack-bot-channel-id' -AND s.user != 'U07J30KPAUF' -AND s.created_at > LAST; -``` - -Let's analyze this query: - -- We select the question from the Slack connection and the answer generated by the model. -- We join the model with the `messages` table. -- In the `WHERE` clause: - - We provide the channel name where the app/bot is integrated. - - We exclude the messages sent by the app/bot. You can find the user ID of the app/bot by querying the `mindsdb_slack.users` table. - - We use the `LAST` keyword to ensure that the model generates answers only to the newly sent messages. - -Finally, create a trigger that will insert an answer generated by the model every time when new messages are sent to the channel. - -```sql -CREATE TRIGGER slack_trigger -ON mindsdb_slack.messages -( - INSERT INTO mindsdb_slack.messages (channel_id, text) - SELECT 'slack-bot-channel-id' AS channel_id, answer AS text - FROM chatbot_model m - JOIN TABLE_DELTA s - WHERE s.user != 'slack-bot-id' # this is to prevent the bot from replying to its own messages - AND s.channel_id = 'slack-bot-channel-id' -); -``` - -Let's analyze this statement: - -- We create a trigger named `slack_trigger`. -- The trigger is created on the `mindsdb_slack.messages` table. Therefore, every time when data is added or updated, the trigger will execute its code. -- We provide the code to be executed by the trigger every time the triggering event takes place. - - We insert an answer generated by the model into the `messages` table. - - The `TABLE_DELTA` stands for the table on which the trigger has been created. - - We exclude the messages sent by the app/bot. 
You can find the user ID of the app/bot by querying the `mindsdb_slack.users` table. diff --git a/docs/mindsdb_sql/sql/create/view.mdx b/docs/mindsdb_sql/sql/create/view.mdx deleted file mode 100644 index 4304bf740b7..00000000000 --- a/docs/mindsdb_sql/sql/create/view.mdx +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Create a View -sidebarTitle: Create a View ---- - -## Description - -The `CREATE VIEW` statement creates a view, which is a great way to do data preparation in MindsDB. A VIEW is a saved `SELECT` statement, which is executed every time we call this view. - -## Syntax - -Here is the syntax: - -```sql -CREATE VIEW [IF NOT EXISTS] project_name.view_name AS ( - SELECT columns - FROM integration_name.table_name AS a - JOIN integration_name.table_name AS p ON a.id = p.id - JOIN ... -); -``` - -Here is how to list all views: - -```sql -SHOW VIEWS; -``` diff --git a/docs/mindsdb_sql/sql/drop/database.mdx b/docs/mindsdb_sql/sql/drop/database.mdx deleted file mode 100644 index 00434abf9c3..00000000000 --- a/docs/mindsdb_sql/sql/drop/database.mdx +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Remove a Data Source -sidebarTitle: Remove a Data Source ---- - -## Description - -The `DROP DATABASE` statement deletes the database. - -## Syntax - -Here is the syntax: - -```sql -DROP DATABASE [IF EXISTS] database_name; -``` - -On execution, we get: - -```sql -Query successfully completed -``` \ No newline at end of file diff --git a/docs/mindsdb_sql/sql/drop/file.mdx b/docs/mindsdb_sql/sql/drop/file.mdx deleted file mode 100644 index cbb31aac223..00000000000 --- a/docs/mindsdb_sql/sql/drop/file.mdx +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: Remove a File -sidebarTitle: Remove a File ---- - -## Description - -The `DROP TABLE` statement is also used to delete a file. - -## Syntax - -Here is the syntax: - -```sql -DROP TABLE files.file_name; -``` - -On execution, we get: - -```sql -Query successfully completed -``` - - -Please note that the uploaded files are tables as well. 
So to remove an uploaded file, use this `DROP TABLE` statement. - diff --git a/docs/mindsdb_sql/sql/drop/jobs.mdx b/docs/mindsdb_sql/sql/drop/jobs.mdx deleted file mode 100644 index 5df742da6a6..00000000000 --- a/docs/mindsdb_sql/sql/drop/jobs.mdx +++ /dev/null @@ -1,36 +0,0 @@ ---- -title: Remove a Job -sidebarTitle: Remove a Job ---- - -## Description - -The `DROP JOB` statement deletes the job. - -## Syntax - -Here is the syntax for deleting a job: - -```sql -DROP JOB [IF EXISTS] [project_name.]job_name; -``` - -The `project_name` value is optional. The `job_name` value indicates the job to be deleted. - -Let's look at some examples: - -```sql -DROP JOB my_project.retrain_and_save_job; -``` - -Here we drop the `retrain_and_save_job` that resides in the `my_project` project. - -And another example: - -```sql -DROP JOB create_table_job; -``` - -Here we drop the `create_table_job` job that resides in the current project. - -To learn more about projects in MindsDB, visit our docs [here](/sql/project). diff --git a/docs/mindsdb_sql/sql/drop/project.mdx b/docs/mindsdb_sql/sql/drop/project.mdx deleted file mode 100644 index c02c87d6fce..00000000000 --- a/docs/mindsdb_sql/sql/drop/project.mdx +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Remove a Project -sidebarTitle: Remove a Project ---- - -## Description - -The `DROP PROJECT` statement deletes the project. - -## Syntax - -Here is the syntax: - -```sql -DROP PROJECT [IF EXISTS] project_name; -``` - -On execution, we get: - -```sql -Query successfully completed -``` diff --git a/docs/mindsdb_sql/sql/drop/table.mdx b/docs/mindsdb_sql/sql/drop/table.mdx deleted file mode 100644 index 18b962b36a0..00000000000 --- a/docs/mindsdb_sql/sql/drop/table.mdx +++ /dev/null @@ -1,36 +0,0 @@ ---- -title: Remove a Table -sidebarTitle: Remove a Table ---- - -## Description - -The `DROP TABLE` statement deletes a table or a file. - - -Please note that this feature is not yet implemented for tables from connected data sources. 
- - -## Syntax - -Here is the syntax: - -```sql -DROP TABLE table_name; -``` - -And for files: - -```sql -DROP TABLE files.file_name; -``` - -On execution, we get: - -```sql -Query successfully completed -``` - - -Please note that the uploaded files are tables as well. So to remove an uploaded file, use this `DROP TABLE` statement. - diff --git a/docs/mindsdb_sql/sql/drop/trigger.mdx b/docs/mindsdb_sql/sql/drop/trigger.mdx deleted file mode 100644 index b1db217e7ff..00000000000 --- a/docs/mindsdb_sql/sql/drop/trigger.mdx +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: Remove a Trigger -sidebarTitle: Remove a Trigger ---- - -## Description - -Triggers enable users to define event-based actions. For example, if a table is updated, then run a query to update predictions. - - -Currently, you can create triggers on the following data sources: [MongoDB](https://docs.mindsdb.com/integrations/data-integrations/mongodb), [Slack](https://docs.mindsdb.com/integrations/app-integrations/slack), [Solace](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/solace_handler). - - -## Syntax - -Here is the syntax for removing a trigger: - -```sql -DROP TRIGGER trigger_name; -``` diff --git a/docs/mindsdb_sql/sql/drop/view.mdx b/docs/mindsdb_sql/sql/drop/view.mdx deleted file mode 100644 index a64571d18bd..00000000000 --- a/docs/mindsdb_sql/sql/drop/view.mdx +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Remove a View -sidebarTitle: Remove a View ---- - -## Description - -The `DROP VIEW` statement deletes the view. 
- -## Syntax - -Here is the syntax: - -```sql -DROP VIEW [IF EXISTS] view_name; -``` - -On execution, we get: - -```sql -Query successfully completed -``` diff --git a/docs/mindsdb_sql/sql/list-data-handlers.mdx b/docs/mindsdb_sql/sql/list-data-handlers.mdx deleted file mode 100644 index 532331be3ad..00000000000 --- a/docs/mindsdb_sql/sql/list-data-handlers.mdx +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: List Data Handlers -sidebarTitle: List Data Handlers ---- - -## Description - -The `SHOW HANDLERS` command lists all available handlers. The `WHERE` clause filters handlers by the type (data or ML). - -## Syntax - -Here is the syntax: - -```sql -SHOW HANDLERS -WHERE type = 'data'; -``` diff --git a/docs/mindsdb_sql/sql/list-projects.mdx b/docs/mindsdb_sql/sql/list-projects.mdx deleted file mode 100644 index 3d559466a3e..00000000000 --- a/docs/mindsdb_sql/sql/list-projects.mdx +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: List Projects -sidebarTitle: List Projects ---- - -## Description - -The `SHOW DATABASES` command lists all available data sources and projects. The `WHERE` clause filters all projects. - -## Syntax - -Here is the syntax: - -```sql -SHOW DATABASES -WHERE type = 'project'; -``` - -Alternatively, you can use the `FULL` keyword to get more information: - -```sql -SHOW FULL DATABASES -WHERE type = 'project'; -``` diff --git a/docs/mindsdb_sql/sql/native-queries.mdx b/docs/mindsdb_sql/sql/native-queries.mdx deleted file mode 100644 index 1dc5ebf98b6..00000000000 --- a/docs/mindsdb_sql/sql/native-queries.mdx +++ /dev/null @@ -1,94 +0,0 @@ ---- -title: Native Queries -sidebarTitle: Native Queries ---- - -The underlying database engine of MindsDB is MySQL. However, you can run queries native to your database engine within MindsDB. - -## Connect your Database to MindsDB - -To run queries native to your database, you must first connect your database to MindsDB using the `CREATE DATABASE` statement. 
- -```sql -CREATE DATABASE example_db -WITH ENGINE = "postgres", -PARAMETERS = { - "user": "demo_user", - "password": "demo_password", - "host": "samples.mindsdb.com", - "port": "5432", - "database": "demo" -}; -``` - -Here we connect the `example_db` database, which is a PostgreSQL database. - -## Run Queries Native to your Database - -Once we have our PostgreSQL database connected, we can run PostgreSQL-native queries. - -### Querying - -To run PostgreSQL-native code, we must nest it within the `SELECT` statement like this: - -```sql -SELECT * FROM example_db ( - SELECT - model, - year, - price, - transmission, - mileage, - fueltype, - mpg, -- miles per galon - ROUND(CAST((mpg / 2.3521458) AS numeric), 1) AS kml, -- kilometers per liter - (date_part('year', CURRENT_DATE)-year) AS years_old, -- age of a car - COUNT(*) OVER (PARTITION BY model, year) AS units_to_sell, -- how many units of a certain model are sold in a year - ROUND((CAST(tax AS decimal) / price), 3) AS tax_div_price -- value of tax divided by price of a car - FROM demo_data.used_car_price -); -``` - -On execution, we get: - -```sql -+-----+----+-----+------------+-------+--------+----+----+---------+-------------+-------------+ -|model|year|price|transmission|mileage|fueltype|mpg |kml |years_old|units_to_sell|tax_div_price| -+-----+----+-----+------------+-------+--------+----+----+---------+-------------+-------------+ -| A1 |2010|9990 |Automatic |38000 |Petrol |53.3|22.7|12 |1 |0.013 | -| A1 |2011|6995 |Manual |65000 |Petrol |53.3|22.7|11 |5 |0.018 | -| A1 |2011|6295 |Manual |107000 |Petrol |53.3|22.7|11 |5 |0.02 | -| A1 |2011|4250 |Manual |116000 |Diesel |70.6|30 |11 |5 |0.005 | -| A1 |2011|6475 |Manual |45000 |Diesel |70.6|30 |11 |5 |0 | -+-----+----+-----+------------+-------+--------+----+----+---------+-------------+-------------+ -``` - -The first line (`SELECT * FROM example_db`) informs MindsDB that we select from a PostgreSQL database. 
After that, we nest a PostgreSQL code within brackets. - -### Creating Views - -We can create a view based on a native query. - -```sql -CREATE VIEW cars FROM example_db ( - SELECT - model, - year, - price, - transmission, - mileage, - fueltype, - mpg, -- miles per galon - ROUND(CAST((mpg / 2.3521458) AS numeric), 1) AS kml, -- kilometers per liter - (date_part('year', CURRENT_DATE)-year) AS years_old, -- age of a car - COUNT(*) OVER (PARTITION BY model, year) AS units_to_sell, -- how many units of a certain model are sold in a year - ROUND((CAST(tax AS decimal) / price), 3) AS tax_div_price -- value of tax divided by price of a car - FROM demo_data.used_car_price -); -``` - -On execution, we get: - -```sql -Query OK, 0 rows affected (x.xxx sec) -``` diff --git a/docs/mindsdb_sql/sql/query-jobs.mdx b/docs/mindsdb_sql/sql/query-jobs.mdx deleted file mode 100644 index ee17887886c..00000000000 --- a/docs/mindsdb_sql/sql/query-jobs.mdx +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Query Jobs -sidebarTitle: Query Jobs ---- - -## Querying Jobs - -Here is how we can view all jobs in a project: - -```sql -SHOW JOBS WHERE project = 'project-name'; - -SELECT * FROM project-name.jobs; -``` - -On execution, we get: - -```sql -+------------------------------------+---------+----------------------------+----------------------------+----------------------------+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| NAME | PROJECT | RUN_START | RUN_END | NEXT_RUN_AT | SCHEDULE_STR | QUERY | 
-+------------------------------------+---------+----------------------------+----------------------------+----------------------------+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| drop_model | mindsdb | 2023-04-01 00:00:00.000000 | [NULL] | 2023-04-01 00:00:00.000000 | [NULL] | DROP MODEL mindsdb.home_rentals_model | -| retrain_model_and_save_predictions | mindsdb | 2023-02-15 19:19:43.210122 | 2023-04-01 00:00:00.000000 | 2023-02-15 19:19:43.210122 | every 2 days | RETRAIN mindsdb.home_rentals_model USING join_learn_process = true; INSERT INTO my_integration.rentals (SELECT m.rental_price, m.rental_price_explain FROM mindsdb.home_rentals_model AS m JOIN example_db.demo_data.home_rentals AS d) | -| save_predictions | mindsdb | 2023-02-15 19:19:48.545580 | [NULL] | 2023-02-15 19:19:48.545580 | every hour | CREATE TABLE my_integration.`result_{{START_DATETIME}}` (SELECT m.rental_price, m.rental_price_explain FROM mindsdb.home_rentals_model AS m JOIN example_db.demo_data.home_rentals AS d) | -+------------------------------------+---------+----------------------------+----------------------------+----------------------------+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -``` - -Or from all projects at once: - -```sql -SHOW JOBS; - -SELECT * -FROM information_schema.jobs; -``` - -## Querying Jobs History - -You can query the history of jobs similar to querying for jobs. Here you can find information about an error if the job didn't execute successfully. 
- -Here is how we can view all jobs history in the current project: - -```sql -SELECT * -FROM log.jobs_history -WHERE project = 'mindsdb'; -``` - -On execution, we get: - -```sql -+------------------------------------+---------+----------------------------+----------------------------+----------------------------+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| NAME | PROJECT | RUN_START | RUN_END | NEXT_RUN_AT | ERROR | QUERY | -+------------------------------------+---------+----------------------------+----------------------------+----------------------------+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| retrain_model_and_save_predictions | mindsdb | 2023-02-15 19:19:43.210122 | 2023-04-01 00:00:00.000000 | 2023-02-15 19:19:43.210122 | [NULL] | RETRAIN mindsdb.home_rentals_model USING join_learn_process = true; INSERT INTO my_integration.rentals (SELECT m.rental_price, m.rental_price_explain FROM mindsdb.home_rentals_model AS m JOIN example_db.demo_data.home_rentals AS d) | -| save_predictions | mindsdb | 2023-02-15 19:19:48.545580 | [NULL] | 2023-02-15 19:19:48.545580 | [NULL] | CREATE TABLE my_integration.`result_{{START_DATETIME}}` (SELECT m.rental_price, m.rental_price_explain FROM mindsdb.home_rentals_model AS m JOIN example_db.demo_data.home_rentals AS d) | 
-+------------------------------------+---------+----------------------------+----------------------------+----------------------------+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -``` - -Please note that the `drop_model` job is not in the `jobs_history` table because it didn't start yet. diff --git a/docs/mindsdb_sql/sql/query-triggers.mdx b/docs/mindsdb_sql/sql/query-triggers.mdx deleted file mode 100644 index 2c450ec9f3f..00000000000 --- a/docs/mindsdb_sql/sql/query-triggers.mdx +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: Query Triggers -sidebarTitle: Query Triggers ---- - -## Description - -Triggers enable users to define event-based actions. For example, if a table is updated, then run a query to update predictions. - - -Currently, you can create triggers on the following data sources: [MongoDB](https://docs.mindsdb.com/integrations/data-integrations/mongodb), [Slack](https://docs.mindsdb.com/integrations/app-integrations/slack), [Solace](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/solace_handler). - - -## Syntax - -Here is the syntax for querying all triggers: - -```sql -SHOW TRIGGERS; -``` diff --git a/docs/mindsdb_sql/sql/show-databases.mdx b/docs/mindsdb_sql/sql/show-databases.mdx deleted file mode 100644 index 99d52c0c2a0..00000000000 --- a/docs/mindsdb_sql/sql/show-databases.mdx +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: List Data Sources -sidebarTitle: List Data Sources ---- - -## Description - -The `SHOW DATABASES` statement lists all connected data sources that MindsDB can access. 
- -## Syntax - -Here is how to list all connected data sources: - -```sql -SHOW DATABASES; -``` diff --git a/docs/mindsdb_sql/sql/use/project.mdx b/docs/mindsdb_sql/sql/use/project.mdx deleted file mode 100644 index 599595b99f9..00000000000 --- a/docs/mindsdb_sql/sql/use/project.mdx +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Use a Project -sidebarTitle: Use a Project ---- - -## Description - -The `USE` statement will change the context of MindsDB to the specified project. This allows you to run subsequent queries within the context of that project. - -## Syntax - -Here is the syntax: - -```sql -USE project_name; -``` - -On execution, we get: - -```sql -Query successfully completed -``` diff --git a/docs/mindsdb_sql/sql_support/case-when.mdx b/docs/mindsdb_sql/sql_support/case-when.mdx deleted file mode 100644 index 48f99d92f83..00000000000 --- a/docs/mindsdb_sql/sql_support/case-when.mdx +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: The CASE WHEN Statement -sidebarTitle: CASE WHEN ---- - -MindsDB supports standard SQL syntax, including the `CASE WHEN` statement. - -The `CASE WHEN` statement is used for conditional logic within queries. It evaluates conditions and returns specific values based on whether each condition is true or false, allowing for conditional output within `SELECT`, `WHERE`, and other clauses. - -```sql -SELECT - CASE - WHEN a=1 THEN a+b - WHEN 1+2=b*2 THEN 0 - WHEN (a+b>2 or b<c) and a>b THEN b - ELSE c - END -FROM table_name; -``` diff --git a/docs/mindsdb_sql/sql_support/cte.mdx b/docs/mindsdb_sql/sql_support/cte.mdx deleted file mode 100644 index be784ed4c45..00000000000 --- a/docs/mindsdb_sql/sql_support/cte.mdx +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: Common Table Expressions -sidebarTitle: CTEs ---- - -MindsDB supports standard SQL syntax, including Common Table Expressions (CTEs). 
- -CTEs are used to create temporary, named result sets that simplify complex queries, enhance readability, and allow for modular query design by breaking down large queries into manageable parts. - -```sql -WITH table_name1 AS ( - - SELECT columns - FROM table1 t1 - JOIN table2 t2 - ON t1.col = t2.col -), - -table_name2 AS ( - - SELECT columns - FROM table1 t1 - JOIN table2 t2 - ON t1.col = t2.col -) - -SELECT columns -FROM table_name1 t1 -JOIN table_name2 t2 -ON t1.col = t2.col; -``` diff --git a/docs/model-context-protocol/anthropic.mdx b/docs/model-context-protocol/anthropic.mdx deleted file mode 100644 index 0b594db756a..00000000000 --- a/docs/model-context-protocol/anthropic.mdx +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: MindsDB's MCP Server with Anthropic's MCP Connector -sidebarTitle: Anthropic ---- - -This tutorial walks you through the usage of MindsDB's MCP Server with [Anthropic's MCP Connector](https://docs.anthropic.com/en/docs/agents-and-tools/mcp-connector). - -## Setup - -Follow the steps below to connect MindsDB's MCP Server to Anthropic. - -1. Start MindsDB's MCP Server following [this guide](/mcp/usage). - -2. Expose the local instance of MindsDB via [ngrok](https://ngrok.com/) or similar tools. - -3. Get the Anthropic API key and download the `anthropic` package. - -## Chat with Data - -Here is how to connect MindsDB's MCP Server to Anthropic. 
- -```python -import anthropic - -client = anthropic.Anthropic( - api_key = "anthropic-api-key" -) - -response = client.beta.messages.create( - model = "claude-sonnet-4-20250514", - max_tokens = 1000, - messages = [ - {"role": "user", "content": "What tools do you have available?"} - ], - mcp_servers = [ - { - "type": "url", - "url": "https:///mcp/sse", - "name": "mindsdb-mcp", - "authorization_token": "" - } - ], - betas = ["mcp-client-2025-04-04"] -) - -print(response) -``` - -Here is the output: - -```bash -BetaMessage(id='msg_01SrYiUsK7Jb4a5BA2nszKsc', container=None, content=[BetaTextBlock(citations=None, text="I have access to two tools for working with MindsDB:\n\n1. **mindsdb-mcp_query** - Execute SQL queries against MindsDB\n - Parameters:\n - `query` (required): The SQL query to execute\n - `context` (optional): Additional context parameters for the query\n - Returns: Query results or error information\n\n2. **mindsdb-mcp_list_databases** - List all databases and their tables in MindsDB\n - Parameters: None required\n - Returns: A list of all databases and their associated tables\n\nThese tools allow me to help you explore your MindsDB instance, run SQL queries, and work with your data and ML models. Would you like me to start by showing you what databases are available, or do you have a specific query you'd like to run?", type='text')], model='claude-sonnet-4-20250514', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=BetaUsage(cache_creation=None, cache_creation_input_tokens=0, cache_read_input_tokens=0, input_tokens=572, output_tokens=183, server_tool_use=None, service_tier='standard')) -``` - -Follow the [MCP Connector docs from Anthropic](https://docs.anthropic.com/en/docs/agents-and-tools/mcp-connector) to learn more. 
diff --git a/docs/model-context-protocol/cursor_usage.mdx b/docs/model-context-protocol/cursor_usage.mdx deleted file mode 100644 index 8fd1fb4dace..00000000000 --- a/docs/model-context-protocol/cursor_usage.mdx +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: MindsDB's MCP Server with Cursor's MCP Client -sidebarTitle: Cursor ---- - -This tutorial walks you through the usage of MindsDB's MCP Server with [Cursor](https://www.cursor.com/) as an MCP Client. - - -See a [video tutorial here](https://www.youtube.com/watch?v=f5VFd5LIuPg). - - -## Setup - -Follow the steps below to connect MindsDB's MCP Server to Cursor. - -1. Start MindsDB's MCP Server following [this guide](/mcp/usage). - -2. Open Cursor, go to the Cursor Settings, open the MCP tab, and click on *Add new global MCP server*. Alternatively, go to the Cursor settings -> Features -> MCP Servers. - -

- -

- -3. Add the below content to the `mcp.json` file. - - ```yml - { - "mcpServers": { - "mindsdb": { - "url": "http://127.0.0.1:47334/mcp/sse" - } - } - } - ``` - -4. Ensure that MindsDB is listed as an MCP server. - -

- -

- -## Chat with Data - -1. Open the Cursor chat window and select the Agent mode from the dropdown. - -

- -

- -2. Ask questions over your data. *Note that you need to approve each call of the MCP server’s tools by clicking on Run tool.* - -

- -

- -3. The agent provides an answer with helpful suggestions of follow-up information that can be extracted from the available data. - -

- -

diff --git a/docs/model-context-protocol/openai.mdx b/docs/model-context-protocol/openai.mdx deleted file mode 100644 index 3d9736dde9e..00000000000 --- a/docs/model-context-protocol/openai.mdx +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: MindsDB's MCP Server with OpenAI's Remote MCP -sidebarTitle: OpenAI ---- - -This tutorial walks you through the usage of MindsDB's MCP Server with [OpenAI's Remote MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). - -## Setup - -Follow the steps below to connect MindsDB's MCP Server to OpenAI. - -1. Start MindsDB's MCP Server following [this guide](/mcp/usage). - -2. Expose the local instance of MindsDB via [ngrok](https://ngrok.com/) or similar tools. - -3. Get the OpenAI API key and download the `openai` package. - -## Chat with Data - -Here is how to connect MindsDB's MCP Server to OpenAI. - -```python -import openai - -client = openai.OpenAI( - api_key = 'openai-api-key' -) - -response = client.responses.create( - model = "o3", - tools = [ - { - "type": "mcp", - "server_label": "mdb", - "server_url": "https:///mcp/sse", - "headers": { "Authorization": "Bearer " }, - "require_approval": "never", - } - ], - input = "What tools do you have available?" 
-) - -print(response) -``` - -Here is the output: - -```bash -Response(id='resp_68305d877eac81918e05a35beb23c40f054f254057b1b9a9', created_at=1748000135.0, error=None, incomplete_details=None, instructions=None, metadata={}, model='o3-2025-04-16', object='response', output=[McpListTools(id='mcpl_68305d87913c8191ade2e249dc9a7cce054f254057b1b9a9', server_label='mdb', tools=[McpListToolsTool(input_schema={'properties': {'query': {'title': 'Query', 'type': 'string'}, 'context': {'anyOf': [{'type': 'object'}, {'type': 'null'}], 'default': None, 'title': 'Context'}}, 'required': ['query'], 'title': 'queryArguments', 'type': 'object'}, name='query', annotations=None, description='\n Execute a SQL query against MindsDB\n\n Args:\n query: The SQL query to execute\n context: Optional context parameters for the query\n\n Returns:\n Dict containing the query results or error information\n '), McpListToolsTool(input_schema={'properties': {}, 'title': 'list_databasesArguments', 'type': 'object'}, name='list_databases', annotations=None, description='\n List all databases in MindsDB along with their tables\n\n Returns:\n Dict containing the list of databases and their tables\n ')], type='mcp_list_tools', error=None), ResponseReasoningItem(id='rs_68305d8c00c08191964ba4e0b011f98a054f254057b1b9a9', summary=[], type='reasoning', encrypted_content=None, status=None), ResponseOutputMessage(id='msg_68305d8ee2cc8191966e94f464677dab054f254057b1b9a9', content=[ResponseOutputText(annotations=[], text='I currently have access to two kinds of tools:\n\n1. Image Input \n • I can receive an image along with your message and analyze the visible content (objects, text, layout, etc.) to help answer questions or perform tasks related to the image.\n\n2. MindsDB SQL Tools \n • mcp_mdb.list_databases – Lists the databases and tables that are registered in the MindsDB environment. 
\n • mcp_mdb.query – Lets me run SQL queries against those databases and return the results to you.\n\nLet me know if you’d like me to use either of these tools!', type='output_text')], role='assistant', status='completed', type='message')], parallel_tool_calls=True, temperature=1.0, tool_choice='auto', tools=[Mcp(server_label='mdb', server_url='https://5a52-88-203-84-191.ngrok-free.app/', type='mcp', allowed_tools=None, headers={'Authorization': ''}, require_approval='always')], top_p=1.0, background=False, max_output_tokens=None, previous_response_id=None, reasoning=Reasoning(effort='medium', generate_summary=None, summary=None), service_tier='default', status='completed', text=ResponseTextConfig(format=ResponseFormatText(type='text')), truncation='disabled', usage=ResponseUsage(input_tokens=136, input_tokens_details=InputTokensDetails(cached_tokens=0), output_tokens=192, output_tokens_details=OutputTokensDetails(reasoning_tokens=64), total_tokens=328), user=None, store=True) -``` - -Follow the [Remote MCP docs from OpenAI](https://platform.openai.com/docs/guides/tools-remote-mcp) to learn more. diff --git a/docs/model-context-protocol/overview.mdx b/docs/model-context-protocol/overview.mdx deleted file mode 100644 index ba961e8aadc..00000000000 --- a/docs/model-context-protocol/overview.mdx +++ /dev/null @@ -1,46 +0,0 @@ ---- -title: Model Context Protocol (MCP) -sidebarTitle: Overview ---- - -The **Model Context Protocol (MCP)** facilitates real-time communication between MCP clients, such as LLMs, AI agents, and AI applications, and MCP servers like MindsDB. - -**MindsDB is an MCP server** that enables intelligent applications to query and reason over federated data from databases, data warehouses, and applications. - -## Key Features - -* **Unified Data Gateway** - - MindsDB abstracts the complexity of dealing with disparate data sources. It enables AI apps and agents to run powerful, federated queries across structured and unstructured data systems. 
- -* **Seamless User Experience** - - MindsDB enhances MCP implementations with security, monitoring, and governance. It includes built-in integrations to ensure compatibility with traditional and non-MCP applications. - -* **Advanced AI Workflows** - - MindsDB supports composite AI operations like multi-source joins and orchestration of different models or services within a single query, which go beyond the native capabilities of most LLMs using MCP alone. - -## Protocol Overview - -MCP establishes a bidirectional communication channel between clients and servers, enabling LLMs, agents, or apps to execute queries over federated data infrastructures. - -

- MCP Protocol Flow Diagram -

- -Federated data refers to data distributed across multiple systems, formats, or platforms, whether on-premises or in the cloud. - -With MindsDB as your MCP server, you can treat this distributed data as a **single virtual database**. - -## How It Works - -Here's a simplified overview of the MCP data flow: - -1. The client connects to the MCP server. -2. A query is issued from the client to the MCP server. -3. MindsDB routes the query to the appropriate federated data sources. -4. The data sources return results to MindsDB. -5. MindsDB returns unified results back to the client. - -This enables AI-native applications to deliver rich, real-time insights over complex enterprise data with minimal integration effort. diff --git a/docs/model-context-protocol/usage.mdx b/docs/model-context-protocol/usage.mdx deleted file mode 100644 index 43ed653b8d8..00000000000 --- a/docs/model-context-protocol/usage.mdx +++ /dev/null @@ -1,95 +0,0 @@ ---- -title: MindsDB's MCP Server Usage and Tools -sidebarTitle: Usage ---- - -**MindsDB** is an MCP server that enables your MCP applications to answer questions over large-scale federated data spanning databases, data warehouses, and SaaS applications. - -## Start MindsDB as an MCP Server - -Follow the steps below to use MindsDB as an MCP server. - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). - -2. [Connect your data source](/mindsdb_sql/sql/create/database) and/or [upload files](/mindsdb_sql/sql/create/file) to MindsDB in order to ask questions over your data. - - - You can use our sample dataset that stores the sales manager data. - - ```sql - CREATE DATABASE sales_manager_data - WITH ENGINE = "postgres", - PARAMETERS = { - "user": "demo_user", - "password": "demo_password", - "host": "samples.mindsdb.com", - "port": "5432", - "database": "sales_manager_data" - }; - ``` - - -3. Start MindsDB MCP server. 
- - * **Without authentication** (suitable for local tools): - - ```bash - docker run --name mindsdb_container -p 47334:47334 mindsdb/mindsdb - ``` - - * **With PAT authentication** (suitable for remote): - - ```bash - docker run --name mindsdb_container -p 47334:47334 -e MINDSDB_USERNAME=admin -e MINDSDB_PASSWORD=password123 mindsdb/mindsdb - ``` - - Get a Bearer token: - ```bash - curl -X POST -d '{"username":"admin","password":"password123"}' -H "Content-Type: application/json" http://localhost:47334/api/login - ``` - Use this token as `Authorization: Bearer <token>` in your MCP client. - - * **With OAuth 2.0** (for enterprise deployments): configure `MINDSDB_MCP_OAUTH_ENABLED=true` along with `MINDSDB_MCP_OAUTH_ISSUER_URL`, `MINDSDB_MCP_OAUTH_CLIENT_ID`, and `MINDSDB_MCP_OAUTH_CLIENT_SECRET`. - -4. To confirm the MindsDB MCP server is running use `http://127.0.0.1:47334/mcp/status`. A successful response means your MCP environment is ready. - - -## MCP Capabilities - -### Tools - -**`query`** — Executes SQL queries against MindsDB using MySQL syntax. - -Parameters: -- `query` (required): SQL query string -- `context` (optional): Dict with default database, e.g. `{"db": "my_postgres"}` - -Returns one of: -- `{"type": "table", "column_names": [...], "data": [...]}` — for SELECT results -- `{"type": "ok", "affected_rows": N}` — for INSERT/UPDATE/DELETE -- `{"type": "error", "error_code": N, "error_message": "..."}` — on failure - -### Resources - -MCP resources expose schema information for discovery: - -| Resource URI | Description | -|---|---| -| `schema://databases` | Lists all connected data sources | -| `schema://databases/{db}/tables` | Lists tables in a database | -| `schema://databases/{db}/tables/{table}/columns` | Lists columns with types | -| `schema://knowledge_bases` | Lists knowledge bases | - -### Prompts - -**`sample_table`** — Generates instructions to fetch 5 sample rows and describe a table's structure. 
- -## Transport Modes - -- **HTTP (SSE)**: `http://127.0.0.1:47334/mcp/sse` -- **HTTP (Streamable)**: `http://127.0.0.1:47334/mcp/streamable` -- **Stdio**: run with `--mcp-stdio` flag for local stdio-based transport - -## Configuration - -CORS, rate limiting, DNS rebinding protection, and OAuth settings for the MCP server are configured via the `api.mcp` section of `config.json` or the corresponding environment variables. See [Extend the Default MindsDB Configuration](/setup/custom-config#mcp-api) for the full parameter reference. diff --git a/docs/openapi.yml b/docs/openapi.yml deleted file mode 100644 index 0820951891e..00000000000 --- a/docs/openapi.yml +++ /dev/null @@ -1,2539 +0,0 @@ -openapi: 3.0.0 -info: - title: MindsDB API - description: >- - OpenAPI Specification for MindsDB's REST API. Each API Endpoint corresponds - to a specific SQL Statement e.g POST /model => CREATE MODEL - version: 0.0.1 - contact: - email: admin@mindsdb.com - license: - name: Server Side Public License (SSPL v1) - url: 'https://github.com/mindsdb/mindsdb/blob/main/LICENSE' -servers: - - url: 'http://127.0.0.1:47334/' - description: MindsDB local deployments -components: - schemas: - Database: - type: object - properties: - name: - type: string - engine: - type: string - description: Handler used to create this database (e.g. 
postgres) - type: - type: string - description: Type of database (data | project | system) - Model: - type: object - properties: - name: - type: string - accuracy: - type: number - description: Accuracy of trained model between 0 and 1 - active: - type: boolean - description: Whether or not this model is currently the active version - version: - type: number - description: Version of this model - status: - type: string - description: Current status of this model (generating | creating | complete | error) - predict: - type: string - description: Column name that this model predicts - mindsdb_version: - type: string - description: MindsDB version associated with this model - error: - type: string - description: Error encountered during training, if applicable - fetch_data_query: - type: string - description: SQL query used to fetch training data for this model - created_at: - type: string - description: Time model was created at in YYYY-MM-DD HH:MM:SS format (trained models only) - training_time: - type: string - description: How long training this model took in HH:MM:SS format (trained models only) - update: - type: string - description: Set to "available" when a new version of MindsDB is available that makes the model obsolete, or when new data is available in the data that was used to train the model (trained models only). - Project: - type: object - properties: - name: - type: string - Table: - type: object - properties: - name: - type: string - type: - type: string - description: Type of table (data | view) - View: - type: object - properties: - name: - type: string - query: - type: string - description: SELECT query used to create the view - File: - type: object - properties: - columns: - type: array - items: - type: string - name: - type: string - row_count: - type: number - Skill: - type: object - properties: - name: - type: string - type: - type: string - description: Type of skill (text2sql | knowledge_base). 
- source: # used when type = knowledge_base - type: string - description: Used to store a knowledge_base object when type is set to knowledge_base. - database: # used when type = text2sql - type: string - description: Used to store a data source connection when type is set to text2sql. - tables: # used when type = text2sql - type: array - items: - type: string - description: Used to store table(s) names when type is set to text2sql. - description: - type: string - description: Skill description is important for an agent to decide which skill to use. - Agent: - type: object - properties: - name: - type: string - model: - type: string - description: A conversational model used by an agent - skills: - type: array - items: - type: string - description: One or more skills that an agent can use - Chatbot: - type: object - properties: - name: - type: string - database_name: - type: string - description: Name of the connection to a chat app like Slack or MS Teams - agent_name: - type: string - description: Agent object created beforehand. Alternatively, provide a large language model (LLM) using the `model_name` parameter. - Job: - type: object - properties: - name: - type: string - query: - type: string - description: Tasks to be executed by the job - if_query: - type: string - description: Optional. Condition to be fulfilled before the job executes - start_at: - type: string - format: date-time - description: Optional. Start date/time of the job - end_at: - type: string - format: date-time - description: Optional. End date/time of the job - schedule_str: - type: string - description: Periodicity of the job (e.g. every 1 minute) -paths: - '/api/databases': - get: - summary: Returns a list of database names. - description: Gets all databases created by the user. 
- responses: - '200': - description: A JSON array of database names - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/Database' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - post: - summary: Creates a new database connection. - description: Creates a new database connection. The example parameters below are for connecting to [MySQL](/data-integrations/mysql). See [here](/data-integrations/all-data-integrations) for which parameters to use for your datasource. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - database: - type: object - properties: - name: - type: string - engine: - type: string - parameters: - type: object - description: Parameters used to connect to your data source. These example parameters are for connecting to [MySQL](/data-integrations/mysql). See [here](/data-integrations/all-data-integrations) for which parameters to use for your datasource. - properties: - user: - type: string - password: - type: string - host: - type: string - port: - type: string - database: - type: string - responses: - # OpenAPI spec defines '200' status as a successful operation, so even though the - # response code is '201', we need to use '200' so docs are properly generated. 
- '200': - description: The created database - content: - application/json: - schema: - $ref: '#/components/schemas/Database' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '400': - description: Bad request format - content: - application/json: - schema: - type: object - items: - type: string - '409': - description: Database already exists - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/databases/status': - post: - summary: Checks the status of a database connection. - description: Checks the status of a database connection by validating the connection parameters. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - engine: - type: string - required: [engine] - additionalProperties: true - responses: - '200': - description: Response always returns HTTP 200, but status is inside the body - content: - application/json: - schema: - type: object - properties: - status: - type: string - enum: ["success", "connection_error", "redirect_required"] - detail: - type: string - description: Additional information about any errors - redirect_url: - type: string - description: URL to redirect to if status is "redirect_required" - required: [status] - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/databases/{databaseName}': - get: - summary: Gets info about existing database. - description: Gets info about an existing database. 
- parameters: - - name: databaseName - in: path - description: Name of existing database - required: true - schema: - type: string - responses: - '200': - description: A JSON object with database informations - content: - application/json: - schema: - $ref: '#/components/schemas/Database' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Database not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - put: - summary: Updates an existing database connection. - description: Updates an existing database connection, or creates a new connection if one doesn't exist. The example parameters below are for updating a [MySQL](/data-integrations/mysql) connection. See [here](/data-integrations/all-data-integrations) for which parameters to use for your datasource. - parameters: - - name: databaseName - in: path - description: The name of the project - required: true - schema: - type: string - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - database: - type: object - properties: - engine: - type: string - parameters: - type: object - description: Parameters used to connect to your data source. These example parameters are for connecting to [MySQL](/data-integrations/mysql). See [here](/data-integrations/all-data-integrations) for which parameters to use for your datasource. 
- properties: - user: - type: string - password: - type: string - host: - type: string - port: - type: string - database: - type: string - responses: - '200': - description: Database was successfully updated - content: - application/json: - schema: - $ref: '#/components/schemas/Database' - '400': - description: Bad request format - content: - application/json: - schema: - type: object - items: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - delete: - summary: Deletes an existing database. - description: Deletes an existing database connection by name. - parameters: - - name: databaseName - in: path - description: Name of existing database to delete - required: true - schema: - type: string - responses: - '200': - description: An empty response indicates success - content: - application/json: - schema: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Database not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/projects': - get: - summary: Returns a list of projects. - description: Gets all projects created by the user. 
- responses: - '200': - description: A JSON array of projects - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/Project' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/projects/{projectName}': - get: - summary: Get a project by name. - description: Gets a project created by the user. - parameters: - - name: projectName - in: path - description: The name of the project - required: true - schema: - type: string - responses: - '200': - description: The returned project - content: - application/json: - schema: - $ref: '#/components/schemas/Project' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Project not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/projects/{projectName}/models': - post: - summary: This endpoint trains a new ML Model. - description: Starts training a new Machine Learning model. - parameters: - - name: projectName - in: path - description: The name of the project - required: true - schema: - type: string - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - query: - type: string - description: The SQL CREATE MODEL statement used to train this model. 
See the [CREATE MODEL](https://docs.mindsdb.com/sql/create/model) statement - responses: - '200': - description: Model training started - content: - application/json: - schema: - $ref: '#/components/schemas/Model' - '400': - description: Bad request - content: - application/json: - schema: - type: object - items: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Project not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - get: - summary: Returns a list of models. - description: Gets info about all models in the project. - parameters: - - name: projectName - in: path - description: The name of the project - required: true - schema: - type: string - responses: - '200': - description: A JSON array of models names - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/Model' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Project not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/projects/{projectName}/models/{modelName}': - get: - summary: Gets info about specific model. - description: Gets info for a specific model in the project. 
- parameters: - - name: projectName - in: path - description: The name of the project - required: true - schema: - type: string - - name: modelName - in: path - description: The name of the model - required: true - schema: - type: string - responses: - '200': - description: Model information - content: - application/json: - schema: - $ref: '#/components/schemas/Model' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Model not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - delete: - summary: Deletes an existing model. - description: Deletes a model in the project. - parameters: - - name: projectName - in: path - description: The name of the project - required: true - schema: - type: string - - name: modelName - in: path - description: The name of the model - required: true - schema: - type: string - responses: - '200': - description: Empty response if delete is successful - content: - application/json: - schema: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Project or model not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/projects/{projectName}/models/{modelName}/predict': - post: - summary: This endpoint fetches predictions from the model. - description: Uses this model to make a prediction for values of new data. 
- parameters: - - name: projectName - in: path - description: The name of the project - required: true - schema: - type: string - - name: modelName - in: path - description: The name of the model - required: true - schema: - type: string - requestBody: - required: true - content: - application/json: - schema: - type: array - properties: - data: - type: object - description: The data for querying the model as key/value e.g column name/value. Example with [home rental prices](https://docs.mindsdb.com/sql/tutorials/home-rentals) - [{"sqft":823}] - responses: - '200': - description: Model queried succesfully - content: - application/json: - schema: - type: object - items: - type: string - description: Prediction object that contains data columns and the target value, confidence, lower and upper bound. - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Project or model not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/projects/{projectName}/models/{modelName}/describe': - get: - summary: Describe a model - description: Gets the attributes of a specific model. - parameters: - - name: projectName - in: path - description: The name of the project - required: true - schema: - type: string - - name: modelName - in: path - description: The name of the model - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - type: object - properties: - attribute: - type: string - description: Attribute of the model to describe. See our [DESCRIBE documentation](https://docs.mindsdb.com/sql/api/describe). E.g. 
info, features, model - responses: - '200': - description: A JSON object with model informations - content: - application/json: - schema: - type: object - items: - type: string - description: Array containing model information - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Model not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/projects/{projectName}/views': - post: - summary: This endpoint creates a view which is a result-set of the SELECT statement. - description: Creates a [view](https://docs.mindsdb.com/sql/create/view) from a SELECT statement. - parameters: - - name: projectName - in: path - description: The name of the project - required: true - schema: - type: string - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - view: - type: object - properties: - name: - type: string - description: Name of the view - query: - type: string - description: The SQL query that will save the result-set in a view. - responses: - '200': - description: View created - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/View' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Project not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - get: - summary: List all views in a project - description: Gets all views in a project. 
- parameters: - - name: projectName - in: path - description: The name of the project - required: true - schema: - type: string - responses: - '200': - description: A JSON object with view names - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/View' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Project not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/projects/{projectName}/views/{viewName}': - get: - summary: SELECT from VIEW - description: Gets a single view. - parameters: - - name: projectName - in: path - description: The name of the project - required: true - schema: - type: string - - name: viewName - in: path - description: The name of the view - required: true - schema: - type: string - responses: - '200': - description: A JSON object with VIEW data - content: - application/json: - schema: - $ref: '#/components/schemas/View' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: View or project not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - delete: - summary: Deletes an existing view. - description: Deletes an existing view in the project. 
- parameters: - - name: projectName - in: path - description: Project name the view is in - required: true - schema: - type: string - - name: viewName - in: path - description: View name to delete - required: true - schema: - type: string - responses: - '200': - description: Successfully deleted - content: - application/json: - schema: - type: object - items: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: View or project not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - put: - summary: Updates an existing view. - description: Updates an existing view, or creates a new view if one doesn't exist. - parameters: - - name: projectName - in: path - description: The name of the project - required: true - schema: - type: string - - name: viewName - in: path - description: The name of the view - required: true - schema: - type: string - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - view: - type: object - properties: - name: - type: string - description: Name of the view - query: - type: string - description: The SQL query that will save the result-set in a view.
- responses: - '200': - description: View was successfully updated - content: - application/json: - schema: - $ref: '#/components/schemas/View' - '400': - description: Bad request format - content: - application/json: - schema: - type: object - items: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/databases/{databaseName}/tables': - post: - summary: This endpoint creates a new table with the predictions result-set. - description: Creates a new table from a select query. See [CREATE table documentation](https://docs.mindsdb.com/sql/create/table). - parameters: - - name: databaseName - in: path - description: Name of the database - required: true - schema: - type: string - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - table: - type: object - properties: - name: - type: string - select: - type: string - description: SELECT statement to create the table from - replace: - type: boolean - description: Whether or not to delete a pre-existing table before creating it - responses: - '200': - description: Table created - content: - application/json: - schema: - $ref: '#/components/schemas/Table' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Database not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - get: - summary: Returns a list of tables in a database. - description: Gets metadata for all tables in a database. 
- parameters: - - name: databaseName - in: path - description: The name of the database - required: true - schema: - type: string - responses: - '200': - description: A JSON array of tables - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/Table' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Database not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/databases/{databaseName}/tables/{tableName}': - get: - summary: SELECT from Table - description: Gets metadata for a table in a database. - parameters: - - name: databaseName - in: path - description: The name of the database - required: true - schema: - type: string - - name: tableName - in: path - description: The name of the table - required: true - schema: - type: string - responses: - '200': - description: A JSON object with SELECT data - content: - application/json: - schema: - $ref: '#/components/schemas/Table' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Table or database not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - delete: - summary: Deletes an existing table. - description: Deletes a table in a database. 
- parameters: - - name: databaseName - in: path - description: The name of the database - required: true - schema: - type: string - - name: tableName - in: path - description: Table name to delete - required: true - schema: - type: string - responses: - '200': - description: Successfully deleted - content: - application/json: - schema: - type: object - items: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Table or database not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/projects/{projectName}/skills': - get: - summary: Returns a list of all created skills. - description: Gets all skills created by the user. - parameters: - - name: projectName - in: path - description: The name of the project where agent resides - required: true - schema: - type: string - responses: - '200': - description: A JSON array of skill names - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/Skill' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - post: - summary: Creates a new skill. - description: Creates a new skill. - parameters: - - name: projectName - in: path - description: The name of the project where agent resides - required: true - schema: - type: string - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - skill: - type: object - properties: - name: - type: string - type: - type: string - description: Type of skill (text2sql | knowledge_base). 
- source: # used when type = knowledge_base - type: string - description: Used to store a knowledge_base object when type is set to knowledge_base. - database: # used when type = text2sql - type: string - description: Used to store a data source connection when type is set to text2sql. - tables: # used when type = text2sql - type: array - items: - type: string - description: Used to store table(s) names when type is set to text2sql. - description: - type: string - description: Skill description is important for an agent to decide which skill to use. - responses: - '200': - description: Created a skill - content: - application/json: - schema: - $ref: '#/components/schemas/Skill' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '400': - description: Bad request format - content: - application/json: - schema: - type: object - items: - type: string - '409': - description: Skill already exists - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/projects/{projectName}/skills/{skillName}': - get: - summary: Gets info about existing skills. - description: Gets info about an existing skill. 
- parameters: - - name: projectName - in: path - description: The name of the project where agent resides - required: true - schema: - type: string - - name: skillName - in: path - description: Name of existing skill - required: true - schema: - type: string - responses: - '200': - description: A JSON object with skills - content: - application/json: - schema: - $ref: '#/components/schemas/Skill' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Skill not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - put: - summary: Updates an existing skill. - description: Updates an existing skill, or creates a new skill if one doesn't exist. - parameters: - - name: projectName - in: path - description: The name of the project where agent resides - required: true - schema: - type: string - - name: skillName - in: path - description: The name of the skill - required: true - schema: - type: string - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - skill: - type: object - properties: - name: - type: string - type: - type: string - description: Type of skill (text2sql | knowledge_base). - source: # used when type = knowledge_base - type: string - description: Used to store a knowledge_base object when type is set to knowledge_base. - database: # used when type = text2sql - type: string - description: Used to store a data source connection when type is set to text2sql. - tables: # used when type = text2sql - type: array - items: - type: string - description: Used to store table(s) names when type is set to text2sql. - description: - type: string - description: Skill description is important for an agent to decide which skill to use. 
- responses: - '200': - description: Skill was successfully updated - content: - application/json: - schema: - $ref: '#/components/schemas/Skill' - '400': - description: Bad request format - content: - application/json: - schema: - type: object - items: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - delete: - summary: Deletes an existing skill. - description: Deletes an existing skill by name. - parameters: - - name: projectName - in: path - description: The name of the project where agent resides - required: true - schema: - type: string - - name: skillName - in: path - description: Name of existing skill to delete - required: true - schema: - type: string - responses: - '200': - description: An empty response indicates success - content: - application/json: - schema: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Skill not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/projects/{projectName}/agents': - get: - summary: Returns a list of all created agents. - description: Gets all agents created by the user. 
- parameters: - - name: projectName - in: path - description: The name of the project where agent resides - required: true - schema: - type: string - responses: - '200': - description: A JSON array of skill names - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/Agent' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - post: - summary: Creates a new agent. - description: Creates a new agent. - parameters: - - name: projectName - in: path - description: The name of the project where agent resides - required: true - schema: - type: string - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - agent: - type: object - properties: - name: - type: string - model: - type: string - description: A conversational model used by an agent - skills: - type: array - items: - type: string - description: One or more skills that an agent can use - responses: - '200': - description: Created an agent - content: - application/json: - schema: - $ref: '#/components/schemas/Agent' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '400': - description: Bad request format - content: - application/json: - schema: - type: object - items: - type: string - '409': - description: Agent already exists - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/projects/{projectName}/agents/{agentName}': - get: - summary: Gets info about existing agent. - description: Gets info about an existing agent. 
- parameters: - - name: projectName - in: path - description: The name of the project where agent resides - required: true - schema: - type: string - - name: agentName - in: path - description: Name of existing agent - required: true - schema: - type: string - responses: - '200': - description: A JSON object with skills - content: - application/json: - schema: - $ref: '#/components/schemas/Agent' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Agent not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - put: - summary: Updates an existing agent. - description: Updates an existing agent, or creates a new agent if one doesn't exist. - parameters: - - name: projectName - in: path - description: The name of the project where agent resides - required: true - schema: - type: string - - name: agentName - in: path - description: The name of the agent - required: true - schema: - type: string - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - agent: - type: object - properties: - name: - type: string - model: - type: string - description: A conversational model used by an agent - skills: - type: array - items: - type: string - description: One or more skills that an agent can use - responses: - '200': - description: Agent was successfully updated - content: - application/json: - schema: - $ref: '#/components/schemas/Agent' - '400': - description: Bad request format - content: - application/json: - schema: - type: object - items: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: 
object - items: - type: string - delete: - summary: Deletes an existing agent. - description: Deletes an existing agent by name. - parameters: - - name: projectName - in: path - description: The name of the project where agent resides - required: true - schema: - type: string - - name: agentName - in: path - description: Name of existing agent to delete - required: true - schema: - type: string - responses: - '200': - description: An empty response indicates success - content: - application/json: - schema: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Agent not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/projects/{projectName}/agents/{agentName}/completions': - post: - summary: Quesries an agent. - description: Quesries an agent. 
- parameters: - - name: projectName - in: path - description: The name of the project where agent resides - required: true - schema: - type: string - - name: agentName - in: path - description: The name of the agent - required: true - schema: - type: string - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - messages: - type: array - items: - type: object - properties: - question: - type: string - description: The question being asked - answer: - type: string - nullable: true - description: The answer to the question (can be null) - responses: - '200': - description: Message submission successful - content: - application/json: - schema: - type: object - properties: - message: - type: object - properties: - role: - type: string - description: The role of the responder - content: - type: string - description: The content of the response - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '400': - description: Bad request format - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/projects/{projectName}/chatbots': - get: - summary: Returns a list of all created chatbots. - description: Gets all chatbots created by the user. 
- parameters: - - name: projectName - in: path - description: The name of the project where agent resides - required: true - schema: - type: string - responses: - '200': - description: A JSON array of skill names - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/Chatbot' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - post: - summary: Creates a new chatbot. - description: Creates a new chatbot. - parameters: - - name: projectName - in: path - description: The name of the project where agent resides - required: true - schema: - type: string - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - chatbot: - type: object - properties: - name: - type: string - database_name: - type: string - description: Name of the connection to a chat app like Slack or MS Teams - agent_name: - type: string - description: Agent object created beforehand. Alternatively, provide a large language model (LLM) using the `model_name` parameter. - responses: - '200': - description: Created a chatbot - content: - application/json: - schema: - $ref: '#/components/schemas/Chatbot' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '400': - description: Bad request format - content: - application/json: - schema: - type: object - items: - type: string - '409': - description: Chatbot already exists - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/projects/{projectName}/chatbots/{chatbotName}': - get: - summary: Gets info about existing chatbots. 
- description: Gets info about an existing chatbot. - parameters: - - name: projectName - in: path - description: The name of the project where agent resides - required: true - schema: - type: string - - name: chatbotName - in: path - description: Name of existing chatbot - required: true - schema: - type: string - responses: - '200': - description: A JSON object with chatbots - content: - application/json: - schema: - $ref: '#/components/schemas/Chatbot' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Chatbot not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - put: - summary: Updates an existing chatbot. - description: Updates an existing chatbot, or creates a new chatbot if one doesn't exist. - parameters: - - name: projectName - in: path - description: The name of the project where agent resides - required: true - schema: - type: string - - name: chatbotName - in: path - description: The name of the chatbot - required: true - schema: - type: string - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - chatbot: - type: object - properties: - name: - type: string - database: - type: string - description: Connection to a chat app like Slack or MS Teams - agent: - type: string - description: Agent object created beforehand - responses: - '200': - description: Chatbot was successfully updated - content: - application/json: - schema: - $ref: '#/components/schemas/Chatbot' - '400': - description: Bad request format - content: - application/json: - schema: - type: object - items: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server 
error - content: - application/json: - schema: - type: object - items: - type: string - delete: - summary: Deletes an existing chatbot. - description: Deletes an existing chatbot by name. - parameters: - - name: projectName - in: path - description: The name of the project where agent resides - required: true - schema: - type: string - - name: chatbotName - in: path - description: Name of existing chatbot to delete - required: true - schema: - type: string - responses: - '200': - description: An empty response indicates success - content: - application/json: - schema: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Chatbot not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/files': - get: - summary: List all files uploaded to MindsDB. - description: Gets all files uploaded to MindsDB. - responses: - '200': - description: A JSON array of file names - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/File' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/files/{fileName}': - delete: - summary: Deletes an existing file. - description: Deletes an existing file. 
- parameters: - - name: fileName - in: path - description: File name to be deleted - required: true - schema: - type: string - responses: - '200': - description: Succesfully deleted - content: - application/json: - schema: - type: object - items: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - put: - summary: Upload a new file. - description: Upload a new file. - parameters: - - name: fileName - in: path - description: The name of the file - required: true - schema: - type: string - requestBody: - required: true - content: - multipart/form-data: - schema: - type: object - properties: - file: - type: string - format: binary - description: The file to upload - original_file_name: - type: string - description: The original name of the file (optional) - responses: - '200': - description: View was successfully updated - content: - application/json: - schema: - $ref: '#/components/schemas/File' - '400': - description: Bad request format - content: - application/json: - schema: - type: object - items: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/projects/{projectName}/jobs': - post: - summary: This endpoint creates a new Job. - description: Create a job to automate tasks. 
- parameters: - - name: projectName - in: path - description: The name of the project - required: true - schema: - type: string - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - name: - type: string - description: Name of the job - query: - type: string - description: Tasks to be executed by the job - if_query: - type: string - description: Optional. Condition to be fulfilled before the job executes - start_at: - type: string - format: date-time - description: Optional. Start date/time of the job - end_at: - type: string - format: date-time - description: Optional. End date/time of the job - schedule_str: - type: string - description: Periodicity of the job (e.g. every 1 minute) - responses: - '200': - description: Job created - content: - application/json: - schema: - $ref: '#/components/schemas/Job' - '400': - description: Bad request - content: - application/json: - schema: - type: object - items: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Project not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - get: - summary: Returns a list of jobs. - description: Gets info about all jobs in the project. 
- parameters: - - name: projectName - in: path - description: The name of the project - required: true - schema: - type: string - responses: - '200': - description: A JSON array of job names - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/Job' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Project not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/api/projects/{projectName}/jobs/{jobName}': - get: - summary: Gets info about specific job. - description: Gets info for a specific job in the project. - parameters: - - name: projectName - in: path - description: The name of the project - required: true - schema: - type: string - - name: jobName - in: path - description: The name of the job - required: true - schema: - type: string - responses: - '200': - description: Job information - content: - application/json: - schema: - $ref: '#/components/schemas/Job' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Job not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - delete: - summary: Deletes an existing job. - description: Deletes a job in the project. 
- parameters: - - name: projectName - in: path - description: The name of the project - required: true - schema: - type: string - - name: jobName - in: path - description: The name of the job - required: true - schema: - type: string - responses: - '200': - description: Empty response if delete is successful - content: - application/json: - schema: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Project or job not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string \ No newline at end of file diff --git a/docs/overview_sdks_apis.mdx b/docs/overview_sdks_apis.mdx deleted file mode 100644 index fc1904c88f1..00000000000 --- a/docs/overview_sdks_apis.mdx +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: SDKs & APIs -sidebarTitle: Introduction -icon: "house" ---- - -The [Connect](/mindsdb-connect), [Unify](/mindsdb-unify), and [Respond](/mindsdb-respond) sections present the usage of MindsDB via its SQL interface. - -Alongside the SQL interface, MindsDB provides access via REST APIs, Python SDK, JavaScript SDK. - - - - Interact with MindsDB via API endpoints. - - Integrate MindsDB into the Python code. - - Integrate MindsDB into the JavaScript code. 
- - - diff --git a/docs/package-lock.json b/docs/package-lock.json deleted file mode 100644 index 551eba76093..00000000000 --- a/docs/package-lock.json +++ /dev/null @@ -1,14997 +0,0 @@ -{ - "name": "docs", - "lockfileVersion": 3, - "requires": true, - "packages": { - "": { - "dependencies": { - "mintlify": "^4.2.500", - "sharp": "^0.34.4" - } - }, - "node_modules/@alcalzone/ansi-tokenize": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/@alcalzone/ansi-tokenize/-/ansi-tokenize-0.2.5.tgz", - "integrity": "sha512-3NX/MpTdroi0aKz134A6RC2Gb2iXVECN4QaAXnvCIxxIm3C3AVB1mkUe8NaaiyvOpDfsrqWhYtj+Q6a62RrTsw==", - "license": "MIT", - "dependencies": { - "ansi-styles": "^6.2.1", - "is-fullwidth-code-point": "^5.0.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/@alloc/quick-lru": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz", - "integrity": "sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw==", - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/@ark/schema": { - "version": "0.55.0", - "resolved": "https://registry.npmjs.org/@ark/schema/-/schema-0.55.0.tgz", - "integrity": "sha512-IlSIc0FmLKTDGr4I/FzNHauMn0MADA6bCjT1wauu4k6MyxhC1R9gz0olNpIRvK7lGGDwtc/VO0RUDNvVQW5WFg==", - "license": "MIT", - "dependencies": { - "@ark/util": "0.55.0" - } - }, - "node_modules/@ark/util": { - "version": "0.55.0", - "resolved": "https://registry.npmjs.org/@ark/util/-/util-0.55.0.tgz", - "integrity": "sha512-aWFNK7aqSvqFtVsl1xmbTjGbg91uqtJV7Za76YGNEwIO4qLjMfyY8flmmbhooYMuqPCO2jyxu8hve943D+w3bA==", - "license": "MIT" - }, - "node_modules/@asyncapi/parser": { - "version": "3.4.0", - "resolved": "https://registry.npmjs.org/@asyncapi/parser/-/parser-3.4.0.tgz", - "integrity": 
"sha512-Sxn74oHiZSU6+cVeZy62iPZMFMvKp4jupMFHelSICCMw1qELmUHPvuZSr+ZHDmNGgHcEpzJM5HN02kR7T4g+PQ==", - "license": "Apache-2.0", - "dependencies": { - "@asyncapi/specs": "^6.8.0", - "@openapi-contrib/openapi-schema-to-json-schema": "~3.2.0", - "@stoplight/json": "3.21.0", - "@stoplight/json-ref-readers": "^1.2.2", - "@stoplight/json-ref-resolver": "^3.1.5", - "@stoplight/spectral-core": "^1.18.3", - "@stoplight/spectral-functions": "^1.7.2", - "@stoplight/spectral-parsers": "^1.0.2", - "@stoplight/spectral-ref-resolver": "^1.0.3", - "@stoplight/types": "^13.12.0", - "@types/json-schema": "^7.0.11", - "@types/urijs": "^1.19.19", - "ajv": "^8.17.1", - "ajv-errors": "^3.0.0", - "ajv-formats": "^2.1.1", - "avsc": "^5.7.5", - "js-yaml": "^4.1.0", - "jsonpath-plus": "^10.0.0", - "node-fetch": "2.6.7" - } - }, - "node_modules/@asyncapi/specs": { - "version": "6.8.1", - "resolved": "https://registry.npmjs.org/@asyncapi/specs/-/specs-6.8.1.tgz", - "integrity": "sha512-czHoAk3PeXTLR+X8IUaD+IpT+g+zUvkcgMDJVothBsan+oHN3jfcFcFUNdOPAAFoUCQN1hXF1dWuphWy05THlA==", - "license": "Apache-2.0", - "dependencies": { - "@types/json-schema": "^7.0.11" - } - }, - "node_modules/@babel/code-frame": { - "version": "7.29.0", - "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.29.0.tgz", - "integrity": "sha512-9NhCeYjq9+3uxgdtp20LSiJXJvN0FeCtNGpJxuMFZ1Kv3cWUNb6DOhJwUvcVCzKGR66cw4njwM6hrJLqgOwbcw==", - "license": "MIT", - "dependencies": { - "@babel/helper-validator-identifier": "^7.28.5", - "js-tokens": "^4.0.0", - "picocolors": "^1.1.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-validator-identifier": { - "version": "7.28.5", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.28.5.tgz", - "integrity": "sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q==", - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - 
"node_modules/@canvas/image-data": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@canvas/image-data/-/image-data-1.1.0.tgz", - "integrity": "sha512-QdObRRjRbcXGmM1tmJ+MrHcaz1MftF2+W7YI+MsphnsCrmtyfS0d5qJbk0MeSbUeyM/jCb0hmnkXPsy026L7dA==", - "license": "MIT" - }, - "node_modules/@emnapi/runtime": { - "version": "1.9.2", - "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.9.2.tgz", - "integrity": "sha512-3U4+MIWHImeyu1wnmVygh5WlgfYDtyf0k8AbLhMFxOipihf6nrWC4syIm/SwEeec0mNSafiiNnMJwbza/Is6Lw==", - "license": "MIT", - "optional": true, - "dependencies": { - "tslib": "^2.4.0" - } - }, - "node_modules/@emnapi/runtime/node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD", - "optional": true - }, - "node_modules/@floating-ui/core": { - "version": "1.7.5", - "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.7.5.tgz", - "integrity": "sha512-1Ih4WTWyw0+lKyFMcBHGbb5U5FtuHJuujoyyr5zTaWS5EYMeT6Jb2AuDeftsCsEuchO+mM2ij5+q9crhydzLhQ==", - "license": "MIT", - "peer": true, - "dependencies": { - "@floating-ui/utils": "^0.2.11" - } - }, - "node_modules/@floating-ui/dom": { - "version": "1.7.6", - "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.7.6.tgz", - "integrity": "sha512-9gZSAI5XM36880PPMm//9dfiEngYoC6Am2izES1FF406YFsjvyBMmeJ2g4SAju3xWwtuynNRFL2s9hgxpLI5SQ==", - "license": "MIT", - "peer": true, - "dependencies": { - "@floating-ui/core": "^1.7.5", - "@floating-ui/utils": "^0.2.11" - } - }, - "node_modules/@floating-ui/utils": { - "version": "0.2.11", - "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.11.tgz", - "integrity": "sha512-RiB/yIh78pcIxl6lLMG0CgBXAZ2Y0eVHqMPYugu+9U0AeT6YBeiJpf7lbdJNIugFP5SIjwNRgo4DhR1Qxi26Gg==", - "license": "MIT", - "peer": true - }, - "node_modules/@img/colour": { - 
"version": "1.1.0", - "resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.1.0.tgz", - "integrity": "sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ==", - "license": "MIT", - "engines": { - "node": ">=18" - } - }, - "node_modules/@img/sharp-darwin-arm64": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.34.5.tgz", - "integrity": "sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w==", - "cpu": [ - "arm64" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-darwin-arm64": "1.2.4" - } - }, - "node_modules/@img/sharp-darwin-x64": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.5.tgz", - "integrity": "sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw==", - "cpu": [ - "x64" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-darwin-x64": "1.2.4" - } - }, - "node_modules/@img/sharp-libvips-darwin-arm64": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.2.4.tgz", - "integrity": "sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g==", - "cpu": [ - "arm64" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "darwin" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-darwin-x64": { - "version": "1.2.4", - 
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.2.4.tgz", - "integrity": "sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg==", - "cpu": [ - "x64" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "darwin" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linux-arm": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.2.4.tgz", - "integrity": "sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A==", - "cpu": [ - "arm" - ], - "libc": [ - "glibc" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linux-arm64": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.2.4.tgz", - "integrity": "sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw==", - "cpu": [ - "arm64" - ], - "libc": [ - "glibc" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linux-ppc64": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.2.4.tgz", - "integrity": "sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA==", - "cpu": [ - "ppc64" - ], - "libc": [ - "glibc" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linux-riscv64": { - "version": "1.2.4", - "resolved": 
"https://registry.npmjs.org/@img/sharp-libvips-linux-riscv64/-/sharp-libvips-linux-riscv64-1.2.4.tgz", - "integrity": "sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA==", - "cpu": [ - "riscv64" - ], - "libc": [ - "glibc" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linux-s390x": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.2.4.tgz", - "integrity": "sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ==", - "cpu": [ - "s390x" - ], - "libc": [ - "glibc" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linux-x64": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.2.4.tgz", - "integrity": "sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw==", - "cpu": [ - "x64" - ], - "libc": [ - "glibc" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linuxmusl-arm64": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.2.4.tgz", - "integrity": "sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw==", - "cpu": [ - "arm64" - ], - "libc": [ - "musl" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linuxmusl-x64": { - "version": 
"1.2.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.2.4.tgz", - "integrity": "sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg==", - "cpu": [ - "x64" - ], - "libc": [ - "musl" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-linux-arm": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.5.tgz", - "integrity": "sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw==", - "cpu": [ - "arm" - ], - "libc": [ - "glibc" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-arm": "1.2.4" - } - }, - "node_modules/@img/sharp-linux-arm64": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.5.tgz", - "integrity": "sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg==", - "cpu": [ - "arm64" - ], - "libc": [ - "glibc" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-arm64": "1.2.4" - } - }, - "node_modules/@img/sharp-linux-ppc64": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-ppc64/-/sharp-linux-ppc64-0.34.5.tgz", - "integrity": "sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA==", - "cpu": [ - "ppc64" - ], - "libc": [ - "glibc" - ], - 
"license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-ppc64": "1.2.4" - } - }, - "node_modules/@img/sharp-linux-riscv64": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-riscv64/-/sharp-linux-riscv64-0.34.5.tgz", - "integrity": "sha512-51gJuLPTKa7piYPaVs8GmByo7/U7/7TZOq+cnXJIHZKavIRHAP77e3N2HEl3dgiqdD/w0yUfiJnII77PuDDFdw==", - "cpu": [ - "riscv64" - ], - "libc": [ - "glibc" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-riscv64": "1.2.4" - } - }, - "node_modules/@img/sharp-linux-s390x": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.5.tgz", - "integrity": "sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg==", - "cpu": [ - "s390x" - ], - "libc": [ - "glibc" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-s390x": "1.2.4" - } - }, - "node_modules/@img/sharp-linux-x64": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.5.tgz", - "integrity": "sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ==", - "cpu": [ - "x64" - ], - "libc": [ - "glibc" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - 
"url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-x64": "1.2.4" - } - }, - "node_modules/@img/sharp-linuxmusl-arm64": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.5.tgz", - "integrity": "sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg==", - "cpu": [ - "arm64" - ], - "libc": [ - "musl" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linuxmusl-arm64": "1.2.4" - } - }, - "node_modules/@img/sharp-linuxmusl-x64": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.5.tgz", - "integrity": "sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q==", - "cpu": [ - "x64" - ], - "libc": [ - "musl" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linuxmusl-x64": "1.2.4" - } - }, - "node_modules/@img/sharp-wasm32": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.5.tgz", - "integrity": "sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw==", - "cpu": [ - "wasm32" - ], - "license": "Apache-2.0 AND LGPL-3.0-or-later AND MIT", - "optional": true, - "dependencies": { - "@emnapi/runtime": "^1.7.0" - }, - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-win32-arm64": { - "version": "0.34.5", - 
"resolved": "https://registry.npmjs.org/@img/sharp-win32-arm64/-/sharp-win32-arm64-0.34.5.tgz", - "integrity": "sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g==", - "cpu": [ - "arm64" - ], - "license": "Apache-2.0 AND LGPL-3.0-or-later", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-win32-ia32": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.5.tgz", - "integrity": "sha512-FV9m/7NmeCmSHDD5j4+4pNI8Cp3aW+JvLoXcTUo0IqyjSfAZJ8dIUmijx1qaJsIiU+Hosw6xM5KijAWRJCSgNg==", - "cpu": [ - "ia32" - ], - "license": "Apache-2.0 AND LGPL-3.0-or-later", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-win32-x64": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.5.tgz", - "integrity": "sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw==", - "cpu": [ - "x64" - ], - "license": "Apache-2.0 AND LGPL-3.0-or-later", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@inquirer/ansi": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/@inquirer/ansi/-/ansi-1.0.2.tgz", - "integrity": "sha512-S8qNSZiYzFd0wAcyG5AXCvUHC5Sr7xpZ9wZ2py9XR88jUz8wooStVx5M6dRzczbBWjic9NP7+rY0Xi7qqK/aMQ==", - "license": "MIT", - "engines": { - "node": ">=18" - } - }, - "node_modules/@inquirer/checkbox": { - "version": "4.3.2", - "resolved": "https://registry.npmjs.org/@inquirer/checkbox/-/checkbox-4.3.2.tgz", - "integrity": 
"sha512-VXukHf0RR1doGe6Sm4F0Em7SWYLTHSsbGfJdS9Ja2bX5/D5uwVOEjr07cncLROdBvmnvCATYEWlHqYmXv2IlQA==", - "license": "MIT", - "dependencies": { - "@inquirer/ansi": "^1.0.2", - "@inquirer/core": "^10.3.2", - "@inquirer/figures": "^1.0.15", - "@inquirer/type": "^3.0.10", - "yoctocolors-cjs": "^2.1.3" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@types/node": ">=18" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - } - } - }, - "node_modules/@inquirer/confirm": { - "version": "5.1.21", - "resolved": "https://registry.npmjs.org/@inquirer/confirm/-/confirm-5.1.21.tgz", - "integrity": "sha512-KR8edRkIsUayMXV+o3Gv+q4jlhENF9nMYUZs9PA2HzrXeHI8M5uDag70U7RJn9yyiMZSbtF5/UexBtAVtZGSbQ==", - "license": "MIT", - "dependencies": { - "@inquirer/core": "^10.3.2", - "@inquirer/type": "^3.0.10" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@types/node": ">=18" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - } - } - }, - "node_modules/@inquirer/core": { - "version": "10.3.2", - "resolved": "https://registry.npmjs.org/@inquirer/core/-/core-10.3.2.tgz", - "integrity": "sha512-43RTuEbfP8MbKzedNqBrlhhNKVwoK//vUFNW3Q3vZ88BLcrs4kYpGg+B2mm5p2K/HfygoCxuKwJJiv8PbGmE0A==", - "license": "MIT", - "dependencies": { - "@inquirer/ansi": "^1.0.2", - "@inquirer/figures": "^1.0.15", - "@inquirer/type": "^3.0.10", - "cli-width": "^4.1.0", - "mute-stream": "^2.0.0", - "signal-exit": "^4.1.0", - "wrap-ansi": "^6.2.0", - "yoctocolors-cjs": "^2.1.3" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@types/node": ">=18" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - } - } - }, - "node_modules/@inquirer/editor": { - "version": "4.2.23", - "resolved": "https://registry.npmjs.org/@inquirer/editor/-/editor-4.2.23.tgz", - "integrity": "sha512-aLSROkEwirotxZ1pBaP8tugXRFCxW94gwrQLxXfrZsKkfjOYC1aRvAZuhpJOb5cu4IBTJdsCigUlf2iCOu4ZDQ==", - "license": "MIT", - "dependencies": { - 
"@inquirer/core": "^10.3.2", - "@inquirer/external-editor": "^1.0.3", - "@inquirer/type": "^3.0.10" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@types/node": ">=18" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - } - } - }, - "node_modules/@inquirer/expand": { - "version": "4.0.23", - "resolved": "https://registry.npmjs.org/@inquirer/expand/-/expand-4.0.23.tgz", - "integrity": "sha512-nRzdOyFYnpeYTTR2qFwEVmIWypzdAx/sIkCMeTNTcflFOovfqUk+HcFhQQVBftAh9gmGrpFj6QcGEqrDMDOiew==", - "license": "MIT", - "dependencies": { - "@inquirer/core": "^10.3.2", - "@inquirer/type": "^3.0.10", - "yoctocolors-cjs": "^2.1.3" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@types/node": ">=18" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - } - } - }, - "node_modules/@inquirer/external-editor": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/@inquirer/external-editor/-/external-editor-1.0.3.tgz", - "integrity": "sha512-RWbSrDiYmO4LbejWY7ttpxczuwQyZLBUyygsA9Nsv95hpzUWwnNTVQmAq3xuh7vNwCp07UTmE5i11XAEExx4RA==", - "license": "MIT", - "dependencies": { - "chardet": "^2.1.1", - "iconv-lite": "^0.7.0" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@types/node": ">=18" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - } - } - }, - "node_modules/@inquirer/figures": { - "version": "1.0.15", - "resolved": "https://registry.npmjs.org/@inquirer/figures/-/figures-1.0.15.tgz", - "integrity": "sha512-t2IEY+unGHOzAaVM5Xx6DEWKeXlDDcNPeDyUpsRc6CUhBfU3VQOEl+Vssh7VNp1dR8MdUJBWhuObjXCsVpjN5g==", - "license": "MIT", - "engines": { - "node": ">=18" - } - }, - "node_modules/@inquirer/input": { - "version": "4.3.1", - "resolved": "https://registry.npmjs.org/@inquirer/input/-/input-4.3.1.tgz", - "integrity": "sha512-kN0pAM4yPrLjJ1XJBjDxyfDduXOuQHrBB8aLDMueuwUGn+vNpF7Gq7TvyVxx8u4SHlFFj4trmj+a2cbpG4Jn1g==", - "license": "MIT", - "dependencies": { - 
"@inquirer/core": "^10.3.2", - "@inquirer/type": "^3.0.10" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@types/node": ">=18" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - } - } - }, - "node_modules/@inquirer/number": { - "version": "3.0.23", - "resolved": "https://registry.npmjs.org/@inquirer/number/-/number-3.0.23.tgz", - "integrity": "sha512-5Smv0OK7K0KUzUfYUXDXQc9jrf8OHo4ktlEayFlelCjwMXz0299Y8OrI+lj7i4gCBY15UObk76q0QtxjzFcFcg==", - "license": "MIT", - "dependencies": { - "@inquirer/core": "^10.3.2", - "@inquirer/type": "^3.0.10" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@types/node": ">=18" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - } - } - }, - "node_modules/@inquirer/password": { - "version": "4.0.23", - "resolved": "https://registry.npmjs.org/@inquirer/password/-/password-4.0.23.tgz", - "integrity": "sha512-zREJHjhT5vJBMZX/IUbyI9zVtVfOLiTO66MrF/3GFZYZ7T4YILW5MSkEYHceSii/KtRk+4i3RE7E1CUXA2jHcA==", - "license": "MIT", - "dependencies": { - "@inquirer/ansi": "^1.0.2", - "@inquirer/core": "^10.3.2", - "@inquirer/type": "^3.0.10" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@types/node": ">=18" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - } - } - }, - "node_modules/@inquirer/prompts": { - "version": "7.9.0", - "resolved": "https://registry.npmjs.org/@inquirer/prompts/-/prompts-7.9.0.tgz", - "integrity": "sha512-X7/+dG9SLpSzRkwgG5/xiIzW0oMrV3C0HOa7YHG1WnrLK+vCQHfte4k/T80059YBdei29RBC3s+pSMvPJDU9/A==", - "license": "MIT", - "dependencies": { - "@inquirer/checkbox": "^4.3.0", - "@inquirer/confirm": "^5.1.19", - "@inquirer/editor": "^4.2.21", - "@inquirer/expand": "^4.0.21", - "@inquirer/input": "^4.2.5", - "@inquirer/number": "^3.0.21", - "@inquirer/password": "^4.0.21", - "@inquirer/rawlist": "^4.1.9", - "@inquirer/search": "^3.2.0", - "@inquirer/select": "^4.4.0" - }, - "engines": { - "node": ">=18" - }, - 
"peerDependencies": { - "@types/node": ">=18" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - } - } - }, - "node_modules/@inquirer/rawlist": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/@inquirer/rawlist/-/rawlist-4.1.11.tgz", - "integrity": "sha512-+LLQB8XGr3I5LZN/GuAHo+GpDJegQwuPARLChlMICNdwW7OwV2izlCSCxN6cqpL0sMXmbKbFcItJgdQq5EBXTw==", - "license": "MIT", - "dependencies": { - "@inquirer/core": "^10.3.2", - "@inquirer/type": "^3.0.10", - "yoctocolors-cjs": "^2.1.3" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@types/node": ">=18" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - } - } - }, - "node_modules/@inquirer/search": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/@inquirer/search/-/search-3.2.2.tgz", - "integrity": "sha512-p2bvRfENXCZdWF/U2BXvnSI9h+tuA8iNqtUKb9UWbmLYCRQxd8WkvwWvYn+3NgYaNwdUkHytJMGG4MMLucI1kA==", - "license": "MIT", - "dependencies": { - "@inquirer/core": "^10.3.2", - "@inquirer/figures": "^1.0.15", - "@inquirer/type": "^3.0.10", - "yoctocolors-cjs": "^2.1.3" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@types/node": ">=18" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - } - } - }, - "node_modules/@inquirer/select": { - "version": "4.4.2", - "resolved": "https://registry.npmjs.org/@inquirer/select/-/select-4.4.2.tgz", - "integrity": "sha512-l4xMuJo55MAe+N7Qr4rX90vypFwCajSakx59qe/tMaC1aEHWLyw68wF4o0A4SLAY4E0nd+Vt+EyskeDIqu1M6w==", - "license": "MIT", - "dependencies": { - "@inquirer/ansi": "^1.0.2", - "@inquirer/core": "^10.3.2", - "@inquirer/figures": "^1.0.15", - "@inquirer/type": "^3.0.10", - "yoctocolors-cjs": "^2.1.3" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@types/node": ">=18" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - } - } - }, - "node_modules/@inquirer/type": { - "version": "3.0.10", - "resolved": 
"https://registry.npmjs.org/@inquirer/type/-/type-3.0.10.tgz", - "integrity": "sha512-BvziSRxfz5Ov8ch0z/n3oijRSEcEsHnhggm4xFZe93DHcUCTlutlq9Ox4SVENAfcRD22UQq7T/atg9Wr3k09eA==", - "license": "MIT", - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@types/node": ">=18" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - } - } - }, - "node_modules/@jridgewell/gen-mapping": { - "version": "0.3.13", - "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz", - "integrity": "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA==", - "license": "MIT", - "dependencies": { - "@jridgewell/sourcemap-codec": "^1.5.0", - "@jridgewell/trace-mapping": "^0.3.24" - } - }, - "node_modules/@jridgewell/resolve-uri": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", - "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", - "license": "MIT", - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/@jridgewell/sourcemap-codec": { - "version": "1.5.5", - "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", - "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", - "license": "MIT" - }, - "node_modules/@jridgewell/trace-mapping": { - "version": "0.3.31", - "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.31.tgz", - "integrity": "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==", - "license": "MIT", - "dependencies": { - "@jridgewell/resolve-uri": "^3.1.0", - "@jridgewell/sourcemap-codec": "^1.4.14" - } - }, - "node_modules/@jsep-plugin/assignment": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/@jsep-plugin/assignment/-/assignment-1.3.0.tgz", - "integrity": 
"sha512-VVgV+CXrhbMI3aSusQyclHkenWSAm95WaiKrMxRFam3JSUiIaQjoMIw2sEs/OX4XifnqeQUN4DYbJjlA8EfktQ==", - "license": "MIT", - "engines": { - "node": ">= 10.16.0" - }, - "peerDependencies": { - "jsep": "^0.4.0||^1.0.0" - } - }, - "node_modules/@jsep-plugin/regex": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@jsep-plugin/regex/-/regex-1.0.4.tgz", - "integrity": "sha512-q7qL4Mgjs1vByCaTnDFcBnV9HS7GVPJX5vyVoCgZHNSC9rjwIlmbXG5sUuorR5ndfHAIlJ8pVStxvjXHbNvtUg==", - "license": "MIT", - "engines": { - "node": ">= 10.16.0" - }, - "peerDependencies": { - "jsep": "^0.4.0||^1.0.0" - } - }, - "node_modules/@jsep-plugin/ternary": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/@jsep-plugin/ternary/-/ternary-1.1.4.tgz", - "integrity": "sha512-ck5wiqIbqdMX6WRQztBL7ASDty9YLgJ3sSAK5ZpBzXeySvFGCzIvM6UiAI4hTZ22fEcYQVV/zhUbNscggW+Ukg==", - "license": "MIT", - "engines": { - "node": ">= 10.16.0" - }, - "peerDependencies": { - "jsep": "^0.4.0||^1.0.0" - } - }, - "node_modules/@leichtgewicht/ip-codec": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/@leichtgewicht/ip-codec/-/ip-codec-2.0.5.tgz", - "integrity": "sha512-Vo+PSpZG2/fmgmiNzYK9qWRh8h/CHrwD0mo1h1DzL4yzHNSfWYujGTYsWGreD000gcgmZ7K4Ys6Tx9TxtsKdDw==", - "license": "MIT" - }, - "node_modules/@mdx-js/mdx": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/@mdx-js/mdx/-/mdx-3.1.1.tgz", - "integrity": "sha512-f6ZO2ifpwAQIpzGWaBQT2TXxPv6z3RBzQKpVftEWN78Vl/YweF1uwussDx8ECAXVtr3Rs89fKyG9YlzUs9DyGQ==", - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0", - "@types/estree-jsx": "^1.0.0", - "@types/hast": "^3.0.0", - "@types/mdx": "^2.0.0", - "acorn": "^8.0.0", - "collapse-white-space": "^2.0.0", - "devlop": "^1.0.0", - "estree-util-is-identifier-name": "^3.0.0", - "estree-util-scope": "^1.0.0", - "estree-walker": "^3.0.0", - "hast-util-to-jsx-runtime": "^2.0.0", - "markdown-extensions": "^2.0.0", - "recma-build-jsx": "^1.0.0", - "recma-jsx": "^1.0.0", - 
"recma-stringify": "^1.0.0", - "rehype-recma": "^1.0.0", - "remark-mdx": "^3.0.0", - "remark-parse": "^11.0.0", - "remark-rehype": "^11.0.0", - "source-map": "^0.7.0", - "unified": "^11.0.0", - "unist-util-position-from-estree": "^2.0.0", - "unist-util-stringify-position": "^4.0.0", - "unist-util-visit": "^5.0.0", - "vfile": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@mdx-js/react": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/@mdx-js/react/-/react-3.1.1.tgz", - "integrity": "sha512-f++rKLQgUVYDAtECQ6fn/is15GkEH9+nZPM3MS0RcxVqoTfawHvDlSCH7JbMhAM6uJ32v3eXLvLmLvjGu7PTQw==", - "license": "MIT", - "dependencies": { - "@types/mdx": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - }, - "peerDependencies": { - "@types/react": ">=16", - "react": ">=16" - } - }, - "node_modules/@mintlify/cli": { - "version": "4.0.1103", - "resolved": "https://registry.npmjs.org/@mintlify/cli/-/cli-4.0.1103.tgz", - "integrity": "sha512-/Tz4ydJp0eY4I5oKv4D4FYK0xPm9fpwCfnSye4UzjRU7bVUv34Qzi6px/1PQJbQtpUiISwF7tuWH6tyB5AWknw==", - "license": "Elastic-2.0", - "dependencies": { - "@inquirer/prompts": "7.9.0", - "@mintlify/common": "1.0.844", - "@mintlify/link-rot": "3.0.1019", - "@mintlify/prebuild": "1.0.986", - "@mintlify/previewing": "4.0.1047", - "@mintlify/validation": "0.1.660", - "adm-zip": "0.5.16", - "chalk": "5.2.0", - "color": "4.2.3", - "detect-port": "1.5.1", - "front-matter": "4.0.2", - "fs-extra": "11.2.0", - "ink": "6.3.0", - "inquirer": "12.3.0", - "js-yaml": "4.1.0", - "mdast-util-mdx-jsx": "3.2.0", - "open": "^8.4.2", - "openid-client": "^6.8.2", - "posthog-node": "5.17.2", - "react": "19.2.3", - "semver": "7.7.2", - "unist-util-visit": "5.0.0", - "yargs": "17.7.1", - "zod": "^4.3.6" - }, - "bin": { - "mint": "bin/index.js", - "mintlify": "bin/index.js" - }, - "engines": { - "node": ">=18.0.0" - }, - 
"optionalDependencies": { - "keytar": "^7.9.0" - } - }, - "node_modules/@mintlify/common": { - "version": "1.0.844", - "resolved": "https://registry.npmjs.org/@mintlify/common/-/common-1.0.844.tgz", - "integrity": "sha512-uTQ5yGFNvP4wpc5FHvBEkJubg5VNW9R2LL9+IcSg/KraDzRn0vCD9YIdq2f2RdwYDYl6sWGMmYjDxUqrOOZVFg==", - "license": "ISC", - "dependencies": { - "@asyncapi/parser": "3.4.0", - "@asyncapi/specs": "6.8.1", - "@mintlify/mdx": "^3.0.4", - "@mintlify/models": "0.0.290", - "@mintlify/openapi-parser": "^0.0.8", - "@mintlify/validation": "0.1.660", - "@sindresorhus/slugify": "2.2.0", - "@types/mdast": "4.0.4", - "acorn": "8.11.2", - "acorn-jsx": "5.3.2", - "color-blend": "4.0.0", - "estree-util-to-js": "2.0.0", - "estree-walker": "3.0.3", - "front-matter": "4.0.2", - "hast-util-from-html": "2.0.3", - "hast-util-to-html": "9.0.4", - "hast-util-to-text": "4.0.2", - "hex-rgb": "5.0.0", - "ignore": "7.0.5", - "js-yaml": "4.1.0", - "lodash": "4.17.21", - "mdast-util-from-markdown": "2.0.2", - "mdast-util-gfm": "3.0.0", - "mdast-util-mdx": "3.0.0", - "mdast-util-mdx-jsx": "3.1.3", - "micromark-extension-gfm": "3.0.0", - "micromark-extension-mdx-jsx": "3.0.1", - "micromark-extension-mdxjs": "3.0.0", - "openapi-types": "12.1.3", - "postcss": "8.5.6", - "rehype-stringify": "10.0.1", - "remark": "15.0.1", - "remark-frontmatter": "5.0.0", - "remark-gfm": "4.0.0", - "remark-math": "6.0.0", - "remark-mdx": "3.1.0", - "remark-parse": "11.0.0", - "remark-rehype": "11.1.1", - "remark-stringify": "11.0.0", - "sucrase": "^3.34.0", - "tailwindcss": "^3.4.17", - "unified": "11.0.5", - "unist-builder": "4.0.0", - "unist-util-map": "4.0.0", - "unist-util-remove": "4.0.0", - "unist-util-remove-position": "5.0.0", - "unist-util-visit": "5.0.0", - "unist-util-visit-parents": "6.0.1", - "vfile": "6.0.3", - "xss": "1.0.15" - } - }, - "node_modules/@mintlify/common/node_modules/@floating-ui/react-dom": { - "version": "2.1.8", - "resolved": 
"https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.8.tgz", - "integrity": "sha512-cC52bHwM/n/CxS87FH0yWdngEZrjdtLW/qVruo68qg+prK7ZQ4YGdut2GyDVpoGeAYe/h899rVeOVm6Oi40k2A==", - "license": "MIT", - "peer": true, - "dependencies": { - "@floating-ui/dom": "^1.7.6" - }, - "peerDependencies": { - "react": ">=16.8.0", - "react-dom": ">=16.8.0" - } - }, - "node_modules/@mintlify/common/node_modules/@mintlify/mdx": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/@mintlify/mdx/-/mdx-3.0.4.tgz", - "integrity": "sha512-tJhdpnM5ReJLNJ2fuDRIEr0zgVd6id7/oAIfs26V46QlygiLsc8qx4Rz3LWIX51rUXW/cfakjj0EATxIciIw+g==", - "license": "MIT", - "dependencies": { - "@shikijs/transformers": "^3.11.0", - "@shikijs/twoslash": "^3.12.2", - "arktype": "^2.1.26", - "hast-util-to-string": "^3.0.1", - "mdast-util-from-markdown": "^2.0.2", - "mdast-util-gfm": "^3.1.0", - "mdast-util-mdx-jsx": "^3.2.0", - "mdast-util-to-hast": "^13.2.0", - "next-mdx-remote-client": "^1.0.3", - "rehype-katex": "^7.0.1", - "remark-gfm": "^4.0.0", - "remark-math": "^6.0.0", - "remark-smartypants": "^3.0.2", - "shiki": "^3.11.0", - "unified": "^11.0.0", - "unist-util-visit": "^5.0.0" - }, - "peerDependencies": { - "@radix-ui/react-popover": "^1.1.15", - "react": "^18.3.1", - "react-dom": "^18.3.1" - } - }, - "node_modules/@mintlify/common/node_modules/@mintlify/mdx/node_modules/mdast-util-gfm": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/mdast-util-gfm/-/mdast-util-gfm-3.1.0.tgz", - "integrity": "sha512-0ulfdQOM3ysHhCJ1p06l0b0VKlhU0wuQs3thxZQagjcjPrlFRqY215uZGHHJan9GEAXd9MbfPjFJz+qMkVR6zQ==", - "license": "MIT", - "dependencies": { - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-gfm-autolink-literal": "^2.0.0", - "mdast-util-gfm-footnote": "^2.0.0", - "mdast-util-gfm-strikethrough": "^2.0.0", - "mdast-util-gfm-table": "^2.0.0", - "mdast-util-gfm-task-list-item": "^2.0.0", - "mdast-util-to-markdown": "^2.0.0" - }, - "funding": { - "type": "opencollective", - 
"url": "https://opencollective.com/unified" - } - }, - "node_modules/@mintlify/common/node_modules/@mintlify/mdx/node_modules/mdast-util-mdx-jsx": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/mdast-util-mdx-jsx/-/mdast-util-mdx-jsx-3.2.0.tgz", - "integrity": "sha512-lj/z8v0r6ZtsN/cGNNtemmmfoLAFZnjMbNyLzBafjzikOM+glrjNHPlf6lQDOTccj9n5b0PPihEBbhneMyGs1Q==", - "license": "MIT", - "dependencies": { - "@types/estree-jsx": "^1.0.0", - "@types/hast": "^3.0.0", - "@types/mdast": "^4.0.0", - "@types/unist": "^3.0.0", - "ccount": "^2.0.0", - "devlop": "^1.1.0", - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-to-markdown": "^2.0.0", - "parse-entities": "^4.0.0", - "stringify-entities": "^4.0.0", - "unist-util-stringify-position": "^4.0.0", - "vfile-message": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@mintlify/common/node_modules/@radix-ui/react-arrow": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz", - "integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-primitive": "2.1.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/common/node_modules/@radix-ui/react-dismissable-layer": { - "version": "1.1.11", - "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz", - "integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==", - "license": "MIT", - "peer": true, 
- "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-escape-keydown": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/common/node_modules/@radix-ui/react-focus-scope": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz", - "integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/common/node_modules/@radix-ui/react-popover": { - "version": "1.1.15", - "resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.1.15.tgz", - "integrity": "sha512-kr0X2+6Yy/vJzLYJUPCZEc8SfQcf+1COFoAqauJm74umQhta9M7lNJHP7QQS3vkvcGLQUbWpMzwrXYwrYztHKA==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-focus-guards": "1.1.3", - 
"@radix-ui/react-focus-scope": "1.1.7", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-controllable-state": "1.2.2", - "aria-hidden": "^1.2.4", - "react-remove-scroll": "^2.6.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/common/node_modules/@radix-ui/react-popper": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz", - "integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==", - "license": "MIT", - "peer": true, - "dependencies": { - "@floating-ui/react-dom": "^2.0.0", - "@radix-ui/react-arrow": "1.1.7", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-layout-effect": "1.1.1", - "@radix-ui/react-use-rect": "1.1.1", - "@radix-ui/react-use-size": "1.1.1", - "@radix-ui/rect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/common/node_modules/@radix-ui/react-portal": { - "version": "1.1.9", - "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz", - 
"integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/common/node_modules/@radix-ui/react-presence": { - "version": "1.1.5", - "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz", - "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/common/node_modules/@radix-ui/react-primitive": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz", - "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-slot": "1.2.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - 
"peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/common/node_modules/mdast-util-mdx-jsx": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/mdast-util-mdx-jsx/-/mdast-util-mdx-jsx-3.1.3.tgz", - "integrity": "sha512-bfOjvNt+1AcbPLTFMFWY149nJz0OjmewJs3LQQ5pIyVGxP4CdOqNVJL6kTaM5c68p8q82Xv3nCyFfUnuEcH3UQ==", - "license": "MIT", - "dependencies": { - "@types/estree-jsx": "^1.0.0", - "@types/hast": "^3.0.0", - "@types/mdast": "^4.0.0", - "@types/unist": "^3.0.0", - "ccount": "^2.0.0", - "devlop": "^1.1.0", - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-to-markdown": "^2.0.0", - "parse-entities": "^4.0.0", - "stringify-entities": "^4.0.0", - "unist-util-stringify-position": "^4.0.0", - "vfile-message": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@mintlify/common/node_modules/next-mdx-remote-client": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/next-mdx-remote-client/-/next-mdx-remote-client-1.1.7.tgz", - "integrity": "sha512-12Ap5Z/tFIETMXFSBTH2IFEhJAso7MvOJ5ICyesA4q6FM4vtAcmb+4ZKa4tV1IVQJLBVqOhaEfIESZzdwjmrQQ==", - "license": "MPL 2.0", - "dependencies": { - "@babel/code-frame": "^7.29.0", - "@mdx-js/mdx": "^3.1.1", - "@mdx-js/react": "^3.1.1", - "remark-mdx-remove-esm": "^1.3.1", - "serialize-error": "^13.0.1", - "vfile": "^6.0.3", - "vfile-matter": "^5.0.1" - }, - "engines": { - "node": ">=20.9.0" - }, - "peerDependencies": { - "react": ">= 18.3.0 < 19.0.0", - "react-dom": ">= 18.3.0 < 19.0.0" - } - }, - "node_modules/@mintlify/common/node_modules/react": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", - "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", - "license": "MIT", - "peer": true, - "dependencies": { - "loose-envify": "^1.1.0" - 
}, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/@mintlify/common/node_modules/react-dom": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz", - "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==", - "license": "MIT", - "peer": true, - "dependencies": { - "loose-envify": "^1.1.0", - "scheduler": "^0.23.2" - }, - "peerDependencies": { - "react": "^18.3.1" - } - }, - "node_modules/@mintlify/common/node_modules/scheduler": { - "version": "0.23.2", - "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.23.2.tgz", - "integrity": "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==", - "license": "MIT", - "peer": true, - "dependencies": { - "loose-envify": "^1.1.0" - } - }, - "node_modules/@mintlify/link-rot": { - "version": "3.0.1019", - "resolved": "https://registry.npmjs.org/@mintlify/link-rot/-/link-rot-3.0.1019.tgz", - "integrity": "sha512-moUkUUcdfm/ivgavmrcgcnxhJ4XCDAbYPABhQbwo6hP3FHXyTB8jJdbjG/wJLZSzjH3KQpq/+DglMH5cCmSNJQ==", - "license": "Elastic-2.0", - "dependencies": { - "@mintlify/common": "1.0.844", - "@mintlify/prebuild": "1.0.986", - "@mintlify/previewing": "4.0.1047", - "@mintlify/scraping": "4.0.522", - "@mintlify/validation": "0.1.660", - "fs-extra": "11.1.0", - "unist-util-visit": "4.1.2" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@mintlify/link-rot/node_modules/@types/unist": { - "version": "2.0.11", - "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.11.tgz", - "integrity": "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==", - "license": "MIT" - }, - "node_modules/@mintlify/link-rot/node_modules/fs-extra": { - "version": "11.1.0", - "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.0.tgz", - "integrity": 
"sha512-0rcTq621PD5jM/e0a3EJoGC/1TC5ZBCERW82LQuwfGnCa1V8w7dpYH1yNu+SLb6E5dkeCBzKEyLGlFrnr+dUyw==", - "license": "MIT", - "dependencies": { - "graceful-fs": "^4.2.0", - "jsonfile": "^6.0.1", - "universalify": "^2.0.0" - }, - "engines": { - "node": ">=14.14" - } - }, - "node_modules/@mintlify/link-rot/node_modules/unist-util-is": { - "version": "5.2.1", - "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-5.2.1.tgz", - "integrity": "sha512-u9njyyfEh43npf1M+yGKDGVPbY/JWEemg5nH05ncKPfi+kBbKBJoTdsogMu33uhytuLlv9y0O7GH7fEdwLdLQw==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@mintlify/link-rot/node_modules/unist-util-visit": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-4.1.2.tgz", - "integrity": "sha512-MSd8OUGISqHdVvfY9TPhyK2VdUrPgxkUtWSuMHF6XAAFuL4LokseigBnZtPnJMu+FbynTkFNnFlyjxpVKujMRg==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2.0.0", - "unist-util-is": "^5.0.0", - "unist-util-visit-parents": "^5.1.1" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@mintlify/link-rot/node_modules/unist-util-visit-parents": { - "version": "5.1.3", - "resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-5.1.3.tgz", - "integrity": "sha512-x6+y8g7wWMyQhL1iZfhIPhDAs7Xwbn9nRosDXl7qoPTSCy0yNxnKc+hWokFifWQIDGi154rdUqKvbCa4+1kLhg==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2.0.0", - "unist-util-is": "^5.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@mintlify/models": { - "version": "0.0.290", - "resolved": "https://registry.npmjs.org/@mintlify/models/-/models-0.0.290.tgz", - "integrity": 
"sha512-dkUIepQOpyZmgdapL22wdQi7MXupLyqFWP/ebiP0NYLcRRYBLWFVcpHHfIDGC2mWOZxNCVVZDvg2rTzfccpj6A==", - "license": "Elastic-2.0", - "dependencies": { - "axios": "1.13.2", - "openapi-types": "12.1.3" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@mintlify/openapi-parser": { - "version": "0.0.8", - "resolved": "https://registry.npmjs.org/@mintlify/openapi-parser/-/openapi-parser-0.0.8.tgz", - "integrity": "sha512-9MBRq9lS4l4HITYCrqCL7T61MOb20q9IdU7HWhqYMNMM1jGO1nHjXasFy61yZ8V6gMZyyKQARGVoZ0ZrYN48Og==", - "license": "MIT", - "dependencies": { - "ajv": "^8.17.1", - "ajv-draft-04": "^1.0.0", - "ajv-formats": "^3.0.1", - "jsonpointer": "^5.0.1", - "leven": "^4.0.0", - "yaml": "^2.4.5" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/@mintlify/openapi-parser/node_modules/ajv-formats": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-3.0.1.tgz", - "integrity": "sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==", - "license": "MIT", - "dependencies": { - "ajv": "^8.0.0" - }, - "peerDependencies": { - "ajv": "^8.0.0" - }, - "peerDependenciesMeta": { - "ajv": { - "optional": true - } - } - }, - "node_modules/@mintlify/prebuild": { - "version": "1.0.986", - "resolved": "https://registry.npmjs.org/@mintlify/prebuild/-/prebuild-1.0.986.tgz", - "integrity": "sha512-HGQwegpiP0ZwAg/kpISdtad6t5om32HZ/OCWQGHh2G3+gv2Fjg3hGRttagU88oBT9oKC1N7lJPjhxK8FrvwX3w==", - "license": "Elastic-2.0", - "dependencies": { - "@mintlify/common": "1.0.844", - "@mintlify/openapi-parser": "^0.0.8", - "@mintlify/scraping": "4.0.708", - "@mintlify/validation": "0.1.660", - "chalk": "5.3.0", - "favicons": "7.2.0", - "front-matter": "4.0.2", - "fs-extra": "11.1.0", - "js-yaml": "4.1.0", - "openapi-types": "12.1.3", - "sharp": "0.33.5", - "sharp-ico": "0.1.5", - "unist-util-visit": "4.1.2", - "uuid": "11.1.0" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@img/sharp-darwin-arm64": { 
- "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.33.5.tgz", - "integrity": "sha512-UT4p+iz/2H4twwAoLCqfA9UH5pI6DggwKEGuaPy7nCVQ8ZsiY5PIcrRvD1DzuY3qYL07NtIQcWnBSY/heikIFQ==", - "cpu": [ - "arm64" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-darwin-arm64": "1.0.4" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@img/sharp-darwin-x64": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.33.5.tgz", - "integrity": "sha512-fyHac4jIc1ANYGRDxtiqelIbdWkIuQaI84Mv45KvGRRxSAa7o7d1ZKAOBaYbnepLC1WqxfpimdeWfvqqSGwR2Q==", - "cpu": [ - "x64" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-darwin-x64": "1.0.4" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@img/sharp-libvips-darwin-arm64": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.0.4.tgz", - "integrity": "sha512-XblONe153h0O2zuFfTAbQYAX2JhYmDHeWikp1LM9Hul9gVPjFY427k6dFEcOL72O01QxQsWi761svJ/ev9xEDg==", - "cpu": [ - "arm64" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "darwin" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@img/sharp-libvips-darwin-x64": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.0.4.tgz", - "integrity": 
"sha512-xnGR8YuZYfJGmWPvmlunFaWJsb9T/AO2ykoP3Fz/0X5XV2aoYBPkX6xqCQvUTKKiLddarLaxpzNe+b1hjeWHAQ==", - "cpu": [ - "x64" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "darwin" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@img/sharp-libvips-linux-arm": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.0.5.tgz", - "integrity": "sha512-gvcC4ACAOPRNATg/ov8/MnbxFDJqf/pDePbBnuBDcjsI8PssmjoKMAz4LtLaVi+OnSb5FK/yIOamqDwGmXW32g==", - "cpu": [ - "arm" - ], - "libc": [ - "glibc" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@img/sharp-libvips-linux-arm64": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.0.4.tgz", - "integrity": "sha512-9B+taZ8DlyyqzZQnoeIvDVR/2F4EbMepXMc/NdVbkzsJbzkUjhXv/70GQJ7tdLA4YJgNP25zukcxpX2/SueNrA==", - "cpu": [ - "arm64" - ], - "libc": [ - "glibc" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@img/sharp-libvips-linux-s390x": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.0.4.tgz", - "integrity": "sha512-u7Wz6ntiSSgGSGcjZ55im6uvTrOxSIS8/dgoVMoiGE9I6JAfU50yH5BoDlYA1tcuGS7g/QNtetJnxA6QEsCVTA==", - "cpu": [ - "s390x" - ], - "libc": [ - "glibc" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@img/sharp-libvips-linux-x64": { - "version": "1.0.4", - "resolved": 
"https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.0.4.tgz", - "integrity": "sha512-MmWmQ3iPFZr0Iev+BAgVMb3ZyC4KeFc3jFxnNbEPas60e1cIfevbtuyf9nDGIzOaW9PdnDciJm+wFFaTlj5xYw==", - "cpu": [ - "x64" - ], - "libc": [ - "glibc" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@img/sharp-libvips-linuxmusl-arm64": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.0.4.tgz", - "integrity": "sha512-9Ti+BbTYDcsbp4wfYib8Ctm1ilkugkA/uscUn6UXK1ldpC1JjiXbLfFZtRlBhjPZ5o1NCLiDbg8fhUPKStHoTA==", - "cpu": [ - "arm64" - ], - "libc": [ - "musl" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@img/sharp-libvips-linuxmusl-x64": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.0.4.tgz", - "integrity": "sha512-viYN1KX9m+/hGkJtvYYp+CCLgnJXwiQB39damAO7WMdKWlIhmYTfHjwSbQeUK/20vY154mwezd9HflVFM1wVSw==", - "cpu": [ - "x64" - ], - "libc": [ - "musl" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@img/sharp-linux-arm": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.33.5.tgz", - "integrity": "sha512-JTS1eldqZbJxjvKaAkxhZmBqPRGmxgu+qFKSInv8moZ2AmT5Yib3EQ1c6gp493HvrvV8QgdOXdyaIBrhvFhBMQ==", - "cpu": [ - "arm" - ], - "libc": [ - "glibc" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - 
"url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-arm": "1.0.5" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@img/sharp-linux-arm64": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.33.5.tgz", - "integrity": "sha512-JMVv+AMRyGOHtO1RFBiJy/MBsgz0x4AWrT6QoEVVTyh1E39TrCUpTRI7mx9VksGX4awWASxqCYLCV4wBZHAYxA==", - "cpu": [ - "arm64" - ], - "libc": [ - "glibc" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-arm64": "1.0.4" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@img/sharp-linux-s390x": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.33.5.tgz", - "integrity": "sha512-y/5PCd+mP4CA/sPDKl2961b+C9d+vPAveS33s6Z3zfASk2j5upL6fXVPZi7ztePZ5CuH+1kW8JtvxgbuXHRa4Q==", - "cpu": [ - "s390x" - ], - "libc": [ - "glibc" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-s390x": "1.0.4" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@img/sharp-linux-x64": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.33.5.tgz", - "integrity": "sha512-opC+Ok5pRNAzuvq1AG0ar+1owsu842/Ab+4qvU879ippJBHvyY5n2mxF1izXqkPYlGuP/M556uh53jRLJmzTWA==", - "cpu": [ - "x64" - ], - "libc": [ - "glibc" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - 
"optionalDependencies": { - "@img/sharp-libvips-linux-x64": "1.0.4" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@img/sharp-linuxmusl-arm64": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.33.5.tgz", - "integrity": "sha512-XrHMZwGQGvJg2V/oRSUfSAfjfPxO+4DkiRh6p2AFjLQztWUuY/o8Mq0eMQVIY7HJ1CDQUJlxGGZRw1a5bqmd1g==", - "cpu": [ - "arm64" - ], - "libc": [ - "musl" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linuxmusl-arm64": "1.0.4" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@img/sharp-linuxmusl-x64": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.33.5.tgz", - "integrity": "sha512-WT+d/cgqKkkKySYmqoZ8y3pxx7lx9vVejxW/W4DOFMYVSkErR+w7mf2u8m/y4+xHe7yY9DAXQMWQhpnMuFfScw==", - "cpu": [ - "x64" - ], - "libc": [ - "musl" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linuxmusl-x64": "1.0.4" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@img/sharp-wasm32": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.33.5.tgz", - "integrity": "sha512-ykUW4LVGaMcU9lu9thv85CbRMAwfeadCJHRsg2GmeRa/cJxsVY9Rbd57JcMxBkKHag5U/x7TSBpScF4U8ElVzg==", - "cpu": [ - "wasm32" - ], - "license": "Apache-2.0 AND LGPL-3.0-or-later AND MIT", - "optional": true, - "dependencies": { - "@emnapi/runtime": "^1.2.0" - }, - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - 
"node_modules/@mintlify/prebuild/node_modules/@img/sharp-win32-ia32": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.33.5.tgz", - "integrity": "sha512-T36PblLaTwuVJ/zw/LaH0PdZkRz5rd3SmMHX8GSmR7vtNSP5Z6bQkExdSK7xGWyxLw4sUknBuugTelgw2faBbQ==", - "cpu": [ - "ia32" - ], - "license": "Apache-2.0 AND LGPL-3.0-or-later", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@img/sharp-win32-x64": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.33.5.tgz", - "integrity": "sha512-MpY/o8/8kj+EcnxwvrP4aTJSWw/aZ7JIGR4aBeZkZw5B7/Jn+tY9/VNwtcoGmdT7GfggGIU4kygOMSbYnOrAbg==", - "cpu": [ - "x64" - ], - "license": "Apache-2.0 AND LGPL-3.0-or-later", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@mintlify/scraping": { - "version": "4.0.708", - "resolved": "https://registry.npmjs.org/@mintlify/scraping/-/scraping-4.0.708.tgz", - "integrity": "sha512-6GDxVKM7B0NqxXvg4Mm8nVhtybAzkVRZcMGtsp5OoHZrnATZ/C4wv2B82ZnwZvdhzLDATWoSoe3W14IXgYYcCQ==", - "license": "Elastic-2.0", - "dependencies": { - "@mintlify/common": "1.0.844", - "@mintlify/openapi-parser": "^0.0.8", - "fs-extra": "11.1.1", - "hast-util-to-mdast": "10.1.0", - "js-yaml": "4.1.0", - "mdast-util-mdx-jsx": "3.1.3", - "neotraverse": "0.6.18", - "puppeteer": "22.14.0", - "rehype-parse": "9.0.1", - "remark-gfm": "4.0.0", - "remark-mdx": "3.0.1", - "remark-parse": "11.0.0", - "remark-stringify": "11.0.0", - "unified": "11.0.5", - "unist-util-visit": "5.0.0", - "yargs": "17.7.1", - "zod": "3.24.0" - }, - "bin": { - "mintlify-scrape": "bin/cli.js" - }, - "engines": { - 
"node": ">=18.0.0" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@mintlify/scraping/node_modules/fs-extra": { - "version": "11.1.1", - "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.1.tgz", - "integrity": "sha512-MGIE4HOvQCeUCzmlHs0vXpih4ysz4wg9qiSAu6cd42lVwPbTM1TjV7RusoyQqMmk/95gdQZX72u+YW+c3eEpFQ==", - "license": "MIT", - "dependencies": { - "graceful-fs": "^4.2.0", - "jsonfile": "^6.0.1", - "universalify": "^2.0.0" - }, - "engines": { - "node": ">=14.14" - } - }, - "node_modules/@mintlify/prebuild/node_modules/@mintlify/scraping/node_modules/unist-util-visit": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-5.0.0.tgz", - "integrity": "sha512-MR04uvD+07cwl/yhVuVWAtw+3GOR/knlL55Nd/wAdblk27GCVt3lqpTivy/tkJcZoNPzTwS1Y+KMojlLDhoTzg==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0", - "unist-util-is": "^6.0.0", - "unist-util-visit-parents": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@mintlify/prebuild/node_modules/chalk": { - "version": "5.3.0", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.3.0.tgz", - "integrity": "sha512-dLitG79d+GV1Nb/VYcCDFivJeK1hiukt9QjRNVOsUtTy1rR1YJsmpGGTZ3qJos+uw7WmWF4wUwBd9jxjocFC2w==", - "license": "MIT", - "engines": { - "node": "^12.17.0 || ^14.13 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/chalk/chalk?sponsor=1" - } - }, - "node_modules/@mintlify/prebuild/node_modules/fs-extra": { - "version": "11.1.0", - "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.0.tgz", - "integrity": "sha512-0rcTq621PD5jM/e0a3EJoGC/1TC5ZBCERW82LQuwfGnCa1V8w7dpYH1yNu+SLb6E5dkeCBzKEyLGlFrnr+dUyw==", - "license": "MIT", - "dependencies": { - "graceful-fs": "^4.2.0", - "jsonfile": "^6.0.1", - "universalify": "^2.0.0" - }, - "engines": { - "node": ">=14.14" - } - }, - "node_modules/@mintlify/prebuild/node_modules/mdast-util-mdx-jsx": { - 
"version": "3.1.3", - "resolved": "https://registry.npmjs.org/mdast-util-mdx-jsx/-/mdast-util-mdx-jsx-3.1.3.tgz", - "integrity": "sha512-bfOjvNt+1AcbPLTFMFWY149nJz0OjmewJs3LQQ5pIyVGxP4CdOqNVJL6kTaM5c68p8q82Xv3nCyFfUnuEcH3UQ==", - "license": "MIT", - "dependencies": { - "@types/estree-jsx": "^1.0.0", - "@types/hast": "^3.0.0", - "@types/mdast": "^4.0.0", - "@types/unist": "^3.0.0", - "ccount": "^2.0.0", - "devlop": "^1.1.0", - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-to-markdown": "^2.0.0", - "parse-entities": "^4.0.0", - "stringify-entities": "^4.0.0", - "unist-util-stringify-position": "^4.0.0", - "vfile-message": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@mintlify/prebuild/node_modules/remark-mdx": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/remark-mdx/-/remark-mdx-3.0.1.tgz", - "integrity": "sha512-3Pz3yPQ5Rht2pM5R+0J2MrGoBSrzf+tJG94N+t/ilfdh8YLyyKYtidAYwTveB20BoHAcwIopOUqhcmh2F7hGYA==", - "license": "MIT", - "dependencies": { - "mdast-util-mdx": "^3.0.0", - "micromark-extension-mdxjs": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@mintlify/prebuild/node_modules/sharp": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.33.5.tgz", - "integrity": "sha512-haPVm1EkS9pgvHrQ/F3Xy+hgcuMV0Wm9vfIBSiwZ05k+xgb0PkBQpGsAA/oWdDobNaZTH5ppvHtzCFbnSEwHVw==", - "hasInstallScript": true, - "license": "Apache-2.0", - "dependencies": { - "color": "^4.2.3", - "detect-libc": "^2.0.3", - "semver": "^7.6.3" - }, - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-darwin-arm64": "0.33.5", - "@img/sharp-darwin-x64": "0.33.5", - "@img/sharp-libvips-darwin-arm64": "1.0.4", - "@img/sharp-libvips-darwin-x64": "1.0.4", - 
"@img/sharp-libvips-linux-arm": "1.0.5", - "@img/sharp-libvips-linux-arm64": "1.0.4", - "@img/sharp-libvips-linux-s390x": "1.0.4", - "@img/sharp-libvips-linux-x64": "1.0.4", - "@img/sharp-libvips-linuxmusl-arm64": "1.0.4", - "@img/sharp-libvips-linuxmusl-x64": "1.0.4", - "@img/sharp-linux-arm": "0.33.5", - "@img/sharp-linux-arm64": "0.33.5", - "@img/sharp-linux-s390x": "0.33.5", - "@img/sharp-linux-x64": "0.33.5", - "@img/sharp-linuxmusl-arm64": "0.33.5", - "@img/sharp-linuxmusl-x64": "0.33.5", - "@img/sharp-wasm32": "0.33.5", - "@img/sharp-win32-ia32": "0.33.5", - "@img/sharp-win32-x64": "0.33.5" - } - }, - "node_modules/@mintlify/prebuild/node_modules/unist-util-visit": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-4.1.2.tgz", - "integrity": "sha512-MSd8OUGISqHdVvfY9TPhyK2VdUrPgxkUtWSuMHF6XAAFuL4LokseigBnZtPnJMu+FbynTkFNnFlyjxpVKujMRg==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2.0.0", - "unist-util-is": "^5.0.0", - "unist-util-visit-parents": "^5.1.1" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@mintlify/prebuild/node_modules/unist-util-visit/node_modules/@types/unist": { - "version": "2.0.11", - "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.11.tgz", - "integrity": "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==", - "license": "MIT" - }, - "node_modules/@mintlify/prebuild/node_modules/unist-util-visit/node_modules/unist-util-is": { - "version": "5.2.1", - "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-5.2.1.tgz", - "integrity": "sha512-u9njyyfEh43npf1M+yGKDGVPbY/JWEemg5nH05ncKPfi+kBbKBJoTdsogMu33uhytuLlv9y0O7GH7fEdwLdLQw==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - 
"node_modules/@mintlify/prebuild/node_modules/unist-util-visit/node_modules/unist-util-visit-parents": { - "version": "5.1.3", - "resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-5.1.3.tgz", - "integrity": "sha512-x6+y8g7wWMyQhL1iZfhIPhDAs7Xwbn9nRosDXl7qoPTSCy0yNxnKc+hWokFifWQIDGi154rdUqKvbCa4+1kLhg==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2.0.0", - "unist-util-is": "^5.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@mintlify/prebuild/node_modules/zod": { - "version": "3.24.0", - "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.0.tgz", - "integrity": "sha512-Hz+wiY8yD0VLA2k/+nsg2Abez674dDGTai33SwNvMPuf9uIrBC9eFgIMQxBBbHFxVXi8W+5nX9DcAh9YNSQm/w==", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/colinhacks" - } - }, - "node_modules/@mintlify/previewing": { - "version": "4.0.1047", - "resolved": "https://registry.npmjs.org/@mintlify/previewing/-/previewing-4.0.1047.tgz", - "integrity": "sha512-4/k7a/kXkD8LK7nHvRGEPCvigpeunFk2Ku07wlXLR4tB8OEG6v5ZjLFKVHArd+UuRmjHB/oBcCht3DARaizPOw==", - "license": "Elastic-2.0", - "dependencies": { - "@mintlify/common": "1.0.844", - "@mintlify/prebuild": "1.0.986", - "@mintlify/validation": "0.1.660", - "adm-zip": "0.5.16", - "better-opn": "3.0.2", - "chalk": "5.2.0", - "chokidar": "3.5.3", - "express": "4.18.2", - "front-matter": "4.0.2", - "fs-extra": "11.1.0", - "got": "13.0.0", - "ink": "6.3.0", - "ink-spinner": "5.0.0", - "is-online": "10.0.0", - "js-yaml": "4.1.0", - "openapi-types": "12.1.3", - "react": "19.2.3", - "socket.io": "4.7.2", - "tar": "6.1.15", - "unist-util-visit": "4.1.2", - "yargs": "17.7.1" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@mintlify/previewing/node_modules/@types/unist": { - "version": "2.0.11", - "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.11.tgz", - "integrity": 
"sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==", - "license": "MIT" - }, - "node_modules/@mintlify/previewing/node_modules/fs-extra": { - "version": "11.1.0", - "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.0.tgz", - "integrity": "sha512-0rcTq621PD5jM/e0a3EJoGC/1TC5ZBCERW82LQuwfGnCa1V8w7dpYH1yNu+SLb6E5dkeCBzKEyLGlFrnr+dUyw==", - "license": "MIT", - "dependencies": { - "graceful-fs": "^4.2.0", - "jsonfile": "^6.0.1", - "universalify": "^2.0.0" - }, - "engines": { - "node": ">=14.14" - } - }, - "node_modules/@mintlify/previewing/node_modules/unist-util-is": { - "version": "5.2.1", - "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-5.2.1.tgz", - "integrity": "sha512-u9njyyfEh43npf1M+yGKDGVPbY/JWEemg5nH05ncKPfi+kBbKBJoTdsogMu33uhytuLlv9y0O7GH7fEdwLdLQw==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@mintlify/previewing/node_modules/unist-util-visit": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-4.1.2.tgz", - "integrity": "sha512-MSd8OUGISqHdVvfY9TPhyK2VdUrPgxkUtWSuMHF6XAAFuL4LokseigBnZtPnJMu+FbynTkFNnFlyjxpVKujMRg==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2.0.0", - "unist-util-is": "^5.0.0", - "unist-util-visit-parents": "^5.1.1" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@mintlify/previewing/node_modules/unist-util-visit-parents": { - "version": "5.1.3", - "resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-5.1.3.tgz", - "integrity": "sha512-x6+y8g7wWMyQhL1iZfhIPhDAs7Xwbn9nRosDXl7qoPTSCy0yNxnKc+hWokFifWQIDGi154rdUqKvbCa4+1kLhg==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2.0.0", - "unist-util-is": "^5.0.0" - }, - "funding": { - 
"type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@mintlify/scraping": { - "version": "4.0.522", - "resolved": "https://registry.npmjs.org/@mintlify/scraping/-/scraping-4.0.522.tgz", - "integrity": "sha512-PL2k52WT5S5OAgnT2K13bP7J2El6XwiVvQlrLvxDYw5KMMV+y34YVJI8ZscKb4trjitWDgyK0UTq2KN6NQgn6g==", - "license": "Elastic-2.0", - "dependencies": { - "@mintlify/common": "1.0.661", - "@mintlify/openapi-parser": "^0.0.8", - "fs-extra": "11.1.1", - "hast-util-to-mdast": "10.1.0", - "js-yaml": "4.1.0", - "mdast-util-mdx-jsx": "3.1.3", - "neotraverse": "0.6.18", - "puppeteer": "22.14.0", - "rehype-parse": "9.0.1", - "remark-gfm": "4.0.0", - "remark-mdx": "3.0.1", - "remark-parse": "11.0.0", - "remark-stringify": "11.0.0", - "unified": "11.0.5", - "unist-util-visit": "5.0.0", - "yargs": "17.7.1", - "zod": "3.21.4" - }, - "bin": { - "mintlify-scrape": "bin/cli.js" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@mintlify/scraping/node_modules/@floating-ui/react-dom": { - "version": "2.1.8", - "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.8.tgz", - "integrity": "sha512-cC52bHwM/n/CxS87FH0yWdngEZrjdtLW/qVruo68qg+prK7ZQ4YGdut2GyDVpoGeAYe/h899rVeOVm6Oi40k2A==", - "license": "MIT", - "peer": true, - "dependencies": { - "@floating-ui/dom": "^1.7.6" - }, - "peerDependencies": { - "react": ">=16.8.0", - "react-dom": ">=16.8.0" - } - }, - "node_modules/@mintlify/scraping/node_modules/@mintlify/common": { - "version": "1.0.661", - "resolved": "https://registry.npmjs.org/@mintlify/common/-/common-1.0.661.tgz", - "integrity": "sha512-/Hdiblzaomp+AWStQ4smhVMgesQhffzQjC9aYBnmLReNdh2Js+ccQFUaWL3TNIxwiS2esaZvsHSV/D+zyRS3hg==", - "license": "ISC", - "dependencies": { - "@asyncapi/parser": "3.4.0", - "@mintlify/mdx": "^3.0.4", - "@mintlify/models": "0.0.255", - "@mintlify/openapi-parser": "^0.0.8", - "@mintlify/validation": "0.1.555", - "@sindresorhus/slugify": "2.2.0", - "@types/mdast": "4.0.4", - 
"acorn": "8.11.2", - "acorn-jsx": "5.3.2", - "color-blend": "4.0.0", - "estree-util-to-js": "2.0.0", - "estree-walker": "3.0.3", - "front-matter": "4.0.2", - "hast-util-from-html": "2.0.3", - "hast-util-to-html": "9.0.4", - "hast-util-to-text": "4.0.2", - "hex-rgb": "5.0.0", - "ignore": "7.0.5", - "js-yaml": "4.1.0", - "lodash": "4.17.21", - "mdast-util-from-markdown": "2.0.2", - "mdast-util-gfm": "3.0.0", - "mdast-util-mdx": "3.0.0", - "mdast-util-mdx-jsx": "3.1.3", - "micromark-extension-gfm": "3.0.0", - "micromark-extension-mdx-jsx": "3.0.1", - "micromark-extension-mdxjs": "3.0.0", - "openapi-types": "12.1.3", - "postcss": "8.5.6", - "rehype-stringify": "10.0.1", - "remark": "15.0.1", - "remark-frontmatter": "5.0.0", - "remark-gfm": "4.0.0", - "remark-math": "6.0.0", - "remark-mdx": "3.1.0", - "remark-parse": "11.0.0", - "remark-rehype": "11.1.1", - "remark-stringify": "11.0.0", - "tailwindcss": "3.4.4", - "unified": "11.0.5", - "unist-builder": "4.0.0", - "unist-util-map": "4.0.0", - "unist-util-remove": "4.0.0", - "unist-util-remove-position": "5.0.0", - "unist-util-visit": "5.0.0", - "unist-util-visit-parents": "6.0.1", - "vfile": "6.0.3" - } - }, - "node_modules/@mintlify/scraping/node_modules/@mintlify/common/node_modules/remark-mdx": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/remark-mdx/-/remark-mdx-3.1.0.tgz", - "integrity": "sha512-Ngl/H3YXyBV9RcRNdlYsZujAmhsxwzxpDzpDEhFBVAGthS4GDgnctpDjgFl/ULx5UEDzqtW1cyBSNKqYYrqLBA==", - "license": "MIT", - "dependencies": { - "mdast-util-mdx": "^3.0.0", - "micromark-extension-mdxjs": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@mintlify/scraping/node_modules/@mintlify/mdx": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/@mintlify/mdx/-/mdx-3.0.4.tgz", - "integrity": "sha512-tJhdpnM5ReJLNJ2fuDRIEr0zgVd6id7/oAIfs26V46QlygiLsc8qx4Rz3LWIX51rUXW/cfakjj0EATxIciIw+g==", - "license": "MIT", - 
"dependencies": { - "@shikijs/transformers": "^3.11.0", - "@shikijs/twoslash": "^3.12.2", - "arktype": "^2.1.26", - "hast-util-to-string": "^3.0.1", - "mdast-util-from-markdown": "^2.0.2", - "mdast-util-gfm": "^3.1.0", - "mdast-util-mdx-jsx": "^3.2.0", - "mdast-util-to-hast": "^13.2.0", - "next-mdx-remote-client": "^1.0.3", - "rehype-katex": "^7.0.1", - "remark-gfm": "^4.0.0", - "remark-math": "^6.0.0", - "remark-smartypants": "^3.0.2", - "shiki": "^3.11.0", - "unified": "^11.0.0", - "unist-util-visit": "^5.0.0" - }, - "peerDependencies": { - "@radix-ui/react-popover": "^1.1.15", - "react": "^18.3.1", - "react-dom": "^18.3.1" - } - }, - "node_modules/@mintlify/scraping/node_modules/@mintlify/mdx/node_modules/mdast-util-gfm": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/mdast-util-gfm/-/mdast-util-gfm-3.1.0.tgz", - "integrity": "sha512-0ulfdQOM3ysHhCJ1p06l0b0VKlhU0wuQs3thxZQagjcjPrlFRqY215uZGHHJan9GEAXd9MbfPjFJz+qMkVR6zQ==", - "license": "MIT", - "dependencies": { - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-gfm-autolink-literal": "^2.0.0", - "mdast-util-gfm-footnote": "^2.0.0", - "mdast-util-gfm-strikethrough": "^2.0.0", - "mdast-util-gfm-table": "^2.0.0", - "mdast-util-gfm-task-list-item": "^2.0.0", - "mdast-util-to-markdown": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@mintlify/scraping/node_modules/@mintlify/mdx/node_modules/mdast-util-mdx-jsx": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/mdast-util-mdx-jsx/-/mdast-util-mdx-jsx-3.2.0.tgz", - "integrity": "sha512-lj/z8v0r6ZtsN/cGNNtemmmfoLAFZnjMbNyLzBafjzikOM+glrjNHPlf6lQDOTccj9n5b0PPihEBbhneMyGs1Q==", - "license": "MIT", - "dependencies": { - "@types/estree-jsx": "^1.0.0", - "@types/hast": "^3.0.0", - "@types/mdast": "^4.0.0", - "@types/unist": "^3.0.0", - "ccount": "^2.0.0", - "devlop": "^1.1.0", - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-to-markdown": "^2.0.0", - 
"parse-entities": "^4.0.0", - "stringify-entities": "^4.0.0", - "unist-util-stringify-position": "^4.0.0", - "vfile-message": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@mintlify/scraping/node_modules/@mintlify/models": { - "version": "0.0.255", - "resolved": "https://registry.npmjs.org/@mintlify/models/-/models-0.0.255.tgz", - "integrity": "sha512-LIUkfA7l7ypHAAuOW74ZJws/NwNRqlDRD/U466jarXvvSlGhJec/6J4/I+IEcBvWDnc9anLFKmnGO04jPKgAsg==", - "license": "Elastic-2.0", - "dependencies": { - "axios": "1.10.0", - "openapi-types": "12.1.3" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@mintlify/scraping/node_modules/@mintlify/validation": { - "version": "0.1.555", - "resolved": "https://registry.npmjs.org/@mintlify/validation/-/validation-0.1.555.tgz", - "integrity": "sha512-11QVUReL4N5u8wSCgZt4RN7PA0jYQoMEBZ5IrUp5pgb5ZJBOoGV/vPsQrxPPa1cxsUDAuToNhtGxRQtOav/w8w==", - "license": "Elastic-2.0", - "dependencies": { - "@mintlify/mdx": "^3.0.4", - "@mintlify/models": "0.0.255", - "arktype": "2.1.27", - "js-yaml": "4.1.0", - "lcm": "0.0.3", - "lodash": "4.17.21", - "object-hash": "3.0.0", - "openapi-types": "12.1.3", - "uuid": "11.1.0", - "zod": "3.21.4", - "zod-to-json-schema": "3.20.4" - } - }, - "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-arrow": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz", - "integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-primitive": "2.1.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - 
"@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-dismissable-layer": { - "version": "1.1.11", - "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz", - "integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-escape-keydown": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-focus-scope": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz", - "integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-popover": { - "version": "1.1.15", - "resolved": 
"https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.1.15.tgz", - "integrity": "sha512-kr0X2+6Yy/vJzLYJUPCZEc8SfQcf+1COFoAqauJm74umQhta9M7lNJHP7QQS3vkvcGLQUbWpMzwrXYwrYztHKA==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-focus-guards": "1.1.3", - "@radix-ui/react-focus-scope": "1.1.7", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-controllable-state": "1.2.2", - "aria-hidden": "^1.2.4", - "react-remove-scroll": "^2.6.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-popper": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz", - "integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==", - "license": "MIT", - "peer": true, - "dependencies": { - "@floating-ui/react-dom": "^2.0.0", - "@radix-ui/react-arrow": "1.1.7", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-layout-effect": "1.1.1", - "@radix-ui/react-use-rect": "1.1.1", - "@radix-ui/react-use-size": "1.1.1", - "@radix-ui/rect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - 
"@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-portal": { - "version": "1.1.9", - "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz", - "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-presence": { - "version": "1.1.5", - "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz", - "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/scraping/node_modules/@radix-ui/react-primitive": { - "version": "2.1.3", - "resolved": 
"https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz", - "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-slot": "1.2.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/scraping/node_modules/axios": { - "version": "1.10.0", - "resolved": "https://registry.npmjs.org/axios/-/axios-1.10.0.tgz", - "integrity": "sha512-/1xYAC4MP/HEG+3duIhFr4ZQXR4sQXOIe+o6sdqzeykGLx6Upp/1p8MHqhINOvGeP7xyNHe7tsiJByc4SSVUxw==", - "license": "MIT", - "dependencies": { - "follow-redirects": "^1.15.6", - "form-data": "^4.0.0", - "proxy-from-env": "^1.1.0" - } - }, - "node_modules/@mintlify/scraping/node_modules/fs-extra": { - "version": "11.1.1", - "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.1.tgz", - "integrity": "sha512-MGIE4HOvQCeUCzmlHs0vXpih4ysz4wg9qiSAu6cd42lVwPbTM1TjV7RusoyQqMmk/95gdQZX72u+YW+c3eEpFQ==", - "license": "MIT", - "dependencies": { - "graceful-fs": "^4.2.0", - "jsonfile": "^6.0.1", - "universalify": "^2.0.0" - }, - "engines": { - "node": ">=14.14" - } - }, - "node_modules/@mintlify/scraping/node_modules/glob-parent": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz", - "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==", - "license": "ISC", - "dependencies": { - "is-glob": "^4.0.3" - }, - "engines": { - "node": ">=10.13.0" - } - }, - "node_modules/@mintlify/scraping/node_modules/lilconfig": { - "version": "2.1.0", - "resolved": 
"https://registry.npmjs.org/lilconfig/-/lilconfig-2.1.0.tgz", - "integrity": "sha512-utWOt/GHzuUxnLKxB6dk81RoOeoNeHgbrXiuGk4yyF5qlRz+iIVWu56E2fqGHFrXz0QNUhLB/8nKqvRH66JKGQ==", - "license": "MIT", - "engines": { - "node": ">=10" - } - }, - "node_modules/@mintlify/scraping/node_modules/mdast-util-mdx-jsx": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/mdast-util-mdx-jsx/-/mdast-util-mdx-jsx-3.1.3.tgz", - "integrity": "sha512-bfOjvNt+1AcbPLTFMFWY149nJz0OjmewJs3LQQ5pIyVGxP4CdOqNVJL6kTaM5c68p8q82Xv3nCyFfUnuEcH3UQ==", - "license": "MIT", - "dependencies": { - "@types/estree-jsx": "^1.0.0", - "@types/hast": "^3.0.0", - "@types/mdast": "^4.0.0", - "@types/unist": "^3.0.0", - "ccount": "^2.0.0", - "devlop": "^1.1.0", - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-to-markdown": "^2.0.0", - "parse-entities": "^4.0.0", - "stringify-entities": "^4.0.0", - "unist-util-stringify-position": "^4.0.0", - "vfile-message": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@mintlify/scraping/node_modules/next-mdx-remote-client": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/next-mdx-remote-client/-/next-mdx-remote-client-1.1.7.tgz", - "integrity": "sha512-12Ap5Z/tFIETMXFSBTH2IFEhJAso7MvOJ5ICyesA4q6FM4vtAcmb+4ZKa4tV1IVQJLBVqOhaEfIESZzdwjmrQQ==", - "license": "MPL 2.0", - "dependencies": { - "@babel/code-frame": "^7.29.0", - "@mdx-js/mdx": "^3.1.1", - "@mdx-js/react": "^3.1.1", - "remark-mdx-remove-esm": "^1.3.1", - "serialize-error": "^13.0.1", - "vfile": "^6.0.3", - "vfile-matter": "^5.0.1" - }, - "engines": { - "node": ">=20.9.0" - }, - "peerDependencies": { - "react": ">= 18.3.0 < 19.0.0", - "react-dom": ">= 18.3.0 < 19.0.0" - } - }, - "node_modules/@mintlify/scraping/node_modules/react": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", - "integrity": 
"sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", - "license": "MIT", - "peer": true, - "dependencies": { - "loose-envify": "^1.1.0" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/@mintlify/scraping/node_modules/react-dom": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz", - "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==", - "license": "MIT", - "peer": true, - "dependencies": { - "loose-envify": "^1.1.0", - "scheduler": "^0.23.2" - }, - "peerDependencies": { - "react": "^18.3.1" - } - }, - "node_modules/@mintlify/scraping/node_modules/remark-mdx": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/remark-mdx/-/remark-mdx-3.0.1.tgz", - "integrity": "sha512-3Pz3yPQ5Rht2pM5R+0J2MrGoBSrzf+tJG94N+t/ilfdh8YLyyKYtidAYwTveB20BoHAcwIopOUqhcmh2F7hGYA==", - "license": "MIT", - "dependencies": { - "mdast-util-mdx": "^3.0.0", - "micromark-extension-mdxjs": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@mintlify/scraping/node_modules/scheduler": { - "version": "0.23.2", - "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.23.2.tgz", - "integrity": "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==", - "license": "MIT", - "peer": true, - "dependencies": { - "loose-envify": "^1.1.0" - } - }, - "node_modules/@mintlify/scraping/node_modules/tailwindcss": { - "version": "3.4.4", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.4.tgz", - "integrity": "sha512-ZoyXOdJjISB7/BcLTR6SEsLgKtDStYyYZVLsUtWChO4Ps20CBad7lfJKVDiejocV4ME1hLmyY0WJE3hSDcmQ2A==", - "license": "MIT", - "dependencies": { - "@alloc/quick-lru": "^5.2.0", - "arg": "^5.0.2", - "chokidar": "^3.5.3", - "didyoumean": "^1.2.2", - "dlv": "^1.1.3", - "fast-glob": "^3.3.0", 
- "glob-parent": "^6.0.2", - "is-glob": "^4.0.3", - "jiti": "^1.21.0", - "lilconfig": "^2.1.0", - "micromatch": "^4.0.5", - "normalize-path": "^3.0.0", - "object-hash": "^3.0.0", - "picocolors": "^1.0.0", - "postcss": "^8.4.23", - "postcss-import": "^15.1.0", - "postcss-js": "^4.0.1", - "postcss-load-config": "^4.0.1", - "postcss-nested": "^6.0.1", - "postcss-selector-parser": "^6.0.11", - "resolve": "^1.22.2", - "sucrase": "^3.32.0" - }, - "bin": { - "tailwind": "lib/cli.js", - "tailwindcss": "lib/cli.js" - }, - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/@mintlify/scraping/node_modules/tailwindcss/node_modules/postcss-load-config": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-4.0.2.tgz", - "integrity": "sha512-bSVhyJGL00wMVoPUzAVAnbEoWyqRxkjv64tUl427SKnPrENtq6hJwUojroMz2VB+Q1edmi4IfrAPpami5VVgMQ==", - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "lilconfig": "^3.0.0", - "yaml": "^2.3.4" - }, - "engines": { - "node": ">= 14" - }, - "peerDependencies": { - "postcss": ">=8.0.9", - "ts-node": ">=9.0.0" - }, - "peerDependenciesMeta": { - "postcss": { - "optional": true - }, - "ts-node": { - "optional": true - } - } - }, - "node_modules/@mintlify/scraping/node_modules/tailwindcss/node_modules/postcss-load-config/node_modules/lilconfig": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz", - "integrity": "sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==", - "license": "MIT", - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/antonk52" - } - }, - "node_modules/@mintlify/scraping/node_modules/zod": { - "version": "3.21.4", - "resolved": "https://registry.npmjs.org/zod/-/zod-3.21.4.tgz", - 
"integrity": "sha512-m46AKbrzKVzOzs/DZgVnG5H55N1sv1M8qZU3A8RIKbs3mrACDNeIOeilDymVb2HdmP8uwshOCF4uJ8uM9rCqJw==", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/colinhacks" - } - }, - "node_modules/@mintlify/scraping/node_modules/zod-to-json-schema": { - "version": "3.20.4", - "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.20.4.tgz", - "integrity": "sha512-Un9+kInJ2Zt63n6Z7mLqBifzzPcOyX+b+Exuzf7L1+xqck9Q2EPByyTRduV3kmSPaXaRer1JCsucubpgL1fipg==", - "license": "ISC", - "peerDependencies": { - "zod": "^3.20.0" - } - }, - "node_modules/@mintlify/validation": { - "version": "0.1.660", - "resolved": "https://registry.npmjs.org/@mintlify/validation/-/validation-0.1.660.tgz", - "integrity": "sha512-IHlea3t9ZZcQMOfext3fZuG6/hXXTZPBFJkgeHA9lbG2OkdAVRbSMDY9FvC07sEEX1VQJX+bPimRaXUz/ujyYg==", - "license": "Elastic-2.0", - "dependencies": { - "@mintlify/mdx": "^3.0.4", - "@mintlify/models": "0.0.290", - "arktype": "2.1.27", - "js-yaml": "4.1.0", - "lcm": "0.0.3", - "lodash": "4.17.21", - "neotraverse": "0.6.18", - "object-hash": "3.0.0", - "openapi-types": "12.1.3", - "uuid": "11.1.0", - "zod": "3.24.0", - "zod-to-json-schema": "3.20.4" - } - }, - "node_modules/@mintlify/validation/node_modules/@floating-ui/react-dom": { - "version": "2.1.8", - "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.8.tgz", - "integrity": "sha512-cC52bHwM/n/CxS87FH0yWdngEZrjdtLW/qVruo68qg+prK7ZQ4YGdut2GyDVpoGeAYe/h899rVeOVm6Oi40k2A==", - "license": "MIT", - "peer": true, - "dependencies": { - "@floating-ui/dom": "^1.7.6" - }, - "peerDependencies": { - "react": ">=16.8.0", - "react-dom": ">=16.8.0" - } - }, - "node_modules/@mintlify/validation/node_modules/@mintlify/mdx": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/@mintlify/mdx/-/mdx-3.0.4.tgz", - "integrity": "sha512-tJhdpnM5ReJLNJ2fuDRIEr0zgVd6id7/oAIfs26V46QlygiLsc8qx4Rz3LWIX51rUXW/cfakjj0EATxIciIw+g==", - "license": "MIT", - 
"dependencies": { - "@shikijs/transformers": "^3.11.0", - "@shikijs/twoslash": "^3.12.2", - "arktype": "^2.1.26", - "hast-util-to-string": "^3.0.1", - "mdast-util-from-markdown": "^2.0.2", - "mdast-util-gfm": "^3.1.0", - "mdast-util-mdx-jsx": "^3.2.0", - "mdast-util-to-hast": "^13.2.0", - "next-mdx-remote-client": "^1.0.3", - "rehype-katex": "^7.0.1", - "remark-gfm": "^4.0.0", - "remark-math": "^6.0.0", - "remark-smartypants": "^3.0.2", - "shiki": "^3.11.0", - "unified": "^11.0.0", - "unist-util-visit": "^5.0.0" - }, - "peerDependencies": { - "@radix-ui/react-popover": "^1.1.15", - "react": "^18.3.1", - "react-dom": "^18.3.1" - } - }, - "node_modules/@mintlify/validation/node_modules/@radix-ui/react-arrow": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz", - "integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-primitive": "2.1.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/validation/node_modules/@radix-ui/react-dismissable-layer": { - "version": "1.1.11", - "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz", - "integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-escape-keydown": "1.1.1" - }, - 
"peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/validation/node_modules/@radix-ui/react-focus-scope": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz", - "integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/validation/node_modules/@radix-ui/react-popover": { - "version": "1.1.15", - "resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.1.15.tgz", - "integrity": "sha512-kr0X2+6Yy/vJzLYJUPCZEc8SfQcf+1COFoAqauJm74umQhta9M7lNJHP7QQS3vkvcGLQUbWpMzwrXYwrYztHKA==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-focus-guards": "1.1.3", - "@radix-ui/react-focus-scope": "1.1.7", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": 
"1.2.3", - "@radix-ui/react-use-controllable-state": "1.2.2", - "aria-hidden": "^1.2.4", - "react-remove-scroll": "^2.6.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/validation/node_modules/@radix-ui/react-popper": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz", - "integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==", - "license": "MIT", - "peer": true, - "dependencies": { - "@floating-ui/react-dom": "^2.0.0", - "@radix-ui/react-arrow": "1.1.7", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-layout-effect": "1.1.1", - "@radix-ui/react-use-rect": "1.1.1", - "@radix-ui/react-use-size": "1.1.1", - "@radix-ui/rect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/validation/node_modules/@radix-ui/react-portal": { - "version": "1.1.9", - "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz", - "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-layout-effect": 
"1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/validation/node_modules/@radix-ui/react-presence": { - "version": "1.1.5", - "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz", - "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/validation/node_modules/@radix-ui/react-primitive": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz", - "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-slot": "1.2.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@mintlify/validation/node_modules/mdast-util-gfm": { - "version": "3.1.0", - "resolved": 
"https://registry.npmjs.org/mdast-util-gfm/-/mdast-util-gfm-3.1.0.tgz", - "integrity": "sha512-0ulfdQOM3ysHhCJ1p06l0b0VKlhU0wuQs3thxZQagjcjPrlFRqY215uZGHHJan9GEAXd9MbfPjFJz+qMkVR6zQ==", - "license": "MIT", - "dependencies": { - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-gfm-autolink-literal": "^2.0.0", - "mdast-util-gfm-footnote": "^2.0.0", - "mdast-util-gfm-strikethrough": "^2.0.0", - "mdast-util-gfm-table": "^2.0.0", - "mdast-util-gfm-task-list-item": "^2.0.0", - "mdast-util-to-markdown": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@mintlify/validation/node_modules/next-mdx-remote-client": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/next-mdx-remote-client/-/next-mdx-remote-client-1.1.7.tgz", - "integrity": "sha512-12Ap5Z/tFIETMXFSBTH2IFEhJAso7MvOJ5ICyesA4q6FM4vtAcmb+4ZKa4tV1IVQJLBVqOhaEfIESZzdwjmrQQ==", - "license": "MPL 2.0", - "dependencies": { - "@babel/code-frame": "^7.29.0", - "@mdx-js/mdx": "^3.1.1", - "@mdx-js/react": "^3.1.1", - "remark-mdx-remove-esm": "^1.3.1", - "serialize-error": "^13.0.1", - "vfile": "^6.0.3", - "vfile-matter": "^5.0.1" - }, - "engines": { - "node": ">=20.9.0" - }, - "peerDependencies": { - "react": ">= 18.3.0 < 19.0.0", - "react-dom": ">= 18.3.0 < 19.0.0" - } - }, - "node_modules/@mintlify/validation/node_modules/react": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", - "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", - "license": "MIT", - "peer": true, - "dependencies": { - "loose-envify": "^1.1.0" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/@mintlify/validation/node_modules/react-dom": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz", - "integrity": 
"sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==", - "license": "MIT", - "peer": true, - "dependencies": { - "loose-envify": "^1.1.0", - "scheduler": "^0.23.2" - }, - "peerDependencies": { - "react": "^18.3.1" - } - }, - "node_modules/@mintlify/validation/node_modules/scheduler": { - "version": "0.23.2", - "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.23.2.tgz", - "integrity": "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==", - "license": "MIT", - "peer": true, - "dependencies": { - "loose-envify": "^1.1.0" - } - }, - "node_modules/@mintlify/validation/node_modules/zod": { - "version": "3.24.0", - "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.0.tgz", - "integrity": "sha512-Hz+wiY8yD0VLA2k/+nsg2Abez674dDGTai33SwNvMPuf9uIrBC9eFgIMQxBBbHFxVXi8W+5nX9DcAh9YNSQm/w==", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/colinhacks" - } - }, - "node_modules/@mintlify/validation/node_modules/zod-to-json-schema": { - "version": "3.20.4", - "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.20.4.tgz", - "integrity": "sha512-Un9+kInJ2Zt63n6Z7mLqBifzzPcOyX+b+Exuzf7L1+xqck9Q2EPByyTRduV3kmSPaXaRer1JCsucubpgL1fipg==", - "license": "ISC", - "peerDependencies": { - "zod": "^3.20.0" - } - }, - "node_modules/@nodelib/fs.scandir": { - "version": "2.1.5", - "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", - "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==", - "license": "MIT", - "dependencies": { - "@nodelib/fs.stat": "2.0.5", - "run-parallel": "^1.1.9" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/@nodelib/fs.stat": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz", - "integrity": 
"sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==", - "license": "MIT", - "engines": { - "node": ">= 8" - } - }, - "node_modules/@nodelib/fs.walk": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz", - "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==", - "license": "MIT", - "dependencies": { - "@nodelib/fs.scandir": "2.1.5", - "fastq": "^1.6.0" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/@openapi-contrib/openapi-schema-to-json-schema": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/@openapi-contrib/openapi-schema-to-json-schema/-/openapi-schema-to-json-schema-3.2.0.tgz", - "integrity": "sha512-Gj6C0JwCr8arj0sYuslWXUBSP/KnUlEGnPW4qxlXvAl543oaNQgMgIgkQUA6vs5BCCvwTEiL8m/wdWzfl4UvSw==", - "license": "MIT", - "dependencies": { - "fast-deep-equal": "^3.1.3" - } - }, - "node_modules/@posthog/core": { - "version": "1.7.1", - "resolved": "https://registry.npmjs.org/@posthog/core/-/core-1.7.1.tgz", - "integrity": "sha512-kjK0eFMIpKo9GXIbts8VtAknsoZ18oZorANdtuTj1CbgS28t4ZVq//HAWhnxEuXRTrtkd+SUJ6Ux3j2Af8NCuA==", - "license": "MIT", - "dependencies": { - "cross-spawn": "^7.0.6" - } - }, - "node_modules/@puppeteer/browsers": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.3.0.tgz", - "integrity": "sha512-ioXoq9gPxkss4MYhD+SFaU9p1IHFUX0ILAWFPyjGaBdjLsYAlZw6j1iLA0N/m12uVHLFDfSYNF7EQccjinIMDA==", - "license": "Apache-2.0", - "dependencies": { - "debug": "^4.3.5", - "extract-zip": "^2.0.1", - "progress": "^2.0.3", - "proxy-agent": "^6.4.0", - "semver": "^7.6.3", - "tar-fs": "^3.0.6", - "unbzip2-stream": "^1.4.3", - "yargs": "^17.7.2" - }, - "bin": { - "browsers": "lib/cjs/main-cli.js" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/@puppeteer/browsers/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": 
"https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/@puppeteer/browsers/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "license": "MIT" - }, - "node_modules/@puppeteer/browsers/node_modules/is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/@puppeteer/browsers/node_modules/string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/@puppeteer/browsers/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/@puppeteer/browsers/node_modules/tar-fs": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.2.tgz", - "integrity": 
"sha512-QGxxTxxyleAdyM3kpFs14ymbYmNFrfY+pHj7Z8FgtbZ7w2//VAgLMac7sT6nRpIHjppXO2AwwEOg0bPFVRcmXw==", - "license": "MIT", - "dependencies": { - "pump": "^3.0.0", - "tar-stream": "^3.1.5" - }, - "optionalDependencies": { - "bare-fs": "^4.0.1", - "bare-path": "^3.0.0" - } - }, - "node_modules/@puppeteer/browsers/node_modules/tar-stream": { - "version": "3.1.8", - "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.8.tgz", - "integrity": "sha512-U6QpVRyCGHva435KoNWy9PRoi2IFYCgtEhq9nmrPPpbRacPs9IH4aJ3gbrFC8dPcXvdSZ4XXfXT5Fshbp2MtlQ==", - "license": "MIT", - "dependencies": { - "b4a": "^1.6.4", - "bare-fs": "^4.5.5", - "fast-fifo": "^1.2.0", - "streamx": "^2.15.0" - } - }, - "node_modules/@puppeteer/browsers/node_modules/yargs": { - "version": "17.7.2", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", - "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", - "license": "MIT", - "dependencies": { - "cliui": "^8.0.1", - "escalade": "^3.1.1", - "get-caller-file": "^2.0.5", - "require-directory": "^2.1.1", - "string-width": "^4.2.3", - "y18n": "^5.0.5", - "yargs-parser": "^21.1.1" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/@radix-ui/primitive": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz", - "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==", - "license": "MIT", - "peer": true - }, - "node_modules/@radix-ui/react-compose-refs": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.1.2.tgz", - "integrity": "sha512-z4eqJvfiNnFMHIIvXP3CY57y2WJs5g2v3X0zm9mEJkrkNv4rDxu+sg9Jh8EkXyeqBkB7SOcboo9dMVqhyrACIg==", - "license": "MIT", - "peer": true, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - 
"@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-context": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.2.tgz", - "integrity": "sha512-jCi/QKUM2r1Ju5a3J64TH2A5SpKAgh0LpknyqdQ4m6DCV0xJ2HG1xARRwNGPQfi1SLdLWZ1OJz6F4OMBBNiGJA==", - "license": "MIT", - "peer": true, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-focus-guards": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.3.tgz", - "integrity": "sha512-0rFg/Rj2Q62NCm62jZw0QX7a3sz6QCQU0LpZdNrJX8byRGaGVTqbrW9jAoIAHyMQqsNpeZ81YgSizOt5WXq0Pw==", - "license": "MIT", - "peer": true, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-id": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.1.1.tgz", - "integrity": "sha512-kGkGegYIdQsOb4XjsfM97rXsiHaBwco+hFI66oO4s9LU+PLAC5oJ7khdOVFxkhsmlbpUqDAvXw11CluXP+jkHg==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-slot": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz", - "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2" - }, - 
"peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-callback-ref": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.1.1.tgz", - "integrity": "sha512-FkBMwD+qbGQeMu1cOHnuGB6x4yzPjho8ap5WtbEJ26umhgqVXbhekKUQO+hZEL1vU92a3wHwdp0HAcqAUF5iDg==", - "license": "MIT", - "peer": true, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-controllable-state": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-controllable-state/-/react-use-controllable-state-1.2.2.tgz", - "integrity": "sha512-BjasUjixPFdS+NKkypcyyN5Pmg83Olst0+c6vGov0diwTEo6mgdqVR6hxcEgFuh4QrAs7Rc+9KuGJ9TVCj0Zzg==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-use-effect-event": "0.0.2", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-effect-event": { - "version": "0.0.2", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-effect-event/-/react-use-effect-event-0.0.2.tgz", - "integrity": "sha512-Qp8WbZOBe+blgpuUT+lw2xheLP8q0oatc9UpmiemEICxGvFLYmHm9QowVZGHtJlGbS6A6yJ3iViad/2cVjnOiA==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - 
"node_modules/@radix-ui/react-use-escape-keydown": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-escape-keydown/-/react-use-escape-keydown-1.1.1.tgz", - "integrity": "sha512-Il0+boE7w/XebUHyBjroE+DbByORGR9KKmITzbR7MyQ4akpORYP/ZmbhAr0DG7RmmBqoOnZdy2QlvajJ2QA59g==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/react-use-callback-ref": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-layout-effect": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.1.1.tgz", - "integrity": "sha512-RbJRS4UWQFkzHTTwVymMTUv8EqYhOp8dOOviLj2ugtTiXRaRQS7GLGxZTLL1jWhMeoSCf5zmcZkqTl9IiYfXcQ==", - "license": "MIT", - "peer": true, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-rect": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-rect/-/react-use-rect-1.1.1.tgz", - "integrity": "sha512-QTYuDesS0VtuHNNvMh+CjlKJ4LJickCMUAqjlE3+j8w+RlRpwyX3apEQKGFzbZGdo7XNG1tXa+bQqIE7HIXT2w==", - "license": "MIT", - "peer": true, - "dependencies": { - "@radix-ui/rect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-size": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-size/-/react-use-size-1.1.1.tgz", - "integrity": "sha512-ewrXRDTAqAXlkl6t/fkXWNAhFX9I+CkKlw6zjEwk86RSPKwZr3xpBRso655aqYafwtnbpHLj6toFzmd6xdVptQ==", - "license": "MIT", - 
"peer": true, - "dependencies": { - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/rect": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/rect/-/rect-1.1.1.tgz", - "integrity": "sha512-HPwpGIzkl28mWyZqG52jiqDJ12waP11Pa1lGoiyUkIEuMLBP0oeK/C89esbXrxsky5we7dfd8U58nm0SgAWpVw==", - "license": "MIT", - "peer": true - }, - "node_modules/@shikijs/core": { - "version": "3.23.0", - "resolved": "https://registry.npmjs.org/@shikijs/core/-/core-3.23.0.tgz", - "integrity": "sha512-NSWQz0riNb67xthdm5br6lAkvpDJRTgB36fxlo37ZzM2yq0PQFFzbd8psqC2XMPgCzo1fW6cVi18+ArJ44wqgA==", - "license": "MIT", - "dependencies": { - "@shikijs/types": "3.23.0", - "@shikijs/vscode-textmate": "^10.0.2", - "@types/hast": "^3.0.4", - "hast-util-to-html": "^9.0.5" - } - }, - "node_modules/@shikijs/core/node_modules/hast-util-to-html": { - "version": "9.0.5", - "resolved": "https://registry.npmjs.org/hast-util-to-html/-/hast-util-to-html-9.0.5.tgz", - "integrity": "sha512-OguPdidb+fbHQSU4Q4ZiLKnzWo8Wwsf5bZfbvu7//a9oTYoqD/fWpe96NuHkoS9h0ccGOTe0C4NGXdtS0iObOw==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "@types/unist": "^3.0.0", - "ccount": "^2.0.0", - "comma-separated-tokens": "^2.0.0", - "hast-util-whitespace": "^3.0.0", - "html-void-elements": "^3.0.0", - "mdast-util-to-hast": "^13.0.0", - "property-information": "^7.0.0", - "space-separated-tokens": "^2.0.0", - "stringify-entities": "^4.0.0", - "zwitch": "^2.0.4" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/@shikijs/engine-javascript": { - "version": "3.23.0", - "resolved": "https://registry.npmjs.org/@shikijs/engine-javascript/-/engine-javascript-3.23.0.tgz", - "integrity": 
"sha512-aHt9eiGFobmWR5uqJUViySI1bHMqrAgamWE1TYSUoftkAeCCAiGawPMwM+VCadylQtF4V3VNOZ5LmfItH5f3yA==", - "license": "MIT", - "dependencies": { - "@shikijs/types": "3.23.0", - "@shikijs/vscode-textmate": "^10.0.2", - "oniguruma-to-es": "^4.3.4" - } - }, - "node_modules/@shikijs/engine-oniguruma": { - "version": "3.23.0", - "resolved": "https://registry.npmjs.org/@shikijs/engine-oniguruma/-/engine-oniguruma-3.23.0.tgz", - "integrity": "sha512-1nWINwKXxKKLqPibT5f4pAFLej9oZzQTsby8942OTlsJzOBZ0MWKiwzMsd+jhzu8YPCHAswGnnN1YtQfirL35g==", - "license": "MIT", - "dependencies": { - "@shikijs/types": "3.23.0", - "@shikijs/vscode-textmate": "^10.0.2" - } - }, - "node_modules/@shikijs/langs": { - "version": "3.23.0", - "resolved": "https://registry.npmjs.org/@shikijs/langs/-/langs-3.23.0.tgz", - "integrity": "sha512-2Ep4W3Re5aB1/62RSYQInK9mM3HsLeB91cHqznAJMuylqjzNVAVCMnNWRHFtcNHXsoNRayP9z1qj4Sq3nMqYXg==", - "license": "MIT", - "dependencies": { - "@shikijs/types": "3.23.0" - } - }, - "node_modules/@shikijs/themes": { - "version": "3.23.0", - "resolved": "https://registry.npmjs.org/@shikijs/themes/-/themes-3.23.0.tgz", - "integrity": "sha512-5qySYa1ZgAT18HR/ypENL9cUSGOeI2x+4IvYJu4JgVJdizn6kG4ia5Q1jDEOi7gTbN4RbuYtmHh0W3eccOrjMA==", - "license": "MIT", - "dependencies": { - "@shikijs/types": "3.23.0" - } - }, - "node_modules/@shikijs/transformers": { - "version": "3.23.0", - "resolved": "https://registry.npmjs.org/@shikijs/transformers/-/transformers-3.23.0.tgz", - "integrity": "sha512-F9msZVxdF+krQNSdQ4V+Ja5QemeAoTQ2jxt7nJCwhDsdF1JWS3KxIQXA3lQbyKwS3J61oHRUSv4jYWv3CkaKTQ==", - "license": "MIT", - "dependencies": { - "@shikijs/core": "3.23.0", - "@shikijs/types": "3.23.0" - } - }, - "node_modules/@shikijs/twoslash": { - "version": "3.23.0", - "resolved": "https://registry.npmjs.org/@shikijs/twoslash/-/twoslash-3.23.0.tgz", - "integrity": "sha512-pNaLJWMA3LU7PhT8tm9OQBZ1epy0jmdgeJzntBtr1EVXLbHxGzTj3mnf9vOdcl84l96qnlJXkJ/NGXZYBpXl5g==", - "license": "MIT", - "dependencies": { - 
"@shikijs/core": "3.23.0", - "@shikijs/types": "3.23.0", - "twoslash": "^0.3.6" - }, - "peerDependencies": { - "typescript": ">=5.5.0" - } - }, - "node_modules/@shikijs/types": { - "version": "3.23.0", - "resolved": "https://registry.npmjs.org/@shikijs/types/-/types-3.23.0.tgz", - "integrity": "sha512-3JZ5HXOZfYjsYSk0yPwBrkupyYSLpAE26Qc0HLghhZNGTZg/SKxXIIgoxOpmmeQP0RRSDJTk1/vPfw9tbw+jSQ==", - "license": "MIT", - "dependencies": { - "@shikijs/vscode-textmate": "^10.0.2", - "@types/hast": "^3.0.4" - } - }, - "node_modules/@shikijs/vscode-textmate": { - "version": "10.0.2", - "resolved": "https://registry.npmjs.org/@shikijs/vscode-textmate/-/vscode-textmate-10.0.2.tgz", - "integrity": "sha512-83yeghZ2xxin3Nj8z1NMd/NCuca+gsYXswywDy5bHvwlWL8tpTQmzGeUuHd9FC3E/SBEMvzJRwWEOz5gGes9Qg==", - "license": "MIT" - }, - "node_modules/@sindresorhus/is": { - "version": "5.6.0", - "resolved": "https://registry.npmjs.org/@sindresorhus/is/-/is-5.6.0.tgz", - "integrity": "sha512-TV7t8GKYaJWsn00tFDqBw8+Uqmr8A0fRU1tvTQhyZzGv0sJCGRQL3JGMI3ucuKo3XIZdUP+Lx7/gh2t3lewy7g==", - "license": "MIT", - "engines": { - "node": ">=14.16" - }, - "funding": { - "url": "https://github.com/sindresorhus/is?sponsor=1" - } - }, - "node_modules/@sindresorhus/slugify": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/@sindresorhus/slugify/-/slugify-2.2.0.tgz", - "integrity": "sha512-9Vybc/qX8Kj6pxJaapjkFbiUJPk7MAkCh/GFCxIBnnsuYCFPIXKvnLidG8xlepht3i24L5XemUmGtrJ3UWrl6w==", - "license": "MIT", - "dependencies": { - "@sindresorhus/transliterate": "^1.0.0", - "escape-string-regexp": "^5.0.0" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/@sindresorhus/transliterate": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/@sindresorhus/transliterate/-/transliterate-1.6.0.tgz", - "integrity": "sha512-doH1gimEu3A46VX6aVxpHTeHrytJAG6HgdxntYnCFiIFHEM/ZGpG8KiZGBChchjQmG0XFIBL552kBTjVcMZXwQ==", - "license": 
"MIT", - "dependencies": { - "escape-string-regexp": "^5.0.0" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/@socket.io/component-emitter": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/@socket.io/component-emitter/-/component-emitter-3.1.2.tgz", - "integrity": "sha512-9BCxFwvbGg/RsZK9tjXd8s4UcwR0MWeFQ1XEKIQVVvAGJyINdrqKMcTRyLoK8Rse1GjzLV9cwjWV1olXRWEXVA==", - "license": "MIT" - }, - "node_modules/@stoplight/better-ajv-errors": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/@stoplight/better-ajv-errors/-/better-ajv-errors-1.0.3.tgz", - "integrity": "sha512-0p9uXkuB22qGdNfy3VeEhxkU5uwvp/KrBTAbrLBURv6ilxIVwanKwjMc41lQfIVgPGcOkmLbTolfFrSsueu7zA==", - "license": "Apache-2.0", - "dependencies": { - "jsonpointer": "^5.0.0", - "leven": "^3.1.0" - }, - "engines": { - "node": "^12.20 || >= 14.13" - }, - "peerDependencies": { - "ajv": ">=8" - } - }, - "node_modules/@stoplight/better-ajv-errors/node_modules/leven": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/leven/-/leven-3.1.0.tgz", - "integrity": "sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/@stoplight/json": { - "version": "3.21.0", - "resolved": "https://registry.npmjs.org/@stoplight/json/-/json-3.21.0.tgz", - "integrity": "sha512-5O0apqJ/t4sIevXCO3SBN9AHCEKKR/Zb4gaj7wYe5863jme9g02Q0n/GhM7ZCALkL+vGPTe4ZzTETP8TFtsw3g==", - "license": "Apache-2.0", - "dependencies": { - "@stoplight/ordered-object-literal": "^1.0.3", - "@stoplight/path": "^1.3.2", - "@stoplight/types": "^13.6.0", - "jsonc-parser": "~2.2.1", - "lodash": "^4.17.21", - "safe-stable-stringify": "^1.1" - }, - "engines": { - "node": ">=8.3.0" - } - }, - "node_modules/@stoplight/json-ref-readers": { - "version": "1.2.2", - "resolved": 
"https://registry.npmjs.org/@stoplight/json-ref-readers/-/json-ref-readers-1.2.2.tgz", - "integrity": "sha512-nty0tHUq2f1IKuFYsLM4CXLZGHdMn+X/IwEUIpeSOXt0QjMUbL0Em57iJUDzz+2MkWG83smIigNZ3fauGjqgdQ==", - "license": "Apache-2.0", - "dependencies": { - "node-fetch": "^2.6.0", - "tslib": "^1.14.1" - }, - "engines": { - "node": ">=8.3.0" - } - }, - "node_modules/@stoplight/json-ref-resolver": { - "version": "3.1.6", - "resolved": "https://registry.npmjs.org/@stoplight/json-ref-resolver/-/json-ref-resolver-3.1.6.tgz", - "integrity": "sha512-YNcWv3R3n3U6iQYBsFOiWSuRGE5su1tJSiX6pAPRVk7dP0L7lqCteXGzuVRQ0gMZqUl8v1P0+fAKxF6PLo9B5A==", - "license": "Apache-2.0", - "dependencies": { - "@stoplight/json": "^3.21.0", - "@stoplight/path": "^1.3.2", - "@stoplight/types": "^12.3.0 || ^13.0.0", - "@types/urijs": "^1.19.19", - "dependency-graph": "~0.11.0", - "fast-memoize": "^2.5.2", - "immer": "^9.0.6", - "lodash": "^4.17.21", - "tslib": "^2.6.0", - "urijs": "^1.19.11" - }, - "engines": { - "node": ">=8.3.0" - } - }, - "node_modules/@stoplight/json-ref-resolver/node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD" - }, - "node_modules/@stoplight/ordered-object-literal": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/@stoplight/ordered-object-literal/-/ordered-object-literal-1.0.5.tgz", - "integrity": "sha512-COTiuCU5bgMUtbIFBuyyh2/yVVzlr5Om0v5utQDgBCuQUOPgU1DwoffkTfg4UBQOvByi5foF4w4T+H9CoRe5wg==", - "license": "Apache-2.0", - "engines": { - "node": ">=8" - } - }, - "node_modules/@stoplight/path": { - "version": "1.3.2", - "resolved": "https://registry.npmjs.org/@stoplight/path/-/path-1.3.2.tgz", - "integrity": "sha512-lyIc6JUlUA8Ve5ELywPC8I2Sdnh1zc1zmbYgVarhXIp9YeAB0ReeqmGEOWNtlHkbP2DAA1AL65Wfn2ncjK/jtQ==", - "license": "Apache-2.0", - "engines": { - "node": ">=8" - } - }, - 
"node_modules/@stoplight/spectral-core": { - "version": "1.21.0", - "resolved": "https://registry.npmjs.org/@stoplight/spectral-core/-/spectral-core-1.21.0.tgz", - "integrity": "sha512-oj4e/FrDLUhBRocIW+lRMKlJ/q/rDZw61HkLbTFsdMd+f/FTkli2xHNB1YC6n1mrMKjjvy7XlUuFkC7XxtgbWw==", - "license": "Apache-2.0", - "dependencies": { - "@stoplight/better-ajv-errors": "1.0.3", - "@stoplight/json": "~3.21.0", - "@stoplight/path": "1.3.2", - "@stoplight/spectral-parsers": "^1.0.0", - "@stoplight/spectral-ref-resolver": "^1.0.4", - "@stoplight/spectral-runtime": "^1.1.2", - "@stoplight/types": "~13.6.0", - "@types/es-aggregate-error": "^1.0.2", - "@types/json-schema": "^7.0.11", - "ajv": "^8.17.1", - "ajv-errors": "~3.0.0", - "ajv-formats": "~2.1.1", - "es-aggregate-error": "^1.0.7", - "jsonpath-plus": "^10.3.0", - "lodash": "~4.17.23", - "lodash.topath": "^4.5.2", - "minimatch": "3.1.2", - "nimma": "0.2.3", - "pony-cause": "^1.1.1", - "simple-eval": "1.0.1", - "tslib": "^2.8.1" - }, - "engines": { - "node": "^16.20 || ^18.18 || >= 20.17" - } - }, - "node_modules/@stoplight/spectral-core/node_modules/@stoplight/types": { - "version": "13.6.0", - "resolved": "https://registry.npmjs.org/@stoplight/types/-/types-13.6.0.tgz", - "integrity": "sha512-dzyuzvUjv3m1wmhPfq82lCVYGcXG0xUYgqnWfCq3PCVR4BKFhjdkHrnJ+jIDoMKvXb05AZP/ObQF6+NpDo29IQ==", - "license": "Apache-2.0", - "dependencies": { - "@types/json-schema": "^7.0.4", - "utility-types": "^3.10.0" - }, - "engines": { - "node": "^12.20 || >=14.13" - } - }, - "node_modules/@stoplight/spectral-core/node_modules/lodash": { - "version": "4.17.23", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz", - "integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==", - "license": "MIT" - }, - "node_modules/@stoplight/spectral-core/node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": 
"sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD" - }, - "node_modules/@stoplight/spectral-formats": { - "version": "1.8.2", - "resolved": "https://registry.npmjs.org/@stoplight/spectral-formats/-/spectral-formats-1.8.2.tgz", - "integrity": "sha512-c06HB+rOKfe7tuxg0IdKDEA5XnjL2vrn/m/OVIIxtINtBzphZrOgtRn7epQ5bQF5SWp84Ue7UJWaGgDwVngMFw==", - "license": "Apache-2.0", - "dependencies": { - "@stoplight/json": "^3.17.0", - "@stoplight/spectral-core": "^1.19.2", - "@types/json-schema": "^7.0.7", - "tslib": "^2.8.1" - }, - "engines": { - "node": "^16.20 || ^18.18 || >= 20.17" - } - }, - "node_modules/@stoplight/spectral-formats/node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD" - }, - "node_modules/@stoplight/spectral-functions": { - "version": "1.10.1", - "resolved": "https://registry.npmjs.org/@stoplight/spectral-functions/-/spectral-functions-1.10.1.tgz", - "integrity": "sha512-obu8ZfoHxELOapfGsCJixKZXZcffjg+lSoNuttpmUFuDzVLT3VmH8QkPXfOGOL5Pz80BR35ClNAToDkdnYIURg==", - "license": "Apache-2.0", - "dependencies": { - "@stoplight/better-ajv-errors": "1.0.3", - "@stoplight/json": "^3.17.1", - "@stoplight/spectral-core": "^1.19.4", - "@stoplight/spectral-formats": "^1.8.1", - "@stoplight/spectral-runtime": "^1.1.2", - "ajv": "^8.17.1", - "ajv-draft-04": "~1.0.0", - "ajv-errors": "~3.0.0", - "ajv-formats": "~2.1.1", - "lodash": "~4.17.21", - "tslib": "^2.8.1" - }, - "engines": { - "node": "^16.20 || ^18.18 || >= 20.17" - } - }, - "node_modules/@stoplight/spectral-functions/node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD" 
- }, - "node_modules/@stoplight/spectral-parsers": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/@stoplight/spectral-parsers/-/spectral-parsers-1.0.5.tgz", - "integrity": "sha512-ANDTp2IHWGvsQDAY85/jQi9ZrF4mRrA5bciNHX+PUxPr4DwS6iv4h+FVWJMVwcEYdpyoIdyL+SRmHdJfQEPmwQ==", - "license": "Apache-2.0", - "dependencies": { - "@stoplight/json": "~3.21.0", - "@stoplight/types": "^14.1.1", - "@stoplight/yaml": "~4.3.0", - "tslib": "^2.8.1" - }, - "engines": { - "node": "^16.20 || ^18.18 || >= 20.17" - } - }, - "node_modules/@stoplight/spectral-parsers/node_modules/@stoplight/types": { - "version": "14.1.1", - "resolved": "https://registry.npmjs.org/@stoplight/types/-/types-14.1.1.tgz", - "integrity": "sha512-/kjtr+0t0tjKr+heVfviO9FrU/uGLc+QNX3fHJc19xsCNYqU7lVhaXxDmEID9BZTjG+/r9pK9xP/xU02XGg65g==", - "license": "Apache-2.0", - "dependencies": { - "@types/json-schema": "^7.0.4", - "utility-types": "^3.10.0" - }, - "engines": { - "node": "^12.20 || >=14.13" - } - }, - "node_modules/@stoplight/spectral-parsers/node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD" - }, - "node_modules/@stoplight/spectral-ref-resolver": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/@stoplight/spectral-ref-resolver/-/spectral-ref-resolver-1.0.5.tgz", - "integrity": "sha512-gj3TieX5a9zMW29z3mBlAtDOCgN3GEc1VgZnCVlr5irmR4Qi5LuECuFItAq4pTn5Zu+sW5bqutsCH7D4PkpyAA==", - "license": "Apache-2.0", - "dependencies": { - "@stoplight/json-ref-readers": "1.2.2", - "@stoplight/json-ref-resolver": "~3.1.6", - "@stoplight/spectral-runtime": "^1.1.2", - "dependency-graph": "0.11.0", - "tslib": "^2.8.1" - }, - "engines": { - "node": "^16.20 || ^18.18 || >= 20.17" - } - }, - "node_modules/@stoplight/spectral-ref-resolver/node_modules/tslib": { - "version": "2.8.1", - "resolved": 
"https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD" - }, - "node_modules/@stoplight/spectral-runtime": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/@stoplight/spectral-runtime/-/spectral-runtime-1.1.4.tgz", - "integrity": "sha512-YHbhX3dqW0do6DhiPSgSGQzr6yQLlWybhKwWx0cqxjMwxej3TqLv3BXMfIUYFKKUqIwH4Q2mV8rrMM8qD2N0rQ==", - "license": "Apache-2.0", - "dependencies": { - "@stoplight/json": "^3.20.1", - "@stoplight/path": "^1.3.2", - "@stoplight/types": "^13.6.0", - "abort-controller": "^3.0.0", - "lodash": "^4.17.21", - "node-fetch": "^2.7.0", - "tslib": "^2.8.1" - }, - "engines": { - "node": "^16.20 || ^18.18 || >= 20.17" - } - }, - "node_modules/@stoplight/spectral-runtime/node_modules/node-fetch": { - "version": "2.7.0", - "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", - "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", - "license": "MIT", - "dependencies": { - "whatwg-url": "^5.0.0" - }, - "engines": { - "node": "4.x || >=6.0.0" - }, - "peerDependencies": { - "encoding": "^0.1.0" - }, - "peerDependenciesMeta": { - "encoding": { - "optional": true - } - } - }, - "node_modules/@stoplight/spectral-runtime/node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD" - }, - "node_modules/@stoplight/types": { - "version": "13.20.0", - "resolved": "https://registry.npmjs.org/@stoplight/types/-/types-13.20.0.tgz", - "integrity": "sha512-2FNTv05If7ib79VPDA/r9eUet76jewXFH2y2K5vuge6SXbRHtWBhcaRmu+6QpF4/WRNoJj5XYRSwLGXDxysBGA==", - "license": "Apache-2.0", - "dependencies": { - "@types/json-schema": "^7.0.4", - "utility-types": "^3.10.0" - }, - 
"engines": { - "node": "^12.20 || >=14.13" - } - }, - "node_modules/@stoplight/yaml": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/@stoplight/yaml/-/yaml-4.3.0.tgz", - "integrity": "sha512-JZlVFE6/dYpP9tQmV0/ADfn32L9uFarHWxfcRhReKUnljz1ZiUM5zpX+PH8h5CJs6lao3TuFqnPm9IJJCEkE2w==", - "license": "Apache-2.0", - "dependencies": { - "@stoplight/ordered-object-literal": "^1.0.5", - "@stoplight/types": "^14.1.1", - "@stoplight/yaml-ast-parser": "0.0.50", - "tslib": "^2.2.0" - }, - "engines": { - "node": ">=10.8" - } - }, - "node_modules/@stoplight/yaml-ast-parser": { - "version": "0.0.50", - "resolved": "https://registry.npmjs.org/@stoplight/yaml-ast-parser/-/yaml-ast-parser-0.0.50.tgz", - "integrity": "sha512-Pb6M8TDO9DtSVla9yXSTAxmo9GVEouq5P40DWXdOie69bXogZTkgvopCq+yEvTMA0F6PEvdJmbtTV3ccIp11VQ==", - "license": "Apache-2.0" - }, - "node_modules/@stoplight/yaml/node_modules/@stoplight/types": { - "version": "14.1.1", - "resolved": "https://registry.npmjs.org/@stoplight/types/-/types-14.1.1.tgz", - "integrity": "sha512-/kjtr+0t0tjKr+heVfviO9FrU/uGLc+QNX3fHJc19xsCNYqU7lVhaXxDmEID9BZTjG+/r9pK9xP/xU02XGg65g==", - "license": "Apache-2.0", - "dependencies": { - "@types/json-schema": "^7.0.4", - "utility-types": "^3.10.0" - }, - "engines": { - "node": "^12.20 || >=14.13" - } - }, - "node_modules/@stoplight/yaml/node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD" - }, - "node_modules/@szmarczak/http-timer": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/@szmarczak/http-timer/-/http-timer-5.0.1.tgz", - "integrity": "sha512-+PmQX0PiAYPMeVYe237LJAYvOMYW1j2rH5YROyS3b4CTVJum34HfRvKvAzozHAQG0TnHNdUfY9nCeUyRAs//cw==", - "license": "MIT", - "dependencies": { - "defer-to-connect": "^2.0.1" - }, - "engines": { - "node": ">=14.16" - } - }, - 
"node_modules/@tootallnate/quickjs-emscripten": { - "version": "0.23.0", - "resolved": "https://registry.npmjs.org/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz", - "integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==", - "license": "MIT" - }, - "node_modules/@types/acorn": { - "version": "4.0.6", - "resolved": "https://registry.npmjs.org/@types/acorn/-/acorn-4.0.6.tgz", - "integrity": "sha512-veQTnWP+1D/xbxVrPC3zHnCZRjSrKfhbMUlEA43iMZLu7EsnTtkJklIuwrCPbOi8YkvDQAiW05VQQFvvz9oieQ==", - "license": "MIT", - "dependencies": { - "@types/estree": "*" - } - }, - "node_modules/@types/cookie": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/@types/cookie/-/cookie-0.4.1.tgz", - "integrity": "sha512-XW/Aa8APYr6jSVVA1y/DEIZX0/GMKLEVekNG727R8cs56ahETkRAy/3DR7+fJyh7oUgGwNQaRfXCun0+KbWY7Q==", - "license": "MIT" - }, - "node_modules/@types/cors": { - "version": "2.8.19", - "resolved": "https://registry.npmjs.org/@types/cors/-/cors-2.8.19.tgz", - "integrity": "sha512-mFNylyeyqN93lfe/9CSxOGREz8cpzAhH+E93xJ4xWQf62V8sQ/24reV2nyzUWM6H6Xji+GGHpkbLe7pVoUEskg==", - "license": "MIT", - "dependencies": { - "@types/node": "*" - } - }, - "node_modules/@types/debug": { - "version": "4.1.13", - "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.13.tgz", - "integrity": "sha512-KSVgmQmzMwPlmtljOomayoR89W4FynCAi3E8PPs7vmDVPe84hT+vGPKkJfThkmXs0x0jAaa9U8uW8bbfyS2fWw==", - "license": "MIT", - "dependencies": { - "@types/ms": "*" - } - }, - "node_modules/@types/es-aggregate-error": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/@types/es-aggregate-error/-/es-aggregate-error-1.0.6.tgz", - "integrity": "sha512-qJ7LIFp06h1QE1aVxbVd+zJP2wdaugYXYfd6JxsyRMrYHaxb6itXPogW2tz+ylUJ1n1b+JF1PHyYCfYHm0dvUg==", - "license": "MIT", - "dependencies": { - "@types/node": "*" - } - }, - "node_modules/@types/estree": { - "version": "1.0.8", - "resolved": 
"https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz", - "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==", - "license": "MIT" - }, - "node_modules/@types/estree-jsx": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/@types/estree-jsx/-/estree-jsx-1.0.5.tgz", - "integrity": "sha512-52CcUVNFyfb1A2ALocQw/Dd1BQFNmSdkuC3BkZ6iqhdMfQz7JWOFRuJFloOzjk+6WijU56m9oKXFAXc7o3Towg==", - "license": "MIT", - "dependencies": { - "@types/estree": "*" - } - }, - "node_modules/@types/hast": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", - "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", - "license": "MIT", - "dependencies": { - "@types/unist": "*" - } - }, - "node_modules/@types/http-cache-semantics": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/@types/http-cache-semantics/-/http-cache-semantics-4.2.0.tgz", - "integrity": "sha512-L3LgimLHXtGkWikKnsPg0/VFx9OGZaC+eN1u4r+OB1XRqH3meBIAVC2zr1WdMH+RHmnRkqliQAOHNJ/E0j/e0Q==", - "license": "MIT" - }, - "node_modules/@types/json-schema": { - "version": "7.0.15", - "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", - "integrity": "sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==", - "license": "MIT" - }, - "node_modules/@types/katex": { - "version": "0.16.8", - "resolved": "https://registry.npmjs.org/@types/katex/-/katex-0.16.8.tgz", - "integrity": "sha512-trgaNyfU+Xh2Tc+ABIb44a5AYUpicB3uwirOioeOkNPPbmgRNtcWyDeeFRzjPZENO9Vq8gvVqfhaaXWLlevVwg==", - "license": "MIT" - }, - "node_modules/@types/mdast": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-4.0.4.tgz", - "integrity": "sha512-kGaNbPh1k7AFzgpud/gMdvIm5xuECykRR+JnWKQno9TAXVa6WIVCGTPvYGekIDL4uwCZQSYbUxNBSb1aUo79oA==", - "license": "MIT", - "dependencies": { 
- "@types/unist": "*" - } - }, - "node_modules/@types/mdx": { - "version": "2.0.13", - "resolved": "https://registry.npmjs.org/@types/mdx/-/mdx-2.0.13.tgz", - "integrity": "sha512-+OWZQfAYyio6YkJb3HLxDrvnx6SWWDbC0zVPfBRzUk0/nqoDyf6dNxQi3eArPe8rJ473nobTMQ/8Zk+LxJ+Yuw==", - "license": "MIT" - }, - "node_modules/@types/ms": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/@types/ms/-/ms-2.1.0.tgz", - "integrity": "sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==", - "license": "MIT" - }, - "node_modules/@types/nlcst": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/@types/nlcst/-/nlcst-2.0.3.tgz", - "integrity": "sha512-vSYNSDe6Ix3q+6Z7ri9lyWqgGhJTmzRjZRqyq15N0Z/1/UnVsno9G/N40NBijoYx2seFDIl0+B2mgAb9mezUCA==", - "license": "MIT", - "dependencies": { - "@types/unist": "*" - } - }, - "node_modules/@types/node": { - "version": "25.5.2", - "resolved": "https://registry.npmjs.org/@types/node/-/node-25.5.2.tgz", - "integrity": "sha512-tO4ZIRKNC+MDWV4qKVZe3Ql/woTnmHDr5JD8UI5hn2pwBrHEwOEMZK7WlNb5RKB6EoJ02gwmQS9OrjuFnZYdpg==", - "license": "MIT", - "dependencies": { - "undici-types": "~7.18.0" - } - }, - "node_modules/@types/react": { - "version": "19.2.14", - "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.14.tgz", - "integrity": "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w==", - "license": "MIT", - "peer": true, - "dependencies": { - "csstype": "^3.2.2" - } - }, - "node_modules/@types/unist": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz", - "integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==", - "license": "MIT" - }, - "node_modules/@types/urijs": { - "version": "1.19.26", - "resolved": "https://registry.npmjs.org/@types/urijs/-/urijs-1.19.26.tgz", - "integrity": 
"sha512-wkXrVzX5yoqLnndOwFsieJA7oKM8cNkOKJtf/3vVGSUFkWDKZvFHpIl9Pvqb/T9UsawBBFMTTD8xu7sK5MWuvg==", - "license": "MIT" - }, - "node_modules/@types/yauzl": { - "version": "2.10.3", - "resolved": "https://registry.npmjs.org/@types/yauzl/-/yauzl-2.10.3.tgz", - "integrity": "sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==", - "license": "MIT", - "optional": true, - "dependencies": { - "@types/node": "*" - } - }, - "node_modules/@typescript/vfs": { - "version": "1.6.4", - "resolved": "https://registry.npmjs.org/@typescript/vfs/-/vfs-1.6.4.tgz", - "integrity": "sha512-PJFXFS4ZJKiJ9Qiuix6Dz/OwEIqHD7Dme1UwZhTK11vR+5dqW2ACbdndWQexBzCx+CPuMe5WBYQWCsFyGlQLlQ==", - "license": "MIT", - "dependencies": { - "debug": "^4.4.3" - }, - "peerDependencies": { - "typescript": "*" - } - }, - "node_modules/@ungap/structured-clone": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.3.0.tgz", - "integrity": "sha512-WmoN8qaIAo7WTYWbAZuG8PYEhn5fkz7dZrqTBZ7dtt//lL2Gwms1IcnQ5yHqjDfX8Ft5j4YzDM23f87zBfDe9g==", - "license": "ISC" - }, - "node_modules/abort-controller": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", - "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", - "license": "MIT", - "dependencies": { - "event-target-shim": "^5.0.0" - }, - "engines": { - "node": ">=6.5" - } - }, - "node_modules/accepts": { - "version": "1.3.8", - "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz", - "integrity": "sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw==", - "license": "MIT", - "dependencies": { - "mime-types": "~2.1.34", - "negotiator": "0.6.3" - }, - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/acorn": { - "version": "8.11.2", - "resolved": 
"https://registry.npmjs.org/acorn/-/acorn-8.11.2.tgz", - "integrity": "sha512-nc0Axzp/0FILLEVsm4fNwLCwMttvhEI263QtVPQcbpfZZ3ts0hLsZGOpE6czNlid7CJ9MlyH8reXkpsf3YUY4w==", - "license": "MIT", - "bin": { - "acorn": "bin/acorn" - }, - "engines": { - "node": ">=0.4.0" - } - }, - "node_modules/acorn-jsx": { - "version": "5.3.2", - "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.2.tgz", - "integrity": "sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==", - "license": "MIT", - "peerDependencies": { - "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" - } - }, - "node_modules/address": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/address/-/address-1.2.2.tgz", - "integrity": "sha512-4B/qKCfeE/ODUaAUpSwfzazo5x29WD4r3vXiWsB7I2mSDAihwEqKO+g8GELZUQSSAo5e1XTYh3ZVfLyxBc12nA==", - "license": "MIT", - "engines": { - "node": ">= 10.0.0" - } - }, - "node_modules/adm-zip": { - "version": "0.5.16", - "resolved": "https://registry.npmjs.org/adm-zip/-/adm-zip-0.5.16.tgz", - "integrity": "sha512-TGw5yVi4saajsSEgz25grObGHEUaDrniwvA2qwSC060KfqGPdglhvPMA2lPIoxs3PQIItj2iag35fONcQqgUaQ==", - "license": "MIT", - "engines": { - "node": ">=12.0" - } - }, - "node_modules/agent-base": { - "version": "7.1.4", - "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", - "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", - "license": "MIT", - "engines": { - "node": ">= 14" - } - }, - "node_modules/aggregate-error": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/aggregate-error/-/aggregate-error-4.0.1.tgz", - "integrity": "sha512-0poP0T7el6Vq3rstR8Mn4V/IQrpBLO6POkUSrN7RhyY+GF/InCFShQzsQ39T25gkHhLgSLByyAz+Kjb+c2L98w==", - "license": "MIT", - "dependencies": { - "clean-stack": "^4.0.0", - "indent-string": "^5.0.0" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - 
"node_modules/ajv": { - "version": "8.18.0", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.18.0.tgz", - "integrity": "sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A==", - "license": "MIT", - "dependencies": { - "fast-deep-equal": "^3.1.3", - "fast-uri": "^3.0.1", - "json-schema-traverse": "^1.0.0", - "require-from-string": "^2.0.2" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/epoberezkin" - } - }, - "node_modules/ajv-draft-04": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/ajv-draft-04/-/ajv-draft-04-1.0.0.tgz", - "integrity": "sha512-mv00Te6nmYbRp5DCwclxtt7yV/joXJPGS7nM+97GdxvuttCOfgI3K4U25zboyeX0O+myI8ERluxQe5wljMmVIw==", - "license": "MIT", - "peerDependencies": { - "ajv": "^8.5.0" - }, - "peerDependenciesMeta": { - "ajv": { - "optional": true - } - } - }, - "node_modules/ajv-errors": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/ajv-errors/-/ajv-errors-3.0.0.tgz", - "integrity": "sha512-V3wD15YHfHz6y0KdhYFjyy9vWtEVALT9UrxfN3zqlI6dMioHnJrqOYfyPKol3oqrnCM9uwkcdCwkJ0WUcbLMTQ==", - "license": "MIT", - "peerDependencies": { - "ajv": "^8.0.1" - } - }, - "node_modules/ajv-formats": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-2.1.1.tgz", - "integrity": "sha512-Wx0Kx52hxE7C18hkMEggYlEifqWZtYaRgouJor+WMdPnQyEK13vgEWyVNup7SoeeoLMsr4kf5h6dOW11I15MUA==", - "license": "MIT", - "dependencies": { - "ajv": "^8.0.0" - }, - "peerDependencies": { - "ajv": "^8.0.0" - }, - "peerDependenciesMeta": { - "ajv": { - "optional": true - } - } - }, - "node_modules/ansi-escapes": { - "version": "7.3.0", - "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-7.3.0.tgz", - "integrity": "sha512-BvU8nYgGQBxcmMuEeUEmNTvrMVjJNSH7RgW24vXexN4Ven6qCvy4TntnvlnwnMLTVlcRQQdbRY8NKnaIoeWDNg==", - "license": "MIT", - "dependencies": { - "environment": "^1.0.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - 
"url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/ansi-regex": { - "version": "6.2.2", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.2.2.tgz", - "integrity": "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-regex?sponsor=1" - } - }, - "node_modules/ansi-styles": { - "version": "6.2.3", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.3.tgz", - "integrity": "sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/any-promise": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/any-promise/-/any-promise-1.3.0.tgz", - "integrity": "sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==", - "license": "MIT" - }, - "node_modules/anymatch": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz", - "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==", - "license": "ISC", - "dependencies": { - "normalize-path": "^3.0.0", - "picomatch": "^2.0.4" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/arg": { - "version": "5.0.2", - "resolved": "https://registry.npmjs.org/arg/-/arg-5.0.2.tgz", - "integrity": "sha512-PYjyFOLKQ9y57JvQ6QLo8dAgNqswh8M1RMJYdQduT6xbWSgK36P/Z/v+p888pM69jMMfS8Xd8F6I1kQ/I9HUGg==", - "license": "MIT" - }, - "node_modules/argparse": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", - "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", - "license": "Python-2.0" - }, - 
"node_modules/aria-hidden": { - "version": "1.2.6", - "resolved": "https://registry.npmjs.org/aria-hidden/-/aria-hidden-1.2.6.tgz", - "integrity": "sha512-ik3ZgC9dY/lYVVM++OISsaYDeg1tb0VtP5uL3ouh1koGOaUMDPpbFIei4JkFimWUFPn90sbMNMXQAIVOlnYKJA==", - "license": "MIT", - "peer": true, - "dependencies": { - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/aria-hidden/node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD", - "peer": true - }, - "node_modules/arkregex": { - "version": "0.0.3", - "resolved": "https://registry.npmjs.org/arkregex/-/arkregex-0.0.3.tgz", - "integrity": "sha512-bU21QJOJEFJK+BPNgv+5bVXkvRxyAvgnon75D92newgHxkBJTgiFwQxusyViYyJkETsddPlHyspshDQcCzmkNg==", - "license": "MIT", - "dependencies": { - "@ark/util": "0.55.0" - } - }, - "node_modules/arktype": { - "version": "2.1.27", - "resolved": "https://registry.npmjs.org/arktype/-/arktype-2.1.27.tgz", - "integrity": "sha512-enctOHxI4SULBv/TDtCVi5M8oLd4J5SVlPUblXDzSsOYQNMzmVbUosGBnJuZDKmFlN5Ie0/QVEuTE+Z5X1UhsQ==", - "license": "MIT", - "dependencies": { - "@ark/schema": "0.55.0", - "@ark/util": "0.55.0", - "arkregex": "0.0.3" - } - }, - "node_modules/array-buffer-byte-length": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/array-buffer-byte-length/-/array-buffer-byte-length-1.0.2.tgz", - "integrity": "sha512-LHE+8BuR7RYGDKvnrmcuSq3tDcKv9OFEXQt/HpbZhY7V6h0zlUXutnAD82GiFx9rdieCMjkvtcsPqBwgUl1Iiw==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "is-array-buffer": "^3.0.5" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/array-flatten": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", - "integrity": 
"sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==", - "license": "MIT" - }, - "node_modules/array-iterate": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/array-iterate/-/array-iterate-2.0.1.tgz", - "integrity": "sha512-I1jXZMjAgCMmxT4qxXfPXa6SthSoE8h6gkSI9BGGNv8mP8G/v0blc+qFnZu6K42vTOiuME596QaLO0TP3Lk0xg==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/arraybuffer.prototype.slice": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/arraybuffer.prototype.slice/-/arraybuffer.prototype.slice-1.0.4.tgz", - "integrity": "sha512-BNoCY6SXXPQ7gF2opIP4GBE+Xw7U+pHMYKuzjgCN3GwiaIR09UUeKfheyIry77QtrCBlC0KK0q5/TER/tYh3PQ==", - "license": "MIT", - "dependencies": { - "array-buffer-byte-length": "^1.0.1", - "call-bind": "^1.0.8", - "define-properties": "^1.2.1", - "es-abstract": "^1.23.5", - "es-errors": "^1.3.0", - "get-intrinsic": "^1.2.6", - "is-array-buffer": "^3.0.4" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/ast-types": { - "version": "0.13.4", - "resolved": "https://registry.npmjs.org/ast-types/-/ast-types-0.13.4.tgz", - "integrity": "sha512-x1FCFnFifvYDDzTaLII71vG5uvDwgtmDTEVWAxrgeiR8VjMONcCXJx7E+USjDtHlwFmt9MysbqgF9b9Vjr6w+w==", - "license": "MIT", - "dependencies": { - "tslib": "^2.0.1" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/ast-types/node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD" - }, - "node_modules/astring": { - "version": "1.9.0", - "resolved": "https://registry.npmjs.org/astring/-/astring-1.9.0.tgz", - "integrity": 
"sha512-LElXdjswlqjWrPpJFg1Fx4wpkOCxj1TDHlSV4PlaRxHGWko024xICaa97ZkMfs6DRKlCguiAI+rbXv5GWwXIkg==", - "license": "MIT", - "bin": { - "astring": "bin/astring" - } - }, - "node_modules/async-function": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/async-function/-/async-function-1.0.0.tgz", - "integrity": "sha512-hsU18Ae8CDTR6Kgu9DYf0EbCr/a5iGL0rytQDobUcdpYOKokk8LEjVphnXkDkgpi0wYVsqrXuP0bZxJaTqdgoA==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/asynckit": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", - "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", - "license": "MIT" - }, - "node_modules/auto-bind": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/auto-bind/-/auto-bind-5.0.1.tgz", - "integrity": "sha512-ooviqdwwgfIfNmDwo94wlshcdzfO64XV0Cg6oDsDYBJfITDz1EngD2z7DkbvCWn+XIMsIqW27sEVF6qcpJrRcg==", - "license": "MIT", - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/available-typed-arrays": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/available-typed-arrays/-/available-typed-arrays-1.0.7.tgz", - "integrity": "sha512-wvUjBtSGN7+7SjNpq/9M2Tg350UZD3q62IFZLbRAR1bSMlCo1ZaeW+BJ+D090e4hIIZLBcTDWe4Mh4jvUDajzQ==", - "license": "MIT", - "dependencies": { - "possible-typed-array-names": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/avsc": { - "version": "5.7.9", - "resolved": "https://registry.npmjs.org/avsc/-/avsc-5.7.9.tgz", - "integrity": "sha512-yOA4wFeI7ET3v32Di/sUybQ+ttP20JHSW3mxLuNGeO0uD6PPcvLrIQXSvy/rhJOWU5JrYh7U4OHplWMmtAtjMg==", - "license": "MIT", - "engines": { - "node": ">=0.11" - } - }, - "node_modules/axios": { - "version": "1.13.2", - "resolved": 
"https://registry.npmjs.org/axios/-/axios-1.13.2.tgz", - "integrity": "sha512-VPk9ebNqPcy5lRGuSlKx752IlDatOjT9paPlm8A7yOuW2Fbvp4X3JznJtT4f0GzGLLiWE9W8onz51SqLYwzGaA==", - "license": "MIT", - "dependencies": { - "follow-redirects": "^1.15.6", - "form-data": "^4.0.4", - "proxy-from-env": "^1.1.0" - } - }, - "node_modules/b4a": { - "version": "1.8.0", - "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.8.0.tgz", - "integrity": "sha512-qRuSmNSkGQaHwNbM7J78Wwy+ghLEYF1zNrSeMxj4Kgw6y33O3mXcQ6Ie9fRvfU/YnxWkOchPXbaLb73TkIsfdg==", - "license": "Apache-2.0", - "peerDependencies": { - "react-native-b4a": "*" - }, - "peerDependenciesMeta": { - "react-native-b4a": { - "optional": true - } - } - }, - "node_modules/bail": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/bail/-/bail-2.0.2.tgz", - "integrity": "sha512-0xO6mYd7JB2YesxDKplafRpsiOzPt9V02ddPCLbY1xYGPOX24NTyN50qnUxgCPcSoYMhKpAuBTjQoRZCAkUDRw==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/balanced-match": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", - "license": "MIT" - }, - "node_modules/bare-events": { - "version": "2.8.2", - "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.8.2.tgz", - "integrity": "sha512-riJjyv1/mHLIPX4RwiK+oW9/4c3TEUeORHKefKAKnZ5kyslbN+HXowtbaVEqt4IMUB7OXlfixcs6gsFeo/jhiQ==", - "license": "Apache-2.0", - "peerDependencies": { - "bare-abort-controller": "*" - }, - "peerDependenciesMeta": { - "bare-abort-controller": { - "optional": true - } - } - }, - "node_modules/bare-fs": { - "version": "4.6.0", - "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.6.0.tgz", - "integrity": "sha512-2YkS7NuiJceSEbyEOdSNLE9tsGd+f4+f7C+Nik/MCk27SYdwIMPT/yRKvg++FZhQXgk0KWJKJyXX9RhVV0RGqA==", - "license": "Apache-2.0", 
- "dependencies": { - "bare-events": "^2.5.4", - "bare-path": "^3.0.0", - "bare-stream": "^2.6.4", - "bare-url": "^2.2.2", - "fast-fifo": "^1.3.2" - }, - "engines": { - "bare": ">=1.16.0" - }, - "peerDependencies": { - "bare-buffer": "*" - }, - "peerDependenciesMeta": { - "bare-buffer": { - "optional": true - } - } - }, - "node_modules/bare-os": { - "version": "3.8.7", - "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.8.7.tgz", - "integrity": "sha512-G4Gr1UsGeEy2qtDTZwL7JFLo2wapUarz7iTMcYcMFdS89AIQuBoyjgXZz0Utv7uHs3xA9LckhVbeBi8lEQrC+w==", - "license": "Apache-2.0", - "engines": { - "bare": ">=1.14.0" - } - }, - "node_modules/bare-path": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz", - "integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==", - "license": "Apache-2.0", - "dependencies": { - "bare-os": "^3.0.1" - } - }, - "node_modules/bare-stream": { - "version": "2.12.0", - "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.12.0.tgz", - "integrity": "sha512-w28i8lkBgREV3rPXGbgK+BO66q+ZpKqRWrZLiCdmmUlLPrQ45CzkvRhN+7lnv00Gpi2zy5naRxnUFAxCECDm9g==", - "license": "Apache-2.0", - "dependencies": { - "streamx": "^2.25.0", - "teex": "^1.0.1" - }, - "peerDependencies": { - "bare-abort-controller": "*", - "bare-buffer": "*", - "bare-events": "*" - }, - "peerDependenciesMeta": { - "bare-abort-controller": { - "optional": true - }, - "bare-buffer": { - "optional": true - }, - "bare-events": { - "optional": true - } - } - }, - "node_modules/bare-url": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.4.0.tgz", - "integrity": "sha512-NSTU5WN+fy/L0DDenfE8SXQna4voXuW0FHM7wH8i3/q9khUSchfPbPezO4zSFMnDGIf9YE+mt/RWhZgNRKRIXA==", - "license": "Apache-2.0", - "dependencies": { - "bare-path": "^3.0.0" - } - }, - "node_modules/base64-js": { - "version": "1.5.1", - "resolved": 
"https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", - "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT" - }, - "node_modules/base64id": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/base64id/-/base64id-2.0.0.tgz", - "integrity": "sha512-lGe34o6EHj9y3Kts9R4ZYs/Gr+6N7MCaMlIFA3F1R2O5/m7K06AxfSeO5530PEERE6/WyEg3lsuyw4GHlPZHog==", - "license": "MIT", - "engines": { - "node": "^4.5.0 || >= 5.9" - } - }, - "node_modules/basic-ftp": { - "version": "5.3.0", - "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.3.0.tgz", - "integrity": "sha512-5K9eNNn7ywHPsYnFwjKgYH8Hf8B5emh7JKcPaVjjrMJFQQwGpwowEnZNEtHs7DfR7hCZsmaK3VA4HUK0YarT+w==", - "license": "MIT", - "engines": { - "node": ">=10.0.0" - } - }, - "node_modules/better-opn": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/better-opn/-/better-opn-3.0.2.tgz", - "integrity": "sha512-aVNobHnJqLiUelTaHat9DZ1qM2w0C0Eym4LPI/3JxOnSokGVdsl1T1kN7TFvsEAD8G47A6VKQ0TVHqbBnYMJlQ==", - "license": "MIT", - "dependencies": { - "open": "^8.0.4" - }, - "engines": { - "node": ">=12.0.0" - } - }, - "node_modules/binary-extensions": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", - "integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==", - "license": "MIT", - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/bl": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", - "integrity": 
"sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==", - "license": "MIT", - "optional": true, - "dependencies": { - "buffer": "^5.5.0", - "inherits": "^2.0.4", - "readable-stream": "^3.4.0" - } - }, - "node_modules/body-parser": { - "version": "1.20.1", - "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.1.tgz", - "integrity": "sha512-jWi7abTbYwajOytWCQc37VulmWiRae5RyTpaCyDcS5/lMdtwSz5lOpDE67srw/HYe35f1z3fDQw+3txg7gNtWw==", - "license": "MIT", - "dependencies": { - "bytes": "3.1.2", - "content-type": "~1.0.4", - "debug": "2.6.9", - "depd": "2.0.0", - "destroy": "1.2.0", - "http-errors": "2.0.0", - "iconv-lite": "0.4.24", - "on-finished": "2.4.1", - "qs": "6.11.0", - "raw-body": "2.5.1", - "type-is": "~1.6.18", - "unpipe": "1.0.0" - }, - "engines": { - "node": ">= 0.8", - "npm": "1.2.8000 || >= 1.4.16" - } - }, - "node_modules/body-parser/node_modules/debug": { - "version": "2.6.9", - "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", - "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", - "license": "MIT", - "dependencies": { - "ms": "2.0.0" - } - }, - "node_modules/body-parser/node_modules/iconv-lite": { - "version": "0.4.24", - "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", - "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", - "license": "MIT", - "dependencies": { - "safer-buffer": ">= 2.1.2 < 3" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/body-parser/node_modules/ms": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", - "license": "MIT" - }, - "node_modules/brace-expansion": { - "version": "1.1.13", - "resolved": 
"https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.13.tgz", - "integrity": "sha512-9ZLprWS6EENmhEOpjCYW2c8VkmOvckIJZfkr7rBW6dObmfgJ/L1GpSYW5Hpo9lDz4D1+n0Ckz8rU7FwHDQiG/w==", - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" - } - }, - "node_modules/braces": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", - "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", - "license": "MIT", - "dependencies": { - "fill-range": "^7.1.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/buffer": { - "version": "5.7.1", - "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz", - "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT", - "dependencies": { - "base64-js": "^1.3.1", - "ieee754": "^1.1.13" - } - }, - "node_modules/buffer-crc32": { - "version": "0.2.13", - "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", - "integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==", - "license": "MIT", - "engines": { - "node": "*" - } - }, - "node_modules/bytes": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", - "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==", - "license": "MIT", - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/cacheable-lookup": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/cacheable-lookup/-/cacheable-lookup-7.0.0.tgz", - "integrity": 
"sha512-+qJyx4xiKra8mZrcwhjMRMUhD5NR1R8esPkzIYxX96JiecFoxAXFuz/GpR3+ev4PE1WamHip78wV0vcmPQtp8w==", - "license": "MIT", - "engines": { - "node": ">=14.16" - } - }, - "node_modules/cacheable-request": { - "version": "10.2.14", - "resolved": "https://registry.npmjs.org/cacheable-request/-/cacheable-request-10.2.14.tgz", - "integrity": "sha512-zkDT5WAF4hSSoUgyfg5tFIxz8XQK+25W/TLVojJTMKBaxevLBBtLxgqguAuVQB8PVW79FVjHcU+GJ9tVbDZ9mQ==", - "license": "MIT", - "dependencies": { - "@types/http-cache-semantics": "^4.0.2", - "get-stream": "^6.0.1", - "http-cache-semantics": "^4.1.1", - "keyv": "^4.5.3", - "mimic-response": "^4.0.0", - "normalize-url": "^8.0.0", - "responselike": "^3.0.0" - }, - "engines": { - "node": ">=14.16" - } - }, - "node_modules/call-bind": { - "version": "1.0.8", - "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.8.tgz", - "integrity": "sha512-oKlSFMcMwpUg2ednkhQ454wfWiU/ul3CkJe/PEHcTKuiX6RpbehUiFMXu13HalGZxfUwCQzZG747YXBn1im9ww==", - "license": "MIT", - "dependencies": { - "call-bind-apply-helpers": "^1.0.0", - "es-define-property": "^1.0.0", - "get-intrinsic": "^1.2.4", - "set-function-length": "^1.2.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/call-bind-apply-helpers": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", - "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", - "license": "MIT", - "dependencies": { - "es-errors": "^1.3.0", - "function-bind": "^1.1.2" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/call-bound": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.4.tgz", - "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==", - "license": "MIT", - "dependencies": { - 
"call-bind-apply-helpers": "^1.0.2", - "get-intrinsic": "^1.3.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/callsites": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", - "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/camelcase-css": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/camelcase-css/-/camelcase-css-2.0.1.tgz", - "integrity": "sha512-QOSvevhslijgYwRx6Rv7zKdMF8lbRmx+uQGx2+vDc+KI/eBnsy9kit5aj23AgGu3pa4t9AgwbnXWqS+iOY+2aA==", - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, - "node_modules/ccount": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/ccount/-/ccount-2.0.1.tgz", - "integrity": "sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/chalk": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.2.0.tgz", - "integrity": "sha512-ree3Gqw/nazQAPuJJEy+avdl7QfZMcUvmHIKgEZkGL+xOBzRvup5Hxo6LHuMceSxOabuJLJm5Yp/92R9eMmMvA==", - "license": "MIT", - "engines": { - "node": "^12.17.0 || ^14.13 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/chalk/chalk?sponsor=1" - } - }, - "node_modules/character-entities": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/character-entities/-/character-entities-2.0.2.tgz", - "integrity": "sha512-shx7oQ0Awen/BRIdkjkvz54PnEEI/EjwXDSIZp86/KKdbafHh1Df/RYGBhn4hbe2+uKC9FnT5UCEdyPz3ai9hQ==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/character-entities-html4": { - "version": "2.1.0", - "resolved": 
"https://registry.npmjs.org/character-entities-html4/-/character-entities-html4-2.1.0.tgz", - "integrity": "sha512-1v7fgQRj6hnSwFpq1Eu0ynr/CDEw0rXo2B61qXrLNdHZmPKgb7fqS1a2JwF0rISo9q77jDI8VMEHoApn8qDoZA==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/character-entities-legacy": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/character-entities-legacy/-/character-entities-legacy-3.0.0.tgz", - "integrity": "sha512-RpPp0asT/6ufRm//AJVwpViZbGM/MkjQFxJccQRHmISF/22NBtsHqAWmL+/pmkPWoIUJdWyeVleTl1wydHATVQ==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/character-reference-invalid": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/character-reference-invalid/-/character-reference-invalid-2.0.1.tgz", - "integrity": "sha512-iBZ4F4wRbyORVsu0jPV7gXkOsGYjGHPmAyv+HiHG8gi5PtC9KI2j1+v8/tlibRvjoWX027ypmG/n0HtO5t7unw==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/chardet": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/chardet/-/chardet-2.1.1.tgz", - "integrity": "sha512-PsezH1rqdV9VvyNhxxOW32/d75r01NY7TQCmOqomRo15ZSOKbpTFVsfjghxo6JloQUCGnH4k1LGu0R4yCLlWQQ==", - "license": "MIT" - }, - "node_modules/chokidar": { - "version": "3.5.3", - "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.5.3.tgz", - "integrity": "sha512-Dr3sfKRP6oTcjf2JmUmFJfeVMvXBdegxB0iVQ5eb2V10uFJUCAS8OByZdVAyVb8xXNz3GjjTgj9kLWsZTqE6kw==", - "funding": [ - { - "type": "individual", - "url": "https://paulmillr.com/funding/" - } - ], - "license": "MIT", - "dependencies": { - "anymatch": "~3.1.2", - "braces": "~3.0.2", - "glob-parent": "~5.1.2", - "is-binary-path": "~2.1.0", - "is-glob": "~4.0.1", - "normalize-path": "~3.0.0", - "readdirp": "~3.6.0" - }, - "engines": { - "node": ">= 8.10.0" - }, - 
"optionalDependencies": { - "fsevents": "~2.3.2" - } - }, - "node_modules/chownr": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/chownr/-/chownr-2.0.0.tgz", - "integrity": "sha512-bIomtDF5KGpdogkLd9VspvFzk9KfpyyGlS8YFVZl7TGPBHL5snIOnxeshwVgPteQ9b4Eydl+pVbIyE1DcvCWgQ==", - "license": "ISC", - "engines": { - "node": ">=10" - } - }, - "node_modules/chromium-bidi": { - "version": "0.6.2", - "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.6.2.tgz", - "integrity": "sha512-4WVBa6ijmUTVr9cZD4eicQD8Mdy/HCX3bzEIYYpmk0glqYLoWH+LqQEvV9RpDRzoQSbY1KJHloYXbDMXMbDPhg==", - "license": "Apache-2.0", - "dependencies": { - "mitt": "3.0.1", - "urlpattern-polyfill": "10.0.0", - "zod": "3.23.8" - }, - "peerDependencies": { - "devtools-protocol": "*" - } - }, - "node_modules/chromium-bidi/node_modules/zod": { - "version": "3.23.8", - "resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz", - "integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/colinhacks" - } - }, - "node_modules/clean-stack": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/clean-stack/-/clean-stack-4.2.0.tgz", - "integrity": "sha512-LYv6XPxoyODi36Dp976riBtSY27VmFo+MKqEU9QCCWyTrdEPDog+RWA7xQWHi6Vbp61j5c4cdzzX1NidnwtUWg==", - "license": "MIT", - "dependencies": { - "escape-string-regexp": "5.0.0" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/cli-boxes": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/cli-boxes/-/cli-boxes-3.0.0.tgz", - "integrity": "sha512-/lzGpEWL/8PfI0BmBOPRwp0c/wFNX1RdUML3jK/RcSBA9T8mZDdQpqYBKtCFTOfQbwPqWEOpjqW+Fnayc0969g==", - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/cli-cursor": { - "version": 
"4.0.0", - "resolved": "https://registry.npmjs.org/cli-cursor/-/cli-cursor-4.0.0.tgz", - "integrity": "sha512-VGtlMu3x/4DOtIUwEkRezxUZ2lBacNJCHash0N0WeZDBS+7Ux1dm3XWAgWYxLJFMMdOeXMHXorshEFhbMSGelg==", - "license": "MIT", - "dependencies": { - "restore-cursor": "^4.0.0" - }, - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/cli-spinners": { - "version": "2.9.2", - "resolved": "https://registry.npmjs.org/cli-spinners/-/cli-spinners-2.9.2.tgz", - "integrity": "sha512-ywqV+5MmyL4E7ybXgKys4DugZbX0FC6LnwrhjuykIjnK9k8OQacQ7axGKnjDXWNhns0xot3bZI5h55H8yo9cJg==", - "license": "MIT", - "engines": { - "node": ">=6" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/cli-truncate": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/cli-truncate/-/cli-truncate-4.0.0.tgz", - "integrity": "sha512-nPdaFdQ0h/GEigbPClz11D0v/ZJEwxmeVZGeMo3Z5StPtUTkA9o1lD6QwoirYiSDzbcwn2XcjwmCp68W1IS4TA==", - "license": "MIT", - "dependencies": { - "slice-ansi": "^5.0.0", - "string-width": "^7.0.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/cli-truncate/node_modules/is-fullwidth-code-point": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-4.0.0.tgz", - "integrity": "sha512-O4L094N2/dZ7xqVdrXhh9r1KODPJpFms8B5sGdJLPy664AgvXsreZUyCQQNItZRDlYug4xStLjNp/sz3HvBowQ==", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/cli-truncate/node_modules/slice-ansi": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-5.0.0.tgz", - "integrity": "sha512-FC+lgizVPfie0kkhqUScwRu1O/lF6NOgJmlCgK+/LYxDCTk8sGelYaHDhFcDN+Sn3Cv+3VSa4Byeo+IMCzpMgQ==", - "license": "MIT", - 
"dependencies": { - "ansi-styles": "^6.0.0", - "is-fullwidth-code-point": "^4.0.0" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/slice-ansi?sponsor=1" - } - }, - "node_modules/cli-width": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/cli-width/-/cli-width-4.1.0.tgz", - "integrity": "sha512-ouuZd4/dm2Sw5Gmqy6bGyNNNe1qt9RpmxveLSO7KcgsTnU7RXfsw+/bukWGo1abgBiMAic068rclZsO4IWmmxQ==", - "license": "ISC", - "engines": { - "node": ">= 12" - } - }, - "node_modules/cliui": { - "version": "8.0.1", - "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", - "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", - "license": "ISC", - "dependencies": { - "string-width": "^4.2.0", - "strip-ansi": "^6.0.1", - "wrap-ansi": "^7.0.0" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/cliui/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/cliui/node_modules/ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "license": "MIT", - "dependencies": { - "color-convert": "^2.0.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/cliui/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "license": "MIT" - }, - 
"node_modules/cliui/node_modules/is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/cliui/node_modules/string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/cliui/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/cliui/node_modules/wrap-ansi": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", - "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", - "license": "MIT", - "dependencies": { - "ansi-styles": "^4.0.0", - "string-width": "^4.1.0", - "strip-ansi": "^6.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/code-excerpt": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/code-excerpt/-/code-excerpt-4.0.0.tgz", - "integrity": "sha512-xxodCmBen3iy2i0WtAK8FlFNrRzjUqjRsMfho58xT/wvZU1YTM3fCnRjcy1gJPMepaRlgm/0e6w8SpWHpn3/cA==", - "license": "MIT", - "dependencies": { - "convert-to-spaces": 
"^2.0.1" - }, - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - } - }, - "node_modules/collapse-white-space": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/collapse-white-space/-/collapse-white-space-2.1.0.tgz", - "integrity": "sha512-loKTxY1zCOuG4j9f6EPnuyyYkf58RnhhWTvRoZEokgB+WbdXehfjFviyOVYkqzEWz1Q5kRiZdBYS5SwxbQYwzw==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/color": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/color/-/color-4.2.3.tgz", - "integrity": "sha512-1rXeuUUiGGrykh+CeBdu5Ie7OJwinCgQY0bc7GCRxy5xVHy+moaqkpL/jqQq0MtQOeYcrqEz4abc5f0KtU7W4A==", - "license": "MIT", - "dependencies": { - "color-convert": "^2.0.1", - "color-string": "^1.9.0" - }, - "engines": { - "node": ">=12.5.0" - } - }, - "node_modules/color-blend": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/color-blend/-/color-blend-4.0.0.tgz", - "integrity": "sha512-fYODTHhI/NG+B5GnzvuL3kiFrK/UnkUezWFTgEPBTY5V+kpyfAn95Vn9sJeeCX6omrCOdxnqCL3CvH+6sXtIbw==", - "license": "MIT", - "engines": { - "node": ">=10.0.0" - } - }, - "node_modules/color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "license": "MIT", - "dependencies": { - "color-name": "~1.1.4" - }, - "engines": { - "node": ">=7.0.0" - } - }, - "node_modules/color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "license": "MIT" - }, - "node_modules/color-string": { - "version": "1.9.1", - "resolved": "https://registry.npmjs.org/color-string/-/color-string-1.9.1.tgz", - "integrity": 
"sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==", - "license": "MIT", - "dependencies": { - "color-name": "^1.0.0", - "simple-swizzle": "^0.2.2" - } - }, - "node_modules/combined-stream": { - "version": "1.0.8", - "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", - "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", - "license": "MIT", - "dependencies": { - "delayed-stream": "~1.0.0" - }, - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/comma-separated-tokens": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/comma-separated-tokens/-/comma-separated-tokens-2.0.3.tgz", - "integrity": "sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/commander": { - "version": "8.3.0", - "resolved": "https://registry.npmjs.org/commander/-/commander-8.3.0.tgz", - "integrity": "sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww==", - "license": "MIT", - "engines": { - "node": ">= 12" - } - }, - "node_modules/concat-map": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", - "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", - "license": "MIT" - }, - "node_modules/content-disposition": { - "version": "0.5.4", - "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz", - "integrity": "sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ==", - "license": "MIT", - "dependencies": { - "safe-buffer": "5.2.1" - }, - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/content-type": { - "version": "1.0.5", - "resolved": 
"https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz", - "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==", - "license": "MIT", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/convert-to-spaces": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/convert-to-spaces/-/convert-to-spaces-2.0.1.tgz", - "integrity": "sha512-rcQ1bsQO9799wq24uE5AM2tAILy4gXGIK/njFWcVQkGNZ96edlpY+A7bjwvzjYvLDyzmG1MmMLZhpcsb+klNMQ==", - "license": "MIT", - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - } - }, - "node_modules/cookie": { - "version": "0.5.0", - "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.5.0.tgz", - "integrity": "sha512-YZ3GUyn/o8gfKJlnlX7g7xq4gyO6OSuhGPKaaGssGB2qgDUS0gPgtTvoyZLTt9Ab6dC4hfc9dV5arkvc/OCmrw==", - "license": "MIT", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/cookie-signature": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz", - "integrity": "sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ==", - "license": "MIT" - }, - "node_modules/cors": { - "version": "2.8.6", - "resolved": "https://registry.npmjs.org/cors/-/cors-2.8.6.tgz", - "integrity": "sha512-tJtZBBHA6vjIAaF6EnIaq6laBBP9aq/Y3ouVJjEfoHbRBcHBAHYcMh/w8LDrk2PvIMMq8gmopa5D4V8RmbrxGw==", - "license": "MIT", - "dependencies": { - "object-assign": "^4", - "vary": "^1" - }, - "engines": { - "node": ">= 0.10" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/express" - } - }, - "node_modules/cosmiconfig": { - "version": "9.0.1", - "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-9.0.1.tgz", - "integrity": "sha512-hr4ihw+DBqcvrsEDioRO31Z17x71pUYoNe/4h6Z0wB72p7MU7/9gH8Q3s12NFhHPfYBBOV3qyfUxmr/Yn3shnQ==", - "license": "MIT", - "dependencies": { - "env-paths": "^2.2.1", - "import-fresh": "^3.3.0", - "js-yaml": "^4.1.0", - 
"parse-json": "^5.2.0" - }, - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/d-fischer" - }, - "peerDependencies": { - "typescript": ">=4.9.5" - }, - "peerDependenciesMeta": { - "typescript": { - "optional": true - } - } - }, - "node_modules/cross-spawn": { - "version": "7.0.6", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", - "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", - "license": "MIT", - "dependencies": { - "path-key": "^3.1.0", - "shebang-command": "^2.0.0", - "which": "^2.0.1" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/cssesc": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/cssesc/-/cssesc-3.0.0.tgz", - "integrity": "sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg==", - "license": "MIT", - "bin": { - "cssesc": "bin/cssesc" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/cssfilter": { - "version": "0.0.10", - "resolved": "https://registry.npmjs.org/cssfilter/-/cssfilter-0.0.10.tgz", - "integrity": "sha512-FAaLDaplstoRsDR8XGYH51znUN0UY7nMc6Z9/fvE8EXGwvJE9hu7W2vHwx1+bd6gCYnln9nLbzxFTrcO9YQDZw==", - "license": "MIT" - }, - "node_modules/csstype": { - "version": "3.2.3", - "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz", - "integrity": "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==", - "license": "MIT", - "peer": true - }, - "node_modules/data-uri-to-buffer": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz", - "integrity": "sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==", - "license": "MIT", - "engines": { - "node": ">= 14" - } - }, - "node_modules/data-view-buffer": { - "version": "1.0.2", - "resolved": 
"https://registry.npmjs.org/data-view-buffer/-/data-view-buffer-1.0.2.tgz", - "integrity": "sha512-EmKO5V3OLXh1rtK2wgXRansaK1/mtVdTUEiEI0W8RkvgT05kfxaH29PliLnpLP73yYO6142Q72QNa8Wx/A5CqQ==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "es-errors": "^1.3.0", - "is-data-view": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/data-view-byte-length": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/data-view-byte-length/-/data-view-byte-length-1.0.2.tgz", - "integrity": "sha512-tuhGbE6CfTM9+5ANGf+oQb72Ky/0+s3xKUpHvShfiz2RxMFgFPjsXuRLBVMtvMs15awe45SRb83D6wH4ew6wlQ==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "es-errors": "^1.3.0", - "is-data-view": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/inspect-js" - } - }, - "node_modules/data-view-byte-offset": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/data-view-byte-offset/-/data-view-byte-offset-1.0.1.tgz", - "integrity": "sha512-BS8PfmtDGnrgYdOonGZQdLZslWIeCGFP9tpan0hi1Co2Zr2NKADsvGYA8XxuG/4UWgJ6Cjtv+YJnB6MM69QGlQ==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.2", - "es-errors": "^1.3.0", - "is-data-view": "^1.0.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/debug": { - "version": "4.4.3", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", - "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", - "license": "MIT", - "dependencies": { - "ms": "^2.1.3" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/decode-bmp": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/decode-bmp/-/decode-bmp-0.2.1.tgz", - 
"integrity": "sha512-NiOaGe+GN0KJqi2STf24hfMkFitDUaIoUU3eKvP/wAbLe8o6FuW5n/x7MHPR0HKvBokp6MQY/j7w8lewEeVCIA==", - "license": "MIT", - "dependencies": { - "@canvas/image-data": "^1.0.0", - "to-data-view": "^1.1.0" - }, - "engines": { - "node": ">=8.6.0" - } - }, - "node_modules/decode-ico": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/decode-ico/-/decode-ico-0.4.1.tgz", - "integrity": "sha512-69NZfbKIzux1vBOd31al3XnMnH+2mqDhEgLdpygErm4d60N+UwA5Sq5WFjmEDQzumgB9fElojGwWG0vybVfFmA==", - "license": "MIT", - "dependencies": { - "@canvas/image-data": "^1.0.0", - "decode-bmp": "^0.2.0", - "to-data-view": "^1.1.0" - }, - "engines": { - "node": ">=8.6" - } - }, - "node_modules/decode-named-character-reference": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/decode-named-character-reference/-/decode-named-character-reference-1.3.0.tgz", - "integrity": "sha512-GtpQYB283KrPp6nRw50q3U9/VfOutZOe103qlN7BPP6Ad27xYnOIWv4lPzo8HCAL+mMZofJ9KEy30fq6MfaK6Q==", - "license": "MIT", - "dependencies": { - "character-entities": "^2.0.0" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/decompress-response": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", - "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==", - "license": "MIT", - "dependencies": { - "mimic-response": "^3.1.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/decompress-response/node_modules/mimic-response": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz", - "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==", - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": 
"https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/deep-extend": { - "version": "0.6.0", - "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz", - "integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==", - "license": "MIT", - "optional": true, - "engines": { - "node": ">=4.0.0" - } - }, - "node_modules/defer-to-connect": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/defer-to-connect/-/defer-to-connect-2.0.1.tgz", - "integrity": "sha512-4tvttepXG1VaYGrRibk5EwJd1t4udunSOVMdLSAL6mId1ix438oPwPZMALY41FCijukO1L0twNcGsdzS7dHgDg==", - "license": "MIT", - "engines": { - "node": ">=10" - } - }, - "node_modules/define-data-property": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", - "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==", - "license": "MIT", - "dependencies": { - "es-define-property": "^1.0.0", - "es-errors": "^1.3.0", - "gopd": "^1.0.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/define-lazy-prop": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/define-lazy-prop/-/define-lazy-prop-2.0.0.tgz", - "integrity": "sha512-Ds09qNh8yw3khSjiJjiUInaGX9xlqZDY7JVryGxdxV7NPeuqQfplOpQ66yJFZut3jLa5zOwkXw1g9EI2uKh4Og==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/define-properties": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.2.1.tgz", - "integrity": "sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg==", - "license": "MIT", - "dependencies": { - "define-data-property": "^1.0.1", - "has-property-descriptors": "^1.0.0", - "object-keys": "^1.1.1" - }, - "engines": { - "node": ">= 0.4" - }, - 
"funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/degenerator": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/degenerator/-/degenerator-5.0.1.tgz", - "integrity": "sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==", - "license": "MIT", - "dependencies": { - "ast-types": "^0.13.4", - "escodegen": "^2.1.0", - "esprima": "^4.0.1" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/delayed-stream": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", - "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", - "license": "MIT", - "engines": { - "node": ">=0.4.0" - } - }, - "node_modules/depd": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", - "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==", - "license": "MIT", - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/dependency-graph": { - "version": "0.11.0", - "resolved": "https://registry.npmjs.org/dependency-graph/-/dependency-graph-0.11.0.tgz", - "integrity": "sha512-JeMq7fEshyepOWDfcfHK06N3MhyPhz++vtqWhMT5O9A3K42rdsEDpfdVqjaqaAhsw6a+ZqeDvQVtD0hFHQWrzg==", - "license": "MIT", - "engines": { - "node": ">= 0.6.0" - } - }, - "node_modules/dequal": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", - "integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/destroy": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.2.0.tgz", - "integrity": "sha512-2sJGJTaXIIaR1w4iJSNoN0hnMY7Gpc/n8D4qSCJw8QqFWXf7cuAgnEHxBpweaVcPevC2l3KpjYCx3NypQQgaJg==", - "license": "MIT", - "engines": { - "node": ">= 
0.8", - "npm": "1.2.8000 || >= 1.4.16" - } - }, - "node_modules/detect-libc": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", - "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==", - "license": "Apache-2.0", - "engines": { - "node": ">=8" - } - }, - "node_modules/detect-node-es": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/detect-node-es/-/detect-node-es-1.1.0.tgz", - "integrity": "sha512-ypdmJU/TbBby2Dxibuv7ZLW3Bs1QEmM7nHjEANfohJLvE0XVujisn1qPJcZxg+qDucsr+bP6fLD1rPS3AhJ7EQ==", - "license": "MIT", - "peer": true - }, - "node_modules/detect-port": { - "version": "1.5.1", - "resolved": "https://registry.npmjs.org/detect-port/-/detect-port-1.5.1.tgz", - "integrity": "sha512-aBzdj76lueB6uUst5iAs7+0H/oOjqI5D16XUWxlWMIMROhcM0rfsNVk93zTngq1dDNpoXRr++Sus7ETAExppAQ==", - "license": "MIT", - "dependencies": { - "address": "^1.0.1", - "debug": "4" - }, - "bin": { - "detect": "bin/detect-port.js", - "detect-port": "bin/detect-port.js" - } - }, - "node_modules/devlop": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz", - "integrity": "sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==", - "license": "MIT", - "dependencies": { - "dequal": "^2.0.0" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/devtools-protocol": { - "version": "0.0.1312386", - "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1312386.tgz", - "integrity": "sha512-DPnhUXvmvKT2dFA/j7B+riVLUt9Q6RKJlcppojL5CoRywJJKLDYnRlw0gTFKfgDPHP5E04UoB71SxoJlVZy8FA==", - "license": "BSD-3-Clause" - }, - "node_modules/didyoumean": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/didyoumean/-/didyoumean-1.2.2.tgz", - "integrity": 
"sha512-gxtyfqMg7GKyhQmb056K7M3xszy/myH8w+B4RT+QXBQsvAOdc3XymqDDPHx1BgPgsdAA5SIifona89YtRATDzw==", - "license": "Apache-2.0" - }, - "node_modules/dlv": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/dlv/-/dlv-1.1.3.tgz", - "integrity": "sha512-+HlytyjlPKnIG8XuRG8WvmBP8xs8P71y+SKKS6ZXWoEgLuePxtDoUEiH7WkdePWrQ5JBpE6aoVqfZfJUQkjXwA==", - "license": "MIT" - }, - "node_modules/dns-packet": { - "version": "5.6.1", - "resolved": "https://registry.npmjs.org/dns-packet/-/dns-packet-5.6.1.tgz", - "integrity": "sha512-l4gcSouhcgIKRvyy99RNVOgxXiicE+2jZoNmaNmZ6JXiGajBOJAesk1OBlJuM5k2c+eudGdLxDqXuPCKIj6kpw==", - "license": "MIT", - "dependencies": { - "@leichtgewicht/ip-codec": "^2.0.1" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/dns-socket": { - "version": "4.2.2", - "resolved": "https://registry.npmjs.org/dns-socket/-/dns-socket-4.2.2.tgz", - "integrity": "sha512-BDeBd8najI4/lS00HSKpdFia+OvUMytaVjfzR9n5Lq8MlZRSvtbI+uLtx1+XmQFls5wFU9dssccTmQQ6nfpjdg==", - "license": "MIT", - "dependencies": { - "dns-packet": "^5.2.4" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/dunder-proto": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", - "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", - "license": "MIT", - "dependencies": { - "call-bind-apply-helpers": "^1.0.1", - "es-errors": "^1.3.0", - "gopd": "^1.2.0" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/ee-first": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", - "integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==", - "license": "MIT" - }, - "node_modules/emoji-regex": { - "version": "10.6.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-10.6.0.tgz", - "integrity": 
"sha512-toUI84YS5YmxW219erniWD0CIVOo46xGKColeNQRgOzDorgBi1v4D71/OFzgD9GO2UGKIv1C3Sp8DAn0+j5w7A==", - "license": "MIT" - }, - "node_modules/encodeurl": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz", - "integrity": "sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==", - "license": "MIT", - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/end-of-stream": { - "version": "1.4.5", - "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz", - "integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==", - "license": "MIT", - "dependencies": { - "once": "^1.4.0" - } - }, - "node_modules/engine.io": { - "version": "6.5.5", - "resolved": "https://registry.npmjs.org/engine.io/-/engine.io-6.5.5.tgz", - "integrity": "sha512-C5Pn8Wk+1vKBoHghJODM63yk8MvrO9EWZUfkAt5HAqIgPE4/8FF0PEGHXtEd40l223+cE5ABWuPzm38PHFXfMA==", - "license": "MIT", - "dependencies": { - "@types/cookie": "^0.4.1", - "@types/cors": "^2.8.12", - "@types/node": ">=10.0.0", - "accepts": "~1.3.4", - "base64id": "2.0.0", - "cookie": "~0.4.1", - "cors": "~2.8.5", - "debug": "~4.3.1", - "engine.io-parser": "~5.2.1", - "ws": "~8.17.1" - }, - "engines": { - "node": ">=10.2.0" - } - }, - "node_modules/engine.io-parser": { - "version": "5.2.3", - "resolved": "https://registry.npmjs.org/engine.io-parser/-/engine.io-parser-5.2.3.tgz", - "integrity": "sha512-HqD3yTBfnBxIrbnM1DoD6Pcq8NECnh8d4As1Qgh0z5Gg3jRRIqijury0CL3ghu/edArpUYiYqQiDUQBIs4np3Q==", - "license": "MIT", - "engines": { - "node": ">=10.0.0" - } - }, - "node_modules/engine.io/node_modules/cookie": { - "version": "0.4.2", - "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.4.2.tgz", - "integrity": "sha512-aSWTXFzaKWkvHO1Ny/s+ePFpvKsPnjc551iI41v3ny/ow6tBG5Vd+FuqGNhh1LxOmVzOlGUriIlOaokOvhaStA==", - "license": "MIT", - "engines": { - "node": ">= 0.6" - } - }, - 
"node_modules/engine.io/node_modules/debug": { - "version": "4.3.7", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", - "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", - "license": "MIT", - "dependencies": { - "ms": "^2.1.3" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/engine.io/node_modules/ws": { - "version": "8.17.1", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.17.1.tgz", - "integrity": "sha512-6XQFvXTkbfUOZOKKILFG1PDK2NDQs4azKQl26T0YS5CxqWLgXajbPZ+h4gZekJyRqFU8pvnbAbbs/3TgRPy+GQ==", - "license": "MIT", - "engines": { - "node": ">=10.0.0" - }, - "peerDependencies": { - "bufferutil": "^4.0.1", - "utf-8-validate": ">=5.0.2" - }, - "peerDependenciesMeta": { - "bufferutil": { - "optional": true - }, - "utf-8-validate": { - "optional": true - } - } - }, - "node_modules/entities": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz", - "integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==", - "license": "BSD-2-Clause", - "engines": { - "node": ">=0.12" - }, - "funding": { - "url": "https://github.com/fb55/entities?sponsor=1" - } - }, - "node_modules/env-paths": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/env-paths/-/env-paths-2.2.1.tgz", - "integrity": "sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A==", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/environment": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/environment/-/environment-1.1.0.tgz", - "integrity": "sha512-xUtoPkMggbz0MPyPiIWr1Kp4aeWJjDZ6SMvURhimjdZgsRuDplF5/s9hcgGhyXMhs+6vpnuoiZ2kFiu3FMnS8Q==", - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": 
"https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/error-ex": { - "version": "1.3.4", - "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.4.tgz", - "integrity": "sha512-sqQamAnR14VgCr1A618A3sGrygcpK+HEbenA/HiEAkkUwcZIIB/tgWqHFxWgOyDh4nB4JCRimh79dR5Ywc9MDQ==", - "license": "MIT", - "dependencies": { - "is-arrayish": "^0.2.1" - } - }, - "node_modules/es-abstract": { - "version": "1.24.2", - "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.24.2.tgz", - "integrity": "sha512-2FpH9Q5i2RRwyEP1AylXe6nYLR5OhaJTZwmlcP0dL/+JCbgg7yyEo/sEK6HeGZRf3dFpWwThaRHVApXSkW3xeg==", - "license": "MIT", - "dependencies": { - "array-buffer-byte-length": "^1.0.2", - "arraybuffer.prototype.slice": "^1.0.4", - "available-typed-arrays": "^1.0.7", - "call-bind": "^1.0.8", - "call-bound": "^1.0.4", - "data-view-buffer": "^1.0.2", - "data-view-byte-length": "^1.0.2", - "data-view-byte-offset": "^1.0.1", - "es-define-property": "^1.0.1", - "es-errors": "^1.3.0", - "es-object-atoms": "^1.1.1", - "es-set-tostringtag": "^2.1.0", - "es-to-primitive": "^1.3.0", - "function.prototype.name": "^1.1.8", - "get-intrinsic": "^1.3.0", - "get-proto": "^1.0.1", - "get-symbol-description": "^1.1.0", - "globalthis": "^1.0.4", - "gopd": "^1.2.0", - "has-property-descriptors": "^1.0.2", - "has-proto": "^1.2.0", - "has-symbols": "^1.1.0", - "hasown": "^2.0.2", - "internal-slot": "^1.1.0", - "is-array-buffer": "^3.0.5", - "is-callable": "^1.2.7", - "is-data-view": "^1.0.2", - "is-negative-zero": "^2.0.3", - "is-regex": "^1.2.1", - "is-set": "^2.0.3", - "is-shared-array-buffer": "^1.0.4", - "is-string": "^1.1.1", - "is-typed-array": "^1.1.15", - "is-weakref": "^1.1.1", - "math-intrinsics": "^1.1.0", - "object-inspect": "^1.13.4", - "object-keys": "^1.1.1", - "object.assign": "^4.1.7", - "own-keys": "^1.0.1", - "regexp.prototype.flags": "^1.5.4", - "safe-array-concat": "^1.1.3", - "safe-push-apply": "^1.0.0", - "safe-regex-test": "^1.1.0", - "set-proto": "^1.0.0", - 
"stop-iteration-iterator": "^1.1.0", - "string.prototype.trim": "^1.2.10", - "string.prototype.trimend": "^1.0.9", - "string.prototype.trimstart": "^1.0.8", - "typed-array-buffer": "^1.0.3", - "typed-array-byte-length": "^1.0.3", - "typed-array-byte-offset": "^1.0.4", - "typed-array-length": "^1.0.7", - "unbox-primitive": "^1.1.0", - "which-typed-array": "^1.1.19" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/es-aggregate-error": { - "version": "1.0.14", - "resolved": "https://registry.npmjs.org/es-aggregate-error/-/es-aggregate-error-1.0.14.tgz", - "integrity": "sha512-3YxX6rVb07B5TV11AV5wsL7nQCHXNwoHPsQC8S4AmBiqYhyNCJ5BRKXkXyDJvs8QzXN20NgRtxe3dEEQD9NLHA==", - "license": "MIT", - "dependencies": { - "define-data-property": "^1.1.4", - "define-properties": "^1.2.1", - "es-abstract": "^1.24.0", - "es-errors": "^1.3.0", - "function-bind": "^1.1.2", - "globalthis": "^1.0.4", - "has-property-descriptors": "^1.0.2", - "set-function-name": "^2.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/es-define-property": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", - "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/es-errors": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", - "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/es-object-atoms": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", - "integrity": 
"sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", - "license": "MIT", - "dependencies": { - "es-errors": "^1.3.0" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/es-set-tostringtag": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", - "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", - "license": "MIT", - "dependencies": { - "es-errors": "^1.3.0", - "get-intrinsic": "^1.2.6", - "has-tostringtag": "^1.0.2", - "hasown": "^2.0.2" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/es-to-primitive": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/es-to-primitive/-/es-to-primitive-1.3.0.tgz", - "integrity": "sha512-w+5mJ3GuFL+NjVtJlvydShqE1eN3h3PbI7/5LAsYJP/2qtuMXjfL2LpHSRqo4b4eSF5K/DH1JXKUAHSB2UW50g==", - "license": "MIT", - "dependencies": { - "is-callable": "^1.2.7", - "is-date-object": "^1.0.5", - "is-symbol": "^1.0.4" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/es-toolkit": { - "version": "1.45.1", - "resolved": "https://registry.npmjs.org/es-toolkit/-/es-toolkit-1.45.1.tgz", - "integrity": "sha512-/jhoOj/Fx+A+IIyDNOvO3TItGmlMKhtX8ISAHKE90c4b/k1tqaqEZ+uUqfpU8DMnW5cgNJv606zS55jGvza0Xw==", - "license": "MIT", - "workspaces": [ - "docs", - "benchmarks" - ] - }, - "node_modules/esast-util-from-estree": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/esast-util-from-estree/-/esast-util-from-estree-2.0.0.tgz", - "integrity": "sha512-4CyanoAudUSBAn5K13H4JhsMH6L9ZP7XbLVe/dKybkxMO7eDyLsT8UHl9TRNrU2Gr9nz+FovfSIjuXWJ81uVwQ==", - "license": "MIT", - "dependencies": { - "@types/estree-jsx": "^1.0.0", - "devlop": "^1.0.0", - "estree-util-visit": "^2.0.0", - "unist-util-position-from-estree": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": 
"https://opencollective.com/unified" - } - }, - "node_modules/esast-util-from-js": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/esast-util-from-js/-/esast-util-from-js-2.0.1.tgz", - "integrity": "sha512-8Ja+rNJ0Lt56Pcf3TAmpBZjmx8ZcK5Ts4cAzIOjsjevg9oSXJnl6SUQ2EevU8tv3h6ZLWmoKL5H4fgWvdvfETw==", - "license": "MIT", - "dependencies": { - "@types/estree-jsx": "^1.0.0", - "acorn": "^8.0.0", - "esast-util-from-estree": "^2.0.0", - "vfile-message": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/escalade": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", - "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/escape-html": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", - "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==", - "license": "MIT" - }, - "node_modules/escape-string-regexp": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-5.0.0.tgz", - "integrity": "sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw==", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/escodegen": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/escodegen/-/escodegen-2.1.0.tgz", - "integrity": "sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==", - "license": "BSD-2-Clause", - "dependencies": { - "esprima": "^4.0.1", - "estraverse": "^5.2.0", - "esutils": "^2.0.2" - }, - "bin": { - "escodegen": "bin/escodegen.js", - "esgenerate": 
"bin/esgenerate.js" - }, - "engines": { - "node": ">=6.0" - }, - "optionalDependencies": { - "source-map": "~0.6.1" - } - }, - "node_modules/escodegen/node_modules/source-map": { - "version": "0.6.1", - "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", - "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", - "license": "BSD-3-Clause", - "optional": true, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/esprima": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", - "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", - "license": "BSD-2-Clause", - "bin": { - "esparse": "bin/esparse.js", - "esvalidate": "bin/esvalidate.js" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/estraverse": { - "version": "5.3.0", - "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", - "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", - "license": "BSD-2-Clause", - "engines": { - "node": ">=4.0" - } - }, - "node_modules/estree-util-attach-comments": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/estree-util-attach-comments/-/estree-util-attach-comments-3.0.0.tgz", - "integrity": "sha512-cKUwm/HUcTDsYh/9FgnuFqpfquUbwIqwKM26BVCGDPVgvaCl/nDCCjUfiLlx6lsEZ3Z4RFxNbOQ60pkaEwFxGw==", - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/estree-util-build-jsx": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/estree-util-build-jsx/-/estree-util-build-jsx-3.0.1.tgz", - "integrity": "sha512-8U5eiL6BTrPxp/CHbs2yMgP8ftMhR5ww1eIKoWRMlqvltHF8fZn5LRDvTKuxD3DUn+shRbLGqXemcP51oFCsGQ==", - "license": "MIT", - "dependencies": { - "@types/estree-jsx": 
"^1.0.0", - "devlop": "^1.0.0", - "estree-util-is-identifier-name": "^3.0.0", - "estree-walker": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/estree-util-is-identifier-name": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/estree-util-is-identifier-name/-/estree-util-is-identifier-name-3.0.0.tgz", - "integrity": "sha512-hFtqIDZTIUZ9BXLb8y4pYGyk6+wekIivNVTcmvk8NoOh+VeRn5y6cEHzbURrWbfp1fIqdVipilzj+lfaadNZmg==", - "license": "MIT", - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/estree-util-scope": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/estree-util-scope/-/estree-util-scope-1.0.0.tgz", - "integrity": "sha512-2CAASclonf+JFWBNJPndcOpA8EMJwa0Q8LUFJEKqXLW6+qBvbFZuF5gItbQOs/umBUkjviCSDCbBwU2cXbmrhQ==", - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0", - "devlop": "^1.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/estree-util-to-js": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/estree-util-to-js/-/estree-util-to-js-2.0.0.tgz", - "integrity": "sha512-WDF+xj5rRWmD5tj6bIqRi6CkLIXbbNQUcxQHzGysQzvHmdYG2G7p/Tf0J0gpxGgkeMZNTIjT/AoSvC9Xehcgdg==", - "license": "MIT", - "dependencies": { - "@types/estree-jsx": "^1.0.0", - "astring": "^1.8.0", - "source-map": "^0.7.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/estree-util-visit": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/estree-util-visit/-/estree-util-visit-2.0.0.tgz", - "integrity": "sha512-m5KgiH85xAhhW8Wta0vShLcUvOsh3LLPI2YVwcbio1l7E09NTLL1EyMZFM1OyWowoH0skScNbhOPl4kcBgzTww==", - "license": "MIT", - "dependencies": { - "@types/estree-jsx": "^1.0.0", - "@types/unist": "^3.0.0" - }, - "funding": { - "type": "opencollective", - 
"url": "https://opencollective.com/unified" - } - }, - "node_modules/estree-walker": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-3.0.3.tgz", - "integrity": "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==", - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0" - } - }, - "node_modules/esutils": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", - "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", - "license": "BSD-2-Clause", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/etag": { - "version": "1.8.1", - "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", - "integrity": "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==", - "license": "MIT", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/event-target-shim": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", - "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/events-universal": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/events-universal/-/events-universal-1.0.1.tgz", - "integrity": "sha512-LUd5euvbMLpwOF8m6ivPCbhQeSiYVNb8Vs0fQ8QjXo0JTkEHpz8pxdQf0gStltaPpw0Cca8b39KxvK9cfKRiAw==", - "license": "Apache-2.0", - "dependencies": { - "bare-events": "^2.7.0" - } - }, - "node_modules/expand-template": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz", - "integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==", - "license": "(MIT OR WTFPL)", - "optional": true, - "engines": { - "node": ">=6" - 
} - }, - "node_modules/express": { - "version": "4.18.2", - "resolved": "https://registry.npmjs.org/express/-/express-4.18.2.tgz", - "integrity": "sha512-5/PsL6iGPdfQ/lKM1UuielYgv3BUoJfz1aUwU9vHZ+J7gyvwdQXFEBIEIaxeGf0GIcreATNyBExtalisDbuMqQ==", - "license": "MIT", - "dependencies": { - "accepts": "~1.3.8", - "array-flatten": "1.1.1", - "body-parser": "1.20.1", - "content-disposition": "0.5.4", - "content-type": "~1.0.4", - "cookie": "0.5.0", - "cookie-signature": "1.0.6", - "debug": "2.6.9", - "depd": "2.0.0", - "encodeurl": "~1.0.2", - "escape-html": "~1.0.3", - "etag": "~1.8.1", - "finalhandler": "1.2.0", - "fresh": "0.5.2", - "http-errors": "2.0.0", - "merge-descriptors": "1.0.1", - "methods": "~1.1.2", - "on-finished": "2.4.1", - "parseurl": "~1.3.3", - "path-to-regexp": "0.1.7", - "proxy-addr": "~2.0.7", - "qs": "6.11.0", - "range-parser": "~1.2.1", - "safe-buffer": "5.2.1", - "send": "0.18.0", - "serve-static": "1.15.0", - "setprototypeof": "1.2.0", - "statuses": "2.0.1", - "type-is": "~1.6.18", - "utils-merge": "1.0.1", - "vary": "~1.1.2" - }, - "engines": { - "node": ">= 0.10.0" - } - }, - "node_modules/express/node_modules/debug": { - "version": "2.6.9", - "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", - "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", - "license": "MIT", - "dependencies": { - "ms": "2.0.0" - } - }, - "node_modules/express/node_modules/ms": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", - "license": "MIT" - }, - "node_modules/extend": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", - "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==", - "license": "MIT" - }, - "node_modules/extract-zip": { - "version": 
"2.0.1", - "resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz", - "integrity": "sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==", - "license": "BSD-2-Clause", - "dependencies": { - "debug": "^4.1.1", - "get-stream": "^5.1.0", - "yauzl": "^2.10.0" - }, - "bin": { - "extract-zip": "cli.js" - }, - "engines": { - "node": ">= 10.17.0" - }, - "optionalDependencies": { - "@types/yauzl": "^2.9.1" - } - }, - "node_modules/extract-zip/node_modules/get-stream": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-5.2.0.tgz", - "integrity": "sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==", - "license": "MIT", - "dependencies": { - "pump": "^3.0.0" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/fast-deep-equal": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", - "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", - "license": "MIT" - }, - "node_modules/fast-fifo": { - "version": "1.3.2", - "resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz", - "integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==", - "license": "MIT" - }, - "node_modules/fast-glob": { - "version": "3.3.3", - "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz", - "integrity": "sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg==", - "license": "MIT", - "dependencies": { - "@nodelib/fs.stat": "^2.0.2", - "@nodelib/fs.walk": "^1.2.3", - "glob-parent": "^5.1.2", - "merge2": "^1.3.0", - "micromatch": "^4.0.8" - }, - "engines": { - "node": ">=8.6.0" - } - }, - "node_modules/fast-memoize": { - "version": "2.5.2", - 
"resolved": "https://registry.npmjs.org/fast-memoize/-/fast-memoize-2.5.2.tgz", - "integrity": "sha512-Ue0LwpDYErFbmNnZSF0UH6eImUwDmogUO1jyE+JbN2gsQz/jICm1Ve7t9QT0rNSsfJt+Hs4/S3GnsDVjL4HVrw==", - "license": "MIT" - }, - "node_modules/fast-uri": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.0.tgz", - "integrity": "sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/fastify" - }, - { - "type": "opencollective", - "url": "https://opencollective.com/fastify" - } - ], - "license": "BSD-3-Clause" - }, - "node_modules/fastq": { - "version": "1.20.1", - "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.20.1.tgz", - "integrity": "sha512-GGToxJ/w1x32s/D2EKND7kTil4n8OVk/9mycTc4VDza13lOvpUZTGX3mFSCtV9ksdGBVzvsyAVLM6mHFThxXxw==", - "license": "ISC", - "dependencies": { - "reusify": "^1.0.4" - } - }, - "node_modules/fault": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/fault/-/fault-2.0.1.tgz", - "integrity": "sha512-WtySTkS4OKev5JtpHXnib4Gxiurzh5NCGvWrFaZ34m6JehfTUhKZvn9njTfw48t6JumVQOmrKqpmGcdwxnhqBQ==", - "license": "MIT", - "dependencies": { - "format": "^0.2.0" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/favicons": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/favicons/-/favicons-7.2.0.tgz", - "integrity": "sha512-k/2rVBRIRzOeom3wI9jBPaSEvoTSQEW4iM0EveBmBBKFxO8mSyyRWtDlfC3VnEfu0avmjrMzy8/ZFPSe6F71Hw==", - "license": "MIT", - "dependencies": { - "escape-html": "^1.0.3", - "sharp": "^0.33.1", - "xml2js": "^0.6.1" - }, - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/favicons/node_modules/@img/sharp-darwin-arm64": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.33.5.tgz", - "integrity": 
"sha512-UT4p+iz/2H4twwAoLCqfA9UH5pI6DggwKEGuaPy7nCVQ8ZsiY5PIcrRvD1DzuY3qYL07NtIQcWnBSY/heikIFQ==", - "cpu": [ - "arm64" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-darwin-arm64": "1.0.4" - } - }, - "node_modules/favicons/node_modules/@img/sharp-darwin-x64": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.33.5.tgz", - "integrity": "sha512-fyHac4jIc1ANYGRDxtiqelIbdWkIuQaI84Mv45KvGRRxSAa7o7d1ZKAOBaYbnepLC1WqxfpimdeWfvqqSGwR2Q==", - "cpu": [ - "x64" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-darwin-x64": "1.0.4" - } - }, - "node_modules/favicons/node_modules/@img/sharp-libvips-darwin-arm64": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.0.4.tgz", - "integrity": "sha512-XblONe153h0O2zuFfTAbQYAX2JhYmDHeWikp1LM9Hul9gVPjFY427k6dFEcOL72O01QxQsWi761svJ/ev9xEDg==", - "cpu": [ - "arm64" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "darwin" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/favicons/node_modules/@img/sharp-libvips-darwin-x64": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.0.4.tgz", - "integrity": "sha512-xnGR8YuZYfJGmWPvmlunFaWJsb9T/AO2ykoP3Fz/0X5XV2aoYBPkX6xqCQvUTKKiLddarLaxpzNe+b1hjeWHAQ==", - "cpu": [ - "x64" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "darwin" - ], - "funding": { - "url": "https://opencollective.com/libvips" 
- } - }, - "node_modules/favicons/node_modules/@img/sharp-libvips-linux-arm": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.0.5.tgz", - "integrity": "sha512-gvcC4ACAOPRNATg/ov8/MnbxFDJqf/pDePbBnuBDcjsI8PssmjoKMAz4LtLaVi+OnSb5FK/yIOamqDwGmXW32g==", - "cpu": [ - "arm" - ], - "libc": [ - "glibc" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/favicons/node_modules/@img/sharp-libvips-linux-arm64": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.0.4.tgz", - "integrity": "sha512-9B+taZ8DlyyqzZQnoeIvDVR/2F4EbMepXMc/NdVbkzsJbzkUjhXv/70GQJ7tdLA4YJgNP25zukcxpX2/SueNrA==", - "cpu": [ - "arm64" - ], - "libc": [ - "glibc" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/favicons/node_modules/@img/sharp-libvips-linux-s390x": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.0.4.tgz", - "integrity": "sha512-u7Wz6ntiSSgGSGcjZ55im6uvTrOxSIS8/dgoVMoiGE9I6JAfU50yH5BoDlYA1tcuGS7g/QNtetJnxA6QEsCVTA==", - "cpu": [ - "s390x" - ], - "libc": [ - "glibc" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/favicons/node_modules/@img/sharp-libvips-linux-x64": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.0.4.tgz", - "integrity": "sha512-MmWmQ3iPFZr0Iev+BAgVMb3ZyC4KeFc3jFxnNbEPas60e1cIfevbtuyf9nDGIzOaW9PdnDciJm+wFFaTlj5xYw==", - "cpu": [ - "x64" - ], - "libc": [ - "glibc" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - 
"os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/favicons/node_modules/@img/sharp-libvips-linuxmusl-arm64": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.0.4.tgz", - "integrity": "sha512-9Ti+BbTYDcsbp4wfYib8Ctm1ilkugkA/uscUn6UXK1ldpC1JjiXbLfFZtRlBhjPZ5o1NCLiDbg8fhUPKStHoTA==", - "cpu": [ - "arm64" - ], - "libc": [ - "musl" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/favicons/node_modules/@img/sharp-libvips-linuxmusl-x64": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.0.4.tgz", - "integrity": "sha512-viYN1KX9m+/hGkJtvYYp+CCLgnJXwiQB39damAO7WMdKWlIhmYTfHjwSbQeUK/20vY154mwezd9HflVFM1wVSw==", - "cpu": [ - "x64" - ], - "libc": [ - "musl" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/favicons/node_modules/@img/sharp-linux-arm": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.33.5.tgz", - "integrity": "sha512-JTS1eldqZbJxjvKaAkxhZmBqPRGmxgu+qFKSInv8moZ2AmT5Yib3EQ1c6gp493HvrvV8QgdOXdyaIBrhvFhBMQ==", - "cpu": [ - "arm" - ], - "libc": [ - "glibc" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-arm": "1.0.5" - } - }, - "node_modules/favicons/node_modules/@img/sharp-linux-arm64": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.33.5.tgz", - "integrity": 
"sha512-JMVv+AMRyGOHtO1RFBiJy/MBsgz0x4AWrT6QoEVVTyh1E39TrCUpTRI7mx9VksGX4awWASxqCYLCV4wBZHAYxA==", - "cpu": [ - "arm64" - ], - "libc": [ - "glibc" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-arm64": "1.0.4" - } - }, - "node_modules/favicons/node_modules/@img/sharp-linux-s390x": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.33.5.tgz", - "integrity": "sha512-y/5PCd+mP4CA/sPDKl2961b+C9d+vPAveS33s6Z3zfASk2j5upL6fXVPZi7ztePZ5CuH+1kW8JtvxgbuXHRa4Q==", - "cpu": [ - "s390x" - ], - "libc": [ - "glibc" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-s390x": "1.0.4" - } - }, - "node_modules/favicons/node_modules/@img/sharp-linux-x64": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.33.5.tgz", - "integrity": "sha512-opC+Ok5pRNAzuvq1AG0ar+1owsu842/Ab+4qvU879ippJBHvyY5n2mxF1izXqkPYlGuP/M556uh53jRLJmzTWA==", - "cpu": [ - "x64" - ], - "libc": [ - "glibc" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-x64": "1.0.4" - } - }, - "node_modules/favicons/node_modules/@img/sharp-linuxmusl-arm64": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.33.5.tgz", - "integrity": 
"sha512-XrHMZwGQGvJg2V/oRSUfSAfjfPxO+4DkiRh6p2AFjLQztWUuY/o8Mq0eMQVIY7HJ1CDQUJlxGGZRw1a5bqmd1g==", - "cpu": [ - "arm64" - ], - "libc": [ - "musl" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linuxmusl-arm64": "1.0.4" - } - }, - "node_modules/favicons/node_modules/@img/sharp-linuxmusl-x64": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.33.5.tgz", - "integrity": "sha512-WT+d/cgqKkkKySYmqoZ8y3pxx7lx9vVejxW/W4DOFMYVSkErR+w7mf2u8m/y4+xHe7yY9DAXQMWQhpnMuFfScw==", - "cpu": [ - "x64" - ], - "libc": [ - "musl" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linuxmusl-x64": "1.0.4" - } - }, - "node_modules/favicons/node_modules/@img/sharp-wasm32": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.33.5.tgz", - "integrity": "sha512-ykUW4LVGaMcU9lu9thv85CbRMAwfeadCJHRsg2GmeRa/cJxsVY9Rbd57JcMxBkKHag5U/x7TSBpScF4U8ElVzg==", - "cpu": [ - "wasm32" - ], - "license": "Apache-2.0 AND LGPL-3.0-or-later AND MIT", - "optional": true, - "dependencies": { - "@emnapi/runtime": "^1.2.0" - }, - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/favicons/node_modules/@img/sharp-win32-ia32": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.33.5.tgz", - "integrity": "sha512-T36PblLaTwuVJ/zw/LaH0PdZkRz5rd3SmMHX8GSmR7vtNSP5Z6bQkExdSK7xGWyxLw4sUknBuugTelgw2faBbQ==", - "cpu": [ - "ia32" - ], - "license": 
"Apache-2.0 AND LGPL-3.0-or-later", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/favicons/node_modules/@img/sharp-win32-x64": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.33.5.tgz", - "integrity": "sha512-MpY/o8/8kj+EcnxwvrP4aTJSWw/aZ7JIGR4aBeZkZw5B7/Jn+tY9/VNwtcoGmdT7GfggGIU4kygOMSbYnOrAbg==", - "cpu": [ - "x64" - ], - "license": "Apache-2.0 AND LGPL-3.0-or-later", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/favicons/node_modules/sharp": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.33.5.tgz", - "integrity": "sha512-haPVm1EkS9pgvHrQ/F3Xy+hgcuMV0Wm9vfIBSiwZ05k+xgb0PkBQpGsAA/oWdDobNaZTH5ppvHtzCFbnSEwHVw==", - "hasInstallScript": true, - "license": "Apache-2.0", - "dependencies": { - "color": "^4.2.3", - "detect-libc": "^2.0.3", - "semver": "^7.6.3" - }, - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-darwin-arm64": "0.33.5", - "@img/sharp-darwin-x64": "0.33.5", - "@img/sharp-libvips-darwin-arm64": "1.0.4", - "@img/sharp-libvips-darwin-x64": "1.0.4", - "@img/sharp-libvips-linux-arm": "1.0.5", - "@img/sharp-libvips-linux-arm64": "1.0.4", - "@img/sharp-libvips-linux-s390x": "1.0.4", - "@img/sharp-libvips-linux-x64": "1.0.4", - "@img/sharp-libvips-linuxmusl-arm64": "1.0.4", - "@img/sharp-libvips-linuxmusl-x64": "1.0.4", - "@img/sharp-linux-arm": "0.33.5", - "@img/sharp-linux-arm64": "0.33.5", - "@img/sharp-linux-s390x": "0.33.5", - "@img/sharp-linux-x64": "0.33.5", - "@img/sharp-linuxmusl-arm64": "0.33.5", - "@img/sharp-linuxmusl-x64": "0.33.5", - 
"@img/sharp-wasm32": "0.33.5", - "@img/sharp-win32-ia32": "0.33.5", - "@img/sharp-win32-x64": "0.33.5" - } - }, - "node_modules/fd-slicer": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz", - "integrity": "sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==", - "license": "MIT", - "dependencies": { - "pend": "~1.2.0" - } - }, - "node_modules/fill-range": { - "version": "7.1.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", - "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", - "license": "MIT", - "dependencies": { - "to-regex-range": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/finalhandler": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.2.0.tgz", - "integrity": "sha512-5uXcUVftlQMFnWC9qu/svkWv3GTd2PfUhK/3PLkYNAe7FbqJMt3515HaxE6eRL74GdsriiwujiawdaB1BpEISg==", - "license": "MIT", - "dependencies": { - "debug": "2.6.9", - "encodeurl": "~1.0.2", - "escape-html": "~1.0.3", - "on-finished": "2.4.1", - "parseurl": "~1.3.3", - "statuses": "2.0.1", - "unpipe": "~1.0.0" - }, - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/finalhandler/node_modules/debug": { - "version": "2.6.9", - "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", - "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", - "license": "MIT", - "dependencies": { - "ms": "2.0.0" - } - }, - "node_modules/finalhandler/node_modules/ms": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", - "license": "MIT" - }, - "node_modules/follow-redirects": { - "version": "1.16.0", - "resolved": 
"https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.16.0.tgz", - "integrity": "sha512-y5rN/uOsadFT/JfYwhxRS5R7Qce+g3zG97+JrtFZlC9klX/W5hD7iiLzScI4nZqUS7DNUdhPgw4xI8W2LuXlUw==", - "funding": [ - { - "type": "individual", - "url": "https://github.com/sponsors/RubenVerborgh" - } - ], - "license": "MIT", - "engines": { - "node": ">=4.0" - }, - "peerDependenciesMeta": { - "debug": { - "optional": true - } - } - }, - "node_modules/for-each": { - "version": "0.3.5", - "resolved": "https://registry.npmjs.org/for-each/-/for-each-0.3.5.tgz", - "integrity": "sha512-dKx12eRCVIzqCxFGplyFKJMPvLEWgmNtUrpTiJIR5u97zEhRG8ySrtboPHZXx7daLxQVrl643cTzbab2tkQjxg==", - "license": "MIT", - "dependencies": { - "is-callable": "^1.2.7" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/form-data": { - "version": "4.0.5", - "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz", - "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==", - "license": "MIT", - "dependencies": { - "asynckit": "^0.4.0", - "combined-stream": "^1.0.8", - "es-set-tostringtag": "^2.1.0", - "hasown": "^2.0.2", - "mime-types": "^2.1.12" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/form-data-encoder": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-2.1.4.tgz", - "integrity": "sha512-yDYSgNMraqvnxiEXO4hi88+YZxaHC6QKzb5N84iRCTDeRO7ZALpir/lVmf/uXUhnwUr2O4HU8s/n6x+yNjQkHw==", - "license": "MIT", - "engines": { - "node": ">= 14.17" - } - }, - "node_modules/format": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/format/-/format-0.2.2.tgz", - "integrity": "sha512-wzsgA6WOq+09wrU1tsJ09udeR/YZRaeArL9e1wPbFg3GG2yDnC2ldKpxs4xunpFF9DgqCqOIra3bc1HWrJ37Ww==", - "engines": { - "node": ">=0.4.x" - } - }, - "node_modules/forwarded": { - "version": "0.2.0", - "resolved": 
"https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", - "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==", - "license": "MIT", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/fresh": { - "version": "0.5.2", - "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz", - "integrity": "sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q==", - "license": "MIT", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/front-matter": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/front-matter/-/front-matter-4.0.2.tgz", - "integrity": "sha512-I8ZuJ/qG92NWX8i5x1Y8qyj3vizhXS31OxjKDu3LKP+7/qBgfIKValiZIEwoVoJKUHlhWtYrktkxV1XsX+pPlg==", - "license": "MIT", - "dependencies": { - "js-yaml": "^3.13.1" - } - }, - "node_modules/front-matter/node_modules/argparse": { - "version": "1.0.10", - "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", - "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", - "license": "MIT", - "dependencies": { - "sprintf-js": "~1.0.2" - } - }, - "node_modules/front-matter/node_modules/js-yaml": { - "version": "3.14.2", - "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.2.tgz", - "integrity": "sha512-PMSmkqxr106Xa156c2M265Z+FTrPl+oxd/rgOQy2tijQeK5TxQ43psO1ZCwhVOSdnn+RzkzlRz/eY4BgJBYVpg==", - "license": "MIT", - "dependencies": { - "argparse": "^1.0.7", - "esprima": "^4.0.0" - }, - "bin": { - "js-yaml": "bin/js-yaml.js" - } - }, - "node_modules/fs-constants": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", - "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==", - "license": "MIT", - "optional": true - }, - "node_modules/fs-extra": { - "version": "11.2.0", - "resolved": 
"https://registry.npmjs.org/fs-extra/-/fs-extra-11.2.0.tgz", - "integrity": "sha512-PmDi3uwK5nFuXh7XDTlVnS17xJS7vW36is2+w3xcv8SVxiB4NyATf4ctkVY5bkSjX0Y4nbvZCq1/EjtEyr9ktw==", - "license": "MIT", - "dependencies": { - "graceful-fs": "^4.2.0", - "jsonfile": "^6.0.1", - "universalify": "^2.0.0" - }, - "engines": { - "node": ">=14.14" - } - }, - "node_modules/fs-minipass": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/fs-minipass/-/fs-minipass-2.1.0.tgz", - "integrity": "sha512-V/JgOLFCS+R6Vcq0slCuaeWEdNC3ouDlJMNIsacH2VtALiu9mV4LPrHc5cDl8k5aw6J8jwgWWpiTo5RYhmIzvg==", - "license": "ISC", - "dependencies": { - "minipass": "^3.0.0" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/fs-minipass/node_modules/minipass": { - "version": "3.3.6", - "resolved": "https://registry.npmjs.org/minipass/-/minipass-3.3.6.tgz", - "integrity": "sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==", - "license": "ISC", - "dependencies": { - "yallist": "^4.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/fsevents": { - "version": "2.3.3", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", - "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", - "hasInstallScript": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^8.16.0 || ^10.6.0 || >=11.0.0" - } - }, - "node_modules/function-bind": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", - "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/function.prototype.name": { - "version": "1.1.8", - "resolved": "https://registry.npmjs.org/function.prototype.name/-/function.prototype.name-1.1.8.tgz", - "integrity": 
"sha512-e5iwyodOHhbMr/yNrc7fDYG4qlbIvI5gajyzPnb5TCwyhjApznQh1BMFou9b30SevY43gCJKXycoCBjMbsuW0Q==", - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "call-bound": "^1.0.3", - "define-properties": "^1.2.1", - "functions-have-names": "^1.2.3", - "hasown": "^2.0.2", - "is-callable": "^1.2.7" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/functions-have-names": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/functions-have-names/-/functions-have-names-1.2.3.tgz", - "integrity": "sha512-xckBUXyTIqT97tq2x2AMb+g163b5JFysYk0x4qxNFwbfQkmNZoiRHb6sPzI9/QV33WeuvVYBUIiD4NzNIyqaRQ==", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/gcd": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/gcd/-/gcd-0.0.1.tgz", - "integrity": "sha512-VNx3UEGr+ILJTiMs1+xc5SX1cMgJCrXezKPa003APUWNqQqaF6n25W8VcR7nHN6yRWbvvUTwCpZCFJeWC2kXlw==", - "license": "MIT" - }, - "node_modules/generator-function": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/generator-function/-/generator-function-2.0.1.tgz", - "integrity": "sha512-SFdFmIJi+ybC0vjlHN0ZGVGHc3lgE0DxPAT0djjVg+kjOnSqclqmj0KQ7ykTOLP6YxoqOvuAODGdcHJn+43q3g==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/get-caller-file": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", - "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", - "license": "ISC", - "engines": { - "node": "6.* || 8.* || >= 10.*" - } - }, - "node_modules/get-east-asian-width": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/get-east-asian-width/-/get-east-asian-width-1.5.0.tgz", - "integrity": "sha512-CQ+bEO+Tva/qlmw24dCejulK5pMzVnUOFOijVogd3KQs07HnRIgp8TGipvCCRT06xeYEbpbgwaCxglFyiuIcmA==", - "license": "MIT", 
- "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/get-intrinsic": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", - "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", - "license": "MIT", - "dependencies": { - "call-bind-apply-helpers": "^1.0.2", - "es-define-property": "^1.0.1", - "es-errors": "^1.3.0", - "es-object-atoms": "^1.1.1", - "function-bind": "^1.1.2", - "get-proto": "^1.0.1", - "gopd": "^1.2.0", - "has-symbols": "^1.1.0", - "hasown": "^2.0.2", - "math-intrinsics": "^1.1.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/get-nonce": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/get-nonce/-/get-nonce-1.0.1.tgz", - "integrity": "sha512-FJhYRoDaiatfEkUK8HKlicmu/3SGFD51q3itKDGoSTysQJBnfOcxU5GxnhE1E6soB76MbT0MBtnKJuXyAx+96Q==", - "license": "MIT", - "peer": true, - "engines": { - "node": ">=6" - } - }, - "node_modules/get-proto": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", - "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", - "license": "MIT", - "dependencies": { - "dunder-proto": "^1.0.1", - "es-object-atoms": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/get-stream": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz", - "integrity": "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==", - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/get-symbol-description": { - "version": "1.1.0", - "resolved": 
"https://registry.npmjs.org/get-symbol-description/-/get-symbol-description-1.1.0.tgz", - "integrity": "sha512-w9UMqWwJxHNOvoNzSJ2oPF5wvYcvP7jUvYzhp67yEhTi17ZDBBC1z9pTdGuzjD+EFIqLSYRweZjqfiPzQ06Ebg==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "es-errors": "^1.3.0", - "get-intrinsic": "^1.2.6" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/get-uri": { - "version": "6.0.5", - "resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.5.tgz", - "integrity": "sha512-b1O07XYq8eRuVzBNgJLstU6FYc1tS6wnMtF1I1D9lE8LxZSOGZ7LhxN54yPP6mGw5f2CkXY2BQUL9Fx41qvcIg==", - "license": "MIT", - "dependencies": { - "basic-ftp": "^5.0.2", - "data-uri-to-buffer": "^6.0.2", - "debug": "^4.3.4" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/github-from-package": { - "version": "0.0.0", - "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz", - "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==", - "license": "MIT", - "optional": true - }, - "node_modules/glob-parent": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", - "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", - "license": "ISC", - "dependencies": { - "is-glob": "^4.0.1" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/globalthis": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/globalthis/-/globalthis-1.0.4.tgz", - "integrity": "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ==", - "license": "MIT", - "dependencies": { - "define-properties": "^1.2.1", - "gopd": "^1.0.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/gopd": { - "version": 
"1.2.0", - "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", - "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/got": { - "version": "13.0.0", - "resolved": "https://registry.npmjs.org/got/-/got-13.0.0.tgz", - "integrity": "sha512-XfBk1CxOOScDcMr9O1yKkNaQyy865NbYs+F7dr4H0LZMVgCj2Le59k6PqbNHoL5ToeaEQUYh6c6yMfVcc6SJxA==", - "license": "MIT", - "dependencies": { - "@sindresorhus/is": "^5.2.0", - "@szmarczak/http-timer": "^5.0.1", - "cacheable-lookup": "^7.0.0", - "cacheable-request": "^10.2.8", - "decompress-response": "^6.0.0", - "form-data-encoder": "^2.1.2", - "get-stream": "^6.0.1", - "http2-wrapper": "^2.1.10", - "lowercase-keys": "^3.0.0", - "p-cancelable": "^3.0.0", - "responselike": "^3.0.0" - }, - "engines": { - "node": ">=16" - }, - "funding": { - "url": "https://github.com/sindresorhus/got?sponsor=1" - } - }, - "node_modules/graceful-fs": { - "version": "4.2.11", - "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", - "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", - "license": "ISC" - }, - "node_modules/has-bigints": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/has-bigints/-/has-bigints-1.1.0.tgz", - "integrity": "sha512-R3pbpkcIqv2Pm3dUwgjclDRVmWpTJW2DcMzcIhEXEx1oh/CEMObMm3KLmRJOdvhM7o4uQBnwr8pzRK2sJWIqfg==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/has-property-descriptors": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz", - "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==", - 
"license": "MIT", - "dependencies": { - "es-define-property": "^1.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/has-proto": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/has-proto/-/has-proto-1.2.0.tgz", - "integrity": "sha512-KIL7eQPfHQRC8+XluaIw7BHUwwqL19bQn4hzNgdr+1wXoU0KKj6rufu47lhY7KbJR2C6T6+PfyN0Ea7wkSS+qQ==", - "license": "MIT", - "dependencies": { - "dunder-proto": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/has-symbols": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", - "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/has-tostringtag": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", - "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", - "license": "MIT", - "dependencies": { - "has-symbols": "^1.0.3" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/hasown": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", - "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", - "license": "MIT", - "dependencies": { - "function-bind": "^1.1.2" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/hast-util-embedded": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/hast-util-embedded/-/hast-util-embedded-3.0.0.tgz", - "integrity": 
"sha512-naH8sld4Pe2ep03qqULEtvYr7EjrLK2QHY8KJR6RJkTUjPGObe1vnx585uzem2hGra+s1q08DZZpfgDVYRbaXA==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "hast-util-is-element": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-from-dom": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/hast-util-from-dom/-/hast-util-from-dom-5.0.1.tgz", - "integrity": "sha512-N+LqofjR2zuzTjCPzyDUdSshy4Ma6li7p/c3pA78uTwzFgENbgbUrm2ugwsOdcjI1muO+o6Dgzp9p8WHtn/39Q==", - "license": "ISC", - "dependencies": { - "@types/hast": "^3.0.0", - "hastscript": "^9.0.0", - "web-namespaces": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-from-html": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/hast-util-from-html/-/hast-util-from-html-2.0.3.tgz", - "integrity": "sha512-CUSRHXyKjzHov8yKsQjGOElXy/3EKpyX56ELnkHH34vDVw1N1XSQ1ZcAvTyAPtGqLTuKP/uxM+aLkSPqF/EtMw==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "devlop": "^1.1.0", - "hast-util-from-parse5": "^8.0.0", - "parse5": "^7.0.0", - "vfile": "^6.0.0", - "vfile-message": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-from-html-isomorphic": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/hast-util-from-html-isomorphic/-/hast-util-from-html-isomorphic-2.0.0.tgz", - "integrity": "sha512-zJfpXq44yff2hmE0XmwEOzdWin5xwH+QIhMLOScpX91e/NSGPsAzNCvLQDIEPyO2TXi+lBmU6hjLIhV8MwP2kw==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "hast-util-from-dom": "^5.0.0", - "hast-util-from-html": "^2.0.0", - "unist-util-remove-position": "^5.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-from-parse5": { - 
"version": "8.0.3", - "resolved": "https://registry.npmjs.org/hast-util-from-parse5/-/hast-util-from-parse5-8.0.3.tgz", - "integrity": "sha512-3kxEVkEKt0zvcZ3hCRYI8rqrgwtlIOFMWkbclACvjlDw8Li9S2hk/d51OI0nr/gIpdMHNepwgOKqZ/sy0Clpyg==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "@types/unist": "^3.0.0", - "devlop": "^1.0.0", - "hastscript": "^9.0.0", - "property-information": "^7.0.0", - "vfile": "^6.0.0", - "vfile-location": "^5.0.0", - "web-namespaces": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-has-property": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/hast-util-has-property/-/hast-util-has-property-3.0.0.tgz", - "integrity": "sha512-MNilsvEKLFpV604hwfhVStK0usFY/QmM5zX16bo7EjnAEGofr5YyI37kzopBlZJkHD4t887i+q/C8/tr5Q94cA==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-is-body-ok-link": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/hast-util-is-body-ok-link/-/hast-util-is-body-ok-link-3.0.1.tgz", - "integrity": "sha512-0qpnzOBLztXHbHQenVB8uNuxTnm/QBFUOmdOSsEn7GnBtyY07+ENTWVFBAnXd/zEgd9/SUG3lRY7hSIBWRgGpQ==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-is-element": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/hast-util-is-element/-/hast-util-is-element-3.0.0.tgz", - "integrity": "sha512-Val9mnv2IWpLbNPqc/pUem+a7Ipj2aHacCwgNfTiK0vJKl0LF+4Ba4+v1oPHFpf3bLYmreq0/l3Gud9S5OH42g==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-minify-whitespace": { - 
"version": "1.0.1", - "resolved": "https://registry.npmjs.org/hast-util-minify-whitespace/-/hast-util-minify-whitespace-1.0.1.tgz", - "integrity": "sha512-L96fPOVpnclQE0xzdWb/D12VT5FabA7SnZOUMtL1DbXmYiHJMXZvFkIZfiMmTCNJHUeO2K9UYNXoVyfz+QHuOw==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "hast-util-embedded": "^3.0.0", - "hast-util-is-element": "^3.0.0", - "hast-util-whitespace": "^3.0.0", - "unist-util-is": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-parse-selector": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/hast-util-parse-selector/-/hast-util-parse-selector-4.0.0.tgz", - "integrity": "sha512-wkQCkSYoOGCRKERFWcxMVMOcYE2K1AaNLU8DXS9arxnLOUEWbOXKXiJUNzEpqZ3JOKpnha3jkFrumEjVliDe7A==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-phrasing": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/hast-util-phrasing/-/hast-util-phrasing-3.0.1.tgz", - "integrity": "sha512-6h60VfI3uBQUxHqTyMymMZnEbNl1XmEGtOxxKYL7stY2o601COo62AWAYBQR9lZbYXYSBoxag8UpPRXK+9fqSQ==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "hast-util-embedded": "^3.0.0", - "hast-util-has-property": "^3.0.0", - "hast-util-is-body-ok-link": "^3.0.0", - "hast-util-is-element": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-to-estree": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/hast-util-to-estree/-/hast-util-to-estree-3.1.3.tgz", - "integrity": "sha512-48+B/rJWAp0jamNbAAf9M7Uf//UVqAoMmgXhBdxTDJLGKY+LRnZ99qcG+Qjl5HfMpYNzS5v4EAwVEF34LeAj7w==", - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0", - "@types/estree-jsx": "^1.0.0", - "@types/hast": "^3.0.0", - 
"comma-separated-tokens": "^2.0.0", - "devlop": "^1.0.0", - "estree-util-attach-comments": "^3.0.0", - "estree-util-is-identifier-name": "^3.0.0", - "hast-util-whitespace": "^3.0.0", - "mdast-util-mdx-expression": "^2.0.0", - "mdast-util-mdx-jsx": "^3.0.0", - "mdast-util-mdxjs-esm": "^2.0.0", - "property-information": "^7.0.0", - "space-separated-tokens": "^2.0.0", - "style-to-js": "^1.0.0", - "unist-util-position": "^5.0.0", - "zwitch": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-to-html": { - "version": "9.0.4", - "resolved": "https://registry.npmjs.org/hast-util-to-html/-/hast-util-to-html-9.0.4.tgz", - "integrity": "sha512-wxQzXtdbhiwGAUKrnQJXlOPmHnEehzphwkK7aluUPQ+lEc1xefC8pblMgpp2w5ldBTEfveRIrADcrhGIWrlTDA==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "@types/unist": "^3.0.0", - "ccount": "^2.0.0", - "comma-separated-tokens": "^2.0.0", - "hast-util-whitespace": "^3.0.0", - "html-void-elements": "^3.0.0", - "mdast-util-to-hast": "^13.0.0", - "property-information": "^6.0.0", - "space-separated-tokens": "^2.0.0", - "stringify-entities": "^4.0.0", - "zwitch": "^2.0.4" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-to-html/node_modules/property-information": { - "version": "6.5.0", - "resolved": "https://registry.npmjs.org/property-information/-/property-information-6.5.0.tgz", - "integrity": "sha512-PgTgs/BlvHxOu8QuEN7wi5A0OmXaBcHpmCSTehcs6Uuu9IkDIEo13Hy7n898RHfrQ49vKCoGeWZSaAK01nwVig==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/hast-util-to-jsx-runtime": { - "version": "2.3.6", - "resolved": "https://registry.npmjs.org/hast-util-to-jsx-runtime/-/hast-util-to-jsx-runtime-2.3.6.tgz", - "integrity": 
"sha512-zl6s8LwNyo1P9uw+XJGvZtdFF1GdAkOg8ujOw+4Pyb76874fLps4ueHXDhXWdk6YHQ6OgUtinliG7RsYvCbbBg==", - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0", - "@types/hast": "^3.0.0", - "@types/unist": "^3.0.0", - "comma-separated-tokens": "^2.0.0", - "devlop": "^1.0.0", - "estree-util-is-identifier-name": "^3.0.0", - "hast-util-whitespace": "^3.0.0", - "mdast-util-mdx-expression": "^2.0.0", - "mdast-util-mdx-jsx": "^3.0.0", - "mdast-util-mdxjs-esm": "^2.0.0", - "property-information": "^7.0.0", - "space-separated-tokens": "^2.0.0", - "style-to-js": "^1.0.0", - "unist-util-position": "^5.0.0", - "vfile-message": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-to-mdast": { - "version": "10.1.0", - "resolved": "https://registry.npmjs.org/hast-util-to-mdast/-/hast-util-to-mdast-10.1.0.tgz", - "integrity": "sha512-DsL/SvCK9V7+vfc6SLQ+vKIyBDXTk2KLSbfBYkH4zeF/uR1yBajHRhkzuaUSGOB1WJSTieJBdHwxlC+HLKvZZw==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "@types/mdast": "^4.0.0", - "@ungap/structured-clone": "^1.0.0", - "hast-util-phrasing": "^3.0.0", - "hast-util-to-html": "^9.0.0", - "hast-util-to-text": "^4.0.0", - "hast-util-whitespace": "^3.0.0", - "mdast-util-phrasing": "^4.0.0", - "mdast-util-to-hast": "^13.0.0", - "mdast-util-to-string": "^4.0.0", - "rehype-minify-whitespace": "^6.0.0", - "trim-trailing-lines": "^2.0.0", - "unist-util-position": "^5.0.0", - "unist-util-visit": "^5.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-to-string": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/hast-util-to-string/-/hast-util-to-string-3.0.1.tgz", - "integrity": "sha512-XelQVTDWvqcl3axRfI0xSeoVKzyIFPwsAGSLIsKdJKQMXDYJS4WYrBNF/8J7RdhIcFI2BOHgAifggsvsxp/3+A==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0" - }, - "funding": { - 
"type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-to-text": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/hast-util-to-text/-/hast-util-to-text-4.0.2.tgz", - "integrity": "sha512-KK6y/BN8lbaq654j7JgBydev7wuNMcID54lkRav1P0CaE1e47P72AWWPiGKXTJU271ooYzcvTAn/Zt0REnvc7A==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "@types/unist": "^3.0.0", - "hast-util-is-element": "^3.0.0", - "unist-util-find-after": "^5.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-whitespace": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/hast-util-whitespace/-/hast-util-whitespace-3.0.0.tgz", - "integrity": "sha512-88JUN06ipLwsnv+dVn+OIYOvAuvBMy/Qoi6O7mQHxdPXpjy+Cd6xRkWwux7DKO+4sYILtLBRIKgsdpS2gQc7qw==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hastscript": { - "version": "9.0.1", - "resolved": "https://registry.npmjs.org/hastscript/-/hastscript-9.0.1.tgz", - "integrity": "sha512-g7df9rMFX/SPi34tyGCyUBREQoKkapwdY/T04Qn9TDWfHhAYt4/I0gMVirzK5wEzeUqIjEB+LXC/ypb7Aqno5w==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "comma-separated-tokens": "^2.0.0", - "hast-util-parse-selector": "^4.0.0", - "property-information": "^7.0.0", - "space-separated-tokens": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hex-rgb": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/hex-rgb/-/hex-rgb-5.0.0.tgz", - "integrity": "sha512-NQO+lgVUCtHxZ792FodgW0zflK+ozS9X9dwGp9XvvmPlH7pyxd588cn24TD3rmPm/N0AIRXF10Otah8yKqGw4w==", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - 
"node_modules/html-void-elements": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/html-void-elements/-/html-void-elements-3.0.0.tgz", - "integrity": "sha512-bEqo66MRXsUGxWHV5IP0PUiAWwoEjba4VCzg0LjFJBpchPaTfyfCKTG6bc5F8ucKec3q5y6qOdGyYTSBEvhCrg==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/http-cache-semantics": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.2.0.tgz", - "integrity": "sha512-dTxcvPXqPvXBQpq5dUr6mEMJX4oIEFv6bwom3FDwKRDsuIjjJGANqhBuoAn9c1RQJIdAKav33ED65E2ys+87QQ==", - "license": "BSD-2-Clause" - }, - "node_modules/http-errors": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.0.tgz", - "integrity": "sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==", - "license": "MIT", - "dependencies": { - "depd": "2.0.0", - "inherits": "2.0.4", - "setprototypeof": "1.2.0", - "statuses": "2.0.1", - "toidentifier": "1.0.1" - }, - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/http-proxy-agent": { - "version": "7.0.2", - "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", - "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", - "license": "MIT", - "dependencies": { - "agent-base": "^7.1.0", - "debug": "^4.3.4" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/http2-wrapper": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/http2-wrapper/-/http2-wrapper-2.2.1.tgz", - "integrity": "sha512-V5nVw1PAOgfI3Lmeaj2Exmeg7fenjhRUgz1lPSezy1CuhPYbgQtbQj4jZfEAEMlaL+vupsvhjqCyjzob0yxsmQ==", - "license": "MIT", - "dependencies": { - "quick-lru": "^5.1.1", - "resolve-alpn": "^1.2.0" - }, - "engines": { - "node": ">=10.19.0" - } - }, - "node_modules/https-proxy-agent": { - "version": "7.0.6", - 
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", - "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", - "license": "MIT", - "dependencies": { - "agent-base": "^7.1.2", - "debug": "4" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/ico-endec": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/ico-endec/-/ico-endec-0.1.6.tgz", - "integrity": "sha512-ZdLU38ZoED3g1j3iEyzcQj+wAkY2xfWNkymszfJPoxucIUhK7NayQ+/C4Kv0nDFMIsbtbEHldv3V8PU494/ueQ==", - "license": "MPL-2.0" - }, - "node_modules/iconv-lite": { - "version": "0.7.2", - "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.7.2.tgz", - "integrity": "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==", - "license": "MIT", - "dependencies": { - "safer-buffer": ">= 2.1.2 < 3.0.0" - }, - "engines": { - "node": ">=0.10.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/express" - } - }, - "node_modules/ieee754": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", - "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "BSD-3-Clause" - }, - "node_modules/ignore": { - "version": "7.0.5", - "resolved": "https://registry.npmjs.org/ignore/-/ignore-7.0.5.tgz", - "integrity": "sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg==", - "license": "MIT", - "engines": { - "node": ">= 4" - } - }, - "node_modules/immer": { - "version": "9.0.21", - "resolved": "https://registry.npmjs.org/immer/-/immer-9.0.21.tgz", - 
"integrity": "sha512-bc4NBHqOqSfRW7POMkHd51LvClaeMXpm8dx0e8oE2GORbq5aRK7Bxl4FyzVLdGtLmvLKL7BTDBG5ACQm4HWjTA==", - "license": "MIT", - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/immer" - } - }, - "node_modules/import-fresh": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.1.tgz", - "integrity": "sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ==", - "license": "MIT", - "dependencies": { - "parent-module": "^1.0.0", - "resolve-from": "^4.0.0" - }, - "engines": { - "node": ">=6" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/indent-string": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/indent-string/-/indent-string-5.0.0.tgz", - "integrity": "sha512-m6FAo/spmsW2Ab2fU35JTYwtOKa2yAwXSwgjSv1TJzh4Mh7mC3lzAOVLBprb72XsTrgkEIsl7YrFNAiDiRhIGg==", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/inherits": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", - "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", - "license": "ISC" - }, - "node_modules/ini": { - "version": "1.3.8", - "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz", - "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==", - "license": "ISC", - "optional": true - }, - "node_modules/ink": { - "version": "6.3.0", - "resolved": "https://registry.npmjs.org/ink/-/ink-6.3.0.tgz", - "integrity": "sha512-2CbJAa7XeziZYe6pDS5RVLirRY28iSGMQuEV8jRU5NQsONQNfcR/BZHHc9vkMg2lGYTHTM2pskxC1YmY28p6bQ==", - "license": "MIT", - "dependencies": { - "@alcalzone/ansi-tokenize": "^0.2.0", - "ansi-escapes": "^7.0.0", - "ansi-styles": "^6.2.1", - "auto-bind": "^5.0.1", - 
"chalk": "^5.6.0", - "cli-boxes": "^3.0.0", - "cli-cursor": "^4.0.0", - "cli-truncate": "^4.0.0", - "code-excerpt": "^4.0.0", - "es-toolkit": "^1.39.10", - "indent-string": "^5.0.0", - "is-in-ci": "^2.0.0", - "patch-console": "^2.0.0", - "react-reconciler": "^0.32.0", - "signal-exit": "^3.0.7", - "slice-ansi": "^7.1.0", - "stack-utils": "^2.0.6", - "string-width": "^7.2.0", - "type-fest": "^4.27.0", - "widest-line": "^5.0.0", - "wrap-ansi": "^9.0.0", - "ws": "^8.18.0", - "yoga-layout": "~3.2.1" - }, - "engines": { - "node": ">=20" - }, - "peerDependencies": { - "@types/react": ">=19.0.0", - "react": ">=19.0.0", - "react-devtools-core": "^4.19.1" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "react-devtools-core": { - "optional": true - } - } - }, - "node_modules/ink-spinner": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/ink-spinner/-/ink-spinner-5.0.0.tgz", - "integrity": "sha512-EYEasbEjkqLGyPOUc8hBJZNuC5GvXGMLu0w5gdTNskPc7Izc5vO3tdQEYnzvshucyGCBXc86ig0ujXPMWaQCdA==", - "license": "MIT", - "dependencies": { - "cli-spinners": "^2.7.0" - }, - "engines": { - "node": ">=14.16" - }, - "peerDependencies": { - "ink": ">=4.0.0", - "react": ">=18.0.0" - } - }, - "node_modules/ink/node_modules/chalk": { - "version": "5.6.2", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.6.2.tgz", - "integrity": "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==", - "license": "MIT", - "engines": { - "node": "^12.17.0 || ^14.13 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/chalk/chalk?sponsor=1" - } - }, - "node_modules/ink/node_modules/signal-exit": { - "version": "3.0.7", - "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", - "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==", - "license": "ISC" - }, - "node_modules/ink/node_modules/wrap-ansi": { - "version": "9.0.2", - 
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-9.0.2.tgz", - "integrity": "sha512-42AtmgqjV+X1VpdOfyTGOYRi0/zsoLqtXQckTmqTeybT+BDIbM/Guxo7x3pE2vtpr1ok6xRqM9OpBe+Jyoqyww==", - "license": "MIT", - "dependencies": { - "ansi-styles": "^6.2.1", - "string-width": "^7.0.0", - "strip-ansi": "^7.1.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/inline-style-parser": { - "version": "0.2.7", - "resolved": "https://registry.npmjs.org/inline-style-parser/-/inline-style-parser-0.2.7.tgz", - "integrity": "sha512-Nb2ctOyNR8DqQoR0OwRG95uNWIC0C1lCgf5Naz5H6Ji72KZ8OcFZLz2P5sNgwlyoJ8Yif11oMuYs5pBQa86csA==", - "license": "MIT" - }, - "node_modules/inquirer": { - "version": "12.3.0", - "resolved": "https://registry.npmjs.org/inquirer/-/inquirer-12.3.0.tgz", - "integrity": "sha512-3NixUXq+hM8ezj2wc7wC37b32/rHq1MwNZDYdvx+d6jokOD+r+i8Q4Pkylh9tISYP114A128LCX8RKhopC5RfQ==", - "license": "MIT", - "dependencies": { - "@inquirer/core": "^10.1.2", - "@inquirer/prompts": "^7.2.1", - "@inquirer/type": "^3.0.2", - "ansi-escapes": "^4.3.2", - "mute-stream": "^2.0.0", - "run-async": "^3.0.0", - "rxjs": "^7.8.1" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@types/node": ">=18" - } - }, - "node_modules/inquirer/node_modules/ansi-escapes": { - "version": "4.3.2", - "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-4.3.2.tgz", - "integrity": "sha512-gKXj5ALrKWQLsYG9jlTRmR/xKluxHV+Z9QEwNIgCfM1/uwPMCuzVVnh5mwTd+OuBZcwSIMbqssNWRm1lE51QaQ==", - "license": "MIT", - "dependencies": { - "type-fest": "^0.21.3" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/inquirer/node_modules/type-fest": { - "version": "0.21.3", - "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.21.3.tgz", - "integrity": 
"sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==", - "license": "(MIT OR CC0-1.0)", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/internal-slot": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/internal-slot/-/internal-slot-1.1.0.tgz", - "integrity": "sha512-4gd7VpWNQNB4UKKCFFVcp1AVv+FMOgs9NKzjHKusc8jTMhd5eL1NqQqOpE0KzMds804/yHlglp3uxgluOqAPLw==", - "license": "MIT", - "dependencies": { - "es-errors": "^1.3.0", - "hasown": "^2.0.2", - "side-channel": "^1.1.0" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/ip-address": { - "version": "10.1.0", - "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.0.tgz", - "integrity": "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==", - "license": "MIT", - "engines": { - "node": ">= 12" - } - }, - "node_modules/ip-regex": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ip-regex/-/ip-regex-4.3.0.tgz", - "integrity": "sha512-B9ZWJxHHOHUhUjCPrMpLD4xEq35bUTClHM1S6CBU5ixQnkZmwipwgc96vAd7AAGM9TGHvJR+Uss+/Ak6UphK+Q==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/ipaddr.js": { - "version": "1.9.1", - "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", - "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==", - "license": "MIT", - "engines": { - "node": ">= 0.10" - } - }, - "node_modules/is-alphabetical": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/is-alphabetical/-/is-alphabetical-2.0.1.tgz", - "integrity": "sha512-FWyyY60MeTNyeSRpkM2Iry0G9hpr7/9kD40mD/cGQEuilcZYS4okz8SN2Q6rLCJ8gbCt6fN+rC+6tMGS99LaxQ==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/is-alphanumerical": { - "version": 
"2.0.1", - "resolved": "https://registry.npmjs.org/is-alphanumerical/-/is-alphanumerical-2.0.1.tgz", - "integrity": "sha512-hmbYhX/9MUMF5uh7tOXyK/n0ZvWpad5caBA17GsC6vyuCqaWliRG5K1qS9inmUhEMaOBIW7/whAnSwveW/LtZw==", - "license": "MIT", - "dependencies": { - "is-alphabetical": "^2.0.0", - "is-decimal": "^2.0.0" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/is-array-buffer": { - "version": "3.0.5", - "resolved": "https://registry.npmjs.org/is-array-buffer/-/is-array-buffer-3.0.5.tgz", - "integrity": "sha512-DDfANUiiG2wC1qawP66qlTugJeL5HyzMpfr8lLK+jMQirGzNod0B12cFB/9q838Ru27sBwfw78/rdoU7RERz6A==", - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "call-bound": "^1.0.3", - "get-intrinsic": "^1.2.6" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-arrayish": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz", - "integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==", - "license": "MIT" - }, - "node_modules/is-async-function": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-async-function/-/is-async-function-2.1.1.tgz", - "integrity": "sha512-9dgM/cZBnNvjzaMYHVoxxfPj2QXt22Ev7SuuPrs+xav0ukGB0S6d4ydZdEiM48kLx5kDV+QBPrpVnFyefL8kkQ==", - "license": "MIT", - "dependencies": { - "async-function": "^1.0.0", - "call-bound": "^1.0.3", - "get-proto": "^1.0.1", - "has-tostringtag": "^1.0.2", - "safe-regex-test": "^1.1.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-bigint": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/is-bigint/-/is-bigint-1.1.0.tgz", - "integrity": "sha512-n4ZT37wG78iz03xPRKJrHTdZbe3IicyucEtdRsV5yglwc3GyUfbAfpSeD0FJ41NbUNSt5wbhqfp1fS+BgnvDFQ==", - "license": "MIT", - 
"dependencies": { - "has-bigints": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-binary-path": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", - "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==", - "license": "MIT", - "dependencies": { - "binary-extensions": "^2.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/is-boolean-object": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/is-boolean-object/-/is-boolean-object-1.2.2.tgz", - "integrity": "sha512-wa56o2/ElJMYqjCjGkXri7it5FbebW5usLw/nPmCMs5DeZ7eziSYZhSmPRn0txqeW4LnAmQQU7FgqLpsEFKM4A==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "has-tostringtag": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-callable": { - "version": "1.2.7", - "resolved": "https://registry.npmjs.org/is-callable/-/is-callable-1.2.7.tgz", - "integrity": "sha512-1BC0BVFhS/p0qtw6enp8e+8OD0UrK0oFLztSjNzhcKA3WDuJxxAPXzPuPtKkjEY9UUoEWlX/8fgKeu2S8i9JTA==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-core-module": { - "version": "2.16.1", - "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.16.1.tgz", - "integrity": "sha512-UfoeMA6fIJ8wTYFEUjelnaGI67v6+N7qXJEvQuIGa99l4xsCruSYOVSQ0uPANn4dAzm8lkYPaKLrrijLq7x23w==", - "license": "MIT", - "dependencies": { - "hasown": "^2.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-data-view": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-data-view/-/is-data-view-1.0.2.tgz", - "integrity": 
"sha512-RKtWF8pGmS87i2D6gqQu/l7EYRlVdfzemCJN/P3UOs//x1QE7mfhvzHIApBTRf7axvT6DMGwSwBXYCT0nfB9xw==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.2", - "get-intrinsic": "^1.2.6", - "is-typed-array": "^1.1.13" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-date-object": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/is-date-object/-/is-date-object-1.1.0.tgz", - "integrity": "sha512-PwwhEakHVKTdRNVOw+/Gyh0+MzlCl4R6qKvkhuvLtPMggI1WAHt9sOwZxQLSGpUaDnrdyDsomoRgNnCfKNSXXg==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.2", - "has-tostringtag": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-decimal": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/is-decimal/-/is-decimal-2.0.1.tgz", - "integrity": "sha512-AAB9hiomQs5DXWcRB1rqsxGUstbRroFOPPVAomNk/3XHR5JyEZChOyTWe2oayKnsSsr/kcGqF+z6yuH6HHpN0A==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/is-docker": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/is-docker/-/is-docker-2.2.1.tgz", - "integrity": "sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ==", - "license": "MIT", - "bin": { - "is-docker": "cli.js" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/is-finalizationregistry": { - "version": "1.1.1", - "resolved": 
"https://registry.npmjs.org/is-finalizationregistry/-/is-finalizationregistry-1.1.1.tgz", - "integrity": "sha512-1pC6N8qWJbWoPtEjgcL2xyhQOP491EQjeUo3qTKcmV8YSDDJrOepfG8pcC7h/QgnQHYSv0mJ3Z/ZWxmatVrysg==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-fullwidth-code-point": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-5.1.0.tgz", - "integrity": "sha512-5XHYaSyiqADb4RnZ1Bdad6cPp8Toise4TzEjcOYDHZkTCbKgiUl7WTUCpNWHuxmDt91wnsZBc9xinNzopv3JMQ==", - "license": "MIT", - "dependencies": { - "get-east-asian-width": "^1.3.1" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/is-generator-function": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/is-generator-function/-/is-generator-function-1.1.2.tgz", - "integrity": "sha512-upqt1SkGkODW9tsGNG5mtXTXtECizwtS2kA161M+gJPc1xdb/Ax629af6YrTwcOeQHbewrPNlE5Dx7kzvXTizA==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.4", - "generator-function": "^2.0.0", - "get-proto": "^1.0.1", - "has-tostringtag": "^1.0.2", - "safe-regex-test": "^1.1.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-glob": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", - "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", - "license": "MIT", - "dependencies": { - "is-extglob": "^2.1.1" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/is-hexadecimal": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/is-hexadecimal/-/is-hexadecimal-2.0.1.tgz", - "integrity": 
"sha512-DgZQp241c8oO6cA1SbTEWiXeoxV42vlcJxgH+B3hi1AiqqKruZR3ZGF8In3fj4+/y/7rHvlOZLZtgJ/4ttYGZg==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/is-in-ci": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/is-in-ci/-/is-in-ci-2.0.0.tgz", - "integrity": "sha512-cFeerHriAnhrQSbpAxL37W1wcJKUUX07HyLWZCW1URJT/ra3GyUTzBgUnh24TMVfNTV2Hij2HLxkPHFZfOZy5w==", - "license": "MIT", - "bin": { - "is-in-ci": "cli.js" - }, - "engines": { - "node": ">=20" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/is-ip": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/is-ip/-/is-ip-3.1.0.tgz", - "integrity": "sha512-35vd5necO7IitFPjd/YBeqwWnyDWbuLH9ZXQdMfDA8TEo7pv5X8yfrvVO3xbJbLUlERCMvf6X0hTUamQxCYJ9Q==", - "license": "MIT", - "dependencies": { - "ip-regex": "^4.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/is-map": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/is-map/-/is-map-2.0.3.tgz", - "integrity": "sha512-1Qed0/Hr2m+YqxnM09CjA2d/i6YZNfF6R2oRAOj36eUdS6qIV/huPJNSEpKbupewFs+ZsJlxsjjPbc0/afW6Lw==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-negative-zero": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/is-negative-zero/-/is-negative-zero-2.0.3.tgz", - "integrity": "sha512-5KoIu2Ngpyek75jXodFvnafB6DJgr3u8uuK0LEZJjrU19DrMD3EVERaR8sjz8CCGgpZvxPl9SuE1GMVPFHx1mw==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-number": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", - "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", - "license": "MIT", - "engines": { - "node": 
">=0.12.0" - } - }, - "node_modules/is-number-object": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/is-number-object/-/is-number-object-1.1.1.tgz", - "integrity": "sha512-lZhclumE1G6VYD8VHe35wFaIif+CTy5SJIi5+3y4psDgWu4wPDoBhF8NxUOinEc7pHgiTsT6MaBb92rKhhD+Xw==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "has-tostringtag": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-online": { - "version": "10.0.0", - "resolved": "https://registry.npmjs.org/is-online/-/is-online-10.0.0.tgz", - "integrity": "sha512-WCPdKwNDjXJJmUubf2VHLMDBkUZEtuOvpXUfUnUFbEnM6In9ByiScL4f4jKACz/fsb2qDkesFerW3snf/AYz3A==", - "license": "MIT", - "dependencies": { - "got": "^12.1.0", - "p-any": "^4.0.0", - "p-timeout": "^5.1.0", - "public-ip": "^5.0.0" - }, - "engines": { - "node": ">=14.16" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/is-online/node_modules/got": { - "version": "12.6.1", - "resolved": "https://registry.npmjs.org/got/-/got-12.6.1.tgz", - "integrity": "sha512-mThBblvlAF1d4O5oqyvN+ZxLAYwIJK7bpMxgYqPD9okW0C3qm5FFn7k811QrcuEBwaogR3ngOFoCfs6mRv7teQ==", - "license": "MIT", - "dependencies": { - "@sindresorhus/is": "^5.2.0", - "@szmarczak/http-timer": "^5.0.1", - "cacheable-lookup": "^7.0.0", - "cacheable-request": "^10.2.8", - "decompress-response": "^6.0.0", - "form-data-encoder": "^2.1.2", - "get-stream": "^6.0.1", - "http2-wrapper": "^2.1.10", - "lowercase-keys": "^3.0.0", - "p-cancelable": "^3.0.0", - "responselike": "^3.0.0" - }, - "engines": { - "node": ">=14.16" - }, - "funding": { - "url": "https://github.com/sindresorhus/got?sponsor=1" - } - }, - "node_modules/is-plain-obj": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-4.1.0.tgz", - "integrity": "sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg==", - 
"license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/is-regex": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.2.1.tgz", - "integrity": "sha512-MjYsKHO5O7mCsmRGxWcLWheFqN9DJ/2TmngvjKXihe6efViPqc274+Fx/4fYj/r03+ESvBdTXK0V6tA3rgez1g==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.2", - "gopd": "^1.2.0", - "has-tostringtag": "^1.0.2", - "hasown": "^2.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-set": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/is-set/-/is-set-2.0.3.tgz", - "integrity": "sha512-iPAjerrse27/ygGLxw+EBR9agv9Y6uLeYVJMu+QNCoouJ1/1ri0mGrcWpfCqFZuzzx3WjtwxG098X+n4OuRkPg==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-shared-array-buffer": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/is-shared-array-buffer/-/is-shared-array-buffer-1.0.4.tgz", - "integrity": "sha512-ISWac8drv4ZGfwKl5slpHG9OwPNty4jOWPRIhBpxOoD+hqITiwuipOQ2bNthAzwA3B4fIjO4Nln74N0S9byq8A==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-string": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/is-string/-/is-string-1.1.1.tgz", - "integrity": "sha512-BtEeSsoaQjlSPBemMQIrY1MY0uM6vnS1g5fmufYOtnxLGUZM2178PKbhsk7Ffv58IX+ZtcvoGwccYsh0PglkAA==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "has-tostringtag": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-symbol": { - "version": "1.1.1", - "resolved": 
"https://registry.npmjs.org/is-symbol/-/is-symbol-1.1.1.tgz", - "integrity": "sha512-9gGx6GTtCQM73BgmHQXfDmLtfjjTUDSyoxTCbp5WtoixAhfgsDirWIcVQ/IHpvI5Vgd5i/J5F7B9cN/WlVbC/w==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.2", - "has-symbols": "^1.1.0", - "safe-regex-test": "^1.1.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-typed-array": { - "version": "1.1.15", - "resolved": "https://registry.npmjs.org/is-typed-array/-/is-typed-array-1.1.15.tgz", - "integrity": "sha512-p3EcsicXjit7SaskXHs1hA91QxgTw46Fv6EFKKGS5DRFLD8yKnohjF3hxoju94b/OcMZoQukzpPpBE9uLVKzgQ==", - "license": "MIT", - "dependencies": { - "which-typed-array": "^1.1.16" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-weakmap": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/is-weakmap/-/is-weakmap-2.0.2.tgz", - "integrity": "sha512-K5pXYOm9wqY1RgjpL3YTkF39tni1XajUIkawTLUo9EZEVUFga5gSQJF8nNS7ZwJQ02y+1YCNYcMh+HIf1ZqE+w==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-weakref": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/is-weakref/-/is-weakref-1.1.1.tgz", - "integrity": "sha512-6i9mGWSlqzNMEqpCp93KwRS1uUOodk2OJ6b+sq7ZPDSy2WuI5NFIxp/254TytR8ftefexkWn5xNiHUNpPOfSew==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-weakset": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/is-weakset/-/is-weakset-2.0.4.tgz", - "integrity": "sha512-mfcwb6IzQyOKTs84CQMrOwW4gQcaTOAWJ0zzJCl2WSPDrWk/OzDaImWFH3djXhb24g4eudZfLRozAvPGw4d9hQ==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "get-intrinsic": "^1.2.6" - 
}, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-wsl": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/is-wsl/-/is-wsl-2.2.0.tgz", - "integrity": "sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww==", - "license": "MIT", - "dependencies": { - "is-docker": "^2.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/isarray": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/isarray/-/isarray-2.0.5.tgz", - "integrity": "sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw==", - "license": "MIT" - }, - "node_modules/isexe": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", - "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", - "license": "ISC" - }, - "node_modules/jiti": { - "version": "1.21.7", - "resolved": "https://registry.npmjs.org/jiti/-/jiti-1.21.7.tgz", - "integrity": "sha512-/imKNG4EbWNrVjoNC/1H5/9GFy+tqjGBHCaSsN+P2RnPqjsLmv6UD3Ej+Kj8nBWaRAwyk7kK5ZUc+OEatnTR3A==", - "license": "MIT", - "bin": { - "jiti": "bin/jiti.js" - } - }, - "node_modules/jose": { - "version": "6.2.2", - "resolved": "https://registry.npmjs.org/jose/-/jose-6.2.2.tgz", - "integrity": "sha512-d7kPDd34KO/YnzaDOlikGpOurfF0ByC2sEV4cANCtdqLlTfBlw2p14O/5d/zv40gJPbIQxfES3nSx1/oYNyuZQ==", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/panva" - } - }, - "node_modules/js-tokens": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", - "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", - "license": "MIT" - }, - "node_modules/js-yaml": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", - "integrity": 
"sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", - "license": "MIT", - "dependencies": { - "argparse": "^2.0.1" - }, - "bin": { - "js-yaml": "bin/js-yaml.js" - } - }, - "node_modules/jsep": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/jsep/-/jsep-1.4.0.tgz", - "integrity": "sha512-B7qPcEVE3NVkmSJbaYxvv4cHkVW7DQsZz13pUMrfS8z8Q/BuShN+gcTXrUlPiGqM2/t/EEaI030bpxMqY8gMlw==", - "license": "MIT", - "engines": { - "node": ">= 10.16.0" - } - }, - "node_modules/json-buffer": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz", - "integrity": "sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==", - "license": "MIT" - }, - "node_modules/json-parse-even-better-errors": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", - "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==", - "license": "MIT" - }, - "node_modules/json-schema-traverse": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", - "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", - "license": "MIT" - }, - "node_modules/jsonc-parser": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/jsonc-parser/-/jsonc-parser-2.2.1.tgz", - "integrity": "sha512-o6/yDBYccGvTz1+QFevz6l6OBZ2+fMVu2JZ9CIhzsYRX4mjaK5IyX9eldUdCmga16zlgQxyrj5pt9kzuj2C02w==", - "license": "MIT" - }, - "node_modules/jsonfile": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.2.0.tgz", - "integrity": "sha512-FGuPw30AdOIUTRMC2OMRtQV+jkVj2cfPqSeWXv1NEAJ1qZ5zb1X6z1mFhbfOB/iy3ssJCD+3KuZ8r8C3uVFlAg==", - "license": "MIT", - "dependencies": { - "universalify": "^2.0.0" - }, - 
"optionalDependencies": { - "graceful-fs": "^4.1.6" - } - }, - "node_modules/jsonpath-plus": { - "version": "10.4.0", - "resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.4.0.tgz", - "integrity": "sha512-T92WWatJXmhBbKsgH/0hl+jxjdXrifi5IKeMY02DWggRxX0UElcbVzPlmgLTbvsPeW1PasQ6xE2Q75stkhGbsA==", - "license": "MIT", - "dependencies": { - "@jsep-plugin/assignment": "^1.3.0", - "@jsep-plugin/regex": "^1.0.4", - "jsep": "^1.4.0" - }, - "bin": { - "jsonpath": "bin/jsonpath-cli.js", - "jsonpath-plus": "bin/jsonpath-cli.js" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/jsonpointer": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/jsonpointer/-/jsonpointer-5.0.1.tgz", - "integrity": "sha512-p/nXbhSEcu3pZRdkW1OfJhpsVtW1gd4Wa1fnQc9YLiTfAjn0312eMKimbdIQzuZl9aa9xUGaRlP9T/CJE/ditQ==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/katex": { - "version": "0.16.45", - "resolved": "https://registry.npmjs.org/katex/-/katex-0.16.45.tgz", - "integrity": "sha512-pQpZbdBu7wCTmQUh7ufPmLr0pFoObnGUoL/yhtwJDgmmQpbkg/0HSVti25Fu4rmd1oCR6NGWe9vqTWuWv3GcNA==", - "funding": [ - "https://opencollective.com/katex", - "https://github.com/sponsors/katex" - ], - "license": "MIT", - "dependencies": { - "commander": "^8.3.0" - }, - "bin": { - "katex": "cli.js" - } - }, - "node_modules/keytar": { - "version": "7.9.0", - "resolved": "https://registry.npmjs.org/keytar/-/keytar-7.9.0.tgz", - "integrity": "sha512-VPD8mtVtm5JNtA2AErl6Chp06JBfy7diFQ7TQQhdpWOl6MrCRB+eRbvAZUsbGQS9kiMq0coJsy0W0vHpDCkWsQ==", - "hasInstallScript": true, - "license": "MIT", - "optional": true, - "dependencies": { - "node-addon-api": "^4.3.0", - "prebuild-install": "^7.0.1" - } - }, - "node_modules/keyv": { - "version": "4.5.4", - "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz", - "integrity": "sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==", - "license": "MIT", - 
"dependencies": { - "json-buffer": "3.0.1" - } - }, - "node_modules/lcm": { - "version": "0.0.3", - "resolved": "https://registry.npmjs.org/lcm/-/lcm-0.0.3.tgz", - "integrity": "sha512-TB+ZjoillV6B26Vspf9l2L/vKaRY/4ep3hahcyVkCGFgsTNRUQdc24bQeNFiZeoxH0vr5+7SfNRMQuPHv/1IrQ==", - "license": "MIT", - "dependencies": { - "gcd": "^0.0.1" - } - }, - "node_modules/leven": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/leven/-/leven-4.1.0.tgz", - "integrity": "sha512-KZ9W9nWDT7rF7Dazg8xyLHGLrmpgq2nVNFUckhqdW3szVP6YhCpp/RAnpmVExA9JvrMynjwSLVrEj3AepHR6ew==", - "license": "MIT", - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/lilconfig": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz", - "integrity": "sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==", - "license": "MIT", - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/antonk52" - } - }, - "node_modules/lines-and-columns": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", - "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==", - "license": "MIT" - }, - "node_modules/lodash": { - "version": "4.17.21", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", - "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==", - "license": "MIT" - }, - "node_modules/lodash.topath": { - "version": "4.5.2", - "resolved": "https://registry.npmjs.org/lodash.topath/-/lodash.topath-4.5.2.tgz", - "integrity": "sha512-1/W4dM+35DwvE/iEd1M9ekewOSTlpFekhw9mhAtrwjVqUr83/ilQiyAvmg4tVX7Unkcfl1KC+i9WdaT4B6aQcg==", - "license": "MIT" - }, - "node_modules/longest-streak": { - "version": "3.1.0", - 
"resolved": "https://registry.npmjs.org/longest-streak/-/longest-streak-3.1.0.tgz", - "integrity": "sha512-9Ri+o0JYgehTaVBBDoMqIl8GXtbWg711O3srftcHhZ0dqnETqLaoIK0x17fUw9rFSlK/0NlsKe0Ahhyl5pXE2g==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/loose-envify": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", - "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", - "license": "MIT", - "peer": true, - "dependencies": { - "js-tokens": "^3.0.0 || ^4.0.0" - }, - "bin": { - "loose-envify": "cli.js" - } - }, - "node_modules/lowercase-keys": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-3.0.0.tgz", - "integrity": "sha512-ozCC6gdQ+glXOQsveKD0YsDy8DSQFjDTz4zyzEHNV5+JP5D62LmfDZ6o1cycFx9ouG940M5dE8C8CTewdj2YWQ==", - "license": "MIT", - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/lru-cache": { - "version": "7.18.3", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz", - "integrity": "sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA==", - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/markdown-extensions": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/markdown-extensions/-/markdown-extensions-2.0.0.tgz", - "integrity": "sha512-o5vL7aDWatOTX8LzaS1WMoaoxIiLRQJuIKKe2wAw6IeULDHaqbiqiggmx+pKvZDb1Sj+pE46Sn1T7lCqfFtg1Q==", - "license": "MIT", - "engines": { - "node": ">=16" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/markdown-table": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/markdown-table/-/markdown-table-3.0.4.tgz", - "integrity": 
"sha512-wiYz4+JrLyb/DqW2hkFJxP7Vd7JuTDm77fvbM8VfEQdmSMqcImWeeRbHwZjBjIFki/VaMK2BhFi7oUUZeM5bqw==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/math-intrinsics": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", - "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/mdast-util-find-and-replace": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/mdast-util-find-and-replace/-/mdast-util-find-and-replace-3.0.2.tgz", - "integrity": "sha512-Tmd1Vg/m3Xz43afeNxDIhWRtFZgM2VLyaf4vSTYwudTyeuTneoL3qtWMA5jeLyz/O1vDJmmV4QuScFCA2tBPwg==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0", - "escape-string-regexp": "^5.0.0", - "unist-util-is": "^6.0.0", - "unist-util-visit-parents": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/mdast-util-from-markdown": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/mdast-util-from-markdown/-/mdast-util-from-markdown-2.0.2.tgz", - "integrity": "sha512-uZhTV/8NBuw0WHkPTrCqDOl0zVe1BIng5ZtHoDk49ME1qqcjYmmLmOf0gELgcRMxN4w2iuIeVso5/6QymSrgmA==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0", - "@types/unist": "^3.0.0", - "decode-named-character-reference": "^1.0.0", - "devlop": "^1.0.0", - "mdast-util-to-string": "^4.0.0", - "micromark": "^4.0.0", - "micromark-util-decode-numeric-character-reference": "^2.0.0", - "micromark-util-decode-string": "^2.0.0", - "micromark-util-normalize-identifier": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0", - "unist-util-stringify-position": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - 
"node_modules/mdast-util-frontmatter": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/mdast-util-frontmatter/-/mdast-util-frontmatter-2.0.1.tgz", - "integrity": "sha512-LRqI9+wdgC25P0URIJY9vwocIzCcksduHQ9OF2joxQoyTNVduwLAFUzjoopuRJbJAReaKrNQKAZKL3uCMugWJA==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0", - "devlop": "^1.0.0", - "escape-string-regexp": "^5.0.0", - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-to-markdown": "^2.0.0", - "micromark-extension-frontmatter": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/mdast-util-gfm": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/mdast-util-gfm/-/mdast-util-gfm-3.0.0.tgz", - "integrity": "sha512-dgQEX5Amaq+DuUqf26jJqSK9qgixgd6rYDHAv4aTBuA92cTknZlKpPfa86Z/s8Dj8xsAQpFfBmPUHWJBWqS4Bw==", - "license": "MIT", - "dependencies": { - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-gfm-autolink-literal": "^2.0.0", - "mdast-util-gfm-footnote": "^2.0.0", - "mdast-util-gfm-strikethrough": "^2.0.0", - "mdast-util-gfm-table": "^2.0.0", - "mdast-util-gfm-task-list-item": "^2.0.0", - "mdast-util-to-markdown": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/mdast-util-gfm-autolink-literal": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/mdast-util-gfm-autolink-literal/-/mdast-util-gfm-autolink-literal-2.0.1.tgz", - "integrity": "sha512-5HVP2MKaP6L+G6YaxPNjuL0BPrq9orG3TsrZ9YXbA3vDw/ACI4MEsnoDpn6ZNm7GnZgtAcONJyPhOP8tNJQavQ==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0", - "ccount": "^2.0.0", - "devlop": "^1.0.0", - "mdast-util-find-and-replace": "^3.0.0", - "micromark-util-character": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/mdast-util-gfm-footnote": { - "version": "2.1.0", - 
"resolved": "https://registry.npmjs.org/mdast-util-gfm-footnote/-/mdast-util-gfm-footnote-2.1.0.tgz", - "integrity": "sha512-sqpDWlsHn7Ac9GNZQMeUzPQSMzR6Wv0WKRNvQRg0KqHh02fpTz69Qc1QSseNX29bhz1ROIyNyxExfawVKTm1GQ==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0", - "devlop": "^1.1.0", - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-to-markdown": "^2.0.0", - "micromark-util-normalize-identifier": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/mdast-util-gfm-strikethrough": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/mdast-util-gfm-strikethrough/-/mdast-util-gfm-strikethrough-2.0.0.tgz", - "integrity": "sha512-mKKb915TF+OC5ptj5bJ7WFRPdYtuHv0yTRxK2tJvi+BDqbkiG7h7u/9SI89nRAYcmap2xHQL9D+QG/6wSrTtXg==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0", - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-to-markdown": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/mdast-util-gfm-table": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/mdast-util-gfm-table/-/mdast-util-gfm-table-2.0.0.tgz", - "integrity": "sha512-78UEvebzz/rJIxLvE7ZtDd/vIQ0RHv+3Mh5DR96p7cS7HsBhYIICDBCu8csTNWNO6tBWfqXPWekRuj2FNOGOZg==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0", - "devlop": "^1.0.0", - "markdown-table": "^3.0.0", - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-to-markdown": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/mdast-util-gfm-task-list-item": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/mdast-util-gfm-task-list-item/-/mdast-util-gfm-task-list-item-2.0.0.tgz", - "integrity": "sha512-IrtvNvjxC1o06taBAVJznEnkiHxLFTzgonUdy8hzFVeDun0uTjxxrRGVaNFqkU1wJR3RBPEfsxmU6jDWPofrTQ==", - "license": "MIT", - "dependencies": 
{ - "@types/mdast": "^4.0.0", - "devlop": "^1.0.0", - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-to-markdown": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/mdast-util-math": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/mdast-util-math/-/mdast-util-math-3.0.0.tgz", - "integrity": "sha512-Tl9GBNeG/AhJnQM221bJR2HPvLOSnLE/T9cJI9tlc6zwQk2nPk/4f0cHkOdEixQPC/j8UtKDdITswvLAy1OZ1w==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "@types/mdast": "^4.0.0", - "devlop": "^1.0.0", - "longest-streak": "^3.0.0", - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-to-markdown": "^2.1.0", - "unist-util-remove-position": "^5.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/mdast-util-mdx": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/mdast-util-mdx/-/mdast-util-mdx-3.0.0.tgz", - "integrity": "sha512-JfbYLAW7XnYTTbUsmpu0kdBUVe+yKVJZBItEjwyYJiDJuZ9w4eeaqks4HQO+R7objWgS2ymV60GYpI14Ug554w==", - "license": "MIT", - "dependencies": { - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-mdx-expression": "^2.0.0", - "mdast-util-mdx-jsx": "^3.0.0", - "mdast-util-mdxjs-esm": "^2.0.0", - "mdast-util-to-markdown": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/mdast-util-mdx-expression": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/mdast-util-mdx-expression/-/mdast-util-mdx-expression-2.0.1.tgz", - "integrity": "sha512-J6f+9hUp+ldTZqKRSg7Vw5V6MqjATc+3E4gf3CFNcuZNWD8XdyI6zQ8GqH7f8169MM6P7hMBRDVGnn7oHB9kXQ==", - "license": "MIT", - "dependencies": { - "@types/estree-jsx": "^1.0.0", - "@types/hast": "^3.0.0", - "@types/mdast": "^4.0.0", - "devlop": "^1.0.0", - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-to-markdown": "^2.0.0" - }, - "funding": { - "type": 
"opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/mdast-util-mdx-jsx": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/mdast-util-mdx-jsx/-/mdast-util-mdx-jsx-3.2.0.tgz", - "integrity": "sha512-lj/z8v0r6ZtsN/cGNNtemmmfoLAFZnjMbNyLzBafjzikOM+glrjNHPlf6lQDOTccj9n5b0PPihEBbhneMyGs1Q==", - "license": "MIT", - "dependencies": { - "@types/estree-jsx": "^1.0.0", - "@types/hast": "^3.0.0", - "@types/mdast": "^4.0.0", - "@types/unist": "^3.0.0", - "ccount": "^2.0.0", - "devlop": "^1.1.0", - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-to-markdown": "^2.0.0", - "parse-entities": "^4.0.0", - "stringify-entities": "^4.0.0", - "unist-util-stringify-position": "^4.0.0", - "vfile-message": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/mdast-util-mdxjs-esm": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/mdast-util-mdxjs-esm/-/mdast-util-mdxjs-esm-2.0.1.tgz", - "integrity": "sha512-EcmOpxsZ96CvlP03NghtH1EsLtr0n9Tm4lPUJUBccV9RwUOneqSycg19n5HGzCf+10LozMRSObtVr3ee1WoHtg==", - "license": "MIT", - "dependencies": { - "@types/estree-jsx": "^1.0.0", - "@types/hast": "^3.0.0", - "@types/mdast": "^4.0.0", - "devlop": "^1.0.0", - "mdast-util-from-markdown": "^2.0.0", - "mdast-util-to-markdown": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/mdast-util-phrasing": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/mdast-util-phrasing/-/mdast-util-phrasing-4.1.0.tgz", - "integrity": "sha512-TqICwyvJJpBwvGAMZjj4J2n0X8QWp21b9l0o7eXyVJ25YNWYbJDVIyD1bZXE6WtV6RmKJVYmQAKWa0zWOABz2w==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0", - "unist-util-is": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/mdast-util-to-hast": { - "version": "13.2.1", - 
"resolved": "https://registry.npmjs.org/mdast-util-to-hast/-/mdast-util-to-hast-13.2.1.tgz", - "integrity": "sha512-cctsq2wp5vTsLIcaymblUriiTcZd0CwWtCbLvrOzYCDZoWyMNV8sZ7krj09FSnsiJi3WVsHLM4k6Dq/yaPyCXA==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "@types/mdast": "^4.0.0", - "@ungap/structured-clone": "^1.0.0", - "devlop": "^1.0.0", - "micromark-util-sanitize-uri": "^2.0.0", - "trim-lines": "^3.0.0", - "unist-util-position": "^5.0.0", - "unist-util-visit": "^5.0.0", - "vfile": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/mdast-util-to-markdown": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/mdast-util-to-markdown/-/mdast-util-to-markdown-2.1.2.tgz", - "integrity": "sha512-xj68wMTvGXVOKonmog6LwyJKrYXZPvlwabaryTjLh9LuvovB/KAH+kvi8Gjj+7rJjsFi23nkUxRQv1KqSroMqA==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0", - "@types/unist": "^3.0.0", - "longest-streak": "^3.0.0", - "mdast-util-phrasing": "^4.0.0", - "mdast-util-to-string": "^4.0.0", - "micromark-util-classify-character": "^2.0.0", - "micromark-util-decode-string": "^2.0.0", - "unist-util-visit": "^5.0.0", - "zwitch": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/mdast-util-to-string": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/mdast-util-to-string/-/mdast-util-to-string-4.0.0.tgz", - "integrity": "sha512-0H44vDimn51F0YwvxSJSm0eCDOJTRlmN0R1yBh4HLj9wiV1Dn0QoXGbvFAWj2hSItVTlCmBF1hqKlIyUBVFLPg==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/media-typer": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", - "integrity": 
"sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==", - "license": "MIT", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/merge-descriptors": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz", - "integrity": "sha512-cCi6g3/Zr1iqQi6ySbseM1Xvooa98N0w31jzUYrXPX2xqObmFGHJ0tQ5u74H3mVh7wLouTseZyYIq39g8cNp1w==", - "license": "MIT" - }, - "node_modules/merge2": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", - "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", - "license": "MIT", - "engines": { - "node": ">= 8" - } - }, - "node_modules/methods": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz", - "integrity": "sha512-iclAHeNqNm68zFtnZ0e+1L2yUIdvzNoauKU4WBA3VvH/vPFieF7qfRlwUZU+DA9P9bPXIS90ulxoUoCH23sV2w==", - "license": "MIT", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/micromark": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/micromark/-/micromark-4.0.2.tgz", - "integrity": "sha512-zpe98Q6kvavpCr1NPVSCMebCKfD7CA2NqZ+rykeNhONIJBpc1tFKt9hucLGwha3jNTNI8lHpctWJWoimVF4PfA==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "@types/debug": "^4.0.0", - "debug": "^4.0.0", - "decode-named-character-reference": "^1.0.0", - "devlop": "^1.0.0", - "micromark-core-commonmark": "^2.0.0", - "micromark-factory-space": "^2.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-chunked": "^2.0.0", - "micromark-util-combine-extensions": "^2.0.0", - "micromark-util-decode-numeric-character-reference": "^2.0.0", - "micromark-util-encode": "^2.0.0", - "micromark-util-normalize-identifier": "^2.0.0", - 
"micromark-util-resolve-all": "^2.0.0", - "micromark-util-sanitize-uri": "^2.0.0", - "micromark-util-subtokenize": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-core-commonmark": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/micromark-core-commonmark/-/micromark-core-commonmark-2.0.3.tgz", - "integrity": "sha512-RDBrHEMSxVFLg6xvnXmb1Ayr2WzLAWjeSATAoxwKYJV94TeNavgoIdA0a9ytzDSVzBy2YKFK+emCPOEibLeCrg==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "decode-named-character-reference": "^1.0.0", - "devlop": "^1.0.0", - "micromark-factory-destination": "^2.0.0", - "micromark-factory-label": "^2.0.0", - "micromark-factory-space": "^2.0.0", - "micromark-factory-title": "^2.0.0", - "micromark-factory-whitespace": "^2.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-chunked": "^2.0.0", - "micromark-util-classify-character": "^2.0.0", - "micromark-util-html-tag-name": "^2.0.0", - "micromark-util-normalize-identifier": "^2.0.0", - "micromark-util-resolve-all": "^2.0.0", - "micromark-util-subtokenize": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-extension-frontmatter": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/micromark-extension-frontmatter/-/micromark-extension-frontmatter-2.0.0.tgz", - "integrity": "sha512-C4AkuM3dA58cgZha7zVnuVxBhDsbttIMiytjgsM2XbHAB2faRVaHRle40558FBN+DJcrLNCoqG5mlrpdU4cRtg==", - "license": "MIT", - "dependencies": { - "fault": "^2.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - 
"node_modules/micromark-extension-gfm": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/micromark-extension-gfm/-/micromark-extension-gfm-3.0.0.tgz", - "integrity": "sha512-vsKArQsicm7t0z2GugkCKtZehqUm31oeGBV/KVSorWSy8ZlNAv7ytjFhvaryUiCUJYqs+NoE6AFhpQvBTM6Q4w==", - "license": "MIT", - "dependencies": { - "micromark-extension-gfm-autolink-literal": "^2.0.0", - "micromark-extension-gfm-footnote": "^2.0.0", - "micromark-extension-gfm-strikethrough": "^2.0.0", - "micromark-extension-gfm-table": "^2.0.0", - "micromark-extension-gfm-tagfilter": "^2.0.0", - "micromark-extension-gfm-task-list-item": "^2.0.0", - "micromark-util-combine-extensions": "^2.0.0", - "micromark-util-types": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/micromark-extension-gfm-autolink-literal": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/micromark-extension-gfm-autolink-literal/-/micromark-extension-gfm-autolink-literal-2.1.0.tgz", - "integrity": "sha512-oOg7knzhicgQ3t4QCjCWgTmfNhvQbDDnJeVu9v81r7NltNCVmhPy1fJRX27pISafdjL+SVc4d3l48Gb6pbRypw==", - "license": "MIT", - "dependencies": { - "micromark-util-character": "^2.0.0", - "micromark-util-sanitize-uri": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/micromark-extension-gfm-footnote": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/micromark-extension-gfm-footnote/-/micromark-extension-gfm-footnote-2.1.0.tgz", - "integrity": "sha512-/yPhxI1ntnDNsiHtzLKYnE3vf9JZ6cAisqVDauhp4CEHxlb4uoOTxOCJ+9s51bIB8U1N1FJ1RXOKTIlD5B/gqw==", - "license": "MIT", - "dependencies": { - "devlop": "^1.0.0", - "micromark-core-commonmark": "^2.0.0", - "micromark-factory-space": "^2.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-normalize-identifier": "^2.0.0", - 
"micromark-util-sanitize-uri": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/micromark-extension-gfm-strikethrough": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/micromark-extension-gfm-strikethrough/-/micromark-extension-gfm-strikethrough-2.1.0.tgz", - "integrity": "sha512-ADVjpOOkjz1hhkZLlBiYA9cR2Anf8F4HqZUO6e5eDcPQd0Txw5fxLzzxnEkSkfnD0wziSGiv7sYhk/ktvbf1uw==", - "license": "MIT", - "dependencies": { - "devlop": "^1.0.0", - "micromark-util-chunked": "^2.0.0", - "micromark-util-classify-character": "^2.0.0", - "micromark-util-resolve-all": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/micromark-extension-gfm-table": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/micromark-extension-gfm-table/-/micromark-extension-gfm-table-2.1.1.tgz", - "integrity": "sha512-t2OU/dXXioARrC6yWfJ4hqB7rct14e8f7m0cbI5hUmDyyIlwv5vEtooptH8INkbLzOatzKuVbQmAYcbWoyz6Dg==", - "license": "MIT", - "dependencies": { - "devlop": "^1.0.0", - "micromark-factory-space": "^2.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/micromark-extension-gfm-tagfilter": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/micromark-extension-gfm-tagfilter/-/micromark-extension-gfm-tagfilter-2.0.0.tgz", - "integrity": "sha512-xHlTOmuCSotIA8TW1mDIM6X2O1SiX5P9IuDtqGonFhEK0qgRI4yeC6vMxEV2dgyr2TiD+2PQ10o+cOhdVAcwfg==", - "license": "MIT", - "dependencies": { - "micromark-util-types": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": 
"https://opencollective.com/unified" - } - }, - "node_modules/micromark-extension-gfm-task-list-item": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/micromark-extension-gfm-task-list-item/-/micromark-extension-gfm-task-list-item-2.1.0.tgz", - "integrity": "sha512-qIBZhqxqI6fjLDYFTBIa4eivDMnP+OZqsNwmQ3xNLE4Cxwc+zfQEfbs6tzAo2Hjq+bh6q5F+Z8/cksrLFYWQQw==", - "license": "MIT", - "dependencies": { - "devlop": "^1.0.0", - "micromark-factory-space": "^2.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/micromark-extension-math": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/micromark-extension-math/-/micromark-extension-math-3.1.0.tgz", - "integrity": "sha512-lvEqd+fHjATVs+2v/8kg9i5Q0AP2k85H0WUOwpIVvUML8BapsMvh1XAogmQjOCsLpoKRCVQqEkQBB3NhVBcsOg==", - "license": "MIT", - "dependencies": { - "@types/katex": "^0.16.0", - "devlop": "^1.0.0", - "katex": "^0.16.0", - "micromark-factory-space": "^2.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/micromark-extension-mdx-expression": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/micromark-extension-mdx-expression/-/micromark-extension-mdx-expression-3.0.1.tgz", - "integrity": "sha512-dD/ADLJ1AeMvSAKBwO22zG22N4ybhe7kFIZ3LsDI0GlsNr2A3KYxb0LdC1u5rj4Nw+CHKY0RVdnHX8vj8ejm4Q==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0", - "devlop": "^1.0.0", - "micromark-factory-mdx-expression": "^2.0.0", - 
"micromark-factory-space": "^2.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-events-to-acorn": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-extension-mdx-jsx": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/micromark-extension-mdx-jsx/-/micromark-extension-mdx-jsx-3.0.1.tgz", - "integrity": "sha512-vNuFb9czP8QCtAQcEJn0UJQJZA8Dk6DXKBqx+bg/w0WGuSxDxNr7hErW89tHUY31dUW4NqEOWwmEUNhjTFmHkg==", - "license": "MIT", - "dependencies": { - "@types/acorn": "^4.0.0", - "@types/estree": "^1.0.0", - "devlop": "^1.0.0", - "estree-util-is-identifier-name": "^3.0.0", - "micromark-factory-mdx-expression": "^2.0.0", - "micromark-factory-space": "^2.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-events-to-acorn": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0", - "vfile-message": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/micromark-extension-mdx-md": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/micromark-extension-mdx-md/-/micromark-extension-mdx-md-2.0.0.tgz", - "integrity": "sha512-EpAiszsB3blw4Rpba7xTOUptcFeBFi+6PY8VnJ2hhimH+vCQDirWgsMpz7w1XcZE7LVrSAUGb9VJpG9ghlYvYQ==", - "license": "MIT", - "dependencies": { - "micromark-util-types": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/micromark-extension-mdxjs": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/micromark-extension-mdxjs/-/micromark-extension-mdxjs-3.0.0.tgz", - "integrity": "sha512-A873fJfhnJ2siZyUrJ31l34Uqwy4xIFmvPY1oj+Ean5PHcPBYzEsvqvWGaWcfEIr11O5Dlw3p2y0tZWpKHDejQ==", - "license": "MIT", - "dependencies": { - "acorn": "^8.0.0", - "acorn-jsx": "^5.0.0", - "micromark-extension-mdx-expression": "^3.0.0", - "micromark-extension-mdx-jsx": "^3.0.0", - 
"micromark-extension-mdx-md": "^2.0.0", - "micromark-extension-mdxjs-esm": "^3.0.0", - "micromark-util-combine-extensions": "^2.0.0", - "micromark-util-types": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/micromark-extension-mdxjs-esm": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/micromark-extension-mdxjs-esm/-/micromark-extension-mdxjs-esm-3.0.0.tgz", - "integrity": "sha512-DJFl4ZqkErRpq/dAPyeWp15tGrcrrJho1hKK5uBS70BCtfrIFg81sqcTVu3Ta+KD1Tk5vAtBNElWxtAa+m8K9A==", - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0", - "devlop": "^1.0.0", - "micromark-core-commonmark": "^2.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-events-to-acorn": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0", - "unist-util-position-from-estree": "^2.0.0", - "vfile-message": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/micromark-factory-destination": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-factory-destination/-/micromark-factory-destination-2.0.1.tgz", - "integrity": "sha512-Xe6rDdJlkmbFRExpTOmRj9N3MaWmbAgdpSrBQvCFqhezUn4AHqJHbaEnfbVYYiexVSs//tqOdY/DxhjdCiJnIA==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-character": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-factory-label": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-factory-label/-/micromark-factory-label-2.0.1.tgz", - "integrity": "sha512-VFMekyQExqIW7xIChcXn4ok29YE3rnuyveW3wZQWWqF4Nv9Wk5rgJ99KzPvHjkmPXF93FXIbBp6YdW3t71/7Vg==", - "funding": [ - { - 
"type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "devlop": "^1.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-factory-mdx-expression": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/micromark-factory-mdx-expression/-/micromark-factory-mdx-expression-2.0.3.tgz", - "integrity": "sha512-kQnEtA3vzucU2BkrIa8/VaSAsP+EJ3CKOvhMuJgOEGg9KDC6OAY6nSnNDVRiVNRqj7Y4SlSzcStaH/5jge8JdQ==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0", - "devlop": "^1.0.0", - "micromark-factory-space": "^2.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-events-to-acorn": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0", - "unist-util-position-from-estree": "^2.0.0", - "vfile-message": "^4.0.0" - } - }, - "node_modules/micromark-factory-space": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-factory-space/-/micromark-factory-space-2.0.1.tgz", - "integrity": "sha512-zRkxjtBxxLd2Sc0d+fbnEunsTj46SWXgXciZmHq0kDYGnck/ZSGj9/wULTV95uoeYiK5hRXP2mJ98Uo4cq/LQg==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-character": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-factory-title": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-factory-title/-/micromark-factory-title-2.0.1.tgz", - 
"integrity": "sha512-5bZ+3CjhAd9eChYTHsjy6TGxpOFSKgKKJPJxr293jTbfry2KDoWkhBb6TcPVB4NmzaPhMs1Frm9AZH7OD4Cjzw==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-factory-space": "^2.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-factory-whitespace": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-factory-whitespace/-/micromark-factory-whitespace-2.0.1.tgz", - "integrity": "sha512-Ob0nuZ3PKt/n0hORHyvoD9uZhr+Za8sFoP+OnMcnWK5lngSzALgQYKMr9RJVOWLqQYuyn6ulqGWSXdwf6F80lQ==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-factory-space": "^2.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-util-character": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/micromark-util-character/-/micromark-util-character-2.1.1.tgz", - "integrity": "sha512-wv8tdUTJ3thSFFFJKtpYKOYiGP2+v96Hvk4Tu8KpCAsTMs6yi+nVmGh1syvSCsaxz45J6Jbw+9DD6g97+NV67Q==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-util-chunked": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-util-chunked/-/micromark-util-chunked-2.0.1.tgz", - "integrity": 
"sha512-QUNFEOPELfmvv+4xiNg2sRYeS/P84pTW0TCgP5zc9FpXetHY0ab7SxKyAQCNCc1eK0459uoLI1y5oO5Vc1dbhA==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-symbol": "^2.0.0" - } - }, - "node_modules/micromark-util-classify-character": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-util-classify-character/-/micromark-util-classify-character-2.0.1.tgz", - "integrity": "sha512-K0kHzM6afW/MbeWYWLjoHQv1sgg2Q9EccHEDzSkxiP/EaagNzCm7T/WMKZ3rjMbvIpvBiZgwR3dKMygtA4mG1Q==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-character": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-util-combine-extensions": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-util-combine-extensions/-/micromark-util-combine-extensions-2.0.1.tgz", - "integrity": "sha512-OnAnH8Ujmy59JcyZw8JSbK9cGpdVY44NKgSM7E9Eh7DiLS2E9RNQf0dONaGDzEG9yjEl5hcqeIsj4hfRkLH/Bg==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-chunked": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-util-decode-numeric-character-reference": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/micromark-util-decode-numeric-character-reference/-/micromark-util-decode-numeric-character-reference-2.0.2.tgz", - "integrity": 
"sha512-ccUbYk6CwVdkmCQMyr64dXz42EfHGkPQlBj5p7YVGzq8I7CtjXZJrubAYezf7Rp+bjPseiROqe7G6foFd+lEuw==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-symbol": "^2.0.0" - } - }, - "node_modules/micromark-util-decode-string": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-util-decode-string/-/micromark-util-decode-string-2.0.1.tgz", - "integrity": "sha512-nDV/77Fj6eH1ynwscYTOsbK7rR//Uj0bZXBwJZRfaLEJ1iGBR6kIfNmlNqaqJf649EP0F3NWNdeJi03elllNUQ==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "decode-named-character-reference": "^1.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-decode-numeric-character-reference": "^2.0.0", - "micromark-util-symbol": "^2.0.0" - } - }, - "node_modules/micromark-util-encode": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-util-encode/-/micromark-util-encode-2.0.1.tgz", - "integrity": "sha512-c3cVx2y4KqUnwopcO9b/SCdo2O67LwJJ/UyqGfbigahfegL9myoEFoDYZgkT7f36T0bLrM9hZTAaAyH+PCAXjw==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT" - }, - "node_modules/micromark-util-events-to-acorn": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/micromark-util-events-to-acorn/-/micromark-util-events-to-acorn-2.0.3.tgz", - "integrity": "sha512-jmsiEIiZ1n7X1Rr5k8wVExBQCg5jy4UXVADItHmNk1zkwEVhBuIUKRu3fqv+hs4nxLISi2DQGlqIOGiFxgbfHg==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - 
{ - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0", - "@types/unist": "^3.0.0", - "devlop": "^1.0.0", - "estree-util-visit": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0", - "vfile-message": "^4.0.0" - } - }, - "node_modules/micromark-util-html-tag-name": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-util-html-tag-name/-/micromark-util-html-tag-name-2.0.1.tgz", - "integrity": "sha512-2cNEiYDhCWKI+Gs9T0Tiysk136SnR13hhO8yW6BGNyhOC4qYFnwF1nKfD3HFAIXA5c45RrIG1ub11GiXeYd1xA==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT" - }, - "node_modules/micromark-util-normalize-identifier": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-util-normalize-identifier/-/micromark-util-normalize-identifier-2.0.1.tgz", - "integrity": "sha512-sxPqmo70LyARJs0w2UclACPUUEqltCkJ6PhKdMIDuJ3gSf/Q+/GIe3WKl0Ijb/GyH9lOpUkRAO2wp0GVkLvS9Q==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-symbol": "^2.0.0" - } - }, - "node_modules/micromark-util-resolve-all": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-util-resolve-all/-/micromark-util-resolve-all-2.0.1.tgz", - "integrity": "sha512-VdQyxFWFT2/FGJgwQnJYbe1jjQoNTS4RjglmSjTUlpUMa95Htx9NHeYW4rGDJzbjvCsl9eLjMQwGeElsqmzcHg==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - 
"micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-util-sanitize-uri": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-util-sanitize-uri/-/micromark-util-sanitize-uri-2.0.1.tgz", - "integrity": "sha512-9N9IomZ/YuGGZZmQec1MbgxtlgougxTodVwDzzEouPKo3qFWvymFHWcnDi2vzV1ff6kas9ucW+o3yzJK9YB1AQ==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-character": "^2.0.0", - "micromark-util-encode": "^2.0.0", - "micromark-util-symbol": "^2.0.0" - } - }, - "node_modules/micromark-util-subtokenize": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-subtokenize/-/micromark-util-subtokenize-2.1.0.tgz", - "integrity": "sha512-XQLu552iSctvnEcgXw6+Sx75GflAPNED1qx7eBJ+wydBb2KCbRZe+NwvIEEMM83uml1+2WSXpBAcp9IUCgCYWA==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "devlop": "^1.0.0", - "micromark-util-chunked": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-util-symbol": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-util-symbol/-/micromark-util-symbol-2.0.1.tgz", - "integrity": "sha512-vs5t8Apaud9N28kgCrRUdEed4UJ+wWNvicHLPxCa9ENlYuAY31M0ETy5y1vA33YoNPDFTghEbnh6efaE8h4x0Q==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT" - }, - "node_modules/micromark-util-types": { - "version": "2.0.2", - "resolved": 
"https://registry.npmjs.org/micromark-util-types/-/micromark-util-types-2.0.2.tgz", - "integrity": "sha512-Yw0ECSpJoViF1qTU4DC6NwtC4aWGt1EkzaQB8KPPyCRR8z9TWeV0HbEFGTO+ZY1wB22zmxnJqhPyTpOVCpeHTA==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT" - }, - "node_modules/micromatch": { - "version": "4.0.8", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz", - "integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==", - "license": "MIT", - "dependencies": { - "braces": "^3.0.3", - "picomatch": "^2.3.1" - }, - "engines": { - "node": ">=8.6" - } - }, - "node_modules/mime": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", - "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==", - "license": "MIT", - "bin": { - "mime": "cli.js" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/mime-db": { - "version": "1.52.0", - "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", - "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", - "license": "MIT", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/mime-types": { - "version": "2.1.35", - "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", - "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", - "license": "MIT", - "dependencies": { - "mime-db": "1.52.0" - }, - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/mimic-fn": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz", - "integrity": 
"sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/mimic-response": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-4.0.0.tgz", - "integrity": "sha512-e5ISH9xMYU0DzrT+jl8q2ze9D6eWBto+I8CNpe+VI+K2J/F/k3PdkdTdz4wvGVH4NTpo+NRYTVIuMQEMMcsLqg==", - "license": "MIT", - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/minimatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", - "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", - "license": "ISC", - "dependencies": { - "brace-expansion": "^1.1.7" - }, - "engines": { - "node": "*" - } - }, - "node_modules/minimist": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", - "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", - "license": "MIT", - "optional": true, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/minipass": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/minipass/-/minipass-5.0.0.tgz", - "integrity": "sha512-3FnjYuehv9k6ovOEbyOswadCDPX1piCfhV8ncmYtHOjuPwylVWsghTLo7rabjC3Rx5xD4HDx8Wm1xnMF7S5qFQ==", - "license": "ISC", - "engines": { - "node": ">=8" - } - }, - "node_modules/minizlib": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-2.1.2.tgz", - "integrity": "sha512-bAxsR8BVfj60DWXHE3u30oHzfl4G7khkSuPW+qvpd7jFRHm7dLxOjUk1EHACJ/hxLY8phGJ0YhYHZo7jil7Qdg==", - "license": "MIT", - "dependencies": { - "minipass": "^3.0.0", - "yallist": "^4.0.0" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/minizlib/node_modules/minipass": 
{ - "version": "3.3.6", - "resolved": "https://registry.npmjs.org/minipass/-/minipass-3.3.6.tgz", - "integrity": "sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==", - "license": "ISC", - "dependencies": { - "yallist": "^4.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/mintlify": { - "version": "4.2.500", - "resolved": "https://registry.npmjs.org/mintlify/-/mintlify-4.2.500.tgz", - "integrity": "sha512-pVuzf4F+JRmVCuQZLQebIlggCzWQyHsnPiAbuUoJ8aofsKbbs30woRQznoeCmzgmzDxBk25xPay9yy4GRPRlOw==", - "license": "Elastic-2.0", - "dependencies": { - "@mintlify/cli": "4.0.1103" - }, - "bin": { - "mintlify": "index.js" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/mitt": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz", - "integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==", - "license": "MIT" - }, - "node_modules/mkdirp": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-1.0.4.tgz", - "integrity": "sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw==", - "license": "MIT", - "bin": { - "mkdirp": "bin/cmd.js" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/mkdirp-classic": { - "version": "0.5.3", - "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", - "integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==", - "license": "MIT", - "optional": true - }, - "node_modules/ms": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "license": "MIT" - }, - "node_modules/mute-stream": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/mute-stream/-/mute-stream-2.0.0.tgz", - 
"integrity": "sha512-WWdIxpyjEn+FhQJQQv9aQAYlHoNVdzIzUySNV1gHUPDSdZJ3yZn7pAAbQcV7B56Mvu881q9FZV+0Vx2xC44VWA==", - "license": "ISC", - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/mz": { - "version": "2.7.0", - "resolved": "https://registry.npmjs.org/mz/-/mz-2.7.0.tgz", - "integrity": "sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q==", - "license": "MIT", - "dependencies": { - "any-promise": "^1.0.0", - "object-assign": "^4.0.1", - "thenify-all": "^1.0.0" - } - }, - "node_modules/nanoid": { - "version": "3.3.11", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz", - "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "bin": { - "nanoid": "bin/nanoid.cjs" - }, - "engines": { - "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" - } - }, - "node_modules/napi-build-utils": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-2.0.0.tgz", - "integrity": "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA==", - "license": "MIT", - "optional": true - }, - "node_modules/negotiator": { - "version": "0.6.3", - "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz", - "integrity": "sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg==", - "license": "MIT", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/neotraverse": { - "version": "0.6.18", - "resolved": "https://registry.npmjs.org/neotraverse/-/neotraverse-0.6.18.tgz", - "integrity": "sha512-Z4SmBUweYa09+o6pG+eASabEpP6QkQ70yHj351pQoEXIs8uHbaU2DWVmzBANKgflPa47A50PtB2+NgRpQvr7vA==", - "license": "MIT", - "engines": { - "node": ">= 10" - } - }, - "node_modules/netmask": { - "version": "2.1.0", - "resolved": 
"https://registry.npmjs.org/netmask/-/netmask-2.1.0.tgz", - "integrity": "sha512-z9sZrk6wyf8/NDKKqe+Tyl58XtgkYrV4kgt1O8xrzYvpl1LvPacPo0imMLHfpStk3kgCIq1ksJ2bmJn9hue2lQ==", - "license": "MIT", - "engines": { - "node": ">= 0.4.0" - } - }, - "node_modules/nimma": { - "version": "0.2.3", - "resolved": "https://registry.npmjs.org/nimma/-/nimma-0.2.3.tgz", - "integrity": "sha512-1ZOI8J+1PKKGceo/5CT5GfQOG6H8I2BencSK06YarZ2wXwH37BSSUWldqJmMJYA5JfqDqffxDXynt6f11AyKcA==", - "license": "Apache-2.0", - "dependencies": { - "@jsep-plugin/regex": "^1.0.1", - "@jsep-plugin/ternary": "^1.0.2", - "astring": "^1.8.1", - "jsep": "^1.2.0" - }, - "engines": { - "node": "^12.20 || >=14.13" - }, - "optionalDependencies": { - "jsonpath-plus": "^6.0.1 || ^10.1.0", - "lodash.topath": "^4.5.2" - } - }, - "node_modules/nlcst-to-string": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/nlcst-to-string/-/nlcst-to-string-4.0.0.tgz", - "integrity": "sha512-YKLBCcUYKAg0FNlOBT6aI91qFmSiFKiluk655WzPF+DDMA02qIyy8uiRqI8QXtcFpEvll12LpL5MXqEmAZ+dcA==", - "license": "MIT", - "dependencies": { - "@types/nlcst": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/node-abi": { - "version": "3.89.0", - "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.89.0.tgz", - "integrity": "sha512-6u9UwL0HlAl21+agMN3YAMXcKByMqwGx+pq+P76vii5f7hTPtKDp08/H9py6DY+cfDw7kQNTGEj/rly3IgbNQA==", - "license": "MIT", - "optional": true, - "dependencies": { - "semver": "^7.3.5" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/node-addon-api": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-4.3.0.tgz", - "integrity": "sha512-73sE9+3UaLYYFmDsFZnqCInzPyh3MqIwZO9cw58yIqAZhONrrabrYyYe3TuIqtIiOuTXVhsGau8hcrhhwSsDIQ==", - "license": "MIT", - "optional": true - }, - "node_modules/node-fetch": { - "version": "2.6.7", - "resolved": 
"https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz", - "integrity": "sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ==", - "license": "MIT", - "dependencies": { - "whatwg-url": "^5.0.0" - }, - "engines": { - "node": "4.x || >=6.0.0" - }, - "peerDependencies": { - "encoding": "^0.1.0" - }, - "peerDependenciesMeta": { - "encoding": { - "optional": true - } - } - }, - "node_modules/non-error": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/non-error/-/non-error-0.1.0.tgz", - "integrity": "sha512-TMB1uHiGsHRGv1uYclfhivcnf0/PdFp2pNqRxXjncaAsjYMoisaQJI+SSZCqRq+VliwRTC8tsMQfmrWjDMhkPQ==", - "license": "MIT", - "engines": { - "node": ">=20" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/normalize-path": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", - "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/normalize-url": { - "version": "8.1.1", - "resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-8.1.1.tgz", - "integrity": "sha512-JYc0DPlpGWB40kH5g07gGTrYuMqV653k3uBKY6uITPWds3M0ov3GaWGp9lbE3Bzngx8+XkfzgvASb9vk9JDFXQ==", - "license": "MIT", - "engines": { - "node": ">=14.16" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/oauth4webapi": { - "version": "3.8.5", - "resolved": "https://registry.npmjs.org/oauth4webapi/-/oauth4webapi-3.8.5.tgz", - "integrity": "sha512-A8jmyUckVhRJj5lspguklcl90Ydqk61H3dcU0oLhH3Yv13KpAliKTt5hknpGGPZSSfOwGyraNEFmofDYH+1kSg==", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/panva" - } - }, - "node_modules/object-assign": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", - 
"integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/object-hash": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz", - "integrity": "sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw==", - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, - "node_modules/object-inspect": { - "version": "1.13.4", - "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", - "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/object-keys": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz", - "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/object.assign": { - "version": "4.1.7", - "resolved": "https://registry.npmjs.org/object.assign/-/object.assign-4.1.7.tgz", - "integrity": "sha512-nK28WOo+QIjBkDduTINE4JkF/UJJKyf2EJxvJKfblDpyg0Q+pkOHNTL0Qwy6NP6FhE/EnzV73BxxqcJaXY9anw==", - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "call-bound": "^1.0.3", - "define-properties": "^1.2.1", - "es-object-atoms": "^1.0.0", - "has-symbols": "^1.1.0", - "object-keys": "^1.1.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/on-finished": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz", - "integrity": 
"sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==", - "license": "MIT", - "dependencies": { - "ee-first": "1.1.1" - }, - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/once": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", - "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", - "license": "ISC", - "dependencies": { - "wrappy": "1" - } - }, - "node_modules/onetime": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz", - "integrity": "sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==", - "license": "MIT", - "dependencies": { - "mimic-fn": "^2.1.0" - }, - "engines": { - "node": ">=6" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/oniguruma-parser": { - "version": "0.12.1", - "resolved": "https://registry.npmjs.org/oniguruma-parser/-/oniguruma-parser-0.12.1.tgz", - "integrity": "sha512-8Unqkvk1RYc6yq2WBYRj4hdnsAxVze8i7iPfQr8e4uSP3tRv0rpZcbGUDvxfQQcdwHt/e9PrMvGCsa8OqG9X3w==", - "license": "MIT" - }, - "node_modules/oniguruma-to-es": { - "version": "4.3.5", - "resolved": "https://registry.npmjs.org/oniguruma-to-es/-/oniguruma-to-es-4.3.5.tgz", - "integrity": "sha512-Zjygswjpsewa0NLTsiizVuMQZbp0MDyM6lIt66OxsF21npUDlzpHi1Mgb/qhQdkb+dWFTzJmFbEWdvZgRho8eQ==", - "license": "MIT", - "dependencies": { - "oniguruma-parser": "^0.12.1", - "regex": "^6.1.0", - "regex-recursion": "^6.0.2" - } - }, - "node_modules/open": { - "version": "8.4.2", - "resolved": "https://registry.npmjs.org/open/-/open-8.4.2.tgz", - "integrity": "sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==", - "license": "MIT", - "dependencies": { - "define-lazy-prop": "^2.0.0", - "is-docker": "^2.1.1", - "is-wsl": "^2.2.0" - }, - "engines": { - "node": ">=12" - }, - "funding": 
{ - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/openapi-types": { - "version": "12.1.3", - "resolved": "https://registry.npmjs.org/openapi-types/-/openapi-types-12.1.3.tgz", - "integrity": "sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==", - "license": "MIT" - }, - "node_modules/openid-client": { - "version": "6.8.2", - "resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.2.tgz", - "integrity": "sha512-uOvTCndr4udZsKihJ68H9bUICrriHdUVJ6Az+4Ns6cW55rwM5h0bjVIzDz2SxgOI84LKjFyjOFvERLzdTUROGA==", - "license": "MIT", - "dependencies": { - "jose": "^6.1.3", - "oauth4webapi": "^3.8.4" - }, - "funding": { - "url": "https://github.com/sponsors/panva" - } - }, - "node_modules/own-keys": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/own-keys/-/own-keys-1.0.1.tgz", - "integrity": "sha512-qFOyK5PjiWZd+QQIh+1jhdb9LpxTF0qs7Pm8o5QHYZ0M3vKqSqzsZaEB6oWlxZ+q2sJBMI/Ktgd2N5ZwQoRHfg==", - "license": "MIT", - "dependencies": { - "get-intrinsic": "^1.2.6", - "object-keys": "^1.1.1", - "safe-push-apply": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/p-any": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/p-any/-/p-any-4.0.0.tgz", - "integrity": "sha512-S/B50s+pAVe0wmEZHmBs/9yJXeZ5KhHzOsgKzt0hRdgkoR3DxW9ts46fcsWi/r3VnzsnkKS7q4uimze+zjdryw==", - "license": "MIT", - "dependencies": { - "p-cancelable": "^3.0.0", - "p-some": "^6.0.0" - }, - "engines": { - "node": ">=12.20" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/p-cancelable": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-3.0.0.tgz", - "integrity": "sha512-mlVgR3PGuzlo0MmTdk4cXqXWlwQDLnONTAg6sm62XkMJEiRxN3GL3SffkYvqwonbkJBcrI7Uvv5Zh9yjvn2iUw==", - "license": "MIT", - "engines": { - "node": ">=12.20" - } - }, - 
"node_modules/p-some": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/p-some/-/p-some-6.0.0.tgz", - "integrity": "sha512-CJbQCKdfSX3fIh8/QKgS+9rjm7OBNUTmwWswAFQAhc8j1NR1dsEDETUEuVUtQHZpV+J03LqWBEwvu0g1Yn+TYg==", - "license": "MIT", - "dependencies": { - "aggregate-error": "^4.0.0", - "p-cancelable": "^3.0.0" - }, - "engines": { - "node": ">=12.20" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/p-timeout": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/p-timeout/-/p-timeout-5.1.0.tgz", - "integrity": "sha512-auFDyzzzGZZZdHz3BtET9VEz0SE/uMEAx7uWfGPucfzEwwe/xH0iVeZibQmANYE/hp9T2+UUZT5m+BKyrDp3Ew==", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/pac-proxy-agent": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz", - "integrity": "sha512-TEB8ESquiLMc0lV8vcd5Ql/JAKAoyzHFXaStwjkzpOpC5Yv+pIzLfHvjTSdf3vpa2bMiUQrg9i6276yn8666aA==", - "license": "MIT", - "dependencies": { - "@tootallnate/quickjs-emscripten": "^0.23.0", - "agent-base": "^7.1.2", - "debug": "^4.3.4", - "get-uri": "^6.0.1", - "http-proxy-agent": "^7.0.0", - "https-proxy-agent": "^7.0.6", - "pac-resolver": "^7.0.1", - "socks-proxy-agent": "^8.0.5" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/pac-resolver": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/pac-resolver/-/pac-resolver-7.0.1.tgz", - "integrity": "sha512-5NPgf87AT2STgwa2ntRMr45jTKrYBGkVU36yT0ig/n/GMAa3oPqhZfIQ2kMEimReg0+t9kZViDVZ83qfVUlckg==", - "license": "MIT", - "dependencies": { - "degenerator": "^5.0.0", - "netmask": "^2.0.2" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/parent-module": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", - "integrity": 
"sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", - "license": "MIT", - "dependencies": { - "callsites": "^3.0.0" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/parse-entities": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/parse-entities/-/parse-entities-4.0.2.tgz", - "integrity": "sha512-GG2AQYWoLgL877gQIKeRPGO1xF9+eG1ujIb5soS5gPvLQ1y2o8FL90w2QWNdf9I361Mpp7726c+lj3U0qK1uGw==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2.0.0", - "character-entities-legacy": "^3.0.0", - "character-reference-invalid": "^2.0.0", - "decode-named-character-reference": "^1.0.0", - "is-alphanumerical": "^2.0.0", - "is-decimal": "^2.0.0", - "is-hexadecimal": "^2.0.0" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/parse-entities/node_modules/@types/unist": { - "version": "2.0.11", - "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.11.tgz", - "integrity": "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==", - "license": "MIT" - }, - "node_modules/parse-json": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz", - "integrity": "sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==", - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.0.0", - "error-ex": "^1.3.1", - "json-parse-even-better-errors": "^2.3.0", - "lines-and-columns": "^1.1.6" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/parse-latin": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/parse-latin/-/parse-latin-7.0.0.tgz", - "integrity": "sha512-mhHgobPPua5kZ98EF4HWiH167JWBfl4pvAIXXdbaVohtK7a6YBOy56kvhCqduqyo/f3yrHFWmqmiMg/BkBkYYQ==", - "license": "MIT", - "dependencies": { - "@types/nlcst": "^2.0.0", - 
"@types/unist": "^3.0.0", - "nlcst-to-string": "^4.0.0", - "unist-util-modify-children": "^4.0.0", - "unist-util-visit-children": "^3.0.0", - "vfile": "^6.0.0" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/parse5": { - "version": "7.3.0", - "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz", - "integrity": "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==", - "license": "MIT", - "dependencies": { - "entities": "^6.0.0" - }, - "funding": { - "url": "https://github.com/inikulin/parse5?sponsor=1" - } - }, - "node_modules/parseurl": { - "version": "1.3.3", - "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", - "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==", - "license": "MIT", - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/patch-console": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/patch-console/-/patch-console-2.0.0.tgz", - "integrity": "sha512-0YNdUceMdaQwoKce1gatDScmMo5pu/tfABfnzEqeG0gtTmd7mh/WcwgUjtAeOU7N8nFFlbQBnFK2gXW5fGvmMA==", - "license": "MIT", - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - } - }, - "node_modules/path-key": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", - "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/path-parse": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz", - "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==", - "license": "MIT" - }, - "node_modules/path-to-regexp": { - "version": "0.1.7", - "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz", - 
"integrity": "sha512-5DFkuoqlv1uYQKxy8omFBeJPQcdoE07Kv2sferDCrAq1ohOU+MSDswDIbnx3YAM60qIOnYa53wBhXW0EbMonrQ==", - "license": "MIT" - }, - "node_modules/pend": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz", - "integrity": "sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==", - "license": "MIT" - }, - "node_modules/picocolors": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", - "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", - "license": "ISC" - }, - "node_modules/picomatch": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz", - "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==", - "license": "MIT", - "engines": { - "node": ">=8.6" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/pify": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/pify/-/pify-2.3.0.tgz", - "integrity": "sha512-udgsAY+fTnvv7kI7aaxbqwWNb0AHiB0qBO89PZKPkoTmGOgdbrHDKD+0B2X4uTfJ/FT1R09r9gTsjUjNJotuog==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/pirates": { - "version": "4.0.7", - "resolved": "https://registry.npmjs.org/pirates/-/pirates-4.0.7.tgz", - "integrity": "sha512-TfySrs/5nm8fQJDcBDuUng3VOUKsd7S+zqvbOTiGXHfxX4wK31ard+hoNuvkicM/2YFzlpDgABOevKSsB4G/FA==", - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, - "node_modules/pony-cause": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/pony-cause/-/pony-cause-1.1.1.tgz", - "integrity": "sha512-PxkIc/2ZpLiEzQXu5YRDOUgBlfGYBY8156HY5ZcRAwwonMk5W/MrJP2LLkG/hF7GEQzaHo2aS7ho6ZLCOvf+6g==", - "license": "0BSD", - "engines": { - "node": ">=12.0.0" - } - }, - "node_modules/possible-typed-array-names": { - "version": 
"1.1.0", - "resolved": "https://registry.npmjs.org/possible-typed-array-names/-/possible-typed-array-names-1.1.0.tgz", - "integrity": "sha512-/+5VFTchJDoVj3bhoqi6UeymcD00DAwb1nJwamzPvHEszJ4FpF6SNNbUbOS8yI56qHzdV8eK0qEfOSiodkTdxg==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/postcss": { - "version": "8.5.6", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz", - "integrity": "sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==", - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/postcss" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "nanoid": "^3.3.11", - "picocolors": "^1.1.1", - "source-map-js": "^1.2.1" - }, - "engines": { - "node": "^10 || ^12 || >=14" - } - }, - "node_modules/postcss-import": { - "version": "15.1.0", - "resolved": "https://registry.npmjs.org/postcss-import/-/postcss-import-15.1.0.tgz", - "integrity": "sha512-hpr+J05B2FVYUAXHeK1YyI267J/dDDhMU6B6civm8hSY1jYJnBXxzKDKDswzJmtLHryrjhnDjqqp/49t8FALew==", - "license": "MIT", - "dependencies": { - "postcss-value-parser": "^4.0.0", - "read-cache": "^1.0.0", - "resolve": "^1.1.7" - }, - "engines": { - "node": ">=14.0.0" - }, - "peerDependencies": { - "postcss": "^8.0.0" - } - }, - "node_modules/postcss-js": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/postcss-js/-/postcss-js-4.1.0.tgz", - "integrity": "sha512-oIAOTqgIo7q2EOwbhb8UalYePMvYoIeRY2YKntdpFQXNosSu3vLrniGgmH9OKs/qAkfoj5oB3le/7mINW1LCfw==", - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "camelcase-css": "^2.0.1" - }, - "engines": { - "node": "^12 || ^14 || >= 16" - 
}, - "peerDependencies": { - "postcss": "^8.4.21" - } - }, - "node_modules/postcss-load-config": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-6.0.1.tgz", - "integrity": "sha512-oPtTM4oerL+UXmx+93ytZVN82RrlY/wPUV8IeDxFrzIjXOLF1pN+EmKPLbubvKHT2HC20xXsCAH2Z+CKV6Oz/g==", - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "lilconfig": "^3.1.1" - }, - "engines": { - "node": ">= 18" - }, - "peerDependencies": { - "jiti": ">=1.21.0", - "postcss": ">=8.0.9", - "tsx": "^4.8.1", - "yaml": "^2.4.2" - }, - "peerDependenciesMeta": { - "jiti": { - "optional": true - }, - "postcss": { - "optional": true - }, - "tsx": { - "optional": true - }, - "yaml": { - "optional": true - } - } - }, - "node_modules/postcss-nested": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/postcss-nested/-/postcss-nested-6.2.0.tgz", - "integrity": "sha512-HQbt28KulC5AJzG+cZtj9kvKB93CFCdLvog1WFLf1D+xmMvPGlBstkpTEZfK5+AN9hfJocyBFCNiqyS48bpgzQ==", - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "postcss-selector-parser": "^6.1.1" - }, - "engines": { - "node": ">=12.0" - }, - "peerDependencies": { - "postcss": "^8.2.14" - } - }, - "node_modules/postcss-selector-parser": { - "version": "6.1.2", - "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.1.2.tgz", - "integrity": "sha512-Q8qQfPiZ+THO/3ZrOrO0cJJKfpYCagtMUkXbnEfmgUjwXg6z/WBeOyS9APBBPCTSiDV+s4SwQGu8yFsiMRIudg==", - "license": "MIT", - "dependencies": { - "cssesc": "^3.0.0", - "util-deprecate": "^1.0.2" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/postcss-value-parser": { - "version": "4.2.0", 
- "resolved": "https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz", - "integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==", - "license": "MIT" - }, - "node_modules/posthog-node": { - "version": "5.17.2", - "resolved": "https://registry.npmjs.org/posthog-node/-/posthog-node-5.17.2.tgz", - "integrity": "sha512-lz3YJOr0Nmiz0yHASaINEDHqoV+0bC3eD8aZAG+Ky292dAnVYul+ga/dMX8KCBXg8hHfKdxw0SztYD5j6dgUqQ==", - "license": "MIT", - "dependencies": { - "@posthog/core": "1.7.1" - }, - "engines": { - "node": ">=20" - } - }, - "node_modules/prebuild-install": { - "version": "7.1.3", - "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz", - "integrity": "sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==", - "deprecated": "No longer maintained. Please contact the author of the relevant native addon; alternatives are available.", - "license": "MIT", - "optional": true, - "dependencies": { - "detect-libc": "^2.0.0", - "expand-template": "^2.0.3", - "github-from-package": "0.0.0", - "minimist": "^1.2.3", - "mkdirp-classic": "^0.5.3", - "napi-build-utils": "^2.0.0", - "node-abi": "^3.3.0", - "pump": "^3.0.0", - "rc": "^1.2.7", - "simple-get": "^4.0.0", - "tar-fs": "^2.0.0", - "tunnel-agent": "^0.6.0" - }, - "bin": { - "prebuild-install": "bin.js" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/progress": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", - "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", - "license": "MIT", - "engines": { - "node": ">=0.4.0" - } - }, - "node_modules/property-information": { - "version": "7.1.0", - "resolved": "https://registry.npmjs.org/property-information/-/property-information-7.1.0.tgz", - "integrity": 
"sha512-TwEZ+X+yCJmYfL7TPUOcvBZ4QfoT5YenQiJuX//0th53DE6w0xxLEtfK3iyryQFddXuvkIk51EEgrJQ0WJkOmQ==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/proxy-addr": { - "version": "2.0.7", - "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz", - "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==", - "license": "MIT", - "dependencies": { - "forwarded": "0.2.0", - "ipaddr.js": "1.9.1" - }, - "engines": { - "node": ">= 0.10" - } - }, - "node_modules/proxy-agent": { - "version": "6.5.0", - "resolved": "https://registry.npmjs.org/proxy-agent/-/proxy-agent-6.5.0.tgz", - "integrity": "sha512-TmatMXdr2KlRiA2CyDu8GqR8EjahTG3aY3nXjdzFyoZbmB8hrBsTyMezhULIXKnC0jpfjlmiZ3+EaCzoInSu/A==", - "license": "MIT", - "dependencies": { - "agent-base": "^7.1.2", - "debug": "^4.3.4", - "http-proxy-agent": "^7.0.1", - "https-proxy-agent": "^7.0.6", - "lru-cache": "^7.14.1", - "pac-proxy-agent": "^7.1.0", - "proxy-from-env": "^1.1.0", - "socks-proxy-agent": "^8.0.5" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/proxy-from-env": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", - "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", - "license": "MIT" - }, - "node_modules/public-ip": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/public-ip/-/public-ip-5.0.0.tgz", - "integrity": "sha512-xaH3pZMni/R2BG7ZXXaWS9Wc9wFlhyDVJF47IJ+3ali0TGv+2PsckKxbmo+rnx3ZxiV2wblVhtdS3bohAP6GGw==", - "license": "MIT", - "dependencies": { - "dns-socket": "^4.2.2", - "got": "^12.0.0", - "is-ip": "^3.1.0" - }, - "engines": { - "node": "^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/public-ip/node_modules/got": { - "version": "12.6.1", - 
"resolved": "https://registry.npmjs.org/got/-/got-12.6.1.tgz", - "integrity": "sha512-mThBblvlAF1d4O5oqyvN+ZxLAYwIJK7bpMxgYqPD9okW0C3qm5FFn7k811QrcuEBwaogR3ngOFoCfs6mRv7teQ==", - "license": "MIT", - "dependencies": { - "@sindresorhus/is": "^5.2.0", - "@szmarczak/http-timer": "^5.0.1", - "cacheable-lookup": "^7.0.0", - "cacheable-request": "^10.2.8", - "decompress-response": "^6.0.0", - "form-data-encoder": "^2.1.2", - "get-stream": "^6.0.1", - "http2-wrapper": "^2.1.10", - "lowercase-keys": "^3.0.0", - "p-cancelable": "^3.0.0", - "responselike": "^3.0.0" - }, - "engines": { - "node": ">=14.16" - }, - "funding": { - "url": "https://github.com/sindresorhus/got?sponsor=1" - } - }, - "node_modules/pump": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.4.tgz", - "integrity": "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==", - "license": "MIT", - "dependencies": { - "end-of-stream": "^1.1.0", - "once": "^1.3.1" - } - }, - "node_modules/puppeteer": { - "version": "22.14.0", - "resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-22.14.0.tgz", - "integrity": "sha512-MGTR6/pM8zmWbTdazb6FKnwIihzsSEXBPH49mFFU96DNZpQOevCAZMnjBZGlZRGRzRK6aADCavR6SQtrbv5dQw==", - "deprecated": "< 24.15.0 is no longer supported", - "hasInstallScript": true, - "license": "Apache-2.0", - "dependencies": { - "@puppeteer/browsers": "2.3.0", - "cosmiconfig": "^9.0.0", - "devtools-protocol": "0.0.1312386", - "puppeteer-core": "22.14.0" - }, - "bin": { - "puppeteer": "lib/esm/puppeteer/node/cli.js" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/puppeteer-core": { - "version": "22.14.0", - "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-22.14.0.tgz", - "integrity": "sha512-rl4tOY5LcA3e374GAlsGGHc05HL3eGNf5rZ+uxkl6id9zVZKcwcp1Z+Nd6byb6WPiPeecT/dwz8f/iUm+AZQSw==", - "license": "Apache-2.0", - "dependencies": { - "@puppeteer/browsers": "2.3.0", - "chromium-bidi": "0.6.2", - 
"debug": "^4.3.5", - "devtools-protocol": "0.0.1312386", - "ws": "^8.18.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/qs": { - "version": "6.11.0", - "resolved": "https://registry.npmjs.org/qs/-/qs-6.11.0.tgz", - "integrity": "sha512-MvjoMCJwEarSbUYk5O+nmoSzSutSsTwF85zcHPQ9OrlFoZOYIjaqBAJIqIXjptyD5vThxGq52Xu/MaJzRkIk4Q==", - "license": "BSD-3-Clause", - "dependencies": { - "side-channel": "^1.0.4" - }, - "engines": { - "node": ">=0.6" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/queue-microtask": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", - "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT" - }, - "node_modules/quick-lru": { - "version": "5.1.1", - "resolved": "https://registry.npmjs.org/quick-lru/-/quick-lru-5.1.1.tgz", - "integrity": "sha512-WuyALRjWPDGtt/wzJiadO5AXY+8hZ80hVpe6MyivgraREW751X3SbhRvG3eLKOYN+8VEvqLcf3wdnt44Z4S4SA==", - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/range-parser": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", - "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==", - "license": "MIT", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/raw-body": { - "version": "2.5.1", - "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.5.1.tgz", - "integrity": "sha512-qqJBtEyVgS0ZmPGdCFPWJ3FreoqvG4MVQln/kCgF7Olq95IbOp0/BWyMwbdtn4VTvkM8Y7khCQ2Xgk/tcrCXig==", - "license": 
"MIT", - "dependencies": { - "bytes": "3.1.2", - "http-errors": "2.0.0", - "iconv-lite": "0.4.24", - "unpipe": "1.0.0" - }, - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/raw-body/node_modules/iconv-lite": { - "version": "0.4.24", - "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", - "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", - "license": "MIT", - "dependencies": { - "safer-buffer": ">= 2.1.2 < 3" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/rc": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", - "integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==", - "license": "(BSD-2-Clause OR MIT OR Apache-2.0)", - "optional": true, - "dependencies": { - "deep-extend": "^0.6.0", - "ini": "~1.3.0", - "minimist": "^1.2.0", - "strip-json-comments": "~2.0.1" - }, - "bin": { - "rc": "cli.js" - } - }, - "node_modules/react": { - "version": "19.2.3", - "resolved": "https://registry.npmjs.org/react/-/react-19.2.3.tgz", - "integrity": "sha512-Ku/hhYbVjOQnXDZFv2+RibmLFGwFdeeKHFcOTlrt7xplBnya5OGn/hIRDsqDiSUcfORsDC7MPxwork8jBwsIWA==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/react-reconciler": { - "version": "0.32.0", - "resolved": "https://registry.npmjs.org/react-reconciler/-/react-reconciler-0.32.0.tgz", - "integrity": "sha512-2NPMOzgTlG0ZWdIf3qG+dcbLSoAc/uLfOwckc3ofy5sSK0pLJqnQLpUFxvGcN2rlXSjnVtGeeFLNimCQEj5gOQ==", - "license": "MIT", - "dependencies": { - "scheduler": "^0.26.0" - }, - "engines": { - "node": ">=0.10.0" - }, - "peerDependencies": { - "react": "^19.1.0" - } - }, - "node_modules/react-remove-scroll": { - "version": "2.7.2", - "resolved": "https://registry.npmjs.org/react-remove-scroll/-/react-remove-scroll-2.7.2.tgz", - "integrity": 
"sha512-Iqb9NjCCTt6Hf+vOdNIZGdTiH1QSqr27H/Ek9sv/a97gfueI/5h1s3yRi1nngzMUaOOToin5dI1dXKdXiF+u0Q==", - "license": "MIT", - "peer": true, - "dependencies": { - "react-remove-scroll-bar": "^2.3.7", - "react-style-singleton": "^2.2.3", - "tslib": "^2.1.0", - "use-callback-ref": "^1.3.3", - "use-sidecar": "^1.1.3" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/react-remove-scroll-bar": { - "version": "2.3.8", - "resolved": "https://registry.npmjs.org/react-remove-scroll-bar/-/react-remove-scroll-bar-2.3.8.tgz", - "integrity": "sha512-9r+yi9+mgU33AKcj6IbT9oRCO78WriSj6t/cF8DWBZJ9aOGPOTEDvdUDz1FwKim7QXWwmHqtdHnRJfhAxEG46Q==", - "license": "MIT", - "peer": true, - "dependencies": { - "react-style-singleton": "^2.2.2", - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/react-remove-scroll-bar/node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD", - "peer": true - }, - "node_modules/react-remove-scroll/node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD", - "peer": true - }, - "node_modules/react-style-singleton": { - "version": "2.2.3", - "resolved": "https://registry.npmjs.org/react-style-singleton/-/react-style-singleton-2.2.3.tgz", - "integrity": 
"sha512-b6jSvxvVnyptAiLjbkWLE/lOnR4lfTtDAl+eUC7RZy+QQWc6wRzIV2CE6xBuMmDxc2qIihtDCZD5NPOFl7fRBQ==", - "license": "MIT", - "peer": true, - "dependencies": { - "get-nonce": "^1.0.0", - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/react-style-singleton/node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD", - "peer": true - }, - "node_modules/read-cache": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz", - "integrity": "sha512-Owdv/Ft7IjOgm/i0xvNDZ1LrRANRfew4b2prF3OWMQLxLfu3bS8FVhCsrSCMK4lR56Y9ya+AThoTpDCTxCmpRA==", - "license": "MIT", - "dependencies": { - "pify": "^2.3.0" - } - }, - "node_modules/readable-stream": { - "version": "3.6.2", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", - "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", - "license": "MIT", - "optional": true, - "dependencies": { - "inherits": "^2.0.3", - "string_decoder": "^1.1.1", - "util-deprecate": "^1.0.1" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/readdirp": { - "version": "3.6.0", - "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", - "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==", - "license": "MIT", - "dependencies": { - "picomatch": "^2.2.1" - }, - "engines": { - "node": ">=8.10.0" - } - }, - "node_modules/recma-build-jsx": { - "version": "1.0.0", - "resolved": 
"https://registry.npmjs.org/recma-build-jsx/-/recma-build-jsx-1.0.0.tgz", - "integrity": "sha512-8GtdyqaBcDfva+GUKDr3nev3VpKAhup1+RvkMvUxURHpW7QyIvk9F5wz7Vzo06CEMSilw6uArgRqhpiUcWp8ew==", - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0", - "estree-util-build-jsx": "^3.0.0", - "vfile": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/recma-jsx": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/recma-jsx/-/recma-jsx-1.0.1.tgz", - "integrity": "sha512-huSIy7VU2Z5OLv6oFLosQGGDqPqdO1iq6bWNAdhzMxSJP7RAso4fCZ1cKu8j9YHCZf3TPrq4dw3okhrylgcd7w==", - "license": "MIT", - "dependencies": { - "acorn-jsx": "^5.0.0", - "estree-util-to-js": "^2.0.0", - "recma-parse": "^1.0.0", - "recma-stringify": "^1.0.0", - "unified": "^11.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - }, - "peerDependencies": { - "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" - } - }, - "node_modules/recma-parse": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/recma-parse/-/recma-parse-1.0.0.tgz", - "integrity": "sha512-OYLsIGBB5Y5wjnSnQW6t3Xg7q3fQ7FWbw/vcXtORTnyaSFscOtABg+7Pnz6YZ6c27fG1/aN8CjfwoUEUIdwqWQ==", - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0", - "esast-util-from-js": "^2.0.0", - "unified": "^11.0.0", - "vfile": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/recma-stringify": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/recma-stringify/-/recma-stringify-1.0.0.tgz", - "integrity": "sha512-cjwII1MdIIVloKvC9ErQ+OgAtwHBmcZ0Bg4ciz78FtbT8In39aAYbaA7zvxQ61xVMSPE8WxhLwLbhif4Js2C+g==", - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0", - "estree-util-to-js": "^2.0.0", - "unified": "^11.0.0", - "vfile": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": 
"https://opencollective.com/unified" - } - }, - "node_modules/reflect.getprototypeof": { - "version": "1.0.10", - "resolved": "https://registry.npmjs.org/reflect.getprototypeof/-/reflect.getprototypeof-1.0.10.tgz", - "integrity": "sha512-00o4I+DVrefhv+nX0ulyi3biSHCPDe+yLv5o/p6d/UVlirijB8E16FtfwSAi4g3tcqrQ4lRAqQSoFEZJehYEcw==", - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "define-properties": "^1.2.1", - "es-abstract": "^1.23.9", - "es-errors": "^1.3.0", - "es-object-atoms": "^1.0.0", - "get-intrinsic": "^1.2.7", - "get-proto": "^1.0.1", - "which-builtin-type": "^1.2.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/regex": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/regex/-/regex-6.1.0.tgz", - "integrity": "sha512-6VwtthbV4o/7+OaAF9I5L5V3llLEsoPyq9P1JVXkedTP33c7MfCG0/5NOPcSJn0TzXcG9YUrR0gQSWioew3LDg==", - "license": "MIT", - "dependencies": { - "regex-utilities": "^2.3.0" - } - }, - "node_modules/regex-recursion": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/regex-recursion/-/regex-recursion-6.0.2.tgz", - "integrity": "sha512-0YCaSCq2VRIebiaUviZNs0cBz1kg5kVS2UKUfNIx8YVs1cN3AV7NTctO5FOKBA+UT2BPJIWZauYHPqJODG50cg==", - "license": "MIT", - "dependencies": { - "regex-utilities": "^2.3.0" - } - }, - "node_modules/regex-utilities": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/regex-utilities/-/regex-utilities-2.3.0.tgz", - "integrity": "sha512-8VhliFJAWRaUiVvREIiW2NXXTmHs4vMNnSzuJVhscgmGav3g9VDxLrQndI3dZZVVdp0ZO/5v0xmX516/7M9cng==", - "license": "MIT" - }, - "node_modules/regexp.prototype.flags": { - "version": "1.5.4", - "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.5.4.tgz", - "integrity": "sha512-dYqgNSZbDwkaJ2ceRd9ojCGjBq+mOm9LmtXnAnEGyHhN/5R7iDW2TRw3h+o/jCFxus3P2LfWIIiwowAjANm7IA==", - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - 
"define-properties": "^1.2.1", - "es-errors": "^1.3.0", - "get-proto": "^1.0.1", - "gopd": "^1.2.0", - "set-function-name": "^2.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/rehype-katex": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/rehype-katex/-/rehype-katex-7.0.1.tgz", - "integrity": "sha512-OiM2wrZ/wuhKkigASodFoo8wimG3H12LWQaH8qSPVJn9apWKFSH3YOCtbKpBorTVw/eI7cuT21XBbvwEswbIOA==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "@types/katex": "^0.16.0", - "hast-util-from-html-isomorphic": "^2.0.0", - "hast-util-to-text": "^4.0.0", - "katex": "^0.16.0", - "unist-util-visit-parents": "^6.0.0", - "vfile": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/rehype-minify-whitespace": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/rehype-minify-whitespace/-/rehype-minify-whitespace-6.0.2.tgz", - "integrity": "sha512-Zk0pyQ06A3Lyxhe9vGtOtzz3Z0+qZ5+7icZ/PL/2x1SHPbKao5oB/g/rlc6BCTajqBb33JcOe71Ye1oFsuYbnw==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "hast-util-minify-whitespace": "^1.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/rehype-parse": { - "version": "9.0.1", - "resolved": "https://registry.npmjs.org/rehype-parse/-/rehype-parse-9.0.1.tgz", - "integrity": "sha512-ksCzCD0Fgfh7trPDxr2rSylbwq9iYDkSn8TCDmEJ49ljEUBxDVCzCHv7QNzZOfODanX4+bWQ4WZqLCRWYLfhag==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "hast-util-from-html": "^2.0.0", - "unified": "^11.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/rehype-recma": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/rehype-recma/-/rehype-recma-1.0.0.tgz", - "integrity": 
"sha512-lqA4rGUf1JmacCNWWZx0Wv1dHqMwxzsDWYMTowuplHF3xH0N/MmrZ/G3BDZnzAkRmxDadujCjaKM2hqYdCBOGw==", - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0", - "@types/hast": "^3.0.0", - "hast-util-to-estree": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/rehype-stringify": { - "version": "10.0.1", - "resolved": "https://registry.npmjs.org/rehype-stringify/-/rehype-stringify-10.0.1.tgz", - "integrity": "sha512-k9ecfXHmIPuFVI61B9DeLPN0qFHfawM6RsuX48hoqlaKSF61RskNjSm1lI8PhBEM0MRdLxVVm4WmTqJQccH9mA==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "hast-util-to-html": "^9.0.0", - "unified": "^11.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/remark": { - "version": "15.0.1", - "resolved": "https://registry.npmjs.org/remark/-/remark-15.0.1.tgz", - "integrity": "sha512-Eht5w30ruCXgFmxVUSlNWQ9iiimq07URKeFS3hNc8cUWy1llX4KDWfyEDZRycMc+znsN9Ux5/tJ/BFdgdOwA3A==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0", - "remark-parse": "^11.0.0", - "remark-stringify": "^11.0.0", - "unified": "^11.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/remark-frontmatter": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/remark-frontmatter/-/remark-frontmatter-5.0.0.tgz", - "integrity": "sha512-XTFYvNASMe5iPN0719nPrdItC9aU0ssC4v14mH1BCi1u0n1gAocqcujWUrByftZTbLhRtiKRyjYTSIOcr69UVQ==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0", - "mdast-util-frontmatter": "^2.0.0", - "micromark-extension-frontmatter": "^2.0.0", - "unified": "^11.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/remark-gfm": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/remark-gfm/-/remark-gfm-4.0.0.tgz", - 
"integrity": "sha512-U92vJgBPkbw4Zfu/IiW2oTZLSL3Zpv+uI7My2eq8JxKgqraFdU8YUGicEJCEgSbeaG+QDFqIcwwfMTOEelPxuA==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0", - "mdast-util-gfm": "^3.0.0", - "micromark-extension-gfm": "^3.0.0", - "remark-parse": "^11.0.0", - "remark-stringify": "^11.0.0", - "unified": "^11.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/remark-math": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/remark-math/-/remark-math-6.0.0.tgz", - "integrity": "sha512-MMqgnP74Igy+S3WwnhQ7kqGlEerTETXMvJhrUzDikVZ2/uogJCb+WHUg97hK9/jcfc0dkD73s3LN8zU49cTEtA==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0", - "mdast-util-math": "^3.0.0", - "micromark-extension-math": "^3.0.0", - "unified": "^11.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/remark-mdx": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/remark-mdx/-/remark-mdx-3.1.0.tgz", - "integrity": "sha512-Ngl/H3YXyBV9RcRNdlYsZujAmhsxwzxpDzpDEhFBVAGthS4GDgnctpDjgFl/ULx5UEDzqtW1cyBSNKqYYrqLBA==", - "license": "MIT", - "dependencies": { - "mdast-util-mdx": "^3.0.0", - "micromark-extension-mdxjs": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/remark-mdx-remove-esm": { - "version": "1.3.1", - "resolved": "https://registry.npmjs.org/remark-mdx-remove-esm/-/remark-mdx-remove-esm-1.3.1.tgz", - "integrity": "sha512-POa8abdiuicD2e+zQkclxzJa5JEGLtV8XIOFVvisnGuw4l4xd6dfQozedwqR8JTeXQmxLebvYhlbwHoQP9RWkw==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.4", - "mdast-util-mdxjs-esm": "^2.0.1", - "unist-util-remove": "^4.0.0" - }, - "peerDependencies": { - "unified": "^11" - } - }, - "node_modules/remark-parse": { - "version": "11.0.0", - "resolved": 
"https://registry.npmjs.org/remark-parse/-/remark-parse-11.0.0.tgz", - "integrity": "sha512-FCxlKLNGknS5ba/1lmpYijMUzX2esxW5xQqjWxw2eHFfS2MSdaHVINFmhjo+qN1WhZhNimq0dZATN9pH0IDrpA==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0", - "mdast-util-from-markdown": "^2.0.0", - "micromark-util-types": "^2.0.0", - "unified": "^11.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/remark-rehype": { - "version": "11.1.1", - "resolved": "https://registry.npmjs.org/remark-rehype/-/remark-rehype-11.1.1.tgz", - "integrity": "sha512-g/osARvjkBXb6Wo0XvAeXQohVta8i84ACbenPpoSsxTOQH/Ae0/RGP4WZgnMH5pMLpsj4FG7OHmcIcXxpza8eQ==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "@types/mdast": "^4.0.0", - "mdast-util-to-hast": "^13.0.0", - "unified": "^11.0.0", - "vfile": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/remark-smartypants": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/remark-smartypants/-/remark-smartypants-3.0.2.tgz", - "integrity": "sha512-ILTWeOriIluwEvPjv67v7Blgrcx+LZOkAUVtKI3putuhlZm84FnqDORNXPPm+HY3NdZOMhyDwZ1E+eZB/Df5dA==", - "license": "MIT", - "dependencies": { - "retext": "^9.0.0", - "retext-smartypants": "^6.0.0", - "unified": "^11.0.4", - "unist-util-visit": "^5.0.0" - }, - "engines": { - "node": ">=16.0.0" - } - }, - "node_modules/remark-stringify": { - "version": "11.0.0", - "resolved": "https://registry.npmjs.org/remark-stringify/-/remark-stringify-11.0.0.tgz", - "integrity": "sha512-1OSmLd3awB/t8qdoEOMazZkNsfVTeY4fTsgzcQFdXNq8ToTN4ZGwrMnlda4K6smTFKD+GRV6O48i6Z4iKgPPpw==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0", - "mdast-util-to-markdown": "^2.0.0", - "unified": "^11.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/require-directory": { - 
"version": "2.1.1", - "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", - "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/require-from-string": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", - "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/resolve": { - "version": "1.22.11", - "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.11.tgz", - "integrity": "sha512-RfqAvLnMl313r7c9oclB1HhUEAezcpLjz95wFH4LVuhk9JF/r22qmVP9AMmOU4vMX7Q8pN8jwNg/CSpdFnMjTQ==", - "license": "MIT", - "dependencies": { - "is-core-module": "^2.16.1", - "path-parse": "^1.0.7", - "supports-preserve-symlinks-flag": "^1.0.0" - }, - "bin": { - "resolve": "bin/resolve" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/resolve-alpn": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/resolve-alpn/-/resolve-alpn-1.2.1.tgz", - "integrity": "sha512-0a1F4l73/ZFZOakJnQ3FvkJ2+gSTQWz/r2KE5OdDY0TxPm5h4GkqkWWfM47T7HsbnOtcJVEF4epCVy6u7Q3K+g==", - "license": "MIT" - }, - "node_modules/resolve-from": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", - "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", - "license": "MIT", - "engines": { - "node": ">=4" - } - }, - "node_modules/responselike": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/responselike/-/responselike-3.0.0.tgz", - "integrity": 
"sha512-40yHxbNcl2+rzXvZuVkrYohathsSJlMTXKryG5y8uciHv1+xDLHQpgjG64JUO9nrEq2jGLH6IZ8BcZyw3wrweg==", - "license": "MIT", - "dependencies": { - "lowercase-keys": "^3.0.0" - }, - "engines": { - "node": ">=14.16" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/restore-cursor": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/restore-cursor/-/restore-cursor-4.0.0.tgz", - "integrity": "sha512-I9fPXU9geO9bHOt9pHHOhOkYerIMsmVaWB0rA2AI9ERh/+x/i7MV5HKBNrg+ljO5eoPVgCcnFuRjJ9uH6I/3eg==", - "license": "MIT", - "dependencies": { - "onetime": "^5.1.0", - "signal-exit": "^3.0.2" - }, - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/restore-cursor/node_modules/signal-exit": { - "version": "3.0.7", - "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", - "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==", - "license": "ISC" - }, - "node_modules/retext": { - "version": "9.0.0", - "resolved": "https://registry.npmjs.org/retext/-/retext-9.0.0.tgz", - "integrity": "sha512-sbMDcpHCNjvlheSgMfEcVrZko3cDzdbe1x/e7G66dFp0Ff7Mldvi2uv6JkJQzdRcvLYE8CA8Oe8siQx8ZOgTcA==", - "license": "MIT", - "dependencies": { - "@types/nlcst": "^2.0.0", - "retext-latin": "^4.0.0", - "retext-stringify": "^4.0.0", - "unified": "^11.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/retext-latin": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/retext-latin/-/retext-latin-4.0.0.tgz", - "integrity": "sha512-hv9woG7Fy0M9IlRQloq/N6atV82NxLGveq+3H2WOi79dtIYWN8OaxogDm77f8YnVXJL2VD3bbqowu5E3EMhBYA==", - "license": "MIT", - "dependencies": { - "@types/nlcst": "^2.0.0", - "parse-latin": "^7.0.0", - "unified": "^11.0.0" - }, - "funding": { - "type": "opencollective", - "url": 
"https://opencollective.com/unified" - } - }, - "node_modules/retext-smartypants": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/retext-smartypants/-/retext-smartypants-6.2.0.tgz", - "integrity": "sha512-kk0jOU7+zGv//kfjXEBjdIryL1Acl4i9XNkHxtM7Tm5lFiCog576fjNC9hjoR7LTKQ0DsPWy09JummSsH1uqfQ==", - "license": "MIT", - "dependencies": { - "@types/nlcst": "^2.0.0", - "nlcst-to-string": "^4.0.0", - "unist-util-visit": "^5.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/retext-stringify": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/retext-stringify/-/retext-stringify-4.0.0.tgz", - "integrity": "sha512-rtfN/0o8kL1e+78+uxPTqu1Klt0yPzKuQ2BfWwwfgIUSayyzxpM1PJzkKt4V8803uB9qSy32MvI7Xep9khTpiA==", - "license": "MIT", - "dependencies": { - "@types/nlcst": "^2.0.0", - "nlcst-to-string": "^4.0.0", - "unified": "^11.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/reusify": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.1.0.tgz", - "integrity": "sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==", - "license": "MIT", - "engines": { - "iojs": ">=1.0.0", - "node": ">=0.10.0" - } - }, - "node_modules/run-async": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/run-async/-/run-async-3.0.0.tgz", - "integrity": "sha512-540WwVDOMxA6dN6We19EcT9sc3hkXPw5mzRNGM3FkdN/vtE9NFvj5lFAPNwUDmJjXidm3v7TC1cTE7t17Ulm1Q==", - "license": "MIT", - "engines": { - "node": ">=0.12.0" - } - }, - "node_modules/run-parallel": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", - "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==", - "funding": [ - { - "type": "github", - "url": 
"https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT", - "dependencies": { - "queue-microtask": "^1.2.2" - } - }, - "node_modules/rxjs": { - "version": "7.8.2", - "resolved": "https://registry.npmjs.org/rxjs/-/rxjs-7.8.2.tgz", - "integrity": "sha512-dhKf903U/PQZY6boNNtAGdWbG85WAbjT/1xYoZIC7FAY0yWapOBQVsVrDl58W86//e1VpMNBtRV4MaXfdMySFA==", - "license": "Apache-2.0", - "dependencies": { - "tslib": "^2.1.0" - } - }, - "node_modules/rxjs/node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD" - }, - "node_modules/safe-array-concat": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/safe-array-concat/-/safe-array-concat-1.1.3.tgz", - "integrity": "sha512-AURm5f0jYEOydBj7VQlVvDrjeFgthDdEF5H1dP+6mNpoXOMo1quQqJ4wvJDyRZ9+pO3kGWoOdmV08cSv2aJV6Q==", - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "call-bound": "^1.0.2", - "get-intrinsic": "^1.2.6", - "has-symbols": "^1.1.0", - "isarray": "^2.0.5" - }, - "engines": { - "node": ">=0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/safe-buffer": { - "version": "5.2.1", - "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", - "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT" - }, - "node_modules/safe-push-apply": { - "version": "1.0.0", - "resolved": 
"https://registry.npmjs.org/safe-push-apply/-/safe-push-apply-1.0.0.tgz", - "integrity": "sha512-iKE9w/Z7xCzUMIZqdBsp6pEQvwuEebH4vdpjcDWnyzaI6yl6O9FHvVpmGelvEHNsoY6wGblkxR6Zty/h00WiSA==", - "license": "MIT", - "dependencies": { - "es-errors": "^1.3.0", - "isarray": "^2.0.5" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/safe-regex-test": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/safe-regex-test/-/safe-regex-test-1.1.0.tgz", - "integrity": "sha512-x/+Cz4YrimQxQccJf5mKEbIa1NzeCRNI5Ecl/ekmlYaampdNLPalVyIcCZNNH3MvmqBugV5TMYZXv0ljslUlaw==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.2", - "es-errors": "^1.3.0", - "is-regex": "^1.2.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/safe-stable-stringify": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/safe-stable-stringify/-/safe-stable-stringify-1.1.1.tgz", - "integrity": "sha512-ERq4hUjKDbJfE4+XtZLFPCDi8Vb1JqaxAPTxWFLBx8XcAlf9Bda/ZJdVezs/NAfsMQScyIlUMx+Yeu7P7rx5jw==", - "license": "MIT" - }, - "node_modules/safer-buffer": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", - "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", - "license": "MIT" - }, - "node_modules/sax": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/sax/-/sax-1.6.0.tgz", - "integrity": "sha512-6R3J5M4AcbtLUdZmRv2SygeVaM7IhrLXu9BmnOGmmACak8fiUtOsYNWUS4uK7upbmHIBbLBeFeI//477BKLBzA==", - "license": "BlueOak-1.0.0", - "engines": { - "node": ">=11.0.0" - } - }, - "node_modules/scheduler": { - "version": "0.26.0", - "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.26.0.tgz", - "integrity": "sha512-NlHwttCI/l5gCPR3D1nNXtWABUmBwvZpEQiD4IXSbIDq8BzLIK/7Ir5gTFSGZDUu37K5cMNp0hFtzO38sC7gWA==", - 
"license": "MIT" - }, - "node_modules/semver": { - "version": "7.7.2", - "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.2.tgz", - "integrity": "sha512-RF0Fw+rO5AMf9MAyaRXI4AV0Ulj5lMHqVxxdSgiVbixSCXoEmmX/jk0CuJw4+3SqroYO9VoUh+HcuJivvtJemA==", - "license": "ISC", - "bin": { - "semver": "bin/semver.js" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/send": { - "version": "0.18.0", - "resolved": "https://registry.npmjs.org/send/-/send-0.18.0.tgz", - "integrity": "sha512-qqWzuOjSFOuqPjFe4NOsMLafToQQwBSOEpS+FwEt3A2V3vKubTquT3vmLTQpFgMXp8AlFWFuP1qKaJZOtPpVXg==", - "license": "MIT", - "dependencies": { - "debug": "2.6.9", - "depd": "2.0.0", - "destroy": "1.2.0", - "encodeurl": "~1.0.2", - "escape-html": "~1.0.3", - "etag": "~1.8.1", - "fresh": "0.5.2", - "http-errors": "2.0.0", - "mime": "1.6.0", - "ms": "2.1.3", - "on-finished": "2.4.1", - "range-parser": "~1.2.1", - "statuses": "2.0.1" - }, - "engines": { - "node": ">= 0.8.0" - } - }, - "node_modules/send/node_modules/debug": { - "version": "2.6.9", - "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", - "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", - "license": "MIT", - "dependencies": { - "ms": "2.0.0" - } - }, - "node_modules/send/node_modules/debug/node_modules/ms": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", - "license": "MIT" - }, - "node_modules/serialize-error": { - "version": "13.0.1", - "resolved": "https://registry.npmjs.org/serialize-error/-/serialize-error-13.0.1.tgz", - "integrity": "sha512-bBZaRwLH9PN5HbLCjPId4dP5bNGEtumcErgOX952IsvOhVPrm3/AeK1y0UHA/QaPG701eg0yEnOKsCOC6X/kaA==", - "license": "MIT", - "dependencies": { - "non-error": "^0.1.0", - "type-fest": "^5.4.1" - }, - "engines": { - "node": ">=20" - }, - "funding": { - "url": 
"https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/serialize-error/node_modules/type-fest": { - "version": "5.5.0", - "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-5.5.0.tgz", - "integrity": "sha512-PlBfpQwiUvGViBNX84Yxwjsdhd1TUlXr6zjX7eoirtCPIr08NAmxwa+fcYBTeRQxHo9YC9wwF3m9i700sHma8g==", - "license": "(MIT OR CC0-1.0)", - "dependencies": { - "tagged-tag": "^1.0.0" - }, - "engines": { - "node": ">=20" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/serve-static": { - "version": "1.15.0", - "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.15.0.tgz", - "integrity": "sha512-XGuRDNjXUijsUL0vl6nSD7cwURuzEgglbOaFuZM9g3kwDXOWVTck0jLzjPzGD+TazWbboZYu52/9/XPdUgne9g==", - "license": "MIT", - "dependencies": { - "encodeurl": "~1.0.2", - "escape-html": "~1.0.3", - "parseurl": "~1.3.3", - "send": "0.18.0" - }, - "engines": { - "node": ">= 0.8.0" - } - }, - "node_modules/set-function-length": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz", - "integrity": "sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==", - "license": "MIT", - "dependencies": { - "define-data-property": "^1.1.4", - "es-errors": "^1.3.0", - "function-bind": "^1.1.2", - "get-intrinsic": "^1.2.4", - "gopd": "^1.0.1", - "has-property-descriptors": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/set-function-name": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/set-function-name/-/set-function-name-2.0.2.tgz", - "integrity": "sha512-7PGFlmtwsEADb0WYyvCMa1t+yke6daIG4Wirafur5kcf+MhUnPms1UeR0CKQdTZD81yESwMHbtn+TR+dMviakQ==", - "license": "MIT", - "dependencies": { - "define-data-property": "^1.1.4", - "es-errors": "^1.3.0", - "functions-have-names": "^1.2.3", - "has-property-descriptors": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - } - }, - 
"node_modules/set-proto": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/set-proto/-/set-proto-1.0.0.tgz", - "integrity": "sha512-RJRdvCo6IAnPdsvP/7m6bsQqNnn1FCBX5ZNtFL98MmFF/4xAIJTIg1YbHW5DC2W5SKZanrC6i4HsJqlajw/dZw==", - "license": "MIT", - "dependencies": { - "dunder-proto": "^1.0.1", - "es-errors": "^1.3.0", - "es-object-atoms": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/setprototypeof": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", - "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==", - "license": "ISC" - }, - "node_modules/sharp": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.5.tgz", - "integrity": "sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg==", - "hasInstallScript": true, - "license": "Apache-2.0", - "dependencies": { - "@img/colour": "^1.0.0", - "detect-libc": "^2.1.2", - "semver": "^7.7.3" - }, - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-darwin-arm64": "0.34.5", - "@img/sharp-darwin-x64": "0.34.5", - "@img/sharp-libvips-darwin-arm64": "1.2.4", - "@img/sharp-libvips-darwin-x64": "1.2.4", - "@img/sharp-libvips-linux-arm": "1.2.4", - "@img/sharp-libvips-linux-arm64": "1.2.4", - "@img/sharp-libvips-linux-ppc64": "1.2.4", - "@img/sharp-libvips-linux-riscv64": "1.2.4", - "@img/sharp-libvips-linux-s390x": "1.2.4", - "@img/sharp-libvips-linux-x64": "1.2.4", - "@img/sharp-libvips-linuxmusl-arm64": "1.2.4", - "@img/sharp-libvips-linuxmusl-x64": "1.2.4", - "@img/sharp-linux-arm": "0.34.5", - "@img/sharp-linux-arm64": "0.34.5", - "@img/sharp-linux-ppc64": "0.34.5", - "@img/sharp-linux-riscv64": "0.34.5", - "@img/sharp-linux-s390x": "0.34.5", - "@img/sharp-linux-x64": "0.34.5", - 
"@img/sharp-linuxmusl-arm64": "0.34.5", - "@img/sharp-linuxmusl-x64": "0.34.5", - "@img/sharp-wasm32": "0.34.5", - "@img/sharp-win32-arm64": "0.34.5", - "@img/sharp-win32-ia32": "0.34.5", - "@img/sharp-win32-x64": "0.34.5" - } - }, - "node_modules/sharp-ico": { - "version": "0.1.5", - "resolved": "https://registry.npmjs.org/sharp-ico/-/sharp-ico-0.1.5.tgz", - "integrity": "sha512-a3jODQl82NPp1d5OYb0wY+oFaPk7AvyxipIowCHk7pBsZCWgbe0yAkU2OOXdoH0ENyANhyOQbs9xkAiRHcF02Q==", - "license": "MIT", - "dependencies": { - "decode-ico": "*", - "ico-endec": "*", - "sharp": "*" - } - }, - "node_modules/sharp/node_modules/semver": { - "version": "7.7.4", - "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", - "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", - "license": "ISC", - "bin": { - "semver": "bin/semver.js" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/shebang-command": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", - "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", - "license": "MIT", - "dependencies": { - "shebang-regex": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/shebang-regex": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", - "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/shiki": { - "version": "3.23.0", - "resolved": "https://registry.npmjs.org/shiki/-/shiki-3.23.0.tgz", - "integrity": "sha512-55Dj73uq9ZXL5zyeRPzHQsK7Nbyt6Y10k5s7OjuFZGMhpp4r/rsLBH0o/0fstIzX1Lep9VxefWljK/SKCzygIA==", - "license": "MIT", - "dependencies": { - "@shikijs/core": "3.23.0", - "@shikijs/engine-javascript": "3.23.0", - "@shikijs/engine-oniguruma": 
"3.23.0", - "@shikijs/langs": "3.23.0", - "@shikijs/themes": "3.23.0", - "@shikijs/types": "3.23.0", - "@shikijs/vscode-textmate": "^10.0.2", - "@types/hast": "^3.0.4" - } - }, - "node_modules/side-channel": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", - "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", - "license": "MIT", - "dependencies": { - "es-errors": "^1.3.0", - "object-inspect": "^1.13.3", - "side-channel-list": "^1.0.0", - "side-channel-map": "^1.0.1", - "side-channel-weakmap": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/side-channel-list": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz", - "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", - "license": "MIT", - "dependencies": { - "es-errors": "^1.3.0", - "object-inspect": "^1.13.3" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/side-channel-map": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz", - "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.2", - "es-errors": "^1.3.0", - "get-intrinsic": "^1.2.5", - "object-inspect": "^1.13.3" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/side-channel-weakmap": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz", - "integrity": 
"sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.2", - "es-errors": "^1.3.0", - "get-intrinsic": "^1.2.5", - "object-inspect": "^1.13.3", - "side-channel-map": "^1.0.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/signal-exit": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", - "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", - "license": "ISC", - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/simple-concat": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", - "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT", - "optional": true - }, - "node_modules/simple-eval": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/simple-eval/-/simple-eval-1.0.1.tgz", - "integrity": "sha512-LH7FpTAkeD+y5xQC4fzS+tFtaNlvt3Ib1zKzvhjv/Y+cioV4zIuw4IZr2yhRLu67CWL7FR9/6KXKnjRoZTvGGQ==", - "license": "MIT", - "dependencies": { - "jsep": "^1.3.6" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/simple-get": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz", - "integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - 
"type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT", - "optional": true, - "dependencies": { - "decompress-response": "^6.0.0", - "once": "^1.3.1", - "simple-concat": "^1.0.0" - } - }, - "node_modules/simple-swizzle": { - "version": "0.2.4", - "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.4.tgz", - "integrity": "sha512-nAu1WFPQSMNr2Zn9PGSZK9AGn4t/y97lEm+MXTtUDwfP0ksAIX4nO+6ruD9Jwut4C49SB1Ws+fbXsm/yScWOHw==", - "license": "MIT", - "dependencies": { - "is-arrayish": "^0.3.1" - } - }, - "node_modules/simple-swizzle/node_modules/is-arrayish": { - "version": "0.3.4", - "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.4.tgz", - "integrity": "sha512-m6UrgzFVUYawGBh1dUsWR5M2Clqic9RVXC/9f8ceNlv2IcO9j9J/z8UoCLPqtsPBFNzEpfR3xftohbfqDx8EQA==", - "license": "MIT" - }, - "node_modules/slice-ansi": { - "version": "7.1.2", - "resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-7.1.2.tgz", - "integrity": "sha512-iOBWFgUX7caIZiuutICxVgX1SdxwAVFFKwt1EvMYYec/NWO5meOJ6K5uQxhrYBdQJne4KxiqZc+KptFOWFSI9w==", - "license": "MIT", - "dependencies": { - "ansi-styles": "^6.2.1", - "is-fullwidth-code-point": "^5.0.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/chalk/slice-ansi?sponsor=1" - } - }, - "node_modules/smart-buffer": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.2.0.tgz", - "integrity": "sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==", - "license": "MIT", - "engines": { - "node": ">= 6.0.0", - "npm": ">= 3.0.0" - } - }, - "node_modules/socket.io": { - "version": "4.7.2", - "resolved": "https://registry.npmjs.org/socket.io/-/socket.io-4.7.2.tgz", - "integrity": "sha512-bvKVS29/I5fl2FGLNHuXlQaUH/BlzX1IN6S+NKLNZpBsPZIDH+90eQmCs2Railn4YUiww4SzUedJ6+uzwFnKLw==", - "license": "MIT", - 
"dependencies": { - "accepts": "~1.3.4", - "base64id": "~2.0.0", - "cors": "~2.8.5", - "debug": "~4.3.2", - "engine.io": "~6.5.2", - "socket.io-adapter": "~2.5.2", - "socket.io-parser": "~4.2.4" - }, - "engines": { - "node": ">=10.2.0" - } - }, - "node_modules/socket.io-adapter": { - "version": "2.5.6", - "resolved": "https://registry.npmjs.org/socket.io-adapter/-/socket.io-adapter-2.5.6.tgz", - "integrity": "sha512-DkkO/dz7MGln0dHn5bmN3pPy+JmywNICWrJqVWiVOyvXjWQFIv9c2h24JrQLLFJ2aQVQf/Cvl1vblnd4r2apLQ==", - "license": "MIT", - "dependencies": { - "debug": "~4.4.1", - "ws": "~8.18.3" - } - }, - "node_modules/socket.io-adapter/node_modules/ws": { - "version": "8.18.3", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz", - "integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==", - "license": "MIT", - "engines": { - "node": ">=10.0.0" - }, - "peerDependencies": { - "bufferutil": "^4.0.1", - "utf-8-validate": ">=5.0.2" - }, - "peerDependenciesMeta": { - "bufferutil": { - "optional": true - }, - "utf-8-validate": { - "optional": true - } - } - }, - "node_modules/socket.io-parser": { - "version": "4.2.6", - "resolved": "https://registry.npmjs.org/socket.io-parser/-/socket.io-parser-4.2.6.tgz", - "integrity": "sha512-asJqbVBDsBCJx0pTqw3WfesSY0iRX+2xzWEWzrpcH7L6fLzrhyF8WPI8UaeM4YCuDfpwA/cgsdugMsmtz8EJeg==", - "license": "MIT", - "dependencies": { - "@socket.io/component-emitter": "~3.1.0", - "debug": "~4.4.1" - }, - "engines": { - "node": ">=10.0.0" - } - }, - "node_modules/socket.io/node_modules/debug": { - "version": "4.3.7", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", - "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", - "license": "MIT", - "dependencies": { - "ms": "^2.1.3" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/socks": 
{ - "version": "2.8.7", - "resolved": "https://registry.npmjs.org/socks/-/socks-2.8.7.tgz", - "integrity": "sha512-HLpt+uLy/pxB+bum/9DzAgiKS8CX1EvbWxI4zlmgGCExImLdiad2iCwXT5Z4c9c3Eq8rP2318mPW2c+QbtjK8A==", - "license": "MIT", - "dependencies": { - "ip-address": "^10.0.1", - "smart-buffer": "^4.2.0" - }, - "engines": { - "node": ">= 10.0.0", - "npm": ">= 3.0.0" - } - }, - "node_modules/socks-proxy-agent": { - "version": "8.0.5", - "resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.5.tgz", - "integrity": "sha512-HehCEsotFqbPW9sJ8WVYB6UbmIMv7kUUORIF2Nncq4VQvBfNBLibW9YZR5dlYCSUhwcD628pRllm7n+E+YTzJw==", - "license": "MIT", - "dependencies": { - "agent-base": "^7.1.2", - "debug": "^4.3.4", - "socks": "^2.8.3" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/source-map": { - "version": "0.7.6", - "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.7.6.tgz", - "integrity": "sha512-i5uvt8C3ikiWeNZSVZNWcfZPItFQOsYTUAOkcUPGd8DqDy1uOUikjt5dG+uRlwyvR108Fb9DOd4GvXfT0N2/uQ==", - "license": "BSD-3-Clause", - "engines": { - "node": ">= 12" - } - }, - "node_modules/source-map-js": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", - "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", - "license": "BSD-3-Clause", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/space-separated-tokens": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/space-separated-tokens/-/space-separated-tokens-2.0.2.tgz", - "integrity": "sha512-PEGlAwrG8yXGXRjW32fGbg66JAlOAwbObuqVoJpv/mRgoWDQfgH1wDPvtzWyUSNAXBGSk8h755YDbbcEy3SH2Q==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/sprintf-js": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", - "integrity": 
"sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==", - "license": "BSD-3-Clause" - }, - "node_modules/stack-utils": { - "version": "2.0.6", - "resolved": "https://registry.npmjs.org/stack-utils/-/stack-utils-2.0.6.tgz", - "integrity": "sha512-XlkWvfIm6RmsWtNJx+uqtKLS8eqFbxUg0ZzLXqY0caEy9l7hruX8IpiDnjsLavoBgqCCR71TqWO8MaXYheJ3RQ==", - "license": "MIT", - "dependencies": { - "escape-string-regexp": "^2.0.0" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/stack-utils/node_modules/escape-string-regexp": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-2.0.0.tgz", - "integrity": "sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/statuses": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.1.tgz", - "integrity": "sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ==", - "license": "MIT", - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/stop-iteration-iterator": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/stop-iteration-iterator/-/stop-iteration-iterator-1.1.0.tgz", - "integrity": "sha512-eLoXW/DHyl62zxY4SCaIgnRhuMr6ri4juEYARS8E6sCEqzKpOiE521Ucofdx+KnDZl5xmvGYaaKCk5FEOxJCoQ==", - "license": "MIT", - "dependencies": { - "es-errors": "^1.3.0", - "internal-slot": "^1.1.0" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/streamx": { - "version": "2.25.0", - "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.25.0.tgz", - "integrity": "sha512-0nQuG6jf1w+wddNEEXCF4nTg3LtufWINB5eFEN+5TNZW7KWJp6x87+JFL43vaAUPyCfH1wID+mNVyW6OHtFamg==", - "license": "MIT", - "dependencies": { - "events-universal": "^1.0.0", - "fast-fifo": "^1.3.2", - "text-decoder": "^1.1.0" - } - }, - "node_modules/string_decoder": { - 
"version": "1.3.0", - "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", - "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", - "license": "MIT", - "optional": true, - "dependencies": { - "safe-buffer": "~5.2.0" - } - }, - "node_modules/string-width": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-7.2.0.tgz", - "integrity": "sha512-tsaTIkKW9b4N+AEj+SVA+WhJzV7/zMhcSu78mLKWSk7cXMOSHsBKFWUs0fWwq8QyK3MgJBQRX6Gbi4kYbdvGkQ==", - "license": "MIT", - "dependencies": { - "emoji-regex": "^10.3.0", - "get-east-asian-width": "^1.0.0", - "strip-ansi": "^7.1.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/string.prototype.trim": { - "version": "1.2.10", - "resolved": "https://registry.npmjs.org/string.prototype.trim/-/string.prototype.trim-1.2.10.tgz", - "integrity": "sha512-Rs66F0P/1kedk5lyYyH9uBzuiI/kNRmwJAR9quK6VOtIpZ2G+hMZd+HQbbv25MgCA6gEffoMZYxlTod4WcdrKA==", - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "call-bound": "^1.0.2", - "define-data-property": "^1.1.4", - "define-properties": "^1.2.1", - "es-abstract": "^1.23.5", - "es-object-atoms": "^1.0.0", - "has-property-descriptors": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/string.prototype.trimend": { - "version": "1.0.9", - "resolved": "https://registry.npmjs.org/string.prototype.trimend/-/string.prototype.trimend-1.0.9.tgz", - "integrity": "sha512-G7Ok5C6E/j4SGfyLCloXTrngQIQU3PWtXGst3yM7Bea9FRURf1S42ZHlZZtsNque2FN2PoUhfZXYLNWwEr4dLQ==", - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "call-bound": "^1.0.2", - "define-properties": "^1.2.1", - "es-object-atoms": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": 
"https://github.com/sponsors/ljharb" - } - }, - "node_modules/string.prototype.trimstart": { - "version": "1.0.8", - "resolved": "https://registry.npmjs.org/string.prototype.trimstart/-/string.prototype.trimstart-1.0.8.tgz", - "integrity": "sha512-UXSH262CSZY1tfu3G3Secr6uGLCFVPMhIqHjlgCUtCCcgihYc/xKs9djMTMUOb2j1mVSeU8EU6NWc/iQKU6Gfg==", - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.7", - "define-properties": "^1.2.1", - "es-object-atoms": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/stringify-entities": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/stringify-entities/-/stringify-entities-4.0.4.tgz", - "integrity": "sha512-IwfBptatlO+QCJUo19AqvrPNqlVMpW9YEL2LIVY+Rpv2qsjCGxaDLNRgeGsQWJhfItebuJhsGSLjaBbNSQ+ieg==", - "license": "MIT", - "dependencies": { - "character-entities-html4": "^2.0.0", - "character-entities-legacy": "^3.0.0" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/strip-ansi": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.2.0.tgz", - "integrity": "sha512-yDPMNjp4WyfYBkHnjIRLfca1i6KMyGCtsVgoKe/z1+6vukgaENdgGBZt+ZmKPc4gavvEZ5OgHfHdrazhgNyG7w==", - "license": "MIT", - "dependencies": { - "ansi-regex": "^6.2.2" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/strip-ansi?sponsor=1" - } - }, - "node_modules/strip-json-comments": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", - "integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==", - "license": "MIT", - "optional": true, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/style-to-js": { - "version": "1.1.21", - "resolved": "https://registry.npmjs.org/style-to-js/-/style-to-js-1.1.21.tgz", - 
"integrity": "sha512-RjQetxJrrUJLQPHbLku6U/ocGtzyjbJMP9lCNK7Ag0CNh690nSH8woqWH9u16nMjYBAok+i7JO1NP2pOy8IsPQ==", - "license": "MIT", - "dependencies": { - "style-to-object": "1.0.14" - } - }, - "node_modules/style-to-object": { - "version": "1.0.14", - "resolved": "https://registry.npmjs.org/style-to-object/-/style-to-object-1.0.14.tgz", - "integrity": "sha512-LIN7rULI0jBscWQYaSswptyderlarFkjQ+t79nzty8tcIAceVomEVlLzH5VP4Cmsv6MtKhs7qaAiwlcp+Mgaxw==", - "license": "MIT", - "dependencies": { - "inline-style-parser": "0.2.7" - } - }, - "node_modules/sucrase": { - "version": "3.35.1", - "resolved": "https://registry.npmjs.org/sucrase/-/sucrase-3.35.1.tgz", - "integrity": "sha512-DhuTmvZWux4H1UOnWMB3sk0sbaCVOoQZjv8u1rDoTV0HTdGem9hkAZtl4JZy8P2z4Bg0nT+YMeOFyVr4zcG5Tw==", - "license": "MIT", - "dependencies": { - "@jridgewell/gen-mapping": "^0.3.2", - "commander": "^4.0.0", - "lines-and-columns": "^1.1.6", - "mz": "^2.7.0", - "pirates": "^4.0.1", - "tinyglobby": "^0.2.11", - "ts-interface-checker": "^0.1.9" - }, - "bin": { - "sucrase": "bin/sucrase", - "sucrase-node": "bin/sucrase-node" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - } - }, - "node_modules/sucrase/node_modules/commander": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz", - "integrity": "sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==", - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, - "node_modules/supports-preserve-symlinks-flag": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz", - "integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/tagged-tag": { - "version": "1.0.0", - "resolved": 
"https://registry.npmjs.org/tagged-tag/-/tagged-tag-1.0.0.tgz", - "integrity": "sha512-yEFYrVhod+hdNyx7g5Bnkkb0G6si8HJurOoOEgC8B/O0uXLHlaey/65KRv6cuWBNhBgHKAROVpc7QyYqE5gFng==", - "license": "MIT", - "engines": { - "node": ">=20" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/tailwindcss": { - "version": "3.4.19", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.19.tgz", - "integrity": "sha512-3ofp+LL8E+pK/JuPLPggVAIaEuhvIz4qNcf3nA1Xn2o/7fb7s/TYpHhwGDv1ZU3PkBluUVaF8PyCHcm48cKLWQ==", - "license": "MIT", - "dependencies": { - "@alloc/quick-lru": "^5.2.0", - "arg": "^5.0.2", - "chokidar": "^3.6.0", - "didyoumean": "^1.2.2", - "dlv": "^1.1.3", - "fast-glob": "^3.3.2", - "glob-parent": "^6.0.2", - "is-glob": "^4.0.3", - "jiti": "^1.21.7", - "lilconfig": "^3.1.3", - "micromatch": "^4.0.8", - "normalize-path": "^3.0.0", - "object-hash": "^3.0.0", - "picocolors": "^1.1.1", - "postcss": "^8.4.47", - "postcss-import": "^15.1.0", - "postcss-js": "^4.0.1", - "postcss-load-config": "^4.0.2 || ^5.0 || ^6.0", - "postcss-nested": "^6.2.0", - "postcss-selector-parser": "^6.1.2", - "resolve": "^1.22.8", - "sucrase": "^3.35.0" - }, - "bin": { - "tailwind": "lib/cli.js", - "tailwindcss": "lib/cli.js" - }, - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/tailwindcss/node_modules/chokidar": { - "version": "3.6.0", - "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz", - "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==", - "license": "MIT", - "dependencies": { - "anymatch": "~3.1.2", - "braces": "~3.0.2", - "glob-parent": "~5.1.2", - "is-binary-path": "~2.1.0", - "is-glob": "~4.0.1", - "normalize-path": "~3.0.0", - "readdirp": "~3.6.0" - }, - "engines": { - "node": ">= 8.10.0" - }, - "funding": { - "url": "https://paulmillr.com/funding/" - }, - "optionalDependencies": { - "fsevents": "~2.3.2" - } - }, - 
"node_modules/tailwindcss/node_modules/chokidar/node_modules/glob-parent": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", - "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", - "license": "ISC", - "dependencies": { - "is-glob": "^4.0.1" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/tailwindcss/node_modules/glob-parent": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz", - "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==", - "license": "ISC", - "dependencies": { - "is-glob": "^4.0.3" - }, - "engines": { - "node": ">=10.13.0" - } - }, - "node_modules/tar": { - "version": "6.1.15", - "resolved": "https://registry.npmjs.org/tar/-/tar-6.1.15.tgz", - "integrity": "sha512-/zKt9UyngnxIT/EAGYuxaMYgOIJiP81ab9ZfkILq4oNLPFX50qyYmu7jRj9qeXoxmJHjGlbH0+cm2uy1WCs10A==", - "deprecated": "Old versions of tar are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. 
Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me", - "license": "ISC", - "dependencies": { - "chownr": "^2.0.0", - "fs-minipass": "^2.0.0", - "minipass": "^5.0.0", - "minizlib": "^2.1.1", - "mkdirp": "^1.0.3", - "yallist": "^4.0.0" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/tar-fs": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.4.tgz", - "integrity": "sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ==", - "license": "MIT", - "optional": true, - "dependencies": { - "chownr": "^1.1.1", - "mkdirp-classic": "^0.5.2", - "pump": "^3.0.0", - "tar-stream": "^2.1.4" - } - }, - "node_modules/tar-fs/node_modules/chownr": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz", - "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==", - "license": "ISC", - "optional": true - }, - "node_modules/tar-stream": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz", - "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==", - "license": "MIT", - "optional": true, - "dependencies": { - "bl": "^4.0.3", - "end-of-stream": "^1.4.1", - "fs-constants": "^1.0.0", - "inherits": "^2.0.3", - "readable-stream": "^3.1.1" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/teex": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/teex/-/teex-1.0.1.tgz", - "integrity": "sha512-eYE6iEI62Ni1H8oIa7KlDU6uQBtqr4Eajni3wX7rpfXD8ysFx8z0+dri+KWEPWpBsxXfxu58x/0jvTVT1ekOSg==", - "license": "MIT", - "dependencies": { - "streamx": "^2.12.5" - } - }, - "node_modules/text-decoder": { - "version": "1.2.7", - "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.7.tgz", - "integrity": 
"sha512-vlLytXkeP4xvEq2otHeJfSQIRyWxo/oZGEbXrtEEF9Hnmrdly59sUbzZ/QgyWuLYHctCHxFF4tRQZNQ9k60ExQ==", - "license": "Apache-2.0", - "dependencies": { - "b4a": "^1.6.4" - } - }, - "node_modules/thenify": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz", - "integrity": "sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw==", - "license": "MIT", - "dependencies": { - "any-promise": "^1.0.0" - } - }, - "node_modules/thenify-all": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/thenify-all/-/thenify-all-1.6.0.tgz", - "integrity": "sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA==", - "license": "MIT", - "dependencies": { - "thenify": ">= 3.1.0 < 4" - }, - "engines": { - "node": ">=0.8" - } - }, - "node_modules/through": { - "version": "2.3.8", - "resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz", - "integrity": "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==", - "license": "MIT" - }, - "node_modules/tinyglobby": { - "version": "0.2.16", - "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.16.tgz", - "integrity": "sha512-pn99VhoACYR8nFHhxqix+uvsbXineAasWm5ojXoN8xEwK5Kd3/TrhNn1wByuD52UxWRLy8pu+kRMniEi6Eq9Zg==", - "license": "MIT", - "dependencies": { - "fdir": "^6.5.0", - "picomatch": "^4.0.4" - }, - "engines": { - "node": ">=12.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/SuperchupuDev" - } - }, - "node_modules/tinyglobby/node_modules/fdir": { - "version": "6.5.0", - "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", - "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", - "license": "MIT", - "engines": { - "node": ">=12.0.0" - }, - "peerDependencies": { - "picomatch": "^3 || ^4" - }, - "peerDependenciesMeta": { - "picomatch": { - "optional": true - } - } 
- }, - "node_modules/tinyglobby/node_modules/picomatch": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", - "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/to-data-view": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/to-data-view/-/to-data-view-1.1.0.tgz", - "integrity": "sha512-1eAdufMg6mwgmlojAx3QeMnzB/BTVp7Tbndi3U7ftcT2zCZadjxkkmLmd97zmaxWi+sgGcgWrokmpEoy0Dn0vQ==", - "license": "MIT" - }, - "node_modules/to-regex-range": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", - "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", - "license": "MIT", - "dependencies": { - "is-number": "^7.0.0" - }, - "engines": { - "node": ">=8.0" - } - }, - "node_modules/toidentifier": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", - "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==", - "license": "MIT", - "engines": { - "node": ">=0.6" - } - }, - "node_modules/tr46": { - "version": "0.0.3", - "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", - "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", - "license": "MIT" - }, - "node_modules/trim-lines": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/trim-lines/-/trim-lines-3.0.1.tgz", - "integrity": "sha512-kRj8B+YHZCc9kQYdWfJB2/oUl9rA99qbowYYBtr4ui4mZyAQ2JpvVBd/6U2YloATfqBhBTSMhTpgBHtU0Mf3Rg==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - 
"node_modules/trim-trailing-lines": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/trim-trailing-lines/-/trim-trailing-lines-2.1.0.tgz", - "integrity": "sha512-5UR5Biq4VlVOtzqkm2AZlgvSlDJtME46uV0br0gENbwN4l5+mMKT4b9gJKqWtuL2zAIqajGJGuvbCbcAJUZqBg==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/trough": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/trough/-/trough-2.2.0.tgz", - "integrity": "sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/ts-interface-checker": { - "version": "0.1.13", - "resolved": "https://registry.npmjs.org/ts-interface-checker/-/ts-interface-checker-0.1.13.tgz", - "integrity": "sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==", - "license": "Apache-2.0" - }, - "node_modules/tslib": { - "version": "1.14.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", - "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==", - "license": "0BSD" - }, - "node_modules/tunnel-agent": { - "version": "0.6.0", - "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz", - "integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==", - "license": "Apache-2.0", - "optional": true, - "dependencies": { - "safe-buffer": "^5.0.1" - }, - "engines": { - "node": "*" - } - }, - "node_modules/twoslash": { - "version": "0.3.6", - "resolved": "https://registry.npmjs.org/twoslash/-/twoslash-0.3.6.tgz", - "integrity": "sha512-VuI5OKl+MaUO9UIW3rXKoPgHI3X40ZgB/j12VY6h98Ae1mCBihjPvhOPeJWlxCYcmSbmeZt5ZKkK0dsVtp+6pA==", - "license": "MIT", - "dependencies": { - "@typescript/vfs": "^1.6.2", - 
"twoslash-protocol": "0.3.6" - }, - "peerDependencies": { - "typescript": "^5.5.0" - } - }, - "node_modules/twoslash-protocol": { - "version": "0.3.6", - "resolved": "https://registry.npmjs.org/twoslash-protocol/-/twoslash-protocol-0.3.6.tgz", - "integrity": "sha512-FHGsJ9Q+EsNr5bEbgG3hnbkvEBdW5STgPU824AHUjB4kw0Dn4p8tABT7Ncg1Ie6V0+mDg3Qpy41VafZXcQhWMA==", - "license": "MIT" - }, - "node_modules/type-fest": { - "version": "4.41.0", - "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.41.0.tgz", - "integrity": "sha512-TeTSQ6H5YHvpqVwBRcnLDCBnDOHWYu7IvGbHT6N8AOymcr9PJGjc1GTtiWZTYg0NCgYwvnYWEkVChQAr9bjfwA==", - "license": "(MIT OR CC0-1.0)", - "engines": { - "node": ">=16" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/type-is": { - "version": "1.6.18", - "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz", - "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==", - "license": "MIT", - "dependencies": { - "media-typer": "0.3.0", - "mime-types": "~2.1.24" - }, - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/typed-array-buffer": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/typed-array-buffer/-/typed-array-buffer-1.0.3.tgz", - "integrity": "sha512-nAYYwfY3qnzX30IkA6AQZjVbtK6duGontcQm1WSG1MD94YLqK0515GNApXkoxKOWMusVssAHWLh9SeaoefYFGw==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "es-errors": "^1.3.0", - "is-typed-array": "^1.1.14" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/typed-array-byte-length": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/typed-array-byte-length/-/typed-array-byte-length-1.0.3.tgz", - "integrity": "sha512-BaXgOuIxz8n8pIq3e7Atg/7s+DpiYrxn4vdot3w9KbnBhcRQq6o3xemQdIfynqSeXeDrF32x+WvfzmOjPiY9lg==", - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "for-each": "^0.3.3", - "gopd": "^1.2.0", - "has-proto": 
"^1.2.0", - "is-typed-array": "^1.1.14" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/typed-array-byte-offset": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/typed-array-byte-offset/-/typed-array-byte-offset-1.0.4.tgz", - "integrity": "sha512-bTlAFB/FBYMcuX81gbL4OcpH5PmlFHqlCCpAl8AlEzMz5k53oNDvN8p1PNOWLEmI2x4orp3raOFB51tv9X+MFQ==", - "license": "MIT", - "dependencies": { - "available-typed-arrays": "^1.0.7", - "call-bind": "^1.0.8", - "for-each": "^0.3.3", - "gopd": "^1.2.0", - "has-proto": "^1.2.0", - "is-typed-array": "^1.1.15", - "reflect.getprototypeof": "^1.0.9" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/typed-array-length": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/typed-array-length/-/typed-array-length-1.0.7.tgz", - "integrity": "sha512-3KS2b+kL7fsuk/eJZ7EQdnEmQoaho/r6KUef7hxvltNA5DR8NAUM+8wJMbJyZ4G9/7i3v5zPBIMN5aybAh2/Jg==", - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.7", - "for-each": "^0.3.3", - "gopd": "^1.0.1", - "is-typed-array": "^1.1.13", - "possible-typed-array-names": "^1.0.0", - "reflect.getprototypeof": "^1.0.6" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/typescript": { - "version": "5.9.3", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", - "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", - "license": "Apache-2.0", - "peer": true, - "bin": { - "tsc": "bin/tsc", - "tsserver": "bin/tsserver" - }, - "engines": { - "node": ">=14.17" - } - }, - "node_modules/unbox-primitive": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/unbox-primitive/-/unbox-primitive-1.1.0.tgz", - "integrity": 
"sha512-nWJ91DjeOkej/TA8pXQ3myruKpKEYgqvpw9lz4OPHj/NWFNluYrjbz9j01CJ8yKQd2g4jFoOkINCTW2I5LEEyw==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "has-bigints": "^1.0.2", - "has-symbols": "^1.1.0", - "which-boxed-primitive": "^1.1.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/unbzip2-stream": { - "version": "1.4.3", - "resolved": "https://registry.npmjs.org/unbzip2-stream/-/unbzip2-stream-1.4.3.tgz", - "integrity": "sha512-mlExGW4w71ebDJviH16lQLtZS32VKqsSfk80GCfUlwT/4/hNRFsoscrF/c++9xinkMzECL1uL9DDwXqFWkruPg==", - "license": "MIT", - "dependencies": { - "buffer": "^5.2.1", - "through": "^2.3.8" - } - }, - "node_modules/undici-types": { - "version": "7.18.2", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz", - "integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==", - "license": "MIT" - }, - "node_modules/unified": { - "version": "11.0.5", - "resolved": "https://registry.npmjs.org/unified/-/unified-11.0.5.tgz", - "integrity": "sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0", - "bail": "^2.0.0", - "devlop": "^1.0.0", - "extend": "^3.0.0", - "is-plain-obj": "^4.0.0", - "trough": "^2.0.0", - "vfile": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/unist-builder": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/unist-builder/-/unist-builder-4.0.0.tgz", - "integrity": "sha512-wmRFnH+BLpZnTKpc5L7O67Kac89s9HMrtELpnNaE6TAobq5DTZZs5YaTQfAZBA9bFPECx2uVAPO31c+GVug8mg==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - 
"node_modules/unist-util-find-after": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/unist-util-find-after/-/unist-util-find-after-5.0.0.tgz", - "integrity": "sha512-amQa0Ep2m6hE2g72AugUItjbuM8X8cGQnFoHk0pGfrFeT9GZhzN5SW8nRsiGKK7Aif4CrACPENkA6P/Lw6fHGQ==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0", - "unist-util-is": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/unist-util-is": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-6.0.1.tgz", - "integrity": "sha512-LsiILbtBETkDz8I9p1dQ0uyRUWuaQzd/cuEeS1hoRSyW5E5XGmTzlwY1OrNzzakGowI9Dr/I8HVaw4hTtnxy8g==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/unist-util-map": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/unist-util-map/-/unist-util-map-4.0.0.tgz", - "integrity": "sha512-HJs1tpkSmRJUzj6fskQrS5oYhBYlmtcvy4SepdDEEsL04FjBrgF0Mgggvxc1/qGBGgW7hRh9+UBK1aqTEnBpIA==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/unist-util-modify-children": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/unist-util-modify-children/-/unist-util-modify-children-4.0.0.tgz", - "integrity": "sha512-+tdN5fGNddvsQdIzUF3Xx82CU9sMM+fA0dLgR9vOmT0oPT2jH+P1nd5lSqfCfXAw+93NhcXNY2qqvTUtE4cQkw==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0", - "array-iterate": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/unist-util-position": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/unist-util-position/-/unist-util-position-5.0.0.tgz", - "integrity": 
"sha512-fucsC7HjXvkB5R3kTCO7kUjRdrS0BJt3M/FPxmHMBOm8JQi2BsHAHFsy27E0EolP8rp0NzXsJ+jNPyDWvOJZPA==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/unist-util-position-from-estree": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/unist-util-position-from-estree/-/unist-util-position-from-estree-2.0.0.tgz", - "integrity": "sha512-KaFVRjoqLyF6YXCbVLNad/eS4+OfPQQn2yOd7zF/h5T/CSL2v8NpN6a5TPvtbXthAGw5nG+PuTtq+DdIZr+cRQ==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/unist-util-remove": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/unist-util-remove/-/unist-util-remove-4.0.0.tgz", - "integrity": "sha512-b4gokeGId57UVRX/eVKej5gXqGlc9+trkORhFJpu9raqZkZhU0zm8Doi05+HaiBsMEIJowL+2WtQ5ItjsngPXg==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0", - "unist-util-is": "^6.0.0", - "unist-util-visit-parents": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/unist-util-remove-position": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/unist-util-remove-position/-/unist-util-remove-position-5.0.0.tgz", - "integrity": "sha512-Hp5Kh3wLxv0PHj9m2yZhhLt58KzPtEYKQQ4yxfYFEO7EvHwzyDYnduhHnY1mDxoqr7VUwVuHXk9RXKIiYS1N8Q==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0", - "unist-util-visit": "^5.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/unist-util-stringify-position": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/unist-util-stringify-position/-/unist-util-stringify-position-4.0.0.tgz", - "integrity": 
"sha512-0ASV06AAoKCDkS2+xw5RXJywruurpbC4JZSm7nr7MOt1ojAzvyyaO+UxZf18j8FCF6kmzCZKcAgN/yu2gm2XgQ==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/unist-util-visit": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-5.0.0.tgz", - "integrity": "sha512-MR04uvD+07cwl/yhVuVWAtw+3GOR/knlL55Nd/wAdblk27GCVt3lqpTivy/tkJcZoNPzTwS1Y+KMojlLDhoTzg==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0", - "unist-util-is": "^6.0.0", - "unist-util-visit-parents": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/unist-util-visit-children": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/unist-util-visit-children/-/unist-util-visit-children-3.0.0.tgz", - "integrity": "sha512-RgmdTfSBOg04sdPcpTSD1jzoNBjt9a80/ZCzp5cI9n1qPzLZWF9YdvWGN2zmTumP1HWhXKdUWexjy/Wy/lJ7tA==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/unist-util-visit-parents": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-6.0.1.tgz", - "integrity": "sha512-L/PqWzfTP9lzzEa6CKs0k2nARxTdZduw3zyh8d2NVBnsyvHjSX4TWse388YrrQKbvI8w20fGjGlhgT96WwKykw==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0", - "unist-util-is": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/universalify": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz", - "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==", - "license": "MIT", - 
"engines": { - "node": ">= 10.0.0" - } - }, - "node_modules/unpipe": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", - "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==", - "license": "MIT", - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/urijs": { - "version": "1.19.11", - "resolved": "https://registry.npmjs.org/urijs/-/urijs-1.19.11.tgz", - "integrity": "sha512-HXgFDgDommxn5/bIv0cnQZsPhHDA90NPHD6+c/v21U5+Sx5hoP8+dP9IZXBU1gIfvdRfhG8cel9QNPeionfcCQ==", - "license": "MIT" - }, - "node_modules/urlpattern-polyfill": { - "version": "10.0.0", - "resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz", - "integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg==", - "license": "MIT" - }, - "node_modules/use-callback-ref": { - "version": "1.3.3", - "resolved": "https://registry.npmjs.org/use-callback-ref/-/use-callback-ref-1.3.3.tgz", - "integrity": "sha512-jQL3lRnocaFtu3V00JToYz/4QkNWswxijDaCVNZRiRTO3HQDLsdu1ZtmIUvV4yPp+rvWm5j0y0TG/S61cuijTg==", - "license": "MIT", - "peer": true, - "dependencies": { - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/use-callback-ref/node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD", - "peer": true - }, - "node_modules/use-sidecar": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/use-sidecar/-/use-sidecar-1.1.3.tgz", - "integrity": 
"sha512-Fedw0aZvkhynoPYlA5WXrMCAMm+nSWdZt6lzJQ7Ok8S6Q+VsHmHpRWndVRJ8Be0ZbkfPc5LRYH+5XrzXcEeLRQ==", - "license": "MIT", - "peer": true, - "dependencies": { - "detect-node-es": "^1.1.0", - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/use-sidecar/node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD", - "peer": true - }, - "node_modules/util-deprecate": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", - "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", - "license": "MIT" - }, - "node_modules/utility-types": { - "version": "3.11.0", - "resolved": "https://registry.npmjs.org/utility-types/-/utility-types-3.11.0.tgz", - "integrity": "sha512-6Z7Ma2aVEWisaL6TvBCy7P8rm2LQoPv6dJ7ecIaIixHcwfbJ0x7mWdbcwlIM5IGQxPZSFYeqRCqlOOeKoJYMkw==", - "license": "MIT", - "engines": { - "node": ">= 4" - } - }, - "node_modules/utils-merge": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz", - "integrity": "sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA==", - "license": "MIT", - "engines": { - "node": ">= 0.4.0" - } - }, - "node_modules/uuid": { - "version": "11.1.0", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-11.1.0.tgz", - "integrity": "sha512-0/A9rDy9P7cJ+8w1c9WD9V//9Wj15Ce2MPz8Ri6032usz+NfePxx5AcN3bN+r6ZL6jEo066/yNYB3tn4pQEx+A==", - "funding": [ - "https://github.com/sponsors/broofa", - "https://github.com/sponsors/ctavan" - ], - "license": 
"MIT", - "bin": { - "uuid": "dist/esm/bin/uuid" - } - }, - "node_modules/vary": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", - "integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==", - "license": "MIT", - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/vfile": { - "version": "6.0.3", - "resolved": "https://registry.npmjs.org/vfile/-/vfile-6.0.3.tgz", - "integrity": "sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0", - "vfile-message": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/vfile-location": { - "version": "5.0.3", - "resolved": "https://registry.npmjs.org/vfile-location/-/vfile-location-5.0.3.tgz", - "integrity": "sha512-5yXvWDEgqeiYiBe1lbxYF7UMAIm/IcopxMHrMQDq3nvKcjPKIhZklUKL+AE7J7uApI4kwe2snsK+eI6UTj9EHg==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0", - "vfile": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/vfile-matter": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/vfile-matter/-/vfile-matter-5.0.1.tgz", - "integrity": "sha512-o6roP82AiX0XfkyTHyRCMXgHfltUNlXSEqCIS80f+mbAyiQBE2fxtDVMtseyytGx75sihiJFo/zR6r/4LTs2Cw==", - "license": "MIT", - "dependencies": { - "vfile": "^6.0.0", - "yaml": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/vfile-message": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-4.0.3.tgz", - "integrity": "sha512-QTHzsGd1EhbZs4AsQ20JX1rC3cOlt/IWJruk893DfLRr57lcnOeMaWG4K0JrRta4mIJZKth2Au3mM3u03/JWKw==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0", - 
"unist-util-stringify-position": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/web-namespaces": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/web-namespaces/-/web-namespaces-2.0.1.tgz", - "integrity": "sha512-bKr1DkiNa2krS7qxNtdrtHAmzuYGFQLiQ13TsorsdT6ULTkPLKuu5+GsFpDlg6JFjUTwX2DyhMPG2be8uPrqsQ==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/webidl-conversions": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", - "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", - "license": "BSD-2-Clause" - }, - "node_modules/whatwg-url": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", - "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", - "license": "MIT", - "dependencies": { - "tr46": "~0.0.3", - "webidl-conversions": "^3.0.0" - } - }, - "node_modules/which": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", - "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "license": "ISC", - "dependencies": { - "isexe": "^2.0.0" - }, - "bin": { - "node-which": "bin/node-which" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/which-boxed-primitive": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/which-boxed-primitive/-/which-boxed-primitive-1.1.1.tgz", - "integrity": "sha512-TbX3mj8n0odCBFVlY8AxkqcHASw3L60jIuF8jFP78az3C2YhmGvqbHBpAjTRH2/xqYunrJ9g1jSyjCjpoWzIAA==", - "license": "MIT", - "dependencies": { - "is-bigint": "^1.1.0", - "is-boolean-object": "^1.2.1", - "is-number-object": "^1.1.1", - "is-string": "^1.1.1", - "is-symbol": 
"^1.1.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/which-builtin-type": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/which-builtin-type/-/which-builtin-type-1.2.1.tgz", - "integrity": "sha512-6iBczoX+kDQ7a3+YJBnh3T+KZRxM/iYNPXicqk66/Qfm1b93iu+yOImkg0zHbj5LNOcNv1TEADiZ0xa34B4q6Q==", - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.2", - "function.prototype.name": "^1.1.6", - "has-tostringtag": "^1.0.2", - "is-async-function": "^2.0.0", - "is-date-object": "^1.1.0", - "is-finalizationregistry": "^1.1.0", - "is-generator-function": "^1.0.10", - "is-regex": "^1.2.1", - "is-weakref": "^1.0.2", - "isarray": "^2.0.5", - "which-boxed-primitive": "^1.1.0", - "which-collection": "^1.0.2", - "which-typed-array": "^1.1.16" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/which-collection": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/which-collection/-/which-collection-1.0.2.tgz", - "integrity": "sha512-K4jVyjnBdgvc86Y6BkaLZEN933SwYOuBFkdmBu9ZfkcAbdVbpITnDmjvZ/aQjRXQrv5EPkTnD1s39GiiqbngCw==", - "license": "MIT", - "dependencies": { - "is-map": "^2.0.3", - "is-set": "^2.0.3", - "is-weakmap": "^2.0.2", - "is-weakset": "^2.0.3" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/which-typed-array": { - "version": "1.1.20", - "resolved": "https://registry.npmjs.org/which-typed-array/-/which-typed-array-1.1.20.tgz", - "integrity": "sha512-LYfpUkmqwl0h9A2HL09Mms427Q1RZWuOHsukfVcKRq9q95iQxdw0ix1JQrqbcDR9PH1QDwf5Qo8OZb5lksZ8Xg==", - "license": "MIT", - "dependencies": { - "available-typed-arrays": "^1.0.7", - "call-bind": "^1.0.8", - "call-bound": "^1.0.4", - "for-each": "^0.3.5", - "get-proto": "^1.0.1", - "gopd": "^1.2.0", - "has-tostringtag": "^1.0.2" - }, - "engines": { - "node": ">= 
0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/widest-line": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/widest-line/-/widest-line-5.0.0.tgz", - "integrity": "sha512-c9bZp7b5YtRj2wOe6dlj32MK+Bx/M/d+9VB2SHM1OtsUHR0aV0tdP6DWh/iMt0kWi1t5g1Iudu6hQRNd1A4PVA==", - "license": "MIT", - "dependencies": { - "string-width": "^7.0.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/wrap-ansi": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-6.2.0.tgz", - "integrity": "sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA==", - "license": "MIT", - "dependencies": { - "ansi-styles": "^4.0.0", - "string-width": "^4.1.0", - "strip-ansi": "^6.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi/node_modules/ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "license": "MIT", - "dependencies": { - "color-convert": "^2.0.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/wrap-ansi/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "license": "MIT" 
- }, - "node_modules/wrap-ansi/node_modules/is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi/node_modules/string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wrappy": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", - "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", - "license": "ISC" - }, - "node_modules/ws": { - "version": "8.20.0", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz", - "integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==", - "license": "MIT", - "engines": { - "node": ">=10.0.0" - }, - "peerDependencies": { - "bufferutil": "^4.0.1", - "utf-8-validate": ">=5.0.2" - }, - "peerDependenciesMeta": { - "bufferutil": { - "optional": true - }, - "utf-8-validate": { - "optional": true - } - } - }, - "node_modules/xml2js": { - "version": "0.6.2", 
- "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.6.2.tgz", - "integrity": "sha512-T4rieHaC1EXcES0Kxxj4JWgaUQHDk+qwHcYOCFHfiwKz7tOVPLq7Hjq9dM1WCMhylqMEfP7hMcOIChvotiZegA==", - "license": "MIT", - "dependencies": { - "sax": ">=0.6.0", - "xmlbuilder": "~11.0.0" - }, - "engines": { - "node": ">=4.0.0" - } - }, - "node_modules/xmlbuilder": { - "version": "11.0.1", - "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-11.0.1.tgz", - "integrity": "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==", - "license": "MIT", - "engines": { - "node": ">=4.0" - } - }, - "node_modules/xss": { - "version": "1.0.15", - "resolved": "https://registry.npmjs.org/xss/-/xss-1.0.15.tgz", - "integrity": "sha512-FVdlVVC67WOIPvfOwhoMETV72f6GbW7aOabBC3WxN/oUdoEMDyLz4OgRv5/gck2ZeNqEQu+Tb0kloovXOfpYVg==", - "license": "MIT", - "dependencies": { - "commander": "^2.20.3", - "cssfilter": "0.0.10" - }, - "bin": { - "xss": "bin/xss" - }, - "engines": { - "node": ">= 0.10.0" - } - }, - "node_modules/xss/node_modules/commander": { - "version": "2.20.3", - "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz", - "integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==", - "license": "MIT" - }, - "node_modules/y18n": { - "version": "5.0.8", - "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", - "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", - "license": "ISC", - "engines": { - "node": ">=10" - } - }, - "node_modules/yallist": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", - "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", - "license": "ISC" - }, - "node_modules/yaml": { - "version": "2.8.3", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.3.tgz", - "integrity": 
"sha512-AvbaCLOO2Otw/lW5bmh9d/WEdcDFdQp2Z2ZUH3pX9U2ihyUY0nvLv7J6TrWowklRGPYbB/IuIMfYgxaCPg5Bpg==", - "license": "ISC", - "bin": { - "yaml": "bin.mjs" - }, - "engines": { - "node": ">= 14.6" - }, - "funding": { - "url": "https://github.com/sponsors/eemeli" - } - }, - "node_modules/yargs": { - "version": "17.7.1", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.1.tgz", - "integrity": "sha512-cwiTb08Xuv5fqF4AovYacTFNxk62th7LKJ6BL9IGUpTJrWoU7/7WdQGTP2SjKf1dUNBGzDd28p/Yfs/GI6JrLw==", - "license": "MIT", - "dependencies": { - "cliui": "^8.0.1", - "escalade": "^3.1.1", - "get-caller-file": "^2.0.5", - "require-directory": "^2.1.1", - "string-width": "^4.2.3", - "y18n": "^5.0.5", - "yargs-parser": "^21.1.1" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/yargs-parser": { - "version": "21.1.1", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", - "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/yargs/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/yargs/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "license": "MIT" - }, - "node_modules/yargs/node_modules/is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", 
- "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/yargs/node_modules/string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/yargs/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/yauzl": { - "version": "2.10.0", - "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.10.0.tgz", - "integrity": "sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==", - "license": "MIT", - "dependencies": { - "buffer-crc32": "~0.2.3", - "fd-slicer": "~1.1.0" - } - }, - "node_modules/yoctocolors-cjs": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/yoctocolors-cjs/-/yoctocolors-cjs-2.1.3.tgz", - "integrity": "sha512-U/PBtDf35ff0D8X8D0jfdzHYEPFxAI7jJlxZXwCSez5M3190m+QobIfh+sWDWSHMCWWJN2AWamkegn6vr6YBTw==", - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/yoga-layout": { - "version": "3.2.1", - "resolved": "https://registry.npmjs.org/yoga-layout/-/yoga-layout-3.2.1.tgz", - "integrity": "sha512-0LPOt3AxKqMdFBZA3HBAt/t/8vIKq7VaQYbuA8WxCgung+p9TVyKRYdpvCb80HcdTN2NkbIKbhNwKUfm3tQywQ==", - "license": "MIT" - }, - "node_modules/zod": { - "version": "4.3.6", - "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz", - 
"integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/colinhacks" - } - }, - "node_modules/zwitch": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/zwitch/-/zwitch-2.0.4.tgz", - "integrity": "sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - } - } -} diff --git a/docs/package.json b/docs/package.json deleted file mode 100644 index 4413f7c935b..00000000000 --- a/docs/package.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "dependencies": { - "mintlify": "^4.2.500", - "sharp": "^0.34.4" - } -} diff --git a/docs/quickstart-tutorial.mdx b/docs/quickstart-tutorial.mdx deleted file mode 100644 index 2be5657a3b2..00000000000 --- a/docs/quickstart-tutorial.mdx +++ /dev/null @@ -1,188 +0,0 @@ ---- -title: Tutorial to Get Started with MindsDB -sidebarTitle: Quickstart -icon: "play" ---- - -Before we start, install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). - -Get started with MindsDB in a few simple steps: - - - - Connect one or more data sources. Explore all available [data sources here](/integrations/data-overview). - - - Unify your data with [knowledge bases](/mindsdb_sql/knowledge_bases/overview). - - - Respond to questions over your data with [AI agents](/mindsdb_sql/agents/agent). - - - -## Step 1. Connect - -MindsDB enables connecting data from various data sources and operating on data without moving it from its source. Learn more [here](/mindsdb-connect). - -* **Connecting Structured Data** - -Use the [`CREATE DATABASE`](/mindsdb_sql/sql/create/database) statement to connect a data source to MindsDB. 
- -```sql -CREATE DATABASE mysql_demo_db -WITH ENGINE = 'mysql', -PARAMETERS = { - "user": "user", - "password": "MindsDBUser123!", - "host": "samples.mindsdb.com", - "port": "3306", - "database": "public" -}; -``` - -This is the input data used in the following steps: - -```sql -SELECT * -FROM mysql_demo_db.home_rentals -LIMIT 3; -``` - -The sample contains information about properties for rent. - -* **Connecting Unstructured Data** - -Extract data from webpages using the [web crawler](/integrations/app-integrations/web-crawler) or [upload files](/integrations/files/csv-xlsx-xls) to MindsDB. - -In this example, we fetch data from the MindsDB documentation webpage using the web crawler. - -```sql -CREATE DATABASE my_web -WITH ENGINE = 'web'; - -SELECT url, text_content -FROM my_web.crawler -WHERE url = 'https://docs.mindsdb.com/' -``` - -Now we save this data into a view which is saved in the default `mindsdb` project. - -```sql -CREATE VIEW mindsdb_docs ( - SELECT url, text_content - FROM my_web.crawler - WHERE url = 'https://docs.mindsdb.com/' -); - -SELECT * -FROM mindsdb.mindsdb_docs; -``` - -## Step 2. Unify - -MindsDB enables unifying data from structured and unstructured data sources into a single, queryable interface. This unified view allows seamless querying and model-building across all data without consolidation into one system. Learn more [here](/mindsdb-unify). - -Create a knowledge base to store all your data in a single location. Learn more about [knowledge bases here](/mindsdb_sql/knowledge_bases/overview). - -```sql -CREATE KNOWLEDGE_BASE my_kb -USING - embedding_model = { - "provider": "openai", - "model_name" : "text-embedding-3-large", - "api_key": "your-openai-api-key" - }, - reranking_model = { - "provider": "openai", - "model_name": "gpt-4o", - "api_key": "your-openai-api-key" - }, - content_columns = ['content']; -``` - -[Insert data](/mindsdb_sql/knowledge_bases/insert_data) from Step 1 into the knowledge base.
- -```sql -INSERT INTO my_kb - SELECT - 'number_of_rooms: ' || number_of_rooms || ', ' || - 'number_of_bathrooms' || number_of_bathrooms || ', ' || - 'sqft' || sqft || ', ' || - 'location' || location || ', ' || - 'days_on_market' || days_on_market || ', ' || - 'neighborhood' || neighborhood || ', ' || - 'rental_price' || rental_price - AS content - FROM mysql_demo_db.home_rentals; - -INSERT INTO my_kb - SELECT text_content AS content - FROM mindsdb.mindsdb_docs; -``` - -[Query the knowledge base](/mindsdb_sql/knowledge_bases/query) to search your data. - -```sql -SELECT * -FROM my_kb -WHERE content = 'what is MindsDB'; - -SELECT * -FROM my_kb -WHERE content = 'rental price lower than 2000'; -``` - - -In order to keep the knowledge base up-to-date with your data, use [jobs](/mindsdb_sql/sql/create/jobs) to automate data inserts every time your data is modified. - -```sql -CREATE JOB update_kb ( - - INSERT INTO my_kb - SELECT - 'number_of_rooms: ' || number_of_rooms || ', ' || - 'number_of_bathrooms' || number_of_bathrooms || ', ' || - 'sqft' || sqft || ', ' || - 'location' || location || ', ' || - 'days_on_market' || days_on_market || ', ' || - 'neighborhood' || neighborhood || ', ' || - 'rental_price' || rental_price - AS content - FROM mysql_demo_db.home_rentals - WHERE created_at > LATEST -) -EVERY 1 day; -``` - - -## Step 3. Respond - -MindsDB enables generating insightful and accurate responses from unified data using natural language. Learn more [here](/mindsdb-respond). - -Create an [agent](https://docs.mindsdb.com/mindsdb_sql/agents/agent) that can answer questions over your unified data from Step 2. 
- -```sql -CREATE AGENT my_agent -USING - model = { - "provider": "openai", - "model_name" : "gpt-4o", - "api_key": "your-openai-api-key" - }, - data = { - "knowledge_bases": ["mindsdb.my_kb"], - "tables": ["mysql_demo_db.home_rentals"] - }, - prompt_template = 'mindsdb.my_kb stores data about mindsdb and home rentals, - mysql_demo_db.home_rentals stores data about home rentals'; -``` - -Now you can ask questions over your data. - -```sql -SELECT * -FROM my_agent -WHERE question = 'what is MindsDB?'; -``` - -Visit the [Respond tab in the MindsDB Editor](/mindsdb_sql/agents/agent_gui) to chat with an agent. diff --git a/docs/releases.mdx b/docs/releases.mdx deleted file mode 100644 index 7df5b0fb982..00000000000 --- a/docs/releases.mdx +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: MindsDB Releases -sidebarTitle: Releases -icon: "code-branch" ---- - -MindsDB releases new features, functionalities, and fixes on a regular cadence. This document outlines the release process, versioning, and naming conventions. - -## Release Types and Versioning - -MindsDB uses [semantic versioning](https://semver.org/) to name all releases. This format is applied consistently across our GitHub tags, Python packages, and Docker images. - -Each release name follows the structure: - -``` -v<MAJOR>.<MINOR>.<PATCH>(<TYPE><NUMBER>) -``` - -Where: - -* `MAJOR` indicates the major version, which introduces significant changes or backward-incompatible updates. -* `MINOR` indicates the minor version, which introduces new features that remain backward-compatible. -* `PATCH` indicates the patch version, which introduces small fixes or improvements. -* `TYPE` is an optional component, which informs about the nature of the (pre-)release. -* `NUMBER` is an optional component used when `TYPE` is provided that indicates the pre-release version. - -The following are the release types and their naming conventions.
- -| **Release Type** | **Sample Version** | **Description** | -|-------------------------------|--------------------|-----------------| -| **GA (General Availability)** | `v25.9.3` | The stable public release. `25` is the major version, `9` is the minor version, and `3` is the patch number. | -| **Pre-GA (Release Candidate)**| `v25.9.3rc1` | A release candidate that is nearly ready for GA. `rc` stands for release candidate, and `1` indicates the version number of the pre-release. | -| **Alpha** | `v25.9.3alpha1` | An early testing version with limited features or stability. `alpha` denotes an initial stage for internal or early feedback. | -| **Beta** | `v25.9.3beta1` | A version close to final release. `beta` indicates a feature-complete build shared for broader testing and feedback. | - -## Release Process - -The `main` branch of the [MindsDB repository](https://github.com/mindsdb/mindsdb) contains the latest stable version of MindsDB and represents the GA (General Availability) release. - -MindsDB adheres to the release process as follows. - - - - A short-lived `release/x.x.x` branch is created for each release where all code changes for that release are pushed. - - - Developers create feature PRs that target the `release/x.x.x` branch. This branch is used for testing and validation of the release. - - - Pre-GA artifacts are built, including both the Python package and the Docker image, and shared for broader testing and feedback. - - - After successful testing and validation, the `release/x.x.x` branch is merged into the `main` branch, making it an official GA release. The final GA versions of the Python package and Docker image are released. - - - - -If you are interested in contributing to MindsDB, follow [this link](/contribute/contribute). 
- diff --git a/docs/rest/agents/create.mdx b/docs/rest/agents/create.mdx deleted file mode 100644 index 9000c0158b7..00000000000 --- a/docs/rest/agents/create.mdx +++ /dev/null @@ -1,131 +0,0 @@ ---- -title: Create Agent -sidebarTitle: Create Agent ---- - -**POST `/api/projects/{project_name}/agents`** - -This API endpoint creates an agent using the `POST` method. - - -Learn more about agents and the available parameters following [this doc page](/mindsdb_sql/agents/agent). - - -### Path Parameters - - -Defines the project where the agents are located. Note that the default project name is `mindsdb`. - - -### Body - - -Name of the agent. - - - -Stores parameters of the model, including `provider`, `model_name`, and `api_key`. Note that agents can use the default model defined in the configuration, if no model provided when creating an agent. - - - -Stores data connected to an agent, including `tables` and `knowledge_bases`. - - - -Stores instruction to an agent. This should contain the description of connected data. - - -### Response - - -Unique identifier for the agent. - - - -The name assigned to the agent. - - - -The ID of the project where the agent resides. - - - -Timestamp indicating when the agent was created. - - - -Timestamp indicating when the agent was last updated. - - - -Stores data connected to an agent, including `tables` and `knowledge_bases`. - -In order to provide all tables from a database or all knowledge bases from a project, use the `*` wildcard like this: -```shell - "data": { - "knowledge_bases": ["my_project.*"], - "tables": ["my_data_source.*"] - } -``` - - - -Stores parameters of the model, including `provider`, `model_name`, and `api_key`. - - - -Stores instruction to an agent. This should contain the description of connected data. 
- - - -```shell Shell -curl --request POST \ - --url http://127.0.0.1:47334/api/projects/mindsdb/agents \ - --header 'Content-Type: application/json' \ - --data '{ - "agent": { - "name": "my_agent", - "model": { - "provider": "openai", - "model_name": "gpt-4o", - "api_key": "sk-xxx" - }, - "data": { - "knowledge_bases": ["my_project.my_kb"], - "tables": ["my_data_source.my_table"] - }, - "prompt_template": "my_project.my_kb stores documentation of MindsDB, my_data_source.my_table stores documentation of MindsDB" - } -}' -``` - - - - - -```json Response -{ - "id": 197, - "name": "my_agent", - "project_id": 1, - "created_at": "2025-07-09 12:58:24.868202", - "updated_at": "2025-07-09 12:58:24.868199", - "data": { - "knowledge_bases": [ - "my_project.my_kb" - ], - "tables": [ - "my_data_source.my_table" - ] - }, - "model": { - "provider": "openai", - "model_name": "gpt-4o", - "api_key": "sk-xxx" - }, - "prompt_template": "my_project.my_kb stores documentation of MindsDB, my_data_source.my_table stores documentation of MindsDB" -} -``` - - diff --git a/docs/rest/agents/delete.mdx b/docs/rest/agents/delete.mdx deleted file mode 100644 index 41bd09101a5..00000000000 --- a/docs/rest/agents/delete.mdx +++ /dev/null @@ -1,47 +0,0 @@ ---- -title: Delete Agent -sidebarTitle: Delete Agent ---- - -**DELETE `/api/projects/{project_name}/agents/{agent_name}`** - -This API endpoint deletes an agent using the `DELETE` method. - - -Learn more about agents and the available parameters following [this doc page](/mindsdb_sql/agents/agent). - - -### Path Parameters - - -Defines the project where the agent is located. Note that the default project name is `mindsdb`. - - - -Defines the agent name. - - -### Body - -None. - -### Response - -None. 
- - - -```shell Shell -curl --request DELETE \ - --url http://127.0.0.1:47334/api/projects/mindsdb/agents/my_agent -``` - - - - - -```json Response -200 OK -``` - - diff --git a/docs/rest/agents/get.mdx b/docs/rest/agents/get.mdx deleted file mode 100644 index ce9ff66cc16..00000000000 --- a/docs/rest/agents/get.mdx +++ /dev/null @@ -1,93 +0,0 @@ ---- -title: Get Agent -sidebarTitle: Get Agent ---- - -**GET `/api/projects/{project_name}/agents/{agent_name}`** - -This API endpoint lists details about an agent using the `GET` method. - - -Learn more about agents and the available parameters following [this doc page](/mindsdb_sql/agents/agent). - - -### Path Parameters - - -Defines the project where the agents are located. Note that the default project name is `mindsdb`. - - - -Defines the agent name to get its details. - - -### Body - -None. - -### Response - - -Unique identifier for the agent. - - - -The name assigned to the agent. - - - -The ID of the project where the agent resides. - - - -Timestamp indicating when the agent was created. - - - -Timestamp indicating when the agent was last updated. - - - -Stores data connected to an agent, including `tables` and `knowledge_bases`. - - - -Stores parameters of the model, including `provider`, `model_name`, and `api_key`. - - - -Stores instruction to an agent. This should contain the description of connected data. 
- - - - -```shell Shell -curl --request GET \ - --url http://127.0.0.1:47334/api/projects/mindsdb/agents/my_agent -``` - - - - - -```json Response -{ - "id": 197, - "name": "my_agent", - "project_id": 1, - "created_at": "2025-07-09 12:58:24.868202", - "updated_at": "2025-07-09 12:58:24.868199", - "data": { - "knowledge_bases": ["my_project.my_kb"], - "tables": ["my_data_source.my_table"] - }, - "model": { - "provider": "openai", - "model_name": "gpt-4o", - "api_key": "sk-xxx" - }, - "prompt_template": "my_project.my_kb stores documentation of MindsDB, my_data_source.my_table stores documentation of MindsDB" -} -``` - - diff --git a/docs/rest/agents/list.mdx b/docs/rest/agents/list.mdx deleted file mode 100644 index 3bdb6e0e035..00000000000 --- a/docs/rest/agents/list.mdx +++ /dev/null @@ -1,91 +0,0 @@ ---- -title: List Agents -sidebarTitle: List Agents ---- - -**GET `/api/projects/{project_name}/agents`** - -This API endpoint lists all available agents using the `GET` method. - - -Learn more about agents and the available parameters following [this doc page](/mindsdb_sql/agents/agent). - - -### Path Parameters - - -Defines the project where the agents are located. Note that the default project name is `mindsdb`. - - -### Body - -None. - -### Response - - -Unique identifier for the agent. - - - -The name assigned to the agent. - - - -The ID of the project where the agent resides. - - - -Timestamp indicating when the agent was created. - - - -Timestamp indicating when the agent was last updated. - - - -Stores data connected to an agent, including `tables` and `knowledge_bases`. - - - -Stores parameters of the model, including `provider`, `model_name`, and `api_key`. - - - -Stores instruction to an agent. This should contain the description of connected data. 
- - - -```shell Shell -curl --request GET \ - --url http://127.0.0.1:47334/api/projects/mindsdb/agents -``` - - - - - -```json Response -[ - { - "id": 197, - "name": "my_agent", - "project_id": 1, - "created_at": "2025-07-09 12:58:24.868202", - "updated_at": "2025-07-09 12:58:24.868199", - "data": { - "knowledge_bases": ["my_project.my_kb"], - "tables": ["my_data_source.my_table"] - }, - "model": { - "provider": "openai", - "model_name": "gpt-4o", - "api_key": "sk-xxx" - }, - "prompt_template": "my_project.my_kb stores documentation of MindsDB, my_data_source.my_table stores documentation of MindsDB" - } -] -``` - - diff --git a/docs/rest/agents/query.mdx b/docs/rest/agents/query.mdx deleted file mode 100644 index 8921c27bb12..00000000000 --- a/docs/rest/agents/query.mdx +++ /dev/null @@ -1,72 +0,0 @@ ---- -title: Query Agents -sidebarTitle: Query Agents ---- - -**POST `/api/projects/{project_name}/agents/{agent_name}/completions[/stream]`** - -This API endpoint queries an agent using the `POST` method. The `completions` endpoint returns an answer, while the `completions/stream` endpoint streams the thoughts and returns an answer. - - -Learn more about agents and the available parameters following [this doc page](/mindsdb_sql/agents/agent). - - -### Path Parameters - - -Defines the project where the agents are located. Note that the default project name is `mindsdb`. - - - -Defines the agent name. - - -### Body - - -Stores the question to an agent. - - -### Response - - -Returns data chunks containing thoughts and an answer. 
- - - - -```shell Shell -curl --request POST \ - --url http://127.0.0.1:47334/api/projects/mindsdb/agents/my_agent/completions/stream \ - --header 'Content-Type: application/json' \ - --data '{ - "messages": [ - { - "question": "What is MindsDB?", - "answer": "" - } - ] -}' -``` - - - - - -```json Response -data: {"type": "start", "prompt": "What is MindsDB?", "trace_id": ""} - -data: {"actions": [{"tool": "kb_list_tool", "tool_input": "", "log": "```\nThought: Do I need to use a tool? Yes\nAction: kb_list_tool\nAction Input: "}], "messages": [{"content": "```\nThought: Do I need to use a tool? Yes\nAction: kb_list_tool\nAction Input: "}], "trace_id": ""} - -data: {"steps": [{"action": {"tool": "kb_list_tool", "tool_input": "", "log": "```\nThought: Do I need to use a tool? Yes\nAction: kb_list_tool\nAction Input: "}, "observation": "[\"kb_mindsdb_docs\"]"}], "messages": [{"content": "[\"kb_mindsdb_docs\"]"}], "trace_id": ""} - -data: {"actions": [{"tool": "kb_query_tool", "tool_input": "SELECT * FROM `kb_mindsdb_docs` WHERE content = 'What is MindsDB?' LIMIT 1;", "log": "I have identified a knowledge base named `kb_mindsdb_docs` that contains documentation about MindsDB. I will now query this knowledge base to provide you with information about MindsDB.\n\n```\nAction: kb_query_tool\nAction Input: SELECT * FROM `kb_mindsdb_docs` WHERE content = 'What is MindsDB?' LIMIT 1;"}], "messages": [{"content": "I have identified a knowledge base named `kb_mindsdb_docs` that contains documentation about MindsDB. I will now query this knowledge base to provide you with information about MindsDB.\n\n```\nAction: kb_query_tool\nAction Input: SELECT * FROM `kb_mindsdb_docs` WHERE content = 'What is MindsDB?' LIMIT 1;"}], "trace_id": ""} - -data: {"steps": [{"action": {"tool": "kb_query_tool", "tool_input": "SELECT * FROM `kb_mindsdb_docs` WHERE content = 'What is MindsDB?' 
LIMIT 1;", "log": "I have identified a knowledge base named `kb_mindsdb_docs` that contains documentation about MindsDB. I will now query this knowledge base to provide you with information about MindsDB.\n\n```\nAction: kb_query_tool\nAction Input: SELECT * FROM `kb_mindsdb_docs` WHERE content = 'What is MindsDB?' LIMIT 1;"}, "observation": "Output columns: 'id', 'chunk_id', 'chunk_content', 'metadata', 'distance', 'relevance'\nResult in CSV format (dialect is 'excel'):\nid,chunk_id,chunk_content,metadata,distance,relevance\r\nc2b24e025ed01388,c2b24e025ed01388:text_content:1766of1836:1633168to1634163,\"with MindsDB By integrating databases and OpenAI using MindsDB, developers can easily extract insights from text data with just a few SQL commands. These powerful natural language processing (NLP) models are capable of answering questions with or without context and completing general prompts. Furthermore, these models are powered by large pre-trained language models from OpenAI, so there is no need for manual development work. Ultimately, this provides developers with an easy way to incorporate powerful NLP capabilities into their applications while saving time and resources compared to traditional ML development pipelines and methods. All in all, MindsDB makes it possible for developers to harness the power of OpenAI efficiently! MindsDB is now the fastest-growing open-source applied machine-learning platform in the world. Its community continues to contribute to more than 70 data-source and ML-framework integrations. 
Stay tuned for the upcoming features - including more control\",\"{'_chunk_index': 1765, '_content_column': 'text_content', '_end_char': 1634163, '_original_doc_id': 'c2b24e025ed01388', '_original_row_index': '0', '_source': 'TextChunkingPreprocessor', '_start_char': 1633168, '_updated_at': '2025-07-01 12:36:41', 'url': 'https://docs.mindsdb.com/llms-full.txt'}\",0.24353297838910382,0.9321520551316381\r\n"}], "messages": [{"content": "Output columns: 'id', 'chunk_id', 'chunk_content', 'metadata', 'distance', 'relevance'\nResult in CSV format (dialect is 'excel'):\nid,chunk_id,chunk_content,metadata,distance,relevance\r\nc2b24e025ed01388,c2b24e025ed01388:text_content:1766of1836:1633168to1634163,\"with MindsDB By integrating databases and OpenAI using MindsDB, developers can easily extract insights from text data with just a few SQL commands. These powerful natural language processing (NLP) models are capable of answering questions with or without context and completing general prompts. Furthermore, these models are powered by large pre-trained language models from OpenAI, so there is no need for manual development work. Ultimately, this provides developers with an easy way to incorporate powerful NLP capabilities into their applications while saving time and resources compared to traditional ML development pipelines and methods. All in all, MindsDB makes it possible for developers to harness the power of OpenAI efficiently! MindsDB is now the fastest-growing open-source applied machine-learning platform in the world. Its community continues to contribute to more than 70 data-source and ML-framework integrations. 
Stay tuned for the upcoming features - including more control\",\"{'_chunk_index': 1765, '_content_column': 'text_content', '_end_char': 1634163, '_original_doc_id': 'c2b24e025ed01388', '_original_row_index': '0', '_source': 'TextChunkingPreprocessor', '_start_char': 1633168, '_updated_at': '2025-07-01 12:36:41', 'url': 'https://docs.mindsdb.com/llms-full.txt'}\",0.24353297838910382,0.9321520551316381\r\n"}], "trace_id": ""} - -data: {"output": "MindsDB is an open-source platform that integrates databases and OpenAI to enable developers to extract insights from text data using SQL commands. It leverages powerful natural language processing (NLP) models, powered by large pre-trained language models from OpenAI, to answer questions and complete prompts without the need for manual development work. This makes it easier for developers to incorporate NLP capabilities into their applications, saving time and resources compared to traditional machine learning development methods. MindsDB is recognized as the fastest-growing open-source applied machine-learning platform, with a community contributing to over 70 data-source and ML-framework integrations.", "messages": [{"content": "MindsDB is an open-source platform that integrates databases and OpenAI to enable developers to extract insights from text data using SQL commands. It leverages powerful natural language processing (NLP) models, powered by large pre-trained language models from OpenAI, to answer questions and complete prompts without the need for manual development work. This makes it easier for developers to incorporate NLP capabilities into their applications, saving time and resources compared to traditional machine learning development methods. 
MindsDB is recognized as the fastest-growing open-source applied machine-learning platform, with a community contributing to over 70 data-source and ML-framework integrations."}], "trace_id": ""} - -data: {"type": "end"} -``` - - diff --git a/docs/rest/agents/update.mdx b/docs/rest/agents/update.mdx deleted file mode 100644 index 2c4e59c1078..00000000000 --- a/docs/rest/agents/update.mdx +++ /dev/null @@ -1,118 +0,0 @@ ---- -title: Update Agent -sidebarTitle: Update Agent ---- - -**PUT `/api/projects/{project_name}/agents/{agent_name}`** - -This API endpoint updates an agent using the `PUT` method. - - -Learn more about agents and the available parameters following [this doc page](/mindsdb_sql/agents/agent). - - -### Path Parameters - - -Defines the project where the agents are located. Note that the default project name is `mindsdb`. - - - -Defines the agent name. - - -### Body - - -Name of the agent. - - - -Stores parameters of the model, including `provider`, `model_name`, and `api_key`. - - - -Stores data connected to an agent, including `tables` and `knowledge_bases`. - - - -Stores instruction to an agent. This should contain the description of connected data. - - -### Response - - -Unique identifier for the agent. - - - -The name assigned to the agent. - - - -The ID of the project where the agent resides. - - - -Timestamp indicating when the agent was created. - - - -Timestamp indicating when the agent was last updated. - - - -Stores data connected to an agent, including `tables` and `knowledge_bases`. - - - -Stores parameters of the model, including `provider`, `model_name`, and `api_key`. - - - -Stores instruction to an agent. This should contain the description of connected data. 
- - - - -```shell Shell -curl --request PUT \ - --url http://127.0.0.1:47334/api/projects/mindsdb/agents/my_agent \ - --header 'Content-Type: application/json' \ - --data '{ - "agent": { - "model": { - "provider": "openai", - "model_name": "gpt-4.1", - "api_key": "sk-xxx" - } - } -}' - -``` - - - - - -```json Response -{ - "id": 197, - "name": "my_agent", - "project_id": 1, - "created_at": "2025-07-09 12:58:24.868202", - "updated_at": "2025-07-09 12:58:24.868199", - "data": { - "knowledge_bases": ["my_project.my_kb"], - "tables": ["my_data_source.my_table"] - }, - "model": { - "provider": "openai", - "model_name": "gpt-4.1", - "api_key": "sk-xxx" - }, - "prompt_template": "my_project.my_kb stores documentation of MindsDB, my_data_source.my_table stores documentation of MindsDB" -} -``` - - diff --git a/docs/rest/authentication.mdx b/docs/rest/authentication.mdx deleted file mode 100644 index fa1f89f504b..00000000000 --- a/docs/rest/authentication.mdx +++ /dev/null @@ -1,49 +0,0 @@ ---- -title: Authentication -sidebarTitle: Authentication ---- - -MindsDB provides an optional authentication mechanism for its HTTP API. This includes setting up a username and a password for the MindsDB instance. Learn [more here](/setup/custom-config#auth). - -If this authentication method is defined in the MindsDB configuration file, it is required to authenticate oneself when using the REST API endpoints of this MindsDB instance. - -**Here is how to authenticate an HTTP session for calling MindsDB REST APIs.** - -1. Call the `login` endpoint with the username and password parameters. - - ``` - curl --request POST --url 'http://127.0.0.1:47334/api/login' \ - --header 'Content-Type: application/json' \ - --data-raw '{"username":"your-username","password":"your-password"}' -v - ``` - - This command returns an HTTP status code 200 if the request is successful, along with a token in the response body. - -2. Call any other endpoint providing the token. 
- - ``` - curl --request GET \ - --url http://127.0.0.1:47334/api/projects/mindsdb/... \ - --header 'Content-Type: application/json' \ - --header 'Authorization: Bearer pat_your_mindsdb_token_here' \ - --data '{ - ... - }' - ``` - - For example, query an agent under the authenticated session: - - ``` - curl --request POST \ - --url http://127.0.0.1:47334/api/projects/mindsdb/agents/my_agent/completions/stream \ - --header 'Content-Type: application/json' \ - --header 'Authorization: Bearer pat_your_mindsdb_token_here' \ - --data '{ - "messages": [ - { - "question": "What is MindsDB?", - "answer": "" - } - ] - }' - ``` diff --git a/docs/rest/databases/create-databases.mdx b/docs/rest/databases/create-databases.mdx deleted file mode 100644 index 4c8c55dafcc..00000000000 --- a/docs/rest/databases/create-databases.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Connect a Data Source -openapi: "POST /api/databases" -sidebarTitle: Connect a Data Source ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. - - diff --git a/docs/rest/databases/delete-databases.mdx b/docs/rest/databases/delete-databases.mdx deleted file mode 100644 index f00b4dc977d..00000000000 --- a/docs/rest/databases/delete-databases.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Remove a Data Source -openapi: "DELETE /api/databases/{databaseName}" -sidebarTitle: Remove a Data Source ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. - - \ No newline at end of file diff --git a/docs/rest/databases/list-database.mdx b/docs/rest/databases/list-database.mdx deleted file mode 100644 index 6413ff14182..00000000000 --- a/docs/rest/databases/list-database.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Get a Data Source -openapi: "GET /api/databases/{databaseName}" -sidebarTitle: Get a Data Source ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. 
- - \ No newline at end of file diff --git a/docs/rest/databases/list-databases.mdx b/docs/rest/databases/list-databases.mdx deleted file mode 100644 index 3c8fa42ed50..00000000000 --- a/docs/rest/databases/list-databases.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: List Data Sources -openapi: "GET /api/databases" -sidebarTitle: List Data Sources ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. - - \ No newline at end of file diff --git a/docs/rest/databases/update-databases.mdx b/docs/rest/databases/update-databases.mdx deleted file mode 100644 index 726c50be131..00000000000 --- a/docs/rest/databases/update-databases.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Update a Data Source -openapi: "PUT /api/databases/{databaseName}" -sidebarTitle: Update a Data Source ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. - - \ No newline at end of file diff --git a/docs/rest/files/delete.mdx b/docs/rest/files/delete.mdx deleted file mode 100644 index a5f7bd43b59..00000000000 --- a/docs/rest/files/delete.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Remove a File -openapi: "DELETE /api/files/{fileName}" -sidebarTitle: Remove a File ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. - - \ No newline at end of file diff --git a/docs/rest/files/list.mdx b/docs/rest/files/list.mdx deleted file mode 100644 index eae3a967552..00000000000 --- a/docs/rest/files/list.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: List Files -openapi: "GET /api/files" -sidebarTitle: List Files ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. 
- - \ No newline at end of file diff --git a/docs/rest/files/upload.mdx b/docs/rest/files/upload.mdx deleted file mode 100644 index dfdb5fe815c..00000000000 --- a/docs/rest/files/upload.mdx +++ /dev/null @@ -1,15 +0,0 @@ ---- -title: Upload a File -openapi: "PUT /api/files/{fileName}" -sidebarTitle: Upload a File ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. - - - - -Note that the trailing whitespaces on column names are erased upon uploading a file to MindsDB. - diff --git a/docs/rest/jobs/create.mdx b/docs/rest/jobs/create.mdx deleted file mode 100644 index 614f494b3e0..00000000000 --- a/docs/rest/jobs/create.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Create a Job -openapi: "POST /api/projects/{projectName}/jobs" -sidebarTitle: Create a Job ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. - - \ No newline at end of file diff --git a/docs/rest/jobs/delete.mdx b/docs/rest/jobs/delete.mdx deleted file mode 100644 index 709b5fe9358..00000000000 --- a/docs/rest/jobs/delete.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Remove a Job -openapi: "DELETE /api/projects/{projectName}/jobs/{jobName}" -sidebarTitle: Remove a Job ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. - - \ No newline at end of file diff --git a/docs/rest/jobs/get.mdx b/docs/rest/jobs/get.mdx deleted file mode 100644 index bd4c0960732..00000000000 --- a/docs/rest/jobs/get.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Get a Job -openapi: "GET /api/projects/{projectName}/jobs/{jobName}" -sidebarTitle: Get a Job ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. 
- - \ No newline at end of file diff --git a/docs/rest/jobs/list.mdx b/docs/rest/jobs/list.mdx deleted file mode 100644 index ca31a4f8c4e..00000000000 --- a/docs/rest/jobs/list.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: List Jobs -openapi: "GET /api/projects/{projectName}/jobs" -sidebarTitle: List Jobs ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. - - \ No newline at end of file diff --git a/docs/rest/knowledge_bases/alter.mdx b/docs/rest/knowledge_bases/alter.mdx deleted file mode 100644 index 21f6df65fad..00000000000 --- a/docs/rest/knowledge_bases/alter.mdx +++ /dev/null @@ -1,172 +0,0 @@ ---- -title: Alter Knowledge Base -sidebarTitle: Alter Knowledge Base ---- - -**PUT `/api/projects/{project_name}/knowledge_bases/{kb_name}`** - -This API endpoint alters an existing knowledge base using the `PUT` method. - - -Learn more about knowledge bases following [this doc page](/mindsdb_sql/knowledge_bases/overview). - - -### Path Parameters - - -Defines the project where the knowledge bases are located. Note that the default project name is `mindsdb`. - - - -Defines the knowledge base to be altered. - - -### Body - - -Defines the embedding model used to embed data in vector representation. - - - -Defines the reranking model used to rerank the search results by relevance. - - - -Defines the columns that store content to be embedded. - - - -Defines the columns that are considered metadata. - - - -Defines the column that uniquely identifies each row from the data inserted into the knowledge base. - - - -Defines the data preprocessing parameters. - - -### Response - - -Unique identifier for the knowledge base. - - - -The name assigned to the knowledge base. - - - -The ID of the project where the knowledge base resides. - - - -The vector store used for storing vector embeddings. - - - -The name of the collection or table within the vector database. 
- - - -Timestamp indicating when the knowledge base was last updated. - - - -Timestamp indicating when the knowledge base was created. - - - -Optional field for linking specific queries to this knowledge base. - - - -The embedding model used to convert content into vector representations. - - - -Optional model used to rerank search results based on relevance. - - - -Optional list of columns used for metadata-based filtering or enrichment. - - - -Optional list of columns treated as the main content for embedding and retrieval. - - - -The name of the column that uniquely identifies each content row. - - - -A nested object that contains additional configuration parameters. - - - -The name of the embedding model associated with this knowledge base at creation time. - - - - -```shell Shell -curl -X PUT http://127.0.0.1:47334/api/projects/mindsdb/knowledge_bases/my_kb \ - -H "Content-Type: application/json" \ - -d '{ - "knowledge_base": { - "embedding_model": { - "api_key": "sk-xxx" - }, - "reranking_model": { - "provider": "openai", - "model_name": "gpt-4o", - "api_key": "sk-xxx" - }, - "content_columns": ["notes"], - "metadata_columns": ["product"], - "id_column": "order_id" - } - }' - -``` - - - - - -```json Response -{ - "id": 2, - "name": "my_kb", - "project_id": 1, - "vector_database": "my_kb_chromadb", - "vector_database_table": "default_collection", - "updated_at": "2025-06-26 10:24:06.311655", - "created_at": "2025-06-26 10:24:06.311654", - "query_id": null, - "embedding_model": { - "provider": "openai", - "model_name": "text-embedding-3-small", - "api_key": "******" - }, - "reranking_model": { - "provider": "openai", - "model_name": "gpt-4o", - "api_key": "******" - }, - "metadata_columns": [ - "product" - ], - "content_columns": [ - "notes" - ], - "id_column": "order_id", - "params": { - "created_embedding_model": "kb_embedding_my_kbxxx" - } -} -``` - - diff --git a/docs/rest/knowledge_bases/create.mdx b/docs/rest/knowledge_bases/create.mdx deleted file mode 
100644 index 925ef87b394..00000000000 --- a/docs/rest/knowledge_bases/create.mdx +++ /dev/null @@ -1,183 +0,0 @@ ---- -title: Create Knowledge Base -sidebarTitle: Create Knowledge Base ---- - -**POST `/api/projects/{project_name}/knowledge_bases`** - -This API endpoint creates a knowledge base using the `POST` method. - - -Learn more about knowledge bases following [this doc page](/mindsdb_sql/knowledge_bases/overview). - - -### Path Parameters - - -Defines the project where the knowledge bases are located. Note that the default project name is `mindsdb`. - - -### Body - - -Name of the knowledge base. - - - -Underlying vector database that stores the embeddings. - - - -Defines the embedding model used to embed data in vector representation. - - - -Defines the reranking model used to rerank the search results by relevance. - - - -Defines the columns that store content to be embedded. - - - -Defines the columns that are considered metadata. - - - -Defines the column that uniquely identifies each row from the data inserted into the knowledge base. - - - -Defines the data preprocessing parameters. - - -### Response - - -Unique identifier for the knowledge base. - - - -The name assigned to the knowledge base. - - - -The ID of the project where the knowledge base resides. - - - -The vector store used for storing vector embeddings. - - - -The name of the collection or table within the vector database. - - - -Timestamp indicating when the knowledge base was last updated. - - - -Timestamp indicating when the knowledge base was created. - - - -Optional field for linking specific queries to this knowledge base. - - - -The embedding model used to convert content into vector representations. - - - -Optional model used to rerank search results based on relevance. - - - -Optional list of columns used for metadata-based filtering or enrichment. - - - -Optional list of columns treated as the main content for embedding and retrieval. 
- - - -The name of the column that uniquely identifies each content row. - - - -A nested object that contains additional configuration parameters. - - - -The name of the embedding model associated with this knowledge base at creation time. - - - - -```shell Shell -curl -X POST http://127.0.0.1:47334/api/projects/mindsdb/knowledge_bases \ - -H "Content-Type: application/json" \ - -d '{ - "knowledge_base": { - "name": "my_kb", - "storage": { - "database": "my_kb_chromadb", - "table": "default_collection" - }, - "embedding_model": { - "provider": "openai", - "model_name": "text-embedding-3-small", - "api_key": "sk-xxx" - }, - "reranking_model": { - "provider": "openai", - "model_name": "gpt-4o", - "api_key": "sk-xxx" - }, - "content_columns": ["notes"], - "metadata_columns": ["product"], - "id_column": "order_id" - } - }' - -``` - - - - - -```json Response -{ - "id": 2, - "name": "my_kb", - "project_id": 1, - "vector_database": "my_kb_chromadb", - "vector_database_table": "default_collection", - "updated_at": "2025-06-26 10:24:06.311655", - "created_at": "2025-06-26 10:24:06.311654", - "query_id": null, - "embedding_model": { - "provider": "openai", - "model_name": "text-embedding-3-small", - "api_key": "******" - }, - "reranking_model": { - "provider": "openai", - "model_name": "gpt-4o", - "api_key": "******" - }, - "metadata_columns": [ - "product" - ], - "content_columns": [ - "notes" - ], - "id_column": "order_id", - "params": { - "created_embedding_model": "kb_embedding_my_kbxxx" - } -} -``` - - diff --git a/docs/rest/knowledge_bases/delete.mdx b/docs/rest/knowledge_bases/delete.mdx deleted file mode 100644 index d52e580816e..00000000000 --- a/docs/rest/knowledge_bases/delete.mdx +++ /dev/null @@ -1,46 +0,0 @@ ---- -title: Delete Knowledge Base -sidebarTitle: Delete Knowledge Base ---- - -**DELETE `/api/projects/{project_name}/knowledge_bases/{knowledge_base_name}`** - -This API endpoint deletes a knowledge base using the `DELETE` method. 
- - -Learn more about knowledge bases following [this doc page](/mindsdb_sql/knowledge_bases/overview). - - -### Path Parameters - - -Defines the project where the knowledge bases are located. Note that the default project name is `mindsdb`. - - - -Defines the knowledge base name. - - -### Body - -None. - -### Response - -None. - - - -```shell Shell -curl -X DELETE http://127.0.0.1:47334/api/projects/mindsdb/knowledge_bases/my_kb -``` - - - - - -```json Response -200 OK -``` - - diff --git a/docs/rest/knowledge_bases/get.mdx b/docs/rest/knowledge_bases/get.mdx deleted file mode 100644 index e3a3c4fe849..00000000000 --- a/docs/rest/knowledge_bases/get.mdx +++ /dev/null @@ -1,138 +0,0 @@ ---- -title: Get Knowledge Base -sidebarTitle: Get Knowledge Base ---- - -**GET `/api/projects/{project_name}/knowledge_bases/{knowledge_base_name}`** - -This API endpoint lists details about a knowledge base using the `GET` method. - - -Learn more about knowledge bases following [this doc page](/mindsdb_sql/knowledge_bases/overview). - - -### Path Parameters - - -Defines the project where the knowledge bases are located. Note that the default project name is `mindsdb`. - - - -Defines the knowledge base name to get its details. - - -### Body - -None. - -### Response - - -Unique identifier for the knowledge base. - - - -The name assigned to the knowledge base. - - - -The ID of the project where the knowledge base resides. - - - -The name of the project where the knowledge base resides. - - - -The vector store used for storing vector embeddings. - - - -The name of the collection or table within the vector database. - - - -Timestamp indicating when the knowledge base was last updated. - - - -Timestamp indicating when the knowledge base was created. - - - -Optional field for linking specific queries to this knowledge base. - - - -The embedding model used to convert content into vector representations. - - - -Optional model used to rerank search results based on relevance. 
- - - -Optional list of columns used for metadata-based filtering or enrichment. - - - -Optional list of columns treated as the main content for embedding and retrieval. - - - -The name of the column that uniquely identifies each content row. - - - -A nested object that contains additional configuration parameters. - - - -The name of the embedding model associated with this knowledge base at creation time. - - - - -```shell Shell -curl -X GET http://127.0.0.1:47334/api/projects/mindsdb/knowledge_bases/my_kb -``` - - - - - -```json Response -{ - "id": 2, - "name": "my_kb", - "project_id": 1, - "vector_database": "my_kb_chromadb", - "vector_database_table": "default_collection", - "updated_at": "2025-06-26 10:24:06.311655", - "created_at": "2025-06-26 10:24:06.311654", - "query_id": null, - "embedding_model": { - "provider": "openai", - "model_name": "text-embedding-3-small", - "api_key": "******" - }, - "reranking_model": { - "provider": "openai", - "model_name": "gpt-4o", - "api_key": "******" - }, - "metadata_columns": [ - "product" - ], - "content_columns": [ - "notes" - ], - "id_column": "order_id", - "params": { - "created_embedding_model": "kb_embedding_my_kb" - } -} - -``` - - diff --git a/docs/rest/knowledge_bases/insert.mdx b/docs/rest/knowledge_bases/insert.mdx deleted file mode 100644 index 1f991dc60cd..00000000000 --- a/docs/rest/knowledge_bases/insert.mdx +++ /dev/null @@ -1,94 +0,0 @@ ---- -title: Insert Into Knowledge Base -sidebarTitle: Insert Into Knowledge Base ---- - -**PUT `/api/projects/{project_name}/knowledge_bases/{knowledge_base_name}`** - -This API endpoint inserts data into a knowledge base using the `PUT` method. - - -Learn more about knowledge bases following [this doc page](/mindsdb_sql/knowledge_bases/overview). - - -### Path Parameters - - -Defines the project where the knowledge bases are located. Note that the default project name is `mindsdb`. - - - -Defines the knowledge base name. 
- - -### Body - - -Defines the SQL query used to fetch data to be inserted into the knowledge base. - - - -Defines raw data to be inserted into the knowledge base. - - - -Defines the list of files to be inserted into the knowledge base. - - - -Defines the list of URLs to be crawled and their content inserted into the knowledge base. For example, `"urls": ["https://docs.mindsdb.com/mindsdb_sql/knowledge_bases/overview"]`. - - - -Defines the limit of pages to be crawled. For example, `"limit": 10`. - - - -Defines the crawl depth limit for URLs. For example, `"crawl_depth": 2`. - - - -Defines the list of domains to be filtered. For example, `"filters": { "allowed_domains": ["example.com"] }`. - - - -Learn more about the [web crawler here](/integrations/app-integrations/web-crawler). - - -### Response - -None. - - - -```shell Shell -curl -X PUT http://127.0.0.1:47334/api/projects/mindsdb/knowledge_bases/my_kb \ - -H "Content-Type: application/json" \ - -d '{ - "knowledge_base": { - "rows": [ - { - "order_id": "123", - "product": "Widget A", - "notes": "Great product, would buy again" - }, - { - "order_id": "124", - "product": "Widget B", - "notes": "Poor quality" - } - ], - "query": "SELECT * FROM sample_data.orders" - } -}' -``` - - - - - -```json Response -200 OK -``` - - diff --git a/docs/rest/knowledge_bases/list.mdx b/docs/rest/knowledge_bases/list.mdx deleted file mode 100644 index cc8a7392e1e..00000000000 --- a/docs/rest/knowledge_bases/list.mdx +++ /dev/null @@ -1,129 +0,0 @@ ---- -title: List Knowledge Bases -sidebarTitle: List Knowledge Bases ---- - -**GET `/api/projects/{project_name}/knowledge_bases`** - -This API endpoint lists all available knowledge bases using the `GET` method. - - -Learn more about knowledge bases following [this doc page](/mindsdb_sql/knowledge_bases/overview). - - -### Path Parameters - - -Defines the project where the knowledge bases are located. Note that the default project name is `mindsdb`. - - -### Body - -None. 
- -### Response - - -Unique identifier for the knowledge base. - - - -The name assigned to the knowledge base. - - - -The ID of the project where the knowledge base resides. - - - -The name of the project where the knowledge base resides. - - - -The vector store used for storing vector embeddings. - - - -The name of the collection or table within the vector database. - - - -Timestamp indicating when the knowledge base was last updated. - - - -Timestamp indicating when the knowledge base was created. - - - -Optional field for linking specific queries to this knowledge base. - - - -The embedding model used to convert content into vector representations. - - - -Optional model used to rerank search results based on relevance. - - - -Optional list of columns used for metadata-based filtering or enrichment. - - - -Optional list of columns treated as the main content for embedding and retrieval. - - - -The name of the column that uniquely identifies each content row. - - - -A nested object that contains additional configuration parameters. - - - -The name of the embedding model associated with this knowledge base at creation time. - - - -The default storage used for storing vector data. 
- - - - -```shell Shell -curl -X GET http://127.0.0.1:47334/api/projects/mindsdb/knowledge_bases -``` - - - - - -```json Response -[ - { - "id": 1, - "name": "my_kb", - "project_id": 1, - "vector_database": "my_kb_chromadb", - "vector_database_table": "default_collection", - "updated_at": "2025-06-25 13:04:01.864625", - "created_at": "2025-06-25 13:04:01.864624", - "query_id": null, - "embedding_model": null, - "reranking_model": null, - "metadata_columns": null, - "content_columns": null, - "id_column": null, - "params": { - "created_embedding_model": "kb_embedding_my_kb", - "default_vector_storage": "my_kb_chromadb" - }, - "project_name": "mindsdb" - } -] -``` - - diff --git a/docs/rest/knowledge_bases/query.mdx b/docs/rest/knowledge_bases/query.mdx deleted file mode 100644 index d5b4ba6b109..00000000000 --- a/docs/rest/knowledge_bases/query.mdx +++ /dev/null @@ -1,113 +0,0 @@ ---- -title: Query Knowledge Base -sidebarTitle: Query Knowledge Base ---- - -**POST `/api/sql/query`** - -This API endpoint queries a knowledge base using the `POST` method. Learn more about [querying knowledge bases using semantic search and metadata filtering here](/mindsdb_sql/knowledge_bases/query). - - -Learn more about knowledge bases following [this doc page](/mindsdb_sql/knowledge_bases/overview). - - -### Path Parameters - -None. - -### Body - - -A query that is sent to the MindsDB instance. - - -### Response - -Contains data stored in the knowledge base. 
- - - -```shell Shell -curl -X POST http://127.0.0.1:47334/api/sql/query \ - --header 'Content-Type: application/json' \ - --data '{ - "query": "SELECT * FROM my_kb;" - }' -``` - - - - - -```json Response -{ - "type": "table", - "column_names": [ - "id", - "chunk_id", - "chunk_content", - "metadata", - "relevance", - "distance" - ], - "data": [ - [ - "A1B", - "A1B:notes:1of1:0to20", - "Request color: black", - { - "chunk_index": 0, - "content_column": "notes", - "end_char": 20, - "original_doc_id": "A1B", - "original_row_index": "0", - "product": "Wireless Mouse", - "source": "TextChunkingPreprocessor", - "start_char": 0 - }, - null, - null - ], - [ - "3XZ", - "3XZ:notes:1of1:0to19", - "Gift wrap requested", - { - "chunk_index": 0, - "content_column": "notes", - "end_char": 19, - "original_doc_id": "3XZ", - "original_row_index": "1", - "product": "Bluetooth Speaker", - "source": "TextChunkingPreprocessor", - "start_char": 0 - }, - null, - null - ], - [ - "Q7P", - "Q7P:notes:1of1:0to22", - "Prefer aluminum finish", - { - "chunk_index": 0, - "content_column": "notes", - "end_char": 22, - "original_doc_id": "Q7P", - "original_row_index": "2", - "product": "Aluminum Laptop Stand", - "source": "TextChunkingPreprocessor", - "start_char": 0 - }, - null, - null - ] - ], - "context": { - "show_secrets": false, - "db": "mindsdb" - } -} -``` - - diff --git a/docs/rest/overview.mdx b/docs/rest/overview.mdx deleted file mode 100644 index cba646fff3a..00000000000 --- a/docs/rest/overview.mdx +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: REST API -sidebarTitle: Overview ---- - -MindsDB provides REST API endpoints, enabling incorporation of AI building blocks into applications. - -This section introduces REST API endpoints provided by MindsDB to bring data and AI together. - -Follow these steps to get started: - - - - Learn more about [usage here](/rest/usage). - - - Connect your data source to MindsDB via [this endpoint](/rest/databases/create-databases).

- Explore all available [data sources here](/integrations/data-overview). -
- - Create, train, and deploy AI/ML models within MindsDB via [this endpoint](/rest/models/train-model).

- Explore all available [AI engines here](/integrations/ai-overview). -
- - Query for predictions via [this endpoint](/rest/models/query-model). - -
diff --git a/docs/rest/projects/get-project.mdx b/docs/rest/projects/get-project.mdx deleted file mode 100644 index 6684f06eaf9..00000000000 --- a/docs/rest/projects/get-project.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Get a Project -openapi: "GET /api/projects/{projectName}" -sidebarTitle: Get a Project ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. - - \ No newline at end of file diff --git a/docs/rest/projects/get-projects.mdx b/docs/rest/projects/get-projects.mdx deleted file mode 100644 index a612b6d324d..00000000000 --- a/docs/rest/projects/get-projects.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: List Projects -openapi: "GET /api/projects" -sidebarTitle: List Projects ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. - - \ No newline at end of file diff --git a/docs/rest/sql.mdx b/docs/rest/sql.mdx deleted file mode 100644 index 4b8604931d0..00000000000 --- a/docs/rest/sql.mdx +++ /dev/null @@ -1,157 +0,0 @@ ---- -title: Query -openapi: POST /api/sql/query -sidebarTitle: Query ---- - -## Description - -This API provides a REST endpoint for executing SQL queries. Note: - -* This endpoint is an HTTP POST method. -* This endpoint accepts data via `application/json` request body. -* The only required key is the `query` which has the SQL statement value. - -### Body - - - -String that contains the SQL query that needs to be executed. - - - - - -Format of the response. Available options: -- `null` (default) - returns all data in a single JSON response -- `"sse"` - returns data as Server-Sent Events stream -- `"jsonlines"` - returns data as JSON Lines stream (one JSON object per line) - -Use `"sse"` or `"jsonlines"` for streaming large result sets to avoid loading all data into memory at once. - - - - - -Optional context object, e.g., `{"db": "mindsdb"}` to specify the database. 
- - - - - -Optional parameters for parameterized queries, e.g., `{"name": "value"}`. - - - -### Response - - - -A list with the column names returned - - - - - -The database where the query is executed - - - - - -The actual data returned by the query in case of the table response type - - - - -The type of the response table | error | ok - - - - - -```shell Shell -curl --request POST \ - --url https://cloud.mindsdb.com/api/sql/query \ - --header 'Content-Type: application/json' \ - --data ' -{ - "query": "SELECT * FROM example_db.demo_data.home_rentals LIMIT 10;" -} -' -``` - -```shell Shell (Streaming with SSE) -curl --request POST \ - --url https://cloud.mindsdb.com/api/sql/query \ - --header 'Content-Type: application/json' \ - --data ' -{ - "query": "SELECT * FROM example_db.demo_data.home_rentals;", - "response_format": "sse" -} -' -``` - -```shell Shell (Streaming with JSON Lines) -curl --request POST \ - --url https://cloud.mindsdb.com/api/sql/query \ - --header 'Content-Type: application/json' \ - --data ' -{ - "query": "SELECT * FROM example_db.demo_data.home_rentals;", - "response_format": "jsonlines" -} -' -``` - -```python Python -import requests -url = 'https://cloud.mindsdb.com/api/sql/query' -resp = requests.post(url, json={'query': - 'SELECT * FROM example_db.demo_data.home_rentals LIMIT 10;'}) -``` - - - - - -```json Response (Default) -{ - "column_names": [ - "sqft", - "rental_price" - ], - "context": { - "db": "mindsdb" - }, - "data": [ - [ - 917, - 3901 - ], - [ - 194, - 2042 - ] - ], - "type": "table" -} -``` - -```text Response (SSE format) -data: {"type": "table", "column_names": ["sqft", "rental_price"], "context": {"db": "mindsdb"}} - -data: [[917, 3901], [194, 2042]] - -data: [[543, 1871], [289, 1563]] - -``` - -```text Response (JSON Lines format) -{"type": "table", "column_names": ["sqft", "rental_price"], "context": {"db": "mindsdb"}} -[[917, 3901], [194, 2042]] -[[543, 1871], [289, 1563]] -``` - - diff --git 
a/docs/rest/tables/create-table.mdx b/docs/rest/tables/create-table.mdx deleted file mode 100644 index 5dd4e5cf158..00000000000 --- a/docs/rest/tables/create-table.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Create a Table -openapi: "POST /api/databases/{databaseName}/tables" -sidebarTitle: Create a Table ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. - - \ No newline at end of file diff --git a/docs/rest/tables/delete-table.mdx b/docs/rest/tables/delete-table.mdx deleted file mode 100644 index 6939a027e8b..00000000000 --- a/docs/rest/tables/delete-table.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Remove a Table -openapi: "DELETE /api/databases/{databaseName}/tables/{tableName}" -sidebarTitle: Remove a Table ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. - - \ No newline at end of file diff --git a/docs/rest/tables/list-table.mdx b/docs/rest/tables/list-table.mdx deleted file mode 100644 index a50ffb385b9..00000000000 --- a/docs/rest/tables/list-table.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Get a Table -openapi: "GET /api/databases/{databaseName}/tables/{tableName}" -sidebarTitle: Get a Table ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. - - \ No newline at end of file diff --git a/docs/rest/tables/list-tables.mdx b/docs/rest/tables/list-tables.mdx deleted file mode 100644 index 99b63c639d2..00000000000 --- a/docs/rest/tables/list-tables.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: List Tables -openapi: "GET /api/databases/{databaseName}/tables" -sidebarTitle: List Tables ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. 
- - \ No newline at end of file diff --git a/docs/rest/usage.mdx b/docs/rest/usage.mdx deleted file mode 100644 index 7cc178ffc96..00000000000 --- a/docs/rest/usage.mdx +++ /dev/null @@ -1,28 +0,0 @@ ---- -title: Usage -sidebarTitle: Usage ---- - -Here is how to connect and use REST API to MindsDB. - -## Local MindsDB - -This example shows how to execute SQL statements, either raw or parametrized, on MindsDB via REST APIs. - -``` -import requests - -# connect -url = 'http://127.0.0.1:47334/api/sql/query' - -# query -resp = requests.post(url, json={ - "query": "select * from my_datasource.my_table where name = :name and age = :age", - "params": {"name": "acme", "age": 1}, - }) - -# response -print(resp.text) # alternative: print(resp.json()) -``` - -Note that you can either send a raw SQL and omit the `params` parameter, or send a parametrized SQL in the `query` parameter and provide the `params` parameter that defines the values. diff --git a/docs/rest/views/create-view.mdx b/docs/rest/views/create-view.mdx deleted file mode 100644 index da1aed6d02d..00000000000 --- a/docs/rest/views/create-view.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Create a View -openapi: "POST /api/projects/{projectName}/views" -sidebarTitle: Create a View ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. - - \ No newline at end of file diff --git a/docs/rest/views/delete-views.mdx b/docs/rest/views/delete-views.mdx deleted file mode 100644 index 9ba7137714f..00000000000 --- a/docs/rest/views/delete-views.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Remove a View -openapi: "DELETE /api/projects/{projectName}/views/{viewName}" -sidebarTitle: Remove a View ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. 
- - \ No newline at end of file diff --git a/docs/rest/views/list-view.mdx b/docs/rest/views/list-view.mdx deleted file mode 100644 index 84fee1c8cbc..00000000000 --- a/docs/rest/views/list-view.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Get a View -openapi: "GET /api/projects/{projectName}/views/{viewName}" -sidebarTitle: Get a View ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. - - \ No newline at end of file diff --git a/docs/rest/views/list-views.mdx b/docs/rest/views/list-views.mdx deleted file mode 100644 index a7024cb15c1..00000000000 --- a/docs/rest/views/list-views.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: List Views -openapi: "GET /api/projects/{projectName}/views" -sidebarTitle: List Views ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. - - \ No newline at end of file diff --git a/docs/rest/views/update-view.mdx b/docs/rest/views/update-view.mdx deleted file mode 100644 index 4143290cd34..00000000000 --- a/docs/rest/views/update-view.mdx +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Update a View -openapi: "PUT /api/projects/{projectName}/views/{viewName}" -sidebarTitle: Update a View ---- - - - -The REST API endpoints can be used with MindsDB running locally at http://127.0.0.1:47334/api. - - \ No newline at end of file diff --git a/docs/sdks/javascript/agents.mdx b/docs/sdks/javascript/agents.mdx deleted file mode 100644 index 9a6ce4e7c1f..00000000000 --- a/docs/sdks/javascript/agents.mdx +++ /dev/null @@ -1,29 +0,0 @@ ---- -title: How to Use Agents -sidebarTitle: AI Agents ---- - -Currently, there is no JavaScript syntax for using Agents. To use Agents from JavaScript SDK, refer to the [Agents documentation in SQL](/mindsdb_sql/agents/agent) and execute SQL queries as below. 
- -``` -const query = ` - CREATE AGENT my_agent - USING - model = { - "provider": "openai", - "model_name" : "gpt-4o", - "api_key": "sk-abc123" - }, - data = { - "knowledge_bases": ["mindsdb.sales_kb", "mindsdb.orders_kb"], - "tables": ["postgres_conn.customers", "mysql_conn.products"] - }, - prompt_template=' - mindsdb.sales_kb stores sales analytics data - mindsdb.orders_kb stores order data - postgres_conn.customers stores customers data - mysql_conn.products stores products data - '; - `; -const queryResult = await MindsDB.SQL.runQuery(query); -``` diff --git a/docs/sdks/javascript/connect.mdx b/docs/sdks/javascript/connect.mdx deleted file mode 100644 index 03365f90e8a..00000000000 --- a/docs/sdks/javascript/connect.mdx +++ /dev/null @@ -1,82 +0,0 @@ ---- -title: Connect -sidebarTitle: Connect ---- - -Before performing any operations, you must connect to MindsDB. By default, all operations will go through [MindsDB Cloud REST APIs](/rest/sql), but you can use a self-hosted version of MindsDB as well. 
- - - - Here is how to connect to your local MindsDB server: - - ``` - import MindsDB from 'mindsdb-js-sdk'; - // const MindsDB = require("mindsdb-js-sdk").default; // alternative for CommonJS syntax - - try { - - // No authentication needed for self-hosting - await MindsDB.connect({ // alternative for ES6 module syntax: await MindsDB.default.connect({ - host: 'http://127.0.0.1:47334' - }); - console.log('connected'); - - } catch(error) { - // Failed to connect to local instance - console.log(error); - } - ``` - - - Here is how to connect using your own Axios instance (see [details on the default instance](https://github.com/mindsdb/mindsdb-js-sdk/blob/main/src/util/http.ts)): - - ``` - import MindsDB from 'mindsdb-js-sdk'; - // const MindsDB = require("mindsdb-js-sdk").default; // alternative for CommonJS syntax - - import axios from 'axios'; - - // Use 'host' option in MindsDB.connect to specify base URL override - const customAxios = axios.create({ - timeout: 1000, - }); - - try { - - await MindsDB.connect({ - user: mindsdbuser@gmail.com, - password: mypassword, - httpClient: customAxios - }); - console.log('connected'); - - } catch(error) { - // Failed to authenticate - console.log(error); - } - ``` - - - - -Please note that all methods that use `await` must be wrapped in an `async` function, like this: - -``` -(async() => { - - try { - - // No authentication needed for self-hosting - await MindsDB.connect({ - host: 'http://127.0.0.1:47334' - }); - console.log('connected'); - - } catch(error) { - // Failed to connect to local instance - console.log(error); - } - -})(); -``` - diff --git a/docs/sdks/javascript/create_database.mdx b/docs/sdks/javascript/create_database.mdx deleted file mode 100644 index e7410585050..00000000000 --- a/docs/sdks/javascript/create_database.mdx +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Connect a Data Source -sidebarTitle: Connect a Data Source ---- - -## Description - -The `MindsDB.Databases.createDatabase` function connects a new 
data source to MindsDB. - -## Syntax - -Here is how to connect our sample MySQL database: - -``` -const connectionParams = { - 'user': 'user', - 'port': 3306, - 'password': 'MindsDBUser123!', - 'host': 'samples.mindsdb.com', - 'database': 'public' -} - -try { - - const mysqlDatabase = await MindsDB.Databases.createDatabase( - 'mysql_datasource', - 'mysql', - connectionParams); - console.log('connected a database'); - -} catch (error) { - // Couldn't connect to database - console.log(error); -} -``` - -First, we define the connection parameters and then use the `createDatabase` function to connect a database. diff --git a/docs/sdks/javascript/create_table.mdx b/docs/sdks/javascript/create_table.mdx deleted file mode 100644 index f3e4e15e7a6..00000000000 --- a/docs/sdks/javascript/create_table.mdx +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Create a Table -sidebarTitle: Create a Table ---- - -## Description - -The `runQuery()` function executes a query given as its argument directly in MindsDB. - -## Syntax - -Here is the syntax: - -``` -const query = `CREATE TABLE integration_name.table_name (SELECT * FROM data);`; -const queryResult = await MindsDB.SQL.runQuery(query); -``` diff --git a/docs/sdks/javascript/create_view.mdx b/docs/sdks/javascript/create_view.mdx deleted file mode 100644 index b6c45ca9ffd..00000000000 --- a/docs/sdks/javascript/create_view.mdx +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: Create a View -sidebarTitle: Create a View ---- - -## Description - -The `createView()` function creates a view in MindsDB. 
- -## Syntax - -Here is the syntax: - -``` -const viewSelect = `SELECT t.sqft, t.location, m.rental_price - FROM mysql_demo_db.home_rentals as t - JOIN mindsdb.home_rentals_model as m`; - -const predictionsView = await MindsDB.Views.createView( - 'view_name', - 'project_name', - viewSelect); -``` diff --git a/docs/sdks/javascript/delete_from.mdx b/docs/sdks/javascript/delete_from.mdx deleted file mode 100644 index bec3cd894e0..00000000000 --- a/docs/sdks/javascript/delete_from.mdx +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Delete From a Table -sidebarTitle: Delete From a Table ---- - -## Description - -The `runQuery()` function executes a query given as its argument directly in MindsDB. - -## Syntax - -Here is the syntax: - -``` -const query = `DELETE FROM datasource_name.table_name WHERE …`; -const queryResult = await MindsDB.SQL.runQuery(query); -``` diff --git a/docs/sdks/javascript/delete_table.mdx b/docs/sdks/javascript/delete_table.mdx deleted file mode 100644 index f1cefd3e6ae..00000000000 --- a/docs/sdks/javascript/delete_table.mdx +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Remove a Table -sidebarTitle: Remove a Table ---- - -## Description - -The `runQuery()` function executes a query given as its argument directly in MindsDB. - -## Syntax - -Here is the syntax: - -``` -const query = `DROP TABLE integration_name.table_name;`; -const queryResult = await MindsDB.SQL.runQuery(query); -``` diff --git a/docs/sdks/javascript/drop_database.mdx b/docs/sdks/javascript/drop_database.mdx deleted file mode 100644 index 926ad05cbe6..00000000000 --- a/docs/sdks/javascript/drop_database.mdx +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Remove a Data Source -sidebarTitle: Remove a Data Source ---- - -## Description - -The `delete` function removes a data source from MindsDB. Please note that in order to delete a connected data source, we need to fetch it first with the `getDatabase` function. 
- -## Syntax - -Here is how to get an existing database and remove it: - -``` -try { - - const db = await MindsDB.Databases.getDatabase('mysql_datasource'); - console.log('got a database') - - // Deleting a database - if (db) { - try { - - await db.delete(); - console.log('deleted a database'); - - } catch (error) { - // Couldn't delete a database - console.log(error); - } - } - -} catch (error) { - // Couldn't connect to database - console.log(error); - } -``` diff --git a/docs/sdks/javascript/drop_view.mdx b/docs/sdks/javascript/drop_view.mdx deleted file mode 100644 index 5bd46b8f925..00000000000 --- a/docs/sdks/javascript/drop_view.mdx +++ /dev/null @@ -1,18 +0,0 @@ ---- -title: Remove a View -sidebarTitle: Remove a View ---- - -## Description - -The `deleteView()` function deletes an existing view from MindsDB. - -## Syntax - -Here is the syntax: - -``` -await MindsDB.Views.deleteView( - 'view_name', - 'project_name'); -``` diff --git a/docs/sdks/javascript/get_database.mdx b/docs/sdks/javascript/get_database.mdx deleted file mode 100644 index 0032cff3578..00000000000 --- a/docs/sdks/javascript/get_database.mdx +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: Get a Data Source -sidebarTitle: Get a Data Source ---- - -You can save a data source into a variable using the code below. - -``` -const db = await MindsDB.Databases.getDatabase('mysql_datasource'); -``` diff --git a/docs/sdks/javascript/insert_into_table.mdx b/docs/sdks/javascript/insert_into_table.mdx deleted file mode 100644 index 56b1b4cabf0..00000000000 --- a/docs/sdks/javascript/insert_into_table.mdx +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Insert Into a Table -sidebarTitle: Insert Into a Table ---- - -## Description - -The `runQuery()` function executes a query given as its argument directly in MindsDB. 
- -## Syntax - -Here is the syntax: - -``` -const query = `INSERT INTO integration_name.table_name (SELECT ...)`; -const queryResult = await MindsDB.SQL.runQuery(query); -``` diff --git a/docs/sdks/javascript/installation.mdx b/docs/sdks/javascript/installation.mdx deleted file mode 100644 index da16dfc8a5f..00000000000 --- a/docs/sdks/javascript/installation.mdx +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: Installation -sidebarTitle: Installation ---- - -The MindsDB JavaScript SDK allows you to unlock the power of machine learning right inside your web applications. Read along to see how to install the MindsDB's JavaScript SDK. - -## How to Install - -To install the MindsDB JavaScript SDK, run the below command: - -```bash -npm install --save mindsdb-js-sdk -``` - -Here is the expected output: - -

- -

diff --git a/docs/sdks/javascript/join_on.mdx b/docs/sdks/javascript/join_on.mdx deleted file mode 100644 index 61fb924c446..00000000000 --- a/docs/sdks/javascript/join_on.mdx +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Join Tables On -sidebarTitle: Join Tables On ---- - -## Description - -The `runQuery()` function executes a query given as its argument directly in MindsDB. - -## Syntax - -Here is the syntax: - -``` -const query = `SELECT * FROM table_name t JOIN another_table a ON t…=a…`; -const queryResult = await MindsDB.SQL.runQuery(query); -``` diff --git a/docs/sdks/javascript/list_data_handlers.mdx b/docs/sdks/javascript/list_data_handlers.mdx deleted file mode 100644 index 068332e9985..00000000000 --- a/docs/sdks/javascript/list_data_handlers.mdx +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: List Data Handlers -sidebarTitle: List Data Handlers ---- - -Here is how you can fetch all available data handlers directly from JavaScript code: - -``` -const query = 'SHOW HANDLERS WHERE type = \'data\''; -result = await MindsDB.SQL.runQuery(query); -console.log(result); -``` diff --git a/docs/sdks/javascript/list_databases.mdx b/docs/sdks/javascript/list_databases.mdx deleted file mode 100644 index 3aaf7178084..00000000000 --- a/docs/sdks/javascript/list_databases.mdx +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: List Data Sources -sidebarTitle: List Data Sources ---- - -You can list all data sources using the code below. 
- -``` -const query = 'SHOW FULL DATABASES WHERE type = \'data\''; -result = await MindsDB.SQL.runQuery(query); // alternative for ES6 module syntax: MindsDB.default.SQL.runQuery(query) -console.log(result); -``` diff --git a/docs/sdks/javascript/list_projects.mdx b/docs/sdks/javascript/list_projects.mdx deleted file mode 100644 index d22f7f7d468..00000000000 --- a/docs/sdks/javascript/list_projects.mdx +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: List Projects -sidebarTitle: List Projects ---- - -## Description - -The `getAllProjects()` function lists all available projects. - -## Syntax - -Here is how to list all available projects: - -``` -const allProjects = await MindsDB.Projects.getAllProjects(); -console.log('all projects:') -allProjects.forEach(p => { - console.log(p.name); -}); -``` diff --git a/docs/sdks/javascript/list_views.mdx b/docs/sdks/javascript/list_views.mdx deleted file mode 100644 index 6181463908f..00000000000 --- a/docs/sdks/javascript/list_views.mdx +++ /dev/null @@ -1,21 +0,0 @@ ---- -title: List Views -sidebarTitle: List Views ---- - -## Description - -The `getAllViews()` function lists all available views. - - -## Syntax - -Here is how to list all available views: - -``` -const allViews = await MindsDB.Views.getAllViews(); -console.log('all views:') -allViews.forEach(v => { - console.log(v.name); -}); -``` diff --git a/docs/sdks/javascript/native_queries.mdx b/docs/sdks/javascript/native_queries.mdx deleted file mode 100644 index a3926ff7bd9..00000000000 --- a/docs/sdks/javascript/native_queries.mdx +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Native Queries -sidebarTitle: Native Queries ---- - -## Description - -The `runQuery()` function executes a query given as its argument directly in MindsDB. And the native queries syntax ensures that the query is executed directly on the connected data source. 
- -## Syntax - -Here is the syntax: - -``` -const query = `SELECT * FROM datasource_name ()`; -const queryResult = await MindsDB.SQL.runQuery(query); -``` diff --git a/docs/sdks/javascript/overview.mdx b/docs/sdks/javascript/overview.mdx deleted file mode 100644 index b6828172f96..00000000000 --- a/docs/sdks/javascript/overview.mdx +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: Overview -sidebarTitle: Overview ---- - -MindsDB provides JavaScript SDK, enabling its integration into JavaScript environments. - -Follow these steps to get started: - - - - For JavaScript, [install the package](/sdks/javascript/installation). - - - Connect a data source in [JavaScript](/sdks/javascript/create_database).

- Explore all available [data sources here](/integrations/data-overview). -
- - Configure an AI engine in [JavaScript](/sdks/javascript/create_ml_engine).

- Explore all available [AI engines here](/integrations/ai-overview). -
- - Create and deploy an AI/ML model in [JavaScript](/sdks/javascript/create_model). - - - Query for predictions in [JavaScript](/sdks/javascript/batchQuery). - - - Automate tasks by scheduling jobs in [JavaScript](/sdks/javascript/create_job). - -
diff --git a/docs/sdks/javascript/query_files.mdx b/docs/sdks/javascript/query_files.mdx deleted file mode 100644 index 59a31bf6f24..00000000000 --- a/docs/sdks/javascript/query_files.mdx +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Query a File -sidebarTitle: Query a File ---- - -## Description - -The `runQuery()` function executes a query given as its argument directly in MindsDB. - -## Syntax - -Here is the syntax: - -``` -const query = `SELECT * FROM files.file_name`; -const queryResult = await MindsDB.SQL.runQuery(query); -``` diff --git a/docs/sdks/javascript/query_table.mdx b/docs/sdks/javascript/query_table.mdx deleted file mode 100644 index 6ae56d39300..00000000000 --- a/docs/sdks/javascript/query_table.mdx +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Query a Table -sidebarTitle: Query a Table ---- - -## Description - -The `runQuery()` function executes a query given as its argument directly in MindsDB. - -## Syntax - -Here is the syntax: - -``` -const query = `SELECT * FROM table_name`; -const queryResult = await MindsDB.SQL.runQuery(query); -``` diff --git a/docs/sdks/javascript/query_view.mdx b/docs/sdks/javascript/query_view.mdx deleted file mode 100644 index cc0bee73f57..00000000000 --- a/docs/sdks/javascript/query_view.mdx +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Query a View -sidebarTitle: Query a View ---- - -## Description - -The `runQuery()` function executes a query given as its argument directly in MindsDB. 
- -## Syntax - -Here is the syntax: - -``` -const query = `SELECT * FROM project_name.view_name`; -const queryResult = await MindsDB.SQL.runQuery(query); -``` diff --git a/docs/sdks/javascript/update_table.mdx b/docs/sdks/javascript/update_table.mdx deleted file mode 100644 index e1ba578c975..00000000000 --- a/docs/sdks/javascript/update_table.mdx +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: Update a Table -sidebarTitle: Update a Table ---- - -## Description - -The `runQuery()` function executes a query given as its argument directly in MindsDB. - -## Syntax - -Here is the syntax: - -``` -const query = `UPDATE integration_name.table_name - SET column_name = new_value - WHERE column_name = old_value`; -const queryResult = await MindsDB.SQL.runQuery(query); -``` diff --git a/docs/sdks/python/agents.mdx b/docs/sdks/python/agents.mdx deleted file mode 100644 index b6c170b90fc..00000000000 --- a/docs/sdks/python/agents.mdx +++ /dev/null @@ -1,357 +0,0 @@ ---- -title: How to Use Agents -sidebarTitle: AI Agents ---- - -Agents enable conversation with data, including structured and unstructured data connected to MindsDB. - - -Learn more about [MindsDB Agents here](/mindsdb_sql/agents/agent). - - -## Create Agents - -Here is the syntax for creating an agent: - -```python -agent = server.agents.create( - 'my_agent', - model={ - 'model_name': 'gpt-4o', - 'provider': 'openai', - 'api_key': 'sk-abc123', - 'base_url': 'http://example.com', - 'api_version': '2024-02-01' - }, - data={ - 'knowledge_bases': ['project_name.kb_name', ...], - 'tables': ['datasource_conn_name.table_name', ...] - }, - prompt_template='describe data' -) -``` - -It creates an agent that uses the defined model and has access to the connected data. Here is how to list all available agents. - -```python -agents = server.agents.list() -print(agents) -``` - -The following sections explain all the agent parameters. 
- -### `model` - -This parameter defines the underlying language model, including: - -* `provider` -It is a required parameter. It defines the model provider from the list below. - -* `model_name` -It is a required parameter. It defines the model name from the list below. - -* `api_key` -It is an optional parameter (applicable to selected providers), which stores the API key to access the model. Users can provide it either in this `api_key` parameter, or using [environment variables](/mindsdb_sql/functions/from_env). - -* `base_url` -It is an optional parameter (applicable to selected providers), which stores the base URL for accessing the model. It is the root URL used to send API requests. - -* `api_version` -It is an optional parameter (applicable to selected providers), which defines the API version. - -The available models and providers include the following. - - - - -Available models: -- claude-3-opus-20240229 -- claude-3-sonnet-20240229 -- claude-3-haiku-20240307 -- claude-2.1 -- claude-2.0 -- claude-instant-1.2 - - - -Available models: -- gemini-2.5-pro-preview-03-25 -- gemini-2.0-flash -- gemini-2.0-flash-lite -- gemini-1.5-flash -- gemini-1.5-flash-8b -- gemini-1.5-pro - - - -Available models: -- gemma -- llama2 -- mistral -- mixtral -- llava -- neural-chat -- codellama -- dolphin-mixtral -- qwen -- llama2-uncensored -- mistral-openorca -- deepseek-coder -- nous-hermes2 -- phi -- orca-mini -- dolphin-mistral -- wizard-vicuna-uncensored -- vicuna -- tinydolphin -- llama2-chinese -- openhermes -- zephyr -- nomic-embed-text -- tinyllama -- openchat -- wizardcoder -- phind-codellama -- starcoder -- yi -- orca2 -- falcon -- starcoder2 -- wizard-math -- dolphin-phi -- nous-hermes -- starling-lm -- stable-code -- medllama2 -- bakllava -- codeup -- wizardlm-uncensored -- solar -- everythinglm -- sqlcoder -- nous-hermes2-mixtral -- stable-beluga -- yarn-mistral -- samantha-mistral -- stablelm2 -- meditron -- stablelm-zephyr -- magicoder -- yarn-llama2 -- 
wizard-vicuna -- llama-pro -- deepseek-llm -- codebooga -- mistrallite -- dolphincoder -- nexusraven -- open-orca-platypus2 -- all-minilm -- goliath -- notux -- alfred -- megadolphin -- xwinlm -- wizardlm -- duckdb-nsql -- notus - - - -Available models: -- gpt-3.5-turbo -- gpt-3.5-turbo-16k -- gpt-3.5-turbo-instruct -- gpt-4 -- gpt-4-32k -- gpt-4-1106-preview -- gpt-4-0125-preview -- gpt-4.1 -- gpt-4.1-mini -- gpt-4o -- o4-mini -- o3-mini -- o1-mini - - - -Available models: -- microsoft/phi-3-mini-4k-instruct -- mistralai/mistral-7b-instruct-v0.2 -- writer/palmyra-med-70b -- mistralai/mistral-large -- mistralai/codestral-22b-instruct-v0.1 -- nvidia/llama3-chatqa-1.5-70b -- upstage/solar-10.7b-instruct -- google/gemma-2-9b-it -- adept/fuyu-8b -- google/gemma-2b -- databricks/dbrx-instruct -- meta/llama-3_1-8b-instruct -- microsoft/phi-3-medium-128k-instruct -- 01-ai/yi-large -- nvidia/neva-22b -- meta/llama-3_1-70b-instruct -- google/codegemma-7b -- google/recurrentgemma-2b -- google/gemma-2-27b-it -- deepseek-ai/deepseek-coder-6.7b-instruct -- mediatek/breeze-7b-instruct -- microsoft/kosmos-2 -- microsoft/phi-3-mini-128k-instruct -- nvidia/llama3-chatqa-1.5-8b -- writer/palmyra-med-70b-32k -- google/deplot -- meta/llama-3_1-405b-instruct -- aisingapore/sea-lion-7b-instruct -- liuhaotian/llava-v1.6-mistral-7b -- microsoft/phi-3-small-8k-instruct -- meta/codellama-70b -- liuhaotian/llava-v1.6-34b -- nv-mistralai/mistral-nemo-12b-instruct -- microsoft/phi-3-medium-4k-instruct -- seallms/seallm-7b-v2.5 -- mistralai/mixtral-8x7b-instruct-v0.1 -- mistralai/mistral-7b-instruct-v0.3 -- google/paligemma -- google/gemma-7b -- mistralai/mixtral-8x22b-instruct-v0.1 -- google/codegemma-1.1-7b -- nvidia/nemotron-4-340b-instruct -- meta/llama3-70b-instruct -- microsoft/phi-3-small-128k-instruct -- ibm/granite-8b-code-instruct -- meta/llama3-8b-instruct -- snowflake/arctic -- microsoft/phi-3-vision-128k-instruct -- meta/llama2-70b -- ibm/granite-34b-code-instruct - - - -Available 
models: -- palmyra-x5 -- palmyra-x4 - - - - -Users can define the model for the agent choosing one of the following options. - -**Option 1.** Use the `model` parameter to define the specification. - -```python - ... - model={ - 'model_name': 'gpt-4o', - 'provider': 'openai', - 'api_key': 'sk-abc123', - 'base_url': 'http://example.com', - 'api_version': '2024-02-01' - }, - ... -``` - -**Option 2.** Define the default model in the [MindsDB configuration file](/setup/custom-config). - -If you define `default_llm` in the configuration file, you do not need to provide the `model` parameter when creating an agent. If you provide both, then the values from the `model` parameter are used. - - -You can define the default models in the Settings of the MindsDB Editor GUI. - - -```bash -"default_llm": { - - "provider": "openai", - "model_name" : "gpt-4o", - "api_key": "sk-abc123", - "base_url": "https://example.com/", - "api_version": "2024-02-01" - -} -``` - -### `data` - -This parameter stores data connected to the agent, including knowledge bases and data sources connected to MindsDB. - -The following parameters store the list of connected data. - -* `knowledge_bases` stores the list of [knowledge bases](/mindsdb_sql/knowledge_bases/overview) to be used by the agent. - -* `tables` stores the list of tables from data sources connected to MindsDB. - - -Note that you can insert all tables from a connected data source and all knowledge bases from a project using the `*` syntax. - -```python - ... - data={ - 'knowledge_bases': ['project_name.*', ...], - 'tables': ['datasource_conn_name.*', ...] - }, - ... -``` - - -### `prompt_template` - -This parameter stores instructions for the agent. - -It is recommended to provide a description of the data sources listed in the `knowledge_bases` and `tables` parameters to help the agent locate relevant data for answering questions. - -### `timeout` - -This parameter defines the time the agent can take to come back with an answer. 
- -For example, when the `timeout` parameter is set to 10, the agent has 10 seconds to return an answer. If the agent takes longer than 10 seconds, it aborts the process and comes back with an answer indicating its failure to return an answer within the defined time interval. - -### `mode` - -This parameter defines the agent's response style, allowing users to partially control the output format. Supported values include `text` and `sql`. - -When set, the agent will tailor its responses to match the specified format. Note that the agent may still adapt its output when necessary to ensure clarity or correctness. - -## Get Agents - -You can get an existing agent with the `get()` method. - -```python -agent = server.agents.get('sales_agent') -``` - -## Query Agents - -Query an agent to generate responses to questions. - -```python -completion = agent.completion([{'question': 'What is the average number of orders per customers?', 'answer': None}]) -print(completion.content) -``` - -Here is how to query agents with enabled streaming, allowing users to view agent's thoughts when it is working on answering questions. - -```python -completion = agent.completion_stream([{'question': 'What is the average number of orders per customers?', 'answer': None}]) -for chunk in completion: - print(chunk) -``` - -## Update Agents - -Update existing agents with new data, model, or prompt. - -```python -agent.data['tables'].append('mysql_demo_db.car_sales') -updated_agent = server.agents.update('my_agent', agent) -print(updated_agent) -``` - -## Delete Agents - -Here is the syntax for deleting an agent: - -```python -server.agents.drop('my_agent') -``` diff --git a/docs/sdks/python/connect.mdx b/docs/sdks/python/connect.mdx deleted file mode 100644 index f28469eafa2..00000000000 --- a/docs/sdks/python/connect.mdx +++ /dev/null @@ -1,18 +0,0 @@ ---- -title: Connect -sidebarTitle: Connect ---- - -This documentation describes how you can connect to your MindsDB server from Python code. 
- -Here is how to connect to your local MindsDB server: - -``` -import mindsdb_sdk - -# connects to the default port (47334) on localhost -server = mindsdb_sdk.connect() - -# connects to the specified host and port -server = mindsdb_sdk.connect('http://127.0.0.1:47334') -``` diff --git a/docs/sdks/python/create_database.mdx b/docs/sdks/python/create_database.mdx deleted file mode 100644 index de39390b373..00000000000 --- a/docs/sdks/python/create_database.mdx +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: Connect a Data Source -sidebarTitle: Connect a Data Source ---- - -## Description - -The `get_database()` and `create_database()` functions enable you to use the existing data source or connect a new one. - -## Syntax - -You can use the `get_database()` method to get an existing database: - -```python -mysql_demo_db = server.get_database('mysql_demo_db') -``` - -Or, the `create_database()` method to connect a new data source to MindsDB: - -```python -mysql_demo_db = server.create_database( - engine = "mysql", - name = "mysql_demo_db", - connection_args = { - "user": "user", - "password": "MindsDBUser123!", - "host": "samples.mindsdb.com", - "port": "3306", - "database": "public" - } -) -``` diff --git a/docs/sdks/python/create_job.mdx b/docs/sdks/python/create_job.mdx deleted file mode 100644 index 174735beb6c..00000000000 --- a/docs/sdks/python/create_job.mdx +++ /dev/null @@ -1,49 +0,0 @@ ---- -title: Create a Job -sidebarTitle: Create a Job ---- - -## Description - -The `get_job()` and `create_job()` functions let you save either an existing job or a newly created job into a variable. 
- -## Syntax - -Use the `get_job()` method to get an existing job: - -```python -my_job = project.get_job('my_job') -``` - -Or, the `create_job()` method to create a job: - -```python -my_job = project.create_job( - 'job_name', - 'select * from models', - repeat_str = '1 hour' -) -``` - -Alternatively, you can create a job using this syntax: - -```python -with project.jobs.create(name='job_name', repeat_min=1) as job: - job.add_query(model.retrain()) - job.add_query(model.predict(database.tables.tbl1)) - job.add_query(kb.insert(database.tables.tbl1)) - job.add_query('show models') -``` - -Where: - -- `name='job_name'` is the job name, -- `repeat_min=1` indicates periodicity of the job in minutes, -- `job.add_query(model.retrain())` adds a task to a job to retrain a model, -- `job.add_query(model.predict(database.tables.tbl1))` adds a task to a job to make predictions, -- `job.add_query(kb.insert(database.tables.tbl1))` adds a task to a job to insert data into a knowledge base, -- `job.add_query('show models')` adds a task to a job to run the statement provided as string value. - -Note that the `add_query()` method adds tasks to a job and takes either String or Query as an argument. - -Note that this method enables a job to manipulate Knowledge Bases, Models, Tables, Views, and Queries, but not Databases, Handlers, Jobs, ML Engines, or Projects. diff --git a/docs/sdks/python/create_project.mdx b/docs/sdks/python/create_project.mdx deleted file mode 100644 index 0526a553aac..00000000000 --- a/docs/sdks/python/create_project.mdx +++ /dev/null @@ -1,28 +0,0 @@ ---- -title: Create a Project -sidebarTitle: Create a Project ---- - -## Description - -The `get_project()` and `create_project()` functions fetch an existing project or create a new one. 
- -## Syntax - -Use the `get_project()` method to get the default `mindsdb` project: - -```python -project = server.get_project() -``` - -Use the `get_project()` method to get another project: - -```python -project = server.get_project('project_name') -``` - -Use the `create_project()` method to create a new project: - -```python -project = server.create_project('project_name') -``` diff --git a/docs/sdks/python/create_table.mdx b/docs/sdks/python/create_table.mdx deleted file mode 100644 index d18e6251d67..00000000000 --- a/docs/sdks/python/create_table.mdx +++ /dev/null @@ -1,29 +0,0 @@ ---- -title: Create a Table -sidebarTitle: Create a Table ---- - -## Description - -The `get_table()` and `create_table()` functions let you save either an existing table or a newly created table into a variable. - -## Syntax - -Use the `get_table()` method to fetch a table from the `mysql_demo_db` database: - -```python -my_table = mysql_demo_db.get_table('my_table') -``` - -Or, the `create_table()` method to create a new table: - -```python -# option 1 -my_table = mysql_demo_db.create_table('my_table', 'SELECT * FROM some_table WHERE key=value') - -# option 2 -my_table = mysql_demo_db.create_table('my_table', base_table) - -# option 3 -my_table = mysql_demo_db.create_table('my_table', base_table.filter(key='value')) -``` diff --git a/docs/sdks/python/create_view.mdx b/docs/sdks/python/create_view.mdx deleted file mode 100644 index 5d22b8caf29..00000000000 --- a/docs/sdks/python/create_view.mdx +++ /dev/null @@ -1,25 +0,0 @@ ---- -title: Create a View -sidebarTitle: Create a View ---- - -## Description - -The `get_view()` and `create_view()` functions let you save either an existing view or a newly created view into a variable. 
- -## Syntax - -Use the `get_view()` method to get an existing view: - -```python -my_view = project.get_view('my_view') -``` - -Or, the `create_view()` method to create a view: - -```python -my_view = project.create_view( - 'view_name', - mysql_demo_db.query('SELECT * FROM my_table LIMIT 100') -) -``` diff --git a/docs/sdks/python/delete_file.mdx b/docs/sdks/python/delete_file.mdx deleted file mode 100644 index 0c0737bf1d1..00000000000 --- a/docs/sdks/python/delete_file.mdx +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Remove a File -sidebarTitle: Remove a File ---- - -## Description - -In MindsDB, files are treated as tables. These are stored in the default `files` database. To delete a file, you must save this `files` database into a variable and then, run the `tables.drop()` function on it. - -## Syntax - -Here is the syntax: - -```sql -files = server.get_database('files') -files.tables.drop('file_name') -``` diff --git a/docs/sdks/python/delete_from.mdx b/docs/sdks/python/delete_from.mdx deleted file mode 100644 index 9f173653c86..00000000000 --- a/docs/sdks/python/delete_from.mdx +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Delete From a Table -sidebarTitle: Delete From a Table ---- - -## Description - -The `delete()` function is executed on a table from a data source connected to MindsDB. It deletes rows from a table. - -## Syntax - -Here is the syntax: - -```sql -data_source.tables.table_name.delete(key=values, ...) -``` diff --git a/docs/sdks/python/delete_table.mdx b/docs/sdks/python/delete_table.mdx deleted file mode 100644 index fe1bf914b8b..00000000000 --- a/docs/sdks/python/delete_table.mdx +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Remove a Table -sidebarTitle: Remove a Table ---- - -## Description - -The `tables.drop()` method enables you to delete a table from a connected data source. 
- -## Syntax - -Here is the syntax: - -```python -data_source.tables.drop('table_name') -``` diff --git a/docs/sdks/python/drop_database.mdx b/docs/sdks/python/drop_database.mdx deleted file mode 100644 index 2ddf3832771..00000000000 --- a/docs/sdks/python/drop_database.mdx +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Remove a Data Source -sidebarTitle: Remove a Data Source ---- - -## Description - -The `drop_database()` function enables you to remove a defined data source connection from MindsDB. - -## Syntax - -Use the `drop_database()` method to remove a database: - -```python -server.drop_database('mysql_demo_db') -``` diff --git a/docs/sdks/python/drop_job.mdx b/docs/sdks/python/drop_job.mdx deleted file mode 100644 index a0f15a360fe..00000000000 --- a/docs/sdks/python/drop_job.mdx +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Remove a Job -sidebarTitle: Remove a Job ---- - -## Description - -The `drop_job()` function deletes a job from MindsDB. - -## Syntax - -Use the `drop_job()` method to remove a job: - -```python -project.drop_job('job_name') -``` diff --git a/docs/sdks/python/drop_project.mdx b/docs/sdks/python/drop_project.mdx deleted file mode 100644 index 3c0d2a3191d..00000000000 --- a/docs/sdks/python/drop_project.mdx +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Remove a Project -sidebarTitle: Remove a Project ---- - -## Description - -The `drop_project()` function removes a project from MindsDB. - -## Syntax - -Use the `drop_project()` method to remove a project: - -```python -server.drop_project('project_name') -``` diff --git a/docs/sdks/python/drop_view.mdx b/docs/sdks/python/drop_view.mdx deleted file mode 100644 index 6556a9532af..00000000000 --- a/docs/sdks/python/drop_view.mdx +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Remove a View -sidebarTitle: Remove a View ---- - -## Description - -The `drop_view()` function removes a view from MindsDB. 
- -## Syntax - -Use the `drop_view()` method to remove a view: - -```python -project.drop_view('view_name') -``` diff --git a/docs/sdks/python/get_history.mdx b/docs/sdks/python/get_history.mdx deleted file mode 100644 index 878909e6d22..00000000000 --- a/docs/sdks/python/get_history.mdx +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Get Job History -sidebarTitle: Get Job History ---- - -## Description - -The `get_history()` function lets you access the job history information where you can find a job record for each job execution, including execution errors. - -## Syntax - -Use the `get_history()` method to get history of job execution: - -```python -my_job.get_history() -``` diff --git a/docs/sdks/python/insert_into_table.mdx b/docs/sdks/python/insert_into_table.mdx deleted file mode 100644 index 6729f5470bb..00000000000 --- a/docs/sdks/python/insert_into_table.mdx +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Insert Into a Table -sidebarTitle: Insert Into a Table ---- - -## Description - -The `insert()` function is executed on a table from a data source connected to MindsDB. It inserts data into a table. - -## Syntax - -Here is the syntax: - -```sql -my_table.insert(table_to_be_inserted) -``` diff --git a/docs/sdks/python/installation.mdx b/docs/sdks/python/installation.mdx deleted file mode 100644 index 9c87440e445..00000000000 --- a/docs/sdks/python/installation.mdx +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: Installation -sidebarTitle: Installation ---- - -Python SDK enables you to connect to the MindsDB server from Python using HTTP API. Read along to see how to install and test the MindsDB Python SDK. - -## Simple Installation - -To install the MindsDB Python SDK, run the below command: - -```bash -pip install mindsdb_sdk -``` - -Here is the expected output: - -

- -

- -## Advanced Installation - -Instead of using the `pip install mindsdb_sdk` command, you can install it by cloning the [Python SDK repository](https://github.com/mindsdb/mindsdb_python_sdk). Then you should create a virtual environment, install all dependencies from the `requirements.txt` file, and run tests as instructed below. - -To test all the components, go to the project directory (`mindsdb_sdk`) and run the below command: - -```bash -env PYTHONPATH=./ pytest -``` - -To generate the API documentation, run the below commands: - -```bash -pip install sphinx -cd docs -make html -``` - -The documentation is generated in the `docs/build/html` folder. diff --git a/docs/sdks/python/join_on.mdx b/docs/sdks/python/join_on.mdx deleted file mode 100644 index ecb655d8653..00000000000 --- a/docs/sdks/python/join_on.mdx +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Join Tables On -sidebarTitle: Join Tables On ---- - -## Description - -The `query()` function is executed on a data source connected to MindsDB and saved into a variable. It performs a join operation between tables. - -## Syntax - -Here is the syntax: - -```sql -my_data_source.query('SELECT * FROM my_table t JOIN another_table a ON t…=a… LIMIT 100') -``` diff --git a/docs/sdks/python/knowledge_bases/create.mdx b/docs/sdks/python/knowledge_bases/create.mdx deleted file mode 100644 index bce9eec8b92..00000000000 --- a/docs/sdks/python/knowledge_bases/create.mdx +++ /dev/null @@ -1,504 +0,0 @@ ---- -title: How to Create Knowledge Bases -sidebarTitle: Create KB ---- - -A knowledge base is an advanced system that organizes information based on semantic meaning rather than simple keyword matching. It integrates embedding models, reranking models, and vector stores to enable context-aware data retrieval. - - -Learn more about features of [knowledge bases available via SQL API](/mindsdb_sql/knowledge_bases/overview). 
- - -## `create()` Function - -Here is the syntax for creating a knowledge base: - -```python -my_kb = server.knowledge_bases.create( - 'my_kb', - embedding_model={ - 'provider': 'openai', - 'model_name': 'text-embedding-3-small', - 'api_key': 'sk-...'}, - reranking_model={ - 'provider': 'openai', - 'model_name': 'gpt-4', - 'api_key': 'sk-...'}, - storage=server.databases.my_db.tables.my_table, - metadata_columns=['date', 'creator', ...], - content_columns=['review', 'content', ...], - id_column='id' -) -``` - -Upon execution, it registers `my_kb` and associates the specified models and storage. `my_kb` is a unique identifier of the knowledge base within MindsDB. - -### Supported LLMs - -Below is the list of all language models supported for the `embedding_model` and `reranking_model` parameters. - -#### `provider = 'openai'` - -This provider is supported for both `embedding_model` and `reranking_model`. - - -Users can define the default embedding and reranking models from OpenAI in Settings of the MindsDB GUI. - -Furthermore, users can select `Custom OpenAI API` from the dropdown and use models from any OpenAI-compatible API. - - -When choosing `openai` as the model provider, users should define the following model parameters. - -* `model_name` stores the name of the OpenAI model to be used. -* `api_key` stores the OpenAI API key. - -Learn more about the [OpenAI integration with MindsDB here](/integrations/ai-engines/openai). - -#### `provider = 'openai_azure'` - -This provider is supported for both `embedding_model` and `reranking_model`. - - -Users can define the default embedding and reranking models from Azure OpenAI in Settings of the MindsDB GUI. - - -When choosing `openai_azure` as the model provider, users should define the following model parameters. - -* `model_name` stores the name of the OpenAI model to be used. -* `api_key` stores the OpenAI API key. -* `base_url` stores the base URL of the Azure instance. 
-* `api_version` stores the version of the Azure instance. - - -Users need to log in to their Azure OpenAI instance to retrieve all relevant parameter values. Next, click on `Explore Azure AI Foundry portal` and go to `Models + endpoints`. Select the model and copy the parameter values. - - -#### `provider = 'google'` - -This provider is supported for both `embedding_model` and `reranking_model`. - - -Users can define the default embedding and reranking models from Google in Settings of the MindsDB GUI. - - -When choosing `google` as the model provider, users should define the following model parameters. - -* `model_name` stores the name of the Google model to be used. -* `api_key` stores the Google API key. - -Learn more about the [Google Gemini integration with MindsDB here](/integrations/ai-engines/google_gemini). - -#### `provider = 'bedrock'` - -This provider is supported for both `embedding_model` and `reranking_model`. - -When choosing `bedrock` as the model provider, users should define the following model parameters. - -* `model_name` stores the name of the model available via Amazon Bedrock. -* `aws_access_key_id` stores a unique identifier associated with your AWS account, used to identify the user or application making requests to AWS. -* `aws_region_name` stores the name of the AWS region you want to send your requests to (e.g., `"us-west-2"`). -* `aws_secret_access_key` stores the secret key associated with your AWS access key ID. It is used to sign your requests securely. -* `aws_session_token` is an optional parameter that stores a temporary token used for short-term security credentials when using AWS Identity and Access Management (IAM) roles or temporary credentials. - -#### `provider = 'snowflake'` - -This provider is supported for both `embedding_model` and `reranking_model`. 
- -When choosing `snowflake` as the model provider, users should choose one of the available models from [Snowflake Cortex AI](https://www.snowflake.com/en/product/features/cortex/) and define the following model parameters. - -* `model_name` stores the name of the model available via Snowflake Cortex AI. -* `api_key` stores the Snowflake Cortex AI API key. -* `account_id` stores the Snowflake account ID. - - - -Follow the below steps to generate the API key. - -1. Generate a key pair according to [this instruction](https://docs.snowflake.com/en/user-guide/key-pair-auth) as below. - - * Execute these commands in the console: - - ```bash - # generate private key - openssl genrsa 2048 | openssl pkcs8 -topk8 -inform PEM -out rsa_key.p8 -nocrypt - # generate public key - openssl rsa -in rsa_key.p8 -pubout -out rsa_key.pub - ``` - - * Save the public key, that is, the content of rsa_key.pub, into your database user: - - ```sql - ALTER USER my_user SET RSA_PUBLIC_KEY = "" - ``` - -2. Verify the key pair with the database user. - - * Install `snowsql` following [this instruction](https://docs.snowflake.com/en/user-guide/snowsql-install-config). - - * Execute this command in the console: - - ```bash - snowsql -a -u my_user --private-key-path rsa_key.p8 - ``` - -3. Generate JWT token. - - * Download the Python script from [Snowflake's Developer Guide for Authentication](https://docs.snowflake.com/en/developer-guide/sql-api/authenticating). Here is a [direct download link](https://docs.snowflake.com/en/_downloads/aeb84cdfe91dcfbd889465403b875515/sql-api-generate-jwt.py). - - * Ensure to have the PyJWT module installed that is required for running the script. - - * Run the script using this command: - - ```bash - sql-api-generate-jwt.py --account --user my_user --private_key_file_path rsa_key.p8 - ``` - - This command returns the JWT token, which is used in the `api_key` parameter for the `snowflake` provider. 
- - - -#### `provider = 'ollama'` - -This provider is supported for both `embedding_model` and `reranking_model`. - - -Users can define the default embedding and reranking models from Ollama in Settings of the MindsDB GUI. - - -When choosing `ollama` as the model provider, users should define the following model parameters. - -* `model_name` stores the name of the model to be used. -* `base_url` stores the base URL of the Ollama instance. - -### `embedding_model` - -The embedding model is a required component of the knowledge base. It stores specifications of the embedding model to be used. - -Users can define the embedding model choosing one of the following options. - -**Option 1.** Use the `embedding_model` parameter to define the specification. - -```python -... -embedding_model = { - - "provider": "azure_openai", - "model_name" : "text-embedding-3-large", - "api_key": "sk-abc123", - "base_url": "https://ai-6689.openai.azure.com/", - "api_version": "2024-02-01" - -}, -... -``` - -**Option 2.** Define the default embedding model in the [MindsDB configuration file](/setup/custom-config). - - -You can define the default models in the Settings of the MindsDB Editor GUI. - - - -Note that if you define `default_embedding_model` in the configuration file, you do not need to provide the `embedding_model` parameter when creating a knowledge base. If provide both, then the values from the `embedding_model` parameter are used. - - -```bash -"default_embedding_model": { - - "provider": "azure_openai", - "model_name" : "text-embedding-3-large", - "api_key": "sk-abc123", - "base_url": "https://ai-6689.openai.azure.com/", - "api_version": "2024-02-01" - -} -``` - -The embedding model specification includes: - -* `provider` -It is a required parameter. It defines the model provider. - -* `model_name` -It is a required parameter. It defines the embedding model name as specified by the provider. 
- -* `api_key` -The API key is required to access the embedding model assigned to a knowledge base. Users can provide it either in this `api_key` parameter, or in the `OPENAI_API_KEY` environment variable for `"provider": "openai"` and `AZURE_OPENAI_API_KEY` environment variable for `"provider": "azure_openai"`. - -* `base_url` -It is an optional parameter, which defaults to `https://api.openai.com/v1/`. It is a required parameter when using the `azure_openai` provider. It is the root URL used to send API requests. - -* `api_version` -It is an optional parameter. It is a required parameter when using the `azure_openai` provider. It defines the API version. - -### `reranking_model` - -The reranking model is an optional component of the knowledge base. It stores specifications of the reranking model to be used. - -Users can disable reranking features of knowledge bases by setting this parameter to `false`. - -```python -... -reranking_model = False, -... -``` - -Users can enable reranking features of knowledge bases by defining the reranking model choosing one of the following options. - -**Option 1.** Use the `reranking_model` parameter to define the specification. - -```python -... -reranking_model = { - - "provider": "azure_openai", - "model_name" : "gpt-4o", - "api_key": "sk-abc123", - "base_url": "https://ai-6689.openai.azure.com/", - "api_version": "2024-02-01", - "method": "multi-class" - -}, -... -``` - -**Option 2.** Define the default reranking model in the [MindsDB configuration file](/setup/custom-config). - - -You can define the default models in the Settings of the MindsDB Editor GUI. - - - -Note that if you define [`default_reranking_model` in the configuration file](/setup/custom-config#default-reranking-model), you do not need to provide the `reranking_model` parameter when creating a knowledge base. If both are provided, then the values from the `reranking_model` parameter are used.
- - -```bash -"default_reranking_model": { - - "provider": "azure_openai", - "model_name" : "gpt-4o", - "api_key": "sk-abc123", - "base_url": "https://ai-6689.openai.azure.com/", - "api_version": "2024-02-01", - "method": "multi-class" - -} -``` - -The reranking model specification includes: - -* `provider` -It is a required parameter. It defines the model provider as listed in [supported LLMs](/mindsdb_sql/knowledge_bases/create#supported-llms). - -* `model_name` -It is a required parameter. It defines the reranking model name as specified by the provider. - -* `api_key` -The API key is required to access the reranking model assigned to a knowledge base. Users can provide it either in this `api_key` parameter, or in the `OPENAI_API_KEY` environment variable for `"provider": "openai"` and `AZURE_OPENAI_API_KEY` environment variable for `"provider": "azure_openai"`. - -* `base_url` -It is an optional parameter, which defaults to `https://api.openai.com/v1/`. It is a required parameter when using the `azure_openai` provider. It is the root URL used to send API requests. - -* `api_version` -It is an optional parameter. It is a required parameter when using the `azure_openai` provider. It defines the API version. - -* `method` -It is an optional parameter. It defines the method used to calculate the relevance of the output rows. The available options include `multi-class` and `binary`. It defaults to `multi-class`. - - -**Reranking Method** - -The `multi-class` reranking method classifies each document chunk (that meets any specified metadata filtering conditions) into one of four relevance classes: - -1. Not relevant with class weight of 0.25. -2. Slightly relevant with class weight of 0.5. -3. Moderately relevant with class weight of 0.75. -4. Highly relevant with class weight of 1. - -The overall `relevance_score` of a document is calculated as the sum of each chunk’s class weight multiplied by its class probability (from model logprob output).
- -The `binary` reranking method simplifies classification by determining whether a document is relevant or not, without intermediate relevance levels. With this method, the overall `relevance_score` of a document is calculated based on the model log probability. - - -### `storage` - -The vector store is a required component of the knowledge base. It stores data in the form of embeddings. - - -When using [MindsDB via Docker Desktop Extension](/setup/self-hosted/docker-desktop), the `storage` parameter is optional. The default storage is PGVector that is integrated into the MindsDB Docker Desktop Extension. - -When using [MindsDB via PyPI](/contribute/install) or [MindsDB via Docker image](/setup/self-hosted/docker), the `storage` parameter is required. Users need to connect their vector storage, such as PGVector, using the `CREATE DATABASE` command and then use it for the storage of the knowledge bases. - - -The recommended vector store for knowledge bases is [PGVector](/integrations/vector-db-integrations/pgvector), specifically, version 0.8.0 or higher for a better performance. - -In order to provide the storage vector database, it is required to connect it to MindsDB beforehand. - -Here is an example for [PGVector](/integrations/vector-db-integrations/pgvector). - -```python -my_kb = server.knowledge_bases.create( - ... - storage=server.databases.my_pgvector.tables.storage_table, - ... -) -``` - - -Note that you do not need to have the `storage_table` created as it is created when creating a knowledge base. - - -### `metadata_columns` - -The data inserted into the knowledge base can be classified as metadata, which enables users to filter the search results using defined data fields. - - -Note that source data column(s) included in `metadata_columns` cannot be used in `content_columns`, and vice versa. - - -This parameter is an array of strings that lists column names from the source data to be used as metadata. 
If not provided, then all inserted columns (except for columns defined as `id_column` and `content_columns`) are considered metadata columns. - -Here is an example of usage. A user wants to store the following data in a knowledge base. - -```sql -+----------+-------------------+------------------------+ -| order_id | product | notes | -+----------+-------------------+------------------------+ -| A1B | Wireless Mouse | Request color: black | -| 3XZ | Bluetooth Speaker | Gift wrap requested | -| Q7P | Laptop Stand | Prefer aluminum finish | -+----------+-------------------+------------------------+ -``` - - -Go to the *Complete Example* section below to find out how to access this sample data. - - -The `product` column can be used as metadata to enable metadata filtering. - -```python -my_kb = server.knowledge_bases.create( - ... - metadata_columns=['product'], - ... -) -``` - -### `content_columns` - -The data inserted into the knowledge base can be classified as content, which is embedded by the embedding model and stored in the underlying vector store. - - -Note that source data column(s) included in `content_columns` cannot be used in `metadata_columns`, and vice versa. - - -This parameter is an array of strings that lists column names from the source data to be used as content and processed into embeddings. If not provided, the `content` column is expected by default when inserting data into the knowledge base. - -Here is an example of usage. A user wants to store the following data in a knowledge base. - -```sql -+----------+-------------------+------------------------+ -| order_id | product | notes | -+----------+-------------------+------------------------+ -| A1B | Wireless Mouse | Request color: black | -| 3XZ | Bluetooth Speaker | Gift wrap requested | -| Q7P | Laptop Stand | Prefer aluminum finish | -+----------+-------------------+------------------------+ -``` - - -Go to the *Complete Example* section below to find out how to access this sample data. 
- - -The `notes` column can be used as content. - -```python -my_kb = server.knowledge_bases.create( - ... - content_columns=['notes'], - ... -) -``` - -### `id_column` - -The ID column uniquely identifies each source data row in the knowledge base. - -It is an optional parameter. If provided, this parameter is a string that contains the source data ID column name. If not provided, it is generated from the hash of the content columns. - -Here is an example of usage. A user wants to store the following data in a knowledge base. - -```sql -+----------+-------------------+------------------------+ -| order_id | product | notes | -+----------+-------------------+------------------------+ -| A1B | Wireless Mouse | Request color: black | -| 3XZ | Bluetooth Speaker | Gift wrap requested | -| Q7P | Laptop Stand | Prefer aluminum finish | -+----------+-------------------+------------------------+ -``` - - -Go to the *Complete Example* section below to find out how to access this sample data. - - -The `order_id` column can be used as ID. - -```python -my_kb = server.knowledge_bases.create( - ... - id_column='order_id' -) -``` - - -Note that if the source data row is chunked into multiple chunks by the knowledge base (that is, to optimize the storage), then these rows in the knowledge base have the same ID value that identifies chunks from one source data row. - - - -**Available options for the ID column values** - -* User-Defined ID Column:

-When users define the `id_column` parameter, the values from the provided source data column are used to identify source data rows within the knowledge base. - -* User-Generated ID Column:

-When users do not have a column that uniquely identifies each row in their source data, they can generate the ID column values when inserting data into the knowledge base using functions like `HASH()` or `ROW_NUMBER()`. - -```sql -INSERT INTO my_kb ( - SELECT ROW_NUMBER() OVER (ORDER BY order_id) AS id, * - FROM sample_data.orders -); -``` - -* Default ID Column:

-If the `id_column` parameter is not defined, its default values are built from the hash of the content columns and follow the format: ``. -
- -## `list()` and `get()` Functions - -Users can get details about the knowledge base using the `get()` function. - -```python -my_kb = project.knowledge_bases.get('my_kb') -``` - -And list all available knowledge bases using the `list()` function. - -```python -kb_list = project.knowledge_bases.list() -``` - -## `drop()` Function - -Here is the syntax for deleting a knowledge base: - -```python -project.knowledge_bases.drop('my_kb') -``` - -Upon execution, it removes the knowledge base with its content. - - -See more examples of [knowledge bases via SQL here](/mindsdb_sql/knowledge_bases/overview). - diff --git a/docs/sdks/python/knowledge_bases/insert_data.mdx b/docs/sdks/python/knowledge_bases/insert_data.mdx deleted file mode 100644 index 470d5499268..00000000000 --- a/docs/sdks/python/knowledge_bases/insert_data.mdx +++ /dev/null @@ -1,340 +0,0 @@ ---- -title: How to Insert Data into Knowledge Bases -sidebarTitle: Insert Data into KB ---- - -Knowledge Bases (KBs) organize data across data sources, including databases, files, documents, webpages, enabling efficient search capabilities. - -Here is what happens to data when it is inserted into the knowledge base. - -

- -

- -Upon inserting data into the knowledge base, it is split into chunks, transformed into the embedding representation to enhance the search capabilities, and stored in a vector database. - - -Learn more about features of [knowledge bases available via SQL API](/mindsdb_sql/knowledge_bases/overview). - - -## `insert()` Function - -Here is the syntax for inserting data into a knowledge base: - -* Inserting raw data: - - ```python - my_kb.insert([ - {'type': 'apartment', 'price': 100000}, - {'type': 'villa', 'price': 500000} - ]) - ``` - -* Inserting data from data sources connected to MindsDB: - - ```python - my_kb.insert_query( - server.databases.my_database.tables.my_table.filter(type='my_type') - ) - ``` - -* Inserting data from files uploaded to MindsDB: - - ```python - my_kb.insert_files(['my_pdf_file', 'my_txt_file']) - ``` - -* Inserting data from webpages: - - ```python - kb.insert_webpages( - ['https://example.com'], - crawl_depth=2, - filters=[r'.*\/blog\/.*'], - limit=10 - ) - ``` - - Where: - - * `urls`: Base URLs to crawl. - * `crawl_depth`: Depth for recursive crawling. Default is 1. - * `filters`: Regex patterns to include. - * `limit`: Max number of pages. - -Upon execution, it inserts data into a knowledge base, using the embedding model to embed it into vectors before inserting into an underlying vector database. - - -The status of the insert operations is logged in the `information_schema.queries` table with the timestamp when it was ran. - - - -**Default Batch Inserts** -The batch inserts into knowledge bases (see "Insert Data using Partitions") are enabled by default for all vector stores except PGVector. Note that in order for batch inserts to work by default, users must provide the `id_column` when creating the knowledge base. 
- -To enable default batch inserts for PGVector, set the `DISABLE_PGVECTOR_AUTOBATCH` environment variable or the `knowledge_bases.disable_pgvector_autobatch` configuration variable to `false` (it is set to `true` by default). - -To disable default batch inserts, set the `DISABLE_AUTOBATCH` environment variable or the `knowledge_bases.disable_autobatch` configuration variable to `true` (it is set to `false` by default). - - - -**Handling duplicate data while inserting into the knowledge base** - -Knowledge bases uniquely identify data rows using an ID column, which prevents duplicate data from being inserted, as follows. - -* **Case 1: Inserting data into the knowledge base without the `id_column` defined.** - - When users do not define the `id_column` during the creation of a knowledge base, MindsDB generates the ID for each row using a hash of the content columns, as [explained here](/mindsdb_sql/knowledge_bases/create#id-column). - - **Example:** - - If two rows have exactly the same content in the content columns, their hash (and thus their generated ID) will be the same. - - Note that duplicate rows are skipped and not inserted. - - Since both rows in the below table have the same content, only one row will be inserted. - - | name | age | - |-------|-----| - | Alice | 25 | - | Alice | 25 | - -* **Case 2: Inserting data into the knowledge base with the `id_column` defined.** - - When users define the `id_column` during the creation of a knowledge base, then the knowledge base uses that column's values as the row ID. - - **Example:** - - If the `id_column` has duplicate values, the knowledge base skips the duplicate row(s) during the insert. - - The second row in the below table has the same `id` as the first row, so only one of these rows is inserted.
- - | id | name | age | - |-----|-------|-----| - | 1 | Alice | 25 | - | 1 | Bob | 30 | - -**Best practice** - -Ensure the `id_column` uniquely identifies each row to avoid unintentional data loss due to duplicate ID skipping. - - - -### Update Existing Data - -In order to update existing data in the knowledge base, insert data with the column ID that you want to update and the updated content. - -Here is an example of usage. A knowledge base stores the following data. - -```sql -+----------+-------------------+------------------------+ -| order_id | product | notes | -+----------+-------------------+------------------------+ -| A1B | Wireless Mouse | Request color: black | -| 3XZ | Bluetooth Speaker | Gift wrap requested | -| Q7P | Laptop Stand | Prefer aluminum finish | -+----------+-------------------+------------------------+ -``` - -A user updated `Laptop Stand` to `Aluminum Laptop Stand`. - -```sql -+----------+-----------------------+------------------------+ -| order_id | product | notes | -+----------+-----------------------+------------------------+ -| A1B | Wireless Mouse | Request color: black | -| 3XZ | Bluetooth Speaker | Gift wrap requested | -| Q7P | Aluminum Laptop Stand | Prefer aluminum finish | -+----------+-----------------------+------------------------+ -``` - - -Go to the *Complete Example* section below to find out how to access this sample data. - - -Here is how to propagate this change into the knowledge base. - -```python -my_kb.insert_query( - server.databases.sample_data.tables.orders.filter(order_id='Q7P') -) -``` - -The knowledge base matches the ID value to the existing one and updates the data if required. - -### Insert Data using Partitions - -In order to optimize the performance of data insertion into the knowledge base, users can set up partitions and threads to insert batches of data in parallel. This also enables tracking the progress of data insertion process including cancelling and resuming it if required. 
- -Here is an example. - -```python -project.query( - ''' - INSERT INTO my_kb - SELECT order_id, product, notes - FROM sample_data.orders - USING - batch_size = 200, - track_column = order_id, - threads = 10, - error = 'skip'; - ''' -) -``` - -The parameters include the following: - -* `batch_size` defines the number of rows fetched per iteration to optimize data extraction from the source. It defaults to 1000. - -* `threads` defines threads for running partitions. Note that if the [ML task queue](/setup/custom-config#overview-of-config-parameters) is enabled, threads are used automatically. The available values for `threads` are: - - a number of threads to be used, for example, `threads = 10`, - - a boolean value that defines whether to enable threads, setting `threads = true`, or disable threads, setting `threads = false`. - -* `track_column` defines the column used for sorting data before partitioning. - -* `error` defines the error processing options. The available values include `raise`, used to raise errors as they come, or `skip`, used to skip errors. It defaults to `raise` if not provided. - -After executing the `INSERT INTO` statement with the above parameters, users can view the data insertion progress by querying the `information_schema.queries` table. - -```python -project.query( - ''' - SELECT * FROM information_schema.queries; - ''' -) -``` - -Users can cancel the data insertion process using the process ID from the `information_schema.queries` table. - -```python -project.query( - ''' - SELECT query_cancel(1); - ''' -) -``` - -If you want to cancel the data insertion process, look up the process ID value from the `information_schema.queries` table and pass it as an argument to the `query_cancel()` function. Note that canceling the query will not remove the already inserted data. - -Users can resume the data insertion process using the process ID from the `information_schema.queries` table.
- -```python -project.query( - ''' - SELECT query_resume(1); - ''' -) -``` - -If you want to resume the data insertion process (which may have been interrupted by an error or cancelled by a user), look up the process ID value from the `information_schema.queries` table and pass it as an argument to the `query_resume()` function. Note that resuming the query will not remove the already inserted data and will start appending the remaining data. - -### Chunking Data - -Upon inserting data into the knowledge base, the data chunking is performed in order to optimize the storage and search of data. - -Each chunk is identified by its chunk ID of the following format: `:of:to`. - -#### Text - -Users can opt for defining the chunking parameters when creating a knowledge base. - -```python -my_kb = project.knowledge_bases.create( - ... - params={ - "preprocessing": { - "text_chunking_config" : { - "chunk_size": 2000, - "chunk_overlap": 200 - } - } - } -) -``` - -The `chunk_size` parameter defines the size of the chunk as the number of characters. And the `chunk_overlap` parameter defines the number of characters that should overlap between subsequent chunks. - -#### JSON - -Users can opt for defining the chunking parameters specifically for JSON data. - -```python -my_kb = project.knowledge_bases.create( - ... - params={ - "preprocessing": { - "type": "json_chunking", - "json_chunking_config" : { - ... - } - } - } -) -``` - -When the `type` of chunking is set to `json_chunking`, users can configure it by setting the following parameter values in the `json_chunking_config` parameter: - -* `flatten_nested` -It is of the `bool` data type with the default value of `True`. -It defines whether to flatten nested JSON structures. - -* `include_metadata` -It is of the `bool` data type with the default value of `True`. -It defines whether to include original metadata in chunks. - -* `chunk_by_object` -It is of the `bool` data type with the default value of `True`. 
-It defines whether to chunk by top-level objects (`True`) or create a single document (`False`). - -* `exclude_fields` -It is of the `List[str]` data type with the default value of an empty list. -It defines the list of fields to exclude from chunking. - -* `include_fields` -It is of the `List[str]` data type with the default value of an empty list. -It defines the list of fields to include in chunking (if empty, all fields except excluded ones are included). - -* `metadata_fields` -It is of the `List[str]` data type with the default value of an empty list. -It defines the list of fields to extract into metadata for filtering (can include nested fields using dot notation). If empty, all primitive fields will be extracted (top-level fields if available, otherwise all primitive fields in the flattened structure). - -* `extract_all_primitives` -It is of the `bool` data type with the default value of `False`. -It defines whether to extract all primitive values (strings, numbers, booleans) into metadata. - -* `nested_delimiter` -It is of the `str` data type with the default value of `"."`. -It defines the delimiter for flattened nested field names. - -* `content_column` -It is of the `str` data type with the default value of `"content"`. -It defines the name of the content column for chunk ID generation. - -### Underlying Vector Store - -Each knowledge base has its underlying vector store that stores data inserted into the knowledge base in the form of embeddings. - -Users can query the underlying vector store as follows. 
- -* KB with the default ChromaDB vector store: - -```python -project.query( - ''' - SELECT id, content, metadata, embeddings - FROM _chromadb.storage_table; - ''' -) -``` - -* KB with user-defined vector store (either [PGVector](/integrations/vector-db-integrations/pgvector) or [ChromaDB](/integrations/vector-db-integrations/chromadb)): - -```python -project.query( - ''' - SELECT id, content, metadata, embeddings - FROM .; - ''' -) -``` diff --git a/docs/sdks/python/knowledge_bases/overview.mdx b/docs/sdks/python/knowledge_bases/overview.mdx deleted file mode 100644 index 244c1de4b8a..00000000000 --- a/docs/sdks/python/knowledge_bases/overview.mdx +++ /dev/null @@ -1,133 +0,0 @@ ---- -title: How Knowledge Bases Work -sidebarTitle: Overview ---- - -A knowledge base is an advanced system that organizes information based on semantic meaning rather than simple keyword matching. It integrates embedding models, reranking models, and vector stores to enable context-aware data retrieval. - -By performing semantic reasoning across multiple data points, a knowledge base delivers deeper insights and more accurate responses, making it a powerful tool for intelligent data access. - - -Learn more about features of [knowledge bases available via SQL API](/mindsdb_sql/knowledge_bases/overview). - - -Before diving into the syntax, here is a quick walkthrough showing how knowledge bases work in MindsDB. - -We start by creating a knowledge base and inserting data. Next we can run semantic search queries with metadata filtering. - - - - Use the `create()` function to create a knowledge base, specifying all its components. 
- - ```python - server = mindsdb_sdk.connect() - project = server.get_project() - - my_kb = project.knowledge_bases.create( - 'my_kb', - embedding_model={'provider': 'openai', 'model_name': 'text-embedding-3-small', 'api_key': 'sk-...'}, - reranking_model={'provider': 'openai', 'model_name': 'gpt-4o', 'api_key': 'sk-...'}, - storage=server.databases.my_vector_db.tables.my_table, - metadata_columns=['product'], - content_columns=['notes'], - id_column='order_id' - ) - ``` - - - In this example, we use a simple dataset containing customer notes for product orders which will be inserted into the knowledge base. - - ```sql - +----------+-----------------------+------------------------+ - | order_id | product | notes | - +----------+-----------------------+------------------------+ - | A1B | Wireless Mouse | Request color: black | - | 3XZ | Bluetooth Speaker | Gift wrap requested | - | Q7P | Aluminum Laptop Stand | Prefer aluminum finish | - +----------+-----------------------+------------------------+ - ``` - - Use the `insert_query()` function to ingest data into the knowledge base from a query. - - ```python - my_kb.insert_query( - server.databases.sample_data.tables.orders - ) - ``` - - - Query the knowledge base using semantic search. 
- - ```python - results = my_kb.find('color') - - print(results.fetch()) - ``` - - This query returns: - - ```sql - +-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ - | id | chunk_id | chunk_content | metadata | product | distance | relevance | - +-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ - | A1B | A1B_notes:1of1:0to20 | Request color: black | {"chunk_index":0,"content_column":"notes","end_char":20,"original_doc_id":"A1B_notes","original_row_id":"A1B","product":"Wireless Mouse","source":"TextChunkingPreprocessor","start_char":0} | Wireless Mouse | 0.5743341242061104 | 0.5093188026135379 | - | Q7P | Q7P_notes:1of1:0to22 | Prefer aluminum finish | {"chunk_index":0,"content_column":"notes","end_char":22,"original_doc_id":"Q7P_notes","original_row_id":"Q7P","product":"Aluminum Laptop Stand","source":"TextChunkingPreprocessor","start_char":0} | Aluminum Laptop Stand | 0.7744703514692067 | 0.2502580835880018 | - | 3XZ | 3XZ_notes:1of1:0to19 | Gift wrap requested | {"chunk_index":0,"content_column":"notes","end_char":19,"original_doc_id":"3XZ_notes","original_row_id":"3XZ","product":"Bluetooth Speaker","source":"TextChunkingPreprocessor","start_char":0} | Bluetooth Speaker | 0.8010851611432231 | 0.2500003885558766 | - 
+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ - ``` - - - Query the knowledge base using semantic search and define the `relevance` parameter to receive only the best matching data for your use case. - - ```python - results = project.query( - ''' - SELECT * - FROM my_kb - WHERE content = 'color' - AND relevance >= 0.2502; - ''' - ) - - print(results.fetch()) - ``` - - This query returns: - - ```sql - +-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ - | id | chunk_id | chunk_content | metadata | product | distance | relevance | - +-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ - | A1B | A1B_notes:1of1:0to20 | Request color: black | {"chunk_index":0,"content_column":"notes","end_char":20,"original_doc_id":"A1B_notes","original_row_id":"A1B","product":"Wireless Mouse","source":"TextChunkingPreprocessor","start_char":0} | Wireless Mouse | 0.5743341242061104 | 0.5093188026135379 | - | Q7P | Q7P_notes:1of1:0to22 | Prefer aluminum finish | {"chunk_index":0,"content_column":"notes","end_char":22,"original_doc_id":"Q7P_notes","original_row_id":"Q7P","product":"Aluminum Laptop Stand","source":"TextChunkingPreprocessor","start_char":0} | Aluminum Laptop Stand | 
0.7744703514692067 | 0.2502580835880018 | - +-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ - ``` - - - Add metadata filtering to focus your search. - - ```python - results = project.query( - ''' - SELECT * - FROM my_kb - WHERE product = 'Wireless Mouse' - AND content = 'color' - AND relevance >= 0.2502; - ''' - ) - - print(results.fetch()) - ``` - - This query returns: - - ```sql - +-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+--------------------+-------------------+ - | id | chunk_id | chunk_content | metadata | product | distance | relevance | - +-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+--------------------+-------------------+ - | A1B | A1B_notes:1of1:0to20 | Request color: black | {"chunk_index":0,"content_column":"notes","end_char":20,"original_doc_id":"A1B_notes","original_row_id":"A1B","product":"Wireless Mouse","source":"TextChunkingPreprocessor","start_char":0} | Wireless Mouse | 0.5743341242061104 | 0.504396172197583 | - +-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+--------------------+-------------------+ - ``` - - diff --git 
a/docs/sdks/python/knowledge_bases/query.mdx b/docs/sdks/python/knowledge_bases/query.mdx deleted file mode 100644 index ee09cb47afa..00000000000 --- a/docs/sdks/python/knowledge_bases/query.mdx +++ /dev/null @@ -1,244 +0,0 @@ ---- -title: How to Query Knowledge Bases -sidebarTitle: Query KB ---- - -Knowledge Bases support two primary querying approaches: semantic search and metadata filtering, each of which offers different filtering capabilities, including filtering by the relevance score to ensure only data most relevant to the query is returned. - -* **Semantic Search** - - Semantic search enables users to query Knowledge Bases using natural language. When searching semantically, you reference the content column in your SQL statement. MindsDB will interpret the input as a semantic query and use vector-based similarity to find relevant results. - -* **Metadata Filtering** - - It allows users to query Knowledge Bases based on the available metadata fields. These fields can be used in the `WHERE` clause of a SQL statement. - -* **Relevance Filtering** - - Every semantic search result is assigned a relevance score, which indicates how closely a given entry matches your query. You can filter results by this score to ensure only the most relevant entries are returned. - -* **Hybrid Search** - - Hybrid search combines the flexibility of semantic search and exact keyword matching. [Learn more here](/mindsdb_sql/knowledge_bases/hybrid_search). - - -Learn more about features of [knowledge bases available via SQL API](/mindsdb_sql/knowledge_bases/overview). - - -## `find()` Function - -Knowledge bases provide an abstraction that enables users to see the stored data. - -Note that here a sample knowledge base created and inserted into in the previous **Example** sections is searched. 
- -```python -results = project.query( - ''' - SELECT * - FROM my_kb; - ''' -) - -print(results.fetch()) -``` - -Here is the sample output: - -```sql -+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ -| id | chunk_id | chunk_content | metadata | product | distance | relevance | -+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ -| A1B | A1B_notes:1of1:0to20 | Request color: black | {"chunk_index":0,"content_column":"notes","end_char":20,"original_doc_id":"A1B_notes","original_row_id":"A1B","product":"Wireless Mouse","source":"TextChunkingPreprocessor","start_char":0} | Wireless Mouse | 0.5743341242061104 | 0.5093188026135379 | -| Q7P | Q7P_notes:1of1:0to22 | Prefer aluminum finish | {"chunk_index":0,"content_column":"notes","end_char":22,"original_doc_id":"Q7P_notes","original_row_id":"Q7P","product":"Aluminum Laptop Stand","source":"TextChunkingPreprocessor","start_char":0} | Aluminum Laptop Stand | 0.7744703514692067 | 0.2502580835880018 | -| 3XZ | 3XZ_notes:1of1:0to19 | Gift wrap requested | {"chunk_index":0,"content_column":"notes","end_char":19,"original_doc_id":"3XZ_notes","original_row_id":"3XZ","product":"Bluetooth Speaker","source":"TextChunkingPreprocessor","start_char":0} | Bluetooth Speaker | 0.8010851611432231 | 0.2500003885558766 | 
-+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ -``` - -### Data Stored in Knowledge Base - -The following columns are stored in the knowledge base. - -`id` -It stores values from the column defined in the `id_column` parameter when creating the knowledge base. These are the source data IDs. - -`chunk_id` -Knowledge bases chunk the inserted data in order to fit the defined chunk size. If the chunking is performed, the following chunk ID format is used: `:of:to`. - -`chunk_content` -It stores values from the column(s) defined in the `content_columns` parameter when creating the knowledge base. - -`metadata` -It stores the general metadata and the metadata defined in the `metadata_columns` parameter when creating the knowledge base. - -`distance` -It stores the calculated distance between the chunk's content and the search phrase. - -`relevance` -It stores the calculated relevance of the chunk as compared to the search phrase. Its values are between 0 and 1. - - -Note that the calculation method of `relevance` differs as follows: - -- When the ranking model is provided, the default `relevance` is equal or greater than 0, unless defined otherwise in the `WHERE` clause. -- When the reranking model is not provided and the `relevance` is not defined in the query, then no relevance filtering is applied and the output includes all rows matched based on the similarity and metadata search. -- When the reranking model is not provided but the `relevance` is defined in the query, then the relevance is calculated based on the `distance` column (`1/(1+ distance)`) and the `relevance` value is compared with this relevance value to filter the output. 
- - -### Semantic Search - -Users can query a knowledge base using semantic search by providing the search phrase (called `content`) to be searched for. - -```python -results = my_kb.find('color') - -print(results.fetch()) -``` - -Here is the output: - -```sql -+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ -| id | chunk_id | chunk_content | metadata | product | distance | relevance | -+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ -| A1B | A1B_notes:1of1:0to20 | Request color: black | {"chunk_index":0,"content_column":"notes","end_char":20,"original_doc_id":"A1B_notes","original_row_id":"A1B","product":"Wireless Mouse","source":"TextChunkingPreprocessor","start_char":0} | Wireless Mouse | 0.5743341242061104 | 0.5093188026135379 | -| Q7P | Q7P_notes:1of1:0to22 | Prefer aluminum finish | {"chunk_index":0,"content_column":"notes","end_char":22,"original_doc_id":"Q7P_notes","original_row_id":"Q7P","product":"Aluminum Laptop Stand","source":"TextChunkingPreprocessor","start_char":0} | Aluminum Laptop Stand | 0.7744703514692067 | 0.2502580835880018 | -| 3XZ | 3XZ_notes:1of1:0to19 | Gift wrap requested | {"chunk_index":0,"content_column":"notes","end_char":19,"original_doc_id":"3XZ_notes","original_row_id":"3XZ","product":"Bluetooth Speaker","source":"TextChunkingPreprocessor","start_char":0} | Bluetooth Speaker | 0.8010851611432231 | 0.2500003885558766 | 
-+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ -``` - - -When querying a knowledge base, the default values include the following: - -* `relevance`

-If not provided, the default condition is `relevance >= 0`, which ensures that no rows are filtered out based on their relevance. - -* `LIMIT`

-If not provided, its default value is 10, returning a maximum of 10 rows. -
- - -Note that when specifying both `relevance` and `LIMIT` as follows: - -```python -results = project.query( - ''' - SELECT * - FROM my_kb - WHERE content = 'color' - AND relevance >= 0.5 - LIMIT 20; - ''' -) - -print(results.fetch()) -``` - -The query extracts 20 rows (as defined in the `LIMIT` clause) that match the defined `content`. Next, these set of rows is filtered out to match the defined `relevance`. - - -Users can limit the `relevance` in order to get only the most relevant results. - -```python -results = project.query( - ''' - SELECT * - FROM my_kb - WHERE content = 'color' - AND relevance >= 0.5; - ''' -) - -print(results.fetch()) -``` - -Here is the output: - -```sql -+-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+--------------------+--------------------+ -| id | chunk_id | chunk_content | metadata | product | distance | relevance | -+-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+--------------------+--------------------+ -| A1B | A1B_notes:1of1:0to20 | Request color: black | {"chunk_index":0,"content_column":"notes","end_char":20,"original_doc_id":"A1B_notes","original_row_id":"A1B","product":"Wireless Mouse","source":"TextChunkingPreprocessor","start_char":0} | Wireless Mouse | 0.5743341242061104 | 0.5103766499957533 | -+-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+--------------------+--------------------+ 
-``` - -By providing the `relevance` filter, the output is limited to only data with relevance score of the provided value. The available values of `relevance` are between 0 and 1, and its default value covers all available relevance values ensuring no filtering based on the relevance score. - -Users can limit the number of rows returned. - -```python -results = project.query( - ''' - SELECT * - FROM my_kb - WHERE content = 'color' - LIMIT 2; - ''' -) - -print(results.fetch()) -``` - -Here is the output: - -```sql -+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ -| id | chunk_id | chunk_content | metadata | product | distance | relevance | -+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ -| A1B | A1B_notes:1of1:0to20 | Request color: black | {"chunk_index":0,"content_column":"notes","end_char":20,"original_doc_id":"A1B_notes","original_row_id":"A1B","product":"Wireless Mouse","source":"TextChunkingPreprocessor","start_char":0} | Wireless Mouse | 0.5743341242061104 | 0.5093188026135379 | -| Q7P | Q7P_notes:1of1:0to22 | Prefer aluminum finish | {"chunk_index":0,"content_column":"notes","end_char":22,"original_doc_id":"Q7P_notes","original_row_id":"Q7P","product":"Aluminum Laptop Stand","source":"TextChunkingPreprocessor","start_char":0} | Aluminum Laptop Stand | 0.7744703514692067 | 0.2502580835880018 | 
-+-----+----------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------+--------------------+--------------------+ -``` - -### Metadata Filtering - -Besides semantic search features, knowledge bases enable users to filter the result set by the defined metadata. - -```python -results = project.query( - ''' - SELECT * - FROM my_kb - WHERE product = 'Wireless Mouse'; - ''' -) - -print(results.fetch()) -``` - -Here is the output: - -```sql -+-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+-----------+----------+ -| id | chunk_id | chunk_content | metadata | product | relevance | distance | -+-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+-----------+----------+ -| A1B | A1B_notes:1of1:0to20 | Request color: black | {"chunk_index":0,"content_column":"notes","end_char":20,"original_doc_id":"A1B_notes","original_row_id":"A1B","product":"Wireless Mouse","source":"TextChunkingPreprocessor","start_char":0} | Wireless Mouse | [NULL] | [NULL] | -+-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+-----------+----------+ -``` - -Note that when searching by metadata alone, the `relevance` column values are not calculated. 
- -Users can do both, filter by metadata and search by content. - -```python -results = project.query( - ''' - SELECT * - FROM my_kb - WHERE product = 'Wireless Mouse' - AND content = 'color' - AND relevance >= 0.5; - ''' -) - -print(results.fetch()) -``` - -Here is the output: - -```sql -+-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+--------------------+-------------------+ -| id | chunk_id | chunk_content | metadata | product | distance | relevance | -+-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+--------------------+-------------------+ -| A1B | A1B_notes:1of1:0to20 | Request color: black | {"chunk_index":0,"content_column":"notes","end_char":20,"original_doc_id":"A1B_notes","original_row_id":"A1B","product":"Wireless Mouse","source":"TextChunkingPreprocessor","start_char":0} | Wireless Mouse | 0.5743341242061104 | 0.504396172197583 | -+-----+----------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+--------------------+-------------------+ -``` diff --git a/docs/sdks/python/list_data_handlers.mdx b/docs/sdks/python/list_data_handlers.mdx deleted file mode 100644 index c819f521e01..00000000000 --- a/docs/sdks/python/list_data_handlers.mdx +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: List Data Handlers -sidebarTitle: List Data Handlers ---- - -Here is how you can fetch all available data handlers directly from Python code: - -``` -mindsdb = 
server.get_project('mindsdb') -data_handlers = mindsdb.query('SHOW HANDLERS WHERE type = \'data\'') -print(data_handlers.fetch()) -``` diff --git a/docs/sdks/python/list_databases.mdx b/docs/sdks/python/list_databases.mdx deleted file mode 100644 index a739f69d24e..00000000000 --- a/docs/sdks/python/list_databases.mdx +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: List Data Sources -sidebarTitle: List Data Sources ---- - -## Description - -The `list_databases()` function lists all data sources connected to MindsDB. - -## Syntax - -Use the `list_databases()` method to list all databases: - -```python -server.list_databases() -``` diff --git a/docs/sdks/python/list_jobs.mdx b/docs/sdks/python/list_jobs.mdx deleted file mode 100644 index 441e5190cf6..00000000000 --- a/docs/sdks/python/list_jobs.mdx +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: List Jobs -sidebarTitle: List Jobs ---- - -## Description - -The `list_jobs()` function is executed on a project and lists all jobs available in this project. - -## Syntax - -Use the `list_jobs()` method to list all jobs in a project: - -```python -project.list_jobs() -``` diff --git a/docs/sdks/python/list_projects.mdx b/docs/sdks/python/list_projects.mdx deleted file mode 100644 index f99d4e44868..00000000000 --- a/docs/sdks/python/list_projects.mdx +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: List Projects -sidebarTitle: List Projects ---- - -## Description - -The `list_projects()` function lists all available projects. - -## Syntax - -Use the `list_projects()` method to list all available projects: - -```python -server.list_projects() -``` diff --git a/docs/sdks/python/list_views.mdx b/docs/sdks/python/list_views.mdx deleted file mode 100644 index 68b6f4b8a5e..00000000000 --- a/docs/sdks/python/list_views.mdx +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: List Views -sidebarTitle: List Views ---- - -## Description - -The `list_views()` function is executed on a project and lists all views available in this project. 
- -## Syntax - -Use the `list_views()` method to list all views in a project: - -```python -project.list_views() -``` diff --git a/docs/sdks/python/native_queries.mdx b/docs/sdks/python/native_queries.mdx deleted file mode 100644 index 5415b9a3f06..00000000000 --- a/docs/sdks/python/native_queries.mdx +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Native Queries -sidebarTitle: Native Queries ---- - -## Description - -The `query()` function is executed on a data source connected to MindsDB and saved into a variable. This native query is executed directly on a data source. - -## Syntax - -Here is the syntax: - -```sql -my_data_source.query('SELECT * FROM datasource_name ();') -``` diff --git a/docs/sdks/python/overview.mdx b/docs/sdks/python/overview.mdx deleted file mode 100644 index 5523bfa79fa..00000000000 --- a/docs/sdks/python/overview.mdx +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: Overview -sidebarTitle: Overview ---- - -MindsDB provides a Python SDK, enabling its integration into Python environments. - -Follow these steps to get started: - - - - For Python, [install the package](/sdks/python/installation). - - - Connect a data source in [Python](/sdks/python/create_database).

- Explore all available [data sources here](/integrations/data-overview). -
- - Configure an AI engine in [Python](/sdks/python/create_ml_engine).

- Explore all available [AI engines here](/integrations/ai-overview). -
- - Create and deploy an AI/ML model in [Python](/sdks/python/create_model). - - - Query for predictions in [Python](/sdks/python/get-batch-predictions). - - - Automate tasks by scheduling jobs in [Python](/sdks/python/create_job). - -
diff --git a/docs/sdks/python/query_files.mdx b/docs/sdks/python/query_files.mdx deleted file mode 100644 index 14f86dc5c1f..00000000000 --- a/docs/sdks/python/query_files.mdx +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Query a File -sidebarTitle: Query a File ---- - -## Description - -In MindsDB, files are treated as tables. These are stored in the default `files` database. To query a file, you must save this `files` database into a variable and then run the `query()` function on it. - -## Syntax - -Here is the syntax: - -``` -server.get_database('files').query('SELECT * FROM file_name') -``` diff --git a/docs/sdks/python/query_projects.mdx b/docs/sdks/python/query_projects.mdx deleted file mode 100644 index be06d1cafaf..00000000000 --- a/docs/sdks/python/query_projects.mdx +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Query Projects -sidebarTitle: Query Projects ---- - -## Description - -The `query()` method enables you to run queries on models, tables, and views stored in a project. - -## Syntax - -Use the `query()` method to submit a query to a project: - -```python -query = project.query('SELECT * FROM my_table;') -query.fetch() -``` diff --git a/docs/sdks/python/query_table.mdx b/docs/sdks/python/query_table.mdx deleted file mode 100644 index ec6f8603a1f..00000000000 --- a/docs/sdks/python/query_table.mdx +++ /dev/null @@ -1,28 +0,0 @@ ---- -title: Query a Table -sidebarTitle: Query a Table ---- - -## Description - -The `query()` function is executed on a data source connected to MindsDB and saved into a variable. It queries a table from this data source. 
- -## Syntax - -Here is the syntax: - -```sql -my_data_source.query('SELECT * FROM my_table LIMIT 100') -``` - -You can query for newly added data using the functionality introduced by the [`LAST` keyword](/mindsdb_sql/sql/create/jobs#last) as follows: - -```sql -query = server.databases.my_data_source.tables.table_name.filter(column_name='value').track('timestamp_column') - -# first call returns no records -df = query.fetch() - -# second call returns rows with timestamp_column greater than the timestamp of a previous fetch -df = query.fetch() -``` diff --git a/docs/sdks/python/query_view.mdx b/docs/sdks/python/query_view.mdx deleted file mode 100644 index 8d0a1a81973..00000000000 --- a/docs/sdks/python/query_view.mdx +++ /dev/null @@ -1,28 +0,0 @@ ---- -title: Query a View -sidebarTitle: Query a View ---- - -## Description - -The `query()` function is executed on a view that resides in one of the projects. - -## Syntax - -Here is the syntax: - -```sql -project_name.query('SELECT * FROM my_project.my_view LIMIT 100') -``` - -You can query for newly added data using the functionality introduced by the [`LAST` keyword](/mindsdb_sql/sql/create/jobs#last) as follows: - -```sql -query = server.databases.my_data_source.views.view_name.filter(column_name='value').track('timestamp_column') - -# first call returns no records -df = query.fetch() - -# second call returns rows with timestamp_column greater than the timestamp of a previous fetch -df = query.fetch() -``` diff --git a/docs/sdks/python/refresh_job.mdx b/docs/sdks/python/refresh_job.mdx deleted file mode 100644 index 60902198426..00000000000 --- a/docs/sdks/python/refresh_job.mdx +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Refresh a Job -sidebarTitle: Refresh a Job ---- - -## Description - -The `refresh()` function synchronizes the job with MindsDB. 
- -## Syntax - -Use the `refresh()` method to retrieve job data from the MindsDB server: - -```python -my_job.refresh() -``` diff --git a/docs/sdks/python/update_table.mdx b/docs/sdks/python/update_table.mdx deleted file mode 100644 index 0b38c80101c..00000000000 --- a/docs/sdks/python/update_table.mdx +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: Update a Table -sidebarTitle: Update a Table ---- - -## Description - -The `update()` function is executed on a table from a data source connected to MindsDB. It updates a table on specified columns. - -## Syntax - -Here is the syntax: - -```sql -my_table.update(table_used_to_update, on=['column1', 'column2', ...]) -``` - - -Check out the [SQL syntax](/sql/api/update) to better understand how the `update()` function works. - diff --git a/docs/sdks/python/upload_file.mdx b/docs/sdks/python/upload_file.mdx deleted file mode 100644 index 46aaa6834e3..00000000000 --- a/docs/sdks/python/upload_file.mdx +++ /dev/null @@ -1,21 +0,0 @@ ---- -title: Upload a File -sidebarTitle: Upload a File ---- - -## Description - -In MindsDB, files are treated as tables. These are stored in the default `files` database. To upload a file, you must save this `files` database into a variable and then, run the `create_table()` function on it. - - -Note that the trailing whitespaces on column names are erased upon uploading a file to MindsDB. - - -## Syntax - -Here is the syntax: - -``` -files_db = server.get_database('files') -files_db.create_table('file_name', data_frame) -``` diff --git a/docs/setup.html b/docs/setup.html new file mode 100644 index 00000000000..34502a593ac --- /dev/null +++ b/docs/setup.html @@ -0,0 +1,395 @@ + + + + +Setup — Minds Cowork Docs + + + + + + + + + + +
+ + +
+
Setup
+

Get running in minutes

+

+ Minds Cowork ships as a desktop app (Electron) and a web app (Vite SPA + FastAPI). + Both modes share the same codebase — one repo, one setup step. +

+ + +
+

01 Prerequisites

+

Make sure the following are installed on your machine before starting.

+
+
+
Node.js
+
v20 or later
+
+
+
Python
+
3.12 or later
+
+
+
uv
+
Python package manager
+
+
+
make
+
pre-installed on macOS / Linux
+
+
+

Install uv if you don't have it:

+
+
bash
+
curl -LsSf https://astral.sh/uv/install.sh | sh
+
+
+ + +
+

02 Clone & install

+

Clone with --recurse-submodules to pull frontend, backend/core_api, and backend/core_agent in one shot.

+
+
bash
+
git clone --recurse-submodules https://github.com/mindsdb/minds-platform.git
+cd minds-platform
+make setup
+
+

make setup runs npm ci for the frontend and uv sync for both Python backends. It is automatically skipped on subsequent runs if nothing changed in the lock files.

+
+ + +
+

Desktop app

+

Starts the FastAPI backend (with hot reload) and the Electron renderer (TypeScript watch + Vite HMR) in parallel.

+
+
bash
+
make dev
+
+

Press Ctrl-C once to shut down all processes cleanly.

+

To open DevTools, set ANTON_OPEN_DEVTOOLS=1 before running:

+
+
bash
+
ANTON_OPEN_DEVTOOLS=1 make dev
+
+
+ + +
+

Web app

+

Starts the FastAPI backend and the Vite dev server with BUILD_TARGET=web. Opens the browser automatically.

+
+
bash
+
make dev-web
+
+

The app is served at http://localhost:5173. The API runs on http://localhost:26866.

+
+ API key required. Set ANTON_ANTHROPIC_API_KEY=sk-ant-... in a .env file inside backend/core_api/ before running. +
+
+ + +
+

Docker

+

Run the full stack in containers — the web SPA served by nginx on port 3000, the API on port 26866.

+
+
bash
+
make docker-build   # build both images
+make docker-up      # start everything
+
+

Or pass your API key inline:

+
+
bash
+
ANTON_ANTHROPIC_API_KEY=sk-ant-... docker compose up --build
+
+

Open http://localhost:3000 when the health check turns green.

+
+ + +
+

All make commands

+ + + + + + + + + + + + + + + +
Command — What it does
make setup — Install all dependencies (npm + uv). Auto-skipped when lock files haven't changed.
make dev — Start backend + Electron with full hot reload.
make dev-web — Start backend + Vite web dev server.
make build — Production build of the frontend (renderer + main process).
make dist-mac — Build and package a macOS .dmg.
make dist-win — Build and package a Windows .exe.
make docker-build — Build the api and web Docker images.
make docker-up — Start the Docker stack (docker compose up).
make docker-down — Stop and remove containers.
+
+
+
+ + + + + diff --git a/docs/setup/cloud/aws-marketplace.mdx b/docs/setup/cloud/aws-marketplace.mdx deleted file mode 100644 index e9a9d61d5a4..00000000000 --- a/docs/setup/cloud/aws-marketplace.mdx +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: MindsDB at AWS Marketplace -sidebarTitle: AWS Marketplace ---- - -MindsDB offers a streamlined setup process in cloud environments using its AWS Marketplace image. - - -Explore the MindsDB AWS Marketplace image [here](https://aws.amazon.com/marketplace/seller-profile?id=03a65520-86ca-4ab8-a394-c11eb54573a9). - diff --git a/docs/setup/custom-config.mdx b/docs/setup/custom-config.mdx deleted file mode 100644 index 0a76a038edc..00000000000 --- a/docs/setup/custom-config.mdx +++ /dev/null @@ -1,471 +0,0 @@ ---- -title: Extend the Default MindsDB Configuration -sidebarTitle: Extend the Default MindsDB Configuration ---- - -To follow this guide, install MindsDB locally via [Docker](/setup/self-hosted/docker-desktop) or [PyPI](/setup/self-hosted/pip/source). - -## Starting MindsDB with Extended Configuration - -Start MindsDB locally with your custom configuration by providing a path to the `config.json` file that stores custom config parameters listed in this section. - - - ```bash Docker - docker run --name mindsdb_container -e MINDSDB_CONFIG_PATH=/Users/username/path/config.json -e MINDSDB_APIS=http,mysql -p 47334:47334 -p 47335:47335 mindsdb/mindsdb - ``` - - ```bash Python - python -m mindsdb --api=http,mysql --config=/path-to-the-extended-config-file/config.json - ``` - - -### Available Config Parameters - -Below are all of the custom configuration parameters that should be set according to your requirements and saved into the `config.json` file. 
- -#### `permanent_storage` - -```bash -{ - "permanent_storage": { - "location": "absent", - "bucket": "s3_bucket_name" # optional, used only if "location": "s3" - }, -``` - -The `permanent_storage` parameter defines where MindsDB stores copies of user files, such as uploaded files, models, and tab content. MindsDB checks the `permanent_storage` location to access the latest version of a file and updates it as needed. - -The `location` specifies the storage type. - -* `absent` (default): Disables permanent storage and is recommended to use when MindsDB is running locally. -* `local`: Stores files in a local directory defined with `config['paths']['storage']`. -* `s3`: Stores files in an Amazon S3 bucket. This option requires the `bucket` parameter that specifies the name of the S3 bucket where files will be stored. - -If this parameter is not set, the path is determined by the `MINDSDB_STORAGE_DIR` environment variable. MindsDB defaults to creating a `mindsdb` folder in the operating system user's home directory. - -#### `paths` - -```bash - "paths": { - "root": "/home/mindsdb/var", # optional (alternatively, it can be defined in the MINDSDB_STORAGE_DIR environment variable) - "content": "/home/mindsdb/var/content", # optional - "storage": "/home/mindsdb/var/storage", # optional - "static": "/home/mindsdb/var/static", # optional - "tmp": "/home/mindsdb/var/tmp", # optional - "cache": "/home/mindsdb/var/cache", # optional - "locks": "/home/mindsdb/var/locks", # optional - }, -``` - -The `paths` parameter allows users to redefine the file paths for various groups of MindsDB files. If only the `root` path is defined, all other folders will be created within that directory. If this parameter is absent, the value is determined by the `MINDSDB_STORAGE_DIR` environment variable. - -The `root` parameter defines the base directory for storing all MindsDB files, including models, uploaded files, tab content, and the internal SQLite database (if running locally). 
- -The `content` parameter specifies the directory where user-related files are stored, such as uploaded files, created models, and tab content. The internal SQLite database (if running locally) is stored in the `root` directory instead. - -If the `['permanent_storage']['location']` is set to `'local'`, then the `storage` parameter is used to store copies of user files. - -The `static` parameter is used to store files for the graphical user interface (GUI) when MindsDB is run locally. - -The `tmp` parameter designates a directory for temporary files. Note that the operating system’s default temporary directory may also be used for some temporary files. - -If the `['cache']['type']` is set to `'local'`, then the `cache` parameter defines the location for storing cached files for the most recent predictions. For example, if a model is queried with identical input, the result will be stored in the cache and returned directly on subsequent queries, instead of recalculating the prediction. - -The `locks` parameter is used to store lock files to prevent race conditions when the `content` folder is shared among multiple applications. This directory helps ensure that file access is managed properly using `fcntl` locks. Note that this is not applicable for Windows OS. - -#### `auth` - -```bash - "auth":{ - "http_auth_type": "session" | "token"| "session_or_token", - "http_auth_enabled": true, - "username": "username", - "password": "password" - }, -``` - -The `auth` parameter controls the authentication settings for APIs in MindsDB. - -Users can define the authentication type by setting the `http_auth_type` parameter to one of the following values: - -* `session_or_token` is the default value. When a user logs in to MindsDB, the session cookie is set and the token is returned in the response. To use the MindsDB API, users can utilize either one or both of these methods. - -* `session` sets the session cookie when a user logs in. 
The session lifetime can be set with the `http_permanent_session_lifetime` parameter. - -* `token` returns the token in the response, which is valid indefinitely. - -The authentication type can also be set via the `MINDSDB_HTTP_AUTH_TYPE` environment variable with the same values as defined above. - -If the `http_auth_enabled` parameter is set to `true`, then the `username` and `password` parameters are required. Otherwise these are optional. - -#### `gui` - -```bash - "gui": { - "autoupdate": true, - "open_on_start": true - }, -``` - -The `gui` parameter controls the behavior of the MindsDB graphical user interface (GUI) updates. - -The `autoupdate` parameter defines whether MindsDB automatically checks for and updates the GUI to the latest version when the application starts. If set to `true`, MindsDB will attempt to fetch the latest available version of the GUI. If set to `False`, MindsDB will not try to update the GUI on startup. - -The `open_on_start` parameter defines whether MindsDB automatically opens the GUI on start. If set to `true`, MindsDB will open the GUI automatically. If set to `False`, MindsDB will not open the GUI on startup. - -#### `api` - -```bash - "api": { - "http": { - "host": "127.0.0.1", - "port": "47334", - "restart_on_failure": true, - "max_restart_count": 1, - "max_restart_interval_seconds": 60, - "a2wsgi": { - "workers": 15, - "send_queue_size": 10 - } - }, - "mysql": { - "host": "127.0.0.1", - "port": "47335", - "database": "mindsdb", - "ssl": true, - "restart_on_failure": true, - "max_restart_count": 1, - "max_restart_interval_seconds": 60 - }, - }, -``` - -The `api` parameter contains the configuration settings for running MindsDB APIs. - -Currently, the supported APIs are: - -* `http`: Configures the HTTP API. It requires the `host` and `port` parameters. 
Alternatively, configure HTTP authentication for your MindsDB instance by setting the environment variables `MINDSDB_USERNAME` and `MINDSDB_PASSWORD` before starting MindsDB, which is a recommended way for the production systems. -* `mysql`: Configures the MySQL API. It requires the `host` and `port` parameters and additionally the `database` and `ssl` parameters. - - - - - -Connection parameters for the HTTP API include: - -* `host`: Specifies the IP address or hostname where the API should run. For example, `"127.0.0.1"` indicates the API will run locally. -* `port`: Defines the port number on which the API will listen for incoming requests. The default ports are `47334` for HTTP, and `47335` for MySQL. -* `restart_on_failure`: If it is set to `true` (and `max_restart_count` is not reached), the restart of MindsDB will be attempted after the MindsDB process was killed - with code 9 on Linux and MacOS, or for any reason on Windows. -* `max_restart_count`: This defines how many times the restart attempts can be made. Note that 0 stands for no limit. -* `max_restart_interval_seconds`: This defines the time limit during which there can be no more than `max_restart_count` restart attempts. Note that 0 stands for no time limit, which means there would be a maximum of `max_restart_count` restart attempts allowed. - - - Here is a usage example of the restart features: - - Assume the following values: - - max_restart_count = 2 - - max_restart_interval_seconds = 30 seconds - - Assume the following scenario: - - MindsDB fails at 1000s of its work - the restart attempt succeeds as there were no restarts in the past 30 seconds. - - MindsDB fails at 1010s of its work - the restart attempt succeeds as there was only 1 restart (at 1000s) in the past 30 seconds. - - MindsDB fails at 1020s of its work - the restart attempt fails as there were already max_restart_count=2 restarts (at 1000s and 1010s) in the past 30 seconds. 
- - MindsDB fails at 1031s of its work - the restart attempt succeeds as there was only 1 restart (at 1010s) in the past 30 seconds. - - -* `a2wsgi` is a WSGI wrapper with the following parameters: `workers` defines the number of requests that can be processed in parallel, and `send_queue_size` defines the buffer size. - - - - - -Connection parameters for the MySQL API include: - -* `host`: Specifies the IP address or hostname where the API should run. For example, `"127.0.0.1"` indicates the API will run locally. -* `port`: Defines the port number on which the API will listen for incoming requests. The default ports are `47334` for HTTP, and `47335` for MySQL. -* `database`: Specifies the name of the database that MindsDB uses. Users must connect to this database to interact with MindsDB through the respective API. -* `ssl`: Indicates whether SSL support is enabled for the MySQL API. -* `restart_on_failure`: If it is set to `true` (and `max_restart_count` is not reached), the restart of MindsDB will be attempted after the MindsDB process was killed - with code 9 on Linux and MacOS, or for any reason on Windows. -* `max_restart_count`: This defines how many times the restart attempts can be made. Note that 0 stands for no limit. -* `max_restart_interval_seconds`: This defines the time limit during which there can be no more than `max_restart_count` restart attempts. Note that 0 stands for no time limit, which means there would be a maximum of `max_restart_count` restart attempts allowed. - - - - - -The `mcp` section configures the [MCP server](/model-context-protocol/usage). - -```json -"api": { - "mcp": { - "cors": { - "enabled": true, - "allow_origins": [], - "allow_origin_regex": "https?://(localhost|127\\.0\\.0\\.1)(:\\d+)?", - "allow_headers": ["*"] - }, - "rate_limit": { - "enabled": false, - "requests_per_minute": 60 - }, - "dns_rebinding_protection": false - } -} -``` - -* `cors.enabled`: Enables CORS headers on MCP endpoints.
Can also be set via `MINDSDB_MCP_CORS_ENABLED`. -* `cors.allow_origins`: List of allowed origins. Can also be set via `MINDSDB_MCP_ALLOW_ORIGINS` (comma-separated). -* `cors.allow_origin_regex`: Regex pattern for allowed origins. Can also be set via `MINDSDB_MCP_ALLOW_ORIGIN_REGEXP`. -* `cors.allow_headers`: List of allowed request headers. Can also be set via `MINDSDB_MCP_ALLOW_HEADERS` (comma-separated). -* `rate_limit.enabled`: Enables per-IP rate limiting. Can also be set via `MINDSDB_MCP_RATE_LIMIT_ENABLED`. -* `rate_limit.requests_per_minute`: Maximum number of requests per minute per IP. Can also be set via `MINDSDB_MCP_RATE_LIMIT_RPM`. -* `dns_rebinding_protection`: When `true`, the MCP transport validates the `Host` header against a list of known-safe hosts to prevent DNS rebinding attacks. Disabled by default (`false`). Enable it when running MindsDB locally and you want to restrict MCP access to `localhost` only. Can also be set via `MINDSDB_MCP_DNS_REBINDING_PROTECTION`. - - - - - -#### `cache` - -```bash - "cache": { - "type": "local", - "connection": "redis://localhost:6379" # optional, used only if "type": "redis" - }, -``` - -The `cache` parameter controls how MindsDB stores the results of recent predictions to avoid recalculating them if the same query is run again. Note that recent predictions are cached for ML models, but not in the case of large language models (LLMs), like OpenAI. - -The `type` parameter specifies the type of caching mechanism to use for storing prediction results. - -* `none`: Disables caching. No prediction results are stored. -* `local` (default): Stores prediction results in the `cache` folder (as defined in the `paths` configuration). This is useful for repeated queries where the result doesn't change. -* `redis`: Stores prediction results in a Redis instance. This option requires the `connection` parameter, which specifies the Redis connection string. 
- -The `connection` parameter is required only if the `type` parameter is set to `redis`. It stores the Redis connection string. - -#### `logging` - -```bash - "logging": { - "handlers": { - "console": { - "enabled": true, - "formatter": "default", # optional, available values include default and json - "level": "INFO" # optional (alternatively, it can be defined in the MINDSDB_CONSOLE_LOG_LEVEL environment variable) - }, - "file": { - "enabled": false, - "level": "INFO", # optional (alternatively, it can be defined in the MINDSDB_FILE_LOG_LEVEL environment variable) - "filename": "app.log", - "maxBytes": 524288, # 0.5 Mb - "backupCount": 3 - } - } - }, -``` - -The above parameters are implemented based on [Python's Logging Dictionary Schema](https://docs.python.org/3/library/logging.config.html#logging-config-dictschema). - -The `logging` parameter defines the details of output logging, including the logging levels. - -The `handlers` parameter provides handlers used for logging into streams and files. - -* `console`: This parameter defines the setup for saving logs into a stream. - - * If the `enabled` parameter is set to `true`, then the logging output is saved into a stream. - * Users can define the `formatter` parameter that configures the format of the logs, where the available values include `default` and `json`. - * Users can also define the logging level in the `level` parameter or in the `MINDSDB_CONSOLE_LOG_LEVEL` environment variable - one of `DEBUG`, `INFO`, `WARNING`, `ERROR`, `CRITICAL`. - -* `file`: This parameter defines the setup for saving logs into a file. - - * If the `enabled` parameter is set to `true`, then the logging output is saved into a file. - * Users can define the logging level in the `level` parameter or in the `MINDSDB_FILE_LOG_LEVEL` environment variable - one of `DEBUG`, `INFO`, `WARNING`, `ERROR`, `CRITICAL`. - * Additionally, the `filename` parameter stores the name of the file that contains logs.
- * And the `maxBytes` and `backupCount` parameters determine the rollover process of the file - that is, if the file reaches the size of `maxBytes`, then the file is closed and a new file is opened, where the number of files is defined by the `backupCount` parameter. - -#### `ml_task_queue` - -```bash - "ml_task_queue": { - "type": "local", - "host": "localhost", # optional, used only if "type": "redis" - "port": 6379, # optional, used only if "type": "redis" - "db": 0, # optional, used only if "type": "redis" - "username": "username", # optional, used only if "type": "redis" - "password": "password" # optional, used only if "type": "redis" - }, -``` - -The `ml_task_queue` parameter manages the queueing system for machine learning tasks in MindsDB. ML tasks include operations such as creating, training, predicting, fine-tuning, and retraining models. These tasks can be resource-intensive, and running multiple ML tasks simultaneously may lead to Out of Memory (OOM) errors or performance degradation. To address this, MindsDB uses a task queue to control task execution and optimize resource utilization. - -The `type` parameter defines the type of task queue to use. - -* `local`: Tasks are processed immediately as they appear, without a queue. This is suitable for environments where resource constraints are not a concern. -* `redis`: Tasks are added to a Redis-based queue, and a consumer process (which is run with `--ml_task_queue_consumer`) ensures that tasks are executed only when sufficient resources are available. - - Using a Redis queue requires additional configuration such as the `host`, `port`, `db`, `username`, and `password` parameters. - - To use the Redis queue, start MindsDB with the following command to initiate a queue consumer process: `python3 -m mindsdb --ml_task_queue_consumer`. This process will monitor the queue and fetch tasks for execution only when sufficient resources are available.
- -#### `url_file_upload` - -```bash - "url_file_upload": { - "enabled": true, - "allowed_origins": ["https://example.com"], - "disallowed_origins": ["http://example.com"] - } -``` - -The `url_file_upload` parameter restricts file uploads to trusted sources by specifying a list of allowed domains. This ensures that users can only upload files from the defined sources, such as S3 or Google Drive. - -The `enabled` flag turns this feature on (`true`) or off (`false`). - -The `allowed_origins` parameter lists allowed domains. If left empty, then any domain is allowed. - -The `disallowed_origins` parameter lists domains that are not allowed. If left empty, then there are no restricted domains. - -#### `web_crawling_allowed_sites` - -```bash - "web_crawling_allowed_sites": [], -``` - -The `web_crawling_allowed_sites` parameter restricts web crawling operations to a specified list of allowed IPs or web addresses. This ensures that the application only accesses pre-approved and safe URLs (`"web_crawling_allowed_sites": ["https://example.com", "https://api.mysite.com"]`). - -If left empty (`[]`), the application allows access to all URLs by default (marked with a wildcard in the open-source version). - -#### `default_llm` - -```bash - "default_llm": { - "provider": "azure_openai", - "model_name" : "gpt-4o", - "api_key": "sk-abc123", - "base_url": "https://ai-6689.openai.azure.com/", - "api_version": "2024-02-01", - "method": "multi-class" - } -``` - -The `default_llm` parameter specifies the default LLM that will be used with the [`LLM()` function](/mindsdb_sql/functions/llm_function), the [`TO_MARKDOWN()` function](/mindsdb_sql/functions/to_markdown_function), and as a default model for [agents](/mindsdb_sql/agents/agent). 
- -#### `default_embedding_model` - -```bash - "default_embedding_model": { - "provider": "azure_openai", - "model_name" : "text-embedding-3-large", - "api_key": "sk-abc123", - "base_url": "https://ai-6689.openai.azure.com/", - "api_version": "2024-02-01" - } -} -``` - -The `default_embedding_model` parameter specifies the default embedding model used with knowledge bases. Learn more about the parameters following the [documentation of the `embedding_model` of knowledge bases](/mindsdb_sql/knowledge_bases/create#embedding-model). - -#### `default_reranking_model` - -```bash - "default_reranking_model": { - "provider": "azure_openai", - "model_name" : "gpt-4o", - "api_key": "sk-abc123", - "base_url": "https://ai-6689.openai.azure.com/", - "api_version": "2024-02-01", - "method": "multi-class" - } -``` - -The `default_reranking_model` parameter specifies the default reranking model used with knowledge bases. Learn more about the parameters following the [documentation of the `reranking_model` of knowledge bases](/mindsdb_sql/knowledge_bases/create#reranking-model). - -#### `data_catalog` - -```bash -{ - "data_catalog": { - "enabled": true - } -} -``` - -This parameter enables the [data catalog](/data_catalog/overview). - -### Example - -First, create a `config.json` file. 
- -```bash -{ - "permanent_storage": { - "location": "absent" - }, - "paths": { - "root": "/path/to/root/location" - }, - "auth":{ - "http_auth_enabled": true, - "username": "username", - "password": "password" - }, - "gui": { - "autoupdate": true - }, - "api": { - "http": { - "host": "127.0.0.1", - "port": "47334", - "restart_on_failure": true, - "max_restart_count": 1, - "max_restart_interval_seconds": 60 - }, - "mysql": { - "host": "127.0.0.1", - "port": "47335", - "database": "mindsdb", - "ssl": true, - "restart_on_failure": true, - "max_restart_count": 1, - "max_restart_interval_seconds": 60 - } - }, - "cache": { - "type": "local" - }, - "logging": { - "handlers": { - "console": { - "enabled": true, - "formatter": "default", - "level": "INFO" - }, - "file": { - "enabled": false, - "level": "INFO", - "filename": "app.log", - "maxBytes": 524288, - "backupCount": 3 - } - } - }, - "ml_task_queue": { - "type": "local" - }, - "url_file_upload": { - "enabled": true, - "allowed_origins": ["https://example.com"], - "disallowed_origins": ["http://example.com"] - }, - "web_crawling_allowed_sites": [] -} -``` - -Next, start MindsDB providing this `config.json` file. - -```bash -python -m mindsdb --config=/path-to-the-extended-config-file/config.json -``` - -## Modifying Config Values - -Users can modify config values by directly editing the `config.json` file they created. diff --git a/docs/setup/environment-vars.mdx b/docs/setup/environment-vars.mdx deleted file mode 100644 index e1bb4af1030..00000000000 --- a/docs/setup/environment-vars.mdx +++ /dev/null @@ -1,367 +0,0 @@ ---- -title: Environment Variables -sidebarTitle: Environment Variables ---- - -Most of the MindsDB functionality can be modified by extending the default configuration, but some of the configuration options -can be added as environment variables on the server where MindsDB is deployed. -[Here is the list](/setup/full-list-environment-vars) of all the available environment variables. 
- -## MindsDB Authentication - -MindsDB does not require authentication by default. If you want to enable authentication, you can set the `MINDSDB_USERNAME` and `MINDSDB_PASSWORD` environment variables. - -### Example - - - ```bash Docker - docker run --name mindsdb_container -e MINDSDB_USERNAME='mindsdb_user' -e MINDSDB_PASSWORD='mindsdb_password' -e MINDSDB_APIS=http,mysql -p 47334:47334 -p 47335:47335 mindsdb/mindsdb - ``` - - ```bash Shell - export MINDSDB_USERNAME='mindsdb_user' - export MINDSDB_PASSWORD='mindsdb_password' - ``` - - - -## MindsDB Authentication Type - -Users can define the authentication type by setting the `MINDSDB_HTTP_AUTH_TYPE` environment variable to one of the following values: - -* `session_or_token` is the default value. When a user logs in to MindsDB, the session cookie is set and the token is returned in the response. To use the MindsDB API, users can utilize either one or both of these methods. - -* `session` sets the session cookie when a user logs in. The session lifetime can be set with the `http_permanent_session_lifetime` parameter. - -* `token` returns the token in the response, which is valid indefinitely. - -### Example - - - ```bash Docker - docker run --name mindsdb_container -e MINDSDB_HTTP_AUTH_TYPE='session' -e MINDSDB_APIS=http,mysql -p 47334:47334 -p 47335:47335 mindsdb/mindsdb - ``` - - ```bash Shell - export MINDSDB_HTTP_AUTH_TYPE='session_or_token' - ``` - - -## MindsDB Configuration File - -In order to start MindsDB with a [custom configuration file](/setup/custom-config), the `MINDSDB_CONFIG_PATH` environment variable should store the file path. 
- -### Example - - - ```bash Docker - docker run --name mindsdb_container -e MINDSDB_CONFIG_PATH=/Users/username/path/config.json -e MINDSDB_APIS=http,mysql -p 47334:47334 -p 47335:47335 mindsdb/mindsdb - ``` - - ```bash Shell - export MINDSDB_CONFIG_PATH=/Users/username/path/config.json - ``` - - - -## MindsDB Storage - -By default, MindsDB stores the configuration files by determining appropriate platform-specific directories, e.g. a "user data dir": - -- On Linux `~/.local/share/mindsdb/var` -- On MacOS `~/Library/Application Support/mindsdb/var` -- On Windows `C:\Documents and Settings\\Application Data\Local Settings\\mindsdb\var` - -In the `MINDSDB_STORAGE_DIR` location, MindsDB stores users' data, models and uploaded data files, the static assets for the frontend application and the -`sqlite.db` file. -You can change the default storage location using the `MINDSDB_STORAGE_DIR` variable. - -### Example - - - ```bash Docker - docker run --name mindsdb_container -e MINDSDB_STORAGE_DIR='~/home/mindsdb/var' -e MINDSDB_APIS=http,mysql -p 47334:47334 -p 47335:47335 mindsdb/mindsdb - ``` - - ```bash Shell - export MINDSDB_STORAGE_DIR='~/home/mindsdb/var' - ``` - - - -## MindsDB Configuration Storage - -MindsDB uses a `sqlite` database by default to store the required configuration, such as models, projects, and file metadata. -The full list of the above schemas can be found [here](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/interfaces/storage/db.py#L69). You can change the -default storage option and use a different database by adding the new connection string using the `MINDSDB_DB_CON` variable.
- -### Example - - - ```bash Docker docker run --name mindsdb_container -e - MINDSDB_DB_CON='postgresql://user:secret@localhost' -e MINDSDB_APIS=http,mysql - -p 47334:47334 -p 47335:47335 mindsdb/mindsdb ``` ```bash Shell export - MINDSDB_DB_CON='postgresql://user:secret@localhost' ``` - - -#### `MINDSDB_STORAGE_BACKUP_DISABLED` - -- **Type:** Boolean (`1`, `true`, `True`) -- **Description:** Disables permanent storage backup -- **Default:** `false` -- **Example:** `MINDSDB_STORAGE_BACKUP_DISABLED=1` - -## MindsDB APIs - -The `MINDSDB_APIS` environment variable lets users define which APIs to start. Learn more about the [available APIs here](/setup/mindsdb-apis). - -### Example - - - ```bash Docker - docker run --name mindsdb_container -e MINDSDB_APIS=http,mysql -p 47334:47334 -p 47335:47335 mindsdb/mindsdb - ``` - - ```bash Shell - export MINDSDB_APIS='http,mysql' - ``` - - - -## MindsDB Logs - -This environment variable defines the level of logging generated by MindsDB. You can choose one of the values [defined here](https://docs.python.org/3/library/logging.html#logging-levels). The `INFO` level is used by default. - -### Example - - - ```bash Docker - docker run --name mindsdb_container -e MINDSDB_LOG_LEVEL='DEBUG' -e MINDSDB_APIS=http,mysql -p 47334:47334 -p 47335:47335 mindsdb/mindsdb - ``` - - ```bash Shell - export MINDSDB_LOG_LEVEL='DEBUG' - ``` - - - -#### `MINDSDB_CONSOLE_LOG_LEVEL` - -- **Type:** String (`DEBUG`, `INFO`, `WARNING`, `ERROR`) -- **Description:** Sets console log level -- **Default:** `INFO` -- **Example:** `MINDSDB_CONSOLE_LOG_LEVEL=DEBUG` - -#### `MINDSDB_FILE_LOG_LEVEL` - -- **Type:** String (`DEBUG`, `INFO`, `WARNING`, `ERROR`) -- **Description:** Sets file log level and enables file logging -- **Default:** `INFO` (disabled by default) -- **Example:** `MINDSDB_FILE_LOG_LEVEL=DEBUG` - -## MindsDB Default Project - -By default, MindsDB creates a project named `mindsdb` where all the models and other objects are stored. 
You can change the default project name by setting the `MINDSDB_DEFAULT_PROJECT` environment variable. - -If this environment variable is set or modified after MindsDB has started, the default project will be **renamed** accordingly upon restart. To start using the new default project, a `USE` statement will also need to be executed. - -### Example - - - ```bash Docker - docker run --name mindsdb_container -e MINDSDB_DEFAULT_PROJECT='my_project' -e MINDSDB_APIS=http,mysql -p 47334:47334 -p 47335:47335 mindsdb/mindsdb - ``` - - ```bash Shell - export MINDSDB_DEFAULT_PROJECT='my_project' - ``` - - - -#### `MINDSDB_DEFAULT_LLM_API_KEY` - -- **Type:** String -- **Description:** API key for default LLM (Large Language Model) -- **Default:** None -- **Example:** `MINDSDB_DEFAULT_LLM_API_KEY=sk-...` - -#### `MINDSDB_DEFAULT_EMBEDDING_MODEL_API_KEY` - -- **Type:** String -- **Description:** API key for default embedding model -- **Default:** None -- **Example:** `MINDSDB_DEFAULT_EMBEDDING_MODEL_API_KEY=sk-...` - -#### `MINDSDB_DEFAULT_RERANKING_MODEL_API_KEY` - -- **Type:** String -- **Description:** API key for default reranking model -- **Default:** None -- **Example:** `MINDSDB_DEFAULT_RERANKING_MODEL_API_KEY=sk-...` - -## MindsDB's PID File - -When running MindsDB via [Docker](/setup/self-hosted/docker) or [Docker Extension](/setup/self-hosted/docker-desktop), the PID file is not used by default. Users can opt for enabling the PID file by defining the `USE_PIDFILE` environment variable. - -If used, the PID file is stored in the temp directory (`$TMPDIR` on MacOS and Linux, `%TEMP%` on Windows) under the `mindsdb` folder. 
- -### Example - - - ```bash Docker - docker run --name mindsdb_container -e USE_PIDFILE=1 -e MINDSDB_APIS=http,mysql -p 47334:47334 -p 47335:47335 mindsdb/mindsdb - ``` - - ```bash Shell - export USE_PIDFILE=1 - ``` - - - -## MindsDB GUI Updates - -In order to disable automatic GUI updates, the `MINDSDB_GUI_AUTOUPDATE` environment variable should be set to `false` (or `0`). - -By default, the automatic GUI updates are enabled and the `MINDSDB_GUI_AUTOUPDATE` environment variable is set to `true` (or `1`). - -### Example - - - ```bash Docker - docker run --name mindsdb_container -e MINDSDB_GUI_AUTOUPDATE=false -e MINDSDB_APIS=http,mysql -p 47334:47334 -p 47335:47335 mindsdb/mindsdb - ``` - - ```bash Shell - export MINDSDB_GUI_AUTOUPDATE=false - ``` - - - -## MindsDB GUI Startup and Updates - -In order to not open the MindsDB GUI automatically when starting the instance (and to disable automatic GUI updates), the `MINDSDB_NO_STUDIO` environment variable should be set to `true` (or `1`). - -By default, the MindsDB GUI starts automatically when starting the instance (and the automatic GUI updates are enabled), that is, the `MINDSDB_NO_STUDIO` environment variable is set to `false` (or `0`). - -Note that the `MINDSDB_NO_STUDIO` is not recommended for the MindsDB instance running in Docker. Instead, use the `MINDSDB_GUI_AUTOUPDATE` environment variable to disable automatic GUI updates. 
- -### Example - - - ```bash Docker - docker run --name mindsdb_container -e MINDSDB_NO_STUDIO=true -e MINDSDB_APIS=http,mysql -p 47334:47334 -p 47335:47335 mindsdb/mindsdb - ``` - - ```bash Shell - export MINDSDB_NO_STUDIO=true - ``` - - - -### ML Task Queue - -#### `MINDSDB_ML_QUEUE_TYPE` - -- **Type:** String (`local`, `redis`) -- **Description:** Type of ML task queue to use -- **Default:** `local` -- **Example:** `MINDSDB_ML_QUEUE_TYPE=redis` - -#### `MINDSDB_ML_QUEUE_HOST` - -- **Type:** String (hostname) -- **Description:** Redis host for ML task queue (only when `MINDSDB_ML_QUEUE_TYPE=redis`) -- **Default:** `localhost` -- **Example:** `MINDSDB_ML_QUEUE_HOST=redis.example.com` - -#### `MINDSDB_ML_QUEUE_PORT` - -- **Type:** Integer -- **Description:** Redis port for ML task queue -- **Default:** `6379` -- **Example:** `MINDSDB_ML_QUEUE_PORT=6380` - -#### `MINDSDB_ML_QUEUE_DB` - -- **Type:** Integer -- **Description:** Redis database number for ML task queue -- **Default:** `0` -- **Example:** `MINDSDB_ML_QUEUE_DB=1` - -#### `MINDSDB_ML_QUEUE_USERNAME` - -- **Type:** String -- **Description:** Redis username for ML task queue -- **Default:** None -- **Example:** `MINDSDB_ML_QUEUE_USERNAME=redis_user` - -#### `MINDSDB_ML_QUEUE_PASSWORD` - -- **Type:** String -- **Description:** Redis password for ML task queue -- **Default:** None -- **Example:** `MINDSDB_ML_QUEUE_PASSWORD=redis_pass` - -## Reranker Configuration - -#### `MINDSDB_RERANKER_N` - -- **Type:** Integer -- **Description:** Number of results to rerank -- **Default:** None -- **Example:** `MINDSDB_RERANKER_N=10` - -#### `MINDSDB_RERANKER_LOGPROBS` - -- **Type:** Boolean (`true`, `false`, `1`, `0`, `yes`, `no`) -- **Description:** Enable log probabilities in reranker -- **Default:** None -- **Example:** `MINDSDB_RERANKER_LOGPROBS=true` - -#### `MINDSDB_RERANKER_TOP_LOGPROBS` - -- **Type:** Integer -- **Description:** Number of top log probabilities to return -- **Default:** None -- **Example:** 
`MINDSDB_RERANKER_TOP_LOGPROBS=5` - -#### `MINDSDB_RERANKER_MAX_TOKENS` - -- **Type:** Integer -- **Description:** Maximum tokens for reranker -- **Default:** None -- **Example:** `MINDSDB_RERANKER_MAX_TOKENS=512` - -#### `MINDSDB_RERANKER_VALID_CLASS_TOKENS` - -- **Type:** String (comma-separated list) -- **Description:** Valid class tokens for reranker -- **Default:** None -- **Example:** `MINDSDB_RERANKER_VALID_CLASS_TOKENS=token1,token2,token3` - -## Features - -#### `MINDSDB_DATA_CATALOG_ENABLED` - -- **Type:** Boolean (`1`, `true`) -- **Description:** Enables the data catalog feature -- **Default:** `false` -- **Example:** `MINDSDB_DATA_CATALOG_ENABLED=1` - -## Runtime - -#### `MINDSDB_DOCKER_ENV` - -- **Type:** Any value (presence check) -- **Description:** Indicates MindsDB is running in a Docker environment (changes default API host to `0.0.0.0`) -- **Default:** Not set -- **Example:** `MINDSDB_DOCKER_ENV=1` - -#### `MINDSDB_RUNTIME` - -- **Type:** String (`1`) -- **Description:** Indicates MindsDB runtime environment -- **Default:** Not set -- **Example:** `MINDSDB_RUNTIME=1` - ---- diff --git a/docs/setup/mindsdb-apis.mdx b/docs/setup/mindsdb-apis.mdx deleted file mode 100644 index 97d60ca8478..00000000000 --- a/docs/setup/mindsdb-apis.mdx +++ /dev/null @@ -1,30 +0,0 @@ ---- -title: MindsDB APIs -sidebarTitle: MindsDB APIs ---- - -MindsDB provides multiple APIs with optional authentication mechanisms. - -## APIs - -When you start MindsDB, the following APIs become available: - -* The **HTTP API**, along with the **A2A API** and **MCP API**, runs on port `47334`. - - - Access the MindsDB Editor at `mindsdb-instance-url:47334` - - - Access the MCP API at `mindsdb-instance-url:47334/mcp/` - - - Access the A2A API at `mindsdb-instance-url:47334/a2a/` - -* The **MySQL API** runs on port `47335`. - - - Connect to MindsDB from database clients as if it were a standard MySQL database. - -## Authentication - -The authentication mechanism covers the HTTP API, A2A API, and MCP API.
- -You can configure authentication by setting [environment variables](/setup/environment-vars#mindsdb-authentication) or by defining credentials in the [configuration file](/setup/custom-config#auth). - -For details on generating and using MindsDB authentication tokens, refer to the [authentication guide](/rest/authentication). diff --git a/docs/setup/my_config.json b/docs/setup/my_config.json deleted file mode 100644 index fb4afd96b13..00000000000 --- a/docs/setup/my_config.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "permanent_storage": { - "location": "local", - "bucket": "s3_bucket_name" - }, - "paths": { - "root": "/home/mindsdb/var", - "content": "/home/mindsdb/var/content", - "storage": "/home/mindsdb/var/storage", - "static": "/home/mindsdb/var/static", - "tmp": "/home/mindsdb/var/tmp", - "cache": "/home/mindsdb/var/cache", - "locks": "/home/mindsdb/var/locks" - }, - "auth":{ - "http_auth_enabled": true, - "http_permanent_session_lifetime": 86400, - "username": "username", - "password": "password" - }, - "gui": { - "autoupdate": true - }, - "api": { - "http": { - "host": "127.0.0.1", - "port": "47334", - "restart_on_failure": true, - "max_restart_count": 1, - "max_restart_interval_seconds": 60 - }, - "mysql": { - "host": "127.0.0.1", - "port": "47335", - "database": "mindsdb", - "ssl": true, - "restart_on_failure": true, - "max_restart_count": 1, - "max_restart_interval_seconds": 60 - } - }, - "cache": { - "type": "local", - "connection": "redis://localhost:6379" - }, - "logging": { - "handlers": { - "console": { - "enabled": true, - "level": "INFO" - }, - "file": { - "enabled": false, - "level": "INFO", - "filename": "app.log", - "maxBytes": 524288, - "backupCount": 3 - } - } - }, - "ml_task_queue": { - "type": "local", - "host": "localhost", - "port": 6379, - "db": 0, - "username": "username", - "password": "password" - }, - "url_file_upload": { - "enabled": false, - "allowed_origins": [], - "disallowed_origins": [] - }, - "web_crawling_allowed_sites": [] 
-} \ No newline at end of file diff --git a/docs/setup/open_telemetry.mdx b/docs/setup/open_telemetry.mdx deleted file mode 100644 index c20eb878ce1..00000000000 --- a/docs/setup/open_telemetry.mdx +++ /dev/null @@ -1,113 +0,0 @@ ---- -title: OpenTelemetry Integration Guide -sidebarTitle: OpenTelemetry Integration Guide -icon: "book" ---- - -This guide explains how to configure and integrate OpenTelemetry (OTEL) using environment variables. It assumes you are familiar with OTEL concepts and provides details on environment variable configuration. The provided `docker-compose.yml` includes an OTEL Collector for local development, but you can use any collector of your choice. - -## Install required dependencies - -Install dependencies listed in `requirements/requirements-opentelemetry.txt`, or install the `[opentelemetry]` extra when installing mindsdb: - -```bash -pip install mindsdb[opentelemetry] -``` - -## Configuring OpenTelemetry with Environment Variables - -OpenTelemetry behavior is controlled via environment variables. Below is a breakdown of the key variables and their purposes: - -### General Variables - -- `OTEL_EXPORTER_TYPE`: Defines the type of exporter (`console` or `otlp`). - - Default: `console`. - -- `OTEL_EXPORTER_PROTOCOL`: Defines the protocol for the exporter (`grpc` or `http`). - - Default: `grpc`. - -- `OTEL_SDK_DISABLED`: Disables the OpenTelemetry SDK. - - Default: Disabled for `local` environments unless `OTEL_SDK_FORCE_RUN` is `true`. - -- `OTEL_SDK_FORCE_RUN`: Forces the OpenTelemetry SDK to run even in disabled environments. - - Example: Set to `true` for testing in local environments. - -### OTLP Exporter Endpoints - -- `OTEL_OTLP_ENDPOINT`: Sets the base endpoint for all OTLP exports. - - Default: `http://localhost:4317`. - -- `OTEL_OTLP_LOGGING_ENDPOINT`: Endpoint for logging exports. - - Default: Same as `OTEL_OTLP_ENDPOINT`. - -- `OTEL_OTLP_TRACING_ENDPOINT`: Endpoint for tracing exports. - - Default: Same as `OTEL_OTLP_ENDPOINT`.
- -- `OTEL_OTLP_METRICS_ENDPOINT`: Endpoint for metrics exports. - - Default: Same as `OTEL_OTLP_ENDPOINT`. - -### Service Information - -- `OTEL_SERVICE_NAME`: Specifies the service name. - - Default: `mindsdb_new_test`. - -- `OTEL_SERVICE_INSTANCE_ID`: Unique ID for the service instance. - - Default: `mindsdb`. - -- `OTEL_SERVICE_ENVIRONMENT`: Specifies the deployment environment (e.g., `local`, `dev`, `prod`). - - Default: `local`. - -- `OTEL_SERVICE_RELEASE`: Version or release identifier of the service. - - Default: `local`. - -- `OTEL_TRACE_SAMPLE_RATE`: Determines the sampling rate for tracing. - - Default: `1.0`. - -### Attributes - -- `OTEL_EXTRA_ATTRIBUTES`: Allows passing additional attributes as a comma-separated list of key-value pairs. - - Example: `service.version=0.0.1,foo=bar`. - -### Logging, Metrics, and Tracing Toggles - -- `OTEL_LOGGING_DISABLED`: Disables logging integration. - - Default: Follows `OTEL_SDK_DISABLED`. - -- `OTEL_TRACING_DISABLED`: Disables tracing integration. - - Default: Follows `OTEL_SDK_DISABLED` or `true`. - -- `OTEL_METRICS_DISABLED`: Disables metrics integration. - - Default: Follows `OTEL_SDK_DISABLED` or `true`. - ---- - -## Using with Docker Compose - -The `docker-compose.yml` provides an example configuration for local development. Replace the OTEL Collector with your preferred collector in production. 
- -### MindsDB Service Configuration - -```yaml -environment: - OTEL_EXPORTER_TYPE: "otlp" - OTEL_OTLP_ENDPOINT: "http://otel-collector:4317" - OTEL_SERVICE_NAME: "mindsdb" - OTEL_SERVICE_INSTANCE_ID: "mindsdb-instance" - OTEL_SERVICE_ENVIRONMENT: "local" - OTEL_LOGGING_DISABLED: "true" - OTEL_TRACING_DISABLED: "true" - OTEL_METRICS_DISABLED: "false" - OTEL_EXTRA_ATTRIBUTES: "service.version=0.0.1,foo=bar" - OTEL_SDK_FORCE_RUN: "true" -``` - -### OTEL Collector for Local Development - -```yaml -otel-collector: - image: otel/opentelemetry-collector-contrib:0.116.1 - volumes: - - ./otel-collector-config.yaml:/etc/otelcol-contrib/config.yaml - ports: - - 4317:4317 # OTLP gRPC receiver -``` diff --git a/docs/setup/self-hosted/docker-desktop.mdx b/docs/setup/self-hosted/docker-desktop.mdx deleted file mode 100644 index 80ae4c246d8..00000000000 --- a/docs/setup/self-hosted/docker-desktop.mdx +++ /dev/null @@ -1,86 +0,0 @@ ---- -title: Docker Desktop Extension for MindsDB -sidebarTitle: Docker Desktop ---- - -MindsDB provides an extension for Docker Desktop that facilitates running MindsDB on Docker Desktop. - - -Visit the [GitHub repository for MindsDB Docker Desktop Extension](https://github.com/mindsdb/mindsdb-docker-extension) to learn more. - - -## Prerequisites - -Before proceeding, ensure you have installed Docker Desktop, following the [official Docker Desktop documentation](https://www.docker.com/products/docker-desktop/). - -## Setup - -This setup of MindsDB uses the `mindsdb/mindsdb:latest` Docker image, which is a lightweight Docker image of MindsDB that comes with mysql,postgresql,snowflake,bigquery,mssql,and salesforce pre-installed. - -Follow the steps to set up MindsDB in Docker Desktop. - -### Install the MindsDB Docker Desktop Extension - - -If you are a Windows user, ensure that you have enabled Developer Mode under settings before installing the extension. - - - -It is not necessary to keep Developer Mode enabled to use the extension. 
Once the extension is installed, you can disable Developer Mode if you wish. - - -Go to the Extensions page in Docker Desktop and search for MindsDB. - -Install the MindsDB extension. - - -The first time the extension is installed, it will run the latest version of MindsDB. Moving forward, it's advisable to regularly update the MindsDB image used by the extension to ensure access to the latest features and improvements. - -As mentioned previously, the extension uses the `mindsdb/mindsdb:latest` Docker image. To update the image, follow these steps: -1. Navigate to the 'Images' tab in Docker Desktop. -2. Search or locate the mindsdb/mindsdb:latest image. -3. Click on the three dots on the right side of the image and click 'Pull'. If the image is already up to date, you will see a message stating so and you can skip the next step. -4. Wait for the image to be pulled and restart Docker Desktop. - - - - -Access MindsDB inside Docker Desktop. - -

- -

- -### Install dependencies - -In the MindsDB editor, go to *Settings* and *Manage Integrations*. - -Select integrations you want to use and click on *Install*. - -

- -

- -### View logs - -In order to view the logs generated by MindsDB when running the extension, follow these steps: -1. Navigate to the 'Containers' tab in Docker Desktop. -2. Search or locate the multi-container application running the MindsDB extension. This can be done by searching for 'mindsdb'. - - -If you do not see the application listed here, navigate to the 'Extensions' tab in Settings and ensure that the 'Show Docker Extensions system containers' option is enabled. - - - - -3. Click on the container named 'mindsdb_service'. This will direct you to the container running MindsDB. - -

- -

- -4. View the logs in the 'Logs' tab. - -

- -

\ No newline at end of file diff --git a/docs/setup/self-hosted/docker.mdx b/docs/setup/self-hosted/docker.mdx deleted file mode 100644 index e85a98b8343..00000000000 --- a/docs/setup/self-hosted/docker.mdx +++ /dev/null @@ -1,206 +0,0 @@ ---- -title: Docker for MindsDB -sidebarTitle: Docker ---- - -MindsDB provides Docker images that facilitate running MindsDB in Docker containers. - - -As MindsDB integrates with numerous [data sources](/integrations/data-overview) and [AI frameworks](/integrations/ai-overview), each integration requires a set of dependencies. Hence, MindsDB provides multiple Docker images for different tasks, as outlined below. - -* `mindsdb/mindsdb:latest` (or `mindsdb/mindsdb`) -It is the lightweight Docker image of MindsDB that comes with mysql,postgresql,snowflake,bigquery,mssql and salesforce pre-installed. - -* `mindsdb/mindsdb:huggingface` -It is the Docker image of MindsDB that comes with the Hugging Face integration preloaded. - - -## Prerequisites - -Before proceeding, ensure you have installed Docker, following the [official Docker documentation](https://docs.docker.com/install). - -## Setup - -This setup of MindsDB uses one of the available Docker images, as listed above. - - -When running MindsDB in one container and the integration you want to connect to it (such as Ollama or PGVector) in another container, then use `http://host.docker.internal` instead of `localhost` when connecting this integration to MindsDB. - - -Follow the steps to set up MindsDB in a Docker container. - -### Install MindsDB - -Run this command to create a Docker container with MindsDB: - -```bash -docker run --name mindsdb_container \ --e MINDSDB_APIS=http,mysql \ --p 47334:47334 -p 47335:47335 \ -mindsdb/mindsdb -``` - -Where: - -- `docker run` is a native Docker command used to spin up a container. -- `--name mindsdb_container` defines a name for the container. -- `-e MINDSDB_APIS=http,mysql` defines the APIs to be exposed by the MindsDB instance. 
All available APIs include `http`, `mysql`, and `postgres`. -- `-p 47334:47334 -p 47335:47335` defines the ports where the APIs are exposed (HTTP and MySQL respectively). -- `mindsdb/mindsdb` is a Docker image provided by MindsDB. You can choose a different one from the list above. - -Once the container is created, you can use the following commands: - -- `docker stop mindsdb_container` to stop the container. *Note that this may not always be necessary because when turning off the host machine, the container will also be shut down.* -- `docker start mindsdb_container` to restart a stopped container with all its previous changes (such as any dependencies that were installed) intact. *Note that `docker start` restarts a stopped container, while `docker run` creates a new container.* - - -If you don't want to follow the logs and get the prompt back, add the `-d` flag that stands for *detach*. - -```bash -docker run --name mindsdb_container -e MINDSDB_APIS=http -d -p 47334:47334 mindsdb/mindsdb -``` - - - -If you want to persist your models and configurations in the host machine, run these commands: - -```bash -mkdir mdb_data -docker run --name mindsdb_container -e MINDSDB_APIS=http -p 47334:47334 -v $(pwd)/mdb_data:/root/mdb_storage mindsdb/mindsdb -``` - -Where `-v $(pwd)/mdb_data:/root/mdb_storage` maps the newly created folder `mdb_data` on the host machine to the `/root/mdb_storage` inside the container. - - -Now you can access the MindsDB editor by going to `127.0.0.1:47334` in your browser. - -

- -

- - -If you experience any issues related to MKL or your training process does not complete, please add the `MKL_SERVICE_FORCE_INTEL` environment variable, as below. - -```bash -docker run --name mindsdb_container -e MKL_SERVICE_FORCE_INTEL=1 -e MINDSDB_APIS=http -p 47334:47334 mindsdb/mindsdb -``` - - - -If you want to enable authentication for MindsDB, you do so by passing the `MINDSDB_USERNAME` and `MINDSDB_PASSWORD` environment variables when running the container. - -```bash -docker run --name mindsdb_container -e MINDSDB_USERNAME='admin' -e MINDSDB_PASSWORD='password' -e MINDSDB_APIS=http -p 47334:47334 mindsdb/mindsdb -``` - - -### Install dependencies - -MindsDB integrates with numerous data sources and AI frameworks. To use any of the integrations, you should ensure that the required dependencies are installed in the Docker container. - -**Method 1** - -Install dependencies directly from MindsDB editor. Go to *Settings* and *Manage Integrations*, select integrations you want to use and click on *Install*. - -

- -

- -**Method 2** - -Start the MindsDB Docker container: - -```bash -docker start mindsdb_container -``` - - -If you haven't specified a container name when spinning up a container with `docker run`, you can find it by running `docker ps`. - - - -If you haven't yet created a container, use this command: - -```bash -docker run --name mindsdb_container -e MINDSDB_APIS=http -d -p 47334:47334 mindsdb/mindsdb -``` - - -Start an interactive shell in the container: - -```bash -docker exec -it mindsdb_container sh -``` - -Install the dependencies: - -```bash -pip install .[handler_name] -``` - -For example, run this command to install dependencies for the [OpenAI handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/openai_handler): - -```bash -pip install .[openai] -``` - -Exit the interactive shell: - -```bash -exit -``` - -Restart the container: - -```bash -docker restart mindsdb_container -``` - -## Configuration - -This is a configuration for MindsDB's Docker image that includes storage location, log level, debugging information, installed integrations, and API endpoints. These parameters can be customized by modifying a JSON file that stores default configuration. - -### Default configuration - -The default configuration for MindsDB's Docker image is stored as a JSON code, as below. - -```json -{ - "config_version":"1.4", - "paths": { - "root": "/root/mdb_storage" - }, - "debug": false, - "integrations": {}, - "api": { - "http": { - "host": "0.0.0.0", - "port": "47334" - }, - "mysql": { - "host": "0.0.0.0", - "password": "", - "port": "47335", - "user": "mindsdb", - "database": "mindsdb", - "ssl": true - } - } -} -``` - -### Custom configuration - -To override the default configuration, you can mount a config file created in your host machine over `/root/mindsdb_config.json`, as below. 
- -```bash -docker run --name mindsdb_container -e MINDSDB_APIS=http -d -p 47334:47334 -v $(pwd)/mdb_config.json:/root/mindsdb_config.json mindsdb/mindsdb -``` - - -**What's next?** - -Now that you installed and started MindsDB locally in your Docker container, go ahead and find out how to create and train a model using the [`CREATE MODEL`](/sql/create/model) statement. - -Check out the [Use Cases](/use-cases/overview) section to follow tutorials that cover Large Language Models, Chatbots, Time Series, Classification, and Regression models, Semantic Search, and more. - diff --git a/docs/setup/self-hosted/pip/linux.mdx b/docs/setup/self-hosted/pip/linux.mdx deleted file mode 100644 index 099c02937d1..00000000000 --- a/docs/setup/self-hosted/pip/linux.mdx +++ /dev/null @@ -1,172 +0,0 @@ ---- -title: Setup for Linux via pip -sidebarTitle: pip on Linux ---- - - -To successfully install MindsDB, use **Python 64-bit version**. Also, make sure that **Python >= 3.10** and **pip >= 20.3**. - - -## Installation using the Python [`venv`](https://docs.python.org/3/library/venv.html) Module - -1. Create a new virtual environment called `mindsdb`: - - ```bash - python -m venv mindsdb - ``` - - Now, activate it: - - ```bash - source mindsdb/bin/activate - ``` - -2. Once inside the virtual environment, run the command below to mitigate the - dependency issues: - - ```bash - pip install --upgrade pip setuptools wheel - ``` - -3. Install MindsDB: - - ```bash - pip install mindsdb - ``` - -4. Start MindsDB: - - ```bash - python -m mindsdb - ``` - - - By default, MindsDB starts the `http` and `mysql` APIs. You can define which APIs to start using the `api` flag as below. - - ```bash - python -m mindsdb --api http,mysql,postgres - ``` - - If you want to start MindsDB without the graphical user interface (GUI), use the `--no_studio` flag as below. 
- - ```bash - python -m mindsdb --no_studio - ``` - - -## Installation using Anaconda - -Here, you need either [Anaconda](https://www.anaconda.com/products/individual) -or [Conda](https://conda.io/projects/conda/en/latest/index.html) installed on -your machine. - -1. Open Anaconda prompt and create a new virtual environment: - - ```bash - conda create -n mindsdb - ``` - - Now, activate it: - - ```bash - conda activate mindsdb - ``` - -2. Once inside the virtual environment, run the command below to mitigate the - dependency issues: - - ```bash - pip install --upgrade pip setuptools wheel - ``` - -3. Install MindsDB: - - ```bash - pip install mindsdb - ``` - -4. Start MindsDB: - - ```bash - python -m mindsdb - ``` - - - By default, MindsDB starts the `http` and `mysql` APIs. You can define which APIs to start using the `api` flag as below. - - ```bash - python -m mindsdb --api http,mysql,postgres - ``` - - If you want to start MindsDB without the graphical user interface (GUI), use the `--no_studio` flag as below. - - ```bash - python -m mindsdb --no_studio - ``` - - -## Dependencies - -The dependencies for many of the data or ML integrations are not installed by default. - -If you want to use a data or ML integration whose dependencies are not available by default, install it by running this command: - -``` -pip install mindsdb[handler_name] -``` - - -You can find all available [handlers here](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers). - - -## Troubleshooting - -### Pip and Python Versions - -Currently, MindsDB supports Python versions 3.10.x, and 3.11.x. - -To successfully install MindsDB, use **Python 64-bit version**. Also, make sure -that **Python >= 3.10** and **pip >= 20.3**. You can check the pip and python -versions by running the `pip --version` and -`python --version` commands. 
- -Please note that depending on your environment and installed pip and python -packages, you might have to use **pip3** instead of **pip** or **python3.x** -instead of **py**. For example, `pip3 install mindsdb` instead of -`pip install mindsdb`. - -### How to Avoid Dependency Issues - -Install MindsDB in a virtual environment using **pip** to avoid dependency -issues. - -Or you could try to install MindsDB with -[Anaconda](https://www.anaconda.com/products/individual) and run the -installation from the **Anaconda prompt**. - -### How to Avoid Common Errors - -MindsDB requires around 3 GB of free disk space to install all of its -dependencies. Make sure to allocate min. 3 GB of disk space to avoid the -`IOError: [Errno 28] No space left on device while installing MindsDB` error. - -Before anything, activate your virtual environment where your MindsDB is -installed. It is to avoid the `No module named mindsdb` error. - - -## Further Issues? - -You can try to replicate your issue using the -[Docker setup](/setup/self-hosted/docker/). - -Also, please create an issue with detailed description in the -[MindsDB GitHub repository](https://github.com/mindsdb/mindsdb/issues) so we can -help you. Usually, we review issues and respond within a few hours. - -## What's Next - -Now that you have installed and started MindsDB locally, go ahead and find out how to create and train a model using the [`CREATE MODEL`](/sql/create/model) statement. In the **MindsDB SQL** section, you'll find a comprehensive overview of the SQL syntax offered by MindsDB. - -You can connect MindsDB to different clients, including [PostgreSQL CLI](/connect/postgres-client) and [MySQL CLI](/connect/mysql-client). - -Check out the [Use Cases](/use-cases/overview) section to follow tutorials that cover Large Language Models, Natural Language Processing, Time Series, Classification, and Regression models.
diff --git a/docs/setup/self-hosted/pip/macos.mdx b/docs/setup/self-hosted/pip/macos.mdx deleted file mode 100644 index c71b6dead81..00000000000 --- a/docs/setup/self-hosted/pip/macos.mdx +++ /dev/null @@ -1,179 +0,0 @@ ---- -title: Setup for MacOS via pip -sidebarTitle: pip on MacOS ---- - - -To successfully install MindsDB, use **Python 64-bit version**. Also, make sure that **Python >= 3.10** and **pip >= 20.3**. - - -## Installation using the Python [`venv`](https://docs.python.org/3/library/venv.html) Module - -1. Create a new virtual environment called `mindsdb`: - - ```bash - python -m venv mindsdb - ``` - - Now, activate it: - - ```bash - source mindsdb/bin/activate - ``` - -2. Once inside the virtual environment, run the command below to mitigate the - dependency issues: - - ```bash - pip install --upgrade pip setuptools wheel - ``` - -3. Install MindsDB: - - ```bash - pip install mindsdb - ``` - -4. Start MindsDB: - - ```bash - python -m mindsdb - ``` - - - By default, MindsDB starts the `http` and `mysql` APIs. You can define which APIs to start using the `api` flag as below. - - ```bash - python -m mindsdb --api http,mysql,postgres - ``` - - If you want to start MindsDB without the graphical user interface (GUI), use the `--no_studio` flag as below. - - ```bash - python -m mindsdb --no_studio - ``` - - -## Installation using Anaconda - -Here, you need either [Anaconda](https://www.anaconda.com/products/individual) -or [Conda](https://conda.io/projects/conda/en/latest/index.html) installed on -your machine. - -1. Open Anaconda prompt and create a new virtual environment: - - ```bash - conda create -n mindsdb - ``` - - Now, activate it: - - ```bash - conda activate mindsdb - ``` - -2. Once inside the virtual environment, run the command below to mitigate the - dependency issues: - - ```bash - pip install --upgrade pip setuptools wheel - ``` - -3. Install MindsDB: - - ```bash - pip install mindsdb - ``` - -4. 
Start MindsDB: - - ```bash - python -m mindsdb - ``` - - - By default, MindsDB starts the `http` and `mysql` APIs. You can define which APIs to start using the `api` flag as below. - - ```bash - python -m mindsdb --api http,mysql,postgres - ``` - - If you want to start MindsDB without the graphical user interface (GUI), use the `--no_studio` flag as below. - - ```bash - python -m mindsdb --no_studio - ``` - - -## Dependencies - -The dependencies for many of the data or ML integrations are not installed by default. - -If you want to use a data or ML integration whose dependencies are not available by default, install it by running this command: - -``` -pip install mindsdb[handler_name] -``` - - -You can find all available [handlers here](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers). - - -## Troubleshooting - -### Pip and Python Versions - -Currently, MindsDB supports Python versions 3.10.x and 3.11.x. - -To successfully install MindsDB, use **Python 64-bit version**. Also, make sure -that **Python >= 3.10** and **pip >= 20.3**. You can check the pip and python -versions by running the `pip --version` and -`python --version` commands.
- -Before anything, activate your virtual environment where your MindsDB is -installed. It is to avoid the `No module named mindsdb` error. - -Some users can get `OSError: dlopen Library not loaded 'libomp.dylib'`. Please make sure you have installed `libomp` and run the export commands. - -``` -brew install libomp -export LDFLAGS="-L/usr/local/opt/libomp/lib" -export CPPFLAGS="-I/usr/local/opt/libomp/include" -``` - -## Further Issues? - -You can try to replicate your issue using the -[Docker setup](/setup/self-hosted/docker/). - -Also, please create an issue with detailed description in the -[MindsDB GitHub repository](https://github.com/mindsdb/mindsdb/issues) so we can -help you. Usually, we review issues and respond within a few hours. - -## What's Next - -Now that you installed and started MindsDB locally in your Docker container, go ahead and find out how to create and train a model using the [`CREATE MODEL`](/sql/create/model) statement. In the **MindsDB SQL** section, you'll find a comprehensive overview of the SQL syntax offered by MindsDB. - -You can connect MindsDB to different clients, including [PostgreSQL CLI](/connect/postgres-client) and [MySQL CLI](/connect/mysql-client). - -Check out the [Use Cases](/use-cases/overview) section to follow tutorials that cover Large Language Models, Natural Language Processing, Time Series, Classification, and Regression models. diff --git a/docs/setup/self-hosted/pip/source.mdx b/docs/setup/self-hosted/pip/source.mdx deleted file mode 100644 index b96bf0b5811..00000000000 --- a/docs/setup/self-hosted/pip/source.mdx +++ /dev/null @@ -1,144 +0,0 @@ ---- -title: Setup for Source Code via pip -sidebarTitle: pip from Source ---- - -This section describes how to deploy MindsDB from the source code. It is the preferred way to use MindsDB if you want to contribute to our code or debug MindsDB. - - -To successfully install MindsDB, use **Python 64-bit version**. Also, make sure that **Python >= 3.10** and **pip >= 20.3**. 
- - -## Installation - - -Please note that this method of MindsDB installation requires a minimum of 6 GB free storage. - - -1. Clone the MindsDB repository: - - ```bash - git clone https://github.com/mindsdb/mindsdb.git - ``` - -2. Create a new virtual environment: - - ```bash - python -m venv mindsdb-venv - ``` - -3. Activate the virtual environment: - - ```bash - source mindsdb-venv/bin/activate - ``` - -4. Install dependencies: - - ```bash - cd mindsdb - pip install -e . - pip install -r requirements/requirements-dev.txt - ``` - -5. Start MindsDB: - - ```bash - python -m mindsdb - ``` - - - By default, MindsDB starts the `http` and `mysql` APIs. You can define which APIs to start using the `api` flag as below. - - ```bash - python -m mindsdb --api http,mysql,postgres - ``` - - If you want to start MindsDB without the graphical user interface (GUI), use the `--no_studio` flag as below. - - ```bash - python -m mindsdb --no_studio - ``` - - -6. Now, you can access the following: - - - -```bash MindsDB Studio -http://127.0.0.1:47334/ -``` - -```bash MindsDB using MySQL -mysql -h 127.0.0.1 --port 47335 -u mindsdb -p -``` - - - -## Dependencies - -The dependencies for many of the data or ML integrations are not installed by default. - -If you want to use a data or ML integration whose dependencies are not available by default, install it by running this command: - -``` -pip install mindsdb[handler_name] -``` - - -You can find all available [handlers here](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers). - - -## Troubleshooting - -### Pip and Python Versions - -Currently, MindsDB supports Python versions 3.10.x and 3.11.x. - -To successfully install MindsDB, use **Python 64-bit version**. Also, make sure -that **Python >= 3.10** and **pip >= 20.3**. You can check the pip and python -versions by running the `pip --version` and -`python --version` commands.
- -Please note that depending on your environment and installed pip and python -packages, you might have to use **pip3** instead of **pip** or **python3.x** -instead of **py**. For example, `pip3 install mindsdb` instead of -`pip install mindsdb`. - -### How to Avoid Dependency Issues - -Install MindsDB in a virtual environment using **pip** to avoid dependency -issues. - -### How to Avoid Common Errors - -MindsDB requires around 3 GB of free disk space to install all of its -dependencies. Make sure to allocate min. 3 GB of disk space to avoid the -`IOError: [Errno 28] No space left on device while installing MindsDB` error. - -Before anything, activate your virtual environment where your MindsDB is -installed. It is to avoid the `No module named mindsdb` error. - -If you encounter the `This site can’t be reached. 127.0.0.1 refused to connect.` -error, please check the MindsDB server console to see if the server is still in -the `starting` phase. But if the server has started and you still get this -error, please report it on our -[GitHub repository](https://github.com/mindsdb/mindsdb/issues). - - -## Further Issues? - -You can try to use -[Docker setup](/setup/self-hosted/docker/) in case you are experiencing issues using pip. - -Also, please create an issue with detailed description in the -[MindsDB GitHub repository](https://github.com/mindsdb/mindsdb/issues) so we can -help you. Usually, we review issues and respond within a few hours. - -## What's Next - -Now that you installed and started MindsDB locally in your Docker container, go ahead and find out how to create and train a model using the [`CREATE MODEL`](/sql/create/model) statement. In the **MindsDB SQL** section, you'll find a comprehensive overview of the SQL syntax offered by MindsDB. - -You can connect MindsDB to different clients, including [PostgreSQL CLI](/connect/postgres-client) and [MySQL CLI](/connect/mysql-client). 
- -Check out the [Use Cases](/use-cases/overview) section to follow tutorials that cover Large Language Models, Natural Language Processing, Time Series, Classification, and Regression models. diff --git a/docs/setup/self-hosted/pip/windows.mdx b/docs/setup/self-hosted/pip/windows.mdx deleted file mode 100644 index 56a2fe98aa8..00000000000 --- a/docs/setup/self-hosted/pip/windows.mdx +++ /dev/null @@ -1,174 +0,0 @@ ---- -title: Setup for Windows via pip -sidebarTitle: pip on Windows ---- - - -To successfully install MindsDB, use **Python 64-bit version**. Also, make sure that **Python >= 3.10** and **pip >= 20.3**. - - -## Installation using the Python [`venv`](https://docs.python.org/3/library/venv.html) Module - -1. Create a new virtual environment called `mindsdb`: - - ```bash - py -m venv mindsdb - ``` - - Now, activate it: - - ```bash - .\mindsdb\Scripts\activate.bat - ``` - -2. Once inside the virtual environment, run the command below to mitigate the - dependency issues: - - ```bash - pip install --upgrade pip setuptools wheel - ``` - -3. Install MindsDB: - - ```bash - pip install mindsdb - ``` - -4. Start MindsDB: - - ```bash - python -m mindsdb - ``` - - - By default, MindsDB starts the `http` and `mysql` APIs. You can define which APIs to start using the `api` flag as below. - - ```bash - python -m mindsdb --api http,mysql,postgres - ``` - - If you want to start MindsDB without the graphical user interface (GUI), use the `--no_studio` flag as below. - - ```bash - python -m mindsdb --no_studio - ``` - - -## Installation using Anaconda - -Here, you need either [Anaconda](https://www.anaconda.com/products/individual) -or [Conda](https://conda.io/projects/conda/en/latest/index.html) installed on -your machine. - -1. Open Anaconda prompt and create a new virtual environment: - - ```bash - conda create -n mindsdb - ``` - - Now, activate it: - - ```bash - conda activate mindsdb - ``` - -2. 
Once inside the virtual environment, run the command below to mitigate the - dependency issues: - - ```bash - pip install --upgrade pip setuptools wheel - ``` - -3. Install MindsDB: - - ```bash - pip install mindsdb - ``` - -4. Start MindsDB: - - ```bash - python -m mindsdb - ``` - - - By default, MindsDB starts the `http` and `mysql` APIs. You can define which APIs to start using the `api` flag as below. - - ```bash - python -m mindsdb --api http,mysql,postgres - ``` - - If you want to start MindsDB without the graphical user interface (GUI), use the `--no_studio` flag as below. - - ```bash - python -m mindsdb --no_studio - ``` - - -## Dependencies - -The dependencies for many of the data or ML integrations are not installed by default. - -If you want to use a data or ML integration whose dependencies are not available by default, install it by running this command: - -``` -pip install mindsdb[handler_name] -``` - - -You can find all available [handlers here](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers). - - -## Troubleshooting - -### Pip and Python Versions - -Currently, MindsDB supports Python versions 3.10.x, and 3.11.x. - -To successfully install MindsDB, use **Python 64-bit version**. Also, make sure -that **Python >= 3.10** and **pip >= 20.3**. You can check the pip and python -versions by running the `pip --version` and -`python --version` commands. - -Please note that depending on your environment and installed pip and python -packages, you might have to use **pip3** instead of **pip** or **python3.x** -instead of **py**. For example, `pip3 install mindsdb` instead of -`pip install mindsdb`. - -### How to Avoid Dependency Issues - -Install MindsDB in a virtual environment using **pip** to avoid dependency -issues. - -Or you could try to install MindsDB with -[Anaconda](https://www.anaconda.com/products/individual) and run the -installation from the **Anaconda prompt**. 
- -In addition, for Windows systems with default languages other than English, your system -might not have UTF-8 as the default encoding standard, which will cause encoding errors -when installing dependencies. To solve this issue, go to -`Control Panel` > `Clock and Region` > `Region` > `Administrative tab` > -`Change system locale button` and enable `Beta: Use Unicode UTF-8 for worldwide language support`. - -### Installing torch or torchvision - -If the installation fails when installing **torch** or **torchvision**, try to -install them manually by following the instructions on their -[official website](https://pytorch.org/get-started/locally/). - -## Further Issues? - -You can try to replicate your issue using the -[Docker setup](/setup/self-hosted/docker/). - -Also, please create an issue with detailed description in the -[MindsDB GitHub repository](https://github.com/mindsdb/mindsdb/issues) so we can -help you. Usually, we review issues and respond within a few hours. - -## What's Next - -Now that you installed and started MindsDB locally in your Docker container, go ahead and find out how to create and train a model using the [`CREATE MODEL`](/sql/create/model) statement. In the **MindsDB SQL** section, you'll find a comprehensive overview of the SQL syntax offered by MindsDB. - -You can connect MindsDB to different clients, including [PostgreSQL CLI](/connect/postgres-client) and [MySQL CLI](/connect/mysql-client). - -Check out the [Use Cases](/use-cases/overview) section to follow tutorials that cover Large Language Models, Natural Language Processing, Time Series, Classification, and Regression models. 
diff --git a/docs/setup/system-defaults.mdx b/docs/setup/system-defaults.mdx deleted file mode 100644 index 812803122b3..00000000000 --- a/docs/setup/system-defaults.mdx +++ /dev/null @@ -1,128 +0,0 @@ ---- -title: MindsDB System Defaults -sidebarTitle: System Defaults ---- - -System defaults in MindsDB provide a convenient way to set application-wide configurations for commonly used AI models. By defining these defaults once, users can streamline workflows and avoid repeatedly specifying model parameters when creating or using various MindsDB objects and functions. - -## Usage of System Defaults - -When system defaults are set, MindsDB can automatically use the configured models across the platform for various components such as: - -* [Agents](/mindsdb_sql/agents/agent) that can answer questions over the connected data and are powered by a default large language model (LLM). - -* [Knowledge Bases](/mindsdb_sql/knowledge_bases/overview) that can store and search both structured and unstructured data, and use a default embedding model for embedding the content and a default reranking model for reranking the search results. Additionally, knowledge bases use a default model for evaluating performance with the [EVALUATE KNOWLEDGE_BASE command](/mindsdb_sql/knowledge_bases/evaluate). - -* Custom functions such as [LLM()](/mindsdb_sql/functions/llm_function) and [TO_MARKDOWN()](/mindsdb_sql/functions/to_markdown_function) that rely on the default LLM for text generation and formatting. - -Once configured, users can create and use agents, knowledge bases, and custom functions without having to specify model parameters each time. This ensures consistent behavior across the system and simplifies deployment. 
- -## Available System Defaults - -MindsDB supports the following system defaults: - -| System Default | Used By | Description | -|--------------------------|--------------------------------------------------------------|---------------------------------------------------------------------------------------------------| -| Default LLM | Agents, EVALUATE, KNOWLEDGE_BASE, LLM(), TO_MARKDOWN() | Used as an underlying LLM for reasoning, conversation, and text generation and formatting. | -| Default Embedding Model | Knowledge Bases | Converts inserted content and user questions into embeddings for semantic search. | -| Default Reranking Model | Knowledge Bases | Reranks search results to improve retrieval accuracy. | - -## Supported Model Providers - -Different components in MindsDB support different sets of model providers. - -**Knowledge Bases** - -Supported providers for **embedding models**: - -* Azure OpenAI -* Bedrock -* Google -* OpenAI (and OpenAI-compatible model providers) -* Snowflake Cortex AI - -Supported providers for **reranking models**: - -* Azure OpenAI -* Bedrock -* Google -* OpenAI (and OpenAI-compatible model providers) -* Snowflake Cortex AI - -Supported providers for **models used to evaluate knowledge bases**: - -* Azure OpenAI -* Bedrock -* Google -* OpenAI (and OpenAI-compatible model providers) -* Snowflake Cortex AI - -**Agents** - -Supported providers for **default models**: - -* Bedrock -* Google -* Ollama -* OpenAI (and OpenAI-compatible model providers) - -**LLM()** - -Supported providers for **default models**: - -* Ollama -* OpenAI (and OpenAI-compatible model providers) - -**TO_MARKDOWN()** - -Supported providers for **default models**: - -* Azure OpenAI -* Google -* OpenAI (and OpenAI-compatible model providers) - -## How to Configure System Defaults - -You can configure system defaults using either the MindsDB UI or the configuration file, depending on your setup preferences. 
- -The configuration variables include a provider name, a model name, and – if available – base URL, API key, and API version. - -**Option 1: Configure via MindsDB UI** - -1. Open the MindsDB UI. -2. Navigate to Settings → Models. -3. Define the models for each of the system defaults as follows: -a. Under Provider, select the model provider from the dropdown. -b. Under Model, define the model name that is available with the selected model provider. -c. Under Base URL, define the base URL of the model provider, if available. -d. Under API key, provide the API key, if available. -e. Under API version, define the API version, if available. -4. Click the Test & Save button to validate and save the configuration. - -After saving, the defaults take immediate effect across your MindsDB instance. - -**Option 2: Configure via MindsDB Configuration File** - -You can also define system defaults in the [MindsDB configuration file](/setup/custom-config). This method is recommended for advanced or automated deployments. - -When MindsDB is started with the custom configuration file, it will automatically load and apply these default models. - -**Option 3: Environment Variables** - -For functions like [LLM()](/mindsdb_sql/functions/llm_function) and [TO_MARKDOWN()](/mindsdb_sql/functions/to_markdown_function), system defaults can also be defined using environment variables. This allows for easy configuration in containerized or cloud deployments. - -Refer to the individual function documentation for details on environment variables. - -**Option 4: Define Models at Object Creation** - -You can specify models when creating [agents](/mindsdb_sql/agents/agent_syntax) and [knowledge bases](/mindsdb_sql/knowledge_bases/create). These models override the system defaults for that specific object. - -This allows you to tailor model behavior per agent or per knowledge base while keeping system-wide defaults in place.
- - -Note that after changing the default model, the existing objects are not updated with the new default model. All objects being created going forward will use the updated default models. - - -## Summary - -System defaults in MindsDB simplify the AI development by standardizing the models used across various components. Whether configured through the UI, YAML configuration file, or environment variables, defaults help maintain consistency and reduce setup time. diff --git a/docs/sql/project.mdx b/docs/sql/project.mdx deleted file mode 100644 index 84452ef4af1..00000000000 --- a/docs/sql/project.mdx +++ /dev/null @@ -1,155 +0,0 @@ ---- -title: MindsDB Projects -sidebarTitle: MindsDB Projects ---- - -MindsDB enables you to group all objects within [projects](/sql/project). - -Projects store [all MindsDB schema objects](/sql/table-structure#the-information-schema-database) except for handlers, connected data sources, and configured AI/ML engines. That is, projects can store models, views, jobs, triggers, agents, skills, knowledge bases, and chatbots. - -MindsDB provides the default `mindsdb` project where all objects created without defining a project are stored. - -## Working with MindsDB Projects - -### Create a Project - -Use the below command to create a project. - -```sql -CREATE PROJECT project_name; -``` - - -Use lower-case letters for a project name. - - -### List All Projects - -Use the below command to list all projects. - -```sql -SHOW [FULL] DATABASES -WHERE type = 'project'; -``` - -### Create an Object within a Project - -Use the below command template to create an object within a project. - -```sql -CREATE project_name.object_name -...; -``` - -### Drop a Project - -Use the below command to remove a project. - -```sql -DROP PROJECT project_name; -``` - - -Please note that if your project stores at least one object, it cannot be removed. 
In this case, you should first drop all the objects belonging to this project, and then, you can remove the project. -Please see the [Example](#example) section for details. - - -## Example - -Let's create a project. - -```sql -CREATE PROJECT my_project; -``` - -To verify that the project was created successfully, let's run the command below to select all databases, including connected data sources and projects. - -```sql -SHOW FULL DATABASES; -``` - -On execution, we get: - -```sql -+------------------+-------+------+ -|Database |TYPE |ENGINE| -+------------------+-------+------+ -|information_schema|system |[NULL]| -|mindsdb |project|[NULL]| -|my_project |project|[NULL]| -|files |data |files | -+------------------+-------+------+ -``` - - -Please note that `information_schema` is the system database, `mindsdb` is the default project, and `files` is the database to store all uploaded files. For more information, please visit our docs on [MindsDB default structure](/sql/table-structure/). - - -Now we create a model within the project. - -```sql -CREATE MODEL my_project.my_model -FROM example_db - (SELECT * FROM demo_data.home_rentals) -PREDICT rental_price; -``` - -Also, let's create a view. - -```sql -CREATE VIEW my_project.my_view ( - SELECT * - FROM example_db.demo_data.home_rentals -); -``` - -Here is what we have in the `my_project` project. - -```sql -SHOW TABLES FROM my_project; -``` - -On execution, we get: - -```sql -+--------------------+ -|Tables_in_my_project| -+--------------------+ -|my_model | -|my_view | -+--------------------+ -``` - -Let's try to delete our project. - -```sql -DROP PROJECT my_project; -``` - -On execution, we get: - -```sql -Project 'my_project' can not be deleted, because it contains tables: my_model, my_view -``` - -Users should remove all project content before dropping a project. - -```sql -DROP MODEL my_project.my_model; -DROP VIEW my_project.my_view; -``` - -Now we can proceed to drop a project. 
- -```sql -DROP PROJECT my_project; -``` - - -**Next Steps** - -Below are the links to help you explore further. - -* Find out how to [create and use projects](/sql/create/project). -* Learn more about [MindsDB Schema](/sql/table-structure). - diff --git a/docs/sql/table-structure.mdx b/docs/sql/table-structure.mdx deleted file mode 100644 index 77480ded4b5..00000000000 --- a/docs/sql/table-structure.mdx +++ /dev/null @@ -1,116 +0,0 @@ ---- -title: MindsDB Schema -sidebarTitle: MindsDB Schema ---- - -Initially, MindsDB comprises three system databases and one default project, as follows: - -- `information_schema` stores metadata of all the objects such as handlers, databases, AI engines, models, jobs, and more. -- `log` stores log data of models and jobs. -- `files`, which is initially empty, stores all files uploaded to MindsDB. -- `mindsdb` is the default project for storing models, views, jobs, triggers, and agents. - -List all databases by running the following SQL commands: - -```sql -SHOW [FULL] DATABASES; -``` - -Here is the output: - -```sql -+----------------------+---------+--------+ -| Database | TYPE | ENGINE | -+----------------------+---------+--------+ -| information_schema | system | [NULL] | -| log | system | [NULL] | -| mindsdb | project | [NULL] | -| files | data | files | -+----------------------+---------+--------+ -``` - -## The `information_schema` Database - -The `information_schema` database contains all the system tables that correspond to MindsDB objects as follows: - -| Name | Description | -|-------------------|---------------------------------------------------------------------------------------------------------------------------------------| -| `HANDLERS` | Stores all (data and AI) handlers, which are [data integrations](/integrations/data-overview) and [AI integrations](/integrations/ai-overview) supported by MindsDB. | -| `DATABASES` | Stores all data sources connected to MindsDB. 
Note that corresponding handlers are required, and you can connect only the [data sources supported by MindsDB](/integrations/data-overview) after [installing the required handler dependencies](/setup/self-hosted/docker#install-dependencies). | -| `ML_ENGINES` | Stores all AI/ML engines configured at MindsDB. Note that corresponding handlers are required, and you can connect only the [AI/ML engines supported by MindsDB](/integrations/ai-overview) after [installing the required handler dependencies](/setup/self-hosted/docker#install-dependencies). | -| `MODELS` | Stores all models deployed within the MindsDB ecosystem. Note that you can create and deploy a model only after configuring the corresponding AI/ML engine. | -| `VIEWS` | Stores all views created in MindsDB. | -| `JOBS` | Stores all [jobs](/mindsdb_sql/sql/create/jobs) that facilitate workflow automation. | -| `TRIGGERS` | Stores all [triggers](/mindsdb_sql/sql/create/trigger) that facilitate workflow automation. | -| `AGENTS` | Stores all [AI agents](/mindsdb_sql/agents/agent) created in MindsDB. | -| `SKILLS` | Stores all skills that can be assigned to [AI agents](/mindsdb_sql/agents/agent). | -| `KNOWLEDGE_BASES` | Stores all [knowledge bases](/mindsdb_sql/agents/knowledge-bases) that can be assigned to AI agents as skills. | -| `CHATBOTS` | Stores all [chatbots](/mindsdb_sql/agents/chatbot) that comprise an AI agent and a chat interface. | - - -Some of the objects, including `DATABASES`, `ML_ENGINES`, and `MODELS`, may contain sensitive information in the form of API keys or passwords. MindsDB hides this sensitive information by default. - -If you want to expose this sensitive information in the output when querying these objects, set the `show_secrets` flag to `true`. - -```sql -SET SHOW_SECRETS = TRUE; -``` - -And to hide them back, set it to `false`. 
- -```sql -SET SHOW_SECRETS = FALSE; -``` - - -Use the `SHOW` command to list all objects as follows: - -```sql -SHOW object_name -[FROM project_name] -[LIKE 'object_name_part%'] -[WHERE key = value]; -``` - -For instance, list all OpenAI models from the `mindsdb` project that contain `ai` in their names. - -```sql -SHOW MODELS -FROM mindsdb -LIKE '%ai%' -WHERE engine = 'openai'; -``` - -Another example of how to query for the available data and AI handlers: - -```sql -SHOW HANDLERS -WHERE type = 'data'; - -SHOW HANDLERS -WHERE type = 'ml'; -``` - - -Before you can connect a data source using a data handler or create a model using an AI handler, make sure that the `IMPORT_SUCCESS` column reads *true*. If it reads *false*, then [install the dependencies for this handler](/setup/self-hosted/docker#install-dependencies) before using it. - - -## The `mindsdb` Project - -MindsDB enables you to group all objects within [projects](/sql/project). Projects can store models, views, jobs, triggers, agents, skills, knowledge bases, and chatbots. - - -Projects store all objects except for handlers, connected data sources, and configured AI/ML engines. - -Note that based on the available handlers, you can connect a data source to MindsDB or configure an AI/ML engine within MindsDB. Having done that, you can, for instance, create a view with data from the connected data source and store it inside the project, or create a model based on the configured AI/ML engine and store it inside the project. - - -MindsDB provides the default `mindsdb` project where all objects created without defining a project are stored. - -Learn more about how to create and manage [projects here](/sql/project). - -## The `files` Database - -It is another default database that stores all the files uploaded to MindsDB. - -Here is how you can [upload files to MindsDB](/sql/create/file/).
diff --git a/docs/use-cases.html b/docs/use-cases.html new file mode 100644 index 00000000000..10faeb26337 --- /dev/null +++ b/docs/use-cases.html @@ -0,0 +1,337 @@ + + + + +Use Cases — Minds Cowork Docs + + + + + + + + + + +
+
Use Cases
+

Two things.
Automate and Create.

+

+ Minds Cowork is built around two core primitives. Everything below is an expression of one or both. +

+
+
+
Automate
+
Recurring tasks that run without you — reports, monitoring, workflows, digests.
+
+
+
Create
+
Artifacts that used to take hours — apps, decks, docs, analyses, dashboards.
+
+
+
+ +
+ + +
+
+
+
+

Creators

+
Writers, designers, content teams
+
+
+
+
+ Automate +
Content pipeline on autopilot
+
Draft, research, and publish in one run. The agent handles the full workflow from brief to publication.
+
blog → social → newsletter
+
+
+ Automate +
Repurpose long-form content
+
Feed a transcript or essay and get a Twitter thread, LinkedIn post, and email summary — automatically.
+
video → clips → captions → posts
+
+
+ Create +
Personal knowledge base
+
Upload your notes, docs, and bookmarks. Ask questions in plain language and get answers that actually reference your material.
+
+
+
+ + +
+
+
+
+

Operators

+
Sales, support, finance, engineering teams
+
+
+
+
+ Automate +
Live data reports, written for you
+
Connect your data source and set a schedule. Every Monday the agent pulls fresh numbers and writes the summary.
+
data → analysis → digest → send
+
+
+ Automate +
Meeting notes to tasks
+
Paste a transcript or upload a recording. The agent extracts action items and creates tasks — hands-free.
+
meeting → notes → actions → tasks
+
+
+ Automate +
Thread monitoring and triage
+
Monitor a Slack channel or email thread and surface only what actually needs a decision — nothing else.
+
+
+
+ + +
+
+
+
+

Strategists

+
Analysts, founders, product and research teams
+
+
+
+
+ Create +
Ask across all your data
+
Query across docs, tickets, spreadsheets, and databases in plain language. No SQL, no dashboards.
+
+
+ Automate +
Competitive research digest
+
Run a competitor sweep on a schedule. Every week you get a structured digest of what changed — pricing, features, messaging.
+
sources → extract → compare → report
+
+
+ Create +
Synthesize feedback at scale
+
Feed support tickets, reviews, and survey responses. Get back themes, sentiment, and concrete product signal.
+
feedback → themes → insights → action
+
+
+
+ + +
+
+
+
+

For everyone

+
Any knowledge worker with repetitive work
+
+
+
+
+ Automate +
Automate any multi-step task
+
If it involves reading something and writing something else on a schedule, the agent can own it.
+
+
+ Create +
Build internal AI tools
+
Describe what your team needs. The agent builds it — a web app, a form, a dashboard — and deploys it, no engineering required.
+
+
+
+ +
+ +
+
+
+

Ready to build?

+

Set up the platform in minutes and start automating your first workflow today.

+
+ Get started → +
+
+ + + + + diff --git a/frontend b/frontend new file mode 160000 index 00000000000..b2490ccf2c2 --- /dev/null +++ b/frontend @@ -0,0 +1 @@ +Subproject commit b2490ccf2c210ba42edc7bbb0058b3d0e4850fa4 diff --git a/mindsdb hacktoberfest/README.md b/mindsdb hacktoberfest/README.md deleted file mode 100644 index 53cf02f559c..00000000000 --- a/mindsdb hacktoberfest/README.md +++ /dev/null @@ -1,142 +0,0 @@ -# 🎃 MindsDB Hacktoberfest 2025 - -## Supercharging AI analytical Apps with Knowledge Bases ⚡ - -This Hacktoberfest, MindsDB challenges you to build RAG apps using Knowledge Bases. - - -### 🌟 Why Join? -MindsDB's Hacktoberfest is your chance to turn code into impact: -- Build tools that answer real business questions. -- Help teams move beyond rigid dashboards and siloed data. -- Level up your open-source contributions with AI-native analytics apps. -- Compete for prizes: GitHub sponsorships, swag, and a Prize Draw for a [MacBook Pro 16" M4 Chip](https://www.apple.com/shop/buy-mac/macbook-pro/16-inch-space-black-standard-display-apple-m4-pro-with-14-core-cpu-and-20-core-gpu-48gb-memory-512gb). -- Get your project featured on the MindsDB blog + community. - -**Your mission:** Create AI apps powered by MindsDB's Knowledge Bases that query enterprise-like data in place—delivering accurate, explainable answers. - - ------- - -## 🛠️ Core Task - -- Pick a use case where there is unstructured data and can benefit from making it searchable via natural language: (For example analyzing CRM Unstructured data: Notes, Emails, Calls, Meetings, Tasks, Conversations → transcripts, attachments, Tickets → Descriptions, associated notes/emails) -- Pick the datasources that you will need for these use cases -- Write a blog post (Medium, Hashnode, dev.to, LinkedIn) explaining your use case. 
-- Write a pull request with your use-case implementation in the use-cases folder (create a folder for your use case with a descriptive name) -- Your use case implementation can be either a notebook or an app that uses MindsDB + Knowledge bases -- Promote your use case on LinkedIn and X/Twitter with a post mentioning @mindsdb. - ----- - -## 🏆 Prize Categories - -Stand a chance to win a [MacBook Pro 16" M4 Chip](https://www.apple.com/shop/buy-mac/macbook-pro/16-inch-space-black-standard-display-apple-m4-pro-with-14-core-cpu-and-20-core-gpu-48gb-memory-512gb) in our Prize Draw! - -### 🔥 Most Popular Pull Requests -- Top 3 Pull Requests with the most thumbs up (👍) or heart (❤️) reactions win GitHub sponsorship prizes. -- Every 10 positive reactions = 1 entry into the Apple MacBook Pro prize draw. - -**Prizes:** -- 🥇 $1500 + MindsDB T-shirt -- 🥈 $1000 + MindsDB T-shirt -- 🥉 $500 + MindsDB T-shirt - - -(Note: GitHub sponsorship must be available in your country in order to receive the prize; participants should check before they contribute. Automated voting is not allowed—violations will be disqualified.) - -### 📣 Social Media Awareness -Top 3 posts (LinkedIn/X) with the most engagement win: -- MindsDB T-shirt -- 1 entry into the Apple MacBook Pro prize draw -- $100 GitHub Sponsorship - -(GitHub Sponsorship may change depending on the amount of engagement a social media post receives). - -### ✍️ Best Blog Content -Top 3 blog posts (as judged by the MindsDB team) win: -- MindsDB T-shirt -- Blog feature on the official MindsDB website -- 1 entry into the Apple MacBook Pro prize draw -- $100 GitHub Sponsorship. - ----- - -## 🎯 Goals -- Showcase zero-ETL, data-in-place AI analytics with MindsDB KBs. -- Demonstrate hybrid semantic + SQL logic and use Evaluate KB for quality. -- Encourage integrations (Salesforce, BigQuery, Confluence, Gong, Postgres, etc.).
-- Create repeatable app templates for use cases in accordance to our industries listed on our webpage, i.e [Finance Services](https://mindsdb.com/solutions/industry/ai-data-solution-financial-services), [Energy & Utilities](https://mindsdb.com/solutions/industry/ai-data-solution-energy-utilities), [Retail & E-commerce](https://mindsdb.com/solutions/industry/ai-data-solution-retail-ecommerce), [Enterprise Software Vendors](https://mindsdb.com/solutions/industry/ai-data-solution-b2b-tech), or for another Enterprise industry. - -## 👩‍💻 Who Should Join? -- AI/ML Enthusiasts (especially RAG & semantic search fans) -- SQL-savvy developers (data engineers, full-stack devs, data scientists) -- Existing MindsDB users & open-source contributors - -## 🔑 Example Use Cases -- Decision BI Re-imagined → NLQ → KPIs/charts (with auditability). -- Operations Copilot → Root cause & SOP search across tickets/wikis. -- Customer Intelligence → 360° CRM + docs with explainable recs. -- Compliance & Controls → Policy/filing QA with citations + risk flags. -- Wildcard → Any creative KB-powered analytics app. - -## 🛤️ Tracks - -### Track 1: Build an application with MindsDB Knowledge Bases - -Create a functional application (CLI, Web App, API, Bot Interface etc.) where the primary interaction or feature relies on the semantic query results from the KB. This includes: - - A functional, empty Knowledge Base exists within their MindsDB instance (Cloud or local) - - Participant connects a data source (Salesforce, Gong, Hubspot, Postgres or files) and successfully ingests text data into the KB using INSERT INTO. The KB is populated with text data suitable for semantic querying. - - Demonstrate retrieving meaningful results based on semantic similarity and metadata filtering using [Hybrid Search](https://docs.mindsdb.com/mindsdb_sql/knowledge_bases/hybrid_search). Successfully retrieve relevant data chunks/rows based on semantic queries. 
- - Provide a public GitHub repo with clear setup instructions and documentation, along with a working application that demonstrates a practical use case for Knowledge Bases, supported by a short, shareable demo video showcasing the app in action. - -### Track 2: Advanced Capabilities -- Jobs Integration: Auto-update KBs with [CREATE JOB](https://docs.mindsdb.com/mindsdb_sql/sql/create/jobs). -- [Agent Integration](https://docs.mindsdb.com/mindsdb_sql/agents/agent) -- Metadata Filtering: Hybrid search with semantic + structured filters for eg. LIKE and BETWEEN operators. -- [Evaluate Knowledge Bases](https://docs.mindsdb.com/mindsdb_sql/knowledge_bases/evaluate): Produce an evaluation report (MRR, Hit@k, relevancy, etc.). -- [Hybrid Search](https://docs.mindsdb.com/mindsdb_sql/knowledge_bases/hybrid_search): Perform semantic and metadata filtering queries on your data. - ------ - -## 📦 Deliverables/ Minimum Requirements -- Public GitHub repo with code + infra (Docker optional). -- README: problem statement(what use case this solves), architecture, Knowledge Base schema, SQL examples, metrics. -- Demo UI (CLI or Web) + 5-min demo video -- Sample queries (Natural language + SQL). -- Evaluation report: metrics (MRR, Hit@k, avg relevancy, etc.). -- Blog post explaining how you built the application and what use case it solves. -- Social media posts on LinkedIn and Twitter about your use case, mention @mindsdb. - ----- - -## 🚀 Get Started - -- [MindsDB Documentation](https://docs.mindsdb.com/mindsdb) -- [MindsDB Knowledge Bases Documentation](https://docs.mindsdb.com/mindsdb_sql/knowledge_bases/overview) -- [SDK's and API documentation](https://docs.mindsdb.com/overview_sdks_apis) - -As the main category is based on the amount of likes/upvotes your Pull Request receives, you can request to have it merged so that you can claim the merged PR by the official [Hacktoberfest organizers](https://hacktoberfest.com/participation/). 
Pull Requests will be merged 2 hours before the deadline. - ----- -## Ideas - -| **Team / Function** | **Finance & Insurance** | **Healthcare & Life Sciences** | **Energy & Manufacturing** | **Government & Legal** | **Research & Education** | **Tech, SaaS & AI Infra** | **Enterprise / Cross-Industry Ops** | -| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| **🧾 Compliance / Risk** | Regulatory filings, audit reports, policy docs
💬 *QA:* “Find all reports citing liquidity risk in 2024”
🧩 **Integrations:** PostgreSQL, Snowflake, ElasticSearch, SharePoint, Dropbox, Google Drive, QuickBooks, Slack | FDA/EMA docs, SOPs, adverse event reports
💬 *QA:* “Trials that mention IRB deviation?”
🧩 **Integrations:** PostgreSQL, MongoDB, Notion, Google Cloud Storage, MS SQL Server | NRC inspections, maintenance logs
💬 *QA:* “Reports mentioning turbine cracks”
🧩 **Integrations:** PostgreSQL, TimescaleDB, Oracle, S3, Azure Blob | Procurement policies, memos
💬 *QA:* “Memos mentioning budget overruns”
🧩 **Integrations:** SharePoint, Microsoft Access, Box, Gmail | Grant compliance docs
💬 *QA:* “Projects acknowledging NIH funding”
🧩 **Integrations:** Zotero, Mendeley, Google Drive | SOC2, GDPR evidence docs
💬 *QA:* “Which systems lack risk assessments?”
🧩 **Integrations:** Confluence, Notion, GitHub, Jira, Slack | ESG & vendor compliance forms
💬 *QA:* “Who hasn’t signed NDA rev3?”
🧩 **Integrations:** Salesforce, Google Sheets, Email, DocuSign (via API), SharePoint | -| **⚙️ Operations / Field** | Branch ops reports, loan notes
💬 *QA:* “Underwriting exceptions by branch”
🧩 **Integrations:** MySQL, Snowflake, Salesforce, Email, Google Sheets | Lab reports, clinical notes
💬 *QA:* “Recurring post-implant issues?”
🧩 **Integrations:** MongoDB, PostgreSQL, Notion, S3 | Shift logs, SCADA text alerts
💬 *QA:* “Outages mentioning pump cavitation?”
🧩 **Integrations:** InfluxDB, TimescaleDB, Prometheus (via REST), PostgreSQL | Field inspection reports
💬 *QA:* “Bridge safety issues noted?”
🧩 **Integrations:** Google Cloud Storage, Dropbox, S3, MS SharePoint | Lab notebooks, experiments
💬 *QA:* “Failed tests due to reagent purity?”
🧩 **Integrations:** Notion, Google Drive, Zotero, ChromaDB | Runbooks, support tickets
💬 *QA:* “Root causes of downtime tickets?”
🧩 **Integrations:** Jira, GitHub, Confluence, Slack | Facility reports, customer support logs
💬 *QA:* “Delivery issues by region?”
🧩 **Integrations:** Zendesk, Gmail, Salesforce, Google Sheets | -| **🔬 R&D / Engineering** | Quant models, research memos
💬 *QA:* “Which models use Monte Carlo VaR?”
🧩 **Integrations:** Databricks, PostgreSQL, GitHub, Confluence | Trial protocols, biomedical research
💬 *QA:* “Genes linked to treatment response?”
🧩 **Integrations:** MongoDB, Milvus, Qdrant, Zotero, Mendeley | Design reviews, test reports
💬 *QA:* “Materials failed in stress tests?”
🧩 **Integrations:** PostgreSQL, Oracle, SharePoint, S3 | Policy studies, pilot reports
💬 *QA:* “Programs evaluating IoT sensors?”
🧩 **Integrations:** Notion, Google Drive, SharePoint | Academic papers, datasets
💬 *QA:* “Who published on federated learning?”
🧩 **Integrations:** Google Books, Zotero, ArXiv (via web), DuckDB | Architecture reviews, Git issues
💬 *QA:* “Modules changed before latency spike?”
🧩 **Integrations:** GitHub, GitLab, Jira, Notion, ChromaDB | Product PRDs, process docs
💬 *QA:* “Automation proposals this year?”
🧩 **Integrations:** Notion, Confluence, Google Docs, GitHub | -| **⚖️ Legal / Contracts** | Loan agreements, risk clauses
💬 *QA:* “Contracts with force majeure?”
🧩 **Integrations:** SharePoint, Dropbox, Google Drive, Snowflake | Site agreements, NDAs
💬 *QA:* “Trials with data-sharing clauses?”
🧩 **Integrations:** MS SharePoint, Notion, Email | Vendor SLAs
💬 *QA:* “Maintenance contracts mentioning vibration warranty?”
🧩 **Integrations:** PostgreSQL, Dropbox, SharePoint | Case files, legislation
💬 *QA:* “Cases citing statute 14-C?”
🧩 **Integrations:** ElasticSearch, Solr, PostgreSQL, Google Drive | IP licensing docs
💬 *QA:* “Collaborations with MIT?”
🧩 **Integrations:** Notion, Zotero, Google Drive | Partner contracts, OSS licenses
💬 *QA:* “Repos using AGPL?”
🧩 **Integrations:** GitHub, GitLab, Notion | Customer contracts
💬 *QA:* “Contracts expiring Q1 2026?”
🧩 **Integrations:** Salesforce, SharePoint, Dropbox, Email | -| **💰 Finance / Strategy** | Analyst reports, call transcripts
💬 *QA:* “CFO sentiment by quarter?”
🧩 **Integrations:** Financial_Modeling_Prep, QuickBooks, Snowflake, PostgreSQL, Email | R&D budgets
💬 *QA:* “Therapeutic areas over budget?”
🧩 **Integrations:** PostgreSQL, Snowflake, Google Sheets | CapEx memos, project costs
💬 *QA:* “Cost variance per plant?”
🧩 **Integrations:** PostgreSQL, Oracle, Excel (via Sheets), QuickBooks | Budgets & grants
💬 *QA:* “Projects over $5M funding?”
🧩 **Integrations:** Google Sheets, PostgreSQL, SharePoint | Grant summaries
💬 *QA:* “Labs exceeding budgets?”
🧩 **Integrations:** Google Sheets, Zotero, Notion | Investor updates
💬 *QA:* “Delayed GTM features?”
🧩 **Integrations:** Notion, Google Drive, Slack | P&L reports
💬 *QA:* “Which ops sites exceed cost benchmarks?”
🧩 **Integrations:** QuickBooks, Google Sheets, Snowflake | -| **🧩 Customer / Support** | Claims, support chat logs
💬 *QA:* “Common causes of claim denials?”
🧩 **Integrations:** Zendesk, Email, Slack, Salesforce | Patient feedback
💬 *QA:* “Post-visit complaints?”
🧩 **Integrations:** Email, Zendesk, Notion | Vendor support tickets
💬 *QA:* “Frequent field service failures?”
🧩 **Integrations:** Jira, Slack, PostgreSQL | Citizen helpdesk
💬 *QA:* “Permit delays causes?”
🧩 **Integrations:** Zendesk, MS Teams, SharePoint | Student feedback
💬 *QA:* “Top paper rejection reasons?”
🧩 **Integrations:** Gmail, Notion, Zotero | Support tickets, Slack threads
💬 *QA:* “Feature requests tied to churn?”
🧩 **Integrations:** Slack, Intercom, HubSpot, Salesforce | IT helpdesk, HR chat logs
💬 *QA:* “Recurring release issues?”
🧩 **Integrations:** Jira, Slack, Gmail, Confluence | -| **📘 Knowledge / Training** | Onboarding manuals, AML docs
💬 *QA:* “Changes to AML since 2023?”
🧩 **Integrations:** Confluence, Notion, SharePoint | Clinical guidelines, manuals
💬 *QA:* “Latest insulin dosage protocol?”
🧩 **Integrations:** SharePoint, Notion, Google Drive | SOPs, maintenance guides
💬 *QA:* “Valve calibration steps?”
🧩 **Integrations:** PostgreSQL, Dropbox, Google Drive | Agency handbooks
💬 *QA:* “Emergency declaration steps?”
🧩 **Integrations:** SharePoint, Notion | Curricula, lecture notes
💬 *QA:* “Course covering neural nets?”
🧩 **Integrations:** Notion, Google Drive, Zotero | API docs, runbooks
💬 *QA:* “How to configure S3 triggers?”
🧩 **Integrations:** GitHub, Confluence, Notion, S3 | HR & IT playbooks
💬 *QA:* “Vacation policy updates 2025?”
🧩 **Integrations:** Confluence, Notion, SharePoint | - - -**Deadline:** -The competition ends on 31st October 2025 00:00 PST. It is advised to make Pull Requests well in advance. We wish everyone good luck! - -**Hack smarter. Query faster. Build the Next Generation of AI Analytics Apps with MindsDB.** - - - - diff --git a/mindsdb hacktoberfest/use-cases/README.md b/mindsdb hacktoberfest/use-cases/README.md deleted file mode 100644 index a3762d95b25..00000000000 --- a/mindsdb hacktoberfest/use-cases/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# MindsDB Hacktoberfest Use Cases - -- Create a Pull Request by creating a folder for your use case with a descriptive name. -- Make sure it is in the file path mindsdb/mindsdb hacktoberfest/use-cases -- Share your Pull Request with people to upvote your PR by giving it a reaction (either with a thumbs up emoji 👍 or a heart emoji ❤️) - - If you have any questions, contact our team on [Slack.](https://mindsdb.com/joincommunity) diff --git a/mindsdb/.gitignore b/mindsdb/.gitignore deleted file mode 100644 index a55a27c977b..00000000000 --- a/mindsdb/.gitignore +++ /dev/null @@ -1,20 +0,0 @@ -.DStore -.idea -*.ipy* -*.test.* -*.pyc -.cache* -storage/* -config/personal_config.py -*.jar -venv/* -data/* -dist/* -build/* -mindsdb.egg-info/* -clean_data -.pypirc -mindsdb_storage -__pycache__/ -*.egg-info/ -*.egg diff --git a/mindsdb/README.md b/mindsdb/README.md deleted file mode 100644 index 7b44fc4aa0f..00000000000 --- a/mindsdb/README.md +++ /dev/null @@ -1,161 +0,0 @@ -# MindsDB Core Implementation - -This directory (`/mindsdb`) contains the core implementation of MindsDB - an AI federated query engine that follows the "Connect, Unify, Respond" philosophy to help users work with data across disparate sources using natural language and SQL.
- -## Architecture Overview -![image](https://github.com/user-attachments/assets/2e050a75-fed6-4ba5-9e5a-0c59ac302509) - -As shown in the diagram, MindsDB's architecture is organized around its core mission of connecting to data, unifying it through various interfaces, and responding to queries via APIs. The implementation is structured around the following key components: - - -``` - -├── API Layer (RESPOND) -│ └── Exposes MindsDB functionality and handles responses -├── Core Components (UNIFY) -│ ├── Interfaces -│ │ ├── Views - Simplify and organize data scope over federated data -│ │ ├── JOBs - Schedule and synchronize data operations -│ │ ├── Knowledge Bases - Organize unstructured data -│ │ └── ML Models - Transform data using AI/ML -│ └── Utilities - Shared code across interfaces -├── Integration Layer (CONNECT) -│ │── Datasources - Connects to various data sources and services - -``` - -## Core Philosophy: Connect, Unify, Respond - -MindsDB's architecture is built around three fundamental capabilities: - -### CONNECT (Integrations) - -The Integration layer is responsible for connecting MindsDB to all types of data sources: - -- **Database Integrations**: Connect to SQL, NoSQL, and time-series databases -- **Vector Store Integrations**: Connect to vector databases for embeddings -- **Application Integrations**: Connect to SaaS platforms and third-party services -- **File Integrations**: Connect to various file formats and storage systems - -These integrations allow MindsDB to access data wherever it resides, forming the foundation for all other capabilities. 
- -### UNIFY (Interfaces) - -The Interfaces layer provides tools to unify and organize data from multiple sources: - -- **Views**: Simplify data access by creating unified views across different data sources -- **JOBs**: Schedule data synchronization and transformation tasks for real-time data processing -- **Knowledge Bases**: Index and organize unstructured data for efficient retrieval -- **ML Models**: Apply AI/ML transformations to data for predictions and insights - -These interfaces allow working with heterogeneous data as if it were unified in a single system. - -### RESPOND (APIs) - -The API layer enables humans, applications, and AI agents to interact with the unified data: - -- **SQL API**: Process SQL queries against unified data sources -- **HTTP API**: Enable programmatic access via RESTful endpoints -- **SDK API**: Provide language-specific libraries for application integration -- **MCP Server**: Support Model Context Protocol for AI agent interactions - -These APIs provide multiple ways to query and interact with the unified data ecosystem. - -## Component Details - -### Integrations (CONNECT) - -The `/integrations` directory contains handlers that connect MindsDB to external systems: - -- **Data Handlers**: Enable connections to databases and data warehouses -- **Vector Store Handlers**: Connect to vector databases for embedding storage -- **App Handlers**: Integrate with third-party applications and services -- **ML Handlers**: Connect to AI/ML frameworks and model providers - -Each handler type follows a common pattern but specializes in its specific domain of integration. 
- -### Interfaces (UNIFY) - -The interfaces implemented in MindsDB serve to unify and organize data: - -- **Views**: - - Define simplified or aggregated views of data - - Create virtual tables spanning multiple data sources - - Provide standardized data access patterns - -- **JOBs**: - - Schedule recurring operations on data - - Maintain synchronized copies of data - - Automate data transformations in near real-time - -- **Knowledge Bases**: - - Index and organize unstructured data - - Create searchable repositories of documents - - Enable semantic retrieval of information - -- **ML Models**: - - Apply machine learning to transform data - - Generate predictions and insights - - Process data using various AI techniques - -### Utilities - -The Utilities module contains shared code used across all interfaces: - -- **Type Inference**: Code for detecting data types automatically -- **Data Preparation**: Functions for cleaning and preparing data -- **Vector Operations**: Tools for handling embeddings and vectors -- **Common Helpers**: Shared functions used throughout the codebase - -### APIs (RESPOND) - -The API layer provides ways to query and interact with the unified data: - -- **SQL Interface**: Processes SQL statements for all operations -- **HTTP API**: Enables RESTful access to MindsDB functionality -- **Python SDK**: Provides programmatic access from Python applications -- **MCP Integration**: Implements Model Context Protocol for AI agents - -## Implementation Stack - -MindsDB's core implementation leverages: - -- **Python**: Primary programming language -- **SQL Parser**: For processing SQL queries -- **Vector Libraries**: For embedding handling -- **HTTP/REST**: For API communications -- **Various DB Connectors**: For database integrations -- **Python SDKs**: For third-party service integration - -## Directory Structure - -The `/mindsdb` directory contains: - -- **`/api`**: Implementation of the response layer -- **`/interfaces`**: Implementation of 
unification tools -- **`/utilities`**: Shared helper code -- **`/integrations`**: Connection handlers for various sources - - `/handlers`: Base implementation of handlers - - `/data`: Data source connections - - `/app`: Application integrations - - `/ml`: ML/AI framework integrations - -## Development Guidelines - -When working with the MindsDB codebase: - -1. **New Integrations (CONNECT)**: Extend appropriate handler classes for new data sources, applications, or ML frameworks -2. **New Interfaces (UNIFY)**: Implement new tools for data unification in the interfaces layer -3. **API Enhancements (RESPOND)**: Improve the ways users and systems can interact with MindsDB - -## Contributing - -Contributions to MindsDB are welcome and can focus on any of the three core capabilities: - -- **CONNECT**: Add new integration handlers -- **UNIFY**: Enhance data unification interfaces -- **RESPOND**: Improve API capabilities and interactions - -For detailed guidance, see the [contribution guide](https://github.com/mindsdb/mindsdb/blob/main/CONTRIBUTING.md). - -For comprehensive documentation, visit [MindsDB Documentation](https://docs.mindsdb.com/). 
diff --git a/mindsdb/__about__.py b/mindsdb/__about__.py deleted file mode 100644 index baaa11312b2..00000000000 --- a/mindsdb/__about__.py +++ /dev/null @@ -1,10 +0,0 @@ -__title__ = "MindsDB" -__package_name__ = "mindsdb" -__version__ = "26.2.0" -__description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks" -__email__ = "jorge@mindsdb.com" -__author__ = "MindsDB Inc" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "Elastic License 2.0" -__copyright__ = "Copyright(c) 2018 MindsDB, Inc" diff --git a/mindsdb/__init__.py b/mindsdb/__init__.py deleted file mode 100644 index e0eebf48416..00000000000 --- a/mindsdb/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from mindsdb.__about__ import __version__ # noqa: F40 \ No newline at end of file diff --git a/mindsdb/__main__.py b/mindsdb/__main__.py deleted file mode 100644 index 3a8921e995c..00000000000 --- a/mindsdb/__main__.py +++ /dev/null @@ -1,646 +0,0 @@ -import gc - -gc.disable() -import os -import sys -import time -import atexit -import signal -import psutil -import asyncio -import threading -import shutil -from enum import Enum -from dataclasses import dataclass, field -from typing import Callable, Optional, Tuple, List - -from sqlalchemy import func -from sqlalchemy.orm.attributes import flag_modified - -from mindsdb.utilities import log - -logger = log.getLogger("mindsdb") -logger.debug("Starting MindsDB...") - -from mindsdb.__about__ import __version__ as mindsdb_version -from mindsdb.utilities.config import config -from mindsdb.utilities.starters import ( - start_http, - start_mysql, - start_ml_task_queue, - start_scheduler, - start_tasks, - start_litellm, -) -from mindsdb.utilities.ps import is_pid_listen_port, get_child_pids -import mindsdb.interfaces.storage.db as db -from mindsdb.utilities.fs import ( - clean_process_marks, - clean_unlinked_process_marks, - 
create_pid_file, - delete_pid_file, -) -from mindsdb.utilities.context import context as ctx -from mindsdb.utilities.auth import register_oauth_client, get_aws_meta_data -from mindsdb.utilities.sentry import sentry_sdk # noqa: F401 -from mindsdb.utilities.api_status import set_api_status -from mindsdb.utilities.constants import DEFAULT_COMPANY_ID, DEFAULT_USER_ID - -try: - import torch.multiprocessing as mp -except Exception: - import multiprocessing as mp -try: - mp.set_start_method("spawn") -except RuntimeError: - logger.info("Torch multiprocessing context already set, ignoring...") - -gc.enable() - -_stop_event = threading.Event() - - -class TrunkProcessEnum(Enum): - HTTP = "http" - MYSQL = "mysql" - JOBS = "jobs" - TASKS = "tasks" - ML_TASK_QUEUE = "ml_task_queue" - LITELLM = "litellm" - - @classmethod - def _missing_(cls, value): - logger.error(f'"{value}" is not a valid name of subprocess') - sys.exit(1) - - -@dataclass -class TrunkProcessData: - name: str - entrypoint: Callable - need_to_run: bool = False - port: Optional[int] = None - process: Optional[mp.Process] = None - started: bool = False - args: Optional[Tuple] = None - restart_on_failure: bool = False - max_restart_count: int = 3 - max_restart_interval_seconds: int = 60 - - _restart_count: int = 0 - _restarts_time: List[int] = field(default_factory=list) - - def request_restart_attempt(self) -> bool: - """Check if the process may be restarted. - If `max_restart_count` == 0, then there are not restrictions on restarts count or interval. - If `max_restart_interval_seconds` == 0, then there are no time limit for restarts count. 
- - Returns: - bool: `True` if the number of restarts in the interval does not exceed - """ - if self.max_restart_count == 0: - return True - current_time_seconds = int(time.time()) - self._restarts_time.append(current_time_seconds) - if self.max_restart_interval_seconds > 0: - self._restarts_time = [ - x for x in self._restarts_time if x >= (current_time_seconds - self.max_restart_interval_seconds) - ] - if len(self._restarts_time) > self.max_restart_count: - return False - return True - - @property - def should_restart(self) -> bool: - """In case of OOM we want to restart the process. OS kill the process with code 9 on linux when an OOM occurs. - On other OS process will be restarted regardless the code. - - Returns: - bool: `True` if the process need to be restarted on failure - """ - if config.is_cloud: - return False - if sys.platform in ("linux", "darwin"): - return self.restart_on_failure and self.process.exitcode == -signal.SIGKILL.value - else: - if self.max_restart_count == 0: - # to prevent infinity restarts, max_restart_count should be > 0 - logger.warning("In the current OS, it is not possible to use `max_restart_count=0`") - return False - return self.restart_on_failure - - -def close_api_gracefully(trunc_processes_struct): - _stop_event.set() - - delete_pid_file() - - try: - for trunc_processes_data in trunc_processes_struct.values(): - process = trunc_processes_data.process - if process is None: - continue - try: - childs = get_child_pids(process.pid) - for p in childs: - try: - os.kill(p, signal.SIGTERM) - except Exception: - p.kill() - sys.stdout.flush() - process.terminate() - process.join() - sys.stdout.flush() - except psutil.NoSuchProcess: - pass - except KeyboardInterrupt: - sys.exit(0) - - -def clean_mindsdb_tmp_dir(): - """Clean the MindsDB tmp dir at exit.""" - try: - temp_dir = config["paths"]["tmp"] - if not temp_dir.exists(): - return - - for file in temp_dir.iterdir(): - try: - if file.is_dir(): - # 
https://docs.python.org/3/library/shutil.html#shutil.rmtree - shutil.rmtree(file) - else: - # https://docs.python.org/3/library/pathlib.html#pathlib.Path.unlink - file.unlink(missing_ok=True) - except PermissionError as e: - logger.error(f"Failed to clean %s: %s{file}: {e}") - except FileNotFoundError: - logger.error(f"File not found during cleanup: {file}") - except Exception as e: - logger.error(f"Failed to clean MindsDB tmp dir: {e}") - - -def set_error_model_status_by_pids(unexisting_pids: List[int]): - """Models have id of its traiing process in the 'training_metadata' field. - If the pid does not exist, we should set the model status to "error". - Note: only for local usage. - - Args: - unexisting_pids (List[int]): list of 'pids' that do not exist. - """ - predictor_records = ( - db.session.query(db.Predictor) - .filter( - db.Predictor.deleted_at.is_(None), - db.Predictor.status.not_in([db.PREDICTOR_STATUS.COMPLETE, db.PREDICTOR_STATUS.ERROR]), - ) - .all() - ) - for predictor_record in predictor_records: - predictor_process_id = (predictor_record.training_metadata or {}).get("process_id") - if predictor_process_id in unexisting_pids: - predictor_record.status = db.PREDICTOR_STATUS.ERROR - if isinstance(predictor_record.data, dict) is False: - predictor_record.data = {} - if "error" not in predictor_record.data: - predictor_record.data["error"] = "The training process was terminated for unknown reasons" - flag_modified(predictor_record, "data") - db.session.commit() - - -def set_error_model_status_for_unfinished(): - """Set error status to any model if status not in 'complete' or 'error' - Note: only for local usage. 
- """ - predictor_records = ( - db.session.query(db.Predictor) - .filter( - db.Predictor.deleted_at.is_(None), - db.Predictor.status.not_in([db.PREDICTOR_STATUS.COMPLETE, db.PREDICTOR_STATUS.ERROR]), - ) - .all() - ) - for predictor_record in predictor_records: - predictor_record.status = db.PREDICTOR_STATUS.ERROR - if isinstance(predictor_record.data, dict) is False: - predictor_record.data = {} - if "error" not in predictor_record.data: - predictor_record.data["error"] = "Unknown error" - flag_modified(predictor_record, "data") - db.session.commit() - - -def do_clean_process_marks(): - """delete unexisting 'process marks'""" - while _stop_event.wait(timeout=5) is False: - unexisting_pids = clean_unlinked_process_marks() - if not config.is_cloud and len(unexisting_pids) > 0: - set_error_model_status_by_pids(unexisting_pids) - - -def create_permanent_integrations(): - """ - Create permanent integrations, for now only the 'files' integration. - NOTE: this is intentional to avoid importing integration_controller - """ - integration_name = "files" - existing = ( - db.session.query(db.Integration) - .filter_by(name=integration_name, company_id=DEFAULT_COMPANY_ID, user_id=DEFAULT_USER_ID) - .first() - ) - if existing is not None: - logger.info(f"Permanent integration '{integration_name}' already exists") - return - integration_record = db.Integration( - name=integration_name, - data={}, - engine=integration_name, - company_id=DEFAULT_COMPANY_ID, - user_id=DEFAULT_USER_ID, - ) - db.session.add(integration_record) - try: - db.session.commit() - except Exception: - logger.exception(f"Failed to create permanent integration '{integration_name}' in the internal database.") - db.session.rollback() - - -def validate_default_project() -> None: - """Handle 'default_project' config option. - Project with the name specified in 'default_project' must exists and be marked with - 'is_default' metadata. If it is not possible, then terminate the process with error. 
- Note: this can be done using 'project_controller', but we want to save init time and used RAM. - """ - new_default_project_name = config.get("default_project") - logger.debug(f"Checking if default project {new_default_project_name} exists") - filter_company_id = ctx.company_id if ctx.company_id is not None else DEFAULT_COMPANY_ID - filter_user_id = ctx.user_id if ctx.user_id is not None else DEFAULT_USER_ID - - current_default_project: db.Project | None = db.Project.query.filter( - db.Project.company_id == filter_company_id, - db.Project.user_id == filter_user_id, - db.Project.metadata_["is_default"].as_boolean() == True, # noqa - ).first() - - if current_default_project is None: - # Legacy: If the default project does not exist, mark the new one as default. - existing_project = db.Project.query.filter( - db.Project.company_id == filter_company_id, - db.Project.user_id == filter_user_id, - func.lower(db.Project.name) == func.lower(new_default_project_name), - ).first() - if existing_project is None: - logger.critical(f"A project with the name '{new_default_project_name}' does not exist") - sys.exit(1) - - existing_project.metadata_ = {"is_default": True} - flag_modified(existing_project, "metadata_") - db.session.commit() - elif current_default_project.name != new_default_project_name: - # If the default project exists, but the name is different, update the name. - existing_project = db.Project.query.filter( - db.Project.company_id == filter_company_id, - db.Project.user_id == filter_user_id, - func.lower(db.Project.name) == func.lower(new_default_project_name), - ).first() - if existing_project is not None: - logger.critical(f"A project with the name '{new_default_project_name}' already exists") - sys.exit(1) - current_default_project.name = new_default_project_name - db.session.commit() - - -def start_process(trunc_process_data: TrunkProcessData) -> None: - """Start a process. - - Args: - trunc_process_data (TrunkProcessData): The data of the process to start. 
- """ - mp_ctx = mp.get_context("spawn") - logger.info(f"{trunc_process_data.name} API: starting...") - try: - trunc_process_data.process = mp_ctx.Process( - target=trunc_process_data.entrypoint, - args=trunc_process_data.args, - name=trunc_process_data.name, - ) - trunc_process_data.process.start() - except Exception as e: - logger.exception(f"Failed to start '{trunc_process_data.name}' API process due to unexpected error:") - close_api_gracefully(trunc_processes_struct) - raise e - - -if __name__ == "__main__": - mp.freeze_support() - # warn if less than 1Gb of free RAM - if psutil.virtual_memory().available < (1 << 30): - logger.warning( - "The system is running low on memory. " + "This may impact the stability and performance of the program." - ) - - ctx.set_default() - - # ---- CHECK SYSTEM ---- - if not (sys.version_info[0] >= 3 and sys.version_info[1] >= 10): - print( - """ - MindsDB requires Python >= 3.10 to run - - Once you have supported Python version installed you can start mindsdb as follows: - - 1. create and activate venv: - python3 -m venv venv - source venv/bin/activate - - 2. install MindsDB: - pip3 install mindsdb - - 3. Run MindsDB - python3 -m mindsdb - - More instructions in https://docs.mindsdb.com - """ - ) - exit(1) - - if config.cmd_args.version: - print(f"MindsDB {mindsdb_version}") - sys.exit(0) - - if config.cmd_args.update_gui: - from mindsdb.api.http.initialize import initialize_static - - logger.info("Updating the GUI version") - initialize_static() - - sys.exit(0) - - if config.cmd_args.mcp_stdio: - # StreamHandler writes to stderr by default, which MCP treats as notification messages. - # Raise the log level to ERROR to suppress notification spam, and explicitly set the - # stream to stderr in case the user has overridden it in their config. 
- os.environ["MINDSDB_CONSOLE_LOG_LEVEL"] = "ERROR" - config["logging"]["handlers"]["console"]["level"] = "ERROR" - config["logging"]["handlers"]["console"]["stream"] = "ext://sys.stderr" - log.configure_logging() - - config.raise_warnings(logger=logger) - os.environ["MINDSDB_RUNTIME"] = "1" - - if os.environ.get("ARROW_DEFAULT_MEMORY_POOL") is None: - try: - """It seems like snowflake handler have memory issue that related to pyarrow. Memory usage keep growing with - requests. This is related to 'memory pool' that is 'mimalloc' by default: it is fastest but use a lot of ram - """ - import pyarrow as pa - - try: - pa.jemalloc_memory_pool() - os.environ["ARROW_DEFAULT_MEMORY_POOL"] = "jemalloc" - except NotImplementedError: - pa.system_memory_pool() - os.environ["ARROW_DEFAULT_MEMORY_POOL"] = "system" - except Exception: - pass - - db.init() - - environment = config["environment"] - if environment == "aws_marketplace": - try: - register_oauth_client() - except Exception: - logger.exception("Something went wrong during client register:") - elif environment != "local": - try: - aws_meta_data = get_aws_meta_data() - config.update({"aws_meta_data": aws_meta_data}) - except Exception: - pass - - apis = os.getenv("MINDSDB_APIS") or config.cmd_args.api - - if apis is None: # If "--api" option is not specified, start the default APIs - api_arr = [TrunkProcessEnum.HTTP, TrunkProcessEnum.MYSQL] - elif apis == "": # If "--api=" (blank) is specified, don't start any APIs - api_arr = [] - else: # The user has provided a list of APIs to start - api_arr = [TrunkProcessEnum(name) for name in apis.split(",")] - - logger.info(f"Version: {mindsdb_version}") - logger.info(f"Configuration file: {config.config_path or 'absent'}") - logger.info(f"Storage path: {config.paths['root']}") - log.log_system_info(logger) - logger.debug(f"User config: {config.user_config}") - logger.debug(f"System config: {config.auto_config}") - logger.debug(f"Env config: {config.env_config}") - - is_cloud = 
config.is_cloud - unexisting_pids = clean_unlinked_process_marks() - if not is_cloud: - try: - from mindsdb.migrations import migrate - - migrate.migrate_to_head() - except Exception: - logger.exception("Failed to apply database migrations. This may prevent MindsDB from operating correctly:") - - validate_default_project() - - if len(unexisting_pids) > 0: - set_error_model_status_by_pids(unexisting_pids) - set_error_model_status_for_unfinished() - create_permanent_integrations() - - clean_process_marks() - - if config.cmd_args.mcp_stdio: - from mindsdb.api.mcp.mcp_instance import mcp - - mcp.run() - sys.exit(0) - - # Get config values for APIs - http_api_config = config.get("api", {}).get("http", {}) - mysql_api_config = config.get("api", {}).get("mysql", {}) - litellm_api_config = config.get("api", {}).get("litellm", {}) - trunc_processes_struct = { - TrunkProcessEnum.HTTP: TrunkProcessData( - name=TrunkProcessEnum.HTTP.value, - entrypoint=start_http, - port=http_api_config["port"], - args=(config.cmd_args.verbose,), - restart_on_failure=http_api_config.get("restart_on_failure", False), - max_restart_count=http_api_config.get("max_restart_count", TrunkProcessData.max_restart_count), - max_restart_interval_seconds=http_api_config.get( - "max_restart_interval_seconds", - TrunkProcessData.max_restart_interval_seconds, - ), - ), - TrunkProcessEnum.MYSQL: TrunkProcessData( - name=TrunkProcessEnum.MYSQL.value, - entrypoint=start_mysql, - port=mysql_api_config["port"], - args=(config.cmd_args.verbose,), - restart_on_failure=mysql_api_config.get("restart_on_failure", False), - max_restart_count=mysql_api_config.get("max_restart_count", TrunkProcessData.max_restart_count), - max_restart_interval_seconds=mysql_api_config.get( - "max_restart_interval_seconds", - TrunkProcessData.max_restart_interval_seconds, - ), - ), - TrunkProcessEnum.JOBS: TrunkProcessData( - name=TrunkProcessEnum.JOBS.value, - entrypoint=start_scheduler, - args=(config.cmd_args.verbose,), - ), - 
TrunkProcessEnum.TASKS: TrunkProcessData( - name=TrunkProcessEnum.TASKS.value, - entrypoint=start_tasks, - args=(config.cmd_args.verbose,), - ), - TrunkProcessEnum.ML_TASK_QUEUE: TrunkProcessData( - name=TrunkProcessEnum.ML_TASK_QUEUE.value, - entrypoint=start_ml_task_queue, - args=(config.cmd_args.verbose,), - ), - TrunkProcessEnum.LITELLM: TrunkProcessData( - name=TrunkProcessEnum.LITELLM.value, - entrypoint=start_litellm, - port=litellm_api_config.get("port", 8000), - args=(config.cmd_args.verbose,), - restart_on_failure=litellm_api_config.get("restart_on_failure", False), - max_restart_count=litellm_api_config.get("max_restart_count", TrunkProcessData.max_restart_count), - max_restart_interval_seconds=litellm_api_config.get( - "max_restart_interval_seconds", - TrunkProcessData.max_restart_interval_seconds, - ), - ), - } - - for api_enum in api_arr: - if api_enum in trunc_processes_struct: - trunc_processes_struct[api_enum].need_to_run = True - else: - logger.error(f"ERROR: {api_enum} API is not a valid api in config") - - if config["jobs"]["disable"] is False: - trunc_processes_struct[TrunkProcessEnum.JOBS].need_to_run = True - - if config["tasks"]["disable"] is False: - trunc_processes_struct[TrunkProcessEnum.TASKS].need_to_run = True - - if config.cmd_args.ml_task_queue_consumer is True: - trunc_processes_struct[TrunkProcessEnum.ML_TASK_QUEUE].need_to_run = True - - create_pid_file(config) - - for trunc_process_data in trunc_processes_struct.values(): - if trunc_process_data.started is True or trunc_process_data.need_to_run is False: - continue - start_process(trunc_process_data) - # Set status for APIs without ports (they don't go through wait_api_start) - if trunc_process_data.port is None: - set_api_status(trunc_process_data.name, True) - - atexit.register(close_api_gracefully, trunc_processes_struct=trunc_processes_struct) - atexit.register(clean_mindsdb_tmp_dir) - - async def wait_api_start(api_name, pid, port): - timeout = 60 - start_time = time.time() 
- started = is_pid_listen_port(pid, port) - while (time.time() - start_time) < timeout and started is False: - await asyncio.sleep(0.5) - started = is_pid_listen_port(pid, port) - - set_api_status(api_name, started) - - return api_name, port, started - - async def wait_apis_start(): - futures = [ - wait_api_start( - trunc_process_data.name, - trunc_process_data.process.pid, - trunc_process_data.port, - ) - for trunc_process_data in trunc_processes_struct.values() - if trunc_process_data.port is not None and trunc_process_data.need_to_run is True - ] - for future in asyncio.as_completed(futures): - api_name, port, started = await future - if started: - logger.info(f"{api_name} API: started on {port}") - else: - logger.error(f"ERROR: {api_name} API cant start on {port}") - - async def join_process(trunc_process_data: TrunkProcessData): - finish = False - while not finish: - process = trunc_process_data.process - try: - while process.is_alive(): - process.join(1) - await asyncio.sleep(0) - except KeyboardInterrupt: - logger.info("Got keyboard interrupt, stopping APIs") - close_api_gracefully(trunc_processes_struct) - finally: - if trunc_process_data.should_restart: - if trunc_process_data.request_restart_attempt(): - logger.warning(f"{trunc_process_data.name} API: stopped unexpectedly, restarting") - trunc_process_data.process = None - if trunc_process_data.name == TrunkProcessEnum.HTTP.value: - # do not open GUI on HTTP API restart - trunc_process_data.args = ( - config.cmd_args.verbose, - None, - True, - ) - start_process(trunc_process_data) - api_name, port, started = await wait_api_start( - trunc_process_data.name, - trunc_process_data.process.pid, - trunc_process_data.port, - ) - if started: - logger.info(f"{api_name} API: started on {port}") - else: - logger.error(f"ERROR: {api_name} API cant start on {port}") - else: - finish = True - logger.error( - f'The "{trunc_process_data.name}" process could not restart after failure. 
' - "There will be no further attempts to restart." - ) - else: - finish = True - logger.info(f"{trunc_process_data.name} API: stopped") - - async def gather_apis(): - await asyncio.gather( - *[ - join_process(trunc_process_data) - for trunc_process_data in trunc_processes_struct.values() - if trunc_process_data.need_to_run is True - ], - return_exceptions=False, - ) - - ioloop = asyncio.new_event_loop() - ioloop.run_until_complete(wait_apis_start()) - - threading.Thread(target=do_clean_process_marks, name="clean_process_marks").start() - if config["logging"]["resources_log"]["enabled"] is True: - threading.Thread( - target=log.resources_log_thread, - args=(_stop_event, config["logging"]["resources_log"]["interval"]), - name="resources_log", - ).start() - - ioloop.run_until_complete(gather_apis()) - ioloop.close() diff --git a/mindsdb/api/__init__.py b/mindsdb/api/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/api/a2a/README.md b/mindsdb/api/a2a/README.md deleted file mode 100644 index 787b2d8c409..00000000000 --- a/mindsdb/api/a2a/README.md +++ /dev/null @@ -1,70 +0,0 @@ -## MindsDB Agent-to-Agent (A2A) API - -The A2A API enables MindsDB agents to communicate with external systems and other agents using a standardized protocol. It allows for both synchronous and streaming responses, making it suitable for a wide range of applications including chatbots, data analysis, and automated workflows. 
- -## Overview - -The A2A API runs as part of the MindsDB HTTP API, allowing you to: - -- Query MindsDB agents using natural language -- Stream responses in real-time for interactive applications -- Connect MindsDB agents to external systems and other agents -- Process complex queries across multiple data sources - -## Prerequisites - -- MindsDB running -- Python 3.10.20 or higher - -## Running A2A API - -The A2A API is enabled by default when starting MindsDB (or when you include `--api=http`): - -```bash -python -m mindsdb -``` - -## Example Request - -Here's an example of how to make a streaming request to the A2A API: - -```bash -curl -X POST \ - "http://localhost:47334/a2a/" \ - -H "Content-Type: application/json" \ - -H "Accept: text/event-stream" \ - -H "Cache-Control: no-cache" \ - -H "Connection: keep-alive" \ - -d '{ - "jsonrpc": "2.0", - "id": "your-request-id", - "method": "tasks/sendSubscribe", - "params": { - "id": "your-task-id", - "sessionId": "your-session-id", - "message": { - "role": "user", - "parts": [ - {"type": "text", "text": "What is the average rental price for a three bedroom?"} - ], - "metadata": { - "agentName": "my_agent_123" - } - }, - "acceptedOutputModes": ["text/plain"] - } - }' \ - --no-buffer -``` - -**Note:** You must pass the agent name in metadata using either `agentName` or `agent_name` parameter. - -## Example Queries - -You can ask questions like: - -- "Show me sales data from our CRM and combine it with customer feedback from our support tickets" -- "What are the top performing products across all our e-commerce platforms?" -- "Compare customer engagement metrics between our web analytics and email marketing platforms" - -The agent will handle the complexity of joining and analyzing data across different sources and stream the responses back to you in real-time. 
diff --git a/mindsdb/api/a2a/__init__.py b/mindsdb/api/a2a/__init__.py deleted file mode 100644 index b41ee846469..00000000000 --- a/mindsdb/api/a2a/__init__.py +++ /dev/null @@ -1,52 +0,0 @@ -# A2A specific imports -from mindsdb.api.a2a.common.types import ( - AgentCard, - AgentCapabilities, - AgentSkill, -) -from mindsdb.api.a2a.common.server.server import A2AServer -from mindsdb.api.a2a.task_manager import AgentTaskManager -from mindsdb.api.a2a.agent import MindsDBAgent -from mindsdb.utilities.config import config - - -def get_a2a_app( - project_name: str = "mindsdb", -): - mindsdb_port = config.get("api", {}).get("http", {}).get("port", 47334) - - # Prepare A2A artefacts (agent card & task-manager) - capabilities = AgentCapabilities(streaming=True) - skill = AgentSkill( - id="mindsdb_query", - name="MindsDB Query", - description="Executes natural-language queries via MindsDB agents.", - tags=["database", "mindsdb", "query", "analytics"], - examples=[ - "What trends exist in my sales data?", - "Generate insights from the support tickets dataset.", - ], - inputModes=MindsDBAgent.SUPPORTED_CONTENT_TYPES, - outputModes=MindsDBAgent.SUPPORTED_CONTENT_TYPES, - ) - - agent_card = AgentCard( - name="MindsDB Agent Connector", - description=(f"A2A connector that proxies requests to MindsDB agents in project '{project_name}'."), - url=f"http://127.0.0.1:{mindsdb_port}/a2a/", - version="1.0.0", - defaultInputModes=MindsDBAgent.SUPPORTED_CONTENT_TYPES, - defaultOutputModes=MindsDBAgent.SUPPORTED_CONTENT_TYPES, - capabilities=capabilities, - skills=[skill], - ) - - task_manager = AgentTaskManager( - project_name=project_name, - ) - - server = A2AServer( - agent_card=agent_card, - task_manager=task_manager, - ) - return server.app diff --git a/mindsdb/api/a2a/agent.py b/mindsdb/api/a2a/agent.py deleted file mode 100644 index bbf90059ec3..00000000000 --- a/mindsdb/api/a2a/agent.py +++ /dev/null @@ -1,203 +0,0 @@ -import json -from typing import Any, AsyncIterable, Dict, List 
-import requests -import httpx -from mindsdb.api.a2a.utils import to_serializable, convert_a2a_message_to_qa_format -from mindsdb.api.a2a.constants import DEFAULT_STREAM_TIMEOUT -from mindsdb.api.a2a.common.types import A2AClientError, A2AClientHTTPError -from mindsdb.utilities import log -from mindsdb.utilities.config import config - -logger = log.getLogger(__name__) - - -class MindsDBAgent: - """An agent that communicates with MindsDB over HTTP following the A2A protocol.""" - - SUPPORTED_CONTENT_TYPES = ["text", "text/plain", "application/json"] - - def __init__( - self, - agent_name="my_agent", - project_name="mindsdb", - user_info: Dict[str, Any] = None, - ): - self.agent_name = agent_name - self.project_name = project_name - port = config.get("api", {}).get("http", {}).get("port", 47334) - host = config.get("api", {}).get("http", {}).get("host", "127.0.0.1") - - # Use 127.0.0.1 instead of localhost for better compatibility - if host in ("0.0.0.0", ""): - url = f"http://127.0.0.1:{port}/" - else: - url = f"http://{host}:{port}/" - - self.base_url = url - self.agent_url = f"{self.base_url}/api/projects/{project_name}/agents/{agent_name}" - self.sql_url = f"{self.base_url}/api/sql/query" - self.headers = {k: v for k, v in user_info.items() if v is not None} or {} - logger.info(f"Initialized MindsDB agent connector to {self.base_url}") - - def invoke(self, query, session_id) -> Dict[str, Any]: - """Send a query to the MindsDB agent using SQL API.""" - try: - escaped_query = query.replace("'", "''") - sql_query = f"SELECT * FROM {self.project_name}.{self.agent_name} WHERE question = '{escaped_query}'" - logger.debug(f"Sending SQL query to MindsDB: {sql_query[:100]}...") - response = requests.post(self.sql_url, json={"query": sql_query}, headers=self.headers) - response.raise_for_status() - data = response.json() - logger.debug(f"Received response from MindsDB: {json.dumps(data)[:200]}...") - if "data" in data and len(data["data"]) > 0: - result_row = 
data["data"][0] - for column in ["response", "result", "answer", "completion", "output"]: - if column in result_row: - content = result_row[column] - logger.info(f"Found result in column '{column}': {content[:100]}...") - return { - "content": content, - "parts": [{"type": "text", "text": content}], - } - logger.info("No specific result column found, returning full row") - content = json.dumps(result_row, indent=2) - parts = [{"type": "text", "text": content}] - if isinstance(result_row, dict): - parts.append( - { - "type": "data", - "data": result_row, - "metadata": {"subtype": "json"}, - } - ) - return { - "content": content, - "parts": parts, - } - else: - error_msg = "Error: No data returned from MindsDB" - logger.error(error_msg) - return { - "content": error_msg, - "parts": [{"type": "text", "text": error_msg}], - } - except requests.exceptions.RequestException as e: - logger.exception("Error connecting to MindsDB:") - return { - "content": f"Error connecting to MindsDB: {e}", - "parts": [{"type": "text", "text": error_msg}], - } - except Exception as e: - logger.exception("Error: ") - return { - "content": f"Error: {e}", - "parts": [{"type": "text", "text": error_msg}], - } - - async def streaming_invoke(self, messages, timeout=DEFAULT_STREAM_TIMEOUT): - url = f"{self.base_url}/api/projects/{self.project_name}/agents/{self.agent_name}/completions/stream" - logger.debug(f"Sending streaming request to MindsDB agent: {self.agent_name}") - try: - async with httpx.AsyncClient(timeout=timeout, headers=self.headers) as client: - async with client.stream("POST", url, json={"messages": to_serializable(messages)}) as response: - response.raise_for_status() - async for line in response.aiter_lines(): - if not line.strip(): - continue - # Only process actual SSE data lines - if line.startswith("data:"): - payload = line[len("data:") :].strip() - try: - chunk = json.loads(payload) - # Transform chunks from HTTP endpoint format (type/content) to A2A format (text/output) - 
if isinstance(chunk, dict): - chunk_type = chunk.get("type") - content = chunk.get("content") - - # Transform chunks with type/content structure to A2A format - if chunk_type is not None and content is not None: - # Map content to text field for A2A compatibility - transformed_chunk = chunk.copy() - transformed_chunk["text"] = str(content) if content is not None else "" - - # Preserve original fields but ensure A2A-compatible format - # For data chunks (markdown), the content is already in text format - if chunk_type == "data": - transformed_chunk["output"] = str(content) - elif chunk_type in ("sql", "status", "context"): - # These are informational chunks, map to text - transformed_chunk["text"] = str(content) - elif chunk_type == "error": - transformed_chunk["type"] = "context" - # Error chunks should have error field - transformed_chunk["text"] = str(content) - - yield transformed_chunk - else: - # Chunk doesn't have type/content structure, yield as-is - yield chunk - else: - # Not a dict, yield as-is - yield chunk - except Exception as e: - logger.exception(f"Failed to parse SSE JSON payload: {e}; line: {payload}") - # Ignore comments or control lines - # Signal the end of the stream - yield {"is_task_complete": True} - except httpx.ReadTimeout: - error_msg = f"Request timed out after {timeout} seconds while streaming from agent '{self.agent_name}'" - logger.error(error_msg) - raise TimeoutError(error_msg) - except httpx.ConnectTimeout: - error_msg = f"Connection timeout while connecting to agent '{self.agent_name}' at {url}" - logger.error(error_msg) - raise ConnectionError(error_msg) - except httpx.ConnectError as e: - error_msg = f"Failed to connect to agent '{self.agent_name}' at {url}: {str(e)}" - logger.error(error_msg) - raise ConnectionError(error_msg) - except httpx.HTTPStatusError as e: - error_msg = f"HTTP error {e.response.status_code} from agent '{self.agent_name}': {str(e)}" - logger.error(error_msg) - raise 
A2AClientHTTPError(status_code=e.response.status_code, message=error_msg) - except httpx.RequestError as e: - error_msg = f"Request error while streaming from agent '{self.agent_name}': {str(e)}" - logger.error(error_msg) - raise A2AClientError(error_msg) - - async def stream( - self, - query: str, - session_id: str, - history: List[dict] | None = None, - timeout: int = DEFAULT_STREAM_TIMEOUT, - ) -> AsyncIterable[Dict[str, Any]]: - """Stream responses from the MindsDB agent (uses streaming API endpoint).""" - try: - # Create A2A message structure with history and current query - a2a_message = {"role": "user", "parts": [{"text": query}]} - if history: - a2a_message["history"] = history - # Convert to Q&A format using centralized utility - formatted_messages = convert_a2a_message_to_qa_format(a2a_message) - logger.debug(f"Formatted messages for agent: {formatted_messages}") - streaming_response = self.streaming_invoke(formatted_messages, timeout=timeout) - async for chunk in streaming_response: - content_value = chunk.get("text") or chunk.get("output") or json.dumps(chunk) - wrapped_chunk = {"is_task_complete": False, "content": content_value, "metadata": {}} - yield wrapped_chunk - except Exception as e: - logger.exception(f"Error in streaming: {e}") - yield { - "is_task_complete": True, - "parts": [ - { - "type": "text", - "text": f"Error: {e}", - } - ], - "metadata": { - "type": "reasoning", - "subtype": "error", - }, - } diff --git a/mindsdb/api/a2a/common/__init__.py b/mindsdb/api/a2a/common/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/api/a2a/common/server/__init__.py b/mindsdb/api/a2a/common/server/__init__.py deleted file mode 100644 index 10f5fa467ca..00000000000 --- a/mindsdb/api/a2a/common/server/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .server import A2AServer -from .task_manager import TaskManager, InMemoryTaskManager - -__all__ = ["A2AServer", "TaskManager", "InMemoryTaskManager"] diff --git 
a/mindsdb/api/a2a/common/server/server.py b/mindsdb/api/a2a/common/server/server.py deleted file mode 100644 index f9b3e4c37f3..00000000000 --- a/mindsdb/api/a2a/common/server/server.py +++ /dev/null @@ -1,164 +0,0 @@ -import json -import time -from typing import AsyncIterable, Any, Dict - -from starlette.applications import Starlette -from starlette.middleware.cors import CORSMiddleware -from starlette.responses import JSONResponse -from sse_starlette.sse import EventSourceResponse -from starlette.requests import Request -from starlette.routing import Route -from ...common.types import ( - A2ARequest, - JSONRPCResponse, - InvalidRequestError, - JSONParseError, - GetTaskRequest, - CancelTaskRequest, - SendTaskRequest, - SetTaskPushNotificationRequest, - GetTaskPushNotificationRequest, - InternalError, - AgentCard, - TaskResubscriptionRequest, - SendTaskStreamingRequest, - MessageStreamRequest, -) -from pydantic import ValidationError -from ...common.server.task_manager import TaskManager - -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class A2AServer: - def __init__( - self, - agent_card: AgentCard = None, - task_manager: TaskManager = None, - ): - self.task_manager = task_manager - self.agent_card = agent_card - self.app = Starlette( - routes=[ - Route("/", self._process_request, methods=["POST"]), - Route("/.well-known/agent.json", self._get_agent_card, methods=["GET"]), - Route("/.well-known/agent-card.json", self._get_agent_card, methods=["GET"]), - Route("/status", self._get_status, methods=["GET"]), - ] - ) - # TODO: Remove this when we have a proper CORS policy - self.app.add_middleware( - CORSMiddleware, - allow_origins=["*"], - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], - ) - self.start_time = time.time() - - def _get_agent_card(self, request: Request) -> JSONResponse: - return JSONResponse(self.agent_card.model_dump(exclude_none=True)) - - def _get_status(self, request: Request) -> JSONResponse: - 
""" - Status endpoint that returns basic server information. - This endpoint can be used by the frontend to check if the A2A server is running. - """ - uptime_seconds = time.time() - self.start_time - - status_info: Dict[str, Any] = { - "status": "ok", - "service": "mindsdb-a2a", - "uptime_seconds": round(uptime_seconds, 2), - "agent_name": self.agent_card.name if self.agent_card else None, - "version": self.agent_card.version if self.agent_card else "unknown", - } - - return JSONResponse(status_info) - - async def _process_request(self, request: Request): - try: - body = await request.json() - json_rpc_request = A2ARequest.validate_python(body) - - user_info = { - "user-id": request.headers.get("user-id", None), - "company-id": request.headers.get("company-id", None), - "user-class": request.headers.get("user-class", None), - "authorization": request.headers.get("Authorization", None), - } - - if isinstance(json_rpc_request, GetTaskRequest): - result = await self.task_manager.on_get_task(json_rpc_request) - elif isinstance(json_rpc_request, SendTaskRequest): - result = await self.task_manager.on_send_task(json_rpc_request, user_info) - elif isinstance(json_rpc_request, SendTaskStreamingRequest): - # Don't await the async generator, just pass it to _create_response - result = self.task_manager.on_send_task_subscribe(json_rpc_request, user_info) - elif isinstance(json_rpc_request, CancelTaskRequest): - result = await self.task_manager.on_cancel_task(json_rpc_request) - elif isinstance(json_rpc_request, SetTaskPushNotificationRequest): - result = await self.task_manager.on_set_task_push_notification(json_rpc_request) - elif isinstance(json_rpc_request, GetTaskPushNotificationRequest): - result = await self.task_manager.on_get_task_push_notification(json_rpc_request) - elif isinstance(json_rpc_request, TaskResubscriptionRequest): - result = await self.task_manager.on_resubscribe_to_task(json_rpc_request) - elif isinstance(json_rpc_request, MessageStreamRequest): - 
result = await self.task_manager.on_message_stream(json_rpc_request, user_info) - else: - logger.warning(f"Unexpected request type: {type(json_rpc_request)}") - raise ValueError(f"Unexpected request type: {type(request)}") - - return self._create_response(result) - - except Exception as e: - return self._handle_exception(e) - - def _handle_exception(self, e: Exception) -> JSONResponse: - if isinstance(e, json.decoder.JSONDecodeError): - json_rpc_error = JSONParseError() - elif isinstance(e, ValidationError): - json_rpc_error = InvalidRequestError(data=json.loads(e.json())) - else: - logger.exception("Unhandled exception:") - json_rpc_error = InternalError() - - response = JSONRPCResponse(id=None, error=json_rpc_error) - return JSONResponse(response.model_dump(exclude_none=True), status_code=400) - - def _create_response(self, result: Any) -> JSONResponse | EventSourceResponse: - if isinstance(result, AsyncIterable): - # Step 2: Yield actual serialized event as JSON, with timing logs - async def event_generator(result): - async for item in result: - t0 = time.time() - logger.debug(f"[A2AServer] STEP2 serializing item at {t0}: {str(item)[:120]}") - try: - if hasattr(item, "model_dump_json"): - data = item.model_dump_json(exclude_none=True) - else: - data = json.dumps(item) - except Exception as e: - logger.exception("Serialization error in SSE stream:") - data = json.dumps({"error": f"Serialization error: {e}"}) - yield {"data": data} - - # Add robust SSE headers for compatibility - sse_headers = { - "Content-Type": "text/event-stream", - "Cache-Control": "no-cache, no-transform", - "X-Accel-Buffering": "no", - "Connection": "keep-alive", - "Transfer-Encoding": "chunked", - } - return EventSourceResponse(event_generator(result), headers=sse_headers) - elif isinstance(result, JSONRPCResponse): - return JSONResponse(result.model_dump(exclude_none=True)) - elif isinstance(result, dict): - logger.warning("Falling back to JSONResponse for result type: dict") - return 
JSONResponse(result) - else: - logger.error(f"Unexpected result type: {type(result)}") - raise ValueError(f"Unexpected result type: {type(result)}") diff --git a/mindsdb/api/a2a/common/server/task_manager.py b/mindsdb/api/a2a/common/server/task_manager.py deleted file mode 100644 index 49ffcf150b7..00000000000 --- a/mindsdb/api/a2a/common/server/task_manager.py +++ /dev/null @@ -1,280 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Union, AsyncIterable, List, Dict -from ...common.types import ( - Task, - JSONRPCResponse, - TaskIdParams, - TaskQueryParams, - GetTaskRequest, - TaskNotFoundError, - SendTaskRequest, - CancelTaskRequest, - TaskNotCancelableError, - SetTaskPushNotificationRequest, - GetTaskPushNotificationRequest, - GetTaskResponse, - CancelTaskResponse, - SendTaskResponse, - SetTaskPushNotificationResponse, - GetTaskPushNotificationResponse, - TaskSendParams, - TaskStatus, - TaskState, - TaskResubscriptionRequest, - SendTaskStreamingRequest, - SendTaskStreamingResponse, - Artifact, - PushNotificationConfig, - TaskStatusUpdateEvent, - JSONRPCError, - TaskPushNotificationConfig, - InternalError, - MessageStreamRequest, -) -from ...common.server.utils import new_not_implemented_error -from mindsdb.utilities import log -import asyncio - -logger = log.getLogger(__name__) - - -class TaskManager(ABC): - @abstractmethod - async def on_get_task(self, request: GetTaskRequest) -> GetTaskResponse: - pass - - @abstractmethod - async def on_cancel_task(self, request: CancelTaskRequest) -> CancelTaskResponse: - pass - - @abstractmethod - async def on_send_task(self, request: SendTaskRequest, user_info: Dict) -> SendTaskResponse: - pass - - @abstractmethod - async def on_send_task_subscribe( - self, request: SendTaskStreamingRequest, user_info: Dict - ) -> Union[AsyncIterable[SendTaskStreamingResponse], JSONRPCResponse]: - pass - - @abstractmethod - async def on_set_task_push_notification( - self, request: SetTaskPushNotificationRequest - ) -> 
SetTaskPushNotificationResponse: - pass - - @abstractmethod - async def on_get_task_push_notification( - self, request: GetTaskPushNotificationRequest - ) -> GetTaskPushNotificationResponse: - pass - - @abstractmethod - async def on_resubscribe_to_task( - self, request: TaskResubscriptionRequest - ) -> Union[AsyncIterable[SendTaskResponse], JSONRPCResponse]: - pass - - @abstractmethod - async def on_message_stream( - self, request: MessageStreamRequest, user_info: Dict - ) -> Union[AsyncIterable[SendTaskStreamingResponse], JSONRPCResponse]: - pass - - -class InMemoryTaskManager(TaskManager): - def __init__(self): - self.tasks: dict[str, Task] = {} - self.push_notification_infos: dict[str, PushNotificationConfig] = {} - self.lock = asyncio.Lock() - self.task_sse_subscribers: dict[str, List[asyncio.Queue]] = {} - self.subscriber_lock = asyncio.Lock() - - async def on_get_task(self, request: GetTaskRequest) -> GetTaskResponse: - logger.info(f"Getting task {request.params.id}") - task_query_params: TaskQueryParams = request.params - - async with self.lock: - task = self.tasks.get(task_query_params.id) - if task is None: - return GetTaskResponse(id=request.id, error=TaskNotFoundError()) - - task_result = self.append_task_history(task, task_query_params.historyLength) - - return GetTaskResponse(id=request.id, result=task_result) - - async def on_cancel_task(self, request: CancelTaskRequest) -> CancelTaskResponse: - logger.info(f"Cancelling task {request.params.id}") - task_id_params: TaskIdParams = request.params - - async with self.lock: - task = self.tasks.get(task_id_params.id) - if task is None: - return CancelTaskResponse(id=request.id, error=TaskNotFoundError()) - - return CancelTaskResponse(id=request.id, error=TaskNotCancelableError()) - - @abstractmethod - async def on_send_task(self, request: SendTaskRequest, user_info: Dict) -> SendTaskResponse: - pass - - @abstractmethod - async def on_send_task_subscribe( - self, request: SendTaskStreamingRequest, user_info: 
Dict - ) -> Union[AsyncIterable[SendTaskStreamingResponse], JSONRPCResponse]: - pass - - async def set_push_notification_info(self, task_id: str, notification_config: PushNotificationConfig): - async with self.lock: - task = self.tasks.get(task_id) - if task is None: - raise ValueError(f"Task not found for {task_id}") - - self.push_notification_infos[task_id] = notification_config - - return - - async def get_push_notification_info(self, task_id: str) -> PushNotificationConfig: - async with self.lock: - task = self.tasks.get(task_id) - if task is None: - raise ValueError(f"Task not found for {task_id}") - - return self.push_notification_infos[task_id] - - return - - async def has_push_notification_info(self, task_id: str) -> bool: - async with self.lock: - return task_id in self.push_notification_infos - - async def on_set_task_push_notification( - self, request: SetTaskPushNotificationRequest - ) -> SetTaskPushNotificationResponse: - logger.info(f"Setting task push notification {request.params.id}") - task_notification_params: TaskPushNotificationConfig = request.params - - try: - await self.set_push_notification_info( - task_notification_params.id, - task_notification_params.pushNotificationConfig, - ) - except Exception: - logger.exception("Error while setting push notification info:") - return JSONRPCResponse( - id=request.id, - error=InternalError(message="An error occurred while setting push notification info"), - ) - - return SetTaskPushNotificationResponse(id=request.id, result=task_notification_params) - - async def on_get_task_push_notification( - self, request: GetTaskPushNotificationRequest - ) -> GetTaskPushNotificationResponse: - logger.info(f"Getting task push notification {request.params.id}") - task_params: TaskIdParams = request.params - - try: - notification_info = await self.get_push_notification_info(task_params.id) - except Exception: - logger.exception("Error while getting push notification info:") - return GetTaskPushNotificationResponse( - 
id=request.id, - error=InternalError(message="An error occurred while getting push notification info"), - ) - - return GetTaskPushNotificationResponse( - id=request.id, - result=TaskPushNotificationConfig(id=task_params.id, pushNotificationConfig=notification_info), - ) - - async def upsert_task(self, task_send_params: TaskSendParams) -> Task: - logger.info(f"Upserting task {task_send_params.id}") - async with self.lock: - task = self.tasks.get(task_send_params.id) - if task is None: - task = Task( - id=task_send_params.id, - sessionId=task_send_params.sessionId, - messages=[task_send_params.message], - status=TaskStatus(state=TaskState.SUBMITTED), - history=[task_send_params.message], - ) - self.tasks[task_send_params.id] = task - else: - task.history.append(task_send_params.message) - - return task - - async def on_resubscribe_to_task( - self, request: TaskResubscriptionRequest - ) -> Union[AsyncIterable[SendTaskStreamingResponse], JSONRPCResponse]: - return new_not_implemented_error(request.id) - - async def update_store(self, task_id: str, status: TaskStatus, artifacts: list[Artifact]) -> Task: - async with self.lock: - try: - task = self.tasks[task_id] - except KeyError: - logger.error(f"Task {task_id} not found for updating the task") - raise ValueError(f"Task {task_id} not found") - - task.status = status - - if status.message is not None: - task.history.append(status.message) - - if artifacts is not None: - if task.artifacts is None: - task.artifacts = [] - task.artifacts.extend(artifacts) - - return task - - def append_task_history(self, task: Task, historyLength: int | None): - new_task = task.model_copy() - if historyLength is not None and historyLength > 0: - new_task.history = new_task.history[-historyLength:] - else: - new_task.history = [] - - return new_task - - async def setup_sse_consumer(self, task_id: str, is_resubscribe: bool = False): - async with self.subscriber_lock: - if task_id not in self.task_sse_subscribers: - if is_resubscribe: - raise 
ValueError("Task not found for resubscription") - else: - self.task_sse_subscribers[task_id] = [] - - sse_event_queue = asyncio.Queue(maxsize=0) # <=0 is unlimited - self.task_sse_subscribers[task_id].append(sse_event_queue) - return sse_event_queue - - async def enqueue_events_for_sse(self, task_id, task_update_event): - async with self.subscriber_lock: - if task_id not in self.task_sse_subscribers: - return - - current_subscribers = self.task_sse_subscribers[task_id] - for subscriber in current_subscribers: - await subscriber.put(task_update_event) - - async def dequeue_events_for_sse( - self, request_id, task_id, sse_event_queue: asyncio.Queue - ) -> AsyncIterable[SendTaskStreamingResponse] | JSONRPCResponse: - try: - while True: - event = await sse_event_queue.get() - if isinstance(event, JSONRPCError): - yield SendTaskStreamingResponse(id=request_id, error=event) - break - - yield SendTaskStreamingResponse(id=request_id, result=event) - if isinstance(event, TaskStatusUpdateEvent) and event.final: - break - finally: - async with self.subscriber_lock: - if task_id in self.task_sse_subscribers: - self.task_sse_subscribers[task_id].remove(sse_event_queue) diff --git a/mindsdb/api/a2a/common/server/utils.py b/mindsdb/api/a2a/common/server/utils.py deleted file mode 100644 index fbbd41e8ea0..00000000000 --- a/mindsdb/api/a2a/common/server/utils.py +++ /dev/null @@ -1,28 +0,0 @@ -from ...common.types import ( - JSONRPCResponse, - ContentTypeNotSupportedError, - UnsupportedOperationError, -) -from typing import List - - -def are_modalities_compatible( - server_output_modes: List[str], client_output_modes: List[str] -): - """Modalities are compatible if they are both non-empty - and there is at least one common element.""" - if client_output_modes is None or len(client_output_modes) == 0: - return True - - if server_output_modes is None or len(server_output_modes) == 0: - return True - - return any(x in server_output_modes for x in client_output_modes) - - -def 
new_incompatible_types_error(request_id): - return JSONRPCResponse(id=request_id, error=ContentTypeNotSupportedError()) - - -def new_not_implemented_error(request_id): - return JSONRPCResponse(id=request_id, error=UnsupportedOperationError()) diff --git a/mindsdb/api/a2a/common/types.py b/mindsdb/api/a2a/common/types.py deleted file mode 100644 index 68694d61ee7..00000000000 --- a/mindsdb/api/a2a/common/types.py +++ /dev/null @@ -1,430 +0,0 @@ -from typing import Union, Any -from pydantic import BaseModel, Field, TypeAdapter -from typing import Literal, List, Annotated, Optional -from datetime import datetime -from pydantic import model_validator, ConfigDict, field_serializer -from uuid import uuid4 -from enum import Enum -from typing_extensions import Self - - -class TaskState(str, Enum): - SUBMITTED = "submitted" - WORKING = "working" - INPUT_REQUIRED = "input-required" - COMPLETED = "completed" - CANCELED = "canceled" - FAILED = "failed" - UNKNOWN = "unknown" - - -class TextPart(BaseModel): - type: Literal["text"] = "text" - text: str - metadata: dict[str, Any] | None = None - - -class FileContent(BaseModel): - name: str | None = None - mimeType: str | None = None - bytes: str | None = None - uri: str | None = None - - @model_validator(mode="after") - def check_content(self) -> Self: - if not (self.bytes or self.uri): - raise ValueError("Either 'bytes' or 'uri' must be present in the file data") - if self.bytes and self.uri: - raise ValueError("Only one of 'bytes' or 'uri' can be present in the file data") - return self - - -class FilePart(BaseModel): - type: Literal["file"] = "file" - file: FileContent - metadata: dict[str, Any] | None = None - - -class DataPart(BaseModel): - type: Literal["data"] = "data" - data: dict[str, Any] - metadata: dict[str, Any] | None = None - - -Part = Annotated[Union[TextPart, FilePart, DataPart], Field(discriminator="type")] - - -class Message(BaseModel): - role: Literal["user", "agent", "assistant"] - parts: List[Part] - 
metadata: dict[str, Any] | None = None - history: Optional[List["Message"]] = None - messageId: str | None = None - - -class FlexibleMessage(BaseModel): - """Message that can handle both 'type' and 'kind' in parts.""" - - role: Literal["user", "agent", "assistant"] - parts: List[dict[str, Any]] # Raw parts that we'll process manually - metadata: dict[str, Any] | None = None - history: Optional[List["FlexibleMessage"]] = None - - @model_validator(mode="after") - def normalize_parts(self): - """Convert parts with 'kind' to parts with 'type'.""" - normalized_parts = [] - for part in self.parts: - if isinstance(part, dict): - # Convert 'kind' to 'type' if needed - if "kind" in part and "type" not in part: - normalized_part = part.copy() - normalized_part["type"] = normalized_part.pop("kind") - else: - normalized_part = part - - # Validate the normalized part - try: - if normalized_part.get("type") == "text": - normalized_parts.append(TextPart.model_validate(normalized_part)) - elif normalized_part.get("type") == "file": - normalized_parts.append(FilePart.model_validate(normalized_part)) - elif normalized_part.get("type") == "data": - normalized_parts.append(DataPart.model_validate(normalized_part)) - else: - raise ValueError(f"Unknown part type: {normalized_part.get('type')}") - except Exception as e: - raise ValueError(f"Invalid part: {normalized_part}, error: {e}") - else: - normalized_parts.append(part) - - self.parts = normalized_parts - return self - - -class TaskStatus(BaseModel): - state: TaskState - message: Message | None = None - timestamp: datetime = Field(default_factory=datetime.now) - - @field_serializer("timestamp") - def serialize_dt(self, dt: datetime, _info): - return dt.isoformat() - - -class Artifact(BaseModel): - name: str | None = None - description: str | None = None - parts: List[Part] - metadata: dict[str, Any] | None = None - index: int = 0 - append: bool | None = None - lastChunk: bool | None = None - - -class Task(BaseModel): - id: str - 
sessionId: str | None = None - status: TaskStatus - artifacts: List[Artifact] | None = None - history: List[Message] | None = None - metadata: dict[str, Any] | None = None - contextId: str | None = None - - -class TaskStatusUpdateEvent(BaseModel): - id: str - status: TaskStatus - final: bool = False - metadata: dict[str, Any] | None = None - contextId: str | None = None - taskId: str | None = None - - -class TaskArtifactUpdateEvent(BaseModel): - id: str - artifact: Artifact - metadata: dict[str, Any] | None = None - contextId: str | None = None - taskId: str | None = None - - -class AuthenticationInfo(BaseModel): - model_config = ConfigDict(extra="allow") - - schemes: List[str] - credentials: str | None = None - - -class PushNotificationConfig(BaseModel): - url: str - token: str | None = None - authentication: AuthenticationInfo | None = None - - -class TaskIdParams(BaseModel): - id: str - metadata: dict[str, Any] | None = None - - -class TaskQueryParams(TaskIdParams): - historyLength: int | None = None - - -class TaskSendParams(BaseModel): - id: str - sessionId: str = Field(default_factory=lambda: uuid4().hex) - message: Message - acceptedOutputModes: Optional[List[str]] = None - pushNotification: PushNotificationConfig | None = None - historyLength: int | None = None - metadata: dict[str, Any] | None = None - - -class TaskPushNotificationConfig(BaseModel): - id: str - pushNotificationConfig: PushNotificationConfig - - -# RPC Messages - - -class JSONRPCMessage(BaseModel): - jsonrpc: Literal["2.0"] = "2.0" - id: int | str | None = Field(default_factory=lambda: uuid4().hex) - - -class JSONRPCRequest(JSONRPCMessage): - method: str - params: dict[str, Any] | None = None - - -class JSONRPCError(BaseModel): - code: int - message: str - data: Any | None = None - - -class JSONRPCResponse(JSONRPCMessage): - result: Any | None = None - error: JSONRPCError | None = None - - -class SendTaskRequest(JSONRPCRequest): - method: Literal["tasks/send"] = "tasks/send" - params: 
TaskSendParams - - -class SendTaskResponse(JSONRPCResponse): - result: Task | None = None - - -class SendTaskStreamingRequest(JSONRPCRequest): - method: Literal["tasks/sendSubscribe"] = "tasks/sendSubscribe" - params: TaskSendParams - - -class SendTaskStreamingResponse(JSONRPCResponse): - result: TaskStatusUpdateEvent | TaskArtifactUpdateEvent | None = None - - -class MessageStreamParams(BaseModel): - sessionId: str = Field(default_factory=lambda: uuid4().hex) - message: FlexibleMessage - metadata: dict[str, Any] | None = None - - -class MessageStreamRequest(JSONRPCRequest): - method: Literal["message/stream"] = "message/stream" - params: MessageStreamParams - - -class MessageStreamResponse(JSONRPCResponse): - result: Message | None = None - - -class SendStreamingMessageSuccessResponse(JSONRPCResponse): - result: Union[Task, TaskStatusUpdateEvent, TaskArtifactUpdateEvent] | None = None - - -class GetTaskRequest(JSONRPCRequest): - method: Literal["tasks/get"] = "tasks/get" - params: TaskQueryParams - - -class GetTaskResponse(JSONRPCResponse): - result: Task | None = None - - -class CancelTaskRequest(JSONRPCRequest): - method: Literal["tasks/cancel",] = "tasks/cancel" - params: TaskIdParams - - -class CancelTaskResponse(JSONRPCResponse): - result: Task | None = None - - -class SetTaskPushNotificationRequest(JSONRPCRequest): - method: Literal["tasks/pushNotification/set",] = "tasks/pushNotification/set" - params: TaskPushNotificationConfig - - -class SetTaskPushNotificationResponse(JSONRPCResponse): - result: TaskPushNotificationConfig | None = None - - -class GetTaskPushNotificationRequest(JSONRPCRequest): - method: Literal["tasks/pushNotification/get",] = "tasks/pushNotification/get" - params: TaskIdParams - - -class GetTaskPushNotificationResponse(JSONRPCResponse): - result: TaskPushNotificationConfig | None = None - - -class TaskResubscriptionRequest(JSONRPCRequest): - method: Literal["tasks/resubscribe",] = "tasks/resubscribe" - params: TaskIdParams - - 
-A2ARequest = TypeAdapter( - Annotated[ - Union[ - SendTaskRequest, - GetTaskRequest, - CancelTaskRequest, - SetTaskPushNotificationRequest, - GetTaskPushNotificationRequest, - TaskResubscriptionRequest, - SendTaskStreamingRequest, - MessageStreamRequest, - ], - Field(discriminator="method"), - ] -) - -# Error types - - -class JSONParseError(JSONRPCError): - code: int = -32700 - message: str = "Invalid JSON payload" - data: Any | None = None - - -class InvalidRequestError(JSONRPCError): - code: int = -32600 - message: str = "Request payload validation error" - data: Any | None = None - - -class MethodNotFoundError(JSONRPCError): - code: int = -32601 - message: str = "Method not found" - data: None = None - - -class InvalidParamsError(JSONRPCError): - code: int = -32602 - message: str = "Invalid parameters" - data: Any | None = None - - -class InternalError(JSONRPCError): - code: int = -32603 - message: str = "Internal error" - data: Any | None = None - - -class TaskNotFoundError(JSONRPCError): - code: int = -32001 - message: str = "Task not found" - data: None = None - - -class TaskNotCancelableError(JSONRPCError): - code: int = -32002 - message: str = "Task cannot be canceled" - data: None = None - - -class PushNotificationNotSupportedError(JSONRPCError): - code: int = -32003 - message: str = "Push Notification is not supported" - data: None = None - - -class UnsupportedOperationError(JSONRPCError): - code: int = -32004 - message: str = "This operation is not supported" - data: None = None - - -class ContentTypeNotSupportedError(JSONRPCError): - code: int = -32005 - message: str = "Incompatible content types" - data: None = None - - -class AgentProvider(BaseModel): - organization: str - url: str | None = None - - -class AgentCapabilities(BaseModel): - streaming: bool = False - pushNotifications: bool = False - stateTransitionHistory: bool = False - - -class AgentAuthentication(BaseModel): - schemes: List[str] - credentials: str | None = None - - -class 
AgentSkill(BaseModel): - id: str - name: str - description: str | None = None - tags: List[str] | None = None - examples: List[str] | None = None - inputModes: List[str] | None = None - outputModes: List[str] | None = None - - -class AgentCard(BaseModel): - name: str - description: str | None = None - url: str - provider: AgentProvider | None = None - version: str - documentationUrl: str | None = None - capabilities: AgentCapabilities - authentication: AgentAuthentication | None = None - defaultInputModes: List[str] = ["text"] - defaultOutputModes: List[str] = ["text"] - skills: List[AgentSkill] - - -class A2AClientError(Exception): - pass - - -class A2AClientHTTPError(A2AClientError): - def __init__(self, status_code: int, message: str): - self.status_code = status_code - self.message = message - super().__init__(f"HTTP Error {status_code}: {message}") - - -class A2AClientJSONError(A2AClientError): - def __init__(self, message: str): - self.message = message - super().__init__(f"JSON Error: {message}") - - -class MissingAPIKeyError(Exception): - """Exception for missing API key.""" - - pass diff --git a/mindsdb/api/a2a/common/utils/in_memory_cache.py b/mindsdb/api/a2a/common/utils/in_memory_cache.py deleted file mode 100644 index c50bd3900aa..00000000000 --- a/mindsdb/api/a2a/common/utils/in_memory_cache.py +++ /dev/null @@ -1,108 +0,0 @@ -"""In Memory Cache utility.""" - -import threading -import time -from typing import Any, Dict, Optional - - -class InMemoryCache: - """A thread-safe Singleton class to manage cache data. - - Ensures only one instance of the cache exists across the application. - """ - - _instance: Optional["InMemoryCache"] = None - _lock: threading.Lock = threading.Lock() - _initialized: bool = False - - def __new__(cls): - """Override __new__ to control instance creation (Singleton pattern). - - Uses a lock to ensure thread safety during the first instantiation. - - Returns: - The singleton instance of InMemoryCache. 
- """ - if cls._instance is None: - with cls._lock: - if cls._instance is None: - cls._instance = super().__new__(cls) - return cls._instance - - def __init__(self): - """Initialize the cache storage. - - Uses a flag (_initialized) to ensure this logic runs only on the very first - creation of the singleton instance. - """ - if not self._initialized: - with self._lock: - if not self._initialized: - self._cache_data: Dict[str, Dict[str, Any]] = {} - self._ttl: Dict[str, float] = {} - self._data_lock: threading.Lock = threading.Lock() - self._initialized = True - - def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None: - """Set a key-value pair. - - Args: - key: The key for the data. - value: The data to store. - ttl: Time to live in seconds. If None, data will not expire. - """ - with self._data_lock: - self._cache_data[key] = value - - if ttl is not None: - self._ttl[key] = time.time() + ttl - else: - if key in self._ttl: - del self._ttl[key] - - def get(self, key: str, default: Any = None) -> Any: - """Get the value associated with a key. - - Args: - key: The key for the data within the session. - default: The value to return if the session or key is not found. - - Returns: - The cached value, or the default value if not found. - """ - with self._data_lock: - if key in self._ttl and time.time() > self._ttl[key]: - del self._cache_data[key] - del self._ttl[key] - return default - return self._cache_data.get(key, default) - - def delete(self, key: str) -> None: - """Delete a specific key-value pair from a cache. - - Args: - key: The key to delete. - - Returns: - True if the key was found and deleted, False otherwise. - """ - - with self._data_lock: - if key in self._cache_data: - del self._cache_data[key] - if key in self._ttl: - del self._ttl[key] - return True - return False - - def clear(self) -> bool: - """Remove all data. - - Returns: - True if the data was cleared, False otherwise. 
- """ - with self._data_lock: - self._cache_data.clear() - self._ttl.clear() - return True - return False diff --git a/mindsdb/api/a2a/common/utils/push_notification_auth.py b/mindsdb/api/a2a/common/utils/push_notification_auth.py deleted file mode 100644 index aa25e70dab7..00000000000 --- a/mindsdb/api/a2a/common/utils/push_notification_auth.py +++ /dev/null @@ -1,131 +0,0 @@ -from jwcrypto import jwk -import uuid -from starlette.responses import JSONResponse -from starlette.requests import Request -from typing import Any - -import jwt -import time -import json -import hashlib -import httpx - -from jwt import PyJWK, PyJWKClient - -from mindsdb.utilities import log - -logger = log.getLogger(__name__) -AUTH_HEADER_PREFIX = "Bearer " - - -class PushNotificationAuth: - def _calculate_request_body_sha256(self, data: dict[str, Any]): - """Calculates the SHA256 hash of a request body. - - This logic needs to be same for both the agent who signs the payload and the client verifier. - """ - body_str = json.dumps( - data, - ensure_ascii=False, - allow_nan=False, - indent=None, - separators=(",", ":"), - ) - return hashlib.sha256(body_str.encode()).hexdigest() - - -class PushNotificationSenderAuth(PushNotificationAuth): - def __init__(self): - self.public_keys = [] - self.private_key_jwk: PyJWK = None - - @staticmethod - async def verify_push_notification_url(url: str) -> bool: - async with httpx.AsyncClient(timeout=10) as client: - try: - validation_token = str(uuid.uuid4()) - response = await client.get(url, params={"validationToken": validation_token}) - response.raise_for_status() - is_verified = response.text == validation_token - - logger.info(f"Verified push-notification URL: {url} => {is_verified}") - return is_verified - except Exception as e: - logger.warning(f"Error during sending push-notification for URL {url}: {e}") - - return False - - def generate_jwk(self): - key = jwk.JWK.generate(kty="RSA", size=2048, kid=str(uuid.uuid4()), use="sig") - 
self.public_keys.append(key.export_public(as_dict=True)) - self.private_key_jwk = PyJWK.from_json(key.export_private()) - - def handle_jwks_endpoint(self, _request: Request): - """Allow clients to fetch public keys.""" - return JSONResponse({"keys": self.public_keys}) - - def _generate_jwt(self, data: dict[str, Any]): - """JWT is generated by signing both the request payload SHA digest and time of token generation. - - Payload is signed with private key and it ensures the integrity of payload for client. - Including iat prevents from replay attack. - """ - - iat = int(time.time()) - - return jwt.encode( - { - "iat": iat, - "request_body_sha256": self._calculate_request_body_sha256(data), - }, - key=self.private_key_jwk, - headers={"kid": self.private_key_jwk.key_id}, - algorithm="RS256", - ) - - async def send_push_notification(self, url: str, data: dict[str, Any]): - jwt_token = self._generate_jwt(data) - headers = {"Authorization": f"Bearer {jwt_token}"} - async with httpx.AsyncClient(timeout=10) as client: - try: - response = await client.post(url, json=data, headers=headers) - response.raise_for_status() - logger.info(f"Push-notification sent for URL: {url}") - except Exception as e: - logger.warning(f"Error during sending push-notification for URL {url}: {e}") - - -class PushNotificationReceiverAuth(PushNotificationAuth): - def __init__(self): - self.public_keys_jwks = [] - self.jwks_client = None - - async def load_jwks(self, jwks_url: str): - self.jwks_client = PyJWKClient(jwks_url) - - async def verify_push_notification(self, request: Request) -> bool: - auth_header = request.headers.get("Authorization") - if not auth_header or not auth_header.startswith(AUTH_HEADER_PREFIX): - return False - - token = auth_header[len(AUTH_HEADER_PREFIX) :] - signing_key = self.jwks_client.get_signing_key_from_jwt(token) - - decode_token = jwt.decode( - token, - signing_key, - options={"require": ["iat", "request_body_sha256"]}, - algorithms=["RS256"], - ) - - 
actual_body_sha256 = self._calculate_request_body_sha256(await request.json()) - if actual_body_sha256 != decode_token["request_body_sha256"]: - # Payload signature does not match the digest in signed token. - raise ValueError("Invalid request body") - - if time.time() - decode_token["iat"] > 60 * 5: - # Do not allow push-notifications older than 5 minutes. - # This is to prevent replay attack. - raise ValueError("Token is expired") - - return True diff --git a/mindsdb/api/a2a/constants.py b/mindsdb/api/a2a/constants.py deleted file mode 100644 index a7d52a22010..00000000000 --- a/mindsdb/api/a2a/constants.py +++ /dev/null @@ -1,9 +0,0 @@ -TEXT_BY_SUBTYPE = { - "plan": "To determine the distribution of companies by size, we first need to identify the relevant table and column that contains information about company sizes. Let's start by listing the tables in the database to find the appropriate one.", - "query": "Sales_Data_Expert_Demo_Data.public.`call_summaries`, Sales_Data_Expert_Demo_Data.public.`prospects_details`", - "curate": "To find the distribution of companies by size, we should examine the `prospects_details` table, as it likely contains information about potential customers, including company size. 
Let's first check the schema of the `prospects_details` table to identify the relevant column for company size.", - "validate": "Table named `Sales_Data_Expert_Demo_Data.public.prospects_details`:\n\nSample with first 3 rows from table Sales_Data_Expert_Demo_Data.public.prospects_details in CSV format (dialect is 'excel'):\n\nid,created_at,first_name,last_name,email,company,job_title,company_size,use_case\r\n1,2025-01-16 13:55:03,Alexandra,Dixon,alexandra.dixon@harveypayneandhudson.com,\"Harvey, Payne and Hudson\",Manager,Medium company (500-999 employees),Looking into predictive analytics for customer behavior\r\n2,2025-01-28 08:01:46,David,Clark,david.clark@boyd-hernandez.com,Boyd-Hernandez,CEO,Medium company (500-999 employees),Looking into predictive analytics for customer behavior\r\n3,2025-02-03 03:37:12,Jason,Mcmillan,jason.mcmillan@stokesandsons.com,Stokes and Sons,Marketing Manager,Small company (100-499 employees),Interested in AI search over my data\r\n\n\nColumn data types: \n`id` : `MYSQL_DATA_TYPE.INT`,\t\n`created_at` : `MYSQL_DATA_TYPE.DATETIME`,\t\n`first_name` : `MYSQL_DATA_TYPE.TEXT`,\t\n`last_name` : `MYSQL_DATA_TYPE.TEXT`,\t\n`email` : `MYSQL_DATA_TYPE.TEXT`,\t\n`company` : `MYSQL_DATA_TYPE.TEXT`,\t\n`job_title` : `MYSQL_DATA_TYPE.TEXT`,\t\n`company_size` : `MYSQL_DATA_TYPE.TEXT`,\t\n`use_case` : `MYSQL_DATA_TYPE.TEXT`\n", - "respond": "The `prospects_details` table contains a `company_size` column, which is what we need to determine the distribution of companies by size. 
Let's first retrieve the distinct values in the `company_size` column to understand the categories available, and then calculate the distribution.", -} - -DEFAULT_STREAM_TIMEOUT = 300 diff --git a/mindsdb/api/a2a/task_manager.py b/mindsdb/api/a2a/task_manager.py deleted file mode 100644 index fea907215db..00000000000 --- a/mindsdb/api/a2a/task_manager.py +++ /dev/null @@ -1,576 +0,0 @@ -import time -import logging -import asyncio -from typing import AsyncIterable, Dict, Union - -from mindsdb.api.a2a.common.types import ( - SendTaskRequest, - TaskSendParams, - Message, - TaskStatus, - Artifact, - TaskStatusUpdateEvent, - TaskArtifactUpdateEvent, - TaskState, - Task, - SendTaskResponse, - InternalError, - JSONRPCResponse, - SendTaskStreamingRequest, - SendTaskStreamingResponse, - InvalidRequestError, - MessageStreamRequest, - SendStreamingMessageSuccessResponse, -) -from mindsdb.api.a2a.common.server.task_manager import InMemoryTaskManager -from mindsdb.api.a2a.agent import MindsDBAgent -from mindsdb.api.a2a.utils import to_serializable, convert_a2a_message_to_qa_format -from mindsdb.interfaces.agents.agents_controller import AgentsController - - -logger = logging.getLogger(__name__) - - -def to_question_format(messages): - """Convert A2A messages to a list of {"question": ...} dicts for agent compatibility.""" - out = [] - for msg in messages: - if "question" in msg: - out.append(msg) - elif "parts" in msg and isinstance(msg["parts"], list): - for part in msg["parts"]: - part_dict = to_serializable(part) - if part_dict.get("type") == "text" and "text" in part_dict: - out.append({"question": part_dict["text"]}) - return out - - -class AgentTaskManager(InMemoryTaskManager): - def __init__( - self, - project_name: str, - agent_name: str = None, - ): - super().__init__() - self.project_name = project_name - self.agent_name = agent_name - self.tasks = {} # Task storage - self.lock = asyncio.Lock() # Lock for task operations - - def _create_agent(self, user_info: Dict, 
agent_name: str = None) -> MindsDBAgent: - """Create a new MindsDBAgent instance for the given agent name.""" - if not agent_name: - raise ValueError("Agent name is required but was not provided in the request") - - return MindsDBAgent( - agent_name=agent_name, - project_name=self.project_name, - user_info=user_info, - ) - - async def _stream_generator( - self, request: SendTaskStreamingRequest, user_info: Dict - ) -> AsyncIterable[SendTaskStreamingResponse]: - task_send_params: TaskSendParams = request.params - query = self._get_user_query(task_send_params) - params = self._get_task_params(task_send_params) - agent_name = params["agent_name"] - streaming = params["streaming"] - - # Create and store the task first to ensure it exists - try: - task = await self.upsert_task(task_send_params) - logger.info(f"Task created/updated with history length: {len(task.history) if task.history else 0}") - except Exception as e: - logger.exception("Error creating task:") - error_result = to_serializable( - { - "id": request.id, - "error": to_serializable(InternalError(message=f"Error creating task: {e}")), - } - ) - yield error_result - return # Early return from generator - - agent = self._create_agent(user_info, agent_name) - - # Get the history from the task object (where it was properly extracted and stored) - history = task.history if task and task.history else [] - - if not streaming: - # If streaming is disabled, use invoke and return a single response - try: - result = agent.invoke(query, task_send_params.sessionId, history=history) - - # Use the parts from the agent response if available, or create them - if "parts" in result: - parts = result["parts"] - else: - result_text = result.get("content", "No response from MindsDB") - parts = [{"type": "text", "text": result_text}] - - # Check if we have structured data - if "data" in result and result["data"]: - parts.append( - { - "type": "data", - "data": result["data"], - "metadata": {"subtype": "json"}, - } - ) - - # 
Create and yield the final response - task_state = TaskState.COMPLETED - artifact = Artifact(parts=parts, index=0, append=False) - task_status = TaskStatus(state=task_state) - - # Update the task store - await self._update_store(task_send_params.id, task_status, [artifact]) - - # Yield the artifact update - yield to_serializable( - SendTaskStreamingResponse( - id=request.id, - result=to_serializable(TaskArtifactUpdateEvent(id=task_send_params.id, artifact=artifact)), - ) - ) - - # Yield the final status update - yield to_serializable( - SendTaskStreamingResponse( - id=request.id, - result=to_serializable( - TaskStatusUpdateEvent( - id=task_send_params.id, - status=to_serializable(TaskStatus(state=task_status.state)), - final=True, - ) - ), - ) - ) - return - - except Exception as e: - logger.exception("Error invoking agent:") - error_result = to_serializable( - { - "id": request.id, - "error": to_serializable( - JSONRPCResponse( - id=request.id, - error=to_serializable(InternalError(message=f"Error invoking agent: {e}")), - ) - ), - } - ) - yield error_result - return - - # If streaming is enabled (default), use the streaming implementation - try: - logger.debug(f"Entering agent.stream() at {time.time()}") - # Create A2A message structure and convert using centralized utility - a2a_message = task_send_params.message.model_dump() - logger.debug(f"History: {history}") - if history: - a2a_message["history"] = [msg.model_dump() if hasattr(msg, "model_dump") else msg for msg in history] - - # Convert to Q&A format using centralized utility function - all_messages = convert_a2a_message_to_qa_format(a2a_message) - - async for item in agent.streaming_invoke(all_messages, timeout=60): - # Clean up: Remove verbose debug logs, keep only errors and essential info - if isinstance(item, dict) and "artifact" in item and "parts" in item["artifact"]: - item["artifact"]["parts"] = [to_serializable(p) for p in item["artifact"]["parts"]] - yield to_serializable(item) - except 
TimeoutError as e: - logger.error(f"Timeout error while streaming the response: {e}") - error_text = "The request timed out. The agent is taking longer than expected to respond. Please try again or increase the timeout." - parts = [{"type": "text", "text": error_text}] - parts = [to_serializable(part) for part in parts] - artifact = { - "parts": parts, - "index": 0, - "append": False, - } - error_result = { - "id": request.id, - "error": { - "id": task_send_params.id, - "artifact": artifact, - "error_type": "timeout", - }, - } - yield error_result - except ConnectionError as e: - logger.error(f"Connection error while streaming the response: {e}") - error_text = "Failed to connect to the agent. Please check if the agent is running and accessible." - parts = [{"type": "text", "text": error_text}] - parts = [to_serializable(part) for part in parts] - artifact = { - "parts": parts, - "index": 0, - "append": False, - } - error_result = { - "id": request.id, - "error": { - "id": task_send_params.id, - "artifact": artifact, - "error_type": "connection", - }, - } - yield error_result - except Exception as e: - logger.exception("An error occurred while streaming the response:") - # Provide more specific error messages based on error type - if "API key" in str(e) or "authentication" in str(e).lower(): - error_text = f"Authentication error: {str(e)}" - error_category = "authentication" - elif "404" in str(e) or "not found" in str(e).lower(): - error_text = f"Resource not found: {str(e)}" - error_category = "not_found" - elif "rate limit" in str(e).lower() or "429" in str(e): - error_text = f"Rate limit exceeded: {str(e)}" - error_category = "rate_limit" - else: - error_text = f"An error occurred while streaming the response: {str(e)}" - error_category = "general" - - # Ensure all parts are plain dicts - parts = [{"type": "text", "text": error_text}] - parts = [to_serializable(part) for part in parts] - artifact = { - "parts": parts, - "index": 0, - "append": False, - } - 
error_result = { - "id": request.id, - "error": { - "id": task_send_params.id, - "artifact": artifact, - "error_type": error_category, - }, - } - yield error_result - - async def upsert_task(self, task_send_params: TaskSendParams) -> Task: - """Create or update a task in the task store. - - Args: - task_send_params: The parameters for the task. - - Returns: - The created or updated task. - """ - logger.info(f"Upserting task {task_send_params.id}") - async with self.lock: - task = self.tasks.get(task_send_params.id) - if task is None: - # Convert the message to a dict if it's not already one - message = task_send_params.message - message_dict = message.dict() if hasattr(message, "dict") else message - - # Get history from request if available - check both locations - history = [] - - # First check if history is at top level (task_send_params.history) - if hasattr(task_send_params, "history") and task_send_params.history: - # Convert each history item to dict if needed - for item in task_send_params.history: - item_dict = item.model_dump() if hasattr(item, "model_dump") else item - history.append(item_dict) - # Also check if history is nested under message (message.history) - elif hasattr(task_send_params.message, "history") and task_send_params.message.history: - for item in task_send_params.message.history: - item_dict = item.model_dump() if hasattr(item, "model_dump") else item - history.append(item_dict) - - # DO NOT add current message to history - it should be processed separately - # The current message will be extracted during streaming from task_send_params.message - - # Create a new task - task = Task( - id=task_send_params.id, - sessionId=task_send_params.sessionId, - status=TaskStatus(state=TaskState.SUBMITTED), - history=history, - artifacts=[], - ) - self.tasks[task_send_params.id] = task - else: - # Convert the message to a dict if it's not already one - message = task_send_params.message - message_dict = message.dict() if hasattr(message, "dict") else 
message - - # Update the existing task - if task.history is None: - task.history = [] - - # If we have new history from the request, use it - if hasattr(task_send_params, "history") and task_send_params.history: - # Convert each history item to dict if needed and ensure proper role - history = [] - for item in task_send_params.history: - item_dict = item.dict() if hasattr(item, "dict") else item - # Ensure the role is properly set - if "role" not in item_dict: - item_dict["role"] = "assistant" if "answer" in item_dict else "user" - history.append(item_dict) - task.history = history - - # Add current message to history - task.history.append(message_dict) - return task - - def _validate_request( - self, request: Union[SendTaskRequest, SendTaskStreamingRequest] - ) -> Union[None, JSONRPCResponse]: - """Validate the request and return an error response if invalid.""" - # Check if the request has the required parameters - if not hasattr(request, "params") or not request.params: - return JSONRPCResponse( - id=request.id, - error=InvalidRequestError(message="Missing params"), - ) - - # Check if the request has a message - if not hasattr(request.params, "message") or not request.params.message: - return JSONRPCResponse( - id=request.id, - error=InvalidRequestError(message="Missing message in params"), - ) - - # Check if the message has metadata - if not hasattr(request.params.message, "metadata") or not request.params.message.metadata: - return JSONRPCResponse( - id=request.id, - error=InvalidRequestError(message="Missing metadata in message"), - ) - - # Check if the agent name is provided in the metadata - metadata = request.params.message.metadata - agent_name = metadata.get("agent_name", metadata.get("agentName")) - if not agent_name: - return JSONRPCResponse( - id=request.id, - error=InvalidRequestError( - message="Agent name is required but was not provided in the request metadata" - ), - ) - - return None - - async def on_send_task(self, request: SendTaskRequest, 
user_info: Dict) -> SendTaskResponse: - error = self._validate_request(request) - if error: - return error - - return await self._invoke(request, user_info=user_info) - - async def on_send_task_subscribe( - self, request: SendTaskStreamingRequest, user_info: Dict - ) -> AsyncIterable[SendTaskStreamingResponse]: - error = self._validate_request(request) - if error: - logger.info(f"Yielding error at {time.time()} for invalid request: {error}") - yield to_serializable(SendTaskStreamingResponse(id=request.id, error=to_serializable(error.error))) - return - - # We can't await an async generator directly, so we need to use it as is - try: - logger.debug(f"Entering streaming path at {time.time()}") - async for response in self._stream_generator(request, user_info): - logger.debug(f"Yielding streaming response at {time.time()} with: {str(response)[:120]}") - yield response - except Exception as e: - # If an error occurs, yield an error response - logger.exception(f"Error in on_send_task_subscribe: {e}") - error_result = to_serializable( - { - "id": request.id, - "error": to_serializable(InternalError(message=f"Error processing streaming request: {e}")), - } - ) - yield error_result - - async def _update_store(self, task_id: str, status: TaskStatus, artifacts: list[Artifact]) -> Task: - async with self.lock: - try: - task = self.tasks[task_id] - except KeyError: - logger.error(f"Task {task_id} not found for updating the task") - # Create a new task with the provided ID if it doesn't exist - # This ensures we don't fail when a task is not found - task = Task( - id=task_id, - sessionId="recovery-session", # Use a placeholder session ID - messages=[], # No messages available - status=status, # Use the provided status - history=[], # No history available - ) - self.tasks[task_id] = task - - task.status = status - - # Store assistant's response in history if we have a message - if status.message and status.message.role == "agent": - if task.history is None: - task.history = [] - 
# Convert message to dict if needed - message_dict = status.message.dict() if hasattr(status.message, "dict") else status.message - # Ensure role is set to assistant - message_dict["role"] = "assistant" - task.history.append(message_dict) - - if artifacts is not None: - for artifact in artifacts: - if artifact.append and len(task.artifacts) > 0: - # Append to the last artifact - last_artifact = task.artifacts[-1] - for part in artifact.parts: - last_artifact.parts.append(part) - else: - # Add as a new artifact - task.artifacts.append(artifact) - return task - - def _get_user_query(self, task_send_params: TaskSendParams) -> str: - """Extract the user query from the task parameters.""" - message = task_send_params.message - if not message.parts: - return "" - - # Find the first text part - for part in message.parts: - if part.type == "text": - return part.text - - # If no text part found, return empty string - return "" - - def _get_task_params(self, task_send_params: TaskSendParams) -> dict: - """Extract common parameters from task metadata.""" - metadata = task_send_params.message.metadata or {} - # Check for both agent_name and agentName in the metadata - agent_name = metadata.get("agent_name", metadata.get("agentName")) - return { - "agent_name": agent_name, - "streaming": metadata.get("streaming", True), - "session_id": task_send_params.sessionId, - } - - async def _invoke(self, request: SendTaskRequest, user_info: Dict) -> SendTaskResponse: - task_send_params: TaskSendParams = request.params - query = self._get_user_query(task_send_params) - params = self._get_task_params(task_send_params) - agent_name = params["agent_name"] - streaming = params["streaming"] - agent = self._create_agent(user_info, agent_name) - - try: - # Get the history from the task - task = self.tasks.get(task_send_params.id) - history = task.history if task and task.history else [] - - # Always use streaming internally, but handle the response differently based on the streaming parameter - 
all_parts = [] - final_metadata = {} - - # Create a streaming generator - stream_gen = agent.stream(query, task_send_params.sessionId, history=history) - - if streaming: - # For streaming mode, we'll use the streaming endpoint instead - # Just create a minimal response to acknowledge the request - task_state = TaskState.WORKING - task = await self._update_store(task_send_params.id, TaskStatus(state=task_state), []) - return to_serializable(SendTaskResponse(id=request.id, result=task)) - else: - # For non-streaming mode, collect all chunks into a single response - async for chunk in stream_gen: - # Extract parts if they exist - if "parts" in chunk and chunk["parts"]: - all_parts.extend(chunk["parts"]) - elif "content" in chunk: - all_parts.append({"type": "text", "text": chunk["content"]}) - - # Extract metadata if it exists - if "metadata" in chunk: - final_metadata.update(chunk["metadata"]) - - # If we didn't get any parts, create a default part - if not all_parts: - all_parts = [{"type": "text", "text": "No response from MindsDB"}] - - # Create the final response - task_state = TaskState.COMPLETED - task = await self._update_store( - task_send_params.id, - TaskStatus( - state=task_state, - message=Message(role="agent", parts=all_parts, metadata=final_metadata), - ), - [Artifact(parts=all_parts)], - ) - return to_serializable(SendTaskResponse(id=request.id, result=task)) - except Exception as e: - logger.exception("Error invoking agent:") - result_text = f"Error invoking agent: {e}" - parts = [{"type": "text", "text": result_text}] - - task_state = TaskState.FAILED - task = await self._update_store( - task_send_params.id, - TaskStatus(state=task_state, message=Message(role="agent", parts=parts)), - [Artifact(parts=parts)], - ) - return to_serializable(SendTaskResponse(id=request.id, result=task)) - - async def on_message_stream( - self, request: MessageStreamRequest, user_info: Dict - ) -> Union[AsyncIterable[SendStreamingMessageSuccessResponse], JSONRPCResponse]: 
- """ - Handle message streaming requests. - """ - logger.info(f"Processing message stream request for session {request.params.sessionId}") - - query = self._get_user_query(request.params) - params = self._get_task_params(request.params) - - try: - task_id = f"msg_stream_{request.params.sessionId}_{request.id}" - context_id = f"ctx_{request.params.sessionId}" - message_id = f"msg_{request.id}" - - agents_controller = AgentsController() - existing_agent = agents_controller.get_agent(params["agent_name"]) - resp = agents_controller.get_completion(existing_agent, [{"question": query}]) - response_message = resp["answer"][0] - - response_message = Message( - role="agent", parts=[{"type": "text", "text": response_message}], metadata={}, messageId=message_id - ) - - task_status = TaskStatus(state=TaskState.COMPLETED, message=response_message) - - task_status_update = TaskStatusUpdateEvent( - id=task_id, - status=task_status, - final=True, - metadata={"message_stream": True}, - contextId=context_id, - taskId=task_id, - ) - - async def message_stream_generator(): - yield to_serializable(SendStreamingMessageSuccessResponse(id=request.id, result=task_status_update)) - - return message_stream_generator() - - except Exception as e: - logger.error(f"Error processing message stream: {e}") - return SendStreamingMessageSuccessResponse( - id=request.id, error=InternalError(message=f"Error processing message stream: {str(e)}") - ) diff --git a/mindsdb/api/a2a/utils.py b/mindsdb/api/a2a/utils.py deleted file mode 100644 index c8b3898725c..00000000000 --- a/mindsdb/api/a2a/utils.py +++ /dev/null @@ -1,84 +0,0 @@ -from typing import Dict, List -from mindsdb.utilities.log import getLogger - -logger = getLogger(__name__) - - -def to_serializable(obj): - # Primitives - if isinstance(obj, (str, int, float, bool, type(None))): - return obj - # Pydantic v2 - if hasattr(obj, "model_dump"): - return to_serializable(obj.model_dump(exclude_none=True)) - # Pydantic v1 - if hasattr(obj, "dict"): - 
return to_serializable(obj.dict(exclude_none=True)) - # Custom classes with __dict__ - if hasattr(obj, "__dict__"): - return {k: to_serializable(v) for k, v in vars(obj).items() if not k.startswith("_")} - # Dicts - if isinstance(obj, dict): - return {k: to_serializable(v) for k, v in obj.items()} - # Lists, Tuples, Sets - if isinstance(obj, (list, tuple, set)): - return [to_serializable(v) for v in obj] - # Fallback: string - return str(obj) - - -def convert_a2a_message_to_qa_format(a2a_message: Dict) -> List[Dict[str, str]]: - """ - Convert A2A message format to question/answer format. - - This is the format that the langchain agent expects and ensure effective multi-turn conversation - - Args: - a2a_message: A2A message containing history and current message parts - - Returns: - List of messages in question/answer format - """ - converted_messages = [] - - # Process conversation history first - if "history" in a2a_message and a2a_message["history"] is not None: - for hist_msg in a2a_message["history"]: - if hist_msg.get("role") == "user": - # Extract text from parts - text = "" - for part in hist_msg.get("parts", []): - if part.get("type") == "text": - text = part.get("text", "") - break - # Create question with empty answer initially - converted_messages.append({"question": text, "answer": ""}) - elif hist_msg.get("role") in ["agent", "assistant"]: - # Extract text from parts - text = "" - for part in hist_msg.get("parts", []): - if part.get("type") == "text": - text = part.get("text", "") - break - # Pair with the most recent question that has empty answer - paired = False - for i in range(len(converted_messages) - 1, -1, -1): - if converted_messages[i].get("answer") == "": - converted_messages[i]["answer"] = text - paired = True - break - - if not paired: - logger.warning("Could not pair agent response with question (no empty answer found)") - - logger.debug(f"Converted {len(a2a_message['history'])} A2A history messages to Q&A format") - - # Add current 
message as final question with empty answer - current_text = "" - for part in a2a_message.get("parts", []): - if part.get("type") == "text": - current_text = part.get("text", "") - break - converted_messages.append({"question": current_text, "answer": ""}) - - return converted_messages diff --git a/mindsdb/api/common/__init__.py b/mindsdb/api/common/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/api/common/middleware.py b/mindsdb/api/common/middleware.py deleted file mode 100644 index 6fb93380191..00000000000 --- a/mindsdb/api/common/middleware.py +++ /dev/null @@ -1,193 +0,0 @@ -import os -import time -import hmac -import secrets -import hashlib -from collections import deque -from http import HTTPStatus -from typing import Optional - -from starlette.responses import JSONResponse -from starlette.requests import Request -from starlette.types import ASGIApp, Receive, Scope, Send - -from mindsdb.utilities import log -from mindsdb.utilities.config import config - -logger = log.getLogger(__name__) - -SECRET_KEY = os.environ.get("AUTH_SECRET_KEY") or secrets.token_urlsafe(32) -# We store token (fingerprints) in memory, which means everyone is logged out if the process restarts -TOKENS = [] - - -def get_pat_fingerprint(token: str) -> str: - """Hash the token with HMAC-SHA256 using secret_key as pepper.""" - return hmac.new(SECRET_KEY.encode(), token.encode(), hashlib.sha256).hexdigest() - - -if config["auth"]["token"]: - TOKENS.append(get_pat_fingerprint(config["auth"]["token"])) - - -def generate_pat() -> str: - logger.debug("Generating new auth token") - token = "pat_" + secrets.token_urlsafe(32) - TOKENS.append(get_pat_fingerprint(token)) - return token - - -def verify_pat(raw_token: str) -> bool: - """Verify if the raw_token matches a stored fingerprint. - Returns token_id if valid, None if not. 
- """ - if not raw_token: - return False - fp = get_pat_fingerprint(raw_token) - for stored_fp in TOKENS: - if hmac.compare_digest(fp, stored_fp): - return True - return False - - -def revoke_pat(raw_token: str) -> bool: - """Revoke raw_token from active tokens""" - if not raw_token: - return False - fp = get_pat_fingerprint(raw_token) - for stored_fp in TOKENS: - if hmac.compare_digest(fp, stored_fp): - TOKENS.remove(stored_fp) - return True - return False - - -class PATAuthMiddleware: - """Pure ASGI middleware (compatible with SSE / streaming responses). - The class is not inherited from starlette.middleware.base.BaseHTTPMiddleware - bacause it collect responses to buffer, which is not good for streaming - """ - - def __init__(self, app: ASGIApp) -> None: - self.app = app - - @staticmethod - def _extract_bearer(headers: dict) -> Optional[str]: - h = headers.get("authorization") - if not h or not h.startswith("Bearer "): - return None - return h.split(" ", 1)[1].strip() or None - - async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: - if scope["type"] != "http": - await self.app(scope, receive, send) - return - - if config.get("auth", {}).get("http_auth_enabled", False) is False: - await self.app(scope, receive, send) - return - - if scope.get("method") == "OPTIONS": - await self.app(scope, receive, send) - return - - request = Request(scope) - token = self._extract_bearer(dict(request.headers)) - if not token or not verify_pat(token): - response = JSONResponse({"detail": "Unauthorized"}, status_code=HTTPStatus.UNAUTHORIZED) - await response(scope, receive, send) - return - - scope.setdefault("state", {})["user"] = config["auth"].get("username") - await self.app(scope, receive, send) - - -class RateLimitMiddleware: - """Rate limiting middleware using a sliding window counter. 
Tracks requests per client IP.""" - - def __init__(self, app: ASGIApp, requests_per_minute: int) -> None: - self.app = app - self.requests_per_minute = requests_per_minute - self._window = 60.0 # seconds - self._counters: dict[str, deque] = {} - - def _get_client_key(self, scope: Scope) -> str: - client = scope.get("client") - if client: - return client[0] - return "unknown" - - async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: - if scope["type"] != "http": - await self.app(scope, receive, send) - return - - if scope.get("method") == "OPTIONS": - await self.app(scope, receive, send) - return - - # Clients usually repeat this request until - # the connection is established, so no rate limit it. - if scope.get("method") == "GET" and scope.get("path", "").endswith("/sse"): - await self.app(scope, receive, send) - return - - client_key = self._get_client_key(scope) - now = time.monotonic() - window_start = now - self._window - - timestamps = self._counters.setdefault(client_key, deque()) - - # Evict timestamps outside the current window - while timestamps and timestamps[0] <= window_start: - timestamps.popleft() - - if len(timestamps) >= self.requests_per_minute: - retry_after = int(self._window - (now - timestamps[0])) + 1 - else: - retry_after = None - timestamps.append(now) - - if retry_after is not None: - response = JSONResponse( - {"detail": f"Too Many Requests, retry after {retry_after} seconds"}, - status_code=HTTPStatus.TOO_MANY_REQUESTS, - headers={"Retry-After": str(retry_after)}, - ) - await response(scope, receive, send) - return - - stale_keys = [k for k, ts in self._counters.items() if not ts or ts[-1] <= window_start] - for k in stale_keys: - del self._counters[k] - - await self.app(scope, receive, send) - - -# Used by mysql protocol -def check_auth(username, password, scramble_func, salt, company_id, user_id, config): - try: - hardcoded_user = config["auth"].get("username") - hardcoded_password = 
config["auth"].get("password") - if hardcoded_password is None: - hardcoded_password = "" - hardcoded_password_hash = scramble_func(hardcoded_password, salt) - hardcoded_password = hardcoded_password.encode() - - if password is None: - password = "" - if isinstance(password, str): - password = password.encode() - - if username != hardcoded_user: - logger.warning(f"Check auth, user={username}: user mismatch") - return {"success": False} - - if password != hardcoded_password and password != hardcoded_password_hash: - logger.warning(f"check auth, user={username}: password mismatch") - return {"success": False} - - logger.info(f"Check auth, user={username}: Ok") - return {"success": True, "username": username, "company_id": company_id, "user_id": user_id} - except Exception: - logger.exception(f"Check auth, user={username}: ERROR") diff --git a/mindsdb/api/executor/__init__.py b/mindsdb/api/executor/__init__.py deleted file mode 100644 index 8b137891791..00000000000 --- a/mindsdb/api/executor/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/mindsdb/api/executor/command_executor.py b/mindsdb/api/executor/command_executor.py deleted file mode 100644 index 25d8858f458..00000000000 --- a/mindsdb/api/executor/command_executor.py +++ /dev/null @@ -1,2029 +0,0 @@ -import datetime -from pathlib import Path -from textwrap import dedent -from typing import Optional -from functools import reduce - -import pandas as pd -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast.mindsdb import AlterDatabase -from mindsdb_sql_parser.ast import ( - Alter, - ASTNode, - BinaryOperation, - CommitTransaction, - Constant, - CreateTable, - Delete, - Describe, - DropDatabase, - DropTables, - DropView, - Explain, - Identifier, - Insert, - NativeQuery, - Operation, - RollbackTransaction, - Select, - Set, - Show, - Star, - StartTransaction, - Union, - Update, - Use, - Tuple, - Function, - Variable, - Intersect, - Except, - Parameter, - NullConstant, -) - -# typed models -from 
mindsdb_sql_parser.ast.mindsdb import ( - AlterView, - CreateAgent, - CreateAnomalyDetectionModel, - CreateChatBot, - CreateDatabase, - CreateJob, - CreateKnowledgeBase, - AlterKnowledgeBase, - CreateMLEngine, - CreatePredictor, - CreateTrigger, - CreateView, - CreateKnowledgeBaseIndex, - EvaluateKnowledgeBase, - DropAgent, - DropChatBot, - DropDatasource, - DropJob, - DropKnowledgeBase, - DropMLEngine, - DropPredictor, - DropTrigger, - Evaluate, - FinetunePredictor, - RetrainPredictor, - UpdateAgent, - UpdateChatBot, -) - -import mindsdb.utilities.profiler as profiler - -from mindsdb.api.executor.sql_query.result_set import ResultSet -from mindsdb.utilities.types.column import Column -from mindsdb.api.executor.sql_query import SQLQuery -from mindsdb.api.executor.data_types.answer import ExecuteAnswer -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import ( - CHARSET_NUMBERS, - SERVER_VARIABLES, - TYPES, -) - -from mindsdb.api.executor.exceptions import ( - ExecutorException, - BadDbError, - NotSupportedYet, - WrongArgumentError, - TableNotExistError, -) -from mindsdb.api.executor.utilities.functions import download_file -from mindsdb.api.executor.utilities.sql import query_df -from mindsdb.integrations.libs.const import ( - HANDLER_CONNECTION_ARG_TYPE, - PREDICTOR_STATUS, -) -from mindsdb.integrations.utilities.query_traversal import query_traversal -from mindsdb.integrations.libs.response import HandlerStatusResponse -from mindsdb.interfaces.chatbot.chatbot_controller import ChatBotController -from mindsdb.interfaces.database.projects import ProjectController -from mindsdb.interfaces.jobs.jobs_controller import JobsController -from mindsdb.interfaces.model.functions import ( - get_model_record, - get_model_records, - get_predictor_integration, -) -from mindsdb.interfaces.query_context.context_controller import query_context_controller -from mindsdb.interfaces.triggers.triggers_controller import TriggersController -from 
mindsdb.interfaces.variables.variables_controller import variables_controller -from mindsdb.utilities.context import context as ctx -from mindsdb.utilities.functions import mark_process, resolve_model_identifier, get_handler_install_message -from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -def _get_show_where( - statement: ASTNode, - from_name: Optional[str] = None, - like_name: Optional[str] = None, - initial: Optional[ASTNode] = None, -) -> ASTNode: - """combine all possible show filters to single 'where' condition - SHOW category [FROM name] [LIKE filter] [WHERE filter] - - Args: - statement (ASTNode): 'show' query statement - from_name (str): name of column for 'from' filter - like_name (str): name of column for 'like' filter, - initial (ASTNode): initial 'where' filter - Returns: - ASTNode: 'where' statemnt - """ - where = [] - if initial is not None: - where.append(initial) - if statement.from_table is not None and from_name is not None: - where.append( - BinaryOperation( - "=", - args=[Identifier(from_name), Constant(statement.from_table.parts[-1])], - ) - ) - if statement.like is not None and like_name is not None: - where.append(BinaryOperation("like", args=[Identifier(like_name), Constant(statement.like)])) - if statement.where is not None: - where.append(statement.where) - - if len(where) > 0: - return reduce(lambda prev, next: BinaryOperation("and", args=[prev, next]), where) - return None - - -def match_one_part_name(identifier: Identifier, ensure_lower_case: bool = False) -> str: - """Extract a single-part name from an Identifier object, optionally ensuring it is lowercase. - - Args: - identifier (Identifier): The identifier to extract the name from. Must contain exactly one part. - ensure_lower_case (bool, optional): If True, raises ValueError if the name is not lowercase. Defaults to False. 
- - Returns: - str: The extracted name, converted to lowercase if not quoted. - - Raises: - ValueError: If the identifier does not contain exactly one part, or if ensure_lower_case is True and the name is not lowercase. - """ - match identifier.parts, identifier.is_quoted: - case [name], [is_quoted]: - ... - case _: - raise ValueError(f"Only single-part names are allowed: {identifier}") - if not is_quoted: - name = name.lower() - if ensure_lower_case and not name.islower(): - raise ValueError(f"The name must be in lowercase: {identifier}") - return name - - -def match_two_part_name( - identifier: Identifier, ensure_lower_case: bool = False, default_db_name: str | None = None -) -> tuple[str, str]: - """Extract a (database, name) tuple from an Identifier object that may have one or two parts. - - Args: - identifier (Identifier): The identifier to extract names from. Must contain one or two parts. - ensure_lower_case (bool, optional): If True, raises ValueError if the name part is not lowercase. Defaults to False. - default_db_name (str | None, optional): The default database name to use if only one part is provided. Defaults to None. - - Returns: - tuple[str, str]: A tuple of (database_name, name), where database_name may be None if not provided and no default is given. - - Raises: - ValueError: If the identifier does not contain one or two parts, or if ensure_lower_case is True and the name is not lowercase. - """ - db_name = None - - match identifier.parts, identifier.is_quoted: - case [name], [is_quoted]: - ... 
- case [db_name, name], [db_is_quoted, is_quoted]: - if not db_is_quoted: - db_name = db_name.lower() - case _: - raise ValueError(f"Only single-part or two-part names are allowed: {identifier}") - if not is_quoted: - name = name.lower() - if ensure_lower_case and not name.islower(): - raise ValueError(f"The name must be in lowercase: {identifier}") - if db_name is None: - db_name = default_db_name - return db_name, name - - -def apply_parameters(statement, params): - def fill_parameters(node, **kwargs): - if isinstance(node, Parameter): - if node.value in params: - value = params[node.value] - if value is None: - return NullConstant() - if isinstance(value, list): - return Tuple([Constant(i) for i in value]) - return Constant(value) - - query_traversal(statement, fill_parameters) - - -class ExecuteCommands: - def __init__(self, session, context=None): - if context is None: - context = {} - - self.context = context - self.session = session - - self.charset_text_type = CHARSET_NUMBERS["utf8_general_ci"] - self.datahub = session.datahub - - @profiler.profile() - def execute_command(self, statement: ASTNode, database_name: str = None) -> ExecuteAnswer: - sql: str = statement.to_string() - sql_lower: str = sql.lower() - - if database_name is None: - database_name = self.session.database - - if ctx.params: - apply_parameters(statement, ctx.params) - - statement_type = type(statement) - if statement_type is CreateDatabase: - return self.answer_create_database(statement) - elif statement_type is CreateMLEngine: - return self.answer_create_ml_engine(statement) - elif statement_type is DropMLEngine: - return self.answer_drop_ml_engine(statement) - elif statement_type is DropPredictor: - return self.answer_drop_model(statement, database_name) - elif statement_type is DropTables: - return self.answer_drop_tables(statement, database_name) - elif statement_type is DropDatasource or statement_type is DropDatabase: - return self.answer_drop_database(statement) - elif 
statement_type is AlterDatabase: - return self.answer_alter_database(statement) - elif statement_type is Describe: - # NOTE in sql 'describe table' is same as 'show columns' - obj_type = statement.type - - if obj_type is None or obj_type.upper() in ("MODEL", "PREDICTOR"): - return self.answer_describe_predictor(statement.value, database_name) - else: - return self.answer_describe_object(obj_type.upper(), statement.value, database_name) - - elif statement_type is RetrainPredictor: - return self.answer_retrain_predictor(statement, database_name) - elif statement_type is FinetunePredictor: - return self.answer_finetune_predictor(statement, database_name) - elif statement_type is Show: - sql_category = statement.category.lower() - if hasattr(statement, "modes"): - if isinstance(statement.modes, list) is False: - statement.modes = [] - statement.modes = [x.upper() for x in statement.modes] - if sql_category == "ml_engines": - new_statement = Select( - targets=[Star()], - from_table=Identifier(parts=["information_schema", "ml_engines"]), - where=_get_show_where(statement, like_name="name"), - ) - - query = SQLQuery(new_statement, session=self.session, database=database_name) - return self.answer_select(query) - elif sql_category == "handlers": - new_statement = Select( - targets=[Star()], - from_table=Identifier(parts=["information_schema", "handlers"]), - where=_get_show_where(statement, like_name="name"), - ) - - query = SQLQuery(new_statement, session=self.session, database=database_name) - return self.answer_select(query) - elif sql_category == "plugins": - if statement.where is not None or statement.like: - raise ExecutorException("'SHOW PLUGINS' query should be used without filters") - new_statement = Select( - targets=[Star()], - from_table=Identifier(parts=["information_schema", "PLUGINS"]), - ) - query = SQLQuery(new_statement, session=self.session, database=database_name) - return self.answer_select(query) - elif sql_category in ("databases", "schemas"): - 
new_statement = Select( - targets=[Identifier(parts=["NAME"], alias=Identifier("Database"))], - from_table=Identifier(parts=["information_schema", "DATABASES"]), - where=_get_show_where(statement, like_name="Database"), - ) - - if "FULL" in statement.modes: - new_statement.targets.extend( - [ - Identifier(parts=["TYPE"], alias=Identifier("TYPE")), - Identifier(parts=["ENGINE"], alias=Identifier("ENGINE")), - ] - ) - - query = SQLQuery(new_statement, session=self.session, database=database_name) - return self.answer_select(query) - elif sql_category in ("tables", "full tables"): - schema = database_name or "mindsdb" - if statement.from_table is not None and statement.in_table is not None: - raise ExecutorException( - "You have an error in your SQL syntax: 'from' and 'in' cannot be used together" - ) - - if statement.from_table is not None: - schema = statement.from_table.parts[-1] - statement.from_table = None - if statement.in_table is not None: - schema = statement.in_table.parts[-1] - statement.in_table = None - - table_types = [Constant(t) for t in ["MODEL", "BASE TABLE", "SYSTEM VIEW", "VIEW"]] - where = BinaryOperation( - "and", - args=[ - BinaryOperation("=", args=[Identifier("table_schema"), Constant(schema)]), - BinaryOperation("in", args=[Identifier("table_type"), Tuple(table_types)]), - ], - ) - - new_statement = Select( - targets=[ - Identifier( - parts=["table_name"], - alias=Identifier(f"Tables_in_{schema}"), - ) - ], - from_table=Identifier(parts=["information_schema", "TABLES"]), - where=_get_show_where(statement, like_name=f"Tables_in_{schema}", initial=where), - ) - - if "FULL" in statement.modes: - new_statement.targets.append(Identifier(parts=["TABLE_TYPE"], alias=Identifier("Table_type"))) - - query = SQLQuery(new_statement, session=self.session, database=database_name) - return self.answer_select(query) - elif sql_category in ( - "variables", - "session variables", - "session status", - "global variables", - ): - new_statement = Select( - 
targets=[ - Identifier(parts=["Variable_name"]), - Identifier(parts=["Value"]), - ], - from_table=Identifier(parts=["dataframe"]), - where=_get_show_where(statement, like_name="Variable_name"), - ) - - data = {} - is_session = "session" in sql_category - for var_name, var_data in SERVER_VARIABLES.items(): - var_name = var_name.replace("@@", "") - if is_session and var_name.startswith("session.") is False: - continue - if var_name.startswith("session.") or var_name.startswith("GLOBAL."): - name = var_name.replace("session.", "").replace("GLOBAL.", "") - data[name] = var_data[0] - elif var_name not in data: - data[var_name] = var_data[0] - - df = pd.DataFrame(data.items(), columns=["Variable_name", "Value"]) - df2 = query_df(df, new_statement) - - return ExecuteAnswer(data=ResultSet.from_df(df2, table_name="session_variables")) - elif sql_category == "search_path": - return ExecuteAnswer( - data=ResultSet( - columns=[Column(name="search_path", table_name="search_path", type="str")], - values=[['"$user", public']], - ) - ) - elif "show status like 'ssl_version'" in sql_lower: - return ExecuteAnswer( - data=ResultSet( - columns=[ - Column(name="Value", table_name="session_variables", type="str"), - Column(name="Value", table_name="session_variables", type="str"), - ], - values=[["Ssl_version", "TLSv1.1"]], - ) - ) - elif sql_category in ("function status", "procedure status"): - # SHOW FUNCTION STATUS WHERE Db = 'MINDSDB'; - # SHOW PROCEDURE STATUS WHERE Db = 'MINDSDB' - # SHOW FUNCTION STATUS WHERE Db = 'MINDSDB' AND Name LIKE '%'; - return self.answer_function_status() - elif sql_category in ("index", "keys", "indexes"): - # INDEX | INDEXES | KEYS are synonyms - # https://dev.mysql.com/doc/refman/8.0/en/show-index.html - new_statement = Select( - targets=[ - Identifier("TABLE_NAME", alias=Identifier("Table")), - Identifier("NON_UNIQUE", alias=Identifier("Non_unique")), - Identifier("INDEX_NAME", alias=Identifier("Key_name")), - Identifier("SEQ_IN_INDEX", 
alias=Identifier("Seq_in_index")), - Identifier("COLUMN_NAME", alias=Identifier("Column_name")), - Identifier("COLLATION", alias=Identifier("Collation")), - Identifier("CARDINALITY", alias=Identifier("Cardinality")), - Identifier("SUB_PART", alias=Identifier("Sub_part")), - Identifier("PACKED", alias=Identifier("Packed")), - Identifier("NULLABLE", alias=Identifier("Null")), - Identifier("INDEX_TYPE", alias=Identifier("Index_type")), - Identifier("COMMENT", alias=Identifier("Comment")), - Identifier("INDEX_COMMENT", alias=Identifier("Index_comment")), - Identifier("IS_VISIBLE", alias=Identifier("Visible")), - Identifier("EXPRESSION", alias=Identifier("Expression")), - ], - from_table=Identifier(parts=["information_schema", "STATISTICS"]), - where=statement.where, - ) - query = SQLQuery(new_statement, session=self.session, database=database_name) - return self.answer_select(query) - # FIXME if have answer on that request, then DataGrip show warning '[S0022] Column 'Non_unique' not found.' - elif "show create table" in sql_lower: - # SHOW CREATE TABLE `MINDSDB`.`predictors` - table = sql[sql.rfind(".") + 1 :].strip(" .;\n\t").replace("`", "") - return self.answer_show_create_table(table) - elif sql_category in ("character set", "charset"): - new_statement = Select( - targets=[ - Identifier("CHARACTER_SET_NAME", alias=Identifier("Charset")), - Identifier("DEFAULT_COLLATE_NAME", alias=Identifier("Description")), - Identifier("DESCRIPTION", alias=Identifier("Default collation")), - Identifier("MAXLEN", alias=Identifier("Maxlen")), - ], - from_table=Identifier(parts=["INFORMATION_SCHEMA", "CHARACTER_SETS"]), - where=_get_show_where(statement, like_name="CHARACTER_SET_NAME"), - ) - query = SQLQuery(new_statement, session=self.session, database=database_name) - return self.answer_select(query) - elif sql_category == "warnings": - return self.answer_show_warnings() - elif sql_category == "engines": - new_statement = Select( - targets=[Star()], - 
from_table=Identifier(parts=["information_schema", "ENGINES"]), - ) - query = SQLQuery(new_statement, session=self.session, database=database_name) - return self.answer_select(query) - elif sql_category == "collation": - new_statement = Select( - targets=[ - Identifier("COLLATION_NAME", alias=Identifier("Collation")), - Identifier("CHARACTER_SET_NAME", alias=Identifier("Charset")), - Identifier("ID", alias=Identifier("Id")), - Identifier("IS_DEFAULT", alias=Identifier("Default")), - Identifier("IS_COMPILED", alias=Identifier("Compiled")), - Identifier("SORTLEN", alias=Identifier("Sortlen")), - Identifier("PAD_ATTRIBUTE", alias=Identifier("Pad_attribute")), - ], - from_table=Identifier(parts=["INFORMATION_SCHEMA", "COLLATIONS"]), - where=_get_show_where(statement, like_name="Collation"), - ) - query = SQLQuery(new_statement, session=self.session, database=database_name) - return self.answer_select(query) - elif sql_category == "table status": - # TODO improve it - # SHOW TABLE STATUS LIKE 'table' - table_name = None - if statement.like is not None: - table_name = statement.like - # elif condition == 'from' and type(expression) == Identifier: - # table_name = expression.parts[-1] - if table_name is None: - err_str = f"Can't determine table name in query: {sql}" - logger.warning(err_str) - raise TableNotExistError(err_str) - return self.answer_show_table_status(table_name) - elif sql_category == "columns": - is_full = statement.modes is not None and "FULL" in statement.modes - return self.answer_show_columns( - statement.from_table, - statement.where, - statement.like, - is_full=is_full, - database_name=database_name, - ) - - elif sql_category in ( - "agents", - "jobs", - "skills", - "chatbots", - "triggers", - "views", - "knowledge_bases", - "knowledge bases", - "predictors", - "models", - ): - if sql_category == "knowledge bases": - sql_category = "knowledge_bases" - - if sql_category == "predictors": - sql_category = "models" - - db_name = database_name - if 
statement.from_table is not None: - db_name = statement.from_table.parts[-1] - - where = BinaryOperation(op="=", args=[Identifier("project"), Constant(db_name)]) - - select_statement = Select( - targets=[Star()], - from_table=Identifier(parts=["information_schema", sql_category]), - where=_get_show_where(statement, like_name="name", initial=where), - ) - query = SQLQuery(select_statement, session=self.session) - return self.answer_select(query) - - elif sql_category == "projects": - where = BinaryOperation(op="=", args=[Identifier("type"), Constant("project")]) - select_statement = Select( - targets=[Identifier(parts=["NAME"], alias=Identifier("project"))], - from_table=Identifier(parts=["information_schema", "DATABASES"]), - where=_get_show_where(statement, like_name="project", from_name="project", initial=where), - ) - - query = SQLQuery(select_statement, session=self.session) - return self.answer_select(query) - else: - raise NotSupportedYet(f"Statement not implemented: {sql}") - elif statement_type in ( - StartTransaction, - CommitTransaction, - RollbackTransaction, - ): - return ExecuteAnswer() - elif statement_type is Set: - category = (statement.category or "").lower() - if category == "": - if isinstance(statement.name, Identifier): - param = statement.name.parts[0].lower() - - value = None - if isinstance(statement.value, Constant): - value = statement.value.value - - if param == "profiling": - self.session.profiling = value in (1, True) - if self.session.profiling is True: - profiler.enable() - else: - profiler.disable() - elif param == "predictor_cache": - self.session.predictor_cache = value in (1, True) - elif param == "context": - if value in (0, False, None): - # drop context - query_context_controller.drop_query_context(None) - elif param == "show_secrets": - self.session.show_secrets = value in (1, True) - elif isinstance(statement.name, Variable): - variables_controller.set_variable(statement.name.value, statement.value) - return ExecuteAnswer() - 
elif category == "autocommit": - return ExecuteAnswer() - elif category == "names": - # set names utf8; - charsets = { - "utf8": CHARSET_NUMBERS["utf8_general_ci"], - "utf8mb4": CHARSET_NUMBERS["utf8mb4_general_ci"], - } - self.charset = statement.value.value - self.charset_text_type = charsets.get(self.charset) - if self.charset_text_type is None: - logger.warning( - f"Unknown charset: {self.charset}. Setting up 'utf8_general_ci' as charset text type." - ) - self.charset_text_type = CHARSET_NUMBERS["utf8_general_ci"] - return ExecuteAnswer( - state_track=[ - ["character_set_client", self.charset], - ["character_set_connection", self.charset], - ["character_set_results", self.charset], - ], - ) - elif category == "active": - return self.answer_update_model_version(statement.value, database_name) - - else: - logger.warning(f"SQL statement is not processable, return OK package: {sql}") - return ExecuteAnswer() - elif statement_type is Use: - db_name = statement.value.parts[-1] - self.change_default_db(db_name) - return ExecuteAnswer() - elif statement_type in ( - CreatePredictor, - CreateAnomalyDetectionModel, # we may want to specialize these in the future - ): - return self.answer_create_predictor(statement, database_name) - elif statement_type is CreateView: - return self.answer_create_or_alter_view(statement, database_name) - elif statement_type is AlterView: - return self.answer_create_or_alter_view(statement, database_name) - elif statement_type is DropView: - return self.answer_drop_view(statement, database_name) - elif statement_type is Delete: - query = SQLQuery(statement, session=self.session, database=database_name) - return ExecuteAnswer(affected_rows=query.fetched_data.affected_rows) - elif statement_type is Insert: - query = SQLQuery(statement, session=self.session, database=database_name) - if query.fetched_data.length() > 0: - return self.answer_select(query) - return ExecuteAnswer(affected_rows=query.fetched_data.affected_rows) - elif statement_type 
is Update: - query = SQLQuery(statement, session=self.session, database=database_name) - return ExecuteAnswer(affected_rows=query.fetched_data.affected_rows) - elif statement_type is Alter and ("disable keys" in sql_lower) or ("enable keys" in sql_lower): - return ExecuteAnswer() - elif statement_type is Select: - ret = self.exec_service_function(statement, database_name) - if ret is not None: - return ret - query = SQLQuery(statement, session=self.session, database=database_name) - return self.answer_select(query) - elif statement_type is Explain: - return self.answer_show_columns(statement.target, database_name=database_name) - elif statement_type is CreateTable: - return self.answer_create_table(statement, database_name) - # -- jobs -- - elif statement_type is CreateJob: - return self.answer_create_job(statement, database_name) - elif statement_type is DropJob: - return self.answer_drop_job(statement, database_name) - # -- triggers -- - elif statement_type is CreateTrigger: - return self.answer_create_trigger(statement, database_name) - elif statement_type is DropTrigger: - return self.answer_drop_trigger(statement, database_name) - # -- chatbots - elif statement_type is CreateChatBot: - return self.answer_create_chatbot(statement, database_name) - elif statement_type is UpdateChatBot: - return self.answer_update_chatbot(statement, database_name) - elif statement_type is DropChatBot: - return self.answer_drop_chatbot(statement, database_name) - elif statement_type is CreateKnowledgeBase: - return self.answer_create_kb(statement, database_name) - elif statement_type is AlterKnowledgeBase: - return self.answer_alter_kb(statement, database_name) - elif statement_type is DropKnowledgeBase: - return self.answer_drop_kb(statement, database_name) - elif statement_type is CreateAgent: - return self.answer_create_agent(statement, database_name) - elif statement_type is DropAgent: - return self.answer_drop_agent(statement, database_name) - elif statement_type is 
UpdateAgent: - return self.answer_update_agent(statement, database_name) - elif statement_type is Evaluate: - statement.data = parse_sql(statement.query_str) - return self.answer_evaluate_metric(statement, database_name) - elif statement_type is CreateKnowledgeBaseIndex: - return self.answer_create_kb_index(statement, database_name) - elif statement_type is EvaluateKnowledgeBase: - return self.answer_evaluate_kb(statement, database_name) - elif statement_type in (Union, Intersect, Except): - query = SQLQuery(statement, session=self.session, database=database_name) - return self.answer_select(query) - else: - logger.warning(f"Unknown SQL statement: {sql}") - raise NotSupportedYet(f"Unknown SQL statement: {sql}") - - def exec_service_function(self, statement: Select, database_name: str) -> Optional[ExecuteAnswer]: - """ - If input query is a single line select without FROM - and has function in targets that matches with one of the mindsdb service functions: - - execute this function and return response - Otherwise, return None to allow to continue execution query outside - """ - - if statement.from_table is not None or len(statement.targets) != 1: - return - - target = statement.targets[0] - if not isinstance(target, Function): - return - - command = target.op.lower() - args = [arg.value for arg in target.args if isinstance(arg, Constant)] - if command == "query_resume": - ret = SQLQuery(None, session=self.session, query_id=args[0]) - return self.answer_select(ret) - - elif command == "query_cancel": - query_context_controller.cancel_query(*args) - return ExecuteAnswer() - - def answer_create_trigger(self, statement, database_name): - triggers_controller = TriggersController() - project_name, trigger_name = match_two_part_name(statement.name, default_db_name=database_name) - - triggers_controller.add( - trigger_name, - project_name, - statement.table, - statement.query_str, - statement.columns, - ) - return ExecuteAnswer() - - def answer_drop_trigger(self, statement, 
database_name): - triggers_controller = TriggersController() - - project_name, trigger_name = match_two_part_name(statement.name, default_db_name=database_name) - - triggers_controller.delete(trigger_name, project_name) - - return ExecuteAnswer() - - def answer_create_job(self, statement: CreateJob, database_name): - jobs_controller = JobsController() - project_name, job_name = match_two_part_name(statement.name, default_db_name=database_name) - - try: - jobs_controller.create(job_name, project_name, statement) - except EntityExistsError: - if getattr(statement, "if_not_exists", False) is False: - raise - - return ExecuteAnswer() - - def answer_drop_job(self, statement, database_name): - jobs_controller = JobsController() - project_name, job_name = match_two_part_name(statement.name, default_db_name=database_name) - - try: - jobs_controller.delete(job_name, project_name) - except EntityNotExistsError: - if statement.if_exists is False: - raise - - return ExecuteAnswer() - - def answer_create_chatbot(self, statement, database_name): - chatbot_controller = ChatBotController() - project_name, name = match_two_part_name(statement.name, default_db_name=database_name) - - is_running = statement.params.pop("is_running", True) - - database = self.session.integration_controller.get(statement.database.parts[-1]) - if database is None: - raise ExecutorException(f"Database not found: {statement.database}") - - # Database ID cannot be null - database_id = database["id"] if database is not None else -1 - - model_name = None - if statement.model is not None: - model_name = statement.model.parts[-1] - - agent_name = None - if statement.agent is not None: - agent_name = statement.agent.parts[-1] - chatbot_controller.add_chatbot( - name, - project_name=project_name, - model_name=model_name, - agent_name=agent_name, - database_id=database_id, - is_running=is_running, - params=variables_controller.fill_parameters(statement.params), - ) - return ExecuteAnswer() - - def 
answer_update_chatbot(self, statement, database_name): - chatbot_controller = ChatBotController() - - project_name, name = match_two_part_name(statement.name, default_db_name=database_name) - - # From SET keyword parameters - updated_name = statement.params.pop("name", None) - model_name = statement.params.pop("model", None) - agent_name = statement.params.pop("agent", None) - database_name = statement.params.pop("database", None) - is_running = statement.params.pop("is_running", None) - - database_id = None - if database_name is not None: - database = self.session.integration_controller.get(database_name) - if database is None: - raise ExecutorException(f"Database with name {database_name} not found") - database_id = database["id"] - - updated_chatbot = chatbot_controller.update_chatbot( - name, - project_name=project_name, - name=updated_name, - model_name=model_name, - agent_name=agent_name, - database_id=database_id, - is_running=is_running, - params=variables_controller.fill_parameters(statement.params), - ) - if updated_chatbot is None: - raise ExecutorException(f"Chatbot with name {name} not found") - return ExecuteAnswer() - - def answer_drop_chatbot(self, statement, database_name): - chatbot_controller = ChatBotController() - - project_name, name = match_two_part_name(statement.name, default_db_name=database_name) - - chatbot_controller.delete_chatbot(name, project_name=project_name) - return ExecuteAnswer() - - def answer_evaluate_metric(self, statement, database_name): - # heavy import, so we do it here on-demand - try: - from mindsdb_evaluator.accuracy.general import evaluate_accuracy - except ImportError: - logger.error("mindsdb-evaluator is not installed. 
Please install it with `pip install mindsdb-evaluator]`.") - - try: - sqlquery = SQLQuery(statement.data, session=self.session, database=database_name) - except Exception as e: - raise Exception(f'Nested query failed to execute with error: "{e}", please check and try again.') from e - df = sqlquery.fetched_data.to_df() - df.columns = [str(t.alias) if hasattr(t, "alias") else str(t.parts[-1]) for t in statement.data.targets] - - for col in ["actual", "prediction"]: - assert col in df.columns, f"`{col}` column was not provided, please try again." - assert df[col].isna().sum() == 0, f"There are missing values in the `{col}` column, please try again." - - metric_name = statement.name.parts[-1] - target_series = df.pop("prediction") - using_clause = statement.using if statement.using is not None else {} - metric_value = evaluate_accuracy( - df, - target_series, - metric_name, - target="actual", - ts_analysis=using_clause.get("ts_analysis", {}), # will be deprecated soon - n_decimals=using_clause.get("n_decimals", 3), - ) # 3 decimals by default - return ExecuteAnswer( - data=ResultSet( - columns=[Column(name=metric_name, table_name="", type="str")], - values=[[metric_value]], - ) - ) - - def answer_describe_object(self, obj_type: str, obj_name: Identifier, database_name: str): - project_objects = ( - "AGENTS", - "JOBS", - "SKILLS", - "CHATBOTS", - "TRIGGERS", - "VIEWS", - "KNOWLEDGE_BASES", - "PREDICTORS", - "MODELS", - ) - - global_objects = ("DATABASES", "PROJECTS", "HANDLERS", "ML_ENGINES") - - all_objects = project_objects + global_objects - - # is not plural? - if obj_type not in all_objects: - if obj_type + "S" in all_objects: - obj_type = obj_type + "S" - elif obj_type + "ES" in all_objects: - obj_type = obj_type + "ES" - else: - raise WrongArgumentError(f"Unknown describe type: {obj_type}") - - parts = obj_name.parts - if len(parts) > 2: - raise WrongArgumentError( - f"Invalid object name: {obj_name.to_string()}.\nOnly models support three-part namespaces." 
- ) - - name = parts[-1] - where = BinaryOperation(op="=", args=[Identifier("name"), Constant(name)]) - - if obj_type in project_objects: - database_name = parts[0] if len(parts) > 1 else database_name - where = BinaryOperation( - op="and", args=[where, BinaryOperation(op="=", args=[Identifier("project"), Constant(database_name)])] - ) - - select_statement = Select( - targets=[Star()], - from_table=Identifier(parts=["information_schema", obj_type]), - where=where, - ) - query = SQLQuery(select_statement, session=self.session) - return self.answer_select(query) - - def answer_describe_predictor(self, obj_name, database_name): - value = obj_name.parts.copy() - # project.model.version.?attrs - parts = value[:3] - attrs = value[3:] - model_info = self._get_model_info(Identifier(parts=parts), except_absent=False, database_name=database_name) - if model_info is None: - # project.model.?attrs - parts = value[:2] - attrs = value[2:] - model_info = self._get_model_info(Identifier(parts=parts), except_absent=False, database_name=database_name) - if model_info is None: - # model.?attrs - parts = value[:1] - attrs = value[1:] - model_info = self._get_model_info( - Identifier(parts=parts), except_absent=False, database_name=database_name - ) - - if model_info is None: - raise ExecutorException(f"Model not found: {obj_name}") - - if len(attrs) == 1: - attrs = attrs[0] - elif len(attrs) == 0: - attrs = None - - df = self.session.model_controller.describe_model( - self.session, - model_info["project_name"], - model_info["model_record"].name, - attribute=attrs, - version=model_info["model_record"].version, - ) - - return ExecuteAnswer(data=ResultSet.from_df(df, table_name="")) - - def answer_create_kb_index(self, statement, database_name): - project_name, table_name = match_two_part_name(statement.name, default_db_name=database_name) - self.session.kb_controller.create_index( - table_name=table_name, project_name=project_name, params=statement.params - ) - return ExecuteAnswer() - 
- def answer_evaluate_kb(self, statement: EvaluateKnowledgeBase, database_name): - project_name, table_name = match_two_part_name(statement.name, default_db_name=database_name) - scores = self.session.kb_controller.evaluate( - table_name=table_name, project_name=project_name, params=statement.params - ) - return ExecuteAnswer(data=ResultSet.from_df(scores)) - - def _get_model_info(self, identifier, except_absent=True, database_name=None): - if len(identifier.parts) == 1: - identifier.parts = [database_name, identifier.parts[0]] - identifier.is_quoted = [False] + identifier.is_quoted - - database_name, model_name, model_version = resolve_model_identifier(identifier) - # at least two part in identifier - identifier.parts[0] = database_name - identifier.parts[1] = model_name - - if database_name is None: - database_name = database_name - - if model_name is None: - if except_absent: - raise Exception(f"Model not found: {identifier.to_string()}") - else: - return - - model_record = get_model_record( - name=model_name, - project_name=database_name, - except_absent=except_absent, - version=model_version, - active=True if model_version is None else None, - ) - if not model_record: - return None - return {"model_record": model_record, "project_name": database_name} - - def _sync_predictor_check(self, phase_name): - """Checks if there is already a predictor retraining or fine-tuning - Do not allow to run retrain if there is another model in training process in less that 1h - """ - if ctx.company_id is None: - # bypass for tests - return - if ctx.user_id is None: - # bypass for tests - return - - is_cloud = self.session.config.get("cloud", False) - if is_cloud and ctx.user_class == 0: - models = get_model_records(active=None) - shortest_training = None - for model in models: - if ( - model.status in (PREDICTOR_STATUS.GENERATING, PREDICTOR_STATUS.TRAINING) - and model.training_start_at is not None - and model.training_stop_at is None - ): - training_time = 
datetime.datetime.now() - model.training_start_at - if shortest_training is None or training_time < shortest_training: - shortest_training = training_time - - if shortest_training is not None and shortest_training < datetime.timedelta(hours=1): - raise ExecutorException( - f"Can't start {phase_name} process while any other predictor is in status 'training' or 'generating'" - ) - - def answer_retrain_predictor(self, statement, database_name): - model_record = self._get_model_info(statement.name, database_name=database_name)["model_record"] - - if statement.query_str is None: - if model_record.data_integration_ref is not None: - if model_record.data_integration_ref["type"] == "integration": - integration = self.session.integration_controller.get_by_id(model_record.data_integration_ref["id"]) - if integration is None: - raise EntityNotExistsError("The database from which the model was trained no longer exists") - elif statement.integration_name is None: - # set to current project - statement.integration_name = Identifier(database_name) - - ml_handler = None - if statement.using is not None: - # repack using with lower names - statement.using = {k.lower(): v for k, v in statement.using.items()} - - if "engine" in statement.using: - ml_integration_name = statement.using.pop("engine") - ml_handler = self.session.integration_controller.get_ml_handler(ml_integration_name) - - # use current ml handler - if ml_handler is None: - integration_record = get_predictor_integration(model_record) - if integration_record is None: - raise EntityNotExistsError("ML engine model was trained with does not esxists") - ml_handler = self.session.integration_controller.get_ml_handler(integration_record.name) - - self._sync_predictor_check(phase_name="retrain") - df = self.session.model_controller.retrain_model(statement, ml_handler) - - return ExecuteAnswer(data=ResultSet.from_df(df)) - - @profiler.profile() - @mark_process("learn") - def answer_finetune_predictor(self, statement, 
database_name): - model_record = self._get_model_info(statement.name, database_name=database_name)["model_record"] - - if statement.using is not None: - # repack using with lower names - statement.using = {k.lower(): v for k, v in statement.using.items()} - - if statement.query_str is not None and statement.integration_name is None: - # set to current project - statement.integration_name = Identifier(database_name) - - # use current ml handler - integration_record = get_predictor_integration(model_record) - if integration_record is None: - raise Exception("The ML engine that the model was trained with does not exist.") - ml_handler = self.session.integration_controller.get_ml_handler(integration_record.name) - - self._sync_predictor_check(phase_name="finetune") - df = self.session.model_controller.finetune_model(statement, ml_handler) - - return ExecuteAnswer(data=ResultSet.from_df(df)) - - def _create_integration(self, name: str, engine: str, connection_args: dict): - # we have connection checkers not for any db. 
So do nothing if fail - # TODO return rich error message - - if connection_args is None: - connection_args = {} - status = HandlerStatusResponse(success=False) - - storage = None - try: - handler_meta = self.session.integration_controller.get_handler_meta(engine) - if handler_meta is None: - raise ExecutorException(f"There is no engine '{engine}'") - - if handler_meta.get("import", {}).get("success") is not True: - raise ExecutorException( - f"The '{engine}' handler isn't installed.\n" + get_handler_install_message(engine) - ) - - accept_connection_args = handler_meta.get("connection_args") - if accept_connection_args is not None and connection_args is not None: - for arg_name, arg_value in connection_args.items(): - if arg_name not in accept_connection_args: - continue - arg_meta = accept_connection_args[arg_name] - arg_type = arg_meta.get("type") - if arg_type == HANDLER_CONNECTION_ARG_TYPE.PATH: - # arg may be one of: - # str: '/home/file.pem' - # dict: {'path': '/home/file.pem'} - # dict: {'url': 'https://host.com/file'} - arg_value = connection_args[arg_name] - if isinstance(arg_value, (str, dict)) is False: - raise ExecutorException(f"Unknown type of arg: '{arg_value}'") - if isinstance(arg_value, str) or "path" in arg_value: - path = arg_value if isinstance(arg_value, str) else arg_value["path"] - if Path(path).is_file() is False: - raise ExecutorException(f"File not found at: '{path}'") - elif "url" in arg_value: - path = download_file(arg_value["url"]) - else: - raise ExecutorException(f"Argument '{arg_name}' must be path or url to the file") - connection_args[arg_name] = path - - handler = self.session.integration_controller.create_tmp_handler( - name=name, engine=engine, connection_args=connection_args - ) - status = handler.check_connection() - if status.copy_storage: - storage = handler.handler_storage.export_files() - except Exception as e: - status.error_message = str(e) - - if status.success is False: - raise ExecutorException(f"Can't connect to db: 
{status.error_message}") - - integration = self.session.integration_controller.get(name) - if integration is not None: - raise EntityExistsError("Database already exists", name) - try: - integration = ProjectController().get(name=name) - except EntityNotExistsError: - pass - if integration is not None: - raise EntityExistsError("Project exists with this name", name) - - self.session.integration_controller.add(name, engine, connection_args) - if storage: - handler = self.session.integration_controller.get_data_handler(name, connect=False) - handler.handler_storage.import_files(storage) - - def answer_create_ml_engine(self, statement: CreateMLEngine) -> ExecuteAnswer: - """Handles the `CREATE ML_ENGINE` command, which creates a new ML integration (engine) in the system. - - Args: - statement (CreateMLEngine): The AST object representing the CREATE ML_ENGINE command. - - Returns: - ExecuteAnswer: The result of the ML engine creation operation. - - Raises: - ValueError: If the ml_engine name format is invalid. 
- """ - name = match_one_part_name(statement.name) - - handler = statement.handler - params = statement.params - if_not_exists = getattr(statement, "if_not_exists", False) - - integrations = self.session.integration_controller.get_all() - if name in integrations: - if not if_not_exists: - raise EntityExistsError("Integration already exists", name) - else: - return ExecuteAnswer() - - handler_module_meta = self.session.integration_controller.get_handler_meta(handler) - - if handler_module_meta is None: - raise ExecutorException(f"There is no engine '{handler}'") - - params_out = {} - if params: - for key, value in variables_controller.fill_parameters(params).items(): - # convert ast types to string - if isinstance(value, (Constant, Identifier)): - value = value.to_string() - params_out[key] = value - - try: - self.session.integration_controller.add(name=name, engine=handler, connection_args=params_out) - except Exception as e: - msg = str(e) - if type(e) in (ImportError, ModuleNotFoundError): - msg = dedent( - f"""\ - The '{handler_module_meta["name"]}' handler cannot be used. Reason is: - {handler_module_meta["import"]["error_message"] or msg} - """ - ) - is_cloud = self.session.config.get("cloud", False) - if ( - is_cloud is False - # NOTE: BYOM may raise these errors if there is an error in the user's code, - # therefore error_message will be None - and handler_module_meta["name"] != "byom" - and "No module named" in handler_module_meta["import"]["error_message"] - ): - logger.info(get_handler_install_message(handler_module_meta["name"])) - ast_drop = DropMLEngine(name=Identifier(name)) - self.answer_drop_ml_engine(ast_drop) - logger.info(msg) - raise ExecutorException(msg) from e - - return ExecuteAnswer() - - def answer_drop_ml_engine(self, statement: DropMLEngine) -> ExecuteAnswer: - """Handles the `DROP ML_ENGINE` command, which removes an ML integration (engine) from the system. 
- - Args: - statement (DropMLEngine): The AST object representing the DROP ML_ENGINE command. - - Raises: - EntityNotExistsError: If the integration does not exist and IF EXISTS is not specified. - ValueError: If the integration name is provided in an invalid format. - - Returns: - ExecuteAnswer: The result of the ML engine deletion operation. - """ - name = match_one_part_name(statement.name) - - integrations = self.session.integration_controller.get_all() - if name not in integrations: - if not statement.if_exists: - raise EntityNotExistsError("Integration does not exists", name) - else: - return ExecuteAnswer() - self.session.integration_controller.delete(name) - return ExecuteAnswer() - - def answer_create_database(self, statement: CreateDatabase) -> ExecuteAnswer: - """Create new integration or project - - Args: - statement (CreateDatabase): data for creating database/project - - Returns: - ExecuteAnswer: 'ok' answer - """ - database_name = match_one_part_name(statement.name) - - engine = (statement.engine or "mindsdb").lower() - - connection_args = variables_controller.fill_parameters(statement.parameters) - - try: - if engine == "mindsdb": - ProjectController().add(database_name) - else: - self._create_integration(database_name, engine, connection_args) - except EntityExistsError: - if statement.if_not_exists is False: - raise - - return ExecuteAnswer() - - def answer_drop_database(self, statement: DropDatabase | DropDatasource) -> ExecuteAnswer: - """Drop a database (project or integration) by name. - - Args: - statement (DropDatabase | DropDatasource): The parsed DROP DATABASE or DROP DATASOURCE statement. - - Raises: - Exception: If the database name format is invalid. - EntityNotExistsError: If the database does not exist and 'IF EXISTS' is not specified in the statement. - - Returns: - ExecuteAnswer: The result of the drop database operation. 
- """ - db_name = match_one_part_name(statement.name) - - try: - self.session.database_controller.delete(db_name, strict_case=statement.name.is_quoted[0]) - except EntityNotExistsError: - if statement.if_exists is not True: - raise - return ExecuteAnswer() - - def answer_alter_database(self, statement: AlterDatabase) -> ExecuteAnswer: - db_name = match_one_part_name(statement.name) - self.session.database_controller.update( - db_name, data=statement.params, strict_case=statement.name.is_quoted[0], check_connection=True - ) - return ExecuteAnswer() - - def answer_drop_tables(self, statement, database_name): - """answer on 'drop table [if exists] {name}' - Args: - statement: ast - """ - - for table in statement.tables: - if len(table.parts) > 1: - db_name = table.parts[0] - table = Identifier(parts=table.parts[1:]) - else: - db_name = database_name - - dn = self.session.datahub[db_name] - if dn is None: - raise ExecutorException(f"Cannot delete a table from database '{db_name}': the database does not exist") - - if db_name is not None: - dn.drop_table(table, if_exists=statement.if_exists) - elif db_name in self.session.database_controller.get_dict(filter_type="project"): - # TODO do we need feature: delete object from project via drop table? 
- - project = self.session.database_controller.get_project(db_name) - project_tables = {key: val for key, val in project.get_tables().items() if val.get("deletable") is True} - table_name = table.to_string() - - if table_name in project_tables: - self.session.model_controller.delete_model(table_name, project_name=db_name) - elif statement.if_exists is False: - raise ExecutorException(f"Cannot delete a table from database '{db_name}': table does not exists") - else: - raise ExecutorException(f"Cannot delete a table from database '{db_name}'") - - return ExecuteAnswer() - - def answer_create_or_alter_view(self, statement: CreateView | AlterView, database_name: str) -> ExecuteAnswer: - """Process CREATE and ALTER VIEW commands - - Args: - statement (CreateView | AlterView): data for creating or altering view - database_name (str): name of the current database - - Returns: - ExecuteAnswer: answer for the command - """ - project_name, view_name = match_two_part_name(statement.name, default_db_name=database_name) - - query_str = statement.query_str - - if isinstance(statement.from_table, Identifier): - query = Select( - targets=[Star()], - from_table=NativeQuery(integration=statement.from_table, query=statement.query_str), - ) - query_str = query.to_string() - - project = self.session.database_controller.get_project(project_name) - - if isinstance(statement, CreateView): - try: - project.create_view(view_name, query=query_str, session=self.session) - except EntityExistsError: - if getattr(statement, "if_not_exists", False) is False: - raise - elif isinstance(statement, AlterView): - try: - project.update_view(view_name, query=query_str, strict_case=(not view_name.islower())) - except EntityNotExistsError: - raise ExecutorException(f"View {view_name} does not exist in {project_name}") - else: - raise ValueError(f"Unknown view DDL statement: {statement}") - - return ExecuteAnswer() - - def answer_drop_view(self, statement: DropView, database_name: str) -> ExecuteAnswer: - 
"""Drop one or more views from the specified database/project. - - Args: - statement (DropView): The parsed DROP VIEW statement containing view names and options. - database_name (str): The name of the database (project) from which to drop the views. - - Raises: - EntityNotExistsError: If a view does not exist and 'IF EXISTS' is not specified in the statement. - ValueError: If the view name format is invalid. - - Returns: - ExecuteAnswer: The result of the drop view operation. - """ - for name in statement.names: - match name.parts, name.is_quoted: - case [view_name], [view_name_quoted]: - db_name_quoted = False - case [database_name, view_name], [db_name_quoted, view_name_quoted]: - pass - case _: - raise ValueError(f"Invalid view name: {name}") - - if not db_name_quoted: - database_name = database_name.lower() - if not view_name_quoted: - view_name = view_name.lower() - - project = self.session.database_controller.get_project(database_name, db_name_quoted) - - try: - project.drop_view(view_name, strict_case=True) - except EntityNotExistsError: - if statement.if_exists is not True: - raise - - return ExecuteAnswer() - - def answer_create_kb(self, statement: CreateKnowledgeBase, database_name: str): - if statement.model: - raise ExecutorException( - "Creating a knowledge base using pre-existing models is no longer supported.\n" - "Please pass the model parameters as a JSON object in the embedding_model field." 
- ) - - project_name, kb_name = match_two_part_name(statement.name, default_db_name=database_name) - - if statement.storage is not None: - if len(statement.storage.parts) != 2: - raise ExecutorException( - f"Invalid vectordatabase table name: {statement.storage}Need the form 'database_name.table_name'" - ) - - if statement.from_query is not None: - # TODO: implement this - raise ExecutorException("Create a knowledge base from a select is not supported yet") - - # create the knowledge base - _ = self.session.kb_controller.add( - name=kb_name, - project_name=project_name, - # embedding_model=statement.model, - storage=statement.storage, - params=variables_controller.fill_parameters(statement.params), - if_not_exists=statement.if_not_exists, - ) - - return ExecuteAnswer() - - def answer_alter_kb(self, statement: AlterKnowledgeBase, database_name: str): - project_name, kb_name = match_two_part_name( - statement.name, ensure_lower_case=True, default_db_name=database_name - ) - - # update the knowledge base - self.session.kb_controller.update( - name=kb_name, - project_name=project_name, - params=variables_controller.fill_parameters(statement.params), - ) - - return ExecuteAnswer() - - def answer_drop_kb(self, statement: DropKnowledgeBase, database_name: str) -> ExecuteAnswer: - project_name, kb_name = match_two_part_name(statement.name, default_db_name=database_name) - - # delete the knowledge base - self.session.kb_controller.delete( - name=kb_name, - project_name=project_name, - if_exists=statement.if_exists, - ) - - return ExecuteAnswer() - - def answer_create_agent(self, statement, database_name): - project_name, name = match_two_part_name(statement.name, default_db_name=database_name) - - try: - _ = self.session.agents_controller.add_agent( - name=name, - project_name=project_name, - model=statement.model, - params=variables_controller.fill_parameters(statement.params), - ) - except EntityExistsError as e: - if statement.if_not_exists is not True: - raise 
ExecutorException(str(e)) - except ValueError as e: - # Project does not exist or agent already exists. - raise ExecutorException(str(e)) - - return ExecuteAnswer() - - def answer_drop_agent(self, statement: DropAgent, database_name: str): - project_name, name = match_two_part_name(statement.name, default_db_name=database_name) - - try: - self.session.agents_controller.delete_agent(name, project_name) - except ValueError as e: - # Project does not exist or agent does not exist. - raise ExecutorException(str(e)) - - return ExecuteAnswer() - - def answer_update_agent(self, statement: UpdateAgent, database_name: str): - project_name, name = match_two_part_name(statement.name, default_db_name=database_name) - - model = statement.params.pop("model", None) - try: - _ = self.session.agents_controller.update_agent( - name, - project_name=project_name, - model=model, - params=variables_controller.fill_parameters(statement.params), - ) - except (EntityExistsError, EntityNotExistsError, ValueError) as e: - # Project does not exist or agent does not exist. 
- raise ExecutorException(str(e)) - - return ExecuteAnswer() - - @mark_process("learn") - def answer_create_predictor(self, statement: CreatePredictor, database_name: str): - integration_name, model_name = match_two_part_name(statement.name, default_db_name=database_name) - - statement.name.parts = [integration_name, model_name] - statement.name.is_quoted = [False, False] - - ml_integration_name = self.session.config["default_ml_engine"] - if statement.using is not None: - # repack using with lower names - statement.using = {k.lower(): v for k, v in statement.using.items()} - - ml_integration_name = statement.using.pop("engine", ml_integration_name) - - if ml_integration_name is None: - raise ValueError("ML engine must be specified when creating a model") - - if statement.query_str is not None and statement.integration_name is None: - # set to current project - statement.integration_name = Identifier(database_name) - - try: - ml_handler = self.session.integration_controller.get_ml_handler(ml_integration_name) - except EntityNotExistsError: - # not exist, try to create it with same name as handler - self.answer_create_ml_engine( - CreateMLEngine(name=Identifier(ml_integration_name), handler=ml_integration_name) - ) - - ml_handler = self.session.integration_controller.get_ml_handler(ml_integration_name) - - if getattr(statement, "is_replace", False) is True: - # try to delete - try: - self.session.model_controller.delete_model(model_name, project_name=integration_name) - except EntityNotExistsError: - pass - - try: - df = self.session.model_controller.create_model(statement, ml_handler) - return ExecuteAnswer(data=ResultSet.from_df(df)) - except EntityExistsError: - if getattr(statement, "if_not_exists", False) is True: - return ExecuteAnswer() - raise - - def answer_show_columns( - self, - target: Identifier, - where: Optional[Operation] = None, - like: Optional[str] = None, - is_full=False, - database_name=None, - ): - if isinstance(target, Identifier) is False: - 
raise TableNotExistError("The table name is required for the query.") - - if len(target.parts) > 1: - db = target.parts[0] - elif isinstance(database_name, str) and len(database_name) > 0: - db = database_name - else: - db = self.session.config.get("default_project") - table_name = target.parts[-1] - - new_where = BinaryOperation( - "and", - args=[ - BinaryOperation("=", args=[Identifier("TABLE_SCHEMA"), Constant(db)]), - BinaryOperation("=", args=[Identifier("TABLE_NAME"), Constant(table_name)]), - ], - ) - if where is not None: - new_where = BinaryOperation("and", args=[new_where, where]) - if like is not None: - like = BinaryOperation("like", args=[Identifier("View"), Constant(like)]) - new_where = BinaryOperation("and", args=[new_where, like]) - - targets = [ - Identifier("COLUMN_NAME", alias=Identifier("Field")), - Identifier("COLUMN_TYPE", alias=Identifier("Type")), - Identifier("IS_NULLABLE", alias=Identifier("Null")), - Identifier("COLUMN_KEY", alias=Identifier("Key")), - Identifier("COLUMN_DEFAULT", alias=Identifier("Default")), - Identifier("EXTRA", alias=Identifier("Extra")), - ] - if is_full: - targets.extend( - [ - Constant(None, alias=Identifier("Collation")), - Constant("select", alias=Identifier("Privileges")), - Constant(None, alias=Identifier("Comment")), - ] - ) - new_statement = Select( - targets=targets, - from_table=Identifier(parts=["information_schema", "COLUMNS"]), - where=new_where, - ) - - query = SQLQuery(new_statement, session=self.session, database=database_name) - return self.answer_select(query) - - def answer_show_create_table(self, table): - columns = [ - Column(table_name="", name="Table", type=TYPES.MYSQL_TYPE_VAR_STRING), - Column(table_name="", name="Create Table", type=TYPES.MYSQL_TYPE_VAR_STRING), - ] - return ExecuteAnswer( - data=ResultSet( - columns=columns, - values=[[table, f"create table {table} ()"]], - ) - ) - - def answer_function_status(self): - columns = [ - Column( - name="Db", - alias="Db", - 
table_name="schemata", - table_alias="ROUTINES", - type="str", - database="mysql", - charset=self.charset_text_type, - ), - Column( - name="Db", - alias="Db", - table_name="routines", - table_alias="ROUTINES", - type="str", - database="mysql", - charset=self.charset_text_type, - ), - Column( - name="Type", - alias="Type", - table_name="routines", - table_alias="ROUTINES", - type="str", - database="mysql", - charset=CHARSET_NUMBERS["utf8_bin"], - ), - Column( - name="Definer", - alias="Definer", - table_name="routines", - table_alias="ROUTINES", - type="str", - database="mysql", - charset=CHARSET_NUMBERS["utf8_bin"], - ), - Column( - name="Modified", - alias="Modified", - table_name="routines", - table_alias="ROUTINES", - type=TYPES.MYSQL_TYPE_TIMESTAMP, - database="mysql", - charset=CHARSET_NUMBERS["binary"], - ), - Column( - name="Created", - alias="Created", - table_name="routines", - table_alias="ROUTINES", - type=TYPES.MYSQL_TYPE_TIMESTAMP, - database="mysql", - charset=CHARSET_NUMBERS["binary"], - ), - Column( - name="Security_type", - alias="Security_type", - table_name="routines", - table_alias="ROUTINES", - type=TYPES.MYSQL_TYPE_STRING, - database="mysql", - charset=CHARSET_NUMBERS["utf8_bin"], - ), - Column( - name="Comment", - alias="Comment", - table_name="routines", - table_alias="ROUTINES", - type=TYPES.MYSQL_TYPE_BLOB, - database="mysql", - charset=CHARSET_NUMBERS["utf8_bin"], - ), - Column( - name="character_set_client", - alias="character_set_client", - table_name="character_sets", - table_alias="ROUTINES", - type=TYPES.MYSQL_TYPE_VAR_STRING, - database="mysql", - charset=self.charset_text_type, - ), - Column( - name="collation_connection", - alias="collation_connection", - table_name="collations", - table_alias="ROUTINES", - type=TYPES.MYSQL_TYPE_VAR_STRING, - database="mysql", - charset=self.charset_text_type, - ), - Column( - name="Database Collation", - alias="Database Collation", - table_name="collations", - table_alias="ROUTINES", - 
type=TYPES.MYSQL_TYPE_VAR_STRING, - database="mysql", - charset=self.charset_text_type, - ), - ] - - return ExecuteAnswer(data=ResultSet(columns=columns)) - - def answer_show_table_status(self, table_name): - # NOTE at this moment parsed statement only like `SHOW TABLE STATUS LIKE 'table'`. - # NOTE some columns has {'database': 'mysql'}, other not. That correct. This is how real DB sends messages. - columns = [ - { - "database": "mysql", - "table_name": "tables", - "name": "Name", - "alias": "Name", - "type": TYPES.MYSQL_TYPE_VAR_STRING, - "charset": self.charset_text_type, - }, - { - "database": "", - "table_name": "tables", - "name": "Engine", - "alias": "Engine", - "type": TYPES.MYSQL_TYPE_VAR_STRING, - "charset": self.charset_text_type, - }, - { - "database": "", - "table_name": "tables", - "name": "Version", - "alias": "Version", - "type": TYPES.MYSQL_TYPE_LONGLONG, - "charset": CHARSET_NUMBERS["binary"], - }, - { - "database": "mysql", - "table_name": "tables", - "name": "Row_format", - "alias": "Row_format", - "type": TYPES.MYSQL_TYPE_VAR_STRING, - "charset": self.charset_text_type, - }, - { - "database": "", - "table_name": "tables", - "name": "Rows", - "alias": "Rows", - "type": TYPES.MYSQL_TYPE_LONGLONG, - "charset": CHARSET_NUMBERS["binary"], - }, - { - "database": "", - "table_name": "tables", - "name": "Avg_row_length", - "alias": "Avg_row_length", - "type": TYPES.MYSQL_TYPE_LONGLONG, - "charset": CHARSET_NUMBERS["binary"], - }, - { - "database": "", - "table_name": "tables", - "name": "Data_length", - "alias": "Data_length", - "type": TYPES.MYSQL_TYPE_LONGLONG, - "charset": CHARSET_NUMBERS["binary"], - }, - { - "database": "", - "table_name": "tables", - "name": "Max_data_length", - "alias": "Max_data_length", - "type": TYPES.MYSQL_TYPE_LONGLONG, - "charset": CHARSET_NUMBERS["binary"], - }, - { - "database": "", - "table_name": "tables", - "name": "Index_length", - "alias": "Index_length", - "type": TYPES.MYSQL_TYPE_LONGLONG, - "charset": 
CHARSET_NUMBERS["binary"], - }, - { - "database": "", - "table_name": "tables", - "name": "Data_free", - "alias": "Data_free", - "type": TYPES.MYSQL_TYPE_LONGLONG, - "charset": CHARSET_NUMBERS["binary"], - }, - { - "database": "", - "table_name": "tables", - "name": "Auto_increment", - "alias": "Auto_increment", - "type": TYPES.MYSQL_TYPE_LONGLONG, - "charset": CHARSET_NUMBERS["binary"], - }, - { - "database": "", - "table_name": "tables", - "name": "Create_time", - "alias": "Create_time", - "type": TYPES.MYSQL_TYPE_TIMESTAMP, - "charset": CHARSET_NUMBERS["binary"], - }, - { - "database": "", - "table_name": "tables", - "name": "Update_time", - "alias": "Update_time", - "type": TYPES.MYSQL_TYPE_TIMESTAMP, - "charset": CHARSET_NUMBERS["binary"], - }, - { - "database": "", - "table_name": "tables", - "name": "Check_time", - "alias": "Check_time", - "type": TYPES.MYSQL_TYPE_TIMESTAMP, - "charset": CHARSET_NUMBERS["binary"], - }, - { - "database": "mysql", - "table_name": "tables", - "name": "Collation", - "alias": "Collation", - "type": TYPES.MYSQL_TYPE_VAR_STRING, - "charset": self.charset_text_type, - }, - { - "database": "", - "table_name": "tables", - "name": "Checksum", - "alias": "Checksum", - "type": TYPES.MYSQL_TYPE_LONGLONG, - "charset": CHARSET_NUMBERS["binary"], - }, - { - "database": "", - "table_name": "tables", - "name": "Create_options", - "alias": "Create_options", - "type": TYPES.MYSQL_TYPE_VAR_STRING, - "charset": self.charset_text_type, - }, - { - "database": "", - "table_name": "tables", - "name": "Comment", - "alias": "Comment", - "type": TYPES.MYSQL_TYPE_BLOB, - "charset": self.charset_text_type, - }, - ] - columns = [Column(**d) for d in columns] - data = [ - [ - table_name, # Name - "InnoDB", # Engine - 10, # Version - "Dynamic", # Row_format - 1, # Rows - 16384, # Avg_row_length - 16384, # Data_length - 0, # Max_data_length - 0, # Index_length - 0, # Data_free - None, # Auto_increment - datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), # 
Create_time - datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), # Update_time - None, # Check_time - "utf8mb4_0900_ai_ci", # Collation - None, # Checksum - "", # Create_options - "", # Comment - ] - ] - return ExecuteAnswer(data=ResultSet(columns=columns, values=data)) - - def answer_show_warnings(self): - columns = [ - { - "database": "", - "table_name": "", - "name": "Level", - "alias": "Level", - "type": TYPES.MYSQL_TYPE_VAR_STRING, - "charset": self.charset_text_type, - }, - { - "database": "", - "table_name": "", - "name": "Code", - "alias": "Code", - "type": TYPES.MYSQL_TYPE_LONG, - "charset": CHARSET_NUMBERS["binary"], - }, - { - "database": "", - "table_name": "", - "name": "Message", - "alias": "Message", - "type": TYPES.MYSQL_TYPE_VAR_STRING, - "charset": self.charset_text_type, - }, - ] - columns = [Column(**d) for d in columns] - return ExecuteAnswer(data=ResultSet(columns=columns)) - - def answer_create_table(self, statement, database_name): - SQLQuery(statement, session=self.session, execute=True, database=database_name) - return ExecuteAnswer() - - def answer_select(self, query): - data = query.fetched_data - return ExecuteAnswer(data=data) - - def answer_update_model_version(self, model_version, database_name): - if not isinstance(model_version, Identifier): - raise ExecutorException(f"Please define version: {model_version}") - - model_parts = model_version.parts - version = model_parts[-1] - if version.isdigit(): - version = int(version) - else: - raise ExecutorException(f"Unknown version: {version}") - - if len(model_parts) == 3: - project_name, model_name = model_parts[:2] - elif len(model_parts) == 2: - model_name = model_parts[0] - project_name = database_name - else: - raise ExecutorException(f"Unknown model: {model_version}") - - self.session.model_controller.set_model_active_version(project_name, model_name, version) - return ExecuteAnswer() - - def answer_drop_model(self, statement: DropPredictor, database_name: str) -> ExecuteAnswer: 
- """Handles the DROP MODEL (or DROP PREDICTOR) command, which removes a model - or a specific model version from a project. - - Args: - statement (DropPredictor): The AST object representing the DROP MODEL or DROP PREDICTOR command. - database_name (str): The name of the current database/project. - - Raises: - EntityNotExistsError: If the model or version does not exist and IF EXISTS is not specified. - ValueError: If the model name format is invalid. - - Returns: - ExecuteAnswer: The result of the model deletion operation. - """ - project_name, model_name, version = resolve_model_identifier(statement.name) - if project_name is None: - project_name = database_name - - if version is not None: - # delete version - try: - self.session.model_controller.delete_model_version(project_name, model_name, version) - except EntityNotExistsError as e: - if not statement.if_exists: - raise e - else: - # drop model - try: - project = self.session.database_controller.get_project(project_name, strict_case=True) - project.drop_model(model_name) - except Exception as e: - if not statement.if_exists: - raise e - - return ExecuteAnswer() - - def change_default_db(self, db_name): - # That fix for bug in mssql: it keeps connection for a long time, but after some time mssql can - # send packet with COM_INIT_DB=null. In this case keep old database name as default. 
- if db_name != "null": - if self.session.database_controller.exists(db_name): - self.session.database = db_name - else: - raise BadDbError(f"Database {db_name} does not exists") diff --git a/mindsdb/api/executor/controllers/__init__.py b/mindsdb/api/executor/controllers/__init__.py deleted file mode 100644 index 904992c26fb..00000000000 --- a/mindsdb/api/executor/controllers/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .session_controller import SessionController diff --git a/mindsdb/api/executor/controllers/session_controller.py b/mindsdb/api/executor/controllers/session_controller.py deleted file mode 100644 index dfaa69062a7..00000000000 --- a/mindsdb/api/executor/controllers/session_controller.py +++ /dev/null @@ -1,92 +0,0 @@ -""" -******************************************************* - * Copyright (C) 2017 MindsDB Inc. - * - * This file is part of MindsDB Server. - * - * MindsDB Server can not be copied and/or distributed without the express - * permission of MindsDB Inc - ******************************************************* -""" - -from mindsdb.api.executor.datahub.datanodes import InformationSchemaDataNode -from mindsdb.utilities.config import Config -from mindsdb.interfaces.agents.agents_controller import AgentsController -from mindsdb.interfaces.model.model_controller import ModelController -from mindsdb.interfaces.database.database import DatabaseController -from mindsdb.interfaces.functions.controller import FunctionController - -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class SessionController: - """ - This class manages the server session - """ - - def __init__(self, api_type="http") -> object: - """ - Initialize the session - """ - self.api_type = api_type - self.username = None - self.auth = False - self.logging = logger - self.database = None - - self.config = Config() - - self.model_controller = ModelController() - - # to prevent circular imports - from mindsdb.interfaces.database.integrations import 
integration_controller - - self.integration_controller = integration_controller - - self.database_controller = DatabaseController() - self.function_controller = FunctionController(self) - - # to prevent circular imports - from mindsdb.interfaces.knowledge_base.controller import KnowledgeBaseController - - self.kb_controller = KnowledgeBaseController(self) - - self.datahub = InformationSchemaDataNode(self) - self.agents_controller = AgentsController() - - self.prepared_stmts = {} - self.packet_sequence_number = 0 - self.profiling = False - self.predictor_cache = False if self.config.get("cache")["type"] == "none" else True - self.show_secrets = False - - def inc_packet_sequence_number(self): - self.packet_sequence_number = (self.packet_sequence_number + 1) % 256 - - def register_stmt(self, statement): - i = 1 - while i in self.prepared_stmts and i < 100: - i = i + 1 - if i == 100: - raise Exception("Too many unclosed queries") - - self.prepared_stmts[i] = dict(type=None, statement=statement, fetched=0) - return i - - def unregister_stmt(self, stmt_id): - del self.prepared_stmts[stmt_id] - - def to_json(self): - return { - "username": self.username, - "auth": self.auth, - "database": self.database, - "prepared_stmts": self.prepared_stmts, - "packet_sequence_number": self.packet_sequence_number, - } - - def from_json(self, updated): - for key in updated: - setattr(self, key, updated[key]) diff --git a/mindsdb/api/executor/data_types/__init__.py b/mindsdb/api/executor/data_types/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/api/executor/data_types/answer.py b/mindsdb/api/executor/data_types/answer.py deleted file mode 100644 index d4265e7ba9b..00000000000 --- a/mindsdb/api/executor/data_types/answer.py +++ /dev/null @@ -1,13 +0,0 @@ -from dataclasses import dataclass -from typing import List, Optional - -from mindsdb.api.executor.sql_query.result_set import ResultSet - - -@dataclass(kw_only=True, slots=True) -class 
ExecuteAnswer: - data: Optional[ResultSet] = None - state_track: Optional[List[List]] = None - error_code: Optional[int] = None - error_message: Optional[str] = None - affected_rows: Optional[int] = None diff --git a/mindsdb/api/executor/data_types/response_type.py b/mindsdb/api/executor/data_types/response_type.py deleted file mode 100644 index ec72cc09c8d..00000000000 --- a/mindsdb/api/executor/data_types/response_type.py +++ /dev/null @@ -1,10 +0,0 @@ -class RESPONSE_TYPE: - __slots__ = () - OK = "ok" - TABLE = "table" - ERROR = "error" - COLUMNS_TABLE = "columns_table" # for queries to information_schema.columns - EOF = "eof" - - -RESPONSE_TYPE = RESPONSE_TYPE() diff --git a/mindsdb/api/executor/data_types/sql_answer.py b/mindsdb/api/executor/data_types/sql_answer.py deleted file mode 100644 index 0a8b6087dbf..00000000000 --- a/mindsdb/api/executor/data_types/sql_answer.py +++ /dev/null @@ -1,129 +0,0 @@ -from typing import Generator -from dataclasses import dataclass - -import orjson -import numpy as np -import pandas as pd - -from mindsdb.utilities.json_encoder import CustomJSONEncoder -from mindsdb.api.executor.sql_query.result_set import ResultSet -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE - - -@dataclass -class SQLAnswer: - """Container for SQL query execution results and metadata. - - Attributes: - resp_type: Type of response (OK, ERROR, TABLE, COLUMNS_TABLE). - result_set: Query result data as a ResultSet object. - status: Status code for the response. - state_track: List of state tracking information. - error_code: Error code if query execution failed. - error_message: Human-readable error message if query failed. - affected_rows: Number of rows affected by the query (for DML operations). - mysql_types: List of MySQL data types for result columns. 
- """ - - resp_type: RESPONSE_TYPE = RESPONSE_TYPE.OK - result_set: ResultSet | None = None - status: int | None = None - state_track: list[list] | None = None - error_code: int | None = None - error_message: str | None = None - affected_rows: int | None = None - mysql_types: list[MYSQL_DATA_TYPE] | None = None - - @property - def type(self) -> RESPONSE_TYPE: - """Get the response type. - - Returns: - RESPONSE_TYPE: The type of this SQL response. - """ - return self.resp_type - - def stream_http_response_sse(self, context: dict | None) -> Generator[str, None, None]: - """Stream response in Server-Sent Events (SSE) format. - - Args: - context: Optional context information. - - Yields: - str: SSE-formatted data lines (prefixed with "data: "). - """ - for piece in self.stream_http_response_jsonlines(context=context): - yield f"data: {piece}\n" - - def stream_http_response_jsonlines(self, context: dict | None) -> Generator[str, None, None]: - """Stream response as newline-delimited JSON (JSONL). - - Args: - context: Optional context information. - - Yields: - str: JSON-encoded lines terminated with newline characters. - """ - _default_json = CustomJSONEncoder().default - - if self.resp_type in (RESPONSE_TYPE.OK, RESPONSE_TYPE.ERROR): - response = self.dump_http_response(context=context) - yield orjson.dumps(response).decode() + "\n" - return - - yield ( - orjson.dumps( - { - "type": RESPONSE_TYPE.TABLE, - "column_names": [column.alias or column.name for column in self.result_set.columns], - } - ).decode() - + "\n" - ) - - for el in self.result_set.stream_data(): - el.replace([np.nan, pd.NA, pd.NaT], None, inplace=True) - yield ( - orjson.dumps( - el.to_dict("split")["data"], - default=_default_json, - option=orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_PASSTHROUGH_DATETIME, - ).decode() - + "\n" - ) - - def dump_http_response(self, context: dict | None = None) -> dict: - """Serialize the complete response as a single dictionary. 
- - Args: - context: Optional context information. - - Returns: - dict: Serialized response. - """ - if context is None: - context = {} - if self.resp_type == RESPONSE_TYPE.OK: - return { - "type": self.resp_type, - "affected_rows": self.affected_rows, - "context": context, - } - elif self.resp_type in (RESPONSE_TYPE.TABLE, RESPONSE_TYPE.COLUMNS_TABLE): - data = self.result_set.to_lists(json_types=True) - return { - "type": RESPONSE_TYPE.TABLE, - "data": data, - "column_names": [column.alias or column.name for column in self.result_set.columns], - "context": context, - } - elif self.resp_type == RESPONSE_TYPE.ERROR: - return { - "type": RESPONSE_TYPE.ERROR, - "error_code": self.error_code or 0, - "error_message": self.error_message, - "context": context, - } - else: - raise ValueError(f"Unsupported response type for dump HTTP response: {self.resp_type}") diff --git a/mindsdb/api/executor/datahub/__init__.py b/mindsdb/api/executor/datahub/__init__.py deleted file mode 100644 index 8b137891791..00000000000 --- a/mindsdb/api/executor/datahub/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/mindsdb/api/executor/datahub/classes/__init__.py b/mindsdb/api/executor/datahub/classes/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/api/executor/datahub/classes/tables_row.py b/mindsdb/api/executor/datahub/classes/tables_row.py deleted file mode 100644 index 4fc19641b4d..00000000000 --- a/mindsdb/api/executor/datahub/classes/tables_row.py +++ /dev/null @@ -1,58 +0,0 @@ -from dataclasses import dataclass, astuple -from datetime import datetime - - -class TABLES_ROW_TYPE: - __slots__ = () - BASE_TABLE = 'BASE TABLE' - VIEW = 'VIEW' - SYSTEM_VIEW = 'SYSTEM VIEW' - - -TABLES_ROW_TYPE = TABLES_ROW_TYPE() - - -@dataclass(slots=True) -class TablesRow: - TABLE_CATALOG: str = 'def' - TABLE_SCHEMA: str = 'information_schema' - TABLE_NAME: str = None - TABLE_TYPE: str = TABLES_ROW_TYPE.BASE_TABLE - ENGINE: str = None - VERSION: int = None - 
ROW_FORMAT: str = None - TABLE_ROWS: int = 0 - AVG_ROW_LENGTH: int = 0 - DATA_LENGTH: int = 0 - MAX_DATA_LENGTH: int = 0 - INDEX_LENGTH: int = 0 - DATA_FREE: int = 0 - AUTO_INCREMENT: int = None - CREATE_TIME: datetime = datetime(2024, 1, 1) - UPDATE_TIME: datetime = datetime(2024, 1, 1) - CHECK_TIME: datetime = datetime(2024, 1, 1) - TABLE_COLLATION: str = None - CHECKSUM: int = None - CREATE_OPTIONS: str = None - TABLE_COMMENT: str = '' - - def to_list(self) -> list: - return list(astuple(self)) - - @staticmethod - def from_dict(data: dict): - del_keys = [] - data = {k.upper(): v for k, v in data.items()} - - # table is different column - if 'TABLE_NAME' not in data and 'NAME' in data: - data['TABLE_NAME'] = data['NAME'] - - for key in data: - if key not in TablesRow.__dataclass_fields__: - del_keys.append(key) - - for key in del_keys: - del data[key] - - return TablesRow(**data) diff --git a/mindsdb/api/executor/datahub/datanodes/__init__.py b/mindsdb/api/executor/datahub/datanodes/__init__.py deleted file mode 100644 index 5feb22957d3..00000000000 --- a/mindsdb/api/executor/datahub/datanodes/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .information_schema_datanode import InformationSchemaDataNode -from .integration_datanode import IntegrationDataNode - -__all__ = ['InformationSchemaDataNode', 'IntegrationDataNode'] diff --git a/mindsdb/api/executor/datahub/datanodes/datanode.py b/mindsdb/api/executor/datahub/datanodes/datanode.py deleted file mode 100644 index 8be9e355949..00000000000 --- a/mindsdb/api/executor/datahub/datanodes/datanode.py +++ /dev/null @@ -1,26 +0,0 @@ -from pandas import DataFrame - -from mindsdb.integrations.libs.response import DataHandlerResponse - - -class DataNode: - type = "meta" - has_support_stream = False - - def __init__(self): - pass - - def get_type(self): - return self.type - - def get_tables(self): - pass - - def get_table_columns_df(self, table_name: str, schema_name: str | None = None) -> DataFrame: - pass - - def 
get_table_columns_names(self, table_name: str, schema_name: str | None = None) -> list[str]: - pass - - def query(self, query=None, session=None) -> DataHandlerResponse: - pass diff --git a/mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py b/mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py deleted file mode 100644 index ac309f72e6d..00000000000 --- a/mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +++ /dev/null @@ -1,235 +0,0 @@ -from dataclasses import astuple - -import pandas as pd -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.api.executor.datahub.datanodes.datanode import DataNode -from mindsdb.api.executor.datahub.datanodes.integration_datanode import IntegrationDataNode -from mindsdb.api.executor.datahub.datanodes.project_datanode import ProjectDataNode -from mindsdb.api.executor.datahub.classes.tables_row import TablesRow -from mindsdb.api.executor.utilities.sql import query_df -from mindsdb.api.executor.utilities.sql import get_query_tables -from mindsdb.api.executor import exceptions as exc -from mindsdb.interfaces.database.projects import ProjectController -from mindsdb.integrations.libs.response import TableResponse, INF_SCHEMA_COLUMNS_NAMES -from mindsdb.utilities.types.column import Column -from mindsdb.utilities import log - -from .system_tables import ( - SchemataTable, - TablesTable, - MetaTablesTable, - ColumnsTable, - MetaColumnsTable, - EventsTable, - RoutinesTable, - PluginsTable, - EnginesTable, - MetaTableConstraintsTable, - KeyColumnUsageTable, - MetaColumnUsageTable, - StatisticsTable, - MetaColumnStatisticsTable, - CharacterSetsTable, - CollationsTable, - MetaHandlerInfoTable, -) -from .mindsdb_tables import ( - ModelsTable, - DatabasesTable, - MLEnginesTable, - HandlersTable, - JobsTable, - QueriesTable, - ChatbotsTable, - KBTable, - AgentsTable, - ViewsTable, - TriggersTable, -) - - -logger = log.getLogger(__name__) - - -class 
InformationSchemaDataNode(DataNode): - type = "INFORMATION_SCHEMA" - - tables_list = [ - SchemataTable, - TablesTable, - MetaTablesTable, - ColumnsTable, - MetaColumnsTable, - EventsTable, - RoutinesTable, - PluginsTable, - EnginesTable, - MetaTableConstraintsTable, - KeyColumnUsageTable, - MetaColumnUsageTable, - StatisticsTable, - MetaColumnStatisticsTable, - CharacterSetsTable, - CollationsTable, - ModelsTable, - DatabasesTable, - MLEnginesTable, - HandlersTable, - JobsTable, - ChatbotsTable, - KBTable, - AgentsTable, - ViewsTable, - TriggersTable, - QueriesTable, - MetaHandlerInfoTable, - ] - - def __init__(self, session): - self.session = session - self.integration_controller = session.integration_controller - self.project_controller = ProjectController() - self.database_controller = session.database_controller - self.persist_datanodes_names = ("log", "files") - self.tables = {t.name: t for t in self.tables_list} - - def __getitem__(self, key): - return self.get(key) - - def get(self, name): - name_lower = name.lower() - - if name_lower == "information_schema": - return self - - if name_lower == "log": - return self.database_controller.get_system_db("log") - - if name_lower == "files": - return IntegrationDataNode( - "files", - ds_type="file", - integration_controller=self.session.integration_controller, - ) - - existing_databases_meta = self.database_controller.get_dict() # filter_type='project' - database_name = None - for key in existing_databases_meta: - if key.lower() == name_lower: - database_name = key - break - - if database_name is None: - return None - - database_meta = existing_databases_meta[database_name] - if database_meta["type"] == "integration": - integration = self.integration_controller.get(name=database_name) - return IntegrationDataNode( - database_name, - ds_type=integration["engine"], - integration_controller=self.session.integration_controller, - ) - if database_meta["type"] == "project": - project = 
self.database_controller.get_project(name=database_name) - return ProjectDataNode( - project=project, - integration_controller=self.session.integration_controller, - information_schema=self, - ) - - integration_names = self.integration_controller.get_all().keys() - for integration_name in integration_names: - if integration_name.lower() == name_lower: - datasource = self.integration_controller.get(name=integration_name) - return IntegrationDataNode( - integration_name, - ds_type=datasource["engine"], - integration_controller=self.session.integration_controller, - ) - - return None - - def get_table_columns_df(self, table_name: str, schema_name: str | None = None) -> pd.DataFrame: - """Get a DataFrame containing representation of information_schema.columns for the specified table. - - Args: - table_name (str): The name of the table to get columns from. - schema_name (str | None): Not in use. The name of the schema to get columns from. - - Returns: - pd.DataFrame: A DataFrame containing representation of information_schema.columns for the specified table. - The DataFrame has list of columns as in the integrations.libs.response.INF_SCHEMA_COLUMNS_NAMES - but only 'COLUMN_NAME' column is filled with the actual column names. - Other columns are filled with None. - """ - table_name = table_name.upper() - if table_name not in self.tables: - raise exc.TableNotExistError(f"Table information_schema.{table_name} does not exists") - table_columns_names = self.tables[table_name].columns - df = pd.DataFrame(pd.Series(table_columns_names, name=INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME)) - for column_name in astuple(INF_SCHEMA_COLUMNS_NAMES): - if column_name == INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME: - continue - df[column_name] = None - return df - - def get_table_columns_names(self, table_name: str, schema_name: str | None = None) -> list[str]: - """Get a list of column names for the specified table. - - Args: - table_name (str): The name of the table to get columns from. 
- schema_name (str | None): Not in use. The name of the schema to get columns from. - - Returns: - list[str]: A list of column names for the specified table. - """ - table_name = table_name.upper() - if table_name not in self.tables: - raise exc.TableNotExistError(f"Table information_schema.{table_name} does not exists") - return self.tables[table_name].columns - - def get_integrations_names(self): - integration_names = self.integration_controller.get_all().keys() - # remove files from list to prevent doubling in 'select from INFORMATION_SCHEMA.TABLES' - return [x.lower() for x in integration_names if x not in ("files",)] - - def get_projects_names(self): - projects = self.database_controller.get_dict(filter_type="project") - return [x.lower() for x in projects] - - def get_tables(self): - return [TablesRow(TABLE_NAME=name) for name in self.tables.keys()] - - def get_tree_tables(self): - return {name: table for name, table in self.tables.items() if table.visible} - - def query(self, query: ASTNode, session=None) -> TableResponse: - query_tables = [x[1] for x in get_query_tables(query)] - - if len(query_tables) != 1: - raise exc.BadTableError(f"Only one table can be used in query to information_schema: {query}") - - table_name = query_tables[0].upper() - - if table_name not in self.tables: - raise exc.NotSupportedYet("Information schema: Not implemented.") - - tbl = self.tables[table_name] - - if hasattr(tbl, "get_data"): - dataframe = tbl.get_data(query=query, inf_schema=self, session=self.session) - else: - dataframe = self._get_empty_table(tbl) - data = query_df(dataframe, query, session=self.session) - - columns = [Column(name=k, dtype=v) for k, v in data.dtypes.items()] - return TableResponse(data=data, columns=columns, affected_rows=0) - - def _get_empty_table(self, table): - columns = table.columns - data = [] - - df = pd.DataFrame(data, columns=columns) - return df diff --git a/mindsdb/api/executor/datahub/datanodes/integration_datanode.py 
b/mindsdb/api/executor/datahub/datanodes/integration_datanode.py deleted file mode 100644 index 0bcaae6aad4..00000000000 --- a/mindsdb/api/executor/datahub/datanodes/integration_datanode.py +++ /dev/null @@ -1,311 +0,0 @@ -import time -import inspect -import functools -from dataclasses import astuple - -import pandas as pd -from sqlalchemy.types import Integer, Float - -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb_sql_parser.ast import Insert, Identifier, CreateTable, TableColumn, DropTables - -from mindsdb.api.executor.datahub.datanodes.datanode import DataNode -from mindsdb.api.executor.datahub.datanodes.system_tables import infer_mysql_type -from mindsdb.api.executor.datahub.classes.tables_row import TablesRow -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from mindsdb.api.executor.sql_query.result_set import ResultSet -from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES, DataHandlerResponse, ErrorResponse, OkResponse -from mindsdb.integrations.utilities.utils import get_class_name -from mindsdb.metrics import metrics -from mindsdb.utilities import log -from mindsdb.utilities.profiler import profiler -from mindsdb.utilities.exception import QueryError - -logger = log.getLogger(__name__) - - -class DBHandlerException(Exception): - pass - - -def collect_metrics(func): - """Decorator for collecting performance metrics if integration handler query. 
- - The decorator measures: - - Query execution time using high-precision performance counter - - Response size (number of rows returned) - - Args: - func: The function to be decorated (integration handler method) - - Returns: - function: Wrapped function that includes metrics collection and error handling - """ - - @functools.wraps(func) - def wrapper(self, *args, **kwargs): - try: - time_before_query = time.perf_counter() - result = func(self, *args, **kwargs) - - # metrics - handler_class_name = get_class_name(self.integration_handler) - elapsed_seconds = time.perf_counter() - time_before_query - query_time_with_labels = metrics.INTEGRATION_HANDLER_QUERY_TIME.labels(handler_class_name, result.type) - query_time_with_labels.observe(elapsed_seconds) - - num_rows = getattr(result, "affected_rows", None) - if num_rows is None: - num_rows = getattr(result, "rows_fetched", -1) - if num_rows is None: - num_rows = -1 - response_size_with_labels = metrics.INTEGRATION_HANDLER_RESPONSE_SIZE.labels( - handler_class_name, result.type - ) - response_size_with_labels.observe(num_rows) - logger.debug(f"Handler '{handler_class_name}' returned {num_rows} rows in {elapsed_seconds:.3f} seconds") - except Exception as e: - msg = str(e).strip() - if msg == "": - msg = e.__class__.__name__ - msg = f"[{self.ds_type}/{self.integration_name}]: {msg}" - raise DBHandlerException(msg) from e - return result - - return wrapper - - -class IntegrationDataNode(DataNode): - type = "integration" - - def __init__(self, integration_name, ds_type, integration_controller): - self.integration_name = integration_name - self.ds_type = ds_type - self.integration_controller = integration_controller - self.integration_handler = self.integration_controller.get_data_handler(self.integration_name) - - def get_type(self): - return self.type - - def get_tables(self): - response = self.integration_handler.get_tables() - if response.type == RESPONSE_TYPE.TABLE: - result_dict = 
response.data_frame.to_dict(orient="records") - return [TablesRow.from_dict(row) for row in result_dict] - else: - raise Exception(f"Can't get tables: {response.error_message}") - - def get_table_columns_df(self, table_name: str, schema_name: str | None = None) -> pd.DataFrame: - """Get a DataFrame containing representation of information_schema.columns for the specified table. - - Args: - table_name (str): The name of the table to get columns from. - schema_name (str | None): The name of the schema to get columns from. - - Returns: - pd.DataFrame: A DataFrame containing representation of information_schema.columns for the specified table. - The DataFrame has list of columns as in the integrations.libs.response.INF_SCHEMA_COLUMNS_NAMES. - """ - if "schema_name" in inspect.signature(self.integration_handler.get_columns).parameters: - response = self.integration_handler.get_columns(table_name, schema_name) - else: - response = self.integration_handler.get_columns(table_name) - - if response.type == RESPONSE_TYPE.COLUMNS_TABLE: - return response.data_frame - - if response.type != RESPONSE_TYPE.TABLE: - logger.warning(f"Wrong response type for handler's `get_columns` call: {response.type}") - return pd.DataFrame([], columns=astuple(INF_SCHEMA_COLUMNS_NAMES)) - - # region fallback for old handlers - df = response.data_frame - df.columns = [name.upper() for name in df.columns] - if "FIELD" not in df.columns or "TYPE" not in df.columns: - logger.warning( - f"Response from the handler's `get_columns` call does not contain required columns: {list(df.columns)}" - ) - return pd.DataFrame([], columns=astuple(INF_SCHEMA_COLUMNS_NAMES)) - - new_df = df[["FIELD", "TYPE"]] - new_df.columns = ["COLUMN_NAME", "DATA_TYPE"] - - new_df[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE] = new_df[INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE].apply( - lambda x: infer_mysql_type(x).value - ) - - for column_name in astuple(INF_SCHEMA_COLUMNS_NAMES): - if column_name in new_df.columns: - continue - 
new_df[column_name] = None - # endregion - - return new_df - - def get_table_columns_names(self, table_name: str, schema_name: str | None = None) -> list[str]: - """Get a list of column names for the specified table. - - Args: - table_name (str): The name of the table to get columns from. - schema_name (str | None): The name of the schema to get columns from. - - Returns: - list[str]: A list of column names for the specified table. - """ - df = self.get_table_columns_df(table_name, schema_name) - return df[INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME].to_list() - - def drop_table(self, name: Identifier, if_exists=False): - drop_ast = DropTables(tables=[name], if_exists=if_exists) - self.query(drop_ast) - - def create_table( - self, - table_name: Identifier, - result_set: ResultSet = None, - columns: list[TableColumn] = None, - is_replace: bool = False, - is_create: bool = False, - raise_if_exists: bool = True, - **kwargs, - ) -> OkResponse: - # is_create - create table - # if !raise_if_exists: error will be skipped - # is_replace - drop table if exists - # is_create==False and is_replace==False: just insert - - table_columns_meta = {} - - if columns is None: - columns: list[TableColumn] = result_set.get_ast_columns() - table_columns_meta = {column.name: column.type for column in columns} - - if is_replace: - # drop - drop_ast = DropTables(tables=[table_name], if_exists=True) - self.query(drop_ast) - is_create = True - - if is_create: - create_table_ast = CreateTable(name=table_name, columns=columns, is_replace=is_replace) - try: - self.query(create_table_ast) - except Exception as e: - if raise_if_exists: - raise e - - if result_set is None: - # it is just a 'create table' - return OkResponse() - - # native insert - if hasattr(self.integration_handler, "insert"): - df = result_set.to_df() - - result: DataHandlerResponse = self.integration_handler.insert(table_name.parts[-1], df) - if result is not None: - affected_rows = result.affected_rows - else: - affected_rows = None 
- return OkResponse(affected_rows=affected_rows) - - insert_columns = [Identifier(parts=[x.alias]) for x in result_set.columns] - - # adapt table types - for col_idx, col in enumerate(result_set.columns): - column_type = table_columns_meta[col.alias] - - if column_type == Integer: - type_name = "int" - elif column_type == Float: - type_name = "float" - else: - continue - - try: - result_set.set_col_type(col_idx, type_name) - except Exception: - pass - - values = result_set.to_lists() - - if len(values) == 0: - # not need to insert - return OkResponse() - - insert_ast = Insert(table=table_name, columns=insert_columns, values=values, is_plain=True) - - try: - result: DataHandlerResponse = self.query(insert_ast) - except Exception as e: - msg = f"[{self.ds_type}/{self.integration_name}]: {str(e)}" - raise DBHandlerException(msg) from e - - return OkResponse(affected_rows=result.affected_rows) - - def has_support_stream(self) -> bool: - """Check if the integration handler supports streaming responses. - - Returns: - bool: True if the integration handler supports streaming responses, False otherwise. - """ - return getattr(self.integration_handler, "stream_response", False) - - @profiler.profile() - def query(self, query: ASTNode | str = None, session=None) -> DataHandlerResponse: - """Execute a query against the integration data source. - - This method processes SQL queries either as ASTNode objects or raw SQL strings - - Args: - query (ASTNode | str, optional): The query to execute. 
Can be either: - - ASTNode: A parsed SQL query object - - str: Raw SQL query string - session: Session object (currently unused but kept for compatibility) - - Returns: - DataHandlerResponse: Response object - - Raises: - NotImplementedError: If query is not ASTNode or str type - Exception: If the query execution fails with an error response - """ - if isinstance(query, ASTNode): - result: DataHandlerResponse = self.query_integration_handler(query=query) - elif isinstance(query, str): - result: DataHandlerResponse = self.native_query_integration(query=query) - else: - raise NotImplementedError("Thew query argument must be ASTNode or string type") - - if type(result) is ErrorResponse: - if isinstance(query, ASTNode): - try: - query_str = query.to_string() - except Exception: - # most likely it is CreateTable with exotic column types - query_str = "can't be dump" - else: - query_str = query - - exception = QueryError( - db_name=self.integration_handler.name, - db_type=self.integration_handler.__class__.name, - db_error_msg=result.error_message, - failed_query=query_str, - is_expected=result.is_expected_error, - ) - - if result.exception is None: - raise exception - else: - raise exception from result.exception - - return result - - @collect_metrics - def query_integration_handler(self, query: ASTNode) -> DataHandlerResponse: - return self.integration_handler.query(query) - - @collect_metrics - def native_query_integration(self, query: str) -> DataHandlerResponse: - return self.integration_handler.native_query(query) diff --git a/mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py b/mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py deleted file mode 100644 index b7fd38e3b3a..00000000000 --- a/mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +++ /dev/null @@ -1,459 +0,0 @@ -import json - -import pandas as pd -from mindsdb_sql_parser.ast import BinaryOperation, Constant, Select -from mindsdb_sql_parser.ast.base import ASTNode - -from 
mindsdb.interfaces.agents.agents_controller import AgentsController -from mindsdb.interfaces.jobs.jobs_controller import JobsController -from mindsdb.interfaces.database.views import ViewController -from mindsdb.interfaces.database.projects import ProjectController -from mindsdb.interfaces.query_context.context_controller import query_context_controller - -from mindsdb.api.executor.datahub.datanodes.system_tables import Table - - -def to_json(obj): - if obj is None: - return None - try: - return json.dumps(obj) - except TypeError: - return obj - - -def get_project_name(query: ASTNode = None): - project_name = None - if ( - isinstance(query, Select) - and type(query.where) is BinaryOperation - and query.where.op == "=" - and query.where.args[0].parts == ["project"] - and isinstance(query.where.args[1], Constant) - ): - project_name = query.where.args[1].value - return project_name - - -class MdbTable(Table): - visible: bool = True - - -class ModelsTable(MdbTable): - name = "MODELS" - columns = [ - "NAME", - "ENGINE", - "PROJECT", - "ACTIVE", - "VERSION", - "STATUS", - "ACCURACY", - "PREDICT", - "UPDATE_STATUS", - "MINDSDB_VERSION", - "ERROR", - "SELECT_DATA_QUERY", - "TRAINING_OPTIONS", - "CURRENT_TRAINING_PHASE", - "TOTAL_TRAINING_PHASES", - "TRAINING_PHASE_NAME", - "TAG", - "CREATED_AT", - "TRAINING_TIME", - ] - - @classmethod - def get_data(cls, session, inf_schema, **kwargs): - data = [] - for project_name in inf_schema.get_projects_names(): - project = inf_schema.database_controller.get_project(name=project_name) - project_models = project.get_models(active=None, with_secrets=session.show_secrets) - for row in project_models: - table_name = row["name"] - table_meta = row["metadata"] - - data.append( - [ - table_name, - table_meta["engine"], - project_name, - table_meta["active"], - table_meta["version"], - table_meta["status"], - table_meta["accuracy"], - table_meta["predict"], - table_meta["update_status"], - table_meta["mindsdb_version"], - 
table_meta["error"], - table_meta["select_data_query"], - to_json(table_meta["training_options"]), - table_meta["current_training_phase"], - table_meta["total_training_phases"], - table_meta["training_phase_name"], - table_meta["label"], - row["created_at"], - table_meta["training_time"], - ] - ) - # TODO optimise here - # if target_table is not None and target_table != project_name: - # continue - - df = pd.DataFrame(data, columns=cls.columns) - return df - - -class DatabasesTable(MdbTable): - name = "DATABASES" - columns = ["NAME", "TYPE", "ENGINE", "CONNECTION_DATA"] - - @classmethod - def get_data(cls, session, inf_schema, **kwargs): - project = inf_schema.database_controller.get_list(with_secrets=session.show_secrets) - data = [[x["name"], x["type"], x["engine"], to_json(x.get("connection_data"))] for x in project] - - df = pd.DataFrame(data, columns=cls.columns) - return df - - -class MLEnginesTable(MdbTable): - name = "ML_ENGINES" - columns = ["NAME", "HANDLER", "CONNECTION_DATA"] - - @classmethod - def get_data(cls, session, inf_schema, **kwargs): - integrations = inf_schema.integration_controller.get_all(show_secrets=session.show_secrets) - ml_integrations = {key: val for key, val in integrations.items() if val["type"] == "ml"} - - data = [] - for _key, val in ml_integrations.items(): - data.append([val["name"], val.get("engine"), to_json(val.get("connection_data"))]) - - df = pd.DataFrame(data, columns=cls.columns) - return df - - -class HandlersTable(MdbTable): - name = "HANDLERS" - columns = [ - "NAME", - "TYPE", - "TITLE", - "DESCRIPTION", - "VERSION", - "CONNECTION_ARGS", - "IMPORT_SUCCESS", - "IMPORT_ERROR", - ] - - @classmethod - def get_data(cls, inf_schema, **kwargs): - handlers = inf_schema.integration_controller.get_handlers_import_status() - - data = [] - for _key, val in handlers.items(): - connection_args = val.get("connection_args") - if connection_args is not None: - connection_args = to_json(connection_args) - import_success = 
val.get("import", {}).get("success") - import_error = val.get("import", {}).get("error_message") - data.append( - [ - val["name"], - val.get("type"), - val.get("title"), - val.get("description"), - val.get("version"), - connection_args, - import_success, - import_error, - ] - ) - - df = pd.DataFrame(data, columns=cls.columns) - return df - - -class JobsTable(MdbTable): - name = "JOBS" - columns = [ - "NAME", - "PROJECT", - "START_AT", - "END_AT", - "NEXT_RUN_AT", - "SCHEDULE_STR", - "QUERY", - "IF_QUERY", - "VARIABLES", - ] - - @classmethod - def get_data(cls, query: ASTNode = None, **kwargs): - jobs_controller = JobsController() - - project_name = None - if ( - isinstance(query, Select) - and type(query.where) is BinaryOperation - and query.where.op == "=" - and query.where.args[0].parts == ["project"] - and isinstance(query.where.args[1], Constant) - ): - project_name = query.where.args[1].value - - data = jobs_controller.get_list(project_name) - - columns = cls.columns - columns_lower = [col.lower() for col in columns] - - # to list of lists - data = [[row[k] for k in columns_lower] for row in data] - - return pd.DataFrame(data, columns=columns) - - -class TriggersTable(MdbTable): - name = "TRIGGERS" - columns = [ - "TRIGGER_CATALOG", - "TRIGGER_SCHEMA", - "TRIGGER_NAME", - "EVENT_MANIPULATION", - "EVENT_OBJECT_CATALOG", - "EVENT_OBJECT_SCHEMA", - "EVENT_OBJECT_TABLE", - "ACTION_ORDER", - "ACTION_CONDITION", - "ACTION_STATEMENT", - "ACTION_ORIENTATION", - "ACTION_TIMING", - "ACTION_REFERENCE_OLD_TABLE", - "ACTION_REFERENCE_NEW_TABLE", - "ACTION_REFERENCE_OLD_ROW", - "ACTION_REFERENCE_NEW_ROW", - "CREATED", - "SQL_MODE", - "DEFINER", - "CHARACTER_SET_CLIENT", - "COLLATION_CONNECTION", - "DATABASE_COLLATION", - ] - - mindsdb_columns = ["NAME", "PROJECT", "DATABASE", "TABLE", "QUERY", "LAST_ERROR"] - - @classmethod - def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs): - from mindsdb.interfaces.triggers.triggers_controller import TriggersController 
- - triggers_controller = TriggersController() - - project_name = None - if ( - isinstance(query, Select) - and type(query.where) is BinaryOperation - and query.where.op == "=" - and query.where.args[0].parts == ["project"] - and isinstance(query.where.args[1], Constant) - ): - project_name = query.where.args[1].value - - data = triggers_controller.get_list(project_name) - - columns = cls.mindsdb_columns - if inf_schema.session.api_type == "sql": - columns = columns + cls.columns - columns_lower = [col.lower() for col in columns] - - # to list of lists - data = [[row.get(k) for k in columns_lower] for row in data] - - return pd.DataFrame(data, columns=columns) - - -class ChatbotsTable(MdbTable): - name = "CHATBOTS" - columns = [ - "NAME", - "PROJECT", - "DATABASE", - "MODEL_NAME", - "PARAMS", - "IS_RUNNING", - "LAST_ERROR", - "WEBHOOK_TOKEN", - ] - - @classmethod - def get_data(cls, query: ASTNode = None, **kwargs): - from mindsdb.interfaces.chatbot.chatbot_controller import ChatBotController - - chatbot_controller = ChatBotController() - - project_name = None - if ( - isinstance(query, Select) - and type(query.where) is BinaryOperation - and query.where.op == "=" - and query.where.args[0].parts == ["project"] - and isinstance(query.where.args[1], Constant) - ): - project_name = query.where.args[1].value - - chatbot_data = chatbot_controller.get_chatbots(project_name=project_name) - - columns = cls.columns - columns_lower = [col.lower() for col in columns] - - # to list of lists - data = [] - for row in chatbot_data: - row["params"] = to_json(row["params"]) - data.append([row[k] for k in columns_lower]) - - return pd.DataFrame(data, columns=columns) - - -class KBTable(MdbTable): - name = "KNOWLEDGE_BASES" - columns = [ - "NAME", - "PROJECT", - "EMBEDDING_MODEL", - "RERANKING_MODEL", - "STORAGE", - "METADATA_COLUMNS", - "CONTENT_COLUMNS", - "ID_COLUMN", - "PARAMS", - "INSERT_STARTED_AT", - "INSERT_FINISHED_AT", - "PROCESSED_ROWS", - "ERROR", - "QUERY_ID", - ] - - 
@classmethod - def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs): - project_name = get_project_name(query) - - from mindsdb.interfaces.knowledge_base.controller import KnowledgeBaseController - - controller = KnowledgeBaseController(inf_schema.session) - kb_list = controller.list(project_name) - - # shouldn't be a lot of queries, we can fetch them all - queries_data = {item["id"]: item for item in query_context_controller.list_queries()} - - data = [] - - for kb in kb_list: - query_item = {} - query_id = kb["query_id"] - if query_id is not None: - if query_id in queries_data: - query_item = queries_data.get(query_id) - else: - query_id = None - - data.append( - ( - kb["name"], - kb["project_name"], - to_json(kb["embedding_model"]), - to_json(kb["reranking_model"]), - kb["vector_database"] + "." + kb["vector_database_table"], - to_json(kb["metadata_columns"]), - to_json(kb["content_columns"]), - kb["id_column"], - to_json(kb["params"]), - query_item.get("started_at"), - query_item.get("finished_at"), - query_item.get("processed_rows"), - query_item.get("error"), - query_id, - ) - ) - - return pd.DataFrame(data, columns=cls.columns) - - -class AgentsTable(MdbTable): - name = "AGENTS" - columns = ["NAME", "PROJECT", "MODEL", "PARAMS"] - - @classmethod - def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs): - agents_controller = AgentsController() - - project_name = get_project_name(query) - all_agents = agents_controller.get_agents(project_name) - - project_controller = ProjectController() - project_names = {i.id: i.name for i in project_controller.get_list()} - - # NAME, PROJECT, MODEL, PARAMS (skills removed) - data = [] - for a in all_agents: - params = a.params or {} - model = params.pop("model", {}) - data.append( - [ - a.name, - project_names[a.project_id], - to_json(model), - to_json(params), - ] - ) - return pd.DataFrame(data, columns=cls.columns) - - -class ViewsTable(MdbTable): - name = "VIEWS" - columns = ["NAME", "PROJECT", 
"QUERY"] - - @classmethod - def get_data(cls, query: ASTNode = None, **kwargs): - project_name = get_project_name(query) - - data = ViewController().list(project_name) - - columns_lower = [col.lower() for col in cls.columns] - - # to list of lists - data = [[row[k] for k in columns_lower] for row in data] - - return pd.DataFrame(data, columns=cls.columns) - - -class QueriesTable(MdbTable): - name = "QUERIES" - columns = [ - "ID", - "STARTED_AT", - "FINISHED_AT", - "PROCESSED_ROWS", - "ERROR", - "SQL", - "DATABASE", - "PARAMETERS", - "CONTEXT", - "UPDATED_AT", - ] - - @classmethod - def get_data(cls, **kwargs): - """ - Returns all queries in progres or recently completed - Only queries marked as is_resumable by planner are stored in this table - :param kwargs: - :return: - """ - - data = query_context_controller.list_queries() - columns_lower = [col.lower() for col in cls.columns] - - data = [[row[k] for k in columns_lower] for row in data] - - return pd.DataFrame(data, columns=cls.columns) diff --git a/mindsdb/api/executor/datahub/datanodes/project_datanode.py b/mindsdb/api/executor/datahub/datanodes/project_datanode.py deleted file mode 100644 index 21e07d65d83..00000000000 --- a/mindsdb/api/executor/datahub/datanodes/project_datanode.py +++ /dev/null @@ -1,198 +0,0 @@ -from copy import deepcopy -from dataclasses import astuple - -import pandas as pd -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast import ( - BinaryOperation, - Identifier, - Constant, - Update, - Select, - Delete, -) - -from mindsdb.api.executor.datahub.datanodes.datanode import DataNode -from mindsdb.api.executor.datahub.classes.tables_row import TablesRow -from mindsdb.utilities.exception import EntityNotExistsError -from mindsdb.utilities.types.column import Column -from mindsdb.utilities.partitioning import process_dataframe_in_partitions -from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES, 
DataHandlerResponse, OkResponse, TableResponse - - -class ProjectDataNode(DataNode): - type = "project" - - def __init__(self, project, integration_controller, information_schema): - self.project = project - self.integration_controller = integration_controller - self.information_schema = information_schema - - def get_type(self): - return self.type - - def get_tables(self): - tables = self.project.get_tables() - table_types = { - "table": "BASE TABLE", - "model": "MODEL", - "view": "VIEW", - "agent": "AGENT", - "knowledge_base": "KNOWLEDGE BASE", - } - tables = [{"TABLE_NAME": key, "TABLE_TYPE": table_types.get(val["type"])} for key, val in tables.items()] - result = [TablesRow.from_dict(row) for row in tables] - return result - - def get_table_columns_df(self, table_name: str, schema_name: str | None = None) -> pd.DataFrame: - """Get a DataFrame containing representation of information_schema.columns for the specified table. - - Args: - table_name (str): The name of the table to get columns from. - schema_name (str | None): Not in use. The name of the schema to get columns from. - - Returns: - pd.DataFrame: A DataFrame containing representation of information_schema.columns for the specified table. - The DataFrame has list of columns as in the integrations.libs.response.INF_SCHEMA_COLUMNS_NAMES - but only 'COLUMN_NAME' column is filled with the actual column names. - Other columns are filled with None. - """ - columns = self.project.get_columns(table_name) - - data = [] - row = {name: None for name in astuple(INF_SCHEMA_COLUMNS_NAMES)} - for column_name in columns: - r = row.copy() - r[INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME] = column_name - data.append(r) - - return pd.DataFrame(data, columns=astuple(INF_SCHEMA_COLUMNS_NAMES)) - - def get_table_columns_names(self, table_name: str, schema_name: str | None = None) -> list[str]: - """Get a list of column names for the specified table. - - Args: - table_name (str): The name of the table to get columns from. 
- schema_name (str | None): Not in use. The name of the schema to get columns from. - - Returns: - list[str]: A list of column names for the specified table. - """ - return self.project.get_columns(table_name) - - def predict(self, model_name: str, df, version=None, params=None): - model_metadata = self.project.get_model(model_name) - if model_metadata is None: - raise Exception(f"Can't find model '{model_name}'") - model_metadata = model_metadata["metadata"] - if model_metadata["update_status"] == "available": - raise Exception(f"model '{model_name}' is obsolete and needs to be updated. Run 'RETRAIN {model_name};'") - ml_handler = self.integration_controller.get_ml_handler(model_metadata["engine_name"]) - if params is not None and "partition_size" in params: - - def callback(chunk): - return ml_handler.predict( - model_name, chunk, project_name=self.project.name, version=version, params=params - ) - - return pd.concat(process_dataframe_in_partitions(df, callback, params["partition_size"])) - - return ml_handler.predict(model_name, df, project_name=self.project.name, version=version, params=params) - - def query(self, query: ASTNode | str = None, session=None) -> DataHandlerResponse: - if isinstance(query, str): - query = parse_sql(query) - - if isinstance(query, Update): - query_table = query.table.parts[0].lower() - kb_table = session.kb_controller.get_table(query_table, self.project.id) - if kb_table: - # this is the knowledge db - kb_table.update_query(query) - return OkResponse() - - raise NotImplementedError(f"Can't update object: {query_table}") - - elif isinstance(query, Delete): - query_table = query.table.parts[0].lower() - kb_table = session.kb_controller.get_table(query_table, self.project.id) - if kb_table: - # this is the knowledge db - kb_table.delete_query(query) - return OkResponse() - - raise NotImplementedError(f"Can't delete object: {query_table}") - - elif isinstance(query, Select): - match query.from_table.parts, query.from_table.is_quoted: - 
case [query_table], [is_quoted]: - ... - case [query_table, int(_)], [is_quoted, _]: - ... - case [query_table, str(version)], [is_quoted, _] if version.isdigit(): - ... - case _: - raise EntityNotExistsError( - f"Table '{query.from_table}' not found in the database. The project database support only single-part names", - self.project.name, - ) - - if not is_quoted: - query_table = query_table.lower() - - # region is it query to 'models'? - if query_table in ("models", "jobs", "mdb_triggers", "chatbots", "skills", "agents"): - new_query = deepcopy(query) - project_filter = BinaryOperation("=", args=[Identifier("project"), Constant(self.project.name)]) - if new_query.where is None: - new_query.where = project_filter - else: - new_query.where = BinaryOperation("and", args=[new_query.where, project_filter]) - return self.information_schema.query(new_query) - # endregion - - # other table from project - if self.project.get_view(query_table, strict_case=is_quoted): - # this is the view - df = self.project.query_view(query, session) - - columns = [Column(name=k, dtype=v) for k, v in df.dtypes.items()] - return TableResponse(data=df, columns=columns) - - kb_table = session.kb_controller.get_table(query_table, self.project.id) - if kb_table: - # this is the knowledge db - df = kb_table.select_query(query) - columns = [Column(name=k, dtype=v) for k, v in df.dtypes.items()] - return TableResponse(data=df, columns=columns) - - raise EntityNotExistsError(f"Table '{query_table}' not found in database", self.project.name) - else: - raise NotImplementedError(f"Query not supported {query}") - - def create_table( - self, table_name: Identifier, result_set=None, is_replace=False, params=None, is_create=None, **kwargs - ) -> OkResponse: - # is_create - create table - # is_replace - drop table if exists - # is_create==False and is_replace==False: just insert - - from mindsdb.api.executor.controllers.session_controller import SessionController - - session = SessionController() - - if 
is_create: - raise NotImplementedError(f"Can't create table {table_name}") - - table_name = table_name.parts[-1] - kb_table = session.kb_controller.get_table(table_name, self.project.id) - if kb_table: - # this is the knowledge db - if is_replace: - kb_table.clear() - - df = result_set.to_df() - kb_table.insert(df, params=params) - return OkResponse() - raise ValueError(f"Table or Knowledge Base '{table_name}' doesn't exist") diff --git a/mindsdb/api/executor/datahub/datanodes/system_tables.py b/mindsdb/api/executor/datahub/datanodes/system_tables.py deleted file mode 100644 index 785aca507a9..00000000000 --- a/mindsdb/api/executor/datahub/datanodes/system_tables.py +++ /dev/null @@ -1,782 +0,0 @@ -from typing import Optional, Literal -from dataclasses import dataclass, fields - -import pandas as pd -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.utilities import log -from mindsdb.utilities.config import config -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions -from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES -from mindsdb.interfaces.data_catalog.data_catalog_retriever import DataCatalogRetriever -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE, MYSQL_DATA_TYPE_COLUMNS_DEFAULT -from mindsdb.api.executor.datahub.classes.tables_row import TABLES_ROW_TYPE, TablesRow - - -logger = log.getLogger(__name__) - - -def _get_scope(query): - databases, tables = None, None - try: - conditions = extract_comparison_conditions(query.where, ignore_functions=True) - except NotImplementedError: - return databases, tables - for op, arg1, arg2 in conditions: - if op == "=": - scope = [arg2] - elif op == "in": - if not isinstance(arg2, list): - arg2 = [arg2] - scope = arg2 - else: - continue - - if arg1.lower() == "table_schema": - databases = scope - elif arg1.lower() == "table_name": - tables = scope - return databases, tables - - -class Table: - deletable: bool = False - visible: 
bool = False - kind: str = "table" - - -class SchemataTable(Table): - name = "SCHEMATA" - columns = [ - "CATALOG_NAME", - "SCHEMA_NAME", - "DEFAULT_CHARACTER_SET_NAME", - "DEFAULT_COLLATION_NAME", - "SQL_PATH", - ] - - @classmethod - def get_data(cls, inf_schema=None, **kwargs): - databases_meta = inf_schema.session.database_controller.get_list() - data = [["def", x["name"], "utf8mb4", "utf8mb4_0900_ai_ci", None] for x in databases_meta] - - df = pd.DataFrame(data, columns=cls.columns) - return df - - -class TablesTable(Table): - name = "TABLES" - - columns = [ - "TABLE_CATALOG", - "TABLE_SCHEMA", - "TABLE_NAME", - "TABLE_TYPE", - "ENGINE", - "VERSION", - "ROW_FORMAT", - "TABLE_ROWS", - "AVG_ROW_LENGTH", - "DATA_LENGTH", - "MAX_DATA_LENGTH", - "INDEX_LENGTH", - "DATA_FREE", - "AUTO_INCREMENT", - "CREATE_TIME", - "UPDATE_TIME", - "CHECK_TIME", - "TABLE_COLLATION", - "CHECKSUM", - "CREATE_OPTIONS", - "TABLE_COMMENT", - ] - - @classmethod - def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs): - databases, _ = _get_scope(query) - - data = [] - for name in inf_schema.tables.keys(): - if databases is not None and name not in databases: - continue - row = TablesRow(TABLE_TYPE=TABLES_ROW_TYPE.SYSTEM_VIEW, TABLE_NAME=name) - data.append(row.to_list()) - - for ds_name in inf_schema.persist_datanodes_names: - if databases is not None and ds_name not in databases: - continue - ds = inf_schema.get(ds_name) - - if hasattr(ds, "get_tables_rows"): - ds_tables = ds.get_tables_rows() - else: - ds_tables = ds.get_tables() - if len(ds_tables) == 0: - continue - elif isinstance(ds_tables[0], dict): - ds_tables = [TablesRow(TABLE_TYPE=TABLES_ROW_TYPE.BASE_TABLE, TABLE_NAME=x["name"]) for x in ds_tables] - elif isinstance(ds_tables, list) and len(ds_tables) > 0 and isinstance(ds_tables[0], str): - ds_tables = [TablesRow(TABLE_TYPE=TABLES_ROW_TYPE.BASE_TABLE, TABLE_NAME=x) for x in ds_tables] - for row in ds_tables: - row.TABLE_SCHEMA = ds_name - data.append(row.to_list()) 
- - for ds_name in inf_schema.get_integrations_names(): - if databases is not None and ds_name not in databases: - continue - - try: - ds = inf_schema.get(ds_name) - ds_tables = ds.get_tables() - for row in ds_tables: - row.TABLE_SCHEMA = ds_name - data.append(row.to_list()) - except Exception: - logger.exception(f"Can't get tables from '{ds_name}'") - - for project_name in inf_schema.get_projects_names(): - if databases is not None and project_name not in databases: - continue - - project_dn = inf_schema.get(project_name) - project_tables = project_dn.get_tables() - for row in project_tables: - row.TABLE_SCHEMA = project_name - data.append(row.to_list()) - - df = pd.DataFrame(data, columns=cls.columns) - return df - - -def infer_mysql_type(original_type: str) -> MYSQL_DATA_TYPE: - """Infer MySQL data type from original type string from a database. - - Args: - original_type (str): The original type string from a database. - - Returns: - MYSQL_DATA_TYPE: The inferred MySQL data type. - """ - match original_type.lower(): - case "double precision" | "real" | "numeric" | "float": - data_type = MYSQL_DATA_TYPE.FLOAT - case "integer" | "smallint" | "int" | "bigint": - data_type = MYSQL_DATA_TYPE.BIGINT - case "timestamp without time zone" | "timestamp with time zone" | "date" | "timestamp": - data_type = MYSQL_DATA_TYPE.DATETIME - case _: - data_type = MYSQL_DATA_TYPE.VARCHAR - return data_type - - -@dataclass(slots=True, kw_only=True) -class ColumnsTableRow: - """Represents a row in the MindsDB's internal INFORMATION_SCHEMA.COLUMNS table. - This class follows the MySQL-compatible COLUMNS table structure. - - Detailed field descriptions can be found in MySQL documentation: - https://dev.mysql.com/doc/refman/8.4/en/information-schema-columns-table.html - - NOTE: The order of attributes is significant and matches the MySQL column order. 
- """ - - TABLE_CATALOG: Literal["def"] = "def" - TABLE_SCHEMA: Optional[str] = None - TABLE_NAME: Optional[str] = None - COLUMN_NAME: Optional[str] = None - ORDINAL_POSITION: int = 0 - COLUMN_DEFAULT: Optional[str] = None - IS_NULLABLE: Literal["YES", "NO"] = "YES" - DATA_TYPE: str = MYSQL_DATA_TYPE.VARCHAR.value - CHARACTER_MAXIMUM_LENGTH: Optional[int] = None - CHARACTER_OCTET_LENGTH: Optional[int] = None - NUMERIC_PRECISION: Optional[int] = None - NUMERIC_SCALE: Optional[int] = None - DATETIME_PRECISION: Optional[int] = None - CHARACTER_SET_NAME: Optional[str] = None - COLLATION_NAME: Optional[str] = None - COLUMN_TYPE: Optional[str] = None - COLUMN_KEY: Optional[str] = None - EXTRA: Optional[str] = None - PRIVILEGES: str = "select" - COLUMN_COMMENT: Optional[str] = None - GENERATION_EXPRESSION: Optional[str] = None - SRS_ID: Optional[str] = None - # MindsDB's specific columns: - ORIGINAL_TYPE: Optional[str] = None - - @classmethod - def from_is_columns_row(cls, table_schema: str, table_name: str, row: pd.Series) -> "ColumnsTableRow": - """Transform row from response of `handler.get_columns(...)` to internal information_schema.columns row. - - Args: - table_schema (str): The name of the schema of the table which columns are described. - table_name (str): The name of the table which columns are described. - row (pd.Series): A row from the response of `handler.get_columns(...)`. - - Returns: - ColumnsTableRow: A row in the MindsDB's internal INFORMATION_SCHEMA.COLUMNS table. 
- """ - original_type: str = row[INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE] or "" - data_type: MYSQL_DATA_TYPE | None = row[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE] - if isinstance(data_type, MYSQL_DATA_TYPE) is False: - data_type = infer_mysql_type(original_type) - - # region set default values depend on type - defaults = MYSQL_DATA_TYPE_COLUMNS_DEFAULT.get(data_type) - if defaults is not None: - for key, value in defaults.items(): - if key in row and row[key] is None: - row[key] = value - - # region determine COLUMN_TYPE - it is text representation of DATA_TYPE with additioan attributes - match data_type: - case MYSQL_DATA_TYPE.DECIMAL: - column_type = f"decimal({row[INF_SCHEMA_COLUMNS_NAMES.NUMERIC_PRECISION]},{INF_SCHEMA_COLUMNS_NAMES.NUMERIC_SCALE})" - case MYSQL_DATA_TYPE.VARCHAR: - column_type = f"varchar({row[INF_SCHEMA_COLUMNS_NAMES.CHARACTER_MAXIMUM_LENGTH]})" - case MYSQL_DATA_TYPE.VARBINARY: - column_type = f"varbinary({row[INF_SCHEMA_COLUMNS_NAMES.CHARACTER_MAXIMUM_LENGTH]})" - case MYSQL_DATA_TYPE.BIT | MYSQL_DATA_TYPE.BINARY | MYSQL_DATA_TYPE.CHAR: - column_type = f"{data_type.value.lower()}(1)" - case MYSQL_DATA_TYPE.BOOL | MYSQL_DATA_TYPE.BOOLEAN: - column_type = "tinyint(1)" - case _: - column_type = data_type.value.lower() - # endregion - - # BOOLean types had 'tinyint' DATA_TYPE in MySQL - if data_type in (MYSQL_DATA_TYPE.BOOL, MYSQL_DATA_TYPE.BOOLEAN): - data_type = "tinyint" - else: - data_type = data_type.value.lower() - - return cls( - TABLE_SCHEMA=table_schema, - TABLE_NAME=table_name, - COLUMN_NAME=row[INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME], - ORDINAL_POSITION=row[INF_SCHEMA_COLUMNS_NAMES.ORDINAL_POSITION], - COLUMN_DEFAULT=row[INF_SCHEMA_COLUMNS_NAMES.COLUMN_DEFAULT], - IS_NULLABLE=row[INF_SCHEMA_COLUMNS_NAMES.IS_NULLABLE], - DATA_TYPE=data_type, - CHARACTER_MAXIMUM_LENGTH=row[INF_SCHEMA_COLUMNS_NAMES.CHARACTER_MAXIMUM_LENGTH], - CHARACTER_OCTET_LENGTH=row[INF_SCHEMA_COLUMNS_NAMES.CHARACTER_OCTET_LENGTH], - 
NUMERIC_PRECISION=row[INF_SCHEMA_COLUMNS_NAMES.NUMERIC_PRECISION], - NUMERIC_SCALE=row[INF_SCHEMA_COLUMNS_NAMES.NUMERIC_SCALE], - DATETIME_PRECISION=row[INF_SCHEMA_COLUMNS_NAMES.DATETIME_PRECISION], - CHARACTER_SET_NAME=row[INF_SCHEMA_COLUMNS_NAMES.CHARACTER_SET_NAME], - COLLATION_NAME=row[INF_SCHEMA_COLUMNS_NAMES.COLLATION_NAME], - COLUMN_TYPE=column_type, - ORIGINAL_TYPE=original_type, - ) - - def __post_init__(self): - """Check if all mandatory fields are filled.""" - mandatory_fields = ["TABLE_SCHEMA", "TABLE_NAME", "COLUMN_NAME"] - if any(getattr(self, field_name) is None for field_name in mandatory_fields): - raise ValueError("One of mandatory fields is missed when creating ColumnsTableRow") - - -class ColumnsTable(Table): - name = "COLUMNS" - columns = [field.name for field in fields(ColumnsTableRow)] - - @classmethod - def get_data(cls, inf_schema=None, query: ASTNode = None, **kwargs) -> pd.DataFrame: - databases, tables_names = _get_scope(query) - - if databases is None: - databases = ["information_schema", config.get("default_project"), "files"] - - result = [] - for db_name in databases: - tables = {} - - dn = inf_schema.get(db_name) - if dn is None: - continue - - if tables_names is None: - list_tables = [t.TABLE_NAME for t in dn.get_tables()] - else: - list_tables = tables_names - for table_name in list_tables: - tables[table_name] = dn.get_table_columns_df(table_name) - - for table_name, table_columns_df in tables.items(): - for _, row in table_columns_df.iterrows(): - result.append( - ColumnsTableRow.from_is_columns_row(table_schema=db_name, table_name=table_name, row=row) - ) - - return pd.DataFrame(result, columns=cls.columns) - - -class EventsTable(Table): - name = "EVENTS" - - columns = [ - "EVENT_CATALOG", - "EVENT_SCHEMA", - "EVENT_NAME", - "DEFINER", - "TIME_ZONE", - "EVENT_BODY", - "EVENT_DEFINITION", - "EVENT_TYPE", - "EXECUTE_AT", - "INTERVAL_VALUE", - "INTERVAL_FIELD", - "SQL_MODE", - "STARTS", - "ENDS", - "STATUS", - "ON_COMPLETION", - 
"CREATED", - "LAST_ALTERED", - "LAST_EXECUTED", - "EVENT_COMMENT", - "ORIGINATOR", - "CHARACTER_SET_CLIENT", - "COLLATION_CONNECTION", - "DATABASE_COLLATION", - ] - - -class RoutinesTable(Table): - name = "ROUTINE" - columns = [ - "SPECIFIC_NAME", - "ROUTINE_CATALOG", - "ROUTINE_SCHEMA", - "ROUTINE_NAME", - "ROUTINE_TYPE", - "DATA_TYPE", - "CHARACTER_MAXIMUM_LENGTH", - "CHARACTER_OCTET_LENGTH", - "NUMERIC_PRECISION", - "NUMERIC_SCALE", - "DATETIME_PRECISION", - "CHARACTER_SET_NAME", - "COLLATION_NAME", - "DTD_IDENTIFIER", - "ROUTINE_BODY", - "ROUTINE_DEFINITION", - "EXTERNAL_NAME", - "EXTERNAL_LANGUAGE", - "PARAMETER_STYLE", - "IS_DETERMINISTIC", - "SQL_DATA_ACCESS", - "SQL_PATH", - "SECURITY_TYPE", - "CREATED", - "LAST_ALTERED", - "SQL_MODE", - "ROUTINE_COMMENT", - "DEFINER", - "CHARACTER_SET_CLIENT", - "COLLATION_CONNECTION", - "DATABASE_COLLATION", - ] - - -class PluginsTable(Table): - name = "PLUGINS" - columns = [ - "PLUGIN_NAME", - "PLUGIN_VERSION", - "PLUGIN_STATUS", - "PLUGIN_TYPE", - "PLUGIN_TYPE_VERSION", - "PLUGIN_LIBRARY", - "PLUGIN_LIBRARY_VERSION", - "PLUGIN_AUTHOR", - "PLUGIN_DESCRIPTION", - "PLUGIN_LICENSE", - "LOAD_OPTION", - "PLUGIN_MATURITY", - "PLUGIN_AUTH_VERSION", - ] - - -class EnginesTable(Table): - name = "ENGINES" - columns = ["ENGINE", "SUPPORT", "COMMENT", "TRANSACTIONS", "XA", "SAVEPOINTS"] - - @classmethod - def get_data(cls, **kwargs): - data = [ - [ - "InnoDB", - "DEFAULT", - "Supports transactions, row-level locking, and foreign keys", - "YES", - "YES", - "YES", - ] - ] - - df = pd.DataFrame(data, columns=cls.columns) - return df - - -class KeyColumnUsageTable(Table): - name = "KEY_COLUMN_USAGE" - columns = [ - "CONSTRAINT_CATALOG", - "CONSTRAINT_SCHEMA", - "CONSTRAINT_NAME", - "TABLE_CATALOG", - "TABLE_SCHEMA", - "TABLE_NAME", - "COLUMN_NAME", - "ORDINAL_POSITION", - "POSITION_IN_UNIQUE_CONSTRAINT", - "REFERENCED_TABLE_SCHEMA", - "REFERENCED_TABLE_NAME", - "REFERENCED_COLUMN_NAME", - ] - - -class StatisticsTable(Table): - name = 
"STATISTICS" - columns = [ - "TABLE_CATALOG", - "TABLE_SCHEMA", - "TABLE_NAME", - "NON_UNIQUE", - "INDEX_SCHEMA", - "INDEX_NAME", - "SEQ_IN_INDEX", - "COLUMN_NAME", - "COLLATION", - "CARDINALITY", - "SUB_PART", - "PACKED", - "NULLABLE", - "INDEX_TYPE", - "COMMENT", - "INDEX_COMMENT", - "IS_VISIBLE", - "EXPRESSION", - ] - - -class CharacterSetsTable(Table): - name = "CHARACTER_SETS" - columns = [ - "CHARACTER_SET_NAME", - "DEFAULT_COLLATE_NAME", - "DESCRIPTION", - "MAXLEN", - ] - - @classmethod - def get_data(cls, **kwargs): - data = [ - ["utf8", "UTF-8 Unicode", "utf8_general_ci", 3], - ["latin1", "cp1252 West European", "latin1_swedish_ci", 1], - ["utf8mb4", "UTF-8 Unicode", "utf8mb4_general_ci", 4], - ] - - df = pd.DataFrame(data, columns=cls.columns) - return df - - -class CollationsTable(Table): - name = "COLLATIONS" - - columns = [ - "COLLATION_NAME", - "CHARACTER_SET_NAME", - "ID", - "IS_DEFAULT", - "IS_COMPILED", - "SORTLEN", - "PAD_ATTRIBUTE", - ] - - @classmethod - def get_data(cls, **kwargs): - data = [ - ["utf8_general_ci", "utf8", 33, "Yes", "Yes", 1, "PAD SPACE"], - ["latin1_swedish_ci", "latin1", 8, "Yes", "Yes", 1, "PAD SPACE"], - ] - - df = pd.DataFrame(data, columns=cls.columns) - return df - - -# Data Catalog tables -# TODO: Should these be placed in a separate schema? - - -# TODO: Combine with existing 'TablesTable'? -class MetaTablesTable(Table): - name = "META_TABLES" - - columns = ["TABLE_CATALOG", "TABLE_SCHEMA", "TABLE_NAME", "TABLE_TYPE", "TABLE_DESCRIPTION", "ROW_COUNT"] - - @classmethod - def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs): - databases, tables = _get_scope(query) - - if not databases: - raise ValueError("At least one database must be specified in the query.") - - df = pd.DataFrame() - for database in databases: - data_catalog_retriever = DataCatalogRetriever(database_name=database, table_names=tables) - table_df = data_catalog_retriever.retrieve_tables() - # Table schema may be returned as a column name. 
- table_df.columns = table_df.columns.str.upper() - table_df["TABLE_CATALOG"] = "def" - table_df["TABLE_SCHEMA"] = database - df = pd.concat([df, table_df]) - - df = df.reindex(columns=cls.columns, fill_value=None) - - return df - - -# TODO: Combine with existing 'ColumnsTable'? -class MetaColumnsTable(Table): - name = "META_COLUMNS" - - columns = [ - "TABLE_CATALOG", - "TABLE_SCHEMA", - "TABLE_NAME", - "COLUMN_NAME", - "DATA_TYPE", - "COLUMN_DESCRIPTION", - "COLUMN_DEFAULT", - "IS_NULLABLE", - ] - - @classmethod - def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs): - databases, tables = _get_scope(query) - - if not databases: - raise ValueError("At least one database must be specified in the query.") - - df = pd.DataFrame() - for database in databases: - data_catalog_retriever = DataCatalogRetriever(database_name=database, table_names=tables) - columns_df = data_catalog_retriever.retrieve_columns() - columns_df["TABLE_CATALOG"] = "def" - columns_df["TABLE_SCHEMA"] = database - df = pd.concat([df, columns_df]) - - df.columns = df.columns.str.upper() - - df = df.reindex(columns=cls.columns, fill_value=None) - df["IS_NULLABLE"] = df["IS_NULLABLE"].map({True: "YES", False: "NO"}) - - return df - - -class MetaColumnStatisticsTable(Table): - name = "META_COLUMN_STATISTICS" - columns = [ - "TABLE_SCHEMA", - "TABLE_NAME", - "COLUMN_NAME", - "MOST_COMMON_VALS", - "MOST_COMMON_FREQS", - "NULL_FRAC", - "N_DISTINCT", - "MIN_VALUE", - "MAX_VALUE", - ] - - @classmethod - def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs): - databases, tables = _get_scope(query) - - if not databases: - raise ValueError("At least one database must be specified in the query.") - - df = pd.DataFrame() - for database in databases: - data_catalog_retriever = DataCatalogRetriever(database_name=database, table_names=tables) - columns_df = data_catalog_retriever.retrieve_column_statistics() - columns_df["TABLE_CATALOG"] = "def" - columns_df["TABLE_SCHEMA"] = database - 
df = pd.concat([df, columns_df]) - - df.columns = df.columns.str.upper() - - df.rename( - columns={ - "NULL_PERCENTAGE": "NULL_FRAC", - "MOST_COMMON_VALUES": "MOST_COMMON_VALS", - "MOST_COMMON_FREQUENCIES": "MOST_COMMON_FREQS", - "DISTINCT_VALUES_COUNT": "N_DISTINCT", - "MINIMUM_VALUE": "MIN_VALUE", - "MAXIMUM_VALUE": "MAX_VALUE", - }, - inplace=True, - ) - - df = df.reindex(columns=cls.columns, fill_value=None) - return df - - -class MetaTableConstraintsTable(Table): - name = "META_TABLE_CONSTRAINTS" - columns = [ - "CONSTRAINT_CATALOG", - "CONSTRAINT_SCHEMA", - "CONSTRAINT_NAME", - "TABLE_SCHEMA", - "TABLE_NAME", - "CONSTRAINT_TYPE", - "ENFORCED", - ] - - @classmethod - def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs): - databases, tables = _get_scope(query) - - if not databases: - raise ValueError("At least one database must be specified in the query.") - - df = pd.DataFrame() - for database in databases: - data_catalog_retriever = DataCatalogRetriever(database_name=database, table_names=tables) - - primary_keys_df = data_catalog_retriever.retrieve_primary_keys() - if not primary_keys_df.empty: - primary_keys_df["CONSTRAINT_CATALOG"] = "def" - primary_keys_df[["CONSTRAINT_SCHEMA", "TABLE_SCHEMA"]] = database - primary_keys_df["CONSTRAINT_TYPE"] = "PRIMARY KEY" - - primary_keys_df.columns = primary_keys_df.columns.str.upper() - - df = pd.concat([df, primary_keys_df]) - - foreign_keys_df = data_catalog_retriever.retrieve_foreign_keys() - if not foreign_keys_df.empty: - foreign_keys_df["CONSTRAINT_CATALOG"] = "def" - foreign_keys_df[["CONSTRAINT_SCHEMA", "TABLE_SCHEMA"]] = database - foreign_keys_df["CONSTRAINT_TYPE"] = "FOREIGN KEY" - - foreign_keys_df.columns = foreign_keys_df.columns.str.upper() - - parent_constraints_df = foreign_keys_df.copy(deep=True) - child_constraints_df = foreign_keys_df.copy(deep=True) - - parent_constraints_df.rename( - columns={ - "PARENT_TABLE_NAME": "TABLE_NAME", - }, - inplace=True, - ) - 
child_constraints_df.rename( - columns={ - "CHILD_TABLE_NAME": "TABLE_NAME", - }, - inplace=True, - ) - - df = pd.concat([df, parent_constraints_df, child_constraints_df]) - - df = df.reindex(columns=cls.columns, fill_value=None) - - return df - - -class MetaColumnUsageTable(Table): - name = "META_KEY_COLUMN_USAGE" - columns = [ - "CONSTRAINT_CATALOG", - "CONSTRAINT_SCHEMA", - "CONSTRAINT_NAME", - "TABLE_CATALOG", - "TABLE_SCHEMA", - "TABLE_NAME", - "COLUMN_NAME", - "ORDINAL_POSITION", - "POSITION_IN_UNIQUE_CONSTRAINT", - "REFERENCED_TABLE_SCHEMA", - "REFERENCED_TABLE_NAME", - "REFERENCED_COLUMN_NAME", - ] - - @classmethod - def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs): - databases, tables = _get_scope(query) - - if not databases: - raise ValueError("At least one database must be specified in the query.") - - df = pd.DataFrame() - for database in databases: - data_catalog_retriever = DataCatalogRetriever(database_name=database, table_names=tables) - - primary_keys_df = data_catalog_retriever.retrieve_primary_keys() - if not primary_keys_df.empty: - primary_keys_df[["CONSTRAINT_CATALOG", "TABLE_CATALOG"]] = "def" - primary_keys_df[["CONSTRAINT_SCHEMA", "TABLE_SCHEMA"]] = database - - primary_keys_df.columns = primary_keys_df.columns.str.upper() - - df = pd.concat([df, primary_keys_df]) - - foreign_keys_df = data_catalog_retriever.retrieve_foreign_keys() - if not foreign_keys_df.empty: - foreign_keys_df[["CONSTRAINT_CATALOG", "TABLE_CATALOG"]] = "def" - foreign_keys_df[["TABLE_SCHEMA", "REFERENCED_TABLE_SCHEMA"]] = database - - foreign_keys_df.columns = foreign_keys_df.columns.str.upper() - - parent_constraints_df = foreign_keys_df.copy(deep=True) - child_constraints_df = foreign_keys_df.copy(deep=True) - - parent_constraints_df.rename( - columns={ - "PARENT_TABLE_NAME": "TABLE_NAME", - "PARENT_COLUMN_NAME": "COLUMN_NAME", - "CHILD_TABLE_NAME": "REFERENCED_TABLE_NAME", - "CHILD_COLUMN_NAME": "REFERENCED_COLUMN_NAME", - }, - inplace=True, - ) - 
child_constraints_df.rename( - columns={ - "CHILD_TABLE_NAME": "TABLE_NAME", - "CHILD_COLUMN_NAME": "COLUMN_NAME", - "PARENT_TABLE_NAME": "REFERENCED_TABLE_NAME", - "PARENT_COLUMN_NAME": "REFERENCED_COLUMN_NAME", - }, - inplace=True, - ) - - df = pd.concat([df, parent_constraints_df, child_constraints_df]) - - df = df.reindex(columns=cls.columns, fill_value=None) - - return df - - -class MetaHandlerInfoTable(Table): - name = "META_HANDLER_INFO" - columns = ["HANDLER_INFO", "TABLE_SCHEMA"] - - @classmethod - def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs): - databases, tables = _get_scope(query) - - if not databases: - raise ValueError("At least one database must be specified in the query.") - - data = [] - for database in databases: - data_catalog_retriever = DataCatalogRetriever(database_name=database, table_names=tables) - handler_info = data_catalog_retriever.retrieve_handler_info() - data.append({"HANDLER_INFO": str(handler_info) if handler_info else None, "TABLE_SCHEMA": database}) - - df = pd.DataFrame(data, columns=cls.columns) - return df diff --git a/mindsdb/api/executor/exceptions.py b/mindsdb/api/executor/exceptions.py deleted file mode 100644 index f43076d9b00..00000000000 --- a/mindsdb/api/executor/exceptions.py +++ /dev/null @@ -1,58 +0,0 @@ -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import ERR - - -# base exception for unknown error -class UnknownError(Exception): - mysql_error_code = ERR.ER_UNKNOWN_ERROR - is_expected = False - - -# base exception for known error -class ExecutorException(Exception): - mysql_error_code = ERR.ER_UNKNOWN_ERROR - is_expected = False - - -class NotSupportedYet(ExecutorException): - mysql_error_code = ERR.ER_NOT_SUPPORTED_YET - is_expected = True - - -class BadDbError(ExecutorException): - mysql_error_code = ERR.ER_BAD_DB_ERROR - is_expected = True - - -class BadTableError(ExecutorException): - mysql_error_code = ERR.ER_BAD_DB_ERROR - is_expected = True - - -class 
KeyColumnDoesNotExist(ExecutorException): - mysql_error_code = ERR.ER_KEY_COLUMN_DOES_NOT_EXIST - is_expected = True - - -class TableNotExistError(ExecutorException): - mysql_error_code = ERR.ER_TABLE_EXISTS_ERROR - is_expected = True - - -class WrongArgumentError(ExecutorException): - mysql_error_code = ERR.ER_WRONG_ARGUMENTS - is_expected = True - - -class LogicError(ExecutorException): - mysql_error_code = ERR.ER_WRONG_USAGE - is_expected = True - - -class SqlSyntaxError(ExecutorException): - err_code = ERR.ER_SYNTAX_ERROR - is_expected = True - - -class WrongCharsetError(ExecutorException): - err_code = ERR.ER_UNKNOWN_CHARACTER_SET - is_expected = True diff --git a/mindsdb/api/executor/planner/README.md b/mindsdb/api/executor/planner/README.md deleted file mode 100644 index f82252f650e..00000000000 --- a/mindsdb/api/executor/planner/README.md +++ /dev/null @@ -1,109 +0,0 @@ -# Planner - - -## How to use - -**Initialize planner** - -```python -from mindsdb.api.executor.planner import query_planner - -# all parameters are optional -planner = query_planner.QueryPlanner( - ast_query, # query as AST-tree - integrations=['mysql'], # list of available integrations - predictor_namespace='mindsdb', # name of namespace to lookup for predictors - default_namespace='mindsdb', # if namespace is not set in query default namespace will be used - predictor_metadata={ # information about predictors - 'tp3': { # name of predictor - 'timeseries': True, # is timeseries predictor - 'order_by_column': 'pickup_hour', # timeseries column - 'group_by_columns': ['day', 'type'], # columns for partition (only for timeseries) - 'window': 10 # windows size (only for timeseries) - } - } -) - -``` -Detailed description of timeseries predictor: [https://docs.mindsdb.com/sql/create/predictor/] - - -**Plan of prepared statement** - -Planner can be used in case of query with parameters: query is not complete and can't be executed. 
-But it is possible to get list of columns and parameters from query. - -```python -for step in planner.prepare_steps(ast_query): - data = do_execute_step(step) - step.set_result(data) - -statement_info = planner.get_statement_info() - -# list of columns -print(statement_info['columns']) - -# list of parameters -print(statement_info['parameters']) -``` - -At the moment this functionality is used only in COM_STMT_PREPARE command of mysql binary protocol. - -**Plan of execution** - -```python - -# if prepare_steps was executed we need pass params. -# otherwise, params=None -for step in planner.execute_steps(params): - data = do_execute_step(step) - step.set_result(data) -``` - -Query result data will be on output of the last step. - -**Alternative way of execution** - -At the moment execution plan doesn't dependent from results of previous steps. -But this behavior can be changed in the future. - -With the current behavior that it is possible to get plan of query as list: - -```python -from mindsdb.api.executor.planner import plan_query - -plan = plan_query( - ast_query, - integrations=['mysql'], - predictor_namespace='mindsdb', - default_namespace='mindsdb', - predictor_metadata={ - 'tp3': { - 'timeseries': False, - } - } -) -# list of steps -print(plan.steps) - -``` - -## Architecture - -Planner is analysing AST-query and return sequence of steps that is needed to execute to perform query. - -Steps are defined in planner/steps.py. Steps can reference to future result of previous step (using class Result in planner/step_results.py) - -Query planner consists from 2 different planner: - -1. For prepare statement is class PreparedStatementPlanner in query_prepare.py - -2. For execution is class QueryPlanner in query_panner.py -The most complex part of planner is planning of join table with timeseries predictor. 
Logic briefly: -- extract query for integration (without predictor) -- select all possible values of group fields (in scope of query) -- for every value of group field - - select part of data according to filters and size of window -- join all data in one dataframe -- pass it to predictor input -- join predictor results with data before prediction diff --git a/mindsdb/api/executor/planner/__init__.py b/mindsdb/api/executor/planner/__init__.py deleted file mode 100644 index b14c7b3a34a..00000000000 --- a/mindsdb/api/executor/planner/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .query_planner import QueryPlanner - - -def plan_query(query, *args, **kwargs): - return QueryPlanner(query, *args, **kwargs).from_query() diff --git a/mindsdb/api/executor/planner/exceptions.py b/mindsdb/api/executor/planner/exceptions.py deleted file mode 100644 index a83272eafc7..00000000000 --- a/mindsdb/api/executor/planner/exceptions.py +++ /dev/null @@ -1,3 +0,0 @@ - -class PlanningException(Exception): - pass diff --git a/mindsdb/api/executor/planner/plan_join.py b/mindsdb/api/executor/planner/plan_join.py deleted file mode 100644 index a7eb26800ef..00000000000 --- a/mindsdb/api/executor/planner/plan_join.py +++ /dev/null @@ -1,866 +0,0 @@ -import copy -from dataclasses import dataclass, field - -from mindsdb_sql_parser import ast -from mindsdb_sql_parser.ast import ( - Select, - Identifier, - BetweenOperation, - Join, - Star, - BinaryOperation, - Constant, - NativeQuery, - Parameter, - Function, - Last, - Tuple, -) - -from mindsdb.integrations.utilities.query_traversal import query_traversal - -from mindsdb.api.executor.planner.exceptions import PlanningException -from mindsdb.api.executor.planner.steps import ( - FetchDataframeStep, - FetchDataframeStepPartition, - JoinStep, - ApplyPredictorStep, - SubSelectStep, - QueryStep, - MapReduceStep, -) -from mindsdb.api.executor.planner.utils import filters_to_bin_op -from mindsdb.api.executor.planner.plan_join_ts import 
PlanJoinTSPredictorQuery - - -@dataclass -class TableInfo: - integration: str - table: Identifier - aliases: list[tuple[str, ...]] = field(default_factory=list) - conditions: list = None - sub_select: ast.ASTNode = None - predictor_info: dict = None - join_condition = None - index: int = None - - -class PlanJoin: - def __init__(self, planner): - self.planner = planner - - def is_timeseries(self, query): - join = query.from_table - l_predictor = self.planner.get_predictor(join.left) if isinstance(join.left, Identifier) else None - r_predictor = self.planner.get_predictor(join.right) if isinstance(join.right, Identifier) else None - if l_predictor and l_predictor.get("timeseries"): - return True - if r_predictor and r_predictor.get("timeseries"): - return True - - def check_single_integration(self, query): - query_info = self.planner.get_query_info(query) - # can we send all query to integration? - # one integration and not mindsdb objects in query - if ( - len(query_info["mdb_entities"]) == 0 - and len(query_info["integrations"]) == 1 - and "files" not in query_info["integrations"] - and "views" not in query_info["integrations"] - ): - int_name = list(query_info["integrations"])[0] - # if is sql database - class_type = self.planner.integrations.get(int_name, {}).get("class_type") - if class_type != "api": - # send to this integration - return int_name - return None - - def plan(self, query, integration=None): - # FIXME: Tableau workaround, INFORMATION_SCHEMA with Where - # if isinstance(join.right, Identifier) \ - # and self.resolve_database_table(join.right)[0] == 'INFORMATION_SCHEMA': - # pass - - # send join to integration as is? 
- integration_to_send = self.check_single_integration(query) - if integration_to_send: - self.planner.prepare_integration_select(integration_to_send, query) - - fetch_params = self.planner.get_fetch_params(query.using) - last_step = self.planner.plan.add_step( - FetchDataframeStep(integration=integration_to_send, query=query, params=fetch_params) - ) - return last_step - elif self.is_timeseries(query): - return PlanJoinTSPredictorQuery(self.planner).plan(query, integration) - else: - return PlanJoinTablesQuery(self.planner).plan(query) - - -class PlanJoinTablesQuery: - def __init__(self, planner): - self.planner = planner - - # index to lookup tables - self.tables_idx = None - self.tables = [] - self.tables_fetch_step = {} - - self.step_stack = None - self.query_context = {} - - self.partition = None - self.has_ambiguous_columns = False - - def plan(self, query): - self.tables_idx = {} - join_step = self.plan_join_tables(query) - - if ( - query.group_by is not None - or query.order_by is not None - or query.having is not None - or query.distinct is True - or query.where is not None - or query.limit is not None - or query.offset is not None - or len(query.targets) != 1 - or not isinstance(query.targets[0], Star) - ): - query2 = copy.deepcopy(query) - query2.from_table = None - query2.using = None - query2.cte = None - sup_select = QueryStep(query2, from_table=join_step.result, strict_where=False) - self.planner.plan.add_step(sup_select) - return sup_select - return join_step - - def resolve_table(self, table): - # gets integration for table and name to access to it - table = copy.deepcopy(table) - # get possible table aliases - aliases = [] - if table.alias is not None: - # to lowercase - parts = tuple(map(str.lower, table.alias.parts)) - aliases.append(parts) - else: - for i in range(0, len(table.parts)): - parts = table.parts[i:] - parts = tuple(map(str.lower, parts)) - aliases.append(parts) - - # try to use default namespace - integration = 
self.planner.default_namespace - if len(table.parts) > 0: - # if not quoted check in lower case - part = table.parts[0] - if part not in self.planner.databases and not table.is_quoted[0]: - part = part.lower() - - if part in self.planner.databases: - integration = part - table.parts.pop(0) - table.is_quoted.pop(0) - else: - integration = self.planner.default_namespace - - if integration is None and not hasattr(table, "sub_select"): - raise PlanningException(f"Database not found for: {table}") - - sub_select = getattr(table, "sub_select", None) - - return TableInfo(integration, table, aliases, conditions=[], sub_select=sub_select) - - def get_table_for_column(self, column: Identifier): - if not isinstance(column, Identifier): - return - # to lowercase - parts = tuple(map(str.lower, column.parts[:-1])) - if parts in self.tables_idx: - return self.tables_idx[parts] - - def get_join_sequence(self, node, condition=None): - sequence = [] - if isinstance(node, Identifier): - # resolve identifier - - table_info = self.resolve_table(node) - for alias in table_info.aliases: - self.tables_idx[alias] = table_info - - table_info.index = len(self.tables) - self.tables.append(table_info) - - table_info.predictor_info = self.planner.get_predictor(node) - - if condition is not None: - table_info.join_condition = condition - sequence.append(table_info) - - elif isinstance(node, Join): - # create sequence: 1)table1, 2)table2, 3)join 1 2, 4)table 3, 5)join 3 4 - - # put all tables before - sequence2 = self.get_join_sequence(node.left) - for item in sequence2: - sequence.append(item) - - sequence2 = self.get_join_sequence(node.right, condition=node.condition) - if len(sequence2) != 1: - raise PlanningException("Unexpected join nesting behavior") - - # put next table - sequence.append(sequence2[0]) - - # put join - sequence.append(node) - - else: - raise NotImplementedError() - return sequence - - def check_node_condition(self, node): - col_idx = 0 - if len(node.args) == 2: - if not 
isinstance(node.args[col_idx], Identifier): - # try to use second arg, could be: 'x'=col - col_idx = 1 - - # check the case col value, col between value and value - for i, arg in enumerate(node.args): - if i == col_idx: - if not isinstance(arg, Identifier): - return - else: - if not self.can_be_table_filter(arg): - return - - # checked, find table and store condition - node2 = copy.deepcopy(node) - - arg1 = node2.args[col_idx] - - if len(arg1.parts) < 2: - return - - table_info = self.get_table_for_column(arg1) - if table_info is None: - raise PlanningException(f"Table not found for identifier: {arg1.to_string()}") - - # keep only column name - arg1.parts = [arg1.parts[-1]] - - node2._orig_node = node - table_info.conditions.append(node2) - - def can_be_table_filter(self, node): - """ - Check if node can be used as a filter. - It can contain only: Constant, Parameter, Tuple (for IN clauses), Function (with Last) - """ - if isinstance(node, (Constant, Parameter)): - return True - if isinstance(node, Tuple): - return all(isinstance(item, Constant) for item in node.items) - if isinstance(node, Function): - # `Last` must be in args - if not any(isinstance(arg, Last) for arg in node.args): - return False - return all([self.can_be_table_filter(arg) for arg in node.args]) - - def check_query_conditions(self, query): - # get conditions for tables - binary_ops = [] - - def _check_node_condition(node, **kwargs): - if isinstance(node, BetweenOperation): - self.check_node_condition(node) - - if isinstance(node, BinaryOperation): - binary_ops.append(node.op) - - self.check_node_condition(node) - - query_traversal(query.where, _check_node_condition) - - self.query_context["binary_ops"] = binary_ops - - def check_use_limit(self, query_in, join_sequence): - # if only models (predictors), not for regular table joins - use_limit = False - optimize_inner_join = False - if query_in.having is None and query_in.group_by is None and query_in.limit is not None: - use_limit = True - - # 
Check what we're joining - has_predictor = False - - for item in join_sequence: - if isinstance(item, TableInfo): - if item.predictor_info is not None: - has_predictor = True - elif isinstance(item, Join) and not has_predictor: - # LEFT JOIN preserves left table row count - LIMIT pushdown is safe - join_type = str(item.join_type).upper() if item.join_type else "" - if join_type in ("LEFT JOIN", "LEFT OUTER JOIN"): - continue - - if query_in.offset is None: - optimize_inner_join = True - continue - use_limit = False - - self.query_context["use_limit"] = use_limit - self.query_context["optimize_inner_join"] = optimize_inner_join - - def plan_join_tables(self, query_in): - # plan all nested selects in 'where' - find_selects = self.planner.get_nested_selects_plan_fnc(self.planner.default_namespace, force=True) - query_in.targets = query_traversal(query_in.targets, find_selects) - query_traversal(query_in.where, find_selects) - - query = copy.deepcopy(query_in) - - # replace sub selects, with identifiers with links to original selects - def replace_subselects(node, **args): - if isinstance(node, Select) or isinstance(node, NativeQuery) or isinstance(node, ast.Data): - name = f"t_{id(node)}" - node2 = Identifier(name, alias=node.alias) - - # save in attribute - if isinstance(node, NativeQuery) or isinstance(node, ast.Data): - # wrap to select - node = Select(targets=[Star()], from_table=node) - node2.sub_select = node - return node2 - - query_traversal(query.from_table, replace_subselects) - - # get all join tables, form join sequence - join_sequence = self.get_join_sequence(query.from_table) - self.join_sequence = join_sequence - - # find tables for identifiers used in query - def _check_identifiers(node, is_table, **kwargs): - if not is_table and isinstance(node, Identifier): - if len(node.parts) > 1: - table_info = self.get_table_for_column(node) - if table_info is None: - raise PlanningException(f"Table not found for identifier: {node.to_string()}") - - # # replace 
identifies name - col_parts = list(table_info.aliases[-1]) - col_parts.append(node.parts[-1]) - node.parts = col_parts - else: - self.has_ambiguous_columns = True - - query.cte = None # already used before - query_traversal(query, _check_identifiers) - - self.check_query_conditions(query) - - # workaround for 'model join table': swap tables: - if len(join_sequence) == 3 and join_sequence[0].predictor_info is not None: - join_sequence = [join_sequence[1], join_sequence[0], join_sequence[2]] - - self.check_use_limit(query_in, join_sequence) - - # create plan - # TODO add optimization: one integration without predictor - - planned_steps_before_join = len(self.planner.plan.steps) - - self.step_stack = [] - for item in join_sequence: - if isinstance(item, TableInfo): - if item.sub_select is not None: - self.process_subselect(item, query_in) - elif item.predictor_info is not None: - self.process_predictor(item, query_in) - else: - # is table - self.process_table(item, query_in) - - elif isinstance(item, Join): - step_right = self.step_stack.pop() - step_left = self.step_stack.pop() - - new_join = copy.deepcopy(item) - - # TODO - new_join.left = Identifier("tab1") - new_join.right = Identifier("tab2") - new_join.implicit = False - - step = self.add_plan_step(JoinStep(left=step_left.result, right=step_right.result, query=new_join)) - - self.step_stack.append(step) - - query_in.where = query.where - - if self.query_context["optimize_inner_join"]: - self.planner.plan.steps = self.optimize_inner_join(self.planner.plan.steps, planned_steps_before_join) - - self.close_partition() - return self.planner.plan.steps[-1] - - def optimize_inner_join(self, steps_in, min_step_num): - steps_out = [] - - partition_step = None - partition_used = False - - for i, step in enumerate(steps_in): - if partition_step is None: - if ( - i >= min_step_num - and isinstance(step, FetchDataframeStep) - and not partition_used - and step.query.limit is not None - ): - limit = step.query.limit.value - 
step.query.limit = None - partition_used = True - - partition_step = FetchDataframeStepPartition( - step_num=step.step_num, - integration=step.integration, - query=step.query, - raw_query=step.raw_query, - params=step.params, - condition={"limit": limit}, - ) - steps_out.append(partition_step) - continue - - elif isinstance(step, (JoinStep, FetchDataframeStep, SubSelectStep)): - partition_step.steps.append(step) - continue - else: - partition_step = None - - steps_out.append(step) - - return steps_out - - def process_subselect(self, item, query_in): - # is sub select - item.sub_select.alias = None - item.sub_select.parentheses = False - step = self.planner.plan_select(item.sub_select) - - where = filters_to_bin_op(item.conditions) - - # Column pruning for subselects: - # - If subselect has pure SELECT *, we can prune to only needed columns - # - If subselect has explicit columns (SELECT a, b, c), pass through all (don't prune) - # This preserves column aliases and prevents breaking explicit projections - targets = [Star()] - needed_columns = self.get_fetch_columns_for_table(item, query_in) - if needed_columns: - targets = needed_columns - - # apply table alias - query2 = Select(targets=targets, where=where) - if item.table.alias is None: - raise PlanningException(f"Subselect in join have to be aliased: {item.sub_select.to_string()}") - table_name = item.table.alias.parts[-1] - - add_absent_cols = False - if hasattr(item.sub_select, "from_table") and isinstance(item.sub_select.from_table, ast.Data): - add_absent_cols = True - - step2 = SubSelectStep(query2, step.result, table_name=table_name, add_absent_cols=add_absent_cols) - step2 = self.add_plan_step(step2) - self.step_stack.append(step2) - - def _collect_from_order_by(self, query_in, alias_map, add_column_callback): - """Helper to collect columns from ORDER BY clause, resolving aliases and ordinals.""" - for order_col in query_in.order_by: - field = order_col.field - - # Handle ORDER BY ordinal (e.g., ORDER BY 
1) - if isinstance(field, Constant) and isinstance(field.value, int): - ordinal = field.value - if 1 <= ordinal <= len(query_in.targets): - target_expr = query_in.targets[ordinal - 1] - query_traversal(target_expr, add_column_callback) - continue - - # Handle ORDER BY alias (e.g., ORDER BY alias_name) - if isinstance(field, Identifier) and len(field.parts) == 1: - alias_name = field.parts[0].lower() - if alias_name in alias_map: - query_traversal(alias_map[alias_name], add_column_callback) - continue - - # Regular column reference - query_traversal(field, add_column_callback) - - def _join_has_predictor(self, join_sequence) -> bool: - """Check if the join sequence contains any predictor.""" - for item in join_sequence: - if isinstance(item, TableInfo) and item.predictor_info is not None: - return True - return False - - def _can_prune_columns(self, table_info) -> bool: - """ - Determine if column pruning can be applied to this table. - - Returns: - True if column pruning can be applied - False if we should skip pruning (use SELECT *) - """ - - # If this table is part of a join with a predictor: cannot prune - # Predictors may need all columns from joined tables as input features - if hasattr(self, "join_sequence") and self._join_has_predictor(self.join_sequence): - return False - - if self.has_ambiguous_columns: - return False - - # For subselects: can only prune if they have pure SELECT * (no other columns) - sub = table_info.sub_select - if sub is not None and isinstance(sub, Select): - targets = getattr(sub, "targets", None) or [] - # Can prune only if subselect has PURE SELECT * (one target that is Star) - # Cannot prune if: - # - Mixed: SELECT *, col1 (has Star but also other columns) - if len(targets) == 1 and isinstance(targets[0], Star): - return True # Pure SELECT * - can prune - return False - - # For project tables (KB tables, views, etc.): cannot prune - # Project tables need SELECT * for proper column mapping - if table_info.integration and 
table_info.integration in self.planner.projects: - return False - - # Regular integration tables: can prune - return True - - def get_fetch_columns_for_table(self, table_info, query_in): - """ - Collect all columns needed from a specific table for column pruning optimization. - - Note: Caller should check _can_prune_columns() before calling this method. - - Returns a list of column Identifiers or None if we should fetch all columns. - """ - if not self._can_prune_columns(table_info): - return None - - columns = {} - has_qualified_star_for_table = False - - alias_map = {} - if query_in.targets: - for target in query_in.targets: - if isinstance(target, Identifier) and target.alias: - alias_map[target.alias.parts[-1].lower()] = target - - def add_column(node, **kwargs): - if isinstance(node, Identifier): - col_table = self.get_table_for_column(node) - if not col_table or col_table.index != table_info.index: - return - - # Check for qualified star: t1.* or alias.* - col_name = node.parts[-1] - is_quoted = node.is_quoted[-1] - - if isinstance(col_name, Star): - nonlocal has_qualified_star_for_table - has_qualified_star_for_table = True - return - - # Store - if already exists, keep it quoted if either reference was quoted - columns[col_name] = columns.get(col_name) or is_quoted - - # Check for bare Star() in targets - if query_in.targets: - for target in query_in.targets: - if isinstance(target, Star): - return None - - query_traversal(query_in, add_column) - - # If qualified star found for this table, fetch all columns - if has_qualified_star_for_table: - return None - - # If we found no columns, fetch all - if not columns: - return None - - # Convert column names to Identifier objects, we need to preserve quoting - result = [] - for col, is_quoted in sorted(columns.items()): - result.append(Identifier(parts=[col], is_quoted=[is_quoted])) - return result - - def process_table(self, item, query_in): - table = copy.deepcopy(item.table) - table.parts.insert(0, 
item.integration) - table.is_quoted.insert(0, False) - - needed_columns = self.get_fetch_columns_for_table(item, query_in) - targets = needed_columns if needed_columns else [Star()] - - query2 = Select(from_table=table, targets=targets) - conditions = item.conditions - if "or" in self.query_context["binary_ops"]: - conditions = [] - - if self.query_context.get("had_limit"): - conditions += self.get_filters_from_join_conditions(item) - - if self.query_context["use_limit"]: - order_by = None - if query_in.order_by is not None: - order_by = [] - # all order column are from this table - for col in query_in.order_by: - table_info = self.get_table_for_column(col.field) - if table_info is None or table_info.table != item.table: - order_by = False - break - col = copy.deepcopy(col) - col.field.parts = [col.field.parts[-1]] - col.field.is_quoted = [col.field.is_quoted[-1]] - order_by.append(col) - - if order_by is not False: - # copy limit from upper query - query2.limit = query_in.limit - # move offset from upper query - query2.offset = query_in.offset - query_in.offset = None - # copy order - query2.order_by = order_by - - self.query_context["use_limit"] = False - self.query_context["had_limit"] = True - for cond in conditions: - if query2.where is not None: - query2.where = BinaryOperation("and", args=[query2.where, cond]) - else: - query2.where = cond - - step = self.planner.get_integration_select_step(query2, params=query_in.using) - self.tables_fetch_step[item.index] = step - - self.add_plan_step(step) - self.step_stack.append(step) - - def join_condition_to_columns_map(self, model_table): - columns_map = {} - - def _check_conditions(node, **kwargs): - if not isinstance(node, BinaryOperation): - return - - arg1, arg2 = node.args - if not (isinstance(arg1, Identifier) and isinstance(arg2, Identifier)): - return - - table1 = self.get_table_for_column(arg1) - table2 = self.get_table_for_column(arg2) - - if table1 is model_table: - # model is on the left - 
columns_map[arg1.parts[-1]] = arg2 - elif table2 is model_table: - # model is on the right - columns_map[arg2.parts[-1]] = arg1 - else: - # not found, skip - return - - # exclude condition - node.args = [Constant(0), Constant(0)] - - query_traversal(model_table.join_condition, _check_conditions) - return columns_map - - def get_filters_from_join_conditions(self, fetch_table): - """ - Extract filters from join conditions for filter pushdown optimization. - - Note: This function is currently disabled (not called) to avoid: - - Creating massive IN clauses that exceed database query size limits - - Making arbitrary assumptions about data distribution - - For cross-database joins with large tables, users should: - - Add explicit WHERE clauses to filter data at the source - - Use indexed/partitioned tables in their databases - - Consider materializing intermediate results - """ - binary_ops = set() - conditions = [] - data_conditions = [] - - def _check_conditions(node, **kwargs): - if not isinstance(node, BinaryOperation): - return - - if node.op != "=": - binary_ops.add(node.op.lower()) - return - - arg1, arg2 = node.args - table1 = self.get_table_for_column(arg1) if isinstance(arg1, Identifier) else None - table2 = self.get_table_for_column(arg2) if isinstance(arg2, Identifier) else None - - if table1 is not fetch_table: - if table2 is not fetch_table: - return - # set our table first - table1, table2 = table2, table1 - arg1, arg2 = arg2, arg1 - - if isinstance(arg2, Constant): - conditions.append(node) - elif table2 is not None: - data_conditions.append([arg1, arg2]) - - query_traversal(fetch_table.join_condition, _check_conditions) - - binary_ops.discard("and") - if len(binary_ops) > 0: - # other operations exists, skip - return [] - - for arg1, arg2 in data_conditions: - # is fetched? 
- table2 = self.get_table_for_column(arg2) - fetch_step = self.tables_fetch_step.get(table2.index) - - if fetch_step is None: - continue - - # extract distinct values - # remove aliases - arg1 = Identifier(parts=[arg1.parts[-1]], is_quoted=[arg1.is_quoted[-1]]) - arg2 = Identifier(parts=[arg2.parts[-1]], is_quoted=[arg2.is_quoted[-1]]) - - query2 = Select(targets=[arg2], distinct=True) - subselect_step = SubSelectStep(query2, fetch_step.result) - subselect_step = self.add_plan_step(subselect_step) - - conditions.append(BinaryOperation(op="in", args=[arg1, Parameter(subselect_step.result)])) - - return conditions - - def process_predictor(self, item, query_in): - if len(self.step_stack) == 0: - raise NotImplementedError("Predictor can't be first element of join syntax") - if item.predictor_info.get("timeseries"): - raise NotImplementedError("TS predictor is not supported here yet") - data_step = self.step_stack[-1] - row_dict = None - - predict_target = item.predictor_info.get("to_predict") - if isinstance(predict_target, list) and len(predict_target) > 0: - predict_target = predict_target[0] - if predict_target is not None: - predict_target = predict_target.lower() - - columns_map = None - if item.join_condition: - columns_map = self.join_condition_to_columns_map(item) - - if item.conditions: - row_dict = {} - for i, el in enumerate(item.conditions): - if isinstance(el.args[0], Identifier) and el.op == "=": - col_name = el.args[0].parts[-1] - if col_name.lower() == predict_target: - # don't add predict target to parameters - continue - - if isinstance(el.args[1], (Constant, Parameter)): - row_dict[el.args[0].parts[-1]] = el.args[1].value - - # exclude condition - el._orig_node.args = [Constant(0), Constant(0)] - - # params for model - model_params = None - partition_size = None - if query_in.using is not None: - model_params = {} - for param, value in query_in.using.items(): - if "." 
in param: - alias = param.split(".")[0] - if (alias,) in item.aliases: - new_param = ".".join(param.split(".")[1:]) - model_params[new_param.lower()] = value - else: - model_params[param.lower()] = value - - partition_size = model_params.pop("partition_size", None) - - predictor_step = ApplyPredictorStep( - namespace=item.integration, - dataframe=data_step.result, - predictor=item.table, - params=model_params, - row_dict=row_dict, - columns_map=columns_map, - ) - - self.step_stack.append(self.add_plan_step(predictor_step, partition_size=partition_size)) - - def add_plan_step(self, step, partition_size=None): - """ - Adds step to plan - - If partition_size is defined: create partition - If partition is active - If step can be partitioned: - Add step to partition not in plan - Otherwise: - Add partition to plan - Add step to plan - """ - if self.partition: - if isinstance(step, (JoinStep, ApplyPredictorStep)): - # add to partition - - self.add_step_to_partition(step) - return step - - elif partition_size is not None: - # create partition - - self.partition = MapReduceStep(values=step.dataframe, reduce="union", step=[], partition=partition_size) - self.planner.plan.add_step(self.partition) - - self.add_step_to_partition(step) - return step - - else: - # next step can't be partitioned. 
- self.close_partition() - - return self.planner.plan.add_step(step) - - def add_step_to_partition(self, step): - step.step_num = f"{self.partition.step_num}_{len(self.partition.step)}" - self.partition.step.append(step) - - def close_partition(self): - # return - # if partitions is exist - clear it and replace last stack item with it - - if self.partition: - if len(self.step_stack) > 0: - self.step_stack[-1] = self.partition - - self.partition = None diff --git a/mindsdb/api/executor/planner/plan_join_ts.py b/mindsdb/api/executor/planner/plan_join_ts.py deleted file mode 100644 index bebe3752895..00000000000 --- a/mindsdb/api/executor/planner/plan_join_ts.py +++ /dev/null @@ -1,401 +0,0 @@ -import copy - -from mindsdb_sql_parser.ast.mindsdb import Latest -from mindsdb_sql_parser.ast import ( - Select, - Identifier, - BetweenOperation, - Join, - Star, - BinaryOperation, - Constant, - OrderBy, - NullConstant, -) - -from mindsdb.integrations.utilities.query_traversal import query_traversal - -from mindsdb.api.executor.planner.exceptions import PlanningException -from mindsdb.api.executor.planner import utils -from mindsdb.api.executor.planner.steps import ( - JoinStep, - LimitOffsetStep, - MultipleSteps, - MapReduceStep, - ApplyTimeseriesPredictorStep, -) -from mindsdb.api.executor.planner.ts_utils import ( - validate_ts_where_condition, - find_time_filter, - replace_time_filter, - find_and_remove_time_filter, - recursively_check_join_identifiers_for_ambiguity, -) - - -class PlanJoinTSPredictorQuery: - def __init__(self, planner): - self.planner = planner - - def adapt_dbt_query(self, query, integration): - orig_query = query - - join = query.from_table - join_left = join.left - - # dbt query. 
- - # move latest into subquery - moved_conditions = [] - - def move_latest(node, **kwargs): - if isinstance(node, BinaryOperation): - if Latest() in node.args: - for arg in node.args: - if isinstance(arg, Identifier): - # remove table alias - arg.parts = [arg.parts[-1]] - moved_conditions.append(node) - - query_traversal(query.where, move_latest) - - # TODO make project step from query.target - - # TODO support complex query. Only one table is supported at the moment. - # if not isinstance(join_left.from_table, Identifier): - # raise PlanningException(f'Statement not supported: {query.to_string()}') - - # move properties to upper query - query = join_left - - if query.from_table.alias is not None: - table_alias = [query.from_table.alias.parts[0]] - else: - table_alias = query.from_table.parts - - # add latest to query.where - for cond in moved_conditions: - if query.where is not None: - query.where = BinaryOperation("and", args=[query.where, cond]) - else: - query.where = cond - - def add_aliases(node, is_table, **kwargs): - if not is_table and isinstance(node, Identifier): - if len(node.parts) == 1: - # add table alias to field - node.parts = table_alias + node.parts - node.is_quoted = [False] + node.is_quoted - - query_traversal(query.where, add_aliases) - - if isinstance(query.from_table, Identifier): - # DBT workaround: allow use tables without integration. 
- # if table.part[0] not in integration - take integration name from create table command - if integration is not None and query.from_table.parts[0] not in self.planner.databases: - # add integration name to table - query.from_table.parts.insert(0, integration) - query.from_table.is_quoted.insert(0, False) - - join_left = join_left.from_table - - if orig_query.limit is not None: - if query.limit is None or query.limit.value > orig_query.limit.value: - query.limit = orig_query.limit - query.parentheses = False - query.alias = None - - return query, join_left - - def get_aliased_fields(self, targets): - # get aliases from select target - aliased_fields = {} - for target in targets: - if target.alias is not None: - aliased_fields[target.alias.to_string()] = target - return aliased_fields - - def plan_fetch_timeseries_partitions(self, query, table, predictor_group_by_names): - targets = [Identifier(column) for column in predictor_group_by_names] - - query = Select( - distinct=True, - targets=targets, - from_table=table, - where=query.where, - modifiers=query.modifiers, - ) - select_step = self.planner.plan_integration_select(query) - return select_step - - def plan(self, query, integration=None): - # integration is for dbt only - - join = query.from_table - join_left = join.left - join_right = join.right - - predictor_is_left = False - if self.planner.is_predictor(join_left): - # predictor is in the left, put it in the right - join_left, join_right = join_right, join_left - predictor_is_left = True - - if self.planner.is_predictor(join_left): - # in the left is also predictor - raise PlanningException(f"Can't join two predictors {join_left} and {join_left}") - - orig_query = query - # dbt query? 
- if isinstance(join_left, Select) and isinstance(join_left.from_table, Identifier): - query, join_left = self.adapt_dbt_query(query, integration) - - predictor_namespace, predictor = self.planner.get_predictor_namespace_and_name_from_identifier(join_right) - table = join_left - - aliased_fields = self.get_aliased_fields(query.targets) - - recursively_check_join_identifiers_for_ambiguity(query.where) - recursively_check_join_identifiers_for_ambiguity(query.group_by, aliased_fields=aliased_fields) - recursively_check_join_identifiers_for_ambiguity(query.having) - recursively_check_join_identifiers_for_ambiguity(query.order_by, aliased_fields=aliased_fields) - - predictor_steps = self.plan_timeseries_predictor(query, table, predictor_namespace, predictor) - - # add join - # Update reference - - left = Identifier(predictor_steps["predictor"].result.ref_name) - right = Identifier(predictor_steps["data"].result.ref_name) - - if not predictor_is_left: - # swap join - left, right = right, left - new_join = Join(left=left, right=right, join_type=join.join_type) - - left = predictor_steps["predictor"].result - right = predictor_steps["data"].result - if not predictor_is_left: - # swap join - left, right = right, left - - last_step = self.planner.plan.add_step(JoinStep(left=left, right=right, query=new_join)) - - # limit from timeseries - if predictor_steps.get("saved_limit"): - last_step = self.planner.plan.add_step( - LimitOffsetStep(dataframe=last_step.result, limit=predictor_steps["saved_limit"]) - ) - - return self.planner.plan_project(orig_query, last_step.result) - - def plan_timeseries_predictor(self, query, table, predictor_namespace, predictor): - predictor_metadata = self.planner.get_predictor(predictor) - - predictor_time_column_name = predictor_metadata["order_by_column"] - predictor_group_by_names = predictor_metadata["group_by_columns"] - if predictor_group_by_names is None: - predictor_group_by_names = [] - predictor_window = predictor_metadata["window"] - - 
if query.order_by: - raise PlanningException( - f"Can't provide ORDER BY to time series predictor, it will be taken from predictor settings. Found: {query.order_by}" - ) - - saved_limit = None - if query.limit is not None: - saved_limit = query.limit.value - - if query.group_by or query.having or query.offset: - raise PlanningException(f"Unsupported query to timeseries predictor: {str(query)}") - - allowed_columns = [predictor_time_column_name.lower()] - if len(predictor_group_by_names) > 0: - allowed_columns += [i.lower() for i in predictor_group_by_names] - - no_time_filter_query = copy.deepcopy(query) - - preparation_where = no_time_filter_query.where - - validate_ts_where_condition(preparation_where, allowed_columns=allowed_columns) - - time_filter = find_time_filter(preparation_where, time_column_name=predictor_time_column_name) - - order_by = [OrderBy(Identifier(parts=[predictor_time_column_name]), direction="DESC")] - - query_modifiers = query.modifiers - - # add {order_by_field} is not null - def add_order_not_null(condition): - order_field_not_null = BinaryOperation( - op="is not", args=[Identifier(parts=[predictor_time_column_name]), NullConstant()] - ) - if condition is not None: - condition = BinaryOperation(op="and", args=[condition, order_field_not_null]) - else: - condition = order_field_not_null - return condition - - preparation_where2 = copy.deepcopy(preparation_where) - preparation_where = add_order_not_null(preparation_where) - - # Obtain integration selects - if isinstance(time_filter, BetweenOperation): - between_from = time_filter.args[1] - preparation_time_filter = BinaryOperation("<", args=[Identifier(predictor_time_column_name), between_from]) - preparation_where2 = replace_time_filter(preparation_where2, time_filter, preparation_time_filter) - integration_select_1 = Select( - targets=[Star()], - from_table=table, - where=add_order_not_null(preparation_where2), - modifiers=query_modifiers, - order_by=order_by, - 
limit=Constant(predictor_window), - ) - - integration_select_2 = Select( - targets=[Star()], - from_table=table, - where=preparation_where, - modifiers=query_modifiers, - order_by=order_by, - ) - - integration_selects = [integration_select_1, integration_select_2] - elif isinstance(time_filter, BinaryOperation) and time_filter.op == ">" and time_filter.args[1] == Latest(): - integration_select = Select( - targets=[Star()], - from_table=table, - where=preparation_where, - modifiers=query_modifiers, - order_by=order_by, - limit=Constant(predictor_window), - ) - integration_select.where = find_and_remove_time_filter(integration_select.where, time_filter) - integration_selects = [integration_select] - elif isinstance(time_filter, BinaryOperation) and time_filter.op == "=": - integration_select = Select( - targets=[Star()], - from_table=table, - where=preparation_where, - modifiers=query_modifiers, - order_by=order_by, - limit=Constant(predictor_window), - ) - - if type(time_filter.args[1]) is Latest: - integration_select.where = find_and_remove_time_filter(integration_select.where, time_filter) - else: - time_filter_date = time_filter.args[1] - preparation_time_filter = BinaryOperation( - "<=", args=[Identifier(predictor_time_column_name), time_filter_date] - ) - integration_select.where = add_order_not_null( - replace_time_filter(preparation_where2, time_filter, preparation_time_filter) - ) - time_filter.op = ">" - - integration_selects = [integration_select] - elif isinstance(time_filter, BinaryOperation) and time_filter.op in (">", ">="): - time_filter_date = time_filter.args[1] - preparation_time_filter_op = {">": "<=", ">=": "<"}[time_filter.op] - - preparation_time_filter = BinaryOperation( - preparation_time_filter_op, args=[Identifier(predictor_time_column_name), time_filter_date] - ) - preparation_where2 = replace_time_filter(preparation_where2, time_filter, preparation_time_filter) - integration_select_1 = Select( - targets=[Star()], - from_table=table, - 
where=add_order_not_null(preparation_where2), - modifiers=query_modifiers, - order_by=order_by, - limit=Constant(predictor_window), - ) - - integration_select_2 = Select( - targets=[Star()], - from_table=table, - where=preparation_where, - modifiers=query_modifiers, - order_by=order_by, - ) - - integration_selects = [integration_select_1, integration_select_2] - else: - integration_select = Select( - targets=[Star()], - from_table=table, - where=preparation_where, - modifiers=query_modifiers, - order_by=order_by, - ) - integration_selects = [integration_select] - - if len(predictor_group_by_names) == 0: - # ts query without grouping - # one or multistep - if len(integration_selects) == 1: - select_partition_step = self.planner.get_integration_select_step(integration_selects[0]) - else: - select_partition_step = MultipleSteps( - steps=[self.planner.get_integration_select_step(s) for s in integration_selects], reduce="union" - ) - - # fetch data step - data_step = self.planner.plan.add_step(select_partition_step) - else: - # inject $var to queries - for integration_select in integration_selects: - condition = integration_select.where - for num, column in enumerate(predictor_group_by_names): - cond = BinaryOperation("=", args=[Identifier(column), Constant(f"$var[{column}]")]) - - # join to main condition - if condition is None: - condition = cond - else: - condition = BinaryOperation("and", args=[condition, cond]) - - integration_select.where = condition - # one or multistep - if len(integration_selects) == 1: - select_partition_step = self.planner.get_integration_select_step(integration_selects[0]) - else: - select_partition_step = MultipleSteps( - steps=[self.planner.get_integration_select_step(s) for s in integration_selects], reduce="union" - ) - - # get groping values - no_time_filter_query.where = find_and_remove_time_filter(no_time_filter_query.where, time_filter) - select_partitions_step = self.plan_fetch_timeseries_partitions( - no_time_filter_query, table, 
predictor_group_by_names - ) - - # sub-query by every grouping value - map_reduce_step = self.planner.plan.add_step( - MapReduceStep(values=select_partitions_step.result, reduce="union", step=select_partition_step) - ) - data_step = map_reduce_step - - predictor_identifier = utils.get_predictor_name_identifier(predictor) - - params = None - if query.using is not None: - params = query.using - predictor_step = self.planner.plan.add_step( - ApplyTimeseriesPredictorStep( - output_time_filter=time_filter, - namespace=predictor_namespace, - dataframe=data_step.result, - predictor=predictor_identifier, - params=params, - ) - ) - - return { - "predictor": predictor_step, - "data": data_step, - "saved_limit": saved_limit, - } diff --git a/mindsdb/api/executor/planner/query_plan.py b/mindsdb/api/executor/planner/query_plan.py deleted file mode 100644 index 44bd5ef9210..00000000000 --- a/mindsdb/api/executor/planner/query_plan.py +++ /dev/null @@ -1,32 +0,0 @@ -class QueryPlan: - def __init__(self, steps=None, is_resumable=False, is_async=False, probe_query=None, failback_plan=None, **kwargs): - self.steps = [] - self.is_resumable = is_resumable - self.is_async = is_async - self.probe_query = probe_query - self.failback_plan = failback_plan - - if steps: - for step in steps: - self.add_step(step) - - def __eq__(self, other): - if type(self) != type(other): - return False - - if len(self.steps) != len(other.steps): - return False - - for step, other_step in zip(self.steps, other.steps): - if step != other_step: - return False - - @property - def last_step_index(self): - return len(self.steps) - 1 - - def add_step(self, step): - if not step.step_num: - step.step_num = len(self.steps) - self.steps.append(step) - return self.steps[-1] diff --git a/mindsdb/api/executor/planner/query_planner.py b/mindsdb/api/executor/planner/query_planner.py deleted file mode 100644 index 93d6ae93ae5..00000000000 --- a/mindsdb/api/executor/planner/query_planner.py +++ /dev/null @@ -1,1073 +0,0 @@ 
-import copy - -import pandas as pd - -from mindsdb_sql_parser import ast -from mindsdb_sql_parser.ast import ( - Select, - Identifier, - Join, - Star, - BinaryOperation, - Constant, - Union, - CreateTable, - Function, - Insert, - Except, - Intersect, - Update, - NativeQuery, - Parameter, - Delete, -) - -from mindsdb.api.executor.planner.exceptions import PlanningException -from mindsdb.api.executor.planner import utils -from mindsdb.api.executor.planner.query_plan import QueryPlan -from mindsdb.api.executor.planner.steps import ( - PlanStep, - FetchDataframeStep, - ProjectStep, - ApplyPredictorStep, - ApplyPredictorRowStep, - UnionStep, - GetPredictorColumns, - SaveToTable, - InsertToTable, - UpdateToTable, - SubSelectStep, - QueryStep, - JoinStep, - DeleteStep, - DataStep, - CreateTableStep, - FetchDataframeStepPartition, -) -from mindsdb.api.executor.planner.utils import ( - disambiguate_predictor_column_identifier, - recursively_extract_column_values, - query_traversal, - filters_to_bin_op, -) -from mindsdb.api.executor.planner.plan_join import PlanJoin -from mindsdb.api.executor.planner.query_prepare import PreparedStatementPlanner -from mindsdb.utilities.config import config - - -default_project = config.get("default_project") - -# This includes built-in MindsDB SQL functions and functions to be executed via DuckDB consistently. -MINDSDB_SQL_FUNCTIONS = {"llm", "to_markdown", "hash"} - - -def _resolve_identifier_part(identifier: Identifier, part: int = -1) -> str: - """Resolve a part of an identifier. - - Args: - identifier (Identifier): The identifier to resolve the part of. - part (int): The part number to resolve. - - Returns: - str: part of the identifier in lowercase if not quoted, otherwise the part itself. 
- """ - name = identifier.parts[part] - is_quoted = identifier.is_quoted[part] - if not is_quoted: - name = name.lower() - return name - - -class QueryPlanner: - def __init__( - self, - query=None, - integrations: list = None, - predictor_namespace=None, - predictor_metadata: list = None, - default_namespace: str = None, - kb_metadata: dict = None, - ): - self.query = query - self.plan = QueryPlan() - - _projects = set() - self.integrations = {} - if integrations is not None: - for integration in integrations: - if isinstance(integration, dict): - integration_name = integration["name"] - # it is project of system database - if integration["type"] != "data": - _projects.add(integration_name) - continue - else: - integration_name = integration - integration = {"name": integration} - self.integrations[integration_name] = integration - - # allow to select from mindsdb namespace - _projects.add(default_project) - - self.default_namespace = default_namespace - - # legacy parameter - self.predictor_namespace = predictor_namespace.lower() if predictor_namespace else default_project - - # map for lower names of predictors - - self.predictor_info = {} - if isinstance(predictor_metadata, list): - # convert to dict - for predictor in predictor_metadata: - if "integration_name" in predictor: - integration_name = predictor["integration_name"] - else: - integration_name = self.predictor_namespace - predictor["integration_name"] = integration_name - idx = f"{integration_name}.{predictor['name']}".lower() - self.predictor_info[idx] = predictor - _projects.add(integration_name.lower()) - elif isinstance(predictor_metadata, dict): - # legacy behaviour - for name, predictor in predictor_metadata.items(): - if "." 
not in name: - if "integration_name" in predictor: - integration_name = predictor["integration_name"] - else: - integration_name = self.predictor_namespace - predictor["integration_name"] = integration_name - name = f"{integration_name}.{name}".lower() - _projects.add(integration_name.lower()) - - self.predictor_info[name] = predictor - - self.projects = list(_projects) - self.databases = list(self.integrations.keys()) + self.projects - self.kb_metadata = kb_metadata or {} - self.statement = None - - self.cte_results = {} - - def is_predictor(self, identifier): - if not isinstance(identifier, Identifier): - return False - return self.get_predictor(identifier) is not None - - def get_predictor(self, identifier): - name_parts = list(identifier.parts) - - version = None - if len(name_parts) > 1 and name_parts[-1].isdigit(): - # last part is version - version = name_parts[-1] - name_parts = name_parts[:-1] - - name = name_parts[-1] - - namespace = None - if len(name_parts) > 1: - namespace = name_parts[-2] - else: - if self.default_namespace is not None: - namespace = self.default_namespace - - idx_ar = [name] - if namespace is not None: - idx_ar.insert(0, namespace) - - idx = ".".join(idx_ar).lower() - info = self.predictor_info.get(idx) - if info is not None: - info["version"] = version - info["name"] = name - return info - - def prepare_integration_select(self, database, query): - # replacement for 'utils.recursively_disambiguate_*' functions from utils - # main purpose: make tests working (don't change planner outputs) - # can be removed in future (with adapting the tests) except 'cut integration part' block - - def _prepare_integration_select(node, is_table, is_target, parent_query, **kwargs): - if not isinstance(node, Identifier): - return - - # cut integration part - if len(node.parts) > 1: - if (node.is_quoted[0] and node.parts[0] == database) or ( - not node.is_quoted[0] and node.parts[0].lower() == database.lower() - ): - node.parts.pop(0) - 
node.is_quoted.pop(0) - - if not hasattr(parent_query, "from_table"): - return - - table = parent_query.from_table - if not is_table: - # add table name or alias for identifiers - if isinstance(table, Join): - # skip for join - return - - # keep column name for target - if is_target: - if node.alias is None: - last_part = node.parts[-1] - if isinstance(last_part, str): - node.alias = Identifier(parts=[node.parts[-1]]) - - query_traversal(query, _prepare_integration_select) - - def get_integration_select_step(self, select: Select, params: dict = None) -> PlanStep: - """ - Generate planner step to execute query over integration or over results of previous step (if it is CTE) - """ - - if isinstance(select.from_table, NativeQuery): - integration_name = select.from_table.integration.parts[-1] - else: - integration_name, table = self.resolve_database_table(select.from_table) - - # is it CTE? - table_name = table_alias = _resolve_identifier_part(table) - if table.alias is not None: - table_alias = _resolve_identifier_part(table.alias) - - if integration_name == self.default_namespace and table_name in self.cte_results: - select.from_table = None - return SubSelectStep(select, self.cte_results[table_name], table_name=table_alias) - - fetch_df_select = copy.deepcopy(select) - self.prepare_integration_select(integration_name, fetch_df_select) - - # remove predictor params - if fetch_df_select.using is not None: - fetch_df_select.using = None - fetch_params = self.get_fetch_params(params) - return FetchDataframeStep(integration=integration_name, query=fetch_df_select, params=fetch_params) - - def get_fetch_params(self, params): - # extracts parameters for fetching - - if params: - fetch_params = params.copy() - # remove partition parameters - for key in ("batch_size", "track_column"): - if key in params: - del params[key] - if "track_column" in fetch_params and isinstance(fetch_params["track_column"], Identifier): - fetch_params["track_column"] = 
fetch_params["track_column"].parts[-1] - else: - fetch_params = None - return fetch_params - - def plan_integration_select(self, select): - """Plan for a select query that can be fully executed in an integration""" - - return self.plan.add_step(self.get_integration_select_step(select, params=select.using)) - - def resolve_database_table(self, node: Identifier): - # resolves integration name and table name - - parts = node.parts.copy() - alias = None - if node.alias is not None: - alias = node.alias.copy() - - database = self.default_namespace - - err_msg_suffix = "" - if len(parts) > 1: - # if not quoted check in lower case - part = parts[0] - if part not in self.databases and not node.is_quoted[0]: - part = part.lower() - - if part in self.databases: - database = part - parts.pop(0) - else: - err_msg_suffix = f"'{parts[0].lower()}' is not valid database name." - - if database is None: - raise PlanningException( - f"Invalid or missing database name for identifier '{node}'. {err_msg_suffix}\n" - "Query must include a valid database name prefix in format: 'database_name.table_name' or 'database_name.schema_name.table_name'" - ) - - return database, Identifier(parts=parts, alias=alias) - - def get_query_info(self, query): - # get all predictors - mdb_entities = [] - predictors = [] - user_functions = [] - # projects = set() - integrations = set() - - def find_objects(node, is_table, **kwargs): - if isinstance(node, Function): - if node.namespace is not None or node.op.lower() in MINDSDB_SQL_FUNCTIONS: - user_functions.append(node) - - if is_table: - if isinstance(node, ast.Identifier): - integration, _ = self.resolve_database_table(node) - - if self.is_predictor(node): - predictors.append(node) - - if integration in self.projects: - # it is project - mdb_entities.append(node) - - elif integration is not None: - integrations.add(integration) - if isinstance(node, ast.NativeQuery) or isinstance(node, ast.Data): - mdb_entities.append(node) - - query_traversal(query, 
find_objects) - - # cte names are not mdb objects - if isinstance(query, Select) and query.cte: - cte_names = [cte.name.parts[-1] for cte in query.cte] - mdb_entities = [item for item in mdb_entities if ".".join(item.parts) not in cte_names] - - return { - "mdb_entities": mdb_entities, - "integrations": integrations, - "predictors": predictors, - "user_functions": user_functions, - } - - def get_nested_selects_plan_fnc(self, main_integration, force=False): - # returns function for traversal over query and inject fetch data query instead of subselects - def find_selects(node, **kwargs): - if isinstance(node, Select): - query_info2 = self.get_query_info(node) - if force or ( - len(query_info2["integrations"]) > 1 - or main_integration not in query_info2["integrations"] - or len(query_info2["mdb_entities"]) > 0 - ): - # need to execute in planner - - node.parentheses = False - last_step = self.plan_select(node) - - node2 = Parameter(last_step.result) - - return node2 - - return find_selects - - def plan_select_identifier(self, query): - # query_info = self.get_query_info(query) - # - # if len(query_info['integrations']) == 0 and len(query_info['predictors']) >= 1: - # # select from predictor - # return self.plan_select_from_predictor(query) - # elif ( - # len(query_info['integrations']) == 1 - # and len(query_info['mdb_entities']) == 0 - # and len(query_info['user_functions']) == 0 - # ): - # - # int_name = list(query_info['integrations'])[0] - # if self.integrations.get(int_name, {}).get('class_type') != 'api': - # # one integration without predictors, send all query to integration - # return self.plan_integration_select(query) - - # find subselects - main_integration, _ = self.resolve_database_table(query.from_table) - is_api_db = self.integrations.get(main_integration, {}).get("class_type") == "api" - - find_selects = self.get_nested_selects_plan_fnc(main_integration, force=is_api_db) - query.targets = query_traversal(query.targets, find_selects) - 
query_traversal(query.where, find_selects) - - # get info of updated query - query_info = self.get_query_info(query) - - if len(query_info["predictors"]) >= 1: - # select from predictor - return self.plan_select_from_predictor(query) - elif is_api_db: - return self.plan_api_db_select(query) - elif len(query_info["user_functions"]) > 0: - return self.plan_integration_select_with_functions(query) - else: - # fallback to integration - return self.plan_integration_select(query) - - def plan_integration_select_with_functions(self, query): - # UDF can't be aggregate function: it means we have to do aggregation after function execution - # - remove targets from query - # - add subselect with targets - - # replace functions in conditions - - query2 = query.copy() - - skipped_conditions = [] - - def replace_functions(node, **kwargs): - if not isinstance(node, BinaryOperation): - return - - arg1, arg2 = node.args - if not isinstance(arg1, Function): - arg1 = arg2 - if not isinstance(arg1, Function): - return - - # user defined - if arg1.namespace is not None: - # clear - skipped_conditions.append(node) - node.args = [Constant(0), Constant(0)] - node.op = "=" - - query_traversal(query2.where, replace_functions) - - query2.targets = [Star()] - - # don't do aggregate - query2.having = None - - if query.group_by is not None: - # if aggregation exists, do order and limit in subquery - query2.group_by = None - query2.order_by = None - query2.limit = None - else: - query.order_by = None - query.limit = None - - # if all conditions were executed - clear it - if len(skipped_conditions) == 0: - query.where = None - - prev_step = self.plan_integration_select(query2) - - return self.plan_sub_select(query, prev_step) - - def plan_api_db_select(self, query): - # split to select from api database - # keep only limit and where - # the rest goes to outer select - if query.group_by is not None: - targets = [Star()] - order_by = None - limit = None - else: - targets = query.targets - order_by 
= query.order_by - limit = query.limit - - query2 = Select( - targets=targets, - from_table=query.from_table, - where=query.where, - order_by=order_by, - limit=limit, - ) - - prev_step = self.plan_integration_select(query2) - - # clear where - query.where = None - return self.plan_sub_select(query, prev_step, skip_for_aggregation=True) - - def plan_nested_select(self, select): - # query_info = self.get_query_info(select) - # # get all predictors - # - # if ( - # len(query_info['mdb_entities']) == 0 - # and len(query_info['integrations']) == 1 - # and len(query_info['user_functions']) == 0 - # and 'files' not in query_info['integrations'] - # and 'views' not in query_info['integrations'] - # ): - # int_name = list(query_info['integrations'])[0] - # if self.integrations.get(int_name, {}).get('class_type') != 'api': - # - # # if no predictor inside = run as is - # return self.plan_integration_nested_select(select, int_name) - - return self.plan_mdb_nested_select(select) - - def plan_mdb_nested_select(self, select): - # plan nested select - - select2 = copy.deepcopy(select.from_table) - select2.parentheses = False - select2.alias = None - self.plan_select(select2) - last_step = self.plan.steps[-1] - - return self.plan_sub_select(select, last_step) - - def get_predictor_namespace_and_name_from_identifier(self, identifier): - new_identifier = copy.deepcopy(identifier) - - info = self.get_predictor(identifier) - namespace = info["integration_name"] - - parts = [namespace, info["name"]] - if info["version"] is not None: - parts.append(info["version"]) - new_identifier.parts = parts - - return namespace, new_identifier - - def plan_select_from_predictor(self, select): - predictor_namespace, predictor = self.get_predictor_namespace_and_name_from_identifier(select.from_table) - - if select.where == BinaryOperation("=", args=[Constant(1), Constant(0)]): - # Hardcoded mysql way of getting predictor columns - predictor_identifier = utils.get_predictor_name_identifier(predictor) 
- predictor_step = self.plan.add_step( - GetPredictorColumns(namespace=predictor_namespace, predictor=predictor_identifier) - ) - else: - new_query_targets = [] - for target in select.targets: - if isinstance(target, Identifier): - new_query_targets.append(disambiguate_predictor_column_identifier(target, predictor)) - elif type(target) in (Star, Constant, Function): - new_query_targets.append(target) - else: - raise PlanningException(f"Unknown select target {type(target)}") - - if select.group_by or select.having: - raise PlanningException( - "Unsupported operation when querying predictor. Only WHERE is allowed and required." - ) - - row_dict = {} - where_clause = select.where - if not where_clause: - raise PlanningException("WHERE clause required when selecting from predictor") - - predictor_identifier = utils.get_predictor_name_identifier(predictor) - recursively_extract_column_values(where_clause, row_dict, predictor_identifier) - - params = None - if select.using is not None: - params = select.using - predictor_step = self.plan.add_step( - ApplyPredictorRowStep( - namespace=predictor_namespace, predictor=predictor_identifier, row_dict=row_dict, params=params - ) - ) - project_step = self.plan_project(select, predictor_step.result) - return project_step - - def plan_predictor(self, query, table, predictor_namespace, predictor): - int_select = copy.deepcopy(query) - int_select.targets = [Star()] # TODO why not query.targets? 
- int_select.from_table = table - - predictor_alias = None - if predictor.alias is not None: - predictor_alias = predictor.alias.parts[0] - - params = {} - if query.using is not None: - params = query.using - - binary_ops = [] - table_filters = [] - model_filters = [] - - def split_filters(node, **kwargs): - # split conditions between model and table - - if isinstance(node, BinaryOperation): - op = node.op.lower() - - binary_ops.append(op) - - if op in ["and", "or"]: - return - - arg1, arg2 = node.args - if not isinstance(arg1, Identifier): - arg1, arg2 = arg2, arg1 - - if isinstance(arg1, Identifier) and isinstance(arg2, (Constant, Parameter)) and len(arg1.parts) > 1: - model = Identifier(parts=arg1.parts[:-1]) - - if self.is_predictor(model) or (len(model.parts) == 1 and model.parts[0] == predictor_alias): - model_filters.append(node) - return - table_filters.append(node) - - # find subselects - main_integration, _ = self.resolve_database_table(table) - find_selects = self.get_nested_selects_plan_fnc(main_integration, force=True) - query_traversal(int_select.where, find_selects) - - # split conditions - query_traversal(int_select.where, split_filters) - - if len(model_filters) > 0 and "or" not in binary_ops: - int_select.where = filters_to_bin_op(table_filters) - - integration_select_step = self.plan_integration_select(int_select) - - predictor_identifier = utils.get_predictor_name_identifier(predictor) - - if len(params) == 0: - params = None - - row_dict = None - if model_filters: - row_dict = {} - for el in model_filters: - if isinstance(el.args[0], Identifier) and el.op == "=": - if isinstance(el.args[1], (Constant, Parameter)): - row_dict[el.args[0].parts[-1]] = el.args[1].value - - last_step = self.plan.add_step( - ApplyPredictorStep( - namespace=predictor_namespace, - dataframe=integration_select_step.result, - predictor=predictor_identifier, - params=params, - row_dict=row_dict, - ) - ) - - return { - "predictor": last_step, - "data": 
integration_select_step, - } - - # def plan_group(self, query, last_step): - # # ! is not using yet - # - # # check group - # funcs = [] - # for t in query.targets: - # if isinstance(t, Function): - # funcs.append(t.op.lower()) - # agg_funcs = ['sum', 'min', 'max', 'avg', 'count', 'std'] - # - # if ( - # query.having is not None - # or query.group_by is not None - # or set(agg_funcs) & set(funcs) - # ): - # # is aggregate - # group_by_targets = [] - # for t in query.targets: - # target_copy = copy.deepcopy(t) - # group_by_targets.append(target_copy) - # # last_step = self.plan.steps[-1] - # return GroupByStep(dataframe=last_step.result, columns=query.group_by, targets=group_by_targets) - - def plan_project(self, query, dataframe, ignore_doubles=False): - out_identifiers = [] - - if len(query.targets) == 1 and isinstance(query.targets[0], Star): - last_step = self.plan.steps[-1] - return last_step - - for target in query.targets: - if ( - isinstance(target, Identifier) - or isinstance(target, Star) - or isinstance(target, Function) - or isinstance(target, Constant) - or isinstance(target, BinaryOperation) - ): - out_identifiers.append(target) - else: - new_identifier = Identifier(str(target.to_string(alias=False)), alias=target.alias) - out_identifiers.append(new_identifier) - return self.plan.add_step( - ProjectStep(dataframe=dataframe, columns=out_identifiers, ignore_doubles=ignore_doubles) - ) - - def plan_create_table(self, query: CreateTable): - if query.from_select is None: - if query.columns is not None: - self.plan.add_step( - CreateTableStep( - table=query.name, - columns=query.columns, - is_replace=query.is_replace, - ) - ) - return - - raise PlanningException(f'Not implemented "create table": {query.to_string()}') - - integration_name = query.name.parts[0] - - last_step = self.plan_select(query.from_select, integration=integration_name) - - # create table step - self.plan.add_step( - SaveToTable( - table=query.name, - dataframe=last_step.result, - 
is_replace=query.is_replace, - ) - ) - - def plan_insert(self, query): - table = query.table - if query.from_select is not None: - integration_name = query.table.parts[0] - - # plan sub-select first - last_step = self.plan_select(query.from_select, integration=integration_name) - - # possible knowledge base parameters - select = query.from_select - params = {} - if isinstance(select, Select) and select.using is not None: - for k, v in select.using.items(): - if k.startswith("kb_"): - params[k] = v - - self.plan.add_step( - InsertToTable( - table=table, - dataframe=last_step.result, - params=params, - ) - ) - else: - self.plan.add_step( - InsertToTable( - table=table, - query=query, - ) - ) - - def plan_update(self, query): - last_step = None - if query.from_select is not None: - integration_name = query.table.parts[0] - last_step = self.plan_select(query.from_select, integration=integration_name) - - # plan sub-select first - update_command = copy.deepcopy(query) - # clear subselect - update_command.from_select = None - - table = query.table - self.plan.add_step(UpdateToTable(table=table, dataframe=last_step, update_command=update_command)) - - def plan_delete(self, query: Delete): - # find subselects - main_integration, _ = self.resolve_database_table(query.table) - - is_api_db = self.integrations.get(main_integration, {}).get("class_type") == "api" - - find_selects = self.get_nested_selects_plan_fnc(main_integration, force=is_api_db) - query_traversal(query.where, find_selects) - - self.prepare_integration_select(main_integration, query.where) - - return self.plan.add_step(DeleteStep(table=query.table, where=query.where)) - - def plan_cte(self, query): - for cte in query.cte: - step = self.plan_select(cte.query) - name = _resolve_identifier_part(cte.name) - self.cte_results[name] = step.result - - def check_single_integration(self, query): - query_info = self.get_query_info(query) - - # can we send all query to integration? 
- - # one integration and not mindsdb objects in query - if ( - len(query_info["mdb_entities"]) == 0 - and len(query_info["integrations"]) == 1 - and "files" not in query_info["integrations"] - and "views" not in query_info["integrations"] - and len(query_info["user_functions"]) == 0 - ): - int_name = list(query_info["integrations"])[0] - # if is sql database - if self.integrations.get(int_name, {}).get("class_type") != "api": - # send to this integration - self.prepare_integration_select(int_name, query) - - last_step = self.plan.add_step(FetchDataframeStep(integration=int_name, query=query)) - return last_step - - def plan_select(self, query, integration=None): - if isinstance(query, (Union, Except, Intersect)): - return self.plan_union(query, integration=integration) - - if query.cte is not None: - self.plan_cte(query) - - from_table = query.from_table - - if isinstance(from_table, Identifier): - return self.plan_select_identifier(query) - elif isinstance(from_table, Select): - return self.plan_nested_select(query) - elif isinstance(from_table, Join): - plan_join = PlanJoin(self) - return plan_join.plan(query, integration) - elif isinstance(from_table, NativeQuery): - integration = from_table.integration.parts[0].lower() - step = FetchDataframeStep(integration=integration, raw_query=from_table.query) - last_step = self.plan.add_step(step) - return self.plan_sub_select(query, last_step) - - elif isinstance(from_table, ast.Data): - step = DataStep(from_table.data) - last_step = self.plan.add_step(step) - return self.plan_sub_select(query, last_step, add_absent_cols=True) - elif from_table is None: - # one line select - step = QueryStep(query, from_table=pd.DataFrame([None])) - return self.plan.add_step(step) - else: - raise PlanningException(f"Unsupported from_table {type(from_table)}") - - def plan_sub_select(self, query, prev_step, add_absent_cols=False, skip_for_aggregation=False): - if ( - query.group_by is not None - or query.order_by is not None - or 
query.having is not None - or query.distinct is True - or query.where is not None - or query.limit is not None - or query.offset is not None - or len(query.targets) != 1 - or not isinstance(query.targets[0], Star) - ): - if query.from_table.alias is not None: - table_name = query.from_table.alias.parts[-1] - elif isinstance(query.from_table, Identifier): - table_name = query.from_table.parts[-1] - else: - table_name = None - - query2 = copy.deepcopy(query) - query2.from_table = None - sup_select = SubSelectStep( - query2, - prev_step.result, - table_name=table_name, - add_absent_cols=add_absent_cols, - skip_for_aggregation=skip_for_aggregation, - ) - self.plan.add_step(sup_select) - return sup_select - return prev_step - - def plan_union(self, query, integration=None): - step1 = self.plan_select(query.left, integration=integration) - step2 = self.plan_select(query.right, integration=integration) - operation = "union" - if isinstance(query, Except): - operation = "except" - elif isinstance(query, Intersect): - operation = "intersect" - - return self.plan.add_step( - UnionStep(left=step1.result, right=step2.result, unique=query.unique, operation=operation) - ) - - # method for compatibility - def from_query(self, query=None): - self.plan = QueryPlan() - - if query is None: - query = self.query - - if isinstance(query, (Select, Union, Except, Intersect)): - if self.check_single_integration(query): - return self.plan - self.plan_select(query) - elif isinstance(query, CreateTable): - self.plan_create_table(query) - elif isinstance(query, Insert): - self.plan_insert(query) - elif isinstance(query, Update): - self.plan_update(query) - elif isinstance(query, Delete): - self.plan_delete(query) - else: - raise PlanningException(f"Unsupported query type {type(query)}") - - plan = self.handle_partitioning(self.plan) - - if not plan.is_resumable: - # optimizations for insert from selects - plan = self.check_insert_from_select(self.plan) - - self.plan = plan - return plan - - 
def check_insert_from_select(self, plan: QueryPlan): - # special case: register insert from select (it is the same as mark resumable) - if not ( - len(plan.steps) == 2 - and isinstance(plan.steps[0], FetchDataframeStep) - and isinstance(plan.steps[1], InsertToTable) - ): - return plan - - plan.is_resumable = True - - select_step, insert_step = plan.steps - - # -- to check if it is an insert into KB and to partition it -- - # check if the first table is a KB - table = insert_step.table - kb_name = _resolve_identifier_part(table) - if len(table.parts) > 1: - project_name = _resolve_identifier_part(table, 0) - else: - project_name = self.default_namespace - - kb_info = self.kb_metadata.get((project_name, kb_name)) - if kb_info is None: - return plan - - # Knowledge base storage is not pgvector or pgvector is enabled in config - if config["knowledge_bases"]["disable_autobatch"]: - return plan - - if config["knowledge_bases"]["disable_pgvector_autobatch"] and kb_info["vector_db_engine"] == "pgvector": - return plan - - if not isinstance(select_step.query, Select): - # we can't make probe select for this query - return plan - - # convert - default_batch_size = 1000 - track_column = kb_info.get("id_column", "id") - - probe_select = copy.deepcopy(select_step.query) - probe_select.limit = Constant(1) - probe_select.targets = [Identifier(parts=[track_column])] - - fetch_step = FetchDataframeStepPartition( - step_num=0, - integration=select_step.integration, - query=select_step.query, - params={"batch_size": default_batch_size, "track_column": track_column}, - steps=[insert_step], - ) - - new_plan = QueryPlan( - steps=[fetch_step], - is_resumable=True, - is_async=True, - probe_query={"database": select_step.integration, "query": probe_select}, - failback_plan=plan, - ) - return new_plan - - def handle_partitioning(self, plan: QueryPlan) -> QueryPlan: - """ - If plan has fetching in partitions: - try to rebuild plan to send fetched chunk of data through the following steps, if 
it is possible - """ - - # handle fetchdataframe partitioning - steps_in = plan.steps - steps_out = [] - - step = None - partition_step = None - for step in steps_in: - if isinstance(step, FetchDataframeStep) and step.params is not None: - batch_size = step.params.get("batch_size") - if batch_size is not None: - # found batched fetch - partition_step = FetchDataframeStepPartition( - step_num=step.step_num, - integration=step.integration, - query=step.query, - raw_query=step.raw_query, - params=step.params, - ) - steps_out.append(partition_step) - # mark plan - plan.is_resumable = True - continue - else: - step.params = None - - if partition_step is not None: - # check and add step into partition - - can_be_partitioned = False - if isinstance(step, (JoinStep, ApplyPredictorStep, InsertToTable)): - can_be_partitioned = True - elif isinstance(step, QueryStep): - query = step.query - if ( - query.group_by is None - and query.order_by is None - and query.distinct is False - and query.limit is None - and query.offset is None - ): - no_identifiers = [ - target for target in step.query.targets if not isinstance(target, (Star, Identifier)) - ] - if len(no_identifiers) == 0: - can_be_partitioned = True - - if not can_be_partitioned: - if len(partition_step.steps) == 0: - # Nothing can be partitioned, failback to old plan - plan.is_resumable = False - return plan - partition_step = None - else: - partition_step.steps.append(step) - continue - - steps_out.append(step) - - if plan.is_resumable and isinstance(step, InsertToTable): - plan.is_async = True - - plan.steps = steps_out - return plan - - def prepare_steps(self, query): - statement_planner = PreparedStatementPlanner(self) - - # return generator - return statement_planner.prepare_steps(query) - - def execute_steps(self, params=None): - statement_planner = PreparedStatementPlanner(self) - - # return generator - return statement_planner.execute_steps(params) - - # def fetch(self, row_count): - # statement_planner = 
PreparedStatementPlanner(self) - # return statement_planner.fetch(row_count) - # - # def close(self): - # statement_planner = PreparedStatementPlanner(self) - # return statement_planner.close() - - def get_statement_info(self): - statement_planner = PreparedStatementPlanner(self) - - return statement_planner.get_statement_info() diff --git a/mindsdb/api/executor/planner/query_prepare.py b/mindsdb/api/executor/planner/query_prepare.py deleted file mode 100644 index 3fc3954711a..00000000000 --- a/mindsdb/api/executor/planner/query_prepare.py +++ /dev/null @@ -1,490 +0,0 @@ -import copy -from mindsdb_sql_parser import ast - -from mindsdb.api.executor.planner.exceptions import PlanningException -from mindsdb.api.executor.planner import steps -from mindsdb.api.executor.planner import utils - - -def to_string(identifier): - # alternative to AST.to_string() but without quoting - return ".".join(identifier.parts) - - -class Table: - def __init__(self, node=None, ds=None, is_predictor=None): - self.node = node - self.is_predictor = is_predictor - self.ds = ds - self.name = to_string(node) - self.columns = None - # None is unknown - self.columns_map = None - self.keys = None - if node.alias: - self.alias = to_string(node.alias) - else: - self.alias = None - # self.alias = self.name - - -class Column: - def __init__(self, node=None, table=None, name=None, type=None): - alias = None - if node is not None: - if isinstance(node, ast.Identifier): - # set name - name = node.parts[-1] # ??? 
- - else: - if table is not None and name is not None: - node = ast.Identifier(parts=[table.name, name]) - - self.node = node # link to AST - self.alias = alias # for ProjectStep - - self.is_star = False - - self.table = table # link to AST table - self.name = name # column name - self.type = type - - -class Statement: - def __init__(self): - self.columns = [] - # self.query = None - self.params = None - self.result = None - - # Tables on first level of select - self.tables_lvl1 = None - - # mapping tables by name {'ta.ble': Table()} - self.tables_map = None - - self.offset = 0 - - -class PreparedStatementPlanner: - def __init__(self, planner): - self.planner = planner - - def get_type_of_var(self, v): - if isinstance(v, str): - return "str" - elif isinstance(v, float): - return "float" - elif isinstance(v, int): - return "integer" - - return "str" - - def get_statement_info(self): - stmt = self.planner.statement - - if stmt is None: - raise PlanningException("Statement is not prepared") - - columns_result = [] - - for column in stmt.columns: - table, ds = None, None - if column.table is not None: - table = column.table.name - ds = column.table.ds - columns_result.append( - dict( - alias=column.alias, - type=column.type, - name=column.name, - table_name=table, - table_alias=table, - ds=ds, - ) - ) - - parameters = [] - for param in stmt.params: - name = "?" 
- parameters.append( - dict( - alias=name, - type="str", - name=name, - ) - ) - - return {"parameters": parameters, "columns": columns_result} - - def get_table_of_column(self, t): - tables_map = self.planner.statement.tables_map - - # get tables to check - if len(t.parts) > 1: - # try to find table - table_parts = t.parts[:-1] - table_name = ".".join(table_parts) - if table_name in tables_map: - return tables_map[table_name] - - elif len(table_parts) > 1: - # maybe datasource is 1st part - table_parts = table_parts[1:] - table_name = ".".join(table_parts) - if table_name in tables_map: - return tables_map[table_name] - - def table_from_identifier(self, table): - # disambiguate - if self.planner.is_predictor(table): - ds, table = self.planner.get_predictor_namespace_and_name_from_identifier(table) - is_predictor = True - - else: - ds, table = self.planner.resolve_database_table(table) - is_predictor = False - - if table.alias is not None: - # access by alias if table is having alias - keys = [to_string(table.alias)] - - else: - # access by table name, in all variants - keys = [] - parts = [] - # in reverse order - for p in table.parts[::-1]: - parts.insert(0, p) - keys.append(".".join(parts)) - - # remember table - tbl = Table(ds=ds, node=table, is_predictor=is_predictor) - tbl.keys = keys - - return tbl - - def prepare_select(self, query): - # prepare select with or without predictor - - stmt = self.planner.statement - - # get all predictors - query_predictors = [] - - def find_predictors(node, is_table, **kwargs): - if is_table and isinstance(node, ast.Identifier): - if self.planner.is_predictor(node): - query_predictors.append(node) - - utils.query_traversal(query, find_predictors) - - # === get all tables from 1st level of query === - stmt.tables_map = {} - stmt.tables_lvl1 = [] - if query.from_table is not None: - if isinstance(query.from_table, ast.Join): - # get all tables - join_tables = utils.convert_join_to_list(query.from_table) - else: - join_tables = 
[dict(table=query.from_table)] - - if isinstance(query.from_table, ast.Select): - # nested select, get only last select - join_tables = [dict(table=utils.get_deepest_select(query.from_table).from_table)] - - for i, join_table in enumerate(join_tables): - table = join_table["table"] - if isinstance(table, ast.Identifier): - tbl = self.table_from_identifier(table) - - if tbl.is_predictor: - # Is the last table? - if i + 1 < len(join_tables): - raise PlanningException("Predictor must be last table in query") - - stmt.tables_lvl1.append(tbl) - for key in tbl.keys: - stmt.tables_map[key] = tbl - - else: - # don't add unknown table to looking list - continue - - # is there any predictors at other levels? - lvl1_predictors = [i for i in stmt.tables_lvl1 if i.is_predictor] - if len(query_predictors) != len(lvl1_predictors): - raise PlanningException("Predictor is not at first level") - - # === get targets === - columns = [] - get_all_tables = False - for t in query.targets: - column = Column(t) - - # column alias - alias = None - if t.alias is not None: - alias = to_string(t.alias) - - if isinstance(t, ast.Star): - if len(stmt.tables_lvl1) == 0: - # if "from" is emtpy we can't make plan - raise PlanningException("Can't find table") - - column.is_star = True - get_all_tables = True - - elif isinstance(t, ast.Identifier): - if alias is None: - alias = t.parts[-1] - - table = self.get_table_of_column(t) - if table is None: - # table is not known - get_all_tables = True - else: - column.table = table - - elif isinstance(t, ast.Constant): - if alias is None: - alias = str(t.value) - column.type = self.get_type_of_var(t.value) - elif isinstance(t, ast.Function): - # mysql function - if t.op == "connection_id": - column.type = "integer" - else: - column.type = "str" - else: - # TODO go down into lower level. - # It can be function, operation, select. 
- # But now show it as string - - # TODO add several known types for function, i.e ABS-int - - # TODO TypeCast - as casted type - column.type = "str" - - if alias is not None: - column.alias = alias - columns.append(column) - - # === get columns from tables === - request_tables = set() - for column in columns: - if column.table is not None: - request_tables.add(column.table.name) - - for table in stmt.tables_lvl1: - if get_all_tables or table.name in request_tables: - if table.is_predictor: - step = steps.GetPredictorColumns(namespace=table.ds, predictor=table.node) - else: - step = steps.GetTableColumns(namespace=table.ds, table=table.name) - yield step - - if step.result_data is not None: - # save results - table.columns = step.result_data.columns - table.columns_map = {column.name.upper(): column for column in step.result_data.columns} - - # === create columns list === - columns_result = [] - for i, column in enumerate(columns): - if column.is_star: - # add data from all tables - for table in stmt.tables_lvl1: - if table.columns is None: - raise PlanningException(f"Table is not found {table.name}") - - for col in table.columns: - # col = {name: 'col', type: 'str'} - column2 = Column(table=table, name=col.name) - column2.alias = col.name - column2.type = col.type - - columns_result.append(column2) - - # to next column - continue - - elif column.name is not None: - # is Identifier - if isinstance(column.name, ast.Star): - continue - col_name = column.name.upper() - if column.table is not None: - table = column.table - if table.columns_map is not None: - if col_name in table.columns_map: - column.type = table.columns_map[col_name].type - else: - # continue - raise PlanningException(f"Column not found {col_name}") - - else: - # table is not found, looking for in all tables - for table in stmt.tables_lvl1: - if table.columns_map is not None: - col = table.columns_map.get(col_name) - if col is not None: - column.type = col.type - column.table = table - break - - # 
forcing alias - if column.alias is None: - column.alias = f"column_{i}" - - # forcing type - if column.type is None: - column.type = "str" - - columns_result.append(column) - - # save columns - stmt.columns = columns_result - - def prepare_insert(self, query): - stmt = self.planner.statement - - # get table columns - table = self.table_from_identifier(query.table) - if table.is_predictor: - step = steps.GetPredictorColumns(namespace=table.ds, predictor=table.node) - else: - step = steps.GetTableColumns(namespace=table.ds, table=table.name) - yield step - - if step.result_data is not None: - # save results - - if len(step.result_data["tables"]) > 0: - table_info = step.result_data["tables"][0] - columns_info = step.result_data["columns"][table_info] - - table.columns = [] - table.ds = table_info[0] - for col in columns_info: - if isinstance(col, tuple): - # is predictor - col = dict(name=col[0], type="str") - table.columns.append( - Column( - name=col["name"], - type=col["type"], - ) - ) - - # map by names - table.columns_map = {i.name.upper(): i for i in table.columns} - - # save results - columns_result = [] - for col in query.columns: - col_name = col.parts[-1] - - column = Column(table=table, name=col_name) - - if table.columns_map is not None: - col = table.columns_map.get(col_name) - if col is not None: - column.type = col.type - - if column.type is None: - # forcing type - column.type = "str" - - columns_result.append(column) - - stmt.columns = columns_result - - def prepare_show(self, query): - stmt = self.planner.statement - - stmt.columns = [ - Column(name="Variable_name", type="str"), - Column(name="Value", type="str"), - ] - return [] - - def prepare_steps(self, query): - stmt = Statement() - self.planner.statement = stmt - - self.planner.query = query - - query = copy.deepcopy(query) - - params = utils.get_query_params(query) - - stmt.params = params - - # get columns - if isinstance(query, ast.Select): - # prepare select - return 
self.prepare_select(query) - if isinstance(query, ast.Union): - # get column definition only from select - return self.prepare_select(query.left) - if isinstance(query, ast.Insert): - # return self.prepare_insert(query) - # TODO do we need columns? - return [] - if isinstance(query, ast.Delete): - ... - # TODO do we need columns? - return [] - if isinstance(query, ast.Show): - return self.prepare_show(query) - else: - # do nothing - return [] - # raise NotImplementedError(query.__name__) - - def execute_steps(self, params=None): - # find all parameters - stmt = self.planner.statement - - # is already executed - if stmt is None: - if params is not None: - raise PlanningException("Can't execute statement") - stmt = Statement() - - # === form query with new target === - - query = self.planner.query - - if params is not None: - if len(params) != len(stmt.params): - raise PlanningException("Count of execution parameters don't match prepared statement") - - query = utils.fill_query_params(query, params) - - self.planner.query = query - - # prevent from second execution - stmt.params = None - - if ( - isinstance(query, ast.Select) - or isinstance(query, ast.Union) - or isinstance(query, ast.CreateTable) - or isinstance(query, ast.Insert) - or isinstance(query, ast.Update) - or isinstance(query, ast.Delete) - or isinstance(query, ast.Intersect) - or isinstance(query, ast.Except) - ): - return self.plan_query(query) - else: - return [] - - def plan_query(self, query): - # use v1 planner - self.planner.from_query(query) - step = None - for step in self.planner.plan.steps: - yield step diff --git a/mindsdb/api/executor/planner/step_result.py b/mindsdb/api/executor/planner/step_result.py deleted file mode 100644 index 25aa27f1c0d..00000000000 --- a/mindsdb/api/executor/planner/step_result.py +++ /dev/null @@ -1,19 +0,0 @@ -class Result: - """A placeholder for cached results of some previous plan step""" - def __init__(self, step_num): - self.step_num = step_num - - def 
__hash__(self): - return 'Result' + self.step_num.__hash__() - - def __eq__(self, other): - if isinstance(other, Result): - return self.step_num == other.step_num - return False - - @property - def ref_name(self): - return f'result_{self.step_num}' - - def __repr__(self): - return f'Result(step={self.step_num})' diff --git a/mindsdb/api/executor/planner/steps.py b/mindsdb/api/executor/planner/steps.py deleted file mode 100644 index 91a17aecf6a..00000000000 --- a/mindsdb/api/executor/planner/steps.py +++ /dev/null @@ -1,306 +0,0 @@ -from mindsdb.api.executor.planner.exceptions import PlanningException -from mindsdb.api.executor.planner.step_result import Result - - -class PlanStep: - def __init__(self, step_num=None): - self.step_num = step_num - - @property - def result(self): - if self.step_num is None: - raise PlanningException( - f"Can't reference a step with no assigned step number. Tried to reference: {type(self)}" - ) - return Result(self.step_num) - - def __eq__(self, other): - if type(self) != type(other): - return False - - for k in vars(self): - # skip result comparison - if k == "result_data": - continue - - if getattr(self, k) != getattr(other, k): - return False - - return True - - def __repr__(self): - attrs_dict = vars(self) - attrs_str = ", ".join([f"{k}={str(v)}" for k, v in attrs_dict.items()]) - return f"{self.__class__.__name__}({attrs_str})" - - def set_result(self, result): - self.result_data = result - - -class ProjectStep(PlanStep): - """Selects columns from a dataframe""" - - def __init__(self, columns, dataframe, ignore_doubles=False, *args, **kwargs): - super().__init__(*args, **kwargs) - self.columns = columns - self.dataframe = dataframe - self.ignore_doubles = ignore_doubles - - -# TODO remove -class FilterStep(PlanStep): - """Filters some dataframe according to a query""" - - def __init__(self, dataframe, query, *args, **kwargs): - super().__init__(*args, **kwargs) - self.dataframe = dataframe - self.query = query - - -# TODO remove 
-class GroupByStep(PlanStep): - """Groups output by columns and computes aggregation functions""" - - def __init__(self, dataframe, columns, targets, *args, **kwargs): - super().__init__(*args, **kwargs) - self.dataframe = dataframe - self.columns = columns - self.targets = targets - - -class JoinStep(PlanStep): - """Joins two dataframes, producing a new dataframe""" - - def __init__(self, left, right, query, *args, **kwargs): - super().__init__(*args, **kwargs) - self.left = left - self.right = right - self.query = query - - -class UnionStep(PlanStep): - """Union of two dataframes, producing a new dataframe""" - - def __init__(self, left, right, unique, operation="union", *args, **kwargs): - super().__init__(*args, **kwargs) - self.left = left - self.right = right - self.unique = unique - self.operation = operation - - -# TODO remove -class OrderByStep(PlanStep): - """Applies sorting to a dataframe""" - - def __init__(self, dataframe, order_by, *args, **kwargs): - super().__init__(*args, **kwargs) - self.dataframe = dataframe - self.order_by = order_by - - -class LimitOffsetStep(PlanStep): - """Applies limit and offset to a dataframe""" - - def __init__(self, dataframe, limit=None, offset=None, *args, **kwargs): - super().__init__(*args, **kwargs) - self.dataframe = dataframe - self.limit = limit - self.offset = offset - - -class FetchDataframeStep(PlanStep): - """Fetches a dataframe from external integration""" - - def __init__(self, integration, query=None, raw_query=None, params=None, *args, **kwargs): - super().__init__(*args, **kwargs) - self.integration = integration - self.query = query - self.raw_query = raw_query - self.params = params - - -class FetchDataframeStepPartition(FetchDataframeStep): - """Fetches a dataframe from external integration in partitions""" - - def __init__(self, steps=None, condition=None, *args, **kwargs): - super().__init__(*args, **kwargs) - if steps is None: - steps = [] - self.steps = steps - self.condition = condition - - 
-class ApplyPredictorStep(PlanStep): - """Applies a mindsdb predictor on some dataframe and returns a new dataframe with predictions""" - - def __init__( - self, - namespace, - predictor, - dataframe, - params: dict = None, - row_dict: dict = None, - columns_map: dict = None, - *args, - **kwargs, - ): - super().__init__(*args, **kwargs) - self.namespace = namespace - self.predictor = predictor - self.dataframe = dataframe - self.params = params - - # columns to add to input data, struct: {column name: value} - self.row_dict = row_dict - - # rename columns in input data, struct: {a str: b Identifier} - # renames b to a - self.columns_map = columns_map - - -class ApplyTimeseriesPredictorStep(ApplyPredictorStep): - """Applies a mindsdb predictor on some dataframe and returns a new dataframe with predictions. - Accepts an additional parameter output_time_filter that specifies for which dates the predictions should be returned - """ - - def __init__(self, *args, output_time_filter=None, **kwargs): - super().__init__(*args, **kwargs) - self.output_time_filter = output_time_filter - - -class ApplyPredictorRowStep(PlanStep): - """Applies a mindsdb predictor to one row of values and returns a dataframe of one row, the predictor.""" - - def __init__(self, namespace, predictor, row_dict, params=None, *args, **kwargs): - super().__init__(*args, **kwargs) - self.namespace = namespace - self.predictor = predictor - self.row_dict = row_dict - self.params = params - - -class GetPredictorColumns(PlanStep): - """Returns an empty dataframe of shape and columns like predictor results.""" - - def __init__(self, namespace, predictor, *args, **kwargs): - super().__init__(*args, **kwargs) - self.namespace = namespace - self.predictor = predictor - - -class GetTableColumns(PlanStep): - """Returns an empty dataframe of shape and columns like select from table.""" - - def __init__(self, namespace, table, *args, **kwargs): - super().__init__(*args, **kwargs) - self.namespace = namespace - 
self.table = table - - -class MapReduceStep(PlanStep): - """Applies a step for each value in a list, and then reduces results to a single dataframe""" - - def __init__(self, values, step, reduce="union", partition=None, *args, **kwargs): - """ - :param values: input step data - :param step: step to be applied - :param reduce: type of reduce to be applied - :param partition: type of partition to be applied - - - split data by chunks with equal size - - None - every record is variables to fill - """ - super().__init__(*args, **kwargs) - self.values = values - self.step = step - self.reduce = reduce - self.partition = partition - - -class MultipleSteps(PlanStep): - def __init__(self, steps, reduce=None, *args, **kwargs): - """Runs multiple steps and reduces results to a single dataframe""" - super().__init__(*args, **kwargs) - self.steps = steps - self.reduce = reduce - - -class SaveToTable(PlanStep): - def __init__(self, table, dataframe, is_replace=False, params=None, *args, **kwargs): - """ - Creates table if not exists and fills it with content of dataframe - is_replace - to drop table beforehand - """ - super().__init__(*args, **kwargs) - self.table = table - self.dataframe = dataframe - self.is_replace = is_replace - if params is None: - params = {} - self.params = params - - -class InsertToTable(PlanStep): - def __init__(self, table, dataframe=None, query=None, params=None, *args, **kwargs): - """Fills table with content of dataframe""" - super().__init__(*args, **kwargs) - self.table = table - self.dataframe = dataframe - self.query = query - if params is None: - params = {} - self.params = params - - -class CreateTableStep(PlanStep): - def __init__(self, table, columns=None, is_replace=False, *args, **kwargs): - """Fills table with content of dataframe""" - super().__init__(*args, **kwargs) - self.table = table - self.columns = columns - self.is_replace = is_replace - - -class UpdateToTable(PlanStep): - def __init__(self, table, dataframe, update_command, 
*args, **kwargs): - """Fills table with content of dataframe""" - super().__init__(*args, **kwargs) - self.table = table - self.dataframe = dataframe - self.update_command = update_command - - -class DeleteStep(PlanStep): - def __init__(self, table, where, *args, **kwargs): - """Fills table with content of dataframe""" - super().__init__(*args, **kwargs) - self.table = table - self.where = where - - -class SubSelectStep(PlanStep): - def __init__( - self, query, dataframe, table_name=None, add_absent_cols=False, skip_for_aggregation=False, *args, **kwargs - ): - """Performs select from dataframe""" - super().__init__(*args, **kwargs) - self.query = query - self.dataframe = dataframe - self.table_name = table_name - self.add_absent_cols = add_absent_cols - self.skip_for_aggregation = skip_for_aggregation - - -class QueryStep(PlanStep): - def __init__(self, query, from_table=None, *args, strict_where=True, **kwargs): - """Performs query using injected dataframe""" - super().__init__(*args, **kwargs) - self.query = query - self.from_table = from_table - self.strict_where = strict_where - - -class DataStep(PlanStep): - def __init__(self, data, *args, **kwargs): - super().__init__(*args, **kwargs) - self.data = data diff --git a/mindsdb/api/executor/planner/ts_utils.py b/mindsdb/api/executor/planner/ts_utils.py deleted file mode 100644 index 0bb293ec181..00000000000 --- a/mindsdb/api/executor/planner/ts_utils.py +++ /dev/null @@ -1,93 +0,0 @@ -from mindsdb_sql_parser.ast import Identifier, Operation, BinaryOperation, BetweenOperation, OrderBy - -from mindsdb.api.executor.planner.exceptions import PlanningException - - -def find_time_filter(op, time_column_name): - if not op: - return - if op.op == 'and': - left = find_time_filter(op.args[0], time_column_name) - right = find_time_filter(op.args[1], time_column_name) - if left and right: - raise PlanningException('Can provide only one filter by predictor order_by column, found two') - - return left or right - elif 
((isinstance(op.args[0], Identifier) and op.args[0].parts[-1].lower() == time_column_name.lower()) - or (isinstance(op.args[1], Identifier) and op.args[1].parts[-1].lower() == time_column_name.lower())): - return op - - -def replace_time_filter(op, time_filter, new_filter): - if op == time_filter: - return new_filter - if isinstance(op, BinaryOperation): - op.args[0] = replace_time_filter(op.args[0], time_filter, new_filter) - op.args[1] = replace_time_filter(op.args[1], time_filter, new_filter) - return op - - -def find_and_remove_time_filter(op, time_filter): - if isinstance(op, BinaryOperation) or isinstance(op, BetweenOperation): - if op == time_filter: - return None - elif op.op == 'and': - # TODO maybe OR operation too? - - # next level - left_arg = find_and_remove_time_filter(op.args[0], time_filter) - right_arg = find_and_remove_time_filter(op.args[1], time_filter) - - # if found in one arg return other - if left_arg is None: - return right_arg - if right_arg is None: - return left_arg - - op.args = [left_arg, right_arg] - return op - - return op - - -def validate_ts_where_condition(op, allowed_columns, allow_and=True): - """Error if the where condition caontains invalid ops, is nested or filters on some column that's not time or partition""" - if not op: - return - allowed_ops = ['and', '>', '>=', '=', '<', '<=', 'between', 'in'] - if not allow_and: - allowed_ops.remove('and') - if op.op not in allowed_ops: - raise PlanningException( - f'For time series predictors only the following operations are allowed in WHERE: {str(allowed_ops)}, found instead: {str(op)}.') - - for arg in op.args: - if isinstance(arg, Identifier): - if arg.parts[-1].lower() not in allowed_columns: - raise PlanningException( - f'For time series predictor only the following columns are allowed in WHERE: {str(allowed_columns)}, found instead: {str(arg)}.') - # remove alias - arg.parts = [arg.parts[-1]] - - if isinstance(op.args[0], Operation): - validate_ts_where_condition(op.args[0], 
allowed_columns, allow_and=True) - if isinstance(op.args[1], Operation): - validate_ts_where_condition(op.args[1], allowed_columns, allow_and=True) - - -def recursively_check_join_identifiers_for_ambiguity(item, aliased_fields=None): - if item is None: - return - elif isinstance(item, Identifier): - if len(item.parts) == 1: - if aliased_fields is not None and item.parts[0] in aliased_fields: - # is alias - return - raise PlanningException(f'Ambigous identifier {str(item)}, provide table name for operations on a join.') - elif isinstance(item, Operation): - recursively_check_join_identifiers_for_ambiguity(item.args, aliased_fields=aliased_fields) - elif isinstance(item, OrderBy): - recursively_check_join_identifiers_for_ambiguity(item.field, aliased_fields=aliased_fields) - elif isinstance(item, list): - for arg in item: - recursively_check_join_identifiers_for_ambiguity(arg, aliased_fields=aliased_fields) diff --git a/mindsdb/api/executor/planner/utils.py b/mindsdb/api/executor/planner/utils.py deleted file mode 100644 index e16de9e5b94..00000000000 --- a/mindsdb/api/executor/planner/utils.py +++ /dev/null @@ -1,126 +0,0 @@ -import copy -from typing import List - -from mindsdb_sql_parser.ast import Identifier, Select, BinaryOperation, Constant, Parameter -from mindsdb_sql_parser import ast - -from mindsdb.integrations.utilities.query_traversal import query_traversal -from mindsdb.api.executor.planner.exceptions import PlanningException - - -def get_predictor_name_identifier(identifier): - new_identifier = copy.deepcopy(identifier) - if len(new_identifier.parts) > 1: - new_identifier.parts.pop(0) - return new_identifier - - -def disambiguate_predictor_column_identifier(identifier, predictor): - """Removes integration name from column if it's present, adds table path if it's absent""" - table_ref = predictor.alias.parts_to_str() if predictor.alias else predictor.parts_to_str() - parts = list(identifier.parts) - if parts[0] == table_ref: - parts = parts[1:] - - 
new_identifier = Identifier(parts=parts) - return new_identifier - - -def recursively_extract_column_values(op, row_dict, predictor): - if isinstance(op, BinaryOperation) and op.op == '=': - id = op.args[0] - value = op.args[1] - - if not ( - isinstance(id, Identifier) - and (isinstance(value, Constant) or isinstance(value, Parameter)) - ): - raise PlanningException(f'The WHERE clause for selecting from a predictor' - f' must contain pairs \'Identifier(...) = Constant(...)\',' - f' found instead: {id.to_tree()}, {value.to_tree()}') - - id = disambiguate_predictor_column_identifier(id, predictor) - - if str(id) in row_dict: - raise PlanningException(f'Multiple values provided for {str(id)}') - if isinstance(value, Constant): - value = value.value - row_dict[str(id)] = value - elif isinstance(op, BinaryOperation) and op.op == 'and': - recursively_extract_column_values(op.args[0], row_dict, predictor) - recursively_extract_column_values(op.args[1], row_dict, predictor) - else: - raise PlanningException(f'Only \'and\' and \'=\' operations allowed in WHERE clause, found: {op.to_tree()}') - - -def get_deepest_select(select): - if not select.from_table or not isinstance(select.from_table, Select): - return select - return get_deepest_select(select.from_table) - - -def convert_join_to_list(join): - # join tree to table list - - if isinstance(join.right, ast.Join): - raise NotImplementedError('Wrong join AST') - - items = [] - - if isinstance(join.left, ast.Join): - # dive to next level - items.extend(convert_join_to_list(join.left)) - else: - # this is first table - items.append(dict( - table=join.left - )) - - # all properties set to right table - items.append(dict( - table=join.right, - join_type=join.join_type, - is_implicit=join.implicit, - condition=join.condition - )) - - return items - - -def get_query_params(query): - # find all parameters - params = [] - - def params_find(node, **kwargs): - if isinstance(node, ast.Parameter): - params.append(node) - return node - 
- query_traversal(query, params_find) - return params - - -def fill_query_params(query, params): - - params = copy.deepcopy(params) - - def params_replace(node, **kwargs): - if isinstance(node, ast.Parameter): - value = params.pop(0) - return ast.Constant(value) - - # put parameters into query - query_traversal(query, params_replace) - - return query - - -def filters_to_bin_op(filters: List[BinaryOperation]): - # make a new where clause without params - where = None - for flt in filters: - if where is None: - where = flt - else: - where = BinaryOperation(op='and', args=[where, flt]) - return where diff --git a/mindsdb/api/executor/sql_query/__init__.py b/mindsdb/api/executor/sql_query/__init__.py deleted file mode 100644 index 4e4d507f6d1..00000000000 --- a/mindsdb/api/executor/sql_query/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .sql_query import SQLQuery diff --git a/mindsdb/api/executor/sql_query/result_set.py b/mindsdb/api/executor/sql_query/result_set.py deleted file mode 100644 index f3b22e13e63..00000000000 --- a/mindsdb/api/executor/sql_query/result_set.py +++ /dev/null @@ -1,467 +0,0 @@ -import copy -from array import array -from typing import Any, Generator - -import numpy as np -import pandas as pd -from pandas.api import types as pd_types -import sqlalchemy.types as sqlalchemy_types - -from mindsdb_sql_parser.ast import TableColumn - -from mindsdb.utilities import log -from mindsdb.utilities.types.column import Column -from mindsdb.api.executor.exceptions import WrongArgumentError -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE -from mindsdb.integrations.libs.response import TableResponse - - -logger = log.getLogger(__name__) - - -def get_mysql_data_type_from_series(series: pd.Series, do_infer: bool = False) -> MYSQL_DATA_TYPE: - """Maps pandas Series data type to corresponding MySQL data type. - - This function examines the dtype of a pandas Series and returns the appropriate - MySQL data type enum value. 
For object dtypes, it can optionally attempt to infer - a more specific type. - - Args: - series (pd.Series): The pandas Series to determine the MySQL type for - do_infer (bool): If True and series has object dtype, attempt to infer a more specific type - - Returns: - MYSQL_DATA_TYPE: The corresponding MySQL data type enum value - """ - dtype = series.dtype - if pd_types.is_object_dtype(dtype) and do_infer is True: - dtype = series.infer_objects().dtype - - if pd_types.is_object_dtype(dtype): - return MYSQL_DATA_TYPE.TEXT - if pd_types.is_datetime64_dtype(dtype): - return MYSQL_DATA_TYPE.DATETIME - if pd_types.is_string_dtype(dtype): - return MYSQL_DATA_TYPE.TEXT - if pd_types.is_bool_dtype(dtype): - return MYSQL_DATA_TYPE.BOOL - if pd_types.is_integer_dtype(dtype): - return MYSQL_DATA_TYPE.INT - if pd_types.is_numeric_dtype(dtype): - return MYSQL_DATA_TYPE.FLOAT - return MYSQL_DATA_TYPE.TEXT - - -def _dump_vector(value: Any) -> Any: - if isinstance(value, array): - return value.tolist() - return value - - -def rename_df_columns(df: pd.DataFrame, names: list | None = None) -> None: - """Inplace rename of dataframe columns - - Args: - df (pd.DataFrame): dataframe - names (Optional[List]): columns names to set - """ - if names is not None: - df.columns = names - else: - df.columns = list(range(len(df.columns))) - - -class ResultSet: - def __init__( - self, - columns: list[Column] | None = None, - values: list[list] | None = None, - df: pd.DataFrame | None = None, - affected_rows: int | None = None, - is_prediction: bool = False, - mysql_types: list[MYSQL_DATA_TYPE] | None = None, - table_response: TableResponse = None, - ): - """ - Args: - columns: list of Columns - values (List[List]): data of resultSet, have to be list of lists with length equal to column - df (pd.DataFrame): injected dataframe, have to have enumerated columns and length equal to columns - affected_rows (int): number of affected rows - """ - self._table_response: TableResponse = table_response - if 
table_response: - self._columns = table_response.columns - elif columns is None: - self._columns = [] - else: - self._columns = columns - - if df is None: - if values is None: - df = None - else: - df = pd.DataFrame(values) - self._df = df - - self.affected_rows = affected_rows - - self.is_prediction = is_prediction - - self.mysql_types = mysql_types - - def __repr__(self): - col_names = ", ".join([col.name for col in self._columns]) - - if self._table_response is not None: - return f"{self.__class__.__name__}(table response, cols: {col_names})" - return f"{self.__class__.__name__}({self.length()} rows, cols: {col_names})" - - def __len__(self) -> int: - self._load_table_response() - if self._df is None: - return 0 - return len(self._df) - - def __getitem__(self, slice_val): - # return resultSet with sliced dataframe - self._load_table_response() - df = self._df[slice_val] - return ResultSet(columns=self.columns, df=df) - - # --- converters --- - - @classmethod - def from_df( - cls, - df: pd.DataFrame, - database=None, - table_name=None, - table_alias=None, - is_prediction: bool = False, - mysql_types: list[MYSQL_DATA_TYPE] | None = None, - ): - match mysql_types: - case None: - mysql_types = [None] * len(df.columns) - case list() if len(mysql_types) != len(df.columns): - raise WrongArgumentError(f"Mysql types length mismatch: {len(mysql_types)} != {len(df.columns)}") - - columns = [ - Column(name=column_name, table_name=table_name, table_alias=table_alias, database=database, type=mysql_type) - for column_name, mysql_type in zip(df.columns, mysql_types) - ] - - rename_df_columns(df) - return cls(df=df, columns=columns, is_prediction=is_prediction, mysql_types=mysql_types) - - @classmethod - def from_table_response(cls, table_response): - return cls(table_response=table_response) - - @classmethod - def from_df_cols(cls, df: pd.DataFrame, columns_dict: dict[str, Column], strict: bool = True) -> "ResultSet": - """Create ResultSet from dataframe and dictionary of 
columns - - Args: - df (pd.DataFrame): dataframe - columns_dict (dict[str, Column]): dictionary of columns - strict (bool): if True, raise an error if a column is not found in columns_dict - - Returns: - ResultSet: result set - - Raises: - ValueError: if a column is not found in columns_dict and strict is True - """ - alias_idx = {column.alias: column for column in columns_dict.values() if column.alias is not None} - - columns = [] - for column_name in df.columns: - if strict and column_name not in columns_dict: - raise ValueError(f"Column {column_name} not found in columns_dict") - column = columns_dict.get(column_name) or alias_idx.get(column_name) or Column(name=column_name) - columns.append(column) - - rename_df_columns(df) - - return cls(columns=columns, df=df) - - def to_df(self): - columns_names = self.get_column_names() - df = self.get_raw_df() - rename_df_columns(df, columns_names) - return df - - def to_df_cols(self, prefix: str = "") -> tuple[pd.DataFrame, dict[str, Column]]: - # returns dataframe and dict of columns - # can be restored to ResultSet by from_df_cols method - - columns = [] - col_names = {} - for col in self._columns: - name = col.get_hash_name(prefix) - columns.append(name) - col_names[name] = col - - df = self.get_raw_df() - rename_df_columns(df, columns) - return df, col_names - - # --- tables --- - - def get_tables(self): - tables_idx = [] - tables = [] - cols = ["database", "table_name", "table_alias"] - for col in self._columns: - table = (col.database, col.table_name, col.table_alias) - if table not in tables_idx: - tables_idx.append(table) - tables.append(dict(zip(cols, table))) - return tables - - # --- columns --- - - def get_col_index(self, col): - """ - Get column index - :param col: column object - :return: index of column - """ - - col_idx = None - for i, col0 in enumerate(self._columns): - if col0 is col: - col_idx = i - break - if col_idx is None: - raise WrongArgumentError(f"Column is not found: {col}") - return col_idx - 
- def add_column(self, col, values=None): - self._load_table_response() - self._columns.append(col) - - col_idx = len(self._columns) - 1 - if self._df is not None: - self._df[col_idx] = values - return col_idx - - def del_column(self, col): - self._load_table_response() - idx = self.get_col_index(col) - self._columns.pop(idx) - - self._df.drop(idx, axis=1, inplace=True) - rename_df_columns(self._df) - - @property - def columns(self): - return self._columns - - def get_column_names(self): - columns = [col.name if col.alias is None else col.alias for col in self._columns] - return columns - - def find_columns(self, alias=None, table_alias=None): - col_list = [] - for col in self.columns: - if alias is not None and col.alias.lower() != alias.lower(): - continue - if table_alias is not None and col.table_alias.lower() != table_alias.lower(): - continue - col_list.append(col) - - return col_list - - def copy_column_to(self, col, result_set2): - # copy with values - idx = self.get_col_index(col) - - values = [row[idx] for row in self._records] - - col2 = copy.deepcopy(col) - - result_set2.add_column(col2, values) - return col2 - - def set_col_type(self, col_idx, type_name): - self._load_table_response() - self.columns[col_idx].type = type_name - if self._df is not None: - self._df[col_idx] = self._df[col_idx].astype(type_name) - - # --- records --- - - def _load_table_response(self): - """Fully load the table response by fetching all data from the table response and storing it in the _df attribute.""" - if self._table_response is None: - return - - self._table_response.fetchall() - if self._df is None: - self._df = self._table_response._data - else: - self._df = pd.concat([self._df, self._table_response._data]) - self._table_response = None - - def stream_data(self) -> Generator[pd.DataFrame, None, None]: - """Stream data from the result set. - - Yields: - pd.DataFrame: Data frame. 
- """ - if self._df is not None: - yield self._df - else: - for el in self._table_response.iterate_no_save(): - yield el - - def get_raw_df(self): - self._load_table_response() - names = range(len(self._columns)) - if self._df is None: - return pd.DataFrame([], columns=names) - self._df.columns = names - return self._df - - def add_raw_df(self, df): - if len(df.columns) != len(self._columns): - raise WrongArgumentError(f"Record length mismatch columns length: {len(df.columns)} != {len(self.columns)}") - self._load_table_response() - - rename_df_columns(df) - - if self._df is None: - self._df = df - else: - rename_df_columns(self._df) - self._df = pd.concat([self._df, df], ignore_index=True) - - def add_raw_values(self, values): - # If some values are None, the DataFrame could have incorrect integer types, since 'NaN' is technically a float, so it will convert ints to floats automatically. - df = pd.DataFrame(values).convert_dtypes( - convert_integer=True, - convert_floating=True, - infer_objects=False, - convert_string=False, - convert_boolean=False, - ) - self.add_raw_df(df) - - def get_ast_columns(self) -> list[TableColumn]: - """Converts ResultSet columns to a list of TableColumn objects with SQLAlchemy types. - - This method processes each column in the ResultSet, determines its MySQL data type - (inferring it if necessary), and maps it to the appropriate SQLAlchemy type. - The resulting TableColumn objects most likely will be used in CREATE TABLE statement. 
- - Returns: - list[TableColumn]: A list of TableColumn objects with properly mapped SQLAlchemy types - """ - columns: list[TableColumn] = [] - self._load_table_response() - - type_mapping = { - MYSQL_DATA_TYPE.TINYINT: sqlalchemy_types.INTEGER, - MYSQL_DATA_TYPE.SMALLINT: sqlalchemy_types.INTEGER, - MYSQL_DATA_TYPE.MEDIUMINT: sqlalchemy_types.INTEGER, - MYSQL_DATA_TYPE.INT: sqlalchemy_types.INTEGER, - MYSQL_DATA_TYPE.BIGINT: sqlalchemy_types.INTEGER, - MYSQL_DATA_TYPE.YEAR: sqlalchemy_types.INTEGER, - MYSQL_DATA_TYPE.BOOL: sqlalchemy_types.BOOLEAN, - MYSQL_DATA_TYPE.BOOLEAN: sqlalchemy_types.BOOLEAN, - MYSQL_DATA_TYPE.FLOAT: sqlalchemy_types.FLOAT, - MYSQL_DATA_TYPE.DOUBLE: sqlalchemy_types.FLOAT, - MYSQL_DATA_TYPE.TIME: sqlalchemy_types.Time, - MYSQL_DATA_TYPE.DATE: sqlalchemy_types.Date, - MYSQL_DATA_TYPE.DATETIME: sqlalchemy_types.DateTime, - MYSQL_DATA_TYPE.TIMESTAMP: sqlalchemy_types.TIMESTAMP, - } - - for i, column in enumerate(self._columns): - column_type: MYSQL_DATA_TYPE | None = column.type - - # infer MYSQL_DATA_TYPE if not set - if isinstance(column_type, MYSQL_DATA_TYPE) is False: - if column_type is not None: - logger.warning(f"Unexpected column type: {column_type}") - if self._df is None: - column_type = MYSQL_DATA_TYPE.TEXT - else: - column_type = get_mysql_data_type_from_series(self._df.iloc[:, i]) - - sqlalchemy_type = type_mapping.get(column_type, sqlalchemy_types.TEXT) - - columns.append(TableColumn(name=column.alias, type=sqlalchemy_type)) - return columns - - def to_lists(self, json_types=False): - """ - :param type_cast: cast numpy types - array->list, datetime64->str - :return: list of lists - """ - self._load_table_response() - - if len(self.get_raw_df()) == 0: - return [] - # output for APIs. 
simplify types - if json_types: - df = self.get_raw_df().copy() - for name, dtype in df.dtypes.to_dict().items(): - if pd.api.types.is_datetime64_any_dtype(dtype): - df[name] = df[name].dt.strftime("%Y-%m-%d %H:%M:%S.%f") - for i, column in enumerate(self.columns): - if column.type == MYSQL_DATA_TYPE.VECTOR: - df[i] = df[i].apply(_dump_vector) - df.replace({np.nan: None}, inplace=True) - return df.to_records(index=False).tolist() - - # slower but keep timestamp type - df = self._df.replace({np.nan: None}) # TODO rework - return df.to_dict("split")["data"] - - def get_column_values(self, col_idx): - # get by column index - df = self.get_raw_df() - return list(df[df.columns[col_idx]]) - - def set_column_values(self, col_name, values): - # values is one value or list of values - self._load_table_response() - cols = self.find_columns(col_name) - if len(cols) == 0: - col_idx = self.add_column(Column(name=col_name)) - else: - col_idx = self.get_col_index(cols[0]) - - if self._df is not None: - self._df[col_idx] = values - - def add_from_result_set(self, rs): - source_names = rs.get_column_names() - - col_sequence = [] - for name in self.get_column_names(): - col_sequence.append(source_names.index(name)) - - raw_df = rs.get_raw_df().iloc[:, col_sequence] - - self.add_raw_df(raw_df) - - @property - def records(self): - return list(self.get_records()) - - def get_records(self): - # get records as dicts. - # !!! Attention: !!! - # if resultSet contents duplicate column name: only one of them will be in output - names = self.get_column_names() - for row in self.to_lists(): - yield dict(zip(names, row)) - - def length(self): - return len(self) diff --git a/mindsdb/api/executor/sql_query/sql_query.py b/mindsdb/api/executor/sql_query/sql_query.py deleted file mode 100644 index 0ec9e58a872..00000000000 --- a/mindsdb/api/executor/sql_query/sql_query.py +++ /dev/null @@ -1,372 +0,0 @@ -""" -******************************************************* - * Copyright (C) 2017 MindsDB Inc. 
- * - * This file is part of MindsDB Server. - * - * MindsDB Server can not be copied and/or distributed without the express - * permission of MindsDB Inc - ******************************************************* -""" - -import inspect -from textwrap import dedent -from typing import Union, Dict - -import pandas as pd -from mindsdb_sql_parser import parse_sql, ASTNode - -from mindsdb.api.executor.planner.steps import ( - ApplyTimeseriesPredictorStep, - ApplyPredictorRowStep, - ApplyPredictorStep, - InsertToTable, - FetchDataframeStepPartition, -) - -from mindsdb.api.executor.planner.exceptions import PlanningException -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.api.executor.planner import query_planner - -from mindsdb.api.executor.utilities.sql import get_query_models -from mindsdb.interfaces.model.functions import get_model_record, get_project_record -from mindsdb.api.executor.exceptions import ( - BadTableError, - UnknownError, - LogicError, -) -from mindsdb.interfaces.query_context.context_controller import query_context_controller -import mindsdb.utilities.profiler as profiler -from mindsdb.utilities.fs import create_process_mark, delete_process_mark -from mindsdb.utilities.exception import EntityNotExistsError -from mindsdb.utilities.context import context as ctx -from mindsdb.utilities.types.column import Column - -from . 
import steps -from .result_set import ResultSet -from .steps.base import BaseStepCall - - -class SQLQuery: - step_handlers = {} - - def __init__( - self, - sql: Union[ASTNode, str], - session, - execute: bool = True, - database: str = None, - query_id: int = None, - stop_event=None, - ): - self.session = session - - self.query_id = query_id - if self.query_id is not None: - # get sql and database from resumed query - run_query = query_context_controller.get_query(self.query_id) - sql = run_query.sql - database = run_query.database - - if database is not None: - self.database = database - else: - self.database = session.database - - # Handle None or empty database - convert to None for context - if self.database is None or self.database == "": - db_context = None - else: - db_context = self.database.lower() - self.context = {"database": db_context, "row_id": 0} - - self.columns_list = None - self.steps_data: Dict[int, ResultSet] = {} - - self.planner: query_planner.QueryPlanner = None - self.parameters = [] - self.fetched_data: ResultSet = None - - self.outer_query = None - self.run_query = None - self.stop_event = stop_event - - if isinstance(sql, str): - self.query = parse_sql(sql) - self.context["query_str"] = sql - else: - self.query = sql - renderer = SqlalchemyRender("mysql") - try: - self.context["query_str"] = renderer.get_string(self.query, with_failback=True) - except Exception: - self.context["query_str"] = str(self.query) - - self.create_planner() - - if execute: - self.execute_query() - - @classmethod - def register_steps(cls): - cls.step_handlers = {} - for _, cl in inspect.getmembers(steps): - if inspect.isclass(cl) and issubclass(cl, BaseStepCall): - if cl.bind is not None: - step_name = cl.bind.__name__ - cls.step_handlers[step_name] = cl - - @profiler.profile() - def create_planner(self): - databases = self.session.database_controller.get_list() - - predictor_metadata = [] - kb_metadata = {} - - query_tables = get_query_models(self.query, 
default_database=self.database) - - for project_name, table_name, table_version in query_tables: - project = get_project_record(project_name) - if project is None: - continue - - # check if KB - kb = self.session.kb_controller.get(table_name, project.id) - if kb is not None: - params = kb.params.copy() - vector_db = self.session.integration_controller.get_by_id(kb.vector_database_id) - params["vector_db_engine"] = vector_db.get("engine") if vector_db is not None else None - kb_metadata[(project_name, table_name)] = params - - args = {"name": table_name, "project_name": project_name} - if table_version is not None: - args["active"] = None - args["version"] = table_version - - model_record = get_model_record(**args) - if model_record is None: - # check if it is an agent - try: - agent = self.session.agents_controller.get_agent(table_name, project_name) - except EntityNotExistsError: - continue - if agent is not None: - predictor = { - "name": table_name, - "integration_name": project_name, # integration_name, - "timeseries": False, - "id": agent.id, - "to_predict": "answer", - } - predictor_metadata.append(predictor) - - continue - - if model_record.status == "error": - dot_version_str = "" - and_version_str = "" - if table_version is not None: - dot_version_str = f".{table_version}" - and_version_str = f" and version = {table_version}" - - raise BadTableError( - dedent(f"""\ - The model '{table_name}{dot_version_str}' cannot be used as it is currently in 'error' status. 
- For detailed information about the error, please execute the following command: - - select error from information_schema.models where name = '{table_name}'{and_version_str}; - """) - ) - - ts_settings = model_record.learn_args.get("timeseries_settings", {}) - predictor = { - "name": table_name, - "integration_name": project_name, # integration_name, - "timeseries": False, - "id": model_record.id, - "to_predict": model_record.to_predict, - } - if ts_settings.get("is_timeseries") is True: - window = ts_settings.get("window") - order_by = ts_settings.get("order_by") - if isinstance(order_by, list): - order_by = order_by[0] - group_by = ts_settings.get("group_by") - if isinstance(group_by, list) is False and group_by is not None: - group_by = [group_by] - predictor.update( - { - "timeseries": True, - "window": window, - "horizon": ts_settings.get("horizon"), - "order_by_column": order_by, - "group_by_columns": group_by, - } - ) - - predictor["model_types"] = model_record.dtype_dict or {} - - predictor_metadata.append(predictor) - - database = None if self.database == "" else self.database.lower() - - self.context["predictor_metadata"] = predictor_metadata - self.planner = query_planner.QueryPlanner( - self.query, - integrations=databases, - predictor_metadata=predictor_metadata, - default_namespace=database, - kb_metadata=kb_metadata, - ) - - def prepare_query(self): - """it is prepared statement call""" - try: - for step in self.planner.prepare_steps(self.query): - data = self.execute_step(step) - step.set_result(data) - self.steps_data[step.step_num] = data - except PlanningException as e: - raise LogicError(e) from e - - statement_info = self.planner.get_statement_info() - - self.columns_list = [] - for col in statement_info["columns"]: - self.columns_list.append( - Column( - database=col["ds"], - table_name=col["table_name"], - table_alias=col["table_alias"], - name=col["name"], - alias=col["alias"], - type=col["type"], - ) - ) - - self.parameters = [ - 
Column(name=col["name"], alias=col["alias"], type=col["type"]) for col in statement_info["parameters"] - ] - - def execute_query(self): - if self.fetched_data is not None: - # no need to execute - return - - try: - steps = list(self.planner.execute_steps()) - except PlanningException as e: - raise LogicError(e) from e - - # -- a plan with failback -- - if self.planner.plan.probe_query is not None: - try: - probe_query = self.planner.plan.probe_query - SQLQuery(probe_query["query"], session=self.session, database=probe_query["database"]) - except Exception: - # switch to failback plan - self.planner.plan = self.planner.plan.failback_plan - steps = self.planner.plan.steps - - if self.planner.plan.is_resumable: - # create query - if self.query_id is not None: - self.run_query = query_context_controller.get_query(self.query_id) - else: - self.run_query = query_context_controller.create_query( - self.context["query_str"], database=self.database - ) - - if self.planner.plan.is_async and ctx.task_id is None: - # release KB locks before inserting in background - db_released, partition_params = self.release_kb_lock(steps) - if db_released: - # faiss db is used as a table to insert - if partition_params.get("threads", 1) > 1: - raise ValueError( - "It is not possible to use threads for FAISS knowledge base, " - f"please remove `threads={partition_params['threads']}` parameter" - ) - - # add to task - self.run_query.add_to_task() - # return query info - # columns in upper case - rec = {k.upper(): v for k, v in self.run_query.get_info().items()} - self.fetched_data = ResultSet.from_df(pd.DataFrame([rec])) - self.columns_list = self.fetched_data.columns - return - self.run_query.mark_as_run() - - ctx.run_query_id = self.run_query.record.id - - step_result: list[ResultSet] = None - process_mark = None - try: - steps_classes = (x.__class__ for x in steps) - predict_steps = (ApplyPredictorRowStep, ApplyPredictorStep, ApplyTimeseriesPredictorStep) - if any(s in predict_steps for s 
in steps_classes): - process_mark = create_process_mark("predict") - for step in steps: - with profiler.Context(f"step: {step.__class__.__name__}"): - step_result = self.execute_step(step) - self.steps_data[step.step_num] = step_result - except Exception as e: - if self.run_query is not None: - # set error and place where it stopped - self.run_query.on_error(e, step.step_num if "step" in locals() else -1, self.steps_data) - raise e - else: - # mark running query as completed - if self.run_query is not None: - self.run_query.finish() - ctx.run_query_id = None - finally: - if process_mark is not None: - delete_process_mark("predict", process_mark) - - # save updated query - self.query = self.planner.query - - # there was no executing - if len(self.steps_data) == 0: - return - - self.fetched_data = step_result - - try: - if self.columns_list is None: - self.columns_list = self.fetched_data.columns - - for col in self.fetched_data.find_columns("__mindsdb_row_id"): - self.fetched_data.del_column(col) - - except Exception as e: - raise UnknownError("error in column list step") from e - - def execute_step(self, step, steps_data=None): - cls_name = step.__class__.__name__ - handler = self.step_handlers.get(cls_name) - if handler is None: - raise UnknownError(f"Unknown step: {cls_name}") - - return handler(self, steps_data=steps_data).call(step) - - def release_kb_lock(self, steps): - # find knowledge bases that are used as tables to insert. 
- # then release locks of vector for these knowledge bases - # return partition step params and databases names that were unlocked - db_released, partition_params = [], {} - for step in steps: - if isinstance(step, InsertToTable): - db_name = self.session.kb_controller.release_lock(step.table, project_name=self.database) - if db_name: - db_released.append(db_name) - if isinstance(step, FetchDataframeStepPartition): - dbs, _ = self.release_kb_lock(step.steps) - db_released.extend(dbs) - partition_params.update(step.params) - return db_released, partition_params - - -SQLQuery.register_steps() diff --git a/mindsdb/api/executor/sql_query/steps/__init__.py b/mindsdb/api/executor/sql_query/steps/__init__.py deleted file mode 100644 index ba4bd523ff6..00000000000 --- a/mindsdb/api/executor/sql_query/steps/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -from .apply_predictor_step import ApplyPredictorStepCall, ApplyPredictorRowStepCall, ApplyTimeseriesPredictorStepCall -from .delete_step import DeleteStepCall -from .fetch_dataframe import FetchDataframeStepCall -from .fetch_dataframe_partition import FetchDataframePartitionCall -from .insert_step import InsertToTableCall, SaveToTableCall, CreateTableCall -from .join_step import JoinStepCall -from .map_reduce_step import MapReduceStepCall -from .multiple_step import MultipleStepsCall -from .prepare_steps import GetPredictorColumnsCall, GetTableColumnsCall -from .project_step import ProjectStepCall -from .sql_steps import LimitOffsetStepCall, DataStepCall -from .subselect_step import SubSelectStepCall, QueryStepCall -from .union_step import UnionStepCall -from .update_step import UpdateToTableCall diff --git a/mindsdb/api/executor/sql_query/steps/apply_predictor_step.py b/mindsdb/api/executor/sql_query/steps/apply_predictor_step.py deleted file mode 100644 index a12e56f80fb..00000000000 --- a/mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +++ /dev/null @@ -1,414 +0,0 @@ -import datetime as dt -import re - -import 
pandas as pd - -from mindsdb_sql_parser.ast import ( - BinaryOperation, - Identifier, - Constant, - BetweenOperation, - Parameter, -) -from mindsdb_sql_parser.ast.mindsdb import Latest - -from mindsdb.api.executor.planner.step_result import Result -from mindsdb.api.executor.planner.steps import ( - ApplyTimeseriesPredictorStep, - ApplyPredictorRowStep, - ApplyPredictorStep, -) - -from mindsdb.api.executor.sql_query.result_set import ResultSet -from mindsdb.utilities.types.column import Column -from mindsdb.utilities.cache import get_cache, dataframe_checksum - -from .base import BaseStepCall - - -def get_preditor_alias(step, mindsdb_database): - predictor_name = ".".join(step.predictor.parts) - predictor_alias = ".".join(step.predictor.alias.parts) if step.predictor.alias is not None else predictor_name - return (mindsdb_database, predictor_name, predictor_alias) - - -class ApplyPredictorBaseCall(BaseStepCall): - def apply_predictor(self, project_name, predictor_name, df, version, params): - # is it an agent? 
- agent = self.session.agents_controller.get_agent(predictor_name, project_name) - if agent is not None: - messages = df.to_dict("records") - if params is None: - params = {} - if "query_str" in self.context: - params["original_query"] = self.context["query_str"] - - predictions = self.session.agents_controller.get_completion( - agent, messages=messages, project_name=project_name, params=params - ) - - else: - project_datanode = self.session.datahub.get(project_name) - predictions = project_datanode.predict(model_name=predictor_name, df=df, version=version, params=params) - return predictions - - -class ApplyPredictorRowStepCall(ApplyPredictorBaseCall): - bind = ApplyPredictorRowStep - - def call(self, step): - project_name = step.namespace - predictor_name = step.predictor.parts[0] - where_data0 = step.row_dict - project_datanode = self.session.datahub.get(project_name) - - # fill params - where_data = {} - for key, value in where_data0.items(): - if isinstance(value, Parameter): - rs = self.steps_data[value.value.step_num] - if rs.length() == 1: - # one value, don't do list - value = rs.get_column_values(col_idx=0)[0] - else: - value = rs.get_column_values(col_idx=0) - where_data[key] = value - - version = None - if len(step.predictor.parts) > 1 and step.predictor.parts[-1].isdigit(): - version = int(step.predictor.parts[-1]) - - df = pd.DataFrame([where_data]) - predictions = self.apply_predictor(project_name, predictor_name, df, version, step.params) - - # update predictions with input data - for k, v in where_data.items(): - predictions[k] = v - - table_name = get_preditor_alias(step, self.context.get("database")) - - if len(predictions) == 0: - columns_names = project_datanode.get_table_columns_names(predictor_name) - predictions = pd.DataFrame([], columns=columns_names) - - return ResultSet.from_df( - df=predictions, - database=table_name[0], - table_name=table_name[1], - table_alias=table_name[2], - is_prediction=True, - ) - - -class 
ApplyPredictorStepCall(ApplyPredictorBaseCall): - bind = ApplyPredictorStep - - def call(self, step): - # set row_id - data = self.steps_data[step.dataframe.step_num] - - params = step.params or {} - - # adding __mindsdb_row_id, use first table if exists - if len(data.find_columns("__mindsdb_row_id")) == 0: - table = data.get_tables()[0] if len(data.get_tables()) > 0 else None - - row_id_col = Column( - name="__mindsdb_row_id", - database=table["database"] if table is not None else None, - table_name=table["table_name"] if table is not None else None, - table_alias=table["table_alias"] if table is not None else None, - ) - - row_id = self.context.get("row_id") - values = range(row_id, row_id + data.length()) - data.add_column(row_id_col, values) - self.context["row_id"] += data.length() - - project_name = step.namespace - predictor_name = step.predictor.parts[0] - - # add constants from where - if step.row_dict is not None: - for k, v in step.row_dict.items(): - if isinstance(v, Result): - prev_result = self.steps_data[v.step_num] - # TODO we await only one value: model.param = (subselect) - v = prev_result.get_column_values(col_idx=0)[0] - data.set_column_values(k, v) - - predictor_metadata = {} - for pm in self.context["predictor_metadata"]: - if pm["name"] == predictor_name and pm["integration_name"].lower() == project_name: - predictor_metadata = pm - break - is_timeseries = predictor_metadata["timeseries"] - _mdb_forecast_offset = None - if is_timeseries: - if "> LATEST" in self.context["query_str"]: - # stream mode -- if > LATEST, forecast starts on inferred next timestamp - _mdb_forecast_offset = 1 - elif "= LATEST" in self.context["query_str"]: - # override: when = LATEST, forecast starts on last provided timestamp instead of inferred next time - _mdb_forecast_offset = 0 - else: - # normal mode -- emit a forecast ($HORIZON data points on each) for each provided timestamp - params["force_ts_infer"] = True - _mdb_forecast_offset = None - - 
data.add_column(Column(name="__mdb_forecast_offset"), _mdb_forecast_offset) - - table_name = get_preditor_alias(step, self.context["database"]) - - project_datanode = self.session.datahub.get(project_name) - if len(data) == 0: - columns_names = project_datanode.get_table_columns_names(predictor_name) + ["__mindsdb_row_id"] - result = ResultSet(is_prediction=True) - for column_name in columns_names: - result.add_column( - Column( - name=column_name, database=table_name[0], table_name=table_name[1], table_alias=table_name[2] - ) - ) - else: - predictor_id = predictor_metadata["id"] - table_df = data.to_df() - - if self.session.predictor_cache is not False: - key = f"{predictor_name}_{predictor_id}_{dataframe_checksum(table_df)}" - - predictor_cache = get_cache("predict") - predictions = predictor_cache.get(key) - else: - predictions = None - - if predictions is None: - # handle columns mapping to model - if step.columns_map is not None: - # step.columns_map is {str: Identifier} - - cols_to_rename = {} - for model_col, table_col in step.columns_map.items(): - if len(table_col.parts) != 2: - continue - tbl_name, col_name = table_col.parts - data_cols = data.find_columns(col_name, table_alias=tbl_name) - if len(data_cols) == 0: - continue - # add first found column to rename list - cols_to_rename[data.get_col_index(data_cols[0])] = model_col - # update input data - if cols_to_rename: - columns = list(table_df.columns) - for col_idx, name in cols_to_rename.items(): - columns[col_idx] = name - table_df.columns = columns - - version = None - if len(step.predictor.parts) > 1 and step.predictor.parts[-1].isdigit(): - version = int(step.predictor.parts[-1]) - predictions = self.apply_predictor(project_name, predictor_name, table_df, version, params) - - if self.session.predictor_cache is not False: - if predictions is not None and isinstance(predictions, pd.DataFrame): - predictor_cache.set(key, predictions) - - # apply filter - if is_timeseries: - pred_data = 
predictions.to_dict(orient="records") - where_data = list(data.get_records()) - pred_data = self.apply_ts_filter(pred_data, where_data, step, predictor_metadata) - predictions = pd.DataFrame(pred_data) - - result = ResultSet.from_df( - predictions, - database=table_name[0], - table_name=table_name[1], - table_alias=table_name[2], - is_prediction=True, - ) - - return result - - def apply_ts_filter(self, predictor_data, table_data, step, predictor_metadata): - if step.output_time_filter is None: - # no filter, exit - return predictor_data - - # apply filter - group_cols = predictor_metadata["group_by_columns"] - order_col = predictor_metadata["order_by_column"] - - filter_args = step.output_time_filter.args - filter_op = step.output_time_filter.op - - # filter field must be order column - if not (isinstance(filter_args[0], Identifier) and filter_args[0].parts[-1] == order_col): - # exit otherwise - return predictor_data - - def get_date_format(samples): - # Try common formats first with explicit patterns - for date_format, pattern in ( - ("%Y-%m-%d", r"[\d]{4}-[\d]{2}-[\d]{2}"), - ("%Y-%m-%d %H:%M:%S", r"[\d]{4}-[\d]{2}-[\d]{2} [\d]{2}:[\d]{2}:[\d]{2}"), - # ('%Y-%m-%d %H:%M:%S%z', r'[\d]{4}-[\d]{2}-[\d]{2} [\d]{2}:[\d]{2}:[\d]{2}\+[\d]{2}:[\d]{2}'), - # ('%Y', '[\d]{4}') - ): - if re.match(pattern, samples[0]): - # suggested format - for sample in samples: - try: - dt.datetime.strptime(sample, date_format) - except ValueError: - date_format = None - break - if date_format is not None: - return date_format - - # Use dateparser as fallback and infer format - try: - # Parse the first sample to get its format - # The import is heavy, so we do it here on-demand - import dateparser - - parsed_date = dateparser.parse(samples[0]) - if parsed_date is None: - raise ValueError("Could not parse date") - - # Verify the format works for all samples - for sample in samples[1:]: - if dateparser.parse(sample) is None: - raise ValueError("Inconsistent date formats in samples") - # 
Convert to strftime format based on the input - if re.search(r"\d{2}:\d{2}:\d{2}", samples[0]): - return "%Y-%m-%d %H:%M:%S" - return "%Y-%m-%d" - except (ValueError, AttributeError): - # If dateparser fails, return a basic format as last resort - return "%Y-%m-%d" - - model_types = predictor_metadata["model_types"] - if model_types.get(order_col) in ("float", "integer"): - # convert strings to digits - fnc = {"integer": int, "float": float}[model_types[order_col]] - - # convert predictor_data - if len(predictor_data) > 0: - if isinstance(predictor_data[0][order_col], str): - for row in predictor_data: - row[order_col] = fnc(row[order_col]) - elif isinstance(predictor_data[0][order_col], dt.date): - # convert to datetime - for row in predictor_data: - row[order_col] = fnc(row[order_col]) - - # convert predictor_data - if isinstance(table_data[0][order_col], str): - for row in table_data: - row[order_col] = fnc(row[order_col]) - elif isinstance(table_data[0][order_col], dt.date): - # convert to datetime - for row in table_data: - row[order_col] = fnc(row[order_col]) - - # convert args to date - samples = [arg.value for arg in filter_args if isinstance(arg, Constant) and isinstance(arg.value, str)] - if len(samples) > 0: - for arg in filter_args: - if isinstance(arg, Constant) and isinstance(arg.value, str): - arg.value = fnc(arg.value) - - if model_types.get(order_col) in ("date", "datetime") or isinstance(predictor_data[0][order_col], pd.Timestamp): # noqa - # convert strings to date - # it is making side effect on original data by changing it but let it be - - def _cast_samples(data, order_col): - if isinstance(data[0][order_col], str): - samples = [row[order_col] for row in data] - date_format = get_date_format(samples) - - for row in data: - row[order_col] = dt.datetime.strptime(row[order_col], date_format) - elif isinstance(data[0][order_col], dt.datetime): - pass # check because dt.datetime is instance of dt.date but here we don't need to add HH:MM:SS - elif 
isinstance(data[0][order_col], dt.date): - # convert to datetime - for row in data: - row[order_col] = dt.datetime.combine(row[order_col], dt.datetime.min.time()) - - # convert predictor_data - if len(predictor_data) > 0: - _cast_samples(predictor_data, order_col) - - # convert table data - _cast_samples(table_data, order_col) - - # convert args to date - samples = [arg.value for arg in filter_args if isinstance(arg, Constant) and isinstance(arg.value, str)] - if len(samples) > 0: - date_format = get_date_format(samples) - - for arg in filter_args: - if isinstance(arg, Constant) and isinstance(arg.value, str): - arg.value = dt.datetime.strptime(arg.value, date_format) - # TODO can be dt.date in args? - - # first pass: get max values for Latest in table data - latest_vals = {} - if Latest() in filter_args: - for row in table_data: - if group_cols is None: - key = 0 # the same for any value - else: - key = tuple([str(row[i]) for i in group_cols]) - val = row[order_col] - if key not in latest_vals or latest_vals[key] < val: - latest_vals[key] = val - - # second pass: do filter rows - data2 = [] - for row in predictor_data: - val = row[order_col] - - if isinstance(step.output_time_filter, BetweenOperation): - if val >= filter_args[1].value and val <= filter_args[2].value: - data2.append(row) - elif isinstance(step.output_time_filter, BinaryOperation): - op_map = { - "<": "__lt__", - "<=": "__le__", - ">": "__gt__", - ">=": "__ge__", - "=": "__eq__", - } - arg = filter_args[1] - if isinstance(arg, Latest): - if group_cols is None: - key = 0 # the same for any value - else: - key = tuple([str(row[i]) for i in group_cols]) - if key not in latest_vals: - # pass this row - continue - arg = latest_vals[key] - elif isinstance(arg, Constant): - arg = arg.value - - if filter_op not in op_map: - # unknown operation, exit immediately - return predictor_data - - # check condition - filter_op2 = op_map[filter_op] - if getattr(val, filter_op2)(arg): - data2.append(row) - else: - # 
unknown operation, add anyway - data2.append(row) - - return data2 - - -class ApplyTimeseriesPredictorStepCall(ApplyPredictorStepCall): - bind = ApplyTimeseriesPredictorStep diff --git a/mindsdb/api/executor/sql_query/steps/base.py b/mindsdb/api/executor/sql_query/steps/base.py deleted file mode 100644 index 8ae929bb483..00000000000 --- a/mindsdb/api/executor/sql_query/steps/base.py +++ /dev/null @@ -1,21 +0,0 @@ - -class BaseStepCall: - bind = None - - def __init__(self, sql_query, steps_data=None): - if steps_data is None: - steps_data = sql_query.steps_data - self.steps_data = steps_data - - self.sql_query = sql_query - self.context = sql_query.context - self.session = sql_query.session - - def set_columns_list(self, columns_list): - self.sql_query.columns_list = columns_list - - def get_columns_list(self): - return self.sql_query.columns_list - - def call(self, step): - raise NotImplementedError diff --git a/mindsdb/api/executor/sql_query/steps/delete_step.py b/mindsdb/api/executor/sql_query/steps/delete_step.py deleted file mode 100644 index de2e2581252..00000000000 --- a/mindsdb/api/executor/sql_query/steps/delete_step.py +++ /dev/null @@ -1,48 +0,0 @@ -import copy - -from mindsdb_sql_parser.ast import ( - Identifier, - Constant, - Delete, - Parameter, - Tuple, -) -from mindsdb.integrations.utilities.query_traversal import query_traversal -from mindsdb.api.executor.planner.steps import DeleteStep - -from mindsdb.api.executor.sql_query.result_set import ResultSet - -from .base import BaseStepCall - - -class DeleteStepCall(BaseStepCall): - - bind = DeleteStep - - def call(self, step): - if len(step.table.parts) > 1: - integration_name = step.table.parts[0] - table_name_parts = step.table.parts[1:] - else: - integration_name = self.context['database'] - table_name_parts = step.table.parts - - dn = self.session.datahub.get(integration_name) - - # make command - query = Delete( - table=Identifier(parts=table_name_parts), - where=copy.deepcopy(step.where), - ) - - 
# fill params - def fill_params(node, **kwargs): - if isinstance(node, Parameter): - rs = self.steps_data[node.value.step_num] - items = [Constant(i) for i in rs.get_column_values(col_idx=0)] - return Tuple(items) - - query_traversal(query.where, fill_params) - - response = dn.query(query=query, session=self.session) - return ResultSet(affected_rows=response.affected_rows) diff --git a/mindsdb/api/executor/sql_query/steps/fetch_dataframe.py b/mindsdb/api/executor/sql_query/steps/fetch_dataframe.py deleted file mode 100644 index d73666e49e3..00000000000 --- a/mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +++ /dev/null @@ -1,130 +0,0 @@ -from mindsdb_sql_parser.ast import ( - Identifier, - Constant, - Select, - Join, - Parameter, - BinaryOperation, - Tuple, - Union, - Intersect, -) - -from mindsdb.api.executor.planner.steps import FetchDataframeStep -from mindsdb.api.executor.sql_query.result_set import ResultSet -from mindsdb.api.executor.planner.step_result import Result -from mindsdb.api.executor.exceptions import UnknownError -from mindsdb.interfaces.query_context.context_controller import query_context_controller -from mindsdb.integrations.utilities.query_traversal import query_traversal -from mindsdb.integrations.libs.response import TableResponse - -from .base import BaseStepCall - - -def get_table_alias(table_obj, default_db_name): - # (database, table, alias) - if isinstance(table_obj, Identifier): - if len(table_obj.parts) == 1: - name = (default_db_name, table_obj.parts[0]) - else: - name = (table_obj.parts[0], table_obj.parts[-1]) - elif isinstance(table_obj, Select): - # it is subquery - if table_obj.alias is None: - name = "t" - else: - name = table_obj.alias.parts[0] - name = (default_db_name, name) - elif isinstance(table_obj, Join): - # get from first table - return get_table_alias(table_obj.left, default_db_name) - else: - # unknown yet object - return default_db_name, "t", "t" - - if table_obj.alias is not None: - name = name + 
(".".join(table_obj.alias.parts),) - else: - name = name + (name[1],) - return name - - -def get_fill_param_fnc(steps_data): - def fill_params(node, callstack=None, **kwargs): - if not isinstance(node, Parameter): - return - - if not isinstance(node.value, Result): - # is simple parameter and not set - raise ValueError(f"Parameter is not set: {node.value}") - - rs = steps_data[node.value.step_num] - items = [Constant(i) for i in rs.get_column_values(col_idx=0)] - - is_single_item = True - if callstack: - node_prev = callstack[0] - if isinstance(node_prev, BinaryOperation): - # Check case: 'something IN Parameter()' - if node_prev.op.lower() == "in" and node_prev.args[1] is node: - is_single_item = False - - if is_single_item and len(items) == 1: - # extract one value for option 'col=(subselect)' - node = items[0] - else: - node = Tuple(items) - return node - - return fill_params - - -class FetchDataframeStepCall(BaseStepCall): - bind = FetchDataframeStep - - def call(self, step): - dn = self.session.datahub.get(step.integration) - query = step.query - - if dn is None: - raise UnknownError(f"Unknown integration name: {step.integration}") - - if query is None: - table_alias = (self.context.get("database"), "result", "result") - - response: TableResponse = dn.query(step.raw_query, session=self.session) - df = response.data_frame - else: - if isinstance(step.query, (Union, Intersect)): - table_alias = ["", "", ""] - else: - table_alias = get_table_alias(step.query.from_table, self.context.get("database")) - - # TODO for information_schema we have 'database' = 'mindsdb' - - # fill params - fill_params = get_fill_param_fnc(self.steps_data) - query_traversal(query, fill_params) - - query, context_callback = query_context_controller.handle_db_context_vars(query, dn, self.session) - - response: TableResponse = dn.query(query=query, session=self.session) - response.set_columns_attrs( - table_name=table_alias[1], - table_alias=table_alias[2], - database=table_alias[0], - ) - 
if context_callback: - context_callback(response.data_frame, response.columns) - return ResultSet.from_table_response(response) - - # if query registered, set progress - if self.sql_query.run_query is not None: - self.sql_query.run_query.set_progress(processed_rows=len(df)) - return ResultSet.from_df( - df, - table_name=table_alias[1], - table_alias=table_alias[2], - database=table_alias[0], - mysql_types=[column.type for column in response.columns], - ) diff --git a/mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py b/mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py deleted file mode 100644 index 9775a2867e9..00000000000 --- a/mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +++ /dev/null @@ -1,264 +0,0 @@ -import time -import copy -import pandas as pd -from typing import List - -from mindsdb_sql_parser import ASTNode, Constant -from mindsdb.api.executor.planner.steps import FetchDataframeStepPartition -from mindsdb.integrations.utilities.query_traversal import query_traversal - -from mindsdb.interfaces.query_context.context_controller import RunningQuery -from mindsdb.api.executor.sql_query.result_set import ResultSet -from mindsdb.utilities import log -from mindsdb.utilities.config import config -from mindsdb.utilities.partitioning import get_max_thread_count, split_data_frame -from mindsdb.api.executor.sql_query.steps.fetch_dataframe import get_table_alias, get_fill_param_fnc -from mindsdb.utilities.context_executor import ContextThreadPoolExecutor - -from mindsdb.interfaces.query_context.context_controller import query_context_controller - - -from .base import BaseStepCall - - -logger = log.getLogger(__name__) - - -class FetchDataframePartitionCall(BaseStepCall): - """ - Alternative to FetchDataframeCall but fetch data by batches wrapping user's query to: - - select * from ({user query}) - where {track_column} > {previous value} - order by track_column - limit size {batch_size} ` - - """ - - bind = 
FetchDataframeStepPartition - - def call(self, step: FetchDataframeStepPartition) -> ResultSet: - """ - Parameters: - - batch_size - count of rows to fetch from database per iteration, optional default 1000 - - threads - run partitioning in threads, bool or int, optinal, if set: - - int value: use this as count of threads - - true: table threads, autodetect count of thread - - false: disable threads even if ml task queue is enabled - - track_column - column used for creating partitions - - query will be sorted by this column and select will be limited by batch_size - - error (default raise) - - when `error='skip'`, errors in partition will be skipped and execution will be continued - """ - - self.dn = self.session.datahub.get(step.integration) - query = step.query - - # fill params - fill_params = get_fill_param_fnc(self.steps_data) - query_traversal(query, fill_params) - - self.table_alias = get_table_alias(step.query.from_table, self.context.get("database")) - self.current_step_num = step.step_num - - if step.condition is not None: - if "limit" in step.condition: - return self.repeat_till_reach_limit(step, step.condition["limit"]) - - # get query record - run_query = self.sql_query.run_query - if run_query is None: - raise RuntimeError("Error with partitioning of the query") - run_query.set_params(step.params) - - self.substeps = step.steps - - # ml task queue enabled? - use_threads, thread_count = False, None - if config["ml_task_queue"]["type"] == "redis": - use_threads = True - - # use threads? 
- if "threads" in step.params: - threads = step.params["threads"] - if isinstance(threads, int): - thread_count = threads - use_threads = True - if threads is True: - use_threads = True - if threads is False: - # disable even with ml task queue - use_threads = False - - on_error = step.params.get("error", "raise") - try: - if use_threads: - return self.fetch_threads(run_query, query, thread_count=thread_count, on_error=on_error) - else: - return self.fetch_iterate(run_query, query, on_error=on_error) - finally: - # release KB locks after inserting in background - self.sql_query.release_kb_lock(self.substeps) - - def repeat_till_reach_limit(self, step, limit): - first_table_limit = limit * 2 - dn = self.session.datahub.get(step.integration) - - query = step.query - - # fill params - query, context_callback = query_context_controller.handle_db_context_vars(query, dn, self.session) - - try_num = 1 - started_at = time.time() - while True: - self.substeps = copy.deepcopy(step.steps) - query2 = copy.deepcopy(query) - - if first_table_limit is not None: - query2.limit = Constant(first_table_limit) - else: - query2.limit = None - - response = dn.query(query=query2, session=self.session) - df = response.data_frame - - result = self.exec_sub_steps(df) - - if len(result) >= limit or first_table_limit is None or len(df) < first_table_limit: - # we have enough results - # OR first table doesn't return requested count of rows - # OR it is a flag to stop - result = result[:limit] - break - - # break if process is too long or to many tries - if try_num > 3 or time.time() - started_at > 5: - # the last try without the limit - first_table_limit = None - continue - - # no enough results - if len(result) > 0: - # forecast the required limit (depending on how much row we don't have) - first_table_limit = int(first_table_limit * limit / len(result) * try_num + 10**try_num) - else: - first_table_limit = first_table_limit * 10 - - try_num += 1 - - if context_callback: - 
context_callback(df, response.columns) - - return result - - def fetch_iterate(self, run_query: RunningQuery, query: ASTNode, on_error: str = None) -> ResultSet: - """ - Process batches one by one in circle - """ - - results = [] - - for df in run_query.get_partitions(self.dn, self, query): - try: - sub_data = self.exec_sub_steps(df) - run_query.set_progress(processed_rows=len(df)) - results.append(sub_data) - except Exception as e: - if on_error == "skip": - logger.error(e) - else: - raise e - - return self.concat_results(results) - - def concat_results(self, results: List[ResultSet]) -> ResultSet: - """ - Concatenate list of result sets to single result set - """ - df_list = [] - for res in results: - df, col_names = res.to_df_cols() - if len(df) > 0: - df_list.append(df) - - data = ResultSet() - if len(df_list) > 0: - data = ResultSet.from_df_cols(pd.concat(df_list), col_names) - - return data - - def exec_sub_steps(self, df: pd.DataFrame) -> ResultSet: - """ - FetchDataframeStepPartition has substeps defined - Every batch of data have to be used to execute these substeps - - batch of data is put as result of FetchDataframeStepPartition - - substep are executed using result of previos step (like it is all fetched data is available) - - the final result is returned and used outside to concatenate with results of other's batches - """ - input_data = ResultSet.from_df( - df, table_name=self.table_alias[1], table_alias=self.table_alias[2], database=self.table_alias[0] - ) - - if len(self.substeps) == 0: - return input_data - - # execute with modified previous results - steps_data2 = self.steps_data.copy() - steps_data2[self.current_step_num] = input_data - - sub_data = None - for substep in self.substeps: - sub_data = self.sql_query.execute_step(substep, steps_data=steps_data2) - steps_data2[substep.step_num] = sub_data - return sub_data - - def fetch_threads( - self, run_query: RunningQuery, query: ASTNode, thread_count: int = None, on_error: str = None - ) -> 
ResultSet: - """ - Process batches in threads - - spawn required count of threads - - create in/out queue to communicate with threads - - send task to threads and receive results - """ - - # create communication queues - - if thread_count is None: - thread_count = get_max_thread_count() - - # 3 tasks per worker during 1 batch - partition_size = int(run_query.batch_size / thread_count) - # min partition size - if partition_size < 10: - partition_size = 10 - - results = [] - - with ContextThreadPoolExecutor(max_workers=thread_count) as executor: - for df in run_query.get_partitions(self.dn, self, query): - # split into chunks and send to workers - futures = [] - for df2 in split_data_frame(df, partition_size): - futures.append([executor.submit(self.exec_sub_steps, df2), len(df2)]) - - error = None - for future, rows_count in futures: - try: - results.append(future.result()) - run_query.set_progress(processed_rows=rows_count) - except Exception as e: - if on_error == "skip": - logger.error(e) - else: - executor.shutdown() - error = e - - if error: - raise error - if self.sql_query.stop_event is not None and self.sql_query.stop_event.is_set(): - executor.shutdown() - raise RuntimeError("Query is interrupted") - - return self.concat_results(results) diff --git a/mindsdb/api/executor/sql_query/steps/insert_step.py b/mindsdb/api/executor/sql_query/steps/insert_step.py deleted file mode 100644 index d7ea17cd6cb..00000000000 --- a/mindsdb/api/executor/sql_query/steps/insert_step.py +++ /dev/null @@ -1,124 +0,0 @@ -from mindsdb_sql_parser.ast import Identifier, Function - -from mindsdb.api.executor.planner.steps import SaveToTable, InsertToTable, CreateTableStep -from mindsdb.api.executor.sql_query.result_set import ResultSet -from mindsdb.utilities.types.column import Column -from mindsdb.utilities.exception import EntityNotExistsError -from mindsdb.api.executor.exceptions import NotSupportedYet, LogicError -from mindsdb.integrations.libs.response import 
INF_SCHEMA_COLUMNS_NAMES - -from .base import BaseStepCall - - -class InsertToTableCall(BaseStepCall): - bind = InsertToTable - - def call(self, step): - is_replace = False - is_create = False - - if type(step) == SaveToTable: - is_create = True - - if step.is_replace: - is_replace = True - - if len(step.table.parts) > 1: - integration_name = step.table.parts[0] - table_name = Identifier(parts=step.table.parts[1:]) - else: - integration_name = self.context["database"] - table_name = step.table - - dn = self.session.datahub.get(integration_name) - - if hasattr(dn, "create_table") is False: - raise NotSupportedYet(f"Creating table in '{integration_name}' is not supported") - - if step.dataframe is not None: - data = self.steps_data[step.dataframe.step_num] - elif step.query is not None: - data = ResultSet() - if step.query.columns is None: - # Is query like: INSERT INTO table VALUES (...) - table_columns_df = dn.get_table_columns_df(str(table_name)) - columns_names = table_columns_df[INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME].to_list() - for column_name in columns_names: - data.add_column(Column(name=column_name)) - else: - # Is query like: INSERT INTO table (column_name, ...) VALUES (...) - for col in step.query.columns: - data.add_column(Column(name=col.name)) - - records = [] - for row in step.query.values: - record = [] - for v in row: - if isinstance(v, Identifier) and v.parts[0] == "None": - # Allow explicitly inserting NULL values. 
- record.append(None) - continue - # Value is a function - if isinstance(v, Function): - record.append(v) - continue - record.append(v.value) - records.append(record) - - data.add_raw_values(records) - else: - raise LogicError(f"Data not found for insert: {step}") - - # del 'service' columns - for col in data.find_columns("__mindsdb_row_id"): - data.del_column(col) - for col in data.find_columns("__mdb_forecast_offset"): - data.del_column(col) - - # region del columns filtered at projection step - columns_list = self.get_columns_list() - if columns_list is not None: - filtered_column_names = [x.name for x in columns_list] - for col in data.columns: - if col.name.startswith("predictor."): - continue - if col.name in filtered_column_names: - continue - data.del_column(col) - # endregion - - # drop double names - col_names = set() - for col in data.columns: - if col.alias in col_names: - data.del_column(col) - else: - col_names.add(col.alias) - - response = dn.create_table( - table_name=table_name, result_set=data, is_replace=is_replace, is_create=is_create, params=step.params - ) - return ResultSet(affected_rows=response.affected_rows) - - -class SaveToTableCall(InsertToTableCall): - bind = SaveToTable - - -class CreateTableCall(BaseStepCall): - bind = CreateTableStep - - def call(self, step): - if len(step.table.parts) > 1: - integration_name = step.table.parts[0] - table_name = Identifier(parts=step.table.parts[1:]) - else: - integration_name = self.context["database"] - table_name = step.table - - dn = self.session.datahub.get(integration_name) - if dn is None: - raise EntityNotExistsError("Database not found", integration_name) - - dn.create_table(table_name=table_name, columns=step.columns, is_replace=step.is_replace, is_create=True) - return ResultSet() diff --git a/mindsdb/api/executor/sql_query/steps/join_step.py b/mindsdb/api/executor/sql_query/steps/join_step.py deleted file mode 100644 index 166521d5383..00000000000 --- 
a/mindsdb/api/executor/sql_query/steps/join_step.py +++ /dev/null @@ -1,109 +0,0 @@ -import copy - -import numpy as np - -from mindsdb_sql_parser.ast import ( - Identifier, BinaryOperation, Constant -) -from mindsdb.api.executor.planner.steps import ( - JoinStep, -) -from mindsdb.integrations.utilities.query_traversal import query_traversal -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender - -from mindsdb.api.executor.sql_query.result_set import ResultSet -from mindsdb.api.executor.utilities.sql import query_df_with_type_infer_fallback -from mindsdb.api.executor.exceptions import NotSupportedYet - -from .base import BaseStepCall - - -class JoinStepCall(BaseStepCall): - - bind = JoinStep - - def call(self, step): - left_data = self.steps_data[step.left.step_num] - right_data = self.steps_data[step.right.step_num] - - if right_data.is_prediction or left_data.is_prediction: - # ignore join condition, use row_id - l_row_ids = left_data.find_columns('__mindsdb_row_id') - r_row_ids = right_data.find_columns('__mindsdb_row_id') - - if len(l_row_ids) == 0: - if len(r_row_ids) == 0: - raise RuntimeError('Unable to find row id') - else: - # copy from right to left - idx = right_data.get_col_index(r_row_ids[0]) - left_data.set_column_values('__mindsdb_row_id', right_data.get_column_values(idx)) - l_row_ids = left_data.find_columns('__mindsdb_row_id') - elif len(r_row_ids) == 0: - # copy from left to right - idx = left_data.get_col_index(l_row_ids[0]) - right_data.set_column_values('__mindsdb_row_id', left_data.get_column_values(idx)) - r_row_ids = right_data.find_columns('__mindsdb_row_id') - - a_row_id = l_row_ids[0].get_hash_name(prefix='A') - b_row_id = r_row_ids[0].get_hash_name(prefix='B') - - join_condition = f'table_a.{a_row_id} = table_b.{b_row_id}' - - join_type = step.query.join_type.lower() - if join_type == 'join': - # join type is not specified. 
using join to prediction data - if left_data.is_prediction: - join_type = 'left join' - elif right_data.is_prediction: - join_type = 'right join' - else: - def adapt_condition(node, **kwargs): - if not isinstance(node, Identifier) or len(node.parts) != 2: - return - - table_alias, alias = node.parts - cols = left_data.find_columns(alias, table_alias) - if len(cols) == 1: - col_name = cols[0].get_hash_name(prefix='A') - return Identifier(parts=['table_a', col_name]) - - cols = right_data.find_columns(alias, table_alias) - if len(cols) == 1: - col_name = cols[0].get_hash_name(prefix='B') - return Identifier(parts=['table_b', col_name]) - - if step.query.condition is None: - # prevent memory overflow - if len(left_data) * len(right_data) < 10 ** 7: - step.query.condition = BinaryOperation(op='=', args=[Constant(0), Constant(0)]) - else: - raise NotSupportedYet('Unable to join table without condition') - - condition = copy.deepcopy(step.query.condition) - query_traversal(condition, adapt_condition) - - join_condition = SqlalchemyRender('postgres').get_string(condition) - join_type = step.query.join_type - - table_a, names_a = left_data.to_df_cols(prefix='A') - table_b, names_b = right_data.to_df_cols(prefix='B') - - query = f""" - SELECT * FROM table_a {join_type} table_b - ON {join_condition} - """ - resp_df, _description = query_df_with_type_infer_fallback(query, { - 'table_a': table_a, - 'table_b': table_b - }) - - resp_df.replace({np.nan: None}, inplace=True) - - names_a.update(names_b) - data = ResultSet.from_df_cols(df=resp_df, columns_dict=names_a) - - for col in data.find_columns('__mindsdb_row_id'): - data.del_column(col) - - return data diff --git a/mindsdb/api/executor/sql_query/steps/map_reduce_step.py b/mindsdb/api/executor/sql_query/steps/map_reduce_step.py deleted file mode 100644 index 71dd2781d74..00000000000 --- a/mindsdb/api/executor/sql_query/steps/map_reduce_step.py +++ /dev/null @@ -1,146 +0,0 @@ -import copy - -from mindsdb_sql_parser.ast import 
( - BinaryOperation, - UnaryOperation, - Constant, -) -from mindsdb.api.executor.planner.steps import ( - MapReduceStep, - FetchDataframeStep, - MultipleSteps, - SubSelectStep, -) - -from mindsdb.api.executor.sql_query.result_set import ResultSet -from mindsdb.api.executor.exceptions import LogicError -from mindsdb.utilities.partitioning import process_dataframe_in_partitions - -from .base import BaseStepCall - - -def markQueryVar(where): - if isinstance(where, BinaryOperation): - markQueryVar(where.args[0]) - markQueryVar(where.args[1]) - elif isinstance(where, UnaryOperation): - markQueryVar(where.args[0]) - elif isinstance(where, Constant): - if str(where.value).startswith('$var['): - where.is_var = True - where.var_name = where.value - - -def replaceQueryVar(where, var_value, var_name): - if isinstance(where, BinaryOperation): - replaceQueryVar(where.args[0], var_value, var_name) - replaceQueryVar(where.args[1], var_value, var_name) - elif isinstance(where, UnaryOperation): - replaceQueryVar(where.args[0], var_value, var_name) - elif isinstance(where, Constant): - if hasattr(where, 'is_var') and where.is_var is True and where.value == f'$var[{var_name}]': - where.value = var_value - - -def join_query_data(target, source): - if len(target.columns) == 0: - target = source - else: - target.add_from_result_set(source) - return target - - -class MapReduceStepCall(BaseStepCall): - - bind = MapReduceStep - - def call(self, step: MultipleSteps): - if step.reduce != 'union': - raise LogicError(f'Unknown MapReduceStep type: {step.reduce}') - - partition = getattr(step, 'partition', None) - - if partition is not None: - data = self._reduce_partition(step, partition) - - else: - data = self._reduce_vars(step) - - return data - - def _reduce_partition(self, step, partition): - if not isinstance(partition, int): - raise ValueError('Only integers are supported in partition definition.') - if partition <= 0: - raise ValueError('Partition must be a positive number') - - 
input_idx = step.values.step_num - input_data = self.steps_data[input_idx] - input_columns = list(input_data.columns) - - substeps = step.step - if not isinstance(substeps, list): - substeps = [substeps] - - data = ResultSet() - - df = input_data.get_raw_df() - - def callback(chunk): - return self._exec_partition(chunk, substeps, input_idx, input_columns) - - for result in process_dataframe_in_partitions(df, callback, partition): - if result: - data = join_query_data(data, result) - - return data - - def _exec_partition(self, df, substeps, input_idx, input_columns): - - input_data2 = ResultSet(columns=input_columns.copy()) - input_data2.add_raw_df(df) - - # execute with modified previous results - steps_data2 = self.steps_data.copy() - steps_data2[input_idx] = input_data2 - - sub_data = None - for substep in substeps: - sub_data = self.sql_query.execute_step(substep, steps_data=steps_data2) - steps_data2[substep.step_num] = sub_data - - return sub_data - - def _reduce_vars(self, step): - # extract vars - step_data = self.steps_data[step.values.step_num] - vars = [] - for row in step_data.get_records(): - var_group = {} - vars.append(var_group) - for name, value in row.items(): - if name != '__mindsdb_row_id': - var_group[name] = value - - substep = step.step - - data = ResultSet() - - for var_group in vars: - steps2 = copy.deepcopy(substep) - - self._fill_vars(steps2, var_group) - - sub_data = self.sql_query.execute_step(steps2) - data = join_query_data(data, sub_data) - - return data - - def _fill_vars(self, step, var_group): - if isinstance(step, MultipleSteps): - for substep in step.steps: - self._fill_vars(substep, var_group) - elif isinstance(step, (FetchDataframeStep, SubSelectStep)): - markQueryVar(step.query.where) - for name, value in var_group.items(): - replaceQueryVar(step.query.where, value, name) diff --git a/mindsdb/api/executor/sql_query/steps/multiple_step.py b/mindsdb/api/executor/sql_query/steps/multiple_step.py deleted file mode 100644 index 
791f5a66e29..00000000000 --- a/mindsdb/api/executor/sql_query/steps/multiple_step.py +++ /dev/null @@ -1,24 +0,0 @@ -from mindsdb.api.executor.planner.steps import MultipleSteps - -from mindsdb.api.executor.exceptions import NotSupportedYet - -from .base import BaseStepCall - - -class MultipleStepsCall(BaseStepCall): - - bind = MultipleSteps - - def call(self, step): - - if step.reduce != 'union': - raise NotSupportedYet(f"Only MultipleSteps with type = 'union' is supported. Got '{step.type}'") - data = None - for substep in step.steps: - subdata = self.sql_query.execute_step(substep) - if data is None: - data = subdata - else: - data.add_from_result_set(subdata) - - return data diff --git a/mindsdb/api/executor/sql_query/steps/prepare_steps.py b/mindsdb/api/executor/sql_query/steps/prepare_steps.py deleted file mode 100644 index 7b2950a8e5f..00000000000 --- a/mindsdb/api/executor/sql_query/steps/prepare_steps.py +++ /dev/null @@ -1,55 +0,0 @@ -from mindsdb_sql_parser.ast import ( - Identifier, - Constant, - Select, - Star, -) -from mindsdb.api.executor.planner.steps import ( - GetPredictorColumns, - GetTableColumns, -) - -from mindsdb.api.executor.sql_query.result_set import ResultSet -from mindsdb.utilities.types.column import Column -from mindsdb.utilities.config import config - -from .base import BaseStepCall - - -class GetPredictorColumnsCall(BaseStepCall): - bind = GetPredictorColumns - - def call(self, step): - mindsdb_database_name = config.get("default_project") - - predictor_name = step.predictor.parts[-1] - dn = self.session.datahub.get(mindsdb_database_name) - columns_names = dn.get_table_columns_names(predictor_name) - - data = ResultSet() - for column_name in columns_names: - data.add_column(Column(name=column_name, table_name=predictor_name, database=mindsdb_database_name)) - return data - - -class GetTableColumnsCall(BaseStepCall): - bind = GetTableColumns - - def call(self, step): - table = step.table - dn = self.session.datahub.get(step.namespace) 
- ds_query = Select(from_table=Identifier(table), targets=[Star()], limit=Constant(0)) - - response = dn.query(ds_query, session=self.session) - - data = ResultSet() - for column in response.columns: - data.add_column( - Column( - name=column["name"], - type=column.get("type"), - table_name=table, - database=self.context.get("database"), - ) - ) - return data diff --git a/mindsdb/api/executor/sql_query/steps/project_step.py b/mindsdb/api/executor/sql_query/steps/project_step.py deleted file mode 100644 index cb5e9db46b7..00000000000 --- a/mindsdb/api/executor/sql_query/steps/project_step.py +++ /dev/null @@ -1,86 +0,0 @@ -from collections import defaultdict - -from mindsdb_sql_parser.ast import ( - Identifier, - Select, - Star, -) -from mindsdb.api.executor.planner.steps import ProjectStep -from mindsdb.integrations.utilities.query_traversal import query_traversal - -from mindsdb.api.executor.sql_query.result_set import ResultSet -from mindsdb.api.executor.utilities.sql import query_df -from mindsdb.api.executor.exceptions import ( - KeyColumnDoesNotExist, - NotSupportedYet -) - -from .base import BaseStepCall - - -class ProjectStepCall(BaseStepCall): - - bind = ProjectStep - - def call(self, step): - result_set = self.steps_data[step.dataframe.step_num] - - df, col_names = result_set.to_df_cols() - col_idx = {} - tbl_idx = defaultdict(list) - for name, col in col_names.items(): - col_idx[col.alias] = name - col_idx[(col.table_alias, col.alias)] = name - # add to tables - tbl_idx[col.table_name].append(name) - if col.table_name != col.table_alias: - tbl_idx[col.table_alias].append(name) - - # analyze condition and change name of columns - def check_fields(node, is_table=None, **kwargs): - if is_table: - raise NotSupportedYet('Subqueries is not supported in target') - if isinstance(node, Identifier): - # only column name - col_name = node.parts[-1] - if isinstance(col_name, Star): - if len(node.parts) == 1: - # left as is - return - else: - # replace with all 
columns from table - table_name = node.parts[-2] - return [ - Identifier(parts=[col]) - for col in tbl_idx.get(table_name, []) - ] - - if len(node.parts) == 1: - key = col_name - else: - table_name = node.parts[-2] - key = (table_name, col_name) - - if key not in col_idx: - raise KeyColumnDoesNotExist(f'Table not found for column: {key}') - - new_name = col_idx[key] - return Identifier(parts=[new_name], alias=node.alias) - - query = Select( - targets=step.columns, - from_table=Identifier('df_table') - ) - - targets0 = query_traversal(query.targets, check_fields) - targets = [] - for target in targets0: - if isinstance(target, list): - targets.extend(target) - else: - targets.append(target) - query.targets = targets - - res = query_df(df, query, session=self.session) - - return ResultSet.from_df_cols(df=res, columns_dict=col_names, strict=False) diff --git a/mindsdb/api/executor/sql_query/steps/sql_steps.py b/mindsdb/api/executor/sql_query/steps/sql_steps.py deleted file mode 100644 index 5a2bea49feb..00000000000 --- a/mindsdb/api/executor/sql_query/steps/sql_steps.py +++ /dev/null @@ -1,41 +0,0 @@ -import pandas as pd - -from mindsdb.api.executor.planner.steps import ( - LimitOffsetStep, - DataStep, -) - -from mindsdb.api.executor.sql_query.result_set import ResultSet - -from .base import BaseStepCall - - -class LimitOffsetStepCall(BaseStepCall): - - bind = LimitOffsetStep - - def call(self, step): - step_data = self.steps_data[step.dataframe.step_num] - - df = step_data.get_raw_df() - - step_data2 = ResultSet(columns=list(step_data.columns)) - - if isinstance(step.offset, int): - df = df[step.offset:] - if isinstance(step.limit, int): - df = df[:step.limit] - - step_data2.add_raw_df(df) - - return step_data2 - - -class DataStepCall(BaseStepCall): - - bind = DataStep - - def call(self, step): - # create resultset - df = pd.DataFrame(step.data) - return ResultSet.from_df(df, database='', table_name='') diff --git 
a/mindsdb/api/executor/sql_query/steps/subselect_step.py b/mindsdb/api/executor/sql_query/steps/subselect_step.py deleted file mode 100644 index 40e3dfbd2f2..00000000000 --- a/mindsdb/api/executor/sql_query/steps/subselect_step.py +++ /dev/null @@ -1,243 +0,0 @@ -from collections import defaultdict - -import pandas as pd - -from mindsdb_sql_parser.ast import ( - Identifier, - Select, - Star, - Constant, - Function, - Variable, - BinaryOperation, -) - -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import SERVER_VARIABLES -from mindsdb.api.executor.planner.step_result import Result -from mindsdb.api.executor.planner.steps import SubSelectStep, QueryStep -from mindsdb.api.executor.sql_query.result_set import ResultSet -from mindsdb.utilities.types.column import Column -from mindsdb.api.executor.utilities.sql import query_df -from mindsdb.api.executor.exceptions import KeyColumnDoesNotExist -from mindsdb.integrations.utilities.query_traversal import query_traversal -from mindsdb.integrations.utilities.sql_utils import has_aggregate_function -from mindsdb.interfaces.query_context.context_controller import query_context_controller - -from .base import BaseStepCall -from .fetch_dataframe import get_fill_param_fnc - - -class SubSelectStepCall(BaseStepCall): - bind = SubSelectStep - - def call(self, step): - result = self.steps_data[step.dataframe.step_num] - - table_name = step.table_name - if table_name is None: - table_name = "df_table" - else: - table_name = table_name - - query = step.query - query.from_table = Identifier("df_table") - - if step.add_absent_cols and isinstance(query, Select): - query_cols = set() - - def f_all_cols(node, **kwargs): - if isinstance(node, Identifier): - query_cols.add(node.parts[-1]) - elif isinstance(node, Result): - prev_result = self.steps_data[node.step_num] - return Constant(prev_result.get_column_values(col_idx=0)[0]) - - query_traversal(query.where, f_all_cols) - - result_cols = [col.name for col in result.columns] - - 
for col_name in query_cols: - if col_name not in result_cols: - result.add_column(Column(name=col_name)) - - # inject previous step values - if isinstance(query, Select): - fill_params = get_fill_param_fnc(self.steps_data) - query_traversal(query, fill_params) - - df = result.to_df() - - if step.skip_for_aggregation: - # Check if query has aggregations and result is already aggregated (single row) - # If so, and the query is just selecting the aggregated columns, skip query_df - # to avoid re-aggregating already aggregated data - # TODO remove `len(df) == 1` condition - if isinstance(query, Select) and len(df) == 1: - has_aggregation = has_aggregate_function(query.targets) - if ( - has_aggregation - and query.where is None - and query.group_by is None - and query.order_by is None - and query.limit is None - ): - # Query is just aggregations with no WHERE, GROUP BY, ORDER BY, or LIMIT - # The result is already aggregated, so just return it as-is - database = result.columns[0].database if result.columns else None - return ResultSet.from_df(df, database, table_name) - - res = query_df(df, query, session=self.session) - - # get database from first column - database = result.columns[0].database - - return ResultSet.from_df(res, database, table_name) - - -class QueryStepCall(BaseStepCall): - bind = QueryStep - - def call(self, step: QueryStep): - query = step.query - - if step.from_table is not None: - if isinstance(step.from_table, pd.DataFrame): - result_set = ResultSet.from_df(step.from_table) - else: - result_set = self.steps_data[step.from_table.step_num] - else: - # only from_table can content result - prev_step_num = query.from_table.value.step_num - result_set = self.steps_data[prev_step_num] - - df, col_names = result_set.to_df_cols() - col_idx = {} - tbl_idx = defaultdict(list) - for name, col in col_names.items(): - col_idx[col.alias] = name - col_idx[(col.table_alias, col.alias)] = name - # add to tables - tbl_idx[col.table_name].append(name) - if 
col.table_name != col.table_alias: - tbl_idx[col.table_alias].append(name) - - lower_col_idx = {} - for key, value in col_idx.items(): - if isinstance(key, int): - key = str(key) - if isinstance(key, str): - lower_col_idx[key.lower()] = value - continue - lower_col_idx[tuple(str(x).lower() for x in key)] = value - - # get aliases of first level - aliases = [] - for col in query.targets: - if col.alias is not None: - aliases.append(col.alias.parts[0]) - - # analyze condition and change name of columns - def check_fields(node, is_target=None, **kwargs): - if isinstance(node, Function): - function_name = node.op.lower() - - functions_results = { - "database": self.session.database, - "current_user": self.session.username, - "user": self.session.username, - "version": "8.0.17", - "current_schema": "public", - "schema": "public", - "connection_id": self.context.get("connection_id"), - } - if function_name in functions_results: - return Constant(functions_results[function_name], alias=Identifier(parts=[function_name])) - - if isinstance(node, Variable): - var_name = node.value - column_name = f"@@{var_name}" - result = SERVER_VARIABLES.get(column_name) - if result is None: - raise ValueError(f"Unknown variable '{var_name}'") - else: - return Constant(result[0], alias=Identifier(parts=[column_name])) - - if isinstance(node, Identifier): - # only column name - col_name = node.parts[-1] - if is_target and isinstance(col_name, Star): - if len(node.parts) == 1: - # left as is - return - else: - # replace with all columns from table - table_name = node.parts[-2] - return [Identifier(parts=[col]) for col in tbl_idx.get(table_name, [])] - - if node.parts[-1].lower() == "session_user": - return Constant(self.session.username, alias=node) - if node.parts[-1].lower() == "$$": - # NOTE: sinve version 9.0 mysql client sends query 'select $$'. - # Connection can be continued only if answer is parse error. 
- raise ValueError( - "You have an error in your SQL syntax; check the manual that corresponds to your server " - "version for the right syntax to use near '$$' at line 1" - ) - - key, column_quoted = (), False - - match node.parts, node.is_quoted: - case [column_name], [column_quoted]: - if column_name in aliases: - # key is defined as alias - return - - key = column_name if column_quoted else column_name.lower() - - if key not in col_idx and key not in lower_col_idx: - # it can be local alias of a query, like: - # SELECT t1.a + t2.a col1, min(t1.a) c - # FROM dummy_data.tbl1 as t1 - # JOIN pg.tbl2 as t2 on t1.c=t2.c - # group by col1 - # order by c -- <--- "с" is alias - return - case [*_, table_name, column_name], [*_, column_quoted]: - key = (table_name, column_name) if column_quoted else (table_name.lower(), column_name.lower()) - - search_idx = col_idx if column_quoted else lower_col_idx - - if key not in search_idx: - raise KeyColumnDoesNotExist(f"Table not found for column: {key}") - - new_name = search_idx[key] - return Identifier(parts=[new_name], alias=node.alias, with_rollup=node.with_rollup) - - # fill params - fill_params = get_fill_param_fnc(self.steps_data) - query_traversal(query, fill_params) - - if not step.strict_where: - # remove conditions with not-existed columns. 
- # these conditions can be already used as input to model or knowledge base - # but can be absent in their output - - def remove_not_used_conditions(node, **kwargs): - if isinstance(node, BinaryOperation): - for arg in node.args: - if isinstance(arg, Identifier) and len(arg.parts) > 1: - key = tuple(arg.parts[-2:]) - if key not in col_idx: - # exclude - node.args = [Constant(0), Constant(0)] - node.op = "=" - - query_traversal(query.where, remove_not_used_conditions) - - query_traversal(query, check_fields) - query.where = query_context_controller.remove_lasts(query.where) - - query.from_table = Identifier("df_table") - res = query_df(df, query, session=self.session) - - return ResultSet.from_df_cols(df=res, columns_dict=col_names, strict=False) diff --git a/mindsdb/api/executor/sql_query/steps/union_step.py b/mindsdb/api/executor/sql_query/steps/union_step.py deleted file mode 100644 index 366ab9fa69b..00000000000 --- a/mindsdb/api/executor/sql_query/steps/union_step.py +++ /dev/null @@ -1,53 +0,0 @@ -from mindsdb.api.executor.planner.steps import UnionStep - -from mindsdb.api.executor.sql_query.result_set import ResultSet -from mindsdb.api.executor.exceptions import WrongArgumentError -from mindsdb.api.executor.utilities.sql import query_df_with_type_infer_fallback -import numpy as np - -from .base import BaseStepCall - - -class UnionStepCall(BaseStepCall): - bind = UnionStep - - def call(self, step): - left_result = self.steps_data[step.left.step_num] - right_result = self.steps_data[step.right.step_num] - - # count of columns have to match - if len(left_result.columns) != len(right_result.columns): - raise WrongArgumentError( - f"UNION columns count mismatch: {len(left_result.columns)} != {len(right_result.columns)} " - ) - - # types have to match - # TODO: return checking type later - # for i, left_col in enumerate(left_result.columns): - # right_col = right_result.columns[i] - # type1, type2 = left_col.type, right_col.type - # if type1 is not None and type2 
is not None: - # if type1 != type2: - # raise ErSqlWrongArguments(f'UNION types mismatch: {type1} != {type2}') - - table_a, names = left_result.to_df_cols() - table_b, _ = right_result.to_df_cols() - - if step.operation.lower() == "intersect": - op = "INTERSECT" - else: - op = "UNION" - - if step.unique is not True: - op += " ALL" - - query = f""" - SELECT * FROM table_a - {op} - SELECT * FROM table_b - """ - - resp_df, _description = query_df_with_type_infer_fallback(query, {"table_a": table_a, "table_b": table_b}) - resp_df.replace({np.nan: None}, inplace=True) - - return ResultSet.from_df_cols(df=resp_df, columns_dict=names) diff --git a/mindsdb/api/executor/sql_query/steps/update_step.py b/mindsdb/api/executor/sql_query/steps/update_step.py deleted file mode 100644 index 47d60301b4d..00000000000 --- a/mindsdb/api/executor/sql_query/steps/update_step.py +++ /dev/null @@ -1,127 +0,0 @@ -from mindsdb_sql_parser.ast import ( - BinaryOperation, - Identifier, - Constant, - Update, -) -from mindsdb.api.executor.planner.steps import UpdateToTable -from mindsdb.integrations.utilities.query_traversal import query_traversal - -from mindsdb.api.executor.sql_query.result_set import ResultSet -from mindsdb.api.executor.exceptions import WrongArgumentError - -from .base import BaseStepCall - - -class UpdateToTableCall(BaseStepCall): - - bind = UpdateToTable - - def call(self, step): - if len(step.table.parts) > 1: - integration_name = step.table.parts[0] - table_name_parts = step.table.parts[1:] - else: - integration_name = self.context['database'] - table_name_parts = step.table.parts - - dn = self.session.datahub.get(integration_name) - - result_step = step.dataframe - - params_map_index = [] - - if step.update_command.keys is not None: - result_data = self.steps_data[result_step.result.step_num] - - where = None - update_columns = {} - - key_columns = [i.to_string() for i in step.update_command.keys] - if len(key_columns) == 0: - raise WrongArgumentError('No key columns in 
update statement') - for col in result_data.columns: - name = col.name - value = Constant(None) - - if name in key_columns: - # put it to where - - condition = BinaryOperation( - op='=', - args=[Identifier(name), value] - ) - if where is None: - where = condition - else: - where = BinaryOperation( - op='and', - args=[where, condition] - ) - else: - # put to update - update_columns[name] = value - - params_map_index.append([name, value]) - - if len(update_columns) is None: - raise WrongArgumentError(f'No columns for update found in: {result_data.columns}') - - update_query = Update( - table=Identifier(parts=table_name_parts), - update_columns=update_columns, - where=where - ) - - else: - # make command - update_query = Update( - table=Identifier(parts=table_name_parts), - update_columns=step.update_command.update_columns, - where=step.update_command.where - ) - - if result_step is None: - # run as is - response = dn.query(query=update_query, session=self.session) - return ResultSet(affected_rows=response.affected_rows) - result_data = self.steps_data[result_step.result.step_num] - - # link nodes with parameters for fast replacing with values - input_table_alias = step.update_command.from_select_alias - if input_table_alias is None: - raise WrongArgumentError('Subselect in update requires alias') - - def prepare_map_index(node, is_table, **kwargs): - if isinstance(node, Identifier) and not is_table: - # is input table field - if node.parts[0] == input_table_alias.parts[0]: - node2 = Constant(None) - param_name = node.parts[-1] - params_map_index.append([param_name, node2]) - # replace node with constant - return node2 - elif node.parts[0] == table_name_parts[0]: - # remove updated table alias - node.parts = node.parts[1:] - - # do mapping - query_traversal(update_query, prepare_map_index) - - # check all params is input data: - data_header = [col.alias for col in result_data.columns] - - for param_name, _ in params_map_index: - if param_name not in data_header: - 
raise WrongArgumentError(f'Field {param_name} not found in input data. Input fields: {data_header}') - - # perform update - for row in result_data.get_records(): - # run update from every row from input data - - # fill params: - for param_name, param in params_map_index: - param.value = row[param_name] - - response = dn.query(query=update_query, session=self.session) - return ResultSet(affected_rows=response.affected_rows) diff --git a/mindsdb/api/executor/utilities/__init__.py b/mindsdb/api/executor/utilities/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/api/executor/utilities/functions.py b/mindsdb/api/executor/utilities/functions.py deleted file mode 100644 index fd95b729276..00000000000 --- a/mindsdb/api/executor/utilities/functions.py +++ /dev/null @@ -1,37 +0,0 @@ -import urllib -import tempfile -from pathlib import Path - -import requests - - -# def get_column_in_case(columns, name): -# ''' -# ''' -# candidates = [] -# name_lower = name.lower() -# for column in columns: -# if column.lower() == name_lower: -# candidates.append(column) -# if len(candidates) != 1: -# return None -# return candidates[0] - - -def download_file(url): - try: - parse_result = urllib.parse.urlparse(url) - scheme = parse_result.scheme - except ValueError: - raise Exception(f"Invalid url: {url}") - except Exception as e: - raise Exception(f"URL parsing error: {e}") from e - temp_dir = tempfile.mkdtemp(prefix="mindsdb_file_download_") - if scheme == "": - raise Exception(f"Unknown url schema: {url}") - - response = requests.get(url) - temp_file_path = Path(temp_dir).joinpath("file") - with open(str(temp_file_path), "wb") as file: - file.write(response.content) - return str(temp_file_path) diff --git a/mindsdb/api/executor/utilities/mysql_to_duckdb_functions.py b/mindsdb/api/executor/utilities/mysql_to_duckdb_functions.py deleted file mode 100644 index d667ca62440..00000000000 --- a/mindsdb/api/executor/utilities/mysql_to_duckdb_functions.py +++ 
/dev/null @@ -1,712 +0,0 @@ -import re -from mindsdb_sql_parser.ast import Identifier, Function, Constant, BinaryOperation, Interval, ASTNode, UnaryOperation - - -# ---- helper ----- - - -def cast(node: ASTNode, typename: str) -> BinaryOperation: - return BinaryOperation("::", args=[node, Identifier(typename)]) - - -def date_part(node, part): - """ - Wrap element into DATE_PART function - - Docs: - https://duckdb.org/docs/stable/sql/functions/date#date_partpart-date - """ - node.args = apply_nested_functions(node.args) - - if len(node.args) != 1: - raise ValueError(f"Wrong arguments: {node.args}") - - return Function("DATE_PART", args=[Constant(part), cast(node.args[0], "date")]) - - -# ------------------------------ - - -def char_fn(node: Function) -> Function | None: - """Replace MySQL's multy-arg CHAR call to chain of DuckDB's CHR calls - - Example: - CHAR(77, 78, 79) => CHR(77) || CHR(78) || CHR(79) - - Args: - node (Function): Function node to adapt - - Returns: - Function | None: Adapted function node - """ - if len(node.args) == 1: - node.op = "chr" - return node - - acc = None - for arg in node.args: - fn = Function(op="chr", args=[arg]) - if acc is None: - acc = fn - continue - acc = BinaryOperation("||", args=[acc, fn]) - - acc.parentheses = True - acc.alias = node.alias - return acc - - -def locate_fn(node: Function) -> Function | None: - """Replace MySQL's LOCATE (or INSTR) call to DuckDB's STRPOS call - - Example: - LOCATE('bar', 'foobarbar') => STRPOS('foobarbar', 'bar') - INSTR('foobarbar', 'bar') => STRPOS('foobarbar', 'bar') - LOCATE('bar', 'foobarbar', 3) => ValueError (there is no analogue in DuckDB) - - Args: - node (Function): Function node to adapt - - Returns: - Function | None: Adapted function node - - Raises: - ValueError: If the function has 3 arguments - """ - if len(node.args) == 3: - raise ValueError("MySQL LOCATE function with 3 arguments is not supported") - if node.op == "locate": - node.args = [node.args[1], node.args[0]] - elif 
node.op == "insrt": - node.args = [node.args[0], node.args[1]] - node.op = "strpos" - - -def unhex_fn(node: Function) -> None: - """Check MySQL's UNHEX function call arguments to ensure they are strings, - because DuckDB's UNHEX accepts only string arguments, while MySQL's UNHEX can accept integer arguments. - NOTE: if return dataframe from duckdb then unhex values are array - this may be an issue - - Args: - node (Function): Function node to adapt - - Returns: - None - - Raises: - ValueError: If the function argument is not a string - """ - for arg in node.args: - if not isinstance(arg, (str, bytes)): - raise ValueError("MySQL UNHEX function argument must be a string") - - -def format_fn(node: Function) -> None: - """Adapt MySQL's FORMAT function to DuckDB's FORMAT function - - Example: - FORMAT(1234567.89, 0) => FORMAT('{:,.0f}', 1234567.89) - FORMAT(1234567.89, 2) => FORMAT('{:,.2f}', 1234567.89) - FORMAT(name, 2) => FORMAT('{:,.2f}', name) - FORMAT('{:.2f}', 1234567.89) => FORMAT('{:,.2f}', 1234567.89) # no changes for original style - - Args: - node (Function): Function node to adapt - - Returns: - None - - Raises: - ValueError: If MySQL's function has 3rd 'locale' argument, like FORMAT(12332.2, 2, 'de_DE') - """ - match node.args[0], node.args[1]: - case Constant(value=(int() | float())), Constant(value=int()): - ... - case Identifier(), Constant(value=int()): - ... 
- case _: - return node - - if len(node.args) > 2: - raise ValueError("'locale' argument of 'format' function is not supported") - decimal_places = node.args[1].value - - if isinstance(node.args[0], Constant): - node.args[1].value = node.args[0].value - node.args[0].value = f"{{:,.{decimal_places}f}}" - else: - node.args[1] = node.args[0] - node.args[0] = Constant(f"{{:,.{decimal_places}f}}") - - -def sha2_fn(node: Function) -> None: - """Adapt MySQL's SHA2 function to DuckDB's SHA256 function - - Example: - SHA2('test', 256) => SHA256('test') - - Args: - node (Function): Function node to adapt - - Returns: - None - - Raises: - ValueError: If the function has more than 1 argument or the argument is not 256 - """ - if len(node.args) > 1 and node.args[1].value != 256: - raise ValueError("Only sha256 is supported") - node.op = "sha256" - node.args = [node.args[0]] - - -def length_fn(node: Function) -> None: - """Adapt MySQL's LENGTH function to DuckDB's STRLEN function - NOTE: duckdb also have LENGTH, therefore it can not be used - - Example: - LENGTH('test') => STRLEN('test') - - Args: - node (Function): Function node to adapt - - Returns: - None - """ - node.op = "strlen" - - -def regexp_substr_fn(node: Function) -> None: - """Adapt MySQL's REGEXP_SUBSTR function to DuckDB's REGEXP_EXTRACT function - - Example: - REGEXP_SUBSTR('foobarbar', 'bar', 1, 1) => REGEXP_EXTRACT('foobarbar', 'bar') - - Args: - node (Function): Function node to adapt - - Returns: - None - - Raises: - ValueError: If the function has more than 2 arguments or 3rd or 4th argument is not 1 - """ - if ( - len(node.args) == 3 - and node.args[2].value != 1 - or len(node.args) == 4 - and (node.args[3].value != 1 or node.args[2].value != 1) - or len(node.args) > 4 - ): - raise ValueError("Only 2 arguments are supported for REGEXP_SUBSTR function") - node.args = node.args[:2] - node.op = "regexp_extract" - - -def substring_index_fn(node: Function) -> BinaryOperation | Function: - """Adapt MySQL's 
SUBSTRING_INDEX function to DuckDB's SPLIT_PART function - - Example: - SUBSTRING_INDEX('a.b.c.d', '.', 1) => SPLIT_PART('a.b.c.d', '.', 1) - SUBSTRING_INDEX('a.b.c.d', '.', 2) => CONCAT_WS('.', SPLIT_PART('a.b.c.d', '.', 1), SPLIT_PART('a.b.c.d', '.', 2)) - - Args: - node (Function): Function node to adapt - - Returns: - BinaryOperation | Function: Binary operation node or function node - - Raises: - ValueError: If the function has more than 3 arguments or the 3rd argument is not 1 - """ - if len(node.args[1].value) > 1: - raise ValueError("Only one car in separator") - - if node.args[2].value == 1: - node.op = "split_part" - return node - - acc = [node.args[1]] - for i in range(node.args[2].value): - fn = Function(op="split_part", args=[node.args[0], node.args[1], Constant(i + 1)]) - acc.append(fn) - - acc = Function(op="concat_ws", args=acc) - acc.alias = node.alias - return acc - - -def curtime_fn(node: Function) -> BinaryOperation: - """Adapt MySQL's CURTIME function to DuckDB's GET_CURRENT_TIME function. - To get the same type as MySQL's CURTIME function, we need to cast the result to time type. - - Example: - CURTIME() => GET_CURRENT_TIME()::time - - Args: - node (Function): Function node to adapt - - Returns: - BinaryOperation: Binary operation node - """ - return cast(Function(op="get_current_time", args=[]), "time") - - -def timestampdiff_fn(node: Function) -> None: - """Adapt MySQL's TIMESTAMPDIFF function to DuckDB's DATE_DIFF function - NOTE: Looks like cast string args to timestamp works in most cases, but there may be some exceptions. 
- - Example: - TIMESTAMPDIFF(YEAR, '2000-02-01', '2003-05-01') => DATE_DIFF('year', timestamp '2000-02-01', timestamp '2003-05-01') - - Args: - node (Function): Function node to adapt - - Returns: - None - """ - node.op = "date_diff" - node.args[0] = Constant(node.args[0].parts[0]) - node.args[1] = cast(node.args[1], "timestamp") - node.args[2] = cast(node.args[2], "timestamp") - - -def extract_fn(node: Function) -> None: - """Adapt MySQL's EXTRACT function to DuckDB's EXTRACT function - TODO: multi-part args, like YEAR_MONTH, is not supported yet - NOTE: Looks like adding 'timestamp' works in most cases, but there may be some exceptions. - - Example: - EXTRACT(YEAR FROM '2000-02-01') => EXTRACT('year' from timestamp '2000-02-01') - - Args: - node (Function): Function node to adapt - - Returns: - None - """ - part = node.args[0].parts[0] - if part.upper() == "YEAR_MONTH": - node.args = apply_nested_functions([node.from_arg, Constant("%Y%m")]) - node.from_arg = None - date_format_fn(node) - return cast(node, "int") - elif part.upper() == "DAY_MINUTE": - node.args = apply_nested_functions([node.from_arg, Constant("%e%H%i")]) - node.from_arg = None - date_format_fn(node) - return cast(node, "int") - else: - node.args[0] = Constant(part) - if not isinstance(node.from_arg, Identifier): - node.from_arg = cast(node.from_arg, "timestamp") - - -def get_format_fn(node: Function) -> Constant: - """ - Replace function with a constant according to table: - Important! The parameters can be only constants. 
- - Example: GET_FORMAT(DATE, 'USA') => '%m.%d.%Y' - - Docs: - https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_get-format - """ - - if len(node.args) != 2: - raise ValueError("MySQL GET_FORMAT supports only 2 arguments") - - arg1, arg2 = node.args - - if not isinstance(arg1, Identifier) and len(arg1.parts) != 1: - raise ValueError(f"Unknown type: {arg1}") - - if not isinstance(arg2, Constant): - raise ValueError(f"Unknown format name: {arg2}") - - match arg1.parts[0].upper(), arg2.value.upper(): - case "DATE", "USA": - value = "%m.%d.%Y" - case "DATE", "JIS": - value = "%Y-%m-%d" - case "DATE", "ISO": - value = "%Y-%m-%d" - case "DATE", "EUR": - value = "%d.%m.%Y" - case "DATE", "INTERNAL": - value = "%Y%m%d" - - case "DATETIME", "USA": - value = "%Y-%m-%d %H.%i.%s" - case "DATETIME", "JIS": - value = "%Y-%m-%d %H:%i:%s" - case "DATETIME", "ISO": - value = "%Y-%m-%d %H:%i:%s" - case "DATETIME", "EUR": - value = "%Y-%m-%d %H.%i.%s" - case "DATETIME", "INTERNAL": - value = "%Y%m%d%H%i%s" - - case "TIME", "USA": - value = "%h:%i:%s %p" - case "TIME", "JIS": - value = "%H:%i:%s" - case "TIME", "ISO": - value = "%H:%i:%s" - case "TIME", "EUR": - value = "%H.%i.%s" - case "TIME", "INTERNAL": - value = "%H%i%s" - - case _: - value = "" - - return Constant(value) - - -def date_format_fn(node: Function): - """ - Adapt to strftime function and convert keys in format string. 
- - DATE_FORMAT('2009-10-04 22:23:00', '%W %M %Y') - => - strftime('2009-10-04 22:23:00'::datetime, '%A %B %Y') - - Docs: - https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_date-format - https://duckdb.org/docs/stable/sql/functions/timestamp.html#strftimetimestamp-format - https://duckdb.org/docs/stable/sql/functions/dateformat.html#format-specifiers - """ - specifiers_map = { - "%c": "%-m", # Month, numeric (0..12) -> Month as decimal - "%D": "%-d", # Day with English suffix -> Day as decimal (no suffix in DuckDB) - "%e": "%-d", # Day of month (0..31) -> Day as decimal - "%h": "%I", # Hour (01..12) - "%i": "%M", # Minutes - "%j": "%j", # Day of year - "%k": "%-H", # Hour (0..23) -> Hour as decimal - "%l": "%-I", # Hour (1..12) -> Hour as decimal - "%M": "%B", # Month name -> Full month name - "%r": "%I:%M:%S %p", # Time, 12-hour - "%s": "%S", # Seconds - "%T": "%X", # Time, 24-hour - "%u": "%V", # Week, mode 1, Monday is first day, can be wrong in the edges of year - "%v": "%V", # Week, mode 3, Monday is first day - "%V": "%U", # Week, mode 2, Sunday is first day, can be wrong in the edges of year - "%W": "%A", # Weekday name -> Full weekday name - "%X": "%G", # Year for week - "%x": "%G", # Year for week - } - node.op = "strftime" - - node.args = apply_nested_functions(node.args) - - if len(node.args) != 2 or not isinstance(node.args[1], Constant): - raise ValueError(f"Wrong arguments: {node.args}") - - def repl_f(match): - specifier = match.group() - return specifiers_map.get(specifier, specifier) - - # adapt format string - node.args[1].value = re.sub(r"%[a-zA-Z]", repl_f, node.args[1].value) - - # add type casting - node.args[0] = cast(node.args[0], "timestamp") - - -def from_unixtime_fn(node): - """ - Adapt to make_timestamp function - FROM_UNIXTIME(1447430881) => make_timestamp((1447430881::int8 *1000000)) - - Docs: - https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_from-unixtime - 
https://duckdb.org/docs/stable/sql/functions/timestamp#make_timestampmicroseconds - """ - - if len(node.args) != 1: - raise ValueError(f"Wrong arguments: {node.args}") - - node.op = "make_timestamp" - - node.args[0] = BinaryOperation("*", args=[cast(node.args[0], "int8"), Constant(1_000_000)]) - - -def from_days_fn(node): - """ - Adapt to converting days to interval and adding to first day of the 0 year: - FROM_DAYS(735669) => '0000-01-01'::date + (735669 * INTERVAL '1 day') - - Docs: - https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_from-days - """ - node.args = apply_nested_functions(node.args) - - if len(node.args) != 1: - raise ValueError(f"Wrong arguments: {node.args}") - - return BinaryOperation( - op="+", - args=[ - BinaryOperation("::", args=[Constant("0000-01-01"), Identifier("date")]), - BinaryOperation("*", args=[node.args[0], Interval("1 day")]), - ], - ) - - -def dayofyear_fn(node): - """ - Addapt to DATE_PART: - DAYOFYEAR('2007-02-03') => DATE_PART('doy', '2007-02-03'::date) - - Docs: - https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_dayofyear - """ - - return date_part(node, "doy") - - -def dayofweek_fn(node): - """ - Addapt to DATE_PART: - DAYOFWEEK('2007-02-03'); => DATE_PART('dow', '2007-02-03'::date) + 1; - - Docs: - https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_dayofweek - """ - return BinaryOperation("+", args=[date_part(node, "dow"), Constant(1)]) - - -def dayofmonth_fn(node): - """ - Addapt to DATE_PART: - DAYOFMONTH('2007-02-03') => DATE_PART('day', '2007-02-03'::date) - - Docs: - https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_dayofmonth - """ - - return date_part(node, "day") - - -def dayname_fn(node): - """ - Use the same function with type casting - DAYNAME('2007-02-03') => DAYNAME('2007-02-03'::date) - - Docs: - https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_dayname - """ - if 
len(node.args) != 1: - raise ValueError(f"Wrong arguments: {node.args}") - - node.args[0] = cast(node.args[0], "date") - - -def curdate_fn(node): - """ - Replace the name of the function - CURDATE() => CURRENT_DATE() - - Docs: - https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_curdate - https://duckdb.org/docs/stable/sql/functions/date.html#current_date - """ - node.op = "CURRENT_DATE" - - -def datediff_fn(node): - """ - Change argument's order and cast to date: - DATEDIFF('2007-12-31 23:59:59','2007-11-30') => datediff('day',DATE '2007-11-30', DATE '2007-12-31 23:59:59') - - Docs: - https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_datediff - https://duckdb.org/docs/stable/sql/functions/date#date_diffpart-startdate-enddate - - """ - if len(node.args) != 2: - raise ValueError(f"Wrong arguments: {node.args}") - - arg1, arg2 = node.args - node.args = [Constant("day"), cast(arg2, "date"), cast(arg1, "date")] - - -def adddate_fn(node): - """ - Replace the name of the function and add type casting - Important! The second parameter can be only interval (not count of days). 
- SELECT ADDDATE('2008-01-02', INTERVAL 31 DAY) => SELECT DATE_ADD('2008-01-02'::date, INTERVAL 31 DAY) - - Docs: - https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_adddate - https://duckdb.org/docs/stable/sql/functions/date.html#date_adddate-interval - """ - if len(node.args) != 2: - raise ValueError(f"Wrong arguments: {node.args}") - - node.op = "DATE_ADD" - node.args[0] = cast(node.args[0], "timestamp") - - -def date_sub_fn(node): - """ - Use DATE_ADD with negative interval - SELECT DATE_SUB('1998-01-02', INTERVAL 31 DAY) => select DATE_ADD('1998-01-02'::date, -INTERVAL 31 DAY) - - Docs: - https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_date-add - https://duckdb.org/docs/stable/sql/functions/date.html#date_adddate-interval - """ - if len(node.args) != 2: - raise ValueError(f"Wrong arguments: {node.args}") - - node.op = "DATE_ADD" - node.args[0] = cast(node.args[0], "timestamp") - node.args[1] = UnaryOperation("-", args=[node.args[1]]) - - -def addtime_fn(node): - """ - Convert second parameter into interval. - Important! - - The second parameter can be only a constant. 
- - The first parameter can be only date/datetime (not just time) - - ADDTIME('2007-12-31', '1 1:1:1.2') - => - DATE_ADD('2007-12-31'::timestamp, INTERVAL '1 day 1 hour 1 minute 1.2 second') - - Docs: - https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_addtime - https://duckdb.org/docs/stable/sql/functions/date.html#date_adddate-interval - """ - node.args = apply_nested_functions(node.args) - - if len(node.args) != 2: - raise ValueError(f"Wrong arguments: {node.args}") - - interval = node.args[1] - if not isinstance(interval, Constant) or not isinstance(interval.value, str): - raise ValueError(f"The second argument have to be string: {node.args[1]}") - - pattern = r"^(?:(\d+)\s+)?(?:(\d+):)?(?:(\d+):)?(\d+)(?:\.(\d+))?$" - - match = re.match(pattern, interval.value) - if not match: - raise ValueError(f"Invalid MySQL time format: {interval.value}") - - # Extract components - days, hours, minutes, seconds, fractional = match.groups() - # Build interval string - parts = [] - if days and int(days) > 0: - parts.append(f"{days} day") - - if hours and int(hours) > 0: - parts.append(f"{int(hours)} hour") - - if minutes and int(minutes) > 0: - parts.append(f"{int(minutes)} minute") - - seconds = int(seconds) if seconds else 0 - fractional = float(f"0.{fractional}") if fractional else 0.0 - total_seconds = seconds + fractional - if total_seconds > 0: - seconds_str = str(total_seconds).rstrip("0").rstrip(".") - parts.append(f"{seconds_str} second") - - # If all components are zero, return 0 seconds - if not parts: - interval_str = "0 second" - else: - interval_str = " ".join(parts) - - return Function( - "DATE_ADD", - args=[ - cast(node.args[0], "timestamp"), - Interval(interval_str), - ], - ) - - -def convert_tz_fn(node): - """ - Concatenate timezone to first argument and cast it as timestamptz. Then use `timezone` function - Important! 
Duckdb doesn't recognize timezones in digital formats: +10:00 - - CONVERT_TZ('2004-01-01 12:00:00','GMT','MET') - => - timezone('MET', ('2004-01-01 12:00:00' || ' ' || 'GMT')::timestamptz); - - Docs: - https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_convert-tz - https://duckdb.org/docs/stable/sql/functions/timestamptz.html#timezonetext-timestamp - """ - node.args = apply_nested_functions(node.args) - - if len(node.args) != 3: - raise ValueError(f"Wrong arguments: {node.args}") - - date, tzfrom, tzto = node.args - - # concatenate tz name: date || ' ' || tzfrom - tzdate = BinaryOperation("||", args=[BinaryOperation("||", args=[date, Constant(" ")]), tzfrom], parentheses=True) - - return Function( - "timezone", - args=[ - tzto, - cast(tzdate, "timestamptz"), - ], - ) - - -def apply_nested_functions(args): - args2 = [] - for arg in args: - if isinstance(arg, Function): - fnc = mysql_to_duckdb_fnc(arg) - if args2 is not None: - arg = fnc(arg) - args2.append(arg) - return args2 - - -def mysql_to_duckdb_fnc(node): - fnc_name = node.op.lower() - - mysql_to_duck_fn_map = { - "char": char_fn, - "locate": locate_fn, - "insrt": locate_fn, - "unhex": unhex_fn, - "format": format_fn, - "sha2": sha2_fn, - "length": length_fn, - "regexp_substr": regexp_substr_fn, - "substring_index": substring_index_fn, - "curtime": curtime_fn, - "timestampdiff": timestampdiff_fn, - "extract": extract_fn, - "get_format": get_format_fn, - "date_format": date_format_fn, - "from_unixtime": from_unixtime_fn, - "from_days": from_days_fn, - "dayofyear": dayofyear_fn, - "dayofweek": dayofweek_fn, - "day": dayofmonth_fn, - "dayofmonth": dayofmonth_fn, - "dayname": dayname_fn, - "curdate": curdate_fn, - "datediff": datediff_fn, - "adddate": adddate_fn, - "date_sub": date_sub_fn, - "date_add": adddate_fn, - "addtime": addtime_fn, - "convert_tz": convert_tz_fn, - } - if fnc_name in mysql_to_duck_fn_map: - return mysql_to_duck_fn_map[fnc_name] diff --git 
a/mindsdb/api/executor/utilities/sql.py b/mindsdb/api/executor/utilities/sql.py deleted file mode 100644 index f02a9e02d67..00000000000 --- a/mindsdb/api/executor/utilities/sql.py +++ /dev/null @@ -1,352 +0,0 @@ -import copy -from typing import List - -import duckdb -from duckdb import InvalidInputException -import numpy as np -import orjson -import psutil -import pandas as pd - -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast import ASTNode, Select, Identifier, Function, Constant - -from mindsdb.integrations.utilities.query_traversal import query_traversal -from mindsdb.utilities import log -from mindsdb.utilities.exception import QueryError -from mindsdb.utilities.functions import resolve_table_identifier, resolve_model_identifier -from mindsdb.utilities.json_encoder import CustomJSONEncoder -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.api.executor.utilities.mysql_to_duckdb_functions import mysql_to_duckdb_fnc - -logger = log.getLogger(__name__) - - -def _get_query_tables(query: ASTNode, resolve_function: callable, default_database: str = None) -> List[tuple]: - """Find all tables/models in the query - - Args: - query (ASTNode): query - resolve_function (callable): function apply to identifier - default_database (str): database name that will be used if there is no db name in identifier - - Returns: - List[tuple]: list with (db/project name, table name, version) - """ - tables = [] - - def _get_tables(node, is_table, **kwargs): - if is_table and isinstance(node, Identifier): - table = resolve_function(node) - if table[0] is None: - table = (default_database,) + table[1:] - tables.append(table) - - query_traversal(query, _get_tables) - return tables - - -def get_query_tables(query: ASTNode, default_database: str = None) -> List[tuple]: - return _get_query_tables(query, resolve_table_identifier, default_database) - - -def get_query_models(query: ASTNode, default_database: str = None) -> List[tuple]: - 
return _get_query_tables(query, resolve_model_identifier, default_database) - - -def query_df_with_type_infer_fallback(query_str: str, dataframes: dict, user_functions=None, prevent_oom=True): - """Duckdb need to infer column types if column.dtype == object. By default it take 1000 rows, - but that may be not sufficient for some cases. This func try to run query multiple times - increasing butch size for type infer - - Args: - query_str (str): query to execute - dataframes (dict): dataframes - user_functions: functions controller which register new functions in connection - prevent_oom: get results if the safe way. Raise exception before memory overflow and application is killed - - Returns: - pandas.DataFrame - pandas.columns - - Raises: - QueryError: Raised when DuckDB fails to execute the query - """ - - try: - with duckdb.connect(database=":memory:") as con: - if user_functions: - user_functions.register(con) - - for name, value in dataframes.items(): - con.register(name, value) - - exception = None - for sample_size in [1000, 10000, 1000000]: - try: - # 80% from free RAM - available_ram_mb = int(psutil.virtual_memory().available * 0.8 / 1024**2) - con.execute(f"SET memory_limit = '{available_ram_mb}MB';") - - con.execute(f"set global pandas_analyze_sample={sample_size};") - if not prevent_oom: - result_df = con.execute(query_str).fetchdf() - else: - con.execute(query_str) - all_chunks = [] - total_rows = 0 - ram_per_row = None - available_ram = None - while True: - # get ~4M rows - chunk = con.fetch_df_chunk(2000) - if len(chunk) == 0: - break - - total_rows += len(chunk) - - if len(all_chunks) > 0: - # start to check only from second iteration, to skip the cases when result is less 4M rows - if ram_per_row is None: - # probe ram usage on first 100 rows - ram_per_row = chunk[:100].memory_usage(deep=True).sum() / 100 - - available_ram = psutil.virtual_memory().available - - # reserve *2.4 more memory that object occupies because it might be required in 
subsequent - # usage of result (pd.concat and in next steps of planner). +1GB as free reserve - if total_rows * ram_per_row * 2.4 + 1024**3 > available_ram: - raise RuntimeError( - f"DuckDB query result doesn't fit into RAM. Total rows in result exceeds {total_rows}. " - f"If you're joining across databases: try to add WHERE conditions to tables to reduce amount of data before the join" - ) - - all_chunks.append(chunk) - if len(all_chunks) == 0: - # if no data, we need an empty dataframe with columns - result_df = chunk - elif len(all_chunks) == 1: - result_df = all_chunks[0] - else: - result_df = pd.concat(all_chunks) - del all_chunks - - except InvalidInputException as e: - exception = e - else: - break - else: - raise exception - description = con.description - except InvalidInputException as e: - raise QueryError( - db_type="DuckDB", - db_error_msg=f"DuckDB failed to execute query, likely due to inability to determine column data types. Details: {e}", - failed_query=query_str, - is_external=False, - is_expected=False, - ) from e - except Exception as e: - raise QueryError( - db_type="DuckDB", db_error_msg=str(e), failed_query=query_str, is_external=False, is_expected=False - ) from e - - return result_df, description - - -_duckdb_functions_and_kw_list = None - - -def get_duckdb_functions_and_kw_list() -> list[str] | None: - """Returns a list of all functions and keywords supported by DuckDB. - The list is merge of: - - list of duckdb's functions: 'select * from duckdb_functions()' or 'pragma functions' - - ist of keywords, because of some functions are just sintax-sugar - and not present in the duckdb_functions (like 'if()'). - - hardcoded list of window_functions, because there are no way to get if from duckdb, - and they are not present in the duckdb_functions() - - Returns: - list[str] | None: List of supported functions and keywords, or None if unable to retrieve the list. 
- """ - global _duckdb_functions_and_kw_list - window_functions_list = [ - "cume_dist", - "dense_rank", - "first_value", - "lag", - "last_value", - "lead", - "nth_value", - "ntile", - "percent_rank", - "rank_dense", - "rank", - "row_number", - ] - if _duckdb_functions_and_kw_list is None: - try: - df, _ = query_df_with_type_infer_fallback( - """ - select distinct name - from ( - select function_name as name from duckdb_functions() - union all - select keyword_name as name from duckdb_keywords() - ) ta; - """, - dataframes={}, - ) - df.columns = [name.lower() for name in df.columns] - _duckdb_functions_and_kw_list = df["name"].drop_duplicates().str.lower().to_list() + window_functions_list - except Exception as e: - logger.warning(f"Unable to get DuckDB functions list: {e}") - - return _duckdb_functions_and_kw_list - - -def query_df(df, query, session=None): - """Perform simple query ('select' from one table, without subqueries and joins) on DataFrame. - - Args: - df (pandas.DataFrame): data - query (mindsdb_sql_parser.ast.Select | str): select query - - Returns: - pandas.DataFrame - """ - - if isinstance(query, str): - query_ast = parse_sql(query) - query_str = query - else: - query_ast = copy.deepcopy(query) - query_str = str(query) - - if isinstance(query_ast, Select) is False or isinstance(query_ast.from_table, Identifier) is False: - raise QueryError( - db_type="DuckDB", - db_error_msg="Only 'SELECT from TABLE' statements supported for internal query", - failed_query=query_str, - is_external=False, - is_expected=False, - ) - - query_ast.from_table.parts = ["df"] - - return query_dfs({"df": df}, query_ast, session=session) - - -def query_dfs(dataframes, query_ast, session=None): - json_columns = set() - - if session is not None: - user_functions = session.function_controller.create_function_set() - else: - user_functions = None - - # region collect table aliases. Strip schema/db prefix from column identifiers, but keep table aliases. 
- # Examples: - # files.col = 1 -> col = 1 (schema prefix stripped) - # files.a1.col = 1 -> a1.col = 1 (schema prefix stripped, alias kept) - # a1.col = a2.col -> a1.col = a2.col (aliases untouched, no schema prefix) - # "Custom SQL Query".col -> col (replaced subquery alias stripped) - known_aliases = set() - - def collect_aliases(node, is_table, **kwargs): - if not is_table or not isinstance(node, Identifier): - return - known_aliases.add(node.parts[-1].lower()) - if node.alias is not None: - known_aliases.add(node.alias.parts[-1].lower()) - - query_traversal(query_ast, collect_aliases) - # endregion - - def adapt_query(node, is_table, **kwargs): - if is_table: - return - if isinstance(node, Identifier): - if len(node.parts) > 1 and node.parts[0].lower() not in known_aliases: - node.parts = node.parts[1:] - return node - if isinstance(node, Function): - fnc = mysql_to_duckdb_fnc(node) - if fnc is not None: - node2 = fnc(node) - if node2 is not None: - # copy alias - node2.alias = node.alias - return node2 - - fnc_name = node.op.lower() - if fnc_name == "database" and len(node.args) == 0: - if session is not None: - cur_db = session.database - else: - cur_db = None - return Constant(cur_db) - elif fnc_name == "truncate": - # replace mysql 'truncate' function to duckdb 'round' - node.op = "round" - if len(node.args) == 1: - node.args.append(0) - elif fnc_name == "json_extract": - json_columns.add(node.args[0].parts[-1]) - else: - if user_functions is not None: - user_functions.check_function(node) - - duckdb_functions_and_kw_list = get_duckdb_functions_and_kw_list() or [] - custom_functions_list = [] if user_functions is None else list(user_functions.functions.keys()) - all_functions_list = duckdb_functions_and_kw_list + custom_functions_list - if len(all_functions_list) > 0 and fnc_name not in all_functions_list: - raise QueryError( - db_type="DuckDB", - db_error_msg=( - f"Unknown function: '{fnc_name}'. 
This function is not recognized during internal query processing.\n" - "Please use DuckDB-supported functions instead." - ), - failed_query=str(query_ast), - is_external=False, - is_expected=False, - ) - - query_traversal(query_ast, adapt_query) - - def _convert(v): - if isinstance(v, dict) or isinstance(v, list): - try: - default_encoder = CustomJSONEncoder().default - return orjson.dumps( - v, default=default_encoder, option=orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_PASSTHROUGH_DATETIME - ).decode("utf-8") - except Exception: - pass - return v - - render = SqlalchemyRender("postgres") - try: - query_str = render.get_string(query_ast, with_failback=False) - except Exception: - logger.exception(f"Exception during query casting to 'postgres' dialect. Query:\n{str(query_ast)}.\nError:") - query_str = render.get_string(query_ast, with_failback=True) - - for table_name, df in dataframes.items(): - for column in json_columns: - df[column] = df[column].apply(_convert) - - if len(df) > 0: - # workaround to prevent duckdb.TypeMismatchException - for sys_name, sys_col in ( - ("models", "TRAINING_OPTIONS"), - ("predictors", "TRAINING_OPTIONS"), - ("ml_engines", "CONNECTION_DATA"), - ): - if table_name.lower() in sys_name and sys_col in df.columns: - df[sys_col] = df[sys_col].astype("string") - - result_df, description = query_df_with_type_infer_fallback(query_str, dataframes, user_functions=user_functions) - result_df.replace({np.nan: None}, inplace=True) - result_df.columns = [x[0] for x in description] - return result_df diff --git a/mindsdb/api/http/__init__.py b/mindsdb/api/http/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/api/http/gui.py b/mindsdb/api/http/gui.py deleted file mode 100644 index 88d5953b266..00000000000 --- a/mindsdb/api/http/gui.py +++ /dev/null @@ -1,89 +0,0 @@ -import os -import shutil -import tempfile -from pathlib import Path -from zipfile import ZipFile - -import requests -from packaging.version import 
Version - -from mindsdb.utilities.config import Config -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -def download_gui(destignation, version): - if isinstance(destignation, str): - destignation = Path(destignation) - dist_zip_path = str(destignation.joinpath("dist.zip")) - bucket = "https://mindsdb-web-builds.s3.amazonaws.com/" - - resources = [{"url": bucket + "dist-V" + version + ".zip", "path": dist_zip_path}] - - def get_resources(resource): - response = requests.get(resource["url"]) - if response.status_code != requests.status_codes.codes.ok: - raise Exception(f"Error {response.status_code} GET {resource['url']}") - open(resource["path"], "wb").write(response.content) - - try: - for r in resources: - get_resources(r) - except Exception: - logger.exception("Error during downloading files from s3:") - return False - - static_folder = destignation - static_folder.mkdir(mode=0o777, exist_ok=True, parents=True) - ZipFile(dist_zip_path).extractall(static_folder) - - if static_folder.joinpath("dist").is_dir(): - shutil.move(str(destignation.joinpath("dist").joinpath("index.html")), static_folder) - shutil.move(str(destignation.joinpath("dist").joinpath("assets")), static_folder) - shutil.rmtree(destignation.joinpath("dist")) - - os.remove(dist_zip_path) - - version_txt_path = destignation.joinpath("version.txt") - with open(version_txt_path, "wt") as f: - f.write(version) - - return True - - """ - # to make downloading faster download each resource in a separate thread - with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: - future_to_url = {executor.submit(get_resources, r): r for r in resources} - for future in concurrent.futures.as_completed(future_to_url): - res = future.result() - if res is not None: - raise res - """ - - -def update_static(gui_version: Version): - """Update Scout files basing on compatible-config.json content. - Files will be downloaded and updated if new version of GUI > current. 
- Current GUI version stored in static/version.txt. - """ - config = Config() - static_path = Path(config["paths"]["static"]) - - logger.info(f"New version of GUI available ({gui_version.base_version}). Downloading...") - - temp_dir = tempfile.mkdtemp(prefix="mindsdb_gui_files_") - success = download_gui(temp_dir, gui_version.base_version) - if success is False: - shutil.rmtree(temp_dir) - return False - - temp_dir_for_rm = tempfile.mkdtemp(prefix="mindsdb_gui_files_") - shutil.rmtree(temp_dir_for_rm) - shutil.copytree(str(static_path), temp_dir_for_rm) - shutil.rmtree(str(static_path)) - shutil.copytree(temp_dir, str(static_path)) - shutil.rmtree(temp_dir_for_rm) - - logger.info(f"GUI version updated to {gui_version.base_version}") - return True diff --git a/mindsdb/api/http/initialize.py b/mindsdb/api/http/initialize.py deleted file mode 100644 index f0d72aef0bb..00000000000 --- a/mindsdb/api/http/initialize.py +++ /dev/null @@ -1,479 +0,0 @@ -import os -import secrets -import mimetypes -import threading -import webbrowser - -from pathlib import Path -from http import HTTPStatus - - -import requests -from flask import Flask, url_for, request, send_from_directory -from flask_compress import Compress -from flask_restx import Api -from werkzeug.exceptions import HTTPException -from packaging.version import Version, parse as parse_version - -from mindsdb.__about__ import __version__ as mindsdb_version -from mindsdb.api.http.gui import update_static -from mindsdb.api.http.utils import http_error -from mindsdb.api.http.namespaces.agents import ns_conf as agents_ns -from mindsdb.api.http.namespaces.analysis import ns_conf as analysis_ns -from mindsdb.api.http.namespaces.auth import ns_conf as auth_ns -from mindsdb.api.http.namespaces.chatbots import ns_conf as chatbots_ns -from mindsdb.api.http.namespaces.jobs import ns_conf as jobs_ns -from mindsdb.api.http.namespaces.config import ns_conf as conf_ns -from mindsdb.api.http.namespaces.databases import ns_conf as 
databases_ns -from mindsdb.api.http.namespaces.default import ns_conf as default_ns, check_session_auth -from mindsdb.api.http.namespaces.file import ns_conf as file_ns -from mindsdb.api.http.namespaces.handlers import ns_conf as handlers_ns -from mindsdb.api.http.namespaces.integrations import ns_conf as integrations_ns -from mindsdb.api.http.namespaces.knowledge_bases import ns_conf as knowledge_bases_ns -from mindsdb.api.http.namespaces.models import ns_conf as models_ns -from mindsdb.api.http.namespaces.projects import ns_conf as projects_ns -from mindsdb.api.http.namespaces.sql import ns_conf as sql_ns -from mindsdb.api.http.namespaces.tab import ns_conf as tab_ns -from mindsdb.api.http.namespaces.tree import ns_conf as tree_ns -from mindsdb.api.http.namespaces.views import ns_conf as views_ns -from mindsdb.api.http.namespaces.util import ns_conf as utils_ns -from mindsdb.api.http.namespaces.webhooks import ns_conf as webhooks_ns -from mindsdb.interfaces.database.integrations import integration_controller -from mindsdb.interfaces.database.database import DatabaseController -from mindsdb.interfaces.file.file_controller import FileController -from mindsdb.interfaces.jobs.jobs_controller import JobsController -from mindsdb.interfaces.storage import db -from mindsdb.metrics.server import init_metrics -from mindsdb.utilities import log -from mindsdb.utilities.config import config, HTTP_AUTH_TYPE -from mindsdb.utilities.context import context as ctx -from mindsdb.utilities.json_encoder import ORJSONProvider -from mindsdb.utilities.ps import is_pid_listen_port, wait_func_is_true -from mindsdb.utilities.sentry import sentry_sdk # noqa: F401 -from mindsdb.utilities.otel import trace # noqa: F401 -from mindsdb.api.common.middleware import verify_pat -from mindsdb.utilities.constants import DEFAULT_COMPANY_ID, DEFAULT_USER_ID - -logger = log.getLogger(__name__) - - -class _NoOpFlaskInstrumentor: - def instrument_app(self, app): - pass - - -class 
_NoOpRequestsInstrumentor: - def instrument(self): - pass - - -try: - from opentelemetry.instrumentation.flask import FlaskInstrumentor - from opentelemetry.instrumentation.requests import RequestsInstrumentor -except ImportError: - logger.debug( - "OpenTelemetry is not avaiable. Please run `pip install -r requirements/requirements-opentelemetry.txt` to use it." - ) - FlaskInstrumentor = _NoOpFlaskInstrumentor - RequestsInstrumentor = _NoOpRequestsInstrumentor - - -class Swagger_Api(Api): - """ - This is a modification of the base Flask Restplus Api class due to the issue described here - https://github.com/noirbizarre/flask-restplus/issues/223 - """ - - @property - def specs_url(self): - return url_for(self.endpoint("specs"), _external=False) - - -def get_last_compatible_gui_version() -> Version | bool: - logger.debug("Getting last compatible frontend...") - try: - res = requests.get( - "https://mindsdb-web-builds.s3.amazonaws.com/compatible-config.json", - timeout=5, - ) - except (ConnectionError, requests.exceptions.ConnectionError) as e: - logger.error(f"Is no connection. 
{e}") - return False - except Exception as e: - logger.error(f"Is something wrong with getting compatible-config.json: {e}") - return False - - if res.status_code != 200: - logger.error(f"Cant get compatible-config.json: returned status code = {res.status_code}") - return False - - try: - versions = res.json() - except Exception as e: - logger.error(f"Cant decode compatible-config.json: {e}") - return False - - current_mindsdb_lv = parse_version(mindsdb_version) - - try: - gui_versions = {} - max_mindsdb_lv = None - max_gui_lv = None - for el in versions["mindsdb"]: - if el["mindsdb_version"] is None: - gui_lv = parse_version(el["gui_version"]) - else: - mindsdb_lv = parse_version(el["mindsdb_version"]) - gui_lv = parse_version(el["gui_version"]) - if mindsdb_lv.base_version not in gui_versions or gui_lv > gui_versions[mindsdb_lv.base_version]: - gui_versions[mindsdb_lv.base_version] = gui_lv - if max_mindsdb_lv is None or max_mindsdb_lv < mindsdb_lv: - max_mindsdb_lv = mindsdb_lv - if max_gui_lv is None or max_gui_lv < gui_lv: - max_gui_lv = gui_lv - - all_mindsdb_lv = [parse_version(x) for x in gui_versions.keys()] - all_mindsdb_lv.sort() - - if current_mindsdb_lv.base_version in gui_versions: - gui_version_lv = gui_versions[current_mindsdb_lv.base_version] - elif current_mindsdb_lv > all_mindsdb_lv[-1]: - gui_version_lv = max_gui_lv - else: - lower_versions = { - key: value for key, value in gui_versions.items() if parse_version(key) < current_mindsdb_lv - } - if len(lower_versions) == 0: - gui_version_lv = gui_versions[all_mindsdb_lv[0].base_version] - else: - all_lower_versions = [parse_version(x) for x in lower_versions.keys()] - gui_version_lv = gui_versions[all_lower_versions[-1].base_version] - except Exception: - logger.exception("Error in compatible-config.json structure") - return False - - logger.debug(f"Last compatible frontend version: {gui_version_lv}.") - return gui_version_lv - - -def get_current_gui_version() -> Version: - logger.debug("Getting 
current frontend version...") - static_path = Path(config["paths"]["static"]) - version_txt_path = static_path.joinpath("version.txt") - - current_gui_version = None - if version_txt_path.is_file(): - with open(version_txt_path, "rt") as f: - current_gui_version = f.readline() - - current_gui_lv = None if current_gui_version is None else parse_version(current_gui_version) - logger.debug(f"Current frontend version: {current_gui_lv}.") - - return current_gui_lv - - -def initialize_static(): - last_gui_version_lv = get_last_compatible_gui_version() - current_gui_version_lv = get_current_gui_version() - required_gui_version = config["gui"].get("version") - - if required_gui_version is not None: - required_gui_version_lv = parse_version(required_gui_version) - success = True - if current_gui_version_lv is None or required_gui_version_lv != current_gui_version_lv: - success = update_static(required_gui_version_lv) - else: - if last_gui_version_lv is False: - logger.debug( - "The number of the latest version has not been determined, " - f"so we will continue using the current version: {current_gui_version_lv}" - ) - return False - - if current_gui_version_lv == last_gui_version_lv: - logger.debug(f"The latest version is already in use: {current_gui_version_lv}") - return True - success = update_static(last_gui_version_lv) - - if db.session: - db.session.close() - return success - - -def initialize_app(is_restart: bool = False): - static_root = config["paths"]["static"] - logger.debug(f"Static route: {static_root}") - init_static_thread = None - if not is_restart: - gui_exists = Path(static_root).joinpath("index.html").is_file() - logger.debug(f"Does GUI already exist.. 
{'YES' if gui_exists else 'NO'}") - - if config["gui"]["autoupdate"] is True or (config["gui"]["open_on_start"] is True and gui_exists is False): - logger.debug("Initializing static...") - init_static_thread = threading.Thread(target=initialize_static, name="initialize_static") - init_static_thread.start() - else: - logger.debug(f"Skip initializing static: config['gui']={config['gui']}, gui_exists={gui_exists}") - - app, api = initialize_flask() - - if not is_restart and config["gui"]["open_on_start"]: - if init_static_thread is not None: - init_static_thread.join() - open_gui(init_static_thread) - - Compress(app) - - initialize_interfaces(app) - - if os.path.isabs(static_root) is False: - static_root = os.path.join(os.getcwd(), static_root) - static_root = Path(static_root) - - @app.route("/", defaults={"path": ""}, methods=["GET"]) - @app.route("/", methods=["GET"]) - def root_index(path): - if path.startswith("api/"): - return http_error( - HTTPStatus.NOT_FOUND, - "Not found", - "The endpoint you are trying to access does not exist on the server.", - ) - - try: - # Ensure the requested path is within the static directory - # https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.is_relative_to - requested_path = (static_root / path).resolve() - - if not requested_path.is_relative_to(static_root.resolve()): - return http_error( - HTTPStatus.FORBIDDEN, - "Forbidden", - "You are not allowed to access the requested resource.", - ) - - if requested_path.is_file(): - return send_from_directory(static_root, path) - else: - return send_from_directory(static_root, "index.html") - - except (ValueError, OSError): - return http_error( - HTTPStatus.BAD_REQUEST, - "Bad Request", - "Invalid path requested.", - ) - - protected_namespaces = [ - tab_ns, - utils_ns, - conf_ns, - file_ns, - sql_ns, - analysis_ns, - handlers_ns, - tree_ns, - projects_ns, - databases_ns, - views_ns, - models_ns, - chatbots_ns, - agents_ns, - jobs_ns, - knowledge_bases_ns, - integrations_ns, 
- ] - - for ns in protected_namespaces: - api.add_namespace(ns) - api.add_namespace(default_ns) - api.add_namespace(auth_ns) - api.add_namespace(webhooks_ns) - - @api.errorhandler(Exception) - def handle_exception(e): - logger.error(f"http exception: {e}") - # pass through HTTP errors - # NOTE flask_restx require 'message', also it modyfies 'application/problem+json' to 'application/json' - if isinstance(e, HTTPException): - return ( - {"title": e.name, "detail": e.description, "message": e.description}, - e.code, - {"Content-Type": "application/problem+json"}, - ) - return ( - { - "title": getattr(type(e), "__name__") or "Unknown error", - "detail": str(e), - "message": str(e), - }, - 500, - {"Content-Type": "application/problem+json"}, - ) - - @app.teardown_appcontext - def remove_session(*args, **kwargs): - db.session.remove() - - @app.before_request - def before_request(): - ctx.set_default() - - h = request.headers.get("Authorization") - if not h or not h.startswith("Bearer "): - bearer = None - else: - bearer = h.split(" ", 1)[1].strip() or None - - # region routes where auth is required - http_auth_type = config["auth"]["http_auth_type"] - if ( - config["auth"]["http_auth_enabled"] is True - and any(request.path.startswith(f"/api{ns.path}") for ns in protected_namespaces) - and ( - (http_auth_type == HTTP_AUTH_TYPE.SESSION and check_session_auth() is False) - or (http_auth_type == HTTP_AUTH_TYPE.TOKEN and verify_pat(bearer) is False) - or ( - http_auth_type == HTTP_AUTH_TYPE.SESSION_OR_TOKEN - and check_session_auth() is False - and verify_pat(bearer) is False - ) - ) - ): - logger.debug(f"Auth failed for path {request.path}") - return http_error( - HTTPStatus.UNAUTHORIZED, - "Unauthorized", - "Authorization is required to complete the request", - ) - # endregion - - company_id = request.headers.get("company-id") - user_id = request.headers.get("user-id") - user_class = request.headers.get("user-class") - enforce_user_id = 
request.headers.get("enforce-user-id") - - if user_class is not None: - try: - user_class = int(user_class) - except Exception as e: - logger.error(f"Could not parse user_class: {user_class} | exception: {e}") - user_class = 0 - else: - user_class = 0 - - ctx.company_id = company_id if company_id is not None else DEFAULT_COMPANY_ID - ctx.user_id = user_id if user_id is not None else DEFAULT_USER_ID - ctx.user_class = user_class - if enforce_user_id is not None: - ctx.enforce_user_id = enforce_user_id.lower() not in ("false", "0", "no", "") - - logger.debug("Done initializing app.") - return app - - -def initialize_flask(): - logger.debug("Initializing flask...") - # region required for windows https://github.com/mindsdb/mindsdb/issues/2526 - mimetypes.add_type("text/css", ".css") - mimetypes.add_type("text/javascript", ".js") - # endregion - - static_path = os.path.join(config["paths"]["static"], "static/") - if os.path.isabs(static_path) is False: - static_path = os.path.join(os.getcwd(), static_path) - kwargs = {"static_url_path": "/static", "static_folder": static_path} - logger.debug(f"Static path: {static_path}") - - app = Flask(__name__, **kwargs) - init_metrics(app) - - # Instrument Flask app and requests using either real or no-op instrumentors - FlaskInstrumentor().instrument_app(app) - RequestsInstrumentor().instrument() - - app.config["SEND_FILE_MAX_AGE_DEFAULT"] = 60 - app.config["SWAGGER_HOST"] = "http://localhost:8000/mindsdb" - app.json = ORJSONProvider(app) - - http_auth_type = config["auth"]["http_auth_type"] - authorizations = {} - security = [] - - if http_auth_type in (HTTP_AUTH_TYPE.SESSION, HTTP_AUTH_TYPE.SESSION_OR_TOKEN): - app.config["SECRET_KEY"] = os.environ.get("FLASK_SECRET_KEY", secrets.token_hex(32)) - app.config["SESSION_COOKIE_NAME"] = "session" - app.config["PERMANENT_SESSION_LIFETIME"] = config["auth"]["http_permanent_session_lifetime"] - authorizations["session"] = {"type": "apiKey", "in": "cookie", "name": "session"} - 
security.append(["session"]) - - if http_auth_type in (HTTP_AUTH_TYPE.TOKEN, HTTP_AUTH_TYPE.SESSION_OR_TOKEN): - authorizations["bearer"] = {"type": "apiKey", "in": "header", "name": "Authorization"} - security.append(["bearer"]) - - logger.debug("Creating swagger API..") - api = Swagger_Api( - app, - authorizations=authorizations, - security=security, - url_prefix=":8000", - prefix="/api", - doc="/doc/", - ) - - def __output_json_orjson(data, code, headers=None): - from flask import current_app, make_response - - dumped = current_app.json.dumps(data) - resp = make_response(dumped, code) - if headers: - resp.headers.extend(headers) - resp.mimetype = "application/json" - return resp - - api.representations["application/json"] = __output_json_orjson - - return app, api - - -def open_gui(init_static_thread): - port = config["api"]["http"]["port"] - host = config["api"]["http"]["host"] - - if host in ("", "0.0.0.0"): - url = f"http://127.0.0.1:{port}/" - else: - url = f"http://{host}:{port}/" - logger.info(f" - GUI available at {url}") - - pid = os.getpid() - thread = threading.Thread( - target=_open_webbrowser, - args=(url, pid, port, init_static_thread, config["paths"]["static"]), - daemon=True, - name="open_webbrowser", - ) - thread.start() - - -def initialize_interfaces(app): - app.integration_controller = integration_controller - app.database_controller = DatabaseController() - app.file_controller = FileController() - app.jobs_controller = JobsController() - - -def _open_webbrowser(url: str, pid: int, port: int, init_static_thread, static_folder): - """Open webbrowser with url when http service is started. - - If some error then do nothing. 
- """ - if init_static_thread is not None: - init_static_thread.join() - try: - is_http_active = wait_func_is_true(func=is_pid_listen_port, timeout=15, pid=pid, port=port) - if is_http_active: - webbrowser.open(url) - except Exception: - logger.exception(f"Failed to open {url} in webbrowser with exception:") - db.session.close() diff --git a/mindsdb/api/http/namespaces/__init__.py b/mindsdb/api/http/namespaces/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/api/http/namespaces/agents.py b/mindsdb/api/http/namespaces/agents.py deleted file mode 100644 index 3a7d6612499..00000000000 --- a/mindsdb/api/http/namespaces/agents.py +++ /dev/null @@ -1,399 +0,0 @@ -import os -import json -from http import HTTPStatus -from typing import Dict, Iterable, List - -import pandas as pd -from flask import request, Response -from flask_restx import Resource - -from mindsdb.interfaces.agents.agents_controller import AgentsController -from mindsdb.interfaces.agents.utils.data_catalog_builder import dataframe_to_markdown -from mindsdb.interfaces.storage import db -from mindsdb.api.http.utils import http_error -from mindsdb.api.http.namespaces.configs.projects import ns_conf -from mindsdb.api.executor.controllers.session_controller import SessionController -from mindsdb.metrics.metrics import api_endpoint_metrics -from mindsdb.utilities.log import getLogger -from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError - - -logger = getLogger(__name__) - - -AGENT_QUICK_RESPONSE = "I understand your request. I'm working on a detailed response for you." 
- - -def create_agent(project_name, name, agent): - if name is None: - return http_error(HTTPStatus.BAD_REQUEST, "Missing field", 'Missing "name" field for agent') - - params = agent.get("params", {}) - if agent.get("model"): - model = agent["model"] - elif "model_name" in agent: - model = {"model_name": agent.get("model_name"), "provider": agent.get("provider")} - else: - model = None - - if agent.get("data"): - params["data"] = agent["data"] - if agent.get("prompt_template"): - params["prompt_template"] = agent["prompt_template"] - - agents_controller = AgentsController() - - try: - existing_agent = agents_controller.get_agent(name, project_name=project_name) - except (ValueError, EntityNotExistsError): - # Project must exist. - return http_error(HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist") - if existing_agent is not None: - return http_error( - HTTPStatus.CONFLICT, - "Agent already exists", - f"Agent with name {name} already exists. Please choose a different one.", - ) - - try: - created_agent = agents_controller.add_agent(name=name, project_name=project_name, model=model, params=params) - return created_agent.as_dict(), HTTPStatus.CREATED - except (ValueError, EntityExistsError): - # Model doesn't exist. - return http_error( - HTTPStatus.NOT_FOUND, - "Resource not found", - f'The model "{model}" does not exist. Please ensure that the name is correct and try again.', - ) - except NotImplementedError: - # Free users trying to create agent. - return http_error( - HTTPStatus.UNAUTHORIZED, - "Unavailable to free users", - f'The model "{model}" does not exist. 
Please ensure that the name is correct and try again.', - ) - - -@ns_conf.route("//agents") -class AgentsResource(Resource): - @ns_conf.doc("list_agents") - @api_endpoint_metrics("GET", "/agents") - def get(self, project_name): - """List all agents""" - session = SessionController() - try: - all_agents = session.agents_controller.get_agents(project_name) - except EntityNotExistsError: - # Project needs to exist. - return http_error( - HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist" - ) - return [a.as_dict() for a in all_agents] - - @ns_conf.doc("create_agent") - @api_endpoint_metrics("POST", "/agents") - def post(self, project_name): - """Create a agent""" - - # Check for required parameters. - if "agent" not in request.json: - return http_error( - HTTPStatus.BAD_REQUEST, "Missing parameter", 'Must provide "agent" parameter in POST body' - ) - - agent = request.json["agent"] - - name = agent.get("name") - return create_agent(project_name, name, agent) - - -@ns_conf.route("//agents/") -@ns_conf.param("project_name", "Name of the project") -@ns_conf.param("agent_name", "Name of the agent") -class AgentResource(Resource): - @ns_conf.doc("get_agent") - @api_endpoint_metrics("GET", "/agents/agent") - def get(self, project_name, agent_name): - """Gets an agent by name""" - session = SessionController() - try: - existing_agent = session.agents_controller.get_agent(agent_name, project_name=project_name) - if existing_agent is None: - return http_error( - HTTPStatus.NOT_FOUND, "Agent not found", f"Agent with name {agent_name} does not exist" - ) - return existing_agent.as_dict() - except (ValueError, EntityNotExistsError): - # Project needs to exist. 
- return http_error( - HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist" - ) - - @ns_conf.doc("update_agent") - @api_endpoint_metrics("PUT", "/agents/agent") - def put(self, project_name, agent_name): - """Updates an agent by name, creating one if it doesn't exist""" - - # Check for required parameters. - if "agent" not in request.json: - return http_error( - HTTPStatus.BAD_REQUEST, "Missing parameter", 'Must provide "agent" parameter in POST body' - ) - agents_controller = AgentsController() - - try: - existing_agent_record = agents_controller.get_agent(agent_name, project_name=project_name) - except (ValueError, EntityNotExistsError): - # Project must exist. - return http_error( - HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist" - ) - if existing_agent_record is None: - return http_error( - HTTPStatus.BAD_REQUEST, - "Creation is not allowed", - "Creation of an agent using the PUT method is not allowed.", - ) - - agent = request.json["agent"] - name = agent.get("name", None) - - # Agent must not exist with new name. - if name is not None and name != agent_name: - agent_with_new_name = agents_controller.get_agent(name, project_name=project_name) - if agent_with_new_name is not None: - return http_error( - HTTPStatus.CONFLICT, - "Agent already exists", - f"Agent with name {name} already exists. 
Please choose a different one.", - ) - - if existing_agent_record is None: - # Create - return create_agent(project_name, name, agent) - - # Update - try: - params = agent.get("params", {}) - - if agent.get("model"): - model = agent["model"] - elif "model_name" in agent: - model = {"model_name": agent.get("model_name"), "provider": agent.get("provider")} - else: - model = None - - if agent.get("data"): - params["data"] = agent["data"] - if agent.get("prompt_template"): - params["prompt_template"] = agent["prompt_template"] - - updated_agent = agents_controller.update_agent( - agent_name, - project_name=project_name, - name=name, - model=model, - params=params, - ) - - return updated_agent.as_dict() - except EntityExistsError as e: - return http_error(HTTPStatus.NOT_FOUND, "Resource should not exists", str(e)) - except EntityNotExistsError as e: - # Agent doesn't exist. - return http_error(HTTPStatus.NOT_FOUND, "Resource not found", str(e)) - except ValueError as e: - return http_error(HTTPStatus.BAD_REQUEST, "Wrong arguments", str(e)) - - @ns_conf.doc("delete_agent") - @api_endpoint_metrics("DELETE", "/agents/agent") - def delete(self, project_name, agent_name): - """Deletes a agent by name""" - agents_controller = AgentsController() - - try: - existing_agent = agents_controller.get_agent(agent_name, project_name=project_name) - if existing_agent is None: - return http_error( - HTTPStatus.NOT_FOUND, "Agent not found", f"Agent with name {agent_name} does not exist" - ) - except (ValueError, EntityNotExistsError): - # Project needs to exist. 
- return http_error( - HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist" - ) - - agents_controller.delete_agent(agent_name, project_name=project_name) - return "", HTTPStatus.NO_CONTENT - - -def _completion_event_generator(agent_name: str, messages: List[Dict], project_name: str) -> Iterable[str]: - logger.info(f"Starting completion event generator for agent {agent_name}") - - def json_serialize(data): - return f"data: {json.dumps(data)}\n\n" - - try: - # Populate API key by default if not present. - session = SessionController() - existing_agent = session.agents_controller.get_agent(agent_name, project_name=project_name) - if not existing_agent.params: - existing_agent.params = {} - existing_agent.params["openai_api_key"] = existing_agent.params.get( - "openai_api_key", os.getenv("OPENAI_API_KEY") - ) - # Have to commit/flush here so DB isn't locked while streaming. - db.session.commit() - - # Skills are no longer used - mode parameter handling removed - - completion_stream = session.agents_controller.get_completion( - existing_agent, messages, project_name=project_name, tools=[], stream=True - ) - - for chunk in completion_stream: - if isinstance(chunk, str) and chunk.startswith("data: "): - # The chunk is already formatted correctly, yield it as is - yield chunk - elif isinstance(chunk, dict): - # Convert DataFrame content to JSON-serializable format - if chunk.get("type") == "data" and isinstance(chunk.get("content"), pd.DataFrame): - df = chunk["content"] - # Convert DataFrame to markdown format - chunk["content"] = dataframe_to_markdown(df) - - if "error" in chunk: - # Handle error chunks - logger.error(f"Error in completion stream: {chunk['error']}") - yield json_serialize({"error": chunk["error"]}) - elif chunk.get("type") == "context": - # Handle context message - yield json_serialize({"type": "context", "content": chunk.get("content")}) - elif chunk.get("type") == "sql": - # Handle SQL query message - yield 
json_serialize({"type": "sql", "content": chunk.get("content")}) - elif chunk.get("type") == "status": - # Handle status message - yield json_serialize({"type": "context", "content": chunk.get("content")}) - else: - # Chunk should already be formatted by agent stream. - yield json_serialize(chunk) - else: - # For any other unexpected chunk types - yield json_serialize({"output": str(chunk)}) - - logger.debug(f"Streamed chunk: {str(chunk)[:100]}...") - - logger.info("Completion stream finished") - - except Exception: - error_message = "Error in completion event generator" - logger.exception(error_message) - yield json_serialize({"error": error_message}) - - finally: - yield json_serialize({"type": "end"}) - - -@ns_conf.route("//agents//completions/stream") -@ns_conf.param("project_name", "Name of the project") -@ns_conf.param("agent_name", "Name of the agent") -class AgentCompletionsStream(Resource): - @ns_conf.doc("agent_completions_stream") - @api_endpoint_metrics("POST", "/agents/agent/completions/stream") - def post(self, project_name, agent_name): - # Extract messages from request (HTTP format only) - if "messages" not in request.json: - return http_error( - HTTPStatus.BAD_REQUEST, - "Missing parameter", - 'Must provide "messages" parameter in POST body', - ) - - messages = request.json["messages"] - - session = SessionController() - try: - existing_agent = session.agents_controller.get_agent(agent_name, project_name=project_name) - if existing_agent is None: - logger.warning(f"Agent {agent_name} not found in project {project_name}") - return http_error( - HTTPStatus.NOT_FOUND, "Agent not found", f"Agent with name {agent_name} does not exist" - ) - except ValueError as e: - logger.warning(f"Project {project_name} not found: {e}") - return http_error( - HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist" - ) - - try: - gen = _completion_event_generator(agent_name, messages, project_name) - logger.info(f"Starting 
streaming response for agent {agent_name}") - return Response(gen, mimetype="text/event-stream") - except Exception as e: - logger.exception(f"Error during streaming for agent {agent_name}:") - return http_error( - HTTPStatus.INTERNAL_SERVER_ERROR, "Streaming error", f"An error occurred during streaming: {e}" - ) - - -@ns_conf.route("//agents//completions") -@ns_conf.param("project_name", "Name of the project") -@ns_conf.param("agent_name", "Name of the agent") -class AgentCompletions(Resource): - @ns_conf.doc("agent_completions") - @api_endpoint_metrics("POST", "/agents/agent/completions") - def post(self, project_name, agent_name): - """Queries an agent given a list of messages""" - # Check for required parameters. - if "messages" not in request.json: - return http_error( - HTTPStatus.BAD_REQUEST, "Missing parameter", 'Must provide "messages" parameter in POST body' - ) - agents_controller = AgentsController() - - try: - existing_agent = agents_controller.get_agent(agent_name, project_name=project_name) - if existing_agent is None: - return http_error( - HTTPStatus.NOT_FOUND, "Agent not found", f"Agent with name {agent_name} does not exist" - ) - except (ValueError, EntityNotExistsError): - # Project needs to exist. - return http_error( - HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist" - ) - - # Skills are no longer used - mode parameter handling removed - - messages = request.json["messages"] - - completion = agents_controller.get_completion( - existing_agent, - messages, - project_name=project_name, - # Don't need to include backoffice_db related tools into this endpoint. - # Underlying handler (e.g. Langchain) will handle default tools like mdb_read, mdb_write, etc. 
- tools=[], - ) - - output_col = agents_controller.assistant_column - model_output = completion.iloc[-1][output_col] - trace_id = completion.iloc[-1]["trace_id"] - - response = {"message": {"content": model_output, "role": "assistant", "trace_id": trace_id}} - - if existing_agent.params.get("return_context", False): - context = [] - if "context" in completion.columns: - try: - last_context = completion.iloc[-1]["context"] - if last_context: - context = json.loads(last_context) - except (json.JSONDecodeError, IndexError): - logger.warning("Error decoding context:", exc_info=True) - pass # Keeping context as an empty list in case of error - - response["message"]["context"] = context - - return response diff --git a/mindsdb/api/http/namespaces/analysis.py b/mindsdb/api/http/namespaces/analysis.py deleted file mode 100644 index 467aae888d2..00000000000 --- a/mindsdb/api/http/namespaces/analysis.py +++ /dev/null @@ -1,123 +0,0 @@ -import time - -import pandas as pd -from flask import request -from flask_restx import Resource -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast import Constant -from pandas.core.frame import DataFrame - -from mindsdb.api.http.namespaces.configs.analysis import ns_conf -from mindsdb.api.executor.utilities.sql import get_query_tables -from mindsdb.api.http.utils import http_error -from mindsdb.api.mysql.mysql_proxy.classes.fake_mysql_proxy import FakeMysqlProxy -from mindsdb.api.executor.data_types.response_type import ( - RESPONSE_TYPE as SQL_RESPONSE_TYPE, -) -from mindsdb.metrics.metrics import api_endpoint_metrics -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -def analyze_df(df: DataFrame) -> dict: - if len(df) == 0: - return {} - - cols = pd.Series(df.columns) - - # https://stackoverflow.com/questions/24685012/pandas-dataframe-renaming-multiple-identically-named-columns - for dup in cols[cols.duplicated()].unique(): - cols[cols[cols == dup].index.values.tolist()] = [ - dup + "." 
+ str(i) if i != 0 else dup for i in range(sum(cols == dup)) - ] - - # rename the columns with the cols list. - df.columns = cols - - from dataprep_ml.insights import analyze_dataset - - analysis = analyze_dataset(df) - return analysis.to_dict() - - -@ns_conf.route("/query") -class QueryAnalysis(Resource): - @ns_conf.doc("post_query_to_analyze") - @api_endpoint_metrics("POST", "/analysis/query") - def post(self): - data = request.json - query = data.get("query") - context = data.get("context", {}) - limit = data.get("limit") - if query is None or len(query) == 0: - return http_error(400, "Missed query", "Need provide query to analyze") - - try: - ast = parse_sql(query) - except Exception as e: - return http_error(500, "Wrong query", str(e)) - - if limit is not None: - ast.limit = Constant(limit) - query = str(ast) - - mysql_proxy = FakeMysqlProxy() - mysql_proxy.set_context(context) - - try: - result = mysql_proxy.process_query(query) - except Exception as e: - logger.exception("Error during query analysis:") - return http_error(500, "Error", f"Unexpected error duting query analysis: {e}") - - if result.type == SQL_RESPONSE_TYPE.ERROR: - return http_error(500, f"Error {result.error_code}", result.error_message) - if result.type != SQL_RESPONSE_TYPE.TABLE: - return http_error(500, "Error", "Query does not return data") - - column_names = [column.name for column in result.result_set.columns] - df = result.result_set.to_df() - try: - analysis = analyze_df(df) - except ImportError: - return { - "analysis": {}, - "timestamp": time.time(), - "error": 'To use this feature, please install the "dataprep_ml" package.', - } - - query_tables = [table.to_string() for table in get_query_tables(ast)] - - return { - "analysis": analysis, - "column_names": column_names, - "row_count": len(result.result_set), - "timestamp": time.time(), - "tables": query_tables, - } - - -@ns_conf.route("/data") -class DataAnalysis(Resource): - @ns_conf.doc("post_data_to_analyze") - 
@api_endpoint_metrics("POST", "/analysis/data") - def post(self): - payload = request.json - column_names = payload.get("column_names") - data = payload.get("data") - - timestamp = time.time() - try: - analysis = analyze_df(DataFrame(data, columns=column_names)) - return {"analysis": analysis, "timestamp": time.time()} - except ImportError: - return { - "analysis": {}, - "timestamp": timestamp, - "error": 'To use this feature, please install the "dataprep_ml" package.', - } - except Exception as e: - # Don't want analysis exceptions to show up on UI. - # TODO: Fix analysis so it doesn't throw exceptions at all. - return {"analysis": {}, "timestamp": timestamp, "error": str(e)} diff --git a/mindsdb/api/http/namespaces/auth.py b/mindsdb/api/http/namespaces/auth.py deleted file mode 100644 index 5010be496f9..00000000000 --- a/mindsdb/api/http/namespaces/auth.py +++ /dev/null @@ -1,159 +0,0 @@ -import base64 -import secrets -import time -import urllib - -import requests -from flask import redirect, request, url_for -from flask_restx import Resource - -from mindsdb.api.http.namespaces.configs.auth import ns_conf -from mindsdb.metrics.metrics import api_endpoint_metrics -from mindsdb.utilities.config import Config -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -def get_access_token() -> str: - """return current access token - - Returns: - str: token - """ - return Config().get("auth", {}).get("oauth", {}).get("tokens", {}).get("access_token") - - -def request_user_info(access_token: str = None) -> dict: - """request user info from cloud - - Args: - access_token (str, optional): token that used to get user data - - Returns: - dict: user data - """ - if access_token is None: - access_token = get_access_token() - if access_token is None: - raise KeyError() - - auth_server = Config()["auth"]["oauth"]["server_host"] - - response = requests.get( - f"https://{auth_server}/auth/userinfo", - headers={"Authorization": f"Bearer {access_token}"}, - 
timeout=5, - ) - if response.status_code != 200: - raise Exception(f"Wrong response: {response.status_code}, {response.text}") - - return response.json() - - -@ns_conf.route("/callback", methods=["GET"]) -@ns_conf.route("/callback/cloud_home", methods=["GET"]) -@ns_conf.hide -class Auth(Resource): - @ns_conf.doc(params={"code": "authentification code"}) - @api_endpoint_metrics("GET", "/auth/code") - def get(self): - """callback from auth server if authentification is successful""" - config = Config() - code = request.args.get("code") - - aws_meta_data = config["aws_meta_data"] - public_hostname = aws_meta_data["public-hostname"] - instance_id = aws_meta_data["instance-id"] - - oauth_meta = config["auth"]["oauth"] - client_id = oauth_meta["client_id"] - client_secret = oauth_meta["client_secret"] - auth_server = oauth_meta["server_host"] - client_basic = base64.b64encode(f"{client_id}:{client_secret}".encode()).decode() - - redirect_uri = f"https://{public_hostname}{request.path}" - response = requests.post( - f"https://{auth_server}/auth/token", - data={ - "code": code, - "grant_type": "authorization_code", - "redirect_uri": redirect_uri, - }, - headers={"Authorization": f"Basic {client_basic}"}, - ) - tokens = response.json() - if "expires_in" in tokens: - tokens["expires_at"] = round(time.time() + tokens["expires_in"] - 1) - del tokens["expires_in"] - - user_data = request_user_info(tokens["access_token"]) - - previous_username = config["auth"]["oauth"].get("username") - new_username = user_data["name"] - if previous_username is not None and new_username != previous_username: - return redirect("/forbidden") - - config.update( - { - "auth": { - "provider": "cloud", - "oauth": {"username": new_username, "tokens": tokens}, - } - } - ) - - try: - resp = requests.put( - f"https://{auth_server}/cloud/instance", - json={ - "instance_id": instance_id, - "public_hostname": public_hostname, - "ami_id": aws_meta_data.get("ami-id"), - }, - headers={"Authorization": f"Bearer 
{tokens['access_token']}"}, - timeout=5, - ) - if resp.status_code != 200: - logger.warning(f"Wrong response from cloud server: {resp.status_code}") - except Exception as e: - logger.warning(f"Cant't send request to cloud server: {e}", exc_info=True) - - if request.path.endswith("/auth/callback/cloud_home"): - return redirect(f"https://{auth_server}") - else: - return redirect(url_for("root_index")) - - -@ns_conf.route("/cloud_login", methods=["GET"]) -@ns_conf.hide -class CloudLoginRoute(Resource): - @ns_conf.doc( - responses={302: "Redirect to auth server"}, - params={"location": "final redirection should lead to that location"}, - ) - @api_endpoint_metrics("GET", "/auth/cloud_login") - def get(self): - """redirect to cloud login form""" - location = request.args.get("location") - config = Config() - - aws_meta_data = config["aws_meta_data"] - public_hostname = aws_meta_data["public-hostname"] - auth_server = config["auth"]["oauth"]["server_host"] - - if location == "cloud_home": - redirect_uri = f"https://{public_hostname}/api/auth/callback/cloud_home" - else: - redirect_uri = f"https://{public_hostname}/api/auth/callback" - - args = urllib.parse.urlencode( - { - "client_id": config["auth"]["oauth"]["client_id"], - "scope": "openid profile aws_marketplace", - "response_type": "code", - "nonce": secrets.token_urlsafe(), - "redirect_uri": redirect_uri, - } - ) - return redirect(f"https://{auth_server}/auth/authorize?{args}") diff --git a/mindsdb/api/http/namespaces/chatbots.py b/mindsdb/api/http/namespaces/chatbots.py deleted file mode 100644 index d9d5fbf2baf..00000000000 --- a/mindsdb/api/http/namespaces/chatbots.py +++ /dev/null @@ -1,277 +0,0 @@ -from http import HTTPStatus - -from flask import request -from flask_restx import Resource - -from mindsdb.api.http.namespaces.configs.projects import ns_conf -from mindsdb.interfaces.agents.agents_controller import AgentsController -from mindsdb.api.executor.controllers.session_controller import SessionController 
-from mindsdb.api.http.utils import http_error -from mindsdb.metrics.metrics import api_endpoint_metrics -from mindsdb.interfaces.chatbot.chatbot_controller import ChatBotController -from mindsdb.interfaces.model.functions import PredictorRecordNotFound -from mindsdb.interfaces.storage.db import Predictor -from mindsdb.utilities.exception import EntityNotExistsError - - -def create_chatbot(project_name, name, chatbot): - if name is None: - return http_error(HTTPStatus.BAD_REQUEST, "Missing field", 'Missing "name" field for chatbot') - - model_name = chatbot.get("model_name", None) - agent_name = chatbot.get("agent_name", None) - if model_name is None and agent_name is None: - return http_error( - HTTPStatus.BAD_REQUEST, - "Missing field", - 'Must include either "model_name" or "agent_name" field for chatbot', - ) - - session_controller = SessionController() - - if "database_id" in chatbot or "database_name" in chatbot or ("db_engine" in chatbot and "db_params" in chatbot): - try: - database_id = get_or_create_database_for_chatbot(chatbot, session_controller) - except ValueError as value_error: - return http_error(HTTPStatus.NOT_FOUND, "Database not found", str(value_error)) - - else: - return http_error( - HTTPStatus.BAD_REQUEST, - "Missing field", - 'Missing "database_id" or ("db_engine" and "database_param") fields for chatbot', - ) - - is_running = chatbot.get("is_running", False) - params = chatbot.get("params", {}) - - chatbot_controller = ChatBotController() - - # Chatbot can't already exist. - # TODO all checks should be inside of controller - - try: - existing_chatbot = chatbot_controller.get_chatbot(name, project_name=project_name) - except EntityNotExistsError: - # Project must exist. - return http_error(HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist") - if existing_chatbot is not None: - return http_error( - HTTPStatus.CONFLICT, - "Chatbot already exists", - f"Chatbot with name {name} already exists. 
Please use a different name", - ) - - # Model and agent need to exist. - if agent_name is not None: - agent = AgentsController().get_agent(agent_name, project_name) - if agent is None: - return http_error(HTTPStatus.NOT_FOUND, "Agent not found", f"Agent with name {agent_name} not found") - model_name = agent.model_name - - model_name_no_version, version = Predictor.get_name_and_version(model_name) - try: - session_controller.model_controller.get_model(model_name_no_version, version=version, project_name=project_name) - except PredictorRecordNotFound: - return http_error(HTTPStatus.NOT_FOUND, "Model not found", f"Model with name {model_name} not found") - - created_chatbot = chatbot_controller.add_chatbot( - name, - project_name, - model_name=model_name, - agent_name=agent_name, - database_id=database_id, - is_running=is_running, - params=params, - ) - return created_chatbot.as_dict(), HTTPStatus.CREATED - - -def get_or_create_database_for_chatbot(chatbot: dict, session_controller: SessionController) -> int: - """ - Get or create a database for a chatbot, based on the chatbot configuration provided in the request. - - Args: - chatbot (dict): The chatbot configuration. - session_controller (SessionController): The session controller. - - Returns: - int: The database ID. 
- """ - if "database_id" in chatbot: - database_record = session_controller.integration_controller.get_by_id(chatbot["database_id"]) - if database_record: - return database_record["id"] - else: - raise ValueError(f"Database with ID {chatbot['database_id']} not found") - - elif "database_name" in chatbot: - database_record = session_controller.integration_controller.get(chatbot["database_name"]) - if database_record: - return database_record["id"] - else: - raise ValueError(f"Database with name {chatbot['database_name']} not found") - - if "db_params" in chatbot and "db_engine" in chatbot: - db_name = chatbot["name"] + "_db" - - # try to drop - existing_db = session_controller.integration_controller.get(db_name) - if existing_db: - # drop - session_controller.integration_controller.delete(db_name) - - return session_controller.integration_controller.add(db_name, chatbot["db_engine"], chatbot["db_params"]) - - return None - - -@ns_conf.route("//chatbots") -class ChatBotsResource(Resource): - @ns_conf.doc("list_chatbots") - @api_endpoint_metrics("GET", "/chatbots") - def get(self, project_name): - """List all chatbots""" - chatbot_controller = ChatBotController() - try: - all_bots = chatbot_controller.get_chatbots(project_name) - except (ValueError, EntityNotExistsError): - # Project needs to exist. - return http_error( - HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist" - ) - return all_bots - - @ns_conf.doc("create_chatbot") - @api_endpoint_metrics("POST", "/chatbots") - def post(self, project_name): - """Create a chatbot""" - - # Check for required parameters. 
- if "chatbot" not in request.json: - return http_error( - HTTPStatus.BAD_REQUEST, "Missing parameter", 'Must provide "chatbot" parameter in POST body' - ) - - chatbot = request.json["chatbot"] - - name = chatbot.get("name") - return create_chatbot(project_name, name, chatbot) - - -@ns_conf.route("//chatbots/") -@ns_conf.param("project_name", "Name of the project") -@ns_conf.param("chatbot_name", "Name of the chatbot") -class ChatBotResource(Resource): - @ns_conf.doc("get_chatbot") - @api_endpoint_metrics("GET", "/chatbots/chatbot") - def get(self, project_name, chatbot_name): - """Gets a chatbot by name""" - chatbot_controller = ChatBotController() - try: - existing_chatbot = chatbot_controller.get_chatbot(chatbot_name, project_name=project_name) - if existing_chatbot is None: - return http_error( - HTTPStatus.NOT_FOUND, "Chatbot not found", f"Chatbot with name {chatbot_name} does not exist" - ) - return existing_chatbot - except (ValueError, EntityNotExistsError): - # Project needs to exist. - return http_error( - HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist" - ) - - @ns_conf.doc("update_chatbot") - @api_endpoint_metrics("PUT", "/chatbots/chatbot") - def put(self, project_name, chatbot_name): - """Updates a chatbot by name, creating one if it doesn't exist""" - - # Check for required parameters. - if "chatbot" not in request.json: - return http_error( - HTTPStatus.BAD_REQUEST, "Missing parameter", 'Must provide "chatbot" parameter in POST body' - ) - chatbot_controller = ChatBotController() - - try: - existing_chatbot = chatbot_controller.get_chatbot(chatbot_name, project_name=project_name) - except EntityNotExistsError: - # Project needs to exist. 
- return http_error( - HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist" - ) - - session = SessionController() - - chatbot = request.json["chatbot"] - name = chatbot.get("name", None) - agent_name = chatbot.get("agent_name", None) - model_name = chatbot.get("model_name", None) - try: - database_id = get_or_create_database_for_chatbot(chatbot, session) - except ValueError as value_error: - return http_error(HTTPStatus.NOT_FOUND, "Database not found", str(value_error)) - is_running = chatbot.get("is_running", None) - params = chatbot.get("params", None) - - # Model needs to exist. - if model_name is not None: - model_name_no_version, version = Predictor.get_name_and_version(model_name) - try: - session.model_controller.get_model(model_name_no_version, version=version, project_name=project_name) - except PredictorRecordNotFound: - return http_error(HTTPStatus.NOT_FOUND, "Model not found", f"Model with name {model_name} not found") - - # Agent needs to exist. - if agent_name is not None: - agent = session.agents_controller.get_agent(agent_name, project_name) - if agent is None: - return http_error(HTTPStatus.NOT_FOUND, "Agent not found", f"Agent with name {agent_name} not found") - - # Chatbot must not exist with new name. - if name is not None: - if name != chatbot_name: - chatbot_with_new_name = chatbot_controller.get_chatbot(name, project_name=project_name) - if chatbot_with_new_name is not None: - return http_error( - HTTPStatus.CONFLICT, - "Chatbot already exists", - f"Chatbot with name {name} already exists. 
Please choose a different one.", - ) - - if existing_chatbot is None: - # Create - return create_chatbot(project_name, name, chatbot) - - # Update - updated_chatbot = chatbot_controller.update_chatbot( - chatbot_name, - project_name=project_name, - name=name, - model_name=model_name, - agent_name=agent_name, - database_id=database_id, - is_running=is_running, - params=params, - ) - return updated_chatbot.as_dict() - - @ns_conf.doc("delete_chatbot") - @api_endpoint_metrics("DELETE", "/chatbots/chatbot") - def delete(self, project_name, chatbot_name): - """Deletes a chatbot by name""" - chatbot_controller = ChatBotController() - try: - existing_chatbot = chatbot_controller.get_chatbot(chatbot_name, project_name=project_name) - if existing_chatbot is None: - return http_error( - HTTPStatus.NOT_FOUND, "Chatbot not found", f"Chatbot with name {chatbot_name} does not exist" - ) - except EntityNotExistsError: - # Project needs to exist. - return http_error( - HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist" - ) - - chatbot_controller.delete_chatbot(chatbot_name, project_name=project_name) - return "", HTTPStatus.NO_CONTENT diff --git a/mindsdb/api/http/namespaces/config.py b/mindsdb/api/http/namespaces/config.py deleted file mode 100644 index da4412b7891..00000000000 --- a/mindsdb/api/http/namespaces/config.py +++ /dev/null @@ -1,249 +0,0 @@ -import copy -import shutil -import tempfile -from pathlib import Path -from http import HTTPStatus - -from flask import request -from flask_restx import Resource -from flask import current_app as ca - -from mindsdb.api.http.namespaces.configs.config import ns_conf -from mindsdb.api.http.utils import http_error -from mindsdb.metrics.metrics import api_endpoint_metrics -from mindsdb.utilities.api_status import get_api_status -from mindsdb.utilities import log -from mindsdb.utilities.functions import decrypt, encrypt -from mindsdb.utilities.config import Config -from 
mindsdb.integrations.libs.response import HandlerStatusResponse -from mindsdb.interfaces.knowledge_base.default_storage_resolver import ( - get_env_available_engines, - resolve_default_storage_engines, -) - - -logger = log.getLogger(__name__) - - -@ns_conf.route("/") -@ns_conf.param("name", "Get config") -class GetConfig(Resource): - @ns_conf.doc("get_config") - @api_endpoint_metrics("GET", "/config") - def get(self): - config = Config() - resp = {"auth": {"http_auth_enabled": config["auth"]["http_auth_enabled"]}} - for key in ["default_llm", "default_embedding_model", "default_reranking_model"]: - value = config.get(key) - if value is not None: - resp[key] = value - - knowledge_bases_config = copy.deepcopy(config["knowledge_bases"]) - knowledge_bases_config.update(resolve_default_storage_engines(config)) - knowledge_bases_config["engines"] = get_env_available_engines() - resp["knowledge_bases"] = knowledge_bases_config - - api_status = get_api_status() - api_configs = copy.deepcopy(config["api"]) - for api_name, api_config in api_configs.items(): - api_config["running"] = api_status.get(api_name, False) - resp["api"] = api_configs - - return resp - - @ns_conf.doc("put_config") - @api_endpoint_metrics("PUT", "/config") - def put(self): - data = request.json - - allowed_arguments = { - "auth", - "default_llm", - "default_embedding_model", - "default_reranking_model", - "knowledge_bases", - } - unknown_arguments = list(set(data.keys()) - allowed_arguments) - if len(unknown_arguments) > 0: - return http_error(HTTPStatus.BAD_REQUEST, "Wrong arguments", f"Unknown argumens: {unknown_arguments}") - - nested_keys_to_validate = {"auth", "knowledge_bases"} - for key in data.keys(): - if key in nested_keys_to_validate: - unknown_arguments = list(set(data[key].keys()) - set(Config()[key].keys())) - if len(unknown_arguments) > 0: - return http_error( - HTTPStatus.BAD_REQUEST, "Wrong arguments", f"Unknown argumens: {unknown_arguments}" - ) - - overwrite_arguments = 
{"default_llm", "default_embedding_model", "default_reranking_model"} - overwrite_data = {k: data[k] for k in overwrite_arguments if k in data} - merge_data = {k: data[k] for k in data if k not in overwrite_arguments} - - if len(overwrite_data) > 0: - Config().update(overwrite_data, overwrite=True) - if len(merge_data) > 0: - Config().update(merge_data) - - Config().update(data) - - return "", 200 - - -@ns_conf.route("/integrations") -@ns_conf.param("name", "List all database integration") -class ListIntegration(Resource): - @api_endpoint_metrics("GET", "/config/integrations") - def get(self): - return {"integrations": [k for k in ca.integration_controller.get_all(show_secrets=False)]} - - -@ns_conf.route("/all_integrations") -@ns_conf.param("name", "List all database integration") -class AllIntegration(Resource): - @ns_conf.doc("get_all_integrations") - @api_endpoint_metrics("GET", "/config/all_integrations") - def get(self): - integrations = ca.integration_controller.get_all(show_secrets=False) - return integrations - - -@ns_conf.route("/integrations/") -@ns_conf.param("name", "Database integration") -class Integration(Resource): - @ns_conf.doc("get_integration") - @api_endpoint_metrics("GET", "/config/integrations/integration") - def get(self, name): - integration = ca.integration_controller.get(name, show_secrets=False) - if integration is None: - return http_error(HTTPStatus.NOT_FOUND, "Not found", f"Can't find integration: {name}") - integration = copy.deepcopy(integration) - return integration - - @ns_conf.doc("put_integration") - @api_endpoint_metrics("PUT", "/config/integrations/integration") - def put(self, name): - params = {} - if request.is_json: - params.update((request.json or {}).get("params", {})) - else: - params.update(request.form or {}) - - if len(params) == 0: - return http_error(HTTPStatus.BAD_REQUEST, "Wrong argument", "type of 'params' must be dict") - - files = request.files - temp_dir = None - if files is not None and len(files) > 0: - 
temp_dir = tempfile.mkdtemp(prefix="integration_files_") - for key, file in files.items(): - temp_dir_path = Path(temp_dir) - file_name = Path(file.filename) - file_path = temp_dir_path.joinpath(file_name).resolve() - if temp_dir_path not in file_path.parents: - raise Exception(f"Can not save file at path: {file_path}") - file.save(file_path) - params[key] = str(file_path) - - is_test = params.get("test", False) - # TODO: Move this to new Endpoint - - config = Config() - secret_key = config.get("secret_key", "dummy-key") - - if is_test: - del params["test"] - handler_type = params.pop("type", None) - params.pop("publish", None) - try: - handler = ca.integration_controller.create_tmp_handler(name, handler_type, params) - status = handler.check_connection() - except ImportError as e: - status = HandlerStatusResponse(success=False, error_message=str(e)) - if temp_dir is not None: - shutil.rmtree(temp_dir) - - resp = status.to_json() - - if status.success and "code" in params: - if hasattr(handler, "handler_storage"): - # attach storage if exists - export = handler.handler_storage.export_files() - if export: - # encrypt with flask secret key - encrypted = encrypt(export, secret_key) - resp["storage"] = encrypted.decode() - - return resp, 200 - - config = Config() - secret_key = config.get("secret_key", "dummy-key") - - integration = ca.integration_controller.get(name, show_secrets=False) - if integration is not None: - return http_error( - HTTPStatus.BAD_REQUEST, "Wrong argument", f"Integration with name '{name}' already exists" - ) - - try: - engine = params.pop("type", None) - params.pop("publish", False) - storage = params.pop("storage", None) - ca.integration_controller.add(name, engine, params) - - # copy storage - if storage is not None: - handler = ca.integration_controller.get_data_handler(name) - - export = decrypt(storage.encode(), secret_key) - handler.handler_storage.import_files(export) - - except Exception as e: - logger.exception("An error occurred 
during the creation of the integration:") - if temp_dir is not None: - shutil.rmtree(temp_dir) - return http_error(HTTPStatus.INTERNAL_SERVER_ERROR, "Error", f"Error during config update: {e}") - - if temp_dir is not None: - shutil.rmtree(temp_dir) - return {}, 200 - - @ns_conf.doc("delete_integration") - @api_endpoint_metrics("DELETE", "/config/integrations/integration") - def delete(self, name): - integration = ca.integration_controller.get(name) - if integration is None: - return http_error( - HTTPStatus.BAD_REQUEST, "Integration does not exists", f"Nothing to delete. '{name}' not exists." - ) - try: - ca.integration_controller.delete(name) - except Exception as e: - logger.exception("An error occurred while deleting the integration") - return http_error(HTTPStatus.INTERNAL_SERVER_ERROR, "Error", f"Error during integration delete: {e}") - return "", 200 - - @ns_conf.doc("modify_integration") - @api_endpoint_metrics("POST", "/config/integrations/integration") - def post(self, name): - params = {} - params.update((request.json or {}).get("params", {})) - params.update(request.form or {}) - - if not isinstance(params, dict): - return http_error(HTTPStatus.BAD_REQUEST, "Wrong argument", "type of 'params' must be dict") - integration = ca.integration_controller.get(name) - if integration is None: - return http_error( - HTTPStatus.BAD_REQUEST, "Integration does not exists", f"Nothin to modify. '{name}' not exists." 
- ) - try: - if "enabled" in params: - params["publish"] = params["enabled"] - del params["enabled"] - ca.integration_controller.modify(name, params) - - except Exception as e: - logger.exception("An error occurred while modifying the integration") - return http_error(HTTPStatus.INTERNAL_SERVER_ERROR, "Error", f"Error during integration modification: {e}") - return "", 200 diff --git a/mindsdb/api/http/namespaces/configs/__init__.py b/mindsdb/api/http/namespaces/configs/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/api/http/namespaces/configs/agents.py b/mindsdb/api/http/namespaces/configs/agents.py deleted file mode 100644 index 256056775b0..00000000000 --- a/mindsdb/api/http/namespaces/configs/agents.py +++ /dev/null @@ -1,3 +0,0 @@ -from flask_restx import Namespace - -ns_conf = Namespace('agents', description='API to perform operations on MindsDB Agents') diff --git a/mindsdb/api/http/namespaces/configs/analysis.py b/mindsdb/api/http/namespaces/configs/analysis.py deleted file mode 100644 index b678b7595f5..00000000000 --- a/mindsdb/api/http/namespaces/configs/analysis.py +++ /dev/null @@ -1,3 +0,0 @@ -from flask_restx import Namespace - -ns_conf = Namespace('analysis', description='Dataset analysis') diff --git a/mindsdb/api/http/namespaces/configs/auth.py b/mindsdb/api/http/namespaces/configs/auth.py deleted file mode 100644 index dadfe7b85b4..00000000000 --- a/mindsdb/api/http/namespaces/configs/auth.py +++ /dev/null @@ -1,4 +0,0 @@ -from flask_restx import Namespace - - -ns_conf = Namespace('auth', description='Authentification routes') diff --git a/mindsdb/api/http/namespaces/configs/chatbots.py b/mindsdb/api/http/namespaces/configs/chatbots.py deleted file mode 100644 index 8260c766914..00000000000 --- a/mindsdb/api/http/namespaces/configs/chatbots.py +++ /dev/null @@ -1,3 +0,0 @@ -from flask_restx import Namespace - -ns_conf = Namespace('chatbots', description='API to perform operations on MindsDB Chatbots') 
diff --git a/mindsdb/api/http/namespaces/configs/config.py b/mindsdb/api/http/namespaces/configs/config.py deleted file mode 100644 index bad4e4afc69..00000000000 --- a/mindsdb/api/http/namespaces/configs/config.py +++ /dev/null @@ -1,3 +0,0 @@ -from flask_restx import Namespace - -ns_conf = Namespace('config', description='Configuration changes') diff --git a/mindsdb/api/http/namespaces/configs/databases.py b/mindsdb/api/http/namespaces/configs/databases.py deleted file mode 100644 index c5e97ab5fc6..00000000000 --- a/mindsdb/api/http/namespaces/configs/databases.py +++ /dev/null @@ -1,3 +0,0 @@ -from flask_restx import Namespace - -ns_conf = Namespace('databases', description='API to perform operations that read and write MindsDB databases') diff --git a/mindsdb/api/http/namespaces/configs/default.py b/mindsdb/api/http/namespaces/configs/default.py deleted file mode 100644 index a6334bd66ae..00000000000 --- a/mindsdb/api/http/namespaces/configs/default.py +++ /dev/null @@ -1,4 +0,0 @@ -from flask_restx import Namespace - - -ns_conf = Namespace('default', description='default ns', path='/') diff --git a/mindsdb/api/http/namespaces/configs/files.py b/mindsdb/api/http/namespaces/configs/files.py deleted file mode 100644 index 233e17d1486..00000000000 --- a/mindsdb/api/http/namespaces/configs/files.py +++ /dev/null @@ -1,4 +0,0 @@ -from flask_restx import Namespace - - -ns_conf = Namespace('files', description='Files') diff --git a/mindsdb/api/http/namespaces/configs/handlers.py b/mindsdb/api/http/namespaces/configs/handlers.py deleted file mode 100644 index 4a7bd39daaa..00000000000 --- a/mindsdb/api/http/namespaces/configs/handlers.py +++ /dev/null @@ -1,3 +0,0 @@ -from flask_restx import Namespace - -ns_conf = Namespace('handlers', description='Represent hanlder object') diff --git a/mindsdb/api/http/namespaces/configs/integrations.py b/mindsdb/api/http/namespaces/configs/integrations.py deleted file mode 100644 index 14d3fab27fc..00000000000 --- 
a/mindsdb/api/http/namespaces/configs/integrations.py +++ /dev/null @@ -1,3 +0,0 @@ -from flask_restx import Namespace - -ns_conf = Namespace("integrations", description="API for integration-level operations (passthrough, capabilities)") diff --git a/mindsdb/api/http/namespaces/configs/jobs.py b/mindsdb/api/http/namespaces/configs/jobs.py deleted file mode 100644 index 635b049b658..00000000000 --- a/mindsdb/api/http/namespaces/configs/jobs.py +++ /dev/null @@ -1,3 +0,0 @@ -from flask_restx import Namespace - -ns_conf = Namespace('jobs', description='API to perform operations on MindsDB jobs') diff --git a/mindsdb/api/http/namespaces/configs/knowledge_bases.py b/mindsdb/api/http/namespaces/configs/knowledge_bases.py deleted file mode 100644 index d0384795430..00000000000 --- a/mindsdb/api/http/namespaces/configs/knowledge_bases.py +++ /dev/null @@ -1,3 +0,0 @@ -from flask_restx import Namespace - -ns_conf = Namespace('knowledge_bases', description='API to perform operations on MindsDB Knowledge Bases') diff --git a/mindsdb/api/http/namespaces/configs/projects.py b/mindsdb/api/http/namespaces/configs/projects.py deleted file mode 100644 index ce0e0f5660a..00000000000 --- a/mindsdb/api/http/namespaces/configs/projects.py +++ /dev/null @@ -1,3 +0,0 @@ -from flask_restx import Namespace - -ns_conf = Namespace('projects', description='Projects') diff --git a/mindsdb/api/http/namespaces/configs/sql.py b/mindsdb/api/http/namespaces/configs/sql.py deleted file mode 100644 index f64bfe024ea..00000000000 --- a/mindsdb/api/http/namespaces/configs/sql.py +++ /dev/null @@ -1,3 +0,0 @@ -from flask_restx import Namespace - -ns_conf = Namespace('sql', description='API for sql commands') diff --git a/mindsdb/api/http/namespaces/configs/tabs.py b/mindsdb/api/http/namespaces/configs/tabs.py deleted file mode 100644 index 19d93c643ec..00000000000 --- a/mindsdb/api/http/namespaces/configs/tabs.py +++ /dev/null @@ -1,3 +0,0 @@ -from flask_restx import Namespace - -ns_conf = 
Namespace('tabs', description='Save/load data from WebUI tabs') diff --git a/mindsdb/api/http/namespaces/configs/tree.py b/mindsdb/api/http/namespaces/configs/tree.py deleted file mode 100644 index 9bdaefb4c3d..00000000000 --- a/mindsdb/api/http/namespaces/configs/tree.py +++ /dev/null @@ -1,3 +0,0 @@ -from flask_restx import Namespace - -ns_conf = Namespace('tree', description='API for getting project structure') diff --git a/mindsdb/api/http/namespaces/configs/util.py b/mindsdb/api/http/namespaces/configs/util.py deleted file mode 100644 index dfd2d70acec..00000000000 --- a/mindsdb/api/http/namespaces/configs/util.py +++ /dev/null @@ -1,3 +0,0 @@ -from flask_restx import Namespace - -ns_conf = Namespace('util', description='General routes') diff --git a/mindsdb/api/http/namespaces/configs/webhooks.py b/mindsdb/api/http/namespaces/configs/webhooks.py deleted file mode 100644 index bbea6f6f225..00000000000 --- a/mindsdb/api/http/namespaces/configs/webhooks.py +++ /dev/null @@ -1,3 +0,0 @@ -from flask_restx import Namespace - -ns_conf = Namespace('webhooks', description='API to receive messages from bots') diff --git a/mindsdb/api/http/namespaces/databases.py b/mindsdb/api/http/namespaces/databases.py deleted file mode 100644 index f5a75f6bc73..00000000000 --- a/mindsdb/api/http/namespaces/databases.py +++ /dev/null @@ -1,392 +0,0 @@ -import shutil -import tempfile -from http import HTTPStatus -from typing import Dict -from pathlib import Path - -from flask import request -from flask_restx import Resource - -from mindsdb.api.http.utils import http_error -from mindsdb.api.http.namespaces.configs.databases import ns_conf -from mindsdb.api.mysql.mysql_proxy.classes.fake_mysql_proxy import FakeMysqlProxy -from mindsdb.api.executor.controllers.session_controller import SessionController -from mindsdb.api.executor.datahub.classes.tables_row import TablesRow -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from mindsdb.metrics.metrics import 
api_endpoint_metrics -from mindsdb_sql_parser import parse_sql, ParsingException -from mindsdb_sql_parser.ast import CreateTable, DropTables -from mindsdb.utilities.exception import EntityNotExistsError -from mindsdb.integrations.libs.response import HandlerStatusResponse -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -@ns_conf.route("/") -class DatabasesResource(Resource): - @ns_conf.doc("list_databases") - @api_endpoint_metrics("GET", "/databases") - def get(self): - """List all databases""" - session = SessionController() - return session.database_controller.get_list() - - @ns_conf.doc("create_database") - @api_endpoint_metrics("POST", "/databases") - def post(self): - """Create a database""" - if "database" not in request.json: - return http_error( - HTTPStatus.BAD_REQUEST, "Wrong argument", 'Must provide "database" parameter in POST body' - ) - check_connection = request.json.get("check_connection", False) - session = SessionController() - database = request.json["database"] - parameters = {} - if "name" not in database: - return http_error(HTTPStatus.BAD_REQUEST, "Wrong argument", 'Missing "name" field for database') - if "engine" not in database: - return http_error( - HTTPStatus.BAD_REQUEST, - "Wrong argument", - 'Missing "engine" field for database. 
If you want to create a project instead, use the /api/projects endpoint.', - ) - if "parameters" in database: - parameters = database["parameters"] - name = database["name"] - - if session.database_controller.exists(name): - return http_error(HTTPStatus.CONFLICT, "Name conflict", f"Database with name {name} already exists.") - - storage = None - if check_connection: - try: - handler = session.integration_controller.create_tmp_handler(name, database["engine"], parameters) - status = handler.check_connection() - except ImportError as import_error: - status = HandlerStatusResponse(success=False, error_message=str(import_error)) - - if status.success is not True: - if hasattr(status, "redirect_url") and isinstance(status.redirect_url, str): - return { - "status": "redirect_required", - "redirect_url": status.redirect_url, - "detail": status.error_message, - }, HTTPStatus.OK - return {"status": "connection_error", "detail": status.error_message}, HTTPStatus.OK - - if status.copy_storage: - storage = handler.handler_storage.export_files() - - new_integration_id = session.integration_controller.add(name, database["engine"], parameters) - - if storage: - handler = session.integration_controller.get_data_handler(name, connect=False) - handler.handler_storage.import_files(storage) - - new_integration = session.database_controller.get_integration(new_integration_id) - return new_integration, HTTPStatus.CREATED - - -@ns_conf.route("/status") -class DatabasesStatusResource(Resource): - @ns_conf.doc("check_database_connection_status") - @api_endpoint_metrics("POST", "/databases/status") - def post(self): - """Check the connection parameters for a database""" - data = {} - if request.content_type == "application/json": - data.update(request.json or {}) - elif request.content_type.startswith("multipart/form-data"): - data.update(request.form or {}) - - if "engine" not in data: - return http_error(HTTPStatus.BAD_REQUEST, "Wrong argument", 'Missing "engine" field for database') - - 
engine = data["engine"] - parameters = data - del parameters["engine"] - - files = request.files - temp_dir = None - if files is not None and len(files) > 0: - temp_dir = tempfile.mkdtemp(prefix="integration_files_") - for key, file in files.items(): - temp_dir_path = Path(temp_dir) - file_name = Path(file.filename) - file_path = temp_dir_path.joinpath(file_name).resolve() - if temp_dir_path not in file_path.parents: - raise Exception(f"Can not save file at path: {file_path}") - file.save(file_path) - parameters[key] = str(file_path) - - session = SessionController() - - try: - handler = session.integration_controller.create_tmp_handler("test_connection", engine, parameters) - status = handler.check_connection() - except ImportError as import_error: - status = HandlerStatusResponse(success=False, error_message=str(import_error)) - except Exception as unknown_error: - status = HandlerStatusResponse(success=False, error_message=str(unknown_error)) - finally: - if temp_dir is not None: - shutil.rmtree(temp_dir) - - if not status.success: - if hasattr(status, "redirect_url") and isinstance(status.redirect_url, str): - return { - "status": "redirect_required", - "redirect_url": status.redirect_url, - "detail": status.error_message, - }, HTTPStatus.OK - return {"status": "connection_error", "detail": status.error_message}, HTTPStatus.OK - - return { - "status": "success", - }, HTTPStatus.OK - - -@ns_conf.route("/") -class DatabaseResource(Resource): - @ns_conf.doc("get_database") - @api_endpoint_metrics("GET", "/databases/database") - def get(self, database_name): - """Gets a database by name""" - session = SessionController() - check_connection = request.args.get("check_connection", "false").lower() in ("1", "true") - try: - project = session.database_controller.get_project(database_name) - result = {"name": database_name, "type": "project", "id": project.id, "engine": None} - if check_connection: - result["connection_status"] = {"success": True, "error_message": None} 
- except (ValueError, EntityNotExistsError): - integration = session.integration_controller.get(database_name) - if integration is None: - return http_error( - HTTPStatus.NOT_FOUND, "Database not found", f"Database with name {database_name} does not exist." - ) - result = integration - if check_connection: - integration["connection_status"] = {"success": False, "error_message": None} - try: - handler = session.integration_controller.get_data_handler(database_name) - status = handler.check_connection() - integration["connection_status"]["success"] = status.success - integration["connection_status"]["error_message"] = status.error_message - except Exception as e: - integration["connection_status"]["success"] = False - integration["connection_status"]["error_message"] = str(e) - - return result - - @ns_conf.doc("update_database") - @api_endpoint_metrics("PUT", "/databases/database") - def put(self, database_name): - """Updates or creates a database""" - if "database" not in request.json: - return http_error( - HTTPStatus.BAD_REQUEST, "Wrong argument", 'Must provide "database" parameter in POST body' - ) - - session = SessionController() - parameters = {} - database = request.json["database"] - check_connection = request.json.get("check_connection", False) - - if "parameters" in database: - parameters = database["parameters"] - if not session.database_controller.exists(database_name): - # Create. - if "engine" not in database: - return http_error( - HTTPStatus.BAD_REQUEST, - "Wrong argument", - 'Missing "engine" field for new database. 
' - "If you want to create a project instead, use the POST /api/projects endpoint.", - ) - try: - new_integration_id = session.integration_controller.add( - database_name, database["engine"], parameters, check_connection=check_connection - ) - except Exception as e: - return http_error(HTTPStatus.BAD_REQUEST, "Connection error", str(e) or "Connection error") - new_integration = session.database_controller.get_integration(new_integration_id) - return new_integration, HTTPStatus.CREATED - - try: - session.integration_controller.modify(database_name, parameters, check_connection=check_connection) - except Exception as e: - return http_error(HTTPStatus.BAD_REQUEST, "Connection error", str(e) or "Connection error") - - return session.integration_controller.get(database_name) - - @ns_conf.doc("delete_database") - @api_endpoint_metrics("DELETE", "/databases/database") - def delete(self, database_name): - """Deletes a database by name""" - session = SessionController() - if not session.database_controller.exists(database_name): - return http_error( - HTTPStatus.NOT_FOUND, "Database not found", f"Database with name {database_name} does not exist." - ) - try: - session.database_controller.delete(database_name) - except Exception as e: - logger.debug(f"Error while deleting database '{database_name}'", exc_info=True) - return http_error( - HTTPStatus.BAD_REQUEST, - "Error", - f"Cannot delete database {database_name}. " - + "This is most likely a system database, a permanent integration, or an ML engine with active models. " - + f"Full error: {e}. 
" - + "Please check the name and try again.", - ) - return "", HTTPStatus.NO_CONTENT - - -def _tables_row_to_obj(table_row: TablesRow) -> Dict: - type = table_row.TABLE_TYPE.lower() - if table_row.TABLE_TYPE == "BASE TABLE": - type = "data" - return {"name": table_row.TABLE_NAME, "type": type} - - -@ns_conf.route("//tables") -class TablesList(Resource): - @ns_conf.doc("list_tables") - @api_endpoint_metrics("GET", "/databases/database/tables") - def get(self, database_name): - """Get all tables in a database""" - session = SessionController() - datanode = session.datahub.get(database_name) - all_tables = datanode.get_tables() - table_objs = [_tables_row_to_obj(t) for t in all_tables] - return table_objs - - @ns_conf.doc("create_table") - @api_endpoint_metrics("POST", "/databases/database/tables") - def post(self, database_name): - """Creates a table in a database""" - if "table" not in request.json: - return http_error(HTTPStatus.BAD_REQUEST, "Wrong argument", 'Must provide "table" parameter in POST body') - table = request.json["table"] - if "name" not in table: - return http_error(HTTPStatus.BAD_REQUEST, "Wrong argument", 'Missing "name" field for table') - if "select" not in table: - return http_error(HTTPStatus.BAD_REQUEST, "Wrong argument", 'Missing "select" field for table') - table_name = table["name"] - select_query = table["select"] - replace = False - if "replace" in table: - replace = table["replace"] - - session = SessionController() - try: - session.database_controller.get_project(database_name) - error_message = ( - f"Database {database_name} is a project. " - + f"If you want to create a model or view, use the projects/{database_name}/models/{table_name} or " - + f"projects/{database_name}/views/{table_name} endpoints instead." - ) - return http_error(HTTPStatus.BAD_REQUEST, "Error", error_message) - except EntityNotExistsError: - # Only support creating tables from integrations. 
- pass - - datanode = session.datahub.get(database_name) - if datanode is None: - return http_error( - HTTPStatus.NOT_FOUND, "Database not exists", f"Database with name {database_name} does not exist" - ) - all_tables = datanode.get_tables() - for t in all_tables: - if t.TABLE_NAME == table_name and not replace: - return http_error(HTTPStatus.CONFLICT, "Name conflict", f"Table with name {table_name} already exists") - - try: - select_ast = parse_sql(select_query) - except ParsingException: - return http_error(HTTPStatus.BAD_REQUEST, "Error", f"Could not parse select statement {select_query}") - - create_ast = CreateTable(f"{database_name}.{table_name}", from_select=select_ast, is_replace=replace) - - mysql_proxy = FakeMysqlProxy() - - try: - mysql_proxy.process_query(create_ast.get_string()) - except Exception as e: - return http_error(HTTPStatus.BAD_REQUEST, "Error", str(e)) - - all_tables = datanode.get_tables() - try: - matching_table = next(t for t in all_tables if t.TABLE_NAME == table_name) - return _tables_row_to_obj(matching_table), HTTPStatus.CREATED - except StopIteration: - return http_error( - HTTPStatus.INTERNAL_SERVER_ERROR, "Error", f"Table with name {table_name} could not be created" - ) - - -@ns_conf.route("//tables/") -@ns_conf.param("database_name", "Name of the database") -@ns_conf.param("table_name", "Name of the table") -class TableResource(Resource): - @ns_conf.doc("get_table") - @api_endpoint_metrics("GET", "/databases/database/tables/table") - def get(self, database_name, table_name): - session = SessionController() - datanode = session.datahub.get(database_name) - all_tables = datanode.get_tables() - try: - matching_table = next(t for t in all_tables if t.TABLE_NAME == table_name) - return _tables_row_to_obj(matching_table) - except StopIteration: - return http_error(HTTPStatus.NOT_FOUND, "Table not found", f"Table with name {table_name} not found") - - @ns_conf.doc("drop_table") - @api_endpoint_metrics("DELETE", 
"/databases/database/tables/table") - def delete(self, database_name, table_name): - session = SessionController() - try: - session.database_controller.get_project(database_name) - error_message = ( - f"Database {database_name} is a project. " - + f"If you want to delete a model or view, use the projects/{database_name}/models/{table_name} or " - + f"projects/{database_name}/views/{table_name} endpoints instead." - ) - return http_error(HTTPStatus.BAD_REQUEST, "Error", error_message) - except EntityNotExistsError: - # Only support dropping tables from integrations. - pass - - datanode = session.datahub.get(database_name) - if datanode is None: - return http_error( - HTTPStatus.NOT_FOUND, "Database not found", f"Database with name {database_name} not found" - ) - all_tables = datanode.get_tables() - try: - next(t for t in all_tables if t.TABLE_NAME == table_name) - except StopIteration: - return http_error(HTTPStatus.NOT_FOUND, "Table not found", f"Table with name {table_name} not found") - - drop_ast = DropTables(tables=[table_name], if_exists=True) - - try: - integration_handler = session.integration_controller.get_data_handler(database_name) - except Exception: - return http_error( - HTTPStatus.INTERNAL_SERVER_ERROR, "Error", f"Could not get database handler for {database_name}" - ) - try: - result = integration_handler.query(drop_ast) - except NotImplementedError: - return http_error( - HTTPStatus.BAD_REQUEST, "Error", f"Database {database_name} does not support dropping tables." 
- ) - if result.type == RESPONSE_TYPE.ERROR: - return http_error(HTTPStatus.BAD_REQUEST, "Error", result.error_message) - return "", HTTPStatus.NO_CONTENT diff --git a/mindsdb/api/http/namespaces/default.py b/mindsdb/api/http/namespaces/default.py deleted file mode 100644 index 4b2e0940ba5..00000000000 --- a/mindsdb/api/http/namespaces/default.py +++ /dev/null @@ -1,154 +0,0 @@ -from flask import request, session -from flask_restx import Resource -from flask_restx import fields - -from mindsdb.__about__ import __version__ as mindsdb_version -from mindsdb.api.http.namespaces.configs.default import ns_conf -from mindsdb.api.http.utils import http_error -from mindsdb.metrics.metrics import api_endpoint_metrics -from mindsdb.utilities.config import config, HTTP_AUTH_TYPE -from mindsdb.utilities import log -from mindsdb.api.common.middleware import generate_pat, revoke_pat, verify_pat - - -logger = log.getLogger(__name__) - - -def check_session_auth() -> bool: - """checking whether current user is authenticated - - Returns: - bool: True if user authentication is approved - """ - try: - if config["auth"]["http_auth_enabled"] is False: - return True - return session.get("username") == config["auth"]["username"] - except Exception: - return False - - -@ns_conf.route("/login", methods=["POST"]) -class LoginRoute(Resource): - @ns_conf.doc( - responses={200: "Success", 400: "Error in username or password", 401: "Invalid username or password"}, - body=ns_conf.model( - "request_login", - {"username": fields.String(description="Username"), "password": fields.String(description="Password")}, - ), - ) - @api_endpoint_metrics("POST", "/default/login") - def post(self): - """Check user's credentials and creates a session""" - username = request.json.get("username") - password = request.json.get("password") - if ( - isinstance(username, str) is False - or len(username) == 0 - or isinstance(password, str) is False - or len(password) == 0 - ): - return http_error(400, "Error in 
username or password", "Username and password should be string") - - inline_username = config["auth"]["username"] - inline_password = config["auth"]["password"] - - if username != inline_username or password != inline_password: - return http_error(401, "Forbidden", "Invalid username or password") - - logger.info(f"User '{username}' logged in successfully") - - response = {} - if config["auth"]["http_auth_type"] in (HTTP_AUTH_TYPE.SESSION, HTTP_AUTH_TYPE.SESSION_OR_TOKEN): - session.clear() - session["username"] = username - session.permanent = True - - if config["auth"]["http_auth_type"] in (HTTP_AUTH_TYPE.TOKEN, HTTP_AUTH_TYPE.SESSION_OR_TOKEN): - if config["auth"]["token"]: - response["token"] = config["auth"]["token"] - else: - response["token"] = generate_pat() - - return response, 200 - - -@ns_conf.route("/logout", methods=["POST"]) -class LogoutRoute(Resource): - @ns_conf.doc(responses={200: "Success"}) - @api_endpoint_metrics("POST", "/default/logout") - def post(self): - session.clear() - # We can't forcibly log out a user with the - h = request.headers.get("Authorization") - if not h or not h.startswith("Bearer "): - bearer = None - else: - bearer = h.split(" ", 1)[1].strip() or None - revoke_pat(bearer) - return "", 200 - - -@ns_conf.route("/status") -class StatusRoute(Resource): - @ns_conf.doc( - responses={200: "Success"}, - model=ns_conf.model( - "response_status", - { - "environment": fields.String(description="The name of current environment: cloud, local or other"), - "mindsdb_version": fields.String(description="Current version of mindsdb"), - "auth": fields.Nested( - ns_conf.model( - "response_status_auth", - { - "confirmed": fields.Boolean(description="is current user authenticated"), - "required": fields.Boolean(description="is authenticated required"), - "provider": fields.Boolean(description="current authenticated provider: local of 3d-party"), - }, - ) - ), - }, - ), - ) - @api_endpoint_metrics("GET", "/default/status") - def get(self): - 
"""returns auth and environment data""" - environment = "local" - - environment = config.get("environment") - if environment is None: - if config.get("cloud", False): - environment = "cloud" - elif config.get("aws_marketplace", False): - environment = "aws_marketplace" - else: - environment = "local" - - auth_provider = "disabled" - if config["auth"]["http_auth_enabled"] is True: - if config["auth"].get("provider") is not None: - auth_provider = config["auth"].get("provider") - else: - auth_provider = "local" - - auth_confirmed = False - auth_type = config["auth"]["http_auth_type"] - if auth_type in (HTTP_AUTH_TYPE.SESSION, HTTP_AUTH_TYPE.SESSION_OR_TOKEN): - auth_confirmed = auth_confirmed or check_session_auth() - if auth_type in (HTTP_AUTH_TYPE.TOKEN, HTTP_AUTH_TYPE.SESSION_OR_TOKEN): - auth_confirmed = auth_confirmed or verify_pat( - request.headers.get("Authorization", "").replace("Bearer ", "") - ) - - resp = { - "mindsdb_version": mindsdb_version, - "environment": environment, - "auth": { - "confirmed": auth_confirmed, - "http_auth_enabled": config["auth"]["http_auth_enabled"], - "provider": auth_provider, - }, - } - - return resp diff --git a/mindsdb/api/http/namespaces/file.py b/mindsdb/api/http/namespaces/file.py deleted file mode 100644 index c93562b5a9f..00000000000 --- a/mindsdb/api/http/namespaces/file.py +++ /dev/null @@ -1,305 +0,0 @@ -import os -import shutil -import tarfile -import tempfile -import zipfile -from pathlib import Path -from urllib.parse import urlparse - -import multipart -import requests -from flask import current_app as ca -from flask import request -from flask_restx import Resource - -from mindsdb.api.http.namespaces.configs.files import ns_conf -from mindsdb.api.http.utils import http_error -from mindsdb.metrics.metrics import api_endpoint_metrics -from mindsdb.utilities.config import config -from mindsdb.utilities.context import context as ctx -from mindsdb.utilities import log -from mindsdb.utilities.security import 
is_private_url, clear_filename, validate_urls -from mindsdb.utilities.fs import safe_extract -from mindsdb.integrations.utilities.files.file_reader import FileProcessingError - -logger = log.getLogger(__name__) -MAX_FILE_SIZE = 1024 * 1024 * 100 # 100Mb - - -@ns_conf.route("/") -class FilesList(Resource): - @ns_conf.doc("get_files_list") - @api_endpoint_metrics("GET", "/files") - def get(self): - """List all files""" - return ca.file_controller.get_files() - - -@ns_conf.route("/") -@ns_conf.param("name", "MindsDB's name for file") -class File(Resource): - @ns_conf.doc("put_file") - @api_endpoint_metrics("PUT", "/files/file") - def put(self, name: str): - """add new file as table - - The table name is path paramether - - Data is provided as json or form data. File can be provided with FormData or via URL. - - If file is in FormData, then the form contain: - - source_type (str) - 'file' - - file (binary) - the file itself - - original_file_name (str, optional) - the name with which the file will be saved - - If file should be uploaded from URL: - - source_type (str) - 'url' - - source (str) - the URL - - original_file_name (str, optional) - the name with which the file will be saved - """ - - data = {} - mindsdb_file_name = name.lower() - - def on_field(field): - name = field.field_name.decode() - value = field.value.decode() - data[name] = value - - file_object = None - - def on_file(file): - nonlocal file_object - file_name = file.file_name.decode() - data["file"] = file_name - if Path(file_name).name != file_name: - if file_object is not None and not file_object.closed: - file_object.close() - raise ValueError(f"Wrong file name: {file_name}") - file_object = file.file_object - - temp_dir_path = tempfile.mkdtemp(prefix="mindsdb_file_") - - if request.headers["Content-Type"].startswith("multipart/form-data"): - parser = multipart.create_form_parser( - headers=request.headers, - on_field=on_field, - on_file=on_file, - config={ - "UPLOAD_DIR": temp_dir_path.encode(), 
# bytes required - "UPLOAD_KEEP_FILENAME": False, - "UPLOAD_KEEP_EXTENSIONS": True, - "UPLOAD_DELETE_TMP": False, - "MAX_MEMORY_FILE_SIZE": 0, - }, - ) - - while True: - chunk = request.stream.read(8192) - if not chunk: - break - parser.write(chunk) - parser.finalize() - parser.close() - - if Path(data["file"]).name != data["file"]: - raise ValueError(f"Wrong file name: {data['file']}") - - if file_object is not None: - if not file_object.closed: - try: - file_object.flush() - except (AttributeError, ValueError, OSError): - logger.debug("Failed to flush file_object before closing.", exc_info=True) - file_object.close() - Path(file_object.name).rename(Path(file_object.name).parent / Path(data["file"]).name) - file_object = None - allowed_keys = {"source_type", "file", "original_file_name"} - else: - data = request.json - allowed_keys = {"source_type", "source", "original_file_name"} - - if isinstance(data, dict) is False: - return http_error( - 400, - "Invalid request parameters", - "Unexpected parameters in request", - ) - - if len(set(data.keys()) - allowed_keys) > 0: - unexpected_keys = set(data.keys()) - allowed_keys - return http_error( - 400, - "Invalid request parameters", - f"Unexpected parameters in request: {', '.join(unexpected_keys)}. 
Allowed parameters: {', '.join(sorted(allowed_keys))}", - ) - - existing_file_names = ca.file_controller.get_files_names(lower=True) - if mindsdb_file_name.lower() in existing_file_names: - return http_error( - 400, - "File already exists", - f"File with name '{mindsdb_file_name}' already exists", - ) - - source_type = data.get("source_type", "file") - if source_type not in ("file", "url"): - return http_error( - 400, - "Wrong file source type", - f'Only "file" and "url" supported as file source, got "{source_type}"', - ) - - if source_type == "url": - url_file_upload_enabled = config["url_file_upload"]["enabled"] - if url_file_upload_enabled is False: - return http_error(400, "URL file upload is disabled.", "URL file upload is disabled.") - - if "file" in data: - return http_error( - 400, - "Fields conflict", - 'URL source type can not be used together with "file" field.', - ) - if "source" not in data: - return http_error( - 400, - "Missed file source", - 'If the file\'s source type is URL, the "source" field should be specified.', - ) - url = data["source"] - try: - url = urlparse(url) - if not (url.scheme and url.netloc): - raise ValueError() - url = url.geturl() - except Exception: - return http_error( - 400, - "Invalid URL", - f"The URL is not valid: {data['source']}", - ) - - allowed_origins = config["url_file_upload"]["allowed_origins"] - disallowed_origins = config["url_file_upload"]["disallowed_origins"] - - if validate_urls(url, allowed_origins, disallowed_origins) is False: - return http_error( - 400, - "Invalid URL", - "URL is not allowed for security reasons. 
Allowed hosts are: " - f"{', '.join(allowed_origins) if allowed_origins else 'not specified'}.", - ) - - data["file"] = clear_filename(mindsdb_file_name) - if config.is_cloud: - if is_private_url(url): - return http_error(400, f"URL is private: {url}") - - if ctx.user_class != 1: - info = requests.head(url, timeout=30) - file_size = info.headers.get("Content-Length") - try: - file_size = int(file_size) - except Exception: - pass - - if file_size is None: - return http_error( - 400, - "Error getting file info", - "Can't determine remote file size", - ) - if file_size > MAX_FILE_SIZE: - return http_error( - 400, - "File is too big", - f"Upload limit for file is {MAX_FILE_SIZE >> 20} MB", - ) - with requests.get(url, stream=True) as r: - if r.status_code != 200: - return http_error(400, "Error getting file", f"Got status code: {r.status_code}") - - temp_dir_real = os.path.realpath(temp_dir_path) - file_path = os.path.realpath(os.path.join(temp_dir_real, data["file"])) - if os.path.commonpath([file_path, temp_dir_real]) != temp_dir_real: - return http_error(400, "Invalid file path", f"Wrong file name: {data['file']}") - - with open(file_path, "wb") as f: - for chunk in r.iter_content(chunk_size=8192): - f.write(chunk) - - if "file" not in data: - return http_error( - 400, - "File field is missed", - 'The "field" field is missed in the form', - ) - - original_file_name = clear_filename(data.get("original_file_name")) - - file_path = os.path.join(temp_dir_path, data["file"]) - temp_dir_real = os.path.realpath(temp_dir_path) - file_path_real = os.path.realpath(file_path) - if os.path.commonpath([file_path_real, temp_dir_real]) != temp_dir_real: - shutil.rmtree(temp_dir_path, ignore_errors=True) - return http_error(400, "Invalid file path", f"Wrong file name: {data['file']}") - file_path = file_path_real - lp = file_path.lower() - if lp.endswith((".zip", ".tar.gz")): - try: - if lp.endswith(".zip"): - with zipfile.ZipFile(file_path) as f: - safe_extract(f, temp_dir_path) - 
elif lp.endswith(".tar.gz"): - with tarfile.open(file_path) as f: - safe_extract(f, temp_dir_path) - except Exception as e: - shutil.rmtree(temp_dir_path, ignore_errors=True) - return http_error(500, "Error", str(e)) - os.remove(file_path) - files = os.listdir(temp_dir_path) - if len(files) != 1: - shutil.rmtree(temp_dir_path, ignore_errors=True) - return http_error(400, "Wrong content.", "Archive must contain only one data file.") - file_path = os.path.join(temp_dir_path, files[0]) - mindsdb_file_name = files[0] - if not os.path.isfile(file_path): - shutil.rmtree(temp_dir_path, ignore_errors=True) - return http_error(400, "Wrong content.", "Archive must contain data file in root.") - - try: - if not Path(mindsdb_file_name).suffix == "": - return http_error(400, "Error", "File name cannot contain extension.") - ca.file_controller.save_file(mindsdb_file_name, file_path, file_name=original_file_name) - except FileProcessingError as e: - return http_error(400, "Error", str(e)) - except Exception as e: - return http_error(500, "Error", str(e)) - finally: - shutil.rmtree(temp_dir_path, ignore_errors=True) - - return "", 200 - - @ns_conf.doc("delete_file") - @api_endpoint_metrics("DELETE", "/files/file") - def delete(self, name: str): - """delete file""" - - try: - ca.file_controller.delete_file(name) - except FileNotFoundError: - logger.exception(f"Error when deleting file '{name}'") - return http_error( - 400, - "Error deleting file", - f"There was an error while trying to delete file with name '{name}'", - ) - except Exception as e: - logger.error(e) - return http_error( - 500, - "Error occured while deleting file", - f"There was an error while trying to delete file with name '{name}'", - ) - return "", 200 diff --git a/mindsdb/api/http/namespaces/handlers.py b/mindsdb/api/http/namespaces/handlers.py deleted file mode 100644 index 551b48ab7ca..00000000000 --- a/mindsdb/api/http/namespaces/handlers.py +++ /dev/null @@ -1,341 +0,0 @@ -import os -import tempfile -import 
importlib.util as iutil -import multipart -from pathlib import Path -from http import HTTPStatus - -from flask import request, send_file, current_app as ca -from flask_restx import Resource - -from mindsdb_sql_parser.ast import Identifier -from mindsdb_sql_parser.ast.mindsdb import CreateMLEngine - -from mindsdb.metrics.metrics import api_endpoint_metrics -from mindsdb.integrations.libs.ml_exec_base import process_cache -from mindsdb.integrations.utilities.install import install_dependencies -from mindsdb.interfaces.storage.model_fs import HandlerStorage -from mindsdb.api.http.utils import http_error -from mindsdb.api.http.namespaces.configs.handlers import ns_conf -from mindsdb.api.executor.controllers.session_controller import SessionController -from mindsdb.api.executor.command_executor import ExecuteCommands -from mindsdb.utilities.exception import EntityExistsError -from mindsdb.utilities.config import config -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -def _resolve_handler_readme_path(handler_folder: str) -> Path: - handler_folder_name = Path(handler_folder).name - if handler_folder_name != handler_folder or ".." 
in handler_folder: - raise ValueError(f"Handler folder '{handler_folder}' is invalid.") - - mindsdb_path = Path(iutil.find_spec("mindsdb").origin).parent - base_handlers_path = mindsdb_path.joinpath("integrations/handlers").resolve() - readme_path = base_handlers_path.joinpath(handler_folder_name).joinpath("README.md").resolve() - - if base_handlers_path not in readme_path.parents: - raise ValueError(f"Handler folder '{handler_folder}' is invalid.") - - return readme_path - - -@ns_conf.route("/") -class HandlersList(Resource): - @ns_conf.doc("handlers_list") - @api_endpoint_metrics("GET", "/handlers") - def get(self): - """List all db handlers""" - - if request.args.get("lazy") == "1": - handlers = ca.integration_controller.get_handlers_metadata() - else: - handlers = ca.integration_controller.get_handlers_import_status() - result = [] - for handler_type, handler_meta in handlers.items(): - if handler_meta is None: - continue - # remove non-integration handlers - if handler_type not in ["utilities", "dummy_data"]: - row = {"name": handler_type} - row.update(handler_meta) - del row["path"] - result.append(row) - return result - - -@ns_conf.route("//icon") -class HandlerIcon(Resource): - @ns_conf.param("handler_name", "Handler name") - @api_endpoint_metrics("GET", "/handlers/handler/icon") - def get(self, handler_name): - try: - handler_meta = ca.integration_controller.get_handlers_metadata().get(handler_name) - if handler_meta is None: - return http_error(HTTPStatus.NOT_FOUND, "Icon not found", f"Icon for {handler_name} not found") - icon = handler_meta.get("icon") - if icon is None or handler_meta.get("path") is None: - return http_error( - HTTPStatus.NOT_FOUND, - "Icon not found", - f"Icon for '{handler_name}' not found", - ) - icon_name = icon["name"] - # Use the stored handler path directly so community handlers - # (stored outside the mindsdb package) are also served correctly. 
- icon_path = handler_meta["path"] / icon_name - if not icon_path.is_absolute(): - icon_path = Path(os.getcwd()) / icon_path - except Exception: - error_message = f"Icon for '{handler_name}' not found" - logger.warning(error_message) - return http_error(HTTPStatus.NOT_FOUND, "Icon not found", error_message) - else: - return send_file(icon_path) - - -@ns_conf.route("/") -class HandlerInfo(Resource): - @ns_conf.param("handler_name", "Handler name") - @api_endpoint_metrics("GET", "/handlers/handler") - def get(self, handler_name): - handler_meta = ca.integration_controller.get_handler_meta(handler_name) - if handler_meta is None: - return http_error(HTTPStatus.NOT_FOUND, "Handler not found", f"Handler '{handler_name}' not found") - row = {"name": handler_name} - row.update(handler_meta) - row.pop("path", None) - row.pop("icon", None) - return row - - -@ns_conf.route("//readme") -class HandlerReadme(Resource): - @ns_conf.param("handler_name", "Handler name") - @api_endpoint_metrics("GET", "/handlers/handler/readme") - def get(self, handler_name): - try: - handler_meta = ca.integration_controller.get_handler_meta(handler_name) - except Exception: - return http_error( - HTTPStatus.NOT_FOUND, - "Readme not found", - f"Handler '{handler_name}' not found", - ) - - def make_response(*, error_message=None, readme=None): - return {"name": handler_name, "readme": readme, "error_message": error_message} - - if handler_meta is None: - error_message = f"Handler '{handler_name}' not found" - logger.warning(error_message) - return make_response(error_message=error_message) - - handler_folder = handler_meta.get("import", {}).get("folder") - if handler_folder is None: - error_message = f"Handler '{handler_name}' does not define a folder" - logger.warning(error_message) - return make_response(error_message=error_message) - - # Community handlers have their path set after fetching; use it directly. - # Built-in handlers resolve through the package tree. 
- handler_path = handler_meta.get("path") - if handler_path is not None: - readme_path = Path(handler_path) / "README.md" - else: - try: - readme_path = _resolve_handler_readme_path(handler_folder) - except ValueError as exc: - error_message = str(exc) - logger.warning(error_message) - return make_response(error_message=error_message) - - try: - with open(readme_path, "r", encoding="utf-8") as readme_file: - readme_content = readme_file.read() - except FileNotFoundError: - error_message = f"README.md for handler '{handler_name}' not found" - logger.warning(error_message) - return make_response(error_message=error_message) - - return make_response(readme=readme_content) - - -@ns_conf.route("//install") -class InstallDependencies(Resource): - @ns_conf.param("handler_name", "Handler name") - @api_endpoint_metrics("POST", "/handlers/handler/install") - def post(self, handler_name): - handler_meta = ca.integration_controller.get_handler_meta(handler_name) - - if handler_meta is None: - return f"Unknown handler: {handler_name}", 400 - - if handler_meta.get("import", {}).get("success", False) is True: - return "Installed", 200 - - dependencies = handler_meta["import"]["dependencies"] - if len(dependencies) == 0: - return "Installed", 200 - - result = install_dependencies(dependencies) - - # reload it if any result, so we can get new error message - ca.integration_controller.reload_handler_module(handler_name) - if result.get("success") is True: - # If warm processes are available in the cache, remove them. - # This will force a new process to be created with the installed dependencies. 
- process_cache.remove_processes_for_handler(handler_name) - return "", 200 - return http_error( - 500, - f"Failed to install dependencies for {handler_meta.get('title', handler_name)}", - result.get("error_message", "unknown error"), - ) - - -def prepare_formdata(): - params = {} - file_names = [] - - def on_field(field): - name = field.field_name.decode() - value = field.value.decode() - params[name] = value - - def on_file(file): - file_name = file.file_name.decode() - if Path(file_name).name != file_name: - raise ValueError(f"Wrong file name: {file_name}") - - field_name = file.field_name.decode() - if field_name not in ("code", "modules"): - raise ValueError(f"Wrong field name: {field_name}") - - params[field_name] = file.file_object - file_names.append(field_name) - - temp_dir_path = tempfile.mkdtemp(prefix="mindsdb_file_") - - parser = multipart.create_form_parser( - headers=request.headers, - on_field=on_field, - on_file=on_file, - config={ - "UPLOAD_DIR": temp_dir_path.encode(), # bytes required - "UPLOAD_KEEP_FILENAME": True, - "UPLOAD_KEEP_EXTENSIONS": True, - "MAX_MEMORY_FILE_SIZE": float("inf"), - }, - ) - - while True: - chunk = request.stream.read(8192) - if not chunk: - break - parser.write(chunk) - parser.finalize() - parser.close() - - for file_name in file_names: - file_path = os.path.join(temp_dir_path, file_name) - with open(file_path, "wb") as f: - params[file_name].seek(0) - f.write(params[file_name].read()) - params[file_name].close() - params[file_name] = file_path - - return params - - -@ns_conf.route("/byom/") -@ns_conf.param("name", "Name of the model") -class BYOMUpload(Resource): - @ns_conf.doc("post_file") - @api_endpoint_metrics("POST", "/handlers/byom/handler") - def post(self, name): - if config["byom"]["enabled"] is not True: - return http_error( - HTTPStatus.FORBIDDEN, - "BYOM is disabled", - "BYOM is disabled" - if config.is_cloud - else ( - "BYOM is disabled on this server. 
To enable this feature, set the environment variable " - "MINDSDB_BYOM_ENABLED=true, or change the value in the configuration file config['byom']['enabled'] = True" - ), - ) - params = prepare_formdata() - - code_file_path = params["code"] - try: - module_file_path = params["modules"] - except KeyError: - module_file_path = Path(code_file_path).parent / "requirements.txt" - module_file_path.touch() - module_file_path = str(module_file_path) - - connection_args = {"code": code_file_path, "modules": module_file_path, "type": params.get("type")} - - session = SessionController() - - base_ml_handler = session.integration_controller.get_ml_handler(name) - base_ml_handler.update_engine(connection_args) - - engine_storage = HandlerStorage(base_ml_handler.integration_id) - - engine_versions = [int(x) for x in engine_storage.get_connection_args()["versions"].keys()] - - return {"last_engine_version": max(engine_versions), "engine_versions": engine_versions} - - @ns_conf.doc("put_file") - @api_endpoint_metrics("PUT", "/handlers/byom/handler") - def put(self, name): - """upload new model - params in FormData: - - code - - modules - """ - if config["byom"]["enabled"] is not True: - return http_error( - HTTPStatus.FORBIDDEN, - "BYOM is disabled", - "BYOM is disabled" - if config.is_cloud - else ( - "BYOM is disabled on this server. 
To enable this feature, set the environment variable " - "MINDSDB_BYOM_ENABLED=true, or change the value in the configuration file config['byom']['enabled'] = True" - ), - ) - - params = prepare_formdata() - - code_file_path = params["code"] - try: - module_file_path = params["modules"] - except KeyError: - module_file_path = Path(code_file_path).parent / "requirements.txt" - module_file_path.touch() - module_file_path = str(module_file_path) - - connection_args = { - "code": code_file_path, - "modules": module_file_path, - "mode": params.get("mode"), - "type": params.get("type"), - } - - ast_query = CreateMLEngine(name=Identifier(name), handler="byom", params=connection_args) - sql_session = SessionController() - command_executor = ExecuteCommands(sql_session) - try: - command_executor.execute_command(ast_query) - except EntityExistsError: - return http_error( - HTTPStatus.CONFLICT, - "Engine already exists", - f'Engine "{name}" already exists', - ) - - return "", 200 diff --git a/mindsdb/api/http/namespaces/integrations.py b/mindsdb/api/http/namespaces/integrations.py deleted file mode 100644 index e3fb7836ca8..00000000000 --- a/mindsdb/api/http/namespaces/integrations.py +++ /dev/null @@ -1,174 +0,0 @@ -from http import HTTPStatus - -from flask import request -from flask_restx import Resource - -from mindsdb.api.http.utils import http_error -from mindsdb.api.http.namespaces.configs.integrations import ns_conf -from mindsdb.api.mysql.mysql_proxy.classes.fake_mysql_proxy import FakeMysqlProxy -from mindsdb.integrations.libs.passthrough import PassthroughProtocol -from mindsdb.integrations.libs.passthrough_types import ( - ALLOWED_METHODS, - FORBIDDEN_REQUEST_HEADERS, - PassthroughError, - PassthroughNotSupportedError, - PassthroughRequest, - PassthroughResponse, - PassthroughValidationError, -) -from mindsdb.interfaces.database.integrations import integration_controller -from mindsdb.metrics.metrics import api_endpoint_metrics -from mindsdb.utilities import log - 
-logger = log.getLogger(__name__) - - -def _handler_supports_passthrough(handler_module) -> bool: - handler_cls = getattr(handler_module, "Handler", None) - if handler_cls is None: - return False - # issubclass is the right check for Protocol when classes define the - # methods as real methods (not just dynamic attrs); runtime_checkable - # Protocols support issubclass in that mode. - try: - return issubclass(handler_cls, PassthroughProtocol) - except TypeError: - return False - - -def _get_passthrough_handler(name: str): - """Look up the datasource's handler and verify it satisfies the contract.""" - proxy = FakeMysqlProxy() - handler = proxy.session.integration_controller.get_data_handler(name) - if not isinstance(handler, PassthroughProtocol): - raise PassthroughNotSupportedError(f"datasource '{name}' does not support REST passthrough") - return handler - - -def _parse_passthrough_request(payload: dict) -> PassthroughRequest: - if not isinstance(payload, dict): - raise PassthroughValidationError("request body must be a JSON object") - - method = payload.get("method") - path = payload.get("path") - if not isinstance(method, str) or method.upper() not in ALLOWED_METHODS: - raise PassthroughValidationError(f"'method' must be one of {sorted(ALLOWED_METHODS)}") - if not isinstance(path, str) or not path.startswith("/"): - raise PassthroughValidationError("'path' must be a string starting with '/'") - - headers = payload.get("headers") or {} - if not isinstance(headers, dict): - raise PassthroughValidationError("'headers' must be an object") - for name in headers: - if not isinstance(name, str): - raise PassthroughValidationError("header names must be strings") - if name.lower() in FORBIDDEN_REQUEST_HEADERS or name.lower().startswith("proxy-"): - raise PassthroughValidationError(f"header '{name}' is not allowed in passthrough requests") - - query = payload.get("query") or {} - if not isinstance(query, dict): - raise PassthroughValidationError("'query' must be an 
object") - - return PassthroughRequest( - method=method.upper(), - path=path, - query={str(k): str(v) for k, v in query.items()}, - headers={str(k): str(v) for k, v in headers.items()}, - body=payload.get("body"), - ) - - -def _serialize_response(resp: PassthroughResponse) -> dict: - return { - "status_code": resp.status_code, - "headers": resp.headers, - "body": resp.body, - "content_type": resp.content_type, - } - - -def _passthrough_error_response(err: PassthroughError): - return { - "error_code": err.error_code, - "message": str(err), - }, err.http_status - - -@ns_conf.route("//passthrough") -@ns_conf.param("name", "Datasource name") -class Passthrough(Resource): - @ns_conf.doc("passthrough") - @api_endpoint_metrics("POST", "/integrations/passthrough") - def post(self, name: str): - payload = request.json or {} - try: - req = _parse_passthrough_request(payload) - handler = _get_passthrough_handler(name) - response = handler.api_passthrough(req) - except PassthroughError as e: - return _passthrough_error_response(e) - except Exception as e: # noqa: BLE001 - logger.exception("passthrough failed for datasource %s", name) - return http_error(HTTPStatus.INTERNAL_SERVER_ERROR, "PassthroughError", str(e)) - - return _serialize_response(response), 200 - - -@ns_conf.route("//passthrough/test") -@ns_conf.param("name", "Datasource name") -class PassthroughTest(Resource): - @ns_conf.doc("passthrough_test") - @api_endpoint_metrics("POST", "/integrations/passthrough/test") - def post(self, name: str): - try: - handler = _get_passthrough_handler(name) - except PassthroughError as e: - return _passthrough_error_response(e) - except Exception as e: # noqa: BLE001 - logger.exception("passthrough test lookup failed for datasource %s", name) - return http_error(HTTPStatus.INTERNAL_SERVER_ERROR, "PassthroughError", str(e)) - - result = handler.test_passthrough() - return result, 200 - - -@ns_conf.route("/capabilities") -class Capabilities(Resource): - """Return structured 
passthrough capabilities per handler. - - The new ``handlers`` dict is the canonical shape callers should migrate - to. The legacy flat ``bearer_passthrough`` list is still populated for - backward compat — Minds can migrate on its own timeline. - """ - - @ns_conf.doc("integration_capabilities") - @api_endpoint_metrics("GET", "/integrations/capabilities") - def get(self): - handlers: dict[str, dict] = {} - bearer_engines: list[str] = [] - handler_modules = getattr(integration_controller, "handler_modules", {}) or {} - for engine, module in handler_modules.items(): - try: - if not _handler_supports_passthrough(module): - continue - handler_cls = getattr(module, "Handler", None) - # Read the declarative auth mode off the handler class. Default - # to "bearer" so protocol-only handlers that don't inherit the - # mixin still land in a sensible bucket. - auth_mode = getattr(handler_cls, "_auth_mode", "bearer") - handlers[engine] = { - "auth_modes": [auth_mode], - "operations": ["passthrough"], - } - if auth_mode == "bearer": - bearer_engines.append(engine) - except Exception: - # A broken handler module should not break the capabilities endpoint. - logger.debug("skipping handler %s during capability probe", engine, exc_info=True) - bearer_engines.sort() - return { - "handlers": handlers, - # TODO: remove in v2 once Minds has migrated to the `handlers` - # structured shape. Keep backward-compat for now. 
- "bearer_passthrough": bearer_engines, - }, 200 diff --git a/mindsdb/api/http/namespaces/jobs.py b/mindsdb/api/http/namespaces/jobs.py deleted file mode 100644 index 184f6df7db6..00000000000 --- a/mindsdb/api/http/namespaces/jobs.py +++ /dev/null @@ -1,77 +0,0 @@ -from http import HTTPStatus - -from flask import request, current_app as ca -from flask_restx import Resource - -from mindsdb.api.http.namespaces.configs.projects import ns_conf -from mindsdb.api.http.utils import http_error -from mindsdb.metrics.metrics import api_endpoint_metrics - -from mindsdb.interfaces.jobs.jobs_controller import parse_job_date - - -@ns_conf.route("//jobs") -class JobsResource(Resource): - @ns_conf.doc("list_jobs") - @api_endpoint_metrics("GET", "/jobs") - def get(self, project_name): - """List all jobs in a project""" - return ca.jobs_controller.get_list(project_name) - - @ns_conf.doc("create_job") - @api_endpoint_metrics("POST", "/jobs") - def post(self, project_name): - """Create a job in a project""" - - # Check for required parameters. 
- if "job" not in request.json: - return http_error(HTTPStatus.BAD_REQUEST, "Missing parameter", 'Must provide "job" parameter in POST body') - - job = request.json["job"] - - name = job.pop("name") - if job["start_at"] is not None: - job["start_at"] = parse_job_date(job["start_at"]) - if job["end_at"] is not None: - job["end_at"] = parse_job_date(job["end_at"]) - - create_job_name = ca.jobs_controller.add(name, project_name, **job) - - return ca.jobs_controller.get(create_job_name, project_name) - - -@ns_conf.route("//jobs/") -@ns_conf.param("project_name", "Name of the project") -@ns_conf.param("job_name", "Name of the job") -class JobResource(Resource): - @ns_conf.doc("get_job") - @api_endpoint_metrics("GET", "/jobs/job") - def get(self, project_name, job_name): - """Gets a job by name""" - job_info = ca.jobs_controller.get(job_name, project_name) - if job_info is not None: - return job_info - - return http_error(HTTPStatus.NOT_FOUND, "Job not found", f"Job with name {job_name} does not exist") - - @ns_conf.doc("delete_job") - @api_endpoint_metrics("DELETE", "/jobs/job") - def delete(self, project_name, job_name): - """Deletes a job by name""" - ca.jobs_controller.delete(job_name, project_name) - - return "", HTTPStatus.NO_CONTENT - - -@ns_conf.route("//jobs//history") -@ns_conf.param("project_name", "Name of the project") -@ns_conf.param("job_name", "Name of the job") -class JobsHistory(Resource): - @ns_conf.doc("job_history") - @api_endpoint_metrics("GET", "/jobs/job/history") - def get(self, project_name, job_name): - """Get history of job calls""" - if ca.jobs_controller.get(job_name, project_name) is None: - return http_error(HTTPStatus.NOT_FOUND, "Job not found", f"Job with name {job_name} does not exist") - - return ca.jobs_controller.get_history(job_name, project_name) diff --git a/mindsdb/api/http/namespaces/knowledge_bases.py b/mindsdb/api/http/namespaces/knowledge_bases.py deleted file mode 100644 index ccddff6ff24..00000000000 --- 
a/mindsdb/api/http/namespaces/knowledge_bases.py +++ /dev/null @@ -1,297 +0,0 @@ -from http import HTTPStatus - -from flask import request -from flask_restx import Resource -from mindsdb_sql_parser.ast import Identifier - -from mindsdb.api.http.namespaces.configs.projects import ns_conf -from mindsdb.api.executor.controllers.session_controller import SessionController -from mindsdb.api.executor.exceptions import ExecutorException -from mindsdb.api.http.utils import http_error - -from mindsdb.api.mysql.mysql_proxy.classes.fake_mysql_proxy import FakeMysqlProxy -from mindsdb.integrations.utilities.rag.splitters.file_splitter import FileSplitter, FileSplitterConfig -from mindsdb.interfaces.file.file_controller import FileController -from mindsdb.interfaces.knowledge_base.preprocessing.constants import ( - DEFAULT_CRAWL_DEPTH, - DEFAULT_WEB_FILTERS, - DEFAULT_WEB_CRAWL_LIMIT, -) -from mindsdb.interfaces.knowledge_base.preprocessing.document_loader import DocumentLoader -from mindsdb.metrics.metrics import api_endpoint_metrics -from mindsdb.interfaces.database.projects import ProjectController -from mindsdb.utilities import log -from mindsdb.utilities.exception import EntityNotExistsError, EntityExistsError - - -logger = log.getLogger(__name__) - - -@ns_conf.route("//knowledge_bases") -class KnowledgeBasesResource(Resource): - @ns_conf.doc("list_knowledge_bases") - @api_endpoint_metrics("GET", "/knowledge_bases") - def get(self, project_name): - """List all knowledge bases""" - session = SessionController() - project_controller = ProjectController() - try: - _ = project_controller.get(name=project_name) - except EntityNotExistsError: - # Project must exist. - return http_error( - HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist" - ) - - # KB Controller already returns dict. 
- return session.kb_controller.list(project_name) - - @ns_conf.doc("create_knowledge_base") - @api_endpoint_metrics("POST", "/knowledge_bases") - def post(self, project_name): - """Create a knowledge base""" - - # Check for required parameters. - if "knowledge_base" not in request.json: - return http_error( - HTTPStatus.BAD_REQUEST, "Missing parameter", 'Must provide "knowledge_base" parameter in POST body' - ) - - knowledge_base = request.json["knowledge_base"] - # Explicitly require embedding model & vector database. - required_fields = ["name"] - for field in required_fields: - if field not in knowledge_base: - return http_error( - HTTPStatus.BAD_REQUEST, "Missing parameter", f'Must provide "{field}" field in "knowledge_base"' - ) - if "storage" in knowledge_base: - if "database" not in knowledge_base["storage"] or "table" not in knowledge_base["storage"]: - return http_error( - HTTPStatus.BAD_REQUEST, - "Missing parameter", - 'Must provide "database" and "table" field in "storage" param', - ) - - session = SessionController() - project_controller = ProjectController() - try: - project = project_controller.get(name=project_name) - except EntityNotExistsError: - # Project must exist. - return http_error( - HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist" - ) - - kb_name = knowledge_base.get("name") - existing_kb = session.kb_controller.get(kb_name, project.id) - if existing_kb is not None: - # Knowledge Base must not exist. - return http_error( - HTTPStatus.CONFLICT, - "Knowledge Base already exists", - f"Knowledge Base with name {kb_name} already exists", - ) - - # Legacy: Support for embedding model identifier. 
- # embedding_model_identifier = None - # if knowledge_base.get('model'): - # embedding_model_identifier = Identifier(parts=[knowledge_base['model']]) - - storage = knowledge_base.get("storage") - embedding_table_identifier = None - if storage is not None: - embedding_table_identifier = Identifier(parts=[storage["database"], storage["table"]]) - - params = knowledge_base.get("params", {}) - - optional_parameter_fields = [ - "embedding_model", - "reranking_model", - "content_columns", - "metadata_columns", - "id_column", - ] - - for field in optional_parameter_fields: - if field in knowledge_base: - params[field] = knowledge_base[field] - - try: - new_kb = session.kb_controller.add( - kb_name, - project.name, - embedding_table_identifier, - params=params, - preprocessing_config=knowledge_base.get("preprocessing"), - ) - except ValueError as e: - return http_error(HTTPStatus.BAD_REQUEST, "Invalid preprocessing configuration", str(e)) - except EntityExistsError as e: - return http_error(HTTPStatus.BAD_REQUEST, "Knowledge base already exists", str(e)) - - return new_kb.as_dict(session.show_secrets), HTTPStatus.CREATED - - -@ns_conf.route("//knowledge_bases/") -@ns_conf.param("project_name", "Name of the project") -@ns_conf.param("knowledge_base_name", "Name of the knowledge_base") -class KnowledgeBaseResource(Resource): - @ns_conf.doc("get_knowledge_base") - @api_endpoint_metrics("GET", "/knowledge_bases/knowledge_base") - def get(self, project_name, knowledge_base_name): - """Gets a knowledge base by name""" - session = SessionController() - project_controller = ProjectController() - try: - project = project_controller.get(name=project_name) - except EntityNotExistsError: - # Project must exist. 
- return http_error( - HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist" - ) - - existing_kb = session.kb_controller.get(knowledge_base_name, project.id) - if existing_kb is None: - return http_error( - HTTPStatus.NOT_FOUND, - "Knowledge Base not found", - f"Knowledge Base with name {knowledge_base_name} does not exist", - ) - return existing_kb.as_dict(session.show_secrets), HTTPStatus.OK - - @ns_conf.doc("update_knowledge_base") - @api_endpoint_metrics("PUT", "/knowledge_bases/knowledge_base") - def put(self, project_name: str, knowledge_base_name: str): - """Updates a knowledge base with optional preprocessing.""" - - # Check for required parameters - if "knowledge_base" not in request.json: - return http_error( - HTTPStatus.BAD_REQUEST, "Missing parameter", 'Must provide "knowledge_base" parameter in PUT body' - ) - - session = SessionController() - project_controller = ProjectController() - - try: - project = project_controller.get(name=project_name) - except EntityNotExistsError: - return http_error( - HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist" - ) - - try: - kb_data = request.json["knowledge_base"] - - # Retrieve the knowledge base table for updates - table = session.kb_controller.get_table(knowledge_base_name, project.id, params=kb_data.get("params")) - if table is None: - return http_error( - HTTPStatus.NOT_FOUND, - "Knowledge Base not found", - f"Knowledge Base with name {knowledge_base_name} does not exist", - ) - - # Set up dependencies for DocumentLoader - file_controller = FileController() - file_splitter_config = FileSplitterConfig() - file_splitter = FileSplitter(file_splitter_config) - mysql_proxy = FakeMysqlProxy() - - # Initialize DocumentLoader with required components - document_loader = DocumentLoader( - file_controller=file_controller, - file_splitter=file_splitter, - mysql_proxy=mysql_proxy, - ) - - # Configure table with dependencies - 
table.document_loader = document_loader - - # Update preprocessing configuration if provided - if "preprocessing" in kb_data: - table.configure_preprocessing(kb_data["preprocessing"]) - - # Process raw data rows if provided - if kb_data.get("rows"): - table.insert_rows(kb_data["rows"]) - - # Process files if specified - if kb_data.get("files"): - table.insert_files(kb_data["files"]) - - # Process web pages if URLs provided - if kb_data.get("urls"): - table.insert_web_pages( - urls=kb_data["urls"], - limit=kb_data.get("limit") or DEFAULT_WEB_CRAWL_LIMIT, - crawl_depth=kb_data.get("crawl_depth", DEFAULT_CRAWL_DEPTH), - filters=kb_data.get("filters", DEFAULT_WEB_FILTERS), - ) - - # Process query if provided - if kb_data.get("query"): - table.insert_query_result(kb_data["query"], project_name) - - # update KB - update_kb_data = {} - if "params" in kb_data: - allowed_keys = [ - "id_column", - "metadata_columns", - "content_columns", - "preprocessing", - "reranking_model", - "embedding_model", - ] - update_kb_data = {k: v for k, v in kb_data["params"].items() if k in allowed_keys} - if update_kb_data or "preprocessing" in kb_data: - session.kb_controller.update( - knowledge_base_name, - project.name, - params=update_kb_data, - preprocessing_config=kb_data.get("preprocessing"), - ) - - except ExecutorException as e: - logger.exception("Error during preprocessing and insertion:") - return http_error( - HTTPStatus.BAD_REQUEST, - "Invalid SELECT query", - f'Executing "query" failed. 
Needs to be a valid SELECT statement that returns data: {e}', - ) - - except Exception as e: - logger.exception("Error during preprocessing and insertion:") - return http_error( - HTTPStatus.BAD_REQUEST, "Preprocessing Error", f"Error during preprocessing and insertion: {e}" - ) - - return "", HTTPStatus.OK - - @ns_conf.doc("delete_knowledge_base") - @api_endpoint_metrics("DELETE", "/knowledge_bases/knowledge_base") - def delete(self, project_name: str, knowledge_base_name: str): - """Deletes a knowledge base.""" - project_controller = ProjectController() - try: - project = project_controller.get(name=project_name) - except EntityNotExistsError: - # Project must exist. - return http_error( - HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist" - ) - - session_controller = SessionController() - existing_kb = session_controller.kb_controller.get(knowledge_base_name, project.id) - if existing_kb is None: - # Knowledge Base must exist. - return http_error( - HTTPStatus.NOT_FOUND, - "Knowledge Base not found", - f"Knowledge Base with name {knowledge_base_name} does not exist", - ) - - session_controller.kb_controller.delete(knowledge_base_name, project_name) - return "", HTTPStatus.NO_CONTENT diff --git a/mindsdb/api/http/namespaces/models.py b/mindsdb/api/http/namespaces/models.py deleted file mode 100644 index 2337b351cf2..00000000000 --- a/mindsdb/api/http/namespaces/models.py +++ /dev/null @@ -1,269 +0,0 @@ -from http import HTTPStatus - -import json - -from flask import request -from flask_restx import Resource -import pandas as pd - -from mindsdb.api.http.namespaces.configs.projects import ns_conf -from mindsdb.api.http.utils import http_error -from mindsdb.api.executor.controllers.session_controller import SessionController -from mindsdb.metrics.metrics import api_endpoint_metrics -from mindsdb.interfaces.model.functions import PredictorRecordNotFound -from mindsdb.interfaces.storage.db import Predictor -from 
mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast.mindsdb import CreatePredictor -from mindsdb.utilities.exception import EntityNotExistsError -from mindsdb.utilities.config import config -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -@ns_conf.route("//models") -class ModelsList(Resource): - @ns_conf.doc("list_models") - @api_endpoint_metrics("GET", "/models") - def get(self, project_name): - """List all models""" - session = SessionController() - - try: - session.database_controller.get_project(project_name) - except EntityNotExistsError: - return http_error(HTTPStatus.NOT_FOUND, "Project not found", f"Project name {project_name} does not exist") - - return session.model_controller.get_models(with_versions=True, project_name=project_name) - - @ns_conf.doc("train_model") - @api_endpoint_metrics("POST", "/models") - def post(self, project_name): - """Creates a new model and trains it""" - session = SessionController() - - if "query" not in request.json: - return http_error(HTTPStatus.BAD_REQUEST, "Query required", 'Missing "query" SQL statement') - query = request.json["query"] - - project_datanode = session.datahub.get(project_name) - if project_datanode is None: - return http_error(HTTPStatus.NOT_FOUND, "Project not found", f"Project name {project_name} does not exist") - - try: - create_statement = parse_sql(query) - except Exception: - return http_error( - HTTPStatus.BAD_REQUEST, "Invalid query string", f"SQL CREATE statement is invalid: {query}" - ) - - if type(create_statement) is not CreatePredictor: - return http_error( - HTTPStatus.BAD_REQUEST, - "Invalid CREATE SQL statement", - f"SQL statement is not a CREATE model statement: {query}", - ) - - model_name = create_statement.name.parts[1].lower() - try: - session.model_controller.get_model(model_name, project_name=project_name) - return http_error( - HTTPStatus.CONFLICT, "Model already exists", f"Model with name {model_name} already exists" - ) - except 
PredictorRecordNotFound: - pass - - ml_integration = config["default_ml_engine"] - if create_statement.using is not None: - # Convert using to lowercase - create_statement.using = {k.lower(): v for k, v in create_statement.using.items()} - ml_integration = create_statement.using.pop("engine", ml_integration) - - if ml_integration is None: - return http_error(HTTPStatus.NOT_FOUND, "ML handler not found", "Default ML handler is not specified") - - try: - ml_handler = session.integration_controller.get_ml_handler(ml_integration) - except Exception: - return http_error( - HTTPStatus.NOT_FOUND, "ML handler not found", f"Cannot find ML handler with name {ml_integration}" - ) - - try: - model_df = session.model_controller.create_model(create_statement, ml_handler) - # Consistent format with GET /projects//models/ - return { - "name": model_df.at[0, "NAME"], - "accuracy": None, - "active": model_df.at[0, "ACTIVE"], - "version": model_df.at[0, "VERSION"], - "status": model_df.at[0, "STATUS"], - "predict": model_df.at[0, "PREDICT"], - "mindsdb_version": model_df.at[0, "MINDSDB_VERSION"], - "error": model_df.at[0, "ERROR"], - "fetch_data_query": model_df.at[0, "SELECT_DATA_QUERY"], - "problem_definition": model_df.at[0, "TRAINING_OPTIONS"], - }, HTTPStatus.CREATED - except Exception as e: - logger.exception("Something went wrong while creating and training model") - return http_error( - HTTPStatus.INTERNAL_SERVER_ERROR, - "Unable to train model", - f"Something went wrong while creating and training model {model_name}: {e}", - ) - - -@ns_conf.route("//models/") -@ns_conf.param("project_name", "Name of the project") -@ns_conf.param("model_name", "Name of the model") -class ModelResource(Resource): - @ns_conf.doc("get_model") - @api_endpoint_metrics("GET", "/models/model") - def get(self, project_name, model_name): - """Get a model by name and version""" - session = SessionController() - - project_datanode = session.datahub.get(project_name) - if project_datanode is None: - 
return http_error(HTTPStatus.NOT_FOUND, "Project not found", f"Project name {project_name} does not exist") - - name_no_version, version = Predictor.get_name_and_version(model_name) - try: - return session.model_controller.get_model(name_no_version, version=version, project_name=project_name) - except PredictorRecordNotFound: - return http_error(HTTPStatus.NOT_FOUND, "Model not found", f"Model with name {model_name} not found") - - @ns_conf.doc("update_model") - @api_endpoint_metrics("PUT", "/models/model") - def put(self, project_name, model_name): - """Update model""" - - session = SessionController() - - project_datanode = session.datahub.get(project_name) - if project_datanode is None: - return http_error(HTTPStatus.NOT_FOUND, "Project not found", f"Project name {project_name} does not exist") - - if "problem_definition" not in request.json: - return http_error( - HTTPStatus.BAD_REQUEST, "problem_definition required", 'Missing "problem_definition" field' - ) - - problem_definition = request.json["problem_definition"] - - model_name, version = Predictor.get_name_and_version(model_name) - - session.model_controller.update_model( - session, - project_name, - model_name, - version=version, - problem_definition=problem_definition, - ) - return session.model_controller.get_model(model_name, version=version, project_name=project_name) - - @ns_conf.doc("delete_model") - @api_endpoint_metrics("DELETE", "/models/model") - def delete(self, project_name, model_name): - """Deletes a model by name""" - - session = SessionController() - - project_datanode = session.datahub.get(project_name) - if project_datanode is None: - return http_error(HTTPStatus.NOT_FOUND, "Project not found", f"Project name {project_name} does not exist") - - name_no_version, version = Predictor.get_name_and_version(model_name) - try: - session.model_controller.get_model(name_no_version, version=version, project_name=project_name) - except PredictorRecordNotFound: - return 
http_error(HTTPStatus.NOT_FOUND, "Model not found", f"Model with name {model_name} not found") - - try: - session.model_controller.delete_model(name_no_version, project_name, version=version) - except Exception as e: - logger.exception(f"Something went wrong while deleting model '{model_name}'") - return http_error( - HTTPStatus.INTERNAL_SERVER_ERROR, - "Error deleting model", - f"Something went wrong while deleting {model_name}: {e}", - ) - - return "", HTTPStatus.NO_CONTENT - - -@ns_conf.route("//models//predict") -@ns_conf.param("project_name", "Name of the project") -@ns_conf.param("model_name", "Name of the model") -class ModelPredict(Resource): - @ns_conf.doc("post_model_predict") - @api_endpoint_metrics("POST", "/models/model/predict") - def post(self, project_name, model_name): - """Call prediction""" - - name_no_version, version = Predictor.get_name_and_version(model_name) - - session = SessionController() - project_datanode = session.datahub.get(project_name) - if project_datanode is None: - return http_error( - HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist" - ) - - try: - session.model_controller.get_model(name_no_version, version=version, project_name=project_name) - except PredictorRecordNotFound: - return http_error(HTTPStatus.NOT_FOUND, "Model not found", f"Model with name {model_name} not found") - - data = request.json["data"] - if isinstance(data, str): - # Support object or serialized object. 
- data = json.loads(data) - params = request.json.get("params") - - predictions = project_datanode.predict( - model_name=name_no_version, - df=pd.DataFrame(data), - version=version, - params=params, - ) - - return predictions.to_dict("records") - - -@ns_conf.route("//models//describe") -@ns_conf.param("project_name", "Name of the project") -@ns_conf.param("model_name", "Name of the model") -class ModelDescribe(Resource): - @ns_conf.doc("describe_model") - @api_endpoint_metrics("GET", "/models/model/describe") - def get(self, project_name, model_name): - """Describes a model""" - session = SessionController() - - project_datanode = session.datahub.get(project_name) - if project_datanode is None: - return http_error(HTTPStatus.NOT_FOUND, "Project not found", f"Project name {project_name} does not exist") - - name_no_version, version = Predictor.get_name_and_version(model_name) - - try: - session.model_controller.get_model(name_no_version, version=version, project_name=project_name) - except PredictorRecordNotFound: - return http_error(HTTPStatus.NOT_FOUND, "Model not found", f"Model with name {model_name} not found") - - attribute = None - if "attribute" in request.json: - attribute = request.json["attribute"] - - try: - description_df = session.model_controller.describe_model( - session, project_name, name_no_version, attribute, version=version - ) - return description_df.to_dict("records") - except Exception: - return http_error( - HTTPStatus.BAD_REQUEST, - "ML handler unsupported", - f"ML handler for {model_name} does not support model description", - ) diff --git a/mindsdb/api/http/namespaces/projects.py b/mindsdb/api/http/namespaces/projects.py deleted file mode 100644 index 7dae70fa2c4..00000000000 --- a/mindsdb/api/http/namespaces/projects.py +++ /dev/null @@ -1,37 +0,0 @@ -from http import HTTPStatus - -from flask_restx import Resource - -from mindsdb.metrics.metrics import api_endpoint_metrics -from mindsdb.api.http.utils import http_error -from 
mindsdb.api.http.namespaces.configs.projects import ns_conf -from mindsdb.api.executor.controllers.session_controller import SessionController -from mindsdb.utilities.exception import EntityNotExistsError - - -@ns_conf.route("/") -class ProjectsList(Resource): - @ns_conf.doc("list_projects") - @api_endpoint_metrics("GET", "/projects") - def get(self): - """List all projects""" - session = SessionController() - - projects = [{"name": i} for i in session.datahub.get_projects_names()] - return projects - - -@ns_conf.route("/") -class ProjectsGet(Resource): - @ns_conf.doc("get_project") - @api_endpoint_metrics("GET", "/projects/project") - def get(self, project_name): - """Gets a project by name""" - session = SessionController() - - try: - project = session.database_controller.get_project(project_name) - except EntityNotExistsError: - return http_error(HTTPStatus.NOT_FOUND, "Project not exists", f"Project name {project_name} does not exist") - - return {"name": project.name} diff --git a/mindsdb/api/http/namespaces/sql.py b/mindsdb/api/http/namespaces/sql.py deleted file mode 100644 index fec3f2a8483..00000000000 --- a/mindsdb/api/http/namespaces/sql.py +++ /dev/null @@ -1,448 +0,0 @@ -import time -from enum import Enum -from http import HTTPStatus -from collections import defaultdict - -from flask import request, Response -from flask_restx import Resource - -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser import ast - -import mindsdb.utilities.hooks as hooks -import mindsdb.utilities.profiler as profiler -from mindsdb.api.http.utils import http_error -from mindsdb.api.http.namespaces.configs.sql import ns_conf -from mindsdb.api.mysql.mysql_proxy.classes.fake_mysql_proxy import FakeMysqlProxy -from mindsdb.api.executor.data_types.sql_answer import SQLAnswer -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE as SQL_RESPONSE_TYPE -from mindsdb.api.executor.sql_query.result_set import ResultSet -from mindsdb.api.executor.exceptions 
import ExecutorException, UnknownError -from mindsdb.integrations.utilities.query_traversal import query_traversal -from mindsdb.metrics.metrics import api_endpoint_metrics -from mindsdb.utilities import log -from mindsdb.utilities.config import Config -from mindsdb.utilities.context import context as ctx -from mindsdb.utilities.exception import QueryError -from mindsdb.utilities.functions import mark_process -from mindsdb.interfaces.agents.chart_agent import ChartAgent - -logger = log.getLogger(__name__) - - -class ReponseFormat(Enum): - DEFAULT = None - SSE = "sse" - JSONLINES = "jsonlines" - - -@ns_conf.route("/query") -@ns_conf.param("query", "Execute query") -class Query(Resource): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - @ns_conf.doc("query") - @api_endpoint_metrics("POST", "/sql/query") - @mark_process(name="http_query") - def post(self): - start_time = time.time() - query = request.json["query"] - context = request.json.get("context", {}) - - if "params" in request.json: - ctx.params = request.json["params"] - - try: - response_format = ReponseFormat(request.json.get("response_format", None)) - except ValueError: - return http_error(HTTPStatus.BAD_REQUEST, "Invalid stream format", "Please provide a valid stream format.") - - if isinstance(query, str) is False or isinstance(context, dict) is False: - return http_error(HTTPStatus.BAD_REQUEST, "Wrong arguments", 'Please provide "query" with the request.') - logger.debug(f"Incoming query: {query}") - - if context.get("profiling") is True: - profiler.enable() - - error_type = None - error_traceback = None - - profiler.set_meta(query=query, api="http", environment=Config().get("environment")) - with profiler.Context("http_query_processing"): - mysql_proxy = FakeMysqlProxy() - mysql_proxy.set_context(context) - - if context.get("native_query"): - db = context.get("db") - if not db: - return { - "type": "error", - "error_code": 0, - "error_message": "native_query requires 'db' 
in context", - }, 400 - - logger.debug(f"Running query natively for database {db}") - - try: - handler = mysql_proxy.session.integration_controller.get_data_handler(db) - raw_result = handler.native_query(query) - except Exception as e: - error_type = "unexpected" - result = SQLAnswer( - resp_type=SQL_RESPONSE_TYPE.ERROR, - error_code=0, - error_message=str(e), - ) - else: - if raw_result.type == SQL_RESPONSE_TYPE.ERROR: - # raw_result will be ErrorResponse. - error_type = "expected" - result = SQLAnswer( - resp_type=SQL_RESPONSE_TYPE.ERROR, - error_code=0, - error_message=raw_result.error_message, - ) - elif raw_result.type == SQL_RESPONSE_TYPE.OK: - result = SQLAnswer( - resp_type=SQL_RESPONSE_TYPE.OK, - error_code=0, - error_message=None, - ) - else: - # raw_result will be TableResponse. - result_set = ResultSet.from_table_response(raw_result) - result = SQLAnswer( - resp_type=SQL_RESPONSE_TYPE.TABLE, - result_set=result_set, - ) - - else: - try: - result: SQLAnswer = mysql_proxy.process_query(query) - except ExecutorException as e: - # classified error - error_type = "expected" - result = SQLAnswer( - resp_type=SQL_RESPONSE_TYPE.ERROR, - error_code=0, - error_message=str(e), - ) - logger.warning(f"Error query processing: {e}") - except QueryError as e: - error_type = "expected" if e.is_expected else "unexpected" - result = SQLAnswer( - resp_type=SQL_RESPONSE_TYPE.ERROR, - error_code=0, - error_message=str(e), - ) - if e.is_expected: - logger.warning(f"Query failed due to expected reason: {e}") - else: - logger.exception("Error query processing:") - except (UnknownError, Exception) as e: - error_type = "unexpected" - result = SQLAnswer( - resp_type=SQL_RESPONSE_TYPE.ERROR, - error_code=0, - error_message=str(e), - ) - logger.exception("Error query processing:") - - context = mysql_proxy.get_context() - - if response_format == ReponseFormat.JSONLINES: - query_response = result.stream_http_response_jsonlines(context=context) - query_response = 
Response(query_response, mimetype="application/jsonlines") - elif response_format == ReponseFormat.SSE: - query_response = result.stream_http_response_sse(context=context) - query_response = Response(query_response, mimetype="text/event-stream") - else: - query_response = result.dump_http_response(context=context), 200 - - hooks.after_api_query( - company_id=ctx.company_id, - user_id=ctx.user_id, - api="http", - command=None, - payload=query, - error_type=error_type, - error_code=result.error_code, - error_text=result.error_message, - traceback=error_traceback, - ) - - end_time = time.time() - log_msg = f"SQL processed in {(end_time - start_time):.2f}s ({end_time:.2f}-{start_time:.2f}), result is {result.type}, " - if result.type is SQL_RESPONSE_TYPE.TABLE and response_format is ReponseFormat.DEFAULT: - log_msg += f" one-piece result ({len(query_response[0]['data'])} rows), " - elif result.type is SQL_RESPONSE_TYPE.TABLE: - log_msg += f" {response_format} result, " - elif result.type is SQL_RESPONSE_TYPE.ERROR: - log_msg += f" ({result.error_message}), " - log_msg += f"used handlers: {ctx.used_handlers}" - logger.debug(log_msg) - - return query_response - - -@ns_conf.route("/charter") -@ns_conf.param("charter", "Generate Chart.js configuration from SQL query") -class Charter(Resource): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def _extract_error_message(self, error: Exception, context: str) -> str: - """ - Extract a user-friendly error message from an exception. 
- - Args: - error: The exception to extract message from - context: Context string for the error (e.g., "chart generation", "query execution") - - Returns: - str: User-friendly error message - """ - error_str = str(error) - - # Handle Pydantic validation errors - if "validation" in error_str.lower() or "pydantic" in error_str.lower(): - # Try to extract validation details - if hasattr(error, "errors"): - # Pydantic validation errors - try: - errors = error.errors() - if errors: - error_details = [] - for err in errors[:3]: # Limit to first 3 errors - loc = " -> ".join(str(x) for x in err.get("loc", [])) - msg = err.get("msg", "Validation error") - error_details.append(f"{loc}: {msg}") - if error_details: - return f"Chart configuration validation failed: {'; '.join(error_details)}" - except Exception: - pass - - # Check for retry errors from Pydantic AI - if "retries" in error_str.lower() or "retry" in error_str.lower(): - return "Failed to generate valid chart configuration after multiple attempts. The AI model may have generated an invalid format. Please try again or check your query." - - # Handle QueryError with db_error_msg - if isinstance(error, QueryError): - if hasattr(error, "db_error_msg") and error.db_error_msg: - msg = error.db_error_msg - if hasattr(error, "failed_query") and error.failed_query: - msg += f"\n\nFailed query: {error.failed_query[:200]}..." - return msg - if hasattr(error, "failed_query") and error.failed_query: - return f"Query execution failed: {error_str}\n\nFailed query: {error.failed_query[:200]}..." 
- - # Handle ExecutorException - if isinstance(error, ExecutorException): - return f"Query execution error: {error_str}" - - # For other exceptions, try to extract the main message - # Remove traceback-like content - lines = error_str.split("\n") - # Take the first line which usually contains the main error message - main_message = lines[0] if lines else error_str - - # If it's a generic exception, add context - if len(main_message) < 50 and context: - return f"Error during {context}: {main_message}" - - return main_message - - @ns_conf.doc("charter") - @api_endpoint_metrics("POST", "/sql/charter") - @mark_process(name="http_charter") - def post(self): - start_time = time.time() - - # Validate request - if not request.json: - return http_error(HTTPStatus.BAD_REQUEST, "Wrong arguments", 'Please provide JSON body with "query".') - - query = request.json.get("query") - prompt = request.json.get("prompt") - context = request.json.get("context", {}) - if "params" in request.json: - ctx.params = request.json["params"] - - if not isinstance(query, str): - return http_error(HTTPStatus.BAD_REQUEST, "Wrong arguments", 'Please provide "query" as a string.') - - if not isinstance(context, dict): - return http_error(HTTPStatus.BAD_REQUEST, "Wrong arguments", 'Please provide "context" as a dictionary.') - - logger.debug(f"Incoming charter request: query={query[:100]}..., prompt={prompt}") - - mysql_proxy = FakeMysqlProxy() - mysql_proxy.set_context(context) - try: - result: SQLAnswer = mysql_proxy.process_query(query) - except Exception as e: - error_msg = self._extract_error_message(e, "query execution") - logger.warning(f"Query error: {error_msg}") - return http_error(HTTPStatus.BAD_REQUEST, "Query error", error_msg) - - df = result.result_set.to_df() - - try: - chart_agent = ChartAgent(executor=mysql_proxy) - response = chart_agent.generate_chart_with_data(query, df, prompt) - - end_time = time.time() - logger.debug(f"Charter processed in {(end_time - start_time):.2f}s") - - 
return response, 200 - - except Exception as e: - error_msg = self._extract_error_message(e, "chart generation or execution") - logger.warning(f"Error in chart generation or execution: {error_msg}") - return http_error(HTTPStatus.BAD_REQUEST, "Chart generation failed", error_msg) - - -@ns_conf.route("/query/utils/parametrize_constants") -class ParametrizeConstants(Resource): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - @api_endpoint_metrics("POST", "/query/utils/parametrize_constants") - def post(self): - sql_query = request.json["query"] - - # find constants in the query and replace them with parameters - query = parse_sql(sql_query) - - parameters = [] - param_counts = {} - databases = defaultdict(set) - - def to_parameter(param_name, value): - if param_name is None: - param_name = default_param_name - - num = param_counts.get(param_name, 1) - param_counts[param_name] = num + 1 - - if num > 1: - param_name = param_name + str(num) - - parameters.append({"name": param_name, "value": value, "type": type(value).__name__}) - return ast.Parameter(param_name) - - def find_constants_f(node, is_table, is_target, callstack, **kwargs): - if is_table and isinstance(node, ast.Identifier): - if len(node.parts) > 1: - databases[node.parts[0]].add(".".join(node.parts[1:])) - - if not isinstance(node, ast.Constant): - return - - # it is a target - if is_target and node.alias is not None: - return to_parameter(node.alias.parts[-1], node.value) - - param_name = None - - for item in callstack: - # try to find the name - if isinstance(item, (ast.BinaryOperation, ast.BetweenOperation)) and item.op.lower() not in ( - "and", - "or", - ): - # it is probably a condition - for arg in item.args: - if isinstance(arg, ast.Identifier): - param_name = arg.parts[-1] - break - if param_name is not None: - break - - if item.alias is not None: - # it is probably a query target - param_name = item.alias.parts[-1] - break - - return to_parameter(param_name, 
node.value) - - if isinstance(query, ast.Update): - for name, value in dict(query.update_columns).items(): - if isinstance(value, ast.Constant): - query.update_columns[name] = to_parameter(name, value.value) - else: - default_param_name = name - query_traversal(value, find_constants_f) - - elif isinstance(query, ast.Insert): - # iterate over node.values and do some processing - if query.values: - values = [] - for row in query.values: - row2 = [] - for i, val in enumerate(row): - if isinstance(val, ast.Constant): - param_name = None - if query.columns and i < len(query.columns): - param_name = query.columns[i].name - elif query.table: - param_name = query.table.parts[-1] - val = to_parameter(param_name, val.value) - row2.append(val) - values.append(row2) - query.values = values - - default_param_name = "param" - query_traversal(query, find_constants_f) - - # to lists: - databases = {k: list(v) for k, v in databases.items()} - response = {"query": str(query), "parameters": parameters, "databases": databases} - return response, 200 - - -@ns_conf.route("/list_databases") -@ns_conf.param("list_databases", "lists databases of mindsdb") -class ListDatabases(Resource): - @ns_conf.doc("list_databases") - @api_endpoint_metrics("GET", "/sql/list_databases") - def get(self): - listing_query = "SHOW DATABASES" - mysql_proxy = FakeMysqlProxy() - try: - result: SQLAnswer = mysql_proxy.process_query(listing_query) - - # iterate over result.data and perform a query on each item to get the name of the tables - if result.type == SQL_RESPONSE_TYPE.ERROR: - listing_query_response = { - "type": "error", - "error_code": result.error_code, - "error_message": result.error_message, - } - elif result.type == SQL_RESPONSE_TYPE.OK: - listing_query_response = {"type": "ok"} - elif result.type == SQL_RESPONSE_TYPE.TABLE: - listing_query_response = { - "data": [ - { - "name": db_row[0], - "tables": [ - table_row[0] - for table_row in mysql_proxy.process_query( - "SHOW TABLES FROM 
`{}`".format(db_row[0]) - ).result_set.to_lists() - ], - } - for db_row in result.result_set.to_lists() - ] - } - except Exception as e: - logger.exception("Error while retrieving list of databases") - listing_query_response = { - "type": "error", - "error_code": 0, - "error_message": str(e), - } - - return listing_query_response, 200 diff --git a/mindsdb/api/http/namespaces/tab.py b/mindsdb/api/http/namespaces/tab.py deleted file mode 100644 index 360d1b6ce13..00000000000 --- a/mindsdb/api/http/namespaces/tab.py +++ /dev/null @@ -1,130 +0,0 @@ -import json -from http import HTTPStatus - -from flask import request -from flask_restx import Resource - -from mindsdb.metrics.metrics import api_endpoint_metrics -from mindsdb.api.http.namespaces.configs.tabs import ns_conf -from mindsdb.utilities import log -from mindsdb.api.http.utils import http_error -from mindsdb.utilities.exception import EntityNotExistsError -from mindsdb.interfaces.tabs.tabs_controller import ( - tabs_controller, - get_storage, - TABS_FILENAME, -) - - -logger = log.getLogger(__name__) - - -def _is_request_valid() -> bool: - """check if request body contains all (and only) required fields - - Returns: - bool: True if all required data in the request - """ - try: - data = request.json - except Exception: - return False - if ( - isinstance(data, dict) is False - or len(data.keys()) == 0 - or len(set(data.keys()) - {"index", "name", "content"}) != 0 - ): - return False - return True - - -@ns_conf.route("/") -class Tabs(Resource): - @ns_conf.doc("get_tabs") - @api_endpoint_metrics("GET", "/tabs") - def get(self): - mode = request.args.get("mode") - - if mode == "new": - return tabs_controller.get_all(), 200 - else: - # deprecated - storage = get_storage() - tabs = None - try: - raw_data = storage.file_get(TABS_FILENAME) - tabs = json.loads(raw_data) - except FileNotFoundError: - # Fresh installs or new-mode usage won't have legacy single-file storage. 
- return {}, 200 - except Exception: - logger.warning("unable to get tabs data", exc_info=True) - return {}, 200 - return tabs, 200 - - @ns_conf.doc("save_tab") - @api_endpoint_metrics("POST", "/tabs") - def post(self): - mode = request.args.get("mode") - - if mode == "new": - if _is_request_valid() is False: - return http_error(400, "Error", "Invalid parameters") - data = request.json - tab_meta = tabs_controller.add(**data) - tabs_meta = tabs_controller._get_tabs_meta() - return {"tab_meta": tab_meta, "tabs_meta": tabs_meta}, 200 - else: - # deprecated - storage = get_storage() - try: - tabs = request.json - b_types = json.dumps(tabs).encode("utf-8") - storage.file_set(TABS_FILENAME, b_types) - except Exception: - logger.exception("Unable to store tabs data:") - return http_error( - HTTPStatus.INTERNAL_SERVER_ERROR, - "Can't save tabs", - "something went wrong during tabs saving", - ) - - return "", 200 - - -@ns_conf.route("/") -@ns_conf.param("tab_id", "id of tab") -class Tab(Resource): - @ns_conf.doc("get_tab") - @api_endpoint_metrics("GET", "/tabs/tab") - def get(self, tab_id: int): - try: - tab_data = tabs_controller.get(int(tab_id)) - except EntityNotExistsError: - return http_error(404, "Error", "The tab does not exist") - - return tab_data, 200 - - @ns_conf.doc("put_tab") - @api_endpoint_metrics("PUT", "/tabs/tab") - def put(self, tab_id: int): - if _is_request_valid() is False: - return http_error(400, "Error", "Invalid parameters") - data = request.json - try: - tab_meta = tabs_controller.modify(int(tab_id), **data) - except EntityNotExistsError: - return http_error(404, "Error", "The tab does not exist") - - tabs_meta = tabs_controller._get_tabs_meta() - - return {"tab_meta": tab_meta, "tabs_meta": tabs_meta}, 200 - - @ns_conf.doc("delete_tab") - @api_endpoint_metrics("DELETE", "/tabs/tab") - def delete(self, tab_id: int): - try: - tabs_controller.delete(int(tab_id)) - except EntityNotExistsError: - return http_error(404, "Error", "The tab does not 
exist") - return "", 200 diff --git a/mindsdb/api/http/namespaces/tree.py b/mindsdb/api/http/namespaces/tree.py deleted file mode 100644 index 87e03225dfd..00000000000 --- a/mindsdb/api/http/namespaces/tree.py +++ /dev/null @@ -1,114 +0,0 @@ -import inspect -from collections import defaultdict - -from flask import current_app as ca, request -from flask_restx import Resource - -from mindsdb.api.http.utils import http_error -from mindsdb.api.http.namespaces.configs.tree import ns_conf -from mindsdb.metrics.metrics import api_endpoint_metrics - - -@ns_conf.route("/") -class GetRoot(Resource): - @ns_conf.doc("get_tree_root") - @api_endpoint_metrics("GET", "/tree") - def get(self): - databases = ca.database_controller.get_list() - result = [ - { - "name": x["name"], - "class": "db", - "type": x["type"], - "engine": x["engine"], - "deletable": x["deletable"], - "visible": x["visible"], - } - for x in databases - ] - return result - - -@ns_conf.route("/") -@ns_conf.param("db_name", "Name of the database") -class GetLeaf(Resource): - @ns_conf.doc("get_tree_leaf") - @api_endpoint_metrics("GET", "/tree/database") - def get(self, db_name): - with_schemas = request.args.get("all_schemas") - if isinstance(with_schemas, str): - with_schemas = with_schemas.lower() in ("1", "true") - else: - # Show all schemas by default for better UX - with_schemas = True - db_name = db_name.lower() - databases = ca.database_controller.get_dict() - if db_name not in databases: - return http_error(400, "Error", f"There is no element with name '{db_name}'") - db = databases[db_name] - if db["type"] == "project": - project = ca.database_controller.get_project(db_name) - tables = project.get_tables() - tables = [ - { - "name": key, - "schema": None, - "class": "table", - "type": val["type"], - "engine": val.get("engine"), - "deletable": val.get("deletable"), - } - for key, val in tables.items() - ] - - jobs = ca.jobs_controller.get_list(db_name) - tables = tables + [ - {"name": job["name"], "schema": 
None, "class": "job", "type": "job", "engine": "job", "deletable": True} - for job in jobs - ] - elif db["type"] == "data": - handler = ca.integration_controller.get_data_handler(db_name) - if "all" in inspect.signature(handler.get_tables).parameters: - response = handler.get_tables(all=with_schemas) - else: - response = handler.get_tables() - if response.type != "table": - return [] - table_types = {"BASE TABLE": "table", "VIEW": "view"} - tables = response.data_frame.to_dict(orient="records") - - schemas = defaultdict(list) - - for table_meta in tables: - table_meta = {key.lower(): val for key, val in table_meta.items()} - schama = table_meta.get("table_schema") - schemas[schama].append( - { - "name": table_meta["table_name"], - "class": "table", - "type": table_types.get(table_meta.get("table_type")), - "engine": None, - "deletable": False, - } - ) - if len(schemas) == 1 and list(schemas.keys())[0] is None: - tables = schemas[None] - else: - tables = [ - {"name": key, "class": "schema", "deletable": False, "children": val} - for key, val in schemas.items() - ] - elif db["type"] == "system": - system_db = ca.database_controller.get_system_db(db_name) - tables = system_db.get_tree_tables() - tables = [ - { - "name": table.name, - "class": table.kind, - "type": "system view", - "engine": None, - "deletable": table.deletable, - } - for table in tables.values() - ] - return tables diff --git a/mindsdb/api/http/namespaces/util.py b/mindsdb/api/http/namespaces/util.py deleted file mode 100644 index 9fad9be3f6a..00000000000 --- a/mindsdb/api/http/namespaces/util.py +++ /dev/null @@ -1,97 +0,0 @@ -import os -import tempfile -from pathlib import Path - -import psutil -from flask_restx import Resource - -from mindsdb.metrics.metrics import api_endpoint_metrics -from mindsdb.api.http.namespaces.configs.util import ns_conf -from mindsdb.api.http.gui import update_static -from mindsdb.utilities.fs import clean_unlinked_process_marks -from mindsdb.api.http.utils import 
http_error - - -def get_active_tasks(): - response = {"learn": False, "predict": False, "analyse": False} - - if os.name != "posix": - return response - - for process_type in response: - processes_dir = Path(tempfile.gettempdir()).joinpath(f"mindsdb/processes/{process_type}/") - if not processes_dir.is_dir(): - continue - clean_unlinked_process_marks() - process_marks = [x.name for x in processes_dir.iterdir()] - if len(process_marks) > 0: - response[process_type] = True - - return response - - -@ns_conf.route("/ping") -class Ping(Resource): - @ns_conf.doc("get_ping") - @api_endpoint_metrics("GET", "/util/ping") - def get(self): - """Checks server avaliable""" - return {"status": "ok"} - - -@ns_conf.route("/ping/ml_task_queue") -class PingMLTaskQueue(Resource): - @ns_conf.doc("get_ping_ml_task_queue") - @api_endpoint_metrics("GET", "/util/ping/ml_task_queue") - def get(self): - """Check if ML tasks queue process is alive""" - processes_dir = Path(tempfile.gettempdir()).joinpath("mindsdb/processes/internal/") - if processes_dir.is_dir(): - ml_tasks_queue_mark = next( - (x for x in processes_dir.iterdir() if x.name.endswith("ml_task_consumer")), None - ) - if ml_tasks_queue_mark is not None: - try: - pid = int(ml_tasks_queue_mark.name.split("-")[0]) - process = psutil.Process(pid) - if process.status() in (psutil.STATUS_ZOMBIE, psutil.STATUS_DEAD): - raise psutil.NoSuchProcess(pid) - return "", 200 - except Exception: - return "", 404 - return "", 404 - - -@ns_conf.route("/readiness") -class ReadinessProbe(Resource): - @ns_conf.doc("get_ready") - @api_endpoint_metrics("GET", "/util/readiness") - def get(self): - """Checks server is ready for work""" - - tasks = get_active_tasks() - for key in tasks: - if tasks[key] is True: - return http_error(503, "not ready", "not ready") - - return "", 200 - - -@ns_conf.route("/ping_native") -class PingNative(Resource): - @ns_conf.doc("get_ping_native") - @api_endpoint_metrics("GET", "/util/ping_native") - def get(self): - 
"""Checks server use native for learn or analyse. - Will return right result only on Linux. - """ - return get_active_tasks() - - -@ns_conf.route("/update-gui") -class UpdateGui(Resource): - @ns_conf.doc("get_update_gui") - @api_endpoint_metrics("GET", "/util/update-gui") - def get(self): - update_static() - return "", 200 diff --git a/mindsdb/api/http/namespaces/views.py b/mindsdb/api/http/namespaces/views.py deleted file mode 100644 index 1a43464db48..00000000000 --- a/mindsdb/api/http/namespaces/views.py +++ /dev/null @@ -1,136 +0,0 @@ -from http import HTTPStatus - -from flask import request -from flask_restx import Resource - -from mindsdb.api.http.utils import http_error -from mindsdb.api.http.namespaces.configs.projects import ns_conf -from mindsdb.api.executor.controllers.session_controller import SessionController -from mindsdb.metrics.metrics import api_endpoint_metrics -from mindsdb.utilities.exception import EntityNotExistsError - - -@ns_conf.route("//views") -class ViewsList(Resource): - @ns_conf.doc("list_views") - @api_endpoint_metrics("GET", "/views") - def get(self, project_name): - """List all views""" - session = SessionController() - try: - project = session.database_controller.get_project(project_name) - except EntityNotExistsError: - return http_error(HTTPStatus.NOT_FOUND, "Project not found", f"Project name {project_name} does not exist") - - all_views = project.get_views() - all_view_objs = [] - # Only want to return relevant fields to the user. 
- for view in all_views: - all_view_objs.append({"id": view["metadata"]["id"], "name": view["name"], "query": view["query"]}) - return all_view_objs - - @ns_conf.doc("create_view") - @api_endpoint_metrics("POST", "/views") - def post(self, project_name): - """Create a new view""" - if "view" not in request.json: - return http_error(HTTPStatus.BAD_REQUEST, "Wrong argument", 'Must provide "view" parameter in POST body') - session = SessionController() - view_obj = request.json["view"] - if "name" not in view_obj: - return http_error(HTTPStatus.BAD_REQUEST, "Wrong argument", 'Missing "name" field for view') - if "query" not in view_obj: - return http_error(HTTPStatus.BAD_REQUEST, "Wrong argument", 'Missing "query" field for view') - name = view_obj["name"] - query = view_obj["query"] - - try: - project = session.database_controller.get_project(project_name) - except EntityNotExistsError: - return http_error(HTTPStatus.NOT_FOUND, "Project not found", f"Project name {project_name} does not exist") - - if project.get_view(name) is not None: - return http_error(HTTPStatus.CONFLICT, "Name conflict", f"View with name {name} already exists.") - - project.create_view(name, query, session) - created_view = project.get_view(name) - # Only want to return relevant fields to the user. 
- return { - "id": created_view["metadata"]["id"], - "name": created_view["name"], - "query": created_view["query"], - }, HTTPStatus.CREATED - - -@ns_conf.route("//views/") -@ns_conf.param("project_name", "Name of the project") -@ns_conf.param("view_name", "Name of the view") -class ViewResource(Resource): - @ns_conf.doc("get_view") - @api_endpoint_metrics("GET", "/views/view") - def get(self, project_name, view_name): - """Get a view by name""" - session = SessionController() - try: - project = session.database_controller.get_project(project_name) - except EntityNotExistsError: - return http_error(HTTPStatus.NOT_FOUND, "Project not found", f"Project name {project_name} does not exist") - - view = project.get_view(view_name) - if view is None: - return http_error(HTTPStatus.NOT_FOUND, "View not found", f"View with name {view_name} does not exist") - - # Only want to return relevant fields to the user. - return {"id": view["metadata"]["id"], "name": view["name"], "query": view["query"]} - - @ns_conf.doc("update_view") - @api_endpoint_metrics("PUT", "/views/view") - def put(self, project_name, view_name): - """Updates or creates a view""" - if "view" not in request.json: - return http_error(HTTPStatus.BAD_REQUEST, "Wrong argument", 'Must provide "view" parameter in PUT body') - request_view = request.json["view"] - session = SessionController() - try: - project = session.database_controller.get_project(project_name) - except EntityNotExistsError: - return http_error(HTTPStatus.NOT_FOUND, "Project not found", f"Project name {project_name} does not exist") - - existing_view = project.get_view(view_name) - if existing_view is None: - # Create - if "query" not in request_view: - return http_error(HTTPStatus.BAD_REQUEST, "Wrong argument", 'Missing "query" field for new view') - project.create_view(view_name, request_view["query"], session) - created_view = project.get_view(view_name) - # Only want to return relevant fields to the user. 
- return { - "id": created_view["metadata"]["id"], - "name": created_view["name"], - "query": created_view["query"], - }, HTTPStatus.CREATED - - new_query = existing_view["query"] - if "query" in request_view: - new_query = request_view["query"] - project.update_view(view_name, new_query) - - existing_view = project.get_view(view_name) - # Only want to return relevant fields to the user. - return {"id": existing_view["metadata"]["id"], "name": existing_view["name"], "query": existing_view["query"]} - - @ns_conf.doc("delete_view") - @api_endpoint_metrics("DELETE", "/views/view") - def delete(self, project_name, view_name): - """Deletes a view by name""" - session = SessionController() - try: - project = session.database_controller.get_project(project_name) - except EntityNotExistsError: - return http_error(HTTPStatus.NOT_FOUND, "Project not found", f"Project name {project_name} does not exist") - - if project.get_view(view_name) is None: - return http_error(HTTPStatus.NOT_FOUND, "View not found", f"View with name {view_name} does not exist") - - project.delete_view(view_name) - return "", HTTPStatus.NO_CONTENT diff --git a/mindsdb/api/http/namespaces/webhooks.py b/mindsdb/api/http/namespaces/webhooks.py deleted file mode 100644 index d7e849d28bd..00000000000 --- a/mindsdb/api/http/namespaces/webhooks.py +++ /dev/null @@ -1,29 +0,0 @@ -from flask import request -from flask_restx import Resource - -from mindsdb.api.http.namespaces.configs.webhooks import ns_conf -from mindsdb.interfaces.chatbot.chatbot_controller import ChatBotController -from mindsdb.metrics.metrics import api_endpoint_metrics - - -# Stores the memory of the various chat-bots mapped by their webhook tokens. -# This is required because each time a new request is made, a new instance of the ChatBotTask is created. -# This causes the memory to be lost. 
-chat_bot_memory = {} - - -@ns_conf.route('/chatbots/') -class ChatbotWebhooks(Resource): - @ns_conf.doc('chatbots_webhook') - @api_endpoint_metrics('POST', '/webhooks/chatbots/') - def post(self, webhook_token: str) -> None: - """ - This endpoint is used to receive messages posted by bots from different platforms. - - Args: - webhook_token (str): The token of the webhook. It is used to uniquely identify the webhook. - """ - request_data = request.json - - chat_bot_controller = ChatBotController() - return chat_bot_controller.on_webhook(webhook_token, request_data, chat_bot_memory) diff --git a/mindsdb/api/http/openapi.yml b/mindsdb/api/http/openapi.yml deleted file mode 100644 index 0bb17c75972..00000000000 --- a/mindsdb/api/http/openapi.yml +++ /dev/null @@ -1,512 +0,0 @@ -openapi: 3.0.0 -info: - title: MindsDB API - description: >- - OpenAPI Specification for MindsDB's REST API. Each API Endpoint corresponds - to a specific SQL Statement e.g POST /model => CREATE MODEL - version: 0.0.1 - contact: - email: admin@mindsdb.com - license: - name: Server Side Public License (SSPL v1) - url: 'https://github.com/mindsdb/mindsdb/blob/main/LICENSE' -servers: - - url: 'https://cloud.mindsdb.com/v1/api' - description: MindsDB Production cloud server - - url: 'http://alpha.mindsdb.com/v1/api' - description: MindsDB Alpha cloud server - - url: 'http://127.0.0.1/v1/api' - description: MindsDB local deployments -components: - securitySchemes: - ApiKeyAuth: - type: apiKey - in: header - name: X-API-KEY -paths: - /databases: - get: - security: - - ApiKeyAuth: [] - summary: Returns a list of database names. - description: This endpoint retrieves all databases created by the user. 
- responses: - '200': - description: A JSON array of database names - content: - application/json: - schema: - type: array - items: - type: string - example: 'database name' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - post: - security: - - ApiKeyAuth: [] - summary: Creates a new database connection. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - engine: - type: string - user: - type: string - password: - type: string - host: - type: string - port: - type: string - database: - type: string - responses: - '201': - description: Database was succesfully created - content: - application/json: - schema: - type: object - items: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - put: - security: - - ApiKeyAuth: [] - summary: Updates an existing database connection. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - user: - type: string - password: - type: string - host: - type: string - port: - type: string - database: - type: string - responses: - '201': - description: Database was succesfully updated - content: - application/json: - schema: - type: object - items: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/databases/{databaseName}': - get: - security: - - ApiKeyAuth: [] - summary: Gets info about existing database. 
- description: This endpoint retrieves a specific database info. - parameters: - - name: databaseName - in: path - description: databaseName to select - required: true - schema: - type: string - responses: - '200': - description: A JSON object with database informations - content: - application/json: - schema: - type: object - items: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Database not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - delete: - security: - - ApiKeyAuth: [] - summary: Deletes an existing database. - description: This endpoint deletes a database connection. - parameters: - - name: databaseName - in: path - description: databaseName to delete - required: true - schema: - type: string - responses: - '200': - description: A JSON array of database names - content: - application/json: - schema: - type: array - items: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Database not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - /models: - post: - security: - - ApiKeyAuth: [] - summary: This endpoint trains a new ML Model. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - project_name: - type: string - description: Name of the project where the model is created - predictor_name: - type: string - description: Name of the model to be created. 
- integration_name: - type: string - description: Name of the database integration created - target_column: - type: string - description: Column to be predicted. - query: - type: string - description: The SQL query to get the data - responses: - '200': - description: Model training started - content: - application/json: - schema: - type: object - items: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - get: - security: - - ApiKeyAuth: [] - summary: Returns a list of models. - description: This endpoint retrieves the status of all models. - responses: - '200': - description: A JSON array of models names - content: - application/json: - schema: - type: array - items: - type: string - example: 'model name' - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/models/{modelName}': - get: - security: - - ApiKeyAuth: [] - summary: Gets info about specific model. - description: This endpoint retrieves the status of a specific model. 
- parameters: - - name: modelName - in: path - description: The name of the model - required: true - schema: - type: string - responses: - '200': - description: A JSON object with database informations - content: - application/json: - schema: - type: object - items: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Model not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - post: - security: - - ApiKeyAuth: [] - summary: This endpoint fetches predictions from the model. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - model_name: - type: string - description: Name of the project where the model is created - data: - type: string - description: The data for querying the model as SQL Statment or FILE - responses: - '200': - description: Prediction data - content: - application/json: - schema: - type: object - items: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: str - delete: - security: - - ApiKeyAuth: [] - summary: Deletes an existing model. - description: This endpoint deletes a model. 
- parameters: - - name: modelName - in: path - description: modelName to delete - required: true - schema: - type: string - responses: - '200': - description: A JSON array of database names - content: - application/json: - schema: - type: array - items: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Database not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string - '/models/{modelName}/describe': - get: - security: - - ApiKeyAuth: [] - summary: Describe a model - description: This endpoint returns the attributes of a specific model. - parameters: - - name: modelName - in: path - description: The name of the model - required: true - schema: - type: string - - name: features - in: query - description: Include how the model encoded data info - required: false - schema: - type: boolean - default: false - responses: - '200': - description: A JSON object with model informations - content: - application/json: - schema: - type: object - items: - type: string - '401': - description: Invalid API key error message - content: - application/json: - schema: - type: object - items: - type: string - '404': - description: Model not found - content: - application/json: - schema: - type: object - items: - type: string - '500': - description: Server error - content: - application/json: - schema: - type: object - items: - type: string \ No newline at end of file diff --git a/mindsdb/api/http/start.py b/mindsdb/api/http/start.py deleted file mode 100644 index f2373ebb114..00000000000 --- a/mindsdb/api/http/start.py +++ /dev/null @@ -1,123 +0,0 @@ -import gc -from importlib import import_module -from contextlib import asynccontextmanager, AsyncExitStack - -gc.disable() - -from flask import Flask -from 
starlette.applications import Starlette -from starlette.routing import Mount, Route -from starlette.responses import JSONResponse -from a2wsgi import WSGIMiddleware -import uvicorn - -from mindsdb.api.http.initialize import initialize_app -from mindsdb.interfaces.storage import db -from mindsdb.utilities import log -from mindsdb.utilities.config import config -from mindsdb.utilities.functions import init_lexer_parsers -from mindsdb.integrations.libs.ml_exec_base import process_cache -from mindsdb.api.common.middleware import PATAuthMiddleware - -gc.enable() - -logger = log.getLogger(__name__) - - -async def _health_check(request): - """Async health check that bypasses the WSGI worker pool for the mindsdb API.""" - return JSONResponse({"status": "ok"}) - - -def _mount_optional_api(name: str, mount_path: str, get_app_fn, routes) -> object | None: - try: - optional_app = get_app_fn() - except ImportError as exc: - logger.warning( - "%s support is disabled (%s). To enable it, install the %s extra: pip install 'mindsdb[%s]'", - name, - exc, - name, - name.lower(), - ) - return - - if name.upper() != "MCP" or config["api"]["mcp"]["oauth"]["enabled"] is False: - optional_app.add_middleware(PATAuthMiddleware) - - routes.append(Mount(mount_path, app=optional_app)) - return optional_app - - -def start(verbose, app: Flask = None, is_restart: bool = False): - db.init() - init_lexer_parsers() - - if app is None: - app = initialize_app(is_restart) - - port = config["api"]["http"]["port"] - host = config["api"]["http"]["host"] - - process_cache.init() - - routes = [] - sub_apps = [] - - # Health check FIRST - async endpoint that bypasses WSGI worker pool - # This ensures health checks respond even when all workers are blocked - routes.append(Route("/api/util/ping", _health_check, methods=["GET"])) - - for name, path, factory in [ - ("A2A", "/a2a", lambda: import_module("mindsdb.api.a2a").get_a2a_app()), - ("MCP", "/mcp", lambda: import_module("mindsdb.api.mcp").get_mcp_app()), - 
]: - mounted = _mount_optional_api(name, path, factory, routes) - if mounted is not None: - sub_apps.append(mounted) - - # RFC 9728: /.well-known/oauth-protected-resource must be at the server root, - # not under the /mcp mount, so we register it here before the Flask fallback. - try: - well_known_routes = import_module("mindsdb.api.mcp").get_mcp_well_known_routes() - routes.extend(well_known_routes) - except ImportError: - pass - except Exception as e: - logger.warning(f"Error during registering of mcp well-known routes: {e}") - - @asynccontextmanager - async def lifespan(_): - """Propagate ASGI lifespan events to mounted sub-apps. - - Starlette's Mount does not forward startup/shutdown lifespan events to - sub-applications automatically. This context manager manually enters the - lifespan context of each collected sub-app so their internal state - (e.g. StreamableHTTPSessionManager task group for MCP) is properly - initialized on startup and torn down on shutdown. - """ - async with AsyncExitStack() as stack: - for sub_app in sub_apps: - await stack.enter_async_context(sub_app.router.lifespan_context(sub_app)) - yield - - # Root app LAST so it won't shadow the others - routes.append( - Mount( - "/", - app=WSGIMiddleware( - app, - workers=config["api"]["http"]["a2wsgi"]["workers"], - send_queue_size=config["api"]["http"]["a2wsgi"]["send_queue_size"], - ), - ) - ) - - # Setting logging to None makes uvicorn use the existing logging configuration - uvicorn.run( - Starlette(routes=routes, lifespan=lifespan, debug=verbose), - host=host, - port=int(port), - log_level=None, - log_config=None, - ) diff --git a/mindsdb/api/http/utils.py b/mindsdb/api/http/utils.py deleted file mode 100644 index 564fe0fce1d..00000000000 --- a/mindsdb/api/http/utils.py +++ /dev/null @@ -1,37 +0,0 @@ -import json -from typing import Optional -from datetime import datetime - -from flask import Response - - -def http_error(status_code: int, title: Optional[str] = None, detail: Optional[str] = 
None): - ''' Wrapper for error responce acoording with RFC 7807 (https://tools.ietf.org/html/rfc7807) - - :param status_code: int - http status code for response - :param title: str - :param detail: str - - :return: flask Response object - ''' - if title is None: - title = 'Error' - if detail is None: - if 400 <= status_code < 500: - detail = "A client error occurred. Please check your request and try again." - elif 500 <= status_code < 600: - detail = "A server error occurred. Please try again later." - else: - detail = "An error occurred while processing the request. Please try again later." - - return Response( - response=json.dumps({ - 'title': title, - 'detail': detail, - 'timestamp': str(datetime.now()) - }), - status=status_code, - headers={ - 'Content-Type': 'application/problem+json' - } - ) diff --git a/mindsdb/api/litellm/__init__.py b/mindsdb/api/litellm/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/api/litellm/start.py b/mindsdb/api/litellm/start.py deleted file mode 100644 index e051769509f..00000000000 --- a/mindsdb/api/litellm/start.py +++ /dev/null @@ -1,83 +0,0 @@ -import asyncio -from mindsdb.utilities import log -from mindsdb.utilities.config import Config -from mindsdb.interfaces.agents.litellm_server import run_server, run_server_async - -logger = log.getLogger(__name__) - - -async def start_async(verbose=False): - """Start the LiteLLM server - - Args: - verbose (bool): Whether to enable verbose logging - """ - config = Config() - - # Get agent name from command line args - agent_name = config.cmd_args.agent - if not agent_name: - logger.error("Agent name is required for LiteLLM server. 
Use --agent parameter.") - return 1 - - # Get project name or use default - project_name = config.cmd_args.project or "mindsdb" - - # Get MCP server connection details - mcp_host = "127.0.0.1" - mcp_port = int(config.get("api", {}).get("http", {}).get("port", 47334)) - - # Get LiteLLM server settings - litellm_host = config.get("api", {}).get("litellm", {}).get("host", "0.0.0.0") - litellm_port = int(config.get("api", {}).get("litellm", {}).get("port", 8000)) - - logger.info(f"Starting LiteLLM server for agent '{agent_name}' in project '{project_name}'") - logger.info(f"Connecting to MCP server at {mcp_host}:{mcp_port}") - logger.info(f"Binding to {litellm_host}:{litellm_port}") - - return await run_server_async( - agent_name=agent_name, - project_name=project_name, - mcp_host=mcp_host, - mcp_port=mcp_port, - host=litellm_host, - port=litellm_port, - ) - - -def start(verbose=False): - """Start the LiteLLM server (synchronous wrapper) - - Args: - verbose (bool): Whether to enable verbose logging - """ - from mindsdb.interfaces.storage import db - - db.init() - - # Run the async function in the event loop - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - result = loop.run_until_complete(start_async(verbose)) - - if result == 0: - # Run the server - config = Config() - agent_name = config.cmd_args.agent - project_name = config.cmd_args.project or "mindsdb" - mcp_host = config.get("api", {}).get("mcp", {}).get("host", "127.0.0.1") - mcp_port = int(config.get("api", {}).get("mcp", {}).get("port", 47337)) - litellm_host = config.get("api", {}).get("litellm", {}).get("host", "0.0.0.0") - litellm_port = int(config.get("api", {}).get("litellm", {}).get("port", 8000)) - - return run_server( - agent_name=agent_name, - project_name=project_name, - mcp_host=mcp_host, - mcp_port=mcp_port, - host=litellm_host, - port=litellm_port, - ) - else: - logger.error("LiteLLM server initialization failed") - return result diff --git a/mindsdb/api/mcp/__init__.py 
b/mindsdb/api/mcp/__init__.py deleted file mode 100644 index 3473a394e61..00000000000 --- a/mindsdb/api/mcp/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from mindsdb.api.mcp.app import get_mcp_app, get_mcp_well_known_routes - -__all__ = ["get_mcp_app", "get_mcp_well_known_routes"] diff --git a/mindsdb/api/mcp/app.py b/mindsdb/api/mcp/app.py deleted file mode 100644 index ea810595ac0..00000000000 --- a/mindsdb/api/mcp/app.py +++ /dev/null @@ -1,94 +0,0 @@ -from contextlib import asynccontextmanager - -from starlette.applications import Starlette -from starlette.middleware import Middleware -from starlette.middleware.authentication import AuthenticationMiddleware -from starlette.middleware.cors import CORSMiddleware -from starlette.requests import Request -from starlette.responses import JSONResponse -from starlette.routing import Route - -from mcp.server.auth.middleware.bearer_auth import BearerAuthBackend -from mcp.server.auth.middleware.auth_context import AuthContextMiddleware - -from mindsdb.utilities.config import config -from mindsdb.api.common.middleware import RateLimitMiddleware -from mindsdb.api.mcp.mcp_instance import mcp - -# region these imports required for correct initialization -from mindsdb.api.mcp import tools # noqa: F401 -from mindsdb.api.mcp import resources # noqa: F401 -from mindsdb.api.mcp import prompts # noqa: F401 -from mindsdb.api.mcp import completions # noqa: F401 -# endregion - - -def _get_status(request: Request) -> JSONResponse: - return JSONResponse({"status": "ok", "service": "mindsdb-mcp"}) - - -def get_mcp_app(): - sse_starlette = mcp.sse_app() - http_starlette = mcp.streamable_http_app() - - @asynccontextmanager - async def lifespan(_): - """Required for streamable_http to run task group""" - async with http_starlette.router.lifespan_context(http_starlette): - yield - - middleware = [] - - # Preserve AuthenticationMiddleware from http_starlette so that - # RequireAuthMiddleware can read scope["user"] set by BearerAuthBackend. 
- if mcp._token_verifier is not None: - middleware = [ - Middleware(AuthenticationMiddleware, backend=BearerAuthBackend(mcp._token_verifier)), - Middleware(AuthContextMiddleware), - ] - - combined_app = Starlette( - routes=list(sse_starlette.routes) + list(http_starlette.routes), - middleware=middleware, - lifespan=lifespan, - ) - - # Rate limit should be added before CORS, so that CORS adds correct headers - if config["api"]["mcp"]["rate_limit"]["enabled"]: - combined_app.add_middleware( - RateLimitMiddleware, - requests_per_minute=config["api"]["mcp"]["rate_limit"]["requests_per_minute"], - ) - - if config["api"]["mcp"]["cors"]["enabled"]: - combined_app.add_middleware( - CORSMiddleware, - allow_origins=config["api"]["mcp"]["cors"]["allow_origins"], - allow_origin_regex=config["api"]["mcp"]["cors"]["allow_origin_regex"], - allow_methods=["GET", "POST", "DELETE", "OPTIONS"], - allow_headers=config["api"]["mcp"]["cors"]["allow_headers"], - expose_headers=["mcp-session-id"], - ) - - combined_app.add_route("/status", _get_status, methods=["GET"]) - - return combined_app - - -def get_mcp_well_known_routes() -> list[Route]: - """Return OAuth protected resource metadata routes for mounting at the server root. - - RFC 9728 requires /.well-known/oauth-protected-resource to be served at the - server root, not under the /mcp sub-path, so start.py registers these separately. 
- """ - from mcp.server.auth.routes import create_protected_resource_routes - - auth = mcp.settings.auth - if not auth or not auth.resource_server_url: - return [] - - return create_protected_resource_routes( - resource_url=auth.resource_server_url, - authorization_servers=[auth.issuer_url], - scopes_supported=auth.required_scopes, - ) diff --git a/mindsdb/api/mcp/completions.py b/mindsdb/api/mcp/completions.py deleted file mode 100644 index 94bf2abe2cd..00000000000 --- a/mindsdb/api/mcp/completions.py +++ /dev/null @@ -1,35 +0,0 @@ -from mcp.types import Completion, PromptReference, ResourceTemplateReference - -from mindsdb.api.mcp.mcp_instance import mcp -from mindsdb.api.executor.controllers.session_controller import SessionController -from mindsdb.utilities.context import context as ctx -from mindsdb.api.mcp.resources.schema import _get_database_names -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -@mcp.completion() -async def handle_completion(ref, argument, context): - if not isinstance(ref, (ResourceTemplateReference, PromptReference)): - return None - - try: - if argument.name == "database_name": - names = _get_database_names() - return Completion(values=[n for n in names if n.startswith(argument.value)]) - - if argument.name == "table_name": - database_name = (context.arguments or {}).get("database_name") - if not database_name: - return None - ctx.set_default() - session = SessionController() - datanode = session.datahub.get(database_name) - all_tables = datanode.get_tables() - names = [t.TABLE_NAME for t in all_tables] - return Completion(values=[n for n in names if n.startswith(argument.value)]) - except Exception as e: - logger.info(f"Couldn't get completion for parameter {argument.name}: {e}") - - return None diff --git a/mindsdb/api/mcp/mcp_instance.py b/mindsdb/api/mcp/mcp_instance.py deleted file mode 100644 index fa65ab47711..00000000000 --- a/mindsdb/api/mcp/mcp_instance.py +++ /dev/null @@ -1,36 +0,0 @@ -from 
mcp.server.fastmcp import FastMCP -from mcp.server.transport_security import TransportSecuritySettings - -from mindsdb.api.mcp.oauth import build_oauth_components -from mindsdb.utilities.config import config - - -def _create_mcp() -> FastMCP: - token_verifier, auth_settings = build_oauth_components() - - dns_rebinding_protection = config["api"]["mcp"]["dns_rebinding_protection"] - transport_security = TransportSecuritySettings(enable_dns_rebinding_protection=dns_rebinding_protection) - - return FastMCP( - name="MindsDB", - instructions=( - "MindsDB is a data platform that connects to external databases and data sources.\n" - "Use the available resources to discover connected databases and their schema,\n" - "then use the `query` tool to retrieve or manipulate data with SQL.\n" - "\n" - "Workflow:\n" - "1. Read `schema://databases` to list available data sources.\n" - "2. Read `schema://databases/{name}/tables` to explore tables in a source.\n" - "3. Read `schema://databases/{name}/tables/{table}/columns` to inspect columns.\n" - "4. Use the `query` tool to run SQL queries against the data." 
- ), - dependencies=["mindsdb"], - streamable_http_path="/streamable", - debug=False, - token_verifier=token_verifier, - auth=auth_settings, - transport_security=transport_security, - ) - - -mcp = _create_mcp() diff --git a/mindsdb/api/mcp/oauth.py b/mindsdb/api/mcp/oauth.py deleted file mode 100644 index 32d5efb71dd..00000000000 --- a/mindsdb/api/mcp/oauth.py +++ /dev/null @@ -1,167 +0,0 @@ -from typing import Any -from urllib.parse import urljoin - -import httpx -from pydantic import AnyHttpUrl -from mcp.server.auth.settings import AuthSettings -from mcp.server.auth.provider import AccessToken, TokenVerifier -from mcp.shared.auth_utils import check_resource_allowed, resource_url_from_server_url - -from mindsdb.utilities.config import config -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class IntrospectionTokenVerifier(TokenVerifier): - """Token verifier that uses OAuth 2.0 Token Introspection (RFC 7662). - Intended for use when MindsDB acts as a Resource Server and token - issuance is delegated to an external provider (e.g. Keycloak). - - Args: - introspection_endpoint: Full URL of the RFC 7662 introspection endpoint. - server_url: Public URL of this MCP server (e.g. ``http://host:port/mcp/streamable``). - Used to derive the expected ``aud`` (audience) claim value. - client_id: OAuth client ID used to authenticate against the introspection endpoint. - client_secret: OAuth client secret used to authenticate against the introspection endpoint. - """ - - def __init__( - self, - introspection_endpoint: str, - server_url: str, - client_id: str, - client_secret: str, - ): - self.introspection_endpoint = introspection_endpoint - self.server_url = server_url - self.client_id = client_id - self.client_secret = client_secret - self.resource_url = resource_url_from_server_url(server_url) - - async def verify_token(self, token: str) -> AccessToken | None: - """Verify a bearer token via the introspection endpoint. 
- - Args: - token: Raw bearer token string extracted from the Authorization header. - - Returns: - AccessToken: Populated access token on successful verification. - None: If the token is inactive, the audience is invalid, the endpoint - is unreachable, or any other error occurs. - """ - # to prevent SSRF attacks it must start from https, or be local server - if not self.introspection_endpoint.startswith(("https://", "http://localhost:", "http://127.0.0.1:")): - return None - - timeout = httpx.Timeout(10.0, connect=5.0) - limits = httpx.Limits(max_connections=10, max_keepalive_connections=5) - - async with httpx.AsyncClient( - timeout=timeout, - limits=limits, - verify=True, - follow_redirects=False, - ) as client: - try: - form_data = { - "token": token, - "client_id": self.client_id, - "client_secret": self.client_secret, - } - headers = {"Content-Type": "application/x-www-form-urlencoded"} - - response = await client.post( - self.introspection_endpoint, - data=form_data, - headers=headers, - ) - - if response.status_code != 200: - return None - - data = response.json() - if not data.get("active", False): - return None - - if not self._validate_resource(data): - return None - - return AccessToken( - token=token, - client_id=data.get("client_id", "unknown"), - scopes=data.get("scope", "").split() if data.get("scope") else [], - expires_at=data.get("exp"), - resource=self.resource_url, - ) - - except Exception as e: - logger.error(f"Error during token verification: {e}") - return None - - def _validate_resource(self, token_data: dict[str, Any]) -> bool: - """Validate that the token was issued for this resource server (RFC 8707). - - Args: - token_data: Parsed JSON response from the introspection endpoint. - - Returns: - bool: True if at least one audience entry matches this server's resource URL, - False if ``aud`` is missing or no entry matches. 
- """ - if not self.server_url or not self.resource_url: - return False - - aud: list[str] | str | None = token_data.get("aud") - if isinstance(aud, list): - return any(check_resource_allowed(self.resource_url, a) for a in aud) - if isinstance(aud, str): - return check_resource_allowed(self.resource_url, aud) - return False - - -def build_oauth_components() -> tuple[IntrospectionTokenVerifier, AuthSettings] | tuple[None, None]: - """Build token verifier and auth settings from the OAuth config section. - - Returns: - tuple[IntrospectionTokenVerifier, AuthSettings]: Token verifier and auth settings ready - to pass to FastMCP if OAuth is enabled. - tuple[None, None]: If OAuth ``enabled`` is False or not set. - """ - oauth_cfg = config["api"]["mcp"]["oauth"] - if not oauth_cfg.get("enabled", False): - return None, None - - public_url = oauth_cfg.get("public_url", "").rstrip("/") - if public_url: - mcp_endpoint_url = f"{public_url}/mcp/streamable" - else: - host = config["api"]["http"]["host"] - port = config["api"]["http"]["port"] - # Bind-all addresses (0.0.0.0 / ::) are not valid client-facing destinations. - # Replace with loopback so the advertised resource_metadata URL is reachable. 
- if host in ("0.0.0.0", "", "::"): - host = "127.0.0.1" - mcp_endpoint_url = f"http://{host}:{port}/mcp/streamable" - - issuer_url = oauth_cfg.get("issuer_url", "").rstrip("/") + "/" - client_id = oauth_cfg.get("client_id", "") - client_secret = oauth_cfg.get("client_secret", "") - scope = oauth_cfg.get("scope", "mcp:tools") - - introspection_endpoint = urljoin(issuer_url, "protocol/openid-connect/token/introspect") - - token_verifier = IntrospectionTokenVerifier( - introspection_endpoint=introspection_endpoint, - server_url=mcp_endpoint_url, - client_id=client_id, - client_secret=client_secret, - ) - - auth_settings = AuthSettings( - issuer_url=AnyHttpUrl(issuer_url), - required_scopes=[scope], - resource_server_url=AnyHttpUrl(mcp_endpoint_url), - ) - - return token_verifier, auth_settings diff --git a/mindsdb/api/mcp/prompts/__init__.py b/mindsdb/api/mcp/prompts/__init__.py deleted file mode 100644 index 437673b53d3..00000000000 --- a/mindsdb/api/mcp/prompts/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from mindsdb.api.mcp.prompts import sample_table # noqa: F401 diff --git a/mindsdb/api/mcp/prompts/sample_table.py b/mindsdb/api/mcp/prompts/sample_table.py deleted file mode 100644 index 2473715aa7d..00000000000 --- a/mindsdb/api/mcp/prompts/sample_table.py +++ /dev/null @@ -1,21 +0,0 @@ -from mcp.types import TextContent - -from mindsdb.api.mcp.mcp_instance import mcp - - -@mcp.prompt(name="sample_table", description="Fetch 5 sample rows from a table and describe its structure.") -def sample_table(database_name: str, table_name: str) -> list[TextContent]: - return [ - TextContent( - type="text", - text=( - f"Use the `query` tool to fetch 5 sample rows from the table `{table_name}` " - f"in database `{database_name}`:\n\n" - f"```sql\n" - f"SELECT * FROM `{database_name}`.`{table_name}` LIMIT 5;\n" - f"```\n\n" - f"After getting the results, briefly describe the table structure " - f"and what kind of data it contains." 
- ), - ) - ] diff --git a/mindsdb/api/mcp/resources/__init__.py b/mindsdb/api/mcp/resources/__init__.py deleted file mode 100644 index 5cd0b60720d..00000000000 --- a/mindsdb/api/mcp/resources/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from mindsdb.api.mcp.resources import schema # noqa: F401 diff --git a/mindsdb/api/mcp/resources/schema.py b/mindsdb/api/mcp/resources/schema.py deleted file mode 100644 index 6986c7dd420..00000000000 --- a/mindsdb/api/mcp/resources/schema.py +++ /dev/null @@ -1,136 +0,0 @@ -from pydantic import BaseModel - -from mindsdb.api.mcp.mcp_instance import mcp -from mindsdb.api.executor.controllers.session_controller import SessionController -from mindsdb.utilities.context import context as ctx -from mindsdb.integrations.libs.response import TableResponse, ErrorResponse -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class TableInfo(BaseModel): - TABLE_NAME: str - TABLE_TYPE: str - TABLE_SCHEMA: str - - -class ColumnInfo(BaseModel): - COLUMN_NAME: str - MYSQL_DATA_TYPE: str - - -class KnowledgeBaseInfo(BaseModel): - name: str - project: str - metadata_columns: list[str] - content_columns: list[str] - id_column: str - - -def _get_database_names() -> list[str]: - ctx.set_default() - session = SessionController() - databases = session.database_controller.get_list() - return [x["name"] for x in databases if x["type"] == "data"] - - -@mcp.resource( - "schema://databases", - mime_type="application/json", - description=( - "Initial list of connected data source names available for querying. " - "This resource may be cached by the client. " - "To get the current list of databases during a session, use the `query` tool: " - "SHOW DATABASES" - ), -) -def list_databases() -> list[str]: - return _get_database_names() - - -@mcp.resource( - "schema://databases/{database_name}/tables", - mime_type="application/json", - description=( - "Initial list of tables in the specified connected database. 
" - "This resource may be cached by the client. " - "To get the current list of tables during a session (e.g. after CREATE/DROP TABLE), " - "use the `query` tool: " - "SHOW TABLES FROM {database_name}" - ), -) -def db_tables(database_name: str) -> list[TableInfo]: - ctx.set_default() - session = SessionController() - datanode = session.datahub.get(database_name) - if datanode is None: - raise ValueError(f"Database '{database_name}' is not found.") - all_tables = datanode.get_tables() - all_tables = [ - { - "TABLE_NAME": table.TABLE_NAME, - "TABLE_TYPE": table.TABLE_TYPE, - "TABLE_SCHEMA": table.TABLE_SCHEMA, - } - for table in all_tables - ] - return all_tables - - -@mcp.resource( - "schema://databases/{database_name}/tables/{table_name}/columns", - mime_type="application/json", - description=( - "Initial column names and types for a specific table in a connected database. " - "This resource may be cached by the client. " - "To get the current column list during a session (e.g. after ALTER TABLE), " - "use the `query` tool: " - "SELECT COLUMN_NAME, DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS " - "WHERE TABLE_SCHEMA = '{database_name}' AND TABLE_NAME = '{table_name}'" - ), -) -def db_table_columns(database_name: str, table_name: str) -> list[ColumnInfo]: - ctx.set_default() - session = SessionController() - handler = session.integration_controller.get_data_handler(database_name) - columns_answer = handler.get_columns(table_name) - - if isinstance(columns_answer, TableResponse): - if columns_answer.type != RESPONSE_TYPE.COLUMNS_TABLE: - raise ValueError( - "Database returned a successful response, but the column list does not match the expected format" - ) - df = columns_answer.fetchall() - response = df[["COLUMN_NAME", "MYSQL_DATA_TYPE"]].to_dict(orient="records") - return response - if isinstance(columns_answer, ErrorResponse): - raise ValueError(columns_answer.error_message) - raise ValueError(f"Unexpected handler response type: {columns_answer}") - - 
-@mcp.resource( - "schema://knowledge_bases", - description=( - "Initial list of knowledge bases with their project, column configuration, and ID column. " - "This resource may be cached by the client. " - "To get the current list of knowledge bases during a session, use the `query` tool: " - "SHOW KNOWLEDGE BASES" - ), -) -def list_knowledge_bases() -> list[KnowledgeBaseInfo]: - ctx.set_default() - session = SessionController() - project_names = session.datahub.get_projects_names() - result = [] - for project_name in project_names: - kbs = session.kb_controller.list(project_name) - for kb in kbs: - result.append( - { - "name": kb.get("name"), - "project": kb.get("project"), - "metadata_columns": kb.get("metadata_columns"), - "content_columns": kb.get("content_columns"), - "id_column": kb.get("id_column"), - } - ) - return result diff --git a/mindsdb/api/mcp/tools/__init__.py b/mindsdb/api/mcp/tools/__init__.py deleted file mode 100644 index a07edf06817..00000000000 --- a/mindsdb/api/mcp/tools/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from mindsdb.api.mcp.tools import query # noqa: F401 diff --git a/mindsdb/api/mcp/tools/query.py b/mindsdb/api/mcp/tools/query.py deleted file mode 100644 index 42026e32b1f..00000000000 --- a/mindsdb/api/mcp/tools/query.py +++ /dev/null @@ -1,60 +0,0 @@ -from textwrap import dedent -from typing import Annotated - -from pydantic import Field - -from mindsdb.api.mcp.mcp_instance import mcp -from mindsdb.api.mcp.types import ErrorResponse, QueryResponseAnswer, response_adapter -from mindsdb.api.mysql.mysql_proxy.mysql_proxy import SQLAnswer -from mindsdb.api.mysql.mysql_proxy.classes.fake_mysql_proxy import FakeMysqlProxy -from mindsdb.utilities.context import context as ctx -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -query_tool_description = dedent("""\ - Execute a SQL query against MindsDB and return the result. - - Queries use MySQL syntax. 
Use fully qualified names (`database`.`table`) or set `context` to specify - the default database. Use backticks (`) to quote identifiers that are reserved words or contain - special characters. - - Returns one of: - - `{"type": "ok"}` — for statements with no output (INSERT, UPDATE, etc.) - - `{"type": "table", "column_names": [...], "data": [[...], ...]}` — for SELECT results - - `{"type": "error", "error_message": "..."}` — on failure -""") - - -@mcp.tool(name="query", description=query_tool_description) -def query( - query: Annotated[str, Field(description="SQL query to execute against MindsDB.")], - context: Annotated[ - dict | None, - Field( - description=( - 'Default database context, e.g. {"db": "my_postgres"}. ' - "Required if the query does not use fully qualified table names." - ) - ), - ] = None, -) -> QueryResponseAnswer: - ctx.set_default() - - if context is None: - context = {} - - logger.debug(f"Incoming MCP query: {query}") - - mysql_proxy = FakeMysqlProxy() - mysql_proxy.set_context(context) - - try: - result: SQLAnswer = mysql_proxy.process_query(query) - query_response: dict = result.dump_http_response() - except Exception as e: - logger.exception("Error processing query:") - return ErrorResponse(type="error", error_code=0, error_message=str(e)) - - return response_adapter.validate_python(query_response) diff --git a/mindsdb/api/mcp/types.py b/mindsdb/api/mcp/types.py deleted file mode 100644 index 0275742116f..00000000000 --- a/mindsdb/api/mcp/types.py +++ /dev/null @@ -1,25 +0,0 @@ -from typing import Annotated, Literal, Union - -from pydantic import BaseModel, Field, TypeAdapter - - -class OkResponse(BaseModel): - type: Literal["ok"] - affected_rows: int | None = None - - -class ErrorResponse(BaseModel): - type: Literal["error"] - error_code: int - error_message: str - - -class TableResponse(BaseModel): - type: Literal["table"] - column_names: list[str] - data: list[list] - - -QueryResponseAnswer = Annotated[Union[OkResponse, ErrorResponse, 
TableResponse], Field(discriminator="type")] - -response_adapter = TypeAdapter(QueryResponseAnswer) diff --git a/mindsdb/api/mysql/__init__.py b/mindsdb/api/mysql/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/api/mysql/mysql_proxy/__init__.py b/mindsdb/api/mysql/mysql_proxy/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/api/mysql/mysql_proxy/classes/__init__.py b/mindsdb/api/mysql/mysql_proxy/classes/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/api/mysql/mysql_proxy/classes/client_capabilities.py b/mindsdb/api/mysql/mysql_proxy/classes/client_capabilities.py deleted file mode 100644 index be1a9de2c80..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/classes/client_capabilities.py +++ /dev/null @@ -1,130 +0,0 @@ -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import CAPABILITIES - - -class ClentCapabilities(): - _capabilities = 0 - - def __init__(self, capabilities): - self._capabilities = capabilities - - def has(self, cap): - return cap & self._capabilities > 0 - - def to_dict(self): - funcs = [func for func in dir(self) if func.upper() == func] - res = {} - for f in funcs: - res[f] = getattr(self, f) - return res - - @property - def LONG_PASSWORD(self): - return self.has(CAPABILITIES.CLIENT_LONG_PASSWORD) - - @property - def FOUND_ROWS(self): - return self.has(CAPABILITIES.CLIENT_FOUND_ROWS) - - @property - def LONG_FLAG(self): - return self.has(CAPABILITIES.CLIENT_LONG_FLAG) - - @property - def CONNECT_WITH_DB(self): - return self.has(CAPABILITIES.CLIENT_CONNECT_WITH_DB) - - @property - def NO_SCHEMA(self): - return self.has(CAPABILITIES.CLIENT_NO_SCHEMA) - - @property - def COMPRESS(self): - return self.has(CAPABILITIES.CLIENT_COMPRESS) - - @property - def ODBC(self): - return self.has(CAPABILITIES.CLIENT_ODBC) - - @property - def LOCAL_FILES(self): - return self.has(CAPABILITIES.CLIENT_LOCAL_FILES) - - @property - def 
IGNORE_SPACE(self): - return self.has(CAPABILITIES.CLIENT_IGNORE_SPACE) - - @property - def PROTOCOL_41(self): - return self.has(CAPABILITIES.CLIENT_PROTOCOL_41) - - @property - def INTERACTIVE(self): - return self.has(CAPABILITIES.CLIENT_INTERACTIVE) - - @property - def SSL(self): - return self.has(CAPABILITIES.CLIENT_SSL) - - @property - def IGNORE_SIGPIPE(self): - return self.has(CAPABILITIES.CLIENT_IGNORE_SIGPIPE) - - @property - def TRANSACTIONS(self): - return self.has(CAPABILITIES.CLIENT_TRANSACTIONS) - - @property - def RESERVED(self): - return self.has(CAPABILITIES.CLIENT_RESERVED) - - @property - def RESERVED2(self): - return self.has(CAPABILITIES.CLIENT_RESERVED2) - - @property - def MULTI_STATEMENTS(self): - return self.has(CAPABILITIES.CLIENT_MULTI_STATEMENTS) - - @property - def MULTI_RESULTS(self): - return self.has(CAPABILITIES.CLIENT_MULTI_RESULTS) - - @property - def PS_MULTI_RESULTS(self): - return self.has(CAPABILITIES.CLIENT_PS_MULTI_RESULTS) - - @property - def PLUGIN_AUTH(self): - return self.has(CAPABILITIES.CLIENT_PLUGIN_AUTH) - - @property - def CONNECT_ATTRS(self): - return self.has(CAPABILITIES.CLIENT_CONNECT_ATTRS) - - @property - def PLUGIN_AUTH_LENENC_CLIENT_DATA(self): - return self.has(CAPABILITIES.CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA) - - @property - def CAN_HANDLE_EXPIRED_PASSWORDS(self): - return self.has(CAPABILITIES.CLIENT_CAN_HANDLE_EXPIRED_PASSWORDS) - - @property - def SESSION_TRACK(self): - return self.has(CAPABILITIES.CLIENT_SESSION_TRACK) - - @property - def DEPRECATE_EOF(self): - return self.has(CAPABILITIES.CLIENT_DEPRECATE_EOF) - - @property - def SSL_VERIFY_SERVER_CERT(self): - return self.has(CAPABILITIES.CLIENT_SSL_VERIFY_SERVER_CERT) - - @property - def REMEMBER_OPTIONS(self): - return self.has(CAPABILITIES.CLIENT_REMEMBER_OPTIONS) - - @property - def SECURE_CONNECTION(self): - return self.has(CAPABILITIES.CLIENT_SECURE_CONNECTION) diff --git a/mindsdb/api/mysql/mysql_proxy/classes/fake_mysql_proxy/__init__.py 
b/mindsdb/api/mysql/mysql_proxy/classes/fake_mysql_proxy/__init__.py deleted file mode 100644 index 79eea9f8c91..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/classes/fake_mysql_proxy/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .fake_mysql_proxy import FakeMysqlProxy - -__all__ = ['FakeMysqlProxy'] diff --git a/mindsdb/api/mysql/mysql_proxy/classes/fake_mysql_proxy/fake_mysql_proxy.py b/mindsdb/api/mysql/mysql_proxy/classes/fake_mysql_proxy/fake_mysql_proxy.py deleted file mode 100644 index 70feed8a7d3..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/classes/fake_mysql_proxy/fake_mysql_proxy.py +++ /dev/null @@ -1,38 +0,0 @@ -from mindsdb.api.executor.controllers import SessionController -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import CHARSET_NUMBERS -from mindsdb.api.mysql.mysql_proxy.mysql_proxy import MysqlProxy -from mindsdb.utilities.config import config - - -def empty_fn(): - pass - - -class Dummy: - pass - - -class FakeMysqlProxy(MysqlProxy): - def __init__(self): - request = Dummy() - client_address = ['', ''] - server = Dummy() - server.connection_id = 0 - server.hook_before_handle = empty_fn - - self.charset = 'utf8' - self.charset_text_type = CHARSET_NUMBERS['utf8_general_ci'] - self.client_capabilities = None - - self.request = request - self.client_address = client_address - self.server = server - self.connection_id = None - - self.session = SessionController() - self.session.database = config.get('default_project') - - def is_cloud_connection(self): - return { - 'is_cloud': False - } diff --git a/mindsdb/api/mysql/mysql_proxy/classes/server_capabilities.py b/mindsdb/api/mysql/mysql_proxy/classes/server_capabilities.py deleted file mode 100644 index a0c708e0fb1..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/classes/server_capabilities.py +++ /dev/null @@ -1,22 +0,0 @@ -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import DEFAULT_CAPABILITIES - - -class ServerCapabilities(): - def __init__(self, capabilities): - 
self._capabilities = capabilities - - def has(self, cap): - return cap & self._capabilities > 0 - - def set(self, cap, value=True): - if value: - self._capabilities = self._capabilities | cap - else: - self._capabilities = self._capabilities & (~cap) - - @property - def value(self): - return self._capabilities - - -server_capabilities = ServerCapabilities(DEFAULT_CAPABILITIES) diff --git a/mindsdb/api/mysql/mysql_proxy/data_types/README.md b/mindsdb/api/mysql/mysql_proxy/data_types/README.md deleted file mode 100644 index 90e2f4239c6..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/data_types/README.md +++ /dev/null @@ -1,20 +0,0 @@ -MindsDB toolkit for faking to be a mysql server ---------------- - - -Here we have all the libraries that we need to speak mysql protocol 4.1+ - -The reference of this implementation is as described in MariaDB documentation -https://mariadb.com/kb/en/library/2-text-protocol/ - -There are two main objects here, -- the packet which is the unit that is exchanged between the server and client -- the datum, which is the subblocks that a packet is built on - -There are various types of packets, which you can find in the packets directory, such as handshake, ok, err msg, etc -Also, we try to keep a catalog of the constants that are needed for this implementation in constants - -ENJOY! 
- -NOTE: All of these libraries are homebrewn by mindsdb from scratch, -if yuu need to copy something from a different project, please make sure you place it in external_libs \ No newline at end of file diff --git a/mindsdb/api/mysql/mysql_proxy/data_types/__init__.py b/mindsdb/api/mysql/mysql_proxy/data_types/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py b/mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py deleted file mode 100644 index adc664b3275..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +++ /dev/null @@ -1,196 +0,0 @@ -""" -******************************************************* - * Copyright (C) 2017 MindsDB Inc. - * - * This file is part of MindsDB Server. - * - * MindsDB Server can not be copied and/or distributed without the express - * permission of MindsDB Inc - ******************************************************* -""" - -import struct - -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import ( - DEFAULT_CAPABILITIES, - NULL_VALUE, - ONE_BYTE_ENC, - THREE_BYTE_ENC, - TWO_BYTE_ENC, -) -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - -NULL_VALUE_INT = ord(NULL_VALUE) - - -class Datum: - __slots__ = ["value", "var_type", "var_len"] - - def __init__(self, var_type, value=None, var_len=None): - # TODO other types: float, timestamp - self.value = b"" - - if var_len is None: - idx = var_type.find("<") - var_len = var_type[idx + 1 : -1] - var_type = var_type[:idx] - self.var_type = var_type - self.var_len = var_len - - if value is not None: - self.set(value) - - def set(self, value): - self.value = value - - def setFromBuff(self, buff): - if self.var_len == "lenenc": - start = 1 - ln_enc = buff[0] - if int(ln_enc) <= ONE_BYTE_ENC[0]: - start = 0 - end = 1 - elif int(ln_enc) == TWO_BYTE_ENC[0]: - end = 3 - elif int(ln_enc) == THREE_BYTE_ENC[0]: - end = 4 - elif ln_enc: - end = 9 - - num_str = 
buff[start:end] - if end > 9: - logger.error("Cant decode integer greater than 8 bytes") - return buff[end - 1 :] # noqa: E203 - - for j in range(8 - (end - start)): - num_str += b"\0" - - if self.var_type == "int": - self.value = struct.unpack("i", num_str) - return buff[end:] - - if self.var_type in ["byte", "string"]: - length = struct.unpack("Q", num_str)[0] - self.value = buff[end : (length + end)] # noqa: E203 - return buff[(length + end) :] # noqa: E203 - - if self.var_len == "EOF": - length = len(buff) - self.var_len = str(length) - self.value = buff - return "" - else: - length = self.var_len - - if self.var_type == "string" and self.var_len == "NUL": - for j, x in enumerate(buff): - if int(x) == 0: - length = j + 1 - break - - length = int(length) - if self.var_type in ["byte", "string"]: - end = length - self.value = buff[:end] - else: # if its an integer - end = length - num_str = buff[:end] - if end > 8: - logger.error("cant decode integer greater than 8 bytes") - return buff[end:] - for j in range(8 - end): - num_str += b"\0" - self.value = struct.unpack("Q", num_str)[0] - if str(self.var_len) == "NUL": - self.value = self.value[:-1] - return buff[end:] - - @classmethod - def serialize_int(cls, value): - if value is None: - return NULL_VALUE - - byte_count = -(value.bit_length() // (-8)) - - if byte_count == 0: - return b"\0" - if value < NULL_VALUE_INT: - return struct.pack("B", value) - if value >= NULL_VALUE_INT and byte_count <= 2: - return TWO_BYTE_ENC + struct.pack("H", value) - if byte_count <= 3: - return THREE_BYTE_ENC + struct.pack("i", value)[:3] - if byte_count <= 8: - return THREE_BYTE_ENC + struct.pack("Q", value) - - def toStringPacket(self): - return self.get_serializer()(self.value) - - def get_serializer(self): - if self.var_type in ("string", "byte"): - if self.var_len == "lenenc": - if isinstance(self.value, bytes): - return self.serialize_bytes - return self.serialize_str - if self.var_len == "EOF": - return self.serialize_str_eof 
- if self.var_len == "NUL": - return lambda v: bytes(v, "utf-8") + struct.pack("b", 0) - if self.var_len == "packet": - return lambda v: v.get_packet_string() - else: - return lambda v: struct.pack(self.var_len + "s", bytes(v, "utf-8"))[: int(self.var_len)] - - if self.var_type == "int": - if self.var_len == "lenenc": - return self.serialize_int - else: - return lambda v: struct.pack("Q", v)[: int(self.var_len)] - - @classmethod - def serialize_str_eof(cls, value): - length = len(value) - var_len = length - if length == 0: - return b"" - else: - return struct.pack("{len}s".format(len=var_len), bytes(value, "utf-8"))[:length] - - # def serialize_obj(self, value): - # return self.serialize_str(str(value)) - - @classmethod - def serialize_str(cls, value): - return cls.serialize_bytes(value.encode("utf8")) - - @classmethod - def serialize_bytes(cls, value): - val_len = len(value) - - if val_len == 0: - return b"\0" - - if val_len < NULL_VALUE_INT: - return struct.pack("B", val_len) + value - - byte_count = -(val_len.bit_length() // (-8)) - if byte_count <= 2: - return TWO_BYTE_ENC + struct.pack("H", val_len) + value - if byte_count <= 3: - return THREE_BYTE_ENC + struct.pack("i", val_len)[:3] + value - if byte_count <= 8: - return THREE_BYTE_ENC + struct.pack("Q", val_len) + value - - -def test(): - import pprint - - u = Datum("int<8>", DEFAULT_CAPABILITIES >> 16) - pprint.pprint(u.toStringPacket()) - - -# only run the test if this file is called from debugger -if __name__ == "__main__": - test() diff --git a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packet.py b/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packet.py deleted file mode 100644 index f5c3c1e4b6d..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packet.py +++ /dev/null @@ -1,165 +0,0 @@ -""" -******************************************************* - * Copyright (C) 2017 MindsDB Inc. - * - * This file is part of MindsDB Server. 
- * - * MindsDB Server can not be copied and/or distributed without the express - * permission of MindsDB Inc - ******************************************************* -""" - -import struct - -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MAX_PACKET_SIZE -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class Packet: - def __init__( - self, - length=0, - body="", - packet_string=None, - socket=None, - session=None, - proxy=None, - **kwargs, - ): - self.mysql_socket = socket - self.session = session - self.proxy = proxy - self._kwargs = kwargs - - self.setup() - if packet_string is not None: - self.load_from_packet_string(packet_string) - else: - self.load_from_params(length, session.packet_sequence_number, body) - - def setup(self, length=0, seq=0, body=None): - ... - - def load_from_params(self, length, seq, body): - self._length = length - self._seq = seq - self._body = body - - def setBody(self, body_string): - self._body = body_string - self._length = len(body_string) - - def load_from_packet_string(self, packet_string): - len_header = struct.unpack("i", packet_string[:3] + b"\x00")[0] - count_header = int(packet_string[3]) - body = packet_string[4:] - self.load_from_params(length=len_header, seq=count_header, body=body) - - def get_packet_string(self): - body = self.body - len_header = struct.pack(" 0 else 0) - - for i in range(num_packets): - left_limit = i * MAX_PACKET_SIZE - right_limit = mod if i + 1 == num_packets else MAX_PACKET_SIZE * (i + 1) - body = body_string[left_limit:right_limit] - ret += [Packet(length=right_limit, seq=i + 1, body=body)] - - return ret - - def __str__(self): - return str({"body": self.body, "length": self.length, "seq": self.seq}) - - -def test(): - import pprint - - pprint.pprint(Packet.bodyStringToPackets("abdds")[0].get_packet_string()) - - -# only run the test if this file is called from debugger -if __name__ == "__main__": - test() diff --git 
a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/__init__.py b/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/__init__.py deleted file mode 100644 index cc42606183d..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packets.err_packet import ErrPacket -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packets.handshake_packet import HandshakePacket -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packets.fast_auth_fail_packet import FastAuthFail -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packets.password_answer import PasswordAnswer -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packets.handshake_response_packet import HandshakeResponsePacket -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packets.ok_packet import OkPacket -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packets.switch_auth_packet import SwitchOutPacket -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packets.switch_auth_response_packet import SwitchOutResponse -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packets.command_packet import CommandPacket -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packets.column_count_packet import ColumnCountPacket -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packets.column_definition_packet import ColumnDefenitionPacket -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packets.resultset_row_package import ResultsetRowPacket -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packets.eof_packet import EofPacket -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packets.stmt_prepare_header import STMTPrepareHeaderPacket -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packets.binary_resultset_row_package import BinaryResultsetRowPacket diff --git a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py 
b/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py deleted file mode 100644 index 4f034747e46..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +++ /dev/null @@ -1,182 +0,0 @@ -""" -******************************************************* - * Copyright (C) 2017 MindsDB Inc. - * - * This file is part of MindsDB Server. - * - * MindsDB Server can not be copied and/or distributed without the express - * permission of MindsDB Inc - ******************************************************* -""" - -import datetime as dt -import struct - -import pandas as pd - -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_datum import Datum -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packet import Packet -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import TYPES - - -class BinaryResultsetRowPacket(Packet): - """ - Implementation based on: - https://mariadb.com/kb/en/resultset-row/#binary-resultset-row - https://dev.mysql.com/doc/internals/en/null-bitmap.html - """ - - def setup(self): - data = self._kwargs.get("data", {}) - columns = self._kwargs.get("columns", {}) - - self.value = [b"\x00"] - - # NOTE: according to mysql's doc offset=0 only for COM_STMT_EXECUTE, mariadb's doc does't mention that - # but in fact it looks like offset=2 everywhere - offset = 2 - nulls_bitmap = bytearray((len(columns) + offset + 7) // 8) - for i, el in enumerate(data): - if el is not None: - continue - byte_index = (i + offset) // 8 - bit_index = (i + offset) % 8 - nulls_bitmap[byte_index] |= 1 << bit_index - self.value.append(bytes(nulls_bitmap)) - - for i, col in enumerate(columns): - # NOTE at this moment all types sends as strings, and it works - val = data[i] - if val is None: - continue - - enc = None - env_val = None - col_type = col["type"] - if col_type == TYPES.MYSQL_TYPE_DOUBLE: - enc = ", but actually for json there is no differ with string<> - enc = "string" - else: - enc = 
"string" - - if enc == "": - raise Exception(f"Column with type {col_type} cant be encripted") - - if enc == "byte": - self.value.append(Datum("string", val, "lenenc").toStringPacket()) - elif enc == "string": - if not isinstance(val, str): - val = str(val) - self.value.append(Datum("string", val, "lenenc").toStringPacket()) - else: - if env_val is None: - env_val = struct.pack(enc, val) - self.value.append(env_val) - - def encode_time(self, val: dt.time | str) -> bytes: - """https://mariadb.com/kb/en/resultset-row/#time-binary-encoding""" - if isinstance(val, str): - try: - val = dt.datetime.strptime(val, "%H:%M:%S").time() - except ValueError: - val = dt.datetime.strptime(val, "%H:%M:%S.%f").time() - if val == dt.time(0, 0, 0): - return struct.pack(" 0: - out += struct.pack(" - * - * This file is part of MindsDB Server. - * - * MindsDB Server can not be copied and/or distributed without the express - * permission of MindsDB Inc - ******************************************************* -""" - -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_datum import Datum -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packet import Packet - - -class ColumnCountPacket(Packet): - def setup(self): - count = self._kwargs.get('count', 0) - self.column_count = Datum('int', count) - - @property - def body(self): - - order = [ - 'column_count' - ] - - string = b'' - for key in order: - string += getattr(self, key).toStringPacket() - - self.setBody(string) - return self._body - - @staticmethod - def test(): - import pprint - pprint.pprint( - str(ColumnCountPacket(count=1).get_packet_string()) - ) - - -if __name__ == "__main__": - ColumnCountPacket.test() diff --git a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/column_definition_packet.py b/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/column_definition_packet.py deleted file mode 100644 index 837c1f0e440..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/column_definition_packet.py 
+++ /dev/null @@ -1,94 +0,0 @@ -""" -******************************************************* - * Copyright (C) 2017 MindsDB Inc. - * - * This file is part of MindsDB Server. - * - * MindsDB Server can not be copied and/or distributed without the express - * permission of MindsDB Inc - ******************************************************* -""" - -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packet import Packet -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import CHARSET_NUMBERS, TYPES -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_datum import Datum - - -class ColumnDefenitionPacket(Packet): - ''' - Implementation based on: - https://mariadb.com/kb/en/library/resultset/ - ''' - - # https://dev.mysql.com/doc/internals/en/com-query-response.html - def setup(self): - self.catalog = Datum('string', 'def') - self.schema = Datum( - 'string', - self._kwargs.get('schema', '')) - self.table_alias = Datum( - 'string', - self._kwargs.get('table_alias', '') - ) - self.table_name = Datum( - 'string', - self._kwargs.get('table_name', '') - ) - self.column_alias = Datum( - 'string', - self._kwargs.get('column_alias', '') - ) - self.column_name = Datum( - 'string', - self._kwargs.get('column_name', '') - ) - self.fixed_length = Datum('int', 0xC) - charset = self._kwargs.get('charset', CHARSET_NUMBERS["utf8_unicode_ci"]) - self.character_set = Datum('int<2>', charset) - self.column_length = Datum('int<4>', self._kwargs.get('max_length', 0xf)) # may be this? 
https://books.google.ru/books?id=G2YqBS9CQ0AC&lpg=PP1&hl=ru&pg=PA428#v=onepage&q&f=false - self.column_type = Datum( - 'int<1>', - self._kwargs.get( - 'column_type', - self._kwargs.get('column_type', TYPES.MYSQL_TYPE_VARCHAR) - ) - ) - - self.flags = Datum('int<2>', self._kwargs.get('flags', 0)) - self.decimals = Datum('int<1>', 0) - - self.unused = Datum('int<2>', 0) - - @property - def body(self): - order = [ - 'catalog', - 'schema', - 'table_alias', - 'table_name', - 'column_alias', - 'column_name', - 'fixed_length', - 'character_set', - 'column_length', - 'column_type', - 'flags', - 'decimals', - 'unused' - ] - string = b'' - for key in order: - string += getattr(self, key).toStringPacket() - - self.setBody(string) - return self._body - - @staticmethod - def test(): - import pprint - pprint.pprint(str(ColumnDefenitionPacket().get_packet_string())) - - -# only run the test if this file is called from debugger -if __name__ == "__main__": - ColumnDefenitionPacket.test() diff --git a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/command_packet.py b/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/command_packet.py deleted file mode 100644 index 2d16f8c858a..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/command_packet.py +++ /dev/null @@ -1,159 +0,0 @@ -""" -******************************************************* - * Copyright (C) 2017 MindsDB Inc. - * - * This file is part of MindsDB Server. 
- * - * MindsDB Server can not be copied and/or distributed without the express - * permission of MindsDB Inc - ******************************************************* -""" - -import struct -import math - -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packet import Packet -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_datum import Datum -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import COMMANDS, getConstName, TYPES - - -class CommandPacket(Packet): - ''' - Implementation based on description: - https://mariadb.com/kb/en/library/1-connecting-connecting/#initial-handshake-packet - ''' - - def _read_byte(self, buffer): - b = buffer[:1] - buffer = buffer[1:] - b = struct.unpack(' 0: - return - - # read null-map - null_bytes = math.floor((num_params + 7) / 8) - nulls = [] - for i in range(null_bytes): - b, buffer = self._read_byte(buffer) - for i in range(8): - nulls.append(((1 << i) & b) != 0) - - # read send-type byte - b, buffer = self._read_byte(buffer) - - types = [] - if b == 1: - # read types - for i in range(num_params): - t, buffer = self._read_byte(buffer) - s, buffer = self._read_byte(buffer) - types.append(dict( - type=t, - signed=s - )) - - datumtypes = { - TYPES.MYSQL_TYPE_VAR_STRING: 'string', - TYPES.MYSQL_TYPE_STRING: 'string', - TYPES.MYSQL_TYPE_VARCHAR: 'string', - - TYPES.MYSQL_TYPE_TINY: 'int<1>', - TYPES.MYSQL_TYPE_SHORT: 'int<2>', - TYPES.MYSQL_TYPE_LONG: 'int<4>', - TYPES.MYSQL_TYPE_LONGLONG: 'int<8>', - } - - for i in range(num_params): - if nulls[i]: - self.parameters.append(None) - continue - - datum_type = datumtypes.get(types[i]['type']) - if datum_type is not None: - x = Datum(datum_type) - buffer = x.setFromBuff(buffer) - value = x.value - if isinstance(value, bytes): - value = value.decode() - - self.parameters.append(value) - else: - # NOTE at this moment all sends as strings and it works - raise Exception(f"Unsupported type {types[i]['type']}") - - def setup(self, length=0, count_header=1, body=''): - if length 
== 0: - return - - # self.salt=self.session.salt - - self._length = length - self._seq = count_header - self._body = body - - self.type = Datum('int<1>') - buffer = body - buffer = self.type.setFromBuff(buffer) - - if self.type.value in (COMMANDS.COM_QUERY, COMMANDS.COM_STMT_PREPARE): - self.sql = Datum('str') - buffer = self.sql.setFromBuff(buffer) - elif self.type.value == COMMANDS.COM_STMT_EXECUTE: - # https://mariadb.com/kb/en/com_stmt_execute/ - self.stmt_id = Datum('int<4>') - buffer = self.stmt_id.setFromBuff(buffer) - self.flags = Datum('int<1>') - buffer = self.flags.setFromBuff(buffer) - self.iteration_count = Datum('int<4>') - buffer = self.iteration_count.setFromBuff(buffer) - - self.parameters = [] - - prepared_stmt = self.session.prepared_stmts[self.stmt_id.value] - - num_params = len(prepared_stmt['statement'].params) - self.read_params(buffer, num_params) - # - # if prepared_stmt['type'] == 'select': - # num_params = len(prepared_stmt['statement'].parameters) - # - # self.read_params(buffer, num_params) - # - # elif prepared_stmt['type'] in ['insert', 'delete']: - # # if prepared_stmt['type'] == 'insert': - # # prepared_stmt['statement'].sql - # # statement = parse_sql(prepared_stmt['statement'].sql) - # # num_params = 0 - # # for row in statement.values: - # # for item in row: - # # if isinstance(item, Parameter): - # # num_params = num_params + 1 - # # elif prepared_stmt['type'] == 'delete': - # # num_params = prepared_stmt['statement'].sql.count('?') - # - # num_params = len(prepared_stmt['statement'].parameters) - # self.read_params(buffer, num_params) - elif self.type.value == COMMANDS.COM_STMT_CLOSE: - self.stmt_id = Datum('int<4>') - buffer = self.stmt_id.setFromBuff(buffer) - elif self.type.value == COMMANDS.COM_STMT_FETCH: - self.stmt_id = Datum('int<4>') - buffer = self.stmt_id.setFromBuff(buffer) - self.limit = Datum('int<4>') - buffer = self.limit.setFromBuff(buffer) - elif self.type.value == COMMANDS.COM_INIT_DB: - self.database = 
Datum('str') - buffer = self.database.setFromBuff(buffer) - else: - self.data = Datum('str') - buffer = self.data.setFromBuff(buffer) - - def __str__(self): - return str({ - 'header': {'length': self.length, 'seq': self.seq}, - 'type': getConstName(COMMANDS, self.type.value), - 'vars': self.__dict__ - }) diff --git a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/eof_packet.py b/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/eof_packet.py deleted file mode 100644 index a1effe5bd50..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/eof_packet.py +++ /dev/null @@ -1,52 +0,0 @@ -""" -******************************************************* - * Copyright (C) 2017 MindsDB Inc. - * - * This file is part of MindsDB Server. - * - * MindsDB Server can not be copied and/or distributed without the express - * permission of MindsDB Inc - ******************************************************* -""" - -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packet import Packet -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_datum import Datum - - -class EofPacket(Packet): - ''' - Implementation based on: - https://mariadb.com/kb/en/library/1-connecting-connecting/#initial-handshake-packet - ''' - - def setup(self): - status = 0 if 'status' not in self._kwargs else self._kwargs['status'] - self.eof_header = Datum('int<1>', int('0xfe', 0)) - self.warning_count = Datum('int<2>', 0) - self.server_status = Datum('int<2>', status) - - @property - def body(self): - - order = [ - 'eof_header', - 'warning_count', - 'server_status' - ] - - string = b'' - for key in order: - string += getattr(self, key).toStringPacket() - - self.setBody(string) - return self._body - - @staticmethod - def test(): - import pprint - pprint.pprint(str(EofPacket().get_packet_string())) - - -# only run the test if this file is called from debugger -if __name__ == "__main__": - EofPacket.test() diff --git 
a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/err_packet.py b/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/err_packet.py deleted file mode 100644 index 84b83f99d90..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/err_packet.py +++ /dev/null @@ -1,58 +0,0 @@ -""" -******************************************************* - * Copyright (C) 2017 MindsDB Inc. - * - * This file is part of MindsDB Server. - * - * MindsDB Server can not be copied and/or distributed without the express - * permission of MindsDB Inc - ******************************************************* -""" - -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packet import Packet -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_datum import Datum - - -class ErrPacket(Packet): - ''' - Implementation based on: - https://mariadb.com/kb/en/library/1-connecting-connecting/#initial-handshake-packet - ''' - - def setup(self): - err_code = 0 - if 'err_code' in self._kwargs: - err_code = self._kwargs['err_code'] - - msg = 'ERROR' - if 'msg' in self._kwargs: - msg = self._kwargs['msg'] - - self.err_header = Datum('int<1>', 255) - self.err_code = Datum('int<2>', err_code) - self.msg = Datum('string', msg) - - @property - def body(self): - - order = [ - 'err_header', - 'err_code', - 'msg' - ] - string = b'' - for key in order: - string += getattr(self, key).toStringPacket() - - self.setBody(string) - return self._body - - @staticmethod - def test(): - import pprint - pprint.pprint(str(ErrPacket().get_packet_string())) - - -# only run the test if this file is called from debugger -if __name__ == "__main__": - ErrPacket.test() diff --git a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/fast_auth_fail_packet.py b/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/fast_auth_fail_packet.py deleted file mode 100644 index 3973f0e0b75..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/fast_auth_fail_packet.py +++ /dev/null @@ -1,20 +0,0 @@ 
-from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packet import Packet -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_datum import Datum - - -class FastAuthFail(Packet): - def setup(self): - self.cont = Datum('int<1>', 4) # 0x04 - - @property - def body(self): - - order = [ - 'cont' - ] - string = b'' - for key in order: - string += getattr(self, key).toStringPacket() - - self.setBody(string) - return self._body diff --git a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/handshake_packet.py b/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/handshake_packet.py deleted file mode 100644 index 286f9d76df4..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/handshake_packet.py +++ /dev/null @@ -1,80 +0,0 @@ -""" -******************************************************* - * Copyright (C) 2017 MindsDB Inc. - * - * This file is part of MindsDB Server. - * - * MindsDB Server can not be copied and/or distributed without the express - * permission of MindsDB Inc - ******************************************************* -""" - -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packet import Packet -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import ( - DEFAULT_AUTH_METHOD, - DEFAULT_COALLITION_ID, - FILLER_FOR_WIRESHARK_DUMP, - SERVER_STATUS_AUTOCOMMIT -) -from mindsdb.api.mysql.mysql_proxy.classes.server_capabilities import server_capabilities -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_datum import Datum - - -class HandshakePacket(Packet): - ''' - Implementation based on: - https://mariadb.com/kb/en/library/1-connecting-connecting/#initial-handshake-packet - ''' - - def setup(self): - capabilities = server_capabilities.value - self.protocol_version = Datum('int<1>', 10) - self.server_version = Datum('string', '5.7.1-MindsDB-1.0') - self.connection_id = Datum('int<4>', self.proxy.connection_id) - self.scramble_1st_part = Datum('string<8>', self.proxy.salt[:8]) - self.reserved_byte = Datum('string<1>', '') - 
self.server_capabilities_1st_part = Datum('int<2>', capabilities) - self.server_default_collation = Datum('int<1>', DEFAULT_COALLITION_ID) - self.status_flags = Datum('int<2>', SERVER_STATUS_AUTOCOMMIT) - self.server_capabilities_2nd_part = Datum('int<2>', capabilities >> 16) - self.wireshark_filler = Datum('int<1>', FILLER_FOR_WIRESHARK_DUMP) - # self.wireshark_filler = Datum('int<1>', len(self.proxy.salt)) - self.reserved_filler1 = Datum('string<6>', '') - self.reserved_filler2 = Datum('string<4>', '') - self.scramble_2nd_part = Datum('string', self.proxy.salt[8:]) - self.null_close = Datum('string', DEFAULT_AUTH_METHOD) - - @property - def body(self): - order = [ - 'protocol_version', - 'server_version', - 'connection_id', - 'scramble_1st_part', - 'reserved_byte', - 'server_capabilities_1st_part', - 'server_default_collation', - 'status_flags', - 'server_capabilities_2nd_part', - 'wireshark_filler', - 'reserved_filler1', - 'reserved_filler2', - 'scramble_2nd_part', - 'null_close' - ] - string = b'' - for key in order: - string += getattr(self, key).toStringPacket() - - self.setBody(string) - return self._body - - @staticmethod - def test(): - import pprint - pprint.pprint(str(HandshakePacket().get_packet_string())) - - -# only run the test if this file is called from debugger -if __name__ == "__main__": - HandshakePacket.test() diff --git a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/handshake_response_packet.py b/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/handshake_response_packet.py deleted file mode 100644 index 751886024bd..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/handshake_response_packet.py +++ /dev/null @@ -1,96 +0,0 @@ -""" -******************************************************* - * Copyright (C) 2017 MindsDB Inc. - * - * This file is part of MindsDB Server. 
- * - * MindsDB Server can not be copied and/or distributed without the express - * permission of MindsDB Inc - ******************************************************* -""" - -# https://dev.mysql.com/doc/internals/en/connection-phase-packets.html#packet-Protocol::HandshakeResponse - -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packet import Packet -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_datum import Datum -from mindsdb.api.mysql.mysql_proxy.classes.client_capabilities import ClentCapabilities -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import CAPABILITIES -from mindsdb.api.mysql.mysql_proxy.classes.server_capabilities import server_capabilities - - -class HandshakeResponsePacket(Packet): - ''' - Implementation based on description: - https://mariadb.com/kb/en/library/1-connecting-connecting/#initial-handshake-packet - ''' - - def setup(self, length=0, count_header=1, body=''): - length = len(body) - - if length == 0: - return - - self.salt = self.proxy.salt - - self._length = length - self._seq = count_header - self._body = body - - self.capabilities = Datum('int<4>') - self.max_packet_size = Datum('int<4>') - self.reserved = Datum('string<23>') - self.username = Datum('string') - - self.enc_password = Datum('string') - self.database = Datum('string') - - self.charset = Datum('int<1>') - - self.client_auth_plugin = Datum('string') - - buffer = body - - if len(body) == 32 and body[9:] == (b'\x00' * 23): - self.type = 'SSLRequest' - buffer = self.capabilities.setFromBuff(buffer) - buffer = self.max_packet_size.setFromBuff(buffer) - buffer = self.charset.setFromBuff(buffer) - else: - self.type = 'HandshakeResponse' - buffer = self.capabilities.setFromBuff(buffer) - capabilities = ClentCapabilities(self.capabilities.value) - buffer = self.max_packet_size.setFromBuff(buffer) - buffer = self.charset.setFromBuff(buffer) - buffer = self.reserved.setFromBuff(buffer) - buffer = self.username.setFromBuff(buffer) - - if 
server_capabilities.has(CAPABILITIES.CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA) \ - and capabilities.PLUGIN_AUTH_LENENC_CLIENT_DATA: - self.enc_password = Datum('string') - buffer = self.enc_password.setFromBuff(buffer) - elif server_capabilities.has(CAPABILITIES.CLIENT_SECURE_CONNECTION) \ - and capabilities.SECURE_CONNECTION: - self.auth_resp_len = Datum('int<1>') - buffer = self.auth_resp_len.setFromBuff(buffer) - self.enc_password = Datum(f'string<{self.auth_resp_len.value}>') - buffer = self.enc_password.setFromBuff(buffer) - else: - pass_byte = Datum('int<1>') - buffer = pass_byte.setFromBuff(buffer) - - if capabilities.CONNECT_WITH_DB: - buffer = self.database.setFromBuff(buffer) - if capabilities.PLUGIN_AUTH: - buffer = self.client_auth_plugin.setFromBuff(buffer) - - # at the end is CLIENT_CONNECT_ATTRS, but we dont use it and dont parse - - self.session.username = self.username.value - - def __str__(self): - return str({ - 'header': {'length': self.length, 'seq': self.seq}, - 'username': self.username.value, - 'password': self.enc_password.value, - 'database': self.database.value - }) diff --git a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/ok_packet.py b/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/ok_packet.py deleted file mode 100644 index be2d91cbc89..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/ok_packet.py +++ /dev/null @@ -1,106 +0,0 @@ -""" -******************************************************* - * Copyright (C) 2017 MindsDB Inc. - * - * This file is part of MindsDB Server. 
- * - * MindsDB Server can not be copied and/or distributed without the express - * permission of MindsDB Inc - ******************************************************* -""" - -import struct - -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packet import Packet -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_datum import Datum -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import SESSION_TRACK, SERVER_STATUS - - -class OkPacket(Packet): - ''' - Implementation based on: - https://mariadb.com/kb/en/library/1-connecting-connecting/#initial-handshake-packet - ''' - - ''' - int<1> 0x00 : OK_Packet header or (0xFE if CLIENT_DEPRECATE_EOF is set) - int affected rows - int last insert id - int<2> server status - int<2> warning count - if session_tracking_supported (see CLIENT_SESSION_TRACK) - string info - if (status flags & SERVER_SESSION_STATE_CHANGED) - string session state info - string value of variable - else - string info - ''' - - def setup(self): - eof = self._kwargs.get('eof', False) - self.ok_header = Datum('int<1>', 0xFE if eof is True else 0) - self.affected_rows = Datum('int', self._kwargs.get('affected_rows') or 0) - self.last_insert_id = Datum('int', 0) - status = self._kwargs.get('status', 0x0002) - self.server_status = Datum('int<2>', status) - # Datum('int<2>', 0) - self.warning_count = Datum('int<2>', 0) - - self.state_track = None - state_track = self._kwargs.get('state_track') # [[key: value]] - if state_track is not None: - accum = b'' - status = status | SERVER_STATUS.SERVER_SESSION_STATE_CHANGED - self.server_status = Datum('int<2>', status) - self.state_track = b'' - self.state_track += Datum('string', '').toStringPacket() # 'info' - human readable status information - for el in state_track: - # NOTE at this moment just system variables - name, value = el - part = Datum('string', name).toStringPacket() - part += Datum('string', value).toStringPacket() - accum += struct.pack('i', SESSION_TRACK.SESSION_TRACK_SYSTEM_VARIABLES)[:1] 
+ struct.pack('i', len(part))[:1] + part - # self.state_track - # self.state_track.append(Datum('string', '') - accum = struct.pack('i', len(accum))[:1] + accum - self.state_track += accum - - self.info = Datum('string') - - @property - def body(self): - - order = [ - 'ok_header', - 'affected_rows', - 'last_insert_id', - 'server_status', - 'warning_count', - 'state_track', - 'info', - ] - string = b'' - for key in order: - item = getattr(self, key) - section_pack = b'' - if item is None: - continue - elif isinstance(item, bytes): - section_pack = item - else: - section_pack = getattr(self, key).toStringPacket() - string += section_pack - - self.setBody(string) - return self._body - - @staticmethod - def test(): - import pprint - pprint.pprint(str(OkPacket(state_track=[['character_set_client', 'utf8'], ['character_set_connection', 'utf8'], ['character_set_results', 'utf8']]).get_packet_string())) - - -# only run the test if this file is called from debugger -if __name__ == "__main__": - OkPacket.test() diff --git a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/password_answer.py b/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/password_answer.py deleted file mode 100644 index e6486f06225..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/password_answer.py +++ /dev/null @@ -1,13 +0,0 @@ -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packet import Packet -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_datum import Datum - - -class PasswordAnswer(Packet): - def setup(self, length=0, count_header=1, body=''): - length = len(body) - - if length == 0: - return - self.password = Datum('string') - buffer = body - buffer = self.password.setFromBuff(buffer) diff --git a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py b/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py deleted file mode 100644 index e32bcc7fd59..00000000000 --- 
a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +++ /dev/null @@ -1,55 +0,0 @@ -""" -******************************************************* - * Copyright (C) 2017 MindsDB Inc. - * - * This file is part of MindsDB Server. - * - * MindsDB Server can not be copied and/or distributed without the express - * permission of MindsDB Inc - ******************************************************* -""" - -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_datum import Datum -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packet import Packet -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import NULL_VALUE - - -class ResultsetRowPacket(Packet): - """ - Implementation based on: - https://dev.mysql.com/doc/internals/en/com-query-response.html#packet-ProtocolText::ResultsetRow - https://mariadb.com/kb/en/resultset-row/ - """ - - def setup(self): - data = self._kwargs.get("data", {}) - self.value = [] - for val in data: - if val is None: - self.value.append(NULL_VALUE) - elif isinstance(val, bytes): - self.value.append(Datum("byte", val)) - else: - self.value.append(Datum("string", str(val))) - - @property - def body(self): - string = b"" - for x in self.value: - if x is NULL_VALUE: - string += x - else: - string += x.toStringPacket() - - self.setBody(string) - return self._body - - @staticmethod - def test(): - import pprint - - pprint.pprint(str(ResultsetRowPacket().get_packet_string())) - - -if __name__ == "__main__": - ResultsetRowPacket.test() diff --git a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/stmt_prepare_header.py b/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/stmt_prepare_header.py deleted file mode 100644 index 28371b401f8..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/stmt_prepare_header.py +++ /dev/null @@ -1,46 +0,0 @@ -""" -******************************************************* - * Copyright (C) 2017 MindsDB Inc. - * - * This file is part of MindsDB Server. 
- * - * MindsDB Server can not be copied and/or distributed without the express - * permission of MindsDB Inc - ******************************************************* -""" - - -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packet import Packet -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_datum import Datum - - -class STMTPrepareHeaderPacket(Packet): - ''' - Implementation based on description: - https://dev.mysql.com/doc/internals/en/com-stmt-prepare-response.html#packet-COM_STMT_PREPARE_OK - ''' - - def setup(self): - self.status = Datum('int<1>', 0) - self.stmt_id = Datum('int<4>', self._kwargs.get('stmt_id', 1)) - self.num_columns = Datum('int<2>', self._kwargs.get('num_columns', 0)) - self.num_params = Datum('int<2>', self._kwargs.get('num_params', 0)) - self.filler = Datum('int<1>', 0) - self.warning_count = Datum('int<2>', 0) - - @property - def body(self): - order = [ - 'status', - 'stmt_id', - 'num_columns', - 'num_params', - 'filler', - 'warning_count' - ] - string = b'' - for key in order: - string += getattr(self, key).toStringPacket() - - self.setBody(string) - return self._body diff --git a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/switch_auth_packet.py b/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/switch_auth_packet.py deleted file mode 100644 index deac354bcad..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/switch_auth_packet.py +++ /dev/null @@ -1,53 +0,0 @@ -""" -******************************************************* - * Copyright (C) 2017 MindsDB Inc. - * - * This file is part of MindsDB Server. 
- * - * MindsDB Server can not be copied and/or distributed without the express - * permission of MindsDB Inc - ******************************************************* -""" - -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packet import Packet -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_datum import Datum - - -class SwitchOutPacket(Packet): - ''' - Implementation based on: - https://mariadb.com/kb/en/library/1-connecting-connecting/#initial-handshake-packet - ''' - - def setup(self): - status = 0 if 'status' not in self._kwargs else self._kwargs['status'] # noqa - seed = self._kwargs['seed'] - method = self._kwargs['method'] - self.eof_header = Datum('int<1>', int('0xfe', 0)) - self.authentication_plugin_name = Datum('string', method) - self.seed = Datum('string', seed) - - @property - def body(self): - - order = [ - 'eof_header', - 'authentication_plugin_name', - 'seed' - ] - - string = b'' - for key in order: - string += getattr(self, key).toStringPacket() - - self.setBody(string) - return self._body - - @staticmethod - def test(): - import pprint - pprint.pprint(str(SwitchOutPacket().get_packet_string())) - - -if __name__ == "__main__": - SwitchOutPacket.test() diff --git a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/switch_auth_response_packet.py b/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/switch_auth_response_packet.py deleted file mode 100644 index 89561bdd4ca..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/switch_auth_response_packet.py +++ /dev/null @@ -1,16 +0,0 @@ -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packet import Packet -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_datum import Datum - - -class SwitchOutResponse(Packet): - def setup(self, length=0, count_header=1, body=''): - length = len(body) - - if length == 0: - self.password = b'' - return - - self.enc_password = Datum('string') # 0x04 - buffer = body - buffer = self.enc_password.setFromBuff(buffer) - 
self.password = self.enc_password.value diff --git a/mindsdb/api/mysql/mysql_proxy/executor/__init__.py b/mindsdb/api/mysql/mysql_proxy/executor/__init__.py deleted file mode 100644 index 8c34740bda0..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/executor/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .mysql_executor import Executor - diff --git a/mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py b/mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py deleted file mode 100644 index ec9c122f3d6..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +++ /dev/null @@ -1,108 +0,0 @@ -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.exceptions import ParsingException -from mindsdb_sql_parser.ast.base import ASTNode -import mindsdb.utilities.profiler as profiler -from mindsdb.api.executor.sql_query import SQLQuery -from mindsdb.utilities.types.column import Column -from mindsdb.api.executor.planner import utils as planner_utils -from mindsdb.api.executor.data_types.answer import ExecuteAnswer -from mindsdb.api.executor.command_executor import ExecuteCommands -from mindsdb.api.executor.exceptions import SqlSyntaxError -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class Executor: - def __init__(self, session, sqlserver): - self.session = session - self.sqlserver = sqlserver - - self.query: ASTNode = None - - self.columns: list[Column] = [] - self.params: list[Column] = [] - self.data = None - self.server_status = None - self.is_executed = False - self.error_message = None - self.error_code = None - self.executor_answer: ExecuteAnswer = None - - self.sql = "" - self.sql_lower = "" - - context = {"connection_id": self.sqlserver.connection_id} - self.command_executor = ExecuteCommands(self.session, context) - - def change_default_db(self, new_db): - self.command_executor.change_default_db(new_db) - - def 
stmt_prepare(self, sql): - self.parse(sql) - - # if not params - params = planner_utils.get_query_params(self.query) - if len(params) == 0: - # execute immediately - self.do_execute() - - else: - # plan query - # TODO less complex. - # planner is inside SQLQuery now. - - sqlquery = SQLQuery(self.query, session=self.session, execute=False) - - sqlquery.prepare_query() - - self.params = [Column(name=p.value, alias=p.value, type=MYSQL_DATA_TYPE.TEXT) for p in params] - - # TODO: - # select * from mindsdb.models doesn't invoke prepare_steps and columns_list is empty - self.columns = sqlquery.columns_list - - def stmt_execute(self, param_values): - if self.is_executed: - return - - # fill params - self.query = planner_utils.fill_query_params(self.query, param_values) - - # execute query - self.do_execute() - - @profiler.profile() - def query_execute(self, sql): - self.parse(sql) - self.do_execute() - - @profiler.profile() - def parse(self, sql): - self.sql = sql - sql_lower = sql.lower() - self.sql_lower = sql_lower.replace("`", "") - - try: - self.query = parse_sql(sql) - except ParsingException as mdb_error: - # not all statements are parsed by parse_sql - logger.warning("Failed to parse SQL query") - logger.debug(f"Query that cannot be parsed: {sql}") - - raise SqlSyntaxError(f"The SQL statement cannot be parsed - {sql}: {mdb_error}") from mdb_error - except Exception: - logger.exception(f"Unexpected error while parsing SQL query: {sql}") - raise - - @profiler.profile() - def do_execute(self): - # it can be already run at prepare state - if self.is_executed: - return - - executor_answer: ExecuteAnswer = self.command_executor.execute_command(self.query) - self.executor_answer = executor_answer - - self.is_executed = True diff --git a/mindsdb/api/mysql/mysql_proxy/external_libs/README.md b/mindsdb/api/mysql/mysql_proxy/external_libs/README.md deleted file mode 100644 index 6187647620c..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/external_libs/README.md +++ 
/dev/null @@ -1,4 +0,0 @@ -Here we put libraries or helpers that are borrowed from other projects - -- Try only to use MIT code -- If the code you are using is not MIT licensed, email management@mindsdb.com before proceeding \ No newline at end of file diff --git a/mindsdb/api/mysql/mysql_proxy/external_libs/__init__.py b/mindsdb/api/mysql/mysql_proxy/external_libs/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/api/mysql/mysql_proxy/external_libs/mysql_scramble.py b/mindsdb/api/mysql/mysql_proxy/external_libs/mysql_scramble.py deleted file mode 100644 index fa2fb47445a..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/external_libs/mysql_scramble.py +++ /dev/null @@ -1,135 +0,0 @@ -""" -MIT License -============ -Copyright (c) 2010, 2013 PyMySQL contributors - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-""" - -import hashlib -from functools import partial -import struct -import io -import sys - -PYPY = hasattr(sys, 'pypy_translation_info') -JYTHON = sys.platform.startswith('java') -IRONPYTHON = sys.platform == 'cli' -CPYTHON = not PYPY and not JYTHON and not IRONPYTHON - -range_type = range -text_type = str -long_type = int -str_type = str -unichr = chr - -sha_new = partial(hashlib.new, 'sha1') - - -def scramble(password, message): - SCRAMBLE_LENGTH = 20 - stage1 = sha_new(password.encode('utf-8')).digest() - stage2 = sha_new(stage1).digest() - s = sha_new() - s.update(message[:SCRAMBLE_LENGTH].encode('utf-8')) - s.update(stage2) - result = s.digest() - return _my_crypt(result, stage1) - - -def _my_crypt(message1, message2): - length = len(message1) - result = b'' - for i in range_type(length): - x = (struct.unpack('B', message1[i:i + 1])[0] - ^ struct.unpack('B', message2[i:i + 1])[0]) - result += struct.pack('B', x) - return result - - -# old_passwords support ported from libmysql/password.c -SCRAMBLE_LENGTH_323 = 8 - - -class RandStruct_323(object): - def __init__(self, seed1, seed2): - self.max_value = 0x3FFFFFFF - self.seed1 = seed1 % self.max_value - self.seed2 = seed2 % self.max_value - - def my_rnd(self): - self.seed1 = (self.seed1 * 3 + self.seed2) % self.max_value - self.seed2 = (self.seed1 + self.seed2 + 33) % self.max_value - return float(self.seed1) / float(self.max_value) - - -def scramble_323(password, message): - hash_pass = _hash_password_323(password) - hash_message = _hash_password_323(message[:SCRAMBLE_LENGTH_323]) - hash_pass_n = struct.unpack(">LL", hash_pass) - hash_message_n = struct.unpack(">LL", hash_message) - - rand_st = RandStruct_323(hash_pass_n[0] ^ hash_message_n[0], - hash_pass_n[1] ^ hash_message_n[1]) - outbuf = io.BytesIO() - for _ in range_type(min(SCRAMBLE_LENGTH_323, len(message))): - outbuf.write(int2byte(int(rand_st.my_rnd() * 31) + 64)) - extra = int2byte(int(rand_st.my_rnd() * 31)) - out = outbuf.getvalue() - outbuf = 
io.BytesIO() - for c in out: - outbuf.write(int2byte(byte2int(c) ^ byte2int(extra))) - return outbuf.getvalue() - - -def _hash_password_323(password): - nr = 1345345333 - add = 7 - nr2 = 0x12345671 - - # x in py3 is numbers, p27 is chars - for c in [byte2int(x) for x in password if x not in (' ', '\t', 32, 9)]: - nr ^= (((nr & 63) + add) * c) + (nr << 8) & 0xFFFFFFFF - nr2 = (nr2 + ((nr2 << 8) ^ nr)) & 0xFFFFFFFF - add = (add + c) & 0xFFFFFFFF - - r1 = nr & ((1 << 31) - 1) # kill sign bits - r2 = nr2 & ((1 << 31) - 1) - return struct.pack(">LL", r1, r2) - - -def byte2int(b): - if isinstance(b, int): - return b - else: - return struct.unpack("!B", b)[0] - - -def int2byte(i): - return struct.pack("!B", i) - - -def join_bytes(bs): - if len(bs) == 0: - return "" - else: - rv = bs[0] - for b in bs[1:]: - rv += b - return rv diff --git a/mindsdb/api/mysql/mysql_proxy/libs/__init__.py b/mindsdb/api/mysql/mysql_proxy/libs/__init__.py deleted file mode 100644 index 8b137891791..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/libs/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/mindsdb/api/mysql/mysql_proxy/libs/constants/__init__.py b/mindsdb/api/mysql/mysql_proxy/libs/constants/__init__.py deleted file mode 100644 index 8b137891791..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/libs/constants/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py b/mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py deleted file mode 100644 index 0e8bd676801..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +++ /dev/null @@ -1,1242 +0,0 @@ -""" -******************************************************* - * Copyright (C) 2017 MindsDB Inc. - * - * This file is part of MindsDB Server. 
- * - * MindsDB Server can not be copied and/or distributed without the express - * permission of MindsDB Inc - ******************************************************* -""" - -import enum -from dataclasses import dataclass, field - -# CAPABILITIES -# As defined in : https://dev.mysql.com/doc/dev/mysql-server/8.0.0/group__group__cs__capabilities__flags.html - -MAX_PACKET_SIZE = 16777215 - - -# capabilities description can be found on page 67 https://books.google.ru/books?id=5TjrxYHRAwEC&printsec=frontcover#v=onepage&q&f=false -# https://mariadb.com/kb/en/connection/ -# https://dev.mysql.com/doc/internals/en/capability-flags.html -class CAPABILITIES(object): - __slots__ = () - CLIENT_LONG_PASSWORD = 1 - CLIENT_FOUND_ROWS = 2 - CLIENT_LONG_FLAG = 4 - CLIENT_CONNECT_WITH_DB = 8 - CLIENT_NO_SCHEMA = 16 - CLIENT_COMPRESS = 32 - CLIENT_ODBC = 64 - CLIENT_LOCAL_FILES = 128 - CLIENT_IGNORE_SPACE = 256 - CLIENT_PROTOCOL_41 = 512 - CLIENT_INTERACTIVE = 1024 - CLIENT_SSL = 2048 - CLIENT_IGNORE_SIGPIPE = 4096 - CLIENT_TRANSACTIONS = 8192 - CLIENT_RESERVED = 16384 - CLIENT_RESERVED2 = 32768 - CLIENT_MULTI_STATEMENTS = 1 << 16 - CLIENT_MULTI_RESULTS = 1 << 17 - CLIENT_PS_MULTI_RESULTS = 1 << 18 - CLIENT_PLUGIN_AUTH = 1 << 19 - CLIENT_CONNECT_ATTRS = 1 << 20 - CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA = 1 << 21 - CLIENT_CAN_HANDLE_EXPIRED_PASSWORDS = 1 << 22 - CLIENT_SESSION_TRACK = 1 << 23 - CLIENT_DEPRECATE_EOF = 1 << 24 - CLIENT_SSL_VERIFY_SERVER_CERT = 1 << 30 - CLIENT_REMEMBER_OPTIONS = 1 << 31 - CLIENT_SECURE_CONNECTION = 0x00008000 - - -CAPABILITIES = CAPABILITIES() - - -# SERVER STATUS -class SERVER_STATUS(object): - __slots__ = () - SERVER_STATUS_IN_TRANS = 1 # A transaction is currently active - SERVER_STATUS_AUTOCOMMIT = 2 # Autocommit mode is set - SERVER_MORE_RESULTS_EXISTS = 8 # more results exists (more packet follow) - SERVER_QUERY_NO_GOOD_INDEX_USED = 16 - SERVER_QUERY_NO_INDEX_USED = 32 - SERVER_STATUS_CURSOR_EXISTS = ( - 64 # when using COM_STMT_FETCH, indicate that 
current cursor still has result (deprecated) - ) - SERVER_STATUS_LAST_ROW_SENT = ( - 128 # when using COM_STMT_FETCH, indicate that current cursor has finished to send results (deprecated) - ) - SERVER_STATUS_DB_DROPPED = 1 << 8 # database has been dropped - SERVER_STATUS_NO_BACKSLASH_ESCAPES = 1 << 9 # current escape mode is "no backslash escape" - SERVER_STATUS_METADATA_CHANGED = ( - 1 << 10 - ) # A DDL change did have an impact on an existing PREPARE (an automatic reprepare has been executed) - SERVER_QUERY_WAS_SLOW = 1 << 11 - SERVER_PS_OUT_PARAMs = 1 << 12 # this resultset contain stored procedure output parameter - SERVER_STATUS_IN_TRANS_READONLY = 1 << 13 # current transaction is a read-only transaction - SERVER_SESSION_STATE_CHANGED = 1 << 14 # session state change. see Session change type for more information - - -SERVER_STATUS = SERVER_STATUS() - - -# COMMANDS -class COMMANDS(object): - __slots__ = () - COM_CHANGE_USER = int("0x11", 0) - COM_DEBUG = int("0x0D", 0) - COM_INIT_DB = int("0x02", 0) - COM_PING = int("0x0e", 0) - COM_PROCESS_KILL = int("0xC", 0) - COM_QUERY = int("0x03", 0) - COM_QUIT = int("0x01", 0) - COM_RESET_CONNECTION = int("0x1f", 0) - COM_SET_OPTION = int("0x1b", 0) - COM_SHUTDOWN = int("0x0a", 0) - COM_SLEEP = int("0x00", 0) - COM_STATISTICS = int("0x09", 0) - COM_STMT_PREPARE = int("0x16", 0) - COM_STMT_EXECUTE = int("0x17", 0) - COM_STMT_FETCH = int("0x1c", 0) - COM_STMT_RESET = int("0x1a", 0) - COM_STMT_CLOSE = int("0x19", 0) - COM_FIELD_LIST = int("0x04", 0) # deprecated - - -COMMANDS = COMMANDS() - - -# FIELD TYPES -# https://dev.mysql.com/doc/dev/mysql-server/latest/field__types_8h_source.html -# https://mariadb.com/kb/en/result-set-packets/ -class TYPES(object): - __slots__ = () - MYSQL_TYPE_DECIMAL = 0 - MYSQL_TYPE_TINY = 1 - MYSQL_TYPE_SHORT = 2 - MYSQL_TYPE_LONG = 3 - MYSQL_TYPE_FLOAT = 4 - MYSQL_TYPE_DOUBLE = 5 - MYSQL_TYPE_NULL = 6 - MYSQL_TYPE_TIMESTAMP = 7 - MYSQL_TYPE_LONGLONG = 8 - MYSQL_TYPE_INT24 = 9 - MYSQL_TYPE_DATE 
= 10 - MYSQL_TYPE_TIME = 11 - MYSQL_TYPE_DATETIME = 12 - MYSQL_TYPE_YEAR = 13 - MYSQL_TYPE_NEWDATE = 14 - MYSQL_TYPE_VARCHAR = 15 - MYSQL_TYPE_BIT = 16 - MYSQL_TYPE_TIMESTAMP2 = 17 - MYSQL_TYPE_DATETIME2 = 18 - MYSQL_TYPE_TIME2 = 19 - MYSQL_TYPE_TYPED_ARRAY = 20 - MYSQL_TYPE_VECTOR = 242 - MYSQL_TYPE_INVALID = 243 - MYSQL_TYPE_BOOL = 244 - MYSQL_TYPE_JSON = 245 - MYSQL_TYPE_NEWDECIMAL = 246 - MYSQL_TYPE_ENUM = 247 - MYSQL_TYPE_SET = 248 - MYSQL_TYPE_TINY_BLOB = 249 - MYSQL_TYPE_MEDIUM_BLOB = 250 - MYSQL_TYPE_LONG_BLOB = 251 - MYSQL_TYPE_BLOB = 252 - MYSQL_TYPE_VAR_STRING = 253 - MYSQL_TYPE_STRING = 254 - MYSQL_TYPE_GEOMETRY = 255 - - -C_TYPES = TYPES() -TYPES = TYPES() - - -class MYSQL_DATA_TYPE(enum.Enum): - TINYINT = "TINYINT" - SMALLINT = "SMALLINT" - MEDIUMINT = "MEDIUMINT" - INT = "INT" - BIGINT = "BIGINT" - FLOAT = "FLOAT" - DOUBLE = "DOUBLE" - DECIMAL = "DECIMAL" - YEAR = "YEAR" - TIME = "TIME" - DATE = "DATE" - DATETIME = "DATETIME" - TIMESTAMP = "TIMESTAMP" - CHAR = "CHAR" - BINARY = "BINARY" - VARCHAR = "VARCHAR" - VARBINARY = "VARBINARY" - TINYBLOB = "TINYBLOB" - TINYTEXT = "TINYTEXT" - BLOB = "BLOB" - TEXT = "TEXT" - MEDIUMBLOB = "MEDIUMBLOB" - MEDIUMTEXT = "MEDIUMTEXT" - LONGBLOB = "LONGBLOB" - LONGTEXT = "LONGTEXT" - BIT = "BIT" - BOOL = "BOOL" - BOOLEAN = "BOOLEAN" - JSON = "JSON" - VECTOR = "VECTOR" - - -# Default values for attributes of MySQL data types as they appear in information_schema.columns -# These values match the MySQL v8.0.37 defaults and are used to properly represent column metadata -MYSQL_DATA_TYPE_COLUMNS_DEFAULT = { - MYSQL_DATA_TYPE.TINYINT: {"NUMERIC_PRECISION": 3, "NUMERIC_SCALE": 0}, - MYSQL_DATA_TYPE.SMALLINT: {"NUMERIC_PRECISION": 5, "NUMERIC_SCALE": 0}, - MYSQL_DATA_TYPE.MEDIUMINT: {"NUMERIC_PRECISION": 7, "NUMERIC_SCALE": 0}, - MYSQL_DATA_TYPE.INT: {"NUMERIC_PRECISION": 10, "NUMERIC_SCALE": 0}, - MYSQL_DATA_TYPE.BIGINT: {"NUMERIC_PRECISION": 19, "NUMERIC_SCALE": 0}, - MYSQL_DATA_TYPE.FLOAT: {"NUMERIC_PRECISION": 12}, - 
MYSQL_DATA_TYPE.DOUBLE: {"NUMERIC_PRECISION": 22}, - MYSQL_DATA_TYPE.DECIMAL: {"NUMERIC_PRECISION": 10, "NUMERIC_SCALE": 0, "COLUMN_TYPE": "decimal(10,0)"}, - MYSQL_DATA_TYPE.YEAR: { - # every column is null - }, - MYSQL_DATA_TYPE.TIME: {"DATETIME_PRECISION": 0}, - MYSQL_DATA_TYPE.DATE: { - # every column is null - }, - MYSQL_DATA_TYPE.DATETIME: {"DATETIME_PRECISION": 0}, - MYSQL_DATA_TYPE.TIMESTAMP: {"DATETIME_PRECISION": 0}, - MYSQL_DATA_TYPE.CHAR: { - "CHARACTER_MAXIMUM_LENGTH": 1, - "CHARACTER_OCTET_LENGTH": 4, - "CHARACTER_SET_NAME": "utf8", - "COLLATION_NAME": "utf8_bin", - "COLUMN_TYPE": "char(1)", - }, - MYSQL_DATA_TYPE.BINARY: {"CHARACTER_MAXIMUM_LENGTH": 1, "CHARACTER_OCTET_LENGTH": 1, "COLUMN_TYPE": "binary(1)"}, - MYSQL_DATA_TYPE.VARCHAR: { - "CHARACTER_MAXIMUM_LENGTH": 1024, # NOTE mandatory for field creation - "CHARACTER_OCTET_LENGTH": 4096, # NOTE mandatory for field creation - "CHARACTER_SET_NAME": "utf8", - "COLLATION_NAME": "utf8_bin", - "COLUMN_TYPE": "varchar(1024)", - }, - MYSQL_DATA_TYPE.VARBINARY: { - "CHARACTER_MAXIMUM_LENGTH": 1024, # NOTE mandatory for field creation - "CHARACTER_OCTET_LENGTH": 1024, # NOTE mandatory for field creation - "COLUMN_TYPE": "varbinary(1024)", - }, - MYSQL_DATA_TYPE.TINYBLOB: {"CHARACTER_MAXIMUM_LENGTH": 255, "CHARACTER_OCTET_LENGTH": 255}, - MYSQL_DATA_TYPE.TINYTEXT: { - "CHARACTER_MAXIMUM_LENGTH": 255, - "CHARACTER_OCTET_LENGTH": 255, - "CHARACTER_SET_NAME": "utf8", - "COLLATION_NAME": "utf8_bin", - }, - MYSQL_DATA_TYPE.BLOB: {"CHARACTER_MAXIMUM_LENGTH": 65535, "CHARACTER_OCTET_LENGTH": 65535}, - MYSQL_DATA_TYPE.TEXT: { - "CHARACTER_MAXIMUM_LENGTH": 65535, - "CHARACTER_OCTET_LENGTH": 65535, - "CHARACTER_SET_NAME": "utf8", - "COLLATION_NAME": "utf8_bin", - }, - MYSQL_DATA_TYPE.MEDIUMBLOB: {"CHARACTER_MAXIMUM_LENGTH": 16777215, "CHARACTER_OCTET_LENGTH": 16777215}, - MYSQL_DATA_TYPE.MEDIUMTEXT: { - "CHARACTER_MAXIMUM_LENGTH": 16777215, - "CHARACTER_OCTET_LENGTH": 16777215, - "CHARACTER_SET_NAME": "utf8", - 
"COLLATION_NAME": "utf8_bin", - }, - MYSQL_DATA_TYPE.LONGBLOB: { - "CHARACTER_MAXIMUM_LENGTH": 4294967295, - "CHARACTER_OCTET_LENGTH": 4294967295, - }, - MYSQL_DATA_TYPE.LONGTEXT: { - "CHARACTER_MAXIMUM_LENGTH": 4294967295, - "CHARACTER_OCTET_LENGTH": 4294967295, - "CHARACTER_SET_NAME": "utf8", - "COLLATION_NAME": "utf8_bin", - }, - MYSQL_DATA_TYPE.BIT: { - "NUMERIC_PRECISION": 1, - "COLUMN_TYPE": "bit(1)", - # 'NUMERIC_SCALE': null - }, - MYSQL_DATA_TYPE.BOOL: { - "DATA_TYPE": "tinyint", - "NUMERIC_PRECISION": 3, - "NUMERIC_SCALE": 0, - "COLUMN_TYPE": "tinyint(1)", - }, - MYSQL_DATA_TYPE.BOOLEAN: { - "DATA_TYPE": "tinyint", - "NUMERIC_PRECISION": 3, - "NUMERIC_SCALE": 0, - "COLUMN_TYPE": "tinyint(1)", - }, -} - - -class FIELD_FLAG(object): - __slots__ = () - NOT_NULL = 1 # field cannot be null - PRIMARY_KEY = 2 # field is a primary key - UNIQUE_KEY = 4 # field is unique - MULTIPLE_KEY = 8 # field is in a multiple key - BLOB = 16 # is this field a Blob - UNSIGNED = 32 # is this field unsigned - ZEROFILL_FLAG = 64 # is this field a zerofill - BINARY_COLLATION = 128 # whether this field has a binary collation - ENUM = 256 # Field is an enumeration - AUTO_INCREMENT = 512 # field auto-increment - TIMESTAMP = 1024 # field is a timestamp value - SET = 2048 # field is a SET - NO_DEFAULT_VALUE_FLAG = 4096 # field doesn't have default value - ON_UPDATE_NOW_FLAG = 8192 # field is set to NOW on UPDATE - NUM_FLAG = 32768 # field is num - - -FIELD_FLAG = FIELD_FLAG() - - -@dataclass(frozen=True) -class CTypeProperties: - """Properties that describe int-representation of mysql column. - - Attributes: - code (int): Code of the mysql type. - size (int | None): Size of the column. If not specified, then size is variable (text/blob types). - flags (list[int]): Flags of the mysql type. 
- """ - - code: int - size: int | None = None - flags: list[int] = field(default_factory=list) - - -# Map between data types and C types -# Fields size and flags been taken from tcp dump of mysql-server response -# https://dev.mysql.com/doc/c-api/8.0/en/c-api-prepared-statement-type-codes.html -DATA_C_TYPE_MAP = { - MYSQL_DATA_TYPE.TINYINT: CTypeProperties(C_TYPES.MYSQL_TYPE_TINY, 4), - MYSQL_DATA_TYPE.SMALLINT: CTypeProperties(C_TYPES.MYSQL_TYPE_SHORT, 6), - MYSQL_DATA_TYPE.MEDIUMINT: CTypeProperties(C_TYPES.MYSQL_TYPE_INT24, 9), - MYSQL_DATA_TYPE.INT: CTypeProperties(C_TYPES.MYSQL_TYPE_LONG, 11), - MYSQL_DATA_TYPE.BIGINT: CTypeProperties(C_TYPES.MYSQL_TYPE_LONGLONG, 20), - MYSQL_DATA_TYPE.FLOAT: CTypeProperties(C_TYPES.MYSQL_TYPE_FLOAT, 12), - MYSQL_DATA_TYPE.DOUBLE: CTypeProperties(C_TYPES.MYSQL_TYPE_DOUBLE, 22), - MYSQL_DATA_TYPE.DECIMAL: CTypeProperties(C_TYPES.MYSQL_TYPE_NEWDECIMAL), - MYSQL_DATA_TYPE.YEAR: CTypeProperties(C_TYPES.MYSQL_TYPE_YEAR, 4, [FIELD_FLAG.UNSIGNED, FIELD_FLAG.ZEROFILL_FLAG]), - MYSQL_DATA_TYPE.TIME: CTypeProperties(C_TYPES.MYSQL_TYPE_TIME, 10, [FIELD_FLAG.BINARY_COLLATION]), - MYSQL_DATA_TYPE.DATE: CTypeProperties(C_TYPES.MYSQL_TYPE_DATE, 10, [FIELD_FLAG.BINARY_COLLATION]), - MYSQL_DATA_TYPE.DATETIME: CTypeProperties(C_TYPES.MYSQL_TYPE_DATETIME, 19, [FIELD_FLAG.BINARY_COLLATION]), - MYSQL_DATA_TYPE.TIMESTAMP: CTypeProperties( - C_TYPES.MYSQL_TYPE_TIMESTAMP, 19, [FIELD_FLAG.BINARY_COLLATION, FIELD_FLAG.TIMESTAMP] - ), - MYSQL_DATA_TYPE.CHAR: CTypeProperties(C_TYPES.MYSQL_TYPE_STRING), - MYSQL_DATA_TYPE.BINARY: CTypeProperties(C_TYPES.MYSQL_TYPE_STRING, flags=[FIELD_FLAG.BINARY_COLLATION]), - MYSQL_DATA_TYPE.VARCHAR: CTypeProperties(C_TYPES.MYSQL_TYPE_VAR_STRING), - MYSQL_DATA_TYPE.VARBINARY: CTypeProperties(C_TYPES.MYSQL_TYPE_VAR_STRING, flags=[FIELD_FLAG.BINARY_COLLATION]), - MYSQL_DATA_TYPE.TINYBLOB: CTypeProperties( - C_TYPES.MYSQL_TYPE_BLOB, flags=[FIELD_FLAG.BLOB, FIELD_FLAG.BINARY_COLLATION] - ), - MYSQL_DATA_TYPE.TINYTEXT: 
CTypeProperties(C_TYPES.MYSQL_TYPE_BLOB, flags=[FIELD_FLAG.BLOB]), - MYSQL_DATA_TYPE.BLOB: CTypeProperties( - C_TYPES.MYSQL_TYPE_BLOB, flags=[FIELD_FLAG.BLOB, FIELD_FLAG.BINARY_COLLATION] - ), - MYSQL_DATA_TYPE.TEXT: CTypeProperties(C_TYPES.MYSQL_TYPE_BLOB, flags=[FIELD_FLAG.BLOB]), - MYSQL_DATA_TYPE.MEDIUMBLOB: CTypeProperties( - C_TYPES.MYSQL_TYPE_BLOB, flags=[FIELD_FLAG.BLOB, FIELD_FLAG.BINARY_COLLATION] - ), - MYSQL_DATA_TYPE.MEDIUMTEXT: CTypeProperties(C_TYPES.MYSQL_TYPE_BLOB, flags=[FIELD_FLAG.BLOB]), - MYSQL_DATA_TYPE.LONGBLOB: CTypeProperties( - C_TYPES.MYSQL_TYPE_BLOB, flags=[FIELD_FLAG.BLOB, FIELD_FLAG.BINARY_COLLATION] - ), - MYSQL_DATA_TYPE.LONGTEXT: CTypeProperties(C_TYPES.MYSQL_TYPE_BLOB, flags=[FIELD_FLAG.BLOB]), - MYSQL_DATA_TYPE.BIT: CTypeProperties(C_TYPES.MYSQL_TYPE_BIT, 8, [FIELD_FLAG.UNSIGNED]), - MYSQL_DATA_TYPE.BOOL: CTypeProperties(C_TYPES.MYSQL_TYPE_TINY, 1), - MYSQL_DATA_TYPE.BOOLEAN: CTypeProperties(C_TYPES.MYSQL_TYPE_TINY, 1), - MYSQL_DATA_TYPE.JSON: CTypeProperties( - C_TYPES.MYSQL_TYPE_JSON, flags=[FIELD_FLAG.BLOB, FIELD_FLAG.BINARY_COLLATION] - ), - MYSQL_DATA_TYPE.VECTOR: CTypeProperties( - C_TYPES.MYSQL_TYPE_VECTOR, 4096, flags=[FIELD_FLAG.BLOB, FIELD_FLAG.BINARY_COLLATION] - ), -} - - -# HANDSHAKE - -DEFAULT_COALLITION_ID = 83 -SERVER_STATUS_AUTOCOMMIT = 2 - -# NOTE real mysql-server returns by default all (capabilities 0xffff, extended 0xc1ff) -DEFAULT_CAPABILITIES = sum( - [ - CAPABILITIES.CLIENT_LONG_PASSWORD, - CAPABILITIES.CLIENT_LONG_FLAG, - CAPABILITIES.CLIENT_CONNECT_WITH_DB, - CAPABILITIES.CLIENT_PROTOCOL_41, - CAPABILITIES.CLIENT_TRANSACTIONS, - CAPABILITIES.CLIENT_FOUND_ROWS, - CAPABILITIES.CLIENT_LOCAL_FILES, - CAPABILITIES.CLIENT_CONNECT_ATTRS, - CAPABILITIES.CLIENT_PLUGIN_AUTH, - CAPABILITIES.CLIENT_SSL, - CAPABILITIES.CLIENT_SECURE_CONNECTION, - CAPABILITIES.CLIENT_DEPRECATE_EOF, - ] -) - -DEFAULT_AUTH_METHOD = "caching_sha2_password" # [mysql_native_password|caching_sha2_password] - -FILLER_FOR_WIRESHARK_DUMP = 21 - 
- -# Datum lenenc encoding - -NULL_VALUE = b"\xfb" -ONE_BYTE_ENC = b"\xfa" -TWO_BYTE_ENC = b"\xfc" -THREE_BYTE_ENC = b"\xfd" -EIGHT_BYTE_ENC = b"\xfe" - - -# ERROR CODES -class ERR(object): - __slots__ = () - ER_OLD_TEMPORALS_UPGRADED = 1880 - ER_ONLY_FD_AND_RBR_EVENTS_ALLOWED_IN_BINLOG_STATEMENT = 1730 - ER_ONLY_INTEGERS_ALLOWED = 1578 - ER_ONLY_ON_RANGE_LIST_PARTITION = 1512 - ER_OPEN_AS_READONLY = 1036 - ER_OPERAND_COLUMNS = 1241 - ER_OPTION_PREVENTS_STATEMENT = 1290 - ER_ORDER_WITH_PROC = 1386 - ER_OUT_OF_RESOURCES = 1041 - ER_OUT_OF_SORTMEMORY = 1038 - ER_OUTOFMEMORY = 1037 - ER_PARSE_ERROR = 1064 - ER_PART_STATE_ERROR = 1522 - ER_PARTITION_CLAUSE_ON_NONPARTITIONED = 1747 - ER_PARTITION_COLUMN_LIST_ERROR = 1653 - ER_PARTITION_CONST_DOMAIN_ERROR = 1563 - ER_PARTITION_ENTRY_ERROR = 1496 - ER_PARTITION_EXCHANGE_DIFFERENT_OPTION = 1731 - ER_PARTITION_EXCHANGE_FOREIGN_KEY = 1740 - ER_PARTITION_EXCHANGE_PART_TABLE = 1732 - ER_PARTITION_EXCHANGE_TEMP_TABLE = 1733 - ER_PARTITION_FIELDS_TOO_LONG = 1660 - ER_PARTITION_FUNC_NOT_ALLOWED_ERROR = 1491 - ER_PARTITION_FUNCTION_FAILURE = 1521 - ER_PARTITION_FUNCTION_IS_NOT_ALLOWED = 1564 - ER_PARTITION_INSTEAD_OF_SUBPARTITION = 1734 - ER_PARTITION_MAXVALUE_ERROR = 1481 - ER_PARTITION_MERGE_ERROR = 1572 - ER_PARTITION_MGMT_ON_NONPARTITIONED = 1505 - ER_PARTITION_NAME = 1633 - ER_PARTITION_NO_TEMPORARY = 1562 - ER_PARTITION_NOT_DEFINED_ERROR = 1498 - ER_PARTITION_REQUIRES_VALUES_ERROR = 1479 - ER_PARTITION_SUBPART_MIX_ERROR = 1483 - ER_PARTITION_SUBPARTITION_ERROR = 1482 - ER_PARTITION_WRONG_NO_PART_ERROR = 1484 - ER_PARTITION_WRONG_NO_SUBPART_ERROR = 1485 - ER_PARTITION_WRONG_VALUES_ERROR = 1480 - ER_PARTITIONS_MUST_BE_DEFINED_ERROR = 1492 - ER_PASSWD_LENGTH = 1372 - ER_PASSWORD_ANONYMOUS_USER = 1131 - ER_PASSWORD_FORMAT = 1827 - ER_PASSWORD_NO_MATCH = 1133 - ER_PASSWORD_NOT_ALLOWED = 1132 - ER_PATH_LENGTH = 1680 - ER_PLUGIN_CANNOT_BE_UNINSTALLED = 1883 - ER_PLUGIN_IS_NOT_LOADED = 1524 - ER_PLUGIN_IS_PERMANENT = 1702 - 
ER_PLUGIN_NO_INSTALL = 1721 - ER_PLUGIN_NO_UNINSTALL = 1720 - ER_PRIMARY_CANT_HAVE_NULL = 1171 - ER_PROC_AUTO_GRANT_FAIL = 1404 - ER_PROC_AUTO_REVOKE_FAIL = 1405 - ER_PROCACCESS_DENIED_ERROR = 1370 - ER_PS_MANY_PARAM = 1390 - ER_PS_NO_RECURSION = 1444 - ER_QUERY_CACHE_DISABLED = 1651 - ER_QUERY_INTERRUPTED = 1317 - ER_QUERY_ON_FOREIGN_DATA_SOURCE = 1430 - ER_QUERY_ON_MASTER = 1219 - ER_RANGE_NOT_INCREASING_ERROR = 1493 - ER_RBR_NOT_AVAILABLE = 1574 - ER_READ_ONLY_MODE = 1836 - ER_READ_ONLY_TRANSACTION = 1207 - ER_READY = 1076 - ER_RECORD_FILE_FULL = 1114 - ER_REGEXP_ERROR = 1139 - ER_RELAY_LOG_FAIL = 1371 - ER_RELAY_LOG_INIT = 1380 - ER_REMOVED_SPACES = 1466 - ER_RENAMED_NAME = 1636 - ER_REORG_HASH_ONLY_ON_SAME_N = 1510 - ER_REORG_NO_PARAM_ERROR = 1511 - ER_REORG_OUTSIDE_RANGE = 1520 - ER_REORG_PARTITION_NOT_EXIST = 1516 - ER_REQUIRES_PRIMARY_KEY = 1173 - ER_RESERVED_SYNTAX = 1382 - ER_RESIGNAL_WITHOUT_ACTIVE_HANDLER = 1645 - ER_REVOKE_GRANTS = 1269 - ER_ROW_DOES_NOT_MATCH_GIVEN_PARTITION_SET = 1748 - ER_ROW_DOES_NOT_MATCH_PARTITION = 1737 - ER_ROW_IN_WRONG_PARTITION = 1863 - ER_ROW_IS_REFERENCED = 1217 - ER_ROW_IS_REFERENCED_2 = 1451 - ER_ROW_SINGLE_PARTITION_FIELD_ERROR = 1658 - ER_RPL_INFO_DATA_TOO_LONG = 1742 - ER_SAME_NAME_PARTITION = 1517 - ER_SAME_NAME_PARTITION_FIELD = 1652 - ER_SELECT_REDUCED = 1249 - ER_SERVER_IS_IN_SECURE_AUTH_MODE = 1275 - ER_SERVER_SHUTDOWN = 1053 - ER_SET_CONSTANTS_ONLY = 1204 - ER_SET_PASSWORD_AUTH_PLUGIN = 1699 - ER_SET_STATEMENT_CANNOT_INVOKE_FUNCTION = 1769 - ER_SHUTDOWN_COMPLETE = 1079 - ER_SIGNAL_BAD_CONDITION_TYPE = 1646 - ER_SIGNAL_EXCEPTION = 1644 - ER_SIGNAL_NOT_FOUND = 1643 - ER_SIGNAL_WARN = 1642 - ER_SIZE_OVERFLOW_ERROR = 1532 - ER_SKIPPING_LOGGED_TRANSACTION = 1771 - ER_SLAVE_CANT_CREATE_CONVERSION = 1678 - ER_SLAVE_CONFIGURATION = 1794 - ER_SLAVE_CONVERSION_FAILED = 1677 - ER_SLAVE_CORRUPT_EVENT = 1610 - ER_SLAVE_CREATE_EVENT_FAILURE = 1596 - ER_SLAVE_FATAL_ERROR = 1593 - ER_SLAVE_HAS_MORE_GTIDS_THAN_MASTER = 1885 - 
ER_SLAVE_HEARTBEAT_FAILURE = 1623 - ER_SLAVE_HEARTBEAT_VALUE_OUT_OF_RANGE = 1624 - ER_SLAVE_HEARTBEAT_VALUE_OUT_OF_RANGE_MAX = 1704 - ER_SLAVE_HEARTBEAT_VALUE_OUT_OF_RANGE_MIN = 1703 - ER_SLAVE_IGNORE_SERVER_IDS = 1650 - ER_SLAVE_IGNORED_SSL_PARAMS = 1274 - ER_SLAVE_IGNORED_TABLE = 1237 - ER_SLAVE_INCIDENT = 1590 - ER_SLAVE_MASTER_COM_FAILURE = 1597 - ER_SLAVE_MI_INIT_REPOSITORY = 1871 - ER_SLAVE_MUST_STOP = 1198 - ER_SLAVE_NOT_RUNNING = 1199 - ER_SLAVE_RELAY_LOG_READ_FAILURE = 1594 - ER_SLAVE_RELAY_LOG_WRITE_FAILURE = 1595 - ER_SLAVE_RLI_INIT_REPOSITORY = 1872 - ER_SLAVE_SILENT_RETRY_TRANSACTION = 1806 - ER_SLAVE_THREAD = 1202 - ER_SLAVE_WAS_NOT_RUNNING = 1255 - ER_SLAVE_WAS_RUNNING = 1254 - ER_SP_ALREADY_EXISTS = 1304 - ER_SP_BAD_CURSOR_QUERY = 1322 - ER_SP_BAD_CURSOR_SELECT = 1323 - ER_SP_BAD_SQLSTATE = 1407 - ER_SP_BAD_VAR_SHADOW = 1453 - ER_SP_BADRETURN = 1313 - ER_SP_BADSELECT = 1312 - ER_SP_BADSTATEMENT = 1314 - ER_SP_CANT_ALTER = 1334 - ER_SP_CANT_SET_AUTOCOMMIT = 1445 - ER_SP_CASE_NOT_FOUND = 1339 - ER_SP_COND_MISMATCH = 1319 - ER_SP_CURSOR_AFTER_HANDLER = 1338 - ER_SP_CURSOR_ALREADY_OPEN = 1325 - ER_SP_CURSOR_MISMATCH = 1324 - ER_SP_CURSOR_NOT_OPEN = 1326 - ER_SP_DOES_NOT_EXIST = 1305 - ER_SP_DROP_FAILED = 1306 - ER_SP_DUP_COND = 1332 - ER_SP_DUP_CURS = 1333 - ER_SP_DUP_HANDLER = 1413 - ER_SP_DUP_PARAM = 1330 - ER_SP_DUP_VAR = 1331 - ER_SP_FETCH_NO_DATA = 1329 - ER_SP_GOTO_IN_HNDLR = 1358 - ER_SP_LABEL_MISMATCH = 1310 - ER_SP_LABEL_REDEFINE = 1309 - ER_SP_LILABEL_MISMATCH = 1308 - ER_SP_NO_AGGREGATE = 1460 - ER_SP_NO_DROP_SP = 1357 - ER_SP_NO_RECURSION = 1424 - ER_SP_NO_RECURSIVE_CREATE = 1303 - ER_SP_NO_RETSET = 1415 - ER_SP_NORETURN = 1320 - ER_SP_NORETURNEND = 1321 - ER_SP_NOT_VAR_ARG = 1414 - ER_SP_PROC_TABLE_CORRUPT = 1457 - ER_SP_RECURSION_LIMIT = 1456 - ER_SP_STORE_FAILED = 1307 - ER_SP_SUBSELECT_NYI = 1335 - ER_SP_UNDECLARED_VAR = 1327 - ER_SP_UNINIT_VAR = 1311 - ER_SP_VARCOND_AFTER_CURSHNDLR = 1337 - ER_SP_WRONG_NAME = 1458 - 
ER_SP_WRONG_NO_OF_ARGS = 1318 - ER_SP_WRONG_NO_OF_FETCH_ARGS = 1328 - ER_SPATIAL_CANT_HAVE_NULL = 1252 - ER_SPATIAL_MUST_HAVE_GEOM_COL = 1687 - ER_SPECIFIC_ACCESS_DENIED_ERROR = 1227 - ER_SQL_SLAVE_SKIP_COUNTER_NOT_SETTABLE_IN_GTID_MODE = 1858 - ER_SQLTHREAD_WITH_SECURE_SLAVE = 1763 - ER_SR_INVALID_CREATION_CTX = 1601 - ER_STACK_OVERRUN = 1119 - ER_STACK_OVERRUN_NEED_MORE = 1436 - ER_STARTUP = 1408 - ER_STMT_CACHE_FULL = 1705 - ER_STMT_HAS_NO_OPEN_CURSOR = 1421 - ER_STMT_NOT_ALLOWED_IN_SF_OR_TRG = 1336 - ER_STOP_SLAVE_IO_THREAD_TIMEOUT = 1876 - ER_STOP_SLAVE_SQL_THREAD_TIMEOUT = 1875 - ER_STORED_FUNCTION_PREVENTS_SWITCH_BINLOG_FORMAT = 1560 - ER_STORED_FUNCTION_PREVENTS_SWITCH_SQL_LOG_BIN = 1695 - ER_STORED_FUNCTION_PREVENTS_SWITCH_BINLOG_DIRECT = 1686 - ER_SUBPARTITION_ERROR = 1500 - ER_SUBPARTITION_NAME = 1634 - ER_SUBQUERY_NO_1_ROW = 1242 - ER_SYNTAX_ERROR = 1149 - ER_TABLE_CANT_HANDLE_AUTO_INCREMENT = 1164 - ER_TABLE_CANT_HANDLE_BLOB = 1163 - ER_TABLE_CANT_HANDLE_FT = 1214 - ER_TABLE_CANT_HANDLE_SPKEYS = 1464 - ER_TABLE_CORRUPT = 1877 - ER_TABLE_DEF_CHANGED = 1412 - ER_TABLE_EXISTS_ERROR = 1050 - ER_TABLE_HAS_NO_FT = 1764 - ER_TABLE_IN_FK_CHECK = 1725 - ER_TABLE_IN_SYSTEM_TABLESPACE = 1809 - ER_TABLE_MUST_HAVE_COLUMNS = 1113 - ER_TABLE_NAME = 1632 - ER_TABLE_NEEDS_REBUILD = 1707 - ER_TABLE_NEEDS_UPGRADE = 1459 - ER_TABLE_NOT_LOCKED = 1100 - ER_TABLE_NOT_LOCKED_FOR_WRITE = 1099 - ER_TABLE_SCHEMA_MISMATCH = 1808 - ER_TABLEACCESS_DENIED_ERROR = 1142 - ER_TABLENAME_NOT_ALLOWED_HERE = 1250 - ER_TABLES_DIFFERENT_METADATA = 1736 - ER_TABLESPACE_AUTO_EXTEND_ERROR = 1530 - ER_TABLESPACE_DISCARDED = 1814 - ER_TABLESPACE_EXISTS = 1813 - ER_TABLESPACE_MISSING = 1812 - ER_TEMP_FILE_WRITE_FAILURE = 1878 - ER_TEMP_TABLE_PREVENTS_SWITCH_OUT_OF_RBR = 1559 - ER_TEMPORARY_NAME = 1635 - ER_TEXTFILE_NOT_READABLE = 1085 - ER_TOO_BIG_DISPLAYWIDTH = 1439 - ER_TOO_BIG_FIELDLENGTH = 1074 - ER_TOO_BIG_FOR_UNCOMPRESS = 1256 - ER_TOO_BIG_PRECISION = 1426 - ER_TOO_BIG_ROWSIZE = 1118 - 
ER_TOO_BIG_SCALE = 1425 - ER_TOO_BIG_SELECT = 1104 - ER_TOO_BIG_SET = 1097 - ER_TOO_HIGH_LEVEL_OF_NESTING_FOR_SELECT = 1473 - ER_TOO_LONG_BODY = 1437 - ER_TOO_LONG_FIELD_COMMENT = 1629 - ER_TOO_LONG_IDENT = 1059 - ER_TOO_LONG_INDEX_COMMENT = 1688 - ER_TOO_LONG_KEY = 1071 - ER_TOO_LONG_STRING = 1162 - ER_TOO_LONG_TABLE_COMMENT = 1628 - ER_TOO_LONG_TABLE_PARTITION_COMMENT = 1793 - ER_TOO_MANY_CONCURRENT_TRXS = 1637 - ER_TOO_MANY_DELAYED_THREADS = 1151 - ER_TOO_MANY_FIELDS = 1117 - ER_TOO_MANY_KEY_PARTS = 1070 - ER_TOO_MANY_KEYS = 1069 - ER_TOO_MANY_PARTITION_FUNC_FIELDS_ERROR = 1655 - ER_TOO_MANY_PARTITIONS_ERROR = 1499 - ER_TOO_MANY_ROWS = 1172 - ER_TOO_MANY_TABLES = 1116 - ER_TOO_MANY_USER_CONNECTIONS = 1203 - ER_TOO_MANY_VALUES_ERROR = 1657 - ER_TOO_MUCH_AUTO_TIMESTAMP_COLS = 1293 - ER_TRANS_CACHE_FULL = 1197 - ER_TRG_ALREADY_EXISTS = 1359 - ER_TRG_CANT_CHANGE_ROW = 1362 - ER_TRG_CANT_OPEN_TABLE = 1606 - ER_TRG_CORRUPTED_FILE = 1602 - ER_TRG_DOES_NOT_EXIST = 1360 - ER_TRG_IN_WRONG_SCHEMA = 1435 - ER_TRG_INVALID_CREATION_CTX = 1604 - ER_TRG_NO_CREATION_CTX = 1603 - ER_TRG_NO_DEFINER = 1454 - ER_TRG_NO_SUCH_ROW_IN_TRG = 1363 - ER_TRG_ON_VIEW_OR_TEMP_TABLE = 1361 - ER_TRUNCATE_ILLEGAL_FK = 1701 - ER_TRUNCATED_WRONG_VALUE = 1292 - ER_TRUNCATED_WRONG_VALUE_FOR_FIELD = 1366 - ER_UDF_EXISTS = 1125 - ER_UDF_NO_PATHS = 1124 - ER_UNDO_RECORD_TOO_BIG = 1713 - ER_UNEXPECTED_EOF = 1039 - ER_UNION_TABLES_IN_DIFFERENT_DIR = 1212 - ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF = 1503 - ER_UNKNOWN_ALTER_ALGORITHM = 1800 - ER_UNKNOWN_ALTER_LOCK = 1801 - ER_UNKNOWN_CHARACTER_SET = 1115 - ER_UNKNOWN_COLLATION = 1273 - ER_UNKNOWN_COM_ERROR = 1047 - ER_UNKNOWN_ERROR = 1105 - ER_UNKNOWN_EXPLAIN_FORMAT = 1791 - ER_UNKNOWN_KEY_CACHE = 1284 - ER_UNKNOWN_LOCALE = 1649 - ER_UNKNOWN_PARTITION = 1735 - ER_UNKNOWN_PROCEDURE = 1106 - ER_UNKNOWN_STMT_HANDLER = 1243 - ER_UNKNOWN_STORAGE_ENGINE = 1286 - ER_UNKNOWN_SYSTEM_VARIABLE = 1193 - ER_UNKNOWN_TABLE = 1109 - ER_UNKNOWN_TARGET_BINLOG = 1373 - 
ER_UNKNOWN_TIME_ZONE = 1298 - ER_UNSUPORTED_LOG_ENGINE = 1579 - ER_UNSUPPORTED_ENGINE = 1726 - ER_UNSUPPORTED_EXTENSION = 1112 - ER_UNSUPPORTED_PS = 1295 - ER_UNTIL_COND_IGNORED = 1279 - ER_UPDATE_INF = 1134 - ER_UPDATE_LOG_DEPRECATED_IGNORED = 1315 - ER_UPDATE_LOG_DEPRECATED_TRANSLATED = 1316 - ER_UPDATE_TABLE_USED = 1093 - ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE = 1175 - ER_USER_LIMIT_REACHED = 1226 - ER_USERNAME = 1468 - ER_VALUES_IS_NOT_INT_TYPE_ERROR = 1697 - ER_VAR_CANT_BE_READ = 1233 - ER_VARIABLE_IS_NOT_STRUCT = 1272 - ER_VARIABLE_IS_READONLY = 1621 - ER_VARIABLE_NOT_SETTABLE_IN_SF_OR_TRIGGER = 1765 - ER_VARIABLE_NOT_SETTABLE_IN_SP = 1838 - ER_VARIABLE_NOT_SETTABLE_IN_TRANSACTION = 1766 - ER_VIEW_CHECK_FAILED = 1369 - ER_VIEW_CHECKSUM = 1392 - ER_VIEW_DELETE_MERGE_VIEW = 1395 - ER_VIEW_FRM_NO_USER = 1447 - ER_VIEW_INVALID = 1356 - ER_VIEW_INVALID_CREATION_CTX = 1600 - ER_VIEW_MULTIUPDATE = 1393 - ER_VIEW_NO_CREATION_CTX = 1599 - ER_VIEW_NO_EXPLAIN = 1345 - ER_VIEW_NO_INSERT_FIELD_LIST = 1394 - ER_VIEW_NONUPD_CHECK = 1368 - ER_VIEW_OTHER_USER = 1448 - ER_VIEW_PREVENT_UPDATE = 1443 - ER_VIEW_RECURSIVE = 1462 - ER_VIEW_SELECT_CLAUSE = 1350 - ER_VIEW_SELECT_DERIVED = 1349 - ER_VIEW_SELECT_TMPTABLE = 1352 - ER_VIEW_SELECT_VARIABLE = 1351 - ER_VIEW_WRONG_LIST = 1353 - ER_WARN_ALLOWED_PACKET_OVERFLOWED = 1301 - ER_WARN_CANT_DROP_DEFAULT_KEYCACHE = 1438 - ER_WARN_DATA_OUT_OF_RANGE = 1264 - ER_WARN_DEPRECATED_SYNTAX = 1287 - ER_WARN_DEPRECATED_SYNTAX_NO_REPLACEMENT = 1681 - ER_WARN_DEPRECATED_SYNTAX_WITH_VER = 1554 - ER_WARN_ENGINE_TRANSACTION_ROLLBACK = 1622 - ER_WARN_FIELD_RESOLVED = 1276 - ER_WARN_HOSTNAME_WONT_WORK = 1285 - ER_WARN_I_S_SKIPPED_TABLE = 1684 - ER_WARN_INDEX_NOT_APPLICABLE = 1739 - ER_WARN_INVALID_TIMESTAMP = 1299 - ER_WARN_NULL_TO_NOTNULL = 1263 - ER_WARN_PURGE_LOG_IN_USE = 1867 - ER_WARN_PURGE_LOG_IS_ACTIVE = 1868 - ER_WARN_QC_RESIZE = 1282 - ER_WARN_TOO_FEW_RECORDS = 1261 - ER_WARN_TOO_MANY_RECORDS = 1262 - ER_WARN_USING_OTHER_HANDLER = 1266 - 
ER_WARN_VIEW_MERGE = 1354 - ER_WARN_VIEW_WITHOUT_KEY = 1355 - ER_WARNING_NOT_COMPLETE_ROLLBACK = 1196 - ER_WARNING_NOT_COMPLETE_ROLLBACK_WITH_CREATED_TEMP_TABLE = 1751 - ER_WARNING_NOT_COMPLETE_ROLLBACK_WITH_DROPPED_TEMP_TABLE = 1752 - ER_WRONG_ARGUMENTS = 1210 - ER_WRONG_AUTO_KEY = 1075 - ER_WRONG_COLUMN_NAME = 1166 - ER_WRONG_DB_NAME = 1102 - ER_WRONG_EXPR_IN_PARTITION_FUNC_ERROR = 1486 - ER_WRONG_FIELD_SPEC = 1063 - ER_WRONG_FIELD_TERMINATORS = 1083 - ER_WRONG_FIELD_WITH_GROUP = 1055 - ER_WRONG_FK_DEF = 1239 - ER_WRONG_GROUP_FIELD = 1056 - ER_WRONG_KEY_COLUMN = 1167 - ER_WRONG_LOCK_OF_SYSTEM_TABLE = 1428 - ER_WRONG_MAGIC = 1389 - ER_WRONG_MRG_TABLE = 1168 - ER_WRONG_NAME_FOR_CATALOG = 1281 - ER_WRONG_NAME_FOR_INDEX = 1280 - ER_WRONG_NATIVE_TABLE_STRUCTURE = 1682 - ER_WRONG_NUMBER_OF_COLUMNS_IN_SELECT = 1222 - ER_WRONG_OBJECT = 1347 - ER_WRONG_OUTER_JOIN = 1120 - ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT = 1582 - ER_WRONG_PARAMCOUNT_TO_PROCEDURE = 1107 - ER_WRONG_PARAMETERS_TO_NATIVE_FCT = 1583 - ER_WRONG_PARAMETERS_TO_PROCEDURE = 1108 - ER_WRONG_PARAMETERS_TO_STORED_FCT = 1584 - ER_WRONG_PARTITION_NAME = 1567 - ER_WRONG_PERFSCHEMA_USAGE = 1683 - ER_WRONG_SIZE_NUMBER = 1531 - ER_WRONG_SPVAR_TYPE_IN_LIMIT = 1691 - ER_WRONG_STRING_LENGTH = 1470 - ER_WRONG_SUB_KEY = 1089 - ER_WRONG_SUM_SELECT = 1057 - ER_WRONG_TABLE_NAME = 1103 - ER_WRONG_TYPE_COLUMN_VALUE_ERROR = 1654 - ER_WRONG_TYPE_FOR_VAR = 1232 - ER_WRONG_USAGE = 1221 - ER_WRONG_VALUE = 1525 - ER_WRONG_VALUE_COUNT = 1058 - ER_WRONG_VALUE_COUNT_ON_ROW = 1136 - ER_WRONG_VALUE_FOR_TYPE = 1411 - ER_WRONG_VALUE_FOR_VAR = 1231 - ER_WSAS_FAILED = 1383 - ER_XA_RBDEADLOCK = 1614 - ER_XA_RBROLLBACK = 1402 - ER_XA_RBTIMEOUT = 1613 - ER_XAER_DUPID = 1440 - ER_XAER_INVAL = 1398 - ER_XAER_NOTA = 1397 - ER_XAER_OUTSIDE = 1400 - ER_XAER_RMERR = 1401 - ER_XAER_RMFAIL = 1399 - ER_YES = 1003 - ER_ZLIB_Z_BUF_ERROR = 1258 - ER_ZLIB_Z_DATA_ERROR = 1259 - ER_ZLIB_Z_MEM_ERROR = 1257 - ER_BAD_DB_ERROR = 1049 - ER_BAD_TABLE_ERROR = 1051 - 
ER_KEY_COLUMN_DOES_NOT_EXIST = 1072 - ER_DUP_FIELDNAME = 1060 - ER_DB_DROP_DELETE = 1009 - ER_NON_INSERTABLE_TABLE = 1471 - ER_NOT_SUPPORTED_YET = 1235 - - -ERR = ERR() - - -class WARN(object): - __slots__ = () - WARN_COND_ITEM_TRUNCATED = 1647 - WARN_DATA_TRUNCATED = 1265 - WARN_NO_MASTER_INF = 1617 - WARN_NON_ASCII_SEPARATOR_NOT_IMPLEMENTED = 1638 - WARN_ON_BLOCKHOLE_IN_RBR = 1870 - WARN_OPTION_BELOW_LIMIT = 1708 - WARN_OPTION_IGNORED = 1618 - WARN_PLUGIN_BUSY = 1620 - WARN_PLUGIN_DELETE_BUILTIN = 1619 - - -WARN = WARN() - -# CHARACTER SET NUMBERS - -# noqa -CHARSET_NUMBERS = { - "big5_chinese_ci": 1, - "latin2_czech_cs": 2, - "dec8_swedish_ci": 3, - "cp850_general_ci": 4, - "latin1_german1_ci": 5, - "hp8_english_ci": 6, - "koi8r_general_ci": 7, - "latin1_swedish_ci": 8, - "latin2_general_ci": 9, - "swe7_swedish_ci": 10, - "ascii_general_ci": 11, - "ujis_japanese_ci": 12, - "sjis_japanese_ci": 13, - "cp1251_bulgarian_ci": 14, - "latin1_danish_ci": 15, - "hebrew_general_ci": 16, - "tis620_thai_ci": 18, - "euckr_korean_ci": 19, - "latin7_estonian_cs": 20, - "latin2_hungarian_ci": 21, - "koi8u_general_ci": 22, - "cp1251_ukrainian_ci": 23, - "gb2312_chinese_ci": 24, - "greek_general_ci": 25, - "cp1250_general_ci": 26, - "latin2_croatian_ci": 27, - "gbk_chinese_ci": 28, - "cp1257_lithuanian_ci": 29, - "latin5_turkish_ci": 30, - "latin1_german2_ci": 31, - "armscii8_general_ci": 32, - "utf8_general_ci": 33, - "cp1250_czech_cs": 34, - "ucs2_general_ci": 35, - "cp866_general_ci": 36, - "keybcs2_general_ci": 37, - "macce_general_ci": 38, - "macroman_general_ci": 39, - "cp852_general_ci": 40, - "latin7_general_ci": 41, - "latin7_general_cs": 42, - "macce_bin": 43, - "cp1250_croatian_ci": 44, - "utf8mb4_general_ci": 45, - "utf8mb4_bin": 46, - "latin1_bin": 47, - "latin1_general_ci": 48, - "latin1_general_cs": 49, - "cp1251_bin": 50, - "cp1251_general_ci": 51, - "cp1251_general_cs": 52, - "macroman_bin": 53, - "utf16_general_ci": 54, - "utf16_bin": 55, - "utf16le_general_ci": 
56, - "cp1256_general_ci": 57, - "cp1257_bin": 58, - "cp1257_general_ci": 59, - "utf32_general_ci": 60, - "utf32_bin": 61, - "utf16le_bin": 62, - "binary": 63, - "armscii8_bin": 64, - "ascii_bin": 65, - "cp1250_bin": 66, - "cp1256_bin": 67, - "cp866_bin": 68, - "dec8_bin": 69, - "greek_bin": 70, - "hebrew_bin": 71, - "hp8_bin": 72, - "keybcs2_bin": 73, - "koi8r_bin": 74, - "koi8u_bin": 75, - "latin2_bin": 77, - "latin5_bin": 78, - "latin7_bin": 79, - "cp850_bin": 80, - "cp852_bin": 81, - "swe7_bin": 82, - "utf8_bin": 83, - "big5_bin": 84, - "euckr_bin": 85, - "gb2312_bin": 86, - "gbk_bin": 87, - "sjis_bin": 88, - "tis620_bin": 89, - "ucs2_bin": 90, - "ujis_bin": 91, - "geostd8_general_ci": 92, - "geostd8_bin": 93, - "latin1_spanish_ci": 94, - "cp932_japanese_ci": 95, - "cp932_bin": 96, - "eucjpms_japanese_ci": 97, - "eucjpms_bin": 98, - "cp1250_polish_ci": 99, - "utf16_unicode_ci": 101, - "utf16_icelandic_ci": 102, - "utf16_latvian_ci": 103, - "utf16_romanian_ci": 104, - "utf16_slovenian_ci": 105, - "utf16_polish_ci": 106, - "utf16_estonian_ci": 107, - "utf16_spanish_ci": 108, - "utf16_swedish_ci": 109, - "utf16_turkish_ci": 110, - "utf16_czech_ci": 111, - "utf16_danish_ci": 112, - "utf16_lithuanian_ci": 113, - "utf16_slovak_ci": 114, - "utf16_spanish2_ci": 115, - "utf16_roman_ci": 116, - "utf16_persian_ci": 117, - "utf16_esperanto_ci": 118, - "utf16_hungarian_ci": 119, - "utf16_sinhala_ci": 120, - "utf16_german2_ci": 121, - "utf16_croatian_ci": 122, - "utf16_unicode_520_ci": 123, - "utf16_vietnamese_ci": 124, - "ucs2_unicode_ci": 128, - "ucs2_icelandic_ci": 129, - "ucs2_latvian_ci": 130, - "ucs2_romanian_ci": 131, - "ucs2_slovenian_ci": 132, - "ucs2_polish_ci": 133, - "ucs2_estonian_ci": 134, - "ucs2_spanish_ci": 135, - "ucs2_swedish_ci": 136, - "ucs2_turkish_ci": 137, - "ucs2_czech_ci": 138, - "ucs2_danish_ci": 139, - "ucs2_lithuanian_ci": 140, - "ucs2_slovak_ci": 141, - "ucs2_spanish2_ci": 142, - "ucs2_roman_ci": 143, - "ucs2_persian_ci": 144, - 
"ucs2_esperanto_ci": 145, - "ucs2_hungarian_ci": 146, - "ucs2_sinhala_ci": 147, - "ucs2_german2_ci": 148, - "ucs2_croatian_ci": 149, - "ucs2_unicode_520_ci": 150, - "ucs2_vietnamese_ci": 151, - "ucs2_general_mysql500_ci": 159, - "utf32_unicode_ci": 160, - "utf32_icelandic_ci": 161, - "utf32_latvian_ci": 162, - "utf32_romanian_ci": 163, - "utf32_slovenian_ci": 164, - "utf32_polish_ci": 165, - "utf32_estonian_ci": 166, - "utf32_spanish_ci": 167, - "utf32_swedish_ci": 168, - "utf32_turkish_ci": 169, - "utf32_czech_ci": 170, - "utf32_danish_ci": 171, - "utf32_lithuanian_ci": 172, - "utf32_slovak_ci": 173, - "utf32_spanish2_ci": 174, - "utf32_roman_ci": 175, - "utf32_persian_ci": 176, - "utf32_esperanto_ci": 177, - "utf32_hungarian_ci": 178, - "utf32_sinhala_ci": 179, - "utf32_german2_ci": 180, - "utf32_croatian_ci": 181, - "utf32_unicode_520_ci": 182, - "utf32_vietnamese_ci": 183, - "utf8_unicode_ci": 192, - "utf8_icelandic_ci": 193, - "utf8_latvian_ci": 194, - "utf8_romanian_ci": 195, - "utf8_slovenian_ci": 196, - "utf8_polish_ci": 197, - "utf8_estonian_ci": 198, - "utf8_spanish_ci": 199, - "utf8_swedish_ci": 200, - "utf8_turkish_ci": 201, - "utf8_czech_ci": 202, - "utf8_danish_ci": 203, - "utf8_lithuanian_ci": 204, - "utf8_slovak_ci": 205, - "utf8_spanish2_ci": 206, - "utf8_roman_ci": 207, - "utf8_persian_ci": 208, - "utf8_esperanto_ci": 209, - "utf8_hungarian_ci": 210, - "utf8_sinhala_ci": 211, - "utf8_german2_ci": 212, - "utf8_croatian_ci": 213, - "utf8_unicode_520_ci": 214, - "utf8_vietnamese_ci": 215, - "utf8_general_mysql500_ci": 223, - "utf8mb4_unicode_ci": 224, - "utf8mb4_icelandic_ci": 225, - "utf8mb4_latvian_ci": 226, - "utf8mb4_romanian_ci": 227, - "utf8mb4_slovenian_ci": 228, - "utf8mb4_polish_ci": 229, - "utf8mb4_estonian_ci": 230, - "utf8mb4_spanish_ci": 231, - "utf8mb4_swedish_ci": 232, - "utf8mb4_turkish_ci": 233, - "utf8mb4_czech_ci": 234, - "utf8mb4_danish_ci": 235, - "utf8mb4_lithuanian_ci": 236, - "utf8mb4_slovak_ci": 237, - "utf8mb4_spanish2_ci": 
238, - "utf8mb4_roman_ci": 239, - "utf8mb4_persian_ci": 240, - "utf8mb4_esperanto_ci": 241, - "utf8mb4_hungarian_ci": 242, - "utf8mb4_sinhala_ci": 243, - "utf8mb4_german2_ci": 244, - "utf8mb4_croatian_ci": 245, - "utf8mb4_unicode_520_ci": 246, - "utf8mb4_vietnamese_ci": 247, -} - - -SQL_RESERVED_WORDS = [ - "ALL", - "ANALYSE", - "ANALYZE", - "AND", - "ANY", - "AS", - "ASC", - "AUTHORIZATION", - "BETWEEN", - "BINARY", - "BOTH", - "CASE", - "CAST", - "CHECK", - "COLLATE", - "COLUMN", - "CONSTRAINT", - "CREATE", - "CROSS", - "CURRENT_DATE", - "CURRENT_TIME", - "CURRENT_TIMESTAMP", - "CURRENT_USER", - "DEFAULT", - "DEFERRABLE", - "DESC", - "DISTINCT", - "DO", - "ELSE", - "END", - "EXCEPT", - "FALSE", - "FOR", - "FOREIGN", - "FREEZE", - "FROM", - "FULL", - "GRANT", - "GROUP", - "HAVING", - "ILIKE", - "IN", - "INITIALLY", - "INNER", - "INTERSECT", - "INTO", - "IS", - "ISNULL", - "JOIN", - "LEADING", - "LEFT", - "LIKE", - "LIMIT", - "LOCALTIME", - "LOCALTIMESTAMP", - "NATURAL", - "NEW", - "NOT", - "NOTNULL", - "NULL", - "OFF", - "OFFSET", - "OLD", - "ON", - "ONLY", - "OR", - "ORDER", - "OUTER", - "OVERLAPS", - "PLACING", - "PRIMARY", - "REFERENCES", - "RIGHT", - "SELECT", - "SESSION_USER", - "SIMILAR", - "SOME", - "TABLE", - "THEN", - "TO", - "TRAILING", - "TRUE", - "UNION", - "UNIQUE", - "USER", - "USING", - "VERBOSE", - "WHEN", - "WHERE", -] - -SERVER_VARIABLES = { - # var_name: (value, type, charset) - "@@session.auto_increment_increment": (1, TYPES.MYSQL_TYPE_LONGLONG, CHARSET_NUMBERS["binary"]), - "@@auto_increment_increment": (1, TYPES.MYSQL_TYPE_LONGLONG, CHARSET_NUMBERS["binary"]), - "@@character_set_client": ("utf8", TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), - "@@character_set_connection": ("utf8", TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), - "@@character_set_results": ("utf8", TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), - "@@GLOBAL.character_set_server": ("latin1", TYPES.MYSQL_TYPE_VAR_STRING, 
CHARSET_NUMBERS["utf8_general_ci"]), - "@@character_set_server": ("latin1", TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), - "@@GLOBAL.collation_server": ("latin1_swedish_ci", TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), - "@@collation_server": ("latin1_swedish_ci", TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), - "@@init_connect": ("", TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), # None or '' ? - "@@interactive_timeout": (28800, TYPES.MYSQL_TYPE_LONGLONG, CHARSET_NUMBERS["binary"]), - "@@license": ("GPL", TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), - "@@lower_case_table_names": (0, TYPES.MYSQL_TYPE_LONGLONG, CHARSET_NUMBERS["binary"]), - "@@GLOBAL.lower_case_table_names": (0, TYPES.MYSQL_TYPE_LONGLONG, CHARSET_NUMBERS["binary"]), - "@@max_allowed_packet": (16777216, TYPES.MYSQL_TYPE_LONGLONG, CHARSET_NUMBERS["binary"]), - "@@net_buffer_length": (16384, TYPES.MYSQL_TYPE_LONGLONG, CHARSET_NUMBERS["binary"]), - "@@net_write_timeout": (60, TYPES.MYSQL_TYPE_LONGLONG, CHARSET_NUMBERS["binary"]), - "@@query_cache_size": (16777216, TYPES.MYSQL_TYPE_LONGLONG, CHARSET_NUMBERS["binary"]), - "@@query_cache_type": ("OFF", TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), - "@@sql_mode": ( - "ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION", - TYPES.MYSQL_TYPE_VAR_STRING, - CHARSET_NUMBERS["utf8_general_ci"], - ), - # '@@system_time_zone': ('MSK', TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS['utf8_general_ci']), - "@@system_time_zone": ("UTC", TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), - "@@time_zone": ("SYSTEM", TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), - "@@session.tx_isolation": ("REPEATABLE-READ", TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), - "@@tx_isolation": ("REPEATABLE-READ", 
TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), - "@@wait_timeout": (28800, TYPES.MYSQL_TYPE_LONGLONG, CHARSET_NUMBERS["binary"]), - "@@session.tx_read_only": ("0", TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), - "@@version_comment": ("(MindsDB)", TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), - "@@version": ("8.0.17", TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), - "@@collation_connection": ("utf8_general_ci", TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), - "@@performance_schema": (1, TYPES.MYSQL_TYPE_LONGLONG, CHARSET_NUMBERS["binary"]), - "@@GLOBAL.transaction_isolation": ( - "REPEATABLE-READ", - TYPES.MYSQL_TYPE_VAR_STRING, - CHARSET_NUMBERS["utf8_general_ci"], - ), - "@@transaction_isolation": ("REPEATABLE-READ", TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), - "@@event_scheduler": ("OFF", TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), - "@@default_storage_engine": ("InnoDB", TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), - "@@default_tmp_storage_engine": ("InnoDB", TYPES.MYSQL_TYPE_VAR_STRING, CHARSET_NUMBERS["utf8_general_ci"]), -} - - -class SESSION_TRACK(object): - __slots__ = () - SESSION_TRACK_SYSTEM_VARIABLES = 0x00 - SESSION_TRACK_SCHEMA = 0x01 - SESSION_TRACK_STATE_CHANGE = 0x02 - SESSION_TRACK_GTIDS = 0x03 - SESSION_TRACK_TRANSACTION_CHARACTERISTICS = 0x04 - SESSION_TRACK_TRANSACTION_STATE = 0x05 - - -SESSION_TRACK = SESSION_TRACK() - -ALL = vars() - - -def VAR_NAME(val, prefix=""): - global ALL - - for key in ALL.keys(): - value = ALL[key] - if value == val and key != "val": - if prefix == "" or (prefix != "" and prefix == key[: len(prefix)]): - return key - return None - - -def getConstName(consts, value): - attrs = [x for x in dir(consts) if x.startswith("__") is False] - constNames = {getattr(consts, x): x for x in attrs} - if value in constNames: - return constNames[value] - return None diff 
--git a/mindsdb/api/mysql/mysql_proxy/mysql_proxy.py b/mindsdb/api/mysql/mysql_proxy/mysql_proxy.py deleted file mode 100644 index 8f691db994c..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +++ /dev/null @@ -1,850 +0,0 @@ -""" -******************************************************* - * Copyright (C) 2017 MindsDB Inc. - * - * This file is part of MindsDB Server. - * - * MindsDB Server can not be copied and/or distributed without the express - * permission of MindsDB Inc - ******************************************************* -""" - -import atexit -import base64 -import os -import select -import socket -import socketserver as SocketServer -import ssl -import struct -import sys -import tempfile -import traceback -import logging -from functools import partial - -import mindsdb.utilities.hooks as hooks -import mindsdb.utilities.profiler as profiler -from mindsdb.utilities.sql import clear_sql -from mindsdb.api.mysql.mysql_proxy.classes.client_capabilities import ClentCapabilities -from mindsdb.api.mysql.mysql_proxy.classes.server_capabilities import ( - server_capabilities, -) -from mindsdb.api.executor.controllers import SessionController -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packet import Packet -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_packets import ( - BinaryResultsetRowPacket, - ColumnCountPacket, - ColumnDefenitionPacket, - CommandPacket, - EofPacket, - ErrPacket, - FastAuthFail, - HandshakePacket, - HandshakeResponsePacket, - OkPacket, - PasswordAnswer, - ResultsetRowPacket, - STMTPrepareHeaderPacket, - SwitchOutPacket, - SwitchOutResponse, -) -from mindsdb.api.mysql.mysql_proxy.executor import Executor -from mindsdb.api.mysql.mysql_proxy.external_libs.mysql_scramble import ( - scramble as scramble_func, -) -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import ( - DEFAULT_AUTH_METHOD, - CHARSET_NUMBERS, - SERVER_STATUS, - CAPABILITIES, - COMMANDS, - ERR, - getConstName, -) -from 
mindsdb.api.executor.data_types.answer import ExecuteAnswer -from mindsdb.api.executor.data_types.sql_answer import SQLAnswer -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from mindsdb.api.executor import exceptions as executor_exceptions -from mindsdb.api.common.middleware import check_auth -from mindsdb.api.executor.sql_query.result_set import ResultSet -from mindsdb.utilities.types.column import Column -from mindsdb.utilities import log -from mindsdb.utilities.config import config -from mindsdb.utilities.context import context as ctx -from mindsdb.utilities.otel import increment_otel_query_request_counter -from mindsdb.utilities.wizards import make_ssl_cert -from mindsdb.utilities.exception import QueryError -from mindsdb.utilities.functions import mark_process -from mindsdb.api.mysql.mysql_proxy.utilities.dump import ( - dump_result_set_to_mysql, - column_to_mysql_column_dict, - dump_columns_info, - dump_chunks, -) -from mindsdb.api.executor.exceptions import WrongCharsetError -from mindsdb.utilities.constants import DEFAULT_COMPANY_ID, DEFAULT_USER_ID - -logger = log.getLogger(__name__) - - -def empty_fn(): - pass - - -class MysqlTCPServer(SocketServer.ThreadingTCPServer): - """ - Custom TCP Server with increased request queue size - """ - - request_queue_size = 30 - - -class MysqlProxy(SocketServer.BaseRequestHandler): - """ - The Main Server controller class - """ - - @staticmethod - def server_close(srv): - srv.server_close() - - def __init__(self, request, client_address, server): - self.charset = "utf8" - self.charset_text_type = CHARSET_NUMBERS["utf8_general_ci"] - self.session = None - self.client_capabilities = None - self.connection_id = None - super().__init__(request, client_address, server) - - def init_session(self): - logger.debug("New connection [{ip}:{port}]".format(ip=self.client_address[0], port=self.client_address[1])) - - if self.server.connection_id >= 65025: - self.server.connection_id = 0 - 
self.server.connection_id += 1 - self.connection_id = self.server.connection_id - self.session = SessionController(api_type="sql") - - if hasattr(self.server, "salt") and isinstance(self.server.salt, str): - self.salt = self.server.salt - else: - self.salt = base64.b64encode(os.urandom(15)).decode() - - self.socket = self.request - self.logging = logger - - self.current_transaction = None - - logger.debug("session salt: {salt}".format(salt=self.salt)) - - def handshake(self): - def switch_auth(method="mysql_native_password"): - self.packet(SwitchOutPacket, seed=self.salt, method=method).send() - switch_out_answer = self.packet(SwitchOutResponse) - switch_out_answer.get() - password = switch_out_answer.password - if method == "mysql_native_password" and len(password) == 0: - password = scramble_func("", self.salt) - return password - - def get_fast_auth_password(): - logger.debug("Asking for fast auth password") - self.packet(FastAuthFail).send() - password_answer = self.packet(PasswordAnswer) - password_answer.get() - try: - password = password_answer.password.value.decode() - except Exception: - logger.warning("error: no password in Fast Auth answer") - self.packet( - ErrPacket, - err_code=ERR.ER_PASSWORD_NO_MATCH, - msg="Is not password in connection query.", - ).send() - return None - return password - - username = None - password = None - - logger.debug("send HandshakePacket") - self.packet(HandshakePacket).send() - - handshake_resp = self.packet(HandshakeResponsePacket) - handshake_resp.get() - if handshake_resp.length == 0: - logger.debug("HandshakeResponsePacket empty") - self.packet(OkPacket).send() - return False - self.client_capabilities = ClentCapabilities(handshake_resp.capabilities.value) - - client_auth_plugin = handshake_resp.client_auth_plugin.value.decode() - - self.session.is_ssl = False - - if handshake_resp.type == "SSLRequest": - logger.debug("switch to SSL") - self.session.is_ssl = True - - ssl_context = ssl.SSLContext() - 
ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2 - ssl_context.load_cert_chain(self.server.cert_path) - ssl_socket = ssl_context.wrap_socket(self.socket, server_side=True, do_handshake_on_connect=True) - - self.socket = ssl_socket - handshake_resp = self.packet(HandshakeResponsePacket) - handshake_resp.get() - client_auth_plugin = handshake_resp.client_auth_plugin.value.decode() - - username = handshake_resp.username.value.decode() - - if client_auth_plugin != DEFAULT_AUTH_METHOD: - if client_auth_plugin == "mysql_native_password": - password = switch_auth("mysql_native_password") - else: - new_method = ( - "caching_sha2_password" - if client_auth_plugin == "caching_sha2_password" - else "mysql_native_password" - ) - - if new_method == "caching_sha2_password" and self.session.is_ssl is False: - logger.warning( - f"Check auth, user={username}, ssl={self.session.is_ssl}, auth_method={client_auth_plugin}: " - "error: cant switch to caching_sha2_password without SSL" - ) - self.packet( - ErrPacket, - err_code=ERR.ER_PASSWORD_NO_MATCH, - msg="caching_sha2_password without SSL not supported", - ).send() - return False - - logger.debug( - f"Check auth, user={username}, ssl={self.session.is_ssl}, auth_method={client_auth_plugin}: " - f"switch auth method to {new_method}" - ) - password = switch_auth(new_method) - - if new_method == "caching_sha2_password": - if password == b"\x00": - password = "" - else: - password = get_fast_auth_password() - elif "caching_sha2_password" in client_auth_plugin: - logger.debug( - f"Check auth, user={username}, ssl={self.session.is_ssl}, auth_method={client_auth_plugin}: " - "check auth using caching_sha2_password" - ) - password = handshake_resp.enc_password.value - if password == b"\x00": - password = "" - else: - # FIXME https://github.com/mindsdb/mindsdb/issues/1374 - # if self.session.is_ssl: - # password = get_fast_auth_password() - # else: - password = switch_auth() - elif "mysql_native_password" in client_auth_plugin: - 
logger.debug( - f"Check auth, user={username}, ssl={self.session.is_ssl}, auth_method={client_auth_plugin}: " - "check auth using mysql_native_password" - ) - password = handshake_resp.enc_password.value - else: - logger.debug( - f"Check auth, user={username}, ssl={self.session.is_ssl}, auth_method={client_auth_plugin}: " - "unknown method, possible ERROR. Try to switch to mysql_native_password" - ) - password = switch_auth("mysql_native_password") - - try: - self.session.database = handshake_resp.database.value.decode() - except Exception: - self.session.database = None - logger.debug( - f"Check auth, user={username}, ssl={self.session.is_ssl}, auth_method={client_auth_plugin}: " - f"connecting to database {self.session.database}" - ) - - auth_data = self.server.check_auth(username, password, scramble_func, self.salt, ctx.company_id, ctx.user_id) - if auth_data["success"]: - self.session.username = auth_data["username"] - self.session.auth = True - self.packet(OkPacket).send() - return True - else: - self.packet( - ErrPacket, - err_code=ERR.ER_PASSWORD_NO_MATCH, - msg=f"Access denied for user {username}", - ).send() - logger.warning(f"Access denied for user {username}") - return False - - def send_package_group(self, packages): - string = b"".join([x.accum() for x in packages]) - self.socket.sendall(string) - - def answer_stmt_close(self, stmt_id): - self.session.unregister_stmt(stmt_id) - - def send_query_answer(self, answer: SQLAnswer): - if answer.type in (RESPONSE_TYPE.TABLE, RESPONSE_TYPE.COLUMNS_TABLE): - packages = [] - - if len(answer.result_set) >= 1000: - # for big responses leverage pandas map function to convert data to packages - self.send_table_packets(result_set=answer.result_set) - else: - packages += self.get_table_packets(result_set=answer.result_set) - - if answer.status is not None: - packages.append(self.last_packet(status=answer.status)) - else: - packages.append(self.last_packet()) - self.send_package_group(packages) - elif answer.type == 
RESPONSE_TYPE.OK: - self.packet(OkPacket, state_track=answer.state_track, affected_rows=answer.affected_rows).send() - elif answer.type == RESPONSE_TYPE.ERROR: - self.packet(ErrPacket, err_code=answer.error_code, msg=answer.error_message).send() - elif answer.type == RESPONSE_TYPE.EOF: - self.packet(EofPacket).send() - - def _get_column_defenition_packets(self, columns: dict, data=None): - if data is None: - data = [] - packets = [] - for i, column in enumerate(columns): - logger.debug( - "%s._get_column_defenition_packets: handling column - %s of %s type", - self.__class__.__name__, - column, - type(column), - ) - table_name = column.get("table_name", "table_name") - column_name = column.get("name", "column_name") - column_alias = column.get("alias", column_name) - flags = column.get("flags", 0) - if isinstance(flags, list): - flags = sum(flags) - if column.get("size") is None: - length = 1 - for row in data: - if isinstance(row, dict): - length = max(len(str(row[column_alias])), length) - else: - length = max(len(str(row[i])), length) - column["size"] = 1 - - packets.append( - self.packet( - ColumnDefenitionPacket, - schema=column.get("database", "mindsdb_schema"), - table_alias=column.get("table_alias", table_name), - table_name=table_name, - column_alias=column_alias, - column_name=column_name, - column_type=column["type"], - charset=column.get("charset", CHARSET_NUMBERS["utf8_unicode_ci"]), - max_length=column["size"], - flags=flags, - ) - ) - return packets - - def get_table_packets(self, result_set: ResultSet, status=0): - data_frame, columns_dict = dump_result_set_to_mysql(result_set) - data = data_frame.to_dict("split")["data"] - - # TODO remove columns order - packets = [self.packet(ColumnCountPacket, count=len(columns_dict))] - packets.extend(self._get_column_defenition_packets(columns_dict, data)) - - if self.client_capabilities.DEPRECATE_EOF is False: - packets.append(self.packet(EofPacket, status=status)) - - packets += 
[self.packet(ResultsetRowPacket, data=x) for x in data] - return packets - - def send_table_packets(self, result_set: ResultSet, status: int = 0): - """Send table packets to client, piece by piece - - Args: - result_set (ResultSet): the result set to send - status (int): the status to send - - Returns: - None - """ - columns_dicts = dump_columns_info(result_set, infer_column_size=True) - - packets = [self.packet(ColumnCountPacket, count=len(columns_dicts))] - packets.extend(self._get_column_defenition_packets(columns_dicts)) - - if self.client_capabilities.DEPRECATE_EOF is False: - packets.append(self.packet(EofPacket, status=status)) - self.send_package_group(packets) - - chunk_size = 1000 - df = result_set.get_raw_df() - if len(df) > 0: - for chunk in dump_chunks(df, columns_dicts, chunk_size): - for i in range(len(chunk)): - chunk[i] = self.packet(body=chunk[i], length=len(chunk[i])).accum() - self.socket.sendall(b"".join(chunk)) - - def decode_utf(self, text): - try: - return text.decode("utf-8") - except Exception: - raise WrongCharsetError(f"SQL contains non utf-8 values: {text}") - - def is_cloud_connection(self): - """Determine source of connection. Must be call before handshake. - Idea based on: real mysql connection does not send anything before server handshake, so - soket should be in 'out' state. In opposite, clout connection sends '0000' right after - connection. '0000' selected because in real mysql connection it should be lenght of package, - and it can not be 0. 
- """ - is_cloud = config.get("cloud", False) - - if sys.platform != "linux" or is_cloud is False: - return {"is_cloud": False} - - read_poller = select.poll() - read_poller.register(self.request, select.POLLIN) - events = read_poller.poll(30) - - if len(events) == 0: - return {"is_cloud": False} - - first_byte = self.request.recv(4, socket.MSG_PEEK) - if first_byte == b"\x00\x00\x00\x00": - self.request.recv(4) - client_capabilities = self.request.recv(8) - client_capabilities = struct.unpack("L", client_capabilities)[0] - - size_str = "16" # 16 bytes of null-terminated string - company_id = self.request.recv(size_str) - company_id = company_id.decode().strip("\x00") - if not company_id: - company_id = DEFAULT_COMPANY_ID - - user_id = self.request.recv(size_str) - user_id = user_id.decode().strip("\x00") - if not user_id: - user_id = DEFAULT_USER_ID - - user_class = self.request.recv(1) - user_class = struct.unpack("B", user_class)[0] - user_class = user_class & 3 - - database_name_len = self.request.recv(2) - database_name_len = struct.unpack("H", database_name_len)[0] - - database_name = "" - if database_name_len > 0: - database_name = self.request.recv(database_name_len).decode() - - return { - "is_cloud": True, - "client_capabilities": client_capabilities, - "company_id": company_id, - "user_id": user_id, - "user_class": user_class, - "database": database_name, - } - - return {"is_cloud": False} - - def to_mysql_columns(self, columns_list: list[Column]) -> list[dict[str, str | int]]: - database_name = None if self.session.database == "" else self.session.database.lower() - return [column_to_mysql_column_dict(column, database_name=database_name) for column in columns_list] - - @profiler.profile() - def process_query(self, sql: str) -> SQLAnswer: - log.log_ram_info(logger) - executor = Executor(session=self.session, sqlserver=self) - executor.query_execute(sql) - executor_answer = executor.executor_answer - - if executor_answer.data is None: - resp = SQLAnswer( 
- resp_type=RESPONSE_TYPE.OK, - state_track=executor_answer.state_track, - affected_rows=executor_answer.affected_rows, - ) - else: - resp = SQLAnswer( - resp_type=RESPONSE_TYPE.TABLE, - state_track=executor_answer.state_track, - result_set=executor_answer.data, - status=executor.server_status, - affected_rows=executor_answer.affected_rows, - mysql_types=executor_answer.data.mysql_types, - ) - - # Increment the counter and include metadata in attributes - increment_otel_query_request_counter(ctx.get_metadata(query=sql)) - - return resp - - def answer_stmt_prepare(self, sql): - executor = Executor(session=self.session, sqlserver=self) - stmt_id = self.session.register_stmt(executor) - - executor.stmt_prepare(sql) - - packages = [ - self.packet( - STMTPrepareHeaderPacket, - stmt_id=stmt_id, - num_columns=len(executor.columns), - num_params=len(executor.params), - ) - ] - - if len(executor.params) > 0: - parameters_def = self.to_mysql_columns(executor.params) - packages.extend(self._get_column_defenition_packets(parameters_def)) - if self.client_capabilities.DEPRECATE_EOF is False: - status = sum([SERVER_STATUS.SERVER_STATUS_AUTOCOMMIT]) - packages.append(self.packet(EofPacket, status=status)) - - if len(executor.columns) > 0: - columns_def = self.to_mysql_columns(executor.columns) - packages.extend(self._get_column_defenition_packets(columns_def)) - - if self.client_capabilities.DEPRECATE_EOF is False: - status = sum([SERVER_STATUS.SERVER_STATUS_AUTOCOMMIT]) - packages.append(self.packet(EofPacket, status=status)) - - self.send_package_group(packages) - - def answer_stmt_execute(self, stmt_id, parameters): - prepared_stmt = self.session.prepared_stmts[stmt_id] - executor: Executor = prepared_stmt["statement"] - - executor.stmt_execute(parameters) - - executor_answer: ExecuteAnswer = executor.executor_answer - - if executor_answer.data is None: - resp = SQLAnswer(resp_type=RESPONSE_TYPE.OK, state_track=executor_answer.state_track) - return self.send_query_answer(resp) 
- - # TODO prepared_stmt['type'] == 'lock' is not used but it works - result_set = executor_answer.data - data_frame, columns_dict = dump_result_set_to_mysql(result_set) - data = data_frame.to_dict("split")["data"] - - packages = [self.packet(ColumnCountPacket, count=len(columns_dict))] - packages.extend(self._get_column_defenition_packets(columns_dict)) - - if self.client_capabilities.DEPRECATE_EOF is False: - packages.append(self.packet(EofPacket, status=0x0062)) - - # send all - for row in data: - packages.append(self.packet(BinaryResultsetRowPacket, data=row, columns=columns_dict)) - - server_status = executor.server_status or 0x0002 - packages.append(self.last_packet(status=server_status)) - prepared_stmt["fetched"] += len(data) - - return self.send_package_group(packages) - - def answer_stmt_fetch(self, stmt_id, limit): - prepared_stmt = self.session.prepared_stmts[stmt_id] - executor = prepared_stmt["statement"] - fetched = prepared_stmt["fetched"] - executor_answer: ExecuteAnswer = executor.executor_answer - - if executor_answer.data is None: - resp = SQLAnswer(resp_type=RESPONSE_TYPE.OK, state_track=executor_answer.state_track) - return self.send_query_answer(resp) - - packages = [] - columns = self.to_mysql_columns(executor_answer.data.columns) - for row in executor_answer.data[fetched:limit].to_lists(): - packages.append(self.packet(BinaryResultsetRowPacket, data=row, columns=columns)) - - prepared_stmt["fetched"] += len(executor_answer.data[fetched:limit]) - - if len(executor_answer.data) <= limit + fetched: - status = sum( - [ - SERVER_STATUS.SERVER_STATUS_AUTOCOMMIT, - SERVER_STATUS.SERVER_STATUS_LAST_ROW_SENT, - ] - ) - else: - status = sum( - [ - SERVER_STATUS.SERVER_STATUS_AUTOCOMMIT, - SERVER_STATUS.SERVER_STATUS_CURSOR_EXISTS, - ] - ) - - packages.append(self.last_packet(status=status)) - self.send_package_group(packages) - - def handle(self): - """ - Handle new incoming connections - :return: - """ - ctx.set_default() - - 
self.server.hook_before_handle() - - logger.debug("Handling new incoming connection.") - cloud_connection = self.is_cloud_connection() - - ctx.company_id = cloud_connection.get("company_id", DEFAULT_COMPANY_ID) - ctx.user_id = cloud_connection.get("user_id", DEFAULT_USER_ID) - logger.debug(f"Connection context: company_id: {ctx.company_id}, user_id: {ctx.user_id}.") - - self.init_session() - if cloud_connection["is_cloud"] is False: - if self.handshake() is False: - return - else: - ctx.user_class = cloud_connection["user_class"] - self.client_capabilities = ClentCapabilities(cloud_connection["client_capabilities"]) - self.session.database = cloud_connection["database"] - self.session.username = "cloud" - self.session.auth = True - - while True: - logger.debug("Got a new packet") - p = self.packet(CommandPacket) - - try: - success = p.get() - except Exception: - logger.exception("Session closed, on packet read error:") - return - - if success is False: - logger.debug("Session closed by client") - return - - logger.debug("Command TYPE: {type}".format(type=getConstName(COMMANDS, p.type.value))) - - sql = None - response = None - error_type = None - error_code = None - error_text = None - error_traceback = None - - try: - if p.type.value == COMMANDS.COM_QUERY: - sql = self.decode_utf(p.sql.value) - sql = clear_sql(sql) - logger.debug(f"Incoming query: {sql}") - profiler.set_meta(query=sql, api="mysql", environment=config.get("environment")) - with profiler.Context("mysql_query_processing"), mark_process("mysql_query"): - response = self.process_query(sql) - elif p.type.value == COMMANDS.COM_STMT_PREPARE: - sql = self.decode_utf(p.sql.value) - self.answer_stmt_prepare(sql) - elif p.type.value == COMMANDS.COM_STMT_EXECUTE: - self.answer_stmt_execute(p.stmt_id.value, p.parameters) - elif p.type.value == COMMANDS.COM_STMT_FETCH: - self.answer_stmt_fetch(p.stmt_id.value, p.limit.value) - elif p.type.value == COMMANDS.COM_STMT_CLOSE: - 
self.answer_stmt_close(p.stmt_id.value) - elif p.type.value == COMMANDS.COM_QUIT: - logger.debug("Session closed, on client disconnect") - self.session = None - break - elif p.type.value == COMMANDS.COM_INIT_DB: - new_database = p.database.value.decode() - - executor = Executor(session=self.session, sqlserver=self) - executor.change_default_db(new_database) - - response = SQLAnswer(RESPONSE_TYPE.OK) - elif p.type.value == COMMANDS.COM_FIELD_LIST: - # this command is deprecated, but console client still use it. - response = SQLAnswer(RESPONSE_TYPE.OK) - elif p.type.value == COMMANDS.COM_STMT_RESET: - response = SQLAnswer(RESPONSE_TYPE.OK) - elif p.type.value == COMMANDS.COM_PING: - response = SQLAnswer(RESPONSE_TYPE.OK) - elif p.type.value == COMMANDS.COM_CHANGE_USER: - # This package should trigger re-authentication. For now it is forbidden. - logger.warning("Got COM_CHANGE_USER packet that could not be processed, return error.") - response = SQLAnswer( - resp_type=RESPONSE_TYPE.ERROR, - error_code=None, - error_message="Packet COM_CHANGE_USER could not be processed", - ) - elif p.type.value == COMMANDS.COM_DEBUG: - response = SQLAnswer(resp_type=RESPONSE_TYPE.EOF) - elif p.type.value == COMMANDS.COM_SET_OPTION: - # While regular MySQL options have no effect on mindsdb, we can safely return Ok. 
- logger.warning("Unexpected packet COM_SET_OPTION recieved, return ok.") - response = SQLAnswer(RESPONSE_TYPE.OK) - elif p.type.value == COMMANDS.COM_SLEEP: - # error - is the only valid answer for the packet - response = SQLAnswer( - resp_type=RESPONSE_TYPE.ERROR, - error_code=None, - error_message="", - ) - elif p.type.value == COMMANDS.COM_PROCESS_KILL: - logger.warning("Unexpected packet COM_PROCESS_KILL recieved, return error.") - response = SQLAnswer( - resp_type=RESPONSE_TYPE.ERROR, - error_code=None, - error_message="Packet COM_PROCESS_KILL could not be processed", - ) - elif p.type.value == COMMANDS.COM_RESET_CONNECTION: - logger.warning("Unexpected packet COM_RESET_CONNECTION recieved, return error.") - response = SQLAnswer( - resp_type=RESPONSE_TYPE.ERROR, - error_code=None, - error_message="Packet COM_RESET_CONNECTION could not be processed", - ) - elif p.type.value == COMMANDS.COM_SHUTDOWN: - logger.warning("Unexpected packet COM_SHUTDOWN recieved, return error.") - response = SQLAnswer( - resp_type=RESPONSE_TYPE.ERROR, - error_code=None, - error_message="Packet COM_SHUTDOWN could not be processed", - ) - else: - logger.warning("Command has no specific handler, return OK msg") - logger.debug(str(p)) - response = SQLAnswer(RESPONSE_TYPE.OK) - - except (QueryError, executor_exceptions.ExecutorException, executor_exceptions.UnknownError) as e: - error_type = "expected" if e.is_expected else "unexpected" - error_code = e.mysql_error_code - if e.is_expected: - if logger.isEnabledFor(logging.DEBUG): - logger.info("Query execution failed with expected error:", exc_info=True) - else: - logger.info(f"Query execution failed with expected error: {e}") - else: - logger.exception("Query execution failed with error") - response = SQLAnswer( - resp_type=RESPONSE_TYPE.ERROR, - error_code=error_code, - error_message=str(e), - ) - - except Exception as e: - error_type = "unexpected" - error_traceback = traceback.format_exc() - logger.exception("ERROR while executing 
query:") - error_code = ERR.ER_SYNTAX_ERROR - response = SQLAnswer( - resp_type=RESPONSE_TYPE.ERROR, - error_code=error_code, - error_message=str(e), - ) - - if response is not None: - self.send_query_answer(response) - if response.type == RESPONSE_TYPE.ERROR: - error_text = response.error_message - error_code = response.error_code - error_type = error_type or "expected" - - hooks.after_api_query( - company_id=ctx.company_id, - user_id=ctx.user_id, - api="mysql", - command=getConstName(COMMANDS, p.type.value), - payload=sql, - error_type=error_type, - error_code=error_code, - error_text=error_text, - traceback=error_traceback, - ) - - def packet(self, packetClass=Packet, **kwargs): - """ - Factory method for packets - - :param packetClass: - :param kwargs: - :return: - """ - p = packetClass(socket=self.socket, session=self.session, proxy=self, **kwargs) - self.session.inc_packet_sequence_number() - return p - - def last_packet(self, status=0x0002): - if self.client_capabilities.DEPRECATE_EOF is True: - return self.packet(OkPacket, eof=True, status=status) - else: - return self.packet(EofPacket, status=status) - - def set_context(self, context): - if "db" in context: - self.session.database = context["db"] - else: - self.session.database = config.get("default_project") - - if "profiling" in context: - self.session.profiling = context["profiling"] - if "predictor_cache" in context: - self.session.predictor_cache = context["predictor_cache"] - if "show_secrets" in context: - self.session.show_secrets = context["show_secrets"] - - def get_context(self): - context = {"show_secrets": self.session.show_secrets} - if self.session.database is not None: - context["db"] = self.session.database - if self.session.profiling is True: - context["profiling"] = True - if self.session.predictor_cache is False: - context["predictor_cache"] = False - - return context - - @staticmethod - def startProxy(): - """ - Create a server and wait for incoming connections until Ctrl-C - """ - 
global logger - - cert_path = config["api"]["mysql"].get("certificate_path") - if cert_path is None or cert_path == "": - cert_path = tempfile.mkstemp(prefix="mindsdb_cert_", text=True)[1] - make_ssl_cert(cert_path) - atexit.register(lambda: os.remove(cert_path)) - elif not os.path.exists(cert_path): - logger.error("Certificate defined in 'certificate_path' setting does not exist") - - # TODO make it session local - server_capabilities.set(CAPABILITIES.CLIENT_SSL, config["api"]["mysql"]["ssl"]) - - host = config["api"]["mysql"]["host"] - port = int(config["api"]["mysql"]["port"]) - - logger.info(f"Starting MindsDB Mysql proxy server on tcp://{host}:{port}") - - SocketServer.TCPServer.allow_reuse_address = True - server = MysqlTCPServer((host, port), MysqlProxy) - server.mindsdb_config = config - server.check_auth = partial(check_auth, config=config) - server.cert_path = cert_path - server.connection_id = 0 - server.hook_before_handle = empty_fn - - atexit.register(MysqlProxy.server_close, srv=server) - - # Activate the server; this will keep running until you - # interrupt the program with Ctrl-C - logger.info("Waiting for incoming connections...") - server.serve_forever() diff --git a/mindsdb/api/mysql/mysql_proxy/utilities/__init__.py b/mindsdb/api/mysql/mysql_proxy/utilities/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/api/mysql/mysql_proxy/utilities/dump.py b/mindsdb/api/mysql/mysql_proxy/utilities/dump.py deleted file mode 100644 index 82fa0a5232f..00000000000 --- a/mindsdb/api/mysql/mysql_proxy/utilities/dump.py +++ /dev/null @@ -1,507 +0,0 @@ -import struct -import datetime -from typing import Any -from array import array - -import orjson -import numpy as np -from numpy import dtype as np_dtype -import pandas as pd -from pandas.api import types as pd_types - -from mindsdb.api.executor.sql_query.result_set import ResultSet, get_mysql_data_type_from_series -from mindsdb.utilities.types.column import Column -from 
mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import ( - MYSQL_DATA_TYPE, - DATA_C_TYPE_MAP, - CTypeProperties, - CHARSET_NUMBERS, - NULL_VALUE, -) -from mindsdb.utilities import log -from mindsdb.utilities.json_encoder import CustomJSONEncoder -from mindsdb.api.mysql.mysql_proxy.data_types.mysql_datum import Datum - -logger = log.getLogger(__name__) - -# Pre-bind default encoder for custom types so we can serialize JSON consistently -_default_json = CustomJSONEncoder().default - - -def column_to_mysql_column_dict(column: Column, database_name: str | None = None) -> dict[str, str | int]: - """Convert Column object to dict with column properties. - - Args: - column (Column): Column object to convert. - database_name (str | None): Name of the database. - - Returns: - dict[str, str | int]: Dictionary with mysql column properties. - """ - # region infer type. Should not happen, but what if it is dtype? - if isinstance(column.type, str): - try: - column.type = MYSQL_DATA_TYPE(column.type.upper()) - except ValueError: - pass - elif isinstance(column.type, np_dtype): - if pd_types.is_integer_dtype(column.type): - column.type = MYSQL_DATA_TYPE.INT - elif pd_types.is_numeric_dtype(column.type): - column.type = MYSQL_DATA_TYPE.FLOAT - elif pd_types.is_datetime64_any_dtype(column.type): - column.type = MYSQL_DATA_TYPE.DATETIME - else: - column.type = MYSQL_DATA_TYPE.TEXT - # endregion - - if isinstance(column.type, MYSQL_DATA_TYPE) is False: - logger.warning(f"Unexpected column type: {column.type}. 
Use TEXT as fallback.") - column.type = MYSQL_DATA_TYPE.TEXT - - charset = CHARSET_NUMBERS["utf8_unicode_ci"] - if column.type in (MYSQL_DATA_TYPE.JSON, MYSQL_DATA_TYPE.VECTOR): - charset = CHARSET_NUMBERS["binary"] - - type_properties: CTypeProperties = DATA_C_TYPE_MAP[column.type] - - result = { - "database": column.database or database_name, - # TODO add 'original_table' - "table_name": column.table_name, - "name": column.name, - "alias": column.alias or column.name, - "size": type_properties.size, - "flags": type_properties.flags, - "type": type_properties.code, - "type_enum": column.type, - "charset": charset, - } - return result - - -def _dump_bool(var: Any) -> int | None: - """Dumps a boolean value to an integer, as in MySQL boolean type is tinyint with values 0 and 1. - NOTE: None consider as True in dataframe with dtype=bool, we can't change it - - Args: - var (Any): The boolean value to dump - - Returns: - int | None: 1 or 0 or None - """ - if pd.isna(var): - return None - return "1" if var else "0" - - -def _dump_str(var: Any) -> str | None: - """Dumps a value to a string. - - Args: - var (Any): The value to dump - - Returns: - str | None: The string representation of the value or None if the value is None - """ - if isinstance(var, bytes): - try: - return var.decode("utf-8") - except Exception: - return str(var)[2:-1] - if isinstance(var, (dict, list)): - try: - return orjson.dumps( - var, - default=_default_json, - option=orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_PASSTHROUGH_DATETIME, - ).decode("utf-8") - except Exception: - return str(var) - # pd.isna returns array of bools for list - # and the truth value of a numpy array is ambiguous - if isinstance(var, (list, np.ndarray)) is False and pd.isna(var): - return None - return str(var) - - -def _dump_int_or_str(var: Any) -> str | None: - """Dumps a value to a string. - If the value is numeric - then cast it to int to avoid float representation. - - Args: - var (Any): The value to dump. 
- - Returns: - str | None: The string representation of the value or None if the value is None - """ - if pd.isna(var): - return None - try: - return str(int(var)) - except ValueError: - return str(var) - - -def _dump_date(var: datetime.date | str | None) -> str | None: - """Dumps a date value to a string. - - Args: - var (datetime.date | str | None): The date value to dump - - Returns: - str | None: The string representation of the date value or None if the value is None - """ - if isinstance(var, (datetime.date, pd.Timestamp)): # it is also True for datetime.datetime - return var.strftime("%Y-%m-%d") - elif isinstance(var, str): - return var - elif pd.isna(var): - return None - logger.warning(f"Unexpected value type for DATE: {type(var)}, {var}") - return _dump_str(var) - - -def _dump_datetime(var: datetime.datetime | str | None) -> str | None: - """Dumps a datetime value to a string. - # NOTE mysql may display only %Y-%m-%d %H:%M:%S format for datetime column - - Args: - var (datetime.datetime | str | None): The datetime value to dump - - Returns: - str | None: The string representation of the datetime value or None if the value is None - """ - if isinstance(var, datetime.date): # it is also datetime.datetime - if hasattr(var, "tzinfo") and var.tzinfo is not None: - return var.astimezone(datetime.timezone.utc).strftime("%Y-%m-%d %H:%M:%S") - return var.strftime("%Y-%m-%d %H:%M:%S") - elif isinstance(var, pd.Timestamp): - if var.tzinfo is not None: - return var.tz_convert("UTC").strftime("%Y-%m-%d %H:%M:%S") - return var.strftime("%Y-%m-%d %H:%M:%S") - elif isinstance(var, str): - return var - elif pd.isna(var): - return None - logger.warning(f"Unexpected value type for DATETIME: {type(var)}, {var}") - return _dump_str(var) - - -def _dump_time(var: datetime.time | str | None) -> str | None: - """Dumps a time value to a string. 
- - Args: - var (datetime.time | str | None): The time value to dump - - Returns: - str | None: The string representation of the time value or None if the value is None - """ - if isinstance(var, datetime.time): - if var.tzinfo is not None: - # NOTE strftime does not support timezone, so we need to convert to UTC - offset_seconds = var.tzinfo.utcoffset(None).total_seconds() - time_seconds = var.hour * 3600 + var.minute * 60 + var.second - utc_seconds = (time_seconds - offset_seconds) % (24 * 3600) - hours = int(utc_seconds // 3600) - minutes = int((utc_seconds % 3600) // 60) - seconds = int(utc_seconds % 60) - var = datetime.time(hours, minutes, seconds, var.microsecond) - return var.strftime("%H:%M:%S") - elif isinstance(var, datetime.datetime): - if var.tzinfo is not None: - return var.astimezone(datetime.timezone.utc).strftime("%H:%M:%S") - return var.strftime("%H:%M:%S") - elif isinstance(var, pd.Timestamp): - if var.tzinfo is not None: - return var.tz_convert("UTC").strftime("%H:%M:%S") - return var.strftime("%H:%M:%S") - elif isinstance(var, str): - return var - elif pd.isna(var): - return None - logger.warning(f"Unexpected value type for TIME: {type(var)}, {var}") - return _dump_str(var) - - -def _dump_vector(value: Any) -> bytes | None: - """Convert array or list of floats to a bytes. - - Args: - value (Any): The value to dump - - Returns: - bytes | None: The bytes representation of the vector value or None if the value is None - """ - if isinstance(value, (array, list, np.ndarray)): - return b"".join([struct.pack(" pd.Series: - """Convert values in a series to a string representation of a date. - NOTE: MySQL require exactly %Y-%m-%d for DATE type. 
- - Args: - series (pd.Series): The series to handle - - Returns: - pd.Series: The series with the date values as strings - """ - if pd_types.is_datetime64_any_dtype(series.dtype): - return series.dt.strftime("%Y-%m-%d") - elif pd_types.is_object_dtype(series.dtype): - return series.apply(_dump_date) - logger.info(f"Unexpected dtype: {series.dtype} for column with type DATE") - return series.apply(_dump_str) - - -def _handle_series_as_datetime(series: pd.Series) -> pd.Series: - """Convert values in a series to a string representation of a datetime. - NOTE: MySQL's DATETIME type require exactly %Y-%m-%d %H:%M:%S format. - - Args: - series (pd.Series): The series to handle - - Returns: - pd.Series: The series with the datetime values as strings - """ - if pd_types.is_datetime64_any_dtype(series.dtype): - return series.dt.strftime("%Y-%m-%d %H:%M:%S") - elif pd_types.is_object_dtype(series.dtype): - return series.apply(_dump_datetime) - logger.info(f"Unexpected dtype: {series.dtype} for column with type DATETIME") - return series.apply(_dump_str) - - -def _handle_series_as_time(series: pd.Series) -> pd.Series: - """Convert values in a series to a string representation of a time. - NOTE: MySQL's TIME type require exactly %H:%M:%S format. - - Args: - series (pd.Series): The series to handle - - Returns: - pd.Series: The series with the time values as strings - """ - if pd_types.is_timedelta64_ns_dtype(series.dtype): - base_time = pd.Timestamp("2000-01-01") - series = (base_time + series).dt.strftime("%H:%M:%S") - elif pd_types.is_datetime64_dtype(series.dtype): - series = series.dt.strftime("%H:%M:%S") - elif pd_types.is_object_dtype(series.dtype): - series = series.apply(_dump_time) - else: - logger.info(f"Unexpected dtype: {series.dtype} for column with type TIME") - series = series.apply(_dump_str) - return series - - -def _handle_series_as_int(series: pd.Series) -> pd.Series: - """Dump series to str(int) (or just str, of can't case to int). 
This need because of DataFrame store imput int as - float if dtype is object: pd.DataFrame([None, 1], dtype='object') -> [NaN, 1.0] - - Args: - series (pd.Series): The series to handle - - Returns: - pd.Series: The series with the int values as strings - """ - if pd_types.is_integer_dtype(series.dtype): - if series.dtype == "Int64": - # NOTE: 'apply' converts values to python floats - return series.astype(object).apply(_dump_str) - return series.apply(_dump_str) - return series.apply(_dump_int_or_str) - - -def _handle_series_as_vector(series: pd.Series) -> pd.Series: - """Convert values in a series to a bytes representation of a vector. - NOTE: MySQL's VECTOR type require exactly 4 bytes per float. - - Args: - series (pd.Series): The series to handle - - Returns: - pd.Series: The series with the vector values as bytes - """ - return series.apply(_dump_vector) - - -def dump_result_set_to_mysql( - result_set: ResultSet, infer_column_size: bool = False -) -> tuple[pd.DataFrame, list[dict[str, str | int]]]: - """ - Dumps the ResultSet to a format that can be used to send as MySQL response packet. - NOTE: This method modifies the original DataFrame and columns. - - Args: - result_set (ResultSet): result set to dump - infer_column_size (bool): If True, infer the 'size' attribute of the column from the data. - Exact size is not necessary, approximate is enough. - - Returns: - tuple[pd.DataFrame, list[dict[str, str | int]]]: A tuple containing the modified DataFrame and a list - of MySQL column dictionaries. 
The dataframe values are - str or None, dtype=object - """ - df = result_set.get_raw_df() - - for i, column in enumerate(result_set.columns): - series = df[i] - if isinstance(column.type, MYSQL_DATA_TYPE) is False: - column.type = get_mysql_data_type_from_series(series) - - column_type: MYSQL_DATA_TYPE = column.type - - match column_type: - case MYSQL_DATA_TYPE.BOOL | MYSQL_DATA_TYPE.BOOLEAN: - series = series.apply(_dump_bool) - case MYSQL_DATA_TYPE.DATE: - series = _handle_series_as_date(series) - case MYSQL_DATA_TYPE.DATETIME: - series = _handle_series_as_datetime(series) - case MYSQL_DATA_TYPE.TIME: - series = _handle_series_as_time(series) - case ( - MYSQL_DATA_TYPE.INT - | MYSQL_DATA_TYPE.TINYINT - | MYSQL_DATA_TYPE.SMALLINT - | MYSQL_DATA_TYPE.MEDIUMINT - | MYSQL_DATA_TYPE.BIGINT - | MYSQL_DATA_TYPE.YEAR - ): - series = _handle_series_as_int(series) - case MYSQL_DATA_TYPE.VECTOR: - series = _handle_series_as_vector(series) - case _: - series = series.apply(_dump_str) - - # inplace modification of dt types raise SettingWithCopyWarning, so do regular replace - # we may split this operation for dt and other types for optimisation - df[i] = series.replace([np.nan, pd.NA, pd.NaT], None) - - columns_dicts = [column_to_mysql_column_dict(column) for column in result_set.columns] - - if infer_column_size and any(column_info.get("size") is None for column_info in columns_dicts): - if len(df) == 0: - for column_info in columns_dicts: - if column_info["size"] is None: - column_info["size"] = 1 - else: - sample = df.head(100) - for i, column_info in enumerate(columns_dicts): - try: - column_info["size"] = sample[sample.columns[i]].astype(str).str.len().max() - except Exception: - column_info["size"] = 1 - - return df, columns_dicts - - -def dump_columns_info(result_set: ResultSet, infer_column_size: bool = False) -> list[dict[str, str | int]]: - """Preare list of columns attrs that are required for dump to mysql protocol - - Args: - result_set (ResultSet): result set - 
infer_column_size (bool): If True, infer the 'size' attribute of the column from the data. - Exact size is not necessary, approximate is enough. - - Returns: - list[dict[str, str | int]]: list of MySQL column dictionaries. - """ - df = result_set.get_raw_df() - - for i, column in enumerate(result_set.columns): - series = df[i] - if isinstance(column.type, MYSQL_DATA_TYPE) is False: - column.type = get_mysql_data_type_from_series(series) - - columns_dicts = [column_to_mysql_column_dict(column) for column in result_set.columns] - - if infer_column_size and any(column_info.get("size") is None for column_info in columns_dicts): - if len(df) == 0: - for column_info in columns_dicts: - if column_info["size"] is None: - column_info["size"] = 1 - else: - sample = df.head(100) - for i, column_info in enumerate(columns_dicts): - try: - column_info["size"] = sample[sample.columns[i]].astype(str).str.len().max() - except Exception: - column_info["size"] = 1 - - return columns_dicts - - -def serialize_bytes(data: bytes) -> bytes: - """serialize bytes to mysql protocol - - Args: - data (bytes): the bytes to serialize - - Returns: - bytes: the serialized bytes - """ - if data == NULL_VALUE: - return data - else: - return Datum.serialize_bytes(data) - - -def dump_chunks(df: pd.DataFrame, columns_info: list[dict], chunk_size: int): - """Serialize dataframe values to mysql TEXT protocol - - Args: - df (pd.DataFrame): the dataframe to serialize - columns_info (list[dict]): the columns info - chunk_size (int): the chunk size - - Yields: - list[bytes]: the serialized dataframe values - """ - start = 0 - while start < len(df): - serieces = [] - for i, column in enumerate(columns_info): - series = df[i][start : start + chunk_size] - match column["type_enum"]: - case MYSQL_DATA_TYPE.BOOL | MYSQL_DATA_TYPE.BOOLEAN: - series = series.apply(_dump_bool) - case MYSQL_DATA_TYPE.DATE: - series = _handle_series_as_date(series) - case MYSQL_DATA_TYPE.DATETIME: - series = 
_handle_series_as_datetime(series) - case MYSQL_DATA_TYPE.TIME: - series = _handle_series_as_time(series) - case MYSQL_DATA_TYPE.VECTOR: - series = _handle_series_as_vector(series) - case ( - MYSQL_DATA_TYPE.INT - | MYSQL_DATA_TYPE.TINYINT - | MYSQL_DATA_TYPE.SMALLINT - | MYSQL_DATA_TYPE.MEDIUMINT - | MYSQL_DATA_TYPE.BIGINT - | MYSQL_DATA_TYPE.YEAR - | MYSQL_DATA_TYPE.FLOAT - | MYSQL_DATA_TYPE.DOUBLE - | MYSQL_DATA_TYPE.DECIMAL - | MYSQL_DATA_TYPE.TIMESTAMP - ): - pass - case MYSQL_DATA_TYPE.TEXT: - # NOTE: it would be good do nothing for TEXT column as for INT. - # However, while we use TEXT as a fallback for undetected types, we need to handle it carefully. - series = series.apply(_dump_str) - case _: - series = series.apply(_dump_str) - serieces.append(series.astype(bytes).mask(series.isnull(), NULL_VALUE).apply(serialize_bytes)) - - yield pd.concat(serieces, axis=1).sum(axis=1).tolist() - - start += chunk_size diff --git a/mindsdb/api/mysql/start.py b/mindsdb/api/mysql/start.py deleted file mode 100644 index 60c9e8b4ba1..00000000000 --- a/mindsdb/api/mysql/start.py +++ /dev/null @@ -1,13 +0,0 @@ -import mindsdb.interfaces.storage.db as db -from mindsdb.api.mysql.mysql_proxy.mysql_proxy import MysqlProxy -from mindsdb.utilities import log -from mindsdb.utilities.functions import init_lexer_parsers - - -def start(verbose=False): - logger = log.getLogger(__name__) - logger.info("MySQL API is starting..") - db.init() - init_lexer_parsers() - - MysqlProxy.startProxy() diff --git a/mindsdb/integrations/LICENSE b/mindsdb/integrations/LICENSE deleted file mode 100644 index af73a205f1d..00000000000 --- a/mindsdb/integrations/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -## MIT License - -Copyright (c) 2019 MindsDB, Inc. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file diff --git a/mindsdb/integrations/README.md b/mindsdb/integrations/README.md deleted file mode 100644 index 495f079db32..00000000000 --- a/mindsdb/integrations/README.md +++ /dev/null @@ -1,31 +0,0 @@ -MindsDB integrations are broadly categorized into two types: - -1. Datasources - -Datasources in MindsDB refer to the different data storage and management systems that you can connect with MindsDB. These include traditional databases as well as data accessible through APIs. There are few different types of Datasources: - -* [Databases](https://docs.mindsdb.com/integrations/data-integrations/all-data-integrations) -* [Applications](https://docs.mindsdb.com/integrations/app-integrations/binance) -* [Vector Databases](https://docs.mindsdb.com/integrations/vector-db-integrations/chromadb) - -2. AI-Engines - -[AI-Engines](https://docs.mindsdb.com/ai-engines/overview) in MindsDB are the core of our AI and ML capabilities. 
This category encompasses a diverse range of artificial intelligence and machine learning modeling options, including: - - * Generative AI: Unlock the potential of generative algorithms for innovative solutions. - * Automated Machine Learning (Auto-ML): Simplify complex ML processes with automation, making AI more accessible. - - -## Directory Overview - -* `/handlers`: Contains code for each integration, organized by handler names. -* `/utilities`: Utilities for tasks like parsing dates, filtering SQL, and managing dependencies. -* `/libs`: Libraries used across various handlers. - -## Contributing - -If you're interested in contributing a new integration, please refer to our detailed `How To` guidelines: - -* [Building a Database Handler](https://docs.mindsdb.com/contribute/data-handlers) -* [Building a Machine Learning Handler](https://docs.mindsdb.com/contribute/ml-handlers) -* [Building an Application Handler](https://docs.mindsdb.com/contribute/app-handlers) \ No newline at end of file diff --git a/mindsdb/integrations/__init__.py b/mindsdb/integrations/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/__init__.py b/mindsdb/integrations/handlers/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/anthropic_handler/README.md b/mindsdb/integrations/handlers/anthropic_handler/README.md deleted file mode 100644 index 6e65f85559d..00000000000 --- a/mindsdb/integrations/handlers/anthropic_handler/README.md +++ /dev/null @@ -1,107 +0,0 @@ ---- -title: Anthropic -sidebarTitle: Anthropic ---- - -This documentation describes the integration of MindsDB with [Anthropic](https://www.anthropic.com/), an AI research company. -The integration allows for the deployment of Anthropic models within MindsDB, providing the models with access to data from various data sources. 
- -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To use Anthropic within MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). -3. Obtain the Anthropic API key required to deploy and use Anthropic models within MindsDB. Follow the [instructions for obtaining the API key](https://docs.anthropic.com/claude/docs/getting-access-to-claude). - -## Setup - -Create an AI engine from the [Anthropic handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/anthropic_handler). - -```sql -CREATE ML_ENGINE anthropic_engine -FROM anthropic -USING - anthropic_api_key = 'your-anthropic-api-key'; -``` - -Create a model using `anthropic_engine` as an engine. - -```sql -CREATE MODEL anthropic_model -PREDICT target_column -USING - engine = 'anthropic_engine', -- engine name as created via CREATE ML_ENGINE - column = 'column_name', -- column that stores input/question to the model - max_tokens = , -- max number of tokens to be generated by the model (default is 100) - model = 'model_name'; -- choose one of 'claude-instant-1.2', 'claude-2.1', 'claude-3-opus-20240229', 'claude-3-sonnet-20240229' (default is 'claude-2.1') -``` - - - -The integrations between Anthropic and MindsDB was implemented using [Anthropic Python SDK](https://github.com/anthropics/anthropic-sdk-python). - - -## Usage - -The following usage examples utilize `anthropic_engine` to create a model with the `CREATE MODEL` statement. - -Create and deploy the Anthropic model within MindsDB to ask any question. 
- -```sql -CREATE MODEL anthropic_model -PREDICT answer -USING - column = 'question', - engine = 'anthropic_engine', - max_tokens = 300, - model = 'claude-2.1'; -- choose one of 'claude-instant-1.2', 'claude-2.1', 'claude-3-opus-20240229', 'claude-3-sonnet-20240229' -``` - -Where: - -| Name | Description | -|-------------------|---------------------------------------------------------------------------| -| `column` | It defines the prompt to the model. | -| `engine` | It defines the Anthropic engine. | -| `max_tokens` | It defines the maximum number of tokens to generate before stopping. | -| `model` | It defines model that will complete your prompt. | - - - -**Default Model** - -When you create an Anthropic model in MindsDB, it uses the `claude-2.1` model by default. But you can use other available models by passing the model name to the `model` parameter in the `USING` clause of the `CREATE MODEL` statement. - - - - -**Default Max Tokens** - -When you create an Anthropic model in MindsDB, it uses 100 tokens as the maximum by default. But you can adjust this value by passing it to the `max_tokens` parameter in the `USING` clause of the `CREATE MODEL` statement. - - -Query the model to get predictions. - -```sql -SELECT question, answer -FROM anthropic_model -WHERE question = 'Where is Stockholm located?'; -``` - -Here is the output: - -```sql -+-----------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------+ -| question | answer | -+-----------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------+ -| Where is Stockholm located? | Stockholm is the capital and largest city of Sweden. It is located on Sweden's south-central east coast, where Lake Mälaren meets the Baltic Sea. 
| -+-----------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------+ -``` - - - -**Next Steps** - -Go to the [Use Cases](https://docs.mindsdb.com/use-cases/overview) section to see more examples. - diff --git a/mindsdb/integrations/handlers/anthropic_handler/__about__.py b/mindsdb/integrations/handlers/anthropic_handler/__about__.py deleted file mode 100644 index 57ad1e1346d..00000000000 --- a/mindsdb/integrations/handlers/anthropic_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Anthropic handler' -__package_name__ = 'mindsdb_anthropic_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Anthropic" -__author__ = 'Balaji Seetharaman ' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023 - mindsdb' diff --git a/mindsdb/integrations/handlers/anthropic_handler/__init__.py b/mindsdb/integrations/handlers/anthropic_handler/__init__.py deleted file mode 100644 index c49e22ffb50..00000000000 --- a/mindsdb/integrations/handlers/anthropic_handler/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -from .__about__ import __version__ as version -from .__about__ import __description__ as description -from mindsdb.integrations.libs.const import HANDLER_SUPPORT_LEVEL, HANDLER_TYPE -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -try: - from .anthropic_handler import AnthropicHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Anthropic" -name = "anthropic" -type = HANDLER_TYPE.ML -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.MINDSDB -permanent = False -__all__ = ["Handler", "version", "name", "type", "title", "description", "import_error", "icon_path", "support_level"] diff --git 
a/mindsdb/integrations/handlers/anthropic_handler/anthropic_handler.py b/mindsdb/integrations/handlers/anthropic_handler/anthropic_handler.py deleted file mode 100644 index 22f0c9a16df..00000000000 --- a/mindsdb/integrations/handlers/anthropic_handler/anthropic_handler.py +++ /dev/null @@ -1,106 +0,0 @@ -from typing import Dict, Optional - -import pandas as pd -from anthropic import Anthropic - -from mindsdb.integrations.libs.base import BaseMLEngine -from mindsdb.utilities import log - -from mindsdb.integrations.utilities.handler_utils import get_api_key - -logger = log.getLogger(__name__) - - -class AnthropicHandler(BaseMLEngine): - """ - Integration with the Anthropic LLM Python Library - """ - - name = "anthropic" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.default_chat_model = "claude-2.1" - self.supported_chat_models = ["claude-instant-1.2", "claude-2.1", "claude-3-opus-20240229", "claude-3-sonnet-20240229"] - self.default_max_tokens = 100 - self.generative = True - self.connection = None - - def create( - self, - target: str, - df: Optional[pd.DataFrame] = None, - args: Optional[Dict] = None, - ) -> None: - - if "using" not in args: - raise Exception( - "Anthropic engine requires a USING clause! Refer to its documentation for more details." - ) - - if "model" not in args["using"]: - args["using"]["model"] = self.default_chat_model - elif args["using"]["model"] not in self.supported_chat_models: - raise Exception( - f"Invalid chat model. 
Please use one of {self.supported_chat_models}" - ) - - if "max_tokens" not in args["using"]: - args["using"]["max_tokens"] = self.default_max_tokens - - self.model_storage.json_set("args", args) - - def predict( - self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None - ) -> None: - - args = self.model_storage.json_get("args") - api_key = get_api_key('anthropic', args["using"], self.engine_storage, strict=False) - - self.connection = Anthropic( - api_key=api_key, - ) - - input_column = args["using"]["column"] - - if input_column not in df.columns: - raise RuntimeError(f'Column "{input_column}" not found in input data') - - result_df = pd.DataFrame() - - result_df["predictions"] = df[input_column].apply(self.predict_answer) - - result_df = result_df.rename(columns={"predictions": args["target"]}) - - return result_df - - def predict_answer(self, text): - """ - connects with anthropic messages api to predict the answer for the particular question - - """ - - args = self.model_storage.json_get("args") - - message = self.connection.messages.create( - model=args["using"]["model"], - max_tokens=args["using"]["max_tokens"], - messages=[ - {"role": "user", "content": text} - ] - ) - - content_blocks = message.content - - # assuming that message.content contains one ContentBlock item - # returning text value if type==text and content_blocks value if type!=text - if isinstance(content_blocks, list) and len(content_blocks) > 0: - content_block = content_blocks[0] - if content_block.type == 'text': - return content_block.text - else: - return content_blocks - else: - raise Exception( - f"Invalid output: {content_blocks}" - ) diff --git a/mindsdb/integrations/handlers/anthropic_handler/icon.svg b/mindsdb/integrations/handlers/anthropic_handler/icon.svg deleted file mode 100644 index f3e653f3bff..00000000000 --- a/mindsdb/integrations/handlers/anthropic_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git 
a/mindsdb/integrations/handlers/anthropic_handler/requirements.txt b/mindsdb/integrations/handlers/anthropic_handler/requirements.txt deleted file mode 100644 index a4dfccbf26e..00000000000 --- a/mindsdb/integrations/handlers/anthropic_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -anthropic==0.18.1 diff --git a/mindsdb/integrations/handlers/anthropic_handler/tests/test_anthropic.py b/mindsdb/integrations/handlers/anthropic_handler/tests/test_anthropic.py deleted file mode 100644 index 1594b79ee71..00000000000 --- a/mindsdb/integrations/handlers/anthropic_handler/tests/test_anthropic.py +++ /dev/null @@ -1,109 +0,0 @@ -import os -import pytest -import pandas as pd -from unittest.mock import patch - -from .base_ml_test import BaseMLAPITest - - -@pytest.mark.skipif(os.environ.get("ANTHROPIC_API_KEY") is None, reason="Missing API key!") -class TestAnthropic(BaseMLAPITest): - """Test Class for Anthropic Integration Testing""" - - def setup_method(self): - """Setup test environment, creating a project""" - super().setup_method() - self.run_sql("create database proj") - self.run_sql( - f""" - CREATE ML_ENGINE anthropic - FROM anthropic - USING - anthropic_api_key = '{self.get_api_key("ANTHROPIC_API_KEY")}'; - """ - ) - - def test_invalid_model_parameter(self): - """Test for invalid Anthropic model parameter""" - self.run_sql( - f""" - CREATE MODEL proj.test_anthropic_invalid_model - PREDICT answer - USING - engine='anthropic', - column='question', - model='this-claude-does-not-exist', - api_key='{self.get_api_key("ANTHROPIC_API_KEY")}'; - """ - ) - with pytest.raises(Exception): - self.wait_predictor("proj", "test_anthropic_invalid_model") - - def test_unknown_model_argument(self): - """Test for unknown argument when creating a Anthropic model""" - self.run_sql( - f""" - CREATE MODEL proj.test_anthropic_unknown_argument - PREDICT answer - USING - engine='anthropic', - column='question', - api_key='{self.get_api_key("ANTHROPIC_API_KEY")}', - 
evidently_wrong_argument='wrong value'; - """ - ) - with pytest.raises(Exception): - self.wait_predictor("proj", "test_anthropic_unknown_argument") - - def test_single_qa(self): - """Test for single question/answer pair""" - self.run_sql( - f""" - CREATE MODEL proj.test_anthropic_single_qa - PREDICT answer - USING - engine='anthropic', - column='question', - api_key='{self.get_api_key("ANTHROPIC_API_KEY")}'; - """ - ) - self.wait_predictor("proj", "test_anthropic_single_qa") - - result_df = self.run_sql( - """ - SELECT answer - FROM proj.test_anthropic_single_qa - WHERE question = 'What is the capital of Sweden?'; - """ - ) - assert "stockholm" in result_df["answer"].iloc[0].lower() - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_bulk_qa(self, mock_handler): - """Test for bulk question/answer pairs""" - df = pd.DataFrame.from_dict( - {"question": ["What is the capital of Sweden?", "What is the second planet of the solar system?"]} - ) - self.set_handler(mock_handler, name="pg", tables={"df": df}) - - self.run_sql( - f""" - CREATE MODEL proj.test_anthropic_bulk_qa - PREDICT answer - USING - engine='anthropic', - column='question', - api_key='{self.get_api_key("ANTHROPIC_API_KEY")}'; - """ - ) - self.wait_predictor("proj", "test_anthropic_bulk_qa") - - result_df = self.run_sql( - """ - SELECT p.answer - FROM pg.df as t - JOIN proj.test_anthropic_bulk_qa as p; - """ - ) - assert "stockholm" in result_df["answer"].iloc[0].lower() - assert "venus" in result_df["answer"].iloc[1].lower() diff --git a/mindsdb/integrations/handlers/bedrock_handler/README.md b/mindsdb/integrations/handlers/bedrock_handler/README.md deleted file mode 100644 index ef8383bc49a..00000000000 --- a/mindsdb/integrations/handlers/bedrock_handler/README.md +++ /dev/null @@ -1,150 +0,0 @@ ---- -title: Amazon Bedrock -sidebarTitle: Amazon Bedrock ---- - -This documentation describes the integration of MindsDB with [Amazon Bedrock](https://aws.amazon.com/bedrock/), a 
fully managed service that offers a choice of high-performing foundation models (FMs) from leading AI companies. -The integration allows for the deployment of models offered by Amazon Bedrock within MindsDB, providing the models with access to data from various data sources. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To use Amaon Bedrock within MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Obtain the AWS credentials for a user with access to the Amazon Bedrock service. - -## Setup - -Create an AI engine from the [Amazon Bedrock handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/bedrock_handler). - -```sql -CREATE ML_ENGINE bedrock_engine -FROM bedrock -USING - aws_access_key_id = 'AQAXEQK89OX07YS34OP', - aws_secret_access_key = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY', - aws_session_token = 'FwoGZXIvYXdzEJr...', - region_name = 'us-east-1'; -``` - -Required parameters for creating an engine include the following: - -- `aws_access_key_id`: The AWS access key ID for the user. -- `aws_secret_access_key`: The AWS secret access key for the user. -- `region_name`: The AWS region to use. - -Optional parameters include the following: - -- `aws_session_token`: The AWS session token for the user. This is required when using temporary security credentials. - -Create a model using `bedrock_engine` as an engine. - -```sql -CREATE MODEL bedrock_model -PREDICT answer -USING - engine = 'bedrock_engine', - question_column = 'question', - max_tokens = 100, - temperature = 0.3; -``` - -Required parameters for creating a model include the following: - -- `engine`: The name of the engine created via `CREATE ML_ENGINE`. 
- -Optional parameters include the following: - -- `mode`: The mode to run inference in. The default mode is `default` and the other supported mode is `conversational`. -- `model_id`: The model ID to use for inference. The default model ID is `amazon.titan-text-premier-v1:0` and a list of other supported models can be found https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html. -- `question_column`: The column that stores the user input. -- `context_column`: The column that stores context to the user input. -- `prompt_template`: A template for the prompt with placeholders to be replaced by the user input. -- `max_tokens`: The maximum number of tokens to be generated in the model's responses. -- `temperature`: The likelihood of the model selecting higher-probability options while generating a response. -- `top_p`: The percentage of most-likely candidates that the model considers for the next token. -- `stop`: A list of tokens that the model should stop generating at. - - -For the `default` and `conversational` modes, one of the following need to be provided: - * `prompt_template`. - * `question_column`, and an optional `context_column`. - - -## Usage - -### Default Mode - -In the `default` mode, the model will generate a separate response for each input provided. No context is maintained between the inputs. 
- -```sql -CREATE MODEL bedrock_default_model -PREDICT answer -USING - engine = 'bedrock_engine', - prompt_template = 'Answer the users input in a helpful way: {{question}}'; -``` - -To generate a response for a single input, the following query can be used: - -```sql -SELECT * -FROM bedrock_default_model -WHERE question = 'What is the capital of Sweden?'; -``` - -The response will look like the following: - -| question | answer | -| ------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| What is the capital of Sweden? | The capital of Sweden is Stockholm. Stockholm is the largest city in Sweden, with a population of over 900,000 people in the city proper and over 2 million in the metropolitan area. It is known for its beautiful architecture, scenic waterways, and rich cultural heritage. The city is built on 14 islands, which are connected by over 50 bridges, and is home to many museums, galleries, and historic landmarks. Some of the most famous attractions in Stockholm include the Vasa Museum, the Stockholm Palace, and the Old Town (Gamla Stan). 
| - -To generate responses for multiple inputs, the following query can be used: - -```sql -SELECT * -FROM files.unrelated_questions AS d -JOIN bedrock_default_model AS m -``` - -The response will look like the following: - -| question | answer | -| ---------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| What is the capital of Sweden? | The capital of Sweden is Stockholm. Stockholm is the most populated city in Sweden with over 975,000 residents. The city is known for its stunning architecture and beautiful waterways. | -| What is the second planet in the solar system? | The second planet from the sun in our solar system is Venus. Venus is often called Earth's "sister planet" because of their similar size, mass, and density. However, the two planets have very different atmospheres and surface conditions. Venus has a thick, toxic atmosphere composed of carbon dioxide, which traps heat and causes the planet to have surface temperatures that can reach up to 471 degrees Celsius (880 degrees Fahrenheit). Venus also has a highly reflective cloud cover that obscures its surface, making it difficult to study. 
Despite these challenges, Venus has been the subject of numerous scientific missions, including several orbiters and landers that have provided valuable insights into the planet's geology, atmosphere, and climate. | - - -`files.unrelated_questions` is a simple CSV file containing a `question` column (as expected by the above model) that has been uploaded to MindsDB. It is, however, possible to use any other supported data source in the same manner. - - -### Conversational Mode - -In the `conversational` mode, the model will maintain context between the inputs and generate a single response. This response will be placed in the last row of the result set. - -```sql -CREATE MODEL bedrock_conversational_model -PREDICT answer -USING - engine = 'bedrock_engine', - mode = 'conversational', - question_column = 'question'; -``` - -The syntax for generating responses in the `conversational` mode is the same as in the `default` mode. - -However, when generating responses for multiple inputs, the difference between the two modes becomes apparent. As mentioned above, the `conversational` mode maintains context between the inputs and generates a single response, which is placed in the last row of the result set: - -```sql -SELECT * -FROM files.related_questions AS d -JOIN bedrock_default_model AS m -``` - -This is what the response will look like: - -| question | answer | -| ----------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| What is the capital of Sweden? | [NULL] | -| What are some cool places to visit there? | The capital of Sweden is Stockholm. 
It’s a beautiful city, with lots of old buildings and a scenic waterfront. You should definitely visit the Royal Palace, which is the largest palace in Scandinavia. You can also visit the Vasa Museum, which has a famous 17th-century warship that sank in Stockholm harbor. And you should definitely check out the ABBA Museum, which is dedicated to the famous pop group. | diff --git a/mindsdb/integrations/handlers/bedrock_handler/__about__.py b/mindsdb/integrations/handlers/bedrock_handler/__about__.py deleted file mode 100644 index ea1518a836c..00000000000 --- a/mindsdb/integrations/handlers/bedrock_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Amazon Bedrock handler' -__package_name__ = 'mindsdb_bedrock_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Amazon Bedrock" -__author__ = 'MindsDB Inc.' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023- mindsdb' diff --git a/mindsdb/integrations/handlers/bedrock_handler/__init__.py b/mindsdb/integrations/handlers/bedrock_handler/__init__.py deleted file mode 100644 index 1b4b1a9d08f..00000000000 --- a/mindsdb/integrations/handlers/bedrock_handler/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from mindsdb.integrations.handlers.bedrock_handler.__about__ import __version__ as version, __description__ as description -try: - from .bedrock_handler import AmazonBedrockHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Amazon Bedrock' -name = 'bedrock' -type = HANDLER_TYPE.ML -icon_path = 'icon.svg' -permanent = False - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/bedrock_handler/bedrock_handler.py 
b/mindsdb/integrations/handlers/bedrock_handler/bedrock_handler.py deleted file mode 100644 index 0dc3f98ac23..00000000000 --- a/mindsdb/integrations/handlers/bedrock_handler/bedrock_handler.py +++ /dev/null @@ -1,328 +0,0 @@ -import numpy as np -import pandas as pd -from typing import Text, Tuple, Dict, List, Optional, Any - -from mindsdb.utilities import log - -from mindsdb.integrations.libs.base import BaseMLEngine -from mindsdb.integrations.libs.llm.utils import get_completed_prompts -from mindsdb.integrations.libs.api_handler_exceptions import MissingConnectionParams -from mindsdb.integrations.handlers.bedrock_handler.utilities import create_amazon_bedrock_client -from mindsdb.integrations.handlers.bedrock_handler.settings import AmazonBedrockHandlerEngineConfig, AmazonBedrockHandlerModelConfig - - -logger = log.getLogger(__name__) - - -class AmazonBedrockHandler(BaseMLEngine): - """ - This handler handles connection and inference with the Amazon Bedrock API. - """ - - name = 'bedrock' - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.generative = True - - def create_engine(self, connection_args: Dict) -> None: - """ - Validates the AWS credentials provided on creation of an engine. - - Args: - connection_args (Dict): The parameters of the engine. - - Raises: - Exception: If the handler is not configured with valid API credentials. - """ - connection_args = {k.lower(): v for k, v in connection_args.items()} - AmazonBedrockHandlerEngineConfig(**connection_args) - - def create(self, target, args: Dict = None, **kwargs: Any) -> None: - """ - Creates a model by validating the model configuration and saving it to the storage. - - Args: - target (Text): The target column name. - args (Dict): The parameters of the model. - kwargs (Any): Other keyword arguments. - - Raises: - Exception: If the model is not configured with valid parameters. 
- - Returns: - None - """ - if 'using' not in args: - raise MissingConnectionParams("Amazon Bedrock engine requires a USING clause! Refer to its documentation for more details.") - else: - model_args = args['using'] - # Replace 'model_id' with 'id' to match the Amazon Bedrock handler model configuration. - # This is done to avoid the Pydantic warning regarding conflicts with the protected 'model_' namespace. - if 'model_id' in model_args: - model_args['id'] = model_args['model_id'] - del model_args['model_id'] - - handler_model_config = AmazonBedrockHandlerModelConfig(**model_args, connection_args=self.engine_storage.get_connection_args()) - - # Save the model configuration to the storage. - handler_model_params = handler_model_config.model_dump() - logger.info(f"Saving model configuration to storage: {handler_model_params}") - - args['target'] = target - args['handler_model_params'] = handler_model_params - self.model_storage.json_set('args', args) - - def predict(self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> pd.DataFrame: - """ - Makes predictions using a model by invoking the Amazon Bedrock API. - - Args: - df (pd.DataFrame): The input data to invoke the model with. - args (Dict): The parameters passed when making predictions. - - Raises: - ValueError: If the input data does not match the configuration of the model. - - Returns: - pd.DataFrame: The input data with the predicted values in a new column. - """ - args = self.model_storage.json_get('args') - handler_model_params = args['handler_model_params'] - mode = handler_model_params['mode'] - model_id = handler_model_params['id'] - inference_config = handler_model_params.get('inference_config') - target = args['target'] - - if mode == 'default': - prompts, empty_prompt_ids = self._prepare_data_for_default_mode(df, args) - predictions = self._predict_for_default_mode(model_id, prompts, inference_config) - - # Fill the empty predictions with None. 
- for i in sorted(empty_prompt_ids): - predictions.insert(i, None) - - elif mode == 'conversational': - prompt, total_questions = self._prepare_data_for_conversational_mode(df, args) - prediction = self._predict_for_conversational_mode(model_id, prompt, inference_config) - - # Create a list of None values for the total number of questions and replace the last one with the prediction. - predictions = [None] * total_questions - predictions[-1] = prediction - - pred_df = pd.DataFrame(predictions, columns=[target]) - return pred_df - - def _prepare_data_for_default_mode(self, df: pd.DataFrame, args: Dict) -> List[Dict]: - """ - Prepares the input data for the default mode of the Amazon Bedrock handler. - A separate prompt is prepared for each question. - - Args: - df (pd.DataFrame): The input data to invoke the model with. - args (Dict): The parameters of the model. - - Returns: - List[Dict]: The prepared prompts for invoking the Amazon Bedrock API. The model will be invoked for each prompt. - """ - handler_model_params = args['handler_model_params'] - question_column = handler_model_params.get('question_column') - context_column = handler_model_params.get('context_column') - prompt_template = handler_model_params.get('prompt_template') - - if question_column is not None: - questions, empty_prompt_ids = self._prepare_data_with_question_and_context_columns( - df, - question_column, - context_column - ) - - elif prompt_template is not None: - questions, empty_prompt_ids = self._prepare_data_with_prompt_template(df, prompt_template) - - # Prepare the prompts. - questions = [question for i, question in enumerate(questions) if i not in empty_prompt_ids] - prompts = [{"role": "user", "content": [{"text": question}]} for question in questions] - - return prompts, empty_prompt_ids - - def _prepare_data_for_conversational_mode(self, df: pd.DataFrame, args: Dict) -> Tuple[List[Dict], int]: - """ - Prepares the input data for the conversational mode of the Amazon Bedrock handler. 
- A single prompt is prepared for all the questions. - - Args: - df (pd.DataFrame): The input data to invoke the model with. - args (Dict): The parameters of the model. - - Returns: - Tuple[List[Dict], int]: The prepared prompt for invoking the Amazon Bedrock API and the total number of questions. - The model will be invoked once using this prompt which contains all the questions. - The total number of questions is used to produce the final list of predictions. - """ - handler_model_params = args['handler_model_params'] - question_column = handler_model_params.get('question_column') - context_column = handler_model_params.get('context_column') - prompt_template = handler_model_params.get('prompt_template') - - if question_column is not None: - questions, empty_prompt_ids = self._prepare_data_with_question_and_context_columns( - df, - question_column, - context_column - ) - - if prompt_template is not None: - questions, empty_prompt_ids = self._prepare_data_with_prompt_template(df, prompt_template) - - # Prepare the prompts. - questions = [question for i, question in enumerate(questions) if i not in empty_prompt_ids] - prompt = [{"role": "user", "content": [{"text": question} for question in questions]}] - - # Get the total number of questions; including the empty ones. - total_questions = len(df) - - return prompt, total_questions - - def _prepare_data_with_question_and_context_columns(self, df: pd.DataFrame, question_column: Text, context_column: Text = None) -> Tuple[List[Text], List[int]]: - """ - Prepares the input data with question and context columns. - - Args: - df (pd.DataFrame): The input data to invoke the model with. - question_column (Text): The column containing the questions. - context_column (Text): The column containing the context. - - Returns: - Tuple[List[Text], List[int]]: The questions to build the prompts for invoking the Amazon Bedrock API and the empty prompt IDs. 
- """ - if question_column not in df.columns: - raise ValueError(f"Column {question_column} not found in the dataframe!") - - if context_column and context_column not in df.columns: - raise ValueError(f"Column {context_column} not found in the dataframe!") - - if context_column: - empty_prompt_ids = np.where( - df[[context_column, question_column]] - .isna() - .all(axis=1) - .values - )[0] - contexts = list(df[context_column].apply(lambda x: str(x))) - questions_without_context = list(df[question_column].apply(lambda x: str(x))) - - questions = [ - f'Context: {c}\nQuestion: {q}\nAnswer: ' - for c, q in zip(contexts, questions_without_context) - ] - - else: - questions = list(df[question_column].apply(lambda x: str(x))) - empty_prompt_ids = np.where( - df[[question_column]].isna().all(axis=1).values - )[0] - - return questions, empty_prompt_ids - - def _prepare_data_with_prompt_template(self, df: pd.DataFrame, prompt_template: Text) -> Tuple[List[Text], List[int]]: - """ - Prepares the input data with a prompt template. - - Args: - df (pd.DataFrame): The input data to invoke the model with. - prompt_template (Text): The base prompt template to use. - - Returns: - Tuple[List[Text], List[int]]: The questions to build the prompts for invoking the Amazon Bedrock API and the empty prompt IDs. - """ - questions, empty_prompt_ids = get_completed_prompts(prompt_template, df) - - return questions, empty_prompt_ids - - def _predict_for_default_mode(self, model_id: Text, prompts: List[Text], inference_config: Dict) -> List[Text]: - """ - Makes predictions for the default mode of the Amazon Bedrock handler using the prepared prompts. - - Args: - model_id (Text): The ID of the model in Amazon Bedrock. - prompts (List[Text]): The prepared prompts for invoking the Amazon Bedrock API. - inference_config (Dict): The inference configuration supported by the Amazon Bedrock API. - - Returns: - List[Text]: The predictions made by the Amazon Bedrock API. 
- """ - predictions = [] - bedrock_runtime_client = create_amazon_bedrock_client( - 'bedrock-runtime', - **self.engine_storage.get_connection_args() - ) - - for prompt in prompts: - response = bedrock_runtime_client.converse( - modelId=model_id, - messages=[prompt], - inferenceConfig=inference_config - ) - predictions.append( - response["output"]["message"]["content"][0]["text"] - ) - - return predictions - - def _predict_for_conversational_mode(self, model_id: Text, prompt: List[Text], inference_config: Dict) -> Text: - """ - Makes a prediction for the conversational mode of the Amazon Bedrock handler using the prepared prompt. - - Args: - model_id (Text): The ID of the model in Amazon Bedrock. - prompts (List[Text]): Prepared prompts for invoking the Amazon Bedrock API. - inference_config (Dict): Inference configuration supported by the Amazon Bedrock API. - - Returns: - Text: The prediction made by the Amazon Bedrock API. - """ - bedrock_runtime_client = create_amazon_bedrock_client( - 'bedrock-runtime', - **self.engine_storage.get_connection_args() - ) - - response = bedrock_runtime_client.converse( - modelId=model_id, - messages=prompt, - inferenceConfig=inference_config - ) - - return response["output"]["message"]["content"][0]["text"] - - def describe(self, attribute: Optional[Text] = None) -> pd.DataFrame: - """ - Get the metadata or arguments of a model. - - Args: - attribute (Optional[Text]): Attribute to describe. Can be 'args' or 'metadata'. - - Returns: - pd.DataFrame: Model metadata or model arguments. 
- """ - args = self.model_storage.json_get('args') - - if attribute == 'args': - del args['handler_model_params'] - return pd.DataFrame(args.items(), columns=['key', 'value']) - - elif attribute == 'metadata': - model_id = args['handler_model_params']['id'] - try: - bedrock_client = create_amazon_bedrock_client( - 'bedrock', - **self.engine_storage.get_connection_args() - ) - meta = bedrock_client.get_foundation_model(modelIdentifier=model_id)['modelDetails'] - except Exception as e: - meta = {'error': str(e)} - return pd.DataFrame(dict(meta).items(), columns=['key', 'value']) - - else: - tables = ['args', 'metadata'] - return pd.DataFrame(tables, columns=['tables']) diff --git a/mindsdb/integrations/handlers/bedrock_handler/icon.svg b/mindsdb/integrations/handlers/bedrock_handler/icon.svg deleted file mode 100644 index a6f58975074..00000000000 --- a/mindsdb/integrations/handlers/bedrock_handler/icon.svg +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/mindsdb/integrations/handlers/bedrock_handler/requirements.txt b/mindsdb/integrations/handlers/bedrock_handler/requirements.txt deleted file mode 100644 index 0f0cb6a9ecf..00000000000 --- a/mindsdb/integrations/handlers/bedrock_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pydantic-settings >= 2.1.0 \ No newline at end of file diff --git a/mindsdb/integrations/handlers/bedrock_handler/settings.py b/mindsdb/integrations/handlers/bedrock_handler/settings.py deleted file mode 100644 index b9a375b4583..00000000000 --- a/mindsdb/integrations/handlers/bedrock_handler/settings.py +++ /dev/null @@ -1,287 +0,0 @@ -import textwrap -from pydantic_settings import BaseSettings -from botocore.exceptions import ClientError -from typing import Text, List, Dict, Optional, Any, ClassVar -from pydantic import BaseModel, Field, model_validator, field_validator - -from mindsdb.integrations.handlers.bedrock_handler.utilities import create_amazon_bedrock_client -from 
mindsdb.integrations.utilities.handlers.validation_utilities import ParameterValidationUtilities - - -class AmazonBedrockHandlerSettings(BaseSettings): - """ - Settings for Amazon Bedrock handler. - - Attributes - ---------- - DEFAULT_MODE : Text - The default mode for the handler. - - SUPPORTED_MODES : List - List of supported modes for the handler. - - DEFAULT_TEXT_MODEL_ID : Text - The default model ID to use for text generation. This will be the default model ID for the default and conversational modes. - """ - # Modes. - # TODO: Add other modes. - DEFAULT_MODE: ClassVar[Text] = 'default' - SUPPORTED_MODES: ClassVar[List] = ['default', 'conversational'] - - # TODO: Set the default model ID for other modes. - # Model IDs. - DEFAULT_TEXT_MODEL_ID: ClassVar[Text] = 'amazon.titan-text-premier-v1:0' - - -class AmazonBedrockHandlerEngineConfig(BaseModel): - """ - Configuration model for engines created via the Amazon Bedrock handler. - - Attributes - ---------- - aws_access_key_id : Text - The AWS access key ID. - - aws_secret_access_key : Text - The AWS secret access key. - - region_name : Text - The AWS region name. - - aws_session_token : Text, Optional - The AWS session token. Optional, but required for temporary security credentials. - """ - aws_access_key_id: Text - aws_secret_access_key: Text - region_name: Text - aws_session_token: Optional[Text] = None - - class Config: - extra = "forbid" - - @model_validator(mode="before") - @classmethod - def check_if_params_contain_typos(cls, values: Any) -> Any: - """ - Checks if there are any typos in the parameters. - - Args: - values (Any): The parameters provided when creating an engine via the Amazon Bedrock handler. - - Raises: - ValueError: If there are any typos in the parameters. 
- """ - ParameterValidationUtilities.validate_parameter_spelling(cls, values) - - return values - - @model_validator(mode="after") - @classmethod - def check_access_to_amazon_bedrock(cls, model: BaseModel) -> BaseModel: - """ - Checks if the AWS credentials provided are valid and Amazon Bedrock is accessible. - - Args: - model (BaseModel): The parameters provided when creating an engine via the Amazon Bedrock handler. - - Raises: - ValueError: If the AWS credentials are invalid or Amazon Bedrock is not accessible. - """ - bedrock_client = create_amazon_bedrock_client( - "bedrock", - model.aws_access_key_id, - model.aws_secret_access_key, - model.region_name, - model.aws_session_token - ) - - try: - bedrock_client.list_foundation_models() - except ClientError as e: - raise ValueError(f"Invalid Amazon Bedrock credentials: {e}!") - - return model - - -class AmazonBedrockHandlerModelConfig(BaseModel): - """ - Configuration model for models created via the Amazon Bedrock handler. - - Attributes - ---------- - id : Text - The ID of the model in Amazon Bedrock. - - mode : Optional[Text] - The mode to run the handler model in. The default mode and the supported modes are defined in the AmazonBedrockHandlerSettings class. - - prompt_template : Optional[Text] - The base template for prompts with placeholders. - - question_column : Optional[Text] - The column name for questions to be asked. - - context_column : Optional[Text] - The column name for context to be provided with the questions. - - temperature : Optional[float] - The setting for the randomness in the responses generated by the model. - - top_p : Optional[float] - The setting for the probability of the tokens in the responses generated by the model. - - max_tokens : Optional[int] - The maximum number of tokens to generate in the responses. - - stop : Optional[List[Text]] - The list of sequences to stop the generation of tokens in the responses. 
- - connection_args : Dict - The connection arguments passed required to connect to Amazon Bedrock. These are AWS credentials provided when creating the engine. - """ - # User-provided Handler Model Prameters: These are parameters specific to the MindsDB handler for Amazon Bedrock provided by the user. - id: Text = Field(None) - mode: Optional[Text] = Field(AmazonBedrockHandlerSettings.DEFAULT_MODE) - prompt_template: Optional[Text] = Field(None) - question_column: Optional[Text] = Field(None) - context_column: Optional[Text] = Field(None) - - # Amazon Bedrock Model Parameters: These are parameters specific to the models in Amazon Bedrock. They are provided by the user. - temperature: Optional[float] = Field(None, bedrock_model_param=True, bedrock_model_param_name='temperature') - top_p: Optional[float] = Field(None, bedrock_model_param=True, bedrock_model_param_name='topP') - max_tokens: Optional[int] = Field(None, bedrock_model_param=True, bedrock_model_param_name='maxTokens') - stop: Optional[List[Text]] = Field(None, bedrock_model_param=True, bedrock_model_param_name='stopSequences') - - # System-provided Handler Model Parameters: These are parameters specific to the MindsDB handler for Amazon Bedrock provided by the system. - connection_args: Dict = Field(None, exclude=True) - - class Config: - extra = "forbid" - - @model_validator(mode="before") - @classmethod - def check_if_params_contain_typos(cls, values: Any) -> Any: - """ - Checks if there are any typos in the parameters. - - Args: - values (Any): The parameters provided when creating a model via the Amazon Bedrock handler. - - Raises: - ValueError: If there are any typos in the parameters. - """ - ParameterValidationUtilities.validate_parameter_spelling(cls, values) - - return values - - @field_validator("mode") - @classmethod - def check_if_mode_is_supported(cls, mode: Text) -> Text: - """ - Checks if the mode provided is supported. - - Args: - mode (Text): The mode to run the handler model in. 
- - Raises: - ValueError: If the mode provided is not supported. - """ - if mode not in AmazonBedrockHandlerSettings.SUPPORTED_MODES: - raise ValueError(f"Mode {mode} is not supported. The supported modes are {''.join(AmazonBedrockHandlerSettings.SUPPORTED_MODES)}!") - - return mode - - @model_validator(mode="after") - @classmethod - def check_if_model_id_is_valid_and_correct_for_mode(cls, model: BaseModel) -> BaseModel: - """ - Checks if the model ID and the parameters provided for the model are valid. - If a model ID is not provided, the default model ID for that mode will be used. - - Args: - values (Any): The parameters provided when creating a model via the Amazon Bedrock handler. - - Raises: - ValueError: If the model ID provided is invalid or the parameters provided are invalid for the chosen model. - """ - # TODO: Set the default model ID for other modes. - if model.id is None: - if model.mode in ['default', 'conversational']: - model.id = AmazonBedrockHandlerSettings.DEFAULT_TEXT_MODEL_ID - - bedrock_client = create_amazon_bedrock_client( - "bedrock", - **model.connection_args - ) - - try: - # Check if the model ID is valid and accessible. - response = bedrock_client.get_foundation_model(modelIdentifier=model.id) - except ClientError as e: - raise ValueError(f"Invalid Amazon Bedrock model ID: {e}!") - - # Check if the model is suitable for the mode provided. - if model.mode in ['default', 'conversational']: - if 'TEXT' not in response['modelDetails']['outputModalities']: - raise ValueError(f"The models used for the {model.mode} should support text generation!") - - return model - - @model_validator(mode="after") - @classmethod - def check_if_params_are_valid_for_mode(cls, model: BaseModel) -> BaseModel: - """ - Checks if the parameters required for the chosen mode provided are valid. - - Args: - model (BaseModel): The parameters provided when creating a model via the Amazon Bedrock handler. 
- - Raises: - ValueError: If the parameters provided are invalid for the mode provided. - """ - # If the mode is default, one of the following need to be provided: - # 1. prompt_template. - # 2. question_column with an optional context_column. - # TODO: Find the other possible parameters/combinations for the default mode. - if model.mode in ['default', 'conversational']: - error_message = textwrap.dedent( - f"""\ - For the {model.mode} mode, one of the following need to be provided: - 1) A `prompt_template` - 2) A `question_column` and an optional `context_column` - """ - ) - if model.prompt_template is None and model.question_column is None: - raise ValueError(error_message) - - if model.prompt_template is not None and model.question_column is not None: - raise ValueError(error_message) - - if model.context_column is not None and model.question_column is None: - raise ValueError(error_message) - - # TODO: Add validations for other modes. - - return model - - def model_dump(self) -> Dict: - """ - Dumps the model configuration to a dictionary. - - Returns: - Dict: The configuration of the model. 
- """ - bedrock_model_param_names = [val.get("bedrock_model_param_name") for key, val in self.model_json_schema(mode='serialization')['properties'].items() if val.get("bedrock_model_param")] - bedrock_model_params = [key for key, val in self.model_json_schema(mode='serialization')['properties'].items() if val.get("bedrock_model_param")] - - handler_model_params = [key for key, val in self.model_json_schema(mode='serialization')['properties'].items() if not val.get("bedrock_model_param")] - - inference_config = {} - for index, key in enumerate(bedrock_model_params): - if getattr(self, key) is not None: - inference_config[bedrock_model_param_names[index]] = getattr(self, key) - - return { - "inference_config": inference_config, - **{key: getattr(self, key) for key in handler_model_params} - } diff --git a/mindsdb/integrations/handlers/bedrock_handler/utilities.py b/mindsdb/integrations/handlers/bedrock_handler/utilities.py deleted file mode 100644 index 7ea506f88ca..00000000000 --- a/mindsdb/integrations/handlers/bedrock_handler/utilities.py +++ /dev/null @@ -1,46 +0,0 @@ -import boto3 -from typing import Text, Optional - - -def create_amazon_bedrock_client( - client: Text, - aws_access_key_id: Text, - aws_secret_access_key: Text, - region_name: Text, - aws_session_token: Optional[Text] = None, -) -> boto3.client: - """ - Create an Amazon Bedrock client via boto3. - - Parameters - ---------- - client : Text - The type of client to create. It can be 'bedrock' or 'bedrock-runtime'. - - aws_access_key_id : Text - The AWS access key ID. - - aws_secret_access_key : Text - The AWS secret access key. - - region_name : Text - The AWS region name. - - aws_session_token : Text, Optional - The AWS session token. Optional, but required for temporary security credentials. - - Returns - ------- - boto3.client - Amazon Bedrock client. 
- """ - if client not in ["bedrock", "bedrock-runtime"]: - raise ValueError("The client must be 'bedrock' or 'bedrock-runtime'") - - return boto3.client( - client, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - region_name=region_name, - aws_session_token=aws_session_token, - ) diff --git a/mindsdb/integrations/handlers/bigquery_handler/README.md b/mindsdb/integrations/handlers/bigquery_handler/README.md deleted file mode 100644 index d4cc9cacffc..00000000000 --- a/mindsdb/integrations/handlers/bigquery_handler/README.md +++ /dev/null @@ -1,105 +0,0 @@ ---- -title: Google BigQuery -sidebarTitle: Google BigQuery ---- - -This documentation describes the integration of MindsDB with [Google BigQuery](https://cloud.google.com/bigquery?hl=en), a fully managed, AI-ready data analytics platform that helps you maximize value from your data. -The integration allows MindsDB to access data stored in the BigQuery warehouse and enhance it with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect BigQuery to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to your BigQuery warehouse from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE bigquery_datasource -WITH - engine = "bigquery", - parameters = { - "project_id": "bgtest-1111", - "dataset": "mydataset", - "service_account_keys": "/tmp/keys.json" - }; -``` - -Required connection parameters include the following: - -- `project_id`: The globally unique identifier for your project in Google Cloud where BigQuery is located. -- `dataset`: The default dataset to connect to. 
- -Optional connection parameters include the following: - -- `service_account_keys`: The full path to the service account key file. -- `service_account_json`: The content of a JSON file defined by the `service_account_keys` parameter. - - - One of `service_account_keys` or `service_account_json` has to be provided to - establish a connection to BigQuery. - - -## Usage - -Retrieve data from a specified table in the default dataset by providing the integration name and table name: - -```sql -SELECT * -FROM bigquery_datasource.table_name -LIMIT 10; -``` - -Retrieve data from a specified table in a different dataset by providing the integration name, dataset name and table name: - -```sql -SELECT * -FROM bigquery_datasource.dataset_name.table_name -LIMIT 10; -``` - -Run SQL in any supported BigQuery dialect directly on the connected BigQuery database: - -```sql -SELECT * FROM bigquery_datasource ( - - --Native Query Goes Here - SELECT * - FROM t1 - WHERE t1.a IN (SELECT t2.a - FROM t2 FOR SYSTEM_TIME AS OF t1.timestamp_column); - -); -``` - - - The above examples utilize `bigquery_datasource` as the datasource name, which - is defined in the `CREATE DATABASE` command. - - -## Troubleshooting Guide - - -`Database Connection Error` - -- **Symptoms**: Failure to connect MindsDB with the BigQuery warehouse. -- **Checklist**: - 1. Make sure that the Google Cloud account is active and the Google BigQuery service is enabled. - 2. Confirm that the project ID, dataset and service account credentials are correct. Try a direct BigQuery connection using a client like DBeaver. - 3. Ensure a stable network between MindsDB and Google BigQuery. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -- **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters. -- **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. 
- Examples: - _ Incorrect: SELECT _ FROM integration.travel data - _ Incorrect: SELECT _ FROM integration.'travel data' - _ Correct: SELECT _ FROM integration.\`travel data\` - diff --git a/mindsdb/integrations/handlers/bigquery_handler/__about__.py b/mindsdb/integrations/handlers/bigquery_handler/__about__.py deleted file mode 100644 index b002d6b7bbf..00000000000 --- a/mindsdb/integrations/handlers/bigquery_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Google BigQuery handler' -__package_name__ = 'mindsdb_bigquery_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for BigQuery" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/bigquery_handler/__init__.py b/mindsdb/integrations/handlers/bigquery_handler/__init__.py deleted file mode 100644 index cc200f18d56..00000000000 --- a/mindsdb/integrations/handlers/bigquery_handler/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL - -from .connection_args import connection_args, connection_args_example - -try: - from .bigquery_handler import BigQueryHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = "Google BigQuery" -name = "bigquery" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.MINDSDB - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "support_level", - "import_error", - "icon_path", - "connection_args", - "connection_args_example", -] diff --git a/mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py b/mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py deleted file mode 
100644 index 7648a91c3fb..00000000000 --- a/mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +++ /dev/null @@ -1,463 +0,0 @@ -import json -from typing import Any, Dict, Optional, Text - -from google.cloud.bigquery import Client, QueryJobConfig, DEFAULT_RETRY -from google.api_core.exceptions import BadRequest, NotFound -import pandas as pd -from sqlalchemy_bigquery.base import BigQueryDialect - -from mindsdb.utilities import log -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.integrations.libs.base import MetaDatabaseHandler -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.integrations.utilities.handlers.auth_utilities.google import ( - GoogleServiceAccountOAuth2Manager, -) -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) - -logger = log.getLogger(__name__) - - -class BigQueryHandler(MetaDatabaseHandler): - """ - This handler handles connection and execution of Google BigQuery statements. - """ - - name = "bigquery" - - def __init__(self, name: Text, connection_data: Dict, **kwargs: Any): - super().__init__(name) - self.connection_data = connection_data - self.client = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self): - """ - Establishes a connection to a BigQuery warehouse. - - Raises: - ValueError: If the required connection parameters are not provided or if the credentials cannot be parsed. - mindsdb.integrations.utilities.handlers.auth_utilities.exceptions.NoCredentialsException: If none of the required forms of credentials are provided. - mindsdb.integrations.utilities.handlers.auth_utilities.exceptions.AuthException: If authentication fails. - - Returns: - google.cloud.bigquery.client.Client: The client object for the BigQuery connection. 
- """ - if self.is_connected is True: - return self.connection - - # Mandatory connection parameters - if not all(key in self.connection_data for key in ["project_id", "dataset"]): - raise ValueError("Required parameters (project_id, dataset) must be provided.") - - service_account_json = self.connection_data.get("service_account_json") - if isinstance(service_account_json, str): - # GUI send it as str - try: - service_account_json = json.loads(service_account_json) - except json.decoder.JSONDecodeError: - raise ValueError("'service_account_json' is not valid JSON") - if isinstance(service_account_json, dict) and isinstance(service_account_json.get("private_key"), str): - # some editors may escape new line symbol, also replace windows-like newlines - service_account_json["private_key"] = ( - service_account_json["private_key"].replace("\\n", "\n").replace("\r\n", "\n") - ) - - google_sa_oauth2_manager = GoogleServiceAccountOAuth2Manager( - credentials_file=self.connection_data.get("service_account_keys"), - credentials_json=service_account_json, - ) - credentials = google_sa_oauth2_manager.get_oauth2_credentials() - - billing_project = self.connection_data.get("billing_project") or self.connection_data["project_id"] - client = Client(project=billing_project, credentials=credentials) - self.is_connected = True - self.connection = client - return self.connection - - def disconnect(self): - """ - Closes the connection to the BigQuery warehouse if it's currently open. - """ - if self.is_connected is False: - return - self.connection.close() - self.is_connected = False - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the BigQuery warehouse. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. 
- """ - response = StatusResponse(False) - - try: - connection = self.connect() - connection.query("SELECT 1;", timeout=10, retry=DEFAULT_RETRY.with_deadline(10)) - - # Check if the dataset exists - dataset_project = self.connection_data.get("dataset_project") or self.connection_data["project_id"] - dataset_ref = f"{dataset_project}.{self.connection_data['dataset']}" - connection.get_dataset(dataset_ref) - - response.success = True - except (BadRequest, ValueError) as e: - logger.error(f"Error connecting to BigQuery {self.connection_data['project_id']}, {e}!") - response.error_message = e - except NotFound: - response.error_message = ( - f"Error connecting to BigQuery {self.connection_data['project_id']}: " - f"dataset '{self.connection_data['dataset']}' not found" - ) - - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> Response: - """ - Executes a SQL query on the BigQuery warehouse and returns the result. - - Args: - query (str): The SQL query to be executed. - - Returns: - Response: A response object containing the result of the query or an error message. 
- """ - connection = self.connect() - try: - dataset_project = self.connection_data.get("dataset_project") or self.connection_data["project_id"] - job_config = QueryJobConfig(default_dataset=f"{dataset_project}.{self.connection_data['dataset']}") - query = connection.query(query, job_config=job_config) - result = query.to_dataframe() - has_table_result = isinstance(result, pd.DataFrame) and (not result.empty or len(result.columns) > 0) - if has_table_result: - response = Response(RESPONSE_TYPE.TABLE, result) - else: - response = Response(RESPONSE_TYPE.OK) - except Exception as e: - logger.error(f"Error running query: {query} on {self.connection_data['project_id']}!") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - return response - - def query(self, query: ASTNode) -> Response: - """ - Executes a SQL query represented by an ASTNode and retrieves the data. - - Args: - query (ASTNode): An ASTNode representing the SQL query to be executed. - - Returns: - Response: The response from the `native_query` method, containing the result of the SQL query execution. - """ - renderer = SqlalchemyRender(BigQueryDialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ - Retrieves a list of all non-system tables and views in the configured dataset of the BigQuery warehouse. - - Returns: - Response: A response object containing the list of tables and views, formatted as per the `Response` class. 
- """ - dataset_project = self.connection_data.get("dataset_project") or self.connection_data["project_id"] - query = f""" - SELECT table_name, table_schema, table_type - FROM `{dataset_project}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.TABLES` - WHERE table_type IN ('BASE TABLE', 'VIEW') - """ - result = self.native_query(query) - return result - - def get_columns(self, table_name) -> Response: - """ - Retrieves column details for a specified table in the configured dataset of the BigQuery warehouse. - - Args: - table_name (str): The name of the table for which to retrieve column information. - - Returns: - Response: A response object containing the column details, formatted as per the `Response` class. - Raises: - ValueError: If the 'table_name' is not a valid string. - """ - dataset_project = self.connection_data.get("dataset_project") or self.connection_data["project_id"] - query = f""" - SELECT column_name AS Field, data_type as Type - FROM `{dataset_project}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.COLUMNS` - WHERE table_name = '{table_name}' - """ - result = self.native_query(query) - return result - - def meta_get_tables(self, table_names: Optional[list] = None) -> Response: - """ - Retrieves table metadata for the specified tables (or all tables if no list is provided). - - Args: - table_names (list): A list of table names for which to retrieve metadata information. - - Returns: - Response: A response object containing the metadata information, formatted as per the `Response` class. 
- """ - dataset_project = self.connection_data.get("dataset_project") or self.connection_data["project_id"] - query = f""" - SELECT - t.table_name, - t.table_schema, - t.table_type, - st.row_count - FROM - `{dataset_project}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.TABLES` AS t - LEFT JOIN - `{dataset_project}.{self.connection_data["dataset"]}.__TABLES__` AS st - ON - t.table_name = st.table_id - WHERE - t.table_type IN ('BASE TABLE', 'VIEW') - """ - - if table_names is not None and len(table_names) > 0: - table_names = [f"'{t}'" for t in table_names] - query += f" AND t.table_name IN ({','.join(table_names)})" - - result = self.native_query(query) - return result - - def meta_get_columns(self, table_names: Optional[list] = None) -> Response: - """ - Retrieves column metadata for the specified tables (or all tables if no list is provided). - - Args: - table_names (list): A list of table names for which to retrieve column metadata. - - Returns: - Response: A response object containing the column metadata. - """ - dataset_project = self.connection_data.get("dataset_project") or self.connection_data["project_id"] - query = f""" - SELECT - table_name, - column_name, - data_type, - column_default, - CASE is_nullable - WHEN 'YES' THEN TRUE - ELSE FALSE - END AS is_nullable - FROM - `{dataset_project}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.COLUMNS` - """ - - if table_names is not None and len(table_names) > 0: - table_names = [f"'{t}'" for t in table_names] - query += f" WHERE table_name IN ({','.join(table_names)})" - - result = self.native_query(query) - return result - - def meta_get_column_statistics_for_table(self, table_name: str, columns: list) -> Response: - """ - Retrieves statistics for the specified columns in a table. - - Args: - table_name (str): The name of the table. - columns (list): A list of column names to retrieve statistics for. - - Returns: - Response: A response object containing the column statistics. 
- """ - # Check column data types - dataset_project = self.connection_data.get("dataset_project") or self.connection_data["project_id"] - column_types_query = f""" - SELECT column_name, data_type - FROM `{dataset_project}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.COLUMNS` - WHERE table_name = '{table_name}' - """ - column_types_result = self.native_query(column_types_query) - - if column_types_result.resp_type != RESPONSE_TYPE.TABLE: - logger.error(f"Error retrieving column types for table {table_name}") - return Response( - RESPONSE_TYPE.ERROR, - error_message=f"Could not retrieve column types for table {table_name}", - ) - - column_type_map = dict( - zip( - column_types_result.data_frame["column_name"], - column_types_result.data_frame["data_type"], - ) - ) - - # Types that don't support MIN/MAX aggregations - UNSUPPORTED_MINMAX_PREFIXES = ("ARRAY", "STRUCT", "RECORD") - UNSUPPORTED_MINMAX_TYPES = ("GEOGRAPHY", "JSON", "BYTES") - - def supports_minmax(data_type: str) -> bool: - """Check if a BigQuery data type supports MIN/MAX operations.""" - if data_type is None: - return False - data_type_upper = data_type.upper() - if any(data_type_upper.startswith(prefix) for prefix in UNSUPPORTED_MINMAX_PREFIXES): - return False - if data_type_upper in UNSUPPORTED_MINMAX_TYPES: - return False - return True - - # To avoid hitting BigQuery's query size limits, we will chunk the columns into batches. 
- BATCH_SIZE = 20 - - def chunked(lst, n): - """Yields successive n-sized chunks from lst.""" - for i in range(0, len(lst), n): - yield lst[i : i + n] - - queries = [] - for column_batch in chunked(columns, BATCH_SIZE): - batch_queries = [] - for column in column_batch: - data_type = column_type_map.get(column) - - if supports_minmax(data_type): - # Full statistics for supported types - batch_queries.append( - f""" - SELECT - '{table_name}' AS table_name, - '{column}' AS column_name, - SAFE_DIVIDE(COUNTIF(`{column}` IS NULL), COUNT(*)) * 100 AS null_percentage, - CAST(MIN(`{column}`) AS STRING) AS minimum_value, - CAST(MAX(`{column}`) AS STRING) AS maximum_value, - COUNT(DISTINCT `{column}`) AS distinct_values_count - FROM - `{dataset_project}.{self.connection_data["dataset"]}.{table_name}` - """ - ) - else: - # Limited statistics for complex types (no MIN/MAX/COUNT DISTINCT) - logger.info(f"Skipping MIN/MAX for column {column} with unsupported type: {data_type}") - batch_queries.append( - f""" - SELECT - '{table_name}' AS table_name, - '{column}' AS column_name, - SAFE_DIVIDE(COUNTIF(`{column}` IS NULL), COUNT(*)) * 100 AS null_percentage, - CAST(NULL AS STRING) AS minimum_value, - CAST(NULL AS STRING) AS maximum_value, - CAST(NULL AS INT64) AS distinct_values_count - FROM - `{dataset_project}.{self.connection_data["dataset"]}.{table_name}` - """ - ) - - if batch_queries: - query = " UNION ALL ".join(batch_queries) - queries.append(query) - - results = [] - for query in queries: - try: - result = self.native_query(query) - if result.resp_type == RESPONSE_TYPE.TABLE: - results.append(result.data_frame) - else: - logger.error(f"Error retrieving column statistics for table {table_name}: {result.error_message}") - except Exception as e: - logger.error(f"Exception occurred while retrieving column statistics for table {table_name}: {e}") - - if not results: - logger.warning(f"No column statistics could be retrieved for table {table_name}.") - return Response( - 
RESPONSE_TYPE.ERROR, - error_message=f"No column statistics could be retrieved for table {table_name}.", - ) - return Response( - RESPONSE_TYPE.TABLE, - pd.concat(results, ignore_index=True) if results else pd.DataFrame(), - ) - - def meta_get_primary_keys(self, table_names: Optional[list] = None) -> Response: - """ - Retrieves primary key information for the specified tables (or all tables if no list is provided). - - Args: - table_names (list): A list of table names for which to retrieve primary key information. - - Returns: - Response: A response object containing the primary key information. - """ - dataset_project = self.connection_data.get("dataset_project") or self.connection_data["project_id"] - query = f""" - SELECT - tc.table_name, - kcu.column_name, - kcu.ordinal_position, - tc.constraint_name - FROM - `{dataset_project}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.TABLE_CONSTRAINTS` AS tc - JOIN - `{dataset_project}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.KEY_COLUMN_USAGE` AS kcu - ON - tc.constraint_name = kcu.constraint_name - WHERE - tc.constraint_type = 'PRIMARY KEY' - """ - - if table_names is not None and len(table_names) > 0: - table_names = [f"'{t}'" for t in table_names] - query += f" AND tc.table_name IN ({','.join(table_names)})" - - result = self.native_query(query) - return result - - def meta_get_foreign_keys(self, table_names: Optional[list] = None) -> Response: - """ - Retrieves foreign key information for the specified tables (or all tables if no list is provided). - - Args: - table_names (list): A list of table names for which to retrieve foreign key information. - - Returns: - Response: A response object containing the foreign key information. 
- """ - dataset_project = self.connection_data.get("dataset_project") or self.connection_data["project_id"] - query = f""" - SELECT - ccu.table_name AS parent_table_name, - ccu.column_name AS parent_column_name, - kcu.table_name AS child_table_name, - kcu.column_name AS child_column_name, - tc.constraint_name - FROM - `{dataset_project}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.TABLE_CONSTRAINTS` AS tc - JOIN - `{dataset_project}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.KEY_COLUMN_USAGE` AS kcu - ON - tc.constraint_name = kcu.constraint_name - JOIN - `{dataset_project}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE` AS ccu - ON - tc.constraint_name = ccu.constraint_name - WHERE - tc.constraint_type = 'FOREIGN KEY' - """ - - if table_names is not None and len(table_names) > 0: - table_names = [f"'{t}'" for t in table_names] - query += f" AND tc.table_name IN ({','.join(table_names)})" - - result = self.native_query(query) - return result diff --git a/mindsdb/integrations/handlers/bigquery_handler/connection_args.py b/mindsdb/integrations/handlers/bigquery_handler/connection_args.py deleted file mode 100644 index b3b90b0f9ba..00000000000 --- a/mindsdb/integrations/handlers/bigquery_handler/connection_args.py +++ /dev/null @@ -1,27 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - project_id={ - "type": ARG_TYPE.STR, - "description": "Default BigQuery project id (used for billing and dataset lookup if not overridden).", - }, - billing_project={ - "type": ARG_TYPE.STR, - "description": "BigQuery project id to bill query jobs to (defaults to project_id).", - }, - dataset_project={"type": ARG_TYPE.STR, "description": "Project id that owns the dataset (defaults to project_id)."}, - dataset={"type": ARG_TYPE.STR, "description": "The BigQuery dataset name."}, - service_account_keys={ - "type": 
ARG_TYPE.PATH, - "description": "Full path or URL to the service account JSON file", - "secret": True, - }, - service_account_json={"type": ARG_TYPE.DICT, "description": "Content of service account JSON file", "secret": True}, -) - -connection_args_example = OrderedDict( - project_id="tough-future-332513", service_account_keys="/home/bigq/tough-future-332513.json" -) diff --git a/mindsdb/integrations/handlers/bigquery_handler/icon.svg b/mindsdb/integrations/handlers/bigquery_handler/icon.svg deleted file mode 100644 index f00de42f0bc..00000000000 --- a/mindsdb/integrations/handlers/bigquery_handler/icon.svg +++ /dev/null @@ -1,5 +0,0 @@ - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/bigquery_handler/requirements.txt b/mindsdb/integrations/handlers/bigquery_handler/requirements.txt deleted file mode 100644 index 3aa034b5749..00000000000 --- a/mindsdb/integrations/handlers/bigquery_handler/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -google-cloud-bigquery[pandas] -sqlalchemy-bigquery --r mindsdb/integrations/utilities/handlers/auth_utilities/google/requirements.txt \ No newline at end of file diff --git a/mindsdb/integrations/handlers/chromadb_handler/README.md b/mindsdb/integrations/handlers/chromadb_handler/README.md deleted file mode 100644 index 9aede209eb6..00000000000 --- a/mindsdb/integrations/handlers/chromadb_handler/README.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -title: ChromaDB -sidebarTitle: ChromaDB ---- - -In this section, we present how to connect ChromaDB to MindsDB. - -[ChromaDB](https://www.trychroma.com/) is the open-source embedding database. Chroma makes it easy to build LLM apps by making knowledge, facts, and skills pluggable for LLMs. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. 
To connect ChromaDB to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). -3. Install or ensure access to ChromaDB. - -## Connection - -This handler is implemented using the `chromadb` Python library. - -To connect to a remote ChromaDB instance, use the following statement: - -```sql -CREATE DATABASE chromadb_datasource -WITH ENGINE = 'chromadb' -PARAMETERS = { - "host": "YOUR_HOST", - "port": YOUR_PORT, - "distance": "l2/cosine/ip" -- optional, default is cosine -} -``` - -The required parameters are: - -* `host`: The host name or IP address of the ChromaDB instance. -* `port`: The TCP/IP port of the ChromaDB instance. -* `distance`: It defines how the distance between vectors is calculated. Available method include l2, cosine, and ip, as [explained here](https://docs.trychroma.com/docs/collections/configure). - -To connect to an in-memory ChromaDB instance, use the following statement: - -```sql -CREATE DATABASE chromadb_datasource -WITH ENGINE = "chromadb", -PARAMETERS = { - "persist_directory": "YOUR_PERSIST_DIRECTORY", - "distance": "l2/cosine/ip" -- optional -} -``` - -The required parameters are: - -* `persist_directory`: The directory to use for persisting data. -* `distance`: It defines how the distance between vectors is calculated. Available method include l2, cosine, and ip, as [explained here](https://docs.trychroma.com/docs/collections/configure). - -## Usage - -Now, you can use the established connection to create a collection (or table in the context of MindsDB) in ChromaDB and insert data into it: - -```sql -CREATE TABLE chromadb_datasource.test_embeddings ( - SELECT embeddings,'{"source": "fda"}' as metadata - FROM mysql_datasource.test_embeddings -); -``` - - -`mysql_datasource` is another MindsDB data source that has been created by connecting to a MySQL database. 
The `test_embeddings` table in the `mysql_datasource` data source contains the embeddings that we want to store in ChromaDB. - - -You can query your collection (table) as shown below: - -```sql -SELECT * -FROM chromadb_datasource.test_embeddings; -``` - -To filter the data in your collection (table) by metadata, you can use the following query: - -```sql -SELECT * -FROM chromadb_datasource.test_embeddings -WHERE `metadata.source` = "fda"; - -``` - -To conduct a similarity search, the following query can be used: - -```sql -SELECT * -FROM chromadb_datasource.test_embeddings -WHERE search_vector = ( - SELECT embeddings - FROM mysql_datasource.test_embeddings - LIMIT 1 -); diff --git a/mindsdb/integrations/handlers/chromadb_handler/__about__.py b/mindsdb/integrations/handlers/chromadb_handler/__about__.py deleted file mode 100644 index 8e7b0d2581b..00000000000 --- a/mindsdb/integrations/handlers/chromadb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB ChromaDB handler" -__package_name__ = "mindsdb_chromadb_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for ChromaDB" -__author__ = "Daniel Usvyat" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/chromadb_handler/__init__.py b/mindsdb/integrations/handlers/chromadb_handler/__init__.py deleted file mode 100644 index 613aa3f0647..00000000000 --- a/mindsdb/integrations/handlers/chromadb_handler/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_SUPPORT_LEVEL, HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version -from .connection_args import connection_args, connection_args_example - -try: - from .chromadb_handler import ChromaDBHandler as Handler - - import_error = None -except Exception as e: - Handler = None - 
import_error = e - -title = "ChromaDB" -name = "chromadb" -type = HANDLER_TYPE.DATA -support_level = HANDLER_SUPPORT_LEVEL.MINDSDB -icon_path = "icon.png" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "support_level", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py b/mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py deleted file mode 100644 index 32d0e566b00..00000000000 --- a/mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +++ /dev/null @@ -1,538 +0,0 @@ -import os -import ast -import shutil -import hashlib -from typing import Dict, List, Optional, Union -import threading - -import pandas as pd -import chromadb -from chromadb.api.shared_system_client import SharedSystemClient - -from mindsdb.integrations.handlers.chromadb_handler.settings import ChromaHandlerConfig -from mindsdb.integrations.libs.response import RESPONSE_TYPE -from mindsdb.integrations.libs.response import HandlerResponse -from mindsdb.integrations.libs.response import HandlerResponse as Response -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse -from mindsdb.integrations.libs.vectordatabase_handler import ( - FilterCondition, - FilterOperator, - TableField, - VectorStoreHandler, -) -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class ChromaDBHandler(VectorStoreHandler): - """This handler handles connection and execution of the ChromaDB statements.""" - - name = "chromadb" - - def __init__(self, name: str, **kwargs): - super().__init__(name) - self.handler_storage = kwargs["handler_storage"] - self._client = None - self.persist_directory = None - self.is_connected = False - self._use_handler_storage = False - - config = self.validate_connection_parameters(name, **kwargs) - - self._client_config = { - "chroma_server_host": config.host, - 
"chroma_server_http_port": config.port, - "persist_directory": self.persist_directory, - } - - self.create_collection_metadata = { - "hnsw:space": config.distance, - } - - def validate_connection_parameters(self, name, **kwargs): - """ - Validate the connection parameters. - """ - - _config = kwargs.get("connection_data") - _config["vector_store"] = name - - config = ChromaHandlerConfig(**_config) - - if config.persist_directory: - if os.path.isabs(config.persist_directory): - self.persist_directory = config.persist_directory - else: - # get full persistence directory from handler storage - self.persist_directory = self.handler_storage.folder_get(config.persist_directory) - self._use_handler_storage = True - - return config - - def _get_client(self): - client_config = self._client_config - if client_config is None: - raise Exception("Client config is not set!") - - # decide the client type to be used, either persistent or httpclient - if client_config["persist_directory"] is not None: - SharedSystemClient.clear_system_cache() - return chromadb.PersistentClient(path=client_config["persist_directory"]) - else: - return chromadb.HttpClient( - host=client_config["chroma_server_host"], - port=client_config["chroma_server_http_port"], - ) - - def _sync(self): - """Sync the database to disk if using persistent storage""" - if self.persist_directory and self._use_handler_storage: - self.handler_storage.folder_sync(self.persist_directory) - - def __del__(self): - """Ensure proper cleanup when the handler is destroyed""" - if self.is_connected: - self._sync() - self.disconnect() - - def connect(self): - """Connect to a ChromaDB database.""" - if self.is_connected is True: - return self._client - - try: - self._client = self._get_client() - self.is_connected = True - return self._client - except Exception as e: - self.is_connected = False - raise Exception(f"Error connecting to ChromaDB client, {e}!") - - def disconnect(self): - """Close the database connection.""" - if 
self.is_connected: - if hasattr(self._client, "close"): - self._client.close() # Some ChromaDB clients have a close method - self._client = None - self.is_connected = False - - def check_connection(self): - """Check the connection to the ChromaDB database.""" - response_code = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - self._client.heartbeat() - response_code.success = True - except Exception as e: - logger.error(f"Error connecting to ChromaDB , {e}!") - response_code.error_message = str(e) - finally: - if response_code.success is True and need_to_close: - self.disconnect() - if response_code.success is False and self.is_connected is True: - self.is_connected = False - - return response_code - - def _get_chromadb_operator(self, operator: FilterOperator) -> str: - mapping = { - FilterOperator.EQUAL: "$eq", - FilterOperator.NOT_EQUAL: "$ne", - FilterOperator.LESS_THAN: "$lt", - FilterOperator.LESS_THAN_OR_EQUAL: "$lte", - FilterOperator.GREATER_THAN: "$gt", - FilterOperator.GREATER_THAN_OR_EQUAL: "$gte", - FilterOperator.IN: "$in", - FilterOperator.NOT_IN: "$nin", - } - - if operator not in mapping: - raise Exception(f"Operator {operator} is not supported by ChromaDB!") - - return mapping[operator] - - def _translate_metadata_condition(self, conditions: List[FilterCondition]) -> Optional[dict]: - """ - Translate a list of FilterCondition objects a dict that can be used by ChromaDB. 
- E.g., - [ - FilterCondition( - column="metadata.created_at", - op=FilterOperator.LESS_THAN, - value="2020-01-01", - ), - FilterCondition( - column="metadata.created_at", - op=FilterOperator.GREATER_THAN, - value="2019-01-01", - ) - ] - --> - { - "$and": [ - {"created_at": {"$lt": "2020-01-01"}}, - {"created_at": {"$gt": "2019-01-01"}} - ] - } - """ - # we ignore all non-metadata conditions - if conditions is None: - return None - metadata_conditions = [ - condition for condition in conditions if condition.column.startswith(TableField.METADATA.value) - ] - if len(metadata_conditions) == 0: - return None - - # we translate each metadata condition into a dict - chroma_db_conditions = [] - for condition in metadata_conditions: - metadata_key = condition.column.split(".")[-1] - - chroma_db_conditions.append({metadata_key: {self._get_chromadb_operator(condition.op): condition.value}}) - - # we combine all metadata conditions into a single dict - metadata_condition = ( - {"$and": chroma_db_conditions} if len(chroma_db_conditions) > 1 else chroma_db_conditions[0] - ) - return metadata_condition - - def select( - self, - table_name: str, - columns: List[str] = None, - conditions: List[FilterCondition] = None, - offset: int = None, - limit: int = None, - ) -> pd.DataFrame: - self.disconnect() - self.connect() - collection = self._client.get_collection(table_name) - filters = self._translate_metadata_condition(conditions) - - include = ["metadatas", "documents", "embeddings"] - - # Identify Search Intent - vector_filter = None - content_filter = None - - if conditions is not None: - # Embeddings - v_filters = [c for c in conditions if c.column == TableField.EMBEDDINGS.value] - if v_filters: - vector_filter = v_filters[0] - - # Semantic Search - c_filters = [c for c in conditions if c.column == TableField.CONTENT.value] - if c_filters: - content_filter = c_filters[0] - - # ID Filtering - ids_include = [] - ids_exclude = [] - - if conditions is not None: - for condition in 
conditions: - if condition.column != TableField.ID.value: - continue - if condition.op == FilterOperator.EQUAL: - ids_include.append(condition.value) - elif condition.op == FilterOperator.IN: - ids_include.extend(condition.value) - elif condition.op == FilterOperator.NOT_EQUAL: - ids_exclude.append(condition.value) - elif condition.op == FilterOperator.NOT_IN: - ids_exclude.extend(condition.value) - - # Trigger search if Vector OR Content is present - if vector_filter is not None or content_filter is not None: - # Similarity search - query_payload = { - "where": filters, - "include": include + ["distances"], - } - - # Handle Vector Search - if vector_filter: - query_payload["query_embeddings"] = vector_filter.value - - # Handle Text Search - if content_filter: - val = content_filter.value - if isinstance(val, list): - query_payload["query_texts"] = val - else: - query_payload["query_texts"] = [val] - - if limit is not None: - if len(ids_include) == 0 and len(ids_exclude) == 0: - query_payload["n_results"] = limit - else: - # get more results if we have filters by id - query_payload["n_results"] = limit * 10 - - result = collection.query(**query_payload) - ids = result["ids"][0] - documents = result["documents"][0] - metadatas = result["metadatas"][0] - distances = result["distances"][0] - embeddings = result["embeddings"][0] - - else: - # general get query (Exact Match) - result = collection.get( - ids=ids_include or None, - where=filters, - limit=limit, - offset=offset, - include=include, - ) - ids = result["ids"] - documents = result["documents"] - metadatas = result["metadatas"] - embeddings = result["embeddings"] - distances = None - - payload = { - TableField.ID.value: ids, - TableField.CONTENT.value: documents, - TableField.METADATA.value: metadatas, - TableField.EMBEDDINGS.value: list(embeddings), - } - - if columns is not None: - payload = {column: payload[column] for column in columns if column != TableField.DISTANCE.value} - - # Include distance - 
distance_filter = None - distance_col = TableField.DISTANCE.value - if distances is not None: - payload[distance_col] = distances - - if conditions is not None: - for cond in conditions: - if cond.column == distance_col: - distance_filter = cond - break - - df = pd.DataFrame(payload) - if ids_exclude or ids_include: - if ids_exclude: - df = df[~df[TableField.ID.value].isin(ids_exclude)] - if ids_include: - df = df[df[TableField.ID.value].isin(ids_include)] - if limit is not None: - df = df[:limit] - - if distance_filter is not None: - op_map = { - "<": "__lt__", - "<=": "__le__", - ">": "__gt__", - ">=": "__ge__", - "=": "__eq__", - } - op = op_map.get(distance_filter.op.value) - if op: - df = df[getattr(df[distance_col], op)(distance_filter.value)] - return df - - def _dataframe_metadata_to_chroma_metadata(self, metadata: Union[Dict[str, str], str]) -> Optional[Dict[str, str]]: - """Convert DataFrame metadata to ChromaDB compatible metadata format""" - if pd.isna(metadata) or metadata is None: - return None - if isinstance(metadata, dict): - if not metadata: - # ChromaDB does not support empty metadata dicts, but it does support None. - # Related: https://github.com/chroma-core/chroma/issues/791. - return None - # Filter out None values from the metadata dict - return {k: v for k, v in metadata.items() if pd.notna(v) and v is not None} - # Metadata is a string representation of a dictionary instead. - try: - parsed = ast.literal_eval(metadata) - if isinstance(parsed, dict): - # Filter out None values from the parsed dict - return {k: v for k, v in parsed.items() if pd.notna(v) and v is not None} - return None - except (ValueError, SyntaxError): - return None - - def _process_document_ids(self, df: pd.DataFrame) -> pd.DataFrame: - """ - Process document IDs for ChromaDB insertion/update. - Only generates IDs if none are provided, otherwise ensures IDs are strings. 
- - Args: - df (pd.DataFrame): Input DataFrame containing document data - - Returns: - pd.DataFrame: DataFrame with processed IDs - """ - df = df.copy() # Create a copy to avoid modifying the original - - if TableField.ID.value not in df.columns: - # No IDs provided - generate hash-based IDs from content - df = df.drop_duplicates(subset=[TableField.CONTENT.value]) - df[TableField.ID.value] = df[TableField.CONTENT.value].apply( - lambda content: hashlib.sha256(content.encode()).hexdigest() - ) - else: - # Convert IDs to strings and remove any duplicates - df[TableField.ID.value] = df[TableField.ID.value].astype(str) - df = df.drop_duplicates(subset=[TableField.ID.value], keep="last") - - return df - - def insert(self, collection_name: str, df: pd.DataFrame) -> Response: - """ - Insert/Upsert data into ChromaDB collection. - If records with same IDs exist, they will be updated. - """ - self.connect() - collection = self._client.get_or_create_collection(collection_name, metadata=self.create_collection_metadata) - - # Convert metadata from string to dict if needed - if TableField.METADATA.value in df.columns: - df[TableField.METADATA.value] = df[TableField.METADATA.value].apply( - self._dataframe_metadata_to_chroma_metadata - ) - # Drop rows where metadata conversion failed - df = df.dropna(subset=[TableField.METADATA.value]) - - # Convert embeddings from string to list if they are strings - if TableField.EMBEDDINGS.value in df.columns and df[TableField.EMBEDDINGS.value].dtype == "object": - df[TableField.EMBEDDINGS.value] = df[TableField.EMBEDDINGS.value].apply( - lambda x: ast.literal_eval(x) if isinstance(x, str) else x - ) - - # Process document IDs - df = self._process_document_ids(df) - - # Extract data from DataFrame - data_dict = df.to_dict(orient="list") - - if not hasattr(self._client, "_insert_lock"): - self._client._insert_lock = threading.Lock() - - with self._client._insert_lock: - try: - collection.upsert( - ids=data_dict[TableField.ID.value], - 
documents=data_dict[TableField.CONTENT.value], - embeddings=data_dict.get(TableField.EMBEDDINGS.value, None), - metadatas=data_dict.get(TableField.METADATA.value, None), - ) - self._sync() - except Exception as e: - logger.error(f"Error during upsert operation: {str(e)}") - raise Exception(f"Failed to insert/update data: {str(e)}") - return Response(RESPONSE_TYPE.OK, affected_rows=len(df)) - - def update( - self, - table_name: str, - data: pd.DataFrame, - key_columns: List[str] = None, - ): - """ - Update data in the ChromaDB database. - """ - self.connect() - collection = self._client.get_collection(table_name) - - # drop columns with all None values - - data.dropna(axis=1, inplace=True) - - data = data.to_dict(orient="list") - - collection.update( - ids=data[TableField.ID.value], - documents=data.get(TableField.CONTENT.value), - embeddings=data[TableField.EMBEDDINGS.value], - metadatas=data.get(TableField.METADATA.value), - ) - self._sync() - - def delete(self, table_name: str, conditions: List[FilterCondition] = None): - self.connect() - filters = self._translate_metadata_condition(conditions) - # get id filters - id_filters = [] - for condition in conditions: - if condition.column != TableField.ID.value: - continue - value = condition.value - if isinstance(value, list): - id_filters.extend(value) - else: - id_filters.append(value) - - if filters is None and len(id_filters) == 0: - raise Exception("Delete query must have at least one condition!") - collection = self._client.get_collection(table_name) - collection.delete(ids=id_filters or None, where=filters) - self._sync() - - def create_table(self, table_name: str, if_not_exists=True): - """ - Create a collection with the given name in the ChromaDB database. 
- """ - self.connect() - self._client.create_collection( - table_name, get_or_create=if_not_exists, metadata=self.create_collection_metadata - ) - self._sync() - - def drop_table(self, table_name: str, if_exists=True): - """ - Delete a collection from the ChromaDB database. - """ - self.connect() - try: - # NOTE: there is a bug in chromadb v0.6.3 - it delete only segments that loaded in memory, - # so we delete them manually - if self._client_config.get("persist_directory") is not None: - collection = self._client.get_collection(table_name) - segments = self._client._server._sysdb.get_segments(collection.id) - for segment in segments: - self._client._server._sysdb.delete_segment(collection=collection.id, id=segment["id"]) - shutil.rmtree( - os.path.join(self._client_config["persist_directory"], str(segment["id"])), ignore_errors=True - ) - - self._client.delete_collection(table_name) - self._sync() - except ValueError: - if if_exists: - return - else: - raise Exception(f"Collection {table_name} does not exist!") - - def get_tables(self) -> HandlerResponse: - """ - Get the list of collections in the ChromaDB database. 
- """ - self.connect() - collections = self._client.list_collections() - collections_name = pd.DataFrame( - columns=["table_name"], - data=collections, - ) - return Response(resp_type=RESPONSE_TYPE.TABLE, data_frame=collections_name) - - def get_columns(self, table_name: str) -> HandlerResponse: - # check if collection exists - self.connect() - try: - _ = self._client.get_collection(table_name) - except ValueError: - return Response( - resp_type=RESPONSE_TYPE.ERROR, - error_message=f"Table {table_name} does not exist!", - ) - return super().get_columns(table_name) diff --git a/mindsdb/integrations/handlers/chromadb_handler/connection_args.py b/mindsdb/integrations/handlers/chromadb_handler/connection_args.py deleted file mode 100644 index c227fcad27d..00000000000 --- a/mindsdb/integrations/handlers/chromadb_handler/connection_args.py +++ /dev/null @@ -1,28 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - host={ - "type": ARG_TYPE.STR, - "description": "chromadb server host", - "required": False, - }, - port={ - "type": ARG_TYPE.STR, - "description": "chromadb server port", - "required": False, - }, - persist_directory={ - "type": ARG_TYPE.STR, - "description": "persistence directory for ChromaDB", - "required": False, - }, -) - -connection_args_example = OrderedDict( - host="localhost", - port="8000", - persist_directory="chromadb", -) diff --git a/mindsdb/integrations/handlers/chromadb_handler/icon.png b/mindsdb/integrations/handlers/chromadb_handler/icon.png deleted file mode 100644 index ccc188def9f..00000000000 Binary files a/mindsdb/integrations/handlers/chromadb_handler/icon.png and /dev/null differ diff --git a/mindsdb/integrations/handlers/chromadb_handler/requirements.txt b/mindsdb/integrations/handlers/chromadb_handler/requirements.txt deleted file mode 100644 index 8ea77aea4f6..00000000000 --- 
a/mindsdb/integrations/handlers/chromadb_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -chromadb~=0.6.3 -onnxruntime==1.20.1 # 1.21.0 (latest as of April 10, 2025) causes issues in Windows \ No newline at end of file diff --git a/mindsdb/integrations/handlers/chromadb_handler/settings.py b/mindsdb/integrations/handlers/chromadb_handler/settings.py deleted file mode 100644 index 279c404384e..00000000000 --- a/mindsdb/integrations/handlers/chromadb_handler/settings.py +++ /dev/null @@ -1,58 +0,0 @@ -import difflib -from typing import Any - -from pydantic import BaseModel, model_validator - - -class ChromaHandlerConfig(BaseModel): - """ - Configuration for VectorStoreHandler. - """ - - vector_store: str - persist_directory: str = None - host: str = None - port: str = None - password: str = None - distance: str = "cosine" - - class Config: - extra = "forbid" - - @model_validator(mode="before") - @classmethod - def check_param_typos(cls, values: Any) -> Any: - """Check if there are any typos in the parameters.""" - - expected_params = cls.model_fields.keys() - for key in values.keys(): - if key not in expected_params: - close_matches = difflib.get_close_matches(key, expected_params, cutoff=0.4) - if close_matches: - raise ValueError(f"Unexpected parameter '{key}'. Did you mean '{close_matches[0]}'?") - else: - raise ValueError(f"Unexpected parameter '{key}'.") - return values - - @model_validator(mode="before") - @classmethod - def check_config(cls, values: Any) -> Any: - """Check if config is valid.""" - - vector_store = values.get("vector_store") - host = values.get("host") - port = values.get("port") - persist_directory = values.get("persist_directory") - - if bool(port) != bool(host) or (host and persist_directory): - raise ValueError( - f"For {vector_store} handler - host and port must be provided together. " - f"Additionally, if host and port are provided, persist_directory should not be provided." 
- ) - - if persist_directory and (host or port): - raise ValueError( - f"For {vector_store} handler - if persistence_folder is provided, host, port should not be provided." - ) - - return values diff --git a/mindsdb/integrations/handlers/chromadb_handler/tests/__init__.py b/mindsdb/integrations/handlers/chromadb_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/chromadb_handler/tests/test_chromadb_handler.py b/mindsdb/integrations/handlers/chromadb_handler/tests/test_chromadb_handler.py deleted file mode 100644 index d3e5d330d16..00000000000 --- a/mindsdb/integrations/handlers/chromadb_handler/tests/test_chromadb_handler.py +++ /dev/null @@ -1,83 +0,0 @@ -import unittest -from unittest.mock import Mock, patch -import pandas as pd -from mindsdb.integrations.handlers.chromadb_handler.chromadb_handler import ( - ChromaDBHandler, - TableField, -) - - -class MockCondition: - def __init__(self, column, op, value): - self.column = column - self.op = op - self.value = value - - -class TestChromaHandler(unittest.TestCase): - def setUp(self): - self.handler = ChromaDBHandler(name="test_chroma", connection_data={}, handler_storage=Mock()) - - # INSERT - @patch("mindsdb.integrations.handlers.chromadb_handler.chromadb_handler.ChromaDBHandler.connect") - def test_insert_calls_upsert(self, mock_connect): - mock_client = Mock() - mock_collection = Mock() - mock_client.get_or_create_collection.return_value = mock_collection - self.handler._client = mock_client - self.handler.is_connected = True - - df = pd.DataFrame( - { - TableField.CONTENT.value: ["Cat Photo"], - TableField.EMBEDDINGS.value: [[0.9, 0.1, 0.1]], - TableField.ID.value: ["img_1"], - TableField.METADATA.value: [{"author": "Sriram"}], - } - ) - self.handler.insert("my_gallery", df) - - call_args = mock_collection.upsert.call_args[1] - self.assertEqual(call_args["embeddings"], [[0.9, 0.1, 0.1]]) - - # SELECT - 
@patch("mindsdb.integrations.handlers.chromadb_handler.chromadb_handler.ChromaDBHandler.disconnect") - @patch("mindsdb.integrations.handlers.chromadb_handler.chromadb_handler.ChromaDBHandler.connect") - def test_select_semantic_search(self, mock_connect, mock_disconnect): - # Mock System - mock_client = Mock() - mock_collection = Mock() - mock_client.get_collection.return_value = mock_collection - - self.handler._client = mock_client - self.handler.is_connected = True - - # Mock Return Data - mock_result = { - "ids": [["id1"]], - "documents": [["Dog"]], - "metadatas": [[{}]], - "embeddings": [[[0.1, 0.2]]], - "distances": [[0.5]], - } - mock_collection.query.return_value = mock_result - mock_collection.get.return_value = mock_result - - conditions = [MockCondition(column=TableField.CONTENT.value, op="=", value="Dog")] - - self.handler.select("my_gallery", conditions=conditions) - - # Verification - if not mock_collection.query.called: - self.fail("CRITICAL: The handler used .get() (Exact Match) instead of .query() (Semantic Search)!") - - call_args = mock_collection.query.call_args[1] - - if "query_texts" not in call_args: - self.fail("CRITICAL: The handler called .query() but forgot 'query_texts'!") - - self.assertEqual(call_args["query_texts"], ["Dog"]) - - -if __name__ == "__main__": - unittest.main() diff --git a/mindsdb/integrations/handlers/cohere_handler/README.md b/mindsdb/integrations/handlers/cohere_handler/README.md deleted file mode 100644 index a3947b4d9a0..00000000000 --- a/mindsdb/integrations/handlers/cohere_handler/README.md +++ /dev/null @@ -1,81 +0,0 @@ ---- -title: Cohere -sidebarTitle: Cohere ---- - -This documentation describes the integration of MindsDB with [Cohere](https://cohere.com/), a technology company focused on artificial intelligence for the enterprise. -The integration allows for the deployment of Cohere models within MindsDB, providing the models with access to data from various data sources. 
- -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To use Cohere within MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). -3. Obtain the Cohere API key required to deploy and use Cohere models within MindsDB. Sign up for a Cohere account and request an API key from the Cohere dashboard. Learn more [here](https://cohere.com/pricing). - -## Setup - -Create an AI engine from the [Cohere handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/cohere_handler). - -```sql -CREATE ML_ENGINE cohere_engine -FROM cohere -USING - cohere_api_key = 'your-cohere-api-key'; -``` - -Create a model using `cohere_engine` as an engine. - -```sql -CREATE MODEL cohere_model -PREDICT target_column -USING - engine = 'cohere_engine', -- engine name as created via CREATE ML_ENGINE - task = 'task_name', -- choose one of 'text-summarization', 'text-generation' - column = 'column_name'; -- column that stores input/question to the model -``` - -## Usage - -The following usage examples utilize `cohere_engine` to create a model with the `CREATE MODEL` statement. - -Create a model to predict the answer to a question using the `text-generation` task. - -```sql -CREATE MODEL cohere_model -PREDICT answer -USING - engine = 'cohere_engine', - task = 'text-generation', - column = 'question'; -``` - -Where: - -| Name | Description | -|-------------------|------------------------------------------------------------------------| -| `task` | It defines the task to be accomplished. | -| `column` | It defines the column with the text to be acted upon. | -| `engine` | It defines the Cohere engine. | - -Query the model to get predictions. 
- -```sql -SELECT answer -FROM cohere_model -WHERE question = 'What is the capital of France?'; -``` - -Here is the output: - -| answer | -| ------ | -| The capital of France is Paris. Paris is France's largest city and a major global center for art, culture, fashion, and cuisine. It is renowned for its iconic landmarks such as the Eiffel Tower, Notre-Dame Cathedral, and the Louvre Museum. - - - -**Next Steps** - -Go to the [Use Cases](https://docs.mindsdb.com/use-cases/overview) section to see more examples. - diff --git a/mindsdb/integrations/handlers/cohere_handler/__about__.py b/mindsdb/integrations/handlers/cohere_handler/__about__.py deleted file mode 100644 index 37d18bbdb14..00000000000 --- a/mindsdb/integrations/handlers/cohere_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Cohere handler' -__package_name__ = 'mindsdb_cohere_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Cohere" -__author__ = 'Balaji Seetharaman ' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2023 - mindsdb' diff --git a/mindsdb/integrations/handlers/cohere_handler/__init__.py b/mindsdb/integrations/handlers/cohere_handler/__init__.py deleted file mode 100644 index c877967c89d..00000000000 --- a/mindsdb/integrations/handlers/cohere_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from .__about__ import __version__ as version, __description__ as description -from mindsdb.integrations.libs.const import HANDLER_TYPE -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - -try: - from .cohere_handler import CohereHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Cohere' -name = 'cohere' -type = HANDLER_TYPE.ML -icon_path = 'icon.svg' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', 'import_error', 'icon_path' -] diff --git 
a/mindsdb/integrations/handlers/cohere_handler/cohere_handler.py b/mindsdb/integrations/handlers/cohere_handler/cohere_handler.py deleted file mode 100644 index 34f820167de..00000000000 --- a/mindsdb/integrations/handlers/cohere_handler/cohere_handler.py +++ /dev/null @@ -1,86 +0,0 @@ -from typing import Optional, Dict - -import cohere -import pandas as pd - -from mindsdb.integrations.libs.base import BaseMLEngine - -from mindsdb.utilities import log - -from mindsdb.integrations.utilities.handler_utils import get_api_key - - -logger = log.getLogger(__name__) - - -class CohereHandler(BaseMLEngine): - """ - Integration with the Cohere Python Library - """ - name = 'cohere' - - def create(self, target: str, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None: - if 'using' not in args: - raise Exception("Cohere engine requires a USING clause! Refer to its documentation for more details.") - - self.generative = True - self.model_storage.json_set('args', args) - - def predict(self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None: - - args = self.model_storage.json_get('args') - - input_keys = list(args.keys()) - - logger.info(f"Input keys: {input_keys}!") - - input_column = args['using']['column'] - - if input_column not in df.columns: - raise RuntimeError(f'Column "{input_column}" not found in input data') - - result_df = pd.DataFrame() - - if args['using']['task'] == 'text-summarization': - result_df['predictions'] = df[input_column].apply(self.predict_text_summary) - - elif args['using']['task'] == 'text-generation': - result_df['predictions'] = df[input_column].apply(self.predict_text_generation) - - else: - raise Exception(f"Task {args['using']['task']} is not supported!") - - result_df = result_df.rename(columns={'predictions': args['target']}) - - return result_df - - def predict_text_summary(self, text): - """ - connects with cohere api to predict the summary of the input text - - """ - - args = 
self.model_storage.json_get('args') - - api_key = get_api_key('cohere', args["using"], self.engine_storage, strict=False) - co = cohere.Client(api_key) - - response = co.summarize(text) - text_summary = response.summary - - return text_summary - - def predict_text_generation(self, text): - """ - connects with cohere api to predict the next prompt of the input text - - """ - args = self.model_storage.json_get('args') - - api_key = get_api_key('cohere', args["using"], self.engine_storage, strict=False) - co = cohere.Client(api_key) - - response = co.generate(text) - text_generated = response.generations[0].text - - return text_generated diff --git a/mindsdb/integrations/handlers/cohere_handler/icon.svg b/mindsdb/integrations/handlers/cohere_handler/icon.svg deleted file mode 100644 index e6a3e6d3053..00000000000 --- a/mindsdb/integrations/handlers/cohere_handler/icon.svg +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/cohere_handler/requirements.txt b/mindsdb/integrations/handlers/cohere_handler/requirements.txt deleted file mode 100644 index aafc1250da4..00000000000 --- a/mindsdb/integrations/handlers/cohere_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -cohere==4.5.1 -aiohttp>=3.13.3 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/mindsdb/integrations/handlers/databricks_handler/README.md b/mindsdb/integrations/handlers/databricks_handler/README.md deleted file mode 100644 index c62f6907d6c..00000000000 --- a/mindsdb/integrations/handlers/databricks_handler/README.md +++ /dev/null @@ -1,119 +0,0 @@ ---- -title: Databricks -sidebarTitle: Databricks ---- -This documentation describes the integration of MindsDB with [Databricks](https://www.databricks.com/), the world's first data intelligence platform powered by generative AI. -The integration allows MindsDB to access data stored in a Databricks workspace and enhance it with AI capabilities. 
- -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Databricks to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - - -If the Databricks cluster you are attempting to connect to is terminated, executing the queries given below will attempt to start the cluster and therefore, the first query may take a few minutes to execute. - -To avoid any delays, ensure that the Databricks cluster is running before executing the queries. - - -## Connection - -Establish a connection to your Databricks workspace from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE databricks_datasource -WITH - engine = 'databricks', - parameters = { - "server_hostname": "adb-1234567890123456.7.azuredatabricks.net", - "http_path": "sql/protocolv1/o/1234567890123456/1234-567890-test123", - "access_token": "dapi1234567890ab1cde2f3ab456c7d89efa", - "schema": "example_db" - }; -``` - -Required connection parameters include the following: - -* `server_hostname`: The server hostname for the cluster or SQL warehouse. -* `http_path`: The HTTP path of the cluster or SQL warehouse. -* `access_token`: A Databricks personal access token for the workspace. - - -Refer the instructions given https://docs.databricks.com/en/integrations/compute-details.html and https://docs.databricks.com/en/dev-tools/python-sql-connector.html#authentication to find the connection parameters mentioned above for your compute resource. - - -Optional connection parameters include the following: - -* `session_configuration`: Additional (key, value) pairs to set as Spark session configuration parameters. This should be provided as a JSON string. -* `http_headers`: Additional (key, value) pairs to set in HTTP headers on every RPC request the client makes. 
This should be provided as a JSON string. -* `catalog`: The catalog to use for the connection. Default is `hive_metastore`. -* `schema`: The schema (database) to use for the connection. Default is `default`. - -## Usage - -Retrieve data from a specified table by providing the integration name, catalog, schema, and table name: - -```sql -SELECT * -FROM databricks_datasource.catalog_name.schema_name.table_name -LIMIT 10; -``` - - -The catalog and schema names only need to be provided if the table to be queried is not in the specified (or default) catalog and schema. - - -Run Databricks SQL queries directly on the connected Databricks workspace: - -```sql -SELECT * FROM databricks_datasource ( - - --Native Query Goes Here - SELECT - city, - car_model, - RANK() OVER (PARTITION BY car_model ORDER BY quantity) AS rank - FROM dealer - QUALIFY rank = 1; -); - -``` - - -The above examples utilize `databricks_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the Databricks workspace. -* **Checklist**: - 1. Make sure the Databricks workspace is active. - 2. Confirm that server hostname, HTTP path, access token are correctly provided. If the catalog and schema are provided, ensure they are correct as well. - 3. Ensure a stable network between MindsDB and Databricks workspace. - - - -SQL statements running against tables (of reasonable size) are taking longer than expected. - -* **Symptoms**: SQL queries taking longer than expected to execute. -* **Checklist**: - 1. Ensure the Databricks cluster is running before executing the queries. - 2. Check the network connection between MindsDB and Databricks workspace. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing special characters. -* **Checklist**: - 1. 
Ensure table names with special characters are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel-data - * Incorrect: SELECT * FROM integration.'travel-data' - * Correct: SELECT * FROM integration.\`travel-data\` - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/databricks_handler/__about__.py b/mindsdb/integrations/handlers/databricks_handler/__about__.py deleted file mode 100644 index dd8fc695015..00000000000 --- a/mindsdb/integrations/handlers/databricks_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Databricks handler" -__package_name__ = "mindsdb_databricks_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Databricks" -__author__ = "Minura Punchihewa" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2022- mindsdb" diff --git a/mindsdb/integrations/handlers/databricks_handler/__init__.py b/mindsdb/integrations/handlers/databricks_handler/__init__.py deleted file mode 100644 index 9af359b9c82..00000000000 --- a/mindsdb/integrations/handlers/databricks_handler/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example - -try: - from .databricks_handler import DatabricksHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Databricks" -name = "databricks" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.MINDSDB - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "support_level", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git 
a/mindsdb/integrations/handlers/databricks_handler/connection_args.py b/mindsdb/integrations/handlers/databricks_handler/connection_args.py deleted file mode 100644 index 5ad6d2e473a..00000000000 --- a/mindsdb/integrations/handlers/databricks_handler/connection_args.py +++ /dev/null @@ -1,58 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - server_hostname={ - "type": ARG_TYPE.STR, - "description": "The server hostname for the cluster or SQL warehouse.", - "required": True, - "label": "Server Hostname", - }, - http_path={ - "type": ARG_TYPE.STR, - "description": "The HTTP path of the cluster or SQL warehouse.", - "required": True, - "label": "HTTP Path", - }, - access_token={ - "type": ARG_TYPE.STR, - "description": "A Databricks personal access token to authenticate the connection.", - "required": True, - "label": "Access Token", - "secret": True, - }, - session_configuration={ - "type": ARG_TYPE.STR, - "description": "Additional (key, value) pairs to set as Spark session configuration parameters.", - "required": False, - "label": "Session Configuration", - }, - http_headers={ - "type": ARG_TYPE.STR, - "description": "Additional (key, value) pairs to set in HTTP headers on every RPC request the connection makes." 
- " This parameter is optional.", - "required": False, - "label": "HTTP Headers", - }, - catalog={ - "type": ARG_TYPE.STR, - "description": "Catalog to use for the connection.", - "required": False, - "label": "Catalog", - }, - schema={ - "type": ARG_TYPE.STR, - "description": "Schema (database) to use for the connection.", - "required": False, - "label": "Schema", - }, -) - -connection_args_example = OrderedDict( - server_hostname="adb-1234567890123456.7.azuredatabricks.net", - http_path="sql/protocolv1/o/1234567890123456/1234-567890-test123", - access_token="dapi1234567890ab1cde2f3ab456c7d89efa", - schema="sales", -) diff --git a/mindsdb/integrations/handlers/databricks_handler/databricks_handler.py b/mindsdb/integrations/handlers/databricks_handler/databricks_handler.py deleted file mode 100644 index 755308d419b..00000000000 --- a/mindsdb/integrations/handlers/databricks_handler/databricks_handler.py +++ /dev/null @@ -1,817 +0,0 @@ -from typing import Text, Dict, Any, Optional, List, Tuple - -import pandas as pd -import re - -from databricks.sql import connect, RequestError, ServerOperationError -from databricks.sql.client import Connection -from databricks.sqlalchemy import DatabricksDialect -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.integrations.libs.base import MetaDatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, - INF_SCHEMA_COLUMNS_NAMES_SET, -) -from mindsdb_sql_parser import ast -from mindsdb.integrations.utilities.query_traversal import query_traversal - -from mindsdb.utilities import log -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE - - -logger = log.getLogger(__name__) - - -_INTERVAL_RE = re.compile(r"(?is)^\s*(?:interval\s+)?'?(?P[+-]?\d+)'?\s+(?P[a-zA-Z]+)\s*$") - - -def _escape_literal(value: str) -> str: - """Escape 
a literal string to be safely embedded into SQL single quotes. - - This function also validates the identifier before escaping to ensure it contains only safe characters. - """ - _validate_identifier(value) - return value.replace("'", "''") - - -def _quote_identifier(identifier: str) -> str: - """Quote identifiers (table/column) for Databricks SQL to avoid injection or syntax errors.""" - if not isinstance(identifier, str) or identifier == "": - raise ValueError("Invalid identifier value") - return f"`{identifier.replace('`', '``')}`" - - -def _validate_identifier(identifier: str) -> str: - """Validate and sanitize an identifier (table/column name).""" - if not isinstance(identifier, str) or not identifier: - raise ValueError("Identifier must be a non-empty string") - if not re.match(r"^[\w\s\-]+$", identifier): - raise ValueError(f"Identifier contains invalid characters: {identifier}") - return identifier - - -def _parse_interval_value(value: Any) -> Optional[int]: - if isinstance(value, bool): - return None - if isinstance(value, int): - return value - if isinstance(value, str): - value = value.strip().strip("'").strip('"') - if re.fullmatch(r"[+-]?\d+", value): - return int(value) - return None - - -def _parse_interval_unit(unit: Any) -> Optional[str]: - if isinstance(unit, str): - unit_upper = unit.strip().upper() - if unit_upper.endswith("S"): - unit_upper = unit_upper[:-1] - valid_units = {"YEAR", "MONTH", "DAY", "HOUR", "MINUTE", "SECOND", "WEEK", "QUARTER"} - if unit_upper in valid_units: - return unit_upper - return None - - -def _parse_interval_literal(value: Any) -> Optional[Tuple[int, str]]: - if value is None: - return None - - match = _INTERVAL_RE.match(str(value)) - if not match: - return None - - parsed_value = _parse_interval_value(match.group("value")) - parsed_unit = _parse_interval_unit(match.group("unit")) - if parsed_value is None or parsed_unit is None: - return None - return parsed_value, parsed_unit - - -def _get_interval_parts(node: 
ASTNode) -> Optional[Tuple[int, str]]: - """Extract interval parts from an ASTNode representing an INTERVAL literal. - - Args: - node (ASTNode): The ASTNode representing the INTERVAL literal. - Returns: - Optional[Tuple[int, str]]: A tuple with (value, unit) if successful, None otherwise. - """ - if isinstance(node, ast.UnaryOperation): - if node.op == "-" and node.args: - parts = _get_interval_parts(node.args[0]) - if parts is None: - return None - # Negate the value part - return -parts[0], parts[1] - return None - - if isinstance(node, ast.Constant): - return _parse_interval_literal(node.value) - - if not isinstance(node, ast.Interval): - return None - - parts = node.args or [] - - if len(parts) >= 2: - value = parts[0].value if isinstance(parts[0], ast.Constant) else parts[0] - unit = parts[1].value if isinstance(parts[1], ast.Constant) else parts[1] - parsed_value = _parse_interval_value(value) - parsed_unit = _parse_interval_unit(unit) - if parsed_value is not None and parsed_unit is not None: - return parsed_value, parsed_unit - - if len(parts) == 1: - raw = parts[0].value if isinstance(parts[0], ast.Constant) else parts[0] - return _parse_interval_literal(raw) - - return None - - -def _transform_databricks_sql_intervals(node: ASTNode, **kwargs: Any) -> ASTNode | None: - """Transform INTERVAL literals in the SQL query to be compatible with Databricks SQL syntax. - Transformation examples: - DATE_ADD(col, INTERVAL 5 HOUR) -> TIMESTAMPADD(hour, 5, col) - DATE_SUB(col, INTERVAL 30 MINUTE) -> TIMESTAMPADD(minute, -30, col) - DATE_ADD(col, INTERVAL 10 DAY) -> DATE_ADD(col, 10) - DATE_SUB(col, INTERVAL 2 WEEK) -> DATE_SUB(col, 14) - DATE_ADD(col, INTERVAL 3 MONTH) -> ADD_MONTHS(col, 3) - DATE_SUB(col, INTERVAL 1 YEAR) -> ADD_MONTHS(col, -12) - - - Args: - query (ASTNode): The SQL query represented as an ASTNode. - - Returns: - ASTNode: The transformed SQL query with compatible INTERVAL syntax. 
- """ - if not isinstance(node, ast.Function): - return None - - function_name = str(node.op).lower() - if function_name not in {"date_add", "date_sub"} or len(node.args) != 2: - return None - - interval_parts = _get_interval_parts(node.args[1]) - if interval_parts is None: - return None - value, unit = interval_parts - - date = node.args[0] - - if unit == "DAY": - if function_name == "date_add": - node.args[1] = ast.Constant(value) - else: - node.args[1] = ast.Constant(abs(value)) - return None - - if unit == "WEEK": - days_value = value * 7 - if function_name == "date_add": - node.args[1] = ast.Constant(days_value) - else: - node.args[1] = ast.Constant(abs(days_value)) - return None - - if unit in {"MONTH", "YEAR", "QUARTER"}: - month_value = value - if unit == "YEAR": - month_value = value * 12 - elif unit == "QUARTER": - month_value = value * 3 - if function_name == "date_sub": - month_value = -month_value - new_node = ast.Function( - op="add_months", - args=[date, ast.Constant(month_value)], - ) - return new_node - - if unit in {"HOUR", "MINUTE", "SECOND"}: - if function_name == "date_sub": - value = -value - new_node = ast.Function( - op="timestampadd", - args=[ - ast.Identifier(unit.lower()), - ast.Constant(value), - date, - ], - ) - return new_node - - -def _map_type(internal_type_name: str | None) -> MYSQL_DATA_TYPE: - """Map MyDatabricks SQL text types names to MySQL types as enum. - - Args: - internal_type_name (str): The name of the Databricks type to map. - - Returns: - MYSQL_DATA_TYPE: The MySQL type enum that corresponds to the MySQL text type name. 
- """ - if not isinstance(internal_type_name, str): - return MYSQL_DATA_TYPE.TEXT - - type_upper = internal_type_name.upper() - - type_mappings = { - "STRING": MYSQL_DATA_TYPE.TEXT, - "LONG": MYSQL_DATA_TYPE.BIGINT, - "SHORT": MYSQL_DATA_TYPE.SMALLINT, - "INT": MYSQL_DATA_TYPE.INT, - "INTEGER": MYSQL_DATA_TYPE.INT, - "BIGINT": MYSQL_DATA_TYPE.BIGINT, - "SMALLINT": MYSQL_DATA_TYPE.SMALLINT, - "TINYINT": MYSQL_DATA_TYPE.TINYINT, - "FLOAT": MYSQL_DATA_TYPE.FLOAT, - "DOUBLE": MYSQL_DATA_TYPE.DOUBLE, - "DECIMAL": MYSQL_DATA_TYPE.DECIMAL, - "BOOLEAN": MYSQL_DATA_TYPE.BOOL, - "DATE": MYSQL_DATA_TYPE.DATE, - "TIMESTAMP": MYSQL_DATA_TYPE.DATETIME, - "BINARY": MYSQL_DATA_TYPE.BINARY, - } - if type_upper in type_mappings: - return type_mappings[type_upper] - - try: - return MYSQL_DATA_TYPE(type_upper) - except Exception: - logger.info(f"Databricks handler: unknown type: {internal_type_name}, use TEXT as fallback.") - return MYSQL_DATA_TYPE.TEXT - - -class DatabricksHandler(MetaDatabaseHandler): - """ - This handler handles the connection and execution of SQL statements on Databricks. - """ - - name = "databricks" - - def __init__(self, name: Text, connection_data: Optional[Dict], **kwargs: Any) -> None: - """ - Initializes the handler. - - Args: - name (Text): The name of the handler instance. - connection_data (Dict): The connection data required to connect to the Databricks workspace. - kwargs: Arbitrary keyword arguments. - """ - super().__init__(name) - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - self.cache_thread_safe = True - - def __del__(self) -> None: - """ - Closes the connection when the handler instance is deleted. - """ - if self.is_connected is True: - self.disconnect() - - def connect(self) -> Connection: - """ - Establishes a connection to the Databricks workspace. - - Raises: - ValueError: If the expected connection parameters are not provided. 
- - Returns: - databricks.sql.client.Connection: A connection object to the Databricks workspace. - """ - if self.is_connected is True: - return self.connection - - if not all(key in self.connection_data for key in ["server_hostname", "http_path", "access_token"]): - raise ValueError("Required parameters (server_hostname, http_path, access_token) must be provided.") - - config = { - "server_hostname": self.connection_data["server_hostname"], - "http_path": self.connection_data["http_path"], - "access_token": self.connection_data["access_token"], - } - - optional_parameters = [ - "session_configuration", - "http_headers", - "catalog", - "schema", - ] - for parameter in optional_parameters: - if parameter in self.connection_data: - config[parameter] = self.connection_data[parameter] - - try: - self.connection = connect(**config) - self.is_connected = True - return self.connection - except RequestError as request_error: - logger.error(f"Request error when connecting to Databricks: {request_error}") - raise - except RuntimeError as runtime_error: - logger.error(f"Runtime error when connecting to Databricks: {runtime_error}") - raise - except Exception as unknown_error: - logger.error(f"Unknown error when connecting to Databricks: {unknown_error}") - raise - - def disconnect(self): - """ - Closes the connection to the Databricks workspace if it's currently open. - """ - if self.is_connected is False: - return - - self.connection.close() - self.is_connected = False - return self.is_connected - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the Databricks workspace. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. 
- """ - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - connection = self.connect() - - query = "SELECT 1 FROM information_schema.schemata" - schema_value = self.connection_data.get("schema") - if isinstance(schema_value, str) and schema_value != "": - try: - escaped_schema = _escape_literal(schema_value) - query += f" WHERE schema_name = '{escaped_schema}'" - except ValueError as e: - logger.error(f"Invalid schema name: {e}") - response.error_message = str(e) - return response - - with connection.cursor() as cursor: - cursor.execute(query) - result = cursor.fetchall() - - if not result: - raise ValueError(f"The schema {self.connection_data['schema']} does not exist!") - - response.success = True - except ( - ValueError, - RequestError, - RuntimeError, - ServerOperationError, - ) as known_error: - logger.error(f"Connection check to Databricks failed, {known_error}!") - response.error_message = str(known_error) - except Exception as unknown_error: - logger.error(f"Connection check to Databricks failed due to an unknown error, {unknown_error}!") - response.error_message = str(unknown_error) - - if response.success and need_to_close: - self.disconnect() - - elif not response.success and self.is_connected: - self.is_connected = False - - return response - - def native_query(self, query: Text) -> Response: - """ - Executes a native SQL query on the Databricks workspace and returns the result. - - Args: - query (Text): The SQL query to be executed. - - Returns: - Response: A response object containing the result of the query or an error message. 
- """ - need_to_close = self.is_connected is False - - connection = self.connect() - with connection.cursor() as cursor: - try: - cursor.execute(query) - result = cursor.fetchall() - if cursor.description: - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame(result, columns=[x[0] for x in cursor.description]), - ) - else: - response = Response(RESPONSE_TYPE.OK) - connection.commit() - except ServerOperationError as server_error: - logger.error(f"Server error running query: {query} on Databricks, {server_error}!") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(server_error), error_code=0) - except Exception as unknown_error: - logger.error(f"Unknown error running query: {query} on Databricks, {unknown_error}!") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(unknown_error), error_code=0) - - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """ - Executes a SQL query represented by an ASTNode on the Databricks Workspace and retrieves the data. - - Args: - query (ASTNode): An ASTNode representing the SQL query to be executed. - - Returns: - Response: The response from the `native_query` method, containing the result of the SQL query execution. - """ - # Transform the query to be compatible with Databricks SQL syntax - query_traversal(query, _transform_databricks_sql_intervals) - renderer = SqlalchemyRender(DatabricksDialect) - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self, all: bool = False) -> Response: - """ - Retrieves a list of all non-system tables in the connected schema of the Databricks workspace. - - Args: - all (bool): If True - return tables from all schemas. - - Returns: - Response: A response object containing a list of tables in the connected schema. 
- """ - all_filter = "and table_schema = current_schema()" - if all is True: - all_filter = "" - query = f""" - SELECT - table_schema, - table_name, - table_type - FROM - information_schema.tables - WHERE - table_schema != 'information_schema' - {all_filter} - """ - result = self.native_query(query) - if result.resp_type != RESPONSE_TYPE.TABLE or result.data_frame is None: - return result - df = result.data_frame - result.data_frame = df.rename(columns={col: col.upper() for col in df.columns}) - return result - - def get_columns(self, table_name: str, schema_name: str | None = None) -> Response: - """ - Retrieves column details for a specified table in the Databricks workspace. - - Args: - table_name (str): The name of the table for which to retrieve column information. - schema_name (str|None): The name of the schema in which the table is located. - - Raises: - ValueError: If the 'table_name' is not a valid string. - - Returns: - Response: A response object containing the column details. 
- """ - if not table_name or not isinstance(table_name, str): - raise ValueError("Invalid table name provided.") - - try: - table_literal = _escape_literal(table_name) - except ValueError as e: - logger.error(f"Invalid table name: {e}") - return Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - - if isinstance(schema_name, str): - try: - schema_name_sql = f"'{_escape_literal(schema_name)}'" - except ValueError as e: - logger.error(f"Invalid schema name: {e}") - return Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - else: - schema_name_sql = "current_schema()" - - query = f""" - SELECT - COLUMN_NAME, - DATA_TYPE, - ORDINAL_POSITION, - COLUMN_DEFAULT, - IS_NULLABLE, - CHARACTER_MAXIMUM_LENGTH, - CHARACTER_OCTET_LENGTH, - NUMERIC_PRECISION, - NUMERIC_SCALE, - DATETIME_PRECISION, - null as CHARACTER_SET_NAME, - null as COLLATION_NAME - FROM - information_schema.columns - WHERE - table_name = '{table_literal}' - AND - table_schema = {schema_name_sql} - """ - - result = self.native_query(query) - if result.resp_type == RESPONSE_TYPE.OK: - result = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET)), - ) - result.to_columns_table_response(map_type_fn=_map_type) - - return result - - def meta_get_tables(self, table_names: Optional[List[str]] = None) -> Response: - """ - Retrieves metadata information about the tables in the Databricks database to be stored in the data catalog. - - Args: - table_names (list): A list of table names for which to retrieve metadata information. - - Returns: - Response: A response object containing the metadata information, formatted as per the `Response` class. 
- """ - - schema_name = self.connection_data.get("schema") or "default" - - try: - schema_literal = _escape_literal(schema_name) - except ValueError as e: - logger.error(f"Invalid schema name: {e}") - return Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - - query = f""" - SELECT - table_catalog AS TABLE_CATALOG, - table_schema AS TABLE_SCHEMA, - table_name AS TABLE_NAME, - table_type AS TABLE_TYPE, - comment AS TABLE_DESCRIPTION, - NULL AS ROW_COUNT, - created AS CREATED, - last_altered AS LAST_ALTERED - FROM information_schema.tables - WHERE table_schema = '{schema_literal}' - AND table_type IN ('BASE TABLE', 'VIEW', 'MANAGED') - """ - - if table_names is not None and len(table_names) > 0: - try: - escaped_names = [] - for t in table_names: - escaped_names.append(f"'{_escape_literal(t)}'") - table_names_str = ", ".join(escaped_names) - query += f" AND table_name IN ({table_names_str})" - except ValueError as e: - logger.error(f"Invalid table name in list: {e}") - return Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - - result = self.native_query(query) - - if result.type == RESPONSE_TYPE.TABLE and result.data_frame is not None and not result.data_frame.empty: - result.data_frame["TABLE_SCHEMA"] = self.name - - return result - - def meta_get_columns(self, table_names: Optional[List[str]] = None) -> Response: - """ - Retrieves column metadata for the specified tables (or all tables if no list is provided). - - Args: - table_names (list): A list of table names for which to retrieve column metadata. - - Returns: - Response: A response object containing the column metadata. 
- """ - - schema_name = self.connection_data.get("schema") or "default" - - try: - schema_literal = _escape_literal(schema_name) - except ValueError as e: - logger.error(f"Invalid schema name: {e}") - return Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - - query = f""" - SELECT - table_name AS TABLE_NAME, - column_name AS COLUMN_NAME, - data_type AS DATA_TYPE, - comment AS COLUMN_DESCRIPTION, - column_default AS COLUMN_DEFAULT, - (is_nullable = 'YES') AS IS_NULLABLE, - character_maximum_length AS CHARACTER_MAXIMUM_LENGTH, - character_octet_length AS CHARACTER_OCTET_LENGTH, - numeric_precision AS NUMERIC_PRECISION, - numeric_scale AS NUMERIC_SCALE, - datetime_precision AS DATETIME_PRECISION, - NULL AS CHARACTER_SET_NAME, - NULL AS COLLATION_NAME - FROM information_schema.columns - WHERE table_schema = '{schema_literal}' - """ - - if table_names is not None and len(table_names) > 0: - try: - escaped_names = [] - for t in table_names: - escaped_names.append(f"'{_escape_literal(t.lower())}'") - table_names_str = ", ".join(escaped_names) - query += f" AND LOWER(table_name) IN ({table_names_str})" - except ValueError as e: - logger.error(f"Invalid table name in list: {e}") - return Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - - result = self.native_query(query) - return result - - def meta_get_column_statistics(self, table_names: Optional[List[str]] = None) -> Response: - """ - Retrieves basic column statistics: null %, distinct count. - - Args: - table_names (list): A list of table names for which to retrieve column statistics metadata. - - Returns: - Response: A response object containing the column statistics metadata. 
- """ - schema_name = self.connection_data.get("schema") or "default" - - try: - schema_literal = _escape_literal(schema_name) - except ValueError as e: - logger.error(f"Invalid schema name: {e}") - return Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - - columns_query = f""" - SELECT table_name AS TABLE_NAME, column_name AS COLUMN_NAME - FROM information_schema.columns - WHERE table_schema = '{schema_literal}' - """ - - if table_names: - try: - escaped_names = [] - for t in table_names: - escaped_names.append(f"'{_escape_literal(t)}'") - table_names_str = ", ".join(escaped_names) - columns_query += f" AND table_name IN ({table_names_str})" - except ValueError as e: - logger.error(f"Invalid table name in list: {e}") - return Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - - columns_result = self.native_query(columns_query) - if ( - columns_result.type == RESPONSE_TYPE.ERROR - or columns_result.data_frame is None - or columns_result.data_frame.empty - ): - return Response(RESPONSE_TYPE.ERROR, error_message="No columns found.") - columns_df = columns_result.data_frame - grouped = columns_df.groupby("TABLE_NAME") - all_stats = [] - - for table_name, group in grouped: - try: - _validate_identifier(table_name) - except ValueError as e: - logger.warning(f"Skipping invalid table name: {e}") - continue - - select_parts = [] - for _, row in group.iterrows(): - col = row["COLUMN_NAME"] - try: - _validate_identifier(col) - except ValueError as e: - logger.warning(f"Skipping invalid column name: {e}") - continue - - quoted_col = _quote_identifier(col) - safe_suffix = col.replace("`", "``") - select_parts.extend( - [ - f"SUM(CASE WHEN {quoted_col} IS NULL THEN 1 ELSE 0 END) AS {_quote_identifier(f'nulls_{safe_suffix}')}", - f"APPROX_COUNT_DISTINCT({quoted_col}) AS {_quote_identifier(f'distincts_{safe_suffix}')}", - f"MIN({quoted_col}) AS {_quote_identifier(f'min_{safe_suffix}')}", - f"MAX({quoted_col}) AS {_quote_identifier(f'max_{safe_suffix}')}", - ] - ) - - if 
not select_parts: - continue - - quoted_table_name = _quote_identifier(table_name) - stats_query = f""" - SELECT COUNT(*) AS `total_rows`, {", ".join(select_parts)} - FROM {quoted_table_name} - """ - - try: - stats_res = self.native_query(stats_query) - if stats_res.type != RESPONSE_TYPE.TABLE or stats_res.data_frame is None or stats_res.data_frame.empty: - logger.warning(f"Could not retrieve stats for table {table_name}") - for _, row in group.iterrows(): - all_stats.append( - { - "table_name": table_name, - "column_name": row["COLUMN_NAME"], - "null_percentage": None, - "distinct_values_count": None, - "most_common_values": [], - "most_common_frequencies": [], - "minimum_value": None, - "maximum_value": None, - } - ) - continue - - stats_data = stats_res.data_frame.iloc[0] - total_rows = stats_data.get("total_rows", 0) - - for _, row in group.iterrows(): - col = row["COLUMN_NAME"] - safe_suffix = col.replace("`", "``") - nulls = stats_data.get(f"nulls_{safe_suffix}", 0) - distincts = stats_data.get(f"distincts_{safe_suffix}", None) - min_val = stats_data.get(f"min_{safe_suffix}", None) - max_val = stats_data.get(f"max_{safe_suffix}", None) - null_pct = (nulls / total_rows) * 100 if total_rows > 0 else None - - all_stats.append( - { - "table_name": table_name, - "column_name": col, - "null_percentage": null_pct, - "distinct_values_count": distincts, - "most_common_values": [], - "most_common_frequencies": [], - "minimum_value": min_val, - "maximum_value": max_val, - } - ) - except Exception as e: - logger.error(f"Exception while fetching statistics for table {table_name}: {e}") - for _, row in group.iterrows(): - all_stats.append( - { - "table_name": table_name, - "column_name": row["COLUMN_NAME"], - "null_percentage": None, - "distinct_values_count": None, - "most_common_values": [], - "most_common_frequencies": [], - "minimum_value": None, - "maximum_value": None, - } - ) - if not all_stats: - return Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame()) - - 
return Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame(all_stats)) - - def meta_get_primary_keys(self, table_names: Optional[List[str]] = None) -> Response: - """ - Databricks doesn't have primary key constraints in data warehouses. - Return empty result like Snowflake does when no keys exist. - """ - empty_df = pd.DataFrame( - { - "table_name": pd.Series([], dtype="object"), - "column_name": pd.Series([], dtype="object"), - "ordinal_position": pd.Series([], dtype="Int64"), - "constraint_name": pd.Series([], dtype="object"), - } - ) - - return Response(RESPONSE_TYPE.TABLE, data_frame=empty_df) - - def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None) -> Response: - """ - Databricks doesn't have foreign key constraints in data warehouses. - Return empty result like Snowflake does when no keys exist. - """ - empty_df = pd.DataFrame( - { - "child_table_name": pd.Series([], dtype="object"), - "child_column_name": pd.Series([], dtype="object"), - "parent_table_name": pd.Series([], dtype="object"), - "parent_column_name": pd.Series([], dtype="object"), - } - ) - - return Response(RESPONSE_TYPE.TABLE, data_frame=empty_df) diff --git a/mindsdb/integrations/handlers/databricks_handler/icon.svg b/mindsdb/integrations/handlers/databricks_handler/icon.svg deleted file mode 100644 index 884a4263bbf..00000000000 --- a/mindsdb/integrations/handlers/databricks_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/databricks_handler/requirements.txt b/mindsdb/integrations/handlers/databricks_handler/requirements.txt deleted file mode 100644 index 0137133cc54..00000000000 --- a/mindsdb/integrations/handlers/databricks_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -databricks-sql-connector==4.2.3 diff --git a/mindsdb/integrations/handlers/databricks_handler/tests/__init__.py b/mindsdb/integrations/handlers/databricks_handler/tests/__init__.py deleted file mode 100644 index 
e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/databricks_handler/tests/test_databricks_handler.py b/mindsdb/integrations/handlers/databricks_handler/tests/test_databricks_handler.py deleted file mode 100644 index 3cd765b2243..00000000000 --- a/mindsdb/integrations/handlers/databricks_handler/tests/test_databricks_handler.py +++ /dev/null @@ -1,37 +0,0 @@ -import unittest -from mindsdb.integrations.handlers.databricks_handler.databricks_handler import ( - DatabricksHandler, -) -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class DatabricksHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = { - "server_hostname": "adb-1234567890123456.7.azuredatabricks.net", - "http_path": "sql/protocolv1/o/1234567890123456/1234-567890-test123", - "access_token": "dapi1234567890ab1cde2f3ab456c7d89efa", - "schema": "sales", - } - cls.handler = DatabricksHandler("test_databricks_handler", cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_native_query_select(self): - query = "SELECT * FROM sales_features" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_2_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_4_get_columns(self): - columns = self.handler.get_columns("sales_features") - assert columns.type is not RESPONSE_TYPE.ERROR - - -if __name__ == "__main__": - unittest.main() diff --git a/mindsdb/integrations/handlers/duckdb_faiss_handler/README.md b/mindsdb/integrations/handlers/duckdb_faiss_handler/README.md deleted file mode 100644 index 3ba7f070627..00000000000 --- a/mindsdb/integrations/handlers/duckdb_faiss_handler/README.md +++ /dev/null @@ -1,169 +0,0 @@ -# DuckDB + Faiss Handler - -## Using duckdb_faiss handler - -This handler combines DuckDB for metadata storage and SQL filtering with Faiss for high-performance vector 
similarity search. - - -### 1. Create a FAISS Database and Knowledge Base - -`duckdb_faiss` handler is installed by default with mindsdb. When the `storage` parameter is not specified it creates default vector storage. It can be: -- pgvector (if the KB_PGVECTOR_URL env variable is defined) -- otherwise, a duckdb_faiss database will be created by default - -Create knowledge base with default vector db: -``` -CREATE KNOWLEDGE BASE kb_animals -USING - embedding_model = {"provider": "openai", "model_name": "text-embedding-3-small"}; -``` - -You can create your own duckdb_faiss database manually as well: - -```sql -CREATE DATABASE mindsdb_faiss -WITH ENGINE = 'duckdb_faiss', -PARAMETERS = { - "persist_directory": "/data/faiss_db_location", - "metric": "ip", - "use_gpu": false, - "nlist": 10, - "nprobe": 2 -} -``` - -And use in knowledge base: -```sql -CREATE KNOWLEDGE BASE kb_animals -USING - storage = mindsdb_faiss.animals_table, - embedding_model = {"provider": "openai", "model_name": "text-embedding-3-small"}; -``` - -Parameters for duckdb_faiss database: -- `persist_directory`: Optional, custom storage path. If not set - a handler storage will be used -- `metric`: Optional, distance metric - possible values: cosine/ip/l1/l2. Default is "cosine" -- `use_gpu`: Optional, enable GPU acceleration (default: False) -- `nlist`: Optional, IVF parameter for clustering. Used as default value in create IVF index. Default is 1024 -- `nprobe`: Optional, controls the number of clusters to search during a query. Default is 1 - - -### 2. Insert data - -The same as for other vector storages, insert from select or from values: -```sql -INSERT INTO kb_animals (id, content, legs) -VALUES (1, 'duck', 2), (2, 'cat', 4); -``` - -### 3. 
Querying the Knowledge Base - -**Vector similarity search** -```sql -SELECT * FROM kb_animals -WHERE content = 'cat' AND distance < 0.5; -``` - -**Mixed search** -```sql -SELECT * FROM kb_animals -WHERE content = 'cat' AND legs = 4; -``` -Supported `LIKE`, `NOT LIKE`, `>`, `>=`, `<`, `<=` filters for metadata columns. - - -**Hybrid search** -```sql -SELECT * FROM kb_animals -WHERE content = 'cat' AND legs = 4 - AND hybrid_search = TRUE; -``` - -Can be used with bool `hybrid_search` or float `hybrid_search_alpha` parameters - - -## 4. Create FAISS Indexes - -When a new duckdb_faiss is created, it starts from using [flat FAISS index](https://faiss.ai/cpp_api/struct/structfaiss_1_1IndexFlat.html). It works by scanning all index file to get similar vectors. Also a flat index is located in RAM, and its size is restricted by available memory. -To speed up vector search you can convert to other type of indexes. Available options: -- ivf - [Inverted File](https://faiss.ai/cpp_api/struct/structfaiss_1_1IndexIVF.html). It is also located in memory, but faster than FLAT -- ivf_file, the same as ivf, but located on disk and doesn't require being loaded into RAM. This type of index isn't supported on Windows. - -Important: It is not possible to create an index for an empty FAISS knowledge base because both types of indexes require data in the knowledge base before creating it. The loaded data is used to train the index. The size of the training data and the number of clusters can affect index quality. - -Query: -```sql -CREATE INDEX ON KNOWLEDGE_BASE kb_animals -WITH ( - type = 'ivf_file', - nlist = 100, - train_count = 10000 -); -``` - -Parameters: -- `type` - optional, default is ivf_file - - for windows default is the 'ivf' -- `nlist` optional, number of clusters for IVF, default 1024, -- `train_count` optional, number of vectors to use for training, default is calculated from nlist. 
- - -## Implementation details - -### How it works - -When a duckdb_faiss table is created, the handler creates a folder for it. It contains: -- duckdb.db - a duckdb database to store metadata for knowledge base -- faiss_index - faiss index file -Folder name - is a table name - -The other files in folders in faiss table: -- duckdb.db* - all files related to duckdb (duckdb.db.wal) -- faiss_index* - all files related faiss index (partitions, merged index for ivf_file) -- dump/ - temporal folder for extracted vectors -- recover/ - temporal folder for index backup - -### Locks and concurrency - -Because IVF and FLAT indexes are loaded in RAM and the disk copy is used only to store changes in the index (insert/delete records), small indexes are unloaded from RAM after each request and loaded again before the next request. - -When the index becomes large the read time increases, so the index is cached in RAM and locked to prevent using it in different processes or threads. If mindsdb is used from different threads or processes, an `index file locked` exception might appear. The lock is released when the handler cache is cleared (default timeout is 1 min). - -Because insert-from-select into the knowledge base is performed in the background, the background process can't use the FAISS index if it is locked by a GUI. The implemented workaround is: -- before the query is sent into background - - search all locks for vector bases of KBs in the query and unload the FAISS database from cache -- after executing query in background - - do the same (unload the FAISS database from cache) - -Locks also prevent inserting into the knowledge base using threads. This query won't work: -```sql -INSERT INTO my_kb SELECT * FROM db1.table1 -USING threads=10 -``` - - -Important: The FAISS index isn't locked on Windows; the FAISS library can write to a locked file there. 
- -### Checking resources - -**RAM** -For indexes located in RAM, when data is inserted into the FAISS index it forecasts the required memory and does not allow the insert if it exceeds available memory. -This check is run after every 10k records inserted. - -**disk** -When an index is created, it requires two to three times more disk space (depending on the index type). The free disk space is also checked before starting to create the index. -What occupies disk: -- an old faiss_index file (its backup) -- fetched vectors from old index -- a new index - -### Keyword search - -Implemented by using duckdb [fts extension](https://duckdb.org/docs/stable/core_extensions/full_text_search#match_bm25-function) -When keyword search is used and FTS index doesn't exist—it is created. This index is removed when any record is inserted into KB (because FTS index isn't updated after inserts in DuckDB). - -### Mixed search optimizations -For queries that mix vectors and rich metadata: -- The handler estimates metadata selectivity (`COUNT(*) WHERE `) to choose the best execution plan. -- **Vector-first strategy** fetches an expanding set of candidates from FAISS until enough records satisfy the metadata filters. -- **Metadata-first strategy** constrains candidate IDs via DuckDB before scoring them in FAISS batches (`META_BATCH = 10,000`). 
diff --git a/mindsdb/integrations/handlers/duckdb_faiss_handler/__about__.py b/mindsdb/integrations/handlers/duckdb_faiss_handler/__about__.py deleted file mode 100644 index 30325e1c853..00000000000 --- a/mindsdb/integrations/handlers/duckdb_faiss_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB DuckDB Faiss handler" -__package_name__ = "mindsdb_duckdb_faiss_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for DuckDB with Faiss vector indexing" -__author__ = "MindsDB" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2024 - mindsdb" diff --git a/mindsdb/integrations/handlers/duckdb_faiss_handler/__init__.py b/mindsdb/integrations/handlers/duckdb_faiss_handler/__init__.py deleted file mode 100644 index fd2bd37f864..00000000000 --- a/mindsdb/integrations/handlers/duckdb_faiss_handler/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_SUPPORT_LEVEL, HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version -from .connection_args import connection_args, connection_args_example - -try: - from .duckdb_faiss_handler import DuckDBFaissHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "DuckDB Faiss" -name = "duckdb_faiss" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.MINDSDB - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "support_level", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/duckdb_faiss_handler/connection_args.py b/mindsdb/integrations/handlers/duckdb_faiss_handler/connection_args.py deleted file mode 100644 index 9ea5d0784b0..00000000000 --- 
a/mindsdb/integrations/handlers/duckdb_faiss_handler/connection_args.py +++ /dev/null @@ -1,15 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - persist_directory={ - "type": ARG_TYPE.STR, - "description": "Optional custom directory for persisting data. If not provided, uses MindsDB's handler storage.", - "required": False, - "label": "Persist Directory", - }, -) - -connection_args_example = OrderedDict(persist_directory="/tmp/data") diff --git a/mindsdb/integrations/handlers/duckdb_faiss_handler/duckdb_faiss_handler.py b/mindsdb/integrations/handlers/duckdb_faiss_handler/duckdb_faiss_handler.py deleted file mode 100644 index 22153163ae8..00000000000 --- a/mindsdb/integrations/handlers/duckdb_faiss_handler/duckdb_faiss_handler.py +++ /dev/null @@ -1,293 +0,0 @@ -import os -import re -import shutil -import threading -import time -from contextlib import contextmanager -from dataclasses import dataclass -from pathlib import Path -from typing import List, Iterator - -import pandas as pd - - -from mindsdb.integrations.libs.response import ( - RESPONSE_TYPE, - HandlerResponse as Response, - HandlerStatusResponse as StatusResponse, -) -from mindsdb.integrations.libs.vectordatabase_handler import ( - FilterCondition, - VectorStoreHandler, -) -from mindsdb.integrations.libs.keyword_search_base import KeywordSearchBase -from mindsdb.integrations.utilities.sql_utils import KeywordSearchArgs - -from mindsdb.utilities import log -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender - -from .duckdb_faiss_table import DuckDBFaissTable - -logger = log.getLogger(__name__) - - -TABLE_CACHE_TTL_SECONDS = 60 - - -@dataclass -class TableCacheEntry: - table: DuckDBFaissTable - last_used_ts: float - in_use_count: int = 0 - - -class DuckDBFaissHandler(VectorStoreHandler, KeywordSearchBase): - """This handler handles connection and execution of DuckDB 
with Faiss vector indexing.""" - - name = "duckdb_faiss" - - def __init__(self, name: str, **kwargs): - super().__init__(name=name) - self.single_instance = True - self.usage_lock = False - - # Extract configuration - self.connection_data = kwargs.get("connection_data", {}) - self.handler_storage = kwargs.get("handler_storage") - self.renderer = SqlalchemyRender("postgres") - - # Storage paths - self._use_handler_storage = False - self.persist_directory = self.connection_data.get("persist_directory") - if self.persist_directory: - if not os.path.exists(self.persist_directory): - raise ValueError(f"Persist directory {self.persist_directory} does not exist") - else: - # Use default handler storage - self.persist_directory = self.handler_storage.folder_get("") - self._use_handler_storage = True - - Path(self.persist_directory).mkdir(parents=True, exist_ok=True) - - self.tables_cache = {} - self.tables_cache_lock = threading.Lock() - - def connect(self): - """ - Handler readiness check. - Must not open long-lived DuckDB/FAISS resources; tables are opened per operation. 
- """ - - self.is_connected = True - return True - - def disconnect(self): - with self.tables_cache_lock: - for item in self.tables_cache.values(): - item.table.close() - - self.tables_cache = {} - - def check_connection(self) -> Response: - """Check the connection to the database.""" - try: - if not self.is_connected: - self.connect() - return StatusResponse(RESPONSE_TYPE.OK) - except Exception as e: - logger.error(f"Connection check failed: {e}") - return StatusResponse(RESPONSE_TYPE.ERROR, error_message=str(e)) - - def __del__(self): - """Cleanup on deletion.""" - self.disconnect() - - # -- manage tables -- - - @staticmethod - def _validate_table_name(table_name: str) -> None: - if table_name in (".", ".."): - raise ValueError("Invalid table_name") - if "/" in table_name or "\\" in table_name: - raise ValueError("table_name must not contain path separators") - if not re.fullmatch(r"[A-Za-z0-9_-]+", table_name): - raise ValueError( - "Invalid table_name: only letters, digits, '_' and '-' are allowed (no spaces, dots, or other symbols)" - ) - - def get_table_dir(self, table_name: str) -> Path: - """ - Get folder for a table name - Prevent path traversal by requiring the resolved path to stay within persist_directory. - """ - root = Path(self.persist_directory).resolve() - table_dir = (Path(self.persist_directory) / table_name).resolve() - if table_dir == root or root not in table_dir.parents: - raise ValueError("Invalid table_name path") - return table_dir - - def _close_cached_table(self, table_name: str) -> None: - entry = self.tables_cache.pop(table_name, None) - if entry is None: - return - try: - entry.table.close() - except Exception: - logger.exception("Failed to close cached table '%s'", table_name) - - def _close_old_tables_cache(self): - """ - Close stale cached tables that have not been used for more than TTL. - Tables that are currently in use are never closed by pruning. 
- """ - if not self.tables_cache: - return - - with self.tables_cache_lock: - now_ts = time.time() - to_close: List[str] = [] - for table_name, entry in self.tables_cache.items(): - if entry.in_use_count > 0: - continue - if now_ts - entry.last_used_ts > TABLE_CACHE_TTL_SECONDS: - to_close.append(table_name) - - for table_name in to_close: - self._close_cached_table(table_name) - - @contextmanager - def open_table(self, table_name: str) -> Iterator[DuckDBFaissTable]: - """ - Open DuckDB and Faiss resources scoped to one vector table. - Must always be closed after use to avoid long-lived locks / RAM usage. - - If `use_cache=True` and `table.cache_required` is True, the opened table is cached - in `self.tables_cache` and re-used across calls. Cached tables are pruned if they - haven't been used for more than TABLE_CACHE_TTL_SECONDS. - """ - table_dir = self.get_table_dir(table_name) - if not table_dir.exists(): - raise ValueError(f"Table '{table_name}' does not exist") - - with self.tables_cache_lock: - entry = self.tables_cache.get(table_name) - - if entry is not None: - table = entry.table - else: - table = DuckDBFaissTable(table_name=table_name, table_dir=table_dir, handler=self).open() - - if table.cache_required: - entry = TableCacheEntry(table=table, last_used_ts=time.time()) - self.tables_cache[table_name] = entry - - try: - if entry: - with self.tables_cache_lock: - entry.in_use_count += 1 - - yield table - finally: - if entry: - entry.in_use_count -= 1 - entry.last_used_ts = time.time() - else: - table.close() - - self._close_old_tables_cache() - - def create_table(self, table_name: str, if_not_exists=True): - self._validate_table_name(table_name) - table_dir = self.get_table_dir(table_name) - if table_dir.exists() and not if_not_exists: - raise ValueError(f"Vector table '{table_name}' already exists") - table_dir.mkdir(parents=True, exist_ok=True) - - with self.open_table(table_name) as table: - with table.connection.cursor() as cur: - cur.execute("CREATE 
SEQUENCE IF NOT EXISTS faiss_id_sequence START 1") - cur.execute(""" - CREATE TABLE IF NOT EXISTS meta_data ( - faiss_id INTEGER PRIMARY KEY DEFAULT nextval('faiss_id_sequence'), -- id in FAISS index - id TEXT NOT NULL, -- chunk id - content TEXT, - metadata JSON - ) - """) - - def drop_table(self, table_name: str, if_exists=True): - """Drop table from both DuckDB and Faiss.""" - table_dir = self.get_table_dir(table_name) - - if not table_dir.exists(): - if if_exists: - return - raise ValueError(f"Vector table '{table_name}' does not exist") - - with self.tables_cache_lock: - self._close_cached_table(table_name) - - shutil.rmtree(table_dir, ignore_errors=False) - - if self._use_handler_storage: - self.handler_storage.folder_sync(table_name) - - def get_tables(self) -> Response: - """Get list of tables.""" - rows = [] - root = Path(self.persist_directory) - if root.exists(): - for item in root.iterdir(): - if not item.is_dir(): - continue - rows.append({"table_name": item.name}) - df = pd.DataFrame(rows, columns=["table_name"]) - return Response(RESPONSE_TYPE.TABLE, data_frame=df) - - # -- table methods -- - - def create_index(self, table_name: str, type: str = None, nlist: int = None, train_count: int = None): - with self.open_table(table_name) as table: - table.create_index(type=type, nlist=nlist, train_count=train_count) - - def insert(self, table_name: str, data: pd.DataFrame): - with self.open_table(table_name) as table: - table.insert(data) - - def select( - self, - table_name: str, - columns: List[str] = None, - conditions: List[FilterCondition] = None, - offset: int = None, - limit: int = None, - ) -> pd.DataFrame: - with self.open_table(table_name) as table: - return table.select(conditions=conditions, offset=offset, limit=limit) - - def keyword_select( - self, - table_name: str, - columns: List[str] = None, - conditions: List[FilterCondition] = None, - offset: int = None, - limit: int = None, - keyword_search_args: KeywordSearchArgs = None, - ) -> 
pd.DataFrame: - with self.open_table(table_name) as table: - return table.keyword_select( - conditions=conditions, - offset=offset, - limit=limit, - keyword_search_args=keyword_search_args, - ) - - def delete(self, table_name: str, conditions: List[FilterCondition] = None): - """Delete data from both DuckDB and Faiss.""" - - with self.open_table(table_name) as table: - table.delete(conditions) - - def get_dimension(self, table_name: str) -> int: - with self.open_table(table_name) as table: - return table.get_dimension() diff --git a/mindsdb/integrations/handlers/duckdb_faiss_handler/duckdb_faiss_table.py b/mindsdb/integrations/handlers/duckdb_faiss_handler/duckdb_faiss_table.py deleted file mode 100644 index 3ee59e93c01..00000000000 --- a/mindsdb/integrations/handlers/duckdb_faiss_handler/duckdb_faiss_table.py +++ /dev/null @@ -1,496 +0,0 @@ -from pathlib import Path -from typing import List -import math - -import pandas as pd -import orjson -import duckdb -from mindsdb_sql_parser.ast import ( - Select, - Delete, - Identifier, - BinaryOperation, - Constant, - NullConstant, - Star, - Tuple as AstTuple, - Function, - TypeCast, -) - - -from mindsdb.integrations.libs.vectordatabase_handler import ( - FilterCondition, - FilterOperator, -) -from mindsdb.integrations.utilities.sql_utils import KeywordSearchArgs - -from mindsdb.utilities import log - -from .faiss_index import FaissIVFIndex - -logger = log.getLogger(__name__) - - -class DuckDBFaissTable: - META_BATCH_SIZE = 10_000 - VECTOR_MARGIN_K = 5 - VECTOR_GROWTH_MULTIPLIER = 5 - VECTOR_MAX_RATE = 0.25 - VECTOR_MAX_LIMIT = 1_000_000 - VECTOR_MAX_ITERATIONS = 3 - DEFAULT_LIMIT = 100 - - def __init__(self, table_name: str, table_dir: Path, handler): - self.table_name = table_name - self.handler = handler - self.connection: duckdb.DuckDBPyConnection | None = None - self.faiss_index: FaissIVFIndex | None = None - self.table_dir = table_dir - self.is_kw_index_enabled = False - self.cache_required = False - - def open(self) 
-> "DuckDBFaissTable": - duckdb_path = self.table_dir / "duckdb.db" - self.connection = duckdb.connect(str(duckdb_path)) - self.faiss_index = FaissIVFIndex(str(self.table_dir), self.handler.connection_data) - - self.cache_required = self.faiss_index.lock_required and self.faiss_index.get_size() > 100_000 - - # check keyword index - with self.connection.cursor() as cur: - # check index exists - df = cur.execute( - "SELECT * FROM information_schema.schemata WHERE schema_name = 'fts_main_meta_data'" - ).fetchdf() - if len(df) > 0: - self.is_kw_index_enabled = True - - return self - - def close(self) -> None: - self.faiss_index.close() - self.connection.close() - - @staticmethod - def _empty_result() -> pd.DataFrame: - return pd.DataFrame([], columns=["id", "content", "metadata", "distance"]) - - def _create_kw_index(self): - with self.connection.cursor() as cur: - cur.execute("PRAGMA create_fts_index('meta_data', 'id', 'content')") - self.is_kw_index_enabled = True - - def _drop_kw_index(self): - with self.connection.cursor() as cur: - cur.execute("pragma drop_fts_index('meta_data')") - self.is_kw_index_enabled = False - - def _sync(self, dump_faiss=True): - if dump_faiss: - self.faiss_index.dump() - - if self.handler._use_handler_storage: - self.handler.handler_storage.folder_sync(self.table_name) - - def create_index(self, type: str = None, nlist: int = None, train_count: int = None): - self.faiss_index.create_index(type, nlist=nlist, train_count=train_count) - # index was already saved. 
don't dump it twice - self._sync(dump_faiss=False) - - def insert(self, data: pd.DataFrame): - """Insert data into both DuckDB and Faiss.""" - - if self.is_kw_index_enabled: - # drop index, it will be created before a first keyword search - self._drop_kw_index() - - with self.connection.cursor() as cur: - df_ids = cur.execute(""" - insert into meta_data (id, content, metadata) ( - select id, content, metadata from data - ) - RETURNING faiss_id, id - """).fetchdf() - - data = data.merge(df_ids, on="id") - - vectors = data["embeddings"] - ids = data["faiss_id"] - - self.faiss_index.insert(list(vectors), list(ids)) - self._sync() - - def select( - self, - conditions: List[FilterCondition] = None, - offset: int = None, - limit: int = None, - ) -> pd.DataFrame: - """Select data with hybrid search logic.""" - - vector_filter = None - meta_filters = [] - if conditions is None: - conditions = [] - for condition in conditions: - if condition.column == "embeddings": - vector_filter = condition - else: - meta_filters.append(condition) - - if vector_filter is None: - # If only metadata in filter: - # query duckdb only - return self._select_from_metadata(meta_filters=meta_filters, limit=limit).drop("faiss_id", axis=1) - - # vector_filter is not None - if not meta_filters: - # If only content in filter: query faiss and attach to metadata - return self._select_with_vector(vector_filter=vector_filter, limit=limit) - - return self.mixed_search(vector_filter=vector_filter, meta_filters=meta_filters, limit=limit) - - def mixed_search(self, vector_filter, meta_filters, limit): - """ - 1. Measure selectivity of META_FILTERS: - Get predicted count of record after applying META_FILTERS using some of methods - Selectivity = count / total records - - 2. 
selectivity * total_recors > LIMIT / selectivity: - Use Vector-first search - Else: - Use Metadata-first search - """ - - if limit is None: - limit = self.DEFAULT_LIMIT - - total = self.faiss_index.get_size() - if total == 0 or limit == 0: - # no reason to do vector search - return self._empty_result() - - matched_count = self.get_metadata_search_count(meta_filters) - selectivity = matched_count / total - - # compare forecast count of affected records for vector and metadata search and choose what will take less - # do search even if selectivity is 0 because it might be approximate value in the future - if selectivity > 0 and selectivity * total > limit / selectivity: - df = self.vector_first_search(vector_filter, meta_filters, limit, selectivity) - else: - df = self.metadata_first_search(vector_filter, meta_filters, limit) - - return df[:limit] - - def get_metadata_search_count(self, meta_filters): - """ - Get count of records from duckdb with meta_filters - """ - - where_clause = self._translate_filters(meta_filters) - count_query = Select( - targets=[Function("count", args=[Star()], alias=Identifier("cnt"))], - from_table=Identifier("meta_data"), - where=where_clause, - ) - - with self.connection.cursor() as cur: - sql = self.handler.renderer.get_string(count_query, with_failback=True) - cur.execute(sql) - df = cur.fetchdf() - - return int(df["cnt"].iloc[0]) - - def vector_first_search(self, vector_filter, meta_filters, limit, selectivity): - """ - - Calculate required top results from faiss: it is predicted count of records, that required to be scanned - - Top_results = LIMIT / selectivity * VECTOR_MARGIN_K - - Circle: - Search Top_results vectors in faiss - Get ids - query duckdb with META_FILTERS and list of ids - If count of found records < LIMIT: - Increase Top_results = Top_results * VECTOR_GROWTH_MULTIPLIER to make next search iteration - If Top_results > total * VECTOR_MAX_RATE - or Top_results > VECTOR_MAX_LIMIT - or number of iteration 
>VECTOR_MAX_ITERATIONS: - Something went wrong, maybe META_FILTERS records has greater distance than average record - Break vector-first search and switch to metadata-first - If count of found records >= LIMIT: - Break and return results - """ - - total = self.faiss_index.get_size() - - top_results = math.ceil(limit / selectivity * self.VECTOR_MARGIN_K) - - for i in range(self.VECTOR_MAX_ITERATIONS): - df = self._select_with_vector(vector_filter=vector_filter, meta_filters=meta_filters, limit=top_results) - if len(df) >= limit: - # found required size of data - return df - - top_results = top_results * self.VECTOR_GROWTH_MULTIPLIER - - if top_results > total * self.VECTOR_MAX_RATE or top_results > self.VECTOR_MAX_LIMIT: - # give up with vector_first search - break - - # failback to metadata-first search - return self.metadata_first_search(vector_filter, meta_filters, limit) - - def metadata_first_search(self, vector_filter, meta_filters, limit): - """ - Metadata-first search - - Query list of all ids from duckdb table using META_FILTERS - - Split into batches by META_BATCH. 
- Per batch: - Get batch of ids - Use ID selector to search in FAISS only by batch of ids - use LIMIT - Combine results in single list alongside with distances - After all batches - get top LIMIT vectors with min distances - Get their ids and find records in duckdb table for them - """ - - embedding = vector_filter.value - if isinstance(embedding, str): - embedding = orjson.loads(embedding) - - where_clause = self._translate_filters(meta_filters) - ids_query = Select( - targets=[Identifier("faiss_id")], - from_table=Identifier("meta_data"), - where=where_clause, - ) - - with self.connection.cursor() as cur: - sql = self.handler.renderer.get_string(ids_query, with_failback=True) - meta_df = cur.execute(sql).fetchdf() - - if meta_df.empty: - return self._empty_result() - - faiss_ids = meta_df["faiss_id"].tolist() - results = [] - for start in range(0, len(faiss_ids), self.META_BATCH_SIZE): - batch_ids = faiss_ids[start : start + self.META_BATCH_SIZE] - - distances, faiss_ids_found = self.faiss_index.search(embedding, limit, allowed_ids=batch_ids) - results.extend(zip(distances, faiss_ids_found)) - - results.sort(key=lambda x: x[0]) - - results = results[:limit] - if len(results) == 0: - raise RuntimeError("Something went wrong, faiss database didn't return results") - distances, faiss_ids = zip(*results) - - meta_df = self._select_from_metadata(faiss_ids=faiss_ids, meta_filters=meta_filters) - vector_df = pd.DataFrame({"faiss_id": faiss_ids, "distance": distances}) - return vector_df.merge(meta_df, on="faiss_id").drop("faiss_id", axis=1).sort_values(by="distance") - - def keyword_select( - self, - conditions: List[FilterCondition] = None, - offset: int = None, - limit: int = None, - keyword_search_args: KeywordSearchArgs = None, - ) -> pd.DataFrame: - if not self.is_kw_index_enabled: - # keyword search is used for first time: create index - self._create_kw_index() - - with self.connection.cursor() as cur: - where_clause = self._translate_filters(conditions) - - score 
= Function( - namespace="fts_main_meta_data", - op="match_bm25", - args=[ - Identifier("id"), - Constant(keyword_search_args.query), - BinaryOperation(op=":=", args=[Identifier("fields"), Constant(keyword_search_args.column)]), - ], - ) - - no_emtpy_score = BinaryOperation(op="is not", args=[score, NullConstant()]) - if where_clause: - where_clause = BinaryOperation(op="and", args=[where_clause, no_emtpy_score]) - else: - where_clause = no_emtpy_score - - query = Select( - targets=[Star(), BinaryOperation(op="-", args=[Constant(1), score], alias=Identifier("distance"))], - from_table=Identifier("meta_data"), - where=where_clause, - ) - - if limit is not None: - query.limit = Constant(limit) - - if offset is not None: - query.offset = Constant(offset) - - sql = self.handler.renderer.get_string(query, with_failback=True) - cur.execute(sql) - df = cur.fetchdf() - df["metadata"] = df["metadata"].apply(orjson.loads) - return df - - def delete(self, conditions: List[FilterCondition] = None): - """Delete data from both DuckDB and Faiss.""" - with self.connection.cursor() as cur: - where_clause = self._translate_filters(conditions) - - query = Select(targets=[Identifier("faiss_id")], from_table=Identifier("meta_data"), where=where_clause) - cur.execute(self.handler.renderer.get_string(query, with_failback=True)) - df = cur.fetchdf() - ids = list(df["faiss_id"]) - - self.faiss_index.delete_ids(ids) - - query = Delete(table=Identifier("meta_data"), where=where_clause) - cur.execute(self.handler.renderer.get_string(query, with_failback=True)) - - self._sync() - - def get_dimension(self) -> int: - if self.faiss_index and self.faiss_index.index is not None: - return self.faiss_index.dim - - def get_total_size(self): - with self.connection.cursor() as cur: - cur.execute("select count(1) size from meta_data") - df = cur.fetchdf() - return df["size"].iloc[0] - - def _select_with_vector(self, vector_filter: FilterCondition, meta_filters=None, limit=None) -> pd.DataFrame: - 
embedding = vector_filter.value - if isinstance(embedding, str): - embedding = orjson.loads(embedding) - - distances, faiss_ids = self.faiss_index.search(embedding, limit or self.DEFAULT_LIMIT) - - # Fetch full data from DuckDB - if len(faiss_ids) > 0: - # ids = [str(idx) for idx in faiss_ids] - meta_df = self._select_from_metadata(faiss_ids=faiss_ids, meta_filters=meta_filters) - vector_df = pd.DataFrame({"faiss_id": faiss_ids, "distance": distances}) - return vector_df.merge(meta_df, on="faiss_id").drop("faiss_id", axis=1).sort_values(by="distance") - - return self._empty_result() - - def _select_from_metadata(self, faiss_ids=None, meta_filters=None, limit=None): - query = Select( - targets=[Star()], - from_table=Identifier("meta_data"), - ) - - where_clause = self._translate_filters(meta_filters) - - if faiss_ids: - # TODO what if ids list is too long - split search into batches - in_filter = BinaryOperation( - op="IN", args=[Identifier("faiss_id"), AstTuple([Constant(i) for i in faiss_ids])] - ) - # split into chunks - chunk_size = 10000 - if len(faiss_ids) > chunk_size: - dfs = [] - chunk = 0 - total = 0 - while chunk * chunk_size < len(faiss_ids): - # create results with partition - ids = faiss_ids[chunk * chunk_size : (chunk + 1) * chunk_size] - chunk += 1 - df = self._select_from_metadata(faiss_ids=ids, meta_filters=meta_filters, limit=limit) - total += len(df) - if limit is not None and limit <= total: - # cut the extra from the end - df = df[: -(total - limit)] - dfs.append(df) - break - if len(df) > 0: - dfs.append(df) - if len(dfs) == 0: - return pd.DataFrame([], columns=["faiss_id", "id", "content", "metadata"]) - return pd.concat(dfs) - - if where_clause is None: - where_clause = in_filter - else: - where_clause = BinaryOperation(op="AND", args=[where_clause, in_filter]) - - if limit is not None: - query.limit = Constant(limit) - - query.where = where_clause - - with self.connection.cursor() as cur: - sql = self.handler.renderer.get_string(query, 
with_failback=True) - cur.execute(sql) - df = cur.fetchdf() - df["metadata"] = df["metadata"].apply(orjson.loads) - return df - - def _translate_filters(self, meta_filters): - if not meta_filters: - return None - - where_clause = None - for item in meta_filters: - parts = item.column.split(".") - key = Identifier(parts[0]) - - # converts 'col.el1.el2' to col->'el1'->>'el2' - if len(parts) > 1: - # intermediate elements - for el in parts[1:-1]: - key = BinaryOperation(op="->", args=[key, Constant(el)]) - - # last element - key = BinaryOperation(op="->>", args=[key, Constant(parts[-1])]) - - is_orig_id = item.column == "metadata._original_doc_id" - - type_cast = None - value = item.value - - if isinstance(value, list) and len(value) > 0 and item.op in (FilterOperator.IN, FilterOperator.NOT_IN): - if is_orig_id: - # convert to str - item.value = [str(i) for i in value] - value = item.value[0] - elif is_orig_id: - if not isinstance(value, str): - value = item.value = str(item.value) - - if isinstance(value, int): - type_cast = "int" - elif isinstance(value, float): - type_cast = "float" - - if type_cast is not None: - key = TypeCast(type_cast, key) - - if item.op in (FilterOperator.NOT_IN, FilterOperator.IN): - values = [Constant(i) for i in item.value] - value = AstTuple(values) - else: - value = Constant(item.value) - - condition = BinaryOperation(op=item.op.value, args=[key, value]) - - if where_clause is None: - where_clause = condition - else: - where_clause = BinaryOperation(op="AND", args=[where_clause, condition]) - return where_clause diff --git a/mindsdb/integrations/handlers/duckdb_faiss_handler/faiss_index.py b/mindsdb/integrations/handlers/duckdb_faiss_handler/faiss_index.py deleted file mode 100644 index b276ebd00ed..00000000000 --- a/mindsdb/integrations/handlers/duckdb_faiss_handler/faiss_index.py +++ /dev/null @@ -1,627 +0,0 @@ -import os -from typing import Iterable, List, Callable, Optional -import numpy as np -import psutil -from pathlib import Path 
- -try: - import fcntl -except ImportError: - fcntl = None - -import faiss # faiss or faiss-gpu - -from mindsdb.utilities import log - -from pydantic import BaseModel - - -logger = log.getLogger(__name__) - - -def _normalize_rows(x: np.ndarray) -> np.ndarray: - norms = np.linalg.norm(x, axis=1, keepdims=True) + 1e-12 - return x / norms - - -class FaissParams(BaseModel): - metric: str | None = "cosine" - use_gpu: bool | None = False - nlist: int | None = 1024 - nprobe: int | None = None - - -def merge_ondisk(trained_index: faiss.Index, shard_fnames: List[str], ivfdata_fname: str, shift_ids=False) -> None: - """ - Modified version of faiss.contrib.ondisk.merge_ondisk. Prevents leaving orphan memory mapped shard files - - Add the contents of the indexes stored in shard_fnames into the index trained_index. - The on-disk data is stored in ivfdata_fname - """ - assert not isinstance(trained_index, faiss.IndexIVFPQR), "IndexIVFPQR is not supported as an on disk index." - # merge the images into an on-disk index - # first load the inverted lists - ivfs = [] - indexes = [] - - for fname in shard_fnames: - # the IO_FLAG_MMAP is to avoid actually loading the data - # thus the total size of the inverted lists can exceed the available RAM - logger.info("read " + fname) - index = faiss.read_index(fname, faiss.IO_FLAG_MMAP) - index_ivf = faiss.extract_index_ivf(index) - ivfs.append(index_ivf.invlists) - - indexes.append(index) - - # construct the output index - index = trained_index - index_ivf = faiss.extract_index_ivf(index) - - assert index.ntotal == 0, "works only on empty index" - - # prepare the output inverted lists. 
They will be written to merged_index.ivfdata - invlists = faiss.OnDiskInvertedLists(index_ivf.nlist, index_ivf.code_size, ivfdata_fname) - - # merge all the inverted lists - ivf_vector = faiss.InvertedListsPtrVector() - for ivf in ivfs: - ivf_vector.push_back(ivf) - - logger.info("merge %d inverted lists " % ivf_vector.size()) - ntotal = invlists.merge_from_multiple(ivf_vector.data(), ivf_vector.size(), shift_ids) - - # now replace the inverted lists in the output index - index.ntotal = index_ivf.ntotal = ntotal - index_ivf.replace_invlists(invlists, True) - invlists.this.disown() - - del indexes - - -class FaissIndex: - def __init__(self, path: str, config: dict): - self._normalize_vectors = False - - self.config = FaissParams(**config) - - metric = self.config.metric - if metric == "cosine": - self._normalize_vectors = True - self.metric = faiss.METRIC_INNER_PRODUCT - elif metric == "ip": - self.metric = faiss.METRIC_INNER_PRODUCT - elif metric == "l1": - self.metric = faiss.METRIC_L1 - elif metric == "l2": - self.metric = faiss.METRIC_L2 - else: - raise ValueError(f"Unknown metric: {metric}") - - self.path = os.path.join(path, "faiss_index") - - self._since_ram_checked = 0 - - self.index = None - self.index_type = "flat" - self.dim = None - self.index_fd = None - self.lock_required = True - - recover_path = Path(self.path).parent / "recover" - if recover_path.exists(): - # move all files from recover dir that might be left after index failing - for item in recover_path.iterdir(): - if item.is_dir(): - continue - item.rename(Path(self.path).parent / item.name) - - if os.path.exists(self.path): - self._load_index() - - def _lock_index(self): - if not self.lock_required: - return - if os.name != "nt" and fcntl: - self.index_fd = open(self.path, "rb") - try: - fcntl.flock(self.index_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) - except OSError: - raise ValueError(f"Index is already used: {self.path}") - - def _load_index(self): - # check RAM - index_size = 
os.path.getsize(self.path) - # according to tests faiss index occupies ~ the same amount of RAM as file size - # add 10% and 1Gb to it, check only if index > 1Gb - _1gb = 1024**3 - required_ram = index_size * 1.1 + _1gb - available_ram = psutil.virtual_memory().available - if required_ram > _1gb and available_ram < required_ram: - to_free_gb = round((required_ram - available_ram) / _1gb, 2) - raise ValueError(f"Unable load FAISS index into RAM, free up at least : {to_free_gb} Gb") - - # check ivf_file before loading index and locking it - index_merged = Path(self.path).parent / "faiss_index_merged" - if index_merged.exists(): - self.lock_required = False - - self._lock_index() - - self.index = faiss.read_index(self.path) - self.dim = self.index.d - - index = self.index - if hasattr(index, "index"): - index = faiss.downcast_index(index.index) - if isinstance(index, faiss.IndexIVFFlat): - if index_merged.exists(): - self.index_type = "ivf_file" - else: - self.index_type = "ivf" - if self.config.nprobe is not None: - self.index.nprobe = self.config.nprobe - - def close(self): - if self.index_fd is not None: - self.index_fd.close() - self.index = None - - def __del__(self): - self.close() - - def _build_flat_index(self): - # TODO option to create hnsw - - index = faiss.IndexFlat(self.dim, self.metric) - index = faiss.IndexIDMap(index) - - if self.config.use_gpu: - try: - index = faiss.index_cpu_to_all_gpus(index) - except Exception: - pass - - self.index = index - - def _check_ram_usage(self, count_vectors, index_type: str = "flat", m=32, nlist=4096): - self._since_ram_checked += count_vectors - - # check after every 10k vectors - if self._since_ram_checked < 10000: - return - - match index_type: - case "flat": - required = self.dim * 4 * count_vectors - case "hnsw": - required = (self.dim * 4 + m * 2 * 4) * count_vectors - case "ivf": - required = (self.dim * 4 + 8) * count_vectors + self.dim * 4 * nlist - case "ivf_file": - # don't restrict for IVF file - required = 
0 - case _: - raise ValueError(f"Unknown index type: {index_type}") - - # check RAM usage - # keep extra 1Gb - available = psutil.virtual_memory().available - 1 * 1024**3 - - if available < required: - raise ValueError("Unable insert records, not enough RAM") - - self._since_ram_checked = 0 - - def insert( - self, - vectors: Iterable[Iterable[float]], - ids: Iterable[int], - ) -> None: - if len(vectors) == 0: - return - - vectors = np.array(vectors) - ids = np.array(ids) - - if self.index is None: - # this if the first insert, detect dimension - self.dim = vectors.shape[1] - - self._build_flat_index() - - self._check_ram_usage(len(vectors), self.index_type) - - if vectors.shape[1] != self.dim: - raise ValueError(f"Dimension mismatch: expected {self.dim}, got {vectors.shape[1]}") - - if self._normalize_vectors: - vectors = _normalize_rows(vectors) - - self.index.add_with_ids(vectors, ids) - - def delete_ids(self, ids: List[int]) -> None: - """Mark IDs as deleted for filtering in searches.""" - ids = np.array(ids) - if self.index: - self.index.remove_ids(ids) - - def dump(self): - # TODO to not save it every time for big files? 
- # use two indexes: main and temporal - # temporal is Flat and stores data that wasn't moved into main, and have limit - if self.index: - faiss.write_index(self.index, self.path) - - def drop(self): - self.close() - - # remove index files (everything except duckdb) - for item in Path(self.path).parent.iterdir(): - if item.is_dir() or item.name.startswith("duckdb."): - continue - item.unlink() - - def search( - self, - query: Iterable[float], - limit: int = 10, - allowed_ids: Optional[Iterable[int]] = None, - ): - if self.index is None: - return [], [] - - queries = np.array([query]) - - if self._normalize_vectors: - queries = _normalize_rows(queries) - - params = None - if allowed_ids is not None: - allowed_ids_array = np.asarray(list(allowed_ids), dtype=np.int64) - ids_selector = faiss.IDSelectorArray( - len(allowed_ids_array), - faiss.swig_ptr(allowed_ids_array), - ) - params = faiss.IVFSearchParameters(sel=ids_selector) - - ds, ids = self.index.search(queries, limit, params=params) - - list_id = [i for i in ids[0] if i != -1] - list_distances = [1 - d for d in ds[0][: len(list_id)]] - - return list_distances, list_id - - -class FaissIVFIndex(FaissIndex): - def _dump_vectors(self, index, path: Path, batch_size: int = 30000): - """ - Extract and dump vectors and ids from index. 
Method is dependent on index type - """ - - if hasattr(index, "id_map"): - ids = faiss.vector_to_array(index.id_map).astype(np.int64, copy=False) - inner = index.index - - def get_batch_vectors(start, size): - return inner.reconstruct_n(start, size).astype(np.float32, copy=False) - - return self._dump_vectors_to_file(ids, path, index.ntotal, batch_size, get_batch_vectors) - else: - invlists = index.invlists - - index.set_direct_map_type(faiss.DirectMap.Hashtable) - - ids_list = [] - for list_no in range(index.nlist): - list_size = invlists.list_size(list_no) - if list_size == 0: - continue - - # Get IDs stored in this inverted list - id_array = faiss.rev_swig_ptr(invlists.get_ids(list_no), list_size) - ids_list.append(id_array) - - ids = np.hstack(ids_list).astype(np.int64) - - # to train index first batches will be used. shuffle ids to prevent using the same lists - # TODO shuffle only part of data? - np.random.shuffle(ids) - - def get_batch_vectors(start, size): - ids_batch = ids[start : start + size] - return index.reconstruct_batch(ids_batch).astype(np.float32, copy=False) - - return self._dump_vectors_to_file(ids, path, index.ntotal, batch_size, get_batch_vectors) - - def _dump_vectors_to_file( - self, - ids: np.ndarray, - path: Path, - ntotal: int, - batch_size: int, - get_batch_content: Callable[[int, int], np.ndarray], - ) -> int: - """ - - Write ids and vectors to memmap files in batches. - - :param ids: vector IDs in the same order as vectors will be dumped. - :param path: directory to store dumps. - :param ntotal: total number of vectors. - :param batch_size: number of vectors per batch file. 
- :param get_batch_content: function to get a batch content - - """ - - # Write all ids once to a single memmap file - ids_path = path / "ids.mmap" - mmap_ids = np.memmap(ids_path, dtype=np.int64, mode="w+", shape=(ntotal,)) - mmap_ids[:] = ids - - batch_num = 0 - while True: - if ntotal <= 0: - break - - start = batch_num * batch_size - size = min(ntotal, batch_size) - - ntotal -= size - batch_num += 1 - - vecs = get_batch_content(start, size) - - vecs_path = path / f"batch_{batch_num:05d}_vecs.mmap" - - # Create memmap for vectors and write - mmap_vecs = np.memmap(vecs_path, dtype=np.float32, mode="w+", shape=(size, self.dim)) - mmap_vecs[:] = vecs - mmap_vecs.flush() - del mmap_vecs - - del mmap_ids - return batch_num - - def _train_ivf(self, dump_path, train_count, nlist): - # Accumulate training data up to train_count - train_left = train_count - train_chunks = [] - - vec_files = self._get_dump_vector_files(dump_path) - - for fname in vec_files: - fpath = dump_path / fname - batch_data = np.fromfile(fpath, dtype="float32") - rows = int(batch_data.shape[0] / self.dim) - - train_chunks.append(batch_data.reshape([rows, self.dim])) - - train_left -= rows - if train_left <= 0: - break - - train_data = np.vstack(train_chunks) - train_data = train_data[:train_count, :] - - quantizer = faiss.IndexFlat(self.dim, self.metric) - ivf = faiss.IndexIVFFlat(quantizer, self.dim, nlist, self.metric) - - ivf.train(train_data) - return ivf - - def _get_dump_vector_files(self, dump_path): - # Collect vector batch files and sort by batch index - vec_files = [f for f in os.listdir(dump_path) if f.startswith("batch_")] - if not vec_files: - raise FileNotFoundError(f"No vector batch memmaps found in {dump_path}") - - vec_files.sort() - return vec_files - - def _create_ivf_index(self, dump_path, train_count, nlist): - """ - Build an in-memory IVF index - - :param dump_path: Directory containing memmap files - :param train_count: Number of vectors to use for training - :param nlist: 
number of clusters for IVF - """ - - # Load ids - ids_path = dump_path / "ids.mmap" - if not os.path.exists(ids_path): - raise FileNotFoundError(f"Missing ids memmap: {ids_path}") - ids = np.fromfile(ids_path, dtype="int64") - - ivf = self._train_ivf(dump_path, nlist=nlist, train_count=train_count) - - vec_files = self._get_dump_vector_files(dump_path) - - # load data - start = 0 - for fname in vec_files: - fpath = dump_path / fname - - batch_data = np.fromfile(fpath, dtype="float32") - rows = int(batch_data.shape[0] / self.dim) - - batch_vectors = batch_data.reshape([rows, self.dim]) - - ids_batch = np.asarray(ids[start : start + rows]) - ivf.add_with_ids(batch_vectors, ids_batch) - start += rows - - # remove dumps - for item in dump_path.iterdir(): - item.unlink() - - return ivf - - def _create_ivf_file_index(self, dump_path, train_count, nlist): - """Build an IVF on disk index""" - - index_path = dump_path.parent - trained_index = self._train_ivf(dump_path, train_count=train_count, nlist=nlist) - # store trained index - trained_path = str(index_path / "faiss_index.trained") - faiss.write_index(trained_index, trained_path) - - ids_path = dump_path / "ids.mmap" - if not os.path.exists(ids_path): - raise FileNotFoundError(f"Missing ids memmap: {ids_path}") - ids = np.fromfile(ids_path, dtype="int64") - - vec_files = self._get_dump_vector_files(dump_path) - - start = 0 - block_fnames = [] - for num, fname in enumerate(vec_files): - index = faiss.read_index(trained_path) - fpath = dump_path / fname - - batch_data = np.fromfile(fpath, dtype="float32") - rows = int(batch_data.shape[0] / self.dim) - - batch_vectors = batch_data.reshape([rows, self.dim]) - - ids_batch = np.asarray(ids[start : start + rows]) - index.add_with_ids(batch_vectors, ids_batch) - block_fname = str(index_path / f"faiss_index_block.{num}") - block_fnames.append(block_fname) - faiss.write_index(index, block_fname) - start += rows - - # remove dumps - for item in dump_path.iterdir(): - item.unlink() 
- - index = faiss.read_index(trained_path) - - merge_ondisk(index, block_fnames, str(index_path / "faiss_index_merged")) - os.unlink(trained_path) - for block_fname in block_fnames: - os.unlink(block_fname) - - return index - - def get_size(self): - if self.index is None: - return 0 - else: - return self.index.ntotal - - def check_required_disk_space(self, index_type): - base_path = Path(self.path).parent - available = psutil.disk_usage(str(base_path)).free - - # current size of index - index_size = 0 - for item in base_path.iterdir(): - if item.is_dir() or not item.name.startswith("faiss_index"): - continue - index_size += item.stat().st_size - - # k - how more space required than current index size - if index_type == "ivf_file": - # recovery + dump + shard files - k = 3.01 - else: - # recovery + dump - k = 2.01 - - # k-1 because the current index space will be reused - if available < index_size * (k - 1): - to_free_gb = round((index_size * (k - 1)) / 1024**3, 2) - raise ValueError(f"Unable run indexing FAISS not enough disk space, get free at least : {to_free_gb} Gb") - - def create_index(self, index_type=None, nlist=None, train_count=None): - """ - Create or recreate IVF index - - :param index_type: options are: 'ivf' (in RAM) or 'ivf_file' (on disk) - :param nlist: number of inverted lists - :param train_count: count of vectors to use for training. - - """ - - if index_type is None: - if os.name == "nt": - index_type = "ivf" - else: - index_type = "ivf_file" - - elif index_type not in ("ivf", "ivf_file"): - raise NotImplementedError("Only ivf or ivf_file indexes are supported") - - if index_type == "ivf_file" and os.name == "nt": - raise ValueError("'ivf_file' index is not supported on Windows. 
Try to use 'ivf' instead") - - # index might not fit into RAM, extract data to files - base_path = Path(self.path).parent - dump_path = base_path / "dump" - - # if self.index_type != 'flat': - # raise ValueError('Index was already created') - - # check params, apply defaults - if nlist is None: - nlist = self.config.nlist - - ntotal = self.get_size() - - # faiss shows warning if train count is less than 39 * nlist and recommend to use at least this size for train data - nlist_k = 39 - if train_count is not None: - if train_count < nlist * nlist_k: - raise ValueError(f"Train_count can't be less than nlist * {nlist_k} (is {nlist * nlist_k})") - else: - # get 10k if possible but not less than nlist * k - train_count = max(nlist * nlist_k, min(ntotal, 10000)) - - if train_count > ntotal: - raise ValueError(f"Not enough data to create index: {ntotal}, at least {train_count} records are required") - - self.check_required_disk_space(index_type) - - dump_path.mkdir(exist_ok=True) - - # remove old items - for item in dump_path.iterdir(): - item.unlink() - - self._dump_vectors(self.index, dump_path) - - # unload flat index from RAM - self.close() - - # buckup index files - recover_path = base_path / "recover" - recover_path.mkdir(exist_ok=True) - for item in base_path.iterdir(): - if item.is_dir() or item.name.startswith("duckdb."): - continue - item.rename(recover_path / item.name) - - # create ivf index - if index_type == "ivf": - ivf_index = self._create_ivf_index(dump_path, train_count=train_count, nlist=nlist) - self.lock_required = True - - elif index_type == "ivf_file": - ivf_index = self._create_ivf_file_index(dump_path, train_count=train_count, nlist=nlist) - self.lock_required = False - else: - raise ValueError(f"Unknown index type: {index_type}") - - self.index = ivf_index - self.index_type = index_type - self.dump() - self._lock_index() - - # remove unused files - dump_path.rmdir() - - for item in recover_path.iterdir(): - item.unlink() - recover_path.rmdir() 
diff --git a/mindsdb/integrations/handlers/duckdb_faiss_handler/icon.svg b/mindsdb/integrations/handlers/duckdb_faiss_handler/icon.svg deleted file mode 100644 index 927569f00d6..00000000000 --- a/mindsdb/integrations/handlers/duckdb_faiss_handler/icon.svg +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - - - - - - - - - - - - - DuckDB+Faiss - diff --git a/mindsdb/integrations/handlers/duckdb_faiss_handler/requirements.txt b/mindsdb/integrations/handlers/duckdb_faiss_handler/requirements.txt deleted file mode 100644 index 3dd4dc56e15..00000000000 --- a/mindsdb/integrations/handlers/duckdb_faiss_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -faiss-cpu==1.13.2 diff --git a/mindsdb/integrations/handlers/duckdb_faiss_handler/test_faiss_handler.py b/mindsdb/integrations/handlers/duckdb_faiss_handler/test_faiss_handler.py deleted file mode 100644 index 6a2711cfbcb..00000000000 --- a/mindsdb/integrations/handlers/duckdb_faiss_handler/test_faiss_handler.py +++ /dev/null @@ -1,82 +0,0 @@ -import pytest -from unittest.mock import patch - -import pandas as pd - -from tests.unit.executor.test_knowledge_base import TestKB, set_embedding - - -class TestFAISS(TestKB): - "Run unit tests using FAISS handler as storage" - - def _get_storage_table(self, kb_name): - try: - self.run_sql(f""" - DROP DATABASE faiss_{kb_name} - """) - except Exception: - pass - - self.run_sql(f""" - CREATE DATABASE faiss_{kb_name} - WITH ENGINE = 'duckdb_faiss' - """) - - try: - self.run_sql(f""" - drop table faiss_{kb_name}.kb_faiss - """) - except Exception: - pass - - return f"faiss_{kb_name}.kb_faiss" - - @pytest.mark.parametrize("index_type", ["ivf", "ivf_file"]) - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_ivf_index(self, mock_embedding, index_type): - """ - Run test two times: - - make ivf index and then reindex to ivf_file - - make ivf_file index and then reindex to ivf - """ - - set_embedding(mock_embedding) - - df = self._get_ral_table() - - df = 
pd.concat([df] * 300) - # unique ids - df["id"] = list(map(str, range(len(df)))) - - self.save_file("ral", df) - - self._create_kb("kb_ral", content_columns=["english"]) - - self.run_sql( - """ - insert into kb_ral - select id, english from files.ral - """ - ) - - for i in range(2): - self.run_sql(f"CREATE INDEX ON KNOWLEDGE_BASE kb_ral WITH (nlist=10, type='{index_type}')") - - # search works - ret = self.run_sql("select * from kb_ral where k.content = 'white' limit 1") - assert "white" in ret["chunk_content"][0] - - # -- test insert -- - self.run_sql("insert into kb_ral (id, english) values (10000, 'magpie')") - # search - ret = self.run_sql("select * from kb_ral where k.content = 'magpie' limit 1") - assert "magpie" in ret["chunk_content"][0] - - # -- test delete -- - self.run_sql("delete from kb_ral where id=10000") - # search - ret = self.run_sql("select * from kb_ral where k.content = 'magpie' limit 1") - assert len(ret) == 0 or "magpie" not in ret["chunk_content"][0] - - # toggle index type - index_type = "ivf_file" if index_type == "ivf" else "ivf" diff --git a/mindsdb/integrations/handlers/duckdb_handler/README.md b/mindsdb/integrations/handlers/duckdb_handler/README.md deleted file mode 100644 index 5fa9125b940..00000000000 --- a/mindsdb/integrations/handlers/duckdb_handler/README.md +++ /dev/null @@ -1,62 +0,0 @@ -# DuckDB Handler -This is the implementation of the DuckDB handler for MindsDB. - -## DuckDB -DuckDB is an open-source analytical database system. DuckDB is designed for fast execution of analytical queries. -There are no external dependencies, and the DBMS runs completely embedded within a host process, similar to SQLite. -DuckDB provides a rich SQL dialect with support for complex queries with transactional guarantees (ACID). - -## Implementation -This handler was implemented using the `duckdb` Python client library. - -### DuckDB version -The DuckDB handler is currently using the `1.1.3` release version of the Python client library. 
In case of issues, make sure your DuckDB or MotherDuck database is compatible with this version. See the DuckDB handler [requirements.txt](requirements.txt) for details. - -The required arguments to establish a connection are: - -* `database`: the name of the DuckDB or MotherDuck database file. - - Set to `:memory:` to create an in-memory database. - - For MotherDuck, specify the database and motherduck_token. - -Additional optional arguments include: - -* `motherduck_token`: a token to authenticate with MotherDuck. -* `read_only`: a flag that specifies if the connection should be made in read-only mode. - - This is required if multiple processes want to access the same database file simultaneously. - -## Usage -To connect to a DuckDB or MotherDuck database in MindsDB, the following syntax can be used: - -### DuckDB Example -```sql -CREATE DATABASE duckdb_datasource -WITH -engine='duckdb', -parameters={ - "database": "db.duckdb" -}; -``` - -### MotherDuck Example -```sql -CREATE DATABASE md_datasource -WITH -engine='duckdb', -parameters={ - "database": "sample_data", - "motherduck_token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9." -}; -``` - -Once the connection is established, you can query the database: - -```sql -SELECT * FROM duckdb_datasource.my_table; -``` - -For MotherDuck: -```sql -SELECT * FROM md_datasource.movies; -``` - -By leveraging these features, MindsDB provides powerful integrations with DuckDB and MotherDuck for scalable analytics. 
\ No newline at end of file diff --git a/mindsdb/integrations/handlers/duckdb_handler/__about__.py b/mindsdb/integrations/handlers/duckdb_handler/__about__.py deleted file mode 100644 index 93b1d5d75cc..00000000000 --- a/mindsdb/integrations/handlers/duckdb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB DuckDB handler' -__package_name__ = 'mindsdb_duckdb_handler' -__version__ = '0.0.1' -__description__ = 'MindsDB handler for DuckDB' -__author__ = 'Kamil Tyborowski' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022 - mindsdb' diff --git a/mindsdb/integrations/handlers/duckdb_handler/__init__.py b/mindsdb/integrations/handlers/duckdb_handler/__init__.py deleted file mode 100644 index ca5fdcd9218..00000000000 --- a/mindsdb/integrations/handlers/duckdb_handler/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_SUPPORT_LEVEL, HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example - -try: - from .duckdb_handler import DuckDBHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "DuckDB" -name = "duckdb" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.MINDSDB - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "support_level", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/duckdb_handler/connection_args.py b/mindsdb/integrations/handlers/duckdb_handler/connection_args.py deleted file mode 100644 index 4d9591e5eb6..00000000000 --- a/mindsdb/integrations/handlers/duckdb_handler/connection_args.py +++ /dev/null @@ -1,27 +0,0 @@ -from collections import OrderedDict - -from 
mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - -connection_args = OrderedDict( - database={ - "type": ARG_TYPE.STR, - "description": ( - "The database file to read and write from. The special value :memory: (default) " - "can be used to create an in-memory database." - ), - }, - motherduck_token={ - "type": ARG_TYPE.STR, - "description": "Motherduck access token if want to connect motherduck database.", - }, - read_only={ - "type": ARG_TYPE.BOOL, - "description": ("A flag that specifies if the connection should be made in read-only mode."), - }, -) - -connection_args_example = OrderedDict( - database="sample_data", - read_only=True, - motherduck_token="ey...enKoT.SsEcCa......", -) diff --git a/mindsdb/integrations/handlers/duckdb_handler/duckdb_handler.py b/mindsdb/integrations/handlers/duckdb_handler/duckdb_handler.py deleted file mode 100644 index bc407ef0575..00000000000 --- a/mindsdb/integrations/handlers/duckdb_handler/duckdb_handler.py +++ /dev/null @@ -1,171 +0,0 @@ -import duckdb -import pandas as pd -from duckdb import DuckDBPyConnection -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender - -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import RESPONSE_TYPE -from mindsdb.integrations.libs.response import HandlerResponse as Response -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class DuckDBHandler(DatabaseHandler): - """This handler handles connection and execution of the DuckDB statements.""" - - name = "duckdb" - - def __init__(self, name: str, **kwargs): - super().__init__(name) - self.parser = parse_sql - self.dialect = "postgresql" - self.connection_data = kwargs.get("connection_data") - self.renderer = SqlalchemyRender("postgres") 
- - self.connection = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self) -> DuckDBPyConnection: - """Connect to a DuckDB database. - - Returns: - DuckDBPyConnection: The database connection. - """ - - if self.is_connected is True: - return self.connection - motherduck_token = self.connection_data.get("motherduck_token") - if motherduck_token: - database = ( - f"md:{self.connection_data.get('database')}?motherduck_token={motherduck_token}&attach_mode=single" - ) - else: - database = self.connection_data.get("database") - - args = { - "database": database, - "read_only": self.connection_data.get("read_only"), - } - - self.connection = duckdb.connect(**args) - self.is_connected = True - - return self.connection - - def disconnect(self): - """Close the database connection.""" - - if self.is_connected is False: - return - - self.connection.close() - self.is_connected = False - - def check_connection(self) -> StatusResponse: - """Check the connection to the DuckDB database. - - Returns: - StatusResponse: Connection success status and error message if an error occurs. - """ - - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - self.connect() - response.success = True - except Exception as e: - logger.error(f"Error connecting to DuckDB {self.connection_data['database']}, {e}!") - response.error_message = str(e) - finally: - if response.success is True and need_to_close: - self.disconnect() - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> Response: - """Execute a SQL query. - - Args: - query (str): The SQL query to execute. - - Returns: - Response: The query result. 
- """ - need_to_close = self.is_connected is False - - connection = self.connect() - cursor = connection.cursor() - - try: - cursor.execute(query) - - result = cursor.fetchall() - if result: - response = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame(result, columns=[x[0] for x in cursor.description]), - ) - else: - connection.commit() - response = Response(RESPONSE_TYPE.OK) - except Exception as e: - logger.error(f"Error running query: {query} on {self.connection_data['database']}!") - response = Response(RESPONSE_TYPE.ERROR, error_message=str(e)) - - cursor.close() - if need_to_close is True: - self.disconnect() - - return response - - def query(self, query: ASTNode) -> Response: - """Render and execute a SQL query. - - Args: - query (ASTNode): The SQL query. - - Returns: - Response: The query result. - """ - - query_str = self.renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> Response: - """Get a list of all the tables in the database. - - Returns: - Response: Names of the tables in the database. - """ - - q = "SHOW TABLES;" - result = self.native_query(q) - df = result.data_frame - result.data_frame = df.rename(columns={df.columns[0]: "table_name"}) - return result - - def get_columns(self, table_name: str) -> Response: - """Get details about a table. - - Args: - table_name (str): Name of the table to retrieve details of. - - Returns: - Response: Details of the table. 
- """ - - query = f"DESCRIBE {table_name};" - return self.native_query(query) diff --git a/mindsdb/integrations/handlers/duckdb_handler/icon.svg b/mindsdb/integrations/handlers/duckdb_handler/icon.svg deleted file mode 100644 index fbc63d58213..00000000000 --- a/mindsdb/integrations/handlers/duckdb_handler/icon.svg +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/duckdb_handler/tests/__init__.py b/mindsdb/integrations/handlers/duckdb_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/duckdb_handler/tests/test_duckdb_handler.py b/mindsdb/integrations/handlers/duckdb_handler/tests/test_duckdb_handler.py deleted file mode 100644 index 4df73a0e785..00000000000 --- a/mindsdb/integrations/handlers/duckdb_handler/tests/test_duckdb_handler.py +++ /dev/null @@ -1,48 +0,0 @@ -import unittest -from mindsdb.api.executor.data_types.response_type import ( - RESPONSE_TYPE, -) -from mindsdb.integrations.handlers.duckdb_handler.duckdb_handler import ( - DuckDBHandler, -) - - -class DuckDBHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = {'connection_data': {'database': 'db.duckdb'}} - cls.handler = DuckDBHandler('test_duckdb_handler', **cls.kwargs) - - def test_0_connect(self): - self.handler.connect() - - def test_1_check_connection(self): - self.handler.check_connection() - - def test_2_drop_table(self): - res = self.handler.query('DROP TABLE IF EXISTS integers;') - assert res.type is not RESPONSE_TYPE.ERROR - - def test_3_create_table(self): - res = self.handler.query('CREATE TABLE integers(i INTEGER)') - assert res.type is not RESPONSE_TYPE.ERROR - - def test_4_insert_into_table(self): - res = self.handler.query('INSERT INTO integers VALUES (42)') - assert res.type is not RESPONSE_TYPE.ERROR - - def test_5_select(self): - res = self.handler.query('SELECT * FROM integers;') - assert res.type is 
RESPONSE_TYPE.TABLE - - def test_6_describe_table(self): - res = self.handler.get_columns('integers') - assert res.type is RESPONSE_TYPE.TABLE - - def test_7_get_tables(self): - res = self.handler.get_tables() - assert res.type is not RESPONSE_TYPE.ERROR - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/dummy_data_handler/__about__.py b/mindsdb/integrations/handlers/dummy_data_handler/__about__.py deleted file mode 100644 index fe7820eb52a..00000000000 --- a/mindsdb/integrations/handlers/dummy_data_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = '' -__package_name__ = 'mindsdb_dummy_data_handler' -__version__ = '0.0.1' -__description__ = 'Handler for tests that use data handlers' -__author__ = '' -__github__ = '' -__pypi__ = '' -__license__ = '' -__copyright__ = '' diff --git a/mindsdb/integrations/handlers/dummy_data_handler/__init__.py b/mindsdb/integrations/handlers/dummy_data_handler/__init__.py deleted file mode 100644 index 492601efd9d..00000000000 --- a/mindsdb/integrations/handlers/dummy_data_handler/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .dummy_data_handler import DummyHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = '' -name = 'dummy_data' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' -permanent = False - -__all__ = ['Handler', 'version', 'name', 'type', 'title', 'description', 'import_error', 'icon_path'] diff --git a/mindsdb/integrations/handlers/dummy_data_handler/dummy_data_handler.py b/mindsdb/integrations/handlers/dummy_data_handler/dummy_data_handler.py deleted file mode 100644 index 6bac43a3e0f..00000000000 --- a/mindsdb/integrations/handlers/dummy_data_handler/dummy_data_handler.py +++ /dev/null @@ -1,106 +0,0 @@ -import time -from typing import Optional, List - 
-import duckdb -from typing import Any - -from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import RESPONSE_TYPE, HandlerResponse, HandlerStatusResponse -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender - - -class DummyHandler(DatabaseHandler): - name = "dummy_data" - - def __init__(self, **kwargs): - super().__init__("dummy_data") - self.db_path = None - - args = kwargs.get("connection_data", {}) - if "db_path" in args: - self.db_path = args["db_path"] - - def connect(self): - """Set up any connections required by the handler""" - return self.db_path is not None - - def disconnect(self): - """Close any existing connections""" - return - - def check_connection(self) -> HandlerStatusResponse: - """Check connection to the handler - - Returns: - HandlerStatusResponse - """ - return HandlerStatusResponse(success=True) - - def native_query(self, query: Any, params: Optional[List] = None) -> HandlerResponse: - """Receive raw query and act upon it somehow - - Args: - query (Any): query in native format (str for sql databases, etc) - params (Optional[List]) - - Returns: - HandlerResponse - """ - con = duckdb.connect(self.db_path) - if params is not None: - query = query.replace("%s", "?") - cur = con.executemany(query, params) - if cur.rowcount >= 0: - result_df = cur.fetchdf() - else: - con.close() - return HandlerResponse(RESPONSE_TYPE.OK) - else: - result_df = con.execute(query).fetchdf() - con.close() - return HandlerResponse(RESPONSE_TYPE.TABLE, result_df) - - def query(self, query: ASTNode) -> HandlerResponse: - """Receive query as AST (abstract syntax tree) and act upon it somehow. - - Args: - query (ASTNode): sql query represented as AST. 
May be any kind - of query: SELECT, INSERT, DELETE, etc - - Returns: - HandlerResponse - """ - renderer = SqlalchemyRender("postgres") - query_str, params = renderer.get_exec_params(query, with_failback=True) - return self.native_query(query_str, params) - - def get_tables(self) -> HandlerResponse: - """Get a list of all the tables in the database - - Returns: - HandlerResponse: Names of the tables in the database - """ - q = "SHOW TABLES;" - result = self.native_query(q) - df = result.data_frame - result._data = df.rename(columns={df.columns[0]: "table_name"}) - return result - - def get_columns(self, table_name: str) -> HandlerResponse: - """Get details about a table - - Args: - table_name (str): Name of the table to retrieve details of. - - Returns: - HandlerResponse: Details of the table. - """ - query = f"DESCRIBE {table_name};" - return self.native_query(query) - - def subscribe(self, stop_event, callback, table_name, columns=None, **kwargs): - while True: - if stop_event.is_set(): - return - time.sleep(0.3) diff --git a/mindsdb/integrations/handlers/dummy_data_handler/icon.svg b/mindsdb/integrations/handlers/dummy_data_handler/icon.svg deleted file mode 100644 index 3997869b9d1..00000000000 --- a/mindsdb/integrations/handlers/dummy_data_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/email_handler/README.md b/mindsdb/integrations/handlers/email_handler/README.md deleted file mode 100644 index 2089e7131f7..00000000000 --- a/mindsdb/integrations/handlers/email_handler/README.md +++ /dev/null @@ -1,124 +0,0 @@ -# Email Handler - -Email handler for MindsDB provides interfaces to connect to email services via APIs and pull data into MindsDB. It is also possible to send emails from MindsDB using this handler. 
- ---- - -## Table of Contents - -- [Email Handler](#github-handler) - - [Table of Contents](#table-of-contents) - - [Email Handler Implementation](#sendinblue-handler-implementation) - - [Email Handler Initialization](#sendinblue-handler-initialization) - - [Implemented Features](#implemented-features) - - [TODO](#todo) - - [Example Usage](#example-usage) - ---- - -## Email Handler Implementation - -This handler was implemented using the standard Python libraries: email, imaplib and smtplib. - -## Email Handler Initialization - -The Email handler is initialized with the following required parameters: - -- `email`: a required email address to use for authentication. -- `password`: a required password to use for authentication. - -To use the handler on a Gmail account, the password must be an [app password](https://support.google.com/accounts/answer/185833?hl=en). - -Additionally, the following optional parameters can be passed: - -- `smtp_server`: SMTP server to use for sending emails. Defaults to `smtp.gmail.com`. -- `smtp_port`: SMTP port to use for sending emails. Defaults to `587`. -- `imap_server`: IMAP server to use for receiving emails. Defaults to `imap.gmail.com`. - -At the moment, the handler has only been tested with Gmail and Outlook accounts. - -## Implemented Features - -- [x] Emails Table for a email account - - [x] Support SELECT - - [x] Support LIMIT - - [x] Support WHERE - - [x] Support ORDER BY - - [x] Support column selection - - [x] Support INSERT: send emails - - [x] Support to_field, subject and body columns - -## TODO - -- [ ] Test the handler for other email providers like Yahoo, etc. - - -### Connect to Gmail - -To connect your Gmail account to MindsDB, use the below `CREATE DATABASE` statement: - -```sql -CREATE DATABASE email_datasource -WITH ENGINE = 'email', -PARAMETERS = { - "email": "youremail@gmail.com", - "password": "yourpassword" -}; -``` - -It creates a database that comes with the `emails` table. 
Now you can query for emails like this: - -```sql -SELECT * -FROM email_datasource.emails; -``` - -And you can apply filters like this: - -```sql -SELECT id, to_field, subject, body -FROM email_datasource.emails -WHERE subject = 'MindsDB' -ORDER BY id -LIMIT 5; -``` - -Or, write emails like this: - -```sql -INSERT INTO email_datasource.emails(to_field, subject, body) -VALUES ("toemail@email.com", "MindsDB", "Hello from MindsDB!"); -``` - -### Connect to Outlook - -To connect your Outlook account to MindsDB, use the below `CREATE DATABASE` statement: - -```sql -CREATE DATABASE email_datasource -WITH ENGINE = 'email', -PARAMETERS = { - "email": "youremail@gmail.com", - "password": "yourpassword", - "smtp_server": "smtp.office365.com", - "smtp_port": "587", - "imap_server": "outlook.office365.com" -}; -``` - -It creates a database that comes with the `emails` table. Now you can query for emails like this: - -```sql -SELECT * -FROM email_datasource.emails; -``` - -And you can apply filters like this: - -```sql -SELECT id, to_field, subject, body -FROM email_datasource.emails -WHERE subject = 'MindsDB' -ORDER BY id -LIMIT 5; -``` diff --git a/mindsdb/integrations/handlers/email_handler/__about__.py b/mindsdb/integrations/handlers/email_handler/__about__.py deleted file mode 100644 index 3995071b0f7..00000000000 --- a/mindsdb/integrations/handlers/email_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Email handler' -__package_name__ = 'mindsdb_email_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for email" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/email_handler/__init__.py b/mindsdb/integrations/handlers/email_handler/__init__.py deleted file mode 100644 index 279b569176c..00000000000 --- 
a/mindsdb/integrations/handlers/email_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .email_handler import ( - EmailHandler as Handler - ) - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Email' -name = 'email' -type = HANDLER_TYPE.DATA -icon_path = 'icon.png' - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/email_handler/email_client.py b/mindsdb/integrations/handlers/email_handler/email_client.py deleted file mode 100644 index 90b1b727c6d..00000000000 --- a/mindsdb/integrations/handlers/email_handler/email_client.py +++ /dev/null @@ -1,165 +0,0 @@ -import imaplib -import email -import smtplib -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText - -from datetime import datetime, timedelta - -import pandas as pd -from mindsdb.integrations.handlers.email_handler.settings import EmailSearchOptions, EmailConnectionDetails -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class EmailClient: - '''Class for searching emails using IMAP (Internet Messaging Access Protocol)''' - - _DEFAULT_SINCE_DAYS = 10 - - def __init__( - self, - connection_data: EmailConnectionDetails - ): - self.email = connection_data.email - self.password = connection_data.password - self.imap_server = imaplib.IMAP4_SSL(connection_data.imap_server) - self.smtp_server = smtplib.SMTP(connection_data.smtp_server, connection_data.smtp_port) - - def select_mailbox(self, mailbox: str = 'INBOX'): - '''Logs in & selects a mailbox from IMAP server. Defaults to INBOX, which is the default inbox. - - Parameters: - mailbox (str): The name of the mailbox to select. 
- ''' - ok, resp = self.imap_server.login(self.email, self.password) - if ok != 'OK': - raise ValueError( - f'Unable to login to mailbox {mailbox}. Please check your credentials: {str(resp)}') - - logger.info(f'Logged in to mailbox {mailbox}') - - ok, resp = self.imap_server.select(mailbox) - if ok != 'OK': - raise ValueError( - f'Unable to select mailbox {mailbox}. Please check the mailbox name: {str(resp)}') - - logger.info(f'Selected mailbox {mailbox}') - - def logout(self): - '''Shuts down the connection to the IMAP and SMTP server.''' - - try: - ok, resp = self.imap_server.logout() - if ok != 'BYE': - logger.error( - f'Unable to logout of IMAP client: {str(resp)}') - logger.info('Logged out of IMAP server') - except Exception as e: - logger.error( - f'Exception occurred while logging out from IMAP server: {str(e)}') - - try: - self.smtp_server.quit() - logger.info('Logged out of SMTP server') - except Exception as e: - logger.error( - f'Exception occurred while logging out from SMTP server: {str(e)}') - - def send_email(self, to_addr: str, subject: str, body: str): - ''' - Sends an email to the given address. - - Parameters: - to_addr (str): The email address to send the email to. - subject (str): The subject of the email. - body (str): The body of the email. - ''' - - msg = MIMEMultipart() - msg['From'] = self.email - msg['To'] = to_addr - msg['Subject'] = subject - msg.attach(MIMEText(body, 'plain')) - - self.smtp_server.starttls() - self.smtp_server.login(self.email, self.password) - self.smtp_server.send_message(msg) - logger.info(f'Email sent to {to_addr} with subject: {subject}') - - def search_email(self, options: EmailSearchOptions) -> pd.DataFrame: - '''Searches emails based on the given options and returns a DataFrame. - - Parameters: - options (EmailSearchOptions): Options to use when searching using IMAP. - - Returns: - df (pd.DataFrame): A dataframe of emails resulting from the search. 
- ''' - self.select_mailbox(options.mailbox) - - try: - - query_parts = [] - if options.subject is not None: - query_parts.append(f'(SUBJECT "{options.subject}")') - - if options.to_field is not None: - query_parts.append(f'(TO "{options.to_field}")') - - if options.from_field is not None: - query_parts.append(f'(FROM "{options.from_field}")') - - if options.since_date is not None: - since_date_str = options.since_date.strftime('%d-%b-%Y') - else: - since_date = datetime.today() - timedelta(days=EmailClient._DEFAULT_SINCE_DAYS) - since_date_str = since_date.strftime('%d-%b-%Y') - query_parts.append(f'(SINCE "{since_date_str}")') - - if options.until_date is not None: - until_date_str = options.until_date.strftime('%d-%b-%Y') - query_parts.append(f'(BEFORE "{until_date_str}")') - - if options.since_email_id is not None: - query_parts.append(f'(UID {options.since_email_id}:*)') - - query = ' '.join(query_parts) - ret = [] - _, items = self.imap_server.uid('search', None, query) - items = items[0].split() - for emailid in items: - _, data = self.imap_server.uid('fetch', emailid, '(RFC822)') - email_message = email.message_from_bytes(data[0][1]) - - email_line = {} - email_line['id'] = emailid.decode() - email_line['to_field'] = email_message.get('To') - email_line['from_field'] = email_message.get('From') - email_line['subject'] = email_message.get('Subject') - email_line['date'] = email_message.get('Date') - - plain_payload = None - html_payload = None - content_type = 'html' - for part in email_message.walk(): - subtype = part.get_content_subtype() - if subtype == 'plain': - # Prioritize plain text payloads when present. - plain_payload = part.get_payload(decode=True) - content_type = 'plain' - break - if subtype == 'html': - html_payload = part.get_payload(decode=True) - body = plain_payload or html_payload - if body is None: - # Very rarely messages won't have plain text or html payloads. 
- continue - email_line['body'] = plain_payload or html_payload - email_line['body_content_type'] = content_type - ret.append(email_line) - except Exception as e: - raise Exception('Error searching email') from e - - return pd.DataFrame(ret) diff --git a/mindsdb/integrations/handlers/email_handler/email_handler.py b/mindsdb/integrations/handlers/email_handler/email_handler.py deleted file mode 100644 index 8bece57d60d..00000000000 --- a/mindsdb/integrations/handlers/email_handler/email_handler.py +++ /dev/null @@ -1,99 +0,0 @@ -from mindsdb.utilities import log - -from mindsdb.integrations.libs.api_handler import APIHandler - -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) -from mindsdb_sql_parser import parse_sql - -from mindsdb.integrations.handlers.email_handler.email_tables import EmailsTable -from mindsdb.integrations.handlers.email_handler.email_client import EmailClient -from mindsdb.integrations.handlers.email_handler.settings import EmailConnectionDetails - - -logger = log.getLogger(__name__) - - -class EmailHandler(APIHandler): - """ - A class for handling connections and interactions with Email (send and search). 
- - Parameters - ---------- - name : str - The name of the handler - connection_data : EmailConnectionDetails - The connection details for the email server - - see `EmailConnectionDetails` for more details and examples - - """ - - def __init__(self, name=None, **kwargs): - super().__init__(name) - - connection_data = kwargs.get("connection_data", {}) - self.connection_data = EmailConnectionDetails(**connection_data) - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - emails = EmailsTable(self) - self._register_table('emails', emails) - - def connect(self): - """Authenticate with the email servers using credentials.""" - - if self.is_connected is True: - return self.connection - - try: - self.connection = EmailClient(self.connection_data) - except Exception as e: - logger.error(f'Error connecting to email api: {e}!') - raise e - - self.is_connected = True - return self.connection - - def disconnect(self): - """ Close any existing connections - - Should switch self.is_connected. - """ - self.is_connected = False - - return self.connection.logout() - - def check_connection(self) -> StatusResponse: - - response = StatusResponse(False) - - try: - self.connect() - response.success = True - - except Exception as e: - response.error_message = f'Error connecting to Email: {e}. ' - logger.error(response.error_message) - - if response.success is False and self.is_connected is True: - self.is_connected = False - - return response - - def native_query(self, query: str) -> StatusResponse: - """Receive and process a raw query. 
- Parameters - ---------- - query : str - query in a native format - Returns - ------- - StatusResponse - Request status - """ - ast = parse_sql(query) - return self.query(ast) diff --git a/mindsdb/integrations/handlers/email_handler/email_ingestor.py b/mindsdb/integrations/handlers/email_handler/email_ingestor.py deleted file mode 100644 index f81beab247d..00000000000 --- a/mindsdb/integrations/handlers/email_handler/email_ingestor.py +++ /dev/null @@ -1,91 +0,0 @@ -import re - -from bs4 import BeautifulSoup -import bs4.element -import chardet - -import pandas as pd - -from mindsdb.integrations.handlers.email_handler.email_client import EmailClient -from mindsdb.integrations.handlers.email_handler.settings import EmailSearchOptions - - -class EmailIngestor: - """ - Parses emails into a DataFrame. - Does some preprocessing on the raw HTML to extract meaningful text. - """ - - def __init__(self, email_client: EmailClient, search_options: EmailSearchOptions): - self.email_client = email_client - self.search_options = search_options - - def _is_tag_visible(self, element): - if element.parent.name in ["style", "script", "head", "title", "meta", "[document]"]: - return False - if isinstance(element, bs4.element.Comment): - return False - return True - - def _preprocess_raw_html(self, html: str) -> str: - soup = BeautifulSoup(html, "html.parser") - texts = soup.find_all(text=True) - visible_texts = filter(self._is_tag_visible, texts) - return "\n".join(t.strip() for t in visible_texts) - - def _ingest_email_row(self, row: pd.Series) -> dict: - if row["body_content_type"] == "html": - # Extract meaningful text from raw HTML. - row["body"] = self._preprocess_raw_html(row["body"]) - body_str = row["body"] - encoding = None - if isinstance(body_str, bytes): - encoding = chardet.detect(body_str)["encoding"] - if encoding is None: - # If chardet can't detect the encoding, we default to utf-8. 
- encoding = "utf-8" - elif "windows" in encoding.lower(): - # Easier to treat this at utf-8 since str constructor doesn't support all encodings here: - # https://chardet.readthedocs.io/en/latest/supported-encodings.html. - encoding = "utf-8" - try: - body_str = str(body_str, encoding=encoding) - except UnicodeDecodeError: - # If illegal characters are found, we ignore them. - # I encountered this issue with some emails that had a mix of encodings. - body_str = row["body"].decode(encoding, errors="ignore") - # We split by paragraph so make sure there aren't too many newlines in a row. - body_str = re.sub(r"[\r\n]\s*[\r\n]", "\n\n", body_str) - email_data = { - "id": row["id"], - "body": body_str, - "subject": row["subject"], - "to_field": row["to_field"], - "from_field": row["from_field"], - "datetime": row["date"], - } - # Replacing None values {None: ""} - for key in email_data: - if email_data[key] is None: - email_data[key] = "" - - return email_data - - def ingest(self) -> pd.DataFrame: - emails_df = self.email_client.search_email(self.search_options) - all_email_data = [] - for _, row in emails_df.iterrows(): - all_email_data.append(self._ingest_email_row(row)) - - df = pd.DataFrame(all_email_data) - - # Replace "(UTC)" with empty string over a pandas DataFrame column - if "datetime" in df.columns: - df["datetime"] = df["datetime"].str.replace(" (UTC)", "") - - # Convert datetime string to datetime object, and normalize timezone to UTC. 
- df["datetime"] = pd.to_datetime( - df["datetime"], utc=True, format="%a, %d %b %Y %H:%M:%S %z", errors="coerce" - ) - - return df diff --git a/mindsdb/integrations/handlers/email_handler/email_tables.py b/mindsdb/integrations/handlers/email_handler/email_tables.py deleted file mode 100644 index 5c89e352c1a..00000000000 --- a/mindsdb/integrations/handlers/email_handler/email_tables.py +++ /dev/null @@ -1,158 +0,0 @@ -import datetime as dt -import pytz - -import pandas as pd - -from mindsdb_sql_parser import ast - -from mindsdb.integrations.handlers.email_handler.email_ingestor import EmailIngestor -from mindsdb.integrations.libs.api_handler import APITable - -from mindsdb.integrations.utilities.handlers.query_utilities import SELECTQueryParser, SELECTQueryExecutor -from mindsdb.integrations.utilities.handlers.query_utilities.insert_query_utilities import INSERTQueryParser -from mindsdb.integrations.handlers.email_handler.settings import EmailSearchOptions -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class EmailsTable(APITable): - """The Emails Table implementation""" - - def select(self, query: ast.Select) -> pd.DataFrame: - """Pulls email data from the connected account. - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query - - Returns - ------- - pd.DataFrame - Emails matching the query - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - - select_statement_parser = SELECTQueryParser( - query, - 'emails', - self.get_columns() - ) - selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query() - - search_params = {} - for op, arg1, arg2 in where_conditions: - if arg2 is None: - logger.warning(f"Skipping condition: {arg1} {op} {arg2}." - "Please ignore if this is intentional, e.g. 'id > last' on first query of job run." 
- ) - continue - - if arg1 == 'datetime': - date = self.parse_date(arg2) - if op == '>': - search_params['since_date'] = date - elif op == '<': - search_params['until_date'] = date - else: - raise NotImplementedError("Only > and < operators are supported for created_at column.") - continue - - elif arg1 == 'id': - if op not in ['=', '>', '>=']: - raise NotImplementedError("Only =, > and >= operators are supported for id column.") - if op in ['=', '>=']: - search_params['since_email_id'] = int(arg2) - elif op == '>': - search_params['since_email_id'] = int(arg2) + 1 - - elif arg1 in ['mailbox', 'subject', 'to_field', 'from_field']: - if op != '=': - raise NotImplementedError("Only = operator is supported for mailbox, subject, to and from columns.") - else: - if arg1 == 'from_field': - search_params['from_field'] = arg2 - else: - search_params[arg1] = arg2 - - else: - raise NotImplementedError(f"Unsupported column: {arg1}.") - - self.handler.connect() - - if search_params: - search_options = EmailSearchOptions(**search_params) - else: - search_options = EmailSearchOptions() - - email_ingestor = EmailIngestor(self.handler.connection, search_options) - - emails_df = email_ingestor.ingest() - - # ensure all queries from query are applied to the dataframe - select_statement_executor = SELECTQueryExecutor( - emails_df, - selected_columns, - [], - order_by_conditions, - result_limit - ) - return select_statement_executor.execute_query() - - def insert(self, query: ast.Insert) -> None: - """Sends emails through the connected account. 
- - Parameters - ---------- - query : ast.Insert - Given SQL INSERT query - - Returns - ------- - None - - Raises - ------ - ValueError - If the query contains an unsupported condition - """ - insert_statement_parser = INSERTQueryParser( - query, - supported_columns=['to_field', 'subject', 'body'], - mandatory_columns=['to_field', 'subject', 'body'], - all_mandatory=True - ) - email_data = insert_statement_parser.parse_query() - - for email in email_data: - connection = self.handler.connect() - to_addr = email['to_field'] - del email['to_field'] - connection.send_email(to_addr, **email) - - def get_columns(self): - return ['id', 'body', 'subject', 'to_field', 'from_field', 'datetime'] - - @staticmethod - def parse_date(date_str) -> dt.datetime: - - if isinstance(date_str, dt.datetime): - - return date_str - date_formats = ['%Y-%m-%d %H:%M:%S', '%Y-%m-%d'] - date = None - for date_format in date_formats: - try: - date = dt.datetime.strptime(date_str, date_format) - except ValueError: - pass - if date is None: - raise ValueError(f"Can't parse date: {date_str}") - date = date.astimezone(pytz.utc) - - return date diff --git a/mindsdb/integrations/handlers/email_handler/icon.png b/mindsdb/integrations/handlers/email_handler/icon.png deleted file mode 100644 index 5e6df1a8bbb..00000000000 Binary files a/mindsdb/integrations/handlers/email_handler/icon.png and /dev/null differ diff --git a/mindsdb/integrations/handlers/email_handler/requirements.txt b/mindsdb/integrations/handlers/email_handler/requirements.txt deleted file mode 100644 index 5e32e7a0706..00000000000 --- a/mindsdb/integrations/handlers/email_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -chardet -# bs4 # in main dependencies list diff --git a/mindsdb/integrations/handlers/email_handler/settings.py b/mindsdb/integrations/handlers/email_handler/settings.py deleted file mode 100644 index 775a70efdd9..00000000000 --- a/mindsdb/integrations/handlers/email_handler/settings.py +++ /dev/null @@ -1,59 +0,0 
@@ -import datetime -from pydantic import BaseModel - - -class EmailSearchOptions(BaseModel): - """ - Represents IMAP search options to use when searching emails - """ - # IMAP mailbox to search. - mailbox: str = "INBOX" - # Search by email subject. - subject: str = None - # Search based on who the email was sent to. - to_field: str = None - # Search based on who the email was from. - from_field: str = None - # Search based on when the email was received. - since_date: datetime.date = None - until_date: datetime.date = None - # Search for all emails after this ID. - since_email_id: int = None - - class Config: - json_schema_extra = { - "example": { - "mailbox": "INBOX", - "subject": "Test", - "to_email": "example@example.com", - "from_email": "hello@example.com", - "since_date": "2021-01-01", - "until_date": "2021-01-31", - "since_email_id": "123" - } - - } - extra = "forbid" - - -class EmailConnectionDetails(BaseModel): - """ - Represents the connection details for an email client - """ - email: str - password: str - imap_server: str = "imap.gmail.com" - smtp_server: str = "smtp.gmail.com" - smtp_port: int = 587 - - class Config: - json_schema_extra = { - "example": { - "email": "joe@bloggs.com", - "password": "password", - "imap_server": "imap.gmail.com", - "smtp_server": "smtp.gmail.com", - "smtp_port": 587 - } - } - extra = "forbid" diff --git a/mindsdb/integrations/handlers/email_handler/tests/test_email_handler.py b/mindsdb/integrations/handlers/email_handler/tests/test_email_handler.py deleted file mode 100644 index 6bd96b68e53..00000000000 --- a/mindsdb/integrations/handlers/email_handler/tests/test_email_handler.py +++ /dev/null @@ -1,138 +0,0 @@ -import os - -import pandas as pd -import pytest -from unittest.mock import MagicMock - -from mindsdb_sql_parser import parse_sql -from mindsdb.integrations.handlers.email_handler.email_tables import EmailsTable -from mindsdb.integrations.handlers.email_handler.email_handler import EmailHandler - - -class 
TestEmailHandler: - def setup_class(self): - # Check if env variables exist, if not fail the test - email = os.getenv("EMAIL_USERNAME") - password = os.getenv("EMAIL_PASSWORD") - assert email is not None, "EMAIL_USERNAME environment variable not found e.g. example@gmail.com" - assert password is not None, "EMAIL_PASSWORD environment variable not found" - - self.connection_data = {"email": email, "password": password} - self.email_handler = EmailHandler(connection_data=self.connection_data) - self.email_handler.connect() - self.emails_table_instance = EmailsTable(self.email_handler) - - def test_connect_already_connected(self): - self.email_handler.is_connected = True - connection = self.email_handler.connect() - assert connection is self.email_handler.connection, "The connection must be the same as the one in the handler." - - def test_check_connection(self): - response = self.email_handler.check_connection() - assert response.success is True, "The response success must be True." - - def test_select(self): - """ - Test the select method of EmailsTable Class - """ - - mock_df = pd.DataFrame( - { - "date": [ - "Wed, 02 Feb 2022 15:30:00 +0000", - "Thu, 10 Mar 2022 10:45:15 +0530", - "Fri, 16 Dec 2022 20:15:30 -0400", - ], - "body_content_type": ["html", "html", "text"], - "body": [ - "

Hello, World!

", - "

Hello, World!

", - "Hello, World!", - ], - "from_field": ["", "", ""], - "id": ["", "", ""], - "to_field": ["", "", ""], - "subject": ["", "", ""], - } - ) - - self.emails_table_instance.handler.connection.search_email = MagicMock(return_value=mock_df) - - query = parse_sql("SELECT * FROM emails limit 1") - - self.emails_table_instance.select(query) - - assert self.emails_table_instance.handler.connection.search_email.called, ( - "The search_email method must be called." - ) - - # select using invalid column should raise Exception - query = parse_sql("SELECT invalid_column FROM emails limit 1") - - with pytest.raises(Exception): - self.emails_table_instance.select(query) - - def test_insert(self): - """ - Test the insert method of EmailsTable Class - """ - - self.emails_table_instance.handler.connection.send_email = MagicMock() - - query = parse_sql( - "INSERT INTO email_datasource.emails(to_field, subject, body) " - 'VALUES ("toemail@email.com", "MindsDB", "Hello from MindsDB!")' - ) - - self.emails_table_instance.insert(query) - assert self.emails_table_instance.handler.connection.send_email.called, "The send_email method must be called." - - # insert using invalid column should raise Exception - query = parse_sql( - "INSERT INTO email_datasource.emails(to_field, subject, body, invalid_column) " - 'VALUES ("toemail@email.com", "MindsDB", "blaha" , "invalid")' - ) - - with pytest.raises(Exception): - self.emails_table_instance.insert(query) - - def test_get_columns(self): - """ - Test the get_columns method of EmailsTable Class - """ - - columns = self.emails_table_instance.get_columns() - assert isinstance(columns, list), "The returned value must be a list." - assert "id" in columns, "Column 'id' must be in the columns list." - assert "body" in columns, "Column 'body' must be in the columns list." - assert "subject" in columns, "Column 'subject' must be in the columns list." - assert "to_field" in columns, "Column 'to_field' must be in the columns list." 
- assert "from_field" in columns, "Column 'from_field' must be in the columns list." - assert "datetime" in columns, "Column 'datetime' must be in the columns list." - - def test_undetectable_encoding_handling(self): - """ - Test that the email handler can process emails with undetectable encodings - without raising exceptions. - """ - - undetectable_content = b"\x80\x81\x82\x83\x84\x85\x86\x87" - mock_df = pd.DataFrame( - { - "date": ["Wed, 02 Feb 2022 15:30:00 +0000"], - "body_content_type": ["text"], - "body": [undetectable_content], - "from_field": ["test@example.com"], - "id": ["test1"], - "to_field": ["recipient@example.com"], - "subject": ["Test email with undetectable encoding"], - } - ) - - self.emails_table_instance.handler.connection.search_email = MagicMock(return_value=mock_df) - query = parse_sql("SELECT * FROM emails limit 1") - result = self.emails_table_instance.select(query) - - assert result is not None, "The result must not be None." - assert "body" in result.columns, "The body should be in the result columns." - assert len(result) > 0, "The result should not be empty." 
diff --git a/mindsdb/integrations/handlers/file_handler/__about__.py b/mindsdb/integrations/handlers/file_handler/__about__.py deleted file mode 100644 index fa6d06e891d..00000000000 --- a/mindsdb/integrations/handlers/file_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB file handler' -__package_name__ = 'mindsdb_file_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for files" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/file_handler/__init__.py b/mindsdb/integrations/handlers/file_handler/__init__.py deleted file mode 100644 index 70567f98bf6..00000000000 --- a/mindsdb/integrations/handlers/file_handler/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL - -from .file_handler import FileHandler as Handler -from .__about__ import __version__ as version - - -title = "File" -name = "files" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.MINDSDB -permanent = True - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "icon_path", - "support_level", -] diff --git a/mindsdb/integrations/handlers/file_handler/file_handler.py b/mindsdb/integrations/handlers/file_handler/file_handler.py deleted file mode 100644 index 6a1fc443ee4..00000000000 --- a/mindsdb/integrations/handlers/file_handler/file_handler.py +++ /dev/null @@ -1,237 +0,0 @@ -import os -import shutil -import tempfile - -import pandas as pd -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast import CreateTable, DropTables, Insert, Select, Identifier -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE -from mindsdb.api.executor.utilities.sql import query_dfs 
-from mindsdb.integrations.libs.base import DatabaseHandler -from mindsdb.integrations.libs.response import ( - RESPONSE_TYPE, - HandlerResponse as Response, - HandlerStatusResponse as StatusResponse, - INF_SCHEMA_COLUMNS_NAMES_SET, -) -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - -DEFAULT_CHUNK_SIZE = 500 -DEFAULT_CHUNK_OVERLAP = 250 - - -def clean_cell(val): - if str(val) in ["", " ", " ", "NaN", "nan", "NA"]: - return None - return val - - -class FileHandler(DatabaseHandler): - """ - Handler for files - """ - - name = "files" - - def __init__( - self, - name=None, - file_storage=None, - connection_data={}, - file_controller=None, - **kwargs, - ): - super().__init__(name) - self.parser = parse_sql - self.fs_store = file_storage - self.custom_parser = connection_data.get("custom_parser", None) - self.clean_rows = connection_data.get("clean_rows", True) - self.chunk_size = connection_data.get("chunk_size", DEFAULT_CHUNK_SIZE) - self.chunk_overlap = connection_data.get("chunk_overlap", DEFAULT_CHUNK_OVERLAP) - self.file_controller = file_controller - self.cache_thread_safe = True - - def connect(self, **kwargs): - return - - def disconnect(self, **kwargs): - return - - def check_connection(self) -> StatusResponse: - return StatusResponse(True) - - def _get_table_page_names(self, table: Identifier): - table_name_parts = table.parts - - # Check if it's a multi-part name (e.g., `file_name.sheet_name`) - if len(table_name_parts) > 1: - table_name = table_name_parts[-2] - page_name = table_name_parts[-1] # Get the sheet name - else: - table_name = table_name_parts[-1] - page_name = None - return table_name, page_name - - def query(self, query: ASTNode) -> Response: - if type(query) is DropTables: - for table_identifier in query.tables: - if len(table_identifier.parts) == 2 and table_identifier.parts[0] != self.name: - return Response( - RESPONSE_TYPE.ERROR, - error_message=f"Can't delete table from database '{table_identifier.parts[0]}'", - ) 
- table_name = table_identifier.parts[-1] - try: - self.file_controller.delete_file(table_name) - except FileNotFoundError as e: - if not query.if_exists: - return Response( - RESPONSE_TYPE.ERROR, - error_message=f"Can't delete table '{table_name}': {e}", - ) - except Exception as e: - return Response( - RESPONSE_TYPE.ERROR, - error_message=f"Can't delete table '{table_name}': {e}", - ) - return Response(RESPONSE_TYPE.OK) - - if isinstance(query, CreateTable): - # Check if the table already exists or if the table name contains more than one namespace - existing_files = self.file_controller.get_files_names() - - if len(query.name.parts) != 1: - return Response( - RESPONSE_TYPE.ERROR, - error_message="Table name cannot contain more than one namespace", - ) - - table_name = query.name.parts[-1] - if table_name in existing_files: - if query.is_replace: - self.file_controller.delete_file(table_name) - else: - return Response( - RESPONSE_TYPE.ERROR, - error_message=f"Table '{table_name}' already exists", - ) - - temp_dir_path = tempfile.mkdtemp(prefix="mindsdb_file_") - - try: - # Create a temp file to save the table - temp_file_path = os.path.join(temp_dir_path, f"{table_name}.csv") - - # Create an empty file using with the columns in the query - df = pd.DataFrame(columns=[col.name for col in query.columns]) - df.to_csv(temp_file_path, index=False) - - self.file_controller.save_file(table_name, temp_file_path, file_name=f"{table_name}.csv") - except Exception as unknown_error: - return Response( - RESPONSE_TYPE.ERROR, - error_message=f"Error creating table '{table_name}': {unknown_error}", - ) - finally: - # Remove the temp dir created - shutil.rmtree(temp_dir_path, ignore_errors=True) - - return Response(RESPONSE_TYPE.OK) - - elif isinstance(query, Select): - from mindsdb.integrations.utilities.query_traversal import query_traversal - - tables = {} - - not_found = [] - - def find_tables(node, is_table, **args): - if is_table and isinstance(node, Identifier): - 
table_name, page_name = self._get_table_page_names(node) - try: - df = self.file_controller.get_file_data(table_name, page_name) - except FileNotFoundError: - not_found.append(table_name) - return - - if page_name is not None: - table_name = f"{page_name}_{table_name}" - node.parts = [table_name] - tables[table_name] = df - - query_traversal(query, find_tables) - - if len(tables) == 0: - raise RuntimeError(f"Files not found: {', '.join(not_found)}") - - # Process the SELECT query - result_df = query_dfs(tables, query) - return Response(RESPONSE_TYPE.TABLE, data_frame=result_df) - - elif isinstance(query, Insert): - table_name, page_name = self._get_table_page_names(query.table) - - df = self.file_controller.get_file_data(table_name, page_name) - - # Create a new dataframe with the values from the query - new_df = pd.DataFrame(query.values, columns=[col.name for col in query.columns]) - - # Concatenate the new dataframe with the existing one - df = pd.concat([df, new_df], ignore_index=True) - - self.file_controller.set_file_data(table_name, df, page_name=page_name) - - return Response(RESPONSE_TYPE.OK) - - else: - return Response( - RESPONSE_TYPE.ERROR, - error_message="Only 'select', 'insert', 'create' and 'drop' queries allowed for files", - ) - - def native_query(self, query: str) -> Response: - ast = self.parser(query) - return self.query(ast) - - def get_tables(self) -> Response: - """ - List all files - """ - files_meta = self.file_controller.get_files() - data = [ - { - "TABLE_NAME": x["name"], - "TABLE_ROWS": x["row_count"], - "TABLE_TYPE": "BASE TABLE", - } - for x in files_meta - ] - return Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame(data)) - - def get_columns(self, table_name) -> Response: - file_meta = self.file_controller.get_file_meta(table_name) - if file_meta is None: - result = Response( - RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET)) - ) - result.to_columns_table_response(map_type_fn=lambda 
_: MYSQL_DATA_TYPE.TEXT) - return result - result = Response( - RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - [ - { - "COLUMN_NAME": x["name"].strip() if isinstance(x, dict) else x.strip(), - "DATA_TYPE": "str", - } - for x in file_meta["columns"] - ] - ), - ) - result.to_columns_table_response(map_type_fn=lambda _: MYSQL_DATA_TYPE.TEXT) - return result diff --git a/mindsdb/integrations/handlers/file_handler/icon.svg b/mindsdb/integrations/handlers/file_handler/icon.svg deleted file mode 100644 index 61cf7e0729c..00000000000 --- a/mindsdb/integrations/handlers/file_handler/icon.svg +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/file_handler/requirements.txt b/mindsdb/integrations/handlers/file_handler/requirements.txt deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/groq_handler/README.md b/mindsdb/integrations/handlers/groq_handler/README.md deleted file mode 100644 index 7fee4101a6f..00000000000 --- a/mindsdb/integrations/handlers/groq_handler/README.md +++ /dev/null @@ -1,116 +0,0 @@ ---- -title: Groq -sidebarTitle: Groq ---- - -This documentation describes the integration of MindsDB with [Groq](https://groq.com/), a cloud service that simplifies the way developers interact with cutting-edge LLMs through its API. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To use Groq within MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). -3. Obtain the Groq API key required to deploy and use Groq models within MindsDB. Get the API key from [here](https://console.groq.com/keys). 
- -## Setup - -Create an AI engine from the [Groq handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/groq_handler). - -```sql -CREATE ML_ENGINE groq_engine -FROM groq -USING - groq_api_key = 'api-key-value' -``` - -Create a model using `groq_engine` as an engine. - -```sql -CREATE MODEL groq_model -PREDICT answer -USING - engine = 'groq_engine', -- engine name as created via CREATE ML_ENGINE - model_name = 'model-name', -- choose one of available models - prompt_template = 'prompt-to-the-model' -- prompt message to be completed by the model - question_column = 'question', -- optional, column name that stores user input - context_column = 'context', -- optional, column that stores context of the user input - prompt_template = 'input your query here', -- optional, user provides instructions to the model here - user_column = 'user_input', -- optional, stores user input - assistant_column = 'conversation_context', -- optional, stores conversation context - prompt = 'instruction to the model', -- optional stores instruction to the model - max_tokens = 100, -- optional, token limit for answer - temperature = 0.3, -- temp -``` - -## Usage - -The following usage examples utilize `groq_engine` to create a model with the `CREATE MODEL` statement. - -Classify text sentiment using the Mistral 7B model. - -```sql -CREATE MODEL groq_model -PREDICT sentiment -USING - engine = 'groq_engine', - model_name = 'llama3-8b-8192', - prompt_template = 'Classify the sentiment of the following text as one of `positive`, `neutral` or `negative`: {{text}}. Give sentiment as result only.'; -``` - -Query the model to get predictions. - -```sql -SELECT text, sentiment -FROM groq_model -WHERE text = 'I love machine learning!'; -``` - -Here is the output: - -```sql -+--------------------------+-----------+ -| text | sentiment | -+--------------------------+-----------+ -| I love machine learning! 
| positive | -+--------------------------+-----------+ -``` - -## Supported Models - -For an overview of the models supported, visit the [following docs](https://console.groq.com/docs/models). This list will help you quickly identify the right models for your needs. - -## Troubleshooting Guide - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - Examples: - * Incorrect: - - ```sql - SELECT input.text, output.sentiment - FROM integration.travel data AS input - JOIN groq_model AS output - ``` - - * Incorrect: - - ```sql - SELECT input.text, output.sentiment - FROM integration.'travel data' AS input - JOIN groq_model AS output - ``` - - * Correct: - - ```sql - SELECT input.text, output.sentiment - FROM integration.`travel data` AS input - JOIN groq_model AS output - ``` - - diff --git a/mindsdb/integrations/handlers/groq_handler/__about__.py b/mindsdb/integrations/handlers/groq_handler/__about__.py deleted file mode 100644 index e9a2946b3cb..00000000000 --- a/mindsdb/integrations/handlers/groq_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Groq handler' -__package_name__ = 'mindsdb_groq_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Groq" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/groq_handler/__init__.py b/mindsdb/integrations/handlers/groq_handler/__init__.py deleted file mode 100644 index 4e4f79607da..00000000000 --- a/mindsdb/integrations/handlers/groq_handler/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -from .__about__ import __version__ as version -from .__about__ import __description__ as description -from 
mindsdb.integrations.libs.const import HANDLER_TYPE -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -try: - from .groq_handler import GroqHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Groq" -name = "groq" -type = HANDLER_TYPE.ML -icon_path = 'icon.svg' -permanent = False - -__all__ = ["Handler", "version", "name", "type", "title", "description", "import_error", "icon_path"] diff --git a/mindsdb/integrations/handlers/groq_handler/groq_handler.py b/mindsdb/integrations/handlers/groq_handler/groq_handler.py deleted file mode 100644 index 097edcb2738..00000000000 --- a/mindsdb/integrations/handlers/groq_handler/groq_handler.py +++ /dev/null @@ -1,145 +0,0 @@ -import os -import pandas as pd -import openai -from openai import OpenAI, NotFoundError, AuthenticationError -from typing import Dict, Optional -from mindsdb.integrations.handlers.openai_handler import Handler as OpenAIHandler -from mindsdb.integrations.utilities.handler_utils import get_api_key -from mindsdb.integrations.handlers.groq_handler.settings import groq_handler_config -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class GroqHandler(OpenAIHandler): - """ - This handler handles connection to the Groq. - """ - - name = "groq" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.api_base = groq_handler_config.BASE_URL - self.default_model = groq_handler_config.DEFAULT_MODEL - self.default_mode = groq_handler_config.DEFAULT_MODE - self.supported_modes = groq_handler_config.SUPPORTED_MODES - - @staticmethod - def _check_client_connection(client: OpenAI): - """ - Check the Groq engine client connection by listing models. - - Args: - client (OpenAI): OpenAI client configured with the Groq API credentials. - - Raises: - Exception: If the client connection (API key) is invalid. 
- - Returns: - None - """ - try: - client.models.list() - except NotFoundError: - pass - except AuthenticationError as e: - if isinstance(e.body, dict) and e.body.get("code") == "invalid_api_key": - raise Exception("Invalid api key") - raise Exception(f"Something went wrong: {e}") - - def create_engine(self, connection_args): - """ - Validate the Groq API credentials on engine creation. - - Args: - connection_args (dict): Connection arguments. - - Raises: - Exception: If the handler is not configured with valid API credentials. - - Returns: - None - """ - connection_args = {k.lower(): v for k, v in connection_args.items()} - api_key = connection_args.get("groq_api_key") - if api_key is not None: - org = connection_args.get("api_organization") - api_base = connection_args.get("api_base") or os.environ.get("GROQ_BASE", groq_handler_config.BASE_URL) - client = self._get_client(api_key=api_key, base_url=api_base, org=org) - GroqHandler._check_client_connection(client) - - @staticmethod - def create_validation(target, args=None, **kwargs): - """ - Validate the Groq API credentials on model creation. - - Args: - target (str): Target column, not required for LLMs. - args (dict): Handler arguments. - kwargs (dict): Handler keyword arguments. - - Raises: - Exception: If the handler is not configured with valid API credentials. - - Returns: - None - """ - if "using" not in args: - raise Exception("Groq engine requires a USING clause! 
Refer to its documentation for more details.") - else: - args = args["using"] - - engine_storage = kwargs["handler_storage"] - connection_args = engine_storage.get_connection_args() - api_key = get_api_key("groq", args, engine_storage=engine_storage) - api_base = ( - connection_args.get("api_base") - or args.get("api_base") - or os.environ.get("GROQ_BASE", groq_handler_config.BASE_URL) - ) - org = args.get("api_organization") - client = OpenAIHandler._get_client(api_key=api_key, base_url=api_base, org=org) - GroqHandler._check_client_connection(client) - - @staticmethod - def is_chat_model(model_name): - """ - All Groq models use the chat completions endpoint, hence every model is a chat model - """ - return True - - def predict(self, df: pd.DataFrame, args: Optional[Dict] = None) -> pd.DataFrame: - """ - Call the Groq engine to predict the next token. - - Args: - df (pd.DataFrame): Input data. - args (dict): Handler arguments. - - Returns: - pd.DataFrame: Predicted data - """ - api_key = get_api_key("groq", args, self.engine_storage) - supported_models = self._get_supported_models(api_key, self.api_base) - self.chat_completion_models = [model.id for model in supported_models] - return super().predict(df, args) - - @staticmethod - def _get_supported_models(api_key, base_url, org=None): - """ - Get the list of supported models for the Groq engine. - - Args: - api_key (str): API key. - base_url (str): Base URL. - org (str): Organization name. - - Returns: - List: List of supported models. 
- """ - client = openai.OpenAI(api_key=api_key, base_url=base_url, organization=org) - return client.models.list() - - def finetune(self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None): - raise NotImplementedError("Fine-tuning is not supported for Groq AI engine.") diff --git a/mindsdb/integrations/handlers/groq_handler/icon.svg b/mindsdb/integrations/handlers/groq_handler/icon.svg deleted file mode 100644 index df8814f2988..00000000000 --- a/mindsdb/integrations/handlers/groq_handler/icon.svg +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - - - - - - - diff --git a/mindsdb/integrations/handlers/groq_handler/requirements.txt b/mindsdb/integrations/handlers/groq_handler/requirements.txt deleted file mode 100644 index 55108dc7877..00000000000 --- a/mindsdb/integrations/handlers/groq_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -pydantic-settings >= 2.1.0 --r mindsdb/integrations/handlers/openai_handler/requirements.txt diff --git a/mindsdb/integrations/handlers/groq_handler/settings.py b/mindsdb/integrations/handlers/groq_handler/settings.py deleted file mode 100644 index 6db593266ab..00000000000 --- a/mindsdb/integrations/handlers/groq_handler/settings.py +++ /dev/null @@ -1,31 +0,0 @@ -from pydantic_settings import BaseSettings - - -class GroqHandlerConfig(BaseSettings): - """ - Configuration for Groq handler. - - Attributes - ---------- - - BASE_URL : str - Base URL for the Groq API. - DEFAULT_MODEL : str - Default model to use for Groq API. - DEFAULT_MODE : str - Default mode to use for Groq API. - SUPPORTED_MODES : list[str] - List of supported modes for Groq API. 
- """ - - BASE_URL: str = "https://api.groq.com/openai/v1" - DEFAULT_MODEL: str = "llama3-8b-8192" - DEFAULT_MODE: str = "default" - SUPPORTED_MODES: list[str] = [ - 'default', - 'conversational', - 'conversational-full', - ] - - -groq_handler_config = GroqHandlerConfig() diff --git a/mindsdb/integrations/handlers/hubspot_handler/README.md b/mindsdb/integrations/handlers/hubspot_handler/README.md deleted file mode 100644 index 032024df64e..00000000000 --- a/mindsdb/integrations/handlers/hubspot_handler/README.md +++ /dev/null @@ -1,189 +0,0 @@ -# HubSpot Handler - -HubSpot handler for MindsDB provides interfaces to connect to HubSpot via APIs and pull store data into MindsDB. - ---- - -## Table of Contents - -- [HubSpot Handler](#hubspot-handler) - - [Table of Contents](#table-of-contents) - - [About HubSpot](#about-hubspot) - - [Installation](#installation) - - [Authentication](#authentication) - - [Personal Access Token Authentication](#personal-access-token-authentication) - - [Supported Tables](#supported-tables) - - [Core CRM and Engagement Tables](#core-crm-and-engagement-tables) - - [Metadata Tables](#metadata-tables) - - [Association Tables](#association-tables) - - [Data Catalog Support](#data-catalog-support) - - [Example Usage](#example-usage) - - [Basic Connection](#basic-connection) - - [Querying Data](#querying-data) - - [Data Manipulation](#data-manipulation) - - [Notes on Filters and Limits](#notes-on-filters-and-limits) - ---- - -## About HubSpot - -HubSpot is a comprehensive CRM platform providing marketing, sales, content management, and customer service tools. This integration exposes HubSpot CRM data through MindsDB's SQL interface. 
- -**Official Website:** https://www.hubspot.com/products -**API Documentation:** https://developers.hubspot.com/docs/api/overview - -## Installation - -Install the handler dependencies using pip: - -```bash -pip install -r requirements.txt -``` - -**Required Dependencies:** -- `hubspot-api-client==12.0.0` - Official HubSpot Python client - -## Authentication - -The handler supports two authentication methods: - -### Personal Access Token Authentication - -Recommended for server-to-server integrations and production environments. - -**Steps to obtain an access token:** -1. Navigate to your HubSpot account settings -2. Go to Integrations -> Private Apps -3. Create a new private app or select an existing one -4. Configure required scopes for the tables you plan to access -5. Copy the generated access token - - -## Supported Tables - -### Core CRM and Engagement Tables - -These tables support `SELECT`, `INSERT`, `UPDATE`, and `DELETE` operations. - -| Table Name | Description | Reference | -|------------|-------------|-------------| -| `companies` | Company records from HubSpot CRM | https://developers.hubspot.com/docs/api-reference/crm-companies-v3/guide | -| `contacts` | Contact records from HubSpot CRM | https://developers.hubspot.com/docs/api-reference/crm-contacts-v3/guide | -| `deals` | Deal records from HubSpot CRM | https://developers.hubspot.com/docs/api-reference/crm-deals-v3/guide | -| `tickets` | Support ticket records | https://developers.hubspot.com/docs/api-reference/crm-tickets-v3/guide | -| `tasks` | Task and follow-up records | https://developers.hubspot.com/docs/api-reference/crm-tasks-v3/guide | -| `calls` | Call log records | https://developers.hubspot.com/docs/api-reference/crm-calls-v3/guide | -| `emails` | Email log records | https://developers.hubspot.com/docs/api-reference/crm-emails-v3/guide | -| `meetings` | Meeting records | https://developers.hubspot.com/docs/api-reference/crm-meetings-v3/guide | -| `notes` | Timeline notes | 
https://developers.hubspot.com/docs/api-reference/crm-notes-v3/guide | -| `leads` | Lead records including lead status and source | https://developers.hubspot.com/docs/api-reference/crm-leads-v3/guide | - -### Metadata Tables - -These tables are read-only and support `SELECT` only. - -| Table Name | Description | Reference | -|------------|-------------|-------------| -| `owners` | HubSpot owners with names and emails | https://developers.hubspot.com/docs/api-reference/crm-owners-v3/guide | -| `pipelines` | Deal pipelines with names and stages | https://developers.hubspot.com/docs/api-reference/crm-pipelines-v3/guide | - -### Association Tables - -Association tables are read-only and support `SELECT` only. They expose relationships between objects and include `association_type` and `association_label` columns. - - Reference: https://developers.hubspot.com/docs/api-reference/crm-associations-v4/guide - -| Table Name | Description | -|------------|-------------| -| `company_contacts` | Company to contact associations | -| `company_deals` | Company to deal associations | -| `company_tickets` | Company to ticket associations | -| `contact_companies` | Contact to company associations | -| `contact_deals` | Contact to deal associations | -| `contact_tickets` | Contact to ticket associations | -| `deal_companies` | Deal to company associations | -| `deal_contacts` | Deal to contact associations | -| `ticket_companies` | Ticket to company associations | -| `ticket_contacts` | Ticket to contact associations | -| `ticket_deals` | Ticket to deal associations | - -## Data Catalog Support - -The handler provides `SHOW TABLES` and `information_schema.columns` support for all tables. Column statistics are sampled for core CRM and engagement tables. 
- -## Example Usage - -### Basic Connection - -**Using Access Token:** -```sql -CREATE DATABASE hubspot_datasource -WITH ENGINE = 'hubspot', -PARAMETERS = { - "access_token": "pat-na1-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -}; -``` - -**Using OAuth (Advanced):** -```sql -CREATE DATABASE hubspot_datasource -WITH ENGINE = 'hubspot', -PARAMETERS = { - "client_id": "your-client-id", - "client_secret": "your-client-secret" -}; -``` - -### Querying Data - -**Basic Data Retrieval:** -```sql -SELECT * FROM hubspot_datasource.companies LIMIT 10; -SELECT * FROM hubspot_datasource.contacts LIMIT 10; -SELECT * FROM hubspot_datasource.deals LIMIT 10; -``` - -**Date Filters (Supported Functions):** -```sql -SELECT * FROM hubspot_datasource.deals -WHERE closedate >= DATE_SUB(CURRENT_DATE, INTERVAL 2 YEAR); -``` - -### Data Manipulation - -**Creating Records:** -```sql -INSERT INTO hubspot_datasource.companies (name, domain, industry, city, state) -VALUES ('Acme Corp', 'acme.com', 'COMPUTER_SOFTWARE', 'New York', 'NY'); - -INSERT INTO hubspot_datasource.contacts (email, firstname, phone) -VALUES ('john.doe@example.com', 'John', '+1234567890'); - -INSERT INTO hubspot_datasource.tasks (hs_task_subject, hs_task_status) -VALUES ('Follow up with Acme', 'WAITING'); -``` - -**Updating Records:** -```sql -UPDATE hubspot_datasource.companies -SET industry = 'COMPUTER_SOFTWARE', city = 'Austin' -WHERE name = 'Acme Corp'; - -UPDATE hubspot_datasource.deals -SET dealstage = '110382973', amount = '75000' -WHERE dealname = 'New Deal'; -``` - -**Deleting Records:** -```sql -DELETE FROM hubspot_datasource.deals -WHERE dealstage = 'closedlost' - AND createdate < '2023-01-01'; -``` - -## Notes on Filters and Limits - -- Supported filter operators include `=`, `!=`, `<`, `<=`, `>`, `>=`, `IN`, and `NOT IN`. -- Date helpers supported in filters include `CURDATE()`/`CURRENT_DATE`, `NOW()`/`CURRENT_TIMESTAMP`, `DATE_SUB`, and `DATE_ADD`. 
-- Updates and deletes evaluate conditions against a sample of up to 200 records before applying changes. -- Unsupported filters or order-by expressions are skipped rather than raising errors. diff --git a/mindsdb/integrations/handlers/hubspot_handler/__about__.py b/mindsdb/integrations/handlers/hubspot_handler/__about__.py deleted file mode 100644 index b8031b6aa43..00000000000 --- a/mindsdb/integrations/handlers/hubspot_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Hubspot handler" -__package_name__ = "mindsdb_hubspot_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for the Hubspot API" -__author__ = "Shivam Dhaka" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2025 - mindsdb" diff --git a/mindsdb/integrations/handlers/hubspot_handler/__init__.py b/mindsdb/integrations/handlers/hubspot_handler/__init__.py deleted file mode 100644 index 4d881147604..00000000000 --- a/mindsdb/integrations/handlers/hubspot_handler/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example - - -try: - from .hubspot_handler import HubspotHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Hubspot" -name = "hubspot" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.MINDSDB - -__all__ = [ - "Handler", - "version", - "name", - "type", - "support_level", - "title", - "description", - "import_error", - "icon_path", - "connection_args", - "connection_args_example", -] diff --git a/mindsdb/integrations/handlers/hubspot_handler/connection_args.py b/mindsdb/integrations/handlers/hubspot_handler/connection_args.py deleted file mode 
100644 index 9154946b884..00000000000 --- a/mindsdb/integrations/handlers/hubspot_handler/connection_args.py +++ /dev/null @@ -1,59 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import ( - HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE, -) - -connection_args = OrderedDict( - access_token={ - "type": ARG_TYPE.STR, - "description": ("The access token for the HubSpot API. Required for direct access token authentication."), - "required": False, - "label": "Access Token", - }, - client_id={ - "type": ARG_TYPE.STR, - "description": ("The client ID (consumer key) from your HubSpot app for OAuth authentication."), - "required": False, - "label": "Client ID", - }, - client_secret={ - "type": ARG_TYPE.PWD, - "description": ("The client secret (consumer secret) from your HubSpot app for OAuth authentication."), - "secret": True, - "required": False, - "label": "Client Secret", - }, - scope={ - "type": ARG_TYPE.STR, - "description": "Space-separated required OAuth scopes (scope URL param). Defaults to 'oauth'.", - "required": False, - "label": "Required Scopes", - }, - optional_scope={ - "type": ARG_TYPE.STR, - "description": "Space-separated optional OAuth scopes.", - "required": False, - "label": "Optional Scopes", - }, - redirect_uri={ - "type": ARG_TYPE.STR, - "description": ("Optional OAuth callback URI. Defaults to http://localhost:47334/verify-auth."), - "required": False, - "label": "Redirect URI", - }, - code={ - "type": ARG_TYPE.STR, - "description": "OAuth authorization code returned by HubSpot after user consent. 
Only used within UI flow.", - "required": False, - "label": "Authorization Code", - }, -) - -connection_args_example = OrderedDict( - access_token="your_access_token", - client_id="your_client_id", - client_secret="your_client_secret", - scopes="crm.objects.contacts.read crm.objects.companies.read", - redirect_uri="http://localhost:47334/verify-auth", -) diff --git a/mindsdb/integrations/handlers/hubspot_handler/hubspot_association_tables.py b/mindsdb/integrations/handlers/hubspot_handler/hubspot_association_tables.py deleted file mode 100644 index 4017789cd20..00000000000 --- a/mindsdb/integrations/handlers/hubspot_handler/hubspot_association_tables.py +++ /dev/null @@ -1,994 +0,0 @@ -""" -HubSpot Association Tables for MindsDB. - -This module provides association tables that expose the many-to-many relationships -between HubSpot CRM objects (companies, contacts, deals, tickets, etc.). - -Reference: https://developers.hubspot.com/docs/api/crm/associations -""" - -from __future__ import annotations - -from typing import Any -import pandas as pd - -from mindsdb.integrations.libs.api_handler import MetaAPIResource -from mindsdb.integrations.utilities.sql_utils import FilterCondition, SortColumn -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class HubSpotAssociationTable(MetaAPIResource): - """ - Base class for HubSpot association tables. - - Association tables expose the many-to-many relationships between HubSpot objects. - They are read-only views that fetch association data from the HubSpot API. 
- """ - - # Subclasses must define these - FROM_OBJECT_TYPE: str = "" - TO_OBJECT_TYPE: str = "" - FROM_ID_COLUMN: str = "" - TO_ID_COLUMN: str = "" - - def get_columns(self) -> list[str]: - """Return column names for the association table.""" - return [self.FROM_ID_COLUMN, self.TO_ID_COLUMN, "association_type", "association_label"] - - def list( - self, - conditions: list[FilterCondition] | None = None, - limit: int | None = None, - sort: list[SortColumn] | None = None, - targets: list[str] | None = None, - **kwargs, - ) -> pd.DataFrame: - """Fetch associations between objects. - - When a condition on FROM_ID_COLUMN is present (eq or IN), the HubSpot - batch associations API is used so the query is O(filtered IDs) rather - than O(all objects). This makes JOIN queries like - FROM companies co - JOIN company_contacts cc ON cc.company_id = co.id - JOIN contacts c ON c.id = cc.contact_id - efficient. - """ - from_id_values = self._extract_from_id_conditions(conditions) - - if from_id_values: - associations = self._fetch_associations_by_ids(from_id_values, limit=limit) - else: - associations = self._fetch_associations(limit=limit) - - if not associations: - return pd.DataFrame(columns=self.get_columns()) - - df = pd.DataFrame(associations) - - # Apply any remaining (non-FROM_ID_COLUMN) conditions - remaining = [ - c for c in (conditions or []) if (c.column if hasattr(c, "column") else c[1]) != self.FROM_ID_COLUMN - ] - if remaining: - df = self._apply_conditions(df, remaining) - - return df - - def _extract_from_id_conditions(self, conditions: list[FilterCondition] | None) -> list[str] | None: - """Return FROM_ID_COLUMN values from eq/IN conditions and mark them applied.""" - if not conditions: - return None - for cond in conditions: - column = cond.column if hasattr(cond, "column") else cond[1] - if column != self.FROM_ID_COLUMN: - continue - op = str(cond.op.value if hasattr(cond, "op") and hasattr(cond.op, "value") else cond[0]).lower() - value = cond.value if 
hasattr(cond, "value") else cond[2] - if op in ("=", "==", "eq") and value is not None: - if hasattr(cond, "applied"): - cond.applied = True - return [str(value)] - if op == "in": - vals = list(value) if isinstance(value, (list, tuple, set)) else [value] - valid = [str(v) for v in vals if v is not None] - if valid: - if hasattr(cond, "applied"): - cond.applied = True - return valid - return None - - def _fetch_associations_by_ids(self, from_ids: list[str], limit: int | None = None) -> list[dict[str, Any]]: - """Use HubSpot batch associations API for specific from-object IDs.""" - from hubspot.crm.associations.models import ( - BatchInputPublicObjectId, - PublicObjectId, - ) - - hubspot = self.handler.connect() - BATCH = 100 - results: list[dict[str, Any]] = [] - - for i in range(0, len(from_ids), BATCH): - chunk = from_ids[i : i + BATCH] - try: - resp = hubspot.crm.associations.batch_api.read( - self.FROM_OBJECT_TYPE, - self.TO_OBJECT_TYPE, - BatchInputPublicObjectId(inputs=[PublicObjectId(id=fid) for fid in chunk]), - ) - for multi in resp.results or []: - from_id = str( - (multi._from or {}).get("id", "") - if isinstance(multi._from, dict) - else getattr(multi._from, "id", "") - ) - for assoc in multi.to or []: - to_id = str(assoc.get("id", "") if isinstance(assoc, dict) else getattr(assoc, "id", "")) - if not to_id: - continue - results.append( - { - self.FROM_ID_COLUMN: from_id, - self.TO_ID_COLUMN: to_id, - "association_type": None, - "association_label": None, - } - ) - if limit and len(results) >= limit: - logger.info( - f"Retrieved {len(results)} {self.FROM_OBJECT_TYPE}" - f"->{self.TO_OBJECT_TYPE} associations via batch API" - ) - return results - except Exception as e: - logger.warning( - f"Failed to batch fetch {self.FROM_OBJECT_TYPE}->{self.TO_OBJECT_TYPE} " - f"associations for chunk {chunk}: {e}" - ) - - logger.info( - f"Retrieved {len(results)} {self.FROM_OBJECT_TYPE}->{self.TO_OBJECT_TYPE} associations via batch API" - ) - return results - - def 
_fetch_associations(self, limit: int | None = None) -> list[dict[str, Any]]: - """ - Fetch associations by getting source objects with their associations. - - This approach fetches the source objects with associations included, - which is more efficient than making separate association API calls. - """ - hubspot = self.handler.connect() - results = [] - - try: - # Determine which API to use based on object type - if self.FROM_OBJECT_TYPE in ["companies", "contacts", "deals", "tickets"]: - source_objects = getattr(hubspot.crm, self.FROM_OBJECT_TYPE).get_all( - associations=[self.TO_OBJECT_TYPE], - limit=limit or 500, - ) - else: - source_objects = self.handler._get_objects_all( - self.FROM_OBJECT_TYPE, - associations=[self.TO_OBJECT_TYPE], - limit=limit or 500, - ) - - for obj in source_objects: - from_id = obj.id - associations = getattr(obj, "associations", None) - - if not associations: - continue - - to_objects = None - if isinstance(associations, dict): - to_objects = associations.get(self.TO_OBJECT_TYPE) - else: - to_objects = getattr(associations, self.TO_OBJECT_TYPE, None) - - if to_objects is None: - continue - - if isinstance(to_objects, dict): - to_objects = to_objects.get("results") or to_objects.get("items") or [] - elif hasattr(to_objects, "results"): - to_objects = to_objects.results or [] - elif hasattr(to_objects, "items"): - to_objects = to_objects.items or [] - - if not to_objects: - continue - - for assoc in to_objects: - if isinstance(assoc, dict): - to_id = assoc.get("id") or assoc.get("toObjectId") or assoc.get("to_object_id") - else: - to_id = ( - getattr(assoc, "id", None) - or getattr(assoc, "toObjectId", None) - or getattr(assoc, "to_object_id", None) - ) - if not to_id: - continue - - assoc_type = None - assoc_label = None - assoc_category = None - - if hasattr(assoc, "type"): - assoc_type = assoc.type - if hasattr(assoc, "category"): - assoc_category = assoc.category - elif isinstance(assoc, dict): - assoc_type = assoc.get("type") - 
assoc_category = assoc.get("category") - - if hasattr(assoc, "associationTypes") and assoc.associationTypes: - for at in assoc.associationTypes: - assoc_label = getattr(at, "label", assoc_label) - assoc_type = getattr(at, "typeId", assoc_type) - assoc_category = getattr(at, "category", assoc_category) - break - elif isinstance(assoc, dict): - assoc_types = assoc.get("associationTypes") or assoc.get("association_types") or [] - if assoc_types: - assoc_entry = assoc_types[0] - if isinstance(assoc_entry, dict): - assoc_label = assoc_label or assoc_entry.get("label") - assoc_type = assoc_type or assoc_entry.get("typeId") or assoc_entry.get("type") - assoc_category = assoc_category or assoc_entry.get("category") - - if not assoc_label and assoc_type is not None: - fallback_category = assoc_category or "HUBSPOT_DEFINED" - assoc_label = f"{fallback_category}:{assoc_type}" - - results.append( - { - self.FROM_ID_COLUMN: str(from_id), - self.TO_ID_COLUMN: str(to_id), - "association_type": assoc_type, - "association_label": assoc_label, - } - ) - - if limit and len(results) >= limit: - break - - logger.info(f"Retrieved {len(results)} {self.FROM_OBJECT_TYPE}->{self.TO_OBJECT_TYPE} associations") - return results - - except Exception as e: - logger.error(f"Failed to fetch associations: {str(e)}") - raise - - def _apply_conditions(self, df: pd.DataFrame, conditions: list[FilterCondition]) -> pd.DataFrame: - """Apply filter conditions to the DataFrame.""" - if df.empty: - return df - - for condition in conditions: - column = condition.column if hasattr(condition, "column") else condition[1] - op = str(condition.op if hasattr(condition, "op") else condition[0]).lower() - value = condition.value if hasattr(condition, "value") else condition[2] - - if column not in df.columns: - continue - - if op in ("=", "==", "eq"): - df = df[df[column] == str(value)] - elif op in ("!=", "<>", "neq"): - df = df[df[column] != str(value)] - elif op == "in": - values = list(value) if 
isinstance(value, (list, tuple, set)) else [value] - df = df[df[column].isin([str(v) for v in values])] - - return df - - def add(self, data: list[dict]) -> None: - """Create associations - not yet implemented.""" - raise NotImplementedError( - "Creating associations via INSERT is not yet supported. " - "Use the HubSpot API directly to create associations." - ) - - def modify(self, conditions: list[FilterCondition], values: dict) -> None: - """Update associations - not applicable.""" - raise NotImplementedError("Associations cannot be updated. Delete and recreate instead.") - - def remove(self, conditions: list[FilterCondition]) -> None: - """Delete associations - not yet implemented.""" - raise NotImplementedError( - "Deleting associations via DELETE is not yet supported. " - "Use the HubSpot API directly to remove associations." - ) - - -class CompanyContactsTable(HubSpotAssociationTable): - """Association table for company-contact relationships.""" - - FROM_OBJECT_TYPE = "companies" - TO_OBJECT_TYPE = "contacts" - FROM_ID_COLUMN = "company_id" - TO_ID_COLUMN = "contact_id" - - def meta_get_tables(self, table_name: str) -> dict[str, Any]: - return { - "TABLE_NAME": "company_contacts", - "TABLE_TYPE": "VIEW", - "TABLE_DESCRIPTION": "Association table linking companies to their contacts", - "ROW_COUNT": None, - } - - def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "company_contacts", - "COLUMN_NAME": "company_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Company ID", - }, - { - "TABLE_NAME": "company_contacts", - "COLUMN_NAME": "contact_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Contact ID", - }, - { - "TABLE_NAME": "company_contacts", - "COLUMN_NAME": "association_type", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association type ID", - }, - { - "TABLE_NAME": "company_contacts", - "COLUMN_NAME": "association_label", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association 
label", - }, - ] - - def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "company_contacts", - "COLUMN_NAME": "company_id", - "REFERENCED_TABLE_NAME": "companies", - "REFERENCED_COLUMN_NAME": "id", - }, - { - "TABLE_NAME": "company_contacts", - "COLUMN_NAME": "contact_id", - "REFERENCED_TABLE_NAME": "contacts", - "REFERENCED_COLUMN_NAME": "id", - }, - ] - - -class CompanyDealsTable(HubSpotAssociationTable): - """Association table for company-deal relationships.""" - - FROM_OBJECT_TYPE = "companies" - TO_OBJECT_TYPE = "deals" - FROM_ID_COLUMN = "company_id" - TO_ID_COLUMN = "deal_id" - - def meta_get_tables(self, table_name: str) -> dict[str, Any]: - return { - "TABLE_NAME": "company_deals", - "TABLE_TYPE": "VIEW", - "TABLE_DESCRIPTION": "Association table linking companies to their deals", - "ROW_COUNT": None, - } - - def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "company_deals", - "COLUMN_NAME": "company_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Company ID", - }, - { - "TABLE_NAME": "company_deals", - "COLUMN_NAME": "deal_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Deal ID", - }, - { - "TABLE_NAME": "company_deals", - "COLUMN_NAME": "association_type", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association type ID", - }, - { - "TABLE_NAME": "company_deals", - "COLUMN_NAME": "association_label", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association label", - }, - ] - - def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "company_deals", - "COLUMN_NAME": "company_id", - "REFERENCED_TABLE_NAME": "companies", - "REFERENCED_COLUMN_NAME": "id", - }, - { - "TABLE_NAME": "company_deals", - "COLUMN_NAME": "deal_id", - "REFERENCED_TABLE_NAME": "deals", - "REFERENCED_COLUMN_NAME": "id", - }, - ] - - -class CompanyTicketsTable(HubSpotAssociationTable): - """Association table 
for company-ticket relationships.""" - - FROM_OBJECT_TYPE = "companies" - TO_OBJECT_TYPE = "tickets" - FROM_ID_COLUMN = "company_id" - TO_ID_COLUMN = "ticket_id" - - def meta_get_tables(self, table_name: str) -> dict[str, Any]: - return { - "TABLE_NAME": "company_tickets", - "TABLE_TYPE": "VIEW", - "TABLE_DESCRIPTION": "Association table linking companies to their tickets", - "ROW_COUNT": None, - } - - def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "company_tickets", - "COLUMN_NAME": "company_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Company ID", - }, - { - "TABLE_NAME": "company_tickets", - "COLUMN_NAME": "ticket_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Ticket ID", - }, - { - "TABLE_NAME": "company_tickets", - "COLUMN_NAME": "association_type", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association type ID", - }, - { - "TABLE_NAME": "company_tickets", - "COLUMN_NAME": "association_label", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association label", - }, - ] - - def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "company_tickets", - "COLUMN_NAME": "company_id", - "REFERENCED_TABLE_NAME": "companies", - "REFERENCED_COLUMN_NAME": "id", - }, - { - "TABLE_NAME": "company_tickets", - "COLUMN_NAME": "ticket_id", - "REFERENCED_TABLE_NAME": "tickets", - "REFERENCED_COLUMN_NAME": "id", - }, - ] - - -class ContactCompaniesTable(HubSpotAssociationTable): - """Association table for contact-company relationships.""" - - FROM_OBJECT_TYPE = "contacts" - TO_OBJECT_TYPE = "companies" - FROM_ID_COLUMN = "contact_id" - TO_ID_COLUMN = "company_id" - - def meta_get_tables(self, table_name: str) -> dict[str, Any]: - return { - "TABLE_NAME": "contact_companies", - "TABLE_TYPE": "VIEW", - "TABLE_DESCRIPTION": "Association table linking contacts to their companies", - "ROW_COUNT": None, - } - - def meta_get_columns(self, table_name: str) 
-> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "contact_companies", - "COLUMN_NAME": "contact_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Contact ID", - }, - { - "TABLE_NAME": "contact_companies", - "COLUMN_NAME": "company_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Company ID", - }, - { - "TABLE_NAME": "contact_companies", - "COLUMN_NAME": "association_type", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association type ID", - }, - { - "TABLE_NAME": "contact_companies", - "COLUMN_NAME": "association_label", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association label", - }, - ] - - def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "contact_companies", - "COLUMN_NAME": "contact_id", - "REFERENCED_TABLE_NAME": "contacts", - "REFERENCED_COLUMN_NAME": "id", - }, - { - "TABLE_NAME": "contact_companies", - "COLUMN_NAME": "company_id", - "REFERENCED_TABLE_NAME": "companies", - "REFERENCED_COLUMN_NAME": "id", - }, - ] - - -class ContactDealsTable(HubSpotAssociationTable): - """Association table for contact-deal relationships.""" - - FROM_OBJECT_TYPE = "contacts" - TO_OBJECT_TYPE = "deals" - FROM_ID_COLUMN = "contact_id" - TO_ID_COLUMN = "deal_id" - - def meta_get_tables(self, table_name: str) -> dict[str, Any]: - return { - "TABLE_NAME": "contact_deals", - "TABLE_TYPE": "VIEW", - "TABLE_DESCRIPTION": "Association table linking contacts to their deals", - "ROW_COUNT": None, - } - - def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "contact_deals", - "COLUMN_NAME": "contact_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Contact ID", - }, - { - "TABLE_NAME": "contact_deals", - "COLUMN_NAME": "deal_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Deal ID", - }, - { - "TABLE_NAME": "contact_deals", - "COLUMN_NAME": "association_type", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association type ID", - }, 
- { - "TABLE_NAME": "contact_deals", - "COLUMN_NAME": "association_label", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association label", - }, - ] - - def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "contact_deals", - "COLUMN_NAME": "contact_id", - "REFERENCED_TABLE_NAME": "contacts", - "REFERENCED_COLUMN_NAME": "id", - }, - { - "TABLE_NAME": "contact_deals", - "COLUMN_NAME": "deal_id", - "REFERENCED_TABLE_NAME": "deals", - "REFERENCED_COLUMN_NAME": "id", - }, - ] - - -class ContactTicketsTable(HubSpotAssociationTable): - """Association table for contact-ticket relationships.""" - - FROM_OBJECT_TYPE = "contacts" - TO_OBJECT_TYPE = "tickets" - FROM_ID_COLUMN = "contact_id" - TO_ID_COLUMN = "ticket_id" - - def meta_get_tables(self, table_name: str) -> dict[str, Any]: - return { - "TABLE_NAME": "contact_tickets", - "TABLE_TYPE": "VIEW", - "TABLE_DESCRIPTION": "Association table linking contacts to their tickets", - "ROW_COUNT": None, - } - - def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "contact_tickets", - "COLUMN_NAME": "contact_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Contact ID", - }, - { - "TABLE_NAME": "contact_tickets", - "COLUMN_NAME": "ticket_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Ticket ID", - }, - { - "TABLE_NAME": "contact_tickets", - "COLUMN_NAME": "association_type", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association type ID", - }, - { - "TABLE_NAME": "contact_tickets", - "COLUMN_NAME": "association_label", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association label", - }, - ] - - def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "contact_tickets", - "COLUMN_NAME": "contact_id", - "REFERENCED_TABLE_NAME": "contacts", - "REFERENCED_COLUMN_NAME": "id", - }, - { - "TABLE_NAME": "contact_tickets", - "COLUMN_NAME": "ticket_id", - 
"REFERENCED_TABLE_NAME": "tickets", - "REFERENCED_COLUMN_NAME": "id", - }, - ] - - -class DealCompaniesTable(HubSpotAssociationTable): - """Association table for deal-company relationships.""" - - FROM_OBJECT_TYPE = "deals" - TO_OBJECT_TYPE = "companies" - FROM_ID_COLUMN = "deal_id" - TO_ID_COLUMN = "company_id" - - def meta_get_tables(self, table_name: str) -> dict[str, Any]: - return { - "TABLE_NAME": "deal_companies", - "TABLE_TYPE": "VIEW", - "TABLE_DESCRIPTION": "Association table linking deals to their companies", - "ROW_COUNT": None, - } - - def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "deal_companies", - "COLUMN_NAME": "deal_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Deal ID", - }, - { - "TABLE_NAME": "deal_companies", - "COLUMN_NAME": "company_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Company ID", - }, - { - "TABLE_NAME": "deal_companies", - "COLUMN_NAME": "association_type", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association type ID", - }, - { - "TABLE_NAME": "deal_companies", - "COLUMN_NAME": "association_label", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association label", - }, - ] - - def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "deal_companies", - "COLUMN_NAME": "deal_id", - "REFERENCED_TABLE_NAME": "deals", - "REFERENCED_COLUMN_NAME": "id", - }, - { - "TABLE_NAME": "deal_companies", - "COLUMN_NAME": "company_id", - "REFERENCED_TABLE_NAME": "companies", - "REFERENCED_COLUMN_NAME": "id", - }, - ] - - -class DealContactsTable(HubSpotAssociationTable): - """Association table for deal-contact relationships.""" - - FROM_OBJECT_TYPE = "deals" - TO_OBJECT_TYPE = "contacts" - FROM_ID_COLUMN = "deal_id" - TO_ID_COLUMN = "contact_id" - - def meta_get_tables(self, table_name: str) -> dict[str, Any]: - return { - "TABLE_NAME": "deal_contacts", - "TABLE_TYPE": "VIEW", - "TABLE_DESCRIPTION": "Association 
table linking deals to their contacts", - "ROW_COUNT": None, - } - - def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "deal_contacts", - "COLUMN_NAME": "deal_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Deal ID", - }, - { - "TABLE_NAME": "deal_contacts", - "COLUMN_NAME": "contact_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Contact ID", - }, - { - "TABLE_NAME": "deal_contacts", - "COLUMN_NAME": "association_type", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association type ID", - }, - { - "TABLE_NAME": "deal_contacts", - "COLUMN_NAME": "association_label", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association label", - }, - ] - - def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "deal_contacts", - "COLUMN_NAME": "deal_id", - "REFERENCED_TABLE_NAME": "deals", - "REFERENCED_COLUMN_NAME": "id", - }, - { - "TABLE_NAME": "deal_contacts", - "COLUMN_NAME": "contact_id", - "REFERENCED_TABLE_NAME": "contacts", - "REFERENCED_COLUMN_NAME": "id", - }, - ] - - -class TicketCompaniesTable(HubSpotAssociationTable): - """Association table for ticket-company relationships.""" - - FROM_OBJECT_TYPE = "tickets" - TO_OBJECT_TYPE = "companies" - FROM_ID_COLUMN = "ticket_id" - TO_ID_COLUMN = "company_id" - - def meta_get_tables(self, table_name: str) -> dict[str, Any]: - return { - "TABLE_NAME": "ticket_companies", - "TABLE_TYPE": "VIEW", - "TABLE_DESCRIPTION": "Association table linking tickets to their companies", - "ROW_COUNT": None, - } - - def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "ticket_companies", - "COLUMN_NAME": "ticket_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Ticket ID", - }, - { - "TABLE_NAME": "ticket_companies", - "COLUMN_NAME": "company_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Company ID", - }, - { - "TABLE_NAME": "ticket_companies", - 
"COLUMN_NAME": "association_type", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association type ID", - }, - { - "TABLE_NAME": "ticket_companies", - "COLUMN_NAME": "association_label", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association label", - }, - ] - - def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "ticket_companies", - "COLUMN_NAME": "ticket_id", - "REFERENCED_TABLE_NAME": "tickets", - "REFERENCED_COLUMN_NAME": "id", - }, - { - "TABLE_NAME": "ticket_companies", - "COLUMN_NAME": "company_id", - "REFERENCED_TABLE_NAME": "companies", - "REFERENCED_COLUMN_NAME": "id", - }, - ] - - -class TicketContactsTable(HubSpotAssociationTable): - """Association table for ticket-contact relationships.""" - - FROM_OBJECT_TYPE = "tickets" - TO_OBJECT_TYPE = "contacts" - FROM_ID_COLUMN = "ticket_id" - TO_ID_COLUMN = "contact_id" - - def meta_get_tables(self, table_name: str) -> dict[str, Any]: - return { - "TABLE_NAME": "ticket_contacts", - "TABLE_TYPE": "VIEW", - "TABLE_DESCRIPTION": "Association table linking tickets to their contacts", - "ROW_COUNT": None, - } - - def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "ticket_contacts", - "COLUMN_NAME": "ticket_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Ticket ID", - }, - { - "TABLE_NAME": "ticket_contacts", - "COLUMN_NAME": "contact_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Contact ID", - }, - { - "TABLE_NAME": "ticket_contacts", - "COLUMN_NAME": "association_type", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association type ID", - }, - { - "TABLE_NAME": "ticket_contacts", - "COLUMN_NAME": "association_label", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association label", - }, - ] - - def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "ticket_contacts", - "COLUMN_NAME": "ticket_id", - "REFERENCED_TABLE_NAME": 
"tickets", - "REFERENCED_COLUMN_NAME": "id", - }, - { - "TABLE_NAME": "ticket_contacts", - "COLUMN_NAME": "contact_id", - "REFERENCED_TABLE_NAME": "contacts", - "REFERENCED_COLUMN_NAME": "id", - }, - ] - - -class TicketDealsTable(HubSpotAssociationTable): - """Association table for ticket-deal relationships.""" - - FROM_OBJECT_TYPE = "tickets" - TO_OBJECT_TYPE = "deals" - FROM_ID_COLUMN = "ticket_id" - TO_ID_COLUMN = "deal_id" - - def meta_get_tables(self, table_name: str) -> dict[str, Any]: - return { - "TABLE_NAME": "ticket_deals", - "TABLE_TYPE": "VIEW", - "TABLE_DESCRIPTION": "Association table linking tickets to their deals", - "ROW_COUNT": None, - } - - def meta_get_columns(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "ticket_deals", - "COLUMN_NAME": "ticket_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Ticket ID", - }, - { - "TABLE_NAME": "ticket_deals", - "COLUMN_NAME": "deal_id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Deal ID", - }, - { - "TABLE_NAME": "ticket_deals", - "COLUMN_NAME": "association_type", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association type ID", - }, - { - "TABLE_NAME": "ticket_deals", - "COLUMN_NAME": "association_label", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Association label", - }, - ] - - def meta_get_foreign_keys(self, table_name: str) -> list[dict[str, Any]]: - return [ - { - "TABLE_NAME": "ticket_deals", - "COLUMN_NAME": "ticket_id", - "REFERENCED_TABLE_NAME": "tickets", - "REFERENCED_COLUMN_NAME": "id", - }, - { - "TABLE_NAME": "ticket_deals", - "COLUMN_NAME": "deal_id", - "REFERENCED_TABLE_NAME": "deals", - "REFERENCED_COLUMN_NAME": "id", - }, - ] - - -# Export all association table classes -ASSOCIATION_TABLE_CLASSES = { - "company_contacts": CompanyContactsTable, - "company_deals": CompanyDealsTable, - "company_tickets": CompanyTicketsTable, - "contact_companies": ContactCompaniesTable, - "contact_deals": ContactDealsTable, - "contact_tickets": 
ContactTicketsTable, - "deal_companies": DealCompaniesTable, - "deal_contacts": DealContactsTable, - "ticket_companies": TicketCompaniesTable, - "ticket_contacts": TicketContactsTable, - "ticket_deals": TicketDealsTable, -} diff --git a/mindsdb/integrations/handlers/hubspot_handler/hubspot_association_utils.py b/mindsdb/integrations/handlers/hubspot_handler/hubspot_association_utils.py deleted file mode 100644 index c79ddf51df9..00000000000 --- a/mindsdb/integrations/handlers/hubspot_handler/hubspot_association_utils.py +++ /dev/null @@ -1,99 +0,0 @@ -"""HubSpot association utilities for MindsDB.""" - -from typing import Any, Dict, List, Optional - - -PRIMARY_ASSOCIATIONS_CONFIG = { - "companies": [], - "contacts": [ - ("companies", "primary_company_id"), - ], - "deals": [ - ("companies", "primary_company_id"), - ("contacts", "primary_contact_id"), - ], - "tickets": [ - ("companies", "primary_company_id"), - ("contacts", "primary_contact_id"), - ("deals", "primary_deal_id"), - ], - "tasks": [ - ("contacts", "primary_contact_id"), - ("companies", "primary_company_id"), - ("deals", "primary_deal_id"), - ], - "calls": [ - ("contacts", "primary_contact_id"), - ("companies", "primary_company_id"), - ("deals", "primary_deal_id"), - ], - "emails": [ - ("contacts", "primary_contact_id"), - ("companies", "primary_company_id"), - ("deals", "primary_deal_id"), - ], - "meetings": [ - ("contacts", "primary_contact_id"), - ("companies", "primary_company_id"), - ("deals", "primary_deal_id"), - ], - "notes": [ - ("contacts", "primary_contact_id"), - ("companies", "primary_company_id"), - ("deals", "primary_deal_id"), - ], - "leads": [ - ("contacts", "primary_contact_id"), - ("companies", "primary_company_id"), - ], -} - - -def extract_primary_association(obj: Any, to_object_type: str) -> Optional[str]: - associations = getattr(obj, "associations", None) - if not associations: - return None - - if isinstance(associations, dict): - to_objects = associations.get(to_object_type, {}) - 
if isinstance(to_objects, dict): - results = to_objects.get("results", []) - elif isinstance(to_objects, list): - results = to_objects - else: - results = getattr(to_objects, "results", []) or [] - else: - to_objects = getattr(associations, to_object_type, None) - if to_objects is None: - return None - results = getattr(to_objects, "results", []) or [] - - if not results: - return None - - first_assoc = results[0] - if hasattr(first_assoc, "id"): - return str(first_assoc.id) - if isinstance(first_assoc, dict) and first_assoc.get("id"): - return str(first_assoc["id"]) - if hasattr(first_assoc, "toObjectId"): - return str(first_assoc.toObjectId) - - return None - - -def get_association_targets_for_object(object_type: str) -> List[str]: - config = PRIMARY_ASSOCIATIONS_CONFIG.get(object_type, []) - return [target for target, _ in config] - - -def get_primary_association_columns(object_type: str) -> List[str]: - config = PRIMARY_ASSOCIATIONS_CONFIG.get(object_type, []) - return [col_name for _, col_name in config] - - -def enrich_object_with_associations(obj: Any, object_type: str, row: Dict[str, Any]) -> Dict[str, Any]: - config = PRIMARY_ASSOCIATIONS_CONFIG.get(object_type, []) - for target_type, column_name in config: - row[column_name] = extract_primary_association(obj, target_type) - return row diff --git a/mindsdb/integrations/handlers/hubspot_handler/hubspot_handler.py b/mindsdb/integrations/handlers/hubspot_handler/hubspot_handler.py deleted file mode 100644 index 19a6c0244da..00000000000 --- a/mindsdb/integrations/handlers/hubspot_handler/hubspot_handler.py +++ /dev/null @@ -1,1316 +0,0 @@ -from collections import Counter -from typing import Optional, List, Dict, Any, Tuple -import pandas as pd -from pandas.api import types as pd_types -from hubspot import HubSpot - -from mindsdb.integrations.handlers.hubspot_handler.hubspot_tables import ( - ContactsTable, - CompaniesTable, - DealsTable, - TicketsTable, - TasksTable, - CallsTable, - EmailsTable, - 
MeetingsTable, - NotesTable, - LeadsTable, - OwnersTable, - DealStagesTable, - to_hubspot_property, - to_internal_property, - HUBSPOT_TABLE_COLUMN_DEFINITIONS, -) -from mindsdb.integrations.handlers.hubspot_handler.hubspot_association_tables import ( - ASSOCIATION_TABLE_CLASSES, -) -from mindsdb.integrations.handlers.hubspot_handler.hubspot_association_utils import ( - PRIMARY_ASSOCIATIONS_CONFIG, -) -from mindsdb.integrations.libs.api_handler import MetaAPIHandler -from mindsdb.integrations.libs.passthrough import PassthroughMixin -from mindsdb.integrations.libs.passthrough_types import PassthroughRequest -from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator, extract_comparison_conditions - -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE -from mindsdb.utilities import log -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast import Select, Identifier, BinaryOperation, Star -from mindsdb_sql_parser.ast import Join as SQLJoin - - -from mindsdb.integrations.handlers.hubspot_handler.hubspot_oauth import HubSpotOAuth2Manager -from mindsdb.integrations.utilities.handlers.auth_utilities.exceptions import AuthException - -logger = log.getLogger(__name__) - -# Maps (from_table, to_table) → (association_table_name, from_id_col, to_id_col) -# Used to suggest the correct association-table pattern when users write direct FK joins. 
-_DIRECT_JOIN_ASSOC_MAP = { - ("companies", "contacts"): ("company_contacts", "company_id", "contact_id"), - ("companies", "deals"): ("company_deals", "company_id", "deal_id"), - ("companies", "tickets"): ("company_tickets", "company_id", "ticket_id"), - ("contacts", "companies"): ("contact_companies", "contact_id", "company_id"), - ("contacts", "deals"): ("contact_deals", "contact_id", "deal_id"), - ("contacts", "tickets"): ("contact_tickets", "contact_id", "ticket_id"), - ("deals", "companies"): ("deal_companies", "deal_id", "company_id"), - ("deals", "contacts"): ("deal_contacts", "deal_id", "contact_id"), - ("tickets", "companies"): ("ticket_companies", "ticket_id", "company_id"), - ("tickets", "contacts"): ("ticket_contacts", "ticket_id", "contact_id"), - ("tickets", "deals"): ("ticket_deals", "ticket_id", "deal_id"), -} - - -def _extract_hubspot_error_message(error: Exception) -> str: - """Extract a user-friendly error message from HubSpot API exceptions.""" - error_str = str(error) - - if "403" in error_str and "MISSING_SCOPES" in error_str: - if "requiredGranularScopes" in error_str: - import json - - try: - start = error_str.find('{"status":') - if start != -1: - json_str = error_str[start : error_str.find("}", start) + 1] - error_data = json.loads(json_str) - if "errors" in error_data and len(error_data["errors"]) > 0: - context = error_data["errors"][0].get("context", {}) - scopes = context.get("requiredGranularScopes", []) - if scopes: - scopes_list = ", ".join(scopes) - return ( - f"Missing required HubSpot scopes. Your access token needs one or more of these permissions: {scopes_list}. " - f"Please update your HubSpot app scopes at https://developers.hubspot.com/ and regenerate your access token." - ) - except (json.JSONDecodeError, KeyError, IndexError): - pass - return ( - "Missing required HubSpot API permissions (scopes). " - "Please verify your access token has the necessary scopes. 
" - "Update scopes at https://developers.hubspot.com/" - ) - - if "401" in error_str or "Unauthorized" in error_str: - return "Invalid or expired HubSpot access token. Please regenerate your access token at https://developers.hubspot.com/" - - if "429" in error_str or "rate limit" in error_str.lower(): - return "HubSpot API rate limit exceeded. Please wait a moment and try again." - - if "ApiException" in error_str or "hubspot" in error_str.lower(): - return f"HubSpot API error: {error_str[:200]}" - - return str(error) - - -def _map_type(data_type: str) -> MYSQL_DATA_TYPE: - """Map HubSpot data types to MySQL types.""" - if data_type is None: - return MYSQL_DATA_TYPE.VARCHAR - - data_type_upper = data_type.upper() - - type_map = { - "VARCHAR": MYSQL_DATA_TYPE.VARCHAR, - "TEXT": MYSQL_DATA_TYPE.TEXT, - "INTEGER": MYSQL_DATA_TYPE.INT, - "INT": MYSQL_DATA_TYPE.INT, - "BIGINT": MYSQL_DATA_TYPE.BIGINT, - "DECIMAL": MYSQL_DATA_TYPE.DECIMAL, - "FLOAT": MYSQL_DATA_TYPE.FLOAT, - "DOUBLE": MYSQL_DATA_TYPE.DOUBLE, - "BOOLEAN": MYSQL_DATA_TYPE.BOOL, - "BOOL": MYSQL_DATA_TYPE.BOOL, - "DATE": MYSQL_DATA_TYPE.DATE, - "DATETIME": MYSQL_DATA_TYPE.DATETIME, - "TIMESTAMP": MYSQL_DATA_TYPE.DATETIME, - "TIME": MYSQL_DATA_TYPE.TIME, - } - - return type_map.get(data_type_upper, MYSQL_DATA_TYPE.VARCHAR) - - -class HubspotHandler(MetaAPIHandler, PassthroughMixin): - """Hubspot API handler implementation""" - - name = "hubspot" - - # REST passthrough — PAT (Private App Token) only. OAuth2 credentials - # (client_id/client_secret/refresh_token) are NOT supported here yet; - # that path needs an OAuthPassthroughMixin that refreshes tokens on - # demand. Passthrough with OAuth2 fails fast with a config error when - # `access_token` is missing from connection_data. 
- _bearer_token_arg = "access_token" - _base_url_default = "https://api.hubapi.com" - _test_request = PassthroughRequest(method="GET", path="/crm/v3/owners?limit=1") - - def __init__(self, name: str, **kwargs: Any) -> None: - """Initialize the handler.""" - super().__init__(name) - - connection_data = kwargs.get("connection_data") or {} - self.connection_data = connection_data - self.kwargs = kwargs - self.handler_storage = kwargs.get("handler_storage") - - self.connection: Optional[HubSpot] = None - self.is_connected: bool = False - self._association_tables = set(ASSOCIATION_TABLE_CLASSES.keys()) - self._non_object_tables = {"owners", "deal_stages"} - - # Register core CRM tables - self._register_table("companies", CompaniesTable(self)) - self._register_table("contacts", ContactsTable(self)) - self._register_table("deals", DealsTable(self)) - self._register_table("tickets", TicketsTable(self)) - - # Register engagement/activity tables - self._register_table("tasks", TasksTable(self)) - self._register_table("calls", CallsTable(self)) - self._register_table("emails", EmailsTable(self)) - self._register_table("meetings", MeetingsTable(self)) - self._register_table("notes", NotesTable(self)) - self._register_table("leads", LeadsTable(self)) - self._register_table("owners", OwnersTable(self)) - self._register_table("deal_stages", DealStagesTable(self)) - - for table_name, table_class in ASSOCIATION_TABLE_CLASSES.items(): - self._register_table(table_name, table_class(self)) - - def connect(self) -> HubSpot: - """Creates a new Hubspot API client if needed.""" - if self.is_connected and self.connection is not None: - return self.connection - - try: - access_token = self.connection_data.get("access_token") - client_id = self.connection_data.get("client_id") - client_secret = self.connection_data.get("client_secret") - - if access_token is not None: - if not isinstance(access_token, str) or not access_token.strip(): - raise ValueError("Invalid access_token provided") - - 
logger.info("Connecting to HubSpot using access token") - self.connection = HubSpot(access_token=access_token) - - elif client_id is not None or client_secret is not None: - if not client_id or not client_secret or not str(client_id).strip() or not str(client_secret).strip(): - raise ValueError("Invalid OAuth credentials provided") - logger.info("Connecting to HubSpot using OAuth credentials") - oauth_manager = HubSpotOAuth2Manager( - handler_storage=self.handler_storage, - client_id=client_id, - client_secret=client_secret, - scopes=self.connection_data.get("scope"), - optional_scopes=self.connection_data.get("optional_scope"), - redirect_uri=self.connection_data.get("redirect_uri"), - code=self.connection_data.get("code"), - datasource_name=self.name, - ) - logger.info("Attempting to obtain access token via OAuth flow") - self.connection = HubSpot(access_token=oauth_manager.get_access_token()) - - else: - raise ValueError( - "Authentication credentials missing. Provide either 'access_token' " - "or OAuth credentials: 'client_id' and 'client_secret'." 
- ) - - self.is_connected = True - logger.info("Successfully connected to HubSpot API") - return self.connection - - except AuthException: - self.connection = None - self.is_connected = False - logger.info("HubSpot OAuth authorization required") - raise - except Exception as e: - self.connection = None - self.is_connected = False - logger.error("Failed to connect to HubSpot API: %s", e) - raise ValueError(f"Connection to HubSpot failed: {e}") from e - - def disconnect(self) -> None: - """Close connection and cleanup resources.""" - self.connection = None - self.is_connected = False - logger.info("Disconnected from HubSpot API") - - def check_connection(self) -> StatusResponse: - """Checks whether the API client is connected to Hubspot.""" - response = StatusResponse(False) - - # Defer OAuth code-for-token exchange: CREATE DATABASE runs check_connection - # with ephemeral handler_storage, so tokens written here would be discarded; - # later requests then fail with BAD_AUTH_CODE. Exchange only when a request - if self.connection_data.get("code") and not self.is_connected: - from mindsdb.integrations.handlers.hubspot_handler.hubspot_oauth import _STORAGE_KEY - - if not self.handler_storage.encrypted_json_get(_STORAGE_KEY): - logger.info( - "Deferring HubSpot check_connection because OAuth code exchange must happen in a persistent context." 
- ) - response.success = True - return response - - try: - self.connect() - - if self.connection: - # Try to access contacts first (most common scope) - try: - list(self.connection.crm.contacts.get_all(limit=1)) - response.success = True - logger.info("HubSpot connection check successful (contacts accessible)") - except Exception as contacts_error: - try: - list(self.connection.crm.companies.get_all(limit=1)) - response.success = True - logger.info("HubSpot connection check successful (companies accessible)") - except Exception as companies_error: - contacts_msg = _extract_hubspot_error_message(contacts_error) - companies_msg = _extract_hubspot_error_message(companies_error) - error_msg = f"Cannot access HubSpot data. Contacts error: {contacts_msg}. Companies error: {companies_msg}" - logger.error(f"HubSpot connection check failed: {error_msg}") - response.error_message = error_msg - response.success = False - - except AuthException as error: - response.error_message = str(error) - response.redirect_url = error.auth_url - return response - except Exception as e: - error_msg = _extract_hubspot_error_message(e) - logger.error(f"HubSpot connection check failed: {error_msg}") - response.error_message = error_msg - response.success = False - - self.is_connected = response.success - return response - - def native_query(self, query: Optional[str] = None) -> Response: - """Receive and process a raw query.""" - logger.debug(f"[HubSpotHandler] native_query() called — query: {query}") - if not query: - return Response(RESPONSE_TYPE.ERROR, error_message="Query cannot be None or empty") - - try: - ast = parse_sql(query) - except Exception as e: - logger.error(f"Failed to execute native query: {str(e)}") - return Response(RESPONSE_TYPE.ERROR, error_message=f"Query execution failed: {str(e)}") - - try: - if isinstance(ast, Select) and isinstance(ast.from_table, SQLJoin): - logger.debug("[HubSpotHandler] native_query() — routing to _execute_join_query") - return 
self._execute_join_query(ast) - logger.debug("[HubSpotHandler] native_query() — routing to query()") - return self.query(ast) - except Exception as e: - logger.error(f"Failed to execute native query: {str(e)}") - return Response(RESPONSE_TYPE.ERROR, error_message=f"Query execution failed: {str(e)}") - - CORE_TABLES = frozenset( - {"companies", "contacts", "deals", "tickets", "tasks", "calls", "emails", "meetings", "notes"} - ) - - def get_tables(self) -> Response: - """Return list of tables available in the HubSpot integration.""" - try: - self.connect() - - tables_data = [] - all_tables = list(self._tables.keys()) - for table_name in all_tables: - try: - if table_name in self._association_tables: - table_info = { - "TABLE_SCHEMA": "hubspot", - "TABLE_NAME": table_name, - "TABLE_TYPE": "BASE TABLE", - } - tables_data.append(table_info) - continue - if table_name in self._non_object_tables: - self._tables[table_name].list(limit=1) - table_info = { - "TABLE_SCHEMA": "hubspot", - "TABLE_NAME": table_name, - "TABLE_TYPE": "BASE TABLE", - } - tables_data.append(table_info) - continue - - default_properties = self._tables[table_name].get_columns() - hubspot_properties = [ - to_hubspot_property(col) - for col in default_properties - if to_hubspot_property(col) != "hs_object_id" - ] - - # Different API paths for different object types - if table_name in ["companies", "contacts", "deals", "tickets"]: - getattr(self.connection.crm, table_name).get_all(limit=1, properties=hubspot_properties) - else: - # Engagement objects use crm.objects; fetch a single page to validate access. 
- self.connection.crm.objects.basic_api.get_page( - table_name, limit=1, properties=hubspot_properties - ) - - table_info = { - "TABLE_SCHEMA": "hubspot", - "TABLE_NAME": table_name, - "TABLE_TYPE": "BASE TABLE", - } - tables_data.append(table_info) - logger.info(f"Table '{table_name}' is accessible") - except Exception as access_error: - if "403" in str(access_error) or "MISSING_SCOPES" in str(access_error): - error_msg = _extract_hubspot_error_message(access_error) - logger.warning(f"Table '{table_name}' is not accessible: {error_msg}") - else: - logger.warning(f"Could not access table {table_name}: {str(access_error)}") - - if not tables_data: - error_msg = ( - "No HubSpot tables are accessible with your current access token. " - "Please ensure your token has the necessary scopes. " - "Update scopes at https://developers.hubspot.com/" - ) - logger.error(error_msg) - return Response(RESPONSE_TYPE.ERROR, error_message=error_msg) - - df = pd.DataFrame(tables_data) - logger.info(f"Retrieved metadata for {len(tables_data)} accessible table(s)") - return Response(RESPONSE_TYPE.TABLE, data_frame=df) - - except Exception as e: - error_msg = _extract_hubspot_error_message(e) - logger.error(f"Failed to get tables: {error_msg}") - return Response(RESPONSE_TYPE.ERROR, error_message=f"Failed to retrieve table list: {error_msg}") - - def get_columns(self, table_name: str) -> Response: - """Return column information for a specific table.""" - valid_tables = list(self._tables.keys()) - - if table_name not in valid_tables: - return Response( - RESPONSE_TYPE.ERROR, - error_message=f"Table '{table_name}' not found. 
Available tables: {', '.join(valid_tables)}", - ) - - try: - self.connect() - - discovered_columns = self._get_default_discovered_columns(table_name) - - columns_data = [] - for col in discovered_columns: - columns_data.append( - { - "COLUMN_NAME": col["column_name"], - "DATA_TYPE": col["data_type"], - "ORDINAL_POSITION": col["ordinal_position"], - "COLUMN_DEFAULT": None, - "IS_NULLABLE": "YES" - if col["is_nullable"] is True - else ("NO" if col["is_nullable"] is False else None), - "CHARACTER_MAXIMUM_LENGTH": None, - "CHARACTER_OCTET_LENGTH": None, - "NUMERIC_PRECISION": None, - "NUMERIC_SCALE": None, - "DATETIME_PRECISION": None, - "CHARACTER_SET_NAME": None, - "COLLATION_NAME": None, - } - ) - - df = pd.DataFrame(columns_data) - logger.info(f"Retrieved {len(columns_data)} columns for table {table_name}") - - result = Response(RESPONSE_TYPE.TABLE, data_frame=df) - result.to_columns_table_response(map_type_fn=_map_type) - return result - - except Exception as e: - error_msg = _extract_hubspot_error_message(e) - logger.error(f"Failed to get columns for table {table_name}: {error_msg}") - return Response( - RESPONSE_TYPE.ERROR, error_message=f"Failed to retrieve columns for table '{table_name}': {error_msg}" - ) - - def meta_get_column_statistics(self, table_names: Optional[List[str]] = None) -> Response: - """Return column statistics for data catalog.""" - try: - self.connect() - - all_tables = [ - name - for name in self._tables.keys() - if name not in self._association_tables and name not in self._non_object_tables - ] - if table_names: - tables_to_process = [t for t in table_names if t in all_tables] - else: - tables_to_process = all_tables - - all_statistics = [] - - for table_name in tables_to_process: - try: - table_statistics = [] - default_properties = self._tables[table_name].get_columns() - hubspot_properties = [ - to_hubspot_property(col) - for col in default_properties - if to_hubspot_property(col) != "hs_object_id" - ] - - # Get sample data based on 
object type - if table_name in ["companies", "contacts", "deals", "tickets"]: - sample_data = list( - getattr(self.connection.crm, table_name).get_all(limit=1000, properties=hubspot_properties) - ) - else: - sample_data = list(self._get_objects_all(table_name, limit=1000, properties=hubspot_properties)) - - if len(sample_data) > 0: - sample_size = len(sample_data) - logger.info(f"Calculating statistics from {sample_size} records for {table_name}") - - all_properties = set() - for item in sample_data: - if hasattr(item, "properties") and item.properties: - all_properties.update(item.properties.keys()) - - # Statistics for 'id' column - id_values = [item.id for item in sample_data] - id_stats = self._calculate_column_statistics("id", id_values) - table_statistics.append( - { - "TABLE_NAME": table_name, - "COLUMN_NAME": "id", - "NULL_PERCENTAGE": (id_stats["null_count"] / sample_size) * 100 - if sample_size > 0 - else 0, - "DISTINCT_VALUES_COUNT": id_stats["distinct_count"], - "MINIMUM_VALUE": None, - "MAXIMUM_VALUE": None, - "MOST_COMMON_VALUES": None, - "MOST_COMMON_FREQUENCIES": None, - } - ) - - for prop_name in sorted(all_properties): - column_name = to_internal_property(prop_name) - - column_values = [] - for item in sample_data: - if hasattr(item, "properties") and item.properties: - column_values.append(item.properties.get(prop_name)) - else: - column_values.append(None) - - stats = self._calculate_column_statistics(column_name, column_values) - - most_common_values = None - most_common_frequencies = None - non_null_values = [v for v in column_values if v is not None] - if non_null_values: - value_counts = Counter(non_null_values) - top_5 = value_counts.most_common(5) - if top_5: - most_common_values = [str(v) for v, _ in top_5] - most_common_frequencies = [str(c) for _, c in top_5] - - table_statistics.append( - { - "TABLE_NAME": table_name, - "COLUMN_NAME": column_name, - "NULL_PERCENTAGE": (stats["null_count"] / sample_size) * 100 - if sample_size > 0 - 
else 0, - "DISTINCT_VALUES_COUNT": stats["distinct_count"], - "MINIMUM_VALUE": None, - "MAXIMUM_VALUE": None, - "MOST_COMMON_VALUES": most_common_values, - "MOST_COMMON_FREQUENCIES": most_common_frequencies, - } - ) - - # Filter to only include default properties - table_statistics = [ - col - for col in table_statistics - if col["COLUMN_NAME"] in default_properties or col["COLUMN_NAME"] == "id" - ] - all_statistics.extend(table_statistics) - - except Exception as e: - logger.warning(f"Could not get statistics for table {table_name}: {str(e)}") - - df = pd.DataFrame(all_statistics) - logger.info( - f"Retrieved statistics for {len(all_statistics)} columns across {len(tables_to_process)} tables" - ) - return Response(RESPONSE_TYPE.TABLE, data_frame=df) - - except Exception as e: - logger.error(f"Failed to get column statistics: {str(e)}") - return Response(RESPONSE_TYPE.ERROR, error_message=f"Failed to retrieve column statistics: {str(e)}") - - def _get_default_discovered_columns(self, table_name: str) -> List[Dict[str, Any]]: - """Get default discovered columns when API data is unavailable.""" - if ( - table_name in self._association_tables or table_name in self._non_object_tables - ) and table_name in HUBSPOT_TABLE_COLUMN_DEFINITIONS: - base_columns = [] - ordinal_position = 1 - for col_name, data_type, description in HUBSPOT_TABLE_COLUMN_DEFINITIONS[table_name]: - base_columns.append( - { - "column_name": col_name, - "data_type": data_type, - "is_nullable": True, - "ordinal_position": ordinal_position, - "description": description, - "original_name": col_name, - } - ) - ordinal_position += 1 - return base_columns - - ordinal_position = 1 - base_columns = [ - { - "column_name": "id", - "data_type": "VARCHAR", - "is_nullable": False, - "ordinal_position": ordinal_position, - "description": "Unique identifier (Primary Key)", - "original_name": "id", - } - ] - ordinal_position += 1 - - if table_name in HUBSPOT_TABLE_COLUMN_DEFINITIONS: - for col_name, data_type, 
description in HUBSPOT_TABLE_COLUMN_DEFINITIONS[table_name]: - base_columns.append( - { - "column_name": col_name, - "data_type": data_type, - "is_nullable": True, - "ordinal_position": ordinal_position, - "description": description, - "original_name": col_name, - } - ) - ordinal_position += 1 - - return base_columns - - def _get_default_meta_columns(self, table_name: str) -> List[Dict[str, Any]]: - """Get default column metadata for data catalog when data is unavailable.""" - if ( - table_name in self._association_tables or table_name in self._non_object_tables - ) and table_name in HUBSPOT_TABLE_COLUMN_DEFINITIONS: - base_columns = [] - for col_name, data_type, description in HUBSPOT_TABLE_COLUMN_DEFINITIONS[table_name]: - base_columns.append( - { - "TABLE_NAME": table_name, - "COLUMN_NAME": col_name, - "DATA_TYPE": data_type, - "COLUMN_DESCRIPTION": description, - "IS_NULLABLE": True, - "COLUMN_DEFAULT": None, - } - ) - return base_columns - - base_columns = [ - { - "TABLE_NAME": table_name, - "COLUMN_NAME": "id", - "DATA_TYPE": "VARCHAR", - "COLUMN_DESCRIPTION": "Unique identifier (Primary Key)", - "IS_NULLABLE": False, - "COLUMN_DEFAULT": None, - } - ] - - if table_name in HUBSPOT_TABLE_COLUMN_DEFINITIONS: - for col_name, data_type, description in HUBSPOT_TABLE_COLUMN_DEFINITIONS[table_name]: - base_columns.append( - { - "TABLE_NAME": table_name, - "COLUMN_NAME": col_name, - "DATA_TYPE": data_type, - "COLUMN_DESCRIPTION": description, - "IS_NULLABLE": True, - "COLUMN_DEFAULT": None, - } - ) - - return base_columns - - def _get_table_description(self, table_name: str) -> str: - """Get description for a table.""" - descriptions = { - "companies": "HubSpot companies data including name, industry, location and other company properties", - "contacts": "HubSpot contacts data including email, name, phone and other contact properties", - "deals": "HubSpot deals data including deal name, amount, stage and other deal properties", - "tickets": "HubSpot tickets data 
including subject, status, priority and pipeline information", - "tasks": "HubSpot tasks data including subject, status, priority and due dates", - "calls": "HubSpot call logs including direction, duration, outcome and notes", - "emails": "HubSpot email logs including subject, direction, status and content", - "meetings": "HubSpot meeting logs including title, location, outcome and timing", - "notes": "HubSpot notes for timeline entries on records", - "company_contacts": "HubSpot company to contact associations", - "company_deals": "HubSpot company to deal associations", - "company_tickets": "HubSpot company to ticket associations", - "contact_companies": "HubSpot contact to company associations", - "contact_deals": "HubSpot contact to deal associations", - "contact_tickets": "HubSpot contact to ticket associations", - "deal_companies": "HubSpot deal to company associations", - "deal_contacts": "HubSpot deal to contact associations", - "ticket_companies": "HubSpot ticket to company associations", - "ticket_contacts": "HubSpot ticket to contact associations", - "ticket_deals": "HubSpot ticket to deal associations", - "owners": "HubSpot owners with names and emails", - "deal_stages": "HubSpot deal pipeline stages with labels", - "leads": "HubSpot leads data including lead status, source and other lead properties", - } - return descriptions.get(table_name, f"HubSpot {table_name} data") - - def _estimate_table_rows(self, table_name: str) -> Optional[int]: - """Get actual count of rows in a table using HubSpot Search API.""" - try: - if table_name in ["companies", "contacts", "deals", "tickets", "leads"]: - result = getattr(self.connection.crm, table_name).search_api.do_search( - public_object_search_request={"limit": 1} - ) - else: - result = self.connection.crm.objects.search_api.do_search( - table_name, public_object_search_request={"limit": 1} - ) - return result.total if hasattr(result, "total") else None - except Exception as e: - logger.warning(f"Could not get 
row count for {table_name} using search API: {str(e)}") - return None - - def _get_objects_all( - self, - object_type: str, - limit: Optional[int] = None, - properties: Optional[List[str]] = None, - **kwargs: Any, - ) -> List[Any]: - """Fetch objects with paging to honor custom limits for crm.objects.""" - results: List[Any] = [] - after = None - page_max_size = 100 - - if limit is None and "limit" in kwargs: - limit = kwargs.pop("limit") - if properties is None and "properties" in kwargs: - properties = kwargs.pop("properties") - - while True: - if limit is not None: - remaining = limit - len(results) - if remaining <= 0: - break - page_size = min(page_max_size, remaining) - else: - page_size = page_max_size - - page = self.connection.crm.objects.basic_api.get_page( - object_type, after=after, limit=page_size, properties=properties, **kwargs - ) - results.extend(page.results) - - if page.paging is None: - break - after = page.paging.next.after - - return results - - def _calculate_column_statistics(self, column_name: str, values: List[Any]) -> Dict[str, Any]: - """Calculate comprehensive statistics for a column.""" - total_count = len(values) - non_null_values = [v for v in values if v is not None] - null_count = total_count - len(non_null_values) - - stats = { - "null_count": null_count, - "distinct_count": len(set(str(v) for v in non_null_values)) if non_null_values else 0, - "min_value": None, - "max_value": None, - "average_value": None, - } - - if non_null_values: - try: - s = pd.Series(non_null_values) - if pd_types.is_numeric_dtype(s): - avg = s.mean() - stats["average_value"] = round(avg, 2) - except (ValueError, TypeError): - pass - - return stats - - def _infer_data_type_from_samples(self, values: List[Any]) -> str: - """Infer data type from multiple sample values for better accuracy.""" - non_null_values = [v for v in values if v is not None] - - if not non_null_values: - return "VARCHAR" - - type_counts = {} - for value in non_null_values[:100]: - 
inferred_type = self._infer_data_type(value) - type_counts[inferred_type] = type_counts.get(inferred_type, 0) + 1 - - if type_counts: - return max(type_counts.items(), key=lambda x: x[1])[0] - - return "VARCHAR" - - def _infer_data_type(self, value: Any) -> str: - """Infer SQL data type from Python value.""" - if value is None: - return "VARCHAR" - elif isinstance(value, bool): - return "BOOLEAN" - elif isinstance(value, int): - return "INTEGER" - elif isinstance(value, float): - return "DECIMAL" - elif isinstance(value, str): - if "T" in value and ("Z" in value or "+" in value): - return "TIMESTAMP" - return "VARCHAR" - else: - return "VARCHAR" - - def _rewrite_where_for_table(self, where_node: Any, table_alias: str, is_main_table: bool = False) -> Any: - """Extract WHERE conditions for a specific table alias, stripping the alias prefix. - - Returns a new WHERE AST node with aliases stripped, or None if no conditions - reference the given alias. - """ - if where_node is None: - return None - - if isinstance(where_node, BinaryOperation): - if where_node.op.lower() == "and": - left_cond = self._rewrite_where_for_table(where_node.args[0], table_alias, is_main_table) - right_cond = self._rewrite_where_for_table(where_node.args[1], table_alias, is_main_table) - if left_cond is not None and right_cond is not None: - return BinaryOperation("and", args=[left_cond, right_cond]) - return left_cond if left_cond is not None else right_cond - else: - # Leaf comparison — check if it belongs to this table - left_arg = where_node.args[0] if where_node.args else None - if isinstance(left_arg, Identifier): - ident_parts = left_arg.parts - if len(ident_parts) >= 2: - ref_alias = ident_parts[0].lower() - col_name = ident_parts[-1] - if ref_alias == table_alias.lower(): - stripped_args = [Identifier(col_name)] + list(where_node.args[1:]) - return BinaryOperation(where_node.op, args=stripped_args) - return None - elif len(ident_parts) == 1 and is_main_table: - # Unqualified condition 
belongs to the primary (FROM) table - return where_node - return None - - def _format_select_targets(self, targets) -> str: - """Render SELECT target list back to a SQL string fragment.""" - if not targets: - return "*" - parts = [] - for t in targets: - if isinstance(t, Star): - parts.append("*") - elif isinstance(t, Identifier): - parts.append(".".join(str(p) for p in t.parts)) - return ", ".join(parts) if parts else "*" - - def _suggest_association_query( - self, ast: Select, left_name: str, left_alias: str, right_name: str, right_alias: str - ) -> Response: - """Return a helpful error directing the user to use association tables. - - Analyses the WHERE clause to determine which table is being filtered so the - suggestion puts the filtered table first (making the join efficient). - """ - where_on_right = self._rewrite_where_for_table(ast.where, right_alias) is not None - where_on_left = self._rewrite_where_for_table(ast.where, left_alias, is_main_table=True) is not None - - # Put the filtered table first so the suggestion is efficient - if where_on_right and not where_on_left: - from_name, from_alias = right_name, right_alias - to_name, to_alias = left_name, left_alias - else: - from_name, from_alias = left_name, left_alias - to_name, to_alias = right_name, right_alias - - assoc_info = _DIRECT_JOIN_ASSOC_MAP.get((from_name, to_name)) - if assoc_info is None: - # Try reverse direction - assoc_info = _DIRECT_JOIN_ASSOC_MAP.get((to_name, from_name)) - - if assoc_info is None: - error_msg = ( - f"Direct JOINs between '{left_name}' and '{right_name}' are not supported. " - "Please use HubSpot association tables to join these objects." - ) - return Response(RESPONSE_TYPE.ERROR, error_message=error_msg) # type: ignore[arg-type] - - assoc_table, from_id_col, to_id_col = assoc_info - # 2-char alias, e.g. 
"cc" for company_contacts - assoc_alias = assoc_table[:2] - - col_str = self._format_select_targets(ast.targets) - where_clause = f"\nWHERE {ast.where}" if ast.where else "" - limit_clause = f"\nLIMIT {ast.limit.value}" if ast.limit else "" - - suggested = ( - f"SELECT {col_str}\n" - f"FROM `my_hubspot`.{from_name} {from_alias}\n" - f"JOIN `my_hubspot`.{assoc_table} {assoc_alias} " - f"ON {assoc_alias}.{from_id_col} = {from_alias}.id\n" - f"JOIN `my_hubspot`.{to_name} {to_alias} " - f"ON {to_alias}.id = {assoc_alias}.{to_id_col}" - f"{where_clause}" - f"{limit_clause}" - ) - - error_msg = ( - f"Direct JOINs between HubSpot objects using foreign key columns (e.g. primary_company_id) " - f"are not supported. The HubSpot API represents relationships through association tables.\n\n" - f"Please rewrite your query using the '{assoc_table}' association table:\n\n" - f"{suggested}" - ) - return Response(RESPONSE_TYPE.ERROR, error_message=error_msg) - - def _flatten_join_tree(self, from_node) -> List[Tuple[str, str, Any]]: - """Flatten nested Join AST nodes into an ordered list of (table_name, alias, on_condition).""" - - entries: List[Tuple[str, str, Any]] = [] - - def _get_alias(ident: Identifier) -> str: - alias = getattr(ident, "alias", None) - if alias is None: - return ident.parts[-1].lower() - if isinstance(alias, str): - return alias.lower() - return alias.parts[-1].lower() - - def _walk(node, join_condition=None): - if isinstance(node, SQLJoin): - _walk(node.left, None) - right_table = node.right.parts[-1].lower() - right_table_alias = _get_alias(node.right) - entries.append((right_table, right_table_alias, node.condition)) - elif isinstance(node, Identifier): - entries.append((node.parts[-1].lower(), _get_alias(node), join_condition)) - - _walk(from_node) - return entries - - def _parse_on_condition(self, on_node) -> Optional[Tuple[Optional[str], str, Optional[str], str]]: - """Parse an ON equality into (left_alias, left_col, right_alias, right_col), or None if 
invalid.""" - if not isinstance(on_node, BinaryOperation) or on_node.op != "=": - return None - left_ident, right_ident = on_node.args - if not (isinstance(left_ident, Identifier) and isinstance(right_ident, Identifier)): - return None - - def _split(ident): - parts = ident.parts - return (parts[0].lower(), parts[-1].lower()) if len(parts) >= 2 else (None, parts[0].lower()) - - left_alias, left_col = _split(left_ident) - right_alias, right_col = _split(right_ident) - return left_alias, left_col, right_alias, right_col - - def _execute_join_query(self, ast: Select) -> Response: - """Execute a JOIN query using the HubSpot associations API.""" - logger.debug("[HubSpotHandler] _execute_join_query() called") - - join_entries = self._flatten_join_tree(ast.from_table) - if len(join_entries) < 2 or len(join_entries) > 3: - return Response( - RESPONSE_TYPE.ERROR, error_message="Only 2- and 3-table joins via association tables are supported." - ) - - alias_map: Dict[str, str] = {alias: name for name, alias, _ in join_entries} - primary_alias = join_entries[0][1] - - if len(join_entries) == 2: - (left_table, left_alias, _), (right_table, right_alias, right_on) = join_entries - - # Reject direct core-to-core joins without an association table - if left_table in self.CORE_TABLES and right_table in self.CORE_TABLES: - return self._suggest_association_query(ast, left_table, left_alias, right_table, right_alias) - - on_parsed = self._parse_on_condition(right_on) - if not on_parsed: - return Response(RESPONSE_TYPE.ERROR, error_message="Unsupported JOIN condition.") - - on_left_alias, on_left_col, on_right_alias, on_right_col = on_parsed - if left_table in self._association_tables: - assoc_name, assoc_alias, assoc_join_col = ( - left_table, - left_alias, - on_left_col if on_left_alias == left_alias else on_right_col, - ) - core_name, core_alias, core_join_col = ( - right_table, - right_alias, - on_right_col if on_left_alias == left_alias else on_left_col, - ) - else: - assoc_name, 
assoc_alias, assoc_join_col = ( - right_table, - right_alias, - on_right_col if on_right_alias == right_alias else on_left_col, - ) - core_name, core_alias, core_join_col = ( - left_table, - left_alias, - on_left_col if on_right_alias == right_alias else on_right_col, - ) - - if assoc_name not in self._tables or core_name not in self._tables: - return Response(RESPONSE_TYPE.ERROR, error_message="Unknown table in JOIN.") - - row_limit = ast.limit.value if ast.limit else None - core_filter = self._rewrite_where_for_table( - ast.where, core_alias, is_main_table=(core_alias == primary_alias) - ) - assoc_filter = self._rewrite_where_for_table( - ast.where, assoc_alias, is_main_table=(assoc_alias == primary_alias) - ) - - # Fetch the filtered core table rows - core_rows = self._tables[core_name].select( - Select(targets=[Star()], from_table=Identifier(core_name), where=core_filter) - ) - if core_rows.empty or core_join_col not in core_rows.columns: - return Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame()) - - core_id_list = core_rows[core_join_col].dropna().astype(str).tolist() - if not core_id_list: - return Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame()) - - # Fetch association rows filtered by the core IDs - assoc_filters: List[FilterCondition] = [FilterCondition(assoc_join_col, FilterOperator.IN, core_id_list)] - if assoc_filter is not None: - try: - for filter_cond in extract_comparison_conditions(assoc_filter): - assoc_filters.append( - FilterCondition(filter_cond[1], FilterOperator(filter_cond[0].upper()), filter_cond[2]) - ) - except Exception: - pass - - assoc_rows = self._tables[assoc_name].list(conditions=assoc_filters, limit=row_limit) - if assoc_rows.empty: - return Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame()) - - joined_df = assoc_rows.merge( - core_rows, left_on=assoc_join_col, right_on=core_join_col, how="inner", suffixes=("", f"_{core_alias}") - ) - output_df = self._resolve_select_targets(ast.targets, joined_df, alias_map, 
join_entries) - if row_limit: - output_df = output_df.head(row_limit) - return Response(RESPONSE_TYPE.TABLE, data_frame=output_df) - - (left_table, left_alias, _), (assoc_table, assoc_alias, left_on), (right_table, right_alias, right_on) = ( - join_entries - ) - - if assoc_table not in self._association_tables: - if left_table in self.CORE_TABLES and assoc_table in self.CORE_TABLES: - return self._suggest_association_query(ast, left_table, left_alias, assoc_table, assoc_alias) - return Response(RESPONSE_TYPE.ERROR, error_message="Only CORE JOIN ASSOC JOIN CORE pattern is supported.") - - left_on_parsed = self._parse_on_condition(left_on) - right_on_parsed = self._parse_on_condition(right_on) - if not left_on_parsed or not right_on_parsed: - return Response(RESPONSE_TYPE.ERROR, error_message="Unsupported JOIN condition — expected simple equality.") - - lop_left_alias, lop_left_col, _, lop_right_col = left_on_parsed - # left association column is the one that references the core table - left_assoc_col = lop_left_col if lop_left_alias == assoc_alias else lop_right_col - # left id column is the one that references the left core table - left_id_col = lop_left_col if lop_left_alias == left_alias else lop_right_col - - rop_left_alias, rop_left_col, _, rop_right_col = right_on_parsed - # right association column is the one that references the core table - right_assoc_col = rop_left_col if rop_left_alias == assoc_alias else rop_right_col - # right id column is the one that references the right core table - right_id_col = rop_right_col if rop_left_alias == assoc_alias else rop_left_col - - if left_table not in self._tables or assoc_table not in self._tables or right_table not in self._tables: - return Response(RESPONSE_TYPE.ERROR, error_message="Unknown table in JOIN.") - - row_limit = ast.limit.value if ast.limit else None - left_filter = self._rewrite_where_for_table(ast.where, left_alias, is_main_table=(left_alias == primary_alias)) - assoc_filter = 
self._rewrite_where_for_table( - ast.where, assoc_alias, is_main_table=(assoc_alias == primary_alias) - ) - right_filter = self._rewrite_where_for_table( - ast.where, right_alias, is_main_table=(right_alias == primary_alias) - ) - - left_rows = self._tables[left_table].select( - Select(targets=[Star()], from_table=Identifier(left_table), where=left_filter) - ) - if left_rows.empty or left_id_col not in left_rows.columns: - return Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame()) - - left_id_list = left_rows[left_id_col].dropna().astype(str).tolist() - if not left_id_list: - return Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame()) - - # Fetch association rows filtered by left-side IDs - assoc_filters: List[FilterCondition] = [FilterCondition(left_assoc_col, FilterOperator.IN, left_id_list)] - if assoc_filter is not None: - try: - for filter_cond in extract_comparison_conditions(assoc_filter): - assoc_filters.append( - FilterCondition(filter_cond[1], FilterOperator(filter_cond[0].upper()), filter_cond[2]) - ) - except Exception: - pass - - assoc_rows = self._tables[assoc_table].list(conditions=assoc_filters) - if assoc_rows.empty or right_assoc_col not in assoc_rows.columns: - return Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame()) - - right_id_list = assoc_rows[right_assoc_col].dropna().astype(str).tolist() - if not right_id_list: - return Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame()) - - right_filters: List[FilterCondition] = [FilterCondition(right_id_col, FilterOperator.IN, right_id_list)] - if right_filter is not None: - try: - for filter_cond in extract_comparison_conditions(right_filter): - right_filters.append( - FilterCondition(filter_cond[1], FilterOperator(filter_cond[0].upper()), filter_cond[2]) - ) - except Exception: - pass - - right_rows = self._tables[right_table].list(conditions=right_filters) - if right_rows.empty: - return Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame()) - - joined_df = assoc_rows.merge( - 
left_rows, left_on=left_assoc_col, right_on=left_id_col, how="inner", suffixes=("", f"_{left_alias}") - ) - joined_df = joined_df.merge( - right_rows, left_on=right_assoc_col, right_on=right_id_col, how="inner", suffixes=("", f"_{right_alias}") - ) - - output_df = self._resolve_select_targets(ast.targets, joined_df, alias_map, join_entries) - if row_limit: - output_df = output_df.head(row_limit) - return Response(RESPONSE_TYPE.TABLE, data_frame=output_df) - - def _resolve_select_targets( - self, - targets, - df: pd.DataFrame, - alias_map: Dict[str, str], - join_entries: List[Tuple[str, str, Any]], - ) -> pd.DataFrame: - """Resolve SELECT target list against a merged DataFrame. - - Handles qualified names (alias.col), unqualified names, and Star. - Returns a DataFrame with only the requested columns (renamed to alias.col if needed). - """ - if not targets: - return df - - selected_cols: List[str] = [] - renames: Dict[str, str] = {} - is_select_all = any(isinstance(target, Star) for target in targets) - if is_select_all: - return df - - for target in targets: - if isinstance(target, Identifier): - output_alias = getattr(target, "alias", None) - ident_parts = target.parts - if len(ident_parts) >= 2: - table_alias, col_name = ident_parts[0].lower(), ident_parts[-1] - if col_name in df.columns: - selected_cols.append(col_name) - if output_alias: - renames[col_name] = output_alias - else: - # Column may have been suffixed during merge (e.g. 
"id_co") - suffixed_col = f"{col_name}_{table_alias}" - if suffixed_col in df.columns: - selected_cols.append(suffixed_col) - renames[suffixed_col] = output_alias or col_name - else: - col_name = ident_parts[0] - if col_name in df.columns: - selected_cols.append(col_name) - if output_alias: - renames[col_name] = output_alias - - valid_cols = list(dict.fromkeys(c for c in selected_cols if c in df.columns)) - if valid_cols: - df = df[valid_cols] - if renames: - df = df.rename(columns=renames) - return df.reset_index(drop=True) - - def meta_get_primary_keys(self, table_names: Optional[List[str]] = None) -> Response: - """Return primary key metadata for the data catalog. - - Every object table has ``id`` as its PK. - Association tables have a composite PK on both ID columns. - """ - try: - self.connect() - except Exception as e: - return Response(RESPONSE_TYPE.ERROR, error_message=f"Failed to retrieve primary keys: {e}") - - all_tables = list(self._tables.keys()) - if table_names: - all_tables = [t for t in all_tables if t in table_names] - - rows: List[Dict[str, Any]] = [] - - for table_name in all_tables: - if table_name in self._association_tables: - id_cols = [c for c in self._tables[table_name].get_columns() if c.endswith("_id")] - for pos, col in enumerate(id_cols, start=1): - rows.append( - { - "TABLE_NAME": table_name, - "COLUMN_NAME": col, - "ORDINAL_POSITION": pos, - "CONSTRAINT_NAME": f"pk_{table_name}", - } - ) - elif table_name == "deal_stages": - for pos, col in enumerate(["pipeline_id", "stage_id"], start=1): - rows.append( - { - "TABLE_NAME": table_name, - "COLUMN_NAME": col, - "ORDINAL_POSITION": pos, - "CONSTRAINT_NAME": f"pk_{table_name}", - } - ) - else: - rows.append( - { - "TABLE_NAME": table_name, - "COLUMN_NAME": "id", - "ORDINAL_POSITION": 1, - "CONSTRAINT_NAME": f"pk_{table_name}", - } - ) - - df = ( - pd.DataFrame(rows) - if rows - else pd.DataFrame(columns=["TABLE_NAME", "COLUMN_NAME", "ORDINAL_POSITION", "CONSTRAINT_NAME"]) - ) - return 
Response(RESPONSE_TYPE.TABLE, data_frame=df) - - def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None) -> Response: - """Return foreign key metadata for the data catalog. - - Exposes two sets of relationships so the agent can generate correct JOINs: - - 1. Association table FKs — e.g. company_contacts.company_id → companies.id - 2. Object-table primary_*_id FKs — e.g. contacts.primary_company_id → companies.id - """ - try: - self.connect() - except Exception as e: - return Response(RESPONSE_TYPE.ERROR, error_message=f"Failed to retrieve foreign keys: {e}") - - _ASSOC_TARGET_TO_TABLE = { - "companies": "companies", - "contacts": "contacts", - "deals": "deals", - "tickets": "tickets", - } - - all_tables = set(self._tables.keys()) - if table_names: - all_tables = set(table_names).intersection(all_tables) - - rows: List[Dict[str, Any]] = [] - - # 1. Association table FKs — aggregated from each table's meta_get_foreign_keys() - for table_name in sorted(all_tables): - if table_name not in self._association_tables: - continue - table_obj = self._tables[table_name] - if hasattr(table_obj, "meta_get_foreign_keys"): - for fk in table_obj.meta_get_foreign_keys(table_name): - col = fk.get("COLUMN_NAME") - rows.append( - { - "CHILD_TABLE_NAME": fk.get("TABLE_NAME", table_name), - "CHILD_COLUMN_NAME": col, - "PARENT_TABLE_NAME": fk.get("REFERENCED_TABLE_NAME"), - "PARENT_COLUMN_NAME": fk.get("REFERENCED_COLUMN_NAME", "id"), - "CONSTRAINT_NAME": f"fk_{table_name}_{col}", - } - ) - - for table_name in sorted(all_tables): - if table_name in self._association_tables or table_name in self._non_object_tables: - continue - for target_type, column_name in PRIMARY_ASSOCIATIONS_CONFIG.get(table_name, []): - parent_table = _ASSOC_TARGET_TO_TABLE.get(target_type) - if parent_table is None: - continue - rows.append( - { - "CHILD_TABLE_NAME": table_name, - "CHILD_COLUMN_NAME": column_name, - "PARENT_TABLE_NAME": parent_table, - "PARENT_COLUMN_NAME": "id", - 
"CONSTRAINT_NAME": f"fk_{table_name}_{column_name}", - } - ) - - df = ( - pd.DataFrame(rows) - if rows - else pd.DataFrame( - columns=[ - "CHILD_TABLE_NAME", - "CHILD_COLUMN_NAME", - "PARENT_TABLE_NAME", - "PARENT_COLUMN_NAME", - "CONSTRAINT_NAME", - ] - ) - ) - return Response(RESPONSE_TYPE.TABLE, data_frame=df) diff --git a/mindsdb/integrations/handlers/hubspot_handler/hubspot_oauth.py b/mindsdb/integrations/handlers/hubspot_handler/hubspot_oauth.py deleted file mode 100644 index 9608055edad..00000000000 --- a/mindsdb/integrations/handlers/hubspot_handler/hubspot_oauth.py +++ /dev/null @@ -1,151 +0,0 @@ -import time -import urllib.parse -from typing import Optional - -from flask import request -from hubspot import HubSpot -from hubspot.utils.oauth import get_auth_url - -from mindsdb.utilities import log -from mindsdb.integrations.utilities.handlers.auth_utilities.exceptions import AuthException - -logger = log.getLogger(__name__) - -_STORAGE_KEY = "hubspot_oauth_tokens" -_DEFAULT_REDIRECT_PATH = "/verify-auth" -_TOKEN_EXPIRY_BUFFER = 0.95 - - -class HubSpotOAuth2Manager: - """ - Manages HubSpot OAuth2 authorization_code flow for MindsDB. - """ - - def __init__( - self, - handler_storage, - client_id: str, - client_secret: str, - scopes: Optional[str] = None, - optional_scopes: Optional[str] = None, - redirect_uri: Optional[str] = None, - code: Optional[str] = None, - datasource_name: Optional[str] = None, - ) -> None: - self.handler_storage = handler_storage - self.client_id = client_id - self.client_secret = client_secret - self.scopes = tuple(scopes.split()) if scopes else ("oauth",) - self.optional_scopes = tuple(optional_scopes.split()) if optional_scopes else None - self.redirect_uri = redirect_uri - self.code = code - self.datasource_name = datasource_name - - def get_access_token(self) -> str: - """ - Return a valid HubSpot access token. - Raises: - AuthException: User authorization required; auth_url is attached. 
- """ - stored = self.handler_storage.encrypted_json_get(_STORAGE_KEY) - logger.debug(f"Retrieved stored token data: {stored}") - if stored: - if time.time() < stored.get("expires_at", 0): - return stored["access_token"] - - if stored.get("refresh_token"): - try: - return self._refresh_token(stored["refresh_token"]) - except Exception as e: - logger.warning("HubSpot token refresh failed, reauthorization required: %s", e) - - runtime_code = self._get_runtime_code() - if runtime_code: - try: - return self._exchange_code(runtime_code) - except Exception as e: - # OAuth codes are single-use and expire quickly. - # If the exchange fails (BAD_AUTH_CODE), don't retry — prompt re-authorization. - logger.warning("HubSpot code exchange failed (code may be expired/used): %s", e) - - redirect_uri = self._get_redirect_uri() - auth_url = get_auth_url( - scope=self.scopes, - optional_scope=self.optional_scopes, - client_id=self.client_id, - redirect_uri=redirect_uri, - ) - # Fix for HubSpot's strict URL parsing. Python's URL encode translates spaces to `+`, but - # HubSpot's optional_scopes requires `%20` or `,`. - auth_url = auth_url.replace("+", "%20") - - # Append state with datasource info so the frontend can complete the connection - # even when localStorage context is missing (e.g. script-initiated flows). - if self.datasource_name: - state_data = urllib.parse.urlencode( - { - "datasource_name": self.datasource_name, - "integrations_name": "hubspot", - "client_id": self.client_id, - "client_secret": self.client_secret, - "redirect_uri": redirect_uri, - "scope": " ".join(self.scopes) if self.scopes else "oauth", - "optional_scope": " ".join(self.optional_scopes) if self.optional_scopes else "", - } - ) - auth_url += f"&state={urllib.parse.quote(state_data)}" - - raise AuthException( - f"HubSpot authorization required. 
Please visit: {auth_url}", - auth_url=auth_url, - ) - - def _get_runtime_code(self) -> Optional[str]: - """Return the OAuth authorization code from explicit value or active request context.""" - if self.code: - return self.code - try: - return request.args.get("code") - except RuntimeError: - return None - - def _exchange_code(self, code: str) -> str: - """Exchange an authorization code for access and refresh tokens.""" - response = HubSpot().oauth.tokens_api.create( - grant_type="authorization_code", - code=code, - redirect_uri=self._get_redirect_uri(), - client_id=self.client_id, - client_secret=self.client_secret, - ) - return self._persist_tokens(response) - - def _refresh_token(self, refresh_token: str) -> str: - """Obtain a new access token using the stored refresh token.""" - response = HubSpot().oauth.tokens_api.create( - grant_type="refresh_token", - refresh_token=refresh_token, - redirect_uri=self._get_redirect_uri(), - client_id=self.client_id, - client_secret=self.client_secret, - ) - return self._persist_tokens(response) - - def _persist_tokens(self, token_response) -> str: - """Save token data to encrypted handler storage and return the access token.""" - tokens = { - "access_token": token_response.access_token, - "refresh_token": token_response.refresh_token, - "expires_at": time.time() + token_response.expires_in * _TOKEN_EXPIRY_BUFFER, - } - self.handler_storage.encrypted_json_set(_STORAGE_KEY, tokens) - return tokens["access_token"] - - def _get_redirect_uri(self) -> str: - if self.redirect_uri: - return self.redirect_uri - try: - origin = request.headers.get("ORIGIN", "http://localhost:47334") - except RuntimeError: - origin = "http://localhost:47334" - return origin + _DEFAULT_REDIRECT_PATH diff --git a/mindsdb/integrations/handlers/hubspot_handler/hubspot_tables.py b/mindsdb/integrations/handlers/hubspot_handler/hubspot_tables.py deleted file mode 100644 index ccd9c2b8e34..00000000000 --- 
a/mindsdb/integrations/handlers/hubspot_handler/hubspot_tables.py +++ /dev/null @@ -1,4557 +0,0 @@ -from typing import List, Dict, Any, Optional, Tuple, Set, Iterable -import calendar -import re -from datetime import date, datetime, time, timedelta - -import pandas as pd -from hubspot import HubSpot -from hubspot.crm.associations.models import BatchInputPublicObjectId, PublicObjectId -from hubspot.crm.contacts.models import ( - BatchReadInputSimplePublicObjectId, - SimplePublicObjectId as ContactObjectId, -) -from hubspot.crm.objects import ( - SimplePublicObjectId as HubSpotObjectId, - SimplePublicObjectBatchInput as HubSpotObjectBatchInput, - SimplePublicObjectInputForCreate as HubSpotObjectInputCreate, - BatchInputSimplePublicObjectBatchInputForCreate, - BatchInputSimplePublicObjectBatchInput, - BatchInputSimplePublicObjectId, -) -from mindsdb_sql_parser import ast as sql_ast -from mindsdb_sql_parser.ast import ASTNode - -from mindsdb.integrations.utilities.handlers.query_utilities import UPDATEQueryExecutor, DELETEQueryExecutor -from mindsdb.integrations.utilities.query_traversal import query_traversal -from mindsdb.integrations.libs.api_handler import APIResource -from mindsdb.integrations.utilities.sql_utils import FilterCondition, SortColumn, extract_comparison_conditions -from mindsdb.utilities import log -from mindsdb.integrations.handlers.hubspot_handler.hubspot_association_utils import ( - get_association_targets_for_object, - get_primary_association_columns, - enrich_object_with_associations, -) - -logger = log.getLogger(__name__) - - -# Reference: https://developers.hubspot.com/docs/api-reference/crm-properties-v3/guide#create-unique-identifier-properties -PROPERTY_ALIASES = { - "lastmodifieddate": "hs_lastmodifieddate", - "id": "hs_object_id", -} - -REVERSE_PROPERTY_ALIASES = {value: key for key, value in PROPERTY_ALIASES.items()} - - -def to_hubspot_property(col: str) -> str: - """Map internal column names to HubSpot property names.""" - return 
PROPERTY_ALIASES.get(col, col) - - -def to_internal_property(prop: str) -> str: - """Map HubSpot property names to internal column names.""" - return REVERSE_PROPERTY_ALIASES.get(prop, prop) - - -# Reference https://developers.hubspot.com/docs/api-reference/crm-properties-v3/guide#operators -CANONICAL_OPERATOR_MAP = { - "=": "eq", - "==": "eq", - "eq": "eq", - "!=": "neq", - "<>": "neq", - "ne": "neq", - "neq": "neq", - "<": "lt", - "lt": "lt", - "<=": "lte", - "lte": "lte", - ">": "gt", - "gt": "gt", - ">=": "gte", - "gte": "gte", - "in": "in", - "not in": "not_in", - "not_in": "not_in", -} - -CANONICAL_TOKENS = set(CANONICAL_OPERATOR_MAP.values()) - -OPERATOR_MAP = {token: token.upper() for token in CANONICAL_TOKENS} - -SQL_OPERATOR_MAP = { - "eq": "=", - "neq": "!=", - "lt": "<", - "lte": "<=", - "gt": ">", - "gte": ">=", - "in": "in", - "not_in": "not in", -} - - -def canonical_op(op: Any) -> str: - """Normalize operators to canonical tokens used across search and post-filtering.""" - if hasattr(op, "value"): - op = op.value - op_str = str(op).strip().lower() - return CANONICAL_OPERATOR_MAP.get(op_str, op_str) - - -def _parse_interval_value(interval_expr: Any) -> Optional[Tuple[float, str]]: - if interval_expr is None: - return None - - raw = None - if isinstance(interval_expr, sql_ast.Interval): - args = getattr(interval_expr, "args", []) or [] - if len(args) >= 2: - value = args[0].value if isinstance(args[0], sql_ast.Constant) else str(args[0]) - unit = args[1].value if isinstance(args[1], sql_ast.Constant) else str(args[1]) - raw = f"{value} {unit}" - elif len(args) == 1: - raw = args[0].value if isinstance(args[0], sql_ast.Constant) else str(args[0]) - elif isinstance(interval_expr, sql_ast.Constant): - raw = interval_expr.value - elif isinstance(interval_expr, sql_ast.UnaryOperation): - op = getattr(interval_expr, "op", None) - if op == "-" and interval_expr.args: - parsed = _parse_interval_value(interval_expr.args[0]) - if parsed is None: - return None - 
value, unit = parsed - return (-value, unit) - else: - raw = str(interval_expr) - - if raw is None: - return None - - match = re.search(r"(?i)interval\\s+'?([0-9]+(?:\\.[0-9]+)?)'?\\s+([a-zA-Z]+)", str(raw)) - if not match: - match = re.search(r"(?i)^\\s*'?([0-9]+(?:\\.[0-9]+)?)'?\\s+([a-zA-Z]+)\\s*$", str(raw)) - if not match: - return None - - value = float(match.group(1)) - unit = match.group(2).lower() - if unit.endswith("s"): - unit = unit[:-1] - return value, unit - - -def _add_months(dt_value: Any, months: int) -> Any: - if not isinstance(dt_value, (date, datetime)): - return dt_value - year = dt_value.year + (dt_value.month - 1 + months) // 12 - month = (dt_value.month - 1 + months) % 12 + 1 - day = min(dt_value.day, calendar.monthrange(year, month)[1]) - if isinstance(dt_value, datetime): - return dt_value.replace(year=year, month=month, day=day) - return dt_value.replace(year=year, month=month, day=day) - - -def _apply_interval(base: Any, interval: Tuple[float, str]) -> Any: - value, unit = interval - if not isinstance(base, (date, datetime)): - return base - - if unit == "year": - return _add_months(base, int(round(value * 12))) - if unit == "month": - return _add_months(base, int(round(value))) - - if unit in {"day", "hour", "minute", "second", "week"}: - if isinstance(base, date) and not isinstance(base, datetime) and unit in {"hour", "minute", "second"}: - base = datetime.combine(base, time.min) - - seconds = value - if unit == "week": - seconds = value * 7 - unit = "day" - if unit == "day": - return base + timedelta(days=seconds) - if unit == "hour": - return base + timedelta(hours=seconds) - if unit == "minute": - return base + timedelta(minutes=seconds) - if unit == "second": - return base + timedelta(seconds=seconds) - - return base - - -def _evaluate_function_value(node: sql_ast.Function) -> Optional[Any]: - func = getattr(node, "op", None) or getattr(node, "name", None) - if not func: - return None - - func = str(func).lower() - if func in 
{"curdate", "current_date"}: - return date.today() - if func in {"now", "current_timestamp"}: - return datetime.now() - if func in {"date_sub", "date_add"} and len(node.args) == 2: - base = _evaluate_value_node(node.args[0]) - interval = _parse_interval_value(node.args[1]) - if base is None or interval is None: - return None - if func == "date_sub": - interval = (-interval[0], interval[1]) - return _apply_interval(base, interval) - - return None - - -def _evaluate_value_node(node: ASTNode) -> Optional[Any]: - if isinstance(node, sql_ast.Constant): - return node.value - if isinstance(node, sql_ast.Identifier): - ident = node.parts[-1].lower() if node.parts else "" - if ident in {"curdate", "current_date"}: - return date.today() - if ident in {"now", "current_timestamp"}: - return datetime.now() - return None - if isinstance(node, sql_ast.Interval): - return _parse_interval_value(node) - if isinstance(node, sql_ast.Tuple): - return [item.value if isinstance(item, sql_ast.Constant) else _evaluate_value_node(item) for item in node.items] - if isinstance(node, sql_ast.Function): - return _evaluate_function_value(node) - if isinstance(node, sql_ast.UnaryOperation): - op = getattr(node, "op", None) - if op == "-" and node.args: - value = _evaluate_value_node(node.args[0]) - if value is None: - return None - if isinstance(value, (int, float)): - return -value - if isinstance(value, tuple) and len(value) == 2: - return (-value[0], value[1]) - return None - if isinstance(node, sql_ast.BinaryOperation): - op = getattr(node, "op", None) - if not op or len(node.args) != 2: - return None - left = _evaluate_value_node(node.args[0]) - right = _evaluate_value_node(node.args[1]) - if left is None or right is None: - return None - op = op.lower() - if op in {"+", "-"}: - if isinstance(left, (date, datetime)) and isinstance(right, tuple): - interval = right - if op == "-": - interval = (-interval[0], interval[1]) - return _apply_interval(left, interval) - if isinstance(right, (date, 
datetime)) and isinstance(left, tuple) and op == "+": - return _apply_interval(right, left) - if isinstance(left, (int, float)) and isinstance(right, (int, float)): - return left + right if op == "+" else left - right - if op == "*" and isinstance(left, (int, float)) and isinstance(right, (int, float)): - return left * right - if op == "/" and isinstance(left, (int, float)) and isinstance(right, (int, float)) and right != 0: - return left / right - return None - - -def _extract_comparison_conditions_with_functions(binary_op: ASTNode) -> List[List[Any]]: - conditions: List[List[Any]] = [] - - def _extract_identifier(node: ASTNode) -> Optional[sql_ast.Identifier]: - if isinstance(node, sql_ast.Identifier): - return node - if isinstance(node, sql_ast.Function): - func = getattr(node, "op", None) or getattr(node, "name", None) - if func and str(func).lower() in {"lower", "upper"} and node.args: - if isinstance(node.args[0], sql_ast.Identifier): - return node.args[0] - return None - - def _invert_comparison(op: str) -> Optional[str]: - inverse_ops = { - "<": ">", - "<=": ">=", - ">": "<", - ">=": "<=", - "lt": "gt", - "lte": "gte", - "gt": "lt", - "gte": "lte", - } - if op in inverse_ops: - return inverse_ops[op] - if op in {"=", "==", "eq", "!=", "<>", "neq"}: - return op - return None - - def _extract(node: ASTNode, **kwargs): - if isinstance(node, sql_ast.BinaryOperation): - op = node.op.lower() - if op == "and": - return - - arg1, arg2 = node.args - identifier = _extract_identifier(arg1) - if identifier is None: - identifier = _extract_identifier(arg2) - if identifier is None: - logger.debug(f"Skipping unsupported condition arg1: {arg1}") - return - value = _evaluate_value_node(arg1) - if value is None: - logger.debug(f"Skipping unsupported condition arg1: {arg1}") - return - inverted_op = _invert_comparison(op) - if inverted_op is None: - logger.debug(f"Skipping unsupported condition op swap: {op}") - return - conditions.append([inverted_op, identifier.parts[-1], 
value]) - return - - value = _evaluate_value_node(arg2) - if value is None: - logger.debug(f"Skipping unsupported condition arg2: {arg2}") - return - - conditions.append([op, identifier.parts[-1], value]) - if isinstance(node, sql_ast.BetweenOperation): - var, up, down = node.args - if not isinstance(var, sql_ast.Identifier): - logger.debug(f"Skipping unsupported between condition: {node}") - return - - up_value = _evaluate_value_node(up) - down_value = _evaluate_value_node(down) - if up_value is None or down_value is None: - logger.debug(f"Skipping unsupported between condition: {node}") - return - - op = node.op.lower() - conditions.append([op, var.parts[-1], (up_value, down_value)]) - - query_traversal(binary_op, _extract) - return conditions - - -HUBSPOT_TABLE_COLUMN_DEFINITIONS: Dict[str, List[Tuple[str, str, str]]] = { - "companies": [ - ("name", "VARCHAR", "Company name"), - ("domain", "VARCHAR", "Company domain"), - ("industry", "VARCHAR", "Industry"), - ("city", "VARCHAR", "City"), - ("state", "VARCHAR", "State"), - ("phone", "VARCHAR", "Phone number"), - ("website", "VARCHAR", "Company website URL"), - ("address", "VARCHAR", "Street address"), - ("zip", "VARCHAR", "Postal code"), - ("numberofemployees", "INTEGER", "Employee count"), - ("annualrevenue", "DECIMAL", "Annual revenue"), - ("lifecyclestage", "VARCHAR", "Lifecycle stage"), - ("current_erp", "VARCHAR", "Current ERP system"), - ("current_erp_version", "VARCHAR", "Current ERP version"), - ("current_web_platform", "VARCHAR", "Current web platform"), - ("accounting_software", "VARCHAR", "Accounting software"), - ("credit_card_processor", "VARCHAR", "Credit card processor"), - ("data_integration_platform", "VARCHAR", "Data integration platform"), - ("marketing_platform", "VARCHAR", "Marketing automation platform"), - ("pos_software", "VARCHAR", "POS software"), - ("shipping_software", "VARCHAR", "Shipping software"), - ("tax_platform", "VARCHAR", "Tax platform"), - ("partner", "BOOLEAN", "Partner 
flag"), - ("partner_type", "VARCHAR", "Partner type"), - ("partnership_status", "VARCHAR", "Partnership status"), - ("partner_payout_ytd", "DECIMAL", "Partner payout YTD"), - ("partnership_commission", "DECIMAL", "Partnership commission YTD"), - ("total_customer_value", "DECIMAL", "Total customer value"), - ("total_revenue", "DECIMAL", "Total revenue"), - ("createdate", "TIMESTAMP", "Creation date"), - ("lastmodifieddate", "TIMESTAMP", "Last modification date"), - ], - "contacts": [ - ("email", "VARCHAR", "Email address"), - ("firstname", "VARCHAR", "First name"), - ("lastname", "VARCHAR", "Last name"), - ("phone", "VARCHAR", "Phone number"), - ("mobilephone", "VARCHAR", "Mobile phone number"), - ("jobtitle", "VARCHAR", "Job title"), - ("company", "VARCHAR", "Associated company"), - ("city", "VARCHAR", "City"), - ("website", "VARCHAR", "Website URL"), - ("lifecyclestage", "VARCHAR", "Lifecycle stage"), - ("hs_lead_status", "VARCHAR", "Lead status"), - ("hubspot_owner_id", "VARCHAR", "Owner ID"), - ("primary_company_id", "VARCHAR", "Primary associated company ID"), - ("dc_contact", "BOOLEAN", "Direct Commerce contact indicator"), - ("current_ecommerce_platform", "VARCHAR", "Current ecommerce platform"), - ("departments", "VARCHAR", "Departments"), - ("demo__requested", "BOOLEAN", "Demo requested flag"), - ("linkedin_url", "VARCHAR", "LinkedIn profile URL"), - ("referral_name", "VARCHAR", "Referral name"), - ("referral_company_name", "VARCHAR", "Referral company name"), - ("notes_last_contacted", "TIMESTAMP", "Last contacted timestamp"), - ("notes_last_updated", "TIMESTAMP", "Last activity updated timestamp"), - ("notes_next_activity_date", "TIMESTAMP", "Next activity date"), - ("num_contacted_notes", "INTEGER", "Number of contacted notes"), - ("hs_sales_email_last_clicked", "TIMESTAMP", "Last sales email clicked"), - ("hs_sales_email_last_opened", "TIMESTAMP", "Last sales email opened"), - ("createdate", "TIMESTAMP", "Creation date"), - ("lastmodifieddate", 
"TIMESTAMP", "Last modification date"), - ], - "deals": [ - ("dealname", "VARCHAR", "Deal name"), - ("amount", "DECIMAL", "Deal amount"), - ("primary_company_id", "VARCHAR", "Primary associated company ID"), - ("primary_contact_id", "VARCHAR", "Primary associated contact ID"), - ("dealstage", "VARCHAR", "Deal stage"), - ("dealstage_label", "VARCHAR", "Deal stage label"), - ("pipeline", "VARCHAR", "Sales pipeline"), - ("pipeline_label", "VARCHAR", "Pipeline label"), - ("closedate", "DATE", "Expected close date"), - ("hubspot_owner_id", "VARCHAR", "Owner ID"), - ("owner_name", "VARCHAR", "Owner name"), - ("owner_email", "VARCHAR", "Owner email"), - ("closed_won_reason", "VARCHAR", "Reason deal was won"), - ("closed_lost_reason", "VARCHAR", "Reason deal was lost"), - ("lead_attribution", "VARCHAR", "Lead attribution"), - ("services_requested", "VARCHAR", "Services requested"), - ("platform", "VARCHAR", "Platform"), - ("referral_partner", "VARCHAR", "Referral partner"), - ("referral_commission_amount", "DECIMAL", "Referral commission amount"), - ("tech_partners_involved", "VARCHAR", "Tech partners involved"), - ("sales_tier", "VARCHAR", "Sales tier"), - ("commission_status", "VARCHAR", "Commission status"), - ("createdate", "TIMESTAMP", "Creation date"), - ("lastmodifieddate", "TIMESTAMP", "Last modification date"), - ], - "tickets": [ - ("subject", "VARCHAR", "Ticket subject"), - ("content", "TEXT", "Ticket content/description"), - ("hs_pipeline", "VARCHAR", "Pipeline"), - ("hs_pipeline_stage", "VARCHAR", "Pipeline stage"), - ("hs_ticket_priority", "VARCHAR", "Priority"), - ("hs_ticket_category", "VARCHAR", "Category"), - ("hubspot_owner_id", "VARCHAR", "Owner ID"), - ("primary_company_id", "VARCHAR", "Primary associated company ID"), - ("primary_contact_id", "VARCHAR", "Primary associated contact ID"), - ("primary_deal_id", "VARCHAR", "Primary associated deal ID"), - ("createdate", "TIMESTAMP", "Creation date"), - ("lastmodifieddate", "TIMESTAMP", "Last modification 
date"), - ], - # Association tables definitions for many-to-many relationships - # Reference: https://developers.hubspot.com/docs/api-reference/crm-associations-v4/guide#associate-records-without-a-label - "company_contacts": [ - ("company_id", "VARCHAR", "Company ID"), - ("contact_id", "VARCHAR", "Contact ID"), - ("association_type", "VARCHAR", "Association type"), - ("association_label", "VARCHAR", "Association label"), - ], - "company_deals": [ - ("company_id", "VARCHAR", "Company ID"), - ("deal_id", "VARCHAR", "Deal ID"), - ("association_type", "VARCHAR", "Association type"), - ("association_label", "VARCHAR", "Association label"), - ], - "company_tickets": [ - ("company_id", "VARCHAR", "Company ID"), - ("ticket_id", "VARCHAR", "Ticket ID"), - ("association_type", "VARCHAR", "Association type"), - ("association_label", "VARCHAR", "Association label"), - ], - "contact_companies": [ - ("contact_id", "VARCHAR", "Contact ID"), - ("company_id", "VARCHAR", "Company ID"), - ("association_type", "VARCHAR", "Association type"), - ("association_label", "VARCHAR", "Association label"), - ], - "contact_deals": [ - ("contact_id", "VARCHAR", "Contact ID"), - ("deal_id", "VARCHAR", "Deal ID"), - ("association_type", "VARCHAR", "Association type"), - ("association_label", "VARCHAR", "Association label"), - ], - "contact_tickets": [ - ("contact_id", "VARCHAR", "Contact ID"), - ("ticket_id", "VARCHAR", "Ticket ID"), - ("association_type", "VARCHAR", "Association type"), - ("association_label", "VARCHAR", "Association label"), - ], - "deal_companies": [ - ("deal_id", "VARCHAR", "Deal ID"), - ("company_id", "VARCHAR", "Company ID"), - ("association_type", "VARCHAR", "Association type"), - ("association_label", "VARCHAR", "Association label"), - ], - "deal_contacts": [ - ("deal_id", "VARCHAR", "Deal ID"), - ("contact_id", "VARCHAR", "Contact ID"), - ("association_type", "VARCHAR", "Association type"), - ("association_label", "VARCHAR", "Association label"), - ], - 
"ticket_companies": [ - ("ticket_id", "VARCHAR", "Ticket ID"), - ("company_id", "VARCHAR", "Company ID"), - ("association_type", "VARCHAR", "Association type"), - ("association_label", "VARCHAR", "Association label"), - ], - "ticket_contacts": [ - ("ticket_id", "VARCHAR", "Ticket ID"), - ("contact_id", "VARCHAR", "Contact ID"), - ("association_type", "VARCHAR", "Association type"), - ("association_label", "VARCHAR", "Association label"), - ], - "ticket_deals": [ - ("ticket_id", "VARCHAR", "Ticket ID"), - ("deal_id", "VARCHAR", "Deal ID"), - ("association_type", "VARCHAR", "Association type"), - ("association_label", "VARCHAR", "Association label"), - ], - # Reference: https://developers.hubspot.com/docs/api-reference/crm-tasks-v3/guide - "tasks": [ - ("hs_task_subject", "VARCHAR", "Task subject"), - ("hs_task_body", "TEXT", "Task body/description"), - ("hs_task_status", "VARCHAR", "Task status"), - ("hs_task_priority", "VARCHAR", "Task priority"), - ("hs_task_type", "VARCHAR", "Task type"), - ("hs_timestamp", "TIMESTAMP", "Due date"), - ("hubspot_owner_id", "VARCHAR", "Owner ID"), - ("primary_company_id", "VARCHAR", "Primary associated company ID"), - ("primary_contact_id", "VARCHAR", "Primary associated contact ID"), - ("primary_deal_id", "VARCHAR", "Primary associated deal ID"), - ("createdate", "TIMESTAMP", "Creation date"), - ("lastmodifieddate", "TIMESTAMP", "Last modification date"), - ], - # Reference: https://developers.hubspot.com/docs/api-reference/crm-calls-v3/guide - "calls": [ - ("hs_call_title", "VARCHAR", "Call title"), - ("hs_call_body", "TEXT", "Call notes/description"), - ("hs_call_direction", "VARCHAR", "Call direction (INBOUND/OUTBOUND)"), - ("hs_call_disposition", "VARCHAR", "Call outcome"), - ("hs_call_duration", "INTEGER", "Call duration in milliseconds"), - ("hs_call_status", "VARCHAR", "Call status"), - ("hubspot_owner_id", "VARCHAR", "Owner ID"), - ("hs_timestamp", "TIMESTAMP", "Call timestamp"), - ("primary_company_id", "VARCHAR", 
"Primary associated company ID"), - ("primary_contact_id", "VARCHAR", "Primary associated contact ID"), - ("primary_deal_id", "VARCHAR", "Primary associated deal ID"), - ("createdate", "TIMESTAMP", "Creation date"), - ("lastmodifieddate", "TIMESTAMP", "Last modification date"), - ], - # Reference: https://developers.hubspot.com/docs/api-reference/crm-emails-v3/guide - "emails": [ - ("hs_email_subject", "VARCHAR", "Email subject"), - ("hs_email_text", "TEXT", "Email body text"), - ("hs_email_direction", "VARCHAR", "Email direction (INCOMING/FORWARDED/EMAIL)"), - ("hs_email_status", "VARCHAR", "Email status"), - ("hs_email_sender_email", "VARCHAR", "Sender email address"), - ("hs_email_to_email", "VARCHAR", "Recipient email address"), - ("hubspot_owner_id", "VARCHAR", "Owner ID"), - ("hs_timestamp", "TIMESTAMP", "Email timestamp"), - ("primary_company_id", "VARCHAR", "Primary associated company ID"), - ("primary_contact_id", "VARCHAR", "Primary associated contact ID"), - ("primary_deal_id", "VARCHAR", "Primary associated deal ID"), - ("createdate", "TIMESTAMP", "Creation date"), - ("lastmodifieddate", "TIMESTAMP", "Last modification date"), - ], - # Reference: https://developers.hubspot.com/docs/api-reference/crm-meetings-v3/guide - "meetings": [ - ("hs_meeting_title", "VARCHAR", "Meeting title"), - ("hs_meeting_body", "TEXT", "Meeting description"), - ("hs_meeting_location", "VARCHAR", "Meeting location"), - ("hs_meeting_outcome", "VARCHAR", "Meeting outcome"), - ("hs_meeting_start_time", "TIMESTAMP", "Meeting start time"), - ("hs_meeting_end_time", "TIMESTAMP", "Meeting end time"), - ("hubspot_owner_id", "VARCHAR", "Owner ID"), - ("hs_timestamp", "TIMESTAMP", "Meeting timestamp"), - ("primary_company_id", "VARCHAR", "Primary associated company ID"), - ("primary_contact_id", "VARCHAR", "Primary associated contact ID"), - ("primary_deal_id", "VARCHAR", "Primary associated deal ID"), - ("createdate", "TIMESTAMP", "Creation date"), - ("lastmodifieddate", "TIMESTAMP", 
"Last modification date"), - ], - # Reference: https://developers.hubspot.com/docs/api-reference/crm-notes-v3/guide - "notes": [ - ("hs_note_body", "TEXT", "Note content"), - ("hubspot_owner_id", "VARCHAR", "Owner ID"), - ("hs_timestamp", "TIMESTAMP", "Note timestamp"), - ("primary_company_id", "VARCHAR", "Primary associated company ID"), - ("primary_contact_id", "VARCHAR", "Primary associated contact ID"), - ("primary_deal_id", "VARCHAR", "Primary associated deal ID"), - ("createdate", "TIMESTAMP", "Creation date"), - ("lastmodifieddate", "TIMESTAMP", "Last modification date"), - ], - # Reference: https://developers.hubspot.com/docs/api-reference/crm-crm-owners-v3/guide#crm-api-owners - "owners": [ - ("id", "VARCHAR", "Owner ID"), - ("email", "VARCHAR", "Owner email"), - ("first_name", "VARCHAR", "First name"), - ("last_name", "VARCHAR", "Last name"), - ("full_name", "VARCHAR", "Full name"), - ("user_id", "VARCHAR", "User ID"), - ("teams", "TEXT", "Teams"), - ("created_at", "TIMESTAMP", "Created at"), - ("updated_at", "TIMESTAMP", "Updated at"), - ("archived", "BOOLEAN", "Archived"), - ], - "deal_stages": [ - ("pipeline_id", "VARCHAR", "Pipeline ID"), - ("pipeline_label", "VARCHAR", "Pipeline label"), - ("stage_id", "VARCHAR", "Stage ID"), - ("stage_label", "VARCHAR", "Stage label"), - ("stage_order", "INTEGER", "Stage display order"), - ("stage_probability", "DECIMAL", "Stage probability"), - ("stage_archived", "BOOLEAN", "Stage archived"), - ], - "leads": [ - ("hs_lead_name", "VARCHAR", "Lead name"), - ("hs_lead_type", "VARCHAR", "Lead type"), - ("hs_lead_label", "VARCHAR", "Lead label/status"), - ("hubspot_owner_id", "VARCHAR", "Owner ID"), - ("hs_timestamp", "TIMESTAMP", "Lead timestamp"), - ("primary_contact_id", "VARCHAR", "Primary associated contact ID"), - ("primary_company_id", "VARCHAR", "Primary associated company ID"), - ("createdate", "TIMESTAMP", "Creation date"), - ("lastmodifieddate", "TIMESTAMP", "Last modification date"), - ], -} - - -def 
_get_attr_value(obj: Any, *names: str) -> Any: - for name in names: - if isinstance(obj, dict) and name in obj: - return obj[name] - if hasattr(obj, name): - return getattr(obj, name) - return None - - -def _as_str(value: Any) -> Optional[str]: - if value is None: - return None - return str(value) - - -# Format owner teams into a comma-separated string -def _format_owner_teams(teams: Any) -> Optional[str]: - if not teams: - return None - if isinstance(teams, (list, tuple)): - parts: List[str] = [] - for team in teams: - team_id = _get_attr_value(team, "id", "team_id") - team_name = _get_attr_value(team, "name") - if team_name and team_id: - parts.append(f"{team_name} ({team_id})") - elif team_name: - parts.append(str(team_name)) - elif team_id: - parts.append(str(team_id)) - return ", ".join(parts) if parts else None - return str(teams) - - -# Convert HubSpot owner object to a dictionary row -def _owner_to_row(owner: Any) -> Dict[str, Any]: - owner_id = _as_str(_get_attr_value(owner, "id")) - email = _get_attr_value(owner, "email") - first_name = _get_attr_value(owner, "first_name", "firstName") - last_name = _get_attr_value(owner, "last_name", "lastName") - user_id = _as_str(_get_attr_value(owner, "user_id", "userId")) - created_at = _get_attr_value(owner, "created_at", "createdAt") - updated_at = _get_attr_value(owner, "updated_at", "updatedAt") - archived = _get_attr_value(owner, "archived") - teams = _format_owner_teams(_get_attr_value(owner, "teams")) - - name_parts = [part for part in (first_name, last_name) if part] - full_name = " ".join(name_parts) if name_parts else (email or None) - - return { - "id": owner_id, - "email": email, - "first_name": first_name, - "last_name": last_name, - "full_name": full_name, - "user_id": user_id, - "teams": teams, - "created_at": created_at, - "updated_at": updated_at, - "archived": archived, - } - - -def _fetch_owner_pages(handler, archived: bool = False) -> List[Dict[str, Any]]: - hubspot = handler.connect() - results: 
List[Dict[str, Any]] = [] - after = None - - while True: - response = hubspot.crm.owners.owners_api.get_page(limit=500, after=after, archived=archived) - owners = getattr(response, "results", None) or getattr(response, "owners", None) or [] - for owner in owners: - results.append(_owner_to_row(owner)) - - paging = getattr(response, "paging", None) - next_page = getattr(paging, "next", None) if paging else None - after = getattr(next_page, "after", None) if next_page else None - if after is None: - break - - return results - - -def _fetch_owner_rows(handler, include_archived: bool = True) -> List[Dict[str, Any]]: - rows = _fetch_owner_pages(handler, archived=False) - if include_archived: - rows.extend(_fetch_owner_pages(handler, archived=True)) - - deduped: Dict[str, Dict[str, Any]] = {} - for row in rows: - owner_id = row.get("id") - if not owner_id: - continue - deduped.setdefault(owner_id, row) - - return list(deduped.values()) if deduped else rows - - -def _get_owner_rows(handler) -> List[Dict[str, Any]]: - cache_key = "_hubspot_owner_rows_cache" - cached = getattr(handler, cache_key, None) - if cached is None: - cached = _fetch_owner_rows(handler, include_archived=True) - setattr(handler, cache_key, cached) - return cached - - -def _get_owner_map(handler) -> Dict[str, Dict[str, Any]]: - cache_key = "_hubspot_owner_map_cache" - cached = getattr(handler, cache_key, None) - if cached is None: - rows = _get_owner_rows(handler) - cached = {row["id"]: row for row in rows if row.get("id")} - setattr(handler, cache_key, cached) - return cached - - -def _fetch_deal_stage_rows(handler) -> List[Dict[str, Any]]: - hubspot = handler.connect() - response = hubspot.crm.pipelines.pipelines_api.get_all("deals") - pipelines = getattr(response, "results", None) or response or [] - rows: List[Dict[str, Any]] = [] - - for pipeline in pipelines: - pipeline_id = _as_str(_get_attr_value(pipeline, "id")) - pipeline_label = _get_attr_value(pipeline, "label") - stages = 
_get_attr_value(pipeline, "stages") or [] - - for stage in stages: - stage_id = _as_str(_get_attr_value(stage, "id")) - stage_label = _get_attr_value(stage, "label") - stage_order = _get_attr_value(stage, "display_order", "displayOrder") - stage_archived = _get_attr_value(stage, "archived") - metadata = _get_attr_value(stage, "metadata") or {} - stage_probability = metadata.get("probability") if isinstance(metadata, dict) else None - - rows.append( - { - "pipeline_id": pipeline_id, - "pipeline_label": pipeline_label, - "stage_id": stage_id, - "stage_label": stage_label, - "stage_order": stage_order, - "stage_probability": stage_probability, - "stage_archived": stage_archived, - } - ) - - return rows - - -def _get_deal_stage_rows(handler) -> List[Dict[str, Any]]: - cache_key = "_hubspot_deal_stage_rows_cache" - cached = getattr(handler, cache_key, None) - if cached is None: - cached = _fetch_deal_stage_rows(handler) - setattr(handler, cache_key, cached) - return cached - - -# Get deal stage maps: (pipeline_id, stage_id) -> row and stage_id -> row -def _get_deal_stage_maps( - handler, -) -> Tuple[Dict[Tuple[str, str], Dict[str, Any]], Dict[str, Dict[str, Any]]]: - cache_key = "_hubspot_deal_stage_map_cache" - cached = getattr(handler, cache_key, None) - if cached is None: - rows = _get_deal_stage_rows(handler) - pair_map: Dict[Tuple[str, str], Dict[str, Any]] = {} - stage_map: Dict[str, Dict[str, Any]] = {} - for row in rows: - pipeline_id = row.get("pipeline_id") - stage_id = row.get("stage_id") - if pipeline_id and stage_id: - pair_map[(pipeline_id, stage_id)] = row - if stage_id and stage_id not in stage_map: - stage_map[stage_id] = row - cached = (pair_map, stage_map) - setattr(handler, cache_key, cached) - return cached - - -def _extract_in_values(value: Any) -> List[Any]: - """ - Extract values from IN clause, handling various formats: - - Python list/tuple/set: return as list - - AST Tuple node: extract values from args - - Single value: wrap in list - """ - 
if hasattr(value, "args"): - extracted = [] - for arg in value.args: - if hasattr(arg, "value"): - extracted.append(arg.value) - else: - extracted.append(arg) - return extracted - - if isinstance(value, (list, tuple, set)): - return list(value) - - return [value] - - -def _extract_scalar_value(value: Any) -> Any: - """ - Extract scalar value from AST Constant node or return as-is. - """ - if hasattr(value, "value") and not hasattr(value, "args"): - return value.value - return value - - -def _normalize_filter_conditions(conditions: Optional[List[FilterCondition]]) -> List[List[Any]]: - """ - Convert FilterCondition instances into the condition format expected by query executors. - """ - normalized: List[List[Any]] = [] - if not conditions: - return normalized - - for condition in conditions: - if isinstance(condition, FilterCondition): - op = canonical_op(condition.op) - col = to_internal_property(condition.column) - val = condition.value - - # Check if this is an IN/NOT IN operator with AST Tuple - if op in ("in", "not_in") and hasattr(val, "args"): - val = _extract_in_values(val) - else: - val = _extract_scalar_value(val) - - normalized.append([op, col, val]) - elif isinstance(condition, (list, tuple)) and len(condition) >= 3: - normalized.append([canonical_op(condition[0]), to_internal_property(condition[1]), condition[2]]) - return normalized - - -def _normalize_conditions_for_executor(conditions: List[List[Any]]) -> List[List[Any]]: - normalized = [] - for condition in conditions: - if len(condition) < 3: - continue - op, col, val = condition[0], condition[1], condition[2] - normalized.append([SQL_OPERATOR_MAP.get(op, op), col, val]) - return normalized - - -def _build_hubspot_search_filters( - conditions: List[List[Any]], - searchable_columns: Set[str], -) -> Optional[List[Dict]]: - """ - Convert normalized conditions to HubSpot Search API filter format. - Returns a list of filter dicts if all conditions are supported, otherwise None. 
- """ - if not conditions: - return None - - filters: List[Dict[str, Any]] = [] - - for condition in conditions: - if not isinstance(condition, (list, tuple)) or len(condition) < 3: - logger.debug(f"Invalid condition format: {condition}") - return None - - operator, column, value = condition[0], condition[1], condition[2] - operator_key = canonical_op(operator) - - if operator_key not in OPERATOR_MAP: - logger.debug(f"Unsupported operator '{operator_key}' for HubSpot search, falling back to post-filter") - return None - - if column not in searchable_columns: - logger.debug(f"Column '{column}' not searchable in HubSpot, falling back to post-filter") - return None - - property_name = to_hubspot_property(column) - - hubspot_operator = OPERATOR_MAP[operator_key] - - if hubspot_operator in {"IN", "NOT_IN"}: - values = _extract_in_values(value) - values = [v for v in values if v is not None] - if not values: - logger.debug( - f"No valid (non-None) values in IN clause for column '{column}', falling back to post-filter" - ) - return None - - logger.debug(f"Building IN filter for {column}: {values}") - filters.append( - { - "propertyName": property_name, - "operator": hubspot_operator, - "values": [str(val) for val in values], - } - ) - else: - actual_value = _extract_scalar_value(value) - filters.append( - { - "propertyName": property_name, - "operator": hubspot_operator, - "value": str(actual_value), - } - ) - - if not filters: - return None - - return filters - - -def _build_hubspot_search_sorts( - sort_columns: List[SortColumn], - searchable_columns: Set[str], -) -> Optional[List[Dict[str, Any]]]: - if not sort_columns: - return None - - sorts: List[Dict[str, Any]] = [] - for sort in sort_columns: - column = to_internal_property(sort.column) - if column not in searchable_columns: - logger.debug(f"Column '{column}' not sortable in HubSpot, falling back to post-sort") - return None - sorts.append( - { - "propertyName": to_hubspot_property(column), - "direction": 
"ASCENDING" if sort.ascending else "DESCENDING", - } - ) - return sorts - - -def _build_hubspot_properties(columns: Iterable[str]) -> List[str]: - properties = [] - for col in columns: - prop = to_hubspot_property(col) - if prop == "hs_object_id": - continue - properties.append(prop) - return list(dict.fromkeys(properties)) - - -def _prepare_association_request(object_type: str, columns: List[str]) -> Tuple[List[str], List[str]]: - assoc_columns = set(get_primary_association_columns(object_type)) - if not assoc_columns: - return [], columns - - needs_associations = bool(assoc_columns.intersection(columns)) - if not needs_associations: - return [], columns - - association_targets = get_association_targets_for_object(object_type) - hubspot_columns = [col for col in columns if col not in assoc_columns] - return association_targets, hubspot_columns - - -HUBSPOT_IN_MAX = 100 - - -def _execute_hubspot_search( - search_api, - filters: List[Dict], - properties: List[str], - limit: Optional[int], - to_dict_fn: callable, - sorts: Optional[List[Dict[str, Any]]] = None, - object_type: Optional[str] = None, -) -> List[Dict[str, Any]]: - """ - Execute paginated HubSpot search with filters. - Automatically chunks oversized IN filters (HubSpot max: 100 values). - """ - logger.debug(f"[_execute_hubspot_search] called — object_type={object_type}, limit={limit}, filters={filters}") - for i, f in enumerate(filters or []): - if f.get("operator") in {"IN", "NOT_IN"} and len(f.get("values", [])) > HUBSPOT_IN_MAX: - values = f["values"] - chunks = [values[j : j + HUBSPOT_IN_MAX] for j in range(0, len(values), HUBSPOT_IN_MAX)] - # When no explicit limit is provided, cap the number of chunks we process. - # Without this cap, a caller that passes id IN [10000 ids] with limit=None - # would fire 100+ sequential HubSpot API calls and run indefinitely. 
- MAX_CHUNKS_WITHOUT_LIMIT = 10 - effective_limit = limit - if limit is None and len(chunks) > MAX_CHUNKS_WITHOUT_LIMIT: - effective_limit = MAX_CHUNKS_WITHOUT_LIMIT * HUBSPOT_IN_MAX - collected: List[Dict[str, Any]] = [] - for chunk in chunks: - if effective_limit is not None and len(collected) >= effective_limit: - break - chunk_limit = effective_limit - len(collected) if effective_limit is not None else None - chunked_filters = filters[:i] + [{**f, "values": chunk}] + filters[i + 1 :] - collected.extend( - _execute_hubspot_search( - search_api, - chunked_filters, - properties, - chunk_limit, - to_dict_fn, - sorts, - object_type, - ) - ) - return collected - - collected: List[Dict[str, Any]] = [] - remaining = limit if limit is not None else float("inf") - after = None - page_num = 0 - - while remaining > 0: - page_num += 1 - page_limit = min(int(remaining) if remaining != float("inf") else 200, 200) - logger.debug( - f"[_execute_hubspot_search] page {page_num} — fetching up to {page_limit} results (after={after}, collected={len(collected)})" - ) - search_request = { - "limit": page_limit, - } - - if properties: - search_request["properties"] = properties - - if filters: - search_request["filterGroups"] = [{"filters": filters}] - if sorts: - search_request["sorts"] = sorts - - if after is not None: - search_request["after"] = after - - if object_type is None: - response = search_api.do_search(public_object_search_request=search_request) - else: - response = search_api.do_search(object_type, public_object_search_request=search_request) - - results = getattr(response, "results", []) or [] - for result in results: - collected.append(to_dict_fn(result)) - if limit is not None and len(collected) >= limit: - return collected - - paging = getattr(response, "paging", None) - next_page = getattr(paging, "next", None) if paging else None - after = getattr(next_page, "after", None) if next_page else None - - logger.debug(f"[_execute_hubspot_search] page {page_num} — got 
{len(results)} results, after={after}") - if after is None: - break - - if remaining != float("inf"): - remaining = limit - len(collected) - - logger.debug(f"[_execute_hubspot_search] done — total collected={len(collected)}") - return collected - - -class HubSpotAPIResource(APIResource): - """ - Base class for HubSpot table resources with custom select handling. - - Overrides the default select() method to properly handle server-side filtering - and avoid double-filtering issues with AST nodes. - """ - - # Reference: https://developers.hubspot.com/docs/api-reference/search/guide - SEARCHABLE_COLUMNS: Set[str] = set() - - # Aggregate function names → pandas equivalents - _AGG_FUNC_MAP: Dict[str, str] = { - "sum": "sum", - "count": "count", - "avg": "mean", - "mean": "mean", - "max": "max", - "min": "min", - } - - def select(self, query: ASTNode) -> pd.DataFrame: - """Select data, applying WHERE, GROUP BY, ORDER BY, LIMIT and function evaluation.""" - - conditions, order_by, result_limit = self._extract_query_params(query) - group_by_cols = self._get_group_by_columns(query) - # Targets include columns referenced inside functions and GROUP BY - targets = self._get_targets(query) - fetch_targets = list(dict.fromkeys(targets + group_by_cols)) - normalized_conditions = _normalize_filter_conditions(conditions) - - self._validate_query_columns(fetch_targets, normalized_conditions, order_by) - - for condition in normalized_conditions: - if len(condition) >= 3 and condition[0] == "in": - in_vals = condition[2] if isinstance(condition[2], list) else [condition[2]] - if not in_vals or all(v is None for v in in_vals): - return pd.DataFrame(columns=fetch_targets or self.get_columns()) - - if self.SEARCHABLE_COLUMNS: - filters = ( - _build_hubspot_search_filters(normalized_conditions, self.SEARCHABLE_COLUMNS) - if normalized_conditions - else None - ) - # Don't push ORDER BY to search when GROUP BY is present (need raw rows) - sorts = ( - _build_hubspot_search_sorts(order_by, 
self.SEARCHABLE_COLUMNS) - if order_by and not group_by_cols - else None - ) - use_search = filters is not None or sorts is not None - else: - filters = None - sorts = None - use_search = False - - fetch_columns = self._get_fetch_columns( - targets=fetch_targets, - normalized_conditions=normalized_conditions, - order_by=order_by if not group_by_cols else [], - use_search=use_search, - ) - - result = self.list( - conditions=conditions if not use_search else None, - limit=result_limit if not group_by_cols else None, - sort=order_by if not use_search and not group_by_cols else None, - targets=fetch_columns, - search_filters=filters, - search_sorts=sorts, - allow_search=use_search, - ) - - # Post-filter for non-search queries - if not use_search and normalized_conditions and not result.empty: - result = self._apply_post_filter(result, normalized_conditions) - - # GROUP BY + aggregation - if group_by_cols and not result.empty: - result = self._apply_aggregation(result, query, group_by_cols) - - # ORDER BY (after aggregation so we can sort on aggregate columns) - if order_by and not result.empty: - result = self._apply_post_sort(result, order_by) - - # LIMIT (applied after aggregation/sort) - if result_limit is not None and not result.empty: - result = result.head(result_limit) - - return self._apply_column_selection(result, query.targets or []) - - def _extract_query_params(self, query: ASTNode) -> Tuple[List, List, Optional[int]]: - """Extract conditions, order_by, and limit from query AST.""" - if query.where: - try: - conditions = extract_comparison_conditions(query.where) - except NotImplementedError: - conditions = _extract_comparison_conditions_with_functions(query.where) - else: - conditions = [] - - order_by = [] - if query.order_by: - - def _extract_order_column(field: ASTNode) -> Optional[str]: - if isinstance(field, sql_ast.Identifier): - return field.parts[-1] - if isinstance(field, sql_ast.Function): - func = getattr(field, "op", None) or getattr(field, 
"name", None) - if func and str(func).lower() in {"lower", "upper"} and field.args: - if isinstance(field.args[0], sql_ast.Identifier): - return field.args[0].parts[-1] - if hasattr(field, "args") and field.args: - last_arg = field.args[-1] - if isinstance(last_arg, sql_ast.Identifier): - return last_arg.parts[-1] - return None - - for col in query.order_by: - ascending = True - if hasattr(col, "direction") and col.direction: - ascending = col.direction.upper() != "DESC" - elif hasattr(col, "ascending"): - ascending = col.ascending - column_name = _extract_order_column(col.field) - if not column_name: - logger.debug(f"Skipping unsupported order by field: {col.field}") - continue - order_by.append(SortColumn(column_name, ascending)) - - result_limit = query.limit.value if query.limit else None - - return conditions, order_by, result_limit - - def _get_targets(self, query: ASTNode) -> List[str]: - """Extract target column names from query.""" - targets = [] - if query.targets: - for target in query.targets: - if isinstance(target, sql_ast.Star): - continue - if isinstance(target, sql_ast.Identifier): - targets.append(to_internal_property(target.parts[-1])) - continue - targets.extend(self._extract_target_columns(target)) - return list(dict.fromkeys(targets)) - - @staticmethod - def _extract_target_columns(target: ASTNode) -> List[str]: - columns: List[str] = [] - - def collect_identifiers(node, **kwargs): - if isinstance(node, sql_ast.Identifier): - columns.append(to_internal_property(node.parts[-1])) - return None - - query_traversal(target, collect_identifiers) - return columns - - def _apply_post_filter(self, df: pd.DataFrame, conditions: List[List[Any]]) -> pd.DataFrame: - """Apply post-filtering using pandas operations instead of SQL rendering.""" - if df.empty: - return df - - mask = pd.Series([True] * len(df), index=df.index) - - for condition in conditions: - if len(condition) < 3: - continue - - op, column, value = condition[0], condition[1], condition[2] - 
op_key = canonical_op(op) - - if column not in df.columns: - continue - - try: - if op_key == "eq": - mask &= df[column] == value - elif op_key == "neq": - mask &= df[column] != value - elif op_key == "lt": - mask &= df[column] < value - elif op_key == "lte": - mask &= df[column] <= value - elif op_key == "gt": - mask &= df[column] > value - elif op_key == "gte": - mask &= df[column] >= value - elif op_key == "in": - values = value if isinstance(value, (list, tuple, set)) else [value] - mask &= df[column].isin(values) - elif op_key == "not_in": - values = value if isinstance(value, (list, tuple, set)) else [value] - mask &= ~df[column].isin(values) - except Exception: - continue - - return df[mask].reset_index(drop=True) - - def _apply_post_sort(self, df: pd.DataFrame, sort: List[SortColumn]) -> pd.DataFrame: - sort_columns = [] - sort_ascending = [] - for sort_item in sort: - column = to_internal_property(sort_item.column) - if column not in df.columns: - continue - sort_columns.append(column) - sort_ascending.append(sort_item.ascending) - - if not sort_columns: - return df - - try: - return df.sort_values(by=sort_columns, ascending=sort_ascending).reset_index(drop=True) - except Exception: - return df - - def _apply_column_selection(self, df: pd.DataFrame, targets) -> pd.DataFrame: - """Apply column selection, resolving AST target nodes and aliases.""" - if not targets or df.empty: - return df - - def _alias_str(alias) -> Optional[str]: - """Convert an alias (str or Identifier) to a plain string.""" - if alias is None: - return None - if isinstance(alias, str): - return alias - if isinstance(alias, sql_ast.Identifier): - return alias.parts[-1] - return str(alias) - - selected: List[str] = [] - for target in targets: - if isinstance(target, sql_ast.Star): - return df - # AST node with an alias — the alias becomes the output column name - alias = _alias_str(getattr(target, "alias", None)) - if alias and alias in df.columns: - selected.append(alias) - continue - # 
Plain identifier - if isinstance(target, sql_ast.Identifier): - col = to_internal_property(target.parts[-1]) - if col in df.columns: - selected.append(col) - continue - # Function without alias — resolved agg column - if isinstance(target, sql_ast.Function): - func_name = (getattr(target, "op", None) or getattr(target, "name", "")).lower() - inner_cols = self._extract_target_columns(target) - candidate = f"{func_name}({inner_cols[0]})" if inner_cols else func_name - if candidate in df.columns: - selected.append(candidate) - elif inner_cols and inner_cols[0] in df.columns: - selected.append(inner_cols[0]) - continue - # Plain string - if isinstance(target, str) and target in df.columns: - selected.append(target) - - selected = list(dict.fromkeys(selected)) - if selected: - return df[selected] - return df - - def _validate_query_columns( - self, - targets: List[str], - normalized_conditions: List[List[Any]], - order_by: List[SortColumn], - ) -> None: - # Names that are SQL aggregate/scalar functions — skip validation - _FUNC_NAMES = {"sum", "count", "avg", "mean", "max", "min", "date_trunc", "lower", "upper", "coalesce"} - - requested = set() - for col in targets or []: - if col.lower() not in _FUNC_NAMES: - requested.add(col) - - for condition in normalized_conditions: - if len(condition) >= 2: - col = condition[1] - if col.lower() not in _FUNC_NAMES: - requested.add(col) - - for sort_item in order_by or []: - col = to_internal_property(sort_item.column) - if col.lower() not in _FUNC_NAMES: - requested.add(col) - - if not requested: - return - - available = set(self.get_columns()) - missing = [col for col in requested if col not in available] - if not missing: - return - - missing_cols = ", ".join(missing) - available_cols = ", ".join(sorted(available)) - raise ValueError( - f"Column(s) {missing_cols} do not exist for this HubSpot table. Available columns: {available_cols}." 
- ) - - def _get_group_by_columns(self, query: ASTNode) -> List[str]: - """Extract GROUP BY column names from query AST.""" - if not query.group_by: - return [] - cols = [] - for item in query.group_by: - if isinstance(item, sql_ast.Identifier): - cols.append(to_internal_property(item.parts[-1])) - elif isinstance(item, sql_ast.Function): - # e.g. DATE_TRUNC('month', closedate) — include the inner column - inner_cols = self._extract_target_columns(item) - cols.extend(inner_cols) - return list(dict.fromkeys(cols)) - - def _apply_aggregation(self, df: pd.DataFrame, query: ASTNode, group_by_cols: List[str]) -> pd.DataFrame: - """Apply GROUP BY + aggregation to a DataFrame based on query targets.""" - if not group_by_cols: - return df - - # Collect (input_col, agg_func, output_alias) triples from SELECT targets - agg_specs: List[Tuple[str, str, str]] = [] - for target in query.targets or []: - if not isinstance(target, sql_ast.Function): - continue - func_name = (getattr(target, "op", None) or getattr(target, "name", "")).lower() - pandas_agg = self._AGG_FUNC_MAP.get(func_name) - if pandas_agg is None: - continue - inner_cols = self._extract_target_columns(target) - raw_alias = getattr(target, "alias", None) - alias = ( - raw_alias.parts[-1] - if isinstance(raw_alias, sql_ast.Identifier) - else str(raw_alias) - if raw_alias is not None and not isinstance(raw_alias, str) - else raw_alias - ) - if inner_cols: - col = inner_cols[0] - out_name = alias or f"{func_name}({col})" - agg_specs.append((col, pandas_agg, out_name)) - elif func_name == "count": - # COUNT(*) — use first group_by col as proxy - out_name = alias or "count" - agg_specs.append((group_by_cols[0], "count", out_name)) - - if not agg_specs: - # No aggregate functions — just deduplicate by group keys - return df[group_by_cols].drop_duplicates().reset_index(drop=True) - - # Validate group-by columns exist - valid_group_cols = [c for c in group_by_cols if c in df.columns] - if not valid_group_cols: - return df 
- - agg_dict: Dict[str, List[str]] = {} - for col, pandas_agg, _ in agg_specs: - if col in df.columns: - agg_dict.setdefault(col, []).append(pandas_agg) - - if not agg_dict: - return df[valid_group_cols].drop_duplicates().reset_index(drop=True) - - grouped = df.groupby(valid_group_cols, as_index=False).agg(agg_dict) - # Flatten multi-level columns from groupby + agg - if isinstance(grouped.columns, pd.MultiIndex): - grouped.columns = ["_".join(filter(None, c)) for c in grouped.columns] - - # Rename output columns to requested aliases - rename_map: Dict[str, str] = {} - for col, pandas_agg, out_name in agg_specs: - # pandas names the result column as col_agg in multi-level flattened case - candidate = f"{col}_{pandas_agg}" - if candidate in grouped.columns and candidate != out_name: - rename_map[candidate] = out_name - elif col in grouped.columns and col != out_name: - rename_map[col] = out_name - if rename_map: - grouped = grouped.rename(columns=rename_map) - - return grouped.reset_index(drop=True) - - def _get_fetch_columns( - self, - targets: List[str], - normalized_conditions: List[List[Any]], - order_by: List[SortColumn], - use_search: bool, - ) -> List[str]: - if targets: - base_columns = list(targets) - else: - base_columns = list(self.get_columns()) - - if use_search: - return list(dict.fromkeys(base_columns)) - - extra_columns = [] - for condition in normalized_conditions: - if len(condition) >= 2: - extra_columns.append(condition[1]) - for sort in order_by or []: - extra_columns.append(to_internal_property(sort.column)) - - return list(dict.fromkeys(base_columns + extra_columns)) - - def _object_to_dict(self, obj: Any, columns: List[str]) -> Dict[str, Any]: - properties = getattr(obj, "properties", {}) or {} - row = {} - for col in columns: - if col == "id": - row["id"] = getattr(obj, "id", None) - continue - row[col] = properties.get(to_hubspot_property(col)) - return row - - -class OwnersTable(HubSpotAPIResource): - """HubSpot owners table.""" - - 
SEARCHABLE_COLUMNS: Set[str] = set() - - def meta_get_tables(self, table_name: str) -> Dict[str, Any]: - return { - "TABLE_NAME": "owners", - "TABLE_TYPE": "BASE TABLE", - "TABLE_DESCRIPTION": "HubSpot owners with names and emails", - "ROW_COUNT": None, - } - - def meta_get_columns(self, table_name: str) -> List[Dict[str, Any]]: - return self.handler._get_default_meta_columns("owners") - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - ) -> pd.DataFrame: - owners = self.get_owners(limit=limit) - owners_df = pd.DataFrame(owners) - if owners_df.empty: - owners_df = pd.DataFrame(columns=targets or self._get_default_owner_columns()) - return owners_df - - def add(self, data: List[dict]) -> None: - raise NotImplementedError("Creating owners via INSERT is not supported.") - - def modify(self, conditions: List[FilterCondition], values: Dict) -> None: - raise NotImplementedError("Updating owners via UPDATE is not supported.") - - def remove(self, conditions: List[FilterCondition]) -> None: - raise NotImplementedError("Deleting owners via DELETE is not supported.") - - def get_columns(self) -> List[str]: - return self._get_default_owner_columns() - - @staticmethod - def _get_default_owner_columns() -> List[str]: - return [ - "id", - "email", - "first_name", - "last_name", - "full_name", - "user_id", - "teams", - "created_at", - "updated_at", - "archived", - ] - - def get_owners(self, limit: Optional[int] = None) -> List[Dict[str, Any]]: - owners = _get_owner_rows(self.handler) - if limit is not None: - return owners[:limit] - return owners - - -class DealStagesTable(HubSpotAPIResource): - """HubSpot deal pipeline stages table.""" - - SEARCHABLE_COLUMNS: Set[str] = set() - - def meta_get_tables(self, table_name: str) -> Dict[str, Any]: - 
return { - "TABLE_NAME": "deal_stages", - "TABLE_TYPE": "BASE TABLE", - "TABLE_DESCRIPTION": "HubSpot deal pipeline stages with human-readable labels", - "ROW_COUNT": None, - } - - def meta_get_columns(self, table_name: str) -> List[Dict[str, Any]]: - return self.handler._get_default_meta_columns("deal_stages") - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - ) -> pd.DataFrame: - stages = self.get_deal_stages(limit=limit) - stages_df = pd.DataFrame(stages) - if stages_df.empty: - stages_df = pd.DataFrame(columns=targets or self._get_default_deal_stage_columns()) - return stages_df - - def add(self, data: List[dict]) -> None: - raise NotImplementedError("Creating deal stages via INSERT is not supported.") - - def modify(self, conditions: List[FilterCondition], values: Dict) -> None: - raise NotImplementedError("Updating deal stages via UPDATE is not supported.") - - def remove(self, conditions: List[FilterCondition]) -> None: - raise NotImplementedError("Deleting deal stages via DELETE is not supported.") - - def get_columns(self) -> List[str]: - return self._get_default_deal_stage_columns() - - @staticmethod - def _get_default_deal_stage_columns() -> List[str]: - return [ - "pipeline_id", - "pipeline_label", - "stage_id", - "stage_label", - "stage_order", - "stage_probability", - "stage_archived", - ] - - def get_deal_stages(self, limit: Optional[int] = None) -> List[Dict[str, Any]]: - stages = _get_deal_stage_rows(self.handler) - if limit is not None: - return stages[:limit] - return stages - - -class CompaniesTable(HubSpotAPIResource): - """Hubspot Companies table.""" - - SEARCHABLE_COLUMNS = { - "name", - "domain", - "industry", - "city", - "state", - "id", - "website", - "address", - "zip", - "numberofemployees", - 
"annualrevenue", - "lifecyclestage", - "current_erp", - "current_erp_version", - "current_web_platform", - "accounting_software", - "credit_card_processor", - "data_integration_platform", - "marketing_platform", - "pos_software", - "shipping_software", - "tax_platform", - "partner", - "partner_type", - "partnership_status", - "partner_payout_ytd", - "partnership_commission", - "total_customer_value", - "total_revenue", - "lastmodifieddate", - } - - def meta_get_tables(self, table_name: str) -> Dict[str, Any]: - row_count = None - try: - self.handler.connect() - row_count = self.handler._estimate_table_rows("companies") - except Exception: - pass - - return { - "TABLE_NAME": "companies", - "TABLE_TYPE": "BASE TABLE", - "TABLE_DESCRIPTION": self.handler._get_table_description("companies"), - "ROW_COUNT": row_count, - } - - def meta_get_columns(self, table_name: str) -> List[Dict[str, Any]]: - return self.handler._get_default_meta_columns("companies") - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - ) -> pd.DataFrame: - companies_df = pd.json_normalize( - self.get_companies( - limit=limit, - where_conditions=conditions, - properties=targets, - search_filters=search_filters, - search_sorts=search_sorts, - allow_search=allow_search, - ) - ) - if companies_df.empty: - companies_df = pd.DataFrame(columns=targets or self._get_default_company_columns()) - return companies_df - - def add(self, company_data: List[dict]): - self.create_companies(company_data) - - def modify(self, conditions: List[FilterCondition], values: Dict) -> None: - normalized_conditions = _normalize_filter_conditions(conditions) - companies_df = pd.json_normalize(self.get_companies(limit=200, where_conditions=normalized_conditions)) - - if companies_df.empty: - 
raise ValueError("No companies retrieved from HubSpot to evaluate update conditions.") - - executor_conditions = _normalize_conditions_for_executor(normalized_conditions) - update_query_executor = UPDATEQueryExecutor(companies_df, executor_conditions) - filtered_df = update_query_executor.execute_query() - - if filtered_df.empty: - raise ValueError(f"No companies found matching WHERE conditions: {conditions}.") - - company_ids = filtered_df["id"].astype(str).tolist() - logger.info(f"Updating {len(company_ids)} compan(ies) matching WHERE conditions") - self.update_companies(company_ids, values) - - def remove(self, conditions: List[FilterCondition]) -> None: - normalized_conditions = _normalize_filter_conditions(conditions) - companies_df = pd.json_normalize(self.get_companies(limit=200, where_conditions=normalized_conditions)) - - if companies_df.empty: - raise ValueError("No companies retrieved from HubSpot to evaluate delete conditions.") - - executor_conditions = _normalize_conditions_for_executor(normalized_conditions) - delete_query_executor = DELETEQueryExecutor(companies_df, executor_conditions) - filtered_df = delete_query_executor.execute_query() - - if filtered_df.empty: - raise ValueError(f"No companies found matching WHERE conditions: {conditions}.") - - company_ids = filtered_df["id"].astype(str).tolist() - logger.info(f"Deleting {len(company_ids)} compan(ies) matching WHERE conditions") - self.delete_companies(company_ids) - - def get_columns(self) -> List[str]: - return self._get_default_company_columns() - - @staticmethod - def _get_default_company_columns() -> List[str]: - return [ - "id", - "name", - "city", - "phone", - "state", - "domain", - "industry", - "website", - "address", - "zip", - "numberofemployees", - "annualrevenue", - "lifecyclestage", - "current_erp", - "current_erp_version", - "current_web_platform", - "accounting_software", - "credit_card_processor", - "data_integration_platform", - "marketing_platform", - "pos_software", - 
"shipping_software", - "tax_platform", - "partner", - "partner_type", - "partnership_status", - "partner_payout_ytd", - "partnership_commission", - "total_customer_value", - "total_revenue", - "createdate", - "lastmodifieddate", - ] - - def get_companies( - self, - limit: Optional[int] = None, - where_conditions: Optional[List] = None, - properties: Optional[List[str]] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - **kwargs, - ) -> List[Dict]: - normalized_conditions = _normalize_filter_conditions(where_conditions) - hubspot = self.handler.connect() - - requested_properties = properties or [] - default_properties = self._get_default_company_columns() - columns = requested_properties or default_properties - hubspot_properties = _build_hubspot_properties(columns) - - api_kwargs = {**kwargs, "properties": hubspot_properties} - if limit is not None: - api_kwargs["limit"] = limit - else: - api_kwargs.pop("limit", None) - - if allow_search and (search_filters or search_sorts or normalized_conditions): - filters = search_filters - if filters is None and normalized_conditions: - filters = _build_hubspot_search_filters(normalized_conditions, self.SEARCHABLE_COLUMNS) - if filters is not None or search_sorts is not None: - search_results = self._search_companies_by_conditions( - hubspot, filters, hubspot_properties, limit, search_sorts, columns - ) - logger.info(f"Retrieved {len(search_results)} companies from HubSpot via search API") - return search_results - - companies = hubspot.crm.companies.get_all(**api_kwargs) - companies_dict = [] - MAX_SCAN_ROWS = 10_000 - effective_limit = min(limit, MAX_SCAN_ROWS) if limit is not None else MAX_SCAN_ROWS - for company in companies: - try: - companies_dict.append(self._company_to_dict(company, columns)) - if len(companies_dict) >= effective_limit: - break - except Exception: - continue - - logger.info(f"Retrieved 
{len(companies_dict)} companies from HubSpot") - return companies_dict - - def _search_companies_by_conditions( - self, - hubspot: HubSpot, - filters: Optional[List[Dict[str, Any]]], - properties: List[str], - limit: Optional[int], - sorts: Optional[List[Dict[str, Any]]], - columns: List[str], - ) -> List[Dict[str, Any]]: - return _execute_hubspot_search( - hubspot.crm.companies.search_api, - filters or [], - properties, - limit, - lambda obj: self._company_to_dict(obj, columns), - sorts=sorts, - ) - - def _company_to_dict(self, company: Any, columns: Optional[List[str]] = None) -> Dict[str, Any]: - columns = columns or self._get_default_company_columns() - return self._object_to_dict(company, columns) - - def create_companies(self, companies_data: List[Dict[str, Any]]) -> None: - if not companies_data: - raise ValueError("No company data provided for creation") - - logger.info(f"Attempting to create {len(companies_data)} compan(ies)") - hubspot = self.handler.connect() - companies_to_create = [HubSpotObjectInputCreate(properties=company) for company in companies_data] - batch_input = BatchInputSimplePublicObjectBatchInputForCreate(inputs=companies_to_create) - - try: - created_companies = hubspot.crm.companies.batch_api.create( - batch_input_simple_public_object_batch_input_for_create=batch_input - ) - if not created_companies or not hasattr(created_companies, "results") or not created_companies.results: - raise Exception("Company creation returned no results") - created_ids = [c.id for c in created_companies.results] - logger.info(f"Successfully created {len(created_ids)} compan(ies) with IDs: {created_ids}") - except Exception as e: - logger.error(f"Companies creation failed: {str(e)}") - raise Exception(f"Companies creation failed {e}") - - def update_companies(self, company_ids: List[str], values_to_update: Dict[str, Any]) -> None: - hubspot = self.handler.connect() - companies_to_update = [HubSpotObjectBatchInput(id=cid, properties=values_to_update) for cid 
in company_ids] - batch_input = BatchInputSimplePublicObjectBatchInput(inputs=companies_to_update) - try: - updated = hubspot.crm.companies.batch_api.update(batch_input_simple_public_object_batch_input=batch_input) - logger.info(f"Companies with ID {[c.id for c in updated.results]} updated") - except Exception as e: - raise Exception(f"Companies update failed {e}") - - def delete_companies(self, company_ids: List[str]) -> None: - hubspot = self.handler.connect() - companies_to_delete = [HubSpotObjectId(id=cid) for cid in company_ids] - batch_input = BatchInputSimplePublicObjectId(inputs=companies_to_delete) - try: - hubspot.crm.companies.batch_api.archive(batch_input_simple_public_object_id=batch_input) - logger.info("Companies deleted") - except Exception as e: - raise Exception(f"Companies deletion failed {e}") - - -def _extract_association_condition(conditions: List[List[Any]], column: str) -> Optional[List[str]]: - """ - Return a list of non-None string values if conditions contain an - eq/in filter on *column*, otherwise return None. 
- """ - for condition in conditions: - if len(condition) >= 3 and condition[1] == column: - op, val = condition[0], condition[2] - if op == "eq" and val is not None: - return [str(val)] - if op == "in": - vals = val if isinstance(val, list) else [val] - valid = [str(v) for v in vals if v is not None] - if valid: - return valid - return None - - -class ContactsTable(HubSpotAPIResource): - """Hubspot Contacts table.""" - - SEARCHABLE_COLUMNS = { - "email", - "id", - "firstname", - "lastname", - "phone", - "mobilephone", - "jobtitle", - "company", - "city", - "website", - "lifecyclestage", - "hs_lead_status", - "hubspot_owner_id", - "dc_contact", - "current_ecommerce_platform", - "departments", - "demo__requested", - "linkedin_url", - "referral_name", - "referral_company_name", - "notes_last_contacted", - "notes_last_updated", - "notes_next_activity_date", - "num_contacted_notes", - "hs_sales_email_last_clicked", - "hs_sales_email_last_opened", - "lastmodifieddate", - } - - def meta_get_tables(self, table_name: str) -> Dict[str, Any]: - row_count = None - try: - self.handler.connect() - row_count = self.handler._estimate_table_rows("contacts") - except Exception: - pass - - return { - "TABLE_NAME": "contacts", - "TABLE_TYPE": "BASE TABLE", - "TABLE_DESCRIPTION": self.handler._get_table_description("contacts"), - "ROW_COUNT": row_count, - } - - def meta_get_columns(self, table_name: str) -> List[Dict[str, Any]]: - return self.handler._get_default_meta_columns("contacts") - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - ) -> pd.DataFrame: - contacts_df = pd.json_normalize( - self.get_contacts( - limit=limit, - where_conditions=conditions, - properties=targets, - search_filters=search_filters, - search_sorts=search_sorts, - 
allow_search=allow_search, - ) - ) - if contacts_df.empty: - contacts_df = pd.DataFrame(columns=targets or self._get_default_contact_columns()) - else: - if "id" in contacts_df.columns: - contacts_df["id"] = pd.to_numeric(contacts_df["id"], errors="coerce") - return contacts_df - - def add(self, contact_data: List[dict]): - self.create_contacts(contact_data) - - def modify(self, conditions: List[FilterCondition], values: Dict) -> None: - where_conditions = _normalize_filter_conditions(conditions) - contacts_df = pd.json_normalize(self.get_contacts(limit=200, where_conditions=where_conditions)) - - if contacts_df.empty: - raise ValueError("No contacts retrieved from HubSpot to evaluate update conditions.") - - executor_conditions = _normalize_conditions_for_executor(where_conditions) - update_query_executor = UPDATEQueryExecutor(contacts_df, executor_conditions) - filtered_df = update_query_executor.execute_query() - - if filtered_df.empty: - raise ValueError(f"No contacts found matching WHERE conditions: {conditions}.") - - contact_ids = filtered_df["id"].astype(str).tolist() - logger.info(f"Updating {len(contact_ids)} contact(s) matching WHERE conditions") - self.update_contacts(contact_ids, values) - - def remove(self, conditions: List[FilterCondition]) -> None: - where_conditions = _normalize_filter_conditions(conditions) - contacts_df = pd.json_normalize(self.get_contacts(limit=200, where_conditions=where_conditions)) - - if contacts_df.empty: - raise ValueError("No contacts retrieved from HubSpot to evaluate delete conditions.") - - executor_conditions = _normalize_conditions_for_executor(where_conditions) - delete_query_executor = DELETEQueryExecutor(contacts_df, executor_conditions) - filtered_df = delete_query_executor.execute_query() - - if filtered_df.empty: - raise ValueError(f"No contacts found matching WHERE conditions: {conditions}.") - - contact_ids = filtered_df["id"].astype(str).tolist() - logger.info(f"Deleting {len(contact_ids)} contact(s) 
matching WHERE conditions") - self.delete_contacts(contact_ids) - - def get_columns(self) -> List[str]: - return self._get_default_contact_columns() - - @staticmethod - def _get_default_contact_columns() -> List[str]: - return [ - "id", - "email", - "firstname", - "lastname", - "phone", - "mobilephone", - "jobtitle", - "company", - "city", - "website", - "lifecyclestage", - "hs_lead_status", - "hubspot_owner_id", - "dc_contact", - "current_ecommerce_platform", - "departments", - "demo__requested", - "linkedin_url", - "referral_name", - "referral_company_name", - "notes_last_contacted", - "notes_last_updated", - "notes_next_activity_date", - "num_contacted_notes", - "hs_sales_email_last_clicked", - "hs_sales_email_last_opened", - "createdate", - "lastmodifieddate", - "primary_company_id", - ] - - def get_contacts( - self, - limit: Optional[int] = None, - where_conditions: Optional[List] = None, - properties: Optional[List[str]] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - **kwargs, - ) -> List[Dict]: - logger.debug( - f"[ContactsTable] get_contacts() called — limit={limit}, conditions={where_conditions}, properties={properties}" - ) - normalized_conditions = _normalize_filter_conditions(where_conditions) - hubspot = self.handler.connect() - requested_properties = properties or [] - default_properties = self._get_default_contact_columns() - columns = requested_properties or default_properties - association_targets, hubspot_columns = _prepare_association_request("contacts", columns) - hubspot_properties = _build_hubspot_properties(hubspot_columns) - logger.debug( - f"[ContactsTable] get_contacts() — association_targets={association_targets}, hubspot_columns={hubspot_columns}" - ) - - # Optimization: if filtering by primary_company_id, bypass full contact - # scan and use the associations API to fetch only the relevant contacts. 
- company_ids = _extract_association_condition(normalized_conditions, "primary_company_id") - if company_ids: - logger.debug( - f"[ContactsTable] get_contacts() — company_ids filter detected ({len(company_ids)} ids), using associations API" - ) - return self._get_contacts_by_company_ids( - hubspot, company_ids, hubspot_columns, hubspot_properties, columns, limit - ) - - # Guard: fetching association columns (primary_company_id) without any - # WHERE filter means MindsDB's join executor is doing a full table scan - # to resolve a direct FK join — e.g.: - # FROM contacts c JOIN companies co ON c.primary_company_id = co.id - # MindsDB injects a large LIMIT (e.g. 20010) for its in-memory join - # buffer, so we also block large-limit scans, not just limit=None. - # Small explicit limits (≤ MAX_ASSOCIATION_SCAN) are allowed so that - # a user can still browse a few contacts with their association columns. - # Return empty immediately. Users should rewrite the query using the - # appropriate association table, e.g.: - # FROM companies co - # JOIN company_contacts cc ON cc.company_id = co.id - # JOIN contacts c ON c.id = cc.contact_id - MAX_ASSOCIATION_SCAN = 500 - if not normalized_conditions and limit is not None and limit > MAX_ASSOCIATION_SCAN: - msg = ( - "Direct FK joins between HubSpot objects using foreign key columns " - "(e.g. primary_company_id) are not supported. The HubSpot API represents " - "relationships through association tables.\n\n" - "Please rewrite your query using the association table, e.g.:\n" - "SELECT c.firstname, c.lastname, c.email\n" - "FROM companies co\n" - "JOIN company_contacts cc ON cc.company_id = co.id\n" - "JOIN contacts c ON c.id = cc.contact_id\n" - "WHERE co.name = 'HubSpot'" - ) - raise ValueError(msg) - - logger.debug( - f"[ContactsTable] get_contacts() — proceeding with scan/search. 
allow_search={allow_search}, search_filters={search_filters}, normalized_conditions={normalized_conditions}" - ) - api_kwargs = {**kwargs, "properties": hubspot_properties} - if limit is not None: - api_kwargs["limit"] = limit - else: - api_kwargs.pop("limit", None) - if association_targets: - api_kwargs["associations"] = association_targets - - if allow_search and (search_filters or search_sorts or normalized_conditions): - filters = search_filters - if filters is None and normalized_conditions: - filters = _build_hubspot_search_filters(normalized_conditions, self.SEARCHABLE_COLUMNS) - if filters is not None or search_sorts is not None: - if association_targets: - logger.debug("HubSpot search API does not include associations for contacts.") - logger.debug( - f"[ContactsTable] get_contacts() — calling _search_contacts_by_conditions with filters={filters}, limit={limit}" - ) - search_results = self._search_contacts_by_conditions( - hubspot, - filters, - hubspot_properties, - limit, - search_sorts, - hubspot_columns, - association_targets, - ) - logger.info(f"Retrieved {len(search_results)} contacts from HubSpot via search API") - return search_results - - logger.debug( - f"[ContactsTable] get_contacts() — falling back to full scan (get_all), effective_limit={limit if limit is not None else 10_000}" - ) - contacts = hubspot.crm.contacts.get_all(**api_kwargs) - contacts_dict = [] - # Without an explicit LIMIT MindsDB's join executor does not propagate the - # outer LIMIT to sub-table queries, leading to unbounded full-table scans. - # Cap the scan so that JOIN queries don't run indefinitely. Queries that - # genuinely need more than MAX_SCAN_ROWS rows should supply an explicit LIMIT. - # Cap full scans at MAX_SCAN_ROWS regardless of whether limit was explicit. - # MindsDB's in-memory join executor injects large limits (e.g. 20010, 20000) - # for its join buffer, causing multi-minute full table scans. 
Applying min() - # ensures those injected limits are treated the same as limit=None. - MAX_SCAN_ROWS = 10_000 - effective_limit = min(limit, MAX_SCAN_ROWS) if limit is not None else MAX_SCAN_ROWS - logger.debug(f"[ContactsTable] get_contacts() — full scan capped at {effective_limit}") - try: - for contact in contacts: - row = self._contact_to_dict(contact, hubspot_columns, association_targets) - contacts_dict.append(row) - if effective_limit is not None and len(contacts_dict) >= effective_limit: - logger.debug( - f"[ContactsTable] get_contacts() — reached effective_limit={effective_limit}, stopping scan" - ) - break - except Exception as e: - logger.error(f"Failed to iterate HubSpot contacts: {str(e)}") - raise - - logger.info(f"Retrieved {len(contacts_dict)} contacts from HubSpot") - return contacts_dict - - def _get_contacts_by_company_ids( - self, - hubspot: HubSpot, - company_ids: List[str], - hubspot_columns: List[str], - hubspot_properties: List[str], - columns: List[str], - limit: Optional[int], - ) -> List[Dict[str, Any]]: - """ - Fetch contacts that are associated with the given company IDs using - HubSpot's batch associations API + batch read, instead of scanning - all contacts (~100x faster for large accounts). 
- """ - logger.debug( - f"[ContactsTable] _get_contacts_by_company_ids() called — company_ids={company_ids}, limit={limit}" - ) - BATCH = 100 - # contact_id -> company_id (first company that referenced this contact) - contact_company_map: Dict[str, str] = {} - - for i in range(0, len(company_ids), BATCH): - chunk = company_ids[i : i + BATCH] - try: - resp = hubspot.crm.associations.batch_api.read( - "companies", - "contacts", - BatchInputPublicObjectId(inputs=[PublicObjectId(id=cid) for cid in chunk]), - ) - for multi in resp.results or []: - from_id = str( - (multi._from or {}).get("id", "") - if isinstance(multi._from, dict) - else getattr(multi._from, "id", "") - ) - for assoc in multi.to or []: - cid = str(assoc.id) - if cid not in contact_company_map: - contact_company_map[cid] = from_id - except Exception: - pass - - if not contact_company_map: - logger.debug( - "[ContactsTable] _get_contacts_by_company_ids() — no contacts found for given company_ids, returning []" - ) - return [] - - logger.debug( - f"[ContactsTable] _get_contacts_by_company_ids() — found {len(contact_company_map)} contact ids from associations API" - ) - all_contact_ids = list(contact_company_map.keys()) - if limit is not None: - all_contact_ids = all_contact_ids[:limit] - - contacts_dict = [] - for i in range(0, len(all_contact_ids), BATCH): - batch_ids = all_contact_ids[i : i + BATCH] - try: - resp = hubspot.crm.contacts.batch_api.read( - batch_read_input_simple_public_object_id=BatchReadInputSimplePublicObjectId( - properties=hubspot_properties, - inputs=[ContactObjectId(id=cid) for cid in batch_ids], - ) - ) - for contact in resp.results or []: - row = self._contact_to_dict(contact, hubspot_columns, None) - row["primary_company_id"] = contact_company_map.get(str(contact.id)) - contacts_dict.append(row) - except Exception as e: - logger.error(f"Failed to batch read contacts: {e}") - raise - - logger.info(f"Retrieved {len(contacts_dict)} contacts via company associations API") - return 
contacts_dict - - def _search_contacts_by_conditions( - self, - hubspot: HubSpot, - filters: Optional[List[Dict[str, Any]]], - properties: List[str], - limit: Optional[int], - sorts: Optional[List[Dict[str, Any]]], - columns: List[str], - association_targets: List[str], - ) -> List[Dict[str, Any]]: - logger.debug( - f"[ContactsTable] _search_contacts_by_conditions() called — filters={filters}, limit={limit}, sorts={sorts}" - ) - return _execute_hubspot_search( - hubspot.crm.contacts.search_api, - filters or [], - properties, - limit, - lambda obj: self._contact_to_dict(obj, columns, association_targets), - sorts=sorts, - ) - - def _contact_to_dict( - self, - contact: Any, - columns: Optional[List[str]] = None, - association_targets: Optional[List[str]] = None, - ) -> Dict[str, Any]: - columns = columns or self._get_default_contact_columns() - try: - row = self._object_to_dict(contact, columns) - if association_targets: - row = enrich_object_with_associations(contact, "contacts", row) - return row - except Exception: - assoc_columns = get_primary_association_columns("contacts") if association_targets else [] - return { - "id": getattr(contact, "id", None), - **{col: None for col in columns if col != "id"}, - **{col: None for col in assoc_columns}, - } - - def create_contacts(self, contacts_data: List[Dict[str, Any]]) -> None: - if not contacts_data: - raise ValueError("No contact data provided for creation") - - logger.info(f"Attempting to create {len(contacts_data)} contact(s)") - hubspot = self.handler.connect() - contacts_to_create = [HubSpotObjectInputCreate(properties=contact) for contact in contacts_data] - batch_input = BatchInputSimplePublicObjectBatchInputForCreate(inputs=contacts_to_create) - - try: - created_contacts = hubspot.crm.contacts.batch_api.create( - batch_input_simple_public_object_batch_input_for_create=batch_input - ) - if not created_contacts or not hasattr(created_contacts, "results") or not created_contacts.results: - raise 
Exception("Contact creation returned no results") - created_ids = [c.id for c in created_contacts.results] - logger.info(f"Successfully created {len(created_ids)} contact(s) with IDs: {created_ids}") - except Exception as e: - logger.error(f"Contacts creation failed: {str(e)}") - raise Exception(f"Contacts creation failed {e}") - - def update_contacts(self, contact_ids: List[str], values_to_update: Dict[str, Any]) -> None: - hubspot = self.handler.connect() - contacts_to_update = [HubSpotObjectBatchInput(id=cid, properties=values_to_update) for cid in contact_ids] - batch_input = BatchInputSimplePublicObjectBatchInput(inputs=contacts_to_update) - try: - updated = hubspot.crm.contacts.batch_api.update(batch_input_simple_public_object_batch_input=batch_input) - logger.info(f"Contacts with ID {[c.id for c in updated.results]} updated") - except Exception as e: - raise Exception(f"Contacts update failed {e}") - - def delete_contacts(self, contact_ids: List[str]) -> None: - hubspot = self.handler.connect() - contacts_to_delete = [HubSpotObjectId(id=cid) for cid in contact_ids] - batch_input = BatchInputSimplePublicObjectId(inputs=contacts_to_delete) - try: - hubspot.crm.contacts.batch_api.archive(batch_input_simple_public_object_id=batch_input) - logger.info("Contacts deleted") - except Exception as e: - raise Exception(f"Contacts deletion failed {e}") - - -class DealsTable(HubSpotAPIResource): - """Hubspot Deals table.""" - - SEARCHABLE_COLUMNS = { - "dealname", - "amount", - "dealstage", - "pipeline", - "closedate", - "hubspot_owner_id", - "closed_won_reason", - "closed_lost_reason", - "lead_attribution", - "services_requested", - "platform", - "referral_partner", - "referral_commission_amount", - "tech_partners_involved", - "sales_tier", - "commission_status", - "id", - "lastmodifieddate", - } - ASSOCIATION_COLUMNS = {"primary_company_id", "primary_contact_id"} - # Additional columns that require fetching extra data and better mapping - OWNER_COLUMNS = {"owner_name", 
"owner_email"} - STAGE_COLUMNS = {"dealstage_label", "pipeline_label"} - VIRTUAL_COLUMNS = OWNER_COLUMNS | STAGE_COLUMNS - - def meta_get_tables(self, table_name: str) -> Dict[str, Any]: - row_count = None - try: - self.handler.connect() - row_count = self.handler._estimate_table_rows("deals") - except Exception: - pass - - return { - "TABLE_NAME": "deals", - "TABLE_TYPE": "BASE TABLE", - "TABLE_DESCRIPTION": self.handler._get_table_description("deals"), - "ROW_COUNT": row_count, - } - - def meta_get_columns(self, table_name: str) -> List[Dict[str, Any]]: - return self.handler._get_default_meta_columns("deals") - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - ) -> pd.DataFrame: - deals_df = pd.json_normalize( - self.get_deals( - limit=limit, - where_conditions=conditions, - properties=targets, - search_filters=search_filters, - search_sorts=search_sorts, - allow_search=allow_search, - ) - ) - if deals_df.empty: - deals_df = pd.DataFrame(columns=targets or self._get_default_deal_columns()) - else: - deals_df = self._cast_deal_columns(deals_df) - return deals_df - - def add(self, deal_data: List[dict]): - self.create_deals(deal_data) - - def modify(self, conditions: List[FilterCondition], values: Dict) -> None: - where_conditions = _normalize_filter_conditions(conditions) - deals_df = pd.json_normalize(self.get_deals(limit=200, where_conditions=where_conditions)) - - if deals_df.empty: - raise ValueError("No deals retrieved from HubSpot to evaluate update conditions.") - - executor_conditions = _normalize_conditions_for_executor(where_conditions) - update_query_executor = UPDATEQueryExecutor(deals_df, executor_conditions) - filtered_df = update_query_executor.execute_query() - - if filtered_df.empty: - raise ValueError(f"No 
deals found matching WHERE conditions: {conditions}.") - - deal_ids = filtered_df["id"].astype(str).tolist() - logger.info(f"Updating {len(deal_ids)} deal(s) matching WHERE conditions") - self.update_deals(deal_ids, values) - - def remove(self, conditions: List[FilterCondition]) -> None: - where_conditions = _normalize_filter_conditions(conditions) - deals_df = pd.json_normalize(self.get_deals(limit=200, where_conditions=where_conditions)) - - if deals_df.empty: - raise ValueError("No deals retrieved from HubSpot to evaluate delete conditions.") - - executor_conditions = _normalize_conditions_for_executor(where_conditions) - delete_query_executor = DELETEQueryExecutor(deals_df, executor_conditions) - filtered_df = delete_query_executor.execute_query() - - if filtered_df.empty: - raise ValueError(f"No deals found matching WHERE conditions: {conditions}.") - - deal_ids = filtered_df["id"].astype(str).tolist() - logger.info(f"Deleting {len(deal_ids)} deal(s) matching WHERE conditions") - self.delete_deals(deal_ids) - - def get_columns(self) -> List[str]: - return self._get_default_deal_columns() - - @staticmethod - def _get_default_deal_columns() -> List[str]: - return [ - "id", - "dealname", - "amount", - "primary_company_id", - "primary_contact_id", - "pipeline", - "pipeline_label", - "closedate", - "dealstage", - "dealstage_label", - "hubspot_owner_id", - "owner_name", - "owner_email", - "closed_won_reason", - "closed_lost_reason", - "lead_attribution", - "services_requested", - "platform", - "referral_partner", - "referral_commission_amount", - "tech_partners_involved", - "sales_tier", - "commission_status", - "createdate", - "lastmodifieddate", - ] - - @staticmethod - def _cast_deal_columns(deals_df: pd.DataFrame) -> pd.DataFrame: - numeric_columns = ["amount"] - datetime_columns = ["closedate", "createdate", "lastmodifieddate"] - for column in numeric_columns: - if column in deals_df.columns: - deals_df[column] = pd.to_numeric(deals_df[column], errors="coerce") 
- for column in datetime_columns: - if column in deals_df.columns: - deals_df[column] = pd.to_datetime(deals_df[column], errors="coerce") - return deals_df - - def _needs_owner_details(self, columns: List[str]) -> bool: - return bool(self.OWNER_COLUMNS.intersection(columns)) - - def _needs_stage_details(self, columns: List[str]) -> bool: - return bool(self.STAGE_COLUMNS.intersection(columns)) - - def _add_virtual_dependencies(self, columns: List[str]) -> List[str]: - normalized = list(dict.fromkeys(columns)) - if self._needs_owner_details(normalized) and "hubspot_owner_id" not in normalized: - normalized.append("hubspot_owner_id") - if self._needs_stage_details(normalized): - if "dealstage" not in normalized: - normalized.append("dealstage") - if "pipeline" not in normalized: - normalized.append("pipeline") - return normalized - - def _strip_virtual_columns(self, columns: List[str]) -> List[str]: - return [col for col in columns if col not in self.VIRTUAL_COLUMNS] - - def _get_fetch_columns( - self, - targets: List[str], - normalized_conditions: List[List[Any]], - order_by: List[SortColumn], - use_search: bool, - ) -> List[str]: - base_columns = super()._get_fetch_columns(targets, normalized_conditions, order_by, use_search) - - if targets: - if "dealstage" in targets and "dealstage_label" not in base_columns: - base_columns.append("dealstage_label") - if "pipeline" in targets and "pipeline_label" not in base_columns: - base_columns.append("pipeline_label") - if "hubspot_owner_id" in targets and "owner_name" not in base_columns: - base_columns.append("owner_name") - - return list(dict.fromkeys(base_columns)) - - def _apply_column_selection(self, df: pd.DataFrame, targets: List[str]) -> pd.DataFrame: - if df.empty or not targets: - return df - - df = df.copy() - - # Try to use the enriched labels/names for better readability - # TODO: check for better way to handle this without modifying original columns - if "dealstage" in targets and "dealstage_label" in 
df.columns and "dealstage_label" not in targets: - df["dealstage"] = df["dealstage_label"].combine_first(df["dealstage"]) - - if "pipeline" in targets and "pipeline_label" in df.columns and "pipeline_label" not in targets: - df["pipeline"] = df["pipeline_label"].combine_first(df["pipeline"]) - - if "hubspot_owner_id" in targets and "owner_name" in df.columns and "owner_name" not in targets: - df["hubspot_owner_id"] = df["owner_name"].combine_first(df["hubspot_owner_id"]) - - return super()._apply_column_selection(df, targets) - - def _enrich_deal_rows(self, rows: List[Dict[str, Any]], columns: List[str]) -> None: - if not rows: - return - - needs_owner = self._needs_owner_details(columns) - needs_stage = self._needs_stage_details(columns) - if not needs_owner and not needs_stage: - return - - owner_map = _get_owner_map(self.handler) if needs_owner else None - stage_pair_map: Dict[Tuple[str, str], Dict[str, Any]] = {} - stage_map: Dict[str, Dict[str, Any]] = {} - if needs_stage: - stage_pair_map, stage_map = _get_deal_stage_maps(self.handler) - - for row in rows: - if needs_owner: - owner_id = _as_str(row.get("hubspot_owner_id")) - owner = owner_map.get(owner_id) if owner_id else None - row["owner_name"] = owner.get("full_name") if owner else None - row["owner_email"] = owner.get("email") if owner else None - - if needs_stage: - pipeline_id = _as_str(row.get("pipeline")) - stage_id = _as_str(row.get("dealstage")) - stage_info = None - if pipeline_id and stage_id: - stage_info = stage_pair_map.get((pipeline_id, stage_id)) - if stage_info is None and stage_id: - stage_info = stage_map.get(stage_id) - row["pipeline_label"] = stage_info.get("pipeline_label") if stage_info else None - row["dealstage_label"] = stage_info.get("stage_label") if stage_info else None - - def get_deals( - self, - limit: Optional[int] = None, - where_conditions: Optional[List] = None, - properties: Optional[List[str]] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - 
search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - **kwargs, - ) -> List[Dict]: - normalized_conditions = _normalize_filter_conditions(where_conditions) - hubspot = self.handler.connect() - requested_properties = properties or [] - default_properties = self._get_default_deal_columns() - columns = self._add_virtual_dependencies(requested_properties or default_properties) - needs_owner = self._needs_owner_details(columns) - needs_stage = self._needs_stage_details(columns) - association_targets, hubspot_columns = _prepare_association_request("deals", columns) - hubspot_columns = self._strip_virtual_columns(hubspot_columns) - hubspot_properties = _build_hubspot_properties(hubspot_columns) - - MAX_ASSOCIATION_SCAN = 500 - if not normalized_conditions and limit is not None and limit > MAX_ASSOCIATION_SCAN: - msg = ( - "Direct FK joins on HubSpot deals using foreign key columns " - "(e.g. primary_company_id, primary_contact_id) are not supported. " - "The HubSpot API represents relationships through association tables.\n\n" - "Please rewrite your query using the appropriate association table, e.g.:\n" - "FROM companies co\n" - "JOIN company_deals cd ON cd.company_id = co.id\n" - "JOIN deals d ON d.id = cd.deal_id" - ) - raise ValueError(msg) - - api_kwargs = {**kwargs, "properties": hubspot_properties} - if limit is not None: - api_kwargs["limit"] = limit - else: - api_kwargs.pop("limit", None) - if association_targets: - api_kwargs["associations"] = association_targets - - if allow_search and (search_filters or search_sorts or normalized_conditions): - filters = search_filters - if filters is None and normalized_conditions: - filters = _build_hubspot_search_filters(normalized_conditions, self.SEARCHABLE_COLUMNS) - if filters is not None or search_sorts is not None: - if association_targets: - logger.debug("HubSpot search API does not include associations for deals.") - search_results = self._search_deals_by_conditions( - hubspot, - 
filters, - hubspot_properties, - limit, - search_sorts, - hubspot_columns, - association_targets, - ) - if needs_owner or needs_stage: - self._enrich_deal_rows(search_results, columns) - logger.info(f"Retrieved {len(search_results)} deals from HubSpot via search API") - return search_results - - deals = hubspot.crm.deals.get_all(**api_kwargs) - deals_dict = [] - MAX_SCAN_ROWS = 10_000 - effective_limit = min(limit, MAX_SCAN_ROWS) if limit is not None else MAX_SCAN_ROWS - logger.debug(f"[DealsTable] get_deals() — full scan capped at {effective_limit}") - for deal in deals: - try: - row = self._deal_to_dict(deal, hubspot_columns, association_targets) - deals_dict.append(row) - if len(deals_dict) >= effective_limit: - logger.debug(f"[DealsTable] get_deals() — reached effective_limit={effective_limit}, stopping scan") - break - except Exception as e: - logger.error(f"Error processing deal {getattr(deal, 'id', 'unknown')}: {str(e)}") - raise ValueError(f"Failed to process deal {getattr(deal, 'id', 'unknown')}.") from e - - if needs_owner or needs_stage: - self._enrich_deal_rows(deals_dict, columns) - logger.info(f"Retrieved {len(deals_dict)} deals from HubSpot") - return deals_dict - - def _search_deals_by_conditions( - self, - hubspot: HubSpot, - filters: Optional[List[Dict[str, Any]]], - properties: List[str], - limit: Optional[int], - sorts: Optional[List[Dict[str, Any]]], - hubspot_columns: List[str], - association_targets: List[str], - ) -> List[Dict[str, Any]]: - return _execute_hubspot_search( - hubspot.crm.deals.search_api, - filters or [], - properties, - limit, - lambda obj: self._deal_to_dict(obj, hubspot_columns, association_targets), - sorts=sorts, - ) - - def _deal_to_dict( - self, - deal: Any, - columns: Optional[List[str]] = None, - association_targets: Optional[List[str]] = None, - ) -> Dict[str, Any]: - columns = columns or self._get_default_deal_columns() - row = self._object_to_dict(deal, columns) - if association_targets: - row = 
enrich_object_with_associations(deal, "deals", row) - return row - - def create_deals(self, deals_data: List[Dict[str, Any]]) -> None: - if not deals_data: - raise ValueError("No deal data provided for creation") - - logger.info(f"Attempting to create {len(deals_data)} deal(s)") - hubspot = self.handler.connect() - deals_to_create = [HubSpotObjectInputCreate(properties=deal) for deal in deals_data] - batch_input = BatchInputSimplePublicObjectBatchInputForCreate(inputs=deals_to_create) - - try: - created_deals = hubspot.crm.deals.batch_api.create( - batch_input_simple_public_object_batch_input_for_create=batch_input - ) - if not created_deals or not hasattr(created_deals, "results") or not created_deals.results: - raise Exception("Deal creation returned no results") - created_ids = [d.id for d in created_deals.results] - logger.info(f"Successfully created {len(created_ids)} deal(s) with IDs: {created_ids}") - except Exception as e: - logger.error(f"Deals creation failed: {str(e)}") - raise Exception(f"Deals creation failed {e}") - - def update_deals(self, deal_ids: List[str], values_to_update: Dict[str, Any]) -> None: - hubspot = self.handler.connect() - deals_to_update = [HubSpotObjectBatchInput(id=did, properties=values_to_update) for did in deal_ids] - batch_input = BatchInputSimplePublicObjectBatchInput(inputs=deals_to_update) - try: - updated = hubspot.crm.deals.batch_api.update(batch_input_simple_public_object_batch_input=batch_input) - logger.info(f"Deals with ID {[d.id for d in updated.results]} updated") - except Exception as e: - raise Exception(f"Deals update failed {e}") - - def delete_deals(self, deal_ids: List[str]) -> None: - hubspot = self.handler.connect() - deals_to_delete = [HubSpotObjectId(id=did) for did in deal_ids] - batch_input = BatchInputSimplePublicObjectId(inputs=deals_to_delete) - try: - hubspot.crm.deals.batch_api.archive(batch_input_simple_public_object_id=batch_input) - logger.info("Deals deleted") - except Exception as e: - raise 
Exception(f"Deals deletion failed {e}") - - -class TicketsTable(HubSpotAPIResource): - """HubSpot Tickets table for support ticket management.""" - - SEARCHABLE_COLUMNS = {"subject", "hs_pipeline", "hs_pipeline_stage", "hs_ticket_priority", "id"} - ASSOCIATION_COLUMNS = {"primary_company_id", "primary_contact_id", "primary_deal_id"} - - def meta_get_tables(self, table_name: str) -> Dict[str, Any]: - row_count = None - try: - self.handler.connect() - row_count = self.handler._estimate_table_rows("tickets") - except Exception: - pass - - return { - "TABLE_NAME": "tickets", - "TABLE_TYPE": "BASE TABLE", - "TABLE_DESCRIPTION": "HubSpot tickets data including subject, status, priority and pipeline information", - "ROW_COUNT": row_count, - } - - def meta_get_columns(self, table_name: str) -> List[Dict[str, Any]]: - return self.handler._get_default_meta_columns("tickets") - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - ) -> pd.DataFrame: - tickets_df = pd.json_normalize( - self.get_tickets( - limit=limit, - where_conditions=conditions, - properties=targets, - search_filters=search_filters, - search_sorts=search_sorts, - allow_search=allow_search, - ) - ) - if tickets_df.empty: - tickets_df = pd.DataFrame(columns=targets or self._get_default_ticket_columns()) - return tickets_df - - def add(self, ticket_data: List[dict]): - self.create_tickets(ticket_data) - - def modify(self, conditions: List[FilterCondition], values: Dict) -> None: - normalized_conditions = _normalize_filter_conditions(conditions) - tickets_df = pd.json_normalize(self.get_tickets(limit=200, where_conditions=normalized_conditions)) - - if tickets_df.empty: - raise ValueError("No tickets retrieved from HubSpot to evaluate update conditions.") - - executor_conditions 
= _normalize_conditions_for_executor(normalized_conditions) - update_query_executor = UPDATEQueryExecutor(tickets_df, executor_conditions) - filtered_df = update_query_executor.execute_query() - - if filtered_df.empty: - raise ValueError(f"No tickets found matching WHERE conditions: {conditions}.") - - ticket_ids = filtered_df["id"].astype(str).tolist() - logger.info(f"Updating {len(ticket_ids)} ticket(s) matching WHERE conditions") - self.update_tickets(ticket_ids, values) - - def remove(self, conditions: List[FilterCondition]) -> None: - normalized_conditions = _normalize_filter_conditions(conditions) - tickets_df = pd.json_normalize(self.get_tickets(limit=200, where_conditions=normalized_conditions)) - - if tickets_df.empty: - raise ValueError("No tickets retrieved from HubSpot to evaluate delete conditions.") - - executor_conditions = _normalize_conditions_for_executor(normalized_conditions) - delete_query_executor = DELETEQueryExecutor(tickets_df, executor_conditions) - filtered_df = delete_query_executor.execute_query() - - if filtered_df.empty: - raise ValueError(f"No tickets found matching WHERE conditions: {conditions}.") - - ticket_ids = filtered_df["id"].astype(str).tolist() - logger.info(f"Deleting {len(ticket_ids)} ticket(s) matching WHERE conditions") - self.delete_tickets(ticket_ids) - - def get_columns(self) -> List[str]: - return self._get_default_ticket_columns() - - @staticmethod - def _get_default_ticket_columns() -> List[str]: - return [ - "id", - "subject", - "content", - "hs_pipeline", - "hs_pipeline_stage", - "hs_ticket_priority", - "hs_ticket_category", - "hubspot_owner_id", - "createdate", - "lastmodifieddate", - "primary_company_id", - "primary_contact_id", - "primary_deal_id", - ] - - def get_tickets( - self, - limit: Optional[int] = None, - where_conditions: Optional[List] = None, - properties: Optional[List[str]] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - 
allow_search: bool = True, - **kwargs, - ) -> List[Dict]: - normalized_conditions = _normalize_filter_conditions(where_conditions) - hubspot = self.handler.connect() - - requested_properties = properties or [] - default_properties = self._get_default_ticket_columns() - columns = requested_properties or default_properties - association_targets, hubspot_columns = _prepare_association_request("tickets", columns) - hubspot_properties = _build_hubspot_properties(hubspot_columns) - - MAX_ASSOCIATION_SCAN = 500 - if not normalized_conditions and limit is not None and limit > MAX_ASSOCIATION_SCAN: - msg = ( - "Direct FK joins on HubSpot tickets using foreign key columns " - "(e.g. primary_company_id, primary_contact_id, primary_deal_id) are not supported. " - "The HubSpot API represents relationships through association tables.\n\n" - "Please rewrite your query using the appropriate association table, e.g.:\n" - "FROM companies co\n" - "JOIN company_tickets ct ON ct.company_id = co.id\n" - "JOIN tickets t ON t.id = ct.ticket_id" - ) - raise ValueError(msg) - - api_kwargs = {**kwargs, "properties": hubspot_properties} - if limit is not None: - api_kwargs["limit"] = limit - else: - api_kwargs.pop("limit", None) - if association_targets: - api_kwargs["associations"] = association_targets - - if allow_search and (search_filters or search_sorts or normalized_conditions): - filters = search_filters - if filters is None and normalized_conditions: - filters = _build_hubspot_search_filters(normalized_conditions, self.SEARCHABLE_COLUMNS) - if filters is not None or search_sorts is not None: - if association_targets: - logger.debug("HubSpot search API does not include associations for tickets.") - search_results = self._search_tickets_by_conditions( - hubspot, - filters, - hubspot_properties, - limit, - search_sorts, - hubspot_columns, - association_targets, - ) - logger.info(f"Retrieved {len(search_results)} tickets from HubSpot via search API") - return search_results - - tickets = 
hubspot.crm.tickets.get_all(**api_kwargs) - tickets_dict = [] - MAX_SCAN_ROWS = 10_000 - effective_limit = min(limit, MAX_SCAN_ROWS) if limit is not None else MAX_SCAN_ROWS - for ticket in tickets: - try: - row = self._ticket_to_dict(ticket, hubspot_columns, association_targets) - tickets_dict.append(row) - if len(tickets_dict) >= effective_limit: - break - except Exception: - continue - - logger.info(f"Retrieved {len(tickets_dict)} tickets from HubSpot") - return tickets_dict - - def _search_tickets_by_conditions( - self, - hubspot: HubSpot, - filters: Optional[List[Dict[str, Any]]], - properties: List[str], - limit: Optional[int], - sorts: Optional[List[Dict[str, Any]]], - columns: List[str], - association_targets: List[str], - ) -> List[Dict[str, Any]]: - return _execute_hubspot_search( - hubspot.crm.tickets.search_api, - filters or [], - properties, - limit, - lambda obj: self._ticket_to_dict(obj, columns, association_targets), - sorts=sorts, - ) - - def _ticket_to_dict( - self, - ticket: Any, - columns: Optional[List[str]] = None, - association_targets: Optional[List[str]] = None, - ) -> Dict[str, Any]: - columns = columns or self._get_default_ticket_columns() - row = self._object_to_dict(ticket, columns) - if association_targets: - row = enrich_object_with_associations(ticket, "tickets", row) - return row - - def create_tickets(self, tickets_data: List[Dict[str, Any]]) -> None: - if not tickets_data: - raise ValueError("No ticket data provided for creation") - - logger.info(f"Attempting to create {len(tickets_data)} ticket(s)") - hubspot = self.handler.connect() - tickets_to_create = [HubSpotObjectInputCreate(properties=ticket) for ticket in tickets_data] - batch_input = BatchInputSimplePublicObjectBatchInputForCreate(inputs=tickets_to_create) - - try: - created_tickets = hubspot.crm.tickets.batch_api.create( - batch_input_simple_public_object_batch_input_for_create=batch_input - ) - if not created_tickets or not hasattr(created_tickets, "results") or not 
created_tickets.results: - raise Exception("Ticket creation returned no results") - created_ids = [t.id for t in created_tickets.results] - logger.info(f"Successfully created {len(created_ids)} ticket(s) with IDs: {created_ids}") - except Exception as e: - logger.error(f"Tickets creation failed: {str(e)}") - raise Exception(f"Tickets creation failed {e}") - - def update_tickets(self, ticket_ids: List[str], values_to_update: Dict[str, Any]) -> None: - hubspot = self.handler.connect() - tickets_to_update = [HubSpotObjectBatchInput(id=tid, properties=values_to_update) for tid in ticket_ids] - batch_input = BatchInputSimplePublicObjectBatchInput(inputs=tickets_to_update) - try: - updated = hubspot.crm.tickets.batch_api.update(batch_input_simple_public_object_batch_input=batch_input) - logger.info(f"Tickets with ID {[t.id for t in updated.results]} updated") - except Exception as e: - raise Exception(f"Tickets update failed {e}") - - def delete_tickets(self, ticket_ids: List[str]) -> None: - hubspot = self.handler.connect() - tickets_to_delete = [HubSpotObjectId(id=tid) for tid in ticket_ids] - batch_input = BatchInputSimplePublicObjectId(inputs=tickets_to_delete) - try: - hubspot.crm.tickets.batch_api.archive(batch_input_simple_public_object_id=batch_input) - logger.info("Tickets deleted") - except Exception as e: - raise Exception(f"Tickets deletion failed {e}") - - -class TasksTable(HubSpotAPIResource): - """HubSpot Tasks table for task management and follow-ups.""" - - SEARCHABLE_COLUMNS = {"hs_task_subject", "hs_task_status", "hs_task_priority", "hs_task_type", "id"} - ASSOCIATION_COLUMNS = {"primary_company_id", "primary_contact_id", "primary_deal_id"} - - def meta_get_tables(self, table_name: str) -> Dict[str, Any]: - row_count = None - try: - self.handler.connect() - row_count = self.handler._estimate_table_rows("tasks") - except Exception: - pass - - return { - "TABLE_NAME": "tasks", - "TABLE_TYPE": "BASE TABLE", - "TABLE_DESCRIPTION": "HubSpot tasks data 
including subject, status, priority and due dates", - "ROW_COUNT": row_count, - } - - def meta_get_columns(self, table_name: str) -> List[Dict[str, Any]]: - return self.handler._get_default_meta_columns("tasks") - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - ) -> pd.DataFrame: - tasks_df = pd.json_normalize( - self.get_tasks( - limit=limit, - where_conditions=conditions, - properties=targets, - search_filters=search_filters, - search_sorts=search_sorts, - allow_search=allow_search, - ) - ) - if tasks_df.empty: - tasks_df = pd.DataFrame(columns=targets or self._get_default_task_columns()) - return tasks_df - - def add(self, task_data: List[dict]): - self.create_tasks(task_data) - - def modify(self, conditions: List[FilterCondition], values: Dict) -> None: - normalized_conditions = _normalize_filter_conditions(conditions) - tasks_df = pd.json_normalize(self.get_tasks(limit=200, where_conditions=normalized_conditions)) - - if tasks_df.empty: - raise ValueError("No tasks retrieved from HubSpot to evaluate update conditions.") - - executor_conditions = _normalize_conditions_for_executor(normalized_conditions) - update_query_executor = UPDATEQueryExecutor(tasks_df, executor_conditions) - filtered_df = update_query_executor.execute_query() - - if filtered_df.empty: - raise ValueError(f"No tasks found matching WHERE conditions: {conditions}.") - - task_ids = filtered_df["id"].astype(str).tolist() - logger.info(f"Updating {len(task_ids)} task(s) matching WHERE conditions") - self.update_tasks(task_ids, values) - - def remove(self, conditions: List[FilterCondition]) -> None: - normalized_conditions = _normalize_filter_conditions(conditions) - tasks_df = pd.json_normalize(self.get_tasks(limit=200, 
where_conditions=normalized_conditions)) - - if tasks_df.empty: - raise ValueError("No tasks retrieved from HubSpot to evaluate delete conditions.") - - executor_conditions = _normalize_conditions_for_executor(normalized_conditions) - delete_query_executor = DELETEQueryExecutor(tasks_df, executor_conditions) - filtered_df = delete_query_executor.execute_query() - - if filtered_df.empty: - raise ValueError(f"No tasks found matching WHERE conditions: {conditions}.") - - task_ids = filtered_df["id"].astype(str).tolist() - logger.info(f"Deleting {len(task_ids)} task(s) matching WHERE conditions") - self.delete_tasks(task_ids) - - def get_columns(self) -> List[str]: - return self._get_default_task_columns() - - @staticmethod - def _get_default_task_columns() -> List[str]: - return [ - "id", - "hs_task_subject", - "hs_task_body", - "hs_task_status", - "hs_task_priority", - "hs_task_type", - "hs_timestamp", - "hubspot_owner_id", - "createdate", - "lastmodifieddate", - "primary_company_id", - "primary_contact_id", - "primary_deal_id", - ] - - def get_tasks( - self, - limit: Optional[int] = None, - where_conditions: Optional[List] = None, - properties: Optional[List[str]] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - **kwargs, - ) -> List[Dict]: - normalized_conditions = _normalize_filter_conditions(where_conditions) - hubspot = self.handler.connect() - - requested_properties = properties or [] - default_properties = self._get_default_task_columns() - columns = requested_properties or default_properties - association_targets, hubspot_columns = _prepare_association_request("tasks", columns) - hubspot_properties = _build_hubspot_properties(hubspot_columns) - - MAX_ASSOCIATION_SCAN = 500 - if not normalized_conditions and limit is not None and limit > MAX_ASSOCIATION_SCAN: - msg = ( - "Direct FK joins on HubSpot tasks using foreign key columns " - "(e.g. 
primary_contact_id, primary_company_id, primary_deal_id) are not supported. " - "The HubSpot API represents relationships through association tables.\n\n" - "Please rewrite your query using the appropriate association table." - ) - raise ValueError(msg) - - api_kwargs = {**kwargs, "properties": hubspot_properties} - if limit is not None: - api_kwargs["limit"] = limit - else: - api_kwargs.pop("limit", None) - if association_targets: - api_kwargs["associations"] = association_targets - - # Tasks use the objects API - if allow_search and (search_filters or search_sorts or normalized_conditions): - filters = search_filters - if filters is None and normalized_conditions: - filters = _build_hubspot_search_filters(normalized_conditions, self.SEARCHABLE_COLUMNS) - if filters is not None or search_sorts is not None: - if association_targets: - logger.debug("HubSpot search API does not include associations for tasks.") - search_results = self._search_tasks_by_conditions( - hubspot, - filters, - hubspot_properties, - limit, - search_sorts, - hubspot_columns, - association_targets, - ) - logger.info(f"Retrieved {len(search_results)} tasks from HubSpot via search API") - return search_results - - tasks = self.handler._get_objects_all("tasks", **api_kwargs) - tasks_dict = [] - MAX_SCAN_ROWS = 10_000 - effective_limit = min(limit, MAX_SCAN_ROWS) if limit is not None else MAX_SCAN_ROWS - for task in tasks: - try: - row = self._task_to_dict(task, hubspot_columns, association_targets) - tasks_dict.append(row) - if len(tasks_dict) >= effective_limit: - break - except Exception: - continue - - logger.info(f"Retrieved {len(tasks_dict)} tasks from HubSpot") - return tasks_dict - - def _search_tasks_by_conditions( - self, - hubspot: HubSpot, - filters: Optional[List[Dict[str, Any]]], - properties: List[str], - limit: Optional[int], - sorts: Optional[List[Dict[str, Any]]], - columns: List[str], - association_targets: List[str], - ) -> List[Dict[str, Any]]: - return 
_execute_hubspot_search( - hubspot.crm.objects.search_api, - filters or [], - properties, - limit, - lambda obj: self._task_to_dict(obj, columns, association_targets), - sorts=sorts, - object_type="tasks", - ) - - def _task_to_dict( - self, - task: Any, - columns: Optional[List[str]] = None, - association_targets: Optional[List[str]] = None, - ) -> Dict[str, Any]: - columns = columns or self._get_default_task_columns() - row = self._object_to_dict(task, columns) - if association_targets: - row = enrich_object_with_associations(task, "tasks", row) - return row - - def create_tasks(self, tasks_data: List[Dict[str, Any]]) -> None: - if not tasks_data: - raise ValueError("No task data provided for creation") - - logger.info(f"Attempting to create {len(tasks_data)} task(s)") - hubspot = self.handler.connect() - tasks_to_create = [HubSpotObjectInputCreate(properties=task) for task in tasks_data] - batch_input = BatchInputSimplePublicObjectBatchInputForCreate(inputs=tasks_to_create) - - try: - created_tasks = hubspot.crm.objects.tasks.batch_api.create( - batch_input_simple_public_object_batch_input_for_create=batch_input - ) - if not created_tasks or not hasattr(created_tasks, "results") or not created_tasks.results: - raise Exception("Task creation returned no results") - created_ids = [t.id for t in created_tasks.results] - logger.info(f"Successfully created {len(created_ids)} task(s) with IDs: {created_ids}") - except Exception as e: - logger.error(f"Tasks creation failed: {str(e)}") - raise Exception(f"Tasks creation failed {e}") - - def update_tasks(self, task_ids: List[str], values_to_update: Dict[str, Any]) -> None: - hubspot = self.handler.connect() - tasks_to_update = [HubSpotObjectBatchInput(id=tid, properties=values_to_update) for tid in task_ids] - batch_input = BatchInputSimplePublicObjectBatchInput(inputs=tasks_to_update) - try: - updated = hubspot.crm.objects.tasks.batch_api.update( - batch_input_simple_public_object_batch_input=batch_input - ) - 
logger.info(f"Tasks with ID {[t.id for t in updated.results]} updated") - except Exception as e: - raise Exception(f"Tasks update failed {e}") - - def delete_tasks(self, task_ids: List[str]) -> None: - hubspot = self.handler.connect() - tasks_to_delete = [HubSpotObjectId(id=tid) for tid in task_ids] - batch_input = BatchInputSimplePublicObjectId(inputs=tasks_to_delete) - try: - hubspot.crm.objects.tasks.batch_api.archive(batch_input_simple_public_object_id=batch_input) - logger.info("Tasks deleted") - except Exception as e: - raise Exception(f"Tasks deletion failed {e}") - - -class CallsTable(HubSpotAPIResource): - """HubSpot Calls table for phone/video call logs.""" - - SEARCHABLE_COLUMNS = {"hs_call_title", "hs_call_direction", "hs_call_disposition", "hs_call_status", "id"} - ASSOCIATION_COLUMNS = {"primary_company_id", "primary_contact_id", "primary_deal_id"} - - def meta_get_tables(self, table_name: str) -> Dict[str, Any]: - row_count = None - try: - self.handler.connect() - row_count = self.handler._estimate_table_rows("calls") - except Exception: - pass - - return { - "TABLE_NAME": "calls", - "TABLE_TYPE": "BASE TABLE", - "TABLE_DESCRIPTION": "HubSpot call logs including direction, duration, outcome and notes", - "ROW_COUNT": row_count, - } - - def meta_get_columns(self, table_name: str) -> List[Dict[str, Any]]: - return self.handler._get_default_meta_columns("calls") - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - ) -> pd.DataFrame: - calls_df = pd.json_normalize( - self.get_calls( - limit=limit, - where_conditions=conditions, - properties=targets, - search_filters=search_filters, - search_sorts=search_sorts, - allow_search=allow_search, - ) - ) - if calls_df.empty: - calls_df = pd.DataFrame(columns=targets or 
self._get_default_call_columns()) - return calls_df - - def add(self, call_data: List[dict]): - self.create_calls(call_data) - - def modify(self, conditions: List[FilterCondition], values: Dict) -> None: - normalized_conditions = _normalize_filter_conditions(conditions) - calls_df = pd.json_normalize(self.get_calls(limit=200, where_conditions=normalized_conditions)) - - if calls_df.empty: - raise ValueError("No calls retrieved from HubSpot to evaluate update conditions.") - - executor_conditions = _normalize_conditions_for_executor(normalized_conditions) - update_query_executor = UPDATEQueryExecutor(calls_df, executor_conditions) - filtered_df = update_query_executor.execute_query() - - if filtered_df.empty: - raise ValueError(f"No calls found matching WHERE conditions: {conditions}.") - - call_ids = filtered_df["id"].astype(str).tolist() - logger.info(f"Updating {len(call_ids)} call(s) matching WHERE conditions") - self.update_calls(call_ids, values) - - def remove(self, conditions: List[FilterCondition]) -> None: - normalized_conditions = _normalize_filter_conditions(conditions) - calls_df = pd.json_normalize(self.get_calls(limit=200, where_conditions=normalized_conditions)) - - if calls_df.empty: - raise ValueError("No calls retrieved from HubSpot to evaluate delete conditions.") - - executor_conditions = _normalize_conditions_for_executor(normalized_conditions) - delete_query_executor = DELETEQueryExecutor(calls_df, executor_conditions) - filtered_df = delete_query_executor.execute_query() - - if filtered_df.empty: - raise ValueError(f"No calls found matching WHERE conditions: {conditions}.") - - call_ids = filtered_df["id"].astype(str).tolist() - logger.info(f"Deleting {len(call_ids)} call(s) matching WHERE conditions") - self.delete_calls(call_ids) - - def get_columns(self) -> List[str]: - return self._get_default_call_columns() - - @staticmethod - def _get_default_call_columns() -> List[str]: - return [ - "id", - "hs_call_title", - "hs_call_body", - 
"hs_call_direction", - "hs_call_disposition", - "hs_call_duration", - "hs_call_status", - "hubspot_owner_id", - "hs_timestamp", - "createdate", - "lastmodifieddate", - "primary_company_id", - "primary_contact_id", - "primary_deal_id", - ] - - def get_calls( - self, - limit: Optional[int] = None, - where_conditions: Optional[List] = None, - properties: Optional[List[str]] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - **kwargs, - ) -> List[Dict]: - normalized_conditions = _normalize_filter_conditions(where_conditions) - hubspot = self.handler.connect() - - requested_properties = properties or [] - default_properties = self._get_default_call_columns() - columns = requested_properties or default_properties - association_targets, hubspot_columns = _prepare_association_request("calls", columns) - hubspot_properties = _build_hubspot_properties(hubspot_columns) - - MAX_ASSOCIATION_SCAN = 500 - if not normalized_conditions and limit is not None and limit > MAX_ASSOCIATION_SCAN: - msg = ( - "Direct FK joins on HubSpot calls using foreign key columns " - "(e.g. primary_contact_id, primary_company_id, primary_deal_id) are not supported. " - "The HubSpot API represents relationships through association tables.\n\n" - "Please rewrite your query using the appropriate association table." 
- ) - raise ValueError(msg) - - api_kwargs = {**kwargs, "properties": hubspot_properties} - if limit is not None: - api_kwargs["limit"] = limit - else: - api_kwargs.pop("limit", None) - if association_targets: - api_kwargs["associations"] = association_targets - - if allow_search and (search_filters or search_sorts or normalized_conditions): - filters = search_filters - if filters is None and normalized_conditions: - filters = _build_hubspot_search_filters(normalized_conditions, self.SEARCHABLE_COLUMNS) - if filters is not None or search_sorts is not None: - if association_targets: - logger.debug("HubSpot search API does not include associations for calls.") - search_results = self._search_calls_by_conditions( - hubspot, - filters, - hubspot_properties, - limit, - search_sorts, - hubspot_columns, - association_targets, - ) - logger.info(f"Retrieved {len(search_results)} calls from HubSpot via search API") - return search_results - - calls = self.handler._get_objects_all("calls", **api_kwargs) - calls_dict = [] - MAX_SCAN_ROWS = 10_000 - effective_limit = min(limit, MAX_SCAN_ROWS) if limit is not None else MAX_SCAN_ROWS - for call in calls: - try: - row = self._call_to_dict(call, hubspot_columns, association_targets) - calls_dict.append(row) - if len(calls_dict) >= effective_limit: - break - except Exception: - continue - - logger.info(f"Retrieved {len(calls_dict)} calls from HubSpot") - return calls_dict - - def _search_calls_by_conditions( - self, - hubspot: HubSpot, - filters: Optional[List[Dict[str, Any]]], - properties: List[str], - limit: Optional[int], - sorts: Optional[List[Dict[str, Any]]], - columns: List[str], - association_targets: List[str], - ) -> List[Dict[str, Any]]: - return _execute_hubspot_search( - hubspot.crm.objects.search_api, - filters or [], - properties, - limit, - lambda obj: self._call_to_dict(obj, columns, association_targets), - sorts=sorts, - object_type="calls", - ) - - def _call_to_dict( - self, - call: Any, - columns: 
Optional[List[str]] = None, - association_targets: Optional[List[str]] = None, - ) -> Dict[str, Any]: - columns = columns or self._get_default_call_columns() - row = self._object_to_dict(call, columns) - if association_targets: - row = enrich_object_with_associations(call, "calls", row) - return row - - def create_calls(self, calls_data: List[Dict[str, Any]]) -> None: - if not calls_data: - raise ValueError("No call data provided for creation") - - logger.info(f"Attempting to create {len(calls_data)} call(s)") - hubspot = self.handler.connect() - calls_to_create = [HubSpotObjectInputCreate(properties=call) for call in calls_data] - batch_input = BatchInputSimplePublicObjectBatchInputForCreate(inputs=calls_to_create) - - try: - created_calls = hubspot.crm.objects.calls.batch_api.create( - batch_input_simple_public_object_batch_input_for_create=batch_input - ) - if not created_calls or not hasattr(created_calls, "results") or not created_calls.results: - raise Exception("Call creation returned no results") - created_ids = [c.id for c in created_calls.results] - logger.info(f"Successfully created {len(created_ids)} call(s) with IDs: {created_ids}") - except Exception as e: - logger.error(f"Calls creation failed: {str(e)}") - raise Exception(f"Calls creation failed {e}") - - def update_calls(self, call_ids: List[str], values_to_update: Dict[str, Any]) -> None: - hubspot = self.handler.connect() - calls_to_update = [HubSpotObjectBatchInput(id=cid, properties=values_to_update) for cid in call_ids] - batch_input = BatchInputSimplePublicObjectBatchInput(inputs=calls_to_update) - try: - updated = hubspot.crm.objects.calls.batch_api.update( - batch_input_simple_public_object_batch_input=batch_input - ) - logger.info(f"Calls with ID {[c.id for c in updated.results]} updated") - except Exception as e: - raise Exception(f"Calls update failed {e}") - - def delete_calls(self, call_ids: List[str]) -> None: - hubspot = self.handler.connect() - calls_to_delete = 
[HubSpotObjectId(id=cid) for cid in call_ids] - batch_input = BatchInputSimplePublicObjectId(inputs=calls_to_delete) - try: - hubspot.crm.objects.calls.batch_api.archive(batch_input_simple_public_object_id=batch_input) - logger.info("Calls deleted") - except Exception as e: - raise Exception(f"Calls deletion failed {e}") - - -class EmailsTable(HubSpotAPIResource): - """HubSpot Emails table for email engagement logs.""" - - SEARCHABLE_COLUMNS = {"hs_email_subject", "hs_email_direction", "hs_email_status", "id"} - ASSOCIATION_COLUMNS = {"primary_company_id", "primary_contact_id", "primary_deal_id"} - - def meta_get_tables(self, table_name: str) -> Dict[str, Any]: - row_count = None - try: - self.handler.connect() - row_count = self.handler._estimate_table_rows("emails") - except Exception: - pass - - return { - "TABLE_NAME": "emails", - "TABLE_TYPE": "BASE TABLE", - "TABLE_DESCRIPTION": "HubSpot email logs including subject, direction, status and content", - "ROW_COUNT": row_count, - } - - def meta_get_columns(self, table_name: str) -> List[Dict[str, Any]]: - return self.handler._get_default_meta_columns("emails") - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - ) -> pd.DataFrame: - emails_df = pd.json_normalize( - self.get_emails( - limit=limit, - where_conditions=conditions, - properties=targets, - search_filters=search_filters, - search_sorts=search_sorts, - allow_search=allow_search, - ) - ) - if emails_df.empty: - emails_df = pd.DataFrame(columns=targets or self._get_default_email_columns()) - return emails_df - - def add(self, email_data: List[dict]): - self.create_emails(email_data) - - def modify(self, conditions: List[FilterCondition], values: Dict) -> None: - normalized_conditions = 
_normalize_filter_conditions(conditions) - emails_df = pd.json_normalize(self.get_emails(limit=200, where_conditions=normalized_conditions)) - - if emails_df.empty: - raise ValueError("No emails retrieved from HubSpot to evaluate update conditions.") - - executor_conditions = _normalize_conditions_for_executor(normalized_conditions) - update_query_executor = UPDATEQueryExecutor(emails_df, executor_conditions) - filtered_df = update_query_executor.execute_query() - - if filtered_df.empty: - raise ValueError(f"No emails found matching WHERE conditions: {conditions}.") - - email_ids = filtered_df["id"].astype(str).tolist() - logger.info(f"Updating {len(email_ids)} email(s) matching WHERE conditions") - self.update_emails(email_ids, values) - - def remove(self, conditions: List[FilterCondition]) -> None: - normalized_conditions = _normalize_filter_conditions(conditions) - emails_df = pd.json_normalize(self.get_emails(limit=200, where_conditions=normalized_conditions)) - - if emails_df.empty: - raise ValueError("No emails retrieved from HubSpot to evaluate delete conditions.") - - executor_conditions = _normalize_conditions_for_executor(normalized_conditions) - delete_query_executor = DELETEQueryExecutor(emails_df, executor_conditions) - filtered_df = delete_query_executor.execute_query() - - if filtered_df.empty: - raise ValueError(f"No emails found matching WHERE conditions: {conditions}.") - - email_ids = filtered_df["id"].astype(str).tolist() - logger.info(f"Deleting {len(email_ids)} email(s) matching WHERE conditions") - self.delete_emails(email_ids) - - def get_columns(self) -> List[str]: - return self._get_default_email_columns() - - @staticmethod - def _get_default_email_columns() -> List[str]: - return [ - "id", - "hs_email_subject", - "hs_email_text", - "hs_email_direction", - "hs_email_status", - "hs_email_sender_email", - "hs_email_to_email", - "hubspot_owner_id", - "hs_timestamp", - "createdate", - "lastmodifieddate", - "primary_company_id", - 
"primary_contact_id", - "primary_deal_id", - ] - - def get_emails( - self, - limit: Optional[int] = None, - where_conditions: Optional[List] = None, - properties: Optional[List[str]] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - **kwargs, - ) -> List[Dict]: - normalized_conditions = _normalize_filter_conditions(where_conditions) - hubspot = self.handler.connect() - - requested_properties = properties or [] - default_properties = self._get_default_email_columns() - columns = requested_properties or default_properties - association_targets, hubspot_columns = _prepare_association_request("emails", columns) - hubspot_properties = _build_hubspot_properties(hubspot_columns) - - MAX_ASSOCIATION_SCAN = 500 - if not normalized_conditions and limit is not None and limit > MAX_ASSOCIATION_SCAN: - msg = ( - "Direct FK joins on HubSpot emails using foreign key columns " - "(e.g. primary_contact_id, primary_company_id, primary_deal_id) are not supported. " - "The HubSpot API represents relationships through association tables.\n\n" - "Please rewrite your query using the appropriate association table." 
- ) - raise ValueError(msg) - - api_kwargs = {**kwargs, "properties": hubspot_properties} - if limit is not None: - api_kwargs["limit"] = limit - else: - api_kwargs.pop("limit", None) - if association_targets: - api_kwargs["associations"] = association_targets - - if allow_search and (search_filters or search_sorts or normalized_conditions): - filters = search_filters - if filters is None and normalized_conditions: - filters = _build_hubspot_search_filters(normalized_conditions, self.SEARCHABLE_COLUMNS) - if filters is not None or search_sorts is not None: - if association_targets: - logger.debug("HubSpot search API does not include associations for emails.") - search_results = self._search_emails_by_conditions( - hubspot, - filters, - hubspot_properties, - limit, - search_sorts, - hubspot_columns, - association_targets, - ) - logger.info(f"Retrieved {len(search_results)} emails from HubSpot via search API") - return search_results - - emails = self.handler._get_objects_all("emails", **api_kwargs) - emails_dict = [] - MAX_SCAN_ROWS = 10_000 - effective_limit = min(limit, MAX_SCAN_ROWS) if limit is not None else MAX_SCAN_ROWS - for email in emails: - try: - row = self._email_to_dict(email, hubspot_columns, association_targets) - emails_dict.append(row) - if len(emails_dict) >= effective_limit: - break - except Exception: - continue - - logger.info(f"Retrieved {len(emails_dict)} emails from HubSpot") - return emails_dict - - def _search_emails_by_conditions( - self, - hubspot: HubSpot, - filters: Optional[List[Dict[str, Any]]], - properties: List[str], - limit: Optional[int], - sorts: Optional[List[Dict[str, Any]]], - columns: List[str], - association_targets: List[str], - ) -> List[Dict[str, Any]]: - return _execute_hubspot_search( - hubspot.crm.objects.search_api, - filters or [], - properties, - limit, - lambda obj: self._email_to_dict(obj, columns, association_targets), - sorts=sorts, - object_type="emails", - ) - - def _email_to_dict( - self, - email: Any, - 
columns: Optional[List[str]] = None, - association_targets: Optional[List[str]] = None, - ) -> Dict[str, Any]: - columns = columns or self._get_default_email_columns() - row = self._object_to_dict(email, columns) - if association_targets: - row = enrich_object_with_associations(email, "emails", row) - return row - - def create_emails(self, emails_data: List[Dict[str, Any]]) -> None: - if not emails_data: - raise ValueError("No email data provided for creation") - - logger.info(f"Attempting to create {len(emails_data)} email(s)") - hubspot = self.handler.connect() - emails_to_create = [HubSpotObjectInputCreate(properties=email) for email in emails_data] - batch_input = BatchInputSimplePublicObjectBatchInputForCreate(inputs=emails_to_create) - - try: - created_emails = hubspot.crm.objects.emails.batch_api.create( - batch_input_simple_public_object_batch_input_for_create=batch_input - ) - if not created_emails or not hasattr(created_emails, "results") or not created_emails.results: - raise Exception("Email creation returned no results") - created_ids = [e.id for e in created_emails.results] - logger.info(f"Successfully created {len(created_ids)} email(s) with IDs: {created_ids}") - except Exception as e: - logger.error(f"Emails creation failed: {str(e)}") - raise Exception(f"Emails creation failed {e}") - - def update_emails(self, email_ids: List[str], values_to_update: Dict[str, Any]) -> None: - hubspot = self.handler.connect() - emails_to_update = [HubSpotObjectBatchInput(id=eid, properties=values_to_update) for eid in email_ids] - batch_input = BatchInputSimplePublicObjectBatchInput(inputs=emails_to_update) - try: - updated = hubspot.crm.objects.emails.batch_api.update( - batch_input_simple_public_object_batch_input=batch_input - ) - logger.info(f"Emails with ID {[e.id for e in updated.results]} updated") - except Exception as e: - raise Exception(f"Emails update failed {e}") - - def delete_emails(self, email_ids: List[str]) -> None: - hubspot = 
self.handler.connect() - emails_to_delete = [HubSpotObjectId(id=eid) for eid in email_ids] - batch_input = BatchInputSimplePublicObjectId(inputs=emails_to_delete) - try: - hubspot.crm.objects.emails.batch_api.archive(batch_input_simple_public_object_id=batch_input) - logger.info("Emails deleted") - except Exception as e: - raise Exception(f"Emails deletion failed {e}") - - -class MeetingsTable(HubSpotAPIResource): - """HubSpot Meetings table for meeting logs and scheduled meetings.""" - - SEARCHABLE_COLUMNS = {"hs_meeting_title", "hs_meeting_outcome", "id"} - ASSOCIATION_COLUMNS = {"primary_company_id", "primary_contact_id", "primary_deal_id"} - - def meta_get_tables(self, table_name: str) -> Dict[str, Any]: - row_count = None - try: - self.handler.connect() - row_count = self.handler._estimate_table_rows("meetings") - except Exception: - pass - - return { - "TABLE_NAME": "meetings", - "TABLE_TYPE": "BASE TABLE", - "TABLE_DESCRIPTION": "HubSpot meeting logs including title, location, outcome and timing", - "ROW_COUNT": row_count, - } - - def meta_get_columns(self, table_name: str) -> List[Dict[str, Any]]: - return self.handler._get_default_meta_columns("meetings") - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - ) -> pd.DataFrame: - meetings_df = pd.json_normalize( - self.get_meetings( - limit=limit, - where_conditions=conditions, - properties=targets, - search_filters=search_filters, - search_sorts=search_sorts, - allow_search=allow_search, - ) - ) - if meetings_df.empty: - meetings_df = pd.DataFrame(columns=targets or self._get_default_meeting_columns()) - return meetings_df - - def add(self, meeting_data: List[dict]): - self.create_meetings(meeting_data) - - def modify(self, conditions: List[FilterCondition], values: Dict) 
-> None: - normalized_conditions = _normalize_filter_conditions(conditions) - meetings_df = pd.json_normalize(self.get_meetings(limit=200, where_conditions=normalized_conditions)) - - if meetings_df.empty: - raise ValueError("No meetings retrieved from HubSpot to evaluate update conditions.") - - executor_conditions = _normalize_conditions_for_executor(normalized_conditions) - update_query_executor = UPDATEQueryExecutor(meetings_df, executor_conditions) - filtered_df = update_query_executor.execute_query() - - if filtered_df.empty: - raise ValueError(f"No meetings found matching WHERE conditions: {conditions}.") - - meeting_ids = filtered_df["id"].astype(str).tolist() - logger.info(f"Updating {len(meeting_ids)} meeting(s) matching WHERE conditions") - self.update_meetings(meeting_ids, values) - - def remove(self, conditions: List[FilterCondition]) -> None: - normalized_conditions = _normalize_filter_conditions(conditions) - meetings_df = pd.json_normalize(self.get_meetings(limit=200, where_conditions=normalized_conditions)) - - if meetings_df.empty: - raise ValueError("No meetings retrieved from HubSpot to evaluate delete conditions.") - - executor_conditions = _normalize_conditions_for_executor(normalized_conditions) - delete_query_executor = DELETEQueryExecutor(meetings_df, executor_conditions) - filtered_df = delete_query_executor.execute_query() - - if filtered_df.empty: - raise ValueError(f"No meetings found matching WHERE conditions: {conditions}.") - - meeting_ids = filtered_df["id"].astype(str).tolist() - logger.info(f"Deleting {len(meeting_ids)} meeting(s) matching WHERE conditions") - self.delete_meetings(meeting_ids) - - def get_columns(self) -> List[str]: - return self._get_default_meeting_columns() - - @staticmethod - def _get_default_meeting_columns() -> List[str]: - return [ - "id", - "hs_meeting_title", - "hs_meeting_body", - "hs_meeting_location", - "hs_meeting_outcome", - "hs_meeting_start_time", - "hs_meeting_end_time", - "hubspot_owner_id", - 
"hs_timestamp", - "createdate", - "lastmodifieddate", - "primary_company_id", - "primary_contact_id", - "primary_deal_id", - ] - - def get_meetings( - self, - limit: Optional[int] = None, - where_conditions: Optional[List] = None, - properties: Optional[List[str]] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - **kwargs, - ) -> List[Dict]: - normalized_conditions = _normalize_filter_conditions(where_conditions) - hubspot = self.handler.connect() - - requested_properties = properties or [] - default_properties = self._get_default_meeting_columns() - columns = requested_properties or default_properties - association_targets, hubspot_columns = _prepare_association_request("meetings", columns) - hubspot_properties = _build_hubspot_properties(hubspot_columns) - - MAX_ASSOCIATION_SCAN = 500 - if not normalized_conditions and limit is not None and limit > MAX_ASSOCIATION_SCAN: - msg = ( - "Direct FK joins on HubSpot meetings using foreign key columns " - "(e.g. primary_contact_id, primary_company_id, primary_deal_id) are not supported. " - "The HubSpot API represents relationships through association tables.\n\n" - "Please rewrite your query using the appropriate association table." 
- ) - raise ValueError(msg) - - api_kwargs = {**kwargs, "properties": hubspot_properties} - if limit is not None: - api_kwargs["limit"] = limit - else: - api_kwargs.pop("limit", None) - if association_targets: - api_kwargs["associations"] = association_targets - - if allow_search and (search_filters or search_sorts or normalized_conditions): - filters = search_filters - if filters is None and normalized_conditions: - filters = _build_hubspot_search_filters(normalized_conditions, self.SEARCHABLE_COLUMNS) - if filters is not None or search_sorts is not None: - if association_targets: - logger.debug("HubSpot search API does not include associations for meetings.") - search_results = self._search_meetings_by_conditions( - hubspot, - filters, - hubspot_properties, - limit, - search_sorts, - hubspot_columns, - association_targets, - ) - logger.info(f"Retrieved {len(search_results)} meetings from HubSpot via search API") - return search_results - - meetings = self.handler._get_objects_all("meetings", **api_kwargs) - meetings_dict = [] - MAX_SCAN_ROWS = 10_000 - effective_limit = min(limit, MAX_SCAN_ROWS) if limit is not None else MAX_SCAN_ROWS - for meeting in meetings: - try: - row = self._meeting_to_dict(meeting, hubspot_columns, association_targets) - meetings_dict.append(row) - if len(meetings_dict) >= effective_limit: - break - except Exception: - continue - - logger.info(f"Retrieved {len(meetings_dict)} meetings from HubSpot") - return meetings_dict - - def _search_meetings_by_conditions( - self, - hubspot: HubSpot, - filters: Optional[List[Dict[str, Any]]], - properties: List[str], - limit: Optional[int], - sorts: Optional[List[Dict[str, Any]]], - columns: List[str], - association_targets: List[str], - ) -> List[Dict[str, Any]]: - return _execute_hubspot_search( - hubspot.crm.objects.search_api, - filters or [], - properties, - limit, - lambda obj: self._meeting_to_dict(obj, columns, association_targets), - sorts=sorts, - object_type="meetings", - ) - - def 
_meeting_to_dict( - self, - meeting: Any, - columns: Optional[List[str]] = None, - association_targets: Optional[List[str]] = None, - ) -> Dict[str, Any]: - columns = columns or self._get_default_meeting_columns() - row = self._object_to_dict(meeting, columns) - if association_targets: - row = enrich_object_with_associations(meeting, "meetings", row) - return row - - def create_meetings(self, meetings_data: List[Dict[str, Any]]) -> None: - if not meetings_data: - raise ValueError("No meeting data provided for creation") - - logger.info(f"Attempting to create {len(meetings_data)} meeting(s)") - hubspot = self.handler.connect() - meetings_to_create = [HubSpotObjectInputCreate(properties=meeting) for meeting in meetings_data] - batch_input = BatchInputSimplePublicObjectBatchInputForCreate(inputs=meetings_to_create) - - try: - created_meetings = hubspot.crm.objects.meetings.batch_api.create( - batch_input_simple_public_object_batch_input_for_create=batch_input - ) - if not created_meetings or not hasattr(created_meetings, "results") or not created_meetings.results: - raise Exception("Meeting creation returned no results") - created_ids = [m.id for m in created_meetings.results] - logger.info(f"Successfully created {len(created_ids)} meeting(s) with IDs: {created_ids}") - except Exception as e: - logger.error(f"Meetings creation failed: {str(e)}") - raise Exception(f"Meetings creation failed {e}") - - def update_meetings(self, meeting_ids: List[str], values_to_update: Dict[str, Any]) -> None: - hubspot = self.handler.connect() - meetings_to_update = [HubSpotObjectBatchInput(id=mid, properties=values_to_update) for mid in meeting_ids] - batch_input = BatchInputSimplePublicObjectBatchInput(inputs=meetings_to_update) - try: - updated = hubspot.crm.objects.meetings.batch_api.update( - batch_input_simple_public_object_batch_input=batch_input - ) - logger.info(f"Meetings with ID {[m.id for m in updated.results]} updated") - except Exception as e: - raise Exception(f"Meetings 
update failed {e}") - - def delete_meetings(self, meeting_ids: List[str]) -> None: - hubspot = self.handler.connect() - meetings_to_delete = [HubSpotObjectId(id=mid) for mid in meeting_ids] - batch_input = BatchInputSimplePublicObjectId(inputs=meetings_to_delete) - try: - hubspot.crm.objects.meetings.batch_api.archive(batch_input_simple_public_object_id=batch_input) - logger.info("Meetings deleted") - except Exception as e: - raise Exception(f"Meetings deletion failed {e}") - - -class NotesTable(HubSpotAPIResource): - """HubSpot Notes table for timeline notes on records.""" - - SEARCHABLE_COLUMNS = {"id"} - ASSOCIATION_COLUMNS = {"primary_company_id", "primary_contact_id", "primary_deal_id"} - - def meta_get_tables(self, table_name: str) -> Dict[str, Any]: - row_count = None - try: - self.handler.connect() - row_count = self.handler._estimate_table_rows("notes") - except Exception: - pass - - return { - "TABLE_NAME": "notes", - "TABLE_TYPE": "BASE TABLE", - "TABLE_DESCRIPTION": "HubSpot notes for timeline entries on records", - "ROW_COUNT": row_count, - } - - def meta_get_columns(self, table_name: str) -> List[Dict[str, Any]]: - return self.handler._get_default_meta_columns("notes") - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - ) -> pd.DataFrame: - notes_df = pd.json_normalize( - self.get_notes( - limit=limit, - where_conditions=conditions, - properties=targets, - search_filters=search_filters, - search_sorts=search_sorts, - allow_search=allow_search, - ) - ) - if notes_df.empty: - notes_df = pd.DataFrame(columns=targets or self._get_default_note_columns()) - return notes_df - - def add(self, note_data: List[dict]): - self.create_notes(note_data) - - def modify(self, conditions: List[FilterCondition], values: Dict) -> None: 
- normalized_conditions = _normalize_filter_conditions(conditions) - notes_df = pd.json_normalize(self.get_notes(limit=200, where_conditions=normalized_conditions)) - - if notes_df.empty: - raise ValueError("No notes retrieved from HubSpot to evaluate update conditions.") - - executor_conditions = _normalize_conditions_for_executor(normalized_conditions) - update_query_executor = UPDATEQueryExecutor(notes_df, executor_conditions) - filtered_df = update_query_executor.execute_query() - - if filtered_df.empty: - raise ValueError(f"No notes found matching WHERE conditions: {conditions}.") - - note_ids = filtered_df["id"].astype(str).tolist() - logger.info(f"Updating {len(note_ids)} note(s) matching WHERE conditions") - self.update_notes(note_ids, values) - - def remove(self, conditions: List[FilterCondition]) -> None: - normalized_conditions = _normalize_filter_conditions(conditions) - notes_df = pd.json_normalize(self.get_notes(limit=200, where_conditions=normalized_conditions)) - - if notes_df.empty: - raise ValueError("No notes retrieved from HubSpot to evaluate delete conditions.") - - executor_conditions = _normalize_conditions_for_executor(normalized_conditions) - delete_query_executor = DELETEQueryExecutor(notes_df, executor_conditions) - filtered_df = delete_query_executor.execute_query() - - if filtered_df.empty: - raise ValueError(f"No notes found matching WHERE conditions: {conditions}.") - - note_ids = filtered_df["id"].astype(str).tolist() - logger.info(f"Deleting {len(note_ids)} note(s) matching WHERE conditions") - self.delete_notes(note_ids) - - def get_columns(self) -> List[str]: - return self._get_default_note_columns() - - @staticmethod - def _get_default_note_columns() -> List[str]: - return [ - "id", - "hs_note_body", - "hubspot_owner_id", - "hs_timestamp", - "createdate", - "lastmodifieddate", - "primary_company_id", - "primary_contact_id", - "primary_deal_id", - ] - - def get_notes( - self, - limit: Optional[int] = None, - where_conditions: 
Optional[List] = None, - properties: Optional[List[str]] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - **kwargs, - ) -> List[Dict]: - normalized_conditions = _normalize_filter_conditions(where_conditions) - hubspot = self.handler.connect() - - requested_properties = properties or [] - default_properties = self._get_default_note_columns() - columns = requested_properties or default_properties - association_targets, hubspot_columns = _prepare_association_request("notes", columns) - hubspot_properties = _build_hubspot_properties(hubspot_columns) - - MAX_ASSOCIATION_SCAN = 500 - if not normalized_conditions and limit is not None and limit > MAX_ASSOCIATION_SCAN: - msg = ( - "Direct FK joins on HubSpot notes using foreign key columns " - "(e.g. primary_contact_id, primary_company_id, primary_deal_id) are not supported. " - "The HubSpot API represents relationships through association tables.\n\n" - "Please rewrite your query using the appropriate association table." 
- ) - raise ValueError(msg) - - api_kwargs = {**kwargs, "properties": hubspot_properties} - if limit is not None: - api_kwargs["limit"] = limit - else: - api_kwargs.pop("limit", None) - if association_targets: - api_kwargs["associations"] = association_targets - - if allow_search and (search_filters or search_sorts or normalized_conditions): - filters = search_filters - if filters is None and normalized_conditions: - filters = _build_hubspot_search_filters(normalized_conditions, self.SEARCHABLE_COLUMNS) - if filters is not None or search_sorts is not None: - if association_targets: - logger.debug("HubSpot search API does not include associations for notes.") - search_results = self._search_notes_by_conditions( - hubspot, - filters, - hubspot_properties, - limit, - search_sorts, - hubspot_columns, - association_targets, - ) - logger.info(f"Retrieved {len(search_results)} notes from HubSpot via search API") - return search_results - - notes = self.handler._get_objects_all("notes", **api_kwargs) - notes_dict = [] - MAX_SCAN_ROWS = 10_000 - effective_limit = min(limit, MAX_SCAN_ROWS) if limit is not None else MAX_SCAN_ROWS - for note in notes: - try: - row = self._note_to_dict(note, hubspot_columns, association_targets) - notes_dict.append(row) - if len(notes_dict) >= effective_limit: - break - except Exception: - continue - - logger.info(f"Retrieved {len(notes_dict)} notes from HubSpot") - return notes_dict - - def _search_notes_by_conditions( - self, - hubspot: HubSpot, - filters: Optional[List[Dict[str, Any]]], - properties: List[str], - limit: Optional[int], - sorts: Optional[List[Dict[str, Any]]], - columns: List[str], - association_targets: List[str], - ) -> List[Dict[str, Any]]: - return _execute_hubspot_search( - hubspot.crm.objects.search_api, - filters or [], - properties, - limit, - lambda obj: self._note_to_dict(obj, columns, association_targets), - sorts=sorts, - object_type="notes", - ) - - def _note_to_dict( - self, - note: Any, - columns: 
Optional[List[str]] = None, - association_targets: Optional[List[str]] = None, - ) -> Dict[str, Any]: - columns = columns or self._get_default_note_columns() - row = self._object_to_dict(note, columns) - if association_targets: - row = enrich_object_with_associations(note, "notes", row) - return row - - def create_notes(self, notes_data: List[Dict[str, Any]]) -> None: - if not notes_data: - raise ValueError("No note data provided for creation") - - logger.info(f"Attempting to create {len(notes_data)} note(s)") - hubspot = self.handler.connect() - notes_to_create = [HubSpotObjectInputCreate(properties=note) for note in notes_data] - batch_input = BatchInputSimplePublicObjectBatchInputForCreate(inputs=notes_to_create) - - try: - created_notes = hubspot.crm.objects.notes.batch_api.create( - batch_input_simple_public_object_batch_input_for_create=batch_input - ) - if not created_notes or not hasattr(created_notes, "results") or not created_notes.results: - raise Exception("Note creation returned no results") - created_ids = [n.id for n in created_notes.results] - logger.info(f"Successfully created {len(created_ids)} note(s) with IDs: {created_ids}") - except Exception as e: - logger.error(f"Notes creation failed: {str(e)}") - raise Exception(f"Notes creation failed {e}") - - def update_notes(self, note_ids: List[str], values_to_update: Dict[str, Any]) -> None: - hubspot = self.handler.connect() - notes_to_update = [HubSpotObjectBatchInput(id=nid, properties=values_to_update) for nid in note_ids] - batch_input = BatchInputSimplePublicObjectBatchInput(inputs=notes_to_update) - try: - updated = hubspot.crm.objects.notes.batch_api.update( - batch_input_simple_public_object_batch_input=batch_input - ) - logger.info(f"Notes with ID {[n.id for n in updated.results]} updated") - except Exception as e: - raise Exception(f"Notes update failed {e}") - - def delete_notes(self, note_ids: List[str]) -> None: - hubspot = self.handler.connect() - notes_to_delete = 
[HubSpotObjectId(id=nid) for nid in note_ids] - batch_input = BatchInputSimplePublicObjectId(inputs=notes_to_delete) - try: - hubspot.crm.objects.notes.batch_api.archive(batch_input_simple_public_object_id=batch_input) - logger.info("Notes deleted") - except Exception as e: - raise Exception(f"Notes deletion failed {e}") - - -class LeadsTable(HubSpotAPIResource): - """HubSpot Leads table for prospective customer records.""" - - # Reference: https://developers.hubspot.com/docs/api-reference/crm-leads-v3/guide - SEARCHABLE_COLUMNS: Set[str] = {"hs_lead_name", "hs_lead_type", "hs_lead_label", "id"} - ASSOCIATION_COLUMNS = {"primary_contact_id", "primary_company_id"} - - def meta_get_tables(self, table_name: str) -> Dict[str, Any]: - row_count = None - try: - self.handler.connect() - row_count = self.handler._estimate_table_rows("leads") - except Exception: - pass - return { - "TABLE_NAME": "leads", - "TABLE_TYPE": "BASE TABLE", - "TABLE_DESCRIPTION": "HubSpot leads representing prospective customer records", - "ROW_COUNT": row_count, - } - - def meta_get_columns(self, table_name: str) -> List[Dict[str, Any]]: - return self.handler._get_default_meta_columns("leads") - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - ) -> pd.DataFrame: - leads_df = pd.json_normalize( - self.get_leads( - limit=limit, - where_conditions=conditions, - properties=targets, - search_filters=search_filters, - search_sorts=search_sorts, - allow_search=allow_search, - ) - ) - if leads_df.empty: - leads_df = pd.DataFrame(columns=targets or self._get_default_lead_columns()) - return leads_df - - def add(self, lead_data: List[dict]): - self.create_leads(lead_data) - - def modify(self, conditions: List[FilterCondition], values: Dict) -> None: - 
normalized_conditions = _normalize_filter_conditions(conditions) - leads_df = pd.json_normalize(self.get_leads(limit=200, where_conditions=normalized_conditions)) - - if leads_df.empty: - raise ValueError("No leads retrieved from HubSpot to evaluate update conditions.") - - executor_conditions = _normalize_conditions_for_executor(normalized_conditions) - update_query_executor = UPDATEQueryExecutor(leads_df, executor_conditions) - filtered_df = update_query_executor.execute_query() - - if filtered_df.empty: - raise ValueError(f"No leads found matching WHERE conditions: {conditions}.") - - lead_ids = filtered_df["id"].astype(str).tolist() - logger.info(f"Updating {len(lead_ids)} lead(s) matching WHERE conditions") - self.update_leads(lead_ids, values) - - def remove(self, conditions: List[FilterCondition]) -> None: - normalized_conditions = _normalize_filter_conditions(conditions) - leads_df = pd.json_normalize(self.get_leads(limit=200, where_conditions=normalized_conditions)) - - if leads_df.empty: - raise ValueError("No leads retrieved from HubSpot to evaluate delete conditions.") - - executor_conditions = _normalize_conditions_for_executor(normalized_conditions) - delete_query_executor = DELETEQueryExecutor(leads_df, executor_conditions) - filtered_df = delete_query_executor.execute_query() - - if filtered_df.empty: - raise ValueError(f"No leads found matching WHERE conditions: {conditions}.") - - lead_ids = filtered_df["id"].astype(str).tolist() - logger.info(f"Deleting {len(lead_ids)} lead(s) matching WHERE conditions") - self.delete_leads(lead_ids) - - def get_columns(self) -> List[str]: - return self._get_default_lead_columns() - - @staticmethod - def _get_default_lead_columns() -> List[str]: - return [ - "id", - "hs_lead_name", - "hs_lead_type", - "hs_lead_label", - "hubspot_owner_id", - "hs_timestamp", - "primary_contact_id", - "primary_company_id", - "createdate", - "lastmodifieddate", - ] - - def get_leads( - self, - limit: Optional[int] = None, - 
where_conditions: Optional[List] = None, - properties: Optional[List[str]] = None, - search_filters: Optional[List[Dict[str, Any]]] = None, - search_sorts: Optional[List[Dict[str, Any]]] = None, - allow_search: bool = True, - **kwargs, - ) -> List[Dict]: - normalized_conditions = _normalize_filter_conditions(where_conditions) - hubspot = self.handler.connect() - - requested_properties = properties or [] - default_properties = self._get_default_lead_columns() - columns = requested_properties or default_properties - association_targets, hubspot_columns = _prepare_association_request("leads", columns) - hubspot_properties = _build_hubspot_properties(hubspot_columns) - - MAX_ASSOCIATION_SCAN = 500 - if not normalized_conditions and limit is not None and limit > MAX_ASSOCIATION_SCAN: - msg = ( - "Direct FK joins on HubSpot leads using foreign key columns " - "(e.g. primary_contact_id, primary_company_id) are not supported. " - "The HubSpot API represents relationships through association tables.\n\n" - "Please rewrite your query using the appropriate association table." 
- ) - raise ValueError(msg) - - api_kwargs = {**kwargs, "properties": hubspot_properties} - if limit is not None: - api_kwargs["limit"] = limit - else: - api_kwargs.pop("limit", None) - if association_targets: - api_kwargs["associations"] = association_targets - - if allow_search and (search_filters or search_sorts or normalized_conditions): - filters = search_filters - if filters is None and normalized_conditions: - filters = _build_hubspot_search_filters(normalized_conditions, self.SEARCHABLE_COLUMNS) - if filters is not None or search_sorts is not None: - if association_targets: - logger.debug("HubSpot search API does not include associations for leads.") - search_results = self._search_leads_by_conditions( - hubspot, - filters, - hubspot_properties, - limit, - search_sorts, - hubspot_columns, - association_targets, - ) - logger.info(f"Retrieved {len(search_results)} leads from HubSpot via search API") - return search_results - - leads = self.handler._get_objects_all("leads", **api_kwargs) - leads_dict = [] - MAX_SCAN_ROWS = 10_000 - effective_limit = min(limit, MAX_SCAN_ROWS) if limit is not None else MAX_SCAN_ROWS - for lead in leads: - try: - row = self._lead_to_dict(lead, hubspot_columns, association_targets) - leads_dict.append(row) - if len(leads_dict) >= effective_limit: - break - except Exception: - continue - - logger.info(f"Retrieved {len(leads_dict)} leads from HubSpot") - return leads_dict - - def _search_leads_by_conditions( - self, - hubspot: HubSpot, - filters: Optional[List[Dict[str, Any]]], - properties: List[str], - limit: Optional[int], - sorts: Optional[List[Dict[str, Any]]], - columns: List[str], - association_targets: List[str], - ) -> List[Dict[str, Any]]: - return _execute_hubspot_search( - hubspot.crm.objects.search_api, - filters or [], - properties, - limit, - lambda obj: self._lead_to_dict(obj, columns, association_targets), - sorts=sorts, - object_type="leads", - ) - - def _lead_to_dict( - self, - lead: Any, - columns: 
Optional[List[str]] = None, - association_targets: Optional[List[str]] = None, - ) -> Dict[str, Any]: - columns = columns or self._get_default_lead_columns() - row = self._object_to_dict(lead, columns) - if association_targets: - row = enrich_object_with_associations(lead, "leads", row) - return row - - def create_leads(self, leads_data: List[Dict[str, Any]]) -> None: - if not leads_data: - raise ValueError("No lead data provided for creation") - - logger.info(f"Attempting to create {len(leads_data)} lead(s)") - hubspot = self.handler.connect() - leads_to_create = [HubSpotObjectInputCreate(properties=lead) for lead in leads_data] - batch_input = BatchInputSimplePublicObjectBatchInputForCreate(inputs=leads_to_create) - - try: - created_leads = hubspot.crm.objects.leads.batch_api.create( - batch_input_simple_public_object_batch_input_for_create=batch_input - ) - if not created_leads or not hasattr(created_leads, "results") or not created_leads.results: - raise Exception("Lead creation returned no results") - created_ids = [lead.id for lead in created_leads.results] - logger.info(f"Successfully created {len(created_ids)} lead(s) with IDs: {created_ids}") - except Exception as e: - logger.error(f"Leads creation failed: {str(e)}") - raise Exception(f"Leads creation failed {e}") - - def update_leads(self, lead_ids: List[str], values_to_update: Dict[str, Any]) -> None: - hubspot = self.handler.connect() - leads_to_update = [HubSpotObjectBatchInput(id=lid, properties=values_to_update) for lid in lead_ids] - batch_input = BatchInputSimplePublicObjectBatchInput(inputs=leads_to_update) - try: - updated = hubspot.crm.objects.leads.batch_api.update( - batch_input_simple_public_object_batch_input=batch_input - ) - logger.info(f"Leads with ID {[lead.id for lead in updated.results]} updated") - except Exception as e: - raise Exception(f"Leads update failed {e}") - - def delete_leads(self, lead_ids: List[str]) -> None: - hubspot = self.handler.connect() - leads_to_delete = 
[HubSpotObjectId(id=lid) for lid in lead_ids] - batch_input = BatchInputSimplePublicObjectId(inputs=leads_to_delete) - try: - hubspot.crm.objects.leads.batch_api.archive(batch_input_simple_public_object_id=batch_input) - logger.info("Leads deleted") - except Exception as e: - raise Exception(f"Leads deletion failed {e}") diff --git a/mindsdb/integrations/handlers/hubspot_handler/icon.svg b/mindsdb/integrations/handlers/hubspot_handler/icon.svg deleted file mode 100644 index c4f5aa2d226..00000000000 --- a/mindsdb/integrations/handlers/hubspot_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/hubspot_handler/requirements.txt b/mindsdb/integrations/handlers/hubspot_handler/requirements.txt deleted file mode 100644 index be3f5a55e1d..00000000000 --- a/mindsdb/integrations/handlers/hubspot_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -hubspot-api-client==12.0.0 diff --git a/mindsdb/integrations/handlers/hubspot_handler/tests/test_hubspot_handler.py b/mindsdb/integrations/handlers/hubspot_handler/tests/test_hubspot_handler.py deleted file mode 100644 index d6f12d110e7..00000000000 --- a/mindsdb/integrations/handlers/hubspot_handler/tests/test_hubspot_handler.py +++ /dev/null @@ -1,34 +0,0 @@ -import os -import unittest - -from mindsdb.integrations.handlers.hubspot_handler.hubspot_handler import HubspotHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -class HubSpotHandlerTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.kwargs = {"connection_data": {"access_token": os.environ.get("ACCESS_TOKEN")}} - cls.handler = HubspotHandler("test_hubspot_handler", **cls.kwargs) - - def test_0_check_connection(self): - assert self.handler.check_connection() - - def test_1_get_tables(self): - tables = self.handler.get_tables() - assert tables.type is not RESPONSE_TYPE.ERROR - - def test_2_select_companies_query(self): - query = "SELECT * FROM 
test_hubspot_handler.companies" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_3_select_contacts_query(self): - query = "SELECT * FROM test_hubspot_handler.contacts" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE - - def test_4_select_deals_query(self): - query = "SELECT * FROM test_hubspot_handler.deals" - result = self.handler.native_query(query) - assert result.type is RESPONSE_TYPE.TABLE diff --git a/mindsdb/integrations/handlers/huggingface_api_handler/README.md b/mindsdb/integrations/handlers/huggingface_api_handler/README.md deleted file mode 100644 index c7ea1660339..00000000000 --- a/mindsdb/integrations/handlers/huggingface_api_handler/README.md +++ /dev/null @@ -1,227 +0,0 @@ ---- -title: Hugging Face Inference API -sidebarTitle: Hugging Face Inference API ---- - -This documentation describes the integration of MindsDB with [Hugging Face Inference API](https://huggingface.co/inference-api/serverless). -The integration allows for the deployment of Hugging Face models through Inference API within MindsDB, providing the models with access to data from various data sources. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To use Hugging Face Inference API within MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). -3. Obtain the API key for Hugging Face Inference API required to deploy and use Hugging Face models through Inference API within MindsDB. Generate tokens in the `Settings -> Access Tokens` tab of the Hugging Face account. 
- -## Setup - -Create an AI engine from the [Hugging Face Inference API handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/huggingface_api_handler). - -```sql -CREATE ML_ENGINE huggingface_api_engine -FROM huggingface_api -USING - huggingface_api_api_key = 'api-key-value'; -``` - -Create a model using `huggingface_api_engine` as an engine. - -```sql -CREATE MODEL huggingface_api_model -PREDICT target_column -USING - engine = 'huggingface_api_engine', -- engine name as created via CREATE ML_ENGINE - task = 'task_name', -- choose one of 'text-classification', 'text-generation', 'question-answering', 'sentence-similarity', 'zero-shot-classification', 'summarization', 'fill-mask', 'image-classification', 'object-detection', 'automatic-speech-recognition', 'audio-classification' - input_column = 'column_name', -- column that stores input/question to the model - labels = ['label 1', 'label 2']; -- labels used to classify data (used for classification tasks) -``` - -The following parameters are supported in the `USING` clause of the `CREATE MODEL` statement: - -| Parameter | Required | Description | -| ------------------ | -------------------------------------------- | --------------- | -| `engine` | Yes | It is the name of the ML engine created with the `CREATE ML_ENGINE` statement.| -| `task` | Only if `model_name` is not provided | It describes a task to be performed.| -| `model_name` | Only if `task` is not provided | It specifies a model to be used.| -| `input_column` | Yes | It is the name of the column that stores input to the model.| -| `endpoint` | No | It defines the endpoint to use for API calls. If not specified, the hosted Inference API from Hugging Face will be used.| -| `options` | No | It is a JSON object containing additional options to pass to the API call. 
More information about the available options for each task can be found [here](https://huggingface.co/docs/api-inference/detailed_parameters).| -| `parameters` | No | It is a JSON object containing additional parameters to pass to the API call. More information about the available parameters for each task can be found [here](https://huggingface.co/docs/api-inference/detailed_parameters).| -| `context_column` | Only if `task` is `question-answering` | It is used for the `question-answering` task to provide context to the question.| -| `input_column2` | Only if `task` is `sentence-similarity` | It is used for the `sentence-similarity` task to provide the second input sentence for comparison.| -| `candidate_labels` | Only if `task` is `zero-shot-classification` | It is used for the `zero-shot-classification` task to classify input data according to provided labels.| - -## Usage - -The following usage examples utilize `huggingface_api_engine` to create a model with the `CREATE MODEL` statement. - -Create a model to classify input text as spam or ham. - -```sql -CREATE MODEL spam_classifier -PREDICT is_spam -USING - engine = 'huggingface_api_engine', - task = 'text-classification', - column = 'text'; -``` - -Query the model to get predictions. 
- -```sql -SELECT text, is_spam -FROM spam_classifier -WHERE text = 'Subscribe to this channel asap'; -``` - -Here is the output: - -```sql -+--------------------------------+---------+ -| text | is_spam | -+--------------------------------+---------+ -| Subscribe to this channel asap | spam | -+--------------------------------+---------+ -``` - - - -Find more quick examples below: - - - - -```sql -CREATE MODEL mindsdb.hf_text_classifier -PREDICT sentiment -USING - task = 'text-classification', - engine = 'hf_api_engine', - input_column = 'text'; -``` - - - -```sql -CREATE MODEL mindsdb.hf_fill_mask -PREDICT sequence -USING - task = 'fill-mask', - engine = 'hf_api_engine', - input_column = 'text'; -``` - - - -```sql -CREATE MODEL mindsdb.hf_summarizer -PREDICT summary -USING - task = 'summarization', - engine = 'hf_api_engine', - input_column = 'text'; -``` - - - -```sql -CREATE MODEL mindsdb.hf_text_generator -PREDICT generated_text -USING - task = 'text-generation', - engine = 'hf_api_engine', - input_column = 'text'; -``` - - - -```sql -CREATE MODEL mindsdb.hf_question_answerer -PREDICT answer -USING - task = 'question-answering', - engine = 'hf_api_engine', - input_column = 'question', - context_column = 'context'; -``` - - - -```sql -CREATE MODEL mindsdb.hf_sentence_similarity -PREDICT similarity -USING - task = 'sentence-similarity', - engine = 'hf_api_engine', - input_column = 'sentence1', - input_column2 = 'sentence2'; -``` - - - -```sql -CREATE MODEL mindsdb.hf_zero_shot_classifier -PREDICT label -USING - task = 'zero-shot-classification', - engine = 'hf_api_engine', - input_column = 'text', - candidate_labels = ['label1', 'label2', 'label3']; -``` - - - -```sql -CREATE MODEL mindsdb.hf_image_classifier -PREDICT label -USING - task = 'image-classification', - engine = 'hf_api_engine', - input_column = 'image_url'; -``` - - - -```sql -CREATE MODEL mindsdb.hf_object_detector -PREDICT objects -USING - task = 'object-detection', - engine = 'hf_api_engine', - 
input_column = 'image_url'; -``` - - - -```sql -CREATE MODEL mindsdb.hf_speech_recognizer -PREDICT transcription -USING - task = 'automatic-speech-recognition', - engine = 'hf_api_engine', - input_column = 'audio_url'; -``` - - - -```sql -CREATE MODEL mindsdb.hf_audio_classifier -PREDICT label -USING - task = 'audio-classification', - engine = 'hf_api_engine', - input_column = 'audio_url'; -``` - - - - - - - - -**Next Steps** - -Follow [this link](https://docs.mindsdb.com/sql/tutorials/hugging-face-inference-api-examples) to see more use case examples. - diff --git a/mindsdb/integrations/handlers/huggingface_api_handler/__about__.py b/mindsdb/integrations/handlers/huggingface_api_handler/__about__.py deleted file mode 100644 index d1e79e53e1b..00000000000 --- a/mindsdb/integrations/handlers/huggingface_api_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Hugging Face API handler' -__package_name__ = 'mindsdb_huggingface_api_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Hugging Face Inference API" -__author__ = 'Minura Punchihewa' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/huggingface_api_handler/__init__.py b/mindsdb/integrations/handlers/huggingface_api_handler/__init__.py deleted file mode 100644 index b31700d2478..00000000000 --- a/mindsdb/integrations/handlers/huggingface_api_handler/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .huggingface_api_handler import HuggingFaceInferenceAPIHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'Hugging Face API' -name = 'huggingface_api' -type = HANDLER_TYPE.ML -icon_path = "icon.svg" -permanent = False - 
-__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/huggingface_api_handler/exceptions.py b/mindsdb/integrations/handlers/huggingface_api_handler/exceptions.py deleted file mode 100644 index 33837ffea5c..00000000000 --- a/mindsdb/integrations/handlers/huggingface_api_handler/exceptions.py +++ /dev/null @@ -1,6 +0,0 @@ -class UnsupportedTaskException(Exception): - pass - - -class InsufficientParametersException(Exception): - pass diff --git a/mindsdb/integrations/handlers/huggingface_api_handler/huggingface_api_handler.py b/mindsdb/integrations/handlers/huggingface_api_handler/huggingface_api_handler.py deleted file mode 100644 index 4cb1a5c2a0f..00000000000 --- a/mindsdb/integrations/handlers/huggingface_api_handler/huggingface_api_handler.py +++ /dev/null @@ -1,243 +0,0 @@ -import json -from typing import Optional, Dict -import pandas as pd -from huggingface_hub import HfApi -from huggingface_hub import hf_hub_download -from hugging_py_face import NLP, ComputerVision, AudioProcessing, get_in_df_supported_tasks - -from mindsdb.integrations.libs.base import BaseMLEngine -from mindsdb.integrations.utilities.handler_utils import get_api_key -from .exceptions import UnsupportedTaskException, InsufficientParametersException - - -class HuggingFaceInferenceAPIHandler(BaseMLEngine): - """ - Integration with the Hugging Face Inference API. 
- """ - - name = 'huggingface_api' - - @staticmethod - def create_validation(target, args=None, **kwargs): - args = args['using'] - - if 'input_column' not in args: - raise InsufficientParametersException('input_column has to be specified') - - if 'model_name' not in args: - # detect model by task - task = args.get('task') - if task is None: - raise InsufficientParametersException('model_name or task have to be specified') - - args['model_name'] = None - else: - # detect task by model - hf_api = HfApi() - metadata = hf_api.model_info(args['model_name']) - - if 'task' not in args: - args['task'] = metadata.pipeline_tag - - if args['task'] not in get_in_df_supported_tasks(): - raise UnsupportedTaskException(f'The task {args["task"]} is not supported by the Hugging Face Inference API engine.') - - if args['task'] == 'zero-shot-classification': - if 'candidate_labels' not in args: - raise Exception('"candidate_labels" is required for zero-shot-classification') - - if args['task'] == 'sentence-similarity': - if 'input_column2' not in args: - raise InsufficientParametersException('input_column2 has to be specified') - - def create(self, target: str, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None: - if 'using' not in args: - raise InsufficientParametersException("Hugging Face Inference engine requires a USING clause! 
Refer to its documentation for more details.") - - args = args['using'] - args['target'] = target - - if 'options' not in args: - args['options'] = {} - - if 'parameters' not in args: - args['parameters'] = {} - - if args['model_name'] is not None: - # config.json - config = {} - try: - config_path = hf_hub_download(args['model_name'], 'config.json') - config = json.load(open(config_path)) - except Exception: - pass - - if 'max_length' in args: - args['options']['max_length'] = args['max_length'] - elif 'max_position_embeddings' in config: - args['options']['max_length'] = config['max_position_embeddings'] - elif 'max_length' in config: - args['options']['max_length'] = config['max_length'] - - labels_default = config.get('id2label', {}) - labels_map = {} - if 'labels' in args: - for num, value in labels_default.items(): - if num.isdigit(): - num = int(num) - labels_map[value] = args['labels'][num] - args['labels_map'] = labels_map - if 'task_specific_params' in config: - args['task_specific_params'] = config['task_specific_params'] - - # for summarization - if 'min_output_length' in args: - args['options']['min_output_length'] = args['min_output_length'] - - if 'max_output_length' in args: - args['options']['max_output_length'] = args['max_output_length'] - - self.model_storage.json_set('args', args) - - def predict(self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None: - args = self.model_storage.json_get('args') - api_key = get_api_key('huggingface_api', args, self.engine_storage, strict=False) - - input_column = args['input_column'] - model_name = args['model_name'] - endpoint = args['endpoint'] if 'endpoint' in args else None - options = args['options'] if 'options' in args else None - parameters = args['parameters'] if 'parameters' in args else None - - if args['task'] == 'text-classification': - nlp = NLP(api_key, endpoint) - result_df = nlp.text_classification_in_df( - df, - input_column, - options, - model_name, - ) - labels_map = 
args.get('labels_map') - - result_df['predictions'] = result_df['predictions'].apply(lambda x: labels_map.get(x, x)) - - elif args['task'] == 'fill-mask': - nlp = NLP(api_key, endpoint) - result_df = nlp.fill_mask_in_df( - df, - input_column, - options, - model_name - ) - - elif args['task'] == 'summarization': - nlp = NLP(api_key, endpoint) - result_df = nlp.summarization_in_df( - df, - input_column, - parameters, - options, - model_name - ) - - elif args['task'] == 'text-generation': - nlp = NLP(api_key, endpoint) - result_df = nlp.text_generation_in_df( - df, - input_column, - parameters, - options, - model_name - ) - - elif args['task'] == 'question-answering': - nlp = NLP(api_key, endpoint) - result_df = nlp.question_answering_in_df( - df, - input_column, - args['context_column'], - model_name - ) - - elif args['task'] == 'sentence-similarity': - nlp = NLP(api_key, endpoint) - result_df = nlp.sentence_similarity_in_df( - df, - input_column, - args['input_column2'], - options, - model_name - ) - - elif args['task'] == 'zero-shot-classification': - nlp = NLP(api_key, endpoint) - result_df = nlp.zero_shot_classification_in_df( - df, - input_column, - args['candidate_labels'], - parameters, - options, - model_name - ) - - elif args['task'] == 'translation': - lang_in = args['lang_input'] - lang_out = args['lang_output'] - - input_origin = None - if 'task_specific_params' in args: - task = f"translation_{lang_in}_to_{lang_out}" - if task in args['task_specific_params'] and 'prefix' in args['task_specific_params'][task]: - # inject prefix to data - prefix = args['task_specific_params'][task]['prefix'] - input_origin = df[input_column] - df[input_column] = prefix + input_origin - # don't pick up model in hugging_py_face - lang_in = lang_out = None - - nlp = NLP(api_key, endpoint) - result_df = nlp.translation_in_df( - df, - input_column, - lang_in, - lang_out, - options, - model_name - ) - if input_origin is not None: - df[input_column] = input_origin - - elif 
args['task'] == 'image-classification': - cp = ComputerVision(api_key, endpoint) - result_df = cp.image_classification_in_df( - df, - input_column, - model_name - ) - - elif args['task'] == 'object-detection': - cp = ComputerVision(api_key, endpoint) - result_df = cp.object_detection_in_df( - df, - input_column, - model_name - ) - - elif args['task'] == 'automatic-speech-recognition': - ap = AudioProcessing(api_key, endpoint) - result_df = ap.automatic_speech_recognition_in_df( - df, - input_column, - model_name - ) - - elif args['task'] == 'audio-classification': - ap = AudioProcessing(api_key, endpoint) - result_df = ap.audio_classification_in_df( - df, - input_column, - model_name - ) - - result_df = result_df.rename(columns={'predictions': args['target']}) - return result_df diff --git a/mindsdb/integrations/handlers/huggingface_api_handler/icon.svg b/mindsdb/integrations/handlers/huggingface_api_handler/icon.svg deleted file mode 100644 index e64183cefaf..00000000000 --- a/mindsdb/integrations/handlers/huggingface_api_handler/icon.svg +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/huggingface_api_handler/requirements.txt b/mindsdb/integrations/handlers/huggingface_api_handler/requirements.txt deleted file mode 100644 index 68703475ddb..00000000000 --- a/mindsdb/integrations/handlers/huggingface_api_handler/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -hugging_py_face -huggingface-hub -filelock>=3.20.3 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/mindsdb/integrations/handlers/huggingface_api_handler/tests/__init__.py b/mindsdb/integrations/handlers/huggingface_api_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/huggingface_api_handler/tests/test_huggingface_api.py b/mindsdb/integrations/handlers/huggingface_api_handler/tests/test_huggingface_api.py 
deleted file mode 100644 index aa357b3a65d..00000000000 --- a/mindsdb/integrations/handlers/huggingface_api_handler/tests/test_huggingface_api.py +++ /dev/null @@ -1,46 +0,0 @@ -from unittest.mock import patch -import pandas as pd - -from mindsdb_sql_parser import parse_sql - -from tests.unit.executor_test_base import BaseExecutorTest - - -class TestHuggingFaceAPI(BaseExecutorTest): - def run_sql(self, sql): - ret = self.command_executor.execute_command(parse_sql(sql)) - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_text_classification(self, mock_handler): - self.run_sql("CREATE DATABASE proj") - - texts = ["I like you. I love you", "I don't like you. I hate you"] - df = pd.DataFrame(texts, columns=["texts"]) - - self.set_handler(mock_handler, name="pg", tables={"df": df}) - - self.run_sql( - """ - CREATE MODEL proj.test_hfapi_text_classification - PREDICT sentiment - USING - task = 'text-classification', - engine = 'hf_api_engine', - api_key = '', - input_column = 'text' - """ - ) - - result_df = self.run_sql( - """ - SELECT sentiment - FROM proj.test_hfapi_text_classification - WHERE - text='I like you. 
I love you' - """ - ) - - assert "positive" in result_df["sentiment"].iloc[0].lower() diff --git a/mindsdb/integrations/handlers/huggingface_api_handler/tests/test_huggingface_api_handler.py b/mindsdb/integrations/handlers/huggingface_api_handler/tests/test_huggingface_api_handler.py deleted file mode 100644 index 9f607ba81d3..00000000000 --- a/mindsdb/integrations/handlers/huggingface_api_handler/tests/test_huggingface_api_handler.py +++ /dev/null @@ -1,9 +0,0 @@ -import unittest - - -class HuggingFaceAPIHandlerTest(unittest.TestCase): - pass - - -if __name__ == '__main__': - unittest.main() diff --git a/mindsdb/integrations/handlers/huggingface_handler/README.md b/mindsdb/integrations/handlers/huggingface_handler/README.md deleted file mode 100644 index 3a38cbaae97..00000000000 --- a/mindsdb/integrations/handlers/huggingface_handler/README.md +++ /dev/null @@ -1,79 +0,0 @@ ---- -title: Hugging Face -sidebarTitle: Hugging Face ---- - -This documentation describes the integration of MindsDB with [Hugging Face](https://huggingface.co/), a company that develops computer tools for building applications using machine learning. -The integration allows for the deployment of Hugging Face models within MindsDB, providing the models with access to data from various data sources. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To use Hugging Face within MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -## Setup - -Create an AI engine from the [Hugging Face handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/huggingface_handler). 
- -```sql -CREATE ML_ENGINE huggingface_engine -FROM huggingface -USING huggingface_api_api_key = 'hf_xxx'; -``` - -Create a model using `huggingface_engine` as an engine. - -```sql -CREATE MODEL huggingface_model -PREDICT target_column -USING - engine = 'huggingface_engine', -- engine name as created via CREATE ML_ENGINE - model_name = 'hf_hub_model_name', -- choose one of PyTorch models from the Hugging Face Hub - task = 'task_name', -- choose one of 'text-classification', 'text-generation', 'zero-shot-classification', 'translation', 'summarization', 'text2text-generation', 'fill-mask' - input_column = 'column_name', -- column that stores input/question to the model - labels = ['label 1', 'label 2']; -- labels used to classify data (used for classification tasks) -``` - -## Usage - -The following usage examples utilize `huggingface_engine` to create a model with the `CREATE MODEL` statement. - -Create a model to classify input text as spam or ham. - -```sql -CREATE MODEL spam_classifier -PREDICT spam_or_ham -USING - engine = 'huggingface_engine', - model_name = 'mrm8488/bert-tiny-finetuned-sms-spam-detection', - task = 'text-classification', - input_column = 'text', - labels = ['ham', 'spam']; -``` - -Query the model to get predictions. - -```sql -SELECT text, spam_or_ham -FROM spam_classifier -WHERE text = 'Subscribe to this channel asap'; -``` - -Here is the output: - -```sql -+--------------------------------+-------------+ -| text | spam_or_ham | -+--------------------------------+-------------+ -| Subscribe to this channel asap | spam | -+--------------------------------+-------------+ -``` - - - -**Next Steps** - -Follow [this link](https://docs.mindsdb.com/sql/tutorials/hugging-face-examples) to see more use case examples. 
- diff --git a/mindsdb/integrations/handlers/huggingface_handler/__about__.py b/mindsdb/integrations/handlers/huggingface_handler/__about__.py deleted file mode 100644 index ee347c4d128..00000000000 --- a/mindsdb/integrations/handlers/huggingface_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Hugging Face handler' -__package_name__ = 'mindsdb_huggingface_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Higging Face" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/huggingface_handler/__init__.py b/mindsdb/integrations/handlers/huggingface_handler/__init__.py deleted file mode 100644 index b9bdfd121d8..00000000000 --- a/mindsdb/integrations/handlers/huggingface_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .huggingface_handler import HuggingFaceHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - - -title = "Hugging Face" -name = "huggingface" -type = HANDLER_TYPE.ML -icon_path = "icon.svg" -permanent = False -execution_method = "subprocess_keep" - -__all__ = ["Handler", "version", "name", "type", "title", "description", "import_error", "icon_path"] diff --git a/mindsdb/integrations/handlers/huggingface_handler/finetune.py b/mindsdb/integrations/handlers/huggingface_handler/finetune.py deleted file mode 100644 index 119e0d881f0..00000000000 --- a/mindsdb/integrations/handlers/huggingface_handler/finetune.py +++ /dev/null @@ -1,203 +0,0 @@ -import evaluate -import nltk -import numpy as np -from datasets import Dataset -from transformers import ( - AutoConfig, - AutoModelForSeq2SeqLM, - AutoModelForSequenceClassification, - 
AutoTokenizer, - DataCollatorForSeq2Seq, - Seq2SeqTrainingArguments, - Trainer, - TrainingArguments, -) - -# todo add support for question answering task -# todo add support for fill mask -# todo add support for text_generation (causal language model) -# todo add support for text_2_text generation - - -def _finetune_cls(df, args): - df = df.rename(columns={args["target"]: "labels", args["input_column"]: "text"}) - tokenizer_from = args.get("using", {}).get("tokenizer_from", args["model_name"]) - tokenizer = AutoTokenizer.from_pretrained(tokenizer_from) - dataset = Dataset.from_pandas(df) - - def _tokenize_text_cls_fn(examples): - return tokenizer(examples["text"], padding="max_length", truncation=True) - - tokenized_datasets = dataset.map(_tokenize_text_cls_fn, batched=True) - ds = tokenized_datasets.shuffle(seed=42).train_test_split(test_size=args.get("eval_size", 0.1)) - train_ds = ds["train"] - eval_ds = ds["test"] - - ft_args = args.get("using", {}).get("trainer_args", {}) - ft_args["output_dir"] = args["model_folder"] - - n_labels = len(args["labels_map"]) - # todo replace for prod - assert n_labels == df["labels"].nunique(), ( - f"Label mismatch! Ensure labels match what the model was originally trained on. Found {df['labels'].nunique()} classes, expected {n_labels}." - ) # noqa - # TODO: ideally check that labels are a subset of the original ones, too. 
- config = AutoConfig.from_pretrained(args["model_name"]) - model = AutoModelForSequenceClassification.from_pretrained(args["model_name"], config=config) - metric = evaluate.load("accuracy") - training_args = TrainingArguments(**ft_args) - - def _compute_metrics(eval_pred): - logits, labels = eval_pred - predictions = np.argmax(logits, axis=-1) - return metric.compute(predictions=predictions, references=labels) - - # generate trainer and finetune - trainer = Trainer( - model=model, - args=training_args, - train_dataset=train_ds, - eval_dataset=eval_ds, - compute_metrics=_compute_metrics, - ) - - return tokenizer, trainer - - -# TODO: merge with summarization? -def _finetune_translate(df, args): - config = AutoConfig.from_pretrained(args["model_name"]) - df = df.rename(columns={args["target"]: "translation", args["input_column"]: "text"}) - tokenizer_from = args.get("using", {}).get("tokenizer_from", args["model_name"]) - tokenizer = AutoTokenizer.from_pretrained(tokenizer_from) - dataset = Dataset.from_pandas(df) - - def _tokenize_translate_fn(examples): - source_lang = args["lang_input"] - target_lang = args["lang_output"] - max_target_length = config.task_specific_params["summarization"]["max_length"] - prefix = f"translate {source_lang} to {target_lang}: " - inputs = [prefix + ex for ex in examples["text"]] - targets = [ex for ex in examples["translation"]] - model_inputs = tokenizer(inputs, max_length=config.n_positions, truncation=True) - - # Setup the tokenizer for targets - with tokenizer.as_target_tokenizer(): - labels = tokenizer(targets, max_length=max_target_length, truncation=True) - - model_inputs["labels"] = labels["input_ids"] - return model_inputs - - tokenized_datasets = dataset.map(_tokenize_translate_fn, batched=True) - ds = tokenized_datasets.shuffle(seed=42).train_test_split(test_size=args.get("eval_size", 0.1)) - train_ds = ds["train"] - eval_ds = ds["test"] - ft_args = args.get("using", {}).get("trainer_args", {}) - ft_args["output_dir"] = 
args["model_folder"] - ft_args["predict_with_generate"] = True - - model = AutoModelForSeq2SeqLM.from_pretrained(args["model_name"], config=config) - model.resize_token_embeddings(len(tokenizer)) - training_args = Seq2SeqTrainingArguments(**ft_args) - data_collator = DataCollatorForSeq2Seq(tokenizer, model=model) - - # generate trainer and finetune - trainer = Trainer( - model=model, - args=training_args, - train_dataset=train_ds, - eval_dataset=eval_ds, - data_collator=data_collator, - # compute_metrics=_compute_metrics, - ) - - return tokenizer, trainer - - -def _finetune_summarization(df, args): - df = df.rename(columns={args["target"]: "summary", args["input_column"]: "text"}) - tokenizer_from = args.get("using", {}).get("tokenizer_from", args["model_name"]) - tokenizer = AutoTokenizer.from_pretrained(tokenizer_from) - dataset = Dataset.from_pandas(df) - config = AutoConfig.from_pretrained(args["model_name"]) - - def _tokenize_summarize_fn(examples): - prefix = "summarize: " if "t5" in args["model_name"] else "" - inputs = [prefix + doc for doc in examples["text"]] - model_inputs = tokenizer( - inputs, - padding="max_length", - truncation=True, - max_length=config.max_position_embeddings, - pad_to_max_length=True, - ) # noqa - labels = tokenizer( - text_target=examples["summary"], - max_length=config.max_position_embeddings, - truncation=True, - ) # noqa - model_inputs["labels"] = labels["input_ids"] - return model_inputs - - tokenized_datasets = dataset.map(_tokenize_summarize_fn, batched=True) - ds = tokenized_datasets.shuffle(seed=42).train_test_split(test_size=args.get("eval_size", 0.1)) - train_ds = ds["train"] - eval_ds = ds["test"] - - ft_args = args.get("using", {}).get("trainer_args", {}) - ft_args["output_dir"] = args["model_folder"] - ft_args["predict_with_generate"] = True - - model = AutoModelForSeq2SeqLM.from_pretrained(args["model_name"], config=config) - metric = evaluate.load("rouge") - training_args = Seq2SeqTrainingArguments(**ft_args) - 
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model) - - def _compute_metrics(eval_pred): - # ref: github.com/huggingface/notebooks/blob/main/examples/summarization.ipynb - predictions, labels = eval_pred - decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True) - decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True) - - # Rogue expects a newline after each sentence - decoded_preds = ["\n".join(nltk.sent_tokenize(pred.strip())) for pred in decoded_preds] - decoded_labels = ["\n".join(nltk.sent_tokenize(label.strip())) for label in decoded_labels] - - result = metric.compute( - predictions=decoded_preds, - references=decoded_labels, - use_stemmer=True, - use_aggregator=True, - ) - result = {key: value * 100 for key, value in result.items()} - prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions] - result["gen_len"] = np.mean(prediction_lens) # todo: remove? - return {k: round(v, 4) for k, v in result.items()} - - # generate trainer and finetune - trainer = Trainer( - model=model, - args=training_args, - train_dataset=train_ds, - eval_dataset=eval_ds, - data_collator=data_collator, - compute_metrics=_compute_metrics, - ) - - return tokenizer, trainer - - -def _finetune_fill_mask(df, args): - raise NotImplementedError("Finetuning fill-mask models is not yet supported.") - - -def _finetune_text_generation(df, args): - raise NotImplementedError("Finetuning text-generation models is not yet supported.") - - -def _finetune_question_answering(df, args): - raise NotImplementedError("Finetuning question-answering models is not yet supported.") - - -def _finetune_text_2_text_generation(df, args): - raise NotImplementedError("Finetuning text-2-text generation models is not yet supported.") diff --git a/mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py b/mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py deleted file mode 100644 index 
c9b0f17a438..00000000000 --- a/mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +++ /dev/null @@ -1,360 +0,0 @@ -from typing import Dict, Optional - -import pandas as pd -import transformers -from huggingface_hub import HfApi - -from mindsdb.integrations.handlers.huggingface_handler.settings import FINETUNE_MAP -from mindsdb.integrations.libs.base import BaseMLEngine -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class HuggingFaceHandler(BaseMLEngine): - name = "huggingface" - - @staticmethod - def create_validation(target, args=None, **kwargs): - if "using" in args: - args = args["using"] - - hf_api = HfApi() - - # check model is pytorch based - metadata = hf_api.model_info(args["model_name"]) - if "pytorch" not in metadata.tags: - raise Exception( - "Currently only PyTorch models are supported (https://huggingface.co/models?library=pytorch&sort=downloads). To request another library, please contact us on our community slack (https://mindsdb.com/joincommunity)." 
- ) - - # check model task - supported_tasks = [ - "text-classification", - "text-generation", - "zero-shot-classification", - "translation", - "summarization", - "text2text-generation", - "fill-mask", - ] - - if metadata.pipeline_tag not in supported_tasks: - raise Exception( - f"Not supported task for model: {metadata.pipeline_tag}.\ - Should be one of {', '.join(supported_tasks)}" - ) - - if "task" not in args: - args["task"] = metadata.pipeline_tag - elif args["task"] != metadata.pipeline_tag: - raise Exception(f"Task mismatch for model: {args['task']}!={metadata.pipeline_tag}") - - input_keys = list(args.keys()) - - # task, model_name, input_column is essential - for key in ["task", "model_name", "input_column"]: - if key not in args: - raise Exception(f'Parameter "{key}" is required') - input_keys.remove(key) - - # check tasks input - - if args["task"] == "zero-shot-classification": - key = "candidate_labels" - if key not in args: - raise Exception('"candidate_labels" is required for zero-shot-classification') - input_keys.remove(key) - - if args["task"] == "translation": - keys = ["lang_input", "lang_output"] - for key in keys: - if key not in args: - raise Exception(f"{key} is required for translation") - input_keys.remove(key) - - if args["task"] == "summarization": - keys = ["min_output_length", "max_output_length"] - for key in keys: - if key not in args: - raise Exception(f"{key} is required for summarization") - input_keys.remove(key) - - # optional keys - for key in ["labels", "max_length", "truncation_policy"]: - if key in input_keys: - input_keys.remove(key) - - if len(input_keys) > 0: - raise Exception(f"Not expected parameters: {', '.join(input_keys)}") - - def create(self, target, args=None, **kwargs): - # TODO change BaseMLEngine api? 
- if "using" in args: - args = args["using"] - - args["target"] = target - - model_name = args["model_name"] - hf_model_storage_path = self.engine_storage.folder_get(model_name) # real - - if args["task"] == "translation": - args["task_proper"] = f"translation_{args['lang_input']}_to_{args['lang_output']}" - else: - args["task_proper"] = args["task"] - - logger.debug(f"Checking file system for {model_name}...") - - #### - # Check if pipeline has already been downloaded - try: - pipeline = transformers.pipeline( - task=args["task_proper"], model=hf_model_storage_path, tokenizer=hf_model_storage_path - ) - logger.debug("Model already downloaded!") - #### - # Otherwise download it - except (ValueError, OSError): - try: - logger.debug(f"Downloading {model_name}...") - pipeline = transformers.pipeline(task=args["task_proper"], model=model_name) - - pipeline.save_pretrained(hf_model_storage_path) - - logger.debug(f"Saved to {hf_model_storage_path}") - except Exception: - raise Exception( - "Error while downloading and setting up the model. Please try a different model. We're working on expanding the list of supported models, so we would appreciate it if you let us know about this in our community slack (https://mindsdb.com/joincommunity)." 
- ) # noqa - #### - - if "max_length" in args: - pass - elif "max_position_embeddings" in pipeline.model.config.to_dict().keys(): - args["max_length"] = pipeline.model.config.max_position_embeddings - elif "max_length" in pipeline.model.config.to_dict().keys(): - args["max_length"] = pipeline.model.config.max_length - else: - logger.debug("No max_length found!") - - labels_default = pipeline.model.config.id2label - labels_map = {} - if "labels" in args: - for num in labels_default.keys(): - labels_map[labels_default[num]] = args["labels"][num] - args["labels_map"] = labels_map - else: - for num in labels_default.keys(): - labels_map[labels_default[num]] = labels_default[num] - args["labels_map"] = labels_map - - # store and persist in model folder - self.model_storage.json_set("args", args) - - # persist changes to handler folder - self.engine_storage.folder_sync(model_name) - - # todo move infer tasks to a seperate file - def predict_text_classification(self, pipeline, item, args): - top_k = args.get("top_k", 1000) - - result = pipeline([item], top_k=top_k, truncation=True, max_length=args["max_length"])[0] - - final = {} - explain = {} - if type(result) == dict: - result = [result] - final[args["target"]] = args["labels_map"][result[0]["label"]] - for elem in result: - if args["labels_map"]: - explain[args["labels_map"][elem["label"]]] = elem["score"] - else: - explain[elem["label"]] = elem["score"] - final[f"{args['target']}_explain"] = explain - return final - - def predict_text_generation(self, pipeline, item, args): - result = pipeline([item], max_length=args["max_length"])[0] - - final = {} - final[args["target"]] = result["generated_text"] - - return final - - def predict_zero_shot(self, pipeline, item, args): - top_k = args.get("top_k", 1000) - - result = pipeline( - [item], - candidate_labels=args["candidate_labels"], - truncation=True, - top_k=top_k, - max_length=args["max_length"], - )[0] - - final = {} - final[args["target"]] = result["labels"][0] - - 
explain = dict(zip(result["labels"], result["scores"])) - final[f"{args['target']}_explain"] = explain - - return final - - def predict_translation(self, pipeline, item, args): - result = pipeline([item], max_length=args["max_length"])[0] - - final = {} - final[args["target"]] = result["translation_text"] - - return final - - def predict_summarization(self, pipeline, item, args): - result = pipeline( - [item], - min_length=args["min_output_length"], - max_length=args["max_output_length"], - )[0] - - final = {} - final[args["target"]] = result["summary_text"] - - return final - - def predict_text2text(self, pipeline, item, args): - result = pipeline([item], max_length=args["max_length"])[0] - - final = {} - final[args["target"]] = result["generated_text"] - - return final - - def predict_fill_mask(self, pipeline, item, args): - result = pipeline([item])[0] - - final = {} - final[args["target"]] = result[0]["sequence"] - explain = {elem["sequence"]: elem["score"] for elem in result} - final[f"{args['target']}_explain"] = explain - - return final - - def predict(self, df, args=None): - fnc_list = { - "text-classification": self.predict_text_classification, - "text-generation": self.predict_text_generation, - "zero-shot-classification": self.predict_zero_shot, - "translation": self.predict_translation, - "summarization": self.predict_summarization, - "fill-mask": self.predict_fill_mask, - } - - # get stuff from model folder - args = self.model_storage.json_get("args") - - task = args["task"] - - if task not in fnc_list: - raise RuntimeError(f"Unknown task: {task}") - - fnc = fnc_list[task] - - try: - # load from model storage (finetuned models will use this) - hf_model_storage_path = self.model_storage.folder_get(args["model_name"]) - pipeline = transformers.pipeline( - task=args["task_proper"], - model=hf_model_storage_path, - tokenizer=hf_model_storage_path, - ) - except (ValueError, OSError): - # load from engine storage (i.e. 
'common' models) - hf_model_storage_path = self.engine_storage.folder_get(args["model_name"]) - pipeline = transformers.pipeline( - task=args["task_proper"], - model=hf_model_storage_path, - tokenizer=hf_model_storage_path, - ) - - input_column = args["input_column"] - if input_column not in df.columns: - raise RuntimeError(f'Column "{input_column}" not found in input data') - input_list = df[input_column] - - max_tokens = pipeline.tokenizer.model_max_length - - results = [] - for item in input_list: - if max_tokens is not None: - tokens = pipeline.tokenizer.encode(item) - if len(tokens) > max_tokens: - truncation_policy = args.get("truncation_policy", "strict") - if truncation_policy == "strict": - results.append({"error": f"Tokens count exceed model limit: {len(tokens)} > {max_tokens}"}) - continue - elif truncation_policy == "left": - tokens = tokens[-max_tokens + 1 : -1] # cut 2 empty tokens from left and right - else: - tokens = tokens[1 : max_tokens - 1] # cut 2 empty tokens from left and right - - item = pipeline.tokenizer.decode(tokens) - - item = str(item) - try: - result = fnc(pipeline, item, args) - except Exception as e: - msg = str(e).strip() - if msg == "": - msg = e.__class__.__name__ - result = {"error": msg} - results.append(result) - - pred_df = pd.DataFrame(results) - - return pred_df - - def describe(self, attribute: Optional[str] = None) -> pd.DataFrame: - args = self.model_storage.json_get("args") - if attribute == "args": - return pd.DataFrame(args.items(), columns=["key", "value"]) - elif attribute == "metadata": - hf_api = HfApi() - metadata = hf_api.model_info(args["model_name"]) - data = metadata.__dict__ - return pd.DataFrame(list(data.items()), columns=["key", "value"]) - else: - tables = ["args", "metadata"] - return pd.DataFrame(tables, columns=["tables"]) - - def finetune(self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None: - finetune_args = args if args else {} - args = 
self.base_model_storage.json_get("args") - args.update(finetune_args) - - model_name = args["model_name"] - model_folder = self.model_storage.folder_get(model_name) - args["model_folder"] = model_folder - model_folder_name = model_folder.split("/")[-1] - task = args["task"] - - if task not in FINETUNE_MAP: - raise KeyError( - f"{task} is not currently supported, please choose a supported task - {', '.join(FINETUNE_MAP)}" - ) - - tokenizer, trainer = FINETUNE_MAP[task](df, args) - - try: - trainer.train() - trainer.save_model( - model_folder - ) # TODO: save entire pipeline instead https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.Pipeline.save_pretrained - tokenizer.save_pretrained(model_folder) - - # persist changes - self.model_storage.json_set("args", args) - self.model_storage.folder_sync(model_folder_name) - - except Exception as e: - err_str = f"Finetune failed with error: {str(e)}" - logger.debug(err_str) - raise Exception(err_str) diff --git a/mindsdb/integrations/handlers/huggingface_handler/icon.svg b/mindsdb/integrations/handlers/huggingface_handler/icon.svg deleted file mode 100644 index 9fb11979a10..00000000000 --- a/mindsdb/integrations/handlers/huggingface_handler/icon.svg +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/huggingface_handler/requirements.txt b/mindsdb/integrations/handlers/huggingface_handler/requirements.txt deleted file mode 100644 index eae77291d1f..00000000000 --- a/mindsdb/integrations/handlers/huggingface_handler/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -# NOTE: Any changes made here need to be made to requirements_cpu.txt as well -datasets==2.16.1 -evaluate==0.4.3 -nltk==3.9.3 -huggingface-hub==1.9.1 -torch==2.8.0 -transformers==5.5.0 diff --git a/mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt b/mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt deleted 
file mode 100644 index b509a2942f4..00000000000 --- a/mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +++ /dev/null @@ -1,7 +0,0 @@ -# Needs to be installed with `pip install --extra-index-url https://download.pytorch.org/whl/ .[huggingface_cpu]` -datasets==2.16.1 -evaluate==0.4.3 -nltk==3.9.3 -huggingface-hub==1.9.1 -torch==2.8.0+cpu -transformers==5.5.0 diff --git a/mindsdb/integrations/handlers/huggingface_handler/settings.py b/mindsdb/integrations/handlers/huggingface_handler/settings.py deleted file mode 100644 index 2a5410371af..00000000000 --- a/mindsdb/integrations/handlers/huggingface_handler/settings.py +++ /dev/null @@ -1,27 +0,0 @@ -from mindsdb.integrations.handlers.huggingface_handler.finetune import ( - _finetune_cls, - _finetune_fill_mask, - _finetune_question_answering, - _finetune_summarization, - _finetune_text_generation, - _finetune_translate, -) - -# todo once we have moved predict tasks functions into a separate function -# PREDICT_MAP = { -# 'text-classification': self.predict_text_classification, -# 'zero-shot-classification': self.predict_zero_shot, -# 'translation': self.predict_translation, -# 'summarization': self.predict_summarization, -# 'fill-mask': self.predict_fill_mask -# } - -FINETUNE_MAP = { - "text-classification": _finetune_cls, - "zero-shot-classification": _finetune_cls, - "translation": _finetune_translate, - "summarization": _finetune_summarization, - "fill-mask": _finetune_fill_mask, - "text-generation": _finetune_text_generation, - "question-answering": _finetune_question_answering, -} diff --git a/mindsdb/integrations/handlers/huggingface_handler/tests/test_huggingface.py b/mindsdb/integrations/handlers/huggingface_handler/tests/test_huggingface.py deleted file mode 100644 index 63e0e91cedd..00000000000 --- a/mindsdb/integrations/handlers/huggingface_handler/tests/test_huggingface.py +++ /dev/null @@ -1,365 +0,0 @@ -import time -from unittest.mock import patch - -import pandas as pd -from 
mindsdb_sql_parser import parse_sql -from tests.unit.executor_test_base import BaseExecutorTest - -# How to run: -# env PYTHONPATH=./ pytest -vx tests/unit/test_ml_handlers.py -# Warning: a big huggingface models will be downloaded - - -class TestHuggingface(BaseExecutorTest): - def run_sql(self, sql): - return self.command_executor.execute_command(parse_sql(sql)) - - def hf_test_run(self, mock_handler, model_name, create_sql, predict_sql): - # prepare table - text_spammy = [ - "It is the best time to launch the Robot to get more money. https:\\/\\/Gof.bode-roesch.de\\/Gof", - "Start making thousands of dollars every week just using this robot. https:\\/\\/Gof.coronect.de\\/Gof", - ] - - text_short = ["I want to dance", "Baking is the best"] - - text_long = [ - "Dance is a performing art form consisting of sequences of movement, either improvised or purposefully selected. This movement has aesthetic and often symbolic value.[nb 1] Dance can be categorized and described by its choreography, by its repertoire of movements, or by its historical period or place of origin.", - "Baking is a method of preparing food that uses dry heat, typically in an oven, but can also be done in hot ashes, or on hot stones. The most common baked item is bread but many other types of foods can be baked. Heat is gradually transferred from the surface of cakes, cookies, and pieces of bread to their center. As heat travels through, it transforms batters and doughs into baked goods and more with a firm dry crust and a softer center. Baking can be combined with grilling to produce a hybrid barbecue variant by using both methods simultaneously, or one after the other. 
Baking is related to barbecuing because the concept of the masonry oven is similar to that of a smoke pit.", - ] - - df = pd.DataFrame(data=[text_spammy, text_short, text_long]).T - df.columns = ["text_spammy", "text_short", "text_long"] - - self.set_handler(mock_handler, name="pg", tables={"df": df}) - - # create predictor - ret = self.run_sql(create_sql) - assert ret.error_code is None - - # wait - done = False - for attempt in range(900): - ret = self.run_sql(f"select status from mindsdb.models where name='{model_name}'") - data = ret.data.to_lists() - if len(data) > 0: - if data[0][0] == "complete": - done = True - break - elif data[0][0] == "error": - break - time.sleep(0.5) - if not done: - raise RuntimeError("predictor not created") - - # use predictor - ret = self.command_executor.execute_command(parse_sql(predict_sql)) - assert ret.error_code is None - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_hf_classification_bin(self, mock_handler): - # create predictor - create_sql = """ - CREATE PREDICTOR mindsdb.spam_classifier - predict PRED - USING - engine='huggingface', - join_learn_process=true, - task='text-classification', - model_name= "mrm8488/bert-tiny-finetuned-sms-spam-detection", - input_column = 'text_spammy', - labels=['ham','spam'] - """ - - model_name = "spam_classifier" - - predict_sql = """ - SELECT h.* - FROM pg.df as t - JOIN mindsdb.spam_classifier as h - """ - self.hf_test_run(mock_handler, model_name, create_sql, predict_sql) - - # one line prediction - predict_sql = """ - SELECT * from mindsdb.spam_classifier - where text_spammy= 'It is the best time to launch the Robot to get more money. 
https:\\/\\/Gof.bode-roesch.de\\/Gof' - """ - # use predictor - ret = self.command_executor.execute_command(parse_sql(predict_sql)) - assert ret.error_code is None - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_hf_classification_multy(self, mock_handler): - # create predictor - create_sql = """ - CREATE PREDICTOR mindsdb.sentiment_classifier - predict PRED - USING - engine='huggingface', - join_learn_process=true, - task='text-classification', - model_name= "cardiffnlp/twitter-roberta-base-sentiment", - input_column = 'text_short', - labels=['neg','neu','pos'] - """ - - model_name = "sentiment_classifier" - - predict_sql = """ - SELECT h.* - FROM pg.df as t - JOIN mindsdb.sentiment_classifier as h - """ - self.hf_test_run(mock_handler, model_name, create_sql, predict_sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_hf_zero_shot(self, mock_handler): - # create predictor - create_sql = """ - CREATE PREDICTOR mindsdb.zero_shot_tcd - predict PREDZS - USING - engine='huggingface', - join_learn_process=true, - task="zero-shot-classification", - model_name= "facebook/bart-large-mnli", - input_column = "text_short", - candidate_labels=['travel', 'cooking', 'dancing'] - """ - - model_name = "zero_shot_tcd" - - predict_sql = """ - SELECT h.* - FROM pg.df as t - JOIN mindsdb.zero_shot_tcd as h - """ - self.hf_test_run(mock_handler, model_name, create_sql, predict_sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_summarization(self, mock_handler): - # create predictor - create_sql = """ - CREATE MODEL mindsdb.hf_summarization - PREDICT summary - USING - engine = 'huggingface', - task = 'summarization', - model_name = 'sshleifer/distilbart-xsum-12-1', - input_column = 'text_long', - min_output_length = 5, - max_output_length = 20; - """ - - model_name = "hf_summarization" - - predict_sql = """ - SELECT h.* - FROM pg.df as t - JOIN mindsdb.hf_summarization as h - """ - - 
self.hf_test_run(mock_handler, model_name, create_sql, predict_sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_hf_translation(self, mock_handler): - # create predictor - create_sql = """ - CREATE PREDICTOR mindsdb.translator_en_fr - predict TRANSLATION - USING - engine='huggingface', - join_learn_process=true, - task = "translation", - model_name = "t5-base", - input_column = "text_short", - lang_input = "en", - lang_output = "fr" - """ - - model_name = "translator_en_fr" - - predict_sql = """ - SELECT h.* - FROM pg.df as t - JOIN mindsdb.translator_en_fr as h - """ - self.hf_test_run(mock_handler, model_name, create_sql, predict_sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_hf_text2text(self, mock_handler): - # create predictor - create_sql = """ - CREATE MODEL mindsdb.text_generator - predict PREDICTION - USING - engine='huggingface', - join_learn_process=true, - task = "text2text-generation", - model_name = "google/flan-t5-base", - input_column = 'comment' - """ - - model_name = "text_generator" - - predict_sql = """ - SELECT * FROM text_generator - WHERE comment='Question: Why did the chicken cross the road?' 
- """ - self.hf_test_run(mock_handler, model_name, create_sql, predict_sql) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_hf_text_classification_finetune(self, mock_handler): - create_sql = """ - CREATE PREDICTOR mindsdb.spam_classifier - predict PRED - USING - engine='huggingface', - join_learn_process=true, - task='text-classification', - model_name= "mrm8488/bert-tiny-finetuned-sms-spam-detection", - input_column = 'text_spammy', - labels=['ham','spam'] - """ - - model_name = "spam_classifier" - - predict_sql = """ - SELECT h.* - FROM pg.df as t - JOIN mindsdb.spam_classifier as h - """ - self.hf_test_run(mock_handler, model_name, create_sql, predict_sql) - - # one line prediction - predict_sql = """ - SELECT * from mindsdb.spam_classifier - where text_spammy= 'It is the best time to launch the Robot to get more money. https:\\/\\/Gof.bode-roesch.de\\/Gof' - """ - # use predictor - ret = self.command_executor.execute_command(parse_sql(predict_sql)) - assert ret.error_code is None - - # fine tune - - fine_tune_sql = """ - FINETUNE mindsdb.spam_classifier - FROM pg ( - SELECT label as PRED, text as text_spammy FROM df WHERE PRED <= 1 - ) - USING - tokenizer_from = 'bert-base-uncased'; - """ - - ret = self.command_executor.execute_command(parse_sql(fine_tune_sql)) - - assert ret.error_code is None - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_hf_zero_shot_classification_finetune(self, mock_handler): - # create predictor - create_sql = """ - CREATE PREDICTOR mindsdb.zero_shot_tcd - predict PREDZS - USING - engine='huggingface', - join_learn_process=true, - task="zero-shot-classification", - model_name= "facebook/bart-large-mnli", - input_column = "text_short", - candidate_labels=['travel', 'cooking', 'dancing'] - """ - - model_name = "zero_shot_tcd" - - predict_sql = """ - SELECT h.* - FROM pg.df as t - JOIN mindsdb.zero_shot_tcd as h - """ - self.hf_test_run(mock_handler, model_name, create_sql, 
predict_sql) - - # fine tune - - fine_tune_sql = """ - FINETUNE mindsdb.zero_shot_tcd - FROM pg (SELECT label, hypothesis FROM df); - """ - - ret = self.command_executor.execute_command(parse_sql(fine_tune_sql)) - - assert ret.error_code is None - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_hf_translation_finetune(self, mock_handler): - # create predictor - create_sql = """ - CREATE PREDICTOR mindsdb.translator_en_fr - predict TRANSLATION - USING - engine='huggingface', - join_learn_process=true, - task = "translation", - model_name = "t5-base", - input_column = "text_short", - lang_input = "en", - lang_output = "fr" - """ - - model_name = "translator_en_fr" - - predict_sql = """ - SELECT h.* - FROM pg.df as t - JOIN mindsdb.translator_en_fr as h - """ - self.hf_test_run(mock_handler, model_name, create_sql, predict_sql) - - # fine tune - - fine_tune_sql = """ - FINETUNE mindsdb.translator_en_fr - FROM pg (SELECT text_long, transl FROM df); - """ - - ret = self.command_executor.execute_command(parse_sql(fine_tune_sql)) - - assert ret.error_code is None - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_hf_summarization_finetune(self, mock_handler): - # create predictor - create_sql = """ - CREATE MODEL mindsdb.hf_summarization - PREDICT summary - USING - engine = 'huggingface', - task = 'summarization', - model_name = 'sshleifer/distilbart-xsum-12-1', - input_column = 'text_long', - min_output_length = 5, - max_output_length = 20; - """ - - model_name = "hf_summarization" - - predict_sql = """ - SELECT h.* - FROM pg.df as t - JOIN mindsdb.hf_summarization as h - """ - - self.hf_test_run(mock_handler, model_name, create_sql, predict_sql) - - # fine tune - fine_tune_sql = """ - FINETUNE mindsdb.hf_summarization - FROM pg ( - SELECT text, summary FROM df - ); - """ - - ret = self.command_executor.execute_command(parse_sql(fine_tune_sql)) - - assert ret.error_code is None diff --git 
a/mindsdb/integrations/handlers/llama_index_handler/README.md b/mindsdb/integrations/handlers/llama_index_handler/README.md deleted file mode 100644 index fa48241eb1a..00000000000 --- a/mindsdb/integrations/handlers/llama_index_handler/README.md +++ /dev/null @@ -1,97 +0,0 @@ ---- -title: LlamaIndex -sidebarTitle: LlamaIndex ---- - -## LlamaIndex Handler - -This documentation describes the integration of MindsDB with [LlamaIndex](https://docs.llamaindex.ai/en/stable/), a framework for building context-augmented generative AI applications with LLMs. - - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To use LlamaIndex within MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). -3. Obtain the OpenAI API key required to OpenAI LLMs. Follow the [instructions for obtaining the API key](https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key). - -## Setup - -Create an AI engine from the [Llamaindex handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/llama_index_handler). - -```sql -CREATE ML_ENGINE llama_index -FROM llama_index -USING - openai_api_key = 'api-key-value'; -``` - -Create a model using `llama_index` as an engine and OpenAI as a model provider. 
- -```sql -CREATE MODEL chatbot_model -PREDICT answer -USING - engine = 'llama_index', -- engine name as created via CREATE ML_ENGINE - input_column = 'question', - mode = 'conversational', -- optional - user_column = 'question', -- optional: used only for conversational mode - assistant_column = 'answer'; -- optional: used only for conversational mode -``` - - -## Usage - -Here is how to create a model that answers questions by reading a page from the web: - -```sql -CREATE MODEL qa_model -PREDICT answer -USING - engine = 'llama_index', - reader = 'SimpleWebPageReader', - source_url_link = 'https://mindsdb.com/about', - input_column = 'question'; -``` - -Query the model to get answer: - -```sql -SELECT question, answer -FROM mindsdb.qa_model -WHERE question = "What is MindsDB's story?" -``` - -Here is the output: - -```sql -+---------------------------+-------------------------------+ -|question |answer | -+---------------------------+-------------------------------+ -|What is MindsDB's story? |MindsDB is a fast-growing open-source ...| -+---------------------------+-------------------------------+ - -``` - -### Configuring SimpleWebPageReader for Specific Domains - -When SimpleWebPageReader is used it can be configured to interact only with specific domains by using the `web_crawling_allowed_sites` setting in the `config.json` file. -This feature allows you to restrict the handler to read and process content only from the domains you specify, enhancing security and control over web interactions. - -To configure this, simply list the allowed domains under the `web_crawling_allowed_sites` key in `config.json`. For example: - -```json -"web_crawling_allowed_sites": [ - "https://docs.mindsdb.com", - "https://another-allowed-site.com" -] -``` - - - -**Next Steps** - -Go to the [Use Cases](https://docs.mindsdb.com/use-cases/overview) section to see more examples. 
- - diff --git a/mindsdb/integrations/handlers/llama_index_handler/__about__.py b/mindsdb/integrations/handlers/llama_index_handler/__about__.py deleted file mode 100644 index 1d215cb6c83..00000000000 --- a/mindsdb/integrations/handlers/llama_index_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB LlamaIndex handler" -__package_name__ = "mindsdb_llama_index_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for LlamaIndex" -__author__ = "Balaji Seetharaman " -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/llama_index_handler/__init__.py b/mindsdb/integrations/handlers/llama_index_handler/__init__.py deleted file mode 100644 index f88e7f7e56f..00000000000 --- a/mindsdb/integrations/handlers/llama_index_handler/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .llama_index_handler import LlamaIndexHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "LlamaIndex" -name = "llama_index" -type = HANDLER_TYPE.ML -icon_path = 'icon.svg' -permanent = False - -__all__ = ["Handler", "version", "name", "type", "title", "description", "import_error", "icon_path"] diff --git a/mindsdb/integrations/handlers/llama_index_handler/icon.svg b/mindsdb/integrations/handlers/llama_index_handler/icon.svg deleted file mode 100644 index 471c06cfc47..00000000000 --- a/mindsdb/integrations/handlers/llama_index_handler/icon.svg +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/llama_index_handler/llama_index_handler.py b/mindsdb/integrations/handlers/llama_index_handler/llama_index_handler.py deleted file mode 100644 index 
dd984971e78..00000000000 --- a/mindsdb/integrations/handlers/llama_index_handler/llama_index_handler.py +++ /dev/null @@ -1,183 +0,0 @@ -from typing import Optional, Dict - -import pandas as pd -from llama_index.llms.openai import OpenAI -from llama_index.core import Document -from llama_index.readers.web import SimpleWebPageReader -from llama_index.core import PromptTemplate -from llama_index.core import StorageContext, load_index_from_storage -from llama_index.embeddings.openai import OpenAIEmbedding -from llama_index.core import VectorStoreIndex -from llama_index.core import Settings - -from mindsdb.integrations.libs.base import BaseMLEngine -from mindsdb.utilities.config import Config -from mindsdb.utilities.security import validate_urls -from mindsdb.integrations.handlers.llama_index_handler.settings import llama_index_config, LlamaIndexModel -from mindsdb.integrations.libs.api_handler_exceptions import MissingConnectionParams -from mindsdb.integrations.utilities.handler_utils import get_api_key - - -class LlamaIndexHandler(BaseMLEngine): - """Integration with the LlamaIndex data framework for LLM applications.""" - - name = "llama_index" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.generative = True - self.default_index_class = llama_index_config.DEFAULT_INDEX_CLASS - self.supported_index_class = llama_index_config.SUPPORTED_INDEXES - self.default_reader = llama_index_config.DEFAULT_READER - self.supported_reader = llama_index_config.SUPPORTED_READERS - self.config = Config() - - @staticmethod - def create_validation(target, args=None, **kwargs): - if "using" not in args: - raise MissingConnectionParams("LlamaIndex engine requires USING clause!") - else: - args = args["using"] - LlamaIndexModel(**args) - - def create( - self, - target: str, - df: Optional[pd.DataFrame] = None, - args: Optional[Dict] = None, - ) -> None: - # workaround to create llama model without input data - if df is None or df.empty: - df = 
pd.DataFrame([{"text": ""}]) - - args_reader = args.get("using", {}).get("reader", self.default_reader) - - if args_reader == "DFReader": - dstrs = df.apply( - lambda x: ", ".join([f"{col}: {str(entry)}" for col, entry in zip(df.columns, x)]), - axis=1, - ) - reader = list(map(lambda x: Document(text=x), dstrs.tolist())) - elif args_reader == "SimpleWebPageReader": - url = args["using"]["source_url_link"] - allowed_urls = self.config.get("web_crawling_allowed_sites", []) - if allowed_urls and not validate_urls(url, allowed_urls): - raise ValueError( - f"The provided URL is not allowed for web crawling. Please use any of {', '.join(allowed_urls)}." - ) - reader = SimpleWebPageReader(html_to_text=True).load_data([url]) - else: - raise Exception(f"Invalid operation mode. Please use one of {self.supported_reader}.") - self.model_storage.json_set("args", args) - index = self._setup_index(reader) - path = self.model_storage.folder_get("context") - index.storage_context.persist(persist_dir=path) - self.model_storage.folder_sync("context") - - def update(self, args) -> None: - args_cur = self.model_storage.json_get("args") - args_cur["using"].update(args["using"]) - - # check new set of arguments - self.create_validation(None, args_cur) - - self.model_storage.json_set("args", args_cur) - - def predict(self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> pd.DataFrame: - pred_args = args["predict_params"] if args else {} - - args = self.model_storage.json_get("args") - engine_kwargs = {} - - if args["using"].get("mode") == "conversational": - user_column = args["using"]["user_column"] - assistant_column = args["using"]["assistant_column"] - - messages = [] - for row in df[:-1].to_dict("records"): - messages.append(f"user: {row[user_column]}") - messages.append(f"assistant: {row[assistant_column]}") - - conversation = "\n".join(messages) - - questions = [df.iloc[-1][user_column]] - - if "prompt" in pred_args and pred_args["prompt"] is not None: - 
user_prompt = pred_args["prompt"] - else: - user_prompt = args["using"].get("prompt", "") - - prompt_template = ( - f"{user_prompt}\n" - f"---------------------\n" - f"We have provided context information below. \n" - f"{{context_str}}\n" - f"---------------------\n" - f"This is previous conversation history:\n" - f"{conversation}\n" - f"---------------------\n" - f"Given this information, please answer the question: {{query_str}}" - ) - - engine_kwargs["text_qa_template"] = PromptTemplate(prompt_template) - - else: - input_column = args["using"].get("input_column", None) - - prompt_template = args["using"].get("prompt_template", args.get("prompt_template", None)) - if prompt_template is not None: - self.create_validation(args=args) - engine_kwargs["text_qa_template"] = PromptTemplate(prompt_template) - - if input_column is None: - raise Exception( - "`input_column` must be provided at model creation time or through USING clause when predicting. Please try again." - ) # noqa - - if input_column not in df.columns: - raise Exception(f'Column "{input_column}" not found in input data! 
Please try again.') - - questions = df[input_column] - - index_path = self.model_storage.folder_get("context") - storage_context = StorageContext.from_defaults(persist_dir=index_path) - self._get_service_context() - - index = load_index_from_storage(storage_context) - query_engine = index.as_query_engine(**engine_kwargs) - - results = [] - - for question in questions: - query_results = query_engine.query(question) # TODO: provide extra_info in explain_target col - results.append(query_results.response) - - result_df = pd.DataFrame({"question": questions, args["target"]: results}) # result_df['answer'].tolist() - return result_df - - def _get_service_context(self) -> None: - args = self.model_storage.json_get("args") - engine_storage = self.engine_storage - openai_api_key = get_api_key("openai", args["using"], engine_storage, strict=True) - llm_kwargs = {"api_key": openai_api_key} - - if "temperature" in args["using"]: - llm_kwargs["temperature"] = args["using"]["temperature"] - if "model_name" in args["using"]: - llm_kwargs["model_name"] = args["using"]["model_name"] - if "max_tokens" in args["using"]: - llm_kwargs["max_tokens"] = args["using"]["max_tokens"] - # only way this works is by sending the key through openai - - if Settings.llm is None: - llm = OpenAI(api_key=openai_api_key) - Settings.llm = llm - if Settings.embed_model is None: - embed_model = OpenAIEmbedding() - Settings.embed_model = embed_model - # TODO: all usual params should be added to Settings - - def _setup_index(self, documents): - self._get_service_context() - index = VectorStoreIndex.from_documents(documents) - return index diff --git a/mindsdb/integrations/handlers/llama_index_handler/requirements.txt b/mindsdb/integrations/handlers/llama_index_handler/requirements.txt deleted file mode 100644 index 2fe49e5eb36..00000000000 --- a/mindsdb/integrations/handlers/llama_index_handler/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -llama-index==0.13.0 -pydantic-settings >= 2.1.0 
-llama-index-readers-web -llama-index-embeddings-openai \ No newline at end of file diff --git a/mindsdb/integrations/handlers/llama_index_handler/settings.py b/mindsdb/integrations/handlers/llama_index_handler/settings.py deleted file mode 100644 index 5726c58de5b..00000000000 --- a/mindsdb/integrations/handlers/llama_index_handler/settings.py +++ /dev/null @@ -1,66 +0,0 @@ -from typing import List, Optional -from pydantic import BaseModel, field_validator, model_validator -from pydantic_settings import BaseSettings - - -class LlamaIndexConfig(BaseSettings): - """ - Model for LlamaIndexHandler settings. - - Attributes: - default_index_class (str): Default index class. - supported_index_class (List[str]): Supported index classes. - default_reader (str): Default reader. Note this is custom data frame reader. - supported_reader (List[str]): Supported readers. - """ - DEFAULT_INDEX_CLASS: str = "VectorStoreIndex" - SUPPORTED_INDEXES: List[str] = ["VectorStoreIndex"] - DEFAULT_READER: str = "DFReader" - SUPPORTED_READERS: List[str] = ["DFReader", "SimpleWebPageReader"] - - -llama_index_config = LlamaIndexConfig() - - -class LlamaIndexModel(BaseModel): - """ - Model for LlamaIndexHandler. - - Attributes: - reader (str): Reader. - index_class (str): Index class. - index (Any): Index. - reader_params (Any): Reader parameters. - index_params (Any): Index parameters. 
- """ - reader: Optional[str] = None - index_class: Optional[str] = None - input_column: str - openai_api_key: Optional[str] = None - input_column: Optional[str] - mode: Optional[str] = None - user_column: Optional[str] = None - assistant_column: Optional[str] = None - - @field_validator('reader') - @classmethod - def validate_reader(cls, value): - if value not in llama_index_config.SUPPORTED_READERS: - raise ValueError(f"Reader {value} is not supported.") - - return value - - @field_validator('index_class') - @classmethod - def validate_index_class(cls, value): - if value not in llama_index_config.SUPPORTED_INDEXES: - raise ValueError(f"Index class {value} is not supported.") - - return value - - @model_validator(mode='after') - def validate_mode(self): - if self.mode == "conversational" and not all([self.user_column, self.assistant_column]): - raise ValueError("Conversational mode requires user_column and assistant_column parameter") - - return self diff --git a/mindsdb/integrations/handlers/mariadb_handler/README.md b/mindsdb/integrations/handlers/mariadb_handler/README.md deleted file mode 100644 index 20ebb0bc94f..00000000000 --- a/mindsdb/integrations/handlers/mariadb_handler/README.md +++ /dev/null @@ -1,97 +0,0 @@ ---- -title: MariaDB -sidebarTitle: MariaDB ---- - -This documentation describes the integration of MindsDB with [MariaDB](https://mariadb.org/), one of the most popular open source relational databases. -The integration allows MindsDB to access data from MariaDB and enhance MariaDB with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect MariaDB to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). 
- -## Connection - -Establish a connection to MariaDB from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/mariadb_handler) as an engine. - -```sql -CREATE DATABASE mariadb_conn -WITH ENGINE = 'mariadb', -PARAMETERS = { - "host": "host-name", - "port": 3307, - "database": "db-name", - "user": "user-name", - "password": "password" -}; -``` - -Or: - -```sql -CREATE DATABASE mariadb_conn -WITH - ENGINE = 'mariadb', - PARAMETERS = { - "url": "mariadb://user-name@host-name:3307" - }; -``` - -Required connection parameters include the following: - -* `user`: The username for the MariaDB database. -* `password`: The password for the MariaDB database. -* `host`: The hostname, IP address, or URL of the MariaDB server. -* `port`: The port number for connecting to the MariaDB server. -* `database`: The name of the MariaDB database to connect to. - -Or: - -* `url`: You can specify a connection to MariaDB Server using a URI-like string, as an alternative connection option. You can also use `mysql://` as the protocol prefix - -Optional connection parameters include the following: - - * `ssl`: Boolean parameter that indicates whether SSL encryption is enabled for the connection. Set to True to enable SSL and enhance connection security, or set to False to use the default non-encrypted connection. - * `ssl_ca`: Specifies the path to the Certificate Authority (CA) file in PEM format. - * `ssl_cert`: Specifies the path to the SSL certificate file. This certificate should be signed by a trusted CA specified in the `ssl_ca` file or be a self-signed certificate trusted by the server. - * `ssl_key`: Specifies the path to the private key file (in PEM format). - * `use_pure` (`True` by default): Whether to use pure Python or C Extension. If `use_pure=False` and the C Extension is not available, then Connector/Python will automatically fall back to the pure Python implementation. 
- -## Usage - -The following usage examples utilize the connection to MariaDB made via the `CREATE DATABASE` statement and named `mariadb_conn`. - -Retrieve data from a specified table by providing the integration and table name. - -```sql -SELECT * -FROM mariadb_conn.table_name -LIMIT 10; -``` - -## Troubleshooting - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the MariaDB database. -* **Checklist**: - 1. Ensure that the MariaDB server is running and accessible - 2. Confirm that host, port, user, and password are correct. Try a direct MySQL connection. - 3. Test the network connection between the MindsDB host and the MariaDB server. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces, reserved words or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel data - * Incorrect: SELECT * FROM integration.'travel data' - * Correct: SELECT * FROM integration.\`travel data\` - diff --git a/mindsdb/integrations/handlers/mariadb_handler/__about__.py b/mindsdb/integrations/handlers/mariadb_handler/__about__.py deleted file mode 100644 index 23ae63f2c42..00000000000 --- a/mindsdb/integrations/handlers/mariadb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB MariaDB handler' -__package_name__ = 'mindsdb_mariadb_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for MariaDB" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/mariadb_handler/__init__.py b/mindsdb/integrations/handlers/mariadb_handler/__init__.py deleted file mode 100644 index a9810874334..00000000000 --- 
a/mindsdb/integrations/handlers/mariadb_handler/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL - -try: - from .mariadb_handler import MariaDBHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e -from .__about__ import __version__ as version, __description__ as description - - -title = "MariaDB" -name = "mariadb" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.MINDSDB - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", - "support_level", -] diff --git a/mindsdb/integrations/handlers/mariadb_handler/connection_args.py b/mindsdb/integrations/handlers/mariadb_handler/connection_args.py deleted file mode 100644 index 589d25dda5e..00000000000 --- a/mindsdb/integrations/handlers/mariadb_handler/connection_args.py +++ /dev/null @@ -1,76 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - url={ - 'type': ARG_TYPE.STR, - 'description': 'The URI-Like connection string to the MariaDB server. If provided, it will override the other connection arguments.', - 'required': False, - 'label': 'URL' - }, - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the MariaDB server.', - 'required': True, - 'label': 'User' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the MariaDB server.', - 'required': True, - 'label': 'Password', - 'secret': True - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the MariaDB server.', - 'required': True, - 'label': 'Database' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the MariaDB server. 
NOTE: use \'127.0.0.1\' instead of \'localhost\' to connect to local server.', - 'required': True, - 'label': 'Host' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the MariaDB server. Must be an integer.', - 'required': True, - 'label': 'Port' - }, - ssl={ - 'type': ARG_TYPE.BOOL, - 'description': 'Set it to True to enable ssl.', - 'required': False, - 'label': 'ssl' - }, - ssl_ca={ - 'type': ARG_TYPE.PATH, - 'description': 'Path or URL of the Certificate Authority (CA) certificate file', - 'required': False, - 'label': 'ssl_ca' - }, - ssl_cert={ - 'type': ARG_TYPE.PATH, - 'description': 'Path name or URL of the server public key certificate file', - 'required': False, - 'label': 'ssl_cert' - }, - ssl_key={ - 'type': ARG_TYPE.PATH, - 'description': 'The path name or URL of the server private key file', - 'required': False, - 'label': 'ssl_key', - } -) - -connection_args_example = OrderedDict( - host='127.0.0.1', - port=3306, - user='root', - password='password', - database='database' -) diff --git a/mindsdb/integrations/handlers/mariadb_handler/icon.svg b/mindsdb/integrations/handlers/mariadb_handler/icon.svg deleted file mode 100644 index 9de2c0f8df9..00000000000 --- a/mindsdb/integrations/handlers/mariadb_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/mariadb_handler/mariadb_handler.py b/mindsdb/integrations/handlers/mariadb_handler/mariadb_handler.py deleted file mode 100644 index 3d1c49d7f7f..00000000000 --- a/mindsdb/integrations/handlers/mariadb_handler/mariadb_handler.py +++ /dev/null @@ -1,12 +0,0 @@ -from mindsdb.integrations.handlers.mysql_handler import Handler as MySQLHandler - - -class MariaDBHandler(MySQLHandler): - """ - This handler handles connection and execution of the MariaDB statements. 
- """ - - name = 'mariadb' - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) diff --git a/mindsdb/integrations/handlers/mariadb_handler/requirements.txt b/mindsdb/integrations/handlers/mariadb_handler/requirements.txt deleted file mode 100644 index ee467569031..00000000000 --- a/mindsdb/integrations/handlers/mariadb_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --r mindsdb/integrations/handlers/mysql_handler/requirements.txt diff --git a/mindsdb/integrations/handlers/mariadb_handler/tests/test_mariadb_handler.py b/mindsdb/integrations/handlers/mariadb_handler/tests/test_mariadb_handler.py deleted file mode 100644 index 7aac98c1dc8..00000000000 --- a/mindsdb/integrations/handlers/mariadb_handler/tests/test_mariadb_handler.py +++ /dev/null @@ -1,178 +0,0 @@ -import time -import os -import shutil -import tarfile - -import pytest -import docker - -from mindsdb.integrations.handlers.mariadb_handler.mariadb_handler import MariaDBHandler -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from mindsdb.utilities.fs import safe_extract - -HANDLER_KWARGS = { - "connection_data": { - "host": "localhost", - "port": "13306", - "user": "root", - "password": "supersecret", - "database": "test", - "ssl": False, - } -} - -CERTS_ARCHIVE = "certs.tar" -CERTS_DIR = "mysql" - - -def get_certs(): - certs_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "mysql") - certs = {} - for cert_key, fname in [("ssl_ca", "ca.pem"), ("ssl_cert", "client-cert.pem"), ("ssl_key", "client-key.pem")]: - cert_file = os.path.join(certs_dir, fname) - certs[cert_key] = cert_file - return certs - - -def get_certificates(container): - cur_dir = os.path.dirname(os.path.abspath(__file__)) - archive_path = os.path.join(cur_dir, CERTS_ARCHIVE) - with open(archive_path, "wb") as f: - bits, _ = container.get_archive("/var/lib/mysql") - for chunk in bits: - f.write(chunk) - - with tarfile.open(archive_path) as tf: - safe_extract(tf, path=cur_dir) - 
certs = get_certs() - HANDLER_KWARGS["connection_data"].update(certs) - - -def waitReadiness(container, timeout=30): - threshold = time.time() + timeout - ready_msg = "mariadbd: ready for connections" - while True: - lines = container.logs().decode() - # container fully ready - # because it reloads the db server during initialization - # need to check that the 'ready for connections' has found second time - if lines.count(ready_msg) >= 2: - break - if time.time() > threshold: - raise Exception("timeout exceeded, container is still not ready") - - -@pytest.fixture(scope="module", params=[{"ssl": False}, {"ssl": True}], ids=["NoSSL", "SSL"]) -def handler(request): - image_name = "mindsdb/mariadb-handler-test" - docker_client = docker.from_env() - with_ssl = request.param["ssl"] - container = None - try: - container = docker_client.containers.run( - image_name, - command="--secure-file-priv=/", - detach=True, - environment={"MYSQL_ROOT_PASSWORD": "supersecret"}, - ports={"3306/tcp": 13306}, - ) - waitReadiness(container) - # ubnormal teardown - except Exception as e: - if container is not None: - container.kill() - raise e - - if with_ssl: - get_certificates(container) - handler = MariaDBHandler("test_mariadb_handler", **HANDLER_KWARGS) - yield handler - - # normal teardown - container.kill() - docker_client.close() - if with_ssl: - cur_dir = os.path.dirname(os.path.abspath(__file__)) - try: - os.remove(os.path.join(cur_dir, CERTS_ARCHIVE)) - shutil.rmtree(os.path.join(cur_dir, CERTS_DIR)) - except Exception as e: - print(f"unable to delete .tar/files of certificates: {e}") - - -class TestMariaDBHandler: - def test_connect(self, handler): - handler.connect() - assert handler.is_connected, "connection error" - - def test_check_connection(self, handler): - res = handler.check_connection() - assert res.success, res.error_message - - def test_native_query_show_dbs(self, handler): - dbs = handler.native_query("SHOW DATABASES;") - dbs = dbs.data_frame - assert dbs is not 
None, "expected to get some data, but got None" - assert "Database" in dbs, f"expected to get 'Database' column in response:\n{dbs}" - dbs = list(dbs["Database"]) - expected_db = HANDLER_KWARGS["connection_data"]["database"] - assert expected_db in dbs, f"expected to have {expected_db} db in response: {dbs}" - - def test_get_tables(self, handler): - tables = self.get_table_names(handler) - assert "rentals" in tables, f"expected to have 'rentals' table in the db but got: {tables}" - - def test_describe_table(self, handler): - described = handler.get_columns("rentals") - describe_data = described.data_frame - self.check_valid_response(described) - got_columns = list(describe_data.iloc[:, 0]) - want_columns = [ - "number_of_rooms", - "number_of_bathrooms", - "sqft", - "location", - "days_on_market", - "initial_price", - "neighborhood", - "rental_price", - ] - assert got_columns == want_columns, ( - f"expected to have next columns in rentals table:\n{want_columns}\nbut got:\n{got_columns}" - ) - - def test_create_table(self, handler): - new_table = "test_mdb" - res = handler.native_query(f"CREATE TABLE IF NOT EXISTS {new_table} (test_col INT)") - self.check_valid_response(res) - tables = self.get_table_names(handler) - assert new_table in tables, f"expected to have {new_table} in database, but got: {tables}" - - def test_drop_table(self, handler): - drop_table = "test_md" - res = handler.native_query(f"DROP TABLE IF EXISTS {drop_table}") - self.check_valid_response(res) - tables = self.get_table_names(handler) - assert drop_table not in tables - - def test_select_query(self, handler): - limit = 5 - query = f"SELECT * FROM rentals WHERE number_of_rooms = 2 LIMIT {limit}" - res = handler.query(query) - self.check_valid_response(res) - got_rows = res.data_frame.shape[0] - want_rows = limit - assert got_rows == want_rows, f"expected to have {want_rows} rows in response but got: {got_rows}" - - def check_valid_response(self, res): - if res.resp_type == RESPONSE_TYPE.TABLE: 
- assert res.data_frame is not None, "expected to have some data, but got None" - assert res.error_code == 0, f"expected to have zero error_code, but got {res.error_code}" - assert res.error_message is None, f"expected to have None in error message, but got {res.error_message}" - - def get_table_names(self, handler): - res = handler.get_tables() - tables = res.data_frame - assert tables is not None, "expected to have some tables in the db, but got None" - assert "table_name" in tables, f"expected to get 'table_name' column in the response:\n{tables}" - return list(tables["table_name"]) diff --git a/mindsdb/integrations/handlers/mlflow_handler/__about__.py b/mindsdb/integrations/handlers/mlflow_handler/__about__.py deleted file mode 100644 index c7720a069fb..00000000000 --- a/mindsdb/integrations/handlers/mlflow_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB MLflow handler' -__package_name__ = 'mindsdb_mlflow_handler' -__version__ = '0.0.2' -__description__ = "MindsDB handler for MLflow" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/mlflow_handler/__init__.py b/mindsdb/integrations/handlers/mlflow_handler/__init__.py deleted file mode 100644 index de6f799905e..00000000000 --- a/mindsdb/integrations/handlers/mlflow_handler/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE -from mindsdb.integrations.handlers.mlflow_handler.__about__ import __version__ as version, __description__ as description -try: - from mindsdb.integrations.handlers.mlflow_handler.mlflow_handler import MLflowHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = 'MLFlow' -name = 'mlflow' -type = HANDLER_TYPE.ML -icon_path = "icon.svg" -permanent = False - -__all__ = [ - 'Handler', 
'version', 'name', 'type', 'title', 'description', 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/mlflow_handler/example.md b/mindsdb/integrations/handlers/mlflow_handler/example.md deleted file mode 100644 index 1c72f75e6a9..00000000000 --- a/mindsdb/integrations/handlers/mlflow_handler/example.md +++ /dev/null @@ -1,46 +0,0 @@ -# Preliminaries - -To use MLFlow-served models through MindsDB, you need to: - -1) Train a model via a wrapper class that inherits from `mlflow.pyfunc.PythonModel`. Should expose a `predict()` method that returns the predicted output for some input data when called. - -(Important: ensure that the python version specified for conda env matches the one used to actually train the model). - -2) Start the MLFlow server: -mlflow server -p 5001 --backend-store-uri sqlite:////path/to/mlflow.db --default-artifact-root ./artifacts --host 0.0.0.0 - -3) Serve the trained model: -mlflow models serve --model-uri ./model_folder_name - - -# MindsDB example commands - --- Create a model that registers an MLFlow served model as an AI Table - -CREATE MODEL mindsdb.test -PREDICT target -USING -engine='mlflow', -- calls this handler -model_name='model_folder_name', -mlflow_server_url='http://0.0.0.0:5001/', -- match port with mlflow server -mlflow_server_path='sqlite:////path/to/mlflow.db', -predict_url='http://localhost:5000/invocations'; -- match port with `mlflow serve` - - --- Check model status - -SELECT * FROM mindsdb.models WHERE name='test'; -- will appear as `complete` if import process finished successfully - - --- Predict using synthetic data - -SELECT target -FROM mindsdb.test -WHERE text='The tsunami is coming, seek high ground'; -- gets predictions for the input data - - --- Batch prediction joining with another table - -SELECT t.text, m.predict -FROM mindsdb.test as m -JOIN files.some_text as t; diff --git a/mindsdb/integrations/handlers/mlflow_handler/icon.svg b/mindsdb/integrations/handlers/mlflow_handler/icon.svg 
deleted file mode 100644 index 6b0cf427b38..00000000000 --- a/mindsdb/integrations/handlers/mlflow_handler/icon.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/mlflow_handler/mlflow_handler.py b/mindsdb/integrations/handlers/mlflow_handler/mlflow_handler.py deleted file mode 100644 index 547a8f95324..00000000000 --- a/mindsdb/integrations/handlers/mlflow_handler/mlflow_handler.py +++ /dev/null @@ -1,105 +0,0 @@ -import requests -from datetime import datetime -from typing import Dict, Optional - -import pandas as pd -from mlflow.tracking import MlflowClient - -from mindsdb.integrations.libs.base import BaseMLEngine - - -class MLflowHandler(BaseMLEngine): - """ - The MLflow integration engine needs to have a working connection to MLFlow. For this: - - All models to use should be previously served - - An MLflow server should be running, to access its model registry - - Example: - 1. Run `mlflow server -p 5001 --backend-store-uri sqlite:////path/to/mlflow.db --default-artifact-root ./artifacts --host 0.0.0.0` - 2. Run `mlflow models serve --model-uri ./model_path` - 3. Run MindsDB - - Note: above, `artifacts` is a folder to store artifacts for new experiments that do not specify an artifact store. - """ # noqa - - name = "mlflow" - - def create( - self, - target: str, - df: Optional[pd.DataFrame] = None, - args: Optional[Dict] = None, - ) -> None: - args = args["using"] # ignore the rest of the problem definition - connection = MlflowClient(args["mlflow_server_url"], args["mlflow_server_path"]) - model_name = args["model_name"] - mlflow_models = [model.name for model in connection.search_registered_models()] - - if model_name not in mlflow_models: - raise Exception( - f"Error: model '{model_name}' not found in mlflow. Check serving and try again." 
- ) - - args["target"] = target - self._check_model_url(args["predict_url"]) - self.model_storage.json_set("args", args) - - def predict(self, df, args=None): - args = self.model_storage.json_get("args") # override any incoming args for now - self._check_model_url(args["predict_url"]) - resp = requests.post( - args["predict_url"], - data=df.to_json(orient="records"), - headers={"content-type": "application/json; format=pandas-records"}, - ) - answer = resp.json() - predictions = pd.DataFrame({args["target"]: answer}) - return predictions - - def describe(self, key: Optional[str] = None) -> pd.DataFrame: - if key == "info": - args = self.model_storage.json_get("args") - connection = MlflowClient( - args["mlflow_server_url"], args["self.mlflow_server_path"] - ) - models = { - model.name: model for model in connection.search_registered_models() - } - model = models[key] - latest_version = model.latest_versions[-1] - description = { - "NAME": [model.name], - "USER_DESCRIPTION": [model.description], - "LAST_STATUS": [latest_version.status], - "CREATED_AT": [ - datetime.fromtimestamp(model.creation_timestamp // 1000).strftime( - "%m/%d/%Y, %H:%M:%S" - ) - ], - "LAST_UPDATED": [ - datetime.fromtimestamp( - model.last_updated_timestamp // 1000 - ).strftime("%m/%d/%Y, %H:%M:%S") - ], - "TAGS": [model.tags], - "LAST_RUN_ID": [latest_version.run_id], - "LAST_SOURCE_PATH": [latest_version.source], - "LAST_USER_ID": [latest_version.user_id], - "LAST_VERSION": [latest_version.version], - } - return pd.DataFrame.from_dict(description) - else: - tables = ["info"] - return pd.DataFrame(tables, columns=["tables"]) - - @staticmethod - def _check_model_url(url): - """try post without data, check status code not in (not_found, method_not_allowed)""" - try: - resp = requests.post(url) - if resp.status_code in (404, 405): - raise Exception( - f"Model url is incorrect, status_code: {resp.status_code}" - ) - except requests.RequestException as e: - raise Exception(f"Model url is incorrect: 
{str(e)}") diff --git a/mindsdb/integrations/handlers/mlflow_handler/requirements.txt b/mindsdb/integrations/handlers/mlflow_handler/requirements.txt deleted file mode 100644 index 3ccfa559cbe..00000000000 --- a/mindsdb/integrations/handlers/mlflow_handler/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -mlflow -protobuf>=6.33.5 # not directly required, pinned by Snyk to avoid a vulnerability -sqlparse>=0.5.4 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/mindsdb/integrations/handlers/mlflow_handler/tests/test_mlflow.py b/mindsdb/integrations/handlers/mlflow_handler/tests/test_mlflow.py deleted file mode 100644 index f333c93b621..00000000000 --- a/mindsdb/integrations/handlers/mlflow_handler/tests/test_mlflow.py +++ /dev/null @@ -1,50 +0,0 @@ -# How to run: -# env PYTHONPATH=./:$PYTHONPATH pytest tests/unit/ml_handlers/test_mlflow.py -ls -import requests -import time -import pytest -from unittest.mock import patch - -from mindsdb_sql_parser import parse_sql - -from tests.unit.executor_test_base import BaseExecutorTest -from mindsdb.integrations.handlers.mlflow_handler.mlflow_handler import MLflowHandler - - -# TODO: fix patches -class TestMLFlow(BaseExecutorTest): - def run_sql(self, sql): - return self.command_executor.execute_command(parse_sql(sql)) - - @patch("mlflow.tracking.MlflowClient") - @patch.object(MLflowHandler, "_check_model_url") - @patch("mindsdb.integrations.handlers.mlflow_handler.mlflow_handler.requests.post") - def test_mlflow(self, mock_internal_post, mock_handler_url_method, mock_mlflow_client): - mock_mlflow_client.search_registered_models.side_effect = ["test_mlflow"] - mock_internal_post.side_effect = requests.Request(json=["negative_sentiment"]) - mock_handler_url_method.side_effect = True - ret = self.run_sql(""" - CREATE PREDICTOR mindsdb.test_mlflow - PREDICT c - USING - engine='mlflow', - model_name='test_mlflow', - mlflow_server_url='http://0.0.0.0:5001/', - mlflow_server_path='sqlite:////mlflow.db', - 
predict_url='http://localhost:5000/invocations'; - """) - assert ret.error_code is None - - time.sleep(3) - - ret = self.run_sql(""" - SELECT p.* - FROM mindsdb.test_mlflow as p - WHERE text="The tsunami is coming, seek high ground"; - """) - assert ret.error_code is None - assert ret.c == "0" # what is it? - - -if __name__ == "__main__": - pytest.main(["test_mlflow.py"]) diff --git a/mindsdb/integrations/handlers/mssql_handler/README.md b/mindsdb/integrations/handlers/mssql_handler/README.md deleted file mode 100644 index b8ad2d8272c..00000000000 --- a/mindsdb/integrations/handlers/mssql_handler/README.md +++ /dev/null @@ -1,310 +0,0 @@ ---- -title: Microsoft SQL Server -sidebarTitle: Microsoft SQL Server ---- - -This documentation describes the integration of MindsDB with Microsoft SQL Server, a relational database management system developed by Microsoft. -The integration allows for advanced SQL functionalities, extending Microsoft SQL Server's capabilities with MindsDB's features. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB [locally via Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or use [MindsDB Cloud](https://cloud.mindsdb.com/). -2. To connect Microsoft SQL Server to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - -### Installation - -The MSSQL handler supports two connection methods: - -#### Option 1: Standard Connection (pymssql - Recommended) - -```bash -pip install mindsdb[mssql] -``` - -This installs `pymssql`, which provides native FreeTDS-based connections. Works on all platforms. - -#### Option 2: ODBC Connection (pyodbc) - -```bash -pip install mindsdb[mssql-odbc] -``` - -This installs both `pymssql` and `pyodbc` for ODBC driver support. 
- -**Additional requirements for ODBC:** -- **System ODBC libraries**: On Linux, install `unixodbc` and `unixodbc-dev` - ```bash - sudo apt-get install unixodbc unixodbc-dev - ``` -- **Microsoft ODBC Driver for SQL Server**: - - **Linux**: - ```bash - # Add Microsoft repository - curl https://packages.microsoft.com/keys/microsoft.asc | sudo tee /etc/apt/trusted.gpg.d/microsoft.asc - curl https://packages.microsoft.com/config/ubuntu/$(lsb_release -rs)/prod.list | sudo tee /etc/apt/sources.list.d/mssql-release.list - - # Install ODBC Driver 18 - sudo apt-get update - sudo ACCEPT_EULA=Y apt-get install -y msodbcsql18 - ``` - - **macOS**: `brew install msodbcsql18` - - **Windows**: Download from [Microsoft](https://learn.microsoft.com/en-us/sql/connect/odbc/download-odbc-driver-for-sql-server) - -To verify installed drivers: - -```bash -odbcinst -q -d -``` - -## Connection - -Establish a connection to your Microsoft SQL Server database from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE mssql_datasource -WITH ENGINE = 'mssql', -PARAMETERS = { - "host": "127.0.0.1", - "port": 1433, - "user": "sa", - "password": "password", - "database": "master" -}; -``` - -Required connection parameters include the following: - -* `user`: The username for the Microsoft SQL Server. -* `password`: The password for the Microsoft SQL Server. -* `host` The hostname, IP address, or URL of the Microsoft SQL Server. -* `database` The name of the Microsoft SQL Server database to connect to. - -Optional connection parameters include the following: - -* `port`: The port number for connecting to the Microsoft SQL Server. Default is 1433. -* `server`: The server name to connect to. Typically only used with named instances or Azure SQL Database. -* `schema`: The schema in which objects are searched first. If specified, all table references without an explicit schema will be automatically qualified with this schema. 
- -### ODBC Connection - -The handler also supports ODBC connections via `pyodbc` for advanced scenarios like Windows Authentication or specific driver requirements. - - -#### Setup - -1. Install: `pip install mindsdb[mssql-odbc]` -2. Install system ODBC driver (see Installation section above) - -Basic ODBC Connection: - -```sql -CREATE DATABASE mssql_odbc_datasource -WITH ENGINE = 'mssql', -PARAMETERS = { - "host": "127.0.0.1", - "port": 1433, - "user": "sa", - "password": "password", - "database": "master", - "driver": "ODBC Driver 18 for SQL Server" -- Specifying driver enables ODBC -}; -``` -ODBC-specific Parameters: - -* `driver`: The ODBC driver name (e.g., "ODBC Driver 18 for SQL Server"). When specified, enables ODBC mode. -* `use_odbc`: Set to `true` to explicitly use ODBC. Optional if `driver` is specified. -* `encrypt`: Connection encryption: `"yes"` or `"no"`. Driver 18 defaults to `"yes"`. -* `trust_server_certificate`: Whether to trust self-signed certificates: `"yes"` or `"no"`. -* `connection_string_args`: Additional connection string arguments. 
- -#### Example: Azure SQL Database with Encryption: - -```sql -CREATE DATABASE azure_sql_datasource -WITH ENGINE = 'mssql', -PARAMETERS = { - "host": "myserver.database.windows.net", - "port": 1433, - "user": "adminuser", - "password": "SecurePass123!", - "database": "mydb", - "driver": "ODBC Driver 18 for SQL Server", - "encrypt": "yes", - "trust_server_certificate": "no" -}; -``` - -#### Example: Local Development (Self-Signed Certificate): - -```sql -CREATE DATABASE local_mssql -WITH ENGINE = 'mssql', -PARAMETERS = { - "host": "localhost", - "port": 1433, - "user": "sa", - "password": "YourStrong@Passw0rd", - "database": "testdb", - "driver": "ODBC Driver 18 for SQL Server", - "encrypt": "yes", - "trust_server_certificate": "yes" -- Allow self-signed certs -}; -``` - -### ODBC Connection - -The handler also supports ODBC connections via `pyodbc` for advanced scenarios like Windows Authentication or specific driver requirements. - - -#### Setup - -1. Install: `pip install mindsdb[mssql-odbc]` -2. Install system ODBC driver (see Installation section above) - -Basic ODBC Connection: - -```sql -CREATE DATABASE mssql_odbc_datasource -WITH ENGINE = 'mssql', -PARAMETERS = { - "host": "127.0.0.1", - "port": 1433, - "user": "sa", - "password": "password", - "database": "master", - "driver": "ODBC Driver 18 for SQL Server" -- Specifying driver enables ODBC -}; -``` -ODBC-specific Parameters: - -* `driver`: The ODBC driver name (e.g., "ODBC Driver 18 for SQL Server"). When specified, enables ODBC mode. -* `use_odbc`: Set to `true` to explicitly use ODBC. Optional if `driver` is specified. -* `encrypt`: Connection encryption: `"yes"` or `"no"`. Driver 18 defaults to `"yes"`. -* `trust_server_certificate`: Whether to trust self-signed certificates: `"yes"` or `"no"`. -* `connection_string_args`: Additional connection string arguments. 
- -#### Example: Azure SQL Database with Encryption: - -```sql -CREATE DATABASE azure_sql_datasource -WITH ENGINE = 'mssql', -PARAMETERS = { - "host": "myserver.database.windows.net", - "port": 1433, - "user": "adminuser", - "password": "SecurePass123!", - "database": "mydb", - "driver": "ODBC Driver 18 for SQL Server", - "encrypt": "yes", - "trust_server_certificate": "no" -}; -``` - -#### Example: Local Development (Self-Signed Certificate): - -```sql -CREATE DATABASE local_mssql -WITH ENGINE = 'mssql', -PARAMETERS = { - "host": "localhost", - "port": 1433, - "user": "sa", - "password": "YourStrong@Passw0rd", - "database": "testdb", - "driver": "ODBC Driver 18 for SQL Server", - "encrypt": "yes", - "trust_server_certificate": "yes" -- Allow self-signed certs -}; -``` - -## Usage - -Retrieve data from a specified table by providing the integration name, schema, and table name: - -```sql -SELECT * -FROM mssql_datasource.schema_name.table_name -LIMIT 10; -``` - -Run T-SQL queries directly on the connected Microsoft SQL Server database: - -```sql -SELECT * FROM mssql_datasource ( - - --Native Query Goes Here - SELECT - SUM(orderqty) total - FROM Product p JOIN SalesOrderDetail sd ON p.productid = sd.productid - JOIN SalesOrderHeader sh ON sd.salesorderid = sh.salesorderid - JOIN Customer c ON sh.customerid = c.customerid - WHERE (Name = 'Racing Socks, L') AND (companyname = 'Riding Cycles'); - -); -``` - - -The above examples utilize `mssql_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -### Performance Optimization for Large Datasets - -The handler is optimized for efficient data processing, but for very large result sets (millions of rows): - -1. **Use SQL Server's filtering**: Apply `WHERE` clauses to filter data on the server side -2. **Use pagination**: Use `TOP`/`OFFSET-FETCH` in SQL Server or `LIMIT` in MindsDB queries -3. **Aggregate when possible**: Use `GROUP BY`, `COUNT()`, `AVG()`, etc. to reduce data volume -4. 
**Index your tables**: Ensure proper indexes on SQL Server for query performance - -**Example - Paginated Query:** -```sql -SELECT * FROM mssql_datasource ( - SELECT TOP 100000 * - FROM large_table - ORDER BY id - OFFSET 0 ROWS -); -``` - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the Microsoft SQL Server database. -* **Checklist**: - 1. Make sure the Microsoft SQL Server is active. - 2. Confirm that host, port, user, and password are correct. Try a direct Microsoft SQL Server connection using a client like SQL Server Management Studio or DBeaver. - 3. Ensure a stable network between MindsDB and Microsoft SQL Server. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel data - * Incorrect: SELECT * FROM integration.'travel data' - * Correct: SELECT * FROM integration.\`travel data\` - - - -`ODBC Driver Connection Error` - -* **Symptoms**: Errors like "Driver not found", "Can't open lib 'ODBC Driver 17 for SQL Server'", or "pyodbc is not installed". -* **Checklist**: - 1. **Verify pyodbc is installed**: `pip list | grep pyodbc` - 2. **Check system ODBC libraries**: `ldconfig -p | grep odbc` (Linux) should show libodbc.so - 3. **Verify ODBC drivers**: Run `odbcinst -q -d` to list installed drivers - 4. **Match driver name exactly**: Use the exact name from `odbcinst -q -d` (case-sensitive) - 5. **For Driver 18 encryption errors**: Add `"encrypt": "yes", "trust_server_certificate": "yes"` for local/dev servers - 6. 
**Test connection manually**: - ```python - import pyodbc - print(pyodbc.drivers()) # Should list available drivers - ``` - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/mssql_handler/__about__.py b/mindsdb/integrations/handlers/mssql_handler/__about__.py deleted file mode 100644 index 075c8036e10..00000000000 --- a/mindsdb/integrations/handlers/mssql_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Microsoft SQL Server handler" -__package_name__ = "mindsdb_mssql_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Microsoft SQL Server" -__author__ = "MindsDB Inc" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2022- mindsdb" diff --git a/mindsdb/integrations/handlers/mssql_handler/__init__.py b/mindsdb/integrations/handlers/mssql_handler/__init__.py deleted file mode 100644 index dd9962db5c9..00000000000 --- a/mindsdb/integrations/handlers/mssql_handler/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example - -try: - from .mssql_handler import SqlServerHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - - -title = "Microsoft SQL Server" -name = "mssql" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.MINDSDB - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args_example", - "connection_args", - "import_error", - "icon_path", - "support_level", -] diff --git a/mindsdb/integrations/handlers/mssql_handler/connection_args.py b/mindsdb/integrations/handlers/mssql_handler/connection_args.py deleted file mode 100644 index a9ad3f9a60f..00000000000 
--- a/mindsdb/integrations/handlers/mssql_handler/connection_args.py +++ /dev/null @@ -1,54 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - "type": ARG_TYPE.STR, - "description": "The user name used to authenticate with the Microsoft SQL Server.", - "required": True, - "label": "User", - }, - password={ - "type": ARG_TYPE.PWD, - "description": "The password to authenticate the user with the Microsoft SQL Server.", - "required": True, - "label": "Password", - "secret": True, - }, - database={ - "type": ARG_TYPE.STR, - "description": "The database name to use when connecting with the Microsoft SQL Server.", - "required": True, - "label": "Database", - }, - host={ - "type": ARG_TYPE.STR, - "description": "The host name or IP address of the Microsoft SQL Server.", - "required": True, - "label": "Host", - }, - port={ - "type": ARG_TYPE.INT, - "description": "The TCP/IP port of the Microsoft SQL Server. Must be an integer.", - "required": False, - "label": "Port", - }, - server={ - "type": ARG_TYPE.STR, - "description": "The server name of the Microsoft SQL Server. Typically only used with named instances or Azure SQL Database.", - "required": False, - "label": "Server", - }, - schema={ - "type": ARG_TYPE.STR, - "description": "The schema in which objects are searched first. 
If not provided, all schemas will be queried.", - "required": False, - "label": "Schema", - }, -) - -connection_args_example = OrderedDict( - host="127.0.0.1", port=1433, user="sa", password="password", database="master", schema="dbo" -) diff --git a/mindsdb/integrations/handlers/mssql_handler/icon.svg b/mindsdb/integrations/handlers/mssql_handler/icon.svg deleted file mode 100644 index b462f8ac92d..00000000000 --- a/mindsdb/integrations/handlers/mssql_handler/icon.svg +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/mssql_handler/mssql_handler.py b/mindsdb/integrations/handlers/mssql_handler/mssql_handler.py deleted file mode 100644 index 7b6e42fff34..00000000000 --- a/mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +++ /dev/null @@ -1,730 +0,0 @@ -from typing import Any, Union, TYPE_CHECKING -import datetime - -import pymssql -from pymssql import OperationalError -import pandas as pd -from pandas.api import types as pd_types -from sqlalchemy.exc import SQLAlchemyError - -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb_sql_parser.ast import Identifier - -from mindsdb.integrations.libs.base import MetaDatabaseHandler -from mindsdb.integrations.utilities.query_traversal import query_traversal -from mindsdb.utilities import log -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender, RenderError -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE - -if TYPE_CHECKING: - import pyodbc - -logger = log.getLogger(__name__) - - -def _map_type(mssql_type_text: str) -> MYSQL_DATA_TYPE: - """Map MSSQL text types names to MySQL types as enum. - - Args: - mssql_type_text (str): The name of the MSSQL type to map. 
- - Returns: - MYSQL_DATA_TYPE: The MySQL type enum that corresponds to the MSSQL text type name. - """ - internal_type_name = mssql_type_text.lower() - types_map = { - ("tinyint", "smallint", "int", "bigint"): MYSQL_DATA_TYPE.INT, - ("bit",): MYSQL_DATA_TYPE.BOOL, - ("money", "smallmoney", "float", "real"): MYSQL_DATA_TYPE.FLOAT, - ("decimal", "numeric"): MYSQL_DATA_TYPE.DECIMAL, - ("date",): MYSQL_DATA_TYPE.DATE, - ("time",): MYSQL_DATA_TYPE.TIME, - ("datetime2", "datetimeoffset", "datetime", "smalldatetime"): MYSQL_DATA_TYPE.DATETIME, - ("varchar", "nvarchar"): MYSQL_DATA_TYPE.VARCHAR, - ("char", "text", "nchar", "ntext"): MYSQL_DATA_TYPE.TEXT, - ("binary", "varbinary", "image"): MYSQL_DATA_TYPE.BINARY, - } - - for db_types_list, mysql_data_type in types_map.items(): - if internal_type_name in db_types_list: - return mysql_data_type - - logger.debug(f"MSSQL handler type mapping: unknown type: {internal_type_name}, use VARCHAR as fallback.") - return MYSQL_DATA_TYPE.VARCHAR - - -def _make_table_response( - result: list[Union[dict[str, Any], tuple]], cursor: Union[pymssql.Cursor, "pyodbc.Cursor"], use_odbc: bool = False -) -> Response: - """Build response from result and cursor. - - Args: - result (list[Union[dict[str, Any], tuple]]): result of the query. - cursor (Union[pymssql.Cursor, pyodbc.Cursor]): cursor object. - use_odbc (bool): whether ODBC connection is being used. - - Returns: - Response: response object. 
- """ - description: list[tuple[Any]] = cursor.description - mysql_types: list[MYSQL_DATA_TYPE] = [] - columns = [x[0] for x in cursor.description] - - if not result: - data_frame = pd.DataFrame(columns=columns) - elif use_odbc: - # from_records() understands tuple-like records (including pyodbc.Row) - data_frame = pd.DataFrame.from_records(result, columns=columns) - else: - # pymssql with as_dict=True returns list of dicts - data_frame = pd.DataFrame(result) - - for column in description: - column_name = column[0] - column_type = column[1] - column_dtype = data_frame[column_name].dtype - - if use_odbc: - # For pyodbc, use type inference based on pandas dtype - if pd_types.is_integer_dtype(column_dtype): - mysql_types.append(MYSQL_DATA_TYPE.INT) - elif pd_types.is_float_dtype(column_dtype): - mysql_types.append(MYSQL_DATA_TYPE.FLOAT) - elif pd_types.is_bool_dtype(column_dtype): - mysql_types.append(MYSQL_DATA_TYPE.TINYINT) - elif pd_types.is_datetime64_any_dtype(column_dtype): - mysql_types.append(MYSQL_DATA_TYPE.DATETIME) - elif pd_types.is_object_dtype(column_dtype): - if len(data_frame) > 0 and isinstance( - data_frame[column_name].iloc[0], (datetime.datetime, datetime.date, datetime.time) - ): - mysql_types.append(MYSQL_DATA_TYPE.DATETIME) - else: - mysql_types.append(MYSQL_DATA_TYPE.TEXT) - else: - mysql_types.append(MYSQL_DATA_TYPE.TEXT) - else: - match column_type: - case pymssql.NUMBER: - if pd_types.is_integer_dtype(column_dtype): - mysql_types.append(MYSQL_DATA_TYPE.INT) - elif pd_types.is_float_dtype(column_dtype): - mysql_types.append(MYSQL_DATA_TYPE.FLOAT) - elif pd_types.is_bool_dtype(column_dtype): - mysql_types.append(MYSQL_DATA_TYPE.TINYINT) - else: - mysql_types.append(MYSQL_DATA_TYPE.DOUBLE) - case pymssql.DECIMAL: - mysql_types.append(MYSQL_DATA_TYPE.DECIMAL) - case pymssql.STRING: - mysql_types.append(MYSQL_DATA_TYPE.TEXT) - case pymssql.DATETIME: - mysql_types.append(MYSQL_DATA_TYPE.DATETIME) - case pymssql.BINARY: - # DATE and TIME types 
returned as 'BINARY' type, and dataframe type is 'object', so it is not possible - # to infer correct mysql type for them - if pd_types.is_datetime64_any_dtype(column_dtype): - # pymssql return datetimes as 'binary' type - # if timezone is present, then it is datetime.timezone - series = data_frame[column_name] - if ( - series.dt.tz is not None - and isinstance(series.dt.tz, datetime.timezone) - and series.dt.tz != datetime.timezone.utc - ): - series = series.dt.tz_convert("UTC") - data_frame[column_name] = series.dt.tz_localize(None) - mysql_types.append(MYSQL_DATA_TYPE.DATETIME) - else: - mysql_types.append(MYSQL_DATA_TYPE.BINARY) - case _: - logger.warning(f"Unknown type: {column_type}, use TEXT as fallback.") - mysql_types.append(MYSQL_DATA_TYPE.TEXT) - - return Response(RESPONSE_TYPE.TABLE, data_frame=data_frame, mysql_types=mysql_types) - - -class SqlServerHandler(MetaDatabaseHandler): - """ - This handler handles connection and execution of the Microsoft SQL Server statements. - Supports both native pymssql connections and ODBC connections via pyodbc. - - To use ODBC connection, specify either: - - 'use_odbc': True in connection parameters, or - - 'driver': '' in connection parameters - """ - - name = "mssql" - - def __init__(self, name, **kwargs): - super().__init__(name) - self.parser = parse_sql - self.connection_args = kwargs.get("connection_data") - self.dialect = "mssql" - self.database = self.connection_args.get("database") - self.schema = self.connection_args.get("schema") - self.renderer = SqlalchemyRender("mssql") - - # Determine if ODBC should be used - self.use_odbc = self.connection_args.get("use_odbc", False) or "driver" in self.connection_args - - self.connection = None - self.is_connected = False - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def connect(self): - """ - Establishes a connection to a Microsoft SQL Server database. - Uses either pymssql (native) or pyodbc based on configuration. 
- - Raises: - pymssql._mssql.OperationalError or pyodbc.Error: If an error occurs while connecting to the database. - - Returns: - Union[pymssql.Connection, pyodbc.Connection]: A connection object to the Microsoft SQL Server database. - """ - - if self.is_connected is True: - return self.connection - - if self.use_odbc: - return self._connect_odbc() - else: - return self._connect_pymssql() - - def _connect_pymssql(self): - """Connect using pymssql (native FreeTDS-based connection).""" - # Mandatory connection parameters - if not all(key in self.connection_args for key in ["host", "user", "password", "database"]): - raise ValueError("Required parameters (host, user, password, database) must be provided.") - - config = { - "host": self.connection_args.get("host"), - "user": self.connection_args.get("user"), - "password": self.connection_args.get("password"), - "database": self.connection_args.get("database"), - } - - # Optional connection parameters - if "port" in self.connection_args: - config["port"] = self.connection_args.get("port") - - if "server" in self.connection_args: - config["server"] = self.connection_args.get("server") - - try: - self.connection = pymssql.connect(**config) - self.is_connected = True - return self.connection - except OperationalError as e: - logger.error(f"Error connecting to Microsoft SQL Server {self.database}, {e}!") - self.is_connected = False - raise - - def _connect_odbc(self): - """Connect using pyodbc (ODBC connection).""" - try: - import pyodbc - except ImportError as e: - raise ImportError( - "pyodbc is not installed. Install it with 'pip install pyodbc' or " - "'pip install mindsdb[mssql-odbc]' to use ODBC connections." 
- ) from e - - # Mandatory connection parameters - if not all(key in self.connection_args for key in ["host", "user", "password", "database"]): - raise ValueError("Required parameters (host, user, password, database) must be provided.") - - driver = self.connection_args.get("driver", "ODBC Driver 18 for SQL Server") - host = self.connection_args.get("host") - port = self.connection_args.get("port", 1433) - database = self.connection_args.get("database") - user = self.connection_args.get("user") - password = self.connection_args.get("password") - - conn_str_parts = [ - f"DRIVER={{{driver}}}", - f"SERVER={host},{port}", - f"DATABASE={database}", - f"UID={user}", - f"PWD={password}", - ] - - # Add optional parameters - if "encrypt" in self.connection_args: - conn_str_parts.append(f"Encrypt={self.connection_args.get('encrypt', 'yes')}") - if "trust_server_certificate" in self.connection_args: - conn_str_parts.append( - f"TrustServerCertificate={self.connection_args.get('trust_server_certificate', 'yes')}" - ) - - if "connection_string_args" in self.connection_args: - conn_str_parts.append(self.connection_args["connection_string_args"]) - - conn_str = ";".join(conn_str_parts) - - try: - self.connection = pyodbc.connect(conn_str, timeout=10) - self.is_connected = True - return self.connection - except pyodbc.Error as e: - logger.error(f"Error connecting to Microsoft SQL Server {self.database} via ODBC, {e}!") - self.is_connected = False - - # Check if it's a driver not found error - error_msg = str(e) - if "Driver" in error_msg and ("not found" in error_msg or "specified" in error_msg): - raise ConnectionError( - f"ODBC Driver not found: {driver}. " - f"Please install the Microsoft ODBC Driver for SQL Server. 
" - f"Error: {e}" - ) from e - raise - except Exception as e: - logger.error(f"Error connecting to Microsoft SQL Server {self.database} via ODBC, {e}!") - self.is_connected = False - raise - - def disconnect(self): - """ - Closes the connection to the Microsoft SQL Server database if it's currently open. - """ - - if not self.is_connected: - return - if self.connection is not None: - try: - self.connection.close() - except Exception: - logger.exception("Failed to close connection:") - pass - self.connection = None - self.is_connected = False - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the Microsoft SQL Server database. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. - """ - - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - connection = self.connect() - with connection.cursor() as cur: - # Execute a simple query to test the connection - cur.execute("select 1;") - response.success = True - except Exception as e: - logger.error(f"Error connecting to Microsoft SQL Server {self.database}, {e}!") - response.error_message = str(e) - - if response.success and need_to_close: - self.disconnect() - elif not response.success and self.is_connected: - self.is_connected = False - - return response - - def native_query(self, query: str) -> Response: - """ - Executes a SQL query on the Microsoft SQL Server database and returns the result. - - Args: - query (str): The SQL query to be executed. - - Returns: - Response: A response object containing the result of the query or an error message. 
- """ - - need_to_close = self.is_connected is False - - connection = self.connect() - - if self.use_odbc: - with connection.cursor() as cur: - try: - cur.execute(query) - if cur.description: - result = cur.fetchall() - response = _make_table_response(result, cur, use_odbc=True) - else: - response = Response(RESPONSE_TYPE.OK, affected_rows=cur.rowcount) - connection.commit() - except Exception as e: - logger.exception(f"Error running query: {query} on {self.database}, {e}!") - response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(e)) - connection.rollback() - else: - with connection.cursor(as_dict=True) as cur: - try: - cur.execute(query) - if cur.description: - result = cur.fetchall() - response = _make_table_response(result, cur, use_odbc=False) - else: - response = Response(RESPONSE_TYPE.OK, affected_rows=cur.rowcount) - connection.commit() - except Exception as e: - logger.exception(f"Error running query: {query} on {self.database}, {e}!") - response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(e)) - connection.rollback() - - if need_to_close is True: - self.disconnect() - - return response - - def _add_schema_to_tables(self, node, is_table=False, **kwargs): - """ - Callback for query_traversal that adds schema prefix to table identifiers. - - Args: - node: The AST node being visited - is_table: True if this node represents a table reference - **kwargs: Other arguments from query_traversal (parent_query, callstack, etc.) - - Returns: - None to keep traversing, or a replacement node - Note: This is mostly a workaround for Minds but it should still work for FQE - """ - if is_table and isinstance(node, Identifier): - # Only add schema if the identifier doesn't already have one (single part) - if len(node.parts) == 1: - node.parts.insert(0, self.schema) - node.is_quoted.insert(0, False) - return None - - def query(self, query: ASTNode) -> Response: - """ - Executes a SQL query represented by an ASTNode and retrieves the data. 
- - Args: - query (ASTNode): An ASTNode representing the SQL query to be executed. - - Returns: - Response: The response from the `native_query` method, containing the result of the SQL query execution. - """ - # Add schema prefix to table identifiers if schema is configured - if self.schema: - query_traversal(query, self._add_schema_to_tables) - query_str, render_error = None, None - try: - query_str = self.renderer.get_string(query, with_failback=False) - except (SQLAlchemyError, NotImplementedError, RenderError) as e: - render_error = str(e) - - if query_str is None: - query_str = self.renderer.get_string(query, with_failback=True) - - logger.debug(f"Executing SQL query: {query_str}") - resp = self.native_query(query_str) - if resp.resp_type == RESPONSE_TYPE.ERROR and render_error: - resp.error_message += f"\nThe problem with render: {render_error}" - return resp - - def get_tables(self) -> Response: - """ - Retrieves a list of all non-system tables and views in the current schema of the Microsoft SQL Server database. - - Returns: - Response: A response object containing the list of tables and views, formatted as per the `Response` class. - """ - - query = f""" - SELECT - table_schema, - table_name, - table_type - FROM {self.database}.INFORMATION_SCHEMA.TABLES - WHERE TABLE_TYPE in ('BASE TABLE', 'VIEW') - """ - if self.schema: - query += f" AND table_schema = '{self.schema}'" - - return self.native_query(query) - - def get_columns(self, table_name) -> Response: - """ - Retrieves column details for a specified table in the Microsoft SQL Server database. - - Args: - table_name (str): The name of the table for which to retrieve column information. - - Returns: - Response: A response object containing the column details, formatted as per the `Response` class. - Raises: - ValueError: If the 'table_name' is not a valid string. 
- """ - - query = f""" - SELECT - COLUMN_NAME, - DATA_TYPE, - ORDINAL_POSITION, - COLUMN_DEFAULT, - IS_NULLABLE, - CHARACTER_MAXIMUM_LENGTH, - CHARACTER_OCTET_LENGTH, - NUMERIC_PRECISION, - NUMERIC_SCALE, - DATETIME_PRECISION, - CHARACTER_SET_NAME, - COLLATION_NAME - FROM - information_schema.columns - WHERE - table_name = '{table_name}' - """ - - if self.schema: - query += f" AND table_schema = '{self.schema}'" - - result = self.native_query(query) - result.to_columns_table_response(map_type_fn=_map_type) - return result - - def meta_get_tables(self, table_names: list[str] | None = None) -> Response: - """ - Retrieves metadata information about the tables in the Microsoft SQL Server database - to be stored in the data catalog. - - Args: - table_names (list): A list of table names for which to retrieve metadata information. - - Returns: - Response: A response object containing the metadata information, formatted as per the `Response` class. - """ - query = f""" - SELECT - t.TABLE_NAME as table_name, - t.TABLE_SCHEMA as table_schema, - t.TABLE_TYPE as table_type, - CAST(ep.value AS NVARCHAR(MAX)) as table_description, - SUM(p.rows) as row_count - FROM {self.database}.INFORMATION_SCHEMA.TABLES t - LEFT JOIN {self.database}.sys.tables st - ON t.TABLE_NAME = st.name - LEFT JOIN {self.database}.sys.schemas s - ON st.schema_id = s.schema_id AND t.TABLE_SCHEMA = s.name - LEFT JOIN {self.database}.sys.extended_properties ep - ON st.object_id = ep.major_id - AND ep.minor_id = 0 - AND ep.class = 1 - AND ep.name = 'MS_Description' - LEFT JOIN {self.database}.sys.partitions p - ON st.object_id = p.object_id - AND p.index_id IN (0, 1) - WHERE t.TABLE_TYPE IN ('BASE TABLE', 'VIEW') - AND t.TABLE_SCHEMA NOT IN ('sys', 'INFORMATION_SCHEMA') - """ - - if self.schema: - query += f" AND t.TABLE_SCHEMA = '{self.schema}'" - - query += " GROUP BY t.TABLE_NAME, t.TABLE_SCHEMA, t.TABLE_TYPE, ep.value" - - if table_names is not None and len(table_names) > 0: - quoted_names = [f"'{t}'" for 
t in table_names] - query += f" HAVING t.TABLE_NAME IN ({','.join(quoted_names)})" - - result = self.native_query(query) - return result - - def meta_get_columns(self, table_names: list[str] | None = None) -> Response: - """ - Retrieves column metadata for the specified tables (or all tables if no list is provided). - - Args: - table_names (list): A list of table names for which to retrieve column metadata. - - Returns: - Response: A response object containing the column metadata. - """ - query = f""" - SELECT - c.TABLE_NAME as table_name, - c.COLUMN_NAME as column_name, - c.DATA_TYPE as data_type, - CAST(ep.value AS NVARCHAR(MAX)) as column_description, - c.COLUMN_DEFAULT as column_default, - CASE WHEN c.IS_NULLABLE = 'YES' THEN 1 ELSE 0 END as is_nullable - FROM {self.database}.INFORMATION_SCHEMA.COLUMNS c - LEFT JOIN {self.database}.sys.tables st - ON c.TABLE_NAME = st.name - LEFT JOIN {self.database}.sys.schemas s - ON st.schema_id = s.schema_id AND c.TABLE_SCHEMA = s.name - LEFT JOIN {self.database}.sys.columns sc - ON st.object_id = sc.object_id AND c.COLUMN_NAME = sc.name - LEFT JOIN {self.database}.sys.extended_properties ep - ON st.object_id = ep.major_id - AND sc.column_id = ep.minor_id - AND ep.name = 'MS_Description' - WHERE c.TABLE_SCHEMA NOT IN ('sys', 'INFORMATION_SCHEMA') - """ - - if self.schema: - query += f" AND c.TABLE_SCHEMA = '{self.schema}'" - - if table_names is not None and len(table_names) > 0: - quoted_names = [f"'{t}'" for t in table_names] - query += f" AND c.TABLE_NAME IN ({','.join(quoted_names)})" - - result = self.native_query(query) - return result - - def meta_get_column_statistics(self, table_names: list[str] | None = None) -> Response: - """ - Retrieves column statistics (e.g., null percentage, distinct value count, min/max values) - for the specified tables or all tables if no list is provided. - - Note: Uses SQL Server's sys.dm_db_stats_properties and sys.dm_db_stats_histogram - (similar to PostgreSQL's pg_stats). 
Statistics are only available for columns that - have statistics objects created by SQL Server (typically indexed columns or columns - used in queries after AUTO_CREATE_STATISTICS). - - Args: - table_names (list): A list of table names for which to retrieve column statistics. - - Returns: - Response: A response object containing the column statistics. - """ - table_filter = "" - if table_names is not None and len(table_names) > 0: - quoted_names = [f"'{t}'" for t in table_names] - table_filter = f" AND t.name IN ({','.join(quoted_names)})" - - schema_filter = "" - if self.schema: - schema_filter = f" AND s.name = '{self.schema}'" - - # Using OUTER APPLY to handle table-valued functions properly - # This is equivalent to PostgreSQL's pg_stats view approach - # Includes all statistics: auto-created, user-created, and index-based - # dm_db_stats_histogram columns: range_high_key, range_rows, equal_rows, - # distinct_range_rows, average_range_rows - query = f""" - SELECT DISTINCT - t.name AS TABLE_NAME, - c.name AS COLUMN_NAME, - CAST(NULL AS DECIMAL(10,2)) AS NULL_PERCENTAGE, - CAST(h.distinct_count AS BIGINT) AS DISTINCT_VALUES_COUNT, - NULL AS MOST_COMMON_VALUES, - NULL AS MOST_COMMON_FREQUENCIES, - CAST(h.min_value AS NVARCHAR(MAX)) AS MINIMUM_VALUE, - CAST(h.max_value AS NVARCHAR(MAX)) AS MAXIMUM_VALUE - FROM {self.database}.sys.tables t - INNER JOIN {self.database}.sys.schemas s - ON t.schema_id = s.schema_id - INNER JOIN {self.database}.sys.columns c - ON t.object_id = c.object_id - LEFT JOIN {self.database}.sys.stats st - ON st.object_id = t.object_id - LEFT JOIN {self.database}.sys.stats_columns sc - ON sc.object_id = st.object_id - AND sc.stats_id = st.stats_id - AND sc.column_id = c.column_id - AND sc.stats_column_id = 1 -- Only leading column in multi-column stats - OUTER APPLY ( - SELECT - MIN(CAST(range_high_key AS NVARCHAR(MAX))) AS min_value, - MAX(CAST(range_high_key AS NVARCHAR(MAX))) AS max_value, - SUM(CAST(distinct_range_rows AS BIGINT)) + COUNT(*) 
AS distinct_count - FROM {self.database}.sys.dm_db_stats_histogram(st.object_id, st.stats_id) - WHERE st.object_id IS NOT NULL - ) h - WHERE s.name NOT IN ('sys', 'INFORMATION_SCHEMA') - {schema_filter} - {table_filter} - ORDER BY t.name, c.name - """ - - result = self.native_query(query) - return result - - def meta_get_primary_keys(self, table_names: list[str] | None = None) -> Response: - """ - Retrieves primary key information for the specified tables (or all tables if no list is provided). - - Args: - table_names (list): A list of table names for which to retrieve primary key information. - - Returns: - Response: A response object containing the primary key information. - """ - query = f""" - SELECT - tc.TABLE_NAME as table_name, - kcu.COLUMN_NAME as column_name, - kcu.ORDINAL_POSITION as ordinal_position, - tc.CONSTRAINT_NAME as constraint_name - FROM {self.database}.INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc - INNER JOIN {self.database}.INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu - ON tc.CONSTRAINT_NAME = kcu.CONSTRAINT_NAME - AND tc.TABLE_SCHEMA = kcu.TABLE_SCHEMA - AND tc.TABLE_NAME = kcu.TABLE_NAME - WHERE tc.CONSTRAINT_TYPE = 'PRIMARY KEY' - """ - - if self.schema: - query += f" AND tc.TABLE_SCHEMA = '{self.schema}'" - - if table_names is not None and len(table_names) > 0: - quoted_names = [f"'{t}'" for t in table_names] - query += f" AND tc.TABLE_NAME IN ({','.join(quoted_names)})" - - query += " ORDER BY tc.TABLE_NAME, kcu.ORDINAL_POSITION" - - result = self.native_query(query) - return result - - def meta_get_foreign_keys(self, table_names: list[str] | None = None) -> Response: - """ - Retrieves foreign key information for the specified tables (or all tables if no list is provided). - - Args: - table_names (list): A list of table names for which to retrieve foreign key information. - - Returns: - Response: A response object containing the foreign key information. 
- """ - query = f""" - SELECT - OBJECT_NAME(fk.referenced_object_id) as parent_table_name, - COL_NAME(fkc.referenced_object_id, fkc.referenced_column_id) as parent_column_name, - OBJECT_NAME(fk.parent_object_id) as child_table_name, - COL_NAME(fkc.parent_object_id, fkc.parent_column_id) as child_column_name, - fk.name as constraint_name - FROM {self.database}.sys.foreign_keys fk - INNER JOIN {self.database}.sys.foreign_key_columns fkc - ON fk.object_id = fkc.constraint_object_id - INNER JOIN {self.database}.sys.tables t - ON fk.parent_object_id = t.object_id - INNER JOIN {self.database}.sys.schemas s - ON t.schema_id = s.schema_id - WHERE s.name NOT IN ('sys', 'INFORMATION_SCHEMA') - """ - - if self.schema: - query += f" AND s.name = '{self.schema}'" - - if table_names is not None and len(table_names) > 0: - quoted_names = [f"'{t}'" for t in table_names] - query += f" AND OBJECT_NAME(fk.parent_object_id) IN ({','.join(quoted_names)})" - - query += " ORDER BY child_table_name, constraint_name" - - result = self.native_query(query) - return result diff --git a/mindsdb/integrations/handlers/mssql_handler/requirements.txt b/mindsdb/integrations/handlers/mssql_handler/requirements.txt deleted file mode 100644 index 0e1799ce37c..00000000000 --- a/mindsdb/integrations/handlers/mssql_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pymssql >= 2.1.4 diff --git a/mindsdb/integrations/handlers/mssql_handler/requirements_odbc.txt b/mindsdb/integrations/handlers/mssql_handler/requirements_odbc.txt deleted file mode 100644 index 17efbda6fc6..00000000000 --- a/mindsdb/integrations/handlers/mssql_handler/requirements_odbc.txt +++ /dev/null @@ -1,3 +0,0 @@ -pymssql >= 2.1.4 -pyodbc >= 5.2.0 - diff --git a/mindsdb/integrations/handlers/mysql_handler/README.md b/mindsdb/integrations/handlers/mysql_handler/README.md deleted file mode 100644 index 18a747a0027..00000000000 --- a/mindsdb/integrations/handlers/mysql_handler/README.md +++ /dev/null @@ -1,103 +0,0 @@ ---- -title: MySQL 
-sidebarTitle: MySQL ---- - -This documentation describes the integration of MindsDB with [MySQL](https://www.mysql.com/), a fast, reliable, and scalable open-source database. -The integration allows MindsDB to access data from MySQL and enhance MySQL with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect MySQL to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to MySQL from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/mysql_handler) as an engine. - -```sql -CREATE DATABASE mysql_conn -WITH ENGINE = 'mysql', -PARAMETERS = { - "host": "host-name", - "port": 3306, - "database": "db-name", - "user": "user-name", - "password": "password" -}; -``` - -Or: - -```sql -CREATE DATABASE mysql_datasource -WITH - ENGINE = 'mysql', - PARAMETERS = { - "url": "mysql://user-name@host-name:3306" - }; -``` - -Required connection parameters include the following: - -* `user`: The username for the MySQL database. -* `password`: The password for the MySQL database. -* `host`: The hostname, IP address, or URL of the MySQL server. -* `port`: The port number for connecting to the MySQL server. -* `database`: The name of the MySQL database to connect to. - -Or: - -* `url`: You can specify a connection to MySQL Server using a URI-like string, as an alternative connection option. - -Optional connection parameters include the following: - - * `ssl`: Boolean parameter that indicates whether SSL encryption is enabled for the connection. 
Set to True to enable SSL and enhance connection security, or set to False to use the default non-encrypted connection. - * `ssl_ca`: Specifies the path to the Certificate Authority (CA) file in PEM format. - * `ssl_cert`: Specifies the path to the SSL certificate file. This certificate should be signed by a trusted CA specified in the `ssl_ca` file or be a self-signed certificate trusted by the server. - * `ssl_key`: Specifies the path to the private key file (in PEM format). - * `use_pure` (`True` by default): Whether to use pure Python or C Extension. If `use_pure=False` and the C Extension is not available, then Connector/Python will automatically fall back to the pure Python implementation. - -## Usage - -The following usage examples utilize the connection to MySQL made via the `CREATE DATABASE` statement and named `mysql_conn`. - -Retrieve data from a specified table by providing the integration and table name. - -```sql -SELECT * -FROM mysql_conn.table_name -LIMIT 10; -``` - - -**Next Steps** - -Follow [this tutorial](https://docs.mindsdb.com/use-cases/data_enrichment/text-summarization-inside-mysql-with-openai) to see more use case examples. - - -## Troubleshooting - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the MySQL database. -* **Checklist**: - 1. Ensure that the MySQL server is running and accessible - 2. Confirm that host, port, user, and password are correct. Try a direct MySQL connection. - 3. Test the network connection between the MindsDB host and the MySQL server. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces, reserved words or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - 2. 
Examples: - * Incorrect: SELECT * FROM integration.travel data - * Incorrect: SELECT * FROM integration.'travel data' - * Correct: SELECT * FROM integration.\`travel data\` - diff --git a/mindsdb/integrations/handlers/mysql_handler/__about__.py b/mindsdb/integrations/handlers/mysql_handler/__about__.py deleted file mode 100644 index a6669acb709..00000000000 --- a/mindsdb/integrations/handlers/mysql_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB MySQL handler" -__package_name__ = "mindsdb_mysql_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for MySQL" -__author__ = "MindsDB Inc" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2022- mindsdb" diff --git a/mindsdb/integrations/handlers/mysql_handler/__init__.py b/mindsdb/integrations/handlers/mysql_handler/__init__.py deleted file mode 100644 index 77561d72c28..00000000000 --- a/mindsdb/integrations/handlers/mysql_handler/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args_example, connection_args - -try: - from .mysql_handler import MySQLHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "MySQL" -name = "mysql" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.MINDSDB - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", - "support_level", -] diff --git a/mindsdb/integrations/handlers/mysql_handler/connection_args.py b/mindsdb/integrations/handlers/mysql_handler/connection_args.py deleted file mode 100644 index 1c4cb6abf40..00000000000 --- 
a/mindsdb/integrations/handlers/mysql_handler/connection_args.py +++ /dev/null @@ -1,72 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - url={ - "type": ARG_TYPE.STR, - "description": "The URI-Like connection string to the MySQL server. If provided, it will override the other connection arguments.", - "required": False, - "label": "URL", - }, - user={ - "type": ARG_TYPE.STR, - "description": "The user name used to authenticate with the MySQL server.", - "required": True, - "label": "User", - }, - password={ - "type": ARG_TYPE.PWD, - "description": "The password to authenticate the user with the MySQL server.", - "required": True, - "label": "Password", - "secret": True, - }, - database={ - "type": ARG_TYPE.STR, - "description": "The database name to use when connecting with the MySQL server.", - "required": True, - "label": "Database", - }, - host={ - "type": ARG_TYPE.STR, - "description": "The host name or IP address of the MySQL server. NOTE: use '127.0.0.1' instead of 'localhost' to connect to local server.", - "required": True, - "label": "Host", - }, - port={ - "type": ARG_TYPE.INT, - "description": "The TCP/IP port of the MySQL server. 
Must be an integer.", - "required": True, - "label": "Port", - }, - ssl={ - "type": ARG_TYPE.BOOL, - "description": "Set it to True to enable ssl.", - "required": False, - "label": "ssl", - }, - ssl_ca={ - "type": ARG_TYPE.PATH, - "description": "Path or URL of the Certificate Authority (CA) certificate file", - "required": False, - "label": "ssl_ca", - }, - ssl_cert={ - "type": ARG_TYPE.PATH, - "description": "Path name or URL of the server public key certificate file", - "required": False, - "label": "ssl_cert", - }, - ssl_key={ - "type": ARG_TYPE.PATH, - "description": "The path name or URL of the server private key file", - "required": False, - "label": "ssl_key", - }, -) - -connection_args_example = OrderedDict( - host="127.0.0.1", port=3306, user="root", password="password", database="database" -) diff --git a/mindsdb/integrations/handlers/mysql_handler/icon.svg b/mindsdb/integrations/handlers/mysql_handler/icon.svg deleted file mode 100644 index 15399ae4197..00000000000 --- a/mindsdb/integrations/handlers/mysql_handler/icon.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/mysql_handler/mysql_handler.py b/mindsdb/integrations/handlers/mysql_handler/mysql_handler.py deleted file mode 100644 index 86882d03563..00000000000 --- a/mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +++ /dev/null @@ -1,624 +0,0 @@ -from typing import Optional, List, Dict, Any, Generator - -import pandas as pd -import mysql.connector - -from mindsdb_sql_parser import parse_sql -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.utilities import log -from mindsdb.integrations.libs.base import MetaDatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - TableResponse, - OkResponse, - ErrorResponse, - DataHandlerResponse, -) -from 
mindsdb.integrations.handlers.mysql_handler.settings import ConnectionConfig -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import C_TYPES, DATA_C_TYPE_MAP -from mindsdb.utilities.types.column import Column -from mindsdb.utilities.config import config as mindsdb_config - -logger = log.getLogger(__name__) - - -def _map_type(mysql_type_text: str) -> MYSQL_DATA_TYPE: - """Map MySQL text types names to MySQL types as enum. - - Args: - mysql_type_text (str): The name of the MySQL type to map. - - Returns: - MYSQL_DATA_TYPE: The MySQL type enum that corresponds to the MySQL text type name. - """ - try: - return MYSQL_DATA_TYPE(mysql_type_text.upper()) - except Exception: - logger.warning(f"MySQL handler: unknown type: {mysql_type_text}, use TEXT as fallback.") - return MYSQL_DATA_TYPE.TEXT - - -def _get_columns(cursor: mysql.connector.cursor.MySQLCursor) -> list[Column]: - """Get columns from cursor description. - - Args: - cursor (mysql.connector.cursor.MySQLCursor): cursor object. - - Returns: - list[Column]: List of Column objects with type and dtype info. - """ - description = cursor.description - reverse_c_type_map = {v.code: k for k, v in DATA_C_TYPE_MAP.items() if v.code != C_TYPES.MYSQL_TYPE_BLOB} - columns = [] - for col in description: - column_name = col[0] - type_int = col[1] - - if isinstance(type_int, int) is False: - mysql_type = MYSQL_DATA_TYPE.TEXT - elif type_int == C_TYPES.MYSQL_TYPE_TINY: - # There are 3 types that returns as TINYINT: TINYINT, BOOL, BOOLEAN. - mysql_type = MYSQL_DATA_TYPE.TINYINT - elif type_int in reverse_c_type_map: - mysql_type = reverse_c_type_map[type_int] - elif type_int == C_TYPES.MYSQL_TYPE_BLOB: - # region determine text/blob type by flags - # Unfortunately, there is no way to determine particular type of text/blob column by flags. 
- # Subtype have to be determined by 8-s element of description tuple, but mysql.conector - # return the same value for all text types (TINYTEXT, TEXT, MEDIUMTEXT, LONGTEXT), and for - # all blob types (TINYBLOB, BLOB, MEDIUMBLOB, LONGBLOB). - if col[7] == 16: - mysql_type = MYSQL_DATA_TYPE.TEXT - elif col[7] == 144: - mysql_type = MYSQL_DATA_TYPE.BLOB - else: - logger.debug(f"MySQL handler: unknown type code {col[7]}, use TEXT as fallback.") - mysql_type = MYSQL_DATA_TYPE.TEXT - # endregion - else: - logger.warning(f"MySQL handler: unknown type id={type_int} in column {column_name}, use TEXT as fallback.") - mysql_type = MYSQL_DATA_TYPE.TEXT - - if mysql_type in ( - MYSQL_DATA_TYPE.SMALLINT, - MYSQL_DATA_TYPE.INT, - MYSQL_DATA_TYPE.MEDIUMINT, - MYSQL_DATA_TYPE.BIGINT, - MYSQL_DATA_TYPE.TINYINT, - ): - expected_dtype = "Int64" - elif mysql_type in (MYSQL_DATA_TYPE.BOOL, MYSQL_DATA_TYPE.BOOLEAN): - expected_dtype = "boolean" - else: - expected_dtype = None - - columns.append(Column(name=column_name, type=mysql_type, dtype=expected_dtype)) - return columns - - -def _make_df(result: list[tuple[Any]], columns: list[Column]) -> pd.DataFrame: - """Make pandas DataFrame from result and columns. - - Args: - result (list[tuple[Any]]): result of the query (list of tuples). - columns (list[Column]): list of columns. - - Returns: - pd.DataFrame: pandas DataFrame. - """ - serieses = [] - for i, column in enumerate(columns): - serieses.append(pd.Series([row[i] for row in result], dtype=column.dtype, name=column.name)) - return pd.concat(serieses, axis=1, copy=False) - - -class MySQLHandler(MetaDatabaseHandler): - """ - This handler handles connection and execution of the MySQL statements. 
- """ - - name = "mysql" - stream_response = True - - def __init__(self, name: str, **kwargs: Any) -> None: - super().__init__(name) - self.parser = parse_sql - self.dialect = "mysql" - self.connection_data = kwargs.get("connection_data", {}) - self.database = self.connection_data.get("database") - - self.connection: Optional[mysql.connector.MySQLConnection] = None - - def __del__(self) -> None: - if self.is_connected: - self.disconnect() - - def _unpack_config(self) -> Dict[str, Any]: - """ - Unpacks the config from the connection_data by validation all parameters. - - Returns: - dict: A dictionary containing the validated connection parameters. - """ - try: - config = ConnectionConfig(**self.connection_data) - return config.model_dump(exclude_unset=True) - except ValueError as e: - raise ValueError(str(e)) - - @property - def is_connected(self) -> bool: - """ - Checks if the handler is connected to the MySQL database. - - Returns: - bool: True if the handler is connected, False otherwise. - """ - return self.connection is not None and self.connection.is_connected() - - @is_connected.setter - def is_connected(self, value: bool) -> None: - pass - - def connect(self) -> mysql.connector.MySQLConnection: - """ - Establishes a connection to a MySQL database. - - Returns: - MySQLConnection: An active connection to the database. 
- """ - if self.is_connected and self.connection.is_connected(): - return self.connection - config = self._unpack_config() - if "conn_attrs" in self.connection_data: - config["conn_attrs"] = self.connection_data["conn_attrs"] - - if "connection_timeout" not in config: - config["connection_timeout"] = 10 - - ssl = self.connection_data.get("ssl") - if ssl is True: - ssl_ca = self.connection_data.get("ssl_ca") - ssl_cert = self.connection_data.get("ssl_cert") - ssl_key = self.connection_data.get("ssl_key") - config["client_flags"] = [mysql.connector.constants.ClientFlag.SSL] - if ssl_ca is not None: - config["ssl_ca"] = ssl_ca - if ssl_cert is not None: - config["ssl_cert"] = ssl_cert - if ssl_key is not None: - config["ssl_key"] = ssl_key - elif ssl is False: - config["ssl_disabled"] = True - - if "collation" not in config: - config["collation"] = "utf8mb4_general_ci" - if "use_pure" not in config: - config["use_pure"] = True - try: - connection = mysql.connector.connect(**config) - connection.autocommit = True - self.connection = connection - return self.connection - except mysql.connector.Error as e: - logger.error(f"Error connecting to MySQL {self.database}, {e}!") - raise - - def disconnect(self) -> None: - """ - Closes the connection to the MySQL database if it's currently open. - """ - if self.is_connected is False: - return - self.connection.close() - return - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the MySQL database. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. - """ - - result = StatusResponse(False) - need_to_close = not self.is_connected - - try: - connection = self.connect() - result.success = connection.is_connected() - except mysql.connector.Error as e: - logger.error(f"Error connecting to MySQL {self.connection_data.get('database', 'unknown')}! 
Error: {e}") - result.error_message = str(e) - - if result.success and need_to_close: - self.disconnect() - - return result - - def native_query(self, query: str, stream: bool = True, **kwargs) -> DataHandlerResponse: - """Executes a SQL query on the MySQL database and returns the result. - - Args: - query (str): The SQL query to be executed. - stream (bool): Whether to stream the results of the query. - **kwargs: Additional keyword arguments. - - Returns: - DataHandlerResponse: A response object containing the result of the query or an error message. - """ - if stream is False: - response = self._execute_fetchall(query) - else: - generator = self._execute_fetchmany(query) - try: - response: TableResponse = next(generator) - response.data_generator = generator - except StopIteration as e: - response = e.value - if isinstance(response, DataHandlerResponse) is False: - raise - return response - - def _execute_fetchall(self, query: str) -> DataHandlerResponse: - """Executes a SQL query on the MySQL database and returns the full result at once. - - Args: - query (str): The SQL query to be executed. - - Returns: - DataHandlerResponse: A response object containing the result of the query or an error message. - """ - connection = self.connect() - with connection.cursor(buffered=True) as cursor: - try: - cursor.execute(query) - if cursor.with_rows: - result = cursor.fetchall() - columns = _get_columns(cursor) - df = _make_df(result, columns) - response = TableResponse(data=df, affected_rows=cursor.rowcount, columns=columns) - else: - response = OkResponse(affected_rows=cursor.rowcount) - except Exception as e: - response = self._handle_query_exception(e, query, connection) - return response - - def _execute_fetchmany( - self, query: str - ) -> Generator[TableResponse | pd.DataFrame, None, OkResponse | ErrorResponse]: - """Execute a SQL query on the MySQL database and return a generator of data frames. - - Args: - query (str): The SQL query to be executed. 
- - Returns: - Generator[TableResponse | pd.DataFrame, None, OkResponse | ErrorResponse]: Generator of data frames. - """ - connection = self.connect() - with connection.cursor(buffered=False) as cursor: - try: - cursor.execute(query) - if not cursor.with_rows: - return OkResponse(affected_rows=cursor.rowcount) - - columns = _get_columns(cursor) - yield TableResponse(affected_rows=cursor.rowcount, columns=columns) - - fetch_size = mindsdb_config["data_stream"]["fetch_size"] - while result := cursor.fetchmany(size=fetch_size): - yield _make_df(result, columns) - except Exception as e: - return self._handle_query_exception(e, query, connection) - - def _handle_query_exception(self, e: Exception, query: str, connection) -> ErrorResponse: - """Handle query execution errors with appropriate logging and rollback. - - Args: - e: The exception that was raised - query: The SQL query that failed - connection: The database connection to rollback - - Returns: - ErrorResponse with appropriate error details - """ - logger.error(f"Error running query: {query} on {self.connection_data.get('database', 'unknown')}! Error: {e}") - if connection is not None and connection.is_connected(): - connection.rollback() - if isinstance(e, mysql.connector.Error): - return ErrorResponse(error_code=e.errno or 1, error_message=str(e)) - return ErrorResponse(error_code=0, error_message=str(e)) - - def query(self, query: ASTNode) -> DataHandlerResponse: - """ - Retrieve the data from the SQL statement. 
- """ - renderer = SqlalchemyRender("mysql") - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ - Get a list with all of the tabels in MySQL selected database - """ - sql = """ - SELECT - TABLE_SCHEMA AS table_schema, - TABLE_NAME AS table_name, - TABLE_TYPE AS table_type - FROM - information_schema.TABLES - WHERE - TABLE_TYPE IN ('BASE TABLE', 'VIEW') - AND TABLE_SCHEMA = DATABASE() - ORDER BY 2 - ; - """ - result = self.native_query(sql) - return result - - def get_columns(self, table_name: str) -> Response: - """ - Show details about the table - """ - q = f""" - select - COLUMN_NAME, - DATA_TYPE, - ORDINAL_POSITION, - COLUMN_DEFAULT, - IS_NULLABLE, - CHARACTER_MAXIMUM_LENGTH, - CHARACTER_OCTET_LENGTH, - NUMERIC_PRECISION, - NUMERIC_SCALE, - DATETIME_PRECISION, - CHARACTER_SET_NAME, - COLLATION_NAME - from - information_schema.columns - where - table_name = '{table_name}' - and table_schema = DATABASE(); - """ - result = self.native_query(q) - result.to_columns_table_response(map_type_fn=_map_type) - return result - - def meta_get_tables(self, table_names: Optional[List[str]] = None, include_row_count: bool = False) -> Response: - """ - Retrieves metadata information about the tables in the MySQL database - to be stored in the data catalog. - - Args: - table_names (list): A list of table names for which to retrieve metadata information. - include_row_count (bool): Include TABLE_ROWS statistics (can be expensive on large schemas). - - Returns: - Response: A response object containing the metadata information. 
- """ - row_count_select = """,\n t.TABLE_ROWS as row_count""" if include_row_count else "" - - query = f""" - SELECT - t.TABLE_NAME as table_name, - t.TABLE_SCHEMA as table_schema, - t.TABLE_TYPE as table_type, - t.TABLE_COMMENT as table_description - {row_count_select} - FROM information_schema.TABLES t - WHERE t.TABLE_SCHEMA = DATABASE() - AND t.TABLE_TYPE IN ('BASE TABLE', 'VIEW') - """ - - if table_names is not None and len(table_names) > 0: - quoted_names = [f"'{t}'" for t in table_names] - query += f" AND t.TABLE_NAME IN ({','.join(quoted_names)})" - - query += " ORDER BY t.TABLE_NAME" - - result = self.native_query(query) - return result - - def meta_get_columns(self, table_names: Optional[List[str]] = None) -> Response: - """ - Retrieves column metadata for the specified tables (or all tables if no list is provided). - - Args: - table_names (list): A list of table names for which to retrieve column metadata. - - Returns: - Response: A response object containing the column metadata. - """ - query = """ - SELECT - c.TABLE_NAME as table_name, - c.COLUMN_NAME as column_name, - c.DATA_TYPE as data_type, - c.COLUMN_COMMENT as column_description, - c.COLUMN_DEFAULT as column_default, - CASE WHEN c.IS_NULLABLE = 'YES' THEN 1 ELSE 0 END as is_nullable - FROM information_schema.COLUMNS c - WHERE c.TABLE_SCHEMA = DATABASE() - """ - - if table_names is not None and len(table_names) > 0: - quoted_names = [f"'{t}'" for t in table_names] - query += f" AND c.TABLE_NAME IN ({','.join(quoted_names)})" - - query += " ORDER BY c.TABLE_NAME, c.ORDINAL_POSITION" - - result = self.native_query(query) - return result - - def meta_get_column_statistics(self, table_names: Optional[List[str]] = None) -> Response: - """ - Retrieves column statistics for the specified tables (or all tables if no list is provided). - Uses MySQL 8.0+ metadata sources (INFORMATION_SCHEMA.COLUMN_STATISTICS and INFORMATION_SCHEMA.STATISTICS) not requiring table scans. 
- - Args: - table_names (list): A list of table names for which to retrieve column statistics. - - Returns: - Response: A response object containing the column statistics. - """ - table_filter = "" - if table_names: - quoted = ",".join(f"'{t}'" for t in table_names) - table_filter = f" AND c.TABLE_NAME IN ({quoted})" - - query = f""" - WITH cols AS ( - SELECT c.TABLE_SCHEMA, c.TABLE_NAME, c.COLUMN_NAME, c.ORDINAL_POSITION - FROM information_schema.COLUMNS c - WHERE c.TABLE_SCHEMA = DATABASE() - {table_filter} - ), - hist AS ( - SELECT - cs.SCHEMA_NAME AS TABLE_SCHEMA, - cs.TABLE_NAME, - cs.COLUMN_NAME, - cs.HISTOGRAM, - JSON_LENGTH(cs.HISTOGRAM, '$.buckets') AS buckets_len - FROM information_schema.COLUMN_STATISTICS cs - WHERE cs.SCHEMA_NAME = DATABASE() - ), - ndv AS ( - SELECT - s.TABLE_SCHEMA, - s.TABLE_NAME, - s.COLUMN_NAME, - MAX(s.CARDINALITY) AS DISTINCT_VALUES_COUNT - FROM information_schema.STATISTICS s - WHERE s.TABLE_SCHEMA = DATABASE() - GROUP BY s.TABLE_SCHEMA, s.TABLE_NAME, s.COLUMN_NAME - ) - SELECT - c.TABLE_NAME AS TABLE_NAME, - c.COLUMN_NAME AS COLUMN_NAME, - - /* optional fields kept NULL for simplicity */ - CAST(NULL AS JSON) AS MOST_COMMON_VALUES, - CAST(NULL AS JSON) AS MOST_COMMON_FREQUENCIES, - - /* histogram "null-values" fraction -> percent */ - CASE - WHEN h.HISTOGRAM IS NULL THEN NULL - ELSE ROUND( - CAST(JSON_UNQUOTE(JSON_EXTRACT(h.HISTOGRAM, '$."null-values"')) AS DECIMAL(10,6)) * 100, - 2 - ) - END AS NULL_PERCENTAGE, - /* MIN: first bucket's point (singleton) or lower endpoint (equi-height) */ - CASE - WHEN h.HISTOGRAM IS NULL THEN NULL - ELSE COALESCE( - JSON_UNQUOTE(JSON_EXTRACT(h.HISTOGRAM, '$.buckets[0].value')), - JSON_UNQUOTE(JSON_EXTRACT(h.HISTOGRAM, '$.buckets[0].endpoint[0]')) - ) - END AS MINIMUM_VALUE, - - /* MAX: last bucket's point (singleton) or upper endpoint (equi-height) */ - CASE - WHEN h.HISTOGRAM IS NULL THEN NULL - ELSE COALESCE( - JSON_UNQUOTE( - JSON_EXTRACT(h.HISTOGRAM, - CONCAT('$.buckets[', 
GREATEST(h.buckets_len - 1, 0), '].value') - ) - ), - JSON_UNQUOTE( - JSON_EXTRACT(h.HISTOGRAM, - CONCAT('$.buckets[', GREATEST(h.buckets_len - 1, 0), '].endpoint[1]') - ) - ), - JSON_UNQUOTE( - JSON_EXTRACT(h.HISTOGRAM, - CONCAT('$.buckets[', GREATEST(h.buckets_len - 1, 0), '].endpoint[0]') - ) - ) - ) - END AS MAXIMUM_VALUE, - n.DISTINCT_VALUES_COUNT - FROM cols c - LEFT JOIN hist h - ON h.TABLE_SCHEMA = c.TABLE_SCHEMA - AND h.TABLE_NAME = c.TABLE_NAME - AND h.COLUMN_NAME = c.COLUMN_NAME - LEFT JOIN ndv n - ON n.TABLE_SCHEMA = c.TABLE_SCHEMA - AND n.TABLE_NAME = c.TABLE_NAME - AND n.COLUMN_NAME = c.COLUMN_NAME - ORDER BY c.TABLE_NAME, c.ORDINAL_POSITION; - """ - return self.native_query(query) - - def meta_get_primary_keys(self, table_names: Optional[List[str]] = None) -> Response: - """ - Retrieves primary key information for the specified tables (or all tables if no list is provided). - - Args: - table_names (list): A list of table names for which to retrieve primary key information. - - Returns: - Response: A response object containing the primary key information. 
- """ - query = """ - SELECT - tc.TABLE_NAME as table_name, - kcu.COLUMN_NAME as column_name, - kcu.ORDINAL_POSITION as ordinal_position, - tc.CONSTRAINT_NAME as constraint_name - FROM information_schema.TABLE_CONSTRAINTS tc - INNER JOIN information_schema.KEY_COLUMN_USAGE kcu - ON tc.CONSTRAINT_NAME = kcu.CONSTRAINT_NAME - AND tc.TABLE_SCHEMA = kcu.TABLE_SCHEMA - AND tc.TABLE_NAME = kcu.TABLE_NAME - WHERE tc.CONSTRAINT_TYPE = 'PRIMARY KEY' - AND tc.TABLE_SCHEMA = DATABASE() - """ - - if table_names is not None and len(table_names) > 0: - quoted_names = [f"'{t}'" for t in table_names] - query += f" AND tc.TABLE_NAME IN ({','.join(quoted_names)})" - - query += " ORDER BY tc.TABLE_NAME, kcu.ORDINAL_POSITION" - - result = self.native_query(query) - return result - - def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None) -> Response: - """ - Retrieves foreign key information for the specified tables (or all tables if no list is provided). - - Args: - table_names (list): A list of table names for which to retrieve foreign key information. - - Returns: - Response: A response object containing the foreign key information. 
- """ - query = """ - SELECT - kcu.REFERENCED_TABLE_NAME as parent_table_name, - kcu.REFERENCED_COLUMN_NAME as parent_column_name, - kcu.TABLE_NAME as child_table_name, - kcu.COLUMN_NAME as child_column_name, - kcu.CONSTRAINT_NAME as constraint_name - FROM information_schema.KEY_COLUMN_USAGE kcu - WHERE kcu.TABLE_SCHEMA = DATABASE() - AND kcu.REFERENCED_TABLE_NAME IS NOT NULL - """ - - if table_names is not None and len(table_names) > 0: - quoted_names = [f"'{t}'" for t in table_names] - query += f" AND kcu.TABLE_NAME IN ({','.join(quoted_names)})" - - query += " ORDER BY kcu.TABLE_NAME, kcu.CONSTRAINT_NAME" - - result = self.native_query(query) - return result diff --git a/mindsdb/integrations/handlers/mysql_handler/requirements.txt b/mindsdb/integrations/handlers/mysql_handler/requirements.txt deleted file mode 100644 index dbf1cf4b2c6..00000000000 --- a/mindsdb/integrations/handlers/mysql_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -mysql-connector-python==9.1.0 diff --git a/mindsdb/integrations/handlers/mysql_handler/settings.py b/mindsdb/integrations/handlers/mysql_handler/settings.py deleted file mode 100644 index f1cdc521b76..00000000000 --- a/mindsdb/integrations/handlers/mysql_handler/settings.py +++ /dev/null @@ -1,121 +0,0 @@ -from typing import Optional -from pydantic import BaseModel, AnyUrl, TypeAdapter, model_validator, field_validator, ConfigDict -from urllib.parse import urlparse - - -_ANY_URL_ADAPTER = TypeAdapter(AnyUrl) - - -class ConnectionConfig(BaseModel): - """ - MySQL connection configuration with validation. - - Supports two connection methods: - 1. URL-based: mysql://user:password@host:port/database - 2. 
Parameter-based: individual host, port, user, password, database params - """ - - url: Optional[AnyUrl] = None - host: Optional[str] = None - port: int = 3306 - user: Optional[str] = None - password: Optional[str] = None - database: Optional[str] = None - - @field_validator("port") - @classmethod - def validate_port(cls, v: int) -> int: - """Validate that port is within valid range.""" - if v < 1 or v > 65535: - raise ValueError(f"Port must be between 1 and 65535, got {v}") - return v - - @field_validator("url", mode="before") - @classmethod - def validate_url(cls, v: Optional[str]) -> Optional[AnyUrl]: - """Validate URL using AnyUrl as a fallback option for MySQL DSN parsing.""" - if v is None or isinstance(v, AnyUrl): - return v - try: - return _ANY_URL_ADAPTER.validate_python(v) - except ValueError as exc: - raise ValueError(f"Invalid MySQL connection URL: {v}") from exc - - @field_validator("host") - @classmethod - def validate_host(cls, v: Optional[str]) -> Optional[str]: - """Validate that host is not empty if provided.""" - if v is not None and not v.strip(): - raise ValueError("Host cannot be empty string") - return v - - @field_validator("database") - @classmethod - def validate_database(cls, v: Optional[str]) -> Optional[str]: - """Validate that database name is not empty if provided.""" - if v is not None and not v.strip(): - raise ValueError("Database name cannot be empty string") - return v - - @model_validator(mode="before") - @classmethod - def check_db_params(cls, values): - """Ensures either URL is provided or all individual parameters are provided.""" - url = values.get("url") - host = values.get("host") - user = values.get("user") - password = values.get("password") - database = values.get("database") - - if not url and not (host and user and password and database): - missing_params = [] - if not host: - missing_params.append("host") - if not user: - missing_params.append("user") - if not password: - missing_params.append("password") - if not 
database: - missing_params.append("database") - - raise ValueError( - f"Either a valid URL or all required parameters must be provided. Missing: {', '.join(missing_params)}" - ) - - if url: - # Parse URL and extract connection parameters - try: - parsed = urlparse(str(url)) - - # Extract parameters from URL - values["host"] = parsed.hostname or host - values["port"] = parsed.port if parsed.port is not None else values.get("port", 3306) - values["user"] = parsed.username or user - values["password"] = parsed.password or password - values["database"] = parsed.path[1:] if parsed.path and len(parsed.path) > 1 else database - - # Validate extracted parameters - if not values["host"]: - raise ValueError("URL must contain a hostname") - if not values["user"]: - raise ValueError("URL must contain a username") - if not values["database"]: - raise ValueError("URL must contain a database name in the path") - - except Exception as e: - raise ValueError(f"Invalid MySQL connection URL: {str(e)}") - - # mysql connector raises error if url is provided - values.pop("url", None) - - return values - - # Validate individual parameters - if not url: - for param in ["host", "user", "password", "database"]: - if not values.get(param): - raise ValueError(f"'{param}' is required when URL is not provided") - - return values - - model_config = ConfigDict(str_min_length=1, str_strip_whitespace=True, validate_assignment=True) diff --git a/mindsdb/integrations/handlers/netsuite_handler/README.md b/mindsdb/integrations/handlers/netsuite_handler/README.md deleted file mode 100644 index ebb882a4346..00000000000 --- a/mindsdb/integrations/handlers/netsuite_handler/README.md +++ /dev/null @@ -1,118 +0,0 @@ ---- -title: Oracle NetSuite -sidebarTitle: NetSuite ---- - -This documentation describes the integration of MindsDB with Oracle NetSuite using the REST Query (SuiteQL) API. -It lets you query NetSuite data in SQL and run SuiteQL directly when you need full control over filtering and joins. 
- -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. Enable Token-Based Authentication (TBA) and REST Web Services in NetSuite: - - Setup > Company > Enable Features > SuiteCloud tab - - Check "Token-Based Authentication" and "REST Web Services" - -## Connection - -Establish a connection to NetSuite from MindsDB by executing the following SQL command and providing its handler name as an engine. - -```sql -CREATE DATABASE netsuite_datasource -WITH - ENGINE = 'netsuite', - PARAMETERS = { - "account_id": "123456_SB1", - "consumer_key": "ck_...", - "consumer_secret": "cs_...", - "token_id": "token_...", - "token_secret": "token_secret_...", - "rest_domain": "https://123456-sb1.suitetalk.api.netsuite.com", - "record_types": "customer,transaction,inventoryitem" - }; -``` - -Required connection parameters include the following: - -- `account_id`: NetSuite account/realm ID (e.g. `123456_SB1`) -- `consumer_key`: Integration consumer key -- `consumer_secret`: Integration consumer secret -- `token_id`: Access token ID -- `token_secret`: Access token secret - -Optional connection parameters include the following: - -- `rest_domain`: Override REST domain (defaults to `https://.suitetalk.api.netsuite.com`, with underscores converted to dashes) -- `record_types`: Record types to expose as tables, either a comma-separated string (`"customer,transaction"`) or a JSON array (`["customer", "transaction"]`) - -If `record_types` is not provided, the handler registers only a small set of tables that are commonly accessible: -`contact`, `customer`, `item`, `message`, `subsidiary`, `task`, `transaction`. - -## Token-Based Authentication setup - -To create the required credentials in NetSuite: - -1. Create an Integration record: Setup > Integrations > Manage Integrations > New. 
Enable Token-Based Authentication. -2. Create/choose a role for the integration and grant: - - Setup > REST Web Services (Full) - - Setup > User Access Tokens (Full) - - Record-level permissions you will query (e.g., Transactions > Sales Order, Lists > Customers). -3. Assign that role to the user. -4. Generate an Access Token: Setup > Users/Roles > Access Tokens > New. -5. Copy the Consumer Key/Secret and Token ID/Secret. - -## Usage - -Retrieve data from a record table (SuiteQL base table names, lowercased): - -```sql -SELECT * -FROM netsuite_datasource.salesorder -WHERE id = 48; -``` - -Record tables: -- Use `WHERE id = ...` (or `internalId`) to fetch a record directly. -- Equality filters are pushed down to SuiteQL; other filters are applied locally. - -Run SuiteQL directly using the native query syntax (recommended for complex filters): - -```sql -SELECT * FROM netsuite_datasource ( - SELECT id, tranid, total - FROM transaction - WHERE type = 'SalesOrd' - FETCH NEXT 5 ROWS ONLY -); -``` - -Limit the registered tables to what your role can access: - -```sql -CREATE DATABASE netsuite_limited -WITH - ENGINE = 'netsuite', - PARAMETERS = { - "account_id": "123456_SB1", - "consumer_key": "ck_...", - "consumer_secret": "cs_...", - "token_id": "token_...", - "token_secret": "token_secret_...", - "record_types": ["customer", "salesorder", "invoice"] - }; -``` - - -Use the `rest_domain` parameter if your account uses a REST domain that differs from the default derived from `account_id`. - - - -Access to record tables and SuiteQL depends on the NetSuite role tied to your access token. -If a query fails with 403/permission errors, ensure the role includes REST Web Services, User Access Tokens, and record-specific permissions for the tables you are querying (plus SuiteAnalytics permissions for SuiteQL). - - - -The NetSuite handler is read-only. `INSERT`, `UPDATE`, and `DELETE` are not supported. 
- diff --git a/mindsdb/integrations/handlers/netsuite_handler/__about__.py b/mindsdb/integrations/handlers/netsuite_handler/__about__.py deleted file mode 100644 index a84d0c36c29..00000000000 --- a/mindsdb/integrations/handlers/netsuite_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Oracle NetSuite handler" -__package_name__ = "mindsdb_netsuite_handler" -__version__ = "0.0.1" -__description__ = "Oracle NetSuite handler for MindsDB" -__author__ = "MindsDB Integrations" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2024 - mindsdb" diff --git a/mindsdb/integrations/handlers/netsuite_handler/__init__.py b/mindsdb/integrations/handlers/netsuite_handler/__init__.py deleted file mode 100644 index f6e6c4d6863..00000000000 --- a/mindsdb/integrations/handlers/netsuite_handler/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_SUPPORT_LEVEL, HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example - -try: - from .netsuite_handler import NetSuiteHandler as Handler - - import_error = None -except Exception as e: # pragma: no cover - surfaced to UI - Handler = None - import_error = e - -title = "Oracle NetSuite" -name = "netsuite" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.MINDSDB - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "support_level", - "import_error", - "icon_path", - "connection_args", - "connection_args_example", -] diff --git a/mindsdb/integrations/handlers/netsuite_handler/connection_args.py b/mindsdb/integrations/handlers/netsuite_handler/connection_args.py deleted file mode 100644 index 6a5a7f53b2c..00000000000 --- a/mindsdb/integrations/handlers/netsuite_handler/connection_args.py +++ /dev/null @@ 
-1,63 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - account_id={ - "type": ARG_TYPE.STR, - "description": "NetSuite account/realm ID (e.g. 123456_SB1)", - "required": True, - "label": "Account ID", - }, - consumer_key={ - "type": ARG_TYPE.PWD, - "description": "OAuth consumer key for the NetSuite integration", - "required": True, - "label": "Consumer Key", - "secret": True, - }, - consumer_secret={ - "type": ARG_TYPE.PWD, - "description": "OAuth consumer secret for the NetSuite integration", - "required": True, - "label": "Consumer Secret", - "secret": True, - }, - token_id={ - "type": ARG_TYPE.PWD, - "description": "Token ID generated for the integration role", - "required": True, - "label": "Token ID", - "secret": True, - }, - token_secret={ - "type": ARG_TYPE.PWD, - "description": "Token secret generated for the integration role", - "required": True, - "label": "Token Secret", - "secret": True, - }, - rest_domain={ - "type": ARG_TYPE.URL, - "description": "Optional REST domain override (defaults to https://.suitetalk.api.netsuite.com)", - "required": False, - "label": "REST Domain", - }, - record_types={ - "type": ARG_TYPE.STR, - "description": "Comma separated NetSuite record types to expose (e.g. 
customer,item,salesOrder)", - "required": False, - "label": "Record Types", - }, -) - -connection_args_example = OrderedDict( - account_id="123456_SB1", - consumer_key="ck_...", - consumer_secret="cs_...", - token_id="token_...", - token_secret="token_secret_...", - rest_domain="https://123456-sb1.suitetalk.api.netsuite.com", - record_types="customer,item,salesorder", -) diff --git a/mindsdb/integrations/handlers/netsuite_handler/icon.svg b/mindsdb/integrations/handlers/netsuite_handler/icon.svg deleted file mode 100644 index 85228600dfc..00000000000 --- a/mindsdb/integrations/handlers/netsuite_handler/icon.svg +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/netsuite_handler/netsuite_handler.py b/mindsdb/integrations/handlers/netsuite_handler/netsuite_handler.py deleted file mode 100644 index 6ec736744e7..00000000000 --- a/mindsdb/integrations/handlers/netsuite_handler/netsuite_handler.py +++ /dev/null @@ -1,659 +0,0 @@ -from typing import Any, List, Optional -import uuid - -import pandas as pd -import requests -from mindsdb_sql_parser import parse_sql -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE - -try: - from requests_oauthlib import OAuth1 -except ImportError: # pragma: no cover - handled at runtime by __init__.py - OAuth1 = None - -from mindsdb.integrations.handlers.netsuite_handler.netsuite_tables import NetSuiteRecordTable -from mindsdb.integrations.handlers.netsuite_handler.__about__ import __version__ as handler_version -from mindsdb.integrations.libs.api_handler import MetaAPIHandler -from mindsdb.integrations.libs.response import HandlerResponse as Response, HandlerStatusResponse as StatusResponse -from mindsdb.integrations.libs.response import RESPONSE_TYPE -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -def _map_type(internal_type_name: str | None) -> MYSQL_DATA_TYPE: - """ - Map SuiteQL type names to MySQL types. 
- """ - fallback_type = MYSQL_DATA_TYPE.VARCHAR - - if not internal_type_name: - return fallback_type - - name = str(internal_type_name).lower() - - if "bool" in name: - return MYSQL_DATA_TYPE.BOOL - if "timestamp" in name or ("date" in name and "time" in name): - return MYSQL_DATA_TYPE.DATETIME - if name == "date" or name.endswith("date"): - return MYSQL_DATA_TYPE.DATE - if "time" in name: - return MYSQL_DATA_TYPE.TIME - if "double" in name: - return MYSQL_DATA_TYPE.DOUBLE - if "float" in name: - return MYSQL_DATA_TYPE.FLOAT - if any(token in name for token in ("decimal", "numeric", "currency", "percent", "number")): - return MYSQL_DATA_TYPE.DECIMAL - if "bigint" in name or ("big" in name and "int" in name): - return MYSQL_DATA_TYPE.BIGINT - if "smallint" in name or ("small" in name and "int" in name): - return MYSQL_DATA_TYPE.SMALLINT - if "int" in name: - return MYSQL_DATA_TYPE.INT - if "json" in name: - return MYSQL_DATA_TYPE.JSON - if "binary" in name or "blob" in name: - return MYSQL_DATA_TYPE.BLOB - if any(token in name for token in ("text", "char", "string", "clob")): - return MYSQL_DATA_TYPE.TEXT - - return fallback_type - - -class NetSuiteHandler(MetaAPIHandler): - """ - This handler manages connections and queries for the Oracle NetSuite SuiteQL API. - """ - - name = "netsuite" - - # Default record types to register when record_types is not provided. - # NOTE: NetSuite metadata-catalog could be used to discover this list, but it can take 30-60 seconds to respond. 
- # For now we stick with hardocded value - DEFAULT_TABLES = [ - # --- Core reference / dimensions (joins everywhere) --- - "account", - "accountingperiod", - "subsidiary", - "currency", - "currencyrate", - "department", - "classification", - "term", - "taxtype", - "paymentmethod", - "pricelevel", - "pricebook", - "priceplan", - "pricinggroup", - # --- CRM / entities --- - "customer", - "contact", - "vendor", - "employee", - "partner", - "opportunity", - "supportcase", - "campaign", - "campaignresponse", - # --- Sales / AR transactions --- - "estimate", - "salesorder", - "invoice", - "itemfulfillment", - "customerpayment", - "customerdeposit", - "customerrefund", - "creditmemo", - "cashsale", - "cashrefund", - "returnauthorization", - # --- Purchasing / AP transactions --- - "purchaseorder", - "purchaserequisition", - "purchasecontract", - "vendorbill", - "vendorpayment", - "vendorcredit", - "vendorreturnauthorization", - "vendorprepayment", - "vendorprepaymentapplication", - # --- Cash / banking --- - "deposit", - "depositapplication", - "check", - # --- Items / product catalog --- - "inventoryitem", - "assemblyitem", - "kititem", - "itemgroup", - "markupitem", - "discountitem", - "subtotalitem", - "descriptionitem", - "downloaditem", - "shipitem", - "servicesaleitem", - "servicepurchaseitem", - "serviceresaleitem", - "noninventorysaleitem", - "noninventorypurchaseitem", - "noninventoryresaleitem", - "otherchargesaleitem", - "otherchargepurchaseitem", - "otherchargeresaleitem", - # --- Inventory / warehouse --- - "location", - "bin", - "bintransfer", - "binworksheet", - "inventoryadjustment", - "inventorytransfer", - "inventorycount", - "inventorynumber", - "itemreceipt", - "inboundshipment", - # --- Manufacturing / work orders --- - "workorder", - "workorderissue", - "workordercompletion", - "workorderclose", - "bom", - "bomrevision", - "manufacturingrouting", - "manufacturingoperationtask", - "manufacturingcosttemplate", - # --- Accounting / journals --- - 
"journalentry", - "intercompanyjournalentry", - "advintercompanyjournalentry", - "periodendjournal", - "statisticaljournalentry", - "consolidatedexchangerate", - "fairvalueprice", - # --- Billing / subscriptions / rev rec --- - "billingaccount", - "billingschedule", - "billingrevenueevent", - "subscription", - "subscriptionplan", - "subscriptionterm", - "subscriptionline", - "subscriptionchangeorder", - "revrectemplate", - "revrecschedule", - # --- Misc “useful for ops/support” --- - "task", - "phonecall", - "message", - "emailtemplate", - "notetype", - ] - ACCESSIBLE_TABLES = {"contact", "customer", "item", "message", "subsidiary", "task", "transaction"} - - def __init__(self, name: str, **kwargs): - """ - Initializes the handler. - - Args: - name (str): The name of the handler instance. - **kwargs: Arbitrary keyword arguments including connection_data. - """ - super().__init__(name) - - self.connection_data = kwargs.get("connection_data", {}) or {} - self.kwargs = kwargs - - self.session: Optional[requests.Session] = None - self.is_connected: bool = False - self.base_url = self._build_base_url() - self._record_types_source: str = "default" - self.record_types = self._get_record_types() - self._unsupported_record_types: set[str] = set() - - for record_type in self.record_types: - self._register_table(record_type, NetSuiteRecordTable(self, record_type)) - - def _build_base_url(self) -> str: - """ - Builds the REST base URL for NetSuite. - - Returns: - str: The base URL for NetSuite REST endpoints. - """ - rest_domain = self.connection_data.get("rest_domain") - if rest_domain: - return rest_domain.rstrip("/") - - account_id = self.connection_data.get("account_id") - if not account_id: - return "" - - # NetSuite REST domains use dashes, while realm/account ids often use underscores (e.g. 
123456_SB1 -> 123456-sb1) - host = str(account_id).lower().replace("_", "-") - - return f"https://{host}.suitetalk.api.netsuite.com" - - def _get_record_types(self) -> List[str]: - """ - Resolves the record types to register as tables. - - - If connection_data.record_types is provided: use that (as before). - - If not provided: use only ACCESSIBLE_TABLES (allowed tables) to avoid - registering tables you can't query under current role. - - DEFAULT_TABLES remains as reference for future / broader roles. - """ - record_types = self.connection_data.get("record_types") - - # Explicit config always wins - if isinstance(record_types, str): - self._record_types_source = "config" - return [value.strip().lower() for value in record_types.split(",") if value.strip()] - - if isinstance(record_types, list): - self._record_types_source = "config" - return [value.strip().lower() for value in record_types if isinstance(value, str) and value.strip()] - - # Default behavior (no record_types provided): register ONLY allowed tables - self._record_types_source = "default_allowed" - return sorted({name.strip().lower() for name in self.ACCESSIBLE_TABLES if name and name.strip()}) - - def connect(self) -> requests.Session: - """ - Creates an authenticated NetSuite session using token-based authentication. - - Returns: - requests.Session: An authenticated session for NetSuite REST calls. 
- """ - if self.is_connected and self.session is not None: - return self.session - - if OAuth1 is None: - raise ImportError("requests-oauthlib is required for the NetSuite handler.") - - account_id = self.connection_data.get("account_id") - consumer_key = self.connection_data.get("consumer_key") - consumer_secret = self.connection_data.get("consumer_secret") - token_id = self.connection_data.get("token_id") - token_secret = self.connection_data.get("token_secret") - realm = str(account_id).upper() - - missing = [ - field - for field, value in { - "account_id": account_id, - "consumer_key": consumer_key, - "consumer_secret": consumer_secret, - "token_id": token_id, - "token_secret": token_secret, - }.items() - if not value - ] - if missing: - raise ValueError(f"Missing required NetSuite credentials: {', '.join(missing)}") - - self.session = requests.Session() - self.session.auth = OAuth1( - consumer_key, - consumer_secret, - token_id, - token_secret, - realm=realm, - signature_method="HMAC-SHA256", - signature_type="auth_header", - ) - - self.is_connected = True - return self.session - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the NetSuite connection. - - Returns: - StatusResponse: Status and error information if the connection fails. - """ - response = StatusResponse(False) - try: - self.connect() - # Use SuiteQL to validate credentials quickly without relying on a specific record type. - self._request("POST", "/services/rest/query/v1/suiteql", json={"q": "SELECT 1"}) - - response.success = True - except Exception as exc: # broad catch to expose to UI - logger.error("NetSuite connection failed: %s", exc) - response.error_message = str(exc) - self.is_connected = False - - return response - - def meta_get_tables(self, table_names: Optional[List[str]] = None) -> Response: - """ - Retrieves metadata for the specified tables (or all tables if no list is provided). 
- - Args: - table_names (Optional[List[str]]): Optional list of table names. - - Returns: - Response: A response object containing the table metadata. - """ - allowed = set(self._tables.keys()) - if self._unsupported_record_types: - allowed = allowed - {name.lower() for name in self._unsupported_record_types} - - if table_names is not None: - allowed = allowed & {name.lower() for name in table_names} - - df = pd.DataFrame() - for table_name, table_class in self._tables.items(): - if table_name not in allowed: - continue - try: - if hasattr(table_class, "meta_get_tables"): - table_metadata = table_class.meta_get_tables(table_name) - df = pd.concat([df, pd.DataFrame([table_metadata])], ignore_index=True) - except Exception: - logger.exception(f"Error retrieving metadata for table {table_name}:") - - if len(df.columns) == 0: - df = pd.DataFrame( - columns=[ - "TABLE_NAME", - "TABLE_TYPE", - "TABLE_SCHEMA", - "TABLE_DESCRIPTION", - "ROW_COUNT", - ] - ) - - return Response(RESPONSE_TYPE.TABLE, df) - - def meta_get_columns(self, table_names: Optional[List[str]] = None, **kwargs) -> Response: - """ - Retrieves column metadata for the specified tables (or all tables if no list is provided). - - Args: - table_names (List): A list of table names for which to retrieve column metadata. - - Returns: - Response: A response object containing the column metadata. 
- """ - allowed = set(self._tables.keys()) - if self._unsupported_record_types: - allowed = allowed - {name.lower() for name in self._unsupported_record_types} - - if table_names is not None: - allowed = allowed & {name.lower() for name in table_names} - - df = pd.DataFrame() - for table_name, table_class in self._tables.items(): - if table_name not in allowed: - continue - try: - if hasattr(table_class, "meta_get_columns"): - column_metadata = table_class.meta_get_columns(table_name, **kwargs) - df = pd.concat([df, pd.DataFrame(column_metadata)], ignore_index=True) - except Exception: - logger.exception(f"Error retrieving column metadata for table {table_name}:") - - if len(df.columns) == 0: - df = pd.DataFrame( - columns=[ - "TABLE_NAME", - "COLUMN_NAME", - "DATA_TYPE", - "COLUMN_DESCRIPTION", - "IS_NULLABLE", - "COLUMN_DEFAULT", - ] - ) - - return Response(RESPONSE_TYPE.TABLE, df) - - def get_tables(self) -> Response: - """ - Retrieves the list of registered NetSuite tables. - """ - allowed = set(self._tables.keys()) - if self._unsupported_record_types: - allowed = allowed - {name.lower() for name in self._unsupported_record_types} - - data = [{"TABLE_NAME": name, "TABLE_TYPE": "BASE TABLE"} for name in sorted(allowed)] - return Response(RESPONSE_TYPE.TABLE, pd.DataFrame(data)) - - def get_columns(self, table_name: str) -> Response: - """ - Retrieves column details for a specified NetSuite table using SuiteQL metadata. 
- """ - if not table_name or not isinstance(table_name, str): - raise ValueError("Invalid table name provided.") - - normalized = table_name.lower() - table = self._tables.get(normalized) - if table is None: - raise ValueError(f"Table not found: {table_name}") - - columns = table.meta_get_columns(normalized) - rows = [] - for idx, column in enumerate(columns, start=1): - nullable = column.get("is_nullable") - if isinstance(nullable, bool): - nullable = "YES" if nullable else "NO" - else: - nullable = None - rows.append( - { - "COLUMN_NAME": column.get("column_name"), - "DATA_TYPE": column.get("data_type") or "varchar", - "ORDINAL_POSITION": idx, - "COLUMN_DEFAULT": column.get("column_default"), - "IS_NULLABLE": nullable, - "CHARACTER_MAXIMUM_LENGTH": None, - "CHARACTER_OCTET_LENGTH": None, - "NUMERIC_PRECISION": None, - "NUMERIC_SCALE": None, - "DATETIME_PRECISION": None, - "CHARACTER_SET_NAME": None, - "COLLATION_NAME": None, - } - ) - - df = pd.DataFrame( - rows, - columns=[ - "COLUMN_NAME", - "DATA_TYPE", - "ORDINAL_POSITION", - "COLUMN_DEFAULT", - "IS_NULLABLE", - "CHARACTER_MAXIMUM_LENGTH", - "CHARACTER_OCTET_LENGTH", - "NUMERIC_PRECISION", - "NUMERIC_SCALE", - "DATETIME_PRECISION", - "CHARACTER_SET_NAME", - "COLLATION_NAME", - ], - ) - - result = Response(RESPONSE_TYPE.TABLE, df) - result.to_columns_table_response(map_type_fn=_map_type) - return result - - def native_query(self, query: Any) -> Response: - """ - Executes SuiteQL using the NetSuite REST Query API. - - Args: - query (Any): SuiteQL query string or AST. - - Returns: - Response: A response containing the query results. 
- """ - if not isinstance(query, str): - ast = parse_sql(query) - return self.query(ast) - - suiteql = query.strip() - if suiteql.endswith(";"): - suiteql = suiteql[:-1] - - payload = self._request("POST", "/services/rest/query/v1/suiteql", json={"q": suiteql}) - items = payload.get("items", []) if isinstance(payload, dict) else [] - columns_meta = [] - if isinstance(payload, dict): - columns_meta = payload.get("columnMetadata") or payload.get("columns") or [] - - df = pd.DataFrame() - if items: - first_item = items[0] - if isinstance(first_item, dict) and "values" in first_item and columns_meta: - columns = [] - for idx, col in enumerate(columns_meta): - if isinstance(col, dict): - columns.append(col.get("name") or col.get("label") or f"col_{idx}") - else: - columns.append(str(col)) - rows = [item.get("values", []) for item in items] - if rows: - max_len = max(len(row) for row in rows) - if max_len > len(columns): - columns.extend([f"col_{idx}" for idx in range(len(columns), max_len)]) - rows = [row[: len(columns)] + [None] * max(0, len(columns) - len(row)) for row in rows] - - deduped_columns = [] - seen = {} - for name in columns: - count = seen.get(name, 0) + 1 - seen[name] = count - deduped_columns.append(name if count == 1 else f"{name}_{count}") - - df = pd.DataFrame(rows, columns=deduped_columns) - else: - df = pd.DataFrame(items) - - return Response(RESPONSE_TYPE.TABLE, df) - - def _suiteql_select( - self, - table: str, - where_sql: str = "", - limit: Optional[int] = None, - targets: Optional[List[str]] = None, - order_by_sql: str = "", - ) -> Any: - """ - Executes a SuiteQL SELECT and returns raw payload dict. 
- """ - select_cols = "*" - if targets: - select_cols = ", ".join(targets) - - limit_sql = "" - if limit is not None: - n = int(limit) - if n < 0: - n = 0 - limit_sql = f" FETCH FIRST {n} ROWS ONLY" - - sql = f"SELECT {select_cols} FROM {table}{where_sql}{order_by_sql}{limit_sql}" - return self._request("POST", "/services/rest/query/v1/suiteql", json={"q": sql}) - - def _request(self, method: str, path: str, **kwargs): - """ - Performs an authenticated NetSuite REST request. - - Args: - method (str): HTTP method name. - path (str): Relative or absolute URL for the request. - **kwargs: Additional request parameters. - - Returns: - Any: Parsed JSON response when available. - """ - self.connect() - - if path.startswith("http"): - url = path - else: - if not self.base_url: - raise ValueError("REST domain could not be derived; provide rest_domain or account_id.") - url = f"{self.base_url}{path}" - - headers = kwargs.pop("headers", {}) - headers.setdefault("Accept", "application/json") - if method.upper() != "GET": - headers.setdefault("Content-Type", "application/json") - headers.setdefault("Prefer", "transient") - headers.setdefault("User-Agent", f"mindsdb-netsuite-handler/{handler_version}") - correlation_id = headers.get("X-Request-Id") or str(uuid.uuid4()) - headers["X-Request-Id"] = correlation_id - - response = self.session.request(method, url, headers=headers, **kwargs) - - if not response.ok: - error_message, log_message = self._format_netsuite_error(response) - logger.error( - "NetSuite API error %s for %s %s (request_id=%s): %s", - response.status_code, - method, - url, - correlation_id, - log_message, - ) - raise RuntimeError(error_message) - - if response.text: - try: - return response.json() - except ValueError: - return response.text - return None - - @staticmethod - def _format_netsuite_error(response: requests.Response) -> tuple[str, str]: - """ - Formats NetSuite error responses into user-friendly messages with safe logging. 
- - Args: - response (requests.Response): HTTP response from NetSuite. - - Returns: - tuple[str, str]: User-facing error message and log-safe message. - """ - status = response.status_code - request_id = response.headers.get("x-ns-request-id") or response.headers.get("x-request-id") - title = None - detail = None - raw_text = response.text or "" - - try: - payload = response.json() - except ValueError: - payload = None - - if isinstance(payload, dict): - title = payload.get("title") - details = payload.get("o:errorDetails") or payload.get("errorDetails") or [] - if details and isinstance(details, list): - first_detail = details[0] - if isinstance(first_detail, dict): - detail = first_detail.get("detail") - - parts = [f"NetSuite API error {status}"] - if title: - parts.append(f"{title}") - if detail: - parts.append(f"{detail}") - if request_id: - parts.append(f"request_id={request_id}") - error_message = ": ".join(parts) - - log_body = raw_text - if len(log_body) > 500: - log_body = f"{log_body[:500]}... [truncated]" - log_message = log_body if log_body else error_message - - return error_message, log_message diff --git a/mindsdb/integrations/handlers/netsuite_handler/netsuite_tables.py b/mindsdb/integrations/handlers/netsuite_handler/netsuite_tables.py deleted file mode 100644 index 805ad50c641..00000000000 --- a/mindsdb/integrations/handlers/netsuite_handler/netsuite_tables.py +++ /dev/null @@ -1,576 +0,0 @@ -from typing import Any, List, Optional - -import json -import pandas as pd - -from mindsdb.integrations.libs.api_handler import MetaAPIResource -from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class NetSuiteRecordTable(MetaAPIResource): - """ - Table abstraction for a NetSuite record type. - """ - - def __init__(self, handler, record_type: str): - """ - Initializes the record table. - - Args: - handler: NetSuite handler instance. 
- record_type (str): NetSuite record type. - """ - self.record_type = str(record_type).lower() - self._column_metadata_cache = None - super().__init__(handler, table_name=record_type) - - def _get_resource_metadata(self) -> List[dict]: - """ - Retrieves SuiteQL column metadata for this NetSuite record type. - - Returns: - List[dict]: Column metadata entries when available. - """ - if self._column_metadata_cache is not None: - return self._column_metadata_cache - - try: - payload = self.handler._suiteql_select(table=self.record_type, limit=1) - except RuntimeError as exc: - if self._should_skip_record_type(exc): - self._mark_record_type_unsupported() - self._column_metadata_cache = [] - return self._column_metadata_cache - - self._column_metadata_cache = self._payload_to_column_metadata(payload) - return self._column_metadata_cache - - def _payload_to_column_metadata(self, payload: Any) -> List[dict]: - """ - Converts SuiteQL payload column metadata into a normalized list. - """ - if not isinstance(payload, dict): - return [] - - columns: List[dict] = [] - metadata = payload.get("columnMetadata") or payload.get("columns") or [] - if isinstance(metadata, list) and metadata: - for idx, col in enumerate(metadata): - if isinstance(col, dict): - name = col.get("name") or col.get("label") or col.get("id") or f"col_{idx}" - data_type = col.get("type") or col.get("dataType") or col.get("sqlType") or col.get("fieldType") - description = col.get("description") or col.get("label") or "" - is_nullable = col.get("nullable") if "nullable" in col else None - default_value = col.get("defaultValue") or col.get("default") or "" - else: - name = str(col) - data_type = None - description = "" - is_nullable = None - default_value = "" - columns.append( - { - "column_name": name, - "data_type": self._normalize_metadata_type(data_type), - "column_description": description, - "is_nullable": is_nullable, - "column_default": default_value, - } - ) - - if columns: - return columns - - items = 
payload.get("items") or [] - if isinstance(items, list) and items and isinstance(items[0], dict): - for name, value in items[0].items(): - columns.append( - { - "column_name": name, - "data_type": self._infer_column_type(value), - "column_description": "", - "is_nullable": None, - "column_default": "", - } - ) - - return columns - - @staticmethod - def _normalize_metadata_type(value) -> str: - if isinstance(value, dict): - if "type" in value: - return NetSuiteRecordTable._normalize_metadata_type(value.get("type")) - if "$ref" in value and isinstance(value["$ref"], str): - return value["$ref"].split("/")[-1] - if "format" in value: - return str(value["format"]) - return "str" - if isinstance(value, list): - return ",".join([str(item) for item in value]) - if value is None: - return "str" - return str(value) - - def _extract_field_metadata(self) -> List[dict]: - """ - Extracts column metadata from SuiteQL responses. - - Returns: - List[dict]: Column metadata entries with table_name, column_name, data_type, - column_description, is_nullable, and column_default. - """ - metadata = self._get_resource_metadata() - if not metadata: - return [] - - fields_metadata = [] - for field in metadata: - fields_metadata.append( - { - "table_name": self.record_type, - "column_name": field.get("column_name"), - "data_type": field.get("data_type"), - "column_description": field.get("column_description"), - "is_nullable": field.get("is_nullable"), - "column_default": field.get("column_default"), - } - ) - - return fields_metadata - - def list( - self, - conditions: List[FilterCondition] = None, - limit: Optional[int] = None, - sort: Optional[list] = None, - targets: Optional[List[str]] = None, - **kwargs, - ) -> pd.DataFrame: - """ - Fetches records using SuiteQL. 
- """ - limit = int(limit) if limit is not None else None - - # Guard for tables we already marked unsupported - if ( - hasattr(self.handler, "_unsupported_record_types") - and self.record_type in self.handler._unsupported_record_types - ): - if targets: - return pd.DataFrame(columns=targets) - return pd.DataFrame(columns=["id"]) - - def _sql_quote(value) -> str: - if value is None: - return "NULL" - if isinstance(value, bool): - # NetSuite often uses 'T'/'F' in some contexts, but SuiteQL generally accepts TRUE/FALSE too. - # We'll use string T/F to be safe with record fields. - return "'T'" if value else "'F'" - if isinstance(value, (int, float)): - return str(value) - s = str(value).replace("'", "''") - return f"'{s}'" - - def _normalize_col(col: str) -> str: - # Your users might filter by internalId; SuiteQL base tables typically expose 'id' - if col.lower() in ("internalid",): - return "id" - return col - - # Build WHERE (push down only EQUAL for now – predictable) - where_parts = [] - for cond in conditions or []: - if cond.op == FilterOperator.EQUAL: - col = _normalize_col(cond.column) - if cond.value is None: - where_parts.append(f"{col} IS NULL") - else: - where_parts.append(f"{col} = {_sql_quote(cond.value)}") - cond.applied = True - - where_sql = "" - if where_parts: - where_sql = " WHERE " + " AND ".join(where_parts) - - # ORDER BY - order_by_sql = "" - if sort: - parts = [] - for s in sort: - direction = "ASC" if s.ascending else "DESC" - parts.append(f"{s.column} {direction}") - if parts: - order_by_sql = " ORDER BY " + ", ".join(parts) - - # Execute SuiteQL - payload = self.handler._suiteql_select( - table=self.record_type, - where_sql=where_sql, - limit=limit, - targets=targets, - order_by_sql=order_by_sql, - ) - - df = self._payload_to_dataframe(payload, targets=targets) - - if df.empty: - if targets: - return pd.DataFrame(columns=targets) - return pd.DataFrame(columns=["id"]) - - # Pretty / stable cell parsing - df = self._prettify_dataframe(df) - 
- if targets: - df = df.reindex(columns=targets) - - return df - - def _payload_to_dataframe(self, payload: Any, targets: Optional[List[str]] = None) -> pd.DataFrame: - """ - Convert SuiteQL response payload into a DataFrame. - """ - if not isinstance(payload, dict): - return pd.DataFrame(columns=targets or []) - - items = payload.get("items") or [] - if not isinstance(items, list) or not items: - return pd.DataFrame(columns=targets or []) - - first = items[0] - - if isinstance(first, dict) and "values" in first: - col_meta = payload.get("columnMetadata") or payload.get("columns") or [] - cols: List[str] = [] - for idx, c in enumerate(col_meta): - if isinstance(c, dict): - cols.append(c.get("name") or c.get("label") or f"col_{idx}") - else: - cols.append(str(c)) - - rows = [] - for it in items: - if isinstance(it, dict): - rows.append(it.get("values") or []) - else: - rows.append([]) - - max_len = max((len(r) for r in rows), default=0) - if len(cols) < max_len: - cols.extend([f"col_{i}" for i in range(len(cols), max_len)]) - - padded = [] - for r in rows: - r = list(r) - if len(r) < len(cols): - r = r + [None] * (len(cols) - len(r)) - else: - r = r[: len(cols)] - padded.append(r) - - seen = {} - deduped = [] - for name in cols: - count = seen.get(name, 0) + 1 - seen[name] = count - deduped.append(name if count == 1 else f"{name}_{count}") - - return pd.DataFrame(padded, columns=deduped) - - if isinstance(first, dict): - return pd.DataFrame(items) - - return pd.DataFrame(columns=targets or []) - - def _prettify_dataframe(self, df: pd.DataFrame) -> pd.DataFrame: - """ - Make SuiteQL/REST-ish nested values readable - """ - - def pick_href(obj: Any) -> Optional[str]: - if isinstance(obj, dict): - href = obj.get("href") - if href: - return str(href) - links = obj.get("links") - if isinstance(links, list): - for link in links: - if isinstance(link, dict) and link.get("rel") == "self" and link.get("href"): - return str(link.get("href")) - if isinstance(obj, list): - for 
link in obj: - if isinstance(link, dict) and link.get("rel") == "self" and link.get("href"): - return str(link.get("href")) - return None - - def normalize_cell(v: Any) -> Any: - if v is None: - return None - - # Most common NetSuite ref objects - if isinstance(v, dict): - # prefer refName for readability - if "refName" in v and v.get("refName") is not None: - return v.get("refName") - if "name" in v and v.get("name") is not None: - return v.get("name") - if "id" in v and v.get("id") is not None and len(v.keys()) <= 3: - # small dict with id-ish fields - return v.get("id") - href = pick_href(v) - if href: - return href - # fallback to stable JSON - try: - return json.dumps(v, ensure_ascii=False, sort_keys=True) - except Exception: - return str(v) - - if isinstance(v, list): - href = pick_href(v) - if href: - return href - try: - return json.dumps(v, ensure_ascii=False, sort_keys=True) - except Exception: - return str(v) - - return v - - out = df.copy() - for col in out.columns: - out[col] = out[col].map(normalize_cell) - return out - - @staticmethod - def _should_skip_record_type(exc: RuntimeError) -> bool: - message = str(exc).lower() - if "record" in message and "was not found" in message: - return True - if "invalid method" in message or "method not allowed" in message: - return True - if "operation is not allowed" in message: - return True - return False - - def _mark_record_type_unsupported(self) -> None: - if hasattr(self.handler, "_unsupported_record_types"): - self.handler._unsupported_record_types.add(str(self.record_type).lower()) - - def add(self, row: List[dict], **kwargs) -> None: - """ - Creates records in NetSuite. - - Args: - row (List[dict]): Records to add. - """ - raise NotImplementedError("NetSuite handler is read-only via SuiteQL.") - - def modify(self, conditions: List[FilterCondition], values: dict): - """ - Updates records in NetSuite identified by id/internalId. - - Args: - conditions (List[FilterCondition]): Conditions to select records. 
- values (dict): Updated values. - """ - raise NotImplementedError("NetSuite handler is read-only via SuiteQL.") - - def remove(self, conditions: List[FilterCondition]): - """ - Deletes records in NetSuite identified by id/internalId. - - Args: - conditions (List[FilterCondition]): Conditions to select records. - """ - raise NotImplementedError("NetSuite handler is read-only via SuiteQL.") - - def get_columns(self) -> list: - """ - Infers columns from a sample response. - - Returns: - list: List of column names. - """ - columns_metadata = self.meta_get_columns() - if columns_metadata: - return [column.get("column_name") for column in columns_metadata if column.get("column_name")] - - sample = self.list(limit=1) - if sample.empty: - return [] - return list(sample.columns) - - def meta_get_tables(self, table_name: str, main_metadata=None) -> dict: - """ - Retrieves table metadata for the NetSuite record type. - - Args: - table_name (str): The table name for the record type. - main_metadata: Unused; present for interface compatibility. - - Returns: - dict: Table metadata including table_name, table_type, table_description, and row_count. - """ - return { - "table_name": table_name, - "table_type": "BASE TABLE", - "table_description": "", - "row_count": None, - } - - def meta_get_columns(self, table_name: str = None, **kwargs) -> List[dict]: - """ - Retrieves column metadata for the NetSuite record type. - - Args: - table_name (str): Optional table name override. - **kwargs: Additional handler-specific arguments. - - Returns: - List[dict]: Column metadata entries with table_name, column_name, data_type, - column_description, is_nullable, and column_default. 
- """ - metadata = self._extract_field_metadata() - if metadata: - return metadata - - sample_record = self._get_sample_record() - if not sample_record: - return [] - - columns = [] - for column_name, value in sample_record.items(): - columns.append( - { - "table_name": self.record_type, - "column_name": column_name, - "data_type": self._infer_column_type(value), - "column_description": "", - "is_nullable": None, - "column_default": "", - } - ) - return columns - - def _get_sample_record(self) -> dict: - """ - Retrieves a sample record for column inference when metadata is unavailable. - - Returns: - dict: A sample record payload, or an empty dict if unavailable. - """ - try: - sample = self.list(limit=1) - except RuntimeError as exc: - if self._should_skip_record_type(exc): - self._mark_record_type_unsupported() - return {} - if sample.empty: - return {} - - record = sample.iloc[0].to_dict() - if self._is_minimal_record(record): - record = self._expand_minimal_record(record) - - return record if isinstance(record, dict) else {} - - def _is_minimal_record(self, record: dict) -> bool: - if not isinstance(record, dict) or not record: - return False - keys = {str(key).lower() for key in record.keys()} - allowed = {"id", "internalid", "links"} - return keys.issubset(allowed) - - def _expand_minimal_record(self, record: dict) -> dict: - """ - Expands a minimal record (id/internalId/links only) into a full record. - - Args: - record (dict): The minimal record payload. - - Returns: - dict: The full record payload when available, otherwise the original record. 
- """ - if not isinstance(record, dict): - return record - - record_id = record.get("internalId") or record.get("id") - if record_id is None: - return record - - try: - - def _sql_quote(value) -> str: - if value is None: - return "NULL" - if isinstance(value, bool): - return "'T'" if value else "'F'" - if isinstance(value, (int, float)): - return str(value) - s = str(value).replace("'", "''") - return f"'{s}'" - - where_sql = f" WHERE id = {_sql_quote(record_id)}" - payload = self.handler._suiteql_select(table=self.record_type, where_sql=where_sql, limit=1) - df = self._payload_to_dataframe(payload) - if df.empty: - return record - return df.iloc[0].to_dict() - except RuntimeError: - return record - - @staticmethod - def _infer_column_type(value) -> str: - if isinstance(value, bool): - return "bool" - if isinstance(value, int): - return "int" - if isinstance(value, float): - return "float" - if isinstance(value, (dict, list)): - return "json" - if value is None: - return "str" - return "str" - - def meta_get_primary_keys(self, table_name: str) -> List[dict]: - """ - Retrieves primary key metadata for the NetSuite record type. - - Args: - table_name (str): The table name for the record type. - - Returns: - List[dict]: Primary key metadata entries with table_name and column_name. - """ - columns = {col.get("column_name") for col in self.meta_get_columns() if col.get("column_name")} - - primary_key = None - for candidate in ("internalId", "id", "Id"): - if candidate in columns: - primary_key = candidate - break - - if not primary_key: - return [] - - return [{"table_name": table_name, "column_name": primary_key}] - - def meta_get_foreign_keys(self, table_name: str, all_tables: List[str]) -> List[dict]: - """ - Retrieves foreign key metadata inferred from record reference fields. - - Args: - table_name (str): The table name for the record type. - all_tables (List[str]): All available table names for relationship resolution. 
- - Returns: - List[dict]: Foreign key metadata entries with parent/child table and column names. - """ - return [] diff --git a/mindsdb/integrations/handlers/netsuite_handler/requirements.txt b/mindsdb/integrations/handlers/netsuite_handler/requirements.txt deleted file mode 100644 index d217191ad8b..00000000000 --- a/mindsdb/integrations/handlers/netsuite_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -requests-oauthlib>=1.3.1 diff --git a/mindsdb/integrations/handlers/ollama_handler/README.md b/mindsdb/integrations/handlers/ollama_handler/README.md deleted file mode 100644 index ea775303d73..00000000000 --- a/mindsdb/integrations/handlers/ollama_handler/README.md +++ /dev/null @@ -1,146 +0,0 @@ ---- -title: Ollama -sidebarTitle: Ollama ---- - -This documentation describes the integration of MindsDB with [Ollama](https://ollama.com/), a tool that enables local deployment of large language models. -The integration allows for the deployment of Ollama models within MindsDB, providing the models with access to data from various data sources. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To use Ollama within MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). -3. Follow [this instruction](https://github.com/ollama/ollama?tab=readme-ov-file#ollama) to download Ollama and run models locally. - - -Here are the recommended system specifications: - -- A working Ollama installation, as in point 3. -- For 7B models, at least 8GB RAM is recommended. -- For 13B models, at least 16GB RAM is recommended. -- For 70B models, at least 64GB RAM is recommended. 
- - -## Setup - -Create an AI engine from the [Ollama handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/ollama_handler). - -```sql -CREATE ML_ENGINE ollama_engine -FROM ollama; -``` - -Create a model using `ollama_engine` as an engine. - -```sql -CREATE MODEL ollama_model -PREDICT completion -USING - engine = 'ollama_engine', -- engine name as created via CREATE ML_ENGINE - model_name = 'model-name', -- model run with 'ollama run model-name' - ollama_serve_url = 'http://localhost:11434'; -``` - - -If you run Ollama and MindsDB in separate Docker containers, use the `localhost` value of the container. For example, `ollama_serve_url = 'http://host.docker.internal:11434'`. - - -You can find [available models here](https://github.com/ollama/ollama?tab=readme-ov-file#model-library). - -## Usage - -The following usage examples utilize `ollama_engine` to create a model with the `CREATE MODEL` statement. - -Deploy and use the `llama2` model. - -First, [download Ollama](https://github.com/ollama/ollama?tab=readme-ov-file#ollama) and run the model locally by executing `ollama run llama2`. - -Now deploy this model within MindsDB. - -```sql -CREATE MODEL llama2_model -PREDICT completion -USING - engine = 'ollama_engine', - model_name = 'llama2'; -``` - - -Models can be run in either the 'generate' or 'embedding' modes. The 'generate' mode is used for text generation, while the 'embedding' mode is used to generate embeddings for text. - -However, these modes can only be used with models that support them. For example, the `moondream` model supports both modes. - -By default, if the mode is not specified, the model will run in 'generate' mode if multiple modes are supported. If only one mode is supported, the model will run in that mode. - -To specify the mode, use the `mode` parameter in the `CREATE MODEL` statement. For example, `mode = 'embedding'`. - - -Query the model to get predictions. 
- -```sql -SELECT text, completion -FROM llama2_model -WHERE text = 'Hello'; -``` - -Here is the output: - -```sql -+-------+------------+ -| text | completion | -+-------+------------+ -| Hello | Hello! | -+-------+------------+ -``` - -You can override the prompt message as below: - -```sql -SELECT text, completion -FROM llama2_model -WHERE text = 'Hello' -USING - prompt_template = 'Answer using exactly five words: {{text}}:'; -``` - -Here is the output: - -```sql -+-------+------------------------------------+ -| text | completion | -+-------+------------------------------------+ -| Hello | Hello! *smiles* How are you today? | -+-------+------------------------------------+ -``` - - -**Next Steps** - -Go to the [Use Cases](https://docs.mindsdb.com/use-cases/overview) section to see more examples. - - -### Embeddings - -If you want to use an embedding model (instead of text generation), then you will want to activate embedding mode at model creation: - -```sql -CREATE MODEL nomic_embed_model -PREDICT embeddings -USING - engine = 'ollama_engine', - model_name = 'nomic-embed-text', - mode = 'embedding', - prompt_template = '{{column}}, {{another_column}}'; -``` - -The output will contain embeddings for each input row (length is model-dependent): - -```sql -+-------+---------------------------------------------------------------------------------+ -| column | another_column | embeddings | -+-------+---------------------------------------------------------------------------------+ -| Hello | Matt! | [0.7849581241607666,1.263154149055481,-4.024246692657471... 
| -+-------+---------------------------------------------------------------------------------+ -``` diff --git a/mindsdb/integrations/handlers/ollama_handler/__about__.py b/mindsdb/integrations/handlers/ollama_handler/__about__.py deleted file mode 100644 index 37799994782..00000000000 --- a/mindsdb/integrations/handlers/ollama_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Ollama handler" -__package_name__ = "mindsdb_ollama_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Ollama" -__author__ = "MindsDB Inc" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023- mindsdb" diff --git a/mindsdb/integrations/handlers/ollama_handler/__init__.py b/mindsdb/integrations/handlers/ollama_handler/__init__.py deleted file mode 100644 index eea6a1903d6..00000000000 --- a/mindsdb/integrations/handlers/ollama_handler/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .ollama_handler import OllamaHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Ollama" -name = "ollama" -type = HANDLER_TYPE.ML -icon_path = "icon.png" -permanent = False - -__all__ = ["Handler", "version", "name", "type", "title", "description", "import_error", "icon_path"] diff --git a/mindsdb/integrations/handlers/ollama_handler/icon.png b/mindsdb/integrations/handlers/ollama_handler/icon.png deleted file mode 100644 index 7c9be9a71e3..00000000000 Binary files a/mindsdb/integrations/handlers/ollama_handler/icon.png and /dev/null differ diff --git a/mindsdb/integrations/handlers/ollama_handler/ollama_handler.py b/mindsdb/integrations/handlers/ollama_handler/ollama_handler.py deleted file mode 100644 index 639345933fa..00000000000 --- 
a/mindsdb/integrations/handlers/ollama_handler/ollama_handler.py +++ /dev/null @@ -1,202 +0,0 @@ -import json -import requests -from typing import Dict, Optional - -import pandas as pd - -from mindsdb.integrations.libs.base import BaseMLEngine -from mindsdb.integrations.libs.llm.utils import get_completed_prompts - - -class OllamaHandler(BaseMLEngine): - name = "ollama" - DEFAULT_SERVE_URL = "http://localhost:11434" - - @staticmethod - def create_validation(target, args=None, **kwargs): - if "using" not in args: - raise Exception("Ollama engine requires a USING clause! Refer to its documentation for more details.") - else: - args = args["using"] - - if "model_name" not in args: - raise Exception("`model_name` must be provided in the USING clause.") - - # check ollama service health - connection = args.get("ollama_serve_url", OllamaHandler.DEFAULT_SERVE_URL) - status = requests.get(connection + "/api/tags").status_code - if status != 200: - raise Exception( - f"Ollama service is not working (status `{status}`). Please double check it is running and try again." 
- ) # noqa - - def create(self, target: str, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None: - """Pull LLM artifacts with Ollama API.""" - # arg setter - args = args["using"] - args["target"] = target - connection = args.get("ollama_serve_url", OllamaHandler.DEFAULT_SERVE_URL) - - def _model_check(): - """Checks model has been pulled and that it works correctly.""" - responses = {} - for endpoint in ["generate", "embeddings"]: - try: - code = requests.post( - connection + f"/api/{endpoint}", - json={ - "model": args["model_name"], - "prompt": "Hello.", - }, - ).status_code - responses[endpoint] = code - except Exception: - responses[endpoint] = 500 - return responses - - # check model for all supported endpoints - responses = _model_check() - if 200 not in responses.values(): - # pull model (blocking operation) and serve - # TODO: point to the engine storage folder instead of default location - connection = args.get("ollama_serve_url", OllamaHandler.DEFAULT_SERVE_URL) - requests.post(connection + "/api/pull", json={"name": args["model_name"]}) - # try one last time - responses = _model_check() - if 200 not in responses.values(): - raise Exception( - f"Ollama model `{args['model_name']}` is not working correctly. Please try pulling this model manually, check it works correctly and try again." 
- ) # noqa - - supported_modes = {k: True if v == 200 else False for k, v in responses.items()} - - # check if a mode has been provided and if it is valid - runnable_modes = [mode for mode, supported in supported_modes.items() if supported] - if "mode" in args: - if args["mode"] not in runnable_modes: - raise Exception(f"Mode `{args['mode']}` is not supported by the model `{args['model_name']}`.") - - # if a mode has not been provided, check if the model supports only one mode - # if it does, set it as the default mode - # if it supports multiple modes, set the default mode to 'generate' - else: - if len(runnable_modes) == 1: - args["mode"] = runnable_modes[0] - else: - args["mode"] = "generate" - - self.model_storage.json_set("args", args) - - def predict(self, df: pd.DataFrame, args: Optional[Dict] = None) -> pd.DataFrame: - """ - Generate text completions with the local LLM. - Args: - df (pd.DataFrame): The input DataFrame containing data to predict. - args (Optional[Dict]): Additional arguments for prediction parameters. - Returns: - pd.DataFrame: The DataFrame containing row-wise text completions. 
- """ - # setup - pred_args = args.get("predict_params", {}) - args = self.model_storage.json_get("args") - model_name, target_col = args["model_name"], args["target"] - - # Auto-detect column if template is missing - # If user provided a specific template - user_template = pred_args.get("prompt_template", args.get("prompt_template")) - - # OR If no template and 'text' column is missing, then auto-detect - if user_template is None and "text" not in df.columns and len(df.columns) == 1: - col_name = df.columns[0] - # Create a template dynamically - prompt_template = "Answer the following question: {{{{" + col_name + "}}}}" - else: - # Fallback: Use user template OR default to 'text' (Old behavior) - prompt_template = user_template if user_template else "Answer the following question: {{{{text}}}}" - - # prepare prompts - prompts, empty_prompt_ids = get_completed_prompts(prompt_template, df) - df["__mdb_prompt"] = prompts - - # setup endpoint - endpoint = args.get("mode", "generate") - - # call llm - completions = [] - for i, row in df.iterrows(): - if i not in empty_prompt_ids: - temperature = pred_args.get("temperature", args.get("temperature")) - - # Options dictionary - options = {} - if temperature is not None: - try: - options["temperature"] = float(temperature) - except ValueError: - pass - - # Calling API with the new options - connection = args.get("ollama_serve_url", OllamaHandler.DEFAULT_SERVE_URL) - raw_output = requests.post( - connection + f"/api/{endpoint}", - json={ - "model": model_name, - "prompt": row["__mdb_prompt"], - "options": options, # options passed here - }, - ) - lines = raw_output.content.decode().split("\n") # stream of output tokens - - values = [] - for line in lines: - if line != "": - info = json.loads(line) - if "response" in info: - token = info["response"] - values.append(token) - elif "embedding" in info: - embedding = info["embedding"] - values.append(embedding) - - if endpoint == "embeddings": - completions.append(values) - 
else: - completions.append("".join(values)) - else: - completions.append("") - - # consolidate output - data = pd.DataFrame(completions) - data.columns = [target_col] - return data - - def describe(self, attribute: Optional[str] = None) -> pd.DataFrame: - args = self.model_storage.json_get("args") - model_name, target_col = args["model_name"], args["target"] - prompt_template = args.get("prompt_template", "Answer the following question: {{{{text}}}}") - - if attribute == "features": - return pd.DataFrame([[target_col, prompt_template]], columns=["target_column", "mindsdb_prompt_template"]) - - # get model info - else: - connection = args.get("ollama_serve_url", OllamaHandler.DEFAULT_SERVE_URL) - model_info = requests.post(connection + "/api/show", json={"name": model_name}).json() - return pd.DataFrame( - [ - [ - model_name, - model_info.get("license", "N/A"), - model_info.get("modelfile", "N/A"), - model_info.get("parameters", "N/A"), - model_info.get("template", "N/A"), - ] - ], - columns=[ - "model_type", - "license", - "modelfile", - "parameters", - "ollama_base_template", - ], - ) diff --git a/mindsdb/integrations/handlers/ollama_handler/tests/test_ollama_handler.py b/mindsdb/integrations/handlers/ollama_handler/tests/test_ollama_handler.py deleted file mode 100644 index b06caaae4e6..00000000000 --- a/mindsdb/integrations/handlers/ollama_handler/tests/test_ollama_handler.py +++ /dev/null @@ -1,45 +0,0 @@ -import unittest -from unittest.mock import patch, Mock -import pandas as pd -from mindsdb.integrations.handlers.ollama_handler.ollama_handler import OllamaHandler - - -class TestOllamaHandler(unittest.TestCase): - def setUp(self): - # Mock the storage to return valid model configuration - mock_storage = Mock() - mock_storage.json_get.return_value = { - "model_name": "tinyllama", - "target": "response", - "ollama_serve_url": "http://localhost:11434", - } - - # Initialize handler with mocked storage - self.handler = OllamaHandler(name="test_ollama", 
model_storage=mock_storage, engine_storage={}) - - @patch("mindsdb.integrations.handlers.ollama_handler.ollama_handler.requests.post") - def test_temperature_passing(self, mock_post): - """ - Test that the temperature parameter is correctly extracted from args - and passed to the Ollama API options. - """ - # Setup mock response - mock_response = Mock() - mock_response.content = b'{"response": "Test response"}' - mock_post.return_value = mock_response - - # Create input dataframe - df = pd.DataFrame({"text": ["Hello"]}) - - # Execute prediction with temperature argument - self.handler.predict(df, args={"predict_params": {"temperature": 0.5}}) - - # Verify API call payload - call_args = mock_post.call_args[1]["json"] - - self.assertIn("options", call_args) - self.assertEqual(call_args["options"]["temperature"], 0.5) - - -if __name__ == "__main__": - unittest.main() diff --git a/mindsdb/integrations/handlers/openai_handler/README.md b/mindsdb/integrations/handlers/openai_handler/README.md deleted file mode 100644 index b63ac10fcad..00000000000 --- a/mindsdb/integrations/handlers/openai_handler/README.md +++ /dev/null @@ -1,237 +0,0 @@ ---- -title: OpenAI -sidebarTitle: OpenAI ---- - -This documentation describes the integration of MindsDB with [OpenAI](https://openai.com/), an AI research organization known for developing AI models like GPT-3 and GPT-4. -The integration allows for the deployment of OpenAI models within MindsDB, providing the models with access to data from various data sources. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To use OpenAI within MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). -3. 
Obtain the OpenAI API key required to deploy and use OpenAI models within MindsDB. Follow the [instructions for obtaining the API key](https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key). - -## Setup - -Create an AI engine from the [OpenAI handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/openai_handler). - -```sql -CREATE ML_ENGINE openai_engine -FROM openai -USING - openai_api_key = 'api-key-value'; -``` - -Create a model using `openai_engine` as an engine. - -```sql -CREATE MODEL openai_model -PREDICT target_column -USING - engine = 'openai_engine', -- engine name as created via CREATE ML_ENGINE - api_base = 'base-url', -- optional, replaces the default base URL - mode = 'mode_name', -- optional, mode to run the model in - model_name = 'openai_model_name', -- optional with default value of gpt-3.5-turbo - question_column = 'question', -- optional, column name that stores user input - context_column = 'context', -- optional, column that stores context of the user input - prompt_template = 'input your query here', -- optional, user provides instructions to the model here - user_column = 'user_input', -- optional, stores user input - assistant_column = 'conversation_context', -- optional, stores conversation context - prompt = 'instruction to the model', -- optional stores instruction to the model - max_tokens = 100, -- optional, token limit for answer - temperature = 0.3, -- temp - -``` - -The following parameters are available to use when creating an OpenAI model: - -* `engine`: This is the engine name as created with the [`CREATE ML_ENGINE`](https://docs.mindsdb.com/mindsdb_sql/sql/create/ml-engine) statement. -* `api_base`: This parameter is optional. It replaces the default OpenAI's base URL with the defined value. -* `mode`: This parameter is optional. The available modes include `default`, `conversational`, `conversational-full`, `image`, and `embedding`. - - The `default` mode is used by default. 
The model replies to the `prompt_template` message. - - The `conversational` mode enables the model to read and reply to multiple messages. - - The `conversational-full` mode enables the model to read and reply to multiple messages, one reply per message. - - The `image` mode is used to create an image instead of a text reply. - - The `embedding` mode enables the model to return output in the form of embeddings. -> You can find [all models supported by each mode here](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/openai_handler/constants.py). - -* `model_name`: This parameter is optional. By default, the `gpt-3.5-turbo` model is used. -> You can find [all available models here](https://github.com/mindsdb/mindsdb/blob/main/mindsdb/integrations/handlers/openai_handler/constants.py). - -* `question_column`: This parameter is optional. It contains the column name that stores user input. -* `context_column`: This parameter is optional. It contains the column name that stores context for the user input. -* `prompt_template`: This parameter is optional if you use `question_column`. It stores the message or instructions to the model. *Please note that this parameter can be overridden at prediction time.* -* `max_tokens`: This parameter is optional. It defines the maximum token cost of the prediction. *Please note that this parameter can be overridden at prediction time.* -* `temperature`: This parameter is optional. It defines how *risky* the answers are. The value of `0` marks a well-defined answer, and the value of `0.9` marks a more creative answer. *Please note that this parameter can be overridden at prediction time.* - -## Usage - -Here are the combination of parameters for creating a model: - -1. Provide a `prompt_template` alone. -2. Provide a `question_column` and optionally a `context_column`. -3. Provide a `prompt`, `user_column`, and `assistant_column` to create a model in the conversational mode. 
- -The following usage examples utilize `openai_engine` to create a model with the `CREATE MODEL` statement. - -### Answering questions without context - -Here is how to create a model that answers questions without context. - -```sql -CREATE MODEL openai_model -PREDICT answer -USING - engine = 'openai_engine', - question_column = 'question'; -``` - -Query the model to get predictions. - -```sql -SELECT question, answer -FROM openai_model -WHERE question = 'Where is Stockholm located?'; -``` - -Here is the output: - -```sql -+---------------------------+-------------------------------+ -|question |answer | -+---------------------------+-------------------------------+ -|Where is Stockholm located?|Stockholm is located in Sweden.| -+---------------------------+-------------------------------+ -``` - -### Answering questions with context - -```sql -CREATE MODEL openai_model -PREDICT answer -USING - engine = 'openai_engine', - question_column = 'question', - context_column = 'context'; -``` - -Query the model to get predictions. - -```sql -SELECT context, question, answer -FROM openai_model -WHERE context = 'Answer accurately' -AND question = 'How many planets exist in the solar system?'; -``` - -On execution, we get: - -```sql -+-------------------+-------------------------------------------+----------------------------------------------+ -|context |question |answer | -+-------------------+-------------------------------------------+----------------------------------------------+ -|Answer accurately |How many planets exist in the solar system?| There are eight planets in the solar system. | -+-------------------+-------------------------------------------+----------------------------------------------+ -``` - -### Prompt completion - -Here is how to create a model that offers the most flexible mode of operation. It answers any query provided in the `prompt_template` parameter. 
- - -Good prompts are the key to getting great completions out of large language models like the ones that OpenAI offers. For best performance, we recommend you read their [prompting guide](https://beta.openai.com/docs/guides/completion/prompt-design) before trying your hand at prompt templating. - - -Let's look at an example that reuses the `openai_model` model created earlier and overrides parameters at prediction time. - -```sql -SELECT instruction, answer -FROM openai_model -WHERE instruction = 'Speculate extensively' -USING - prompt_template = '{{instruction}}. What does Tom Hanks like?', - max_tokens = 100, - temperature = 0.5; -``` - -On execution, we get: - -```sql -+----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -|instruction |answer | -+----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -|Speculate extensively |Some people speculate that Tom Hanks likes to play golf, while others believe that he enjoys acting and directing. It is also speculated that he likes to spend time with his family and friends, and that he enjoys traveling.| -+----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -``` - -### Conversational mode - -Here is how to create a model in the conversational mode. 
- -```sql -CREATE MODEL openai_chat_model -PREDICT response -USING - engine = 'openai_engine', - mode = 'conversational', - model_name = 'gpt-3.5-turbo', - user_column = 'user_input', - assistant_column = 'conversation_history', - prompt = 'Answer the question in a helpful way.'; -``` - -And here is how to query this model: - -```sql -SELECT response -FROM openai_chat_model -WHERE user_input = '' -AND conversation_history = ''; -``` - -## Next Steps - -Follow [this tutorial on sentiment analysis](https://docs.mindsdb.com/use-cases/data_enrichment/sentiment-analysis-inside-mysql-with-openai) and [this tutorial on finetuning OpenAI models](https://docs.mindsdb.com/use-cases/automated_finetuning/openai) to see more use case examples. - -## Troubleshooting Guide - - -`Authentication Error` - -* **Symptoms**: Failure to authenticate to the OpenAI API. -* **Checklist**: - 1. Make sure that your OpenAI account is active. - 2. Confirm that your API key is correct. - 3. Ensure that your API key has not been revoked. - 4. Ensure that you have not exceeded the API usage or rate limit. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table and model names containing spaces or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. 
- Examples: - * Incorrect: - ```sql - SELECT input.text, output.sentiment - FROM integration.travel data AS input - JOIN openai_engine AS output - ``` - * Incorrect: - ```sql - SELECT input.text, output.sentiment - FROM integration.'travel data' AS input - JOIN openai_engine AS output - ``` - * Correct: - ```sql - SELECT input.text, output.sentiment - FROM integration.`travel data` AS input - JOIN openai_engine AS output - ``` - diff --git a/mindsdb/integrations/handlers/openai_handler/__about__.py b/mindsdb/integrations/handlers/openai_handler/__about__.py deleted file mode 100644 index c0e5383191c..00000000000 --- a/mindsdb/integrations/handlers/openai_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB OpenAI handler' -__package_name__ = 'mindsdb_openai_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for OpenAI" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/openai_handler/__init__.py b/mindsdb/integrations/handlers/openai_handler/__init__.py deleted file mode 100644 index fc37a6aa911..00000000000 --- a/mindsdb/integrations/handlers/openai_handler/__init__.py +++ /dev/null @@ -1,34 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_SUPPORT_LEVEL, HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .creation_args import creation_args -from .model_using_args import model_using_args - -try: - from .openai_handler import OpenAIHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "OpenAI" -name = "openai" -type = HANDLER_TYPE.ML -icon_path = "icon.svg" -permanent = False -support_level = HANDLER_SUPPORT_LEVEL.MINDSDB - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - 
"icon_path", - "creation_args", - "model_using_args", - "support_level", -] diff --git a/mindsdb/integrations/handlers/openai_handler/constants.py b/mindsdb/integrations/handlers/openai_handler/constants.py deleted file mode 100644 index 451b7da6c38..00000000000 --- a/mindsdb/integrations/handlers/openai_handler/constants.py +++ /dev/null @@ -1,19 +0,0 @@ -OPENAI_API_BASE = "https://api.openai.com/v1" - -CHAT_MODELS_PREFIXES = ("gpt-3.5", "gpt-3.5", "gpt-3.5", "gpt-4", "o3-mini", "o1-mini") -COMPLETION_MODELS = ("babbage-002", "davinci-002") -FINETUNING_MODELS = ("gpt-3.5-turbo", "babbage-002", "davinci-002", "gpt-4") -COMPLETION_LEGACY_BASE_MODELS = ("davinci", "curie", "babbage", "ada") -DEFAULT_CHAT_MODEL = "gpt-4o-mini" - -FINETUNING_LEGACY_MODELS = FINETUNING_MODELS -COMPLETION_LEGACY_MODELS = ( - COMPLETION_LEGACY_BASE_MODELS - + tuple(f"text-{model}-001" for model in COMPLETION_LEGACY_BASE_MODELS) - + ("text-davinci-002", "text-davinci-003") -) - -DEFAULT_EMBEDDING_MODEL = "text-embedding-ada-002" - -IMAGE_MODELS = ("dall-e-2", "dall-e-3") -DEFAULT_IMAGE_MODEL = "dall-e-2" diff --git a/mindsdb/integrations/handlers/openai_handler/creation_args.py b/mindsdb/integrations/handlers/openai_handler/creation_args.py deleted file mode 100644 index 700ee1955a9..00000000000 --- a/mindsdb/integrations/handlers/openai_handler/creation_args.py +++ /dev/null @@ -1,14 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -creation_args = OrderedDict( - openai_api_key={ - 'type': ARG_TYPE.STR, - 'description': 'Key for OpenAI API.', - 'required': False, - 'label': 'OpenAI API key', - 'secret': True - } -) diff --git a/mindsdb/integrations/handlers/openai_handler/helpers.py b/mindsdb/integrations/handlers/openai_handler/helpers.py deleted file mode 100644 index bc0bc37789b..00000000000 --- a/mindsdb/integrations/handlers/openai_handler/helpers.py +++ /dev/null @@ -1,198 +0,0 @@ -from typing 
import Text, List, Dict -import random -import time -import math - -import openai - -import tiktoken - -import mindsdb.utilities.profiler as profiler - - -class PendingFT(openai.OpenAIError): - """ - Custom exception to handle pending fine-tuning status. - """ - - message: str - - def __init__(self, message) -> None: - super().__init__() - self.message = message - - -def retry_with_exponential_backoff( - initial_delay: float = 1, - hour_budget: float = 0.3, - jitter: bool = False, - exponential_base: int = 2, - wait_errors: tuple = (openai.APITimeoutError, openai.APIConnectionError, PendingFT), - status_errors: tuple = (openai.APIStatusError, openai.APIResponseValidationError), -): - """ - Wrapper to enable optional arguments. It means this decorator always needs to be called with parenthesis: - - > @retry_with_exponential_backoff() # optional argument override here - > def f(): [...] - - """ # noqa - - @profiler.profile() - def _retry_with_exponential_backoff(func): - """ - Exponential backoff to retry requests on a rate-limited API call, as recommended by OpenAI. - Loops the call until a successful response or max_retries is hit or an exception is raised. 
- - Slight changes in the implementation, but originally from: - https://github.com/openai/openai-cookbook/blob/main/examples/How_to_handle_rate_limits.ipynb - - Args: - func: Function to be wrapped - initial_delay: Initial delay in seconds - hour_budget: Hourly budget in seconds - jitter: Adds randomness to the delay - exponential_base: Base for the exponential backoff - wait_errors: Tuple of errors to retry on - status_errors: Tuple of status errors to raise - - Returns: - Wrapper function with exponential backoff - """ # noqa - - def wrapper(*args, **kwargs): - num_retries = 0 - delay = initial_delay - - if isinstance(hour_budget, float) or isinstance(hour_budget, int): - try: - max_retries = round((math.log((hour_budget * 3600) / initial_delay)) / math.log(exponential_base)) - except ValueError: - max_retries = 10 - else: - max_retries = 10 - max_retries = max(1, max_retries) - - while True: - try: - return func(*args, **kwargs) - - except status_errors as e: - error_message = e.body - if isinstance(error_message, dict): - error_message = error_message.get( - "message", - "Please refer to `https://platform.openai.com/docs/guides/error-codes` for more information.", - ) - raise Exception(f"Error status {e.status_code} raised by OpenAI API: {error_message}") - - except wait_errors: - num_retries += 1 - if num_retries > max_retries: - raise Exception(f"Maximum number of retries ({max_retries}) exceeded.") - # Increment the delay and wait - delay *= exponential_base * (1 + jitter * random.random()) - time.sleep(delay) - - except openai.OpenAIError as e: - raise Exception( - f"General {str(e)} error raised by OpenAI. Please refer to `https://platform.openai.com/docs/guides/error-codes` for more information." 
# noqa - ) - - except Exception as e: - raise e - - return wrapper - - return _retry_with_exponential_backoff - - -def truncate_msgs_for_token_limit(messages: List[Dict], model_name: Text, max_tokens: int, truncate: Text = "first"): - """ - Truncates message list to fit within the token limit. - The first message for chat completion models are general directives with the system role, which will ideally be kept at all times. - - Slight changes in the implementation, but originally from: - https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb - - Args: - messages (List[Dict]): List of messages - model_name (Text): Model name - max_tokens (int): Maximum token limit - truncate (Text): Truncate strategy, either 'first' or 'last' - - Returns: - List[Dict]: Truncated message list - """ # noqa - try: - encoder = tiktoken.encoding_for_model(model_name) - except KeyError: - # If the encoding is not found, defualt to cl100k_base. - # This is applicable for handlers that extend the OpenAI handler such as Anyscale. - model_name = "gpt-3.5-turbo-0301" - encoder = tiktoken.get_encoding("cl100k_base") - - sys_priming = messages[0:1] - n_tokens = count_tokens(messages, encoder, model_name) - while n_tokens > max_tokens: - if len(messages) == 2: - return messages[:-1] # edge case: if limit is surpassed by just one input, we remove initial instruction - elif len(messages) == 1: - return messages - - if truncate == "first": - messages = sys_priming + messages[2:] - else: - messages = sys_priming + messages[1:-1] - - n_tokens = count_tokens(messages, encoder, model_name) - return messages - - -def count_tokens(messages: List[Dict], encoder: tiktoken.core.Encoding, model_name: Text = "gpt-3.5-turbo-0301"): - """ - Counts the number of tokens in a list of messages. 
- - Args: - messages: List of messages - encoder: Tokenizer - model_name: Model name - """ - if "gpt-3.5-turbo" in model_name: # note: future models may deviate from this (only 0301 really complies) - tokens_per_message = 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n - tokens_per_name = -1 - else: - tokens_per_message = 3 - tokens_per_name = 1 - - num_tokens = 0 - for message in messages: - num_tokens += tokens_per_message - - for key, value in message.items(): - num_tokens += len(encoder.encode(value)) - if key == "name": # if there's a name, the role is omitted - num_tokens += tokens_per_name - num_tokens += 2 # every reply is primed with assistant - return num_tokens - - -def get_available_models(client) -> List[Text]: - """ - Returns a list of available openai models for the given API key. - NOTE: writer's 'get models list' response differs from openai's - https://dev.writer.com/api-reference/completion-api/list-models - https://platform.openai.com/docs/api-reference/models/list - - Args: - client: openai sdk client - - Returns: - List[Text]: List of available models - """ - res = client.models.list() - - if str(client.base_url.netloc).lower() == "api.writer.com": - return [models["id"] for models in res.models] - - return [models.id for models in res.data] diff --git a/mindsdb/integrations/handlers/openai_handler/icon.svg b/mindsdb/integrations/handlers/openai_handler/icon.svg deleted file mode 100644 index e4310648914..00000000000 --- a/mindsdb/integrations/handlers/openai_handler/icon.svg +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/openai_handler/model_using_args.py b/mindsdb/integrations/handlers/openai_handler/model_using_args.py deleted file mode 100644 index b2e82617338..00000000000 --- a/mindsdb/integrations/handlers/openai_handler/model_using_args.py +++ /dev/null @@ -1,5 +0,0 @@ -model_using_args = { - 'openai_api_key': { - 'secret': True - } -} diff 
--git a/mindsdb/integrations/handlers/openai_handler/openai_handler.py b/mindsdb/integrations/handlers/openai_handler/openai_handler.py deleted file mode 100644 index 54aa4d7b34b..00000000000 --- a/mindsdb/integrations/handlers/openai_handler/openai_handler.py +++ /dev/null @@ -1,1145 +0,0 @@ -import os -import math -import json -import shutil -import tempfile -import datetime -import textwrap -import subprocess -from enum import Enum -import concurrent.futures -from typing import Text, Tuple, Dict, List, Optional, Any -import openai -from openai.types.fine_tuning import FineTuningJob -from openai import OpenAI, AzureOpenAI, NotFoundError, AuthenticationError -import numpy as np -import pandas as pd - -from mindsdb.utilities.hooks import before_openai_query, after_openai_query -from mindsdb.utilities import log -from mindsdb.integrations.libs.base import BaseMLEngine -from mindsdb.integrations.handlers.openai_handler.helpers import ( - retry_with_exponential_backoff, - truncate_msgs_for_token_limit, - get_available_models, - PendingFT, -) -from mindsdb.integrations.handlers.openai_handler.constants import ( - CHAT_MODELS_PREFIXES, - IMAGE_MODELS, - FINETUNING_MODELS, - OPENAI_API_BASE, - DEFAULT_CHAT_MODEL, - DEFAULT_EMBEDDING_MODEL, - DEFAULT_IMAGE_MODEL, -) -from mindsdb.integrations.libs.llm.utils import get_completed_prompts -from mindsdb.integrations.utilities.handler_utils import get_api_key - -logger = log.getLogger(__name__) - - -class Mode(Enum): - default = "default" - conversational = "conversational" - conversational_full = "conversational-full" - image = "image" - embedding = "embedding" - legacy = "legacy" - - @classmethod - def _missing_(cls, value): - raise ValueError(f"Invalid operation mode '{value}'. Please use one of: {[val.name for val in cls]}") - - -class OpenAIHandler(BaseMLEngine): - """ - This handler handles connection and inference with the OpenAI API. 
- """ - - name = "openai" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.generative = True - self.default_model = DEFAULT_CHAT_MODEL - self.default_embedding_model = DEFAULT_EMBEDDING_MODEL - self.default_image_model = DEFAULT_IMAGE_MODEL - self.default_mode = Mode.default # can also be 'conversational' or 'conversational-full' - self.rate_limit = 60 # requests per minute - self.max_batch_size = 20 - self.default_max_tokens = 100 - self.supported_ft_models = FINETUNING_MODELS # base models compatible with finetuning - # For now this are only used for handlers that inherits OpenAIHandler and don't need to override base methods - self.api_key_name = getattr(self, "api_key_name", self.name) - self.api_base = getattr(self, "api_base", OPENAI_API_BASE) - - def create_engine(self, connection_args: Dict) -> None: - """ - Validate the OpenAI API credentials on engine creation. - - Args: - connection_args (Dict): Parameters for the engine. - - Raises: - Exception: If the handler is not configured with valid API credentials. - - Returns: - None - """ - connection_args = {k.lower(): v for k, v in connection_args.items()} - api_key = connection_args.get("openai_api_key") - if api_key is not None: - org = connection_args.get("api_organization") - api_base = connection_args.get("api_base") or os.environ.get("OPENAI_API_BASE", OPENAI_API_BASE) - client = self._get_client(api_key=api_key, base_url=api_base, org=org, args=connection_args) - OpenAIHandler._check_client_connection(client) - - @staticmethod - def is_chat_model(model_name): - for prefix in CHAT_MODELS_PREFIXES: - if model_name.startswith(prefix): - return True - return False - - @staticmethod - def _check_client_connection(client: OpenAI) -> None: - """ - Check the OpenAI engine client connection by retrieving a model. - - Args: - client (openai.OpenAI): OpenAI client configured with the API credentials. 
- - Raises: - Exception: If the client connection (API key) is invalid or something else goes wrong. - - Returns: - None - """ - try: - client.models.retrieve("test") - except NotFoundError: - pass - except AuthenticationError as e: - if isinstance(e.body, dict) and e.body.get("code") == "invalid_api_key": - raise Exception("Invalid api key") - raise Exception(f"Something went wrong: {e}") - - @staticmethod - def create_validation(target: Text, args: Dict = None, **kwargs: Any) -> None: - """ - Validate the OpenAI API credentials on model creation. - - Args: - target (Text): Target column name. - args (Dict): Parameters for the model. - kwargs (Any): Other keyword arguments. - - Raises: - Exception: If the handler is not configured with valid API credentials. - - Returns: - None - """ - if "using" not in args: - raise Exception("OpenAI engine requires a USING clause! Refer to its documentation for more details.") - else: - args = args["using"] - - if len(set(args.keys()) & {"question_column", "prompt_template", "prompt"}) == 0: - raise Exception("One of `question_column`, `prompt_template` or `prompt` is required for this engine.") - - keys_collection = [ - ["prompt_template"], - ["question_column", "context_column"], - ["prompt", "user_column", "assistant_column"], - ] - for keys in keys_collection: - if keys[0] in args and any(x[0] in args for x in keys_collection if x != keys): - raise Exception( - textwrap.dedent( - """\ - Please provide one of - 1) a `prompt_template` - 2) a `question_column` and an optional `context_column` - 3) a `prompt', 'user_column' and 'assistant_column` - """ - ) - ) - - # for all args that are not expected, raise an error - known_args = set() - # flatten of keys_collection - for keys in keys_collection: - known_args = known_args.union(set(keys)) - - # TODO: need a systematic way to maintain a list of known args - known_args = known_args.union( - { - "target", - "model_name", - "mode", - "predict_params", - "json_struct", - 
"ft_api_info", - "ft_result_stats", - "runtime", - "max_tokens", - "temperature", - "openai_api_key", - "api_organization", - "api_base", - "api_version", - "provider", - } - ) - - unknown_args = set(args.keys()) - known_args - if unknown_args: - # return a list of unknown args as a string - raise Exception( - f"Unknown arguments: {', '.join(unknown_args)}.\n Known arguments are: {', '.join(known_args)}" - ) - - engine_storage = kwargs["handler_storage"] - connection_args = engine_storage.get_connection_args() - api_key = get_api_key("openai", args, engine_storage=engine_storage) - api_base = ( - args.get("api_base") - or connection_args.get("api_base") - or os.environ.get("OPENAI_API_BASE", OPENAI_API_BASE) - ) - org = args.get("api_organization") - client = OpenAIHandler._get_client(api_key=api_key, base_url=api_base, org=org, args=args) - OpenAIHandler._check_client_connection(client) - - def create(self, target, args: Dict = None, **kwargs: Any) -> None: - """ - Create a model by connecting to the OpenAI API. - - Args: - target (Text): Target column name. - args (Dict): Parameters for the model. - kwargs (Any): Other keyword arguments. - - Raises: - Exception: If the model is not configured with valid parameters. 
- - Returns: - None - """ - args = args["using"] - args["target"] = target - try: - api_key = get_api_key(self.api_key_name, args, self.engine_storage) - connection_args = self.engine_storage.get_connection_args() - api_base = ( - args.get("api_base") - or connection_args.get("api_base") - or os.environ.get("OPENAI_API_BASE") - or self.api_base - ) - client = self._get_client(api_key=api_key, base_url=api_base, org=args.get("api_organization"), args=args) - available_models = get_available_models(client) - - mode = args.get("mode") - if mode is not None: - mode = Mode(mode) - else: - mode = self.default_mode - - if not args.get("model_name"): - if mode is Mode.embedding: - args["model_name"] = self.default_embedding_model - elif mode is Mode.image: - args["model_name"] = self.default_image_model - else: - args["model_name"] = self.default_model - elif (args["model_name"] not in available_models) and (mode is not Mode.embedding): - raise Exception(f"Invalid model name. Please use one of {available_models}") - finally: - self.model_storage.json_set("args", args) - - def predict(self, df: pd.DataFrame, args: Optional[Dict] = None) -> pd.DataFrame: - """ - Make predictions using a model connected to the OpenAI API. - - Args: - df (pd.DataFrame): Input data to make predictions on. - args (Dict): Parameters passed when making predictions. - - Raises: - Exception: If the model is not configured with valid parameters or if the input data is not in the expected format. - - Returns: - pd.DataFrame: Input data with the predicted values in a new column. 
- """ # noqa - # TODO: support for edits, embeddings and moderation - - pred_args = args["predict_params"] if args else {} - args = self.model_storage.json_get("args") - connection_args = self.engine_storage.get_connection_args() - - args["api_base"] = ( - pred_args.get("api_base") - or args.get("api_base") - or connection_args.get("api_base") - or os.environ.get("OPENAI_API_BASE") - or self.api_base - ) - - if pred_args.get("api_organization"): - args["api_organization"] = pred_args["api_organization"] - df = df.reset_index(drop=True) - - if pred_args.get("mode"): - mode = Mode(pred_args["mode"]) - args["mode"] = mode.value - elif args.get("mode"): - mode = Mode(args["mode"]) - else: - mode = Mode(self.default_mode) - - strict_prompt_template = True - if pred_args.get("prompt_template", False): - base_template = pred_args["prompt_template"] # override with predict-time template if available - strict_prompt_template = False - elif args.get("prompt_template", False): - base_template = args["prompt_template"] - else: - base_template = None - - # Embedding mode - if mode is Mode.embedding: - api_args = { - "question_column": pred_args.get("question_column", None), - "model": pred_args.get("model_name") or args.get("model_name"), - } - model_name = "embedding" - if args.get("question_column"): - prompts = list(df[args["question_column"]].apply(lambda x: str(x))) - empty_prompt_ids = np.where(df[[args["question_column"]]].isna().all(axis=1).values)[0] - else: - raise Exception("Embedding mode needs a question_column") - - # Image mode - elif mode is Mode.image: - api_args = { - "n": pred_args.get("n", None), - "size": pred_args.get("size", None), - "response_format": pred_args.get("response_format", None), - } - api_args = {k: v for k, v in api_args.items() if v is not None} # filter out non-specified api args - model_name = pred_args.get("model_name") or args.get("model_name") - - if args.get("question_column"): - prompts = list(df[args["question_column"]].apply(lambda 
x: str(x))) - empty_prompt_ids = np.where(df[[args["question_column"]]].isna().all(axis=1).values)[0] - elif args.get("prompt_template"): - prompts, empty_prompt_ids = get_completed_prompts(base_template, df) - else: - raise Exception("Image mode needs either `prompt_template` or `question_column`.") - - # Chat or normal completion mode - else: - if args.get("question_column", False) and args["question_column"] not in df.columns: - raise Exception(f"This model expects a question to answer in the '{args['question_column']}' column.") - - if args.get("context_column", False) and args["context_column"] not in df.columns: - raise Exception(f"This model expects context in the '{args['context_column']}' column.") - - # API argument validation - model_name = args.get("model_name", self.default_model) - api_args = { - "max_tokens": pred_args.get("max_tokens", args.get("max_tokens", self.default_max_tokens)), - "temperature": min( - 1.0, - max(0.0, pred_args.get("temperature", args.get("temperature", 0.0))), - ), - "top_p": pred_args.get("top_p", None), - "n": pred_args.get("n", None), - "stop": pred_args.get("stop", None), - "presence_penalty": pred_args.get("presence_penalty", None), - "frequency_penalty": pred_args.get("frequency_penalty", None), - "best_of": pred_args.get("best_of", None), - "logit_bias": pred_args.get("logit_bias", None), - "user": pred_args.get("user", None), - } - - if args.get("prompt_template", False): - prompts, empty_prompt_ids = get_completed_prompts(base_template, df, strict=strict_prompt_template) - - elif args.get("context_column", False): - empty_prompt_ids = np.where( - df[[args["context_column"], args["question_column"]]].isna().all(axis=1).values - )[0] - contexts = list(df[args["context_column"]].apply(lambda x: str(x))) - questions = list(df[args["question_column"]].apply(lambda x: str(x))) - prompts = [f"Context: {c}\nQuestion: {q}\nAnswer: " for c, q in zip(contexts, questions)] - - elif "prompt" in args: - empty_prompt_ids = [] - 
prompts = list(df[args["user_column"]]) - else: - empty_prompt_ids = np.where(df[[args["question_column"]]].isna().all(axis=1).values)[0] - prompts = list(df[args["question_column"]].apply(lambda x: str(x))) - - # add json struct if available - if args.get("json_struct", False): - for i, prompt in enumerate(prompts): - json_struct = "" - if "json_struct" in df.columns and i not in empty_prompt_ids: - # if row has a specific json, we try to use it instead of the base prompt template - try: - if isinstance(df["json_struct"][i], str): - df["json_struct"][i] = json.loads(df["json_struct"][i]) - for ind, val in enumerate(df["json_struct"][i].values()): - json_struct = json_struct + f"{ind}. {val}\n" - except Exception: - pass # if the row's json is invalid, we use the prompt template instead - - if json_struct == "": - for ind, val in enumerate(args["json_struct"].values()): - json_struct = json_struct + f"{ind + 1}. {val}\n" - - p = textwrap.dedent( - f"""\ - Based on the text following 'The reference text is:', assign values to the following {len(args["json_struct"])} JSON attributes: - {{{{json_struct}}}} - - Values should follow the same order as the attributes above. - Each line in the answer should start with a dotted number, and should not repeat the name of the attribute, just the value. - Each answer must end with new line. - If there is no valid value to a given attribute in the text, answer with a - character. - Values should be as short as possible, ideally 1-2 words (unless otherwise specified). - - Here is an example input of 3 attributes: - 1. rental price - 2. location - 3. number of bathrooms - - Here is an example output for the input: - 1. 3000 - 2. Manhattan - 3. 2 - - Now for the real task. 
The reference text is: - {prompt} - """ - ) - - p = p.replace("{{json_struct}}", json_struct) - prompts[i] = p - - # remove prompts without signal from completion queue - prompts = [j for i, j in enumerate(prompts) if i not in empty_prompt_ids] - - api_key = get_api_key(self.api_key_name, args, self.engine_storage) - api_args = {k: v for k, v in api_args.items() if v is not None} # filter out non-specified api args - completion = self._completion(model_name, prompts, api_key, api_args, args, df) - - # add null completion for empty prompts - for i in sorted(empty_prompt_ids): - completion.insert(i, None) - - pred_df = pd.DataFrame(completion, columns=[args["target"]]) - - # restore json struct - if args.get("json_struct", False): - for i in pred_df.index: - try: - if "json_struct" in df.columns: - json_keys = df["json_struct"][i].keys() - else: - json_keys = args["json_struct"].keys() - responses = pred_df[args["target"]][i].split("\n") - responses = [x[3:] for x in responses] # del question index - - pred_df[args["target"]][i] = {key: val for key, val in zip(json_keys, responses)} - except Exception: - pred_df[args["target"]][i] = None - - return pred_df - - def _completion( - self, - model_name: Text, - prompts: List[Text], - api_key: Text, - api_args: Dict, - args: Dict, - df: pd.DataFrame, - parallel: bool = True, - ) -> List[Any]: - """ - Handles completion for an arbitrary amount of rows using a model connected to the OpenAI API. - - This method consists of several inner methods: - - _submit_completion: Submit a request to the relevant completion endpoint of the OpenAI API based on the type of task. - - _submit_normal_completion: Submit a request to the completion endpoint of the OpenAI API. - - _submit_embedding_completion: Submit a request to the embeddings endpoint of the OpenAI API. - - _submit_chat_completion: Submit a request to the chat completion endpoint of the OpenAI API. 
- - _submit_image_completion: Submit a request to the image completion endpoint of the OpenAI API. - - _log_api_call: Log the API call made to the OpenAI API. - - There are a couple checks that should be done when calling OpenAI's API: - - account max batch size, to maximize batch size first - - account rate limit, to maximize parallel calls second - - Additionally, single completion calls are done with exponential backoff to guarantee all prompts are processed, - because even with previous checks the tokens-per-minute limit may apply. - - Args: - model_name (Text): OpenAI Model name. - prompts (List[Text]): List of prompts. - api_key (Text): OpenAI API key. - api_args (Dict): OpenAI API arguments. - args (Dict): Parameters for the model. - df (pd.DataFrame): Input data to run completion on. - parallel (bool): Whether to use parallel processing. - - Returns: - List[Any]: List of completions. The type of completion depends on the task type. - """ - - @retry_with_exponential_backoff() - def _submit_completion( - model_name: Text, prompts: List[Text], api_args: Dict, args: Dict, df: pd.DataFrame - ) -> List[Text]: - """ - Submit a request to the relevant completion endpoint of the OpenAI API based on the type of task. - - Args: - model_name (Text): OpenAI Model name. - prompts (List[Text]): List of prompts. - api_args (Dict): OpenAI API arguments. - args (Dict): Parameters for the model. - df (pd.DataFrame): Input data to run completion on. - - Returns: - List[Text]: List of completions. 
- """ - kwargs = { - "model": model_name, - } - try: - mode = Mode(args.get("mode")) - except ValueError: - if model_name in IMAGE_MODELS: - mode = Mode.image - elif model_name == "embedding": - mode = Mode.embedding - elif self.is_chat_model(model_name) and model_name != "gpt-3.5-turbo-instruct": - mode = Mode.conversational - elif model_name == "gpt-3.5-turbo-instruct": - mode = Mode.legacy - else: - mode = Mode.default - - match mode: - case Mode.image: - return _submit_image_completion(kwargs, prompts, api_args) - case Mode.embedding: - return _submit_embedding_completion(kwargs, prompts, api_args) - case Mode.conversational | Mode.conversational_full | Mode.default: - return _submit_chat_completion( - kwargs, - prompts, - api_args, - df, - mode=args.get("mode", "conversational"), - ) - case Mode.legacy: - return _submit_normal_completion(kwargs, prompts, api_args) - - def _log_api_call(params: Dict, response: Any) -> None: - """ - Log the API call made to the OpenAI API. - - Args: - params (Dict): Parameters for the API call. - response (Any): Response from the API. - - Returns: - None - """ - after_openai_query(params, response) - - params2 = params.copy() - params2.pop("api_key", None) - params2.pop("user", None) - logger.debug(f">>>openai call: {params2}:\n{response}") - - def _submit_normal_completion(kwargs: Dict, prompts: List[Text], api_args: Dict) -> List[Text]: - """ - Submit a request to the completion endpoint of the OpenAI API. - - This method consists of an inner method: - - _tidy: Parse and tidy up the response from the completion endpoint of the OpenAI API. - - Args: - kwargs (Dict): OpenAI API arguments, including the model to use. - prompts (List[Text]): List of prompts. - api_args (Dict): Other OpenAI API arguments. - - Returns: - List[Text]: List of text completions. - """ - - def _tidy(comp: openai.types.completion.Completion) -> List[Text]: - """ - Parse and tidy up the response from the completion endpoint of the OpenAI API. 
- - Args: - comp (openai.types.completion.Completion): Completion object. - - Returns: - List[Text]: List of completions as text. - """ - tidy_comps = [] - for c in comp.choices: - if hasattr(c, "text"): - tidy_comps.append(c.text.strip("\n").strip("")) - return tidy_comps - - kwargs = {**kwargs, **api_args} - - before_openai_query(kwargs) - responses = [] - for prompt in prompts: - responses.extend(_tidy(client.completions.create(prompt=prompt, **kwargs))) - _log_api_call(kwargs, responses) - return responses - - def _submit_embedding_completion(kwargs: Dict, prompts: List[Text], api_args: Dict) -> List[float]: - """ - Submit a request to the embeddings endpoint of the OpenAI API. - - This method consists of an inner method: - - _tidy: Parse and tidy up the response from the embeddings endpoint of the OpenAI API. - - Args: - kwargs (Dict): OpenAI API arguments, including the model to use. - prompts (List[Text]): List of prompts. - api_args (Dict): Other OpenAI API arguments. - - Returns: - List[float]: List of embeddings as numbers. - """ - - def _tidy(comp: openai.types.create_embedding_response.CreateEmbeddingResponse) -> List[float]: - """ - Parse and tidy up the response from the embeddings endpoint of the OpenAI API. - - Args: - comp (openai.types.create_embedding_response.CreateEmbeddingResponse): Embedding object. - - Returns: - List[float]: List of embeddings as numbers. - """ - tidy_comps = [] - for c in comp.data: - if hasattr(c, "embedding"): - tidy_comps.append([c.embedding]) - return tidy_comps - - kwargs["input"] = prompts - kwargs = {**kwargs, **api_args} - - before_openai_query(kwargs) - resp = _tidy(client.embeddings.create(**kwargs)) - _log_api_call(kwargs, resp) - return resp - - def _submit_chat_completion( - kwargs: Dict, prompts: List[Text], api_args: Dict, df: pd.DataFrame, mode: Text = "conversational" - ) -> List[Text]: - """ - Submit a request to the chat completion endpoint of the OpenAI API. 
- - This method consists of an inner method: - - _tidy: Parse and tidy up the response from the chat completion endpoint of the OpenAI API. - - Args: - kwargs (Dict): OpenAI API arguments, including the model to use. - prompts (List[Text]): List of prompts. - api_args (Dict): Other OpenAI API arguments. - df (pd.DataFrame): Input data to run chat completion on. - mode (Text): Mode of operation. - - Returns: - List[Text]: List of chat completions as text. - """ - - def _tidy(comp: openai.types.chat.chat_completion.ChatCompletion) -> List[Text]: - """ - Parse and tidy up the response from the chat completion endpoint of the OpenAI API. - - Args: - comp (openai.types.chat.chat_completion.ChatCompletion): Chat completion object. - - Returns: - List[Text]: List of chat completions as text. - """ - tidy_comps = [] - for c in comp.choices: - if hasattr(c, "message"): - tidy_comps.append(c.message.content.strip("\n").strip("")) - return tidy_comps - - mode = Mode(mode) - completions = [] - if mode is not Mode.conversational or "prompt" not in args: - initial_prompt = { - "role": "system", - "content": "You are a helpful assistant. 
Your task is to continue the chat.", - } # noqa - else: - # get prompt from model - initial_prompt = {"role": "system", "content": args["prompt"]} # noqa - - kwargs["messages"] = [initial_prompt] - last_completion_content = None - - for pidx in range(len(prompts)): - if mode is not Mode.conversational: - kwargs["messages"].append({"role": "user", "content": prompts[pidx]}) - else: - question = prompts[pidx] - if question: - kwargs["messages"].append({"role": "user", "content": question}) - - assistant_column = args.get("assistant_column") - if assistant_column in df.columns: - answer = df.iloc[pidx][assistant_column] - else: - answer = None - if answer: - kwargs["messages"].append({"role": "assistant", "content": answer}) - - if mode is Mode.conversational_full or (mode is Mode.conversational and pidx == len(prompts) - 1): - kwargs["messages"] = truncate_msgs_for_token_limit( - kwargs["messages"], kwargs["model"], api_args["max_tokens"] - ) - pkwargs = {**kwargs, **api_args} - - before_openai_query(kwargs) - resp = _tidy(client.chat.completions.create(**pkwargs)) - _log_api_call(pkwargs, resp) - - completions.extend(resp) - elif mode is Mode.default: - kwargs["messages"] = [initial_prompt] + [kwargs["messages"][-1]] - pkwargs = {**kwargs, **api_args} - - before_openai_query(kwargs) - resp = _tidy(client.chat.completions.create(**pkwargs)) - _log_api_call(pkwargs, resp) - - completions.extend(resp) - else: - # in "normal" conversational mode, we request completions only for the last row - last_completion_content = None - completions.extend([""]) - - if last_completion_content: - # interleave assistant responses with user input - kwargs["messages"].append({"role": "assistant", "content": last_completion_content[0]}) - - return completions - - def _submit_image_completion(kwargs: Dict, prompts: List[Text], api_args: Dict) -> List[Text]: - """ - Submit a request to the image generation endpoint of the OpenAI API. 
- - This method consists of an inner method: - - _tidy: Parse and tidy up the response from the image generation endpoint of the OpenAI API. - - Args: - kwargs (Dict): OpenAI API arguments, including the model to use. - prompts (List[Text]): List of prompts. - api_args (Dict): Other OpenAI API arguments. - - Raises: - Exception: If the maximum batch size is reached. - - Returns: - List[Text]: List of image completions as URLs or base64 encoded images. - """ - - def _tidy(comp: List[openai.types.image.Image]) -> List[Text]: - """ - Parse and tidy up the response from the image generation endpoint of the OpenAI API. - - Args: - comp (List[openai.types.image.Image]): Image completion objects. - - Returns: - List[Text]: List of image completions as URLs or base64 encoded images. - """ - return [c.url if hasattr(c, "url") else c.b64_json for c in comp] - - completions = [client.images.generate(**{"prompt": p, **kwargs, **api_args}).data[0] for p in prompts] - return _tidy(completions) - - client = self._get_client( - api_key=api_key, - base_url=args.get("api_base"), - org=args.pop("api_organization") if "api_organization" in args else None, - args=args, - ) - - try: - # check if simple completion works - completion = _submit_completion(model_name, prompts, api_args, args, df) - return completion - except Exception as e: - # else, we get the max batch size - if "you can currently request up to at most a total of" in str(e): - pattern = "a total of" - max_batch_size = int(e[e.find(pattern) + len(pattern) :].split(").")[0]) - else: - max_batch_size = self.max_batch_size # guards against changes in the API message - - if not parallel: - completion = None - for i in range(math.ceil(len(prompts) / max_batch_size)): - partial = _submit_completion( - model_name, - prompts[i * max_batch_size : (i + 1) * max_batch_size], - api_args, - args, - df, - ) - if not completion: - completion = partial - else: - completion.extend(partial) - else: - promises = [] - with 
concurrent.futures.ThreadPoolExecutor() as executor: - for i in range(math.ceil(len(prompts) / max_batch_size)): - logger.debug(f"{i * max_batch_size}:{(i + 1) * max_batch_size}/{len(prompts)}") - future = executor.submit( - _submit_completion, - model_name, - prompts[i * max_batch_size : (i + 1) * max_batch_size], - api_args, - args, - df, - ) - promises.append({"choices": future}) - completion = None - for p in promises: - if not completion: - completion = p["choices"].result() - else: - completion.extend(p["choices"].result()) - - return completion - - def describe(self, attribute: Optional[Text] = None) -> pd.DataFrame: - """ - Get the metadata or arguments of a model. - - Args: - attribute (Optional[Text]): Attribute to describe. Can be 'args' or 'metadata'. - - Returns: - pd.DataFrame: Model metadata or model arguments. - """ - # TODO: Update to use update() artifacts - - args = self.model_storage.json_get("args") - api_key = get_api_key(self.api_key_name, args, self.engine_storage) - if attribute == "args": - return pd.DataFrame(args.items(), columns=["key", "value"]) - elif attribute == "metadata": - model_name = args.get("model_name", self.default_model) - try: - client = self._get_client( - api_key=api_key, - base_url=args.get("api_base"), - org=args.get("api_organization"), - args=args, - ) - meta = client.models.retrieve(model_name) - except Exception as e: - meta = {"error": str(e)} - return pd.DataFrame(dict(meta).items(), columns=["key", "value"]) - else: - tables = ["args", "metadata"] - return pd.DataFrame(tables, columns=["tables"]) - - def finetune(self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None: - """ - Fine-tune OpenAI GPT models via a MindsDB model connected to the OpenAI API. 
- Steps are roughly: - - Analyze input data and modify it according to suggestions made by the OpenAI utility tool - - Get a training and validation file - - Determine base model to use - - Submit a fine-tuning job via the OpenAI API - - Monitor progress with exponential backoff (which has been modified for greater control given a time budget in hours), - - Gather stats once fine-tuning finishes - - Modify model metadata so that the new version triggers the fine-tuned version of the model (stored in the user's OpenAI account) - - Caveats: - - As base fine-tuning models, OpenAI only supports the original GPT ones: `ada`, `babbage`, `curie`, `davinci`. This means if you fine-tune successively more than once, any fine-tuning other than the most recent one is lost. - - A bunch of helper methods exist to be overridden in other handlers that follow the OpenAI API, e.g. Anyscale - - Args: - df (Optional[pd.DataFrame]): Input data to fine-tune on. - args (Optional[Dict]): Parameters for the fine-tuning process. - - Raises: - Exception: If the model does not support fine-tuning. - - Returns: - None - """ # noqa - args = args if args else {} - - api_key = get_api_key(self.api_key_name, args, self.engine_storage) - - using_args = args.pop("using") if "using" in args else {} - - api_base = using_args.get("api_base", os.environ.get("OPENAI_API_BASE", OPENAI_API_BASE)) - org = using_args.get("api_organization") - client = self._get_client(api_key=api_key, base_url=api_base, org=org, args=args) - - args = {**using_args, **args} - prev_model_name = self.base_model_storage.json_get("args").get("model_name", "") - - if prev_model_name not in self.supported_ft_models: - # base model may be already FTed, check prefixes - for model in self.supported_ft_models: - if model in prev_model_name: - break - else: - raise Exception( - f"This model cannot be finetuned. Supported base models are {self.supported_ft_models}." 
- ) - - finetune_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - - temp_storage_path = tempfile.mkdtemp() - temp_file_name = f"ft_{finetune_time}" - temp_model_storage_path = f"{temp_storage_path}/{temp_file_name}.jsonl" - - file_names = self._prepare_ft_jsonl(df, temp_storage_path, temp_file_name, temp_model_storage_path) - - jsons = {k: None for k in file_names.keys()} - for split, file_name in file_names.items(): - if os.path.isfile(os.path.join(temp_storage_path, file_name)): - jsons[split] = client.files.create( - file=open(f"{temp_storage_path}/{file_name}", "rb"), purpose="fine-tune" - ) - - if type(jsons["train"]) is openai.types.FileObject: - train_file_id = jsons["train"].id - else: - train_file_id = jsons["base"].id - - if type(jsons["val"]) is openai.types.FileObject: - val_file_id = jsons["val"].id - else: - val_file_id = None - - # `None` values are internally imputed by OpenAI to `null` or default values - ft_params = { - "training_file": train_file_id, - "validation_file": val_file_id, - "model": self._get_ft_model_type(prev_model_name), - } - ft_params = self._add_extra_ft_params(ft_params, using_args) - - start_time = datetime.datetime.now() - - ft_stats, result_file_id = self._ft_call(ft_params, client, args.get("hour_budget", 8)) - ft_model_name = ft_stats.fine_tuned_model - - end_time = datetime.datetime.now() - runtime = end_time - start_time - name_extension = client.files.retrieve(file_id=result_file_id).filename - result_path = f"{temp_storage_path}/ft_{finetune_time}_result_{name_extension}" - - try: - client.files.content(file_id=result_file_id).stream_to_file(result_path) - if ".csv" in name_extension: - # legacy endpoint - train_stats = pd.read_csv(result_path) - if "validation_token_accuracy" in train_stats.columns: - train_stats = train_stats[train_stats["validation_token_accuracy"].notnull()] - args["ft_api_info"] = ft_stats.dict() - args["ft_result_stats"] = train_stats.to_dict() - - elif ".json" in name_extension: - 
train_stats = pd.read_json(path_or_buf=result_path, lines=True) # new endpoint - args["ft_api_info"] = args["ft_result_stats"] = train_stats.to_dict() - - except Exception: - logger.info( - f"Error retrieving fine-tuning results. Please check manually for information on job {ft_stats.id} (result file {result_file_id})." - ) - - args["model_name"] = ft_model_name - args["runtime"] = runtime.total_seconds() - args["mode"] = self.base_model_storage.json_get("args").get("mode", self.default_mode) - - self.model_storage.json_set("args", args) - shutil.rmtree(temp_storage_path) - - @staticmethod - def _prepare_ft_jsonl(df: pd.DataFrame, _, temp_filename: Text, temp_model_path: Text) -> Dict: - """ - Prepare the input data for fine-tuning. - - Args: - df (pd.DataFrame): Input data to fine-tune on. - temp_filename (Text): Temporary filename. - temp_model_path (Text): Temporary model path. - - Returns: - Dict: File names for the fine-tuning process. - """ - df.to_json(temp_model_path, orient="records", lines=True) - - # TODO avoid subprocess usage once OpenAI enables non-CLI access, or refactor to use our own LLM utils instead - subprocess.run( - [ - "openai", - "tools", - "fine_tunes.prepare_data", - "-f", - temp_model_path, # from file - "-q", # quiet mode (accepts all suggestions) - ], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - encoding="utf-8", - ) - - file_names = { - "original": f"{temp_filename}.jsonl", - "base": f"{temp_filename}_prepared.jsonl", - "train": f"{temp_filename}_prepared_train.jsonl", - "val": f"{temp_filename}_prepared_valid.jsonl", - } - return file_names - - def _get_ft_model_type(self, model_name: Text) -> Text: - """ - Get the model to use for fine-tuning. If the model is not supported, the default model (babbage-002) is used. - - Args: - model_name (Text): Model name. - - Returns: - Text: Model to use for fine-tuning. 
- """ - for model_type in self.supported_ft_models: - if model_type in model_name.lower(): - return model_type - return "babbage-002" - - @staticmethod - def _add_extra_ft_params(ft_params: Dict, using_args: Dict) -> Dict: - """ - Add extra parameters to the fine-tuning process. - - Args: - ft_params (Dict): Parameters for the fine-tuning process required by the OpenAI API. - using_args (Dict): Parameters passed when calling the fine-tuning process via a model. - - Returns: - Dict: Fine-tuning parameters with extra parameters. - """ - extra_params = { - "n_epochs": using_args.get("n_epochs", None), - "batch_size": using_args.get("batch_size", None), - "learning_rate_multiplier": using_args.get("learning_rate_multiplier", None), - "prompt_loss_weight": using_args.get("prompt_loss_weight", None), - "compute_classification_metrics": using_args.get("compute_classification_metrics", None), - "classification_n_classes": using_args.get("classification_n_classes", None), - "classification_positive_class": using_args.get("classification_positive_class", None), - "classification_betas": using_args.get("classification_betas", None), - } - return {**ft_params, **extra_params} - - def _ft_call(self, ft_params: Dict, client: OpenAI, hour_budget: int) -> Tuple[FineTuningJob, Text]: - """ - Submit a fine-tuning job via the OpenAI API. - This method handles requests to both the legacy and new endpoints. - Currently, `OpenAIHandler` uses the legacy endpoint. Others, like `AnyscaleEndpointsHandler`, use the new endpoint. - - This method consists of an inner method: - - _check_ft_status: Check the status of a fine-tuning job via the OpenAI API. - - Args: - ft_params (Dict): Fine-tuning parameters. - client (openai.OpenAI): OpenAI client. - hour_budget (int): Hour budget for the fine-tuning process. - - Raises: - PendingFT: If the fine-tuning process is still pending. - - Returns: - Tuple[FineTuningJob, Text]: Fine-tuning stats and result file ID. 
- """ - ft_result = client.fine_tuning.jobs.create(**{k: v for k, v in ft_params.items() if v is not None}) - - @retry_with_exponential_backoff( - hour_budget=hour_budget, - ) - def _check_ft_status(job_id: Text) -> FineTuningJob: - """ - Check the status of a fine-tuning job via the OpenAI API. - - Args: - job_id (Text): Fine-tuning job ID. - - Raises: - PendingFT: If the fine-tuning process is still pending. - - Returns: - FineTuningJob: Fine-tuning stats. - """ - ft_retrieved = client.fine_tuning.jobs.retrieve(fine_tuning_job_id=job_id) - if ft_retrieved.status in ("succeeded", "failed", "cancelled"): - return ft_retrieved - else: - raise PendingFT("Fine-tuning still pending!") - - ft_stats = _check_ft_status(ft_result.id) - - if ft_stats.status != "succeeded": - err_message = ft_stats.events[-1].message if hasattr(ft_stats, "events") else "could not retrieve!" - ft_status = ft_stats.status if hasattr(ft_stats, "status") else "N/A" - raise Exception( - f"Fine-tuning did not complete successfully (status: {ft_status}). Error message: {err_message}" - ) # noqa - - result_file_id = client.fine_tuning.jobs.retrieve(fine_tuning_job_id=ft_result.id).result_files[0] - if hasattr(result_file_id, "id"): - result_file_id = result_file_id.id # legacy endpoint - - return ft_stats, result_file_id - - @staticmethod - def _get_client(api_key: Text, base_url: Text, org: Optional[Text] = None, args: dict = None) -> OpenAI: - """ - Get an OpenAI client with the given API key, base URL, and organization. - - Args: - api_key (Text): OpenAI API key. - base_url (Text): OpenAI base URL. - org (Optional[Text]): OpenAI organization. - - Returns: - openai.OpenAI: OpenAI client. 
- """ - if args is not None and args.get("provider") == "azure": - return AzureOpenAI( - api_key=api_key, azure_endpoint=base_url, api_version=args.get("api_version"), organization=org - ) - return OpenAI(api_key=api_key, base_url=base_url, organization=org) diff --git a/mindsdb/integrations/handlers/openai_handler/requirements.txt b/mindsdb/integrations/handlers/openai_handler/requirements.txt deleted file mode 100644 index 77778b95ac6..00000000000 --- a/mindsdb/integrations/handlers/openai_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -tiktoken diff --git a/mindsdb/integrations/handlers/openai_handler/tests/__init__.py b/mindsdb/integrations/handlers/openai_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/openai_handler/tests/test_openai_handler.py b/mindsdb/integrations/handlers/openai_handler/tests/test_openai_handler.py deleted file mode 100644 index d4c09a68f3e..00000000000 --- a/mindsdb/integrations/handlers/openai_handler/tests/test_openai_handler.py +++ /dev/null @@ -1,509 +0,0 @@ -import os -import pytest -import pandas as pd -from unittest.mock import patch - -from tests.unit.ml_handlers.base_ml_test import BaseMLAPITest - - -@pytest.mark.skipif(os.environ.get('MDB_TEST_MDB_OPENAI_API_KEY') is None, reason='Missing API key!') -class TestOpenAI(BaseMLAPITest): - """ - Integration tests for the OpenAI handler. - """ - - def setup_method(self): - """ - Setup test environment by creating a project and an OpenAI engine. - """ - super().setup_method() - self.run_sql("CREATE DATABASE proj") - self.run_sql( - f""" - CREATE ML_ENGINE openai_engine - FROM openai - USING - openai_api_key = '{self.get_api_key('MDB_TEST_MDB_OPENAI_API_KEY')}'; - """ - ) - - def test_create_model_with_unsupported_model_raises_exception(self): - """ - Test if CREATE MODEL raises an exception with an unsupported model. 
- """ - self.run_sql( - """ - CREATE MODEL proj.test_openaai_unsupported_model_model - PREDICT answer - USING - engine='openai_engine', - model_name='this-model-does-not-exist', - prompt_template='dummy_prompt_template'; - """ - ) - with pytest.raises(Exception) as excinfo: - self.wait_predictor("proj", "test_openaai_unsupported_model_model") - - assert "Invalid model name." in str(excinfo.value) - - def test_full_flow_in_default_mode_with_question_column_for_single_prediction_runs_no_errors(self): - """ - Test the full flow in default mode with a question column for a single prediction. - """ - self.run_sql( - """ - CREATE MODEL proj.test_openai_single_full_flow_default_mode_question_column - PREDICT answer - USING - engine='openai_engine', - question_column='question'; - """ - ) - - self.wait_predictor("proj", "test_openai_single_full_flow_default_mode_question_column") - - result_df = self.run_sql( - """ - SELECT answer - FROM proj.test_openai_single_full_flow_default_mode_question_column - WHERE question='What is the capital of Sweden?' - """ - ) - assert "stockholm" in result_df["answer"].iloc[0].lower() - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_full_flow_in_default_mode_with_question_column_for_bulk_predictions_runs_no_errors(self, mock_handler): - """ - Test the full flow in default mode with a question column for bulk predictions. - """ - df = pd.DataFrame.from_dict({"question": [ - "What is the capital of Sweden?", - "What is the second planet of the solar system?" 
- ]}) - self.set_handler(mock_handler, name="pg", tables={"df": df}) - - self.run_sql( - """ - CREATE MODEL proj.test_openai_bulk_full_flow_default_mode_question_column - PREDICT answer - USING - engine='openai_engine', - question_column='question'; - """ - ) - - self.wait_predictor("proj", "test_openai_bulk_full_flow_default_mode_question_column") - - result_df = self.run_sql( - """ - SELECT p.answer - FROM pg.df as t - JOIN proj.test_openai_bulk_full_flow_default_mode_question_column as p; - """ - ) - assert "stockholm" in result_df["answer"].iloc[0].lower() - assert "venus" in result_df["answer"].iloc[1].lower() - - def test_full_flow_in_default_mode_with_prompt_template_for_single_prediction_runs_no_errors(self): - """ - Test the full flow in default mode with a prompt template for a single prediction. - """ - self.run_sql( - """ - CREATE MODEL proj.test_openai_single_full_flow_default_mode_prompt_template - PREDICT answer - USING - engine='openai_engine', - prompt_template='Answer this question and add "Boom!" to the end of the answer: {{{{question}}}}'; - """ - ) - - self.wait_predictor("proj", "test_openai_single_full_flow_default_mode_prompt_template") - - result_df = self.run_sql( - """ - SELECT answer - FROM proj.test_openai_single_full_flow_default_mode_prompt_template - WHERE question='What is the capital of Sweden?' - """ - ) - assert "stockholm" in result_df["answer"].iloc[0].lower() - assert "boom!" in result_df["answer"].iloc[0].lower() - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_full_flow_in_default_mode_with_prompt_template_for_bulk_predictions_runs_no_errors(self, mock_handler): - """ - Test the full flow in default mode with a prompt template for bulk predictions. - """ - df = pd.DataFrame.from_dict({"question": [ - "What is the capital of Sweden?", - "What is the second planet of the solar system?" 
- ]}) - self.set_handler(mock_handler, name="pg", tables={"df": df}) - - self.run_sql( - """ - CREATE MODEL proj.test_openai_bulk_full_flow_default_mode_prompt_template - PREDICT answer - USING - engine='openai_engine', - prompt_template='Answer this question and add "Boom!" to the end of the answer: {{{{question}}}}'; - """ - ) - - self.wait_predictor("proj", "test_openai_bulk_full_flow_default_mode_prompt_template") - - result_df = self.run_sql( - """ - SELECT p.answer - FROM pg.df as t - JOIN proj.test_openai_bulk_full_flow_default_mode_prompt_template as p; - """ - ) - assert "stockholm" in result_df["answer"].iloc[0].lower() - assert "boom!" in result_df["answer"].iloc[0].lower() - assert "venus" in result_df["answer"].iloc[1].lower() - assert "boom!" in result_df["answer"].iloc[1].lower() - - def test_full_flow_in_embedding_mode_for_single_prediction_runs_no_errors(self): - """ - Test the full flow in embedding mode for a single prediction. - """ - self.run_sql( - """ - CREATE MODEL proj.test_openai_single_full_flow_embedding_mode - PREDICT answer - USING - engine='openai_engine', - mode='embedding', - model_name = 'text-embedding-ada-002', - question_column = 'text'; - """ - ) - - self.wait_predictor("proj", "test_openai_single_full_flow_embedding_mode") - - result_df = self.run_sql( - """ - SELECT answer - FROM proj.test_openai_single_full_flow_embedding_mode - WHERE text='Sweden' - """ - ) - assert type(result_df["answer"].iloc[0]) == list - assert type(result_df["answer"].iloc[0][0]) == float - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_full_flow_in_embedding_mode_for_bulk_predictions_runs_no_errors(self, mock_handler): - """ - Test the full flow in embedding mode for bulk predictions. 
- """ - df = pd.DataFrame.from_dict({"text": [ - "Sweden", - "Venus" - ]}) - self.set_handler(mock_handler, name="pg", tables={"df": df}) - - self.run_sql( - """ - CREATE MODEL proj.test_openai_bulk_full_flow_embedding_mode - PREDICT answer - USING - engine='openai_engine', - mode='embedding', - model_name = 'text-embedding-ada-002', - question_column = 'text'; - """ - ) - - self.wait_predictor("proj", "test_openai_bulk_full_flow_embedding_mode") - - result_df = self.run_sql( - """ - SELECT p.answer - FROM pg.df as t - JOIN proj.test_openai_bulk_full_flow_embedding_mode as p; - """ - ) - assert type(result_df["answer"].iloc[0]) == list - assert type(result_df["answer"].iloc[0][0]) == float - assert type(result_df["answer"].iloc[1]) == list - assert type(result_df["answer"].iloc[1][0]) == float - - def test_full_flow_in_image_mode_for_single_prediction_runs_no_errors(self): - """ - Test the full flow in image mode for a single prediction. - """ - self.run_sql( - """ - CREATE MODEL proj.test_openai_single_full_flow_image_mode - PREDICT answer - USING - engine='openai_engine', - mode='image', - prompt_template='Generate an image for: {{{{text}}}}' - """ - ) - - self.wait_predictor("proj", "test_openai_single_full_flow_image_mode") - - result_df = self.run_sql( - """ - SELECT answer - FROM proj.test_openai_single_full_flow_image_mode - WHERE text='Leopard clubs playing in the jungle' - """ - ) - assert type(result_df["answer"].iloc[0]) == str - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_full_flow_in_image_mode_for_bulk_predictions_runs_no_errors(self, mock_handler): - """ - Test the full flow in image mode for bulk predictions. 
- """ - df = pd.DataFrame.from_dict({"text": [ - "Leopard clubs playing in the jungle", - "A beautiful sunset over the ocean" - ]}) - self.set_handler(mock_handler, name="pg", tables={"df": df}) - - self.run_sql( - """ - CREATE MODEL proj.test_openai_bulk_full_flow_image_mode - PREDICT answer - USING - engine='openai_engine', - mode='image', - prompt_template='Generate an image for: {{{{text}}}}' - """ - ) - - self.wait_predictor("proj", "test_openai_bulk_full_flow_image_mode") - - result_df = self.run_sql( - """ - SELECT p.answer - FROM pg.df as t - JOIN proj.test_openai_bulk_full_flow_image_mode as p; - """ - ) - assert type(result_df["answer"].iloc[0]) == str - assert type(result_df["answer"].iloc[1]) == str - - def test_full_flow_in_conversational_for_single_prediction_mode_runs_no_errors(self): - """ - Test the full flow in conversational mode for a single prediction. - """ - self.run_sql( - """ - CREATE MODEL proj.test_openai_single_full_flow_conversational_mode - PREDICT answer - USING - engine='openai_engine', - mode='conversational', - user_column='question', - prompt='you are a helpful assistant', - assistant_column='answer'; - """ - ) - - self.wait_predictor("proj", "test_openai_single_full_flow_conversational_mode") - - result_df = self.run_sql( - """ - SELECT answer - FROM proj.test_openai_single_full_flow_conversational_mode - WHERE question='What is the capital of Sweden?' - """ - ) - assert "stockholm" in result_df["answer"].iloc[0].lower() - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_full_flow_in_conversational_mode_for_bulk_predictions_runs_no_errors(self, mock_handler): - """ - Test the full flow in conversational mode for bulk predictions. - """ - df = pd.DataFrame.from_dict({"question": [ - "What is the capital of Sweden?", - "What are some cool places to visit there?" 
- ]}) - self.set_handler(mock_handler, name="pg", tables={"df": df}) - - self.run_sql( - """ - CREATE MODEL proj.test_openai_bulk_full_flow_conversational_mode - PREDICT answer - USING - engine='openai_engine', - mode='conversational', - user_column='question', - prompt='you are a helpful assistant', - assistant_column='answer'; - """ - ) - - self.wait_predictor("proj", "test_openai_bulk_full_flow_conversational_mode") - - result_df = self.run_sql( - """ - SELECT p.answer - FROM pg.df as t - JOIN proj.test_openai_bulk_full_flow_conversational_mode as p; - """ - ) - assert result_df["answer"].iloc[0] == "" - assert "gamla stan" in result_df["answer"].iloc[1].lower() - - def test_full_flow_in_conversational_full_mode_for_single_prediction_runs_no_errors(self): - """ - Test the full flow in conversational-full mode for a single prediction. - """ - self.run_sql( - """ - CREATE MODEL proj.test_openai_single_full_flow_conversational_full_mode - PREDICT answer - USING - engine='openai_engine', - mode='conversational-full', - user_column='question', - prompt='you are a helpful assistant', - assistant_column='answer'; - """ - ) - - self.wait_predictor("proj", "test_openai_single_full_flow_conversational_full_mode") - - result_df = self.run_sql( - """ - SELECT answer - FROM proj.test_openai_single_full_flow_conversational_full_mode - WHERE question='What is the capital of Sweden?' - """ - ) - assert "stockholm" in result_df["answer"].iloc[0].lower() - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_full_flow_in_conversational_full_mode_for_bulk_predictions_runs_no_errors(self, mock_handler): - """ - Test the full flow in conversational-full mode for bulk predictions. - """ - df = pd.DataFrame.from_dict({"question": [ - "What is the capital of Sweden?", - "What are some cool places to visit there?" 
- ]}) - self.set_handler(mock_handler, name="pg", tables={"df": df}) - - self.run_sql( - """ - CREATE MODEL proj.test_openai_bulk_full_flow_conversational_full_mode - PREDICT answer - USING - engine='openai_engine', - mode='conversational-full', - user_column='question', - prompt='you are a helpful assistant', - assistant_column='answer'; - """ - ) - - self.wait_predictor("proj", "test_openai_bulk_full_flow_conversational_full_mode") - - result_df = self.run_sql( - """ - SELECT p.answer - FROM pg.df as t - JOIN proj.test_openai_bulk_full_flow_conversational_full_mode as p; - """ - ) - assert "stockholm" in result_df["answer"].iloc[0].lower() - assert "gamla stan" in result_df["answer"].iloc[1].lower() - - # TODO: Fix this test for fine-tuning - # @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - # def test_full_flow_finetune_runs_no_errors(self, mock_handler): - # """ - # Test the full flow for finetuning a model and making a prediction. - # """ - # df = pd.DataFrame.from_dict( - # { - # "prompt": [ - # "What is the SQL syntax to connect a database to MindsDB?", - # "What is the SQL command to connect to the demo postgres database for MindsDB learning hub examples?", - # "What is the SQL syntax to create a MindsDB machine learning model?", - # "What is the SQL syntax to join input data with predictions from a MindsDB machine learning model?" - # ], - # "completion": [ - # """ - # CREATE DATABASE datasource_name - # [WITH] [ENGINE [=] engine_name] [,] - # [PARAMETERS [=] { - # "key": "value", - # ... 
- # }]; - # """, - # """ - # CREATE DATABASE example_db - # WITH - # ENGINE = "postgres", - # PARAMETERS = { - # "user": "demo_user", - # "password": "demo_password", - # "host": "samples.mindsdb.com", - # "port": "5432", - # "database": "demo" - # }; - # """, - # """ - # CREATE MODEL - # mindsdb.home_rentals_model - # FROM example_db - # (SELECT * FROM demo_data.home_rentals) - # PREDICT rental_price; - # """, - # """ - # SELECT t.column_name, p.column_name, ... - # FROM integration_name.table_name [AS] t - # JOIN project_name.model_name [AS] p; - # """ - # ] - - # } - # ) - # self.set_handler(mock_handler, name="pg", tables={"df": df}) - - # self.run_sql( - # f""" - # CREATE MODEL proj.test_openai_full_flow_finetune - # PREDICT completion - # USING - # engine = 'openai_engine', - # model_name = 'davinci-002', - # prompt_template = 'Return a valid SQL string for the following question about MindsDB in-database machine learning: {{{{prompt}}}}'; - # """ - # ) - - # self.wait_predictor("proj", "test_openai_full_flow_finetune") - - # self.run_sql( - # """ - # FINETUNE proj.test_openai_full_flow_finetune - # FROM pg - # (SELECT prompt, completion FROM df); - # """ - # ) - - # self.wait_predictor("proj", "test_openai_full_flow_finetune", finetune=True) - - # result_df = self.run_sql( - # """ - # SELECT prompt, completion - # FROM proj.test_openai_full_flow_finetune - # WHERE prompt = 'What is the SQL syntax to join input data with predictions from a MindsDB machine learning model?' - # USING max_tokens=400; - # """ - # ) - # assert "SELECT t.column_name, p.column_name, ..." 
in result_df["completion"].iloc[0].lower() - - -if __name__ == "__main__": - pytest.main([__file__]) diff --git a/mindsdb/integrations/handlers/oracle_handler/README.md b/mindsdb/integrations/handlers/oracle_handler/README.md deleted file mode 100644 index 626e6b122df..00000000000 --- a/mindsdb/integrations/handlers/oracle_handler/README.md +++ /dev/null @@ -1,105 +0,0 @@ ---- -title: Oracle -sidebarTitle: Oracle ---- - -This documentation describes the integration of MindsDB with [Oracle](https://www.techopedia.com/definition/8711/oracle-database), one of the most trusted and widely used relational database engines for storing, organizing and retrieving data by type while still maintaining relationships between the various types. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Oracle to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to your Oracle database from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE oracle_datasource -WITH - ENGINE = 'oracle', - PARAMETERS = { - "host": "localhost", - "service_name": "FREEPDB1", - "user": "custom_user", - "password": "password" - }; -``` - -**Recommendation**: Use a dedicated user for MindsDB with the least privileges necessary for the tasks it needs to perform. SYSDBA privileges are not required. -If you are connecting to an Oracle database using SYS or SYSTEM user, tables will be not be visible in the MindsDB Studio UI. - - -Required connection parameters include the following: - -* `user`: The username for the Oracle database. -* `password`: The password for the Oracle database. - -* `dsn`: The data source name (DSN) for the Oracle database. 
-OR -* `host`: The hostname, IP address, or URL of the Oracle server. -AND -* `sid`: The system identifier (SID) of the Oracle database. -OR -* `service_name`: The service name of the Oracle database. - -Optional connection parameters include the following: - -* `port`: The port number for connecting to the Oracle database. Default is 1521. -* `disable_oob`: The boolean parameter to disable out-of-band breaks. Default is `false`. -* `auth_mode`: The authorization mode to use. -* `thick_mode`: Set to `true` to use thick mode for the connection. Thin mode is used by default. -* `oracle_client_lib_dir`: The directory path where Oracle Client libraries are located. Required if `thick_mode` is set to `true`. - -## Usage - -Retrieve data from a specified table by providing the integration name, schema, and table name: - -```sql -SELECT * -FROM oracle_datasource.schema_name.table_name -LIMIT 10; -``` - -Run PL/SQL queries directly on the connected Oracle database: - -```sql -SELECT * FROM oracle_datasource ( - - --Native Query Goes Here - SELECT employee_id, first_name, last_name, email, hire_date - FROM oracle_datasource.hr.employees - WHERE department_id = 10 - ORDER BY hire_date DESC; - -); -``` - - -The above examples utilize `oracle_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the Oracle database. -* **Checklist**: - 1. Make sure the Oracle database is active. - 2. Confirm that the connection parameters provided (DSN, host, SID, service_name) and the credentials (user, password) are correct. - 3. Ensure a stable network between MindsDB and Oracle. -* **Symptoms**: Connection timeout errors. -* **Checklist**: - 1. Verify that the Oracle database is reachable from the MindsDB server. - 2. Check for any firewall or network restrictions that might be causing delays. 
-* **Symptoms**: Can't connect to db: Failed to initialize Oracle client: DPI-1047: Cannot locate a 64-bit Oracle Client library: -* **Checklist**: - 1. Ensure that the Oracle Client libraries are installed on the MindsDB server. - 2. Verify that the `oracle_client_lib_dir` parameter is set correctly in the connection configuration. - 3. Check that the installed Oracle Client libraries match the architecture (64-bit) of the MindsDB server. - - -This [troubleshooting guide](https://docs.oracle.com/en/database/oracle/oracle-database/19/ntqrf/database-connection-issues.html) provided by Oracle might also be helpful. \ No newline at end of file diff --git a/mindsdb/integrations/handlers/oracle_handler/__about__.py b/mindsdb/integrations/handlers/oracle_handler/__about__.py deleted file mode 100644 index 26369093fd1..00000000000 --- a/mindsdb/integrations/handlers/oracle_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Oracle Database handler" -__package_name__ = "mindsdb_oracle_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for Oracle Database" -__author__ = "MindsDB Inc" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2022- mindsdb" diff --git a/mindsdb/integrations/handlers/oracle_handler/__init__.py b/mindsdb/integrations/handlers/oracle_handler/__init__.py deleted file mode 100644 index aee60e989dc..00000000000 --- a/mindsdb/integrations/handlers/oracle_handler/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example - -try: - from .oracle_handler import OracleHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Oracle" -name = "oracle" -type = 
HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.MINDSDB - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", - "support_level", -] diff --git a/mindsdb/integrations/handlers/oracle_handler/connection_args.py b/mindsdb/integrations/handlers/oracle_handler/connection_args.py deleted file mode 100644 index 21ec86b606d..00000000000 --- a/mindsdb/integrations/handlers/oracle_handler/connection_args.py +++ /dev/null @@ -1,82 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - dsn={ - "type": ARG_TYPE.STR, - "description": "The data source name (DSN) for the Oracle database.", - "required": False, - "label": "Data Source Name (DSN)", - }, - host={ - "type": ARG_TYPE.STR, - "description": "The hostname, IP address, or URL of the Oracle server.", - "required": False, - "label": "Host", - }, - port={ - "type": ARG_TYPE.INT, - "description": "The port number for connecting to the Oracle database. Default is 1521.", - "required": False, - "label": "Port", - }, - sid={ - "type": ARG_TYPE.STR, - "description": "The system identifier (SID) of the Oracle database.", - "required": False, - "label": "SID", - }, - service_name={ - "type": ARG_TYPE.STR, - "description": "The service name of the Oracle database.", - "required": False, - "label": "Service Name", - }, - user={ - "type": ARG_TYPE.STR, - "description": "The username for the Oracle database.", - "required": True, - "label": "User", - }, - password={ - "type": ARG_TYPE.PWD, - "description": "The password for the Oracle database.", - "secret": True, - "required": True, - "label": "Password", - }, - disable_oob={ - "type": ARG_TYPE.BOOL, - "description": "The boolean parameter to disable out-of-band breaks. 
Default is `false`.", - "required": False, - "label": "Disable OOB", - }, - auth_mode={ - "type": ARG_TYPE.STR, - "description": "The authorization mode to use.", - "required": False, - "label": "Auth Mode", - }, - thick_mode={ - "type": ARG_TYPE.BOOL, - "description": "Set to `true` to use thick mode for the connection. Thin mode is used by default.", - "required": False, - "label": "Connection mode", - }, - oracle_client_lib_dir={ - "type": ARG_TYPE.STR, - "description": "The directory path where Oracle client libraries are located. Required if using thick mode.", - "required": False, - "label": "Oracle Client Library Directory", - }, -) - -connection_args_example = OrderedDict( - host="127.0.0.1", - port=1521, - user="admin", - password="password", - sid="ORCL", -) diff --git a/mindsdb/integrations/handlers/oracle_handler/icon.svg b/mindsdb/integrations/handlers/oracle_handler/icon.svg deleted file mode 100644 index ab3f9a95908..00000000000 --- a/mindsdb/integrations/handlers/oracle_handler/icon.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/oracle_handler/oracle_handler.py b/mindsdb/integrations/handlers/oracle_handler/oracle_handler.py deleted file mode 100644 index 79d4c342ff4..00000000000 --- a/mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +++ /dev/null @@ -1,761 +0,0 @@ -from typing import Any, Generator - -import oracledb -import pandas as pd -from oracledb import connect, Connection, DatabaseError, Cursor -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.integrations.libs.base import MetaDatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, - TableResponse, - OkResponse, - ErrorResponse, - DataHandlerResponse, -) -from mindsdb.utilities import log -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.utilities.config import 
config as mindsdb_config -from mindsdb.utilities.types.column import Column -import mindsdb.utilities.profiler as profiler -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE - - -oracledb.defaults.fetch_lobs = False # Return LOBs directly as strings or bytes. -logger = log.getLogger(__name__) - - -def _map_type(internal_type_name: str) -> MYSQL_DATA_TYPE: - """Map Oracle types to MySQL types. - List of types: https://docs.oracle.com/en/database/oracle/oracle-database/23/sqlrf/Data-Types.html - - Args: - internal_type_name (str): The name of the Oracle type to map. - - Returns: - MYSQL_DATA_TYPE: The MySQL type that corresponds to the Oracle type. - """ - internal_type_name = internal_type_name.upper() - types_map = { - ( - "VARCHAR2", - "NVARCHAR2", - "CHARACTER VARYING", - "CHAR VARYING", - "NATIONAL CHARACTER", - "NATIONAL CHAR", - "VARCHAR", - "NATIONAL CHARACTER VARYING", - "NATIONAL CHAR VARYING", - "NCHAR VARYING", - "LONG VARCHAR", - ): MYSQL_DATA_TYPE.VARCHAR, - ("INTEGER", "INT"): MYSQL_DATA_TYPE.INT, - ("SMALLINT",): MYSQL_DATA_TYPE.SMALLINT, - ("NUMBER", "DECIMAL"): MYSQL_DATA_TYPE.DECIMAL, - ("FLOAT", "BINARY_FLOAT", "REAL"): MYSQL_DATA_TYPE.FLOAT, - ("BINARY_DOUBLE",): MYSQL_DATA_TYPE.DOUBLE, - ("LONG",): MYSQL_DATA_TYPE.BIGINT, - ("DATE",): MYSQL_DATA_TYPE.DATE, - ( - "HOUR", - "MINUTE", - "SECOND", - "TIMEZONE_HOUR", - "TIMEZONE_MINUTE", - ): MYSQL_DATA_TYPE.SMALLINT, - ( - "TIMESTAMP", - "TIMESTAMP WITH TIME ZONE", - "TIMESTAMP WITH LOCAL TIME ZONE", - ): MYSQL_DATA_TYPE.TIMESTAMP, - ("RAW", "LONG RAW", "BLOB", "BFILE"): MYSQL_DATA_TYPE.BINARY, - ("ROWID", "UROWID"): MYSQL_DATA_TYPE.TEXT, - ("CHAR", "NCHAR", "CLOB", "NCLOB", "CHARACTER"): MYSQL_DATA_TYPE.CHAR, - ("VECTOR",): MYSQL_DATA_TYPE.VECTOR, - ("JSON",): MYSQL_DATA_TYPE.JSON, - } - - for db_types_list, mysql_data_type in types_map.items(): - if internal_type_name in db_types_list: - return mysql_data_type - - logger.debug(f"Oracle handler type mapping: unknown 
type: {internal_type_name}, use VARCHAR as fallback.") - return MYSQL_DATA_TYPE.VARCHAR - - -def _get_colums(cursor: Cursor) -> list[Column]: - """Get columns from cursor. - - Args: - cursor (psycopg.Cursor): cursor object. - - Returns: - List of columns - """ - columns = [] - for column in cursor.description: - column_name = column[0] - db_type = column[1] - precision = column[4] - scale = column[5] - mysql_type = None - if db_type is oracledb.DB_TYPE_JSON: - mysql_type = MYSQL_DATA_TYPE.JSON - elif db_type is oracledb.DB_TYPE_VECTOR: - mysql_type = MYSQL_DATA_TYPE.VECTOR - elif db_type is oracledb.DB_TYPE_NUMBER: - if scale != 0: - mysql_type = MYSQL_DATA_TYPE.FLOAT - else: - # python max int is 19 digits, oracle can return more - if precision > 18: - mysql_type = MYSQL_DATA_TYPE.DECIMAL - else: - mysql_type = MYSQL_DATA_TYPE.INT - elif db_type is oracledb.DB_TYPE_BINARY_FLOAT: - mysql_type = MYSQL_DATA_TYPE.FLOAT - elif db_type is oracledb.DB_TYPE_BINARY_DOUBLE: - mysql_type = MYSQL_DATA_TYPE.FLOAT - elif db_type is oracledb.DB_TYPE_BINARY_INTEGER: - mysql_type = MYSQL_DATA_TYPE.INT - elif db_type is oracledb.DB_TYPE_BOOLEAN: - mysql_type = MYSQL_DATA_TYPE.BOOLEAN - elif db_type in ( - oracledb.DB_TYPE_CHAR, - oracledb.DB_TYPE_NCHAR, - oracledb.DB_TYPE_LONG, - oracledb.DB_TYPE_NVARCHAR, - oracledb.DB_TYPE_VARCHAR, - oracledb.DB_TYPE_LONG_NVARCHAR, - ): - mysql_type = MYSQL_DATA_TYPE.TEXT - elif db_type in (oracledb.DB_TYPE_RAW, oracledb.DB_TYPE_LONG_RAW): - mysql_type = MYSQL_DATA_TYPE.BINARY - elif db_type is oracledb.DB_TYPE_DATE: - mysql_type = MYSQL_DATA_TYPE.DATE - elif db_type is oracledb.DB_TYPE_TIMESTAMP: - mysql_type = MYSQL_DATA_TYPE.TIMESTAMP - else: - # fallback - mysql_type = MYSQL_DATA_TYPE.TEXT - - columns.append(Column(name=column_name, type=mysql_type)) - return columns - - -def _make_df(result: list[tuple[Any]], columns: list[Column]) -> pd.DataFrame: - """Make pandas DataFrame from result and columns. 
- - Args: - result (list[tuple[Any]]): result of the query. - columns (list[Column]): list of columns. - - Returns: - pd.DataFrame: pandas DataFrame. - """ - serieses = [] - for i, column in enumerate(columns): - expected_dtype = None - if column.type in ( - MYSQL_DATA_TYPE.SMALLINT, - MYSQL_DATA_TYPE.INT, - MYSQL_DATA_TYPE.MEDIUMINT, - MYSQL_DATA_TYPE.BIGINT, - MYSQL_DATA_TYPE.TINYINT, - ): - expected_dtype = "Int64" - elif column.type in (MYSQL_DATA_TYPE.BOOL, MYSQL_DATA_TYPE.BOOLEAN): - expected_dtype = "boolean" - serieses.append(pd.Series([row[i] for row in result], dtype=expected_dtype, name=column.name)) - df = pd.concat(serieses, axis=1, copy=False) - return df - - -class OracleHandler(MetaDatabaseHandler): - """ - This handler handles connection and execution of SQL queries on Oracle. - """ - - name = "oracle" - stream_response = True - - def __init__(self, name: str, connection_data: dict | None, **kwargs) -> None: - """ - Initializes the handler. - - Args: - name (str): The name of the handler instance. - connection_data (dict | None): The connection data required to connect to OracleDB. - kwargs: Arbitrary keyword arguments. - """ - super().__init__(name) - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - def connect(self) -> Connection: - """ - Establishes a connection to the Oracle database. - - Raises: - ValueError: If the expected connection parameters are not provided. - - Returns: - oracledb.Connection: A connection object to the Oracle database. - """ - if self.is_connected is True: - return self.connection - - # Mandatory connection parameters. 
- if not all(key in self.connection_data for key in ["user", "password"]): - raise ValueError("Required parameters (user, password) must be provided.") - - if self.connection_data.get("thick_mode", False): - oracle_client_lib_dir = self.connection_data.get("oracle_client_lib_dir") - if isinstance(oracle_client_lib_dir, str) and oracle_client_lib_dir.strip(): - try: - oracledb.init_oracle_client(lib_dir=oracle_client_lib_dir) - except Exception as e: - raise ValueError(f"Failed to initialize Oracle client: {e}") - else: - raise ValueError( - "Parameter 'oracle_client_lib_dir' must be provided as a non-empty string when using thick_mode." - ) - - config = { - "user": self.connection_data["user"], - "password": self.connection_data["password"], - } - - # If 'dsn' is given, use it. Otherwise, use the individual connection parameters. - if "dsn" in self.connection_data: - config["dsn"] = self.connection_data["dsn"] - - else: - if "host" not in self.connection_data and not any( - key in self.connection_data for key in ["sid", "service_name"] - ): - raise ValueError( - "Required parameter host and either sid or service_name must be provided. Alternatively, dsn can be provided." - ) - - config["host"] = self.connection_data.get("host") - - # Optional connection parameters when 'dsn' is not given. - optional_parameters = ["port", "sid", "service_name"] - for parameter in optional_parameters: - if parameter in self.connection_data: - config[parameter] = self.connection_data[parameter] - - # Other optional connection parameters. 
- if "disable_oob" in self.connection_data: - config["disable_oob"] = self.connection_data["disable_oob"] - - if "auth_mode" in self.connection_data: - mode_name = "AUTH_MODE_" + self.connection_data["auth_mode"].upper() - if not hasattr(oracledb, mode_name): - raise ValueError(f"Unknown auth mode: {mode_name}") - config["mode"] = getattr(oracledb, mode_name) - - try: - connection = connect( - **config, - ) - - if "session_variables" in self.connection_data: - with connection.cursor() as cur: - for key, value in self.connection_data["session_variables"].items(): - cur.execute(f"ALTER SESSION SET {key} = {repr(value)}") - - except DatabaseError as database_error: - logger.error(f"Error connecting to Oracle, {database_error}!") - raise - - except Exception as unknown_error: - logger.error(f"Unknown error when connecting to Oracle: {unknown_error}") - raise - - self.is_connected = True - self.connection = connection - return self.connection - - def disconnect(self): - """ - Closes the connection to the Oracle database if it's currently open. - """ - if self.is_connected is False: - return - self.connection.close() - self.is_connected = False - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the Oracle database. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. 
- """ - response = StatusResponse(False) - need_to_close = self.is_connected is False - - try: - con = self.connect() - con.ping() - response.success = True - except (ValueError, DatabaseError) as known_error: - logger.error(f"Connection check to Oracle failed, {known_error}!") - response.error_message = str(known_error) - except Exception as unknown_error: - logger.error(f"Connection check to Oracle failed due to an unknown error, {unknown_error}!") - response.error_message = str(unknown_error) - - if response.success and need_to_close: - self.disconnect() - - elif not response.success and self.is_connected: - self.is_connected = False - - return response - - def native_query(self, query: str, stream: bool = True, **kwargs) -> TableResponse | OkResponse | ErrorResponse: - """Executes a SQL query on the Oracle database and returns the result. - - Args: - query (str): The SQL query to be executed. - stream (bool): Whether to execute the query on the server side (streaming). - **kwargs: Additional keyword arguments. - - Returns: - TableResponse | OkResponse | ErrorResponse: A response object containing the result of the query or an error message. 
- """ - if stream is False: - response = self._execute_fetchall(query, **kwargs) - else: - generator = self._execute_fetchmany(query, **kwargs) - try: - response: TableResponse = next(generator) - response.data_generator = generator - except StopIteration as e: - response = e.value - if isinstance(response, DataHandlerResponse) is False: - raise - return response - - def _execute_fetchmany(self, query: str) -> Generator[pd.DataFrame, None, OkResponse | ErrorResponse]: - connection = self.connect() - with connection.cursor() as cursor: - try: - # Configure cursor for optimal server-side streaming - fetch_size = mindsdb_config["data_stream"]["fetch_size"] - cursor.arraysize = fetch_size - - cursor.execute(query) - - if cursor.description is None: - connection.commit() - return OkResponse(affected_rows=cursor.rowcount) - - columns = _get_colums(cursor) - yield TableResponse(affected_rows=cursor.rowcount, columns=columns) - # Stream data in batches - while result := cursor.fetchmany(cursor.arraysize): - yield _make_df(result, columns) - connection.commit() - except Exception as e: - return self._handle_query_exception(e, query, connection) - - def _execute_fetchall(self, query: str) -> DataHandlerResponse: - """Executes a SQL query and fetches all results at once (client-side). - - Args: - query (str): The SQL query to be executed. - - Returns: - TableResponse | OkResponse | ErrorResponse: A response object containing the result of the query or an error message. 
- """ - connection = self.connect() - with connection.cursor() as cursor: - try: - cursor.execute(query) - if cursor.description is None: - response = OkResponse(affected_rows=cursor.rowcount) - else: - # Fetch all results at once - result = cursor.fetchall() - columns = _get_colums(cursor) - df = _make_df(result, columns) - response = TableResponse(data=df, affected_rows=cursor.rowcount, columns=columns) - connection.commit() - except Exception as e: - response = self._handle_query_exception(e, query, connection) - - return response - - def _handle_query_exception(self, e: Exception, query: str, connection) -> ErrorResponse: - """Handle query execution errors with appropriate logging and rollback. - - Args: - e: The exception that was raised - query: The SQL query that failed - connection: The database connection to rollback - - Returns: - ErrorResponse with appropriate error details - """ - if isinstance(e, DatabaseError): - logger.error(f"Error running query: {query} on Oracle, {e}!") - connection.rollback() - return ErrorResponse(error_code=0, error_message=str(e)) - - logger.error(f"Unknown error running query: {query} on Oracle, {e}!") - connection.rollback() - return ErrorResponse(error_code=0, error_message=str(e)) - - def insert(self, table_name: str, df: pd.DataFrame) -> Response: - """ - Inserts data from a DataFrame into a specified table in the Oracle database. - - Args: - table_name (str): The name of the table where the data will be inserted. - df (pd.DataFrame): The DataFrame containing the data to be inserted. - Returns: - Response: A response object indicating the success or failure of the insert operation. 
- """ - need_to_close = self.is_connected is False - connection = self.connect() - columns = list(df.columns) - placeholders = ", ".join([f":{i + 1}" for i in range(len(columns))]) - insert_query = f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES ({placeholders})" - - with connection.cursor() as cur: - try: - cur.executemany(insert_query, df.values.tolist()) - connection.commit() - rowcount = cur.rowcount - except DatabaseError as database_error: - logger.error(f"Error inserting data into table {table_name} on Oracle, {database_error}!") - connection.rollback() - raise - if need_to_close is True: - self.disconnect() - - return Response(RESPONSE_TYPE.OK, affected_rows=rowcount) - - @profiler.profile() - def query(self, query: ASTNode) -> Response: - """ - Executes a SQL query represented by an ASTNode and retrieves the data. - - Args: - query (ASTNode): An ASTNode representing the SQL query to be executed. - - Returns: - Response: The response from the `native_query` method, containing the result of the SQL query execution. - """ - renderer = SqlalchemyRender("oracle") - query_str = renderer.get_string(query, with_failback=True) - return self.native_query(query_str) - - def get_tables(self) -> Response: - """ - Retrieves a list of all non-system tables and views in the current schema of the Oracle database. - - Returns: - Response: A response object containing the list of tables and views, formatted as per the `Response` class. 
- """ - query = """ - SELECT - owner AS table_schema, - table_name AS table_name, - 'BASE TABLE' AS table_type - FROM all_tables t - JOIN all_users u ON t.owner = u.username - WHERE t.tablespace_name = 'USERS' - - UNION ALL - - SELECT - v.owner AS table_schema, - v.view_name AS table_name, - 'VIEW' AS table_type - FROM all_views v - JOIN all_users u ON v.owner = u.username - WHERE v.owner IN ( - SELECT DISTINCT owner - FROM all_tables - WHERE tablespace_name = 'USERS' - ) - """ - return self.native_query(query) - - def get_columns(self, table_name: str) -> Response: - """ - Retrieves column details for a specified table in the Oracle database. - - Args: - table_name (str): The name of the table for which to retrieve column information. - - Returns: - Response: A response object containing the column details, formatted as per the `Response` class. - Raises: - ValueError: If the 'table_name' is not a valid string. - """ - query = f""" - SELECT - COLUMN_NAME, - DATA_TYPE, - COLUMN_ID AS ORDINAL_POSITION, - DATA_DEFAULT AS COLUMN_DEFAULT, - CASE NULLABLE WHEN 'Y' THEN 'YES' ELSE 'NO' END AS IS_NULLABLE, - CHAR_LENGTH AS CHARACTER_MAXIMUM_LENGTH, - NULL AS CHARACTER_OCTET_LENGTH, - DATA_PRECISION AS NUMERIC_PRECISION, - DATA_SCALE AS NUMERIC_SCALE, - NULL AS DATETIME_PRECISION, - CHARACTER_SET_NAME, - NULL AS COLLATION_NAME - FROM USER_TAB_COLUMNS - WHERE table_name = '{table_name}' - ORDER BY TABLE_NAME, COLUMN_ID - """ - result = self.native_query(query) - if result.type is RESPONSE_TYPE.TABLE: - result.to_columns_table_response(map_type_fn=_map_type) - return result - - def meta_get_tables(self, table_names: list[str] | None) -> Response: - """ - Retrieves metadata about all non-system tables and views accessible to the current user. - - Returns: - Response: A response object containing metadata about tables and views. 
- """ - query = """ - SELECT - o.object_name AS table_name, - o.owner AS table_schema, - o.object_type AS table_type, - c.comments AS table_description, - t.num_rows AS row_count - FROM - all_objects o - JOIN - all_users u ON o.owner = u.username - LEFT JOIN - all_tab_comments c ON o.object_name = c.table_name AND o.owner = c.owner - LEFT JOIN - all_tables t ON o.object_name = t.table_name AND o.owner = t.owner AND o.object_type = 'TABLE' - WHERE - o.object_type IN ('TABLE', 'VIEW') - AND t.tablespace_name = 'USERS' - """ - if table_names is not None and len(table_names) > 0: - table_names = [f"'{t.upper()}'" for t in table_names] - query += f" AND o.object_name IN ({','.join(table_names)})" - - query += " ORDER BY o.owner, o.object_name" - - result = self.native_query(query) - return result - - def meta_get_columns(self, table_names: list[str] | None) -> Response: - """Retrieves metadata about the columns of specified tables accessible to the current user. - - Args: - table_names (list[str] | None): A list of table names for which to retrieve column metadata. - - Returns: - Response: A response object containing column metadata. 
- """ - query = """ - SELECT - atc.table_name, - atc.column_name, - atc.data_type, - acc.comments AS column_description, - atc.data_default AS column_default, - CASE - WHEN atc.nullable = 'Y' THEN 1 - ELSE 0 - END AS is_nullable - FROM - all_tab_columns atc - JOIN - all_tables at ON atc.table_name = at.table_name AND atc.owner = at.owner - JOIN - all_users u ON atc.owner = u.username - LEFT JOIN - all_col_comments acc ON atc.table_name = acc.table_name - AND atc.column_name = acc.column_name - AND atc.owner = acc.owner - WHERE - at.tablespace_name = 'USERS' - """ - if table_names is not None and len(table_names) > 0: - table_names = [f"'{t.upper()}'" for t in table_names] - query += f" AND atc.table_name IN ({','.join(table_names)})" - query += " ORDER BY atc.owner, atc.table_name, atc.column_id" - result = self.native_query(query) - return result - - def meta_get_column_statistics(self, table_names: list[str] | None) -> Response: - """Retrieves statistics about the columns of specified tables accessible to the current user. - - Args: - table_names (list[str] | None): A list of table names for which to retrieve column statistics. - - Returns: - Response: A response object containing column statistics. 
- """ - table_filter = "" - if table_names is not None and len(table_names) > 0: - quoted_names = [f"'{t.upper()}'" for t in table_names] - table_filter = f" AND cs.table_name IN ({','.join(quoted_names)})" - - query = f""" - SELECT - cs.table_name AS TABLE_NAME, - cs.column_name AS COLUMN_NAME, - CASE - WHEN cs.sample_size > 0 THEN ROUND((cs.num_nulls / cs.sample_size) * 100, 2) - ELSE NULL - END AS NULL_PERCENTAGE, - cs.num_distinct AS DISTINCT_VALUES_COUNT, - NULL AS MOST_COMMON_VALUES, - NULL AS MOST_COMMON_FREQUENCIES, - cs.histogram AS HISTOGRAM_TYPE, - h.bounds AS HISTOGRAM_BOUNDS - FROM - all_tab_col_statistics cs - JOIN - all_tables at ON cs.table_name = at.table_name AND cs.owner = at.owner - JOIN - all_users u ON cs.owner = u.username - LEFT JOIN ( - SELECT - owner, - table_name, - column_name, - LISTAGG(endpoint_value, ', ') WITHIN GROUP (ORDER BY endpoint_number) AS bounds - FROM - all_tab_histograms - GROUP BY - owner, - table_name, - column_name - ) h ON cs.table_name = h.table_name - AND cs.column_name = h.column_name - AND cs.owner = h.owner - WHERE - at.tablespace_name = 'USERS' - {table_filter} - ORDER BY - cs.owner, - cs.table_name, - cs.column_name - """ - - result = self.native_query(query) - - if result.type is RESPONSE_TYPE.TABLE and result.data_frame is not None: - df = result.data_frame - - def extract_min_max( - histogram_str: str, - ) -> tuple[float | None, float | None]: - if histogram_str and str(histogram_str).lower() not in ["nan", "none"]: - values = str(histogram_str).split(",") - if values: - min_val = values[0].strip(" '\"") - max_val = values[-1].strip(" '\"") - return min_val, max_val - return None, None - - min_max_values = df["HISTOGRAM_BOUNDS"].apply(extract_min_max) - df["MINIMUM_VALUE"] = min_max_values.apply(lambda x: x[0]) - df["MAXIMUM_VALUE"] = min_max_values.apply(lambda x: x[1]) - df.drop(columns=["HISTOGRAM_BOUNDS"], inplace=True) - return result - - def meta_get_primary_keys(self, table_names: list[str] | None) -> 
Response: - """ - Retrieves the primary keys for the specified tables accessible to the current user. - - Args: - table_names (list[str] | None): A list of table names for which to retrieve primary keys. - - Returns: - Response: A response object containing primary key information. - """ - - query = """ - SELECT - cols.table_name, - cols.column_name, - cols.position AS ordinal_position, - cons.constraint_name - FROM - all_constraints cons - JOIN - all_cons_columns cols ON cons.constraint_name = cols.constraint_name AND cons.owner = cols.owner - JOIN - all_users u ON cons.owner = u.username - JOIN - all_tables t ON cols.table_name = t.table_name AND cols.owner = t.owner - WHERE - cons.constraint_type = 'P' - AND t.tablespace_name = 'USERS' - """ - if table_names is not None and len(table_names) > 0: - quoted_names = [f"'{t.upper()}'" for t in table_names] - query += f" AND cols.table_name IN ({','.join(quoted_names)})" - - query += " ORDER BY cols.owner, cols.table_name, cols.position" - - result = self.native_query(query) - return result - - def meta_get_foreign_keys(self, table_names: list[str] | None) -> Response: - """ - Retrieves the foreign keys for the specified tables accessible to the current user. - - Args: - table_names (list[str] | None): A list of table names for which to retrieve foreign keys. - - Returns: - Response: A response object containing foreign key information. 
- """ - - query = """ - SELECT - pk_cols.table_name AS parent_table_name, - pk_cols.column_name AS parent_column_name, - fk_cols.table_name AS child_table_name, - fk_cols.column_name AS child_column_name, - fk_cons.constraint_name - FROM - all_constraints fk_cons - JOIN - all_cons_columns fk_cols ON fk_cons.owner = fk_cols.owner AND fk_cons.constraint_name = fk_cols.constraint_name - JOIN - all_cons_columns pk_cols ON fk_cons.owner = pk_cols.owner AND fk_cons.r_constraint_name = pk_cols.constraint_name - JOIN - all_users u ON fk_cons.owner = u.username - JOIN - all_tables t ON fk_cols.table_name = t.table_name AND fk_cols.owner = t.owner - WHERE - fk_cons.constraint_type = 'R' - AND t.tablespace_name = 'USERS' - """ - if table_names is not None and len(table_names) > 0: - quoted_names = [f"'{t.upper()}'" for t in table_names] - query += f" AND fk_cols.table_name IN ({','.join(quoted_names)})" - - query += " ORDER BY fk_cols.owner, fk_cols.table_name, fk_cols.position" - result = self.native_query(query) - return result diff --git a/mindsdb/integrations/handlers/oracle_handler/requirements.txt b/mindsdb/integrations/handlers/oracle_handler/requirements.txt deleted file mode 100644 index caf8f1405e4..00000000000 --- a/mindsdb/integrations/handlers/oracle_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -oracledb==3.3.0 \ No newline at end of file diff --git a/mindsdb/integrations/handlers/pgvector_handler/README.md b/mindsdb/integrations/handlers/pgvector_handler/README.md deleted file mode 100644 index dafc87311c3..00000000000 --- a/mindsdb/integrations/handlers/pgvector_handler/README.md +++ /dev/null @@ -1,108 +0,0 @@ -# PGVector Handler - -This is the implementation of the PGVector for MindsDB. - -## PGVector - -Open-source vector similarity search for Postgres - -Store your vectors with the rest of your data. 
- -Supports: - -exact and approximate nearest neighbor search -L2 distance, inner product, and cosine distance -any language with a Postgres client -Plus ACID compliance, point-in-time recovery, JOINs, and all of the other great features of Postgres - -## Implementation - -This handler uses `pgvector` python library to make use of the vector data type in postgres created from the pgvector extension - -The required arguments to establish a connection are the same as a regular postgres connection: - -* `host`: the host name or IP address of the postgres instance -* `port`: the port to use when connecting -* `database`: the database to connect to -* `user`: the user to connect as -* `password`: the password to use when connecting - -## Usage - -### Installing the pgvector extension - -where you have postgres installed run the following commands to install the pgvector extension - -`cd /tmp -git clone --branch v0.4.4 https://github.com/pgvector/pgvector.git -cd pgvector -make -make install` - -### Installing the pgvector python library -Ensure you install all from requirements.txt in the pgvector_handler folder - -### Creating a database connection in MindsDB - -You can create a database connection like you would for a regular postgres database, the only difference is that you need to specify the engine as `pgvector` - -```sql -CREATE DATABASE pvec -WITH - ENGINE = 'pgvector', - PARAMETERS = { - "host": "127.0.0.1", - "port": 5432, - "database": "postgres", - "user": "user", - "password": "password" - }; -``` - -You can insert data into a new collection like so - -```sql -CREATE TABLE pvec.embed - (SELECT embeddings FROM mysql_demo_db.test_embeddings -); - -CREATE ML_ENGINE openai -FROM openai -USING - openai_api_key = 'your-openai-api-key'; - -CREATE MODEL openai_emb -PREDICT embedding -USING - engine = 'openai', - model_name='text-embedding-ada-002', - mode = 'embedding', - question_column = 'review'; - -create table pvec.itemstest ( -SELECT m.embedding AS 
embeddings, t.review content FROM mysql_demo_db.amazon_reviews t - join openai_emb m -); - -``` - -You can query a collection within your PGVector as follows: - -```sql -SELECT * -FROM pvec.embed -Limit 5; - -SELECT * -FROM pvec.itemstest -Limit 5; -``` - - -You can query on semantic search like so: - -```sql -SELECT * -FROM pvec3.items_test -WHERE embeddings = (select * from mindsdb.embedding) LIMIT 5; -``` diff --git a/mindsdb/integrations/handlers/pgvector_handler/__about__.py b/mindsdb/integrations/handlers/pgvector_handler/__about__.py deleted file mode 100644 index acde77ca83e..00000000000 --- a/mindsdb/integrations/handlers/pgvector_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB pgvector handler" -__package_name__ = "mindsdb_pgvector_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for pgvector" -__author__ = "Daniel Usvyat" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/pgvector_handler/__init__.py b/mindsdb/integrations/handlers/pgvector_handler/__init__.py deleted file mode 100644 index f437ca5aded..00000000000 --- a/mindsdb/integrations/handlers/pgvector_handler/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __description__ as description -from .__about__ import __version__ as version -from .connection_args import connection_args, connection_args_example -try: - from .pgvector_handler import PgVectorHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "PGVector" -name = "pgvector" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git 
a/mindsdb/integrations/handlers/pgvector_handler/connection_args.py b/mindsdb/integrations/handlers/pgvector_handler/connection_args.py deleted file mode 100644 index 1d14b9017b7..00000000000 --- a/mindsdb/integrations/handlers/pgvector_handler/connection_args.py +++ /dev/null @@ -1,54 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - "type": ARG_TYPE.STR, - "description": "The user name used to authenticate with the PostgreSQL server.", - "required": True, - "label": "User", - }, - password={ - "type": ARG_TYPE.PWD, - "description": "The password to authenticate the user with the PostgreSQL server.", - "required": True, - "label": "Password", - "secret": True - }, - database={ - "type": ARG_TYPE.STR, - "description": "The database name to use when connecting with the PostgreSQL server.", - "required": True, - "label": "Database", - }, - host={ - "type": ARG_TYPE.STR, - "description": "The host name or IP address of the PostgreSQL server. NOTE: use '127.0.0.1' instead of 'localhost' to connect to local server.", - "required": True, - "label": "Host", - }, - port={ - "type": ARG_TYPE.INT, - "description": "The TCP/IP port of the PostgreSQL server. 
Must be an integer.", - "required": True, - "label": "Port", - }, - schema={ - "type": ARG_TYPE.STR, - "description": "The schema in which objects are searched first.", - "required": False, - "label": "Schema", - }, - sslmode={ - "type": ARG_TYPE.STR, - "description": "sslmode that will be used for connection.", - "required": False, - "label": "sslmode", - }, -) - -connection_args_example = OrderedDict( - host="127.0.0.1", port=5432, user="root", password="password", database="database" -) diff --git a/mindsdb/integrations/handlers/pgvector_handler/icon.svg b/mindsdb/integrations/handlers/pgvector_handler/icon.svg deleted file mode 100644 index 89a18451405..00000000000 --- a/mindsdb/integrations/handlers/pgvector_handler/icon.svg +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py b/mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py deleted file mode 100644 index 5d24c3b5578..00000000000 --- a/mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +++ /dev/null @@ -1,667 +0,0 @@ -import os -import json -import hashlib -from collections import OrderedDict -from typing import List, Literal, Tuple -from urllib.parse import urlparse - -import pandas as pd -import psycopg -from psycopg import sql -from mindsdb_sql_parser.ast import ( - Parameter, - Identifier, - BinaryOperation, - Tuple as AstTuple, - Constant, - Select, - OrderBy, - TypeCast, - Delete, - Update, - Function, - DropTables, -) -from mindsdb_sql_parser.ast.base import ASTNode -from pgvector.psycopg import register_vector - -from mindsdb.integrations.handlers.postgres_handler.postgres_handler import ( - PostgresHandler, -) -from mindsdb.integrations.libs.response import RESPONSE_TYPE, HandlerResponse as Response -from mindsdb.integrations.libs.vectordatabase_handler import ( - FilterCondition, - VectorStoreHandler, - TableField, - FilterOperator, -) 
-from mindsdb.integrations.libs.keyword_search_base import KeywordSearchBase -from mindsdb.integrations.utilities.sql_utils import KeywordSearchArgs -from mindsdb.utilities import log -from mindsdb.utilities.profiler import profiler -from mindsdb.utilities.context import context as ctx - -logger = log.getLogger(__name__) - - -# todo Issue #7316 add support for different indexes and search algorithms e.g. cosine similarity or L2 norm -class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase): - """This handler handles connection and execution of the PostgreSQL with pgvector extension statements.""" - - name = "pgvector" - - # LRU cache to track which legacy tables have been checked for migration. - # This prevents repeated database existence checks on every table access. - _migration_checked_cache: OrderedDict = OrderedDict() - _MIGRATION_CACHE_MAXSIZE = 1024 - - def __init__(self, name: str, **kwargs): - super().__init__(name=name, **kwargs) - self._is_shared_db = False - self._is_vector_registered = False - # we get these from the connection args on PostgresHandler parent - self._is_sparse = self.connection_args.get("is_sparse", False) - self._vector_size = self.connection_args.get("vector_size", None) - - if self._is_sparse: - if not self._vector_size: - raise ValueError("vector_size is required when is_sparse=True") - - # Use inner product for sparse vectors - distance_op = "<#>" - - else: - distance_op = "<=>" - if "distance" in self.connection_args: - distance_ops = { - "l1": "<+>", - "l2": "<->", - "ip": "<#>", # inner product - "cosine": "<=>", - "hamming": "<~>", - "jaccard": "<%>", - } - - distance_op = distance_ops.get(self.connection_args["distance"]) - if distance_op is None: - raise ValueError(f"Wrong distance type. 
Allowed options are {list(distance_ops.keys())}") - - self.distance_op = distance_op - self.connect() - - def get_metric_type(self) -> str: - """ - Get the metric type from the distance ops - - """ - distance_ops_to_metric_type_map = { - "<->": "vector_l2_ops", - "<#>": "vector_ip_ops", - "<=>": "vector_cosine_ops", - "<+>": "vector_l1_ops", - "<~>": "bit_hamming_ops", - "<%>": "bit_jaccard_ops", - } - return distance_ops_to_metric_type_map.get(self.distance_op, "vector_cosine_ops") - - def _make_connection_args(self): - cloud_pgvector_url = os.environ.get("KB_PGVECTOR_URL") - # if no connection args and shared pg vector defined - use it - if len(self.connection_args) == 0 and cloud_pgvector_url is not None: - result = urlparse(cloud_pgvector_url) - self.connection_args = { - "host": result.hostname, - "port": result.port, - "user": result.username, - "password": result.password, - "database": result.path[1:], - } - self._is_shared_db = True - return super()._make_connection_args() - - def get_tables(self) -> Response: - # Hide list of tables from all users - if self._is_shared_db: - return Response(RESPONSE_TYPE.OK) - return super().get_tables() - - def query(self, query: ASTNode) -> Response: - # Option to drop table of shared pgvector connection - if isinstance(query, DropTables): - query.tables = [self._check_table(table.parts[-1]) for table in query.tables] - query_str, params = self.renderer.get_exec_params(query, with_failback=True) - return self.native_query(query_str, params, no_restrict=True, stream=False) - return super().query(query) - - def native_query(self, query, params=None, no_restrict=False) -> Response: - """ - Altered `native_query` method of postgres handler. 
- Restrict usage of native query from executor with shared pg vector connection - Exceptions: if it is used by pgvector itself (with no_restrict = True) - """ - # Prevent execute native queries - if self._is_shared_db and not no_restrict: - return Response(RESPONSE_TYPE.OK) - return super().native_query(query, params=params) - - def raw_query(self, query, params=None) -> Response: - resp = super().native_query(query, params, stream=False) - if resp.resp_type == RESPONSE_TYPE.ERROR: - raise RuntimeError(resp.error_message) - if resp.resp_type == RESPONSE_TYPE.TABLE: - return resp.data_frame - - @profiler.profile() - def connect(self) -> psycopg.connection: - """ - Handles the connection to a PostgreSQL database instance. - """ - self.connection = super().connect() - if self._is_vector_registered: - return self.connection - - with self.connection.cursor() as cur: - try: - # load pg_vector extension - cur.execute("CREATE EXTENSION IF NOT EXISTS vector") - logger.info("pg_vector extension loaded") - - except psycopg.Error as e: - self.connection.rollback() - logger.error(f"Error loading pg_vector extension, ensure you have installed it before running, {e}!") - raise - - # register vector type with psycopg2 connection - register_vector(self.connection) - self._is_vector_registered = True - - return self.connection - - def add_full_text_index(self, table_name: str, column_name: str) -> Response: - """ - Add a full text index to the specified column of the table. - Args: - table_name (str): Name of the table to add the index to. - column_name (str): Name of the column to add the index to. - Returns: - Response: Response object indicating success or failure. 
- """ - table_name = self._check_table(table_name) - # Quote table name for PostgreSQL - table names may contain special chars like hyphens - query = f'CREATE INDEX IF NOT EXISTS "{table_name}_{column_name}_fts_idx" ON "{table_name}" USING gin(to_tsvector(\'english\', {column_name}))' - self.raw_query(query) - return Response(RESPONSE_TYPE.OK) - - @staticmethod - def _translate_conditions(conditions: List[FilterCondition]) -> Tuple[List[dict], dict]: - """ - Translate filter conditions to a dictionary - """ - - if conditions is None: - conditions = [] - - filter_conditions = [] - embedding_condition = None - - for condition in conditions: - is_embedding = condition.column == "embeddings" - - parts = condition.column.split(".") - key = Identifier(parts[0]) - - # converts 'col.el1.el2' to col->'el1'->>'el2' - if len(parts) > 1: - # intermediate elements - for el in parts[1:-1]: - key = BinaryOperation(op="->", args=[key, Constant(el)]) - - # last element - key = BinaryOperation(op="->>", args=[key, Constant(parts[-1])]) - - type_cast = None - value = condition.value - if ( - isinstance(value, list) - and len(value) > 0 - and condition.op in (FilterOperator.IN, FilterOperator.NOT_IN) - ): - value = condition.value[0] - - if isinstance(value, int): - type_cast = "int" - elif isinstance(value, float): - type_cast = "float" - if type_cast is not None: - key = TypeCast(type_cast, key) - - item = { - "name": key, - "op": condition.op.value, - "value": condition.value, - } - if is_embedding: - embedding_condition = item - else: - filter_conditions.append(item) - - return filter_conditions, embedding_condition - - @staticmethod - def _construct_where_clause(filter_conditions=None): - """ - Construct where clauses from filter conditions - """ - - where_clause = None - - for item in filter_conditions: - key = item["name"] - - if item["op"].lower() in ("in", "not in"): - values = [Constant(i) for i in item["value"]] - value = AstTuple(values) - else: - value = 
Constant(item["value"]) - condition = BinaryOperation(op=item["op"], args=[key, value]) - - if where_clause is None: - where_clause = condition - else: - where_clause = BinaryOperation(op="AND", args=[where_clause, condition]) - return where_clause - - @staticmethod - def _construct_full_after_from_clause( - where_clause: str, - offset_clause: str, - limit_clause: str, - ) -> str: - return f"{where_clause} {offset_clause} {limit_clause}" - - def _build_keyword_bm25_query( - self, - table_name: str, - keyword_search_args: KeywordSearchArgs, - columns: List[str] = None, - conditions: List[FilterCondition] = None, - limit: int = None, - offset: int = None, - ): - if columns is None: - columns = ["id", "content", "metadata"] - - filter_conditions, _ = self._translate_conditions(conditions) - where_clause = self._construct_where_clause(filter_conditions) - - if keyword_search_args: - keyword_query_condition = BinaryOperation( - op="@@", - args=[ - Function("to_tsvector", args=[Constant("english"), Identifier(keyword_search_args.column)]), - Function("websearch_to_tsquery", args=[Constant("english"), Constant(keyword_search_args.query)]), - ], - ) - - if where_clause: - where_clause = BinaryOperation(op="AND", args=[where_clause, keyword_query_condition]) - else: - where_clause = keyword_query_condition - - distance = Function( - "ts_rank_cd", - args=[ - Function("to_tsvector", args=[Constant("english"), Identifier(keyword_search_args.column)]), - Function("websearch_to_tsquery", args=[Constant("english"), Constant(keyword_search_args.query)]), - ], - alias=Identifier("distance"), - ) - - targets = [Identifier(col) for col in columns] - targets.append(distance) - - limit_clause = Constant(limit) if limit else None - offset_clause = Constant(offset) if offset else None - - return Select( - targets=targets, - from_table=Identifier(table_name), - where=where_clause, - limit=limit_clause, - offset=offset_clause, - ) - - def _build_select_query( - self, - table_name: str, - 
columns: List[str] = None, - conditions: List[FilterCondition] = None, - limit: int = None, - offset: int = None, - ) -> Select: - """ - given inputs, build string query - """ - limit_clause = Constant(limit) if limit else None - offset_clause = Constant(offset) if offset else None - - # translate filter conditions to dictionary - filter_conditions, embedding_search = self._translate_conditions(conditions) - - # given filter conditions, construct where clause - where_clause = self._construct_where_clause(filter_conditions) - - # Handle distance column specially since it's calculated, not stored - modified_columns = [] - has_distance = False - if columns is not None: - for col in columns: - if col == TableField.DISTANCE.value: - has_distance = True - else: - modified_columns.append(col) - else: - modified_columns = ["id", "content", "embeddings", "metadata"] - has_distance = True - - targets = [Identifier(col) for col in modified_columns] - - query = Select( - targets=targets, - from_table=Identifier(table_name), - where=where_clause, - limit=limit_clause, - offset=offset_clause, - ) - - if embedding_search: - search_vector = embedding_search["value"] - - if self._is_sparse: - # Convert dict to sparse vector if needed - if isinstance(search_vector, dict): - from pgvector.utils import SparseVector - - embedding = SparseVector(search_vector, self._vector_size) - search_vector = embedding.to_text() - else: - # Convert list to vector string if needed - if isinstance(search_vector, list): - search_vector = f"[{','.join(str(x) for x in search_vector)}]" - - vector_op = BinaryOperation( - op=self.distance_op, - args=[Identifier("embeddings"), Constant(search_vector)], - alias=Identifier("distance"), - ) - # Calculate distance as part of the query if needed - if has_distance: - query.targets.append(vector_op) - - query.order_by = [OrderBy(vector_op, direction="ASC")] - - return query - - def _check_table(self, table_name: str): - # Apply namespace for a user if the database 
is shared - if self._is_shared_db: - company_id = ctx.company_id - user_id = ctx.user_id - - # PostgreSQL has a 63-character limit for identifiers. - # Using full UUIDs (36 chars each) would exceed this limit. - # We use a SHA-256 hash of company_id + user_id to create a shorter, unique prefix. - namespace_hash = hashlib.sha256(f"{company_id}_{user_id}".encode()).hexdigest()[:16] - new_table_name = f"t_{namespace_hash}_{table_name}" - - # Backwards compatibility: migrate old tables to the new hashed format - # Old format: t_{company_id}_{table_name} (without user_id) - old_table_name = f"t_{company_id}_{table_name}" - self._migrate_legacy_table(old_table_name, new_table_name) - - return new_table_name - return table_name - - def _migrate_legacy_table(self, old_name: str, new_name: str): - """ - Rename legacy tables from t_{company_id}_{name} to t_{company_id}_{user_id}_{name}. - - Uses an LRU cache to ensure migration is only attempted once per table pair per process, - avoiding repeated database existence checks on every table access. - - Args: - old_name (str): The name of the old table. - new_name (str): The name of the new table. - - Raises: - RuntimeError: If the table rename fails. 
- """ - cache_key = (old_name, new_name) - - # Check LRU cache - if already checked, return early - if cache_key in PgVectorHandler._migration_checked_cache: - # Move to end (mark as recently used) - PgVectorHandler._migration_checked_cache.move_to_end(cache_key) - return - - # Check if old table exists - # Note: Use %s without quotes - psycopg handles proper escaping for string values - old_exists_df = self.raw_query( - "SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = %s)", [[old_name]] - ) - old_exists = old_exists_df.iloc[0, 0] if old_exists_df is not None and not old_exists_df.empty else False - - try: - if old_exists: - # Check if new table already exists - new_exists_df = self.raw_query( - "SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = %s)", [[new_name]] - ) - new_exists = ( - new_exists_df.iloc[0, 0] if new_exists_df is not None and not new_exists_df.empty else False - ) - - if not new_exists: - logger.info(f"Migrating legacy pgvector table {old_name} to {new_name}") - rename_query = sql.SQL("ALTER TABLE {} RENAME TO {}").format( - sql.Identifier(f'"{old_name}"'), sql.Identifier(f'"{new_name}"') - ) - self.raw_query(rename_query.as_string(self.connection)) - except Exception: - logger.exception(f"Failed to migrate legacy pgvector table {old_name} to {new_name}") - raise - - # Add to LRU cache with eviction if at capacity - if len(PgVectorHandler._migration_checked_cache) >= PgVectorHandler._MIGRATION_CACHE_MAXSIZE: - PgVectorHandler._migration_checked_cache.popitem(last=False) # Remove oldest - PgVectorHandler._migration_checked_cache[cache_key] = True - - def select( - self, - table_name: str, - columns: List[str] = None, - conditions: List[FilterCondition] = None, - offset: int = None, - limit: int = None, - ) -> pd.DataFrame: - """ - Retrieve the data from the SQL statement with eliminated rows that dont satisfy the WHERE condition - """ - table_name = self._check_table(table_name) - - if columns is None: 
- columns = ["id", "content", "embeddings", "metadata"] - - query = self._build_select_query(table_name, columns, conditions, limit, offset) - query_str = self.renderer.get_string(query, with_failback=True) - result = self.raw_query(query_str) - - # ensure embeddings are returned as string so they can be parsed by mindsdb - if "embeddings" in columns: - result["embeddings"] = result["embeddings"].apply(list) - - return result - - def keyword_select( - self, - table_name: str, - columns: List[str] = None, - conditions: List[FilterCondition] = None, - offset: int = None, - limit: int = None, - keyword_search_args: KeywordSearchArgs = None, - ) -> pd.DataFrame: - table_name = self._check_table(table_name) - - if columns is None: - columns = ["id", "content", "embeddings", "metadata"] - - query = self._build_keyword_bm25_query(table_name, keyword_search_args, columns, conditions, limit, offset) - query_str = self.renderer.get_string(query, with_failback=True) - result = self.raw_query(query_str) - - # ensure embeddings are returned as string so they can be parsed by mindsdb - if "embeddings" in columns: - result["embeddings"] = result["embeddings"].astype(str) - - return result - - def create_table(self, table_name: str): - """Create a table with a vector column.""" - - table_name = self._check_table(table_name) - - with self.connection.cursor() as cur: - # For sparse vectors, use sparsevec type - vector_column_type = "sparsevec" if self._is_sparse else "vector" - - # Vector size is required for sparse vectors, optional for dense - if self._is_sparse and not self._vector_size: - raise ValueError("vector_size is required for sparse vectors") - - # Add vector size specification only if provided - size_spec = f"({self._vector_size})" if self._vector_size is not None else "()" - if vector_column_type == "vector": - size_spec = "" - - cur.execute(f""" - CREATE TABLE IF NOT EXISTS "{table_name}" ( - id TEXT PRIMARY KEY, - embeddings {vector_column_type}{size_spec}, - content 
TEXT, - metadata JSONB - ) - """) - self.connection.commit() - - def insert(self, table_name: str, data: pd.DataFrame): - """ - Insert data into the pgvector table database. - """ - table_name = self._check_table(table_name) - - if "metadata" in data.columns: - data["metadata"] = data["metadata"].apply(json.dumps) - - resp = super().insert(table_name, data) - if resp.resp_type == RESPONSE_TYPE.ERROR: - raise RuntimeError(resp.error_message) - if resp.resp_type == RESPONSE_TYPE.TABLE: - return resp.data_frame - - def update(self, table_name: str, data: pd.DataFrame, key_columns: List[str] = None): - """ - Udate data into the pgvector table database. - """ - table_name = self._check_table(table_name) - - where = None - update_columns = {} - - for col in data.columns: - value = Parameter("%s") - - if col in key_columns: - cond = BinaryOperation(op="=", args=[Identifier(col), value]) - if where is None: - where = cond - else: - where = BinaryOperation(op="AND", args=[where, cond]) - else: - update_columns[col] = value - - query = Update(table=Identifier(table_name), update_columns=update_columns, where=where) - - if TableField.METADATA.value in data.columns: - - def fnc(v): - if isinstance(v, dict): - return json.dumps(v) - - data[TableField.METADATA.value] = data[TableField.METADATA.value].apply(fnc) - - data = data.astype({TableField.METADATA.value: str}) - - transposed_data = [] - for _, record in data.iterrows(): - row = [record[col] for col in update_columns.keys()] - for key_column in key_columns: - row.append(record[key_column]) - transposed_data.append(row) - - query_str = self.renderer.get_string(query) - self.raw_query(query_str, transposed_data) - - def delete(self, table_name: str, conditions: List[FilterCondition] = None): - table_name = self._check_table(table_name) - - filter_conditions, _ = self._translate_conditions(conditions) - where_clause = self._construct_where_clause(filter_conditions) - - query = Delete(table=Identifier(table_name), 
where=where_clause) - query_str = self.renderer.get_string(query, with_failback=True) - self.raw_query(query_str) - - def drop_table(self, table_name: str, if_exists=True): - """ - Run a drop table query on the pgvector database. - """ - table_name = self._check_table(table_name) - self.raw_query(f'DROP TABLE IF EXISTS "{table_name}"') - - def create_index( - self, - table_name: str, - column_name: str = "embeddings", - index_type: Literal["ivfflat", "hnsw"] = "hnsw", - metric_type: str = None, - ): - """ - Create an index on the pgvector table. - Args: - table_name (str): Name of the table to create the index on. - column_name (str): Name of the column to create the index on. - index_type (str): Type of the index to create. Supported types are 'ivfflat' and 'hnsw'. - metric_type (str): Metric type for the index. Supported types are 'vector_l2_ops', 'vector_ip_ops', and 'vector_cosine_ops'. - """ - if metric_type is None: - metric_type = self.get_metric_type() - # Check if the index type is supported - if index_type not in ["ivfflat", "hnsw"]: - raise ValueError("Invalid index type. Supported types are 'ivfflat' and 'hnsw'.") - table_name = self._check_table(table_name) - # first we make sure embedding dimension is set - # Quote table name for PostgreSQL - table names may contain special chars like hyphens - embedding_dim_size_df = self.raw_query(f'SELECT vector_dims({column_name}) FROM "{table_name}" LIMIT 1') - # check if answer is empty - if embedding_dim_size_df.empty: - raise ValueError("Could not determine embedding dimension size. Make sure that knowledge base isn't empty") - try: - embedding_dim = int(embedding_dim_size_df.iloc[0, 0]) - # alter table to add dimension - self.raw_query(f'ALTER TABLE "{table_name}" ALTER COLUMN {column_name} TYPE vector({embedding_dim})') - except Exception: - raise ValueError("Could not determine embedding dimension size. 
Make sure that knowledge base isn't empty") - - # Create the index - self.raw_query(f'CREATE INDEX ON "{table_name}" USING {index_type} ({column_name} {metric_type})') diff --git a/mindsdb/integrations/handlers/pgvector_handler/requirements.txt b/mindsdb/integrations/handlers/pgvector_handler/requirements.txt deleted file mode 100644 index 92ec66f21ce..00000000000 --- a/mindsdb/integrations/handlers/pgvector_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pgvector==0.3.6 diff --git a/mindsdb/integrations/handlers/pgvector_handler/tests/test_pgvector_handler.py b/mindsdb/integrations/handlers/pgvector_handler/tests/test_pgvector_handler.py deleted file mode 100644 index 178e1bdeb00..00000000000 --- a/mindsdb/integrations/handlers/pgvector_handler/tests/test_pgvector_handler.py +++ /dev/null @@ -1,88 +0,0 @@ -import os -import psycopg2 -import pytest - -from mindsdb.integrations.handlers.pgvector_handler.pgvector_handler import PgVectorHandler - - -TEST_DB_NAME = os.environ.get("MDB_TEST_PGVECTOR_DATABASE", "pgvector_handler_test_db") -# Should match table name in data/pgvector/seed.sql -TEST_TABLE_NAME = "items" -# Should match column names in data/pgvector/seed.sql -COLUMN_NAMES = ["id", "content", "embeddings", "metadata"] - -HANDLER_KWARGS = { - "connection_data": { - "host": os.environ.get("MDB_TEST_PGVECTOR_HOST", "127.0.0.1"), - "port": os.environ.get("MDB_TEST_PGVECTOR_PORT", "5432"), - "user": os.environ.get("MDB_TEST_PGVECTOR_USER", "postgres"), - "password": os.environ.get("MDB_TEST_PGVECTOR_PASSWORD", "supersecret"), - "database": TEST_DB_NAME, - } -} - - -def init_db(): - """Seed the test DB with some data""" - conn_info = HANDLER_KWARGS["connection_data"].copy() - conn_info["database"] = "postgres" - db = psycopg2.connect(**conn_info) - db.autocommit = True - cursor = db.cursor() - - try: - cursor.execute(f"DROP DATABASE IF EXISTS {TEST_DB_NAME};") - db.commit() - - # Create the test database - cursor.execute(f"CREATE DATABASE {TEST_DB_NAME};") - 
db.commit() - - # Reconnect to the new database - conn_info["database"] = TEST_DB_NAME - db = psycopg2.connect(**conn_info) - db.autocommit = True - cursor = db.cursor() - - # Seed the database with data - curr_dir = os.path.dirname(os.path.realpath(__file__)) - seed_sql_path = os.path.join(curr_dir, "data", "pgvector", "seed.sql") - with open(seed_sql_path, "r") as sql_seed_file: - cursor.execute(sql_seed_file.read()) - db.commit() - - finally: - # Close the cursor and the connection - cursor.close() - db.close() - - -@pytest.fixture(scope="module") -def handler(): - init_db() - handler = PgVectorHandler("test_handler", **HANDLER_KWARGS) - yield handler - - -@pytest.mark.skipif( - os.environ.get("MDB_TEST_PGVECTOR_HOST") is None, reason="MDB_TEST_PGVECTOR_HOST environment variable not set" -) -class TestPgvectorConnection: - def test_connect(self, handler): - handler.connect() - assert handler.is_connected, "connection error" - - def test_check_connection(self, handler): - res = handler.check_connection() - assert res.success, res.error_message - - -@pytest.mark.skipif( - os.environ.get("MDB_TEST_PGVECTOR_HOST") is None, reason="MDB_TEST_PGVECTOR_HOST environment variable not set" -) -class TestPgvectorQuery: - def test_select(self, handler): - result = handler.select(TEST_TABLE_NAME) - assert not result.empty - for col in COLUMN_NAMES: - assert col in result.columns diff --git a/mindsdb/integrations/handlers/postgres_handler/README.md b/mindsdb/integrations/handlers/postgres_handler/README.md deleted file mode 100644 index 37eae7236ac..00000000000 --- a/mindsdb/integrations/handlers/postgres_handler/README.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -title: PostgreSQL -sidebarTitle: PostgreSQL ---- - -This documentation describes the integration of MindsDB with [PostgreSQL](https://www.postgresql.org/), a powerful, open-source, object-relational database system. 
-The integration allows MindsDB to access data stored in the PostgreSQL database and enhance PostgreSQL with AI capabilities. - -### Prerequisites - -Before proceeding, ensure the following prerequisites are met: - - 1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). - 2. To connect PostgreSQL to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to your PostgreSQL database from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE postgresql_conn -WITH ENGINE = 'postgres', -PARAMETERS = { - "host": "127.0.0.1", - "port": 5432, - "database": "postgres", - "user": "postgres", - "schema": "data", - "password": "password" -}; -``` - -Required connection parameters include the following: - -* `user`: The username for the PostgreSQL database. -* `password`: The password for the PostgreSQL database. -* `host`: The hostname, IP address, or URL of the PostgreSQL server. -* `port`: The port number for connecting to the PostgreSQL server. -* `database`: The name of the PostgreSQL database to connect to. - -Optional connection parameters include the following: - -* `schema`: The database schema to use. Default is public. -* `sslmode`: The SSL mode for the connection. -* `connection_parameters`: allows passing any PostgreSQL libpq parameters, such as: - * SSL settings: sslrootcert, sslcert, sslkey, sslcrl, sslpassword - * Network and reliability options: connect_timeout, keepalives, keepalives_idle, keepalives_interval, keepalives_count - * Session options: application_name, options, client_encoding - * Any other libpq-supported parameter - -## Usage - -The following usage examples utilize the connection to PostgreSQL made via the `CREATE DATABASE` statement and named `postgresql_conn`. 
- -Retrieve data from a specified table by providing the integration name, schema, and table name: - -```sql -SELECT * -FROM postgresql_conn.table_name -LIMIT 10; -``` - -Run PostgreSQL-native queries directly on the connected PostgreSQL database: - -```sql -SELECT * FROM postgresql_conn ( - - --Native Query Goes Here - SELECT - model, - COUNT(*) OVER (PARTITION BY model, year) AS units_to_sell, - ROUND((CAST(tax AS decimal) / price), 3) AS tax_div_price - FROM used_car_price - -); -``` - - -**Next Steps** - -Follow [this tutorial](https://docs.mindsdb.com/use-cases/predictive_analytics/house-sales-forecasting) to see more use case examples. - - -## Troubleshooting - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the PostgreSQL database. -* **Checklist**: - 1. Make sure the PostgreSQL server is active. - 2. Confirm that host, port, user, schema, and password are correct. Try a direct PostgreSQL connection. - 3. Ensure a stable network between MindsDB and PostgreSQL. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - 2. 
Examples: - * Incorrect: SELECT * FROM integration.travel data - * Incorrect: SELECT * FROM integration.'travel data' - * Correct: SELECT * FROM integration.\`travel data\` - diff --git a/mindsdb/integrations/handlers/postgres_handler/__about__.py b/mindsdb/integrations/handlers/postgres_handler/__about__.py deleted file mode 100644 index cadf136d96b..00000000000 --- a/mindsdb/integrations/handlers/postgres_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB PostgreSQL handler' -__package_name__ = 'mindsdb_postgres_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for PostgreSQL" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/postgres_handler/__init__.py b/mindsdb/integrations/handlers/postgres_handler/__init__.py deleted file mode 100644 index 0a11123446a..00000000000 --- a/mindsdb/integrations/handlers/postgres_handler/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args_example, connection_args - -try: - from .postgres_handler import PostgresHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "PostgreSQL" -name = "postgres" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.MINDSDB - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "support_level", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/postgres_handler/connection_args.py b/mindsdb/integrations/handlers/postgres_handler/connection_args.py deleted file mode 100644 index 
1c3a66f3fca..00000000000 --- a/mindsdb/integrations/handlers/postgres_handler/connection_args.py +++ /dev/null @@ -1,65 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the PostgreSQL server.', - 'required': True, - 'label': 'User' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the PostgreSQL server.', - 'required': True, - 'label': 'Password', - 'secret': True - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the PostgreSQL server.', - 'required': True, - 'label': 'Database' - }, - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the PostgreSQL server. NOTE: use \'127.0.0.1\' instead of \'localhost\' to connect to local server.', - 'required': True, - 'label': 'Host' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The TCP/IP port of the PostgreSQL server. 
Must be an integer.', - 'required': True, - 'label': 'Port' - }, - schema={ - 'type': ARG_TYPE.STR, - 'description': 'The schema in which objects are searched first.', - 'required': False, - 'label': 'Schema' - }, - sslmode={ - 'type': ARG_TYPE.STR, - 'description': 'sslmode that will be used for connection.', - 'required': False, - 'label': 'sslmode' - }, - connection_parameters={ - 'type': ARG_TYPE.DICT, - 'description': 'Connection string parameters', - 'required': False, - 'label': 'connection_parameters' - } -) - -connection_args_example = OrderedDict( - host='127.0.0.1', - port=5432, - user='root', - schema='public', - password='password', - database='database' -) diff --git a/mindsdb/integrations/handlers/postgres_handler/icon.svg b/mindsdb/integrations/handlers/postgres_handler/icon.svg deleted file mode 100644 index 89f73173903..00000000000 --- a/mindsdb/integrations/handlers/postgres_handler/icon.svg +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/postgres_handler/postgres_handler.py b/mindsdb/integrations/handlers/postgres_handler/postgres_handler.py deleted file mode 100644 index a3456a8e95a..00000000000 --- a/mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +++ /dev/null @@ -1,863 +0,0 @@ -import time -import json -import logging -from typing import Optional, Any, Generator - -import pandas as pd -from pandas import DataFrame -import psycopg -from psycopg import Column as PGColumn, Cursor -from psycopg.postgres import TypeInfo, types as pg_types -from psycopg.pq import ExecStatus - -from mindsdb_sql_parser import parse_sql, Select -from mindsdb_sql_parser.ast.base import ASTNode - -import mindsdb.utilities.profiler as profiler -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.utilities.types.column import Column -from mindsdb.utilities import log -from mindsdb.integrations.libs.base import 
MetaDatabaseHandler -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, - TableResponse, - OkResponse, - ErrorResponse, - DataHandlerResponse, -) -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE -from mindsdb.utilities.config import config as mindsdb_config - -logger = log.getLogger(__name__) - -SUBSCRIBE_SLEEP_INTERVAL = 1 - - -def _map_type(internal_type_name: str | None) -> MYSQL_DATA_TYPE: - """Map Postgres types to MySQL types. - - Args: - internal_type_name (str): The name of the Postgres type to map. - - Returns: - MYSQL_DATA_TYPE: The MySQL type that corresponds to the Postgres type. - """ - fallback_type = MYSQL_DATA_TYPE.VARCHAR - - if internal_type_name is None: - return fallback_type - - internal_type_name = internal_type_name.lower() - types_map = { - ("smallint", "smallserial"): MYSQL_DATA_TYPE.SMALLINT, - ("integer", "int", "serial"): MYSQL_DATA_TYPE.INT, - ("bigint", "bigserial"): MYSQL_DATA_TYPE.BIGINT, - ("real", "float"): MYSQL_DATA_TYPE.FLOAT, - ("numeric", "decimal"): MYSQL_DATA_TYPE.DECIMAL, - ("double precision",): MYSQL_DATA_TYPE.DOUBLE, - ("character varying", "varchar"): MYSQL_DATA_TYPE.VARCHAR, - # NOTE: if return chars-types as mysql's CHAR, then response will be padded with spaces, so return as TEXT - ("money", "character", "char", "bpchar", "bpchar", "text"): MYSQL_DATA_TYPE.TEXT, - ("timestamp", "timestamp without time zone", "timestamp with time zone"): MYSQL_DATA_TYPE.DATETIME, - ("date",): MYSQL_DATA_TYPE.DATE, - ("time", "time without time zone", "time with time zone"): MYSQL_DATA_TYPE.TIME, - ("boolean",): MYSQL_DATA_TYPE.BOOL, - ("bytea",): MYSQL_DATA_TYPE.BINARY, - ("json", "jsonb"): MYSQL_DATA_TYPE.JSON, - } - - for db_types_list, mysql_data_type in types_map.items(): - if internal_type_name in db_types_list: - return mysql_data_type - - logger.debug(f"Postgres handler type mapping: unknown type: 
{internal_type_name}, use VARCHAR as fallback.") - return fallback_type - - -def _get_columns(cursor: Cursor) -> list[Column]: - """Get columns from cursor. - - Args: - cursor (psycopg.Cursor): cursor object. - - Returns: - List of columns - """ - description: list[PGColumn] = cursor.description - mysql_types: list[MYSQL_DATA_TYPE] = [] - for column in description: - if column.type_display == "vector": - # 'vector' is type of pgvector extension, added here as text to not import pgvector - # NOTE: data returned as numpy array - mysql_types.append(MYSQL_DATA_TYPE.VECTOR) - continue - pg_type_info: TypeInfo = pg_types.get(column.type_code) - if pg_type_info is None: - # postgres may return 'polymorphic type', which are not present in the pg_types - # list of 'polymorphic type' can be obtained: - # SELECT oid, typname, typcategory FROM pg_type WHERE typcategory = 'P' ORDER BY oid; - if column.type_code in (2277, 5078): - # anyarray, anycompatiblearray - regtype = "json" - else: - logger.warning(f"Postgres handler: unknown type: {column.type_code}") - mysql_types.append(MYSQL_DATA_TYPE.TEXT) - continue - elif pg_type_info.array_oid == column.type_code: - # it is any array, handle is as json - regtype: str = "json" - else: - regtype: str = pg_type_info.regtype if pg_type_info is not None else None - mysql_type = _map_type(regtype) - mysql_types.append(mysql_type) - - result = [] - for i, column in enumerate(cursor.description): - if mysql_types[i] in ( - MYSQL_DATA_TYPE.SMALLINT, - MYSQL_DATA_TYPE.INT, - MYSQL_DATA_TYPE.MEDIUMINT, - MYSQL_DATA_TYPE.BIGINT, - MYSQL_DATA_TYPE.TINYINT, - ): - expected_dtype = "Int64" - elif mysql_types[i] in (MYSQL_DATA_TYPE.BOOL, MYSQL_DATA_TYPE.BOOLEAN): - expected_dtype = "boolean" - else: - expected_dtype = None - result.append( - Column(name=column.name, type=mysql_types[i], original_type=column.type_display, dtype=expected_dtype) - ) - return result - - -def _make_df(result: list[tuple[Any]], columns: list[Column]) -> pd.DataFrame: - 
"""Make pandas DataFrame from result and columns. - - Args: - result (list[tuple[Any]]): result of the query. - columns (list[Column]): list of columns. - - Returns: - pd.DataFrame: pandas DataFrame. - """ - serieses = [] - for i, column in enumerate(columns): - serieses.append(pd.Series([row[i] for row in result], dtype=column.dtype, name=column.name)) - return pd.concat(serieses, axis=1, copy=False) - - -class PostgresHandler(MetaDatabaseHandler): - """ - This handler handles connection and execution of the PostgreSQL statements. - """ - - name = "postgres" - stream_response = True - - @profiler.profile("init_pg_handler") - def __init__(self, name=None, **kwargs): - super().__init__(name) - self.parser = parse_sql - self.connection_args = kwargs.get("connection_data") - self.dialect = "postgresql" - self.database = self.connection_args.get("database") - self.renderer = SqlalchemyRender("postgres") - - self.connection = None - self.is_connected = False - self.cache_thread_safe = True - - def __del__(self): - if self.is_connected: - self.disconnect() - - def _make_connection_args(self): - config = { - "host": self.connection_args.get("host"), - "port": self.connection_args.get("port"), - "user": self.connection_args.get("user"), - "password": self.connection_args.get("password"), - "dbname": self.connection_args.get("database"), - } - - # https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS - connection_parameters = self.connection_args.get("connection_parameters") - if isinstance(connection_parameters, dict) is False: - connection_parameters = {} - if "connect_timeout" not in connection_parameters: - connection_parameters["connect_timeout"] = 10 - config.update(connection_parameters) - - if self.connection_args.get("sslmode"): - config["sslmode"] = self.connection_args.get("sslmode") - - if self.connection_args.get("autocommit"): - config["autocommit"] = self.connection_args.get("autocommit") - - return config - - @profiler.profile() - 
def connect(self): - """ - Establishes a connection to a PostgreSQL database. - - Raises: - psycopg.Error: If an error occurs while connecting to the PostgreSQL database. - - Returns: - psycopg.Connection: A connection object to the PostgreSQL database. - """ - if self.is_connected: - return self.connection - - config = self._make_connection_args() - try: - self.connection = psycopg.connect(**config) - self.is_connected = True - - schema = self.connection_args.get("schema") - if schema: - with self.connection.cursor() as cur: - cur.execute(f'SET search_path TO "{schema}", public;') - self.connection.commit() - return self.connection - except psycopg.Error as e: - logger.error(f"Error connecting to PostgreSQL {self.database}, {e}!") - self.is_connected = False - raise - - def disconnect(self): - """ - Closes the connection to the PostgreSQL database if it's currently open. - """ - if not self.is_connected: - return - self.connection.close() - self.is_connected = False - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the PostgreSQL database. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. - """ - response = StatusResponse(False) - need_to_close = not self.is_connected - - try: - connection = self.connect() - with connection.cursor() as cur: - # Execute a simple query to test the connection - cur.execute("select 1;") - response.success = True - except psycopg.Error as e: - logger.error(f"Error connecting to PostgreSQL {self.database}, {e}!") - response.error_message = str(e) - - if response.success and need_to_close: - self.disconnect() - elif not response.success and self.is_connected: - self.is_connected = False - - return response - - def _cast_dtypes(self, df: DataFrame, description: list) -> DataFrame: - """ - Cast df dtypes basing on postgres types - Note: - Date types casting is not provided because of there is no issues (so far). 
- By default pandas will cast postgres date types to: - - date -> object - - time -> object - - timetz -> object - - timestamp -> datetime64[ns] - - timestamptz -> datetime64[ns, {tz}] - - Args: - df (DataFrame) - description (list): psycopg cursor description - """ - types_map = { - "int2": "int16", - "int4": "int32", - "int8": "int64", - "numeric": "float64", - "float4": "float32", - "float8": "float64", - } - columns = df.columns - df.columns = list(range(len(columns))) - for column_index, column_name in enumerate(df.columns): - col = df[column_name] - if str(col.dtype) == "object": - pg_type_info: TypeInfo = pg_types.get(description[column_index].type_code) # type_code is int!? - if pg_type_info is not None and pg_type_info.name in types_map: - col = col.fillna(0) # TODO rework - try: - df[column_name] = col.astype(types_map[pg_type_info.name]) - except ValueError as e: - logger.error(f"Error casting column {col.name} to {types_map[pg_type_info.name]}: {e}") - df.columns = columns - - def native_query(self, query: str, params=None, stream: bool = True, **kwargs) -> DataHandlerResponse: - """Executes a SQL query on the PostgreSQL database and returns the result. - NOTE: 'INSERT' (and may be some else) queries can not be executed on the server side, - but there are fallbackto client side execution. - - Args: - query (str): The SQL query to be executed. - params (list): The parameters to be passed to the query. - stream (bool): Whether to stream the results of the query. - **kwargs: Additional keyword arguments. - - Returns: - DataHandlerResponse: A response object containing the result of the query or an error message. 
- """ - if stream is False: - response = self._execute_client_side(query, params, **kwargs) - elif params is not None: - logger.info("Server side cursor does not support 'fetchmany', executing with client side cursor") - response = self._execute_client_side(query, params, **kwargs) - else: - generator = self._execute_server_side(query, **kwargs) - try: - response: TableResponse = next(generator) - response.data_generator = generator - except StopIteration as e: - response = e.value - if isinstance(response, DataHandlerResponse) is False: - raise - return response - - def _execute_client_side(self, query: str, params=None, **kwargs) -> TableResponse | OkResponse | ErrorResponse: - """Executes a SQL query on the PostgreSQL database and returns the result. - - Args: - query (str): The SQL query to be executed. - params (list): The parameters to be passed to the query. - **kwargs: Additional keyword arguments. - - Returns: - TableResponse | OkResponse | ErrorResponse: A response object containing the result of the query or an error message. - """ - connection = self.connect() - with connection.cursor() as cur: - try: - if params is not None: - cur.executemany(query, params) - else: - cur.execute(query) - if cur.pgresult is None or ExecStatus(cur.pgresult.status) == ExecStatus.COMMAND_OK: - response = OkResponse(affected_rows=cur.rowcount) - else: - result = cur.fetchall() - columns: list[Column] = _get_columns(cur) - response = TableResponse( - affected_rows=cur.rowcount, columns=columns, data=_make_df(result, columns) - ) - connection.commit() - except Exception as e: - response = self._handle_query_exception(e, query, connection) - - return response - - def _execute_server_side( - self, query: str, **kwargs - ) -> Generator[TableResponse | pd.DataFrame, None, OkResponse | ErrorResponse]: - """Execute a SQL query on the PostgreSQL database and return a generator of data frames. - - Args: - query (str): The SQL query to be executed. 
- params (list): The parameters to be passed to the query. - **kwargs: Additional keyword arguments. - - Returns: - Generator[TableResponse | pd.DataFrame, None, OkResponse | ErrorResponse]: Generator of data frames. - """ - connection = self.connect() - with connection.cursor(name=f"mindsdb_{id(self)}") as cursor: - try: - try: - cursor.execute(query) - except psycopg.errors.SyntaxError as e: - # NOTE: INSERT queries cannot be executed server-side. When they fail, they produce a syntax error - # that always starts with the text below, regardless of the INSERT query format. - lower_e = str(e).lower() - if not lower_e.startswith('syntax error at or near "insert"') and not lower_e.startswith( - 'syntax error at or near "drop"' - ): - raise - connection.rollback() - return self._execute_client_side(query=query) - - if cursor.description is None: - connection.commit() - return OkResponse(affected_rows=cursor.rowcount) - - columns: list[Column] = _get_columns(cursor) - yield TableResponse(affected_rows=cursor.rowcount, columns=columns) - while result := cursor.fetchmany(size=mindsdb_config["data_stream"]["fetch_size"]): - yield _make_df(result, columns) - connection.commit() - except Exception as e: - return self._handle_query_exception(e, query, connection) - - def _handle_query_exception(self, e: Exception, query: str, connection) -> ErrorResponse: - """Handle query execution errors with appropriate logging and rollback. - - Args: - e: The exception that was raised - query: The SQL query that failed - connection: The database connection to rollback - - Returns: - ErrorResponse with appropriate error details - """ - if isinstance(e, (psycopg.ProgrammingError, psycopg.DataError)): - # These are 'expected' exceptions, they should not be treated as mindsdb's errors - # ProgrammingError: table not found or already exists, syntax error, etc - # DataError: division by zero, numeric value out of range, etc. 
- # https://www.psycopg.org/psycopg3/docs/api/errors.html - log_message = "Database query failed with error, likely due to invalid SQL query" - if logger.isEnabledFor(logging.DEBUG): - log_message += f". Executed query:\n{query}" - logger.info(log_message) - connection.rollback() - return ErrorResponse(error_code=0, error_message=str(e), is_expected_error=True) - else: - logger.error(f"Error running query:\n{query}\non {self.database}, {e}") - connection.rollback() - return ErrorResponse(error_code=0, error_message=str(e)) - - def insert(self, table_name: str, df: pd.DataFrame) -> Response: - need_to_close = not self.is_connected - - connection = self.connect() - - columns = df.columns - - resp = self.get_columns(table_name) - - # copy requires precise cases of names: get current column names from table and adapt input dataframe columns - if resp.data_frame is not None and not resp.data_frame.empty: - db_columns = {c.lower(): c for c in resp.data_frame["COLUMN_NAME"]} - - # try to get case of existing column - columns = [db_columns.get(c.lower(), c) for c in columns] - - columns = [f'"{c}"' for c in columns] - rowcount = None - - with connection.cursor() as cur: - try: - with cur.copy(f'copy "{table_name}" ({",".join(columns)}) from STDIN WITH CSV') as copy: - df.to_csv(copy, index=False, header=False) - - connection.commit() - except Exception as e: - logger.error(f"Error running insert to {table_name} on {self.database}, {e}!") - connection.rollback() - raise e - rowcount = cur.rowcount - - if need_to_close: - self.disconnect() - - return Response(RESPONSE_TYPE.OK, affected_rows=rowcount) - - @profiler.profile() - def query(self, query: ASTNode) -> DataHandlerResponse: - """ - Executes a SQL query represented by an ASTNode and retrieves the data. - - Args: - query (ASTNode): An ASTNode representing the SQL query to be executed. - - Returns: - DataHandlerResponse: The response from the `native_query` method, - containing the result of the SQL query execution. 
- """ - query_str, params = self.renderer.get_exec_params(query, with_failback=True) - logger.debug(f"Executing SQL query: {query_str}") - support_stream = isinstance(query, Select) - return self.native_query(query_str, params, stream=support_stream) - - def get_tables(self, all: bool = False) -> Response: - """ - Retrieves a list of all non-system tables and views in the current schema of the PostgreSQL database. - - Returns: - Response: A response object containing the list of tables and views, formatted as per the `Response` class. - """ - all_filter = "and table_schema = current_schema()" - if all is True: - all_filter = "" - query = f""" - SELECT - table_schema, - table_name, - table_type - FROM - information_schema.tables - WHERE - table_schema NOT IN ('information_schema', 'pg_catalog') - and table_type in ('BASE TABLE', 'VIEW') - {all_filter} - """ - return self.native_query(query) - - def get_columns(self, table_name: str, schema_name: Optional[str] = None) -> Response: - """ - Retrieves column details for a specified table in the PostgreSQL database. - - Args: - table_name (str): The name of the table for which to retrieve column information. - schema_name (str): The name of the schema in which the table is located. - - Returns: - Response: A response object containing the column details, formatted as per the `Response` class. - - Raises: - ValueError: If the 'table_name' is not a valid string. 
- """ - - if not table_name or not isinstance(table_name, str): - raise ValueError("Invalid table name provided.") - if isinstance(schema_name, str): - schema_name = f"'{schema_name}'" - else: - schema_name = "current_schema()" - query = f""" - SELECT - COLUMN_NAME, - DATA_TYPE, - ORDINAL_POSITION, - COLUMN_DEFAULT, - IS_NULLABLE, - CHARACTER_MAXIMUM_LENGTH, - CHARACTER_OCTET_LENGTH, - NUMERIC_PRECISION, - NUMERIC_SCALE, - DATETIME_PRECISION, - CHARACTER_SET_NAME, - COLLATION_NAME - FROM - information_schema.columns - WHERE - table_name = '{table_name}' - AND - table_schema = {schema_name} - """ - # If it is used by pgvector handler - `native_query` method of pgvector handler will be used - # in that case if shared pgvector db is used - `native_query` will be skipped (return empty result) - # `no_restrict` flag allows to execute native query, and it will call `native_query` of postgres handler - result = self.native_query(query, no_restrict=True) - result.to_columns_table_response(map_type_fn=_map_type) - return result - - def subscribe(self, stop_event, callback, table_name, columns=None, **kwargs): - config = self._make_connection_args() - config["autocommit"] = True - - conn = psycopg.connect(**config) - - # create db trigger - trigger_name = f"mdb_notify_{table_name}" - - before, after = "", "" - - if columns: - # check column exist - conn.execute(f"select {','.join(columns)} from {table_name} limit 0") - - columns = set(columns) - trigger_name += "_" + "_".join(columns) - - news, olds = [], [] - for column in columns: - news.append(f"NEW.{column}") - olds.append(f"OLD.{column}") - - before = f"IF ({', '.join(news)}) IS DISTINCT FROM ({', '.join(olds)}) then\n" - after = "\nEND IF;" - else: - columns = set() - - func_code = f""" - CREATE OR REPLACE FUNCTION {trigger_name}() - RETURNS trigger AS $$ - DECLARE - BEGIN - {before} - PERFORM pg_notify( '{trigger_name}', row_to_json(NEW)::text); - {after} - RETURN NEW; - END; - $$ LANGUAGE plpgsql; - """ - 
conn.execute(func_code) - - # for after update - new and old have the same values - conn.execute(f""" - CREATE OR REPLACE TRIGGER {trigger_name} - BEFORE INSERT OR UPDATE ON {table_name} - FOR EACH ROW - EXECUTE PROCEDURE {trigger_name}(); - """) - conn.commit() - - # start listen - conn.execute(f"LISTEN {trigger_name};") - - def process_event(event): - try: - row = json.loads(event.payload) - except json.JSONDecodeError: - return - - # check column in input data - if not columns or columns.intersection(row.keys()): - callback(row) - - try: - conn.add_notify_handler(process_event) - - while True: - if stop_event.is_set(): - # exit trigger - return - - # trigger getting updates - # https://www.psycopg.org/psycopg3/docs/advanced/async.html#asynchronous-notifications - conn.execute("SELECT 1").fetchone() - - time.sleep(SUBSCRIBE_SLEEP_INTERVAL) - - finally: - conn.execute(f"drop TRIGGER {trigger_name} on {table_name}") - conn.execute(f"drop FUNCTION {trigger_name}") - conn.commit() - - conn.close() - - def meta_get_tables(self, table_names: Optional[list] = None) -> Response: - """ - Retrieves metadata information about the tables in the PostgreSQL database to be stored in the data catalog. - - Args: - table_names (list): A list of table names for which to retrieve metadata information. - - Returns: - Response: A response object containing the metadata information, formatted as per the `Response` class. 
- """ - query = """ - SELECT - t.table_name, - t.table_schema, - t.table_type, - obj_description(pgc.oid, 'pg_class') AS table_description, - pgc.reltuples AS row_count - FROM information_schema.tables t - JOIN pg_catalog.pg_namespace pgn - ON pgn.nspname = t.table_schema - JOIN pg_catalog.pg_class pgc - ON pgc.relname = t.table_name - AND pgc.relnamespace = pgn.oid - WHERE t.table_schema = current_schema() - AND t.table_type in ('BASE TABLE', 'VIEW') - AND t.table_name NOT LIKE 'pg_%' - AND t.table_name NOT LIKE 'sql_%' - """ - - if table_names is not None and len(table_names) > 0: - table_names = [f"'{t}'" for t in table_names] - query += f" AND t.table_name IN ({','.join(table_names)})" - - result = self.native_query(query) - return result - - def meta_get_columns(self, table_names: Optional[list] = None) -> Response: - """ - Retrieves column metadata for the specified tables (or all tables if no list is provided). - - Args: - table_names (list): A list of table names for which to retrieve column metadata. - - Returns: - Response: A response object containing the column metadata. - """ - query = """ - SELECT - c.table_name, - c.column_name, - c.data_type, - col_description(pgc.oid, c.ordinal_position) AS column_description, - c.column_default, - (c.is_nullable = 'YES') AS is_nullable - FROM information_schema.columns c - JOIN pg_namespace pgn ON pgn.nspname = c.table_schema - JOIN pg_class pgc - ON pgc.relname = c.table_name - AND pgc.relnamespace = pgn.oid - WHERE c.table_schema = current_schema() - AND pgc.relkind = 'r' -- Only consider regular tables (avoids indexes, sequences, etc.) 
- AND c.table_name NOT LIKE 'pg_%' - AND c.table_name NOT LIKE 'sql_%' - """ - - if table_names is not None and len(table_names) > 0: - table_names = [f"'{t}'" for t in table_names] - query += f" AND c.table_name IN ({','.join(table_names)})" - - result = self.native_query(query) - return result - - def meta_get_column_statistics(self, table_names: Optional[list] = None) -> Response: - """ - Retrieves column statistics (e.g., most common values, frequencies, null percentage, and distinct value count) - for the specified tables or all tables if no list is provided. - - Args: - table_names (list): A list of table names for which to retrieve column statistics. - - Returns: - Response: A response object containing the column statistics. - """ - table_filter = "" - if table_names is not None and len(table_names) > 0: - quoted_names = [f"'{t}'" for t in table_names] - table_filter = f" AND ps.tablename IN ({','.join(quoted_names)})" - - query = ( - """ - SELECT - ps.tablename AS TABLE_NAME, - ps.attname AS COLUMN_NAME, - ROUND(ps.null_frac::numeric * 100, 2) AS NULL_PERCENTAGE, - CASE - WHEN ps.n_distinct < 0 THEN NULL - ELSE ps.n_distinct::bigint - END AS DISTINCT_VALUES_COUNT, - ps.most_common_vals AS MOST_COMMON_VALUES, - ps.most_common_freqs AS MOST_COMMON_FREQUENCIES, - ps.histogram_bounds - FROM pg_stats ps - WHERE ps.schemaname = current_schema() - AND ps.tablename NOT LIKE 'pg_%' - AND ps.tablename NOT LIKE 'sql_%' - """ - + table_filter - + """ - ORDER BY ps.tablename, ps.attname - """ - ) - - result = self.native_query(query) - - if result.type != RESPONSE_TYPE.TABLE or result.data_frame is None: - return result - - df = result.data_frame - - # Extract min/max from histogram bounds - def extract_min_max(histogram_str): - if histogram_str and str(histogram_str) != "nan": - clean = str(histogram_str).strip("{}") - if clean: - values = clean.split(",") - min_val = values[0].strip(" \"'") if values else None - max_val = values[-1].strip(" \"'") if values else None 
- return min_val, max_val - return None, None - - min_max_values = df["histogram_bounds"].apply(extract_min_max) - df["MINIMUM_VALUE"] = min_max_values.apply(lambda x: x[0]) - df["MAXIMUM_VALUE"] = min_max_values.apply(lambda x: x[1]) - - # Convert most_common_values and most_common_freqs to arrays. - df["MOST_COMMON_VALUES"] = df["most_common_values"].apply( - lambda x: x.strip("{}").split(",") if isinstance(x, str) else [] - ) - df["MOST_COMMON_FREQUENCIES"] = df["most_common_frequencies"].apply( - lambda x: x.strip("{}").split(",") if isinstance(x, str) else [] - ) - - result.data_frame = df.drop(columns=["histogram_bounds", "most_common_values", "most_common_frequencies"]) - - return result - - def meta_get_primary_keys(self, table_names: Optional[list] = None) -> Response: - """ - Retrieves primary key information for the specified tables (or all tables if no list is provided). - - Args: - table_names (list): A list of table names for which to retrieve primary key information. - - Returns: - Response: A response object containing the primary key information. - """ - query = """ - SELECT - tc.table_name, - kcu.column_name, - kcu.ordinal_position, - tc.constraint_name - FROM - information_schema.table_constraints AS tc - JOIN information_schema.key_column_usage AS kcu - ON tc.constraint_name = kcu.constraint_name - AND tc.constraint_schema = kcu.constraint_schema - AND tc.table_schema = kcu.table_schema - WHERE - tc.constraint_type = 'PRIMARY KEY' - AND tc.table_schema = current_schema() - """ - - if table_names is not None and len(table_names) > 0: - table_names = [f"'{t}'" for t in table_names] - query += f" AND tc.table_name IN ({','.join(table_names)})" - - result = self.native_query(query) - return result - - def meta_get_foreign_keys(self, table_names: Optional[list] = None) -> Response: - """ - Retrieves foreign key information for the specified tables (or all tables if no list is provided). 
- - Args: - table_names (list): A list of table names for which to retrieve foreign key information. - - Returns: - Response: A response object containing the foreign key information. - """ - query = """ - SELECT - ccu.table_name AS parent_table_name, - ccu.column_name AS parent_column_name, - tc.table_name AS child_table_name, - kcu.column_name AS child_column_name, - tc.constraint_name - FROM - information_schema.table_constraints AS tc - JOIN information_schema.key_column_usage AS kcu - ON tc.constraint_name = kcu.constraint_name - AND tc.constraint_schema = kcu.constraint_schema - AND tc.table_schema = kcu.table_schema - JOIN information_schema.constraint_column_usage AS ccu - ON ccu.constraint_name = tc.constraint_name - AND ccu.constraint_schema = tc.constraint_schema - WHERE - tc.constraint_type = 'FOREIGN KEY' - AND tc.table_schema = current_schema() - """ - - if table_names is not None and len(table_names) > 0: - table_names = [f"'{t}'" for t in table_names] - query += f" AND tc.table_name IN ({','.join(table_names)})" - - result = self.native_query(query) - return result diff --git a/mindsdb/integrations/handlers/redshift_handler/README.md b/mindsdb/integrations/handlers/redshift_handler/README.md deleted file mode 100644 index 777a72bacd6..00000000000 --- a/mindsdb/integrations/handlers/redshift_handler/README.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -title: Amazon Redshift -sidebarTitle: Amazon Redshift ---- - -This documentation describes the integration of MindsDB with [Amazon Redshift](https://docs.aws.amazon.com/redshift/latest/mgmt/welcome.html), a fully managed, petabyte-scale data warehouse service in the cloud. You can start with just a few hundred gigabytes of data and scale to a petabyte or more, enabling you to use your data to acquire new insights for your business and customers. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. 
Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Redshift to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to your Redshift database from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE redshift_datasource -WITH - engine = 'redshift', - parameters = { - "host": "examplecluster.abc123xyz789.us-west-1.redshift.amazonaws.com", - "port": 5439, - "database": "example_db", - "user": "awsuser", - "password": "my_password" - }; -``` - -Required connection parameters include the following: - -* `host`: The host name or IP address of the Redshift cluster. -* `port`: The port to use when connecting with the Redshift cluster. -* `database`: The database name to use when connecting with the Redshift cluster. -* `user`: The username to authenticate the user with the Redshift cluster. -* `password`: The password to authenticate the user with the Redshift cluster. - -Optional connection parameters include the following: - -* `schema`: The database schema to use. Default is public. -* `sslmode`: The SSL mode for the connection. - -## Usage - -Retrieve data from a specified table by providing the integration name, schema, and table name: - -```sql -SELECT * -FROM redshift_datasource.schema_name.table_name -LIMIT 10; -``` - -Run Amazon Redshift SQL queries directly on the connected Redshift database: - -```sql -SELECT * FROM redshift_datasource ( - - --Native Query Goes Here - WITH VENUECOPY AS (SELECT * FROM VENUE) - SELECT * FROM VENUECOPY ORDER BY 1 LIMIT 10; - -); -``` - - -The above examples utilize `redshift_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the Amazon Redshift cluster. 
-* **Checklist**: - 1. Make sure the Redshift cluster is active. - 2. Confirm that host, port, user, password and database are correct. Try a direct Redshift connection using a client like DBeaver. - 3. Ensure that the security settings of the Redshift cluster allow connections from MindsDB. - 4. Ensure a stable network between MindsDB and Redshift. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel data - * Incorrect: SELECT * FROM integration.'travel data' - * Correct: SELECT * FROM integration.\`travel data\` - - -This [troubleshooting guide](https://docs.aws.amazon.com/redshift/latest/mgmt/troubleshooting-connections.html) provided by AWS might also be helpful. \ No newline at end of file diff --git a/mindsdb/integrations/handlers/redshift_handler/__about__.py b/mindsdb/integrations/handlers/redshift_handler/__about__.py deleted file mode 100644 index 437cfbd6440..00000000000 --- a/mindsdb/integrations/handlers/redshift_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Redshift handler' -__package_name__ = 'mindsdb_redshift_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Redshift" -__author__ = 'Minura Punchihewa' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/redshift_handler/__init__.py b/mindsdb/integrations/handlers/redshift_handler/__init__.py deleted file mode 100644 index d1a8abdb3c8..00000000000 --- a/mindsdb/integrations/handlers/redshift_handler/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL - 
-from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example - -try: - from .redshift_handler import RedshiftHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Amazon Redshift" -name = "redshift" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.COMMUNITY - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "support_level", - "connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/redshift_handler/connection_args.py b/mindsdb/integrations/handlers/redshift_handler/connection_args.py deleted file mode 100644 index d3a30ca1f56..00000000000 --- a/mindsdb/integrations/handlers/redshift_handler/connection_args.py +++ /dev/null @@ -1,58 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the Redshift cluster.', - 'required': True, - 'label': 'Host' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'The port to use when connecting with the Redshift cluster.', - 'required': True, - 'label': 'Port' - }, - database={ - 'type': ARG_TYPE.STR, - 'description': 'The database name to use when connecting with the Redshift cluster.', - 'required': True, - 'label': 'Database' - }, - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the Redshift cluster.', - 'required': True, - 'label': 'User' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password to authenticate the user with the Redshift cluster.', - 'required': True, - 'label': 'Password', - 'secret': True - }, - schema={ - 'type': ARG_TYPE.STR, - 'description': 'The schema in 
which objects are searched first.', - 'required': False, - 'label': 'Schema' - }, - sslmode={ - 'type': ARG_TYPE.STR, - 'description': 'The SSL mode that will be used for connection.', - 'required': False, - 'label': 'sslmode' - } -) - -connection_args_example = OrderedDict( - host='examplecluster.abc123xyz789.us-west-1.redshift.amazonaws.com', - port='5439', - database='dev', - user='awsuser', - password='my_password' -) diff --git a/mindsdb/integrations/handlers/redshift_handler/icon.svg b/mindsdb/integrations/handlers/redshift_handler/icon.svg deleted file mode 100644 index 8843003c292..00000000000 --- a/mindsdb/integrations/handlers/redshift_handler/icon.svg +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/redshift_handler/redshift_handler.py b/mindsdb/integrations/handlers/redshift_handler/redshift_handler.py deleted file mode 100644 index 1e781723505..00000000000 --- a/mindsdb/integrations/handlers/redshift_handler/redshift_handler.py +++ /dev/null @@ -1,70 +0,0 @@ -import os -import numpy as np -import pandas as pd - -from mindsdb.utilities import log -from mindsdb.integrations.libs.response import ( - HandlerResponse as Response, - RESPONSE_TYPE -) -from mindsdb.integrations.handlers.postgres_handler.postgres_handler import PostgresHandler - -logger = log.getLogger(__name__) -os.environ["PGCLIENTENCODING"] = "utf-8" - - -class RedshiftHandler(PostgresHandler): - """ - This handler handles connection and execution of the Redshift statements. - """ - - name = 'redshift' - - def __init__(self, name: str, **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance. - **kwargs: arbitrary keyword arguments. - """ - super().__init__(name, **kwargs) - - def insert(self, table_name: str, df: pd.DataFrame): - """ - Handles the execution of INSERT statements. - - Args: - table_name (str): name of the table to insert the data into. 
- df (pd.DataFrame): data to be inserted into the table. - """ - need_to_close = not self.is_connected - - connection = self.connect() - - # Replace NaN values with None - df = df.replace({np.nan: None}) - - # Build the query to insert the data - columns = ', '.join([f'"{col}"' if ' ' in col else col for col in df.columns]) - values = ', '.join(['%s' for _ in range(len(df.columns))]) - query = f'INSERT INTO {table_name} ({columns}) VALUES ({values})' - - with connection.cursor() as cur: - try: - cur.executemany(query, df.values.tolist()) - response = Response(RESPONSE_TYPE.OK, affected_rows=cur.rowcount) - - connection.commit() - except Exception as e: - logger.error(f"Error inserting data into {table_name}, {e}!") - connection.rollback() - response = Response( - RESPONSE_TYPE.ERROR, - error_code=0, - error_message=str(e) - ) - - if need_to_close: - self.disconnect() - - return response diff --git a/mindsdb/integrations/handlers/rest_api_handler/README.md b/mindsdb/integrations/handlers/rest_api_handler/README.md deleted file mode 100644 index 037d6ff429a..00000000000 --- a/mindsdb/integrations/handlers/rest_api_handler/README.md +++ /dev/null @@ -1,168 +0,0 @@ ---- -title: REST API -sidebarTitle: REST API ---- - -This documentation describes the integration of MindsDB with generic REST APIs using bearer-token authentication. -The integration allows MindsDB to forward HTTP requests to any REST API using stored credentials via the passthrough endpoint — no SQL table mapping required. - -### Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. Obtain a bearer token (API key, personal access token, etc.) for the target REST API. 
- -## Connection - -Establish a connection to a REST API from MindsDB by executing the following SQL command: - -```sql -CREATE DATABASE my_api -WITH ENGINE = 'rest_api', -PARAMETERS = { - "base_url": "https://api.example.com", - "bearer_token": "your_token_here" -}; -``` - -Required connection parameters include the following: - -* `base_url`: The base URL of the REST API (e.g. `https://api.example.com`). All passthrough request paths are appended to this URL. -* `bearer_token`: The bearer token used for authentication. Injected as `Authorization: Bearer ` on every request. - -Optional connection parameters include the following: - -* `default_headers`: A JSON object of static headers added to every request (e.g. `{"Accept": "application/json"}`). -* `allowed_hosts`: A list of allowed hostnames for passthrough requests. Defaults to the hostname of `base_url`. Use `["*"]` to disable host containment. -* `test_path`: The path used by the `/passthrough/test` endpoint to verify connectivity. Defaults to `/`. - -### Examples - -Connect to the HubSpot API: - -```sql -CREATE DATABASE my_hubspot -WITH ENGINE = 'rest_api', -PARAMETERS = { - "base_url": "https://api.hubapi.com", - "bearer_token": "pat-eu1-..." -}; -``` - -Connect to a custom internal API with default headers: - -```sql -CREATE DATABASE my_internal_api -WITH ENGINE = 'rest_api', -PARAMETERS = { - "base_url": "https://internal.example.com/api/v2", - "bearer_token": "sk-...", - "default_headers": {"Accept": "application/json", "X-Team": "data"}, - "test_path": "/health" -}; -``` - -Connect to an API with multiple allowed hosts: - -```sql -CREATE DATABASE my_multi_region_api -WITH ENGINE = 'rest_api', -PARAMETERS = { - "base_url": "https://api.example.com", - "bearer_token": "your_token", - "allowed_hosts": ["api.example.com", "api.eu.example.com"] -}; -``` - -## Usage - -This handler is **passthrough-only** — it does not expose SQL tables. All interaction is through the REST passthrough endpoint. 
- -### Passthrough Requests - -Send HTTP requests to the upstream API through MindsDB: - -``` -POST /api/integrations/my_api/passthrough -``` - -```json -{ - "method": "GET", - "path": "/v1/users", - "query": {"limit": "10"}, - "headers": {"Accept": "application/json"} -} -``` - -The response wraps the upstream HTTP response: - -```json -{ - "status_code": 200, - "headers": {"content-type": "application/json"}, - "body": {"results": [...]}, - "content_type": "application/json" -} -``` - -Supported HTTP methods: `GET`, `POST`, `PUT`, `PATCH`, `DELETE`. - -### Testing the Connection - -Verify that the base URL, token, and host allowlist are configured correctly: - -``` -POST /api/integrations/my_api/passthrough/test -``` - -Returns: - -```json -{"ok": true, "status_code": 200, "host": "api.example.com", "latency_ms": 140} -``` - -Or on failure: - -```json -{"ok": false, "error_code": "auth_failed", "message": "upstream rejected credentials; base URL and allowlist look correct"} -``` - -## Security - -- **Credentials are never exposed.** The bearer token is stored in MindsDB and injected at request time. It is never returned to the caller. -- **Host containment.** Requests are restricted to hostnames in the allowlist (defaults to the `base_url` host). Private/loopback IP addresses are rejected by default. -- **Header filtering.** Callers cannot override `Authorization`, `Host`, `Cookie`, or `Proxy-*` headers. -- **Response scrubbing.** If the upstream API echoes the token in responses, it is replaced with `[REDACTED_API_KEY]` before returning to the caller. -- **Size limits.** Request bodies are capped at 1 MB, response bodies at 10 MB (configurable via environment variables). - -## Troubleshooting - - -`base_url is not configured` - -* **Symptoms**: Passthrough requests fail with a configuration error. -* **Checklist**: - 1. Ensure `base_url` is provided in the connection parameters. - 2. The URL must include the scheme (`https://`). 
- - - -`host 'X' is not in the datasource allowlist` - -* **Symptoms**: Passthrough requests to a valid URL are rejected. -* **Checklist**: - 1. The request path may resolve to a different hostname than `base_url`. - 2. Add the hostname to `allowed_hosts` in the connection parameters. - 3. Use `["*"]` to disable host containment (not recommended for production). - - - -`upstream rejected credentials (401/403)` - -* **Symptoms**: The `/passthrough/test` endpoint returns `error_code: "auth_failed"`. -* **Checklist**: - 1. Verify the bearer token is valid and not expired. - 2. Check that the token has the required scopes/permissions for the API endpoints you are calling. - diff --git a/mindsdb/integrations/handlers/rest_api_handler/__about__.py b/mindsdb/integrations/handlers/rest_api_handler/__about__.py deleted file mode 100644 index b7f131f401c..00000000000 --- a/mindsdb/integrations/handlers/rest_api_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB REST API handler" -__package_name__ = "mindsdb_rest_api_handler" -__version__ = "0.0.1" -__description__ = "MindsDB handler for generic REST APIs with bearer-token passthrough" -__author__ = "MindsDB Inc" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2026 - mindsdb" diff --git a/mindsdb/integrations/handlers/rest_api_handler/__init__.py b/mindsdb/integrations/handlers/rest_api_handler/__init__.py deleted file mode 100644 index d9f8fcf24eb..00000000000 --- a/mindsdb/integrations/handlers/rest_api_handler/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example - -try: - from .rest_api_handler import RestApiHandler as Handler - - import_error = None -except Exception as e: - Handler 
= None - import_error = e - -title = "REST API" -name = "rest_api" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.MINDSDB - -__all__ = [ - "Handler", - "version", - "name", - "type", - "support_level", - "title", - "description", - "import_error", - "icon_path", - "connection_args", - "connection_args_example", -] diff --git a/mindsdb/integrations/handlers/rest_api_handler/connection_args.py b/mindsdb/integrations/handlers/rest_api_handler/connection_args.py deleted file mode 100644 index bba20202ba0..00000000000 --- a/mindsdb/integrations/handlers/rest_api_handler/connection_args.py +++ /dev/null @@ -1,44 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - -connection_args = OrderedDict( - base_url={ - "type": ARG_TYPE.STR, - "description": "Base URL of the REST API (e.g. https://api.example.com)", - "required": True, - "label": "Base URL", - }, - bearer_token={ - "type": ARG_TYPE.PWD, - "description": "Bearer token injected as Authorization: Bearer ", - "required": True, - "label": "Bearer Token", - "secret": True, - }, - default_headers={ - "type": ARG_TYPE.DICT, - "description": 'Static headers added to every request (e.g. {"Accept": "application/json"})', - "required": False, - "label": "Default Headers", - }, - allowed_hosts={ - "type": ARG_TYPE.LIST, - "description": 'Allowed hostnames for passthrough requests. Defaults to the base_url host. Use ["*"] to disable containment.', - "required": False, - "label": "Allowed Hosts", - }, - test_path={ - "type": ARG_TYPE.STR, - "description": "Path used by the /passthrough/test endpoint. 
Defaults to /", - "required": False, - "label": "Test Path", - }, -) - -connection_args_example = OrderedDict( - base_url="https://api.example.com", - bearer_token="your_token_here", - default_headers={"Accept": "application/json"}, - allowed_hosts=["api.example.com"], -) diff --git a/mindsdb/integrations/handlers/rest_api_handler/icon.svg b/mindsdb/integrations/handlers/rest_api_handler/icon.svg deleted file mode 100644 index 2346f8d4d3e..00000000000 --- a/mindsdb/integrations/handlers/rest_api_handler/icon.svg +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - diff --git a/mindsdb/integrations/handlers/rest_api_handler/rest_api_handler.py b/mindsdb/integrations/handlers/rest_api_handler/rest_api_handler.py deleted file mode 100644 index a55ad7c1f4d..00000000000 --- a/mindsdb/integrations/handlers/rest_api_handler/rest_api_handler.py +++ /dev/null @@ -1,87 +0,0 @@ -from typing import Any - -from mindsdb.integrations.libs.api_handler import APIHandler -from mindsdb.integrations.libs.passthrough import PassthroughMixin -from mindsdb.integrations.libs.passthrough_types import PassthroughRequest -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class RestApiHandler(APIHandler, PassthroughMixin): - """Generic REST API handler — passthrough only, no SQL tables. - - This is the "bring your own URL" escape hatch for any bearer-token API - that mindsdb doesn't have a named handler for. Users supply a base_url - and a bearer_token and get full passthrough access. - """ - - name = "rest_api" - - def __init__(self, name: str, **kwargs: Any) -> None: - super().__init__(name) - self.connection_data = kwargs.get("connection_data") or {} - self.kwargs = kwargs - self.is_connected = False - - # PassthroughMixin reads these instance attributes at runtime. 
- self._bearer_token_arg = "bearer_token" - self._base_url_default = None # user must supply base_url - - # Build the test request from connection_data. Default to GET / - # unless the user provided a custom test_path. - test_path = self.connection_data.get("test_path", "/") - if not test_path.startswith("/"): - test_path = f"/{test_path}" - self._test_request = PassthroughRequest(method="GET", path=test_path) - - def connect(self) -> None: - """No persistent connection needed — passthrough is stateless. - - Validation happens in check_connection(), which we - call separately during CREATE DATABASE. - """ - self.is_connected = True - - def check_connection(self) -> StatusResponse: - """Validate that base_url and bearer_token are present.""" - response = StatusResponse(False) - try: - base_url = self._build_base_url() - if not base_url: - response.error_message = "base_url is required" - return response - token = self.connection_data.get(self._bearer_token_arg) - if not token: - response.error_message = "bearer_token is required" - return response - response.success = True - self.is_connected = True - except Exception as e: - response.error_message = str(e) - return response - - def native_query(self, query: str) -> Response: - """Not supported — use passthrough instead.""" - return Response( - RESPONSE_TYPE.ERROR, - error_message="rest_api handler is passthrough-only. Use the /passthrough endpoint.", - ) - - def get_tables(self) -> Response: - """No SQL tables — passthrough only.""" - import pandas as pd - - return Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame()) - - def get_columns(self, table_name: str) -> Response: - """No SQL tables — passthrough only.""" - return Response( - RESPONSE_TYPE.ERROR, - error_message="rest_api handler is passthrough-only. 
No tables available.", - ) diff --git a/mindsdb/integrations/handlers/salesforce_handler/README.md b/mindsdb/integrations/handlers/salesforce_handler/README.md deleted file mode 100644 index 5616918ef4f..00000000000 --- a/mindsdb/integrations/handlers/salesforce_handler/README.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -title: Salesforce -sidebarTitle: Salesforce ---- - -This documentation describes the integration of MindsDB with [Salesforce](https://www.salesforce.com/), the world’s most trusted customer relationship management (CRM) platform. -The integration allows MindsDB to access data from Salesforce and enhance it with AI capabilities. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop). -2. To connect Salesforce to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to Salesforce from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/salesforce_handler) as an engine. - -```sql -CREATE DATABASE salesforce_datasource -WITH - ENGINE = 'salesforce', - PARAMETERS = { - "username": "demo@example.com", - "password": "demo_password", - "client_id": "3MVG9lKcPoNINVBIPJjdw1J9LLM82HnZz9Yh7ZJnY", - "client_secret": "5A52C1A1E21DF9012IODC9ISNXXAADDA9" - }; -``` - -Required connection parameters include the following: - -* `username`: The username for the Salesforce account. -* `password`: The password for the Salesforce account. -* `client_id`: The client ID (consumer key) from a connected app in Salesforce. -* `client_secret`: The client secret (consumer secret) from a connected app in Salesforce. 
- -Optional connection parameters include the following: - -* `is_sandbox`: The setting to indicate whether to connect to a Salesforce sandbox environment (`true`) or production environment (`false`). This parameter defaults to `false`. - - -To create a connected app in Salesforce and obtain the client ID and client secret, follow the steps given below: -1. Log in to your Salesforce account. -2. Go to `Settings` > `Open Advanced Setup` > `Apps` > `App Manager`. -3. Click `New Connected App`, select `Create a Connected App` and click `Continue`. -4. Fill in the required details, i.e., `Connected App Name`, `API Name` and `Contact Phone`. -5. Select the `Enable OAuth Settings` checkbox, set the `Callback URL` to wherever MindsDB is deployed followed by `/verify-auth` (e.g., `http://localhost:47334/verify-auth`), and choose the following OAuth scopes: - - Manage user data via APIs (api) - - Perform requests at any time (refresh_token, offline_access) -6. Click `Save` and then `Continue`. -7. Click on `Manage Consumer Details` under `API (Enable OAuth Settings)`, and copy the Consumer Key (client ID) and Consumer Secret (client secret). -8. Click on `Back to Manage Connected Apps` and then `Manage`. -9. Click `Edit Policies`. -10. Under `OAuth Policies`, configure the `Permitted Users` and `IP Relaxation` settings according to your security policies. For example, to enable all users to access the app without enforcing any IP restrictions, select `All users may self-authorize` and `Relax IP restrictions` respectively. Leave the `Refresh Token Policy` set to `Refresh token is valid until revoked`. -11. Click `Save`. -12. Go to `Identity` > `OAuth and OpenID Connect Settings`. -13. Ensure that the `Allow OAuth Username-Password Flows` checkbox is checked. 
- - -## Usage - -Retrieve data from a specified table by providing the integration and table names: - -```sql -SELECT * -FROM salesforce_datasource.table_name -LIMIT 10; -``` - -Run [SOQL](https://developer.salesforce.com/docs/atlas.en-us.soql_sosl.meta/soql_sosl/sforce_api_calls_soql.htm) queries directly on the connected Salesforce account: - -```sql -SELECT * FROM salesforce_datasource ( - - --Native Query Goes Here - SELECT Name, Account.Name, Account.Industry - FROM Contact - WHERE Account.Industry = 'Technology' - LIMIT 5 - -); -``` - - -The above examples utilize `salesforce_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - diff --git a/mindsdb/integrations/handlers/salesforce_handler/__about__.py b/mindsdb/integrations/handlers/salesforce_handler/__about__.py deleted file mode 100644 index 02c10cc3b67..00000000000 --- a/mindsdb/integrations/handlers/salesforce_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Salesforce handler' -__package_name__ = 'mindsdb_salesforce_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Salesforce" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2024 - mindsdb' diff --git a/mindsdb/integrations/handlers/salesforce_handler/__init__.py b/mindsdb/integrations/handlers/salesforce_handler/__init__.py deleted file mode 100644 index 8efa4157f11..00000000000 --- a/mindsdb/integrations/handlers/salesforce_handler/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL - -from .__about__ import __version__ as version, __description__ as description - -try: - from .salesforce_handler import SalesforceHandler as Handler - from .connection_args import connection_args_example, connection_args - - import_error = None -except Exception as e: - Handler = None - 
import_error = e - -title = "Salesforce" -name = "salesforce" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.MINDSDB - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "support_level", - "description", - "import_error", - "icon_path", - "connection_args", - "connection_args_example", -] diff --git a/mindsdb/integrations/handlers/salesforce_handler/connection_args.py b/mindsdb/integrations/handlers/salesforce_handler/connection_args.py deleted file mode 100644 index d56991abb10..00000000000 --- a/mindsdb/integrations/handlers/salesforce_handler/connection_args.py +++ /dev/null @@ -1,47 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - username={ - 'type': ARG_TYPE.STR, - 'description': 'The username for the Salesforce account.', - 'required': True, - 'label': 'Username' - }, - password={ - 'type': ARG_TYPE.PWD, - 'description': 'The password for the Salesforce account.', - 'secret': True, - 'required': True, - 'label': 'Password' - }, - client_id={ - 'type': ARG_TYPE.STR, - 'description': 'The client ID (consumer key) from a connected app in Salesforce.', - 'required': True, - 'label': 'Client ID (Consumer Key)' - }, - client_secret={ - 'type': ARG_TYPE.STR, - 'description': 'The client secret (consumer secret) from a connected app in Salesforce.', - 'required': True, - 'label': 'Client Secret (Consumer Secret)' - }, - is_sandbox={ - 'type': ARG_TYPE.BOOL, - 'description': 'Set this to True if you need to connect to a sandbox, False for production environments. 
' - 'If not provided defaults to False.', - 'required': False, - 'label': 'Is Sandbox' - } -) - -connection_args_example = OrderedDict( - username='demo@example.com', - password='demo_password', - client_id='3MVG9lKcPoNINVBIPJjdw1J9LLM82HnZz9Yh7ZJnY', - client_secret='5A52C1A1E21DF9012IODC9ISNXXAADDA9', - is_sandbox=True -) diff --git a/mindsdb/integrations/handlers/salesforce_handler/constants.py b/mindsdb/integrations/handlers/salesforce_handler/constants.py deleted file mode 100644 index 9b9ae1ba61d..00000000000 --- a/mindsdb/integrations/handlers/salesforce_handler/constants.py +++ /dev/null @@ -1,215 +0,0 @@ -""" -Constants for Salesforce handler. -""" - - -def get_soql_instructions(integration_name): - return f"""This handler executes SOQL (Salesforce Object Query Language), NOT SQL! Follow these rules strictly: - -**BASIC STRUCTURE:** -- NO "SELECT *" - must explicitly list all fields - SQL: SELECT * FROM Account; - SOQL: SELECT Id, Name, Industry FROM Account -- NO table aliases - use full table names only - SQL: SELECT a.Name FROM Account a; - SOQL: SELECT Name FROM Account -- NO column aliases - field names cannot be aliased - SQL: SELECT Name AS CompanyName FROM Account; - SOQL: SELECT Name FROM Account -- NO DISTINCT keyword - not supported in SOQL - SQL: SELECT DISTINCT Industry FROM Account; - SOQL: Not possible - use separate logic -- NO subqueries in FROM clause - only relationship-based subqueries allowed - SQL: SELECT * FROM (SELECT Name FROM Account) AS AccountNames; - SOQL: Not supported -- Do not use fields that are not defined in the schema or data catalog. Always reference exact field names. 
- -**FIELD SELECTION:** -- Always include Id field when querying - CORRECT: SELECT Id, Name, Industry FROM Account - INCORRECT: SELECT Name, Industry FROM Account -- Field names are case-sensitive - CORRECT: SELECT CreatedDate FROM Account - INCORRECT: SELECT createddate FROM Account -- Use exact field names from the data catalog - CORRECT: SELECT CustomerPriority__c FROM Account - INCORRECT: SELECT customer_priority FROM Account - -**FILTERING (WHERE clause):** -- Date/DateTime fields: Use unquoted literals in YYYY-MM-DD or YYYY-MM-DDThh:mm:ssZ format - CORRECT: WHERE CloseDate >= 2025-05-28 - CORRECT: WHERE CreatedDate >= 2025-05-28T10:30:00Z - INCORRECT: WHERE CloseDate >= '2025-05-28' - INCORRECT: WHERE CreatedDate >= "2025-05-28" -- Special date literals: TODAY, YESTERDAY, LAST_WEEK, LAST_MONTH, LAST_QUARTER, LAST_YEAR, THIS_WEEK, THIS_MONTH, THIS_QUARTER, THIS_YEAR - CORRECT: WHERE CreatedDate = TODAY - CORRECT: WHERE LastModifiedDate >= LAST_MONTH - CORRECT: WHERE CloseDate >= THIS_QUARTER -- Date arithmetic (e.g., TODAY - 10) is not supported. Use literals like LAST_N_DAYS:10 instead. 
- CORRECT: WHERE CloseDate >= LAST_N_DAYS:10 - INCORRECT: WHERE CloseDate >= TODAY - 10 -- LIKE operator: Only supports % wildcard, NO underscore (_) wildcard - CORRECT: WHERE Name LIKE '%Corp%' - CORRECT: WHERE Name LIKE 'Acme%' - INCORRECT: WHERE Name LIKE 'A_me%' -- BETWEEN operator: NOT supported, use >= AND <= instead - SQL: WHERE CreatedDate BETWEEN '2025-01-01' AND '2025-12-31' - SOQL: WHERE CreatedDate >= 2025-01-01 AND CreatedDate <= 2025-12-31 -- Boolean values: Use lowercase true/false, NOT TRUE/FALSE - CORRECT: WHERE Active__c = true - CORRECT: WHERE IsDeleted = false - INCORRECT: WHERE Active__c = TRUE - INCORRECT: WHERE IsDeleted = FALSE -- NULL values: Use lowercase null, NOT NULL - CORRECT: WHERE ParentId = null - CORRECT: WHERE Description != null - INCORRECT: WHERE ParentId IS NULL - INCORRECT: WHERE Description IS NOT NULL -- String values: Use single quotes for strings - CORRECT: WHERE Industry = 'Technology' - CORRECT: WHERE Name = 'Acme Corp' - INCORRECT: WHERE Industry = "Technology" -- Multi-select picklist fields: Use INCLUDES('value1;value2') or EXCLUDES('value1;value2') - CORRECT: WHERE Services__c INCLUDES ('Consulting;Support') - CORRECT: WHERE Services__c EXCLUDES ('Training') - INCORRECT: WHERE Services__c = 'Consulting' -- Limited subquery support - only IN/NOT IN with non-correlated subqueries in WHERE clause - CORRECT: SELECT Id FROM Contact WHERE Id NOT IN (SELECT WhoId FROM Task) - INCORRECT: SELECT Id FROM Contact WHERE NOT EXISTS (SELECT 1 FROM Task WHERE WhoId = Contact.Id) - -**JOINS:** -- NO explicit JOIN syntax supported - SQL: SELECT a.Name, c.FirstName FROM Account a JOIN Contact c ON a.Id = c.AccountId - SOQL: Not supported - use relationship traversal (not applicable in this use case) - -**AGGREGATES:** -- NO COUNT(*) - use COUNT(Id) instead - SQL: SELECT COUNT(*) FROM Account - SOQL: SELECT COUNT(Id) FROM Account -- Cannot mix aggregate functions with non-aggregate fields unless using GROUP BY - CORRECT: SELECT 
Industry, COUNT(Id) FROM Account GROUP BY Industry - CORRECT: SELECT COUNT(Id) FROM Account - INCORRECT: SELECT Industry, Name, COUNT(Id) FROM Account -- NO GROUP_CONCAT or string aggregation functions - SQL: SELECT GROUP_CONCAT(Name) FROM Account - SOQL: Not supported -- NO HAVING clause - SQL: SELECT Industry, COUNT(*) FROM Account GROUP BY Industry HAVING COUNT(*) > 5 - SOQL: Not supported - filter with separate logic -- GROUP BY has limited field type support - CORRECT: SELECT Industry, COUNT(Id) FROM Account GROUP BY Industry - INCORRECT: SELECT Description, COUNT(Id) FROM Account GROUP BY Description (textarea fields not supported) - -**FUNCTIONS:** -- Date functions: CALENDAR_MONTH(), CALENDAR_YEAR(), CALENDAR_QUARTER(), DAY_IN_MONTH(), DAY_IN_WEEK(), DAY_IN_YEAR(), HOUR_IN_DAY(), WEEK_IN_MONTH(), WEEK_IN_YEAR() - CORRECT: SELECT Id, Name FROM Account WHERE CALENDAR_YEAR(CreatedDate) = 2025 - CORRECT: SELECT Id, Name FROM Account WHERE CALENDAR_MONTH(CreatedDate) = 5 - CORRECT: SELECT Id, Name FROM Account WHERE DAY_IN_WEEK(CreatedDate) = 2 -- NO math functions: ROUND, FLOOR, CEILING, ABS, etc. - SQL: SELECT ROUND(AnnualRevenue, 2) FROM Account - SOQL: Not supported -- NO conditional functions: CASE WHEN, COALESCE, NULLIF, etc. 
- SQL: SELECT CASE WHEN Industry = 'Technology' THEN 'Tech' ELSE 'Other' END FROM Account - SOQL: Not supported -- NO string functions except INCLUDES/EXCLUDES for multi-select picklists - SQL: SELECT UPPER(Name) FROM Account - SOQL: Not supported - -**OPERATORS:** -- Supported: =, !=, <, >, <=, >=, LIKE, IN, NOT IN, INCLUDES, EXCLUDES - CORRECT: WHERE Industry = 'Technology' - CORRECT: WHERE AnnualRevenue >= 1000000 - CORRECT: WHERE Industry IN ('Technology', 'Finance') - CORRECT: WHERE Industry NOT IN ('Government', 'Non-Profit') - CORRECT: WHERE Services__c INCLUDES ('Consulting') -- NOT supported: REGEXP, BETWEEN, EXISTS, NOT EXISTS - SQL: WHERE Name REGEXP '^[A-Z]' - SOQL: Not supported - -**SORTING & LIMITING:** -- ORDER BY: Fully supported - CORRECT: SELECT Id, Name FROM Account ORDER BY Name ASC - CORRECT: SELECT Id, Name FROM Account ORDER BY CreatedDate DESC, Name ASC - CORRECT: SELECT Id, Name FROM Account ORDER BY Name NULLS LAST -- LIMIT: Maximum 2000 records, use smaller limits for better performance - CORRECT: SELECT Id, Name FROM Account LIMIT 100 - CORRECT: SELECT Id, Name FROM Account LIMIT 2000 - INCORRECT: SELECT Id, Name FROM Account LIMIT 5000 -- NO OFFSET: Not supported for pagination - SQL: SELECT Id, Name FROM Account LIMIT 10 OFFSET 20 - SOQL: Not supported - -**DATA TYPES:** -- picklist: Single-select dropdown, use = operator with string values - CORRECT: WHERE Industry = 'Technology' - CORRECT: WHERE Rating = 'Hot' -- reference: Foreign key field, typically ends with Id - CORRECT: WHERE OwnerId = '00530000003OOwn' - CORRECT: WHERE AccountId = '0013000000UzXyz' -- boolean: Use lowercase true/false - CORRECT: WHERE IsDeleted = false - CORRECT: WHERE Active__c = true -- currency: Numeric field for money values - CORRECT: WHERE AnnualRevenue > 1000000 - CORRECT: WHERE AnnualRevenue >= 500000.50 -- date: Date only, use YYYY-MM-DD format - CORRECT: WHERE LastActivityDate = 2025-05-28 - CORRECT: WHERE SLAExpirationDate__c >= 2025-01-01 -- 
datetime: Date and time, use YYYY-MM-DDThh:mm:ssZ format - CORRECT: WHERE CreatedDate >= 2025-05-28T10:30:00Z - CORRECT: WHERE LastModifiedDate = 2025-05-28T00:00:00Z -- double/int: Numeric fields - CORRECT: WHERE NumberOfEmployees > 100 - CORRECT: WHERE NumberofLocations__c >= 5.5 -- string/textarea: Text fields, use single quotes - CORRECT: WHERE Name = 'Acme Corporation' - CORRECT: WHERE Description = 'Leading tech company' -- phone/url/email: Specialized string fields, treat as strings - CORRECT: WHERE Phone = '555-1234' - CORRECT: WHERE Website = 'https://example.com' - -**COMMON MISTAKES TO AVOID:** -- Using SELECT * (not allowed) - WRONG: SELECT * FROM Account - RIGHT: SELECT Id, Name, Industry FROM Account -- Quoting date literals (dates must be unquoted) - WRONG: WHERE CreatedDate >= '2025-01-01' - RIGHT: WHERE CreatedDate >= 2025-01-01 -- Using SQL JOIN syntax (not supported) - WRONG: SELECT Account.Name FROM Account JOIN Contact ON Account.Id = Contact.AccountId - RIGHT: Use relationship traversal (not applicable in this use case) -- Using BETWEEN operator (not supported) - WRONG: WHERE CreatedDate BETWEEN 2025-01-01 AND 2025-12-31 - RIGHT: WHERE CreatedDate >= 2025-01-01 AND CreatedDate <= 2025-12-31 -- Using uppercase TRUE/FALSE/NULL (must be lowercase) - WRONG: WHERE Active__c = TRUE - RIGHT: WHERE Active__c = true -- Using underscore _ in LIKE patterns (only % supported) - WRONG: WHERE Name LIKE 'A_me%' - RIGHT: WHERE Name LIKE 'A%me%' -- Mixing aggregate and non-aggregate fields without GROUP BY - WRONG: SELECT Name, COUNT(Id) FROM Account - RIGHT: SELECT Industry, COUNT(Id) FROM Account GROUP BY Industry - -**EXAMPLE QUERIES:** -- Basic selection: SELECT Id, Name, Industry FROM Account WHERE Industry = 'Technology' -- Date filtering: SELECT Id, Name FROM Account WHERE CreatedDate >= 2025-01-01 -- Multiple conditions: SELECT Id, Name FROM Account WHERE Name LIKE '%Corp%' AND Industry IN ('Technology', 'Finance') -- Aggregation: SELECT Industry, 
COUNT(Id) FROM Account GROUP BY Industry -- Boolean and numeric: SELECT Id, Name FROM Account WHERE Active__c = true AND NumberOfEmployees > 100 -- Date functions: SELECT Id, Name FROM Account WHERE CALENDAR_YEAR(CreatedDate) = 2025 -- Null checks: SELECT Id, Name FROM Account WHERE ParentId = null -- Multi-select picklist: SELECT Id, Name FROM Account WHERE Services__c INCLUDES ('Consulting;Support') -- Sorting and limiting: SELECT Id, Name FROM Account ORDER BY Name ASC LIMIT 50 - - -***EXECUTION INSTRUCTIONS. IMPORTANT!*** -After generating the core SOQL (and nothing else), always make sure you wrap it exactly as: - - SELECT * - FROM {integration_name}( - /* your generated SOQL goes here, without a trailing semicolon */ - ) - -Return only that wrapper call. -""" diff --git a/mindsdb/integrations/handlers/salesforce_handler/icon.svg b/mindsdb/integrations/handlers/salesforce_handler/icon.svg deleted file mode 100644 index 8a15f25559c..00000000000 --- a/mindsdb/integrations/handlers/salesforce_handler/icon.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/salesforce_handler/requirements.txt b/mindsdb/integrations/handlers/salesforce_handler/requirements.txt deleted file mode 100644 index 8ddf2fa1bad..00000000000 --- a/mindsdb/integrations/handlers/salesforce_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -salesforce_api==0.1.45 \ No newline at end of file diff --git a/mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py b/mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py deleted file mode 100644 index e87b44317e0..00000000000 --- a/mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +++ /dev/null @@ -1,361 +0,0 @@ -from typing import Any, Dict, List, Optional, Text - -import pandas as pd -import salesforce_api -from salesforce_api.exceptions import AuthenticationError, RestRequestCouldNotBeUnderstoodError - -from 
mindsdb.integrations.libs.api_handler import MetaAPIHandler -from mindsdb.integrations.libs.passthrough import PassthroughMixin -from mindsdb.integrations.libs.passthrough_types import PassthroughRequest -from mindsdb.integrations.libs.response import ( - HandlerResponse as Response, - HandlerStatusResponse as StatusResponse, - RESPONSE_TYPE, -) -from mindsdb.integrations.handlers.salesforce_handler.salesforce_tables import create_table_class -from mindsdb.integrations.handlers.salesforce_handler.constants import get_soql_instructions -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -class SalesforceHandler(MetaAPIHandler, PassthroughMixin): - """ - This handler handles the connection and execution of SQL statements on Salesforce. - """ - - name = "salesforce" - - # REST passthrough configuration. Salesforce's base URL is per-org - # (`instance_url`) and is normally discovered at auth time. v1 requires - # the caller to supply both `access_token` and `instance_url` explicitly - # in connection_data; dynamic discovery from the username/password flow - # is deferred to a future refresh-aware mixin. - _bearer_token_arg = "access_token" - _base_url_default = None - _test_request = PassthroughRequest(method="GET", path="/services/data/v60.0/") - - def _build_base_url(self) -> str | None: - data = self._get_connection_data() - instance_url = data.get("instance_url") - if not instance_url: - return None - return str(instance_url).rstrip("/") - - def __init__(self, name: Text, connection_data: Dict, **kwargs: Any) -> None: - """ - Initializes the handler. - - Args: - name (Text): The name of the handler instance. - connection_data (Dict): The connection data required to connect to the Salesforce API. - kwargs: Arbitrary keyword arguments. 
- """ - super().__init__(name) - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - self.cache_thread_safe = True - self.resource_names = [] - - def connect(self) -> salesforce_api.client.Client: - """ - Establishes a connection to the Salesforce API. - - Raises: - ValueError: If the required connection parameters are not provided. - AuthenticationError: If an authentication error occurs while connecting to the Salesforce API. - - Returns: - salesforce_api.client.Client: A connection object to the Salesforce API. - """ - if self.is_connected is True: - return self.connection - - # Mandatory connection parameters. - if not all(key in self.connection_data for key in ["username", "password", "client_id", "client_secret"]): - raise ValueError("Required parameters (username, password, client_id, client_secret) must be provided.") - - try: - self.connection = salesforce_api.Salesforce( - username=self.connection_data["username"], - password=self.connection_data["password"], - client_id=self.connection_data["client_id"], - client_secret=self.connection_data["client_secret"], - is_sandbox=self.connection_data.get("is_sandbox", False), - ) - self.is_connected = True - - resource_tables = self._get_resource_names() - for resource_name in resource_tables: - table_class = create_table_class(resource_name.lower()) - self._register_table(resource_name, table_class(self)) - - return self.connection - except AuthenticationError as auth_error: - logger.error(f"Authentication error connecting to Salesforce, {auth_error}!") - raise - except Exception as unknown_error: - logger.error(f"Unknwn error connecting to Salesforce, {unknown_error}!") - raise - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the Salesforce API. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. 
- """ - response = StatusResponse(False) - - try: - self.connect() - response.success = True - except (AuthenticationError, ValueError) as known_error: - logger.error(f"Connection check to Salesforce failed, {known_error}!") - response.error_message = str(known_error) - except Exception as unknown_error: - logger.error(f"Connection check to Salesforce failed due to an unknown error, {unknown_error}!") - response.error_message = str(unknown_error) - - self.is_connected = response.success - - return response - - def native_query(self, query: Text) -> Response: - """ - Executes a native SOQL query on Salesforce and returns the result. - - Args: - query (Text): The SQL query to be executed. - - Returns: - Response: A response object containing the result of the query or an error message. - """ - connection = self.connect() - - try: - results = connection.sobjects.query(query) - - parsed_results = [] - for result in results: - del result["attributes"] - - # Check if the result contains any of the other Salesforce resources. - if any(key in self.resource_names for key in result.keys()): - # Parse the result to extract the nested resources. 
- parsed_result = {} - for key, value in result.items(): - if key in self.resource_names: - del value["attributes"] - parsed_result.update( - {f"{key}_{sub_key}": sub_value for sub_key, sub_value in value.items()} - ) - - else: - parsed_result[key] = value - - parsed_results.append(parsed_result) - - else: - parsed_results.append(result) - - response = Response(RESPONSE_TYPE.TABLE, pd.DataFrame(parsed_results)) - except RestRequestCouldNotBeUnderstoodError as rest_error: - logger.error(f"Error running query: {query} on Salesforce, {rest_error}!") - response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(rest_error)) - except Exception as unknown_error: - logger.error(f"Error running query: {query} on Salesforce, {unknown_error}!") - response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(unknown_error)) - - return response - - def _get_resource_names(self) -> List[str]: - """ - Retrieves the names of the Salesforce resources with optimized pre-filtering. - Returns: - List[str]: A list of filtered resource names. 
- """ - if not self.resource_names: - # Check for user-specified table filtering first - include_tables = self.connection_data.get("include_tables") or self.connection_data.get("tables") - exclude_tables = self.connection_data.get("exclude_tables", []) - - if include_tables: - # OPTIMIZATION: Skip expensive global describe() call - # Only validate the specified tables - logger.info(f"Using pre-filtered table list: {include_tables}") - self.resource_names = self._validate_specified_tables(include_tables, exclude_tables) - else: - # Fallback to full discovery with hard-coded filtering - logger.info("No table filter specified, performing full discovery...") - self.resource_names = self._discover_all_tables_with_filtering(exclude_tables) - - return self.resource_names - - def _validate_specified_tables(self, include_tables: List[str], exclude_tables: List[str]) -> List[str]: - """ - Validate user-specified tables without expensive global describe() call. - - Args: - include_tables: List of table names to include - exclude_tables: List of table names to exclude - - Returns: - List[str]: Validated and filtered table names - """ - validated_tables = [] - - for table_name in include_tables: - # Skip if explicitly excluded - if table_name in exclude_tables: - logger.info(f"Skipping excluded table: {table_name}") - continue - - try: - # Quick validation: check if table exists and is queryable - # This is much faster than global describe() - metadata = getattr(self.connection.sobjects, table_name).describe() - if metadata.get("queryable", False): - validated_tables.append(table_name) - logger.debug(f"Validated table: {table_name}") - else: - logger.warning(f"Table {table_name} is not queryable, skipping") - except Exception as e: - logger.warning(f"Table {table_name} not found or accessible: {e}") - - logger.info(f"Validated {len(validated_tables)} tables from include_tables") - return validated_tables - - def _discover_all_tables_with_filtering(self, exclude_tables: 
List[str]) -> List[str]: - """ - Fallback method: discover all tables with hard-coded filtering. - - Args: - exclude_tables: List of table names to exclude - - Returns: - List[str]: Filtered table names - """ - # This is the original expensive approach - only used when no include_tables specified - all_resources = [ - resource["name"] - for resource in self.connection.sobjects.describe()["sobjects"] - if resource.get("queryable", False) - ] - - # Apply hard-coded filtering (existing logic) - ignore_suffixes = ("Share", "History", "Feed", "ChangeEvent", "Tag", "Permission", "Setup", "Consent") - ignore_prefixes = ( - "Apex", - "CommPlatform", - "Lightning", - "Flow", - "Transaction", - "AI", - "Aura", - "ContentWorkspace", - "Collaboration", - "Datacloud", - ) - ignore_exact = { - "EntityDefinition", - "FieldDefinition", - "RecordType", - "CaseStatus", - "UserRole", - "UserLicense", - "UserPermissionAccess", - "UserRecordAccess", - "Folder", - "Group", - "Note", - "ProcessDefinition", - "ProcessInstance", - "ContentFolder", - "ContentDocumentSubscription", - "DashboardComponent", - "Report", - "Dashboard", - "Topic", - "TopicAssignment", - "Period", - "Partner", - "PackageLicense", - "ColorDefinition", - "DataUsePurpose", - "DataUseLegalBasis", - } - - ignore_substrings = ( - "CleanInfo", - "Template", - "Rule", - "Definition", - "Status", - "Policy", - "Setting", - "Access", - "Config", - "Subscription", - "DataType", - "MilestoneType", - "Entitlement", - "Auth", - ) - - # Apply hard-coded filtering - filtered = [] - for r in all_resources: - if ( - not r.endswith(ignore_suffixes) - and not r.startswith(ignore_prefixes) - and not any(sub in r for sub in ignore_substrings) - and r not in ignore_exact - and r not in exclude_tables # Apply user exclusions - ): - filtered.append(r) - - return filtered - - def meta_get_handler_info(self, **kwargs) -> str: - """ - Retrieves information about the design and implementation of the API handler. 
- This should include, but not be limited to, the following: - - The type of SQL queries and operations that the handler supports. - - etc. - - Args: - kwargs: Additional keyword arguments that may be used in generating the handler information. - - Returns: - str: A string containing information about the API handler's design and implementation. - """ - return get_soql_instructions(self.name) - - def meta_get_tables(self, table_names: Optional[List[str]] = None) -> Response: - """ - Retrieves metadata for the specified tables (or all tables if no list is provided). - - Args: - table_names (List): A list of table names for which to retrieve metadata. - - Returns: - Response: A response object containing the table metadata. - """ - connection = self.connect() - - # Retrieve the metadata for all Salesforce resources. - main_metadata = connection.sobjects.describe() - if table_names: - # Filter the metadata for the specified tables. - main_metadata = [ - resource for resource in main_metadata["sobjects"] if resource["name"].lower() in table_names - ] - else: - main_metadata = main_metadata["sobjects"] - - return super().meta_get_tables(table_names=table_names, main_metadata=main_metadata) diff --git a/mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py b/mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py deleted file mode 100644 index 5a5bf3e3b17..00000000000 --- a/mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +++ /dev/null @@ -1,269 +0,0 @@ -from typing import Dict, List, Text - -from mindsdb_sql_parser.ast import Select, Star, Identifier -import pandas as pd -from salesforce_api.exceptions import RestRequestCouldNotBeUnderstoodError - -from mindsdb.integrations.libs.api_handler import MetaAPIResource -from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -def create_table_class(resource_name: Text) -> 
MetaAPIResource: - """ - Creates a table class for the given Salesforce resource. - """ - - class AnyTable(MetaAPIResource): - """ - This is the table abstraction for any resource of the Salesforce API. - """ - - def __init__(self, *args, table_name=None, **kwargs): - """ - Initializes the AnyTable class. - - Args: - *args: Variable length argument list. - table_name (str): The name of the table that represents the Salesforce resource. - **kwargs: Arbitrary keyword arguments. - """ - super().__init__(*args, table_name=table_name, **kwargs) - self.resource_metadata = None - - def select(self, query: Select) -> pd.DataFrame: - """ - Executes a SELECT SQL query represented by an ASTNode object on the Salesforce resource and retrieves the data (if any). - - Args: - query (ASTNode): An ASTNode object representing the SQL query to be executed. - - Returns: - pd.DataFrame: A DataFrame containing the data retrieved from the Salesforce resource. - """ - query.from_table = resource_name - - # SOQL does not support * in SELECT queries. Replace * with column names. - if isinstance(query.targets[0], Star): - query.targets = [Identifier(column) for column in self.get_columns()] - - # SOQL does not support column aliases. Remove column aliases. - column_aliases = {} - for column in query.targets: - if column.alias is not None: - column_aliases[column.parts[-1]] = column.alias.parts[-1] - column.alias = None - - client = self.handler.connect() - - query_str = query.to_string() - - # SOQL does not support backticks. Remove backticks. - query_str = query_str.replace("`", "") - results = client.sobjects.query(query_str) - - for result in results: - del result["attributes"] - - df = pd.DataFrame(results) - df.rename(columns=column_aliases, inplace=True) - - return df - - def add(self, item: Dict) -> None: - """ - Adds a new item to the Salesforce resource. - - Args: - contact (Dict): The data to be inserted into the Salesforce resource. 
- """ - client = self.handler.connect() - getattr(client.sobjects, resource_name).insert(item) - - def modify(self, conditions: List[FilterCondition], values: Dict) -> None: - """ - Modifies items in the Salesforce resource based on the specified conditions. - - Args: - conditions (List[FilterCondition]): The conditions based on which the items are to be modified. - values (Dict): The values to be updated in the items. - """ - client = self.handler.connect() - - ids = self._validate_conditions(conditions) - - for id in ids: - getattr(client.sobjects, resource_name).update(id, values) - - def remove(self, conditions: List[FilterCondition]) -> None: - """ - Removes items from the Salesforce resource based on the specified conditions. - - Args: - conditions (List[FilterCondition]): The conditions based on which the items are to be removed. - """ - client = self.handler.connect() - - ids = self._validate_conditions(conditions) - - for id in ids: - getattr(client.sobjects, resource_name).delete(id) - - def _validate_conditions(self, conditions: List[FilterCondition]) -> None: - """ - Validates the conditions used for filtering items in the Salesforce resource. - - Args: - conditions (List[FilterCondition]): The conditions to be validated. - """ - # Salesforce API does not support filtering items based on attributes other than 'Id'. Raise an error if any other column is used. - if len(conditions) != 1 or conditions[0].column != "Id": - raise ValueError("Only the 'Id' column can be used to filter items.") - - # Only the 'equals' and 'in' operators can be used on the 'Id' column for deletion. Raise an error if any other operator is used. 
- if conditions[0].op not in [FilterOperator.EQUAL, FilterOperator.IN]: - raise ValueError("Only the 'equals' and 'in' operators can be used on the 'Id' column.") - - return conditions[0].value if isinstance(conditions[0].value, list) else [conditions[0].value] - - def _get_resource_metadata(self) -> Dict: - """ - Retrieves metadata about the Salesforce resource. - - Returns: - Dict: A dictionary containing metadata about the Salesforce resource. - """ - if self.resource_metadata: - return self.resource_metadata - - client = self.handler.connect() - return getattr(client.sobjects, resource_name).describe() - - def get_columns(self) -> List[Text]: - """ - Retrieves the attributes (columns) of the Salesforce resource. - - Returns: - List[Text]: A list of Attributes (columns) of the Salesforce resource. - """ - return [field["name"] for field in self._get_resource_metadata()["fields"]] - - def meta_get_tables(self, table_name: str, main_metadata: Dict) -> Dict: - """ - Retrieves table metadata for the Salesforce resource. - - Args: - table_name (str): The name given to the table that represents the Salesforce resource. - main_metadata (Dict): The main metadata dictionary containing information about all Salesforce resources. - - Returns: - Dict: A dictionary containing table metadata for the Salesforce resource. - """ - client = self.handler.connect() - - try: - resource_metadata = next( - (resource for resource in main_metadata if resource["name"].lower() == resource_name), - ) - except Exception as e: - logger.warning(f"Failed to get resource metadata for {resource_name}: {e}") - return { - "table_name": table_name, - "table_type": "BASE TABLE", - "table_description": "", - "row_count": None, - } - # Get row count if Id column is aggregatable. 
- row_count = None - # if next(field for field in resource_metadata['fields'] if field['name'] == 'Id').get('aggregatable', False): - try: - row_count = client.sobjects.query(f"SELECT COUNT(Id) FROM {resource_name}")[0]["expr0"] - except RestRequestCouldNotBeUnderstoodError as request_error: - logger.warning(f"Failed to get row count for {resource_name}: {request_error}") - - return { - "table_name": table_name, - "table_type": "BASE TABLE", - "table_description": resource_metadata.get("label", ""), - "row_count": row_count, - } - - def meta_get_columns(self, table_name: str) -> List[Dict]: - """ - Retrieves column metadata for the Salesforce resource. - - Args: - table_name (str): The name given to the table that represents the Salesforce resource. - - Returns: - List[Dict]: A list of dictionaries containing column metadata for the Salesforce resource. - """ - resource_metadata = self._get_resource_metadata() - - column_metadata = [] - for field in resource_metadata["fields"]: - column_metadata.append( - { - "table_name": table_name, - "column_name": field["name"], - "data_type": field["type"], - "is_nullable": field.get("nillable", False), - "default_value": field.get("defaultValue", ""), - "description": field.get("inlineHelpText", ""), - } - ) - - return column_metadata - - def meta_get_primary_keys(self, table_name: str) -> List[Dict]: - """ - Retrieves the primary keys for the Salesforce resource. - - Args: - table_name (str): The name given to the table that represents the Salesforce resource. - - Returns: - List[Dict]: A list of dictionaries containing primary key metadata for the Salesforce resource. - """ - return [ - { - "table_name": table_name, - "column_name": "Id", - } - ] - - def meta_get_foreign_keys(self, table_name: str, all_tables: List[str]) -> List[Dict]: - """ - Retrieves the foreign keys for the Salesforce resource. - - Args: - table_name (str): The name given to the table that represents the Salesforce resource. 
- all_tables (List[str]): A list of all table names in the Salesforce database. - - Returns: - List[Dict]: A list of dictionaries containing foreign key metadata for the Salesforce resource. - """ - resource_metadata = self._get_resource_metadata() - - foreign_key_metadata = [] - for child_relationship in resource_metadata.get("childRelationships", []): - # Skip if the child relationship is not one of the supported tables. - child_table_name = child_relationship["childSObject"] - if child_table_name not in all_tables: - continue - - foreign_key_metadata.append( - { - "parent_table_name": table_name, - "parent_column_name": "Id", - "child_table_name": child_table_name, - "child_column_name": child_relationship["field"], - } - ) - - return foreign_key_metadata - - return AnyTable diff --git a/mindsdb/integrations/handlers/shopify_handler/README.md b/mindsdb/integrations/handlers/shopify_handler/README.md deleted file mode 100644 index ef42970c303..00000000000 --- a/mindsdb/integrations/handlers/shopify_handler/README.md +++ /dev/null @@ -1,391 +0,0 @@ -# Shopify Handler - -## Overview - -The Shopify Handler for MindsDB provides an interface to connect to Shopify stores via GraphQL API and enables executing SQL queries against store data. - ---- - -## Table of Contents - -- [Shopify Handler Implementation](#shopify-handler-implementation) -- [Connection Initialization](#connection-initialization) -- [Available Tables](#available-tables) -- [Supported Operations](#supported-operations) -- [Usage Examples](#usage-examples) -- [Technical Details](#technical-details) - ---- - - -## Shopify Handler Implementation - -This handler is implemented using: -- **ShopifyAPI** - the official Python library for Shopify -- **Shopify GraphQL Admin API** version **2025-10** - -All requests to Shopify are executed through the GraphQL API, which ensures efficient data loading with the ability to select only the necessary fields. 
- -Where possible, sorting and filtering are performed on the Shopify side. When this is not supported, sorting and filtering are handled on the MindsDB side. - ---- - -## Connection Initialization - -To connect to Shopify, the following parameters are required: - -### Required Parameters - -| Parameter | Type | Description | -|----------|-----|----------| -| `shop_url` | `str` | Your store URL (e.g., `shop-123456.myshopify.com`) | -| `client_id` | `str` | Client ID of your Shopify app | -| `client_secret` | `str` | Client Secret of your Shopify app | - -### Creating a Connection - -```sql -CREATE DATABASE shopify_store -WITH ENGINE = 'shopify', -PARAMETERS = { - "shop_url": "your-shop-name.myshopify.com", - "client_id": "your_client_id", - "client_secret": "your_client_secret" -}; -``` - -### Obtaining Credentials - -1. Log in to the Shopify admin panel -2. Navigate to **https://dev.shopify.com/dashboard/** -3. Create a new app. During app creation, define the required scopes. Depending on which tables you need to access, you may want to add: - - `read_products` - for access to products and variants - - `read_customers` - for access to customer data - - `read_orders` (or `read_marketplace_orders`, `read_quick_sale`) - for access to orders - - `read_inventory` - for access to inventory data - - `read_marketing_events` - for access to marketing events - - `read_staff` - for access to staff member data - - `read_gift_cards` - for access to gift cards -4. In the **Settings** section, locate: - - **Client ID** - - **Secret** -5. Install the app to your store. - ---- - -## Available Tables - -The handler provides access to the following tables: - -### 1. `products` - Products - -Contains information about store products. 
- -**Key Fields:** -- `id` - Unique product identifier -- `title` - Product name -- `description` - Product description -- `vendor` - Manufacturer/supplier -- `productType` - Product type -- `status` - Product status (active, draft, archived) -- `tags` - Product tags -- `createdAt` - Creation date -- `updatedAt` - Last update date -- `totalInventory` - Total stock quantity -- `priceRangeV2` - Price range -- `variantsCount` - Number of variants - -**Shopify Native Sorting:** -This table supports native sorting in Shopify API by the following fields: `createdAt`, `id`, `totalInventory`, `productType`, `publishedAt`, `title`, `updatedAt`, `vendor`. - -**Shopify Native Filtering:** -This table supports native filtering in Shopify API by the following fields: `createdAt`, `id`, `isGiftCard`, `handle`, `totalInventory`, `productType`, `publishedAt`, `status`, `title`, `updatedAt`, `vendor`. - -### 2. `product_variants` - Product Variants - -Contains information about product variants (sizes, colors, etc.). - -**Key Fields:** -- `id` - Unique variant identifier -- `productId` - Parent product ID -- `title` - Variant name -- `displayName` - Display name -- `sku` - Stock Keeping Unit -- `barcode` - Barcode -- `inventoryQuantity` - Stock quantity -- `position` - Position in list - -**Shopify Native Sorting:** -This table supports native sorting in Shopify API by the following fields: `id`, `inventoryQuantity`, `displayName`, `position`, `sku`, `title`. - -**Shopify Native Filtering:** -This table supports native filtering in Shopify API by the following fields: `barcode`, `id`, `inventoryQuantity`, `productId`, `sku`, `title`, `updatedAt`. - -### 3. `customers` - Customers - -Contains information about store customers. 
- -**Key Fields:** -- `id` - Unique customer identifier -- `firstName` - First name -- `lastName` - Last name -- `emailAddress` - Email address -- `phoneNumber` - Phone number -- `displayName` - Display name -- `country` - Country -- `createdAt` - Creation date -- `updatedAt` - Update date -- `numberOfOrders` - Number of orders -- `amountSpent` - Total amount spent -- `tags` - Customer tags -- `state` - Account state -- `verifiedEmail` - Email verified - -**Shopify Native Sorting:** -This table supports native sorting in Shopify API by the following fields: `createdAt`, `id`, `updatedAt`. - -**Shopify Native Filtering:** -This table supports native filtering in Shopify API by the following fields: `country`, `createdAt`, `email`, `firstName`, `id`, `lastName`, `phoneNumber`, `updatedAt`. - -### 4. `orders` - Orders - -Contains information about orders. - -**Key Fields:** -- `id` - Unique order identifier -- `name` - Order number (e.g., #1001) -- `number` - Numeric order number -- `customerId` - Customer ID -- `email` - Customer email -- `phone` - Customer phone -- `createdAt` - Creation date -- `processedAt` - Processing date -- `updatedAt` - Update date -- `cancelledAt` - Cancellation date -- `currentTotalPriceSet` - Current total price -- `currentSubtotalPriceSet` - Current subtotal price -- `totalWeight` - Total weight -- `test` - Test order -- `shippingAddress` - Shipping address -- `tags` - Order tags - -**Shopify Native Sorting:** -This table supports native sorting in Shopify API by the following fields: `createdAt`, `id`, `number`, `poNumber`, `processedAt`, `updatedAt`. - -**Shopify Native Filtering:** -This table supports native filtering in Shopify API by the following fields: `confirmationNumber`, `createdAt`, `customerId`, `discountCode`, `email`, `id`, `name`, `poNumber`, `processedAt`, `returnStatus`, `sourceIdentifier`, `sourceName`, `test`, `totalWeight`, `updatedAt`. - -### 5. 
`marketing_events` - Marketing Events - -Contains information about marketing events and campaigns. - -**Key Fields:** -- `id` - Unique event identifier -- `startedAt` - Start date -- `description` - Description -- `type` - Event type - -**Shopify Native Sorting:** -This table supports native sorting in Shopify API by the following fields: `id`, `startedAt`. - -**Shopify Native Filtering:** -This table supports native filtering in Shopify API by the following fields: `id`, `startedAt`, `description`, `type`. - -### 6. `inventory_items` - Inventory Items - -Contains information about inventory items. - -**Key Fields:** -- `id` - Unique identifier -- `sku` - Stock Keeping Unit -- `createdAt` - Creation date -- `updatedAt` - Update date - -**Shopify Native Sorting:** -This table does not support native sorting in Shopify API. Sorting is handled on the MindsDB side. - -**Shopify Native Filtering:** -This table supports native filtering in Shopify API by the following fields: `id`, `createdAt`, `sku`, `updatedAt`. - -### 7. `staff_members` - Staff Members - -Contains information about store staff members. - -**Key Fields:** -- `id` - Unique identifier -- `firstName` - First name -- `lastName` - Last name -- `email` - Email -- `accountType` - Account type - -**Shopify Native Sorting:** -This table supports native sorting in Shopify API by the following fields: `id`, `email`, `firstName`, `lastName`. - -**Shopify Native Filtering:** -This table supports native filtering in Shopify API by the following fields: `accountType`, `email`, `firstName`, `lastName`, `id`. - -### 8. `gift_cards` - Gift Cards - -Contains information about gift cards. 
- -**Key Fields:** -- `id` - Unique identifier -- `balance` - Current balance -- `initialValue` - Initial value -- `customerId` - Customer ID -- `orderId` - Order ID -- `createdAt` - Creation date -- `updatedAt` - Update date -- `expiresOn` - Expiration date -- `deactivatedAt` - Deactivation date - -**Shopify Native Sorting:** -This table supports native sorting in Shopify API by the following fields: `balance`, `createdAt`, `deactivatedAt`, `expiresOn`, `id`, `initialValue`, `updatedAt`. - -**Shopify Native Filtering:** -This table supports native filtering in Shopify API by the following fields: `createdAt`, `expiresOn`, `id`. - ---- - -## Supported Operations - -### SELECT - Reading Data - -All tables support the SELECT operation with the following capabilities: - -- **SELECT** - data selection -- **WHERE** - filtering by conditions (depending on the table) -- **ORDER BY** - sorting (depending on the table) -- **LIMIT** - limiting the number of records -- **Selecting specific columns** - query optimization - -### WHERE Operators - -Depending on the field and table, the following operators are supported: - -- `=` (EQUAL) - equality -- `>` (GREATER_THAN) - greater than -- `>=` (GREATER_THAN_OR_EQUAL) - greater than or equal -- `<` (LESS_THAN) - less than -- `<=` (LESS_THAN_OR_EQUAL) - less than or equal -- `IN` - check for inclusion in a list (for some fields) -- `LIKE` - partial match (for some text fields) - ---- - -## Usage Examples - -### Basic SELECT Queries - -#### Get All Products - -```sql -SELECT id, title, vendor, status, totalInventory -FROM shopify_store.products; -``` - -#### Get Active Products with Sorting - -```sql -SELECT id, title, vendor, totalInventory, createdAt -FROM shopify_store.products -WHERE status = 'active' -ORDER BY createdAt DESC -LIMIT 10; -``` - -#### Get Products from a Specific Vendor - -```sql -SELECT id, title, productType, priceRangeV2 -FROM shopify_store.products -WHERE vendor = 'Nike' -ORDER BY title; -``` - -#### Get 
Customers from a Specific Country - -```sql -SELECT id, displayName, emailAddress, phoneNumber, numberOfOrders -FROM shopify_store.customers -WHERE country = 'Canada' -ORDER BY numberOfOrders DESC -LIMIT 20; -``` - -#### Get Customers Created After a Specific Date - -```sql -SELECT id, firstName, lastName, emailAddress, createdAt -FROM shopify_store.customers -WHERE createdAt > '2024-01-01' -ORDER BY createdAt DESC; -``` - -#### Get Orders for a Specific Customer - -```sql -SELECT id, name, email, currentTotalPriceSet, processedAt -FROM shopify_store.orders -WHERE customerId = 'gid://shopify/Customer/123456789' -ORDER BY processedAt DESC; -``` -#### Get Orders with Customer Information - -```sql -SELECT - o.id as order_id, - o.name as order_name, - o.createdAt as order_date, - c.displayName as customer_name, - c.emailAddress as customer_email, - o.currentTotalPriceSet as total -FROM shopify_store.orders o -JOIN shopify_store.customers c ON o.customerId = c.id -WHERE o.createdAt > '2024-01-01' -ORDER BY o.createdAt DESC -LIMIT 50; -``` - ---- - -## Technical Details - -### API Version - -The connector uses **Shopify GraphQL Admin API version 2025-10**. - -### Authentication - -The connector uses OAuth 2.0 authentication with the `client_credentials` grant type: - -1. Upon connection, a POST request is made to `https://{shop_url}/admin/oauth/access_token` -2. The `client_id` and `client_secret` are sent -3. An `access_token` is obtained for subsequent requests -4. A Shopify session is created with the obtained token - -### GraphQL Queries - -All data is loaded through the GraphQL API using nodes and connections. 
The connector automatically: - -- Selects only requested fields for optimization -- Applies WHERE filters to GraphQL queries -- Performs sorting at the API level -- Limits the number of returned records - -### Limitations - -- The connector provides **read-only access** (SELECT) -- INSERT, UPDATE, DELETE operations are not supported in the current version -- Some fields may be null depending on store settings -- The current version does not handle API rate limiting -- For sorting, it is recommended to use fields that have native sorting support in the Shopify API - -## Useful Links - -- [Shopify GraphQL Admin API Documentation](https://shopify.dev/docs/api/admin-graphql) -- [Shopify API Permissions](https://shopify.dev/docs/api/usage/access-scopes) -- [Shopify App Development](https://shopify.dev/docs/apps) -- [ShopifyAPI Python Library](https://github.com/Shopify/shopify_python_api) - ---- diff --git a/mindsdb/integrations/handlers/shopify_handler/__about__.py b/mindsdb/integrations/handlers/shopify_handler/__about__.py deleted file mode 100644 index 3be641b9f81..00000000000 --- a/mindsdb/integrations/handlers/shopify_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = "MindsDB Shopify handler" -__package_name__ = "mindsdb_shopify_handler" -__version__ = "0.0.2" -__description__ = "MindsDB handler for Shopify" -__author__ = "Minura Punchihewa" -__github__ = "https://github.com/mindsdb/mindsdb" -__pypi__ = "https://pypi.org/project/mindsdb/" -__license__ = "MIT" -__copyright__ = "Copyright 2023 - mindsdb" diff --git a/mindsdb/integrations/handlers/shopify_handler/__init__.py b/mindsdb/integrations/handlers/shopify_handler/__init__.py deleted file mode 100644 index b0ec9e5f3a0..00000000000 --- a/mindsdb/integrations/handlers/shopify_handler/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL - -from .__about__ import __version__ as version, __description__ as description -from 
.connection_args import connection_args_example, connection_args - -try: - from .shopify_handler import ShopifyHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "Shopify" -name = "shopify" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.MINDSDB - -__all__ = [ - "Handler", - "version", - "name", - "type", - "support_level", - "title", - "description", - "import_error", - "icon_path", - "connection_args", - "connection_args_example", -] diff --git a/mindsdb/integrations/handlers/shopify_handler/connection_args.py b/mindsdb/integrations/handlers/shopify_handler/connection_args.py deleted file mode 100644 index 464f30184b4..00000000000 --- a/mindsdb/integrations/handlers/shopify_handler/connection_args.py +++ /dev/null @@ -1,32 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - shop_url={ - "type": ARG_TYPE.STR, - "description": "Shop url", - "required": True, - "label": "Shop url", - }, - client_id={ - "type": ARG_TYPE.STR, - "description": "Client ID of the app", - "required": True, - "label": "client_id", - }, - client_secret={ - "type": ARG_TYPE.PWD, - "description": "Secret of the app", - "required": True, - "label": "Database", - "secret": True, - }, -) - -connection_args_example = OrderedDict( - shop_url="shop-123456.myshopify.com", - client_id="secret", - client_secret="shpss_secret", -) diff --git a/mindsdb/integrations/handlers/shopify_handler/icon.svg b/mindsdb/integrations/handlers/shopify_handler/icon.svg deleted file mode 100644 index b0efe53f45c..00000000000 --- a/mindsdb/integrations/handlers/shopify_handler/icon.svg +++ /dev/null @@ -1,5 +0,0 @@ - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/shopify_handler/models/__init__.py b/mindsdb/integrations/handlers/shopify_handler/models/__init__.py deleted 
file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/handlers/shopify_handler/models/common.py b/mindsdb/integrations/handlers/shopify_handler/models/common.py deleted file mode 100644 index e60fe0713a9..00000000000 --- a/mindsdb/integrations/handlers/shopify_handler/models/common.py +++ /dev/null @@ -1,155 +0,0 @@ -from enum import Enum - - -class AliasesEnum(Enum): - """A class to represent a Shopify GraphQL resource. - It adds easy method to get the aliases of the Enum class. - """ - - @classmethod - def aliases(cls): - return ((name, field.value) for name, field in cls.__members__.items()) - - -class Count(AliasesEnum): - """A class to represent a Shopify GraphQL count. - Reference: https://shopify.dev/docs/api/admin-graphql/latest/objects/Count - """ - - count = "count" - precision = "precision" - - -class TaxonomyAttribute(AliasesEnum): - """A class to represent a Shopify GraphQL taxonomy attribute. - Reference: https://shopify.dev/docs/api/admin-graphql/latest/objects/TaxonomyAttribute - """ - - id = "id" - - -class TaxonomyChoiceListAttribute(AliasesEnum): - """A class to represent a Shopify GraphQL taxonomy choice list attribute. - Reference: https://shopify.dev/docs/api/admin-graphql/latest/objects/TaxonomyChoiceListAttribute - """ - - id = "id" - name = "name" - # values = Nodes() - - -class TaxonomyMeasurementAttribute(AliasesEnum): - """A class to represent a Shopify GraphQL taxonomy measurement attribute. - Reference: https://shopify.dev/docs/api/admin-graphql/latest/objects/TaxonomyMeasurementAttribute - """ - - id = "id" - name = "name" - # options = Attribute - - -class SEO(AliasesEnum): - """A class to represent a Shopify GraphQL SEO. - Reference: https://shopify.dev/docs/api/admin-graphql/latest/objects/SEO - """ - - description = "description" - title = "title" - - -class TaxonomyCategory(AliasesEnum): - """A class to represent a Shopify GraphQL taxonomy category. 
- Reference: https://shopify.dev/docs/api/admin-graphql/latest/objects/TaxonomyCategory - """ - - ancestorIds = "ancestorIds" - # attributes = Nodes(TaxonomyCategoryAttributeConnection) - childrenIds = "childrenIds" - fullName = "fullName" - id = "id" - name = "name" - - -class MoneyV2(AliasesEnum): - """A class to represent a Shopify GraphQL money v2. - Reference: https://shopify.dev/docs/api/admin-graphql/latest/objects/MoneyV2 - """ - - amount = "amount" - currencyCode = "currencyCode" - - -class ProductPriceRangeV2(AliasesEnum): - """A class to represent a Shopify GraphQL product price range v2. - Reference: https://shopify.dev/docs/api/admin-graphql/latest/objects/ProductPriceRangeV2 - Require `read_products` permission. - """ - - maxVariantPrice = MoneyV2 - minVariantPrice = MoneyV2 - - -class ProductCompareAtPriceRange(AliasesEnum): - """A class to represent a Shopify GraphQL product compare at price range. - Reference: https://shopify.dev/docs/api/admin-graphql/latest/objects/ProductCompareAtPriceRange - Require `read_products` permission. - """ - - maxVariantCompareAtPrice = MoneyV2 - minVariantCompareAtPrice = MoneyV2 - - -class CollectionConnection(AliasesEnum): - """A class to represent a Shopify GraphQL collection connection short. - Reference: https://shopify.dev/docs/api/storefront/latest/connections/collectionconnection - Just a subset of fields. - """ - - id = "id" - title = "title" - - -class MailingAddress(AliasesEnum): - """A class to represent a Shopify GraphQL mailing address. 
- Reference: https://shopify.dev/docs/api/admin-graphql/latest/objects/MailingAddress - """ - - address1 = "address1" - address2 = "address2" - city = "city" - company = "company" - coordinatesValidated = "coordinatesValidated" - country = "country" - countryCodeV2 = "countryCodeV2" - firstName = "firstName" - formatted = "formatted" - formattedArea = "formattedArea" - id = "id" - lastName = "lastName" - latitude = "latitude" - longitude = "longitude" - name = "name" - phone = "phone" - province = "province" - provinceCode = "provinceCode" - timeZone = "timeZone" - validationResultSummary = "validationResultSummary" - zip = "zip" - - -class OrderCancellation(AliasesEnum): - """A class to represent a Shopify GraphQL order cancellation. - Reference: https://shopify.dev/docs/api/admin-graphql/latest/objects/OrderCancellation - Require `read_orders` permission. - """ - - staffNote = "staffNote" - - -class MoneyBag(AliasesEnum): - """A class to represent a Shopify GraphQL money bag. - Reference: https://shopify.dev/docs/api/admin-graphql/latest/objects/MoneyBag - """ - - presentmentMoney = MoneyV2 - shopMoney = MoneyV2 diff --git a/mindsdb/integrations/handlers/shopify_handler/models/customers.py b/mindsdb/integrations/handlers/shopify_handler/models/customers.py deleted file mode 100644 index 68c15663003..00000000000 --- a/mindsdb/integrations/handlers/shopify_handler/models/customers.py +++ /dev/null @@ -1,345 +0,0 @@ -from .common import AliasesEnum, MailingAddress, MoneyV2 -from .utils import Nodes, Extract - - -class Customers(AliasesEnum): - """A class to represent a Shopify GraphQL customer. - Reference: https://shopify.dev/docs/api/admin-graphql/latest/objects/Customer - Require `read_customers` permission. 
- """ - - addresses = MailingAddress - addressesV2 = Nodes(MailingAddress) - amountSpent = MoneyV2 - canDelete = "canDelete" - # companyContactProfiles = CompanyContact - createdAt = "createdAt" - dataSaleOptOut = "dataSaleOptOut" - defaultAddress = MailingAddress - country = Extract("defaultAddress", "country") # Custom - # defaultEmailAddress = defaultEmailAddress - emailAddress = Extract("defaultEmailAddress", "emailAddress") # Custom - # defaultPhoneNumber = "defaultPhoneNumber" - phoneNumber = Extract("defaultPhoneNumber", "phoneNumber") # Custom - displayName = "displayName" - # events = "events" - firstName = "firstName" - id = "id" - # image = "image" - lastName = "lastName" - # lastOrder = "lastOrder" - # legacyResourceId = "legacyResourceId" - lifetimeDuration = "lifetimeDuration" - locale = "locale" - # mergeable = "mergeable" - # metafield = "metafield" - # metafields = "metafields" - multipassIdentifier = "multipassIdentifier" - note = "note" - numberOfOrders = "numberOfOrders" - # orders = "orders" - # paymentMethods = "paymentMethods" - productSubscriberStatus = "productSubscriberStatus" - state = "state" - # statistics = "statistics" - # storeCreditAccounts = "storeCreditAccounts" - # subscriptionContracts = "subscriptionContracts" - tags = "tags" - taxExempt = "taxExempt" - # taxExemptions = "taxExemptions" - updatedAt = "updatedAt" - verifiedEmail = "verifiedEmail" - - -columns = [ - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "addresses", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "A list of addresses associated with the customer.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "addressesV2", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The addresses associated with the customer.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "amountSpent", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The total amount that the customer has spent on orders in their lifetime.", - 
"IS_NULLABLE": False, - }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "canDelete", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether the merchant can delete the customer from their store. A customer can be deleted from a store only if they haven't yet made an order. After a customer makes an order, they can't be deleted from a store.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "customers", - # "COLUMN_NAME": "companyContactProfiles", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of the customer's company contact profiles.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "createdAt", - "DATA_TYPE": "TIMESTAMP", - "COLUMN_DESCRIPTION": "The date and time when the customer was added to the store.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "dataSaleOptOut", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether the customer has opted out of having their data sold.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "defaultAddress", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The default address associated with the customer.", - "IS_NULLABLE": None, - }, - { - # Custom field, extracted from defaultAddress - "TABLE_NAME": "customers", - "COLUMN_NAME": "country", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The coutry associated with the customer.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "customers", - # "COLUMN_NAME": "defaultEmailAddress", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The customer's default email address.", - # "IS_NULLABLE": None - # }, - { - # Custom field, extracted from defaultEmailAddress - "TABLE_NAME": "customers", - "COLUMN_NAME": "emailAddress", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The customer's default email address.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "customers", - # "COLUMN_NAME": "defaultPhoneNumber", - # "DATA_TYPE": "JSON", - # 
"COLUMN_DESCRIPTION": "The customer's default phone number.", - # "IS_NULLABLE": None - # }, - { - # Custom field, extracted from defaultPhoneNumber - "TABLE_NAME": "customers", - "COLUMN_NAME": "phoneNumber", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The customer's default phone number.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "displayName", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The full name of the customer, based on the values for first_name and last_name. If the first_name and last_name are not available, then this falls back to the customer's email address, and if that is not available, the customer's phone number.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "customers", - # "COLUMN_NAME": "events", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of events associated with the customer.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "firstName", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The customer's first name.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "id", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "A globally-unique ID.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "customers", - # "COLUMN_NAME": "image", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The image associated with the customer.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "lastName", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The customer's last name.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "customers", - # "COLUMN_NAME": "lastOrder", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The customer's last order.", - # "IS_NULLABLE": None - # }, - # { - # "TABLE_NAME": "customers", - # "COLUMN_NAME": "legacyResourceId", - # "DATA_TYPE": "INT", - # "COLUMN_DESCRIPTION": "The ID of the corresponding resource in the REST Admin API.", - # "IS_NULLABLE": False - # }, - 
{ - "TABLE_NAME": "customers", - "COLUMN_NAME": "lifetimeDuration", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The amount of time since the customer was first added to the store. Example: 'about 12 years'.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "locale", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The customer's locale.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "customers", - # "COLUMN_NAME": "mergeable", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "Whether the customer can be merged with another customer.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "customers", - # "COLUMN_NAME": "metafield", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A custom field, including its namespace and key, that's associated with a Shopify resource for the purposes of adding and storing additional information.", - # "IS_NULLABLE": None - # }, - # { - # "TABLE_NAME": "customers", - # "COLUMN_NAME": "metafields", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of custom fields that a merchant associates with a Shopify resource.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "multipassIdentifier", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "A unique identifier for the customer that's used with Multipass login.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "note", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "A note about the customer.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "numberOfOrders", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The number of orders that the customer has made at the store in their lifetime.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "customers", - # "COLUMN_NAME": "orders", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of the customer's orders.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "customers", - # 
"COLUMN_NAME": "paymentMethods", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of the customer's payment methods.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "productSubscriberStatus", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "Possible subscriber states of a customer defined by their subscription contracts.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "state", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The state of the customer's account with the shop. Please note that this only meaningful when Classic Customer Accounts is active.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "customers", - # "COLUMN_NAME": "statistics", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The statistics for a given customer.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "customers", - # "COLUMN_NAME": "storeCreditAccounts", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "Returns a list of store credit accounts that belong to the owner resource. 
A store credit account owner can hold multiple accounts each with a different currency.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "customers", - # "COLUMN_NAME": "subscriptionContracts", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of the customer's subscription contracts.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "tags", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "A comma separated list of tags that have been added to the customer.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "taxExempt", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether the customer is exempt from being charged taxes on their orders.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "customers", - # "COLUMN_NAME": "taxExemptions", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The list of tax exemptions applied to the customer.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "updatedAt", - "DATA_TYPE": "TIMESTAMP", - "COLUMN_DESCRIPTION": "The date and time when the customer was last updated.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "verifiedEmail", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether the customer has verified their email address. Defaults to true if the customer is created through the Shopify admin or API.", - "IS_NULLABLE": False, - }, -] diff --git a/mindsdb/integrations/handlers/shopify_handler/models/gift_cards.py b/mindsdb/integrations/handlers/shopify_handler/models/gift_cards.py deleted file mode 100644 index 88039166c32..00000000000 --- a/mindsdb/integrations/handlers/shopify_handler/models/gift_cards.py +++ /dev/null @@ -1,144 +0,0 @@ -from .common import AliasesEnum, MoneyV2 -from .utils import Extract - - -class GiftCards(AliasesEnum): - """A class to represent a Shopify GraphQL gift card. 
- Reference: https://shopify.dev/docs/api/admin-graphql/latest/objects/GiftCard - Require `read_gift_cards` permission. - """ - - balance = MoneyV2 - createdAt = "createdAt" - customerId = Extract("customer", "id") # Custom - # customer = "customer" - deactivatedAt = "deactivatedAt" - enabled = "enabled" - expiresOn = "expiresOn" - id = "id" - initialValue = MoneyV2 - lastCharacters = "lastCharacters" - maskedCode = "maskedCode" - note = "note" - orderId = Extract("order", "id") # Custom - # order = "order" - # recipientAttributes = "recipientAttributes" - templateSuffix = "templateSuffix" - # transactions = "transactions" - updatedAt = "updatedAt" - - -columns = [ - { - "TABLE_NAME": "gift_cards", - "COLUMN_NAME": "balance", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The gift card's remaining balance.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "gift_cards", - "COLUMN_NAME": "createdAt", - "DATA_TYPE": "TIMESTAMP", - "COLUMN_DESCRIPTION": "The date and time at which the gift card was created.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "gift_cards", - # "COLUMN_NAME": "customer", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The customer who will receive the gift card.", - # "IS_NULLABLE": None - # }, - { - "TABLE_NAME": "gift_cards", - "COLUMN_NAME": "deactivatedAt", - "DATA_TYPE": "TIMESTAMP", - "COLUMN_DESCRIPTION": "The date and time at which the gift card was deactivated.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "gift_cards", - "COLUMN_NAME": "enabled", - "DATA_TYPE": "BOOL", - "COLUMN_DESCRIPTION": "Whether the gift card is enabled.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "gift_cards", - "COLUMN_NAME": "expiresOn", - "DATA_TYPE": "DATE", - "COLUMN_DESCRIPTION": "The date at which the gift card will expire.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "gift_cards", - "COLUMN_NAME": "id", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "A globally-unique ID.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": 
"gift_cards", - "COLUMN_NAME": "initialValue", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The initial value of the gift card.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "gift_cards", - "COLUMN_NAME": "lastCharacters", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The final four characters of the gift card code.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "gift_cards", - "COLUMN_NAME": "maskedCode", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The gift card code. Everything but the final four characters is masked.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "gift_cards", - "COLUMN_NAME": "note", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The note associated with the gift card, which isn't visible to the customer.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "gift_cards", - # "COLUMN_NAME": "order", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The order associated with the gift card. This value is null if the gift card was issued manually.", - # "IS_NULLABLE": None - # }, - # { - # "TABLE_NAME": "gift_cards", - # "COLUMN_NAME": "recipientAttributes", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The recipient who will receive the gift card.", - # "IS_NULLABLE": None - # }, - { - "TABLE_NAME": "gift_cards", - "COLUMN_NAME": "templateSuffix", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The theme template used to render the gift card online.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "gift_cards", - # "COLUMN_NAME": "transactions", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The transaction history of the gift card.", - # "IS_NULLABLE": None - # }, - { - "TABLE_NAME": "gift_cards", - "COLUMN_NAME": "updatedAt", - "DATA_TYPE": "TIMESTAMP", - "COLUMN_DESCRIPTION": "The date and time at which the gift card was updated.", - "IS_NULLABLE": False, - }, -] diff --git a/mindsdb/integrations/handlers/shopify_handler/models/inventory_items.py 
b/mindsdb/integrations/handlers/shopify_handler/models/inventory_items.py deleted file mode 100644 index 978ec51b63e..00000000000 --- a/mindsdb/integrations/handlers/shopify_handler/models/inventory_items.py +++ /dev/null @@ -1,173 +0,0 @@ -from .common import AliasesEnum, MoneyV2 - - -class InventoryItems(AliasesEnum): - """A class to represent a Shopify GraphQL inventory item. - Reference: https://shopify.dev/docs/api/admin-graphql/latest/objects/InventoryItem - Require `read_inventory` or `read_products` permission. - """ - - countryCodeOfOrigin = "countryCodeOfOrigin" - # countryHarmonizedSystemCodes = "countryHarmonizedSystemCodes" - createdAt = "createdAt" - duplicateSkuCount = "duplicateSkuCount" - harmonizedSystemCode = "harmonizedSystemCode" - id = "id" - inventoryHistoryUrl = "inventoryHistoryUrl" - # inventoryLevel = "inventoryLevel" - # inventoryLevels = "inventoryLevels" - legacyResourceId = "legacyResourceId" - # locationsCount = "locationsCount" - # measurement = "measurement" - provinceCodeOfOrigin = "provinceCodeOfOrigin" - requiresShipping = "requiresShipping" - sku = "sku" - tracked = "tracked" - # trackedEditable = "trackedEditable" - unitCost = MoneyV2 - updatedAt = "updatedAt" - # variant = "variant" - - -columns = [ - { - "TABLE_NAME": "inventory_items", - "COLUMN_NAME": "countryCodeOfOrigin", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The ISO 3166-1 alpha-2 country code of where the item originated from.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "inventory_items", - # "COLUMN_NAME": "countryHarmonizedSystemCodes", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of country specific harmonized system codes.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "inventory_items", - "COLUMN_NAME": "createdAt", - "DATA_TYPE": "TIMESTAMP", - "COLUMN_DESCRIPTION": "The date and time when the inventory item was created.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "inventory_items", - "COLUMN_NAME": 
"duplicateSkuCount", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The number of inventory items that share the same SKU with this item.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "inventory_items", - "COLUMN_NAME": "harmonizedSystemCode", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The harmonized system code of the item. This must be a number between 6 and 13 digits.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "inventory_items", - "COLUMN_NAME": "id", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "A globally-unique ID.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "inventory_items", - "COLUMN_NAME": "inventoryHistoryUrl", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The URL that points to the inventory history for the item.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "inventory_items", - # "COLUMN_NAME": "inventoryLevel", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The inventory item's quantities at the specified location.", - # "IS_NULLABLE": None - # }, - # { - # "TABLE_NAME": "inventory_items", - # "COLUMN_NAME": "inventoryLevels", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of the inventory item's quantities for each location that the inventory item can be stocked at.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "inventory_items", - "COLUMN_NAME": "legacyResourceId", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The ID of the corresponding resource in the REST Admin API.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "inventory_items", - # "COLUMN_NAME": "locationsCount", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The number of locations where this inventory item is stocked.", - # "IS_NULLABLE": None - # }, - # { - # "TABLE_NAME": "inventory_items", - # "COLUMN_NAME": "measurement", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The packaging dimensions of the inventory item.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "inventory_items", - "COLUMN_NAME": 
"provinceCodeOfOrigin", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The ISO 3166-2 alpha-2 province code of where the item originated from.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "inventory_items", - "COLUMN_NAME": "requiresShipping", - "DATA_TYPE": "BOOL", - "COLUMN_DESCRIPTION": "Whether the inventory item requires shipping.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "inventory_items", - "COLUMN_NAME": "sku", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "Inventory item SKU. Case-sensitive string.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "inventory_items", - "COLUMN_NAME": "tracked", - "DATA_TYPE": "BOOL", - "COLUMN_DESCRIPTION": "Whether inventory levels are tracked for the item.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "inventory_items", - # "COLUMN_NAME": "trackedEditable", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "Whether the value of the tracked field for the inventory item can be changed.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "inventory_items", - "COLUMN_NAME": "unitCost", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "Unit cost associated with the inventory item. 
Note: the user must have View product costs permission granted in order to access this field once product granular permissions are enabled.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "inventory_items", - "COLUMN_NAME": "updatedAt", - "DATA_TYPE": "TIMESTAMP", - "COLUMN_DESCRIPTION": "The date and time when the inventory item was updated.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "inventory_items", - # "COLUMN_NAME": "variant", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The variant that owns this inventory item.", - # "IS_NULLABLE": False - # } -] diff --git a/mindsdb/integrations/handlers/shopify_handler/models/marketing_events.py b/mindsdb/integrations/handlers/shopify_handler/models/marketing_events.py deleted file mode 100644 index 51a43a7d805..00000000000 --- a/mindsdb/integrations/handlers/shopify_handler/models/marketing_events.py +++ /dev/null @@ -1,149 +0,0 @@ -from .common import AliasesEnum - - -class MarketingEvents(AliasesEnum): - """A class to represent a Shopify GraphQL marketing event. - Reference: https://shopify.dev/docs/api/admin-graphql/latest/objects/MarketingEvent - Require `read_marketing_events` permission. 
- """ - - # app = "app" - channelHandle = "channelHandle" - description = "description" - endedAt = "endedAt" - id = "id" - # legacyResourceId = "legacyResourceId" - manageUrl = "manageUrl" - marketingChannelType = "marketingChannelType" - previewUrl = "previewUrl" - remoteId = "remoteId" - scheduledToEndAt = "scheduledToEndAt" - sourceAndMedium = "sourceAndMedium" - startedAt = "startedAt" - type = "type" - utmCampaign = "utmCampaign" - utmMedium = "utmMedium" - utmSource = "utmSource" - - -columns = [ - # { - # "TABLE_NAME": "marketing_events", - # "COLUMN_NAME": "app", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The app that the marketing event is attributed to.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "marketing_events", - "COLUMN_NAME": "channelHandle", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The unique string identifier of the channel to which this activity belongs. For the correct handle for your channel, contact your partner manager.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "marketing_events", - "COLUMN_NAME": "description", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "A human-readable description of the marketing event.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "marketing_events", - "COLUMN_NAME": "endedAt", - "DATA_TYPE": "TIMESTAMP", - "COLUMN_DESCRIPTION": "The date and time when the marketing event ended.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "marketing_events", - "COLUMN_NAME": "id", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "A globally-unique ID.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "marketing_events", - # "COLUMN_NAME": "legacyResourceId", - # "DATA_TYPE": "INT", - # "COLUMN_DESCRIPTION": "The ID of the corresponding resource in the REST Admin API.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "marketing_events", - "COLUMN_NAME": "manageUrl", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The URL where the marketing event can be managed.", - "IS_NULLABLE": 
None, - }, - { - "TABLE_NAME": "marketing_events", - "COLUMN_NAME": "marketingChannelType", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The medium through which the marketing activity and event reached consumers. This is used for reporting aggregation.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "marketing_events", - "COLUMN_NAME": "previewUrl", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The URL where the marketing event can be previewed.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "marketing_events", - "COLUMN_NAME": "remoteId", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "An optional ID that helps Shopify validate engagement data.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "marketing_events", - "COLUMN_NAME": "scheduledToEndAt", - "DATA_TYPE": "TIMESTAMP", - "COLUMN_DESCRIPTION": "The date and time when the marketing event is scheduled to end.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "marketing_events", - "COLUMN_NAME": "sourceAndMedium", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "Where the MarketingEvent occurred and what kind of content was used. 
Because utmSource and utmMedium are often used interchangeably, this is based on a combination of marketingChannel, referringDomain, and type to provide a consistent representation for any given piece of marketing regardless of the app that created it.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "marketing_events", - "COLUMN_NAME": "startedAt", - "DATA_TYPE": "TIMESTAMP", - "COLUMN_DESCRIPTION": "The date and time when the marketing event started.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "marketing_events", - "COLUMN_NAME": "type", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The marketing event type.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "marketing_events", - "COLUMN_NAME": "utmCampaign", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The name of the marketing campaign.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "marketing_events", - "COLUMN_NAME": "utmMedium", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The medium that the marketing campaign is using. Example values: cpc, banner.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "marketing_events", - "COLUMN_NAME": "utmSource", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The referrer of the marketing event. Example values: google, newsletter.", - "IS_NULLABLE": None, - }, -] diff --git a/mindsdb/integrations/handlers/shopify_handler/models/orders.py b/mindsdb/integrations/handlers/shopify_handler/models/orders.py deleted file mode 100644 index b98c04551a5..00000000000 --- a/mindsdb/integrations/handlers/shopify_handler/models/orders.py +++ /dev/null @@ -1,1252 +0,0 @@ -from .common import AliasesEnum, Count, MailingAddress, OrderCancellation, MoneyBag -from .utils import Extract, DeepExtract - - -class Orders(AliasesEnum): - """A class to represent a Shopify GraphQL order. - Reference: https://shopify.dev/docs/api/admin-graphql/latest/objects/Order - Require `read_orders`, `read_marketplace_orders` or `read_quick_sale` permission. 
- """ - - # additionalFees = "additionalFees" - # agreements = "agreements" - # alerts = "alerts" - # app = "app" - # billingAddress = "billingAddress" - billingAddressMatchesShippingAddress = "billingAddressMatchesShippingAddress" - cancellation = OrderCancellation - cancelledAt = "cancelledAt" - cancelReason = "cancelReason" - canMarkAsPaid = "canMarkAsPaid" - canNotifyCustomer = "canNotifyCustomer" - capturable = "capturable" - # cartDiscountAmountSet = "cartDiscountAmountSet" - # channelInformation = "channelInformation" - clientIp = "clientIp" - closed = "closed" - closedAt = "closedAt" - confirmationNumber = "confirmationNumber" - confirmed = "confirmed" - createdAt = "createdAt" - currencyCode = "currencyCode" - # currentCartDiscountAmountSet = "currentCartDiscountAmountSet" - currentShippingPriceSet = MoneyBag - currentShippingPriceSet_presentmentMoney_amount = DeepExtract( - ["currentShippingPriceSet", "presentmentMoney", "amount"], "DECIMAL" - ) - currentShippingPriceSet_presentmentMoney_currencyCode = DeepExtract( - ["currentShippingPriceSet", "presentmentMoney", "currencyCode"], "TEXT" - ) - currentShippingPriceSet_shopMoney_amount = DeepExtract( - ["currentShippingPriceSet", "shopMoney", "amount"], "DECIMAL" - ) - currentShippingPriceSet_shopMoney_currencyCode = DeepExtract( - ["currentShippingPriceSet", "shopMoney", "currencyCode"], "TEXT" - ) - currentSubtotalLineItemsQuantity = "currentSubtotalLineItemsQuantity" - currentSubtotalPriceSet = MoneyBag - currentSubtotalPriceSet_presentmentMoney_amount = DeepExtract( - ["currentSubtotalPriceSet", "presentmentMoney", "amount"], "DECIMAL" - ) - currentSubtotalPriceSet_presentmentMoney_currencyCode = DeepExtract( - ["currentSubtotalPriceSet", "presentmentMoney", "currencyCode"], "TEXT" - ) - currentSubtotalPriceSet_shopMoney_amount = DeepExtract( - ["currentSubtotalPriceSet", "shopMoney", "amount"], "DECIMAL" - ) - currentSubtotalPriceSet_shopMoney_currencyCode = DeepExtract( - ["currentSubtotalPriceSet", 
"shopMoney", "currencyCode"], "TEXT" - ) - # currentTaxLines = "currentTaxLines" - currentTotalAdditionalFeesSet = MoneyBag - currentTotalAdditionalFeesSet_presentmentMoney_amount = DeepExtract( - ["currentTotalAdditionalFeesSet", "presentmentMoney", "amount"], "DECIMAL" - ) - currentTotalAdditionalFeesSet_presentmentMoney_currencyCode = DeepExtract( - ["currentTotalAdditionalFeesSet", "presentmentMoney", "currencyCode"], "TEXT" - ) - currentTotalAdditionalFeesSet_shopMoney_amount = DeepExtract( - ["currentTotalAdditionalFeesSet", "shopMoney", "amount"], "DECIMAL" - ) - currentTotalAdditionalFeesSet_shopMoney_currencyCode = DeepExtract( - ["currentTotalAdditionalFeesSet", "shopMoney", "currencyCode"], "TEXT" - ) - currentTotalDiscountsSet = MoneyBag - currentTotalDiscountsSet_presentmentMoney_amount = DeepExtract( - ["currentTotalDiscountsSet", "presentmentMoney", "amount"], "DECIMAL" - ) - currentTotalDiscountsSet_presentmentMoney_currencyCode = DeepExtract( - ["currentTotalDiscountsSet", "presentmentMoney", "currencyCode"], "TEXT" - ) - currentTotalDiscountsSet_shopMoney_amount = DeepExtract( - ["currentTotalDiscountsSet", "shopMoney", "amount"], "DECIMAL" - ) - currentTotalDiscountsSet_shopMoney_currencyCode = DeepExtract( - ["currentTotalDiscountsSet", "shopMoney", "currencyCode"], "TEXT" - ) - currentTotalDutiesSet = MoneyBag - currentTotalDutiesSet_presentmentMoney_amount = DeepExtract( - ["currentTotalDutiesSet", "presentmentMoney", "amount"], "DECIMAL" - ) - currentTotalDutiesSet_presentmentMoney_currencyCode = DeepExtract( - ["currentTotalDutiesSet", "presentmentMoney", "currencyCode"], "TEXT" - ) - currentTotalDutiesSet_shopMoney_amount = DeepExtract(["currentTotalDutiesSet", "shopMoney", "amount"], "DECIMAL") - currentTotalDutiesSet_shopMoney_currencyCode = DeepExtract( - ["currentTotalDutiesSet", "shopMoney", "currencyCode"], "TEXT" - ) - currentTotalPriceSet = MoneyBag - currentTotalPriceSet_presentmentMoney_amount = DeepExtract( - 
["currentTotalPriceSet", "presentmentMoney", "amount"], "DECIMAL" - ) - currentTotalPriceSet_presentmentMoney_currencyCode = DeepExtract( - ["currentTotalPriceSet", "presentmentMoney", "currencyCode"], "TEXT" - ) - currentTotalPriceSet_shopMoney_amount = DeepExtract(["currentTotalPriceSet", "shopMoney", "amount"], "DECIMAL") - currentTotalPriceSet_shopMoney_currencyCode = DeepExtract( - ["currentTotalPriceSet", "shopMoney", "currencyCode"], "TEXT" - ) - currentTotalTaxSet = MoneyBag - currentTotalTaxSet_presentmentMoney_amount = DeepExtract( - ["currentTotalTaxSet", "presentmentMoney", "amount"], "DECIMAL" - ) - currentTotalTaxSet_presentmentMoney_currencyCode = DeepExtract( - ["currentTotalTaxSet", "presentmentMoney", "currencyCode"], "TEXT" - ) - currentTotalTaxSet_shopMoney_amount = DeepExtract(["currentTotalTaxSet", "shopMoney", "amount"], "DECIMAL") - currentTotalTaxSet_shopMoney_currencyCode = DeepExtract(["currentTotalTaxSet", "shopMoney", "currencyCode"], "TEXT") - currentTotalWeight = "currentTotalWeight" - # customAttributes = "customAttributes" - # customer = "customer" - customerId = Extract("customer", "id") # custom - customerAcceptsMarketing = "customerAcceptsMarketing" - # customerJourneySummary = "customerJourneySummary" - customerLocale = "customerLocale" - # discountApplications = "discountApplications" - discountCode = "discountCode" - discountCodes = "discountCodes" - # displayAddress = "displayAddress" - # displayFinancialStatus = "displayFinancialStatus" - # displayFulfillmentStatus = "displayFulfillmentStatus" - # disputes = "disputes" - dutiesIncluded = "dutiesIncluded" - edited = "edited" - email = "email" - estimatedTaxes = "estimatedTaxes" - # events = "events" - fulfillable = "fulfillable" - # fulfillmentOrders = "fulfillmentOrders" - # fulfillments = "fulfillments" - fulfillmentsCount = Count - fullyPaid = "fullyPaid" - hasTimelineComment = "hasTimelineComment" - id = "id" - # legacyResourceId = "legacyResourceId" - # lineItems = 
"lineItems" - # localizedFields = "localizedFields" - # merchantBusinessEntity = "merchantBusinessEntity" - merchantEditable = "merchantEditable" - merchantEditableErrors = "merchantEditableErrors" - # merchantOfRecordApp = "merchantOfRecordApp" - # metafield = "metafield" - # metafields = "metafields" - name = "name" - netPaymentSet = MoneyBag - netPaymentSet_presentmentMoney_amount = DeepExtract(["netPaymentSet", "presentmentMoney", "amount"], "DECIMAL") - netPaymentSet_presentmentMoney_currencyCode = DeepExtract( - ["netPaymentSet", "presentmentMoney", "currencyCode"], "TEXT" - ) - netPaymentSet_shopMoney_amount = DeepExtract(["netPaymentSet", "shopMoney", "amount"], "DECIMAL") - netPaymentSet_shopMoney_currencyCode = DeepExtract(["netPaymentSet", "shopMoney", "currencyCode"], "TEXT") - # nonFulfillableLineItems = "nonFulfillableLineItems" - note = "note" - number = "number" - originalTotalAdditionalFeesSet = MoneyBag - originalTotalAdditionalFeesSet_presentmentMoney_amount = DeepExtract( - ["originalTotalAdditionalFeesSet", "presentmentMoney", "amount"], "DECIMAL" - ) - originalTotalAdditionalFeesSet_presentmentMoney_currencyCode = DeepExtract( - ["originalTotalAdditionalFeesSet", "presentmentMoney", "currencyCode"], "TEXT" - ) - originalTotalAdditionalFeesSet_shopMoney_amount = DeepExtract( - ["originalTotalAdditionalFeesSet", "shopMoney", "amount"], "DECIMAL" - ) - originalTotalAdditionalFeesSet_shopMoney_currencyCode = DeepExtract( - ["originalTotalAdditionalFeesSet", "shopMoney", "currencyCode"], "TEXT" - ) - originalTotalDutiesSet = MoneyBag - originalTotalDutiesSet_presentmentMoney_amount = DeepExtract( - ["originalTotalDutiesSet", "presentmentMoney", "amount"], "DECIMAL" - ) - originalTotalDutiesSet_presentmentMoney_currencyCode = DeepExtract( - ["originalTotalDutiesSet", "presentmentMoney", "currencyCode"], "TEXT" - ) - originalTotalDutiesSet_shopMoney_amount = DeepExtract(["originalTotalDutiesSet", "shopMoney", "amount"], "DECIMAL") - 
originalTotalDutiesSet_shopMoney_currencyCode = DeepExtract( - ["originalTotalDutiesSet", "shopMoney", "currencyCode"], "TEXT" - ) - originalTotalPriceSet = MoneyBag - originalTotalPriceSet_presentmentMoney_amount = DeepExtract( - ["originalTotalPriceSet", "presentmentMoney", "amount"], "DECIMAL" - ) - originalTotalPriceSet_presentmentMoney_currencyCode = DeepExtract( - ["originalTotalPriceSet", "presentmentMoney", "currencyCode"], "TEXT" - ) - originalTotalPriceSet_shopMoney_amount = DeepExtract(["originalTotalPriceSet", "shopMoney", "amount"], "DECIMAL") - originalTotalPriceSet_shopMoney_currencyCode = DeepExtract( - ["originalTotalPriceSet", "shopMoney", "currencyCode"], "TEXT" - ) - # paymentCollectionDetails = "paymentCollectionDetails" - paymentGatewayNames = "paymentGatewayNames" - # paymentTerms = "paymentTerms" - phone = "phone" - poNumber = "poNumber" - presentmentCurrencyCode = "presentmentCurrencyCode" - processedAt = "processedAt" - productNetwork = "productNetwork" - # publication = "publication" - # purchasingEntity = "purchasingEntity" - refundable = "refundable" - refundDiscrepancySet = MoneyBag - refundDiscrepancySet_presentmentMoney_amount = DeepExtract( - ["refundDiscrepancySet", "presentmentMoney", "amount"], "DECIMAL" - ) - refundDiscrepancySet_presentmentMoney_currencyCode = DeepExtract( - ["refundDiscrepancySet", "presentmentMoney", "currencyCode"], "TEXT" - ) - refundDiscrepancySet_shopMoney_amount = DeepExtract(["refundDiscrepancySet", "shopMoney", "amount"], "DECIMAL") - refundDiscrepancySet_shopMoney_currencyCode = DeepExtract( - ["refundDiscrepancySet", "shopMoney", "currencyCode"], "TEXT" - ) - # refunds = "refunds" - registeredSourceUrl = "registeredSourceUrl" - requiresShipping = "requiresShipping" - restockable = "restockable" - # retailLocation = "retailLocation" - # returns = "returns" - returnStatus = "returnStatus" - # risk = "risk" - shippingAddress = MailingAddress - # shippingLine = "shippingLine" - # shippingLines = 
"shippingLines" - # shopifyProtect = "shopifyProtect" - sourceIdentifier = "sourceIdentifier" - sourceName = "sourceName" - # staffMember = "staffMember" - statusPageUrl = "statusPageUrl" - subtotalLineItemsQuantity = "subtotalLineItemsQuantity" - subtotalPriceSet = MoneyBag - subtotalPriceSet_presentmentMoney_amount = DeepExtract( - ["subtotalPriceSet", "presentmentMoney", "amount"], "DECIMAL" - ) - subtotalPriceSet_presentmentMoney_currencyCode = DeepExtract( - ["subtotalPriceSet", "presentmentMoney", "currencyCode"], "TEXT" - ) - subtotalPriceSet_shopMoney_amount = DeepExtract(["subtotalPriceSet", "shopMoney", "amount"], "DECIMAL") - subtotalPriceSet_shopMoney_currencyCode = DeepExtract(["subtotalPriceSet", "shopMoney", "currencyCode"], "TEXT") - # suggestedRefund = "suggestedRefund" - tags = "tags" - taxesIncluded = "taxesIncluded" - taxExempt = "taxExempt" - # taxLines = "taxLines" - test = "test" - totalCapturableSet = MoneyBag - totalCapturableSet_presentmentMoney_amount = DeepExtract( - ["totalCapturableSet", "presentmentMoney", "amount"], "DECIMAL" - ) - totalCapturableSet_presentmentMoney_currencyCode = DeepExtract( - ["totalCapturableSet", "presentmentMoney", "currencyCode"], "TEXT" - ) - totalCapturableSet_shopMoney_amount = DeepExtract(["totalCapturableSet", "shopMoney", "amount"], "DECIMAL") - totalCapturableSet_shopMoney_currencyCode = DeepExtract(["totalCapturableSet", "shopMoney", "currencyCode"], "TEXT") - # totalCashRoundingAdjustment = "totalCashRoundingAdjustment" - totalDiscountsSet = MoneyBag - totalDiscountsSet_presentmentMoney_amount = DeepExtract( - ["totalDiscountsSet", "presentmentMoney", "amount"], "DECIMAL" - ) - totalDiscountsSet_presentmentMoney_currencyCode = DeepExtract( - ["totalDiscountsSet", "presentmentMoney", "currencyCode"], "TEXT" - ) - totalDiscountsSet_shopMoney_amount = DeepExtract(["totalDiscountsSet", "shopMoney", "amount"], "DECIMAL") - totalDiscountsSet_shopMoney_currencyCode = DeepExtract(["totalDiscountsSet", 
"shopMoney", "currencyCode"], "TEXT") - totalOutstandingSet = MoneyBag - totalOutstandingSet_presentmentMoney_amount = DeepExtract( - ["totalOutstandingSet", "presentmentMoney", "amount"], "DECIMAL" - ) - totalOutstandingSet_presentmentMoney_currencyCode = DeepExtract( - ["totalOutstandingSet", "presentmentMoney", "currencyCode"], "TEXT" - ) - totalOutstandingSet_shopMoney_amount = DeepExtract(["totalOutstandingSet", "shopMoney", "amount"], "DECIMAL") - totalOutstandingSet_shopMoney_currencyCode = DeepExtract( - ["totalOutstandingSet", "shopMoney", "currencyCode"], "TEXT" - ) - totalPriceSet = MoneyBag - totalPriceSet_presentmentMoney_amount = DeepExtract(["totalPriceSet", "presentmentMoney", "amount"], "DECIMAL") - totalPriceSet_presentmentMoney_currencyCode = DeepExtract( - ["totalPriceSet", "presentmentMoney", "currencyCode"], "TEXT" - ) - totalPriceSet_shopMoney_amount = DeepExtract(["totalPriceSet", "shopMoney", "amount"], "DECIMAL") - totalPriceSet_shopMoney_currencyCode = DeepExtract(["totalPriceSet", "shopMoney", "currencyCode"], "TEXT") - totalReceivedSet = MoneyBag - totalReceivedSet_presentmentMoney_amount = DeepExtract( - ["totalReceivedSet", "presentmentMoney", "amount"], "DECIMAL" - ) - totalReceivedSet_presentmentMoney_currencyCode = DeepExtract( - ["totalReceivedSet", "presentmentMoney", "currencyCode"], "TEXT" - ) - totalReceivedSet_shopMoney_amount = DeepExtract(["totalReceivedSet", "shopMoney", "amount"], "DECIMAL") - totalReceivedSet_shopMoney_currencyCode = DeepExtract(["totalReceivedSet", "shopMoney", "currencyCode"], "TEXT") - totalRefundedSet = MoneyBag - totalRefundedSet_presentmentMoney_amount = DeepExtract( - ["totalRefundedSet", "presentmentMoney", "amount"], "DECIMAL" - ) - totalRefundedSet_presentmentMoney_currencyCode = DeepExtract( - ["totalRefundedSet", "presentmentMoney", "currencyCode"], "TEXT" - ) - totalRefundedSet_shopMoney_amount = DeepExtract(["totalRefundedSet", "shopMoney", "amount"], "DECIMAL") - 
totalRefundedSet_shopMoney_currencyCode = DeepExtract(["totalRefundedSet", "shopMoney", "currencyCode"], "TEXT") - totalRefundedShippingSet = MoneyBag - totalRefundedShippingSet_presentmentMoney_amount = DeepExtract( - ["totalRefundedShippingSet", "presentmentMoney", "amount"], "DECIMAL" - ) - totalRefundedShippingSet_presentmentMoney_currencyCode = DeepExtract( - ["totalRefundedShippingSet", "presentmentMoney", "currencyCode"], "TEXT" - ) - totalRefundedShippingSet_shopMoney_amount = DeepExtract( - ["totalRefundedShippingSet", "shopMoney", "amount"], "DECIMAL" - ) - totalRefundedShippingSet_shopMoney_currencyCode = DeepExtract( - ["totalRefundedShippingSet", "shopMoney", "currencyCode"], "TEXT" - ) - totalShippingPriceSet = MoneyBag - totalShippingPriceSet_presentmentMoney_amount = DeepExtract( - ["totalShippingPriceSet", "presentmentMoney", "amount"], "DECIMAL" - ) - totalShippingPriceSet_presentmentMoney_currencyCode = DeepExtract( - ["totalShippingPriceSet", "presentmentMoney", "currencyCode"], "TEXT" - ) - totalShippingPriceSet_shopMoney_amount = DeepExtract(["totalShippingPriceSet", "shopMoney", "amount"], "DECIMAL") - totalShippingPriceSet_shopMoney_currencyCode = DeepExtract( - ["totalShippingPriceSet", "shopMoney", "currencyCode"], "TEXT" - ) - totalTaxSet = MoneyBag - totalTaxSet_presentmentMoney_amount = DeepExtract(["totalTaxSet", "presentmentMoney", "amount"], "DECIMAL") - totalTaxSet_presentmentMoney_currencyCode = DeepExtract(["totalTaxSet", "presentmentMoney", "currencyCode"], "TEXT") - totalTaxSet_shopMoney_amount = DeepExtract(["totalTaxSet", "shopMoney", "amount"], "DECIMAL") - totalTaxSet_shopMoney_currencyCode = DeepExtract(["totalTaxSet", "shopMoney", "currencyCode"], "TEXT") - totalTipReceivedSet = MoneyBag - totalTipReceivedSet_presentmentMoney_amount = DeepExtract( - ["totalTipReceivedSet", "presentmentMoney", "amount"], "DECIMAL" - ) - totalTipReceivedSet_presentmentMoney_currencyCode = DeepExtract( - ["totalTipReceivedSet", 
"presentmentMoney", "currencyCode"], "TEXT" - ) - totalTipReceivedSet_shopMoney_amount = DeepExtract(["totalTipReceivedSet", "shopMoney", "amount"], "DECIMAL") - totalTipReceivedSet_shopMoney_currencyCode = DeepExtract( - ["totalTipReceivedSet", "shopMoney", "currencyCode"], "TEXT" - ) - totalWeight = "totalWeight" - # transactions = "transactions" - transactionsCount = Count - unpaid = "unpaid" - updatedAt = "updatedAt" - - -columns = [ - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "additionalFees", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of additional fees applied to an order, such as duties, import fees, or tax lines.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "agreements", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of sales agreements associated with the order, such as contracts defining payment terms, or delivery schedules between merchants and customers.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "alerts", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of messages that appear on the Orders page in the Shopify admin. These alerts provide merchants with important information about an order's status or required actions.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "app", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The application that created the order. For example, Online Store, Point of Sale, or a custom app name. Use this to identify the order source for attribution and fulfillment workflows.", - # "IS_NULLABLE": None - # }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "billingAddress", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The billing address associated with the payment method selected by the customer for an order. 
Returns null if no billing address was provided during checkout.", - # "IS_NULLABLE": None - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "billingAddressMatchesShippingAddress", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether the billing address matches the shipping address. Returns true if both addresses are the same, and false if they're different or if an address is missing.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "cancellation", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "Details of an order's cancellation, if it has been canceled. This includes the reason, date, and any staff notes.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "cancelledAt", - "DATA_TYPE": "TIMESTAMP", - "COLUMN_DESCRIPTION": "The date and time in ISO 8601 format when an order was canceled. Returns null if the order hasn't been canceled.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "cancelReason", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The reason provided for an order cancellation. For example, a merchant might cancel an order if there's insufficient inventory. Returns null if the order hasn't been canceled.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "canMarkAsPaid", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether an order can be manually marked as paid. Returns false if the order is already paid, is canceled, has pending Shopify Payments transactions, or has a negative payment amount.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "canNotifyCustomer", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether order notifications can be sent to the customer. 
Returns true if the customer has a valid email address.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "capturable", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether an authorized payment for an order can be captured. Returns true if an authorized payment exists that hasn't been fully captured yet.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "cartDiscountAmountSet", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The total discount amount applied at the time the order was created, displayed in both shop and presentment currencies, before returns, refunds, order edits, and cancellations. This field only includes discounts applied to the entire order.", - # "IS_NULLABLE": None - # }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "channelInformation", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "Details about the sales channel that created the order, such as the channel app type and channel name, which helps to track order sources.", - # "IS_NULLABLE": None - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "clientIp", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The IP address of the customer who placed the order. Useful for fraud detection and geographic analysis.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "closed", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether an order is closed. An order is considered closed if all its line items have been fulfilled or canceled, and all financial transactions are complete.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "closedAt", - "DATA_TYPE": "TIMESTAMP", - "COLUMN_DESCRIPTION": "The date and time ISO 8601 format when an order was closed. Shopify automatically records this timestamp when all items have been fulfilled or canceled, and all financial transactions are complete. 
Returns null if the order isn't closed.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "confirmationNumber", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "A customer-facing order identifier, often shown instead of the sequential order name. It uses a random alphanumeric format (for example, XPAV284CT) and isn't guaranteed to be unique across orders.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "confirmed", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether inventory has been reserved for an order. Returns true if inventory quantities for an order's line items have been reserved.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "createdAt", - "DATA_TYPE": "TIMESTAMP", - "COLUMN_DESCRIPTION": "The date and time in ISO 8601 format when an order was created. This timestamp is set when the customer completes checkout and remains unchanged throughout an order's lifecycle.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "currencyCode", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The shop currency when the order was placed. For example, USD or CAD.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "currentCartDiscountAmountSet", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The current total of all discounts applied to the entire order, after returns, refunds, order edits, and cancellations. This includes discount codes, automatic discounts, and other promotions that affect the whole order rather than individual line items.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "currentShippingPriceSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The current shipping price after applying refunds and discounts. If the parent order.taxesIncluded field is true, then this price includes taxes. 
Otherwise, this field is the pre-tax price.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "currentSubtotalLineItemsQuantity", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The current sum of the quantities for all line items that contribute to the order's subtotal price, after returns, refunds, order edits, and cancellations.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "currentSubtotalPriceSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The total price of the order, after returns and refunds, in shop and presentment currencies. This includes taxes and discounts.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "currentTaxLines", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of all tax lines applied to line items on the order, after returns. Tax line prices represent the total price for all tax lines with the same rate and title.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "currentTotalAdditionalFeesSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The current total of all additional fees for an order, after any returns or modifications. Modifications include returns, refunds, order edits, and cancellations. Additional fees can include charges such as duties, import fees, and special handling.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "currentTotalDiscountsSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The total amount discounted on the order after returns and refunds, in shop and presentment currencies. This includes both order and line level discounts.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "currentTotalDutiesSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The current total duties amount for an order, after any returns or modifications. 
Modifications include returns, refunds, order edits, and cancellations.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "currentTotalPriceSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The total price of the order, after returns, in shop and presentment currencies. This includes taxes and discounts.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "currentTotalTaxSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The sum of the prices of all tax lines applied to line items on the order, after returns and refunds, in shop and presentment currencies.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "currentTotalWeight", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The total weight of the order after returns and refunds, in grams.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "customAttributes", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of additional information that has been attached to the order. For example, gift message, delivery instructions, or internal notes.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "customer", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The customer who placed an order. Returns null if an order was created through a checkout without customer authentication, such as a guest checkout.", - # "IS_NULLABLE": None - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "customerId", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "ID of the customer.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "customerAcceptsMarketing", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether the customer agreed to receive marketing emails at the time of purchase. 
Use this to ensure compliance with marketing consent laws and to segment customers for email campaigns.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "customerJourneySummary", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The customer's visits and interactions with the online store before placing the order. Use this to understand customer behavior, attribution sources, and marketing effectiveness to optimize your sales funnel.", - # "IS_NULLABLE": None - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "customerLocale", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The customer's language and region preference at the time of purchase. For example, en for English, fr-CA for French (Canada), or es-MX for Spanish (Mexico). Use this to provide localized customer service and targeted marketing in the customer's preferred language.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "discountApplications", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of discounts that are applied to the order, excluding order edits and refunds. Includes discount codes, automatic discounts, and other promotions that reduce the order total.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "discountCode", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The discount code used for an order. Returns null if no discount code was applied.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "discountCodes", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The discount codes used for the order. Multiple codes can be applied to a single order.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "displayAddress", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The primary address of the customer, prioritizing shipping address over billing address when both are available. 
Returns null if neither shipping address nor billing address was provided.", - # "IS_NULLABLE": None - # }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "displayFinancialStatus", - # "DATA_TYPE": "TEXT", - # "COLUMN_DESCRIPTION": "An order's financial status for display in the Shopify admin.", - # "IS_NULLABLE": None - # }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "displayFulfillmentStatus", - # "DATA_TYPE": "TEXT", - # "COLUMN_DESCRIPTION": "The order's fulfillment status that displays in the Shopify admin to merchants. For example, an order might be unfulfilled or scheduled.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "disputes", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of payment disputes associated with the order, such as chargebacks or payment inquiries. Disputes occur when customers challenge transactions with their bank or payment provider.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "dutiesIncluded", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether duties are included in the subtotal price of the order. Duties are import taxes charged by customs authorities when goods cross international borders.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "edited", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether the order has had any edits applied. For example, adding or removing line items, updating quantities, or changing prices.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "email", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The email address associated with the customer for this order. Used for sending order confirmations, shipping notifications, and other order-related communications. 
Returns null if no email address was provided during checkout.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "estimatedTaxes", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether taxes on the order are estimated. This field returns false when taxes on the order are finalized and aren't subject to any changes.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "events", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of events associated with the order. Events track significant changes and activities related to the order, such as creation, payment, fulfillment, and cancellation.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "fulfillable", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether there are line items that can be fulfilled. This field returns false when the order has no fulfillable line items.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "fulfillmentOrders", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of fulfillment orders for an order. Each fulfillment order groups line items that are fulfilled together, allowing an order to be processed in parts if needed.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "fulfillments", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of shipments for the order. Fulfillments represent the physical shipment of products to customers.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "fulfillmentsCount", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The total number of fulfillments for the order, including canceled ones.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "fullyPaid", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether the order has been paid in full. 
This field returns true when the total amount received equals or exceeds the order total.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "hasTimelineComment", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether the merchant has added a timeline comment to the order.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "id", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "A globally-unique ID.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "legacyResourceId", - # "DATA_TYPE": "INT", - # "COLUMN_DESCRIPTION": "The ID of the corresponding resource in the REST Admin API.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "lineItems", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of the order's line items. Line items represent the individual products and quantities that make up the order.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "localizedFields", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "List of localized fields for the resource.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "merchantBusinessEntity", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The legal business structure that the merchant operates under for this order, such as an LLC, corporation, or partnership. Used for tax reporting, legal compliance, and determining which business entity is responsible for the order.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "merchantEditable", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether the order can be edited by the merchant. 
Returns false for orders that can't be modified, such as canceled orders or orders with specific payment statuses.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "merchantEditableErrors", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "A list of reasons why the order can't be edited. For example, canceled orders can't be edited.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "merchantOfRecordApp", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The application acting as the Merchant of Record for the order. The Merchant of Record is responsible for tax collection and remittance.", - # "IS_NULLABLE": None - # }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "metafield", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A custom field, including its namespace and key, that's associated with a Shopify resource for the purposes of adding and storing additional information.", - # "IS_NULLABLE": None - # }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "metafields", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of custom fields that a merchant associates with a Shopify resource.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "name", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The unique identifier for the order that appears on the order page in the Shopify admin and the Order status page. For example, #1001, EN1001, or 1001-A. 
This value isn't unique across multiple stores.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "netPaymentSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The net payment for the order, based on the total amount received minus the total amount refunded, in shop and presentment currencies.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "nonFulfillableLineItems", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of line items that can't be fulfilled. For example, tips and fully refunded line items can't be fulfilled.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "note", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The note associated with the order. Contains additional information or instructions added by merchants or customers during the order process. Commonly used for special delivery instructions, gift messages, or internal processing notes.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "number", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The order number used to generate the name using the store's configured order number prefix/suffix. This number isn't guaranteed to follow a consecutive integer sequence, nor is it guaranteed to be unique across multiple stores, or even for a single store.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "originalTotalAdditionalFeesSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The total amount of all additional fees, such as import fees or taxes, that were applied when an order was created. Returns null if additional fees aren't applicable.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "originalTotalDutiesSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The total amount of duties calculated when an order was created, before any modifications. 
Modifications include returns, refunds, order edits, and cancellations.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "originalTotalPriceSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The total price of the order at the time of order creation, in shop and presentment currencies. Use this to compare the original order value against the current total after edits, returns, or refunds.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "paymentCollectionDetails", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The payment collection details for the order, including payment status, outstanding amounts, and collection information. Use this to understand when and how payments should be collected, especially for orders with deferred or installment payment terms.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "paymentGatewayNames", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "A list of the names of all payment gateways used for the order. For example, Shopify Payments and Cash on Delivery (COD).", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "paymentTerms", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The payment terms associated with the order, such as net payment due dates or early payment discounts. Payment terms define when and how an order should be paid. Returns null if no specific payment terms were set for the order.", - # "IS_NULLABLE": None - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "phone", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The phone number associated with the customer for this order. Useful for contacting customers about shipping updates, delivery notifications, or order issues. 
Returns null if no phone number was provided during checkout.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "poNumber", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The purchase order (PO) number that's associated with an order. This is typically provided by business customers who require a PO number for their procurement.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "presentmentCurrencyCode", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The currency used by the customer when placing the order. For example, USD, EUR, or CAD. This may differ from the shop's base currency when serving international customers or using multi-currency pricing.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "processedAt", - "DATA_TYPE": "TIMESTAMP", - "COLUMN_DESCRIPTION": "The date and time in ISO 8601 format when the order was processed. This date and time might not match the date and time when the order was created.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "productNetwork", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether the customer also purchased items from other stores in the network.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "publication", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The sales channel that the order was created from, such as the Online Store or Shopify POS.", - # "IS_NULLABLE": None - # }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "purchasingEntity", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The business entity that placed the order, including company details and purchasing relationships. 
Used for B2B transactions to track which company or organization is responsible for the purchase and payment terms.", - # "IS_NULLABLE": None - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "refundable", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether the order can be refunded based on its payment transactions. Returns false for orders with no eligible payment transactions, such as fully refunded orders or orders with non-refundable payment methods.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "refundDiscrepancySet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The difference between the suggested and actual refund amount of all refunds that have been applied to the order. A positive value indicates a difference in the merchant's favor, and a negative value indicates a difference in the customer's favor.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "refunds", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of refunds that have been applied to the order. Refunds represent money returned to customers for returned items, cancellations, or adjustments.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "registeredSourceUrl", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The URL of the source that the order originated from, if found in the domain registry. Returns null if the source URL isn't in the domain registry.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "requiresShipping", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether the order requires physical shipping to the customer. 
Returns false for digital-only orders (such as gift cards or downloadable products) and true for orders with physical products that need delivery.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "restockable", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether any line items on the order can be restocked into inventory. Returns false for digital products, custom items, or items that can't be resold.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "retailLocation", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The physical location where a retail order is created or completed, except for draft POS orders completed using the mark as paid flow in the Shopify admin, which return null. Transactions associated with the order might have been processed at a different location.", - # "IS_NULLABLE": None - # }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "returns", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The returns associated with the order. Contains information about items that customers have requested to return, including return reasons, status, and refund details.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "returnStatus", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The order's aggregated return status for display purposes. Indicates the overall state of returns for the order, helping merchants track and manage the return process.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "risk", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The risk assessment summary for the order. Provides fraud analysis and risk scoring to help you identify potentially fraudulent orders.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "shippingAddress", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The shipping address where the order will be delivered. 
Contains the customer's delivery location for fulfillment and shipping label generation. Returns null for digital orders or orders that don't require shipping.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "shippingLine", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A summary of all shipping costs on the order. Aggregates shipping charges, discounts, and taxes to provide a single view of delivery costs.", - # "IS_NULLABLE": None - # }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "shippingLines", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The shipping methods applied to the order. Each shipping line represents a shipping option chosen during checkout, including the carrier, service level, and cost.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "shopifyProtect", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The Shopify Protect details for the order, including fraud protection status and coverage information. Shopify Protect helps protect eligible orders against fraudulent chargebacks. Returns null if Shopify Protect is disabled for the shop or the order isn't eligible for protection.", - # "IS_NULLABLE": None - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "sourceIdentifier", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "A unique POS or third party order identifier. For example, 1234-12-1000 or 111-98567-54.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "sourceName", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The name of the source associated with the order, such as web, mobile_app, or pos. Use this field to identify the platform where the order was placed.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "staffMember", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The staff member who created or is responsible for the order. 
Useful for tracking which team member handled phone orders, manual orders, or order modifications. Returns null for orders created directly by customers through the online store.", - # "IS_NULLABLE": None - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "statusPageUrl", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The URL where customers can check their order's current status, including tracking information and delivery updates.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "subtotalLineItemsQuantity", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The sum of quantities for all line items that contribute to the order's subtotal price. This excludes quantities for items like tips, shipping costs, or gift cards that don't affect the subtotal.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "subtotalPriceSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The sum of the prices for all line items after discounts and before returns, in shop and presentment currencies. If taxesIncluded is true, then the subtotal also includes tax.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "suggestedRefund", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A calculated refund suggestion for the order based on specified line items, shipping, and duties. Use this to preview refund amounts, taxes, and processing fees before creating an actual refund.", - # "IS_NULLABLE": None - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "tags", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "A comma separated list of tags associated with the order. Updating tags overwrites any existing tags that were previously added to the order.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "taxesIncluded", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether taxes are included in the subtotal price of the order. 
When true, the subtotal and line item prices include tax amounts. When false, taxes are calculated and displayed separately.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "taxExempt", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether taxes are exempt on the order. Returns true for orders where the customer or business has a valid tax exemption, such as non-profit organizations or tax-free purchases.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "taxLines", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of all tax lines applied to line items on the order, before returns. Tax line prices represent the total price for all tax lines with the same rate and title.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "test", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether the order is a test. Test orders are made using the Shopify Bogus Gateway or a payment provider with test mode enabled. A test order can't be converted into a real order and vice versa.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "totalCapturableSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The authorized amount that's uncaptured or undercaptured, in shop and presentment currencies. This amount isn't adjusted for returns.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "totalCashRoundingAdjustment", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The total rounding adjustment applied to payments or refunds for an order involving cash payments. Applies to some countries where cash transactions are rounded to the nearest currency denomination.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "totalDiscountsSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The total amount discounted on the order before returns, in shop and presentment currencies. 
This includes both order and line level discounts.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "totalOutstandingSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The total amount not yet transacted for the order, in shop and presentment currencies. A positive value indicates a difference in the merchant's favor (payment from customer to merchant) and a negative value indicates a difference in the customer's favor (refund from merchant to customer).", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "totalPriceSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The total price of the order, before returns, in shop and presentment currencies. This includes taxes and discounts.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "totalReceivedSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The total amount received from the customer before returns, in shop and presentment currencies.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "totalRefundedSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The total amount that was refunded, in shop and presentment currencies.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "totalRefundedShippingSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The total amount of shipping that was refunded, in shop and presentment currencies.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "totalShippingPriceSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The total shipping costs returned to the customer, in shop and presentment currencies. 
This includes fees and any related discounts that were refunded.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "totalTaxSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The total tax amount before returns, in shop and presentment currencies.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "totalTipReceivedSet", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The sum of all tip amounts for the order, in shop and presentment currencies.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "totalWeight", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The total weight of the order before returns, in grams.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "orders", - # "COLUMN_NAME": "transactions", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of transactions associated with the order.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "transactionsCount", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The number of transactions associated with the order.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "unpaid", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether no payments have been made for the order.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "orders", - "COLUMN_NAME": "updatedAt", - "DATA_TYPE": "TIMESTAMP", - "COLUMN_DESCRIPTION": "The date and time in ISO 8601 format when the order was last modified.", - "IS_NULLABLE": False, - }, -] - -# region add fields flattened from JSON -for field_name, field in Orders.aliases(): - if isinstance(field, DeepExtract): - columns.append( - { - "TABLE_NAME": "orders", - "COLUMN_NAME": field_name, - "DATA_TYPE": field.mysql_data_type, - "COLUMN_DESCRIPTION": field.description, - "IS_NULLABLE": None, - } - ) -# endregion diff --git a/mindsdb/integrations/handlers/shopify_handler/models/product_variants.py 
b/mindsdb/integrations/handlers/shopify_handler/models/product_variants.py deleted file mode 100644 index ddfa0ab467d..00000000000 --- a/mindsdb/integrations/handlers/shopify_handler/models/product_variants.py +++ /dev/null @@ -1,307 +0,0 @@ -from .common import ( - AliasesEnum, - Count, - MoneyV2, -) -from .utils import Extract - - -class ProductVariants(AliasesEnum): - """A class to represent a Shopify GraphQL product variant. - Reference: https://shopify.dev/docs/api/admin-graphql/latest/objects/ProductVariant - Require `read_products` permission. - """ - - availableForSale = "availableForSale" - barcode = "barcode" - compareAtPrice = "compareAtPrice" - # contextualPricing = "contextualPricing" - createdAt = "createdAt" - # defaultCursor = "defaultCursor" - # deliveryProfile = "deliveryProfile" - displayName = "displayName" - # events = "events" - id = "id" - # inventoryItem = "inventoryItem" - # inventoryPolicy = "inventoryPolicy" - inventoryQuantity = "inventoryQuantity" - # legacyResourceId = "legacyResourceId" - # media = "media" - # metafield = "metafield" - # metafields = "metafields" - position = "position" - price = "price" - # product = "product" - productId = Extract("product", "id") # Custom - # productParents = "productParents" - # productVariantComponents = "productVariantComponents" - requiresComponents = "requiresComponents" - # selectedOptions = "selectedOptions" - sellableOnlineQuantity = "sellableOnlineQuantity" - # sellingPlanGroups = "sellingPlanGroups" - sellingPlanGroupsCount = Count - showUnitPrice = "showUnitPrice" - sku = "sku" - taxable = "taxable" - title = "title" - # translations = "translations" - unitPrice = MoneyV2 - # unitPriceMeasurement = "unitPriceMeasurement" - updatedAt = "updatedAt" - - -columns = [ - { - "TABLE_NAME": "product_variants", - "COLUMN_NAME": "availableForSale", - "DATA_TYPE": "BOOL", - "COLUMN_DESCRIPTION": "Whether the product variant is available for sale.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": 
"product_variants", - "COLUMN_NAME": "barcode", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The value of the barcode associated with the product.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "product_variants", - "COLUMN_NAME": "compareAtPrice", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The compare-at price of the variant in the default shop currency.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "product_variants", - # "COLUMN_NAME": "contextualPricing", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The pricing that applies for a customer in a given context. As of API version 2025-04, only active markets are considered in the price resolution.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "product_variants", - "COLUMN_NAME": "createdAt", - "DATA_TYPE": "TIMESTAMP", - "COLUMN_DESCRIPTION": "The date and time when the variant was created.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "product_variants", - # "COLUMN_NAME": "defaultCursor", - # "DATA_TYPE": "TEXT", - # "COLUMN_DESCRIPTION": "A default cursor that returns the single next record, sorted ascending by ID.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "product_variants", - # "COLUMN_NAME": "deliveryProfile", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The delivery profile for the variant.", - # "IS_NULLABLE": None - # }, - { - "TABLE_NAME": "product_variants", - "COLUMN_NAME": "displayName", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "Display name of the variant, based on product's title + variant's title.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "product_variants", - # "COLUMN_NAME": "events", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The paginated list of events associated with the host subject.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "product_variants", - "COLUMN_NAME": "id", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "A globally-unique ID.", - "IS_NULLABLE": False, - }, - # { - # 
"TABLE_NAME": "product_variants", - # "COLUMN_NAME": "inventoryItem", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The inventory item, which is used to query for inventory information.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "product_variants", - # "COLUMN_NAME": "inventoryPolicy", - # "DATA_TYPE": "TEXT", - # "COLUMN_DESCRIPTION": "Whether customers are allowed to place an order for the product variant when it's out of stock.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "product_variants", - "COLUMN_NAME": "inventoryQuantity", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The total sellable quantity of the variant.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "product_variants", - # "COLUMN_NAME": "legacyResourceId", - # "DATA_TYPE": "INT", - # "COLUMN_DESCRIPTION": "The ID of the corresponding resource in the REST Admin API.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "product_variants", - # "COLUMN_NAME": "media", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The media associated with the product variant.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "product_variants", - # "COLUMN_NAME": "metafield", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A custom field, including its namespace and key, that's associated with a Shopify resource for the purposes of adding and storing additional information.", - # "IS_NULLABLE": None - # }, - # { - # "TABLE_NAME": "product_variants", - # "COLUMN_NAME": "metafields", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of custom fields that a merchant associates with a Shopify resource.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "product_variants", - "COLUMN_NAME": "position", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The order of the product variant in the list of product variants. 
The first position in the list is 1.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "product_variants", - "COLUMN_NAME": "price", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The price of the product variant in the default shop currency.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "product_variants", - # "COLUMN_NAME": "product", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The product that this variant belongs to.", - # "IS_NULLABLE": False - # }, - { - # Custom, extracted from "product" - "TABLE_NAME": "product_variants", - "COLUMN_NAME": "productId", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "ID of the product that this variant belongs to.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "product_variants", - # "COLUMN_NAME": "productParents", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of products that have product variants that contain this variant as a product component.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "product_variants", - # "COLUMN_NAME": "productVariantComponents", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of the product variant components.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "product_variants", - "COLUMN_NAME": "requiresComponents", - "DATA_TYPE": "BOOL", - "COLUMN_DESCRIPTION": "Whether a product variant requires components. The default value is false. If true, then the product variant can only be purchased as a parent bundle with components and it will be omitted from channels that don't support bundles.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "product_variants", - # "COLUMN_NAME": "selectedOptions", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "List of product options applied to the variant.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "product_variants", - "COLUMN_NAME": "sellableOnlineQuantity", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The total sellable quantity of the variant for online channels. 
This doesn't represent the total available inventory or capture limitations based on customer location.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "product_variants", - # "COLUMN_NAME": "sellingPlanGroups", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of all selling plan groups defined in the current shop associated with the product variant.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "product_variants", - "COLUMN_NAME": "sellingPlanGroupsCount", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "Count of selling plan groups associated with the product variant.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "product_variants", - "COLUMN_NAME": "showUnitPrice", - "DATA_TYPE": "BOOL", - "COLUMN_DESCRIPTION": "Whether to show the unit price for this product variant.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "product_variants", - "COLUMN_NAME": "sku", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "A case-sensitive identifier for the product variant in the shop. 
Required in order to connect to a fulfillment service.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "product_variants", - "COLUMN_NAME": "taxable", - "DATA_TYPE": "BOOL", - "COLUMN_DESCRIPTION": "Whether a tax is charged when the product variant is sold.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "product_variants", - "COLUMN_NAME": "title", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The title of the product variant.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "product_variants", - # "COLUMN_NAME": "translations", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The published translations associated with the resource.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "product_variants", - "COLUMN_NAME": "unitPrice", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The unit price value for the variant based on the variant measurement.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "product_variants", - # "COLUMN_NAME": "unitPriceMeasurement", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The unit price measurement for the variant.", - # "IS_NULLABLE": None - # }, - { - "TABLE_NAME": "product_variants", - "COLUMN_NAME": "updatedAt", - "DATA_TYPE": "TIMESTAMP", - "COLUMN_DESCRIPTION": "The date and time (ISO 8601 format) when the product variant was last modified.", - "IS_NULLABLE": False, - }, -] diff --git a/mindsdb/integrations/handlers/shopify_handler/models/products.py b/mindsdb/integrations/handlers/shopify_handler/models/products.py deleted file mode 100644 index 2cb95c69ce3..00000000000 --- a/mindsdb/integrations/handlers/shopify_handler/models/products.py +++ /dev/null @@ -1,510 +0,0 @@ -from .common import ( - AliasesEnum, - Count, - TaxonomyCategory, - CollectionConnection, - ProductCompareAtPriceRange, - ProductPriceRangeV2, - SEO, -) -from .utils import Nodes - - -class Products(AliasesEnum): - """A class to represent a Shopify GraphQL product. 
- Reference: https://shopify.dev/docs/api/admin-graphql/latest/objects/Product - Require `read_products` permission. - """ - - availablePublicationsCount = Count - # bundleComponents - category = TaxonomyCategory - collections = Nodes(CollectionConnection) - # combinedListing = CombinedListing - combinedListingRole = "combinedListingRole" - compareAtPriceRange = ProductCompareAtPriceRange - # contextualPricing = ProductContextualPricing # require context - createdAt = "createdAt" - # defaultCursor = "defaultCursor" - description = "description" - descriptionHtml = "descriptionHtml" - # events = Nodes(EventConnection) - # featuredMedia = Media - # feedback = ResourceFeedback - giftCardTemplateSuffix = "giftCardTemplateSuffix" - handle = "handle" - hasOnlyDefaultVariant = "hasOnlyDefaultVariant" - hasOutOfStockVariants = "hasOutOfStockVariants" - hasVariantsThatRequiresComponents = "hasVariantsThatRequiresComponents" - id = "id" - # inCollection = "inCollection" # require id - isGiftCard = "isGiftCard" - legacyResourceId = "legacyResourceId" - # media = Nodes(MediaConnection) - mediaCount = Count - # metafield = Metafield # require key - # metafields = Nodes(MetafieldConnection) - onlineStorePreviewUrl = "onlineStorePreviewUrl" - onlineStoreUrl = "onlineStoreUrl" - # options = List(ProductOption) - priceRangeV2 = ProductPriceRangeV2 - # productComponents = Nodes(ProductComponentType) - productComponentsCount = Count - # productParents = Nodes(Product) - productType = "productType" - publishedAt = "publishedAt" - # # publishedInContext = "publishedInContext" # require context - # publishedOnCurrentPublication = "publishedOnCurrentPublication" # raise error if publication not exists - # publishedOnPublication = "publishedOnPublication" # require id - requiresSellingPlan = "requiresSellingPlan" - # resourcePublicationOnCurrentPublication = ResourcePublicationV2 - # resourcePublications = Nodes(ResourcePublication) - resourcePublicationsCount = Count - # 
resourcePublicationsV2 = Nodes(ResourcePublicationV2) - # restrictedForResource = RestrictedForResource(calculatedOrderIdcalculatedOrderId=!required) - # sellingPlanGroups = Nodes(SellingPlanGroupConnection) - sellingPlanGroupsCount = Count - seo = SEO - status = "status" - tags = "tags" - templateSuffix = "templateSuffix" - title = "title" - totalInventory = "totalInventory" - tracksInventory = "tracksInventory" - # translations = Translation(locale=!required) - # unpublishedPublications = Nodes(Publication) - updatedAt = "updatedAt" - # variants = Nodes(ProductVariant) - variantsCount = Count - vendor = "vendor" - - -columns = [ - { - "TABLE_NAME": "products", - "COLUMN_NAME": "availablePublicationsCount", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The number of publications that a resource is published to, without feedback errors.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "bundleComponents", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of components that are associated with a product in a bundle.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "category", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The category of a product from Shopify's Standard Product Taxonomy.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "collections", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "A list of collections that include the product.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "combinedListing", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A special product type that combines separate products from a store into a single product listing. 
Combined listings are connected by a shared option, such as color, model, or dimension.", - # "IS_NULLABLE": None - # }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "combinedListingRole", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The role of the product in a combined listing. If null, then the product isn't part of any combined listing.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "compareAtPriceRange", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The compare-at price range of the product in the shop's default currency.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "contextualPricing", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The pricing that applies to a customer in a specific context. For example, a price might vary depending on the customer's location. Only active markets are considered in the price resolution.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "createdAt", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The date and time when the product was created.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "defaultCursor", - # "DATA_TYPE": "TEXT", - # "COLUMN_DESCRIPTION": "A default cursor that returns the single next record, sorted ascending by ID.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "description", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "A single-line description of the product, with HTML tags removed.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "descriptionHtml", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The description of the product, with HTML tags. 
For example, the description might include bold and italic text.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "events", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The paginated list of events associated with the host subject.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "featuredMedia", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The featured media associated with the product.", - # "IS_NULLABLE": None - # }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "feedback", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The information that lets merchants know what steps they need to take to make sure that the app is set up correctly. For example, if a merchant hasn't set up a product correctly in the app, then the feedback might include a message that says You need to add a price to this product.", - # "IS_NULLABLE": None - # }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "giftCardTemplateSuffix", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The theme template that's used when customers view the gift card in a store.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "handle", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "A unique, human-readable string of the product's title. A handle can contain letters, hyphens, and numbers, but no spaces. 
The handle is used in the online store URL for the product.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "hasOnlyDefaultVariant", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether the product has only a single variant with the default option and value.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "hasOutOfStockVariants", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether the product has variants that are out of stock.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "hasVariantsThatRequiresComponents", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether at least one of the product variants requires bundle components. Learn more about store eligibility for bundles.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "id", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "A globally-unique ID.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "inCollection", - # "DATA_TYPE": "BOOLEAN", - # "COLUMN_DESCRIPTION": "Whether the product is in a specified collection.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "isGiftCard", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether the product is a gift card.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "legacyResourceId", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The ID of the corresponding resource in the REST Admin API.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "media", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The media associated with the product. 
Valid media are images, 3D models, videos.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "mediaCount", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The total count of media that's associated with a product.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "metafield", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A custom field, including its namespace and key, that's associated with a Shopify resource for the purposes of adding and storing additional information.", - # "IS_NULLABLE": None - # }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "metafields", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of custom fields that a merchant associates with a Shopify resource.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "onlineStorePreviewUrl", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The preview URL for the online store.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "onlineStoreUrl", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The product's URL on the online store. If null, then the product isn't published to the online store sales channel.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "options", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of product options. The limit is defined by the shop's resource limits for product options.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "priceRangeV2", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The minimum and maximum prices of a product, expressed in decimal numbers. 
For example, if the product is priced between $10.00 and $50.00, then the price range is $10.00 - $50.00.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "productComponents", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of products that contain at least one variant associated with at least one of the current products' variants via group relationship.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "productComponentsCount", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "A count of unique products that contain at least one variant associated with at least one of the current products' variants via group relationship.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "productParents", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of products that has a variant that contains any of this product's variants as a component.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "productType", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The product type that merchants define.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "publishedAt", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The date and time when the product was published to the online store.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "publishedInContext", - # "DATA_TYPE": "BOOLEAN", - # "COLUMN_DESCRIPTION": "Whether the product is published for a customer only in a specified context. For example, a product might be published for a customer only in a specific location.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "publishedOnCurrentPublication", - # "DATA_TYPE": "BOOLEAN", - # "COLUMN_DESCRIPTION": "Whether the resource is published to the app's publication. 
For example, the resource might be published to the app's online store channel.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "publishedOnPublication", - # "DATA_TYPE": "BOOLEAN", - # "COLUMN_DESCRIPTION": "Whether the resource is published to a specified publication.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "requiresSellingPlan", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether the product can only be purchased with a selling plan. Products that are sold on subscription can be updated only for online stores. If you update a product to be subscription-only, then the product is unpublished from all channels, except the online store.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "resourcePublicationOnCurrentPublication", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The resource that's either published or staged to be published to the publication.", - # "IS_NULLABLE": None - # }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "resourcePublications", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The list of resources that are published to a publication.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "resourcePublicationsCount", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The number of publications that a resource is published to, without feedback errors.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "resourcePublicationsV2", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The list of resources that are either published or staged to be published to a publication.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "restrictedForResource", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "Whether the merchant can make changes to the product when they edit the order associated with the product. 
For example, a merchant might be restricted from changing product details when they edit an order.", - # "IS_NULLABLE": None - # }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "sellingPlanGroups", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of all selling plan groups that are associated with the product either directly, or through the product's variants.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "sellingPlanGroupsCount", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "A count of selling plan groups that are associated with the product.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "seo", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The SEO title and description that are associated with a product.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "status", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The product status, which controls visibility across all sales channels.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "tags", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "A comma-separated list of searchable keywords that are associated with the product. For example, a merchant might apply the sports and summer tags to products that are associated with sportwear for summer. Updating tags overwrites any existing tags that were previously added to the product. To add new tags without overwriting existing tags, use the tagsAdd mutation.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "templateSuffix", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The theme template that's used when customers view the product in a store.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "title", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The name for the product that displays to customers. The title is used to construct the product's handle. 
For example, if a product is titled Black Sunglasses, then the handle is black-sunglasses.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "totalInventory", - "DATA_TYPE": "INT", - "COLUMN_DESCRIPTION": "The quantity of inventory that's in stock.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "tracksInventory", - "DATA_TYPE": "BOOLEAN", - "COLUMN_DESCRIPTION": "Whether inventory tracking has been enabled for the product.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "translations", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The published translations associated with the resource.", - # "IS_NULLABLE": False - # }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "unpublishedPublications", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The list of publications that the resource isn't published to.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "updatedAt", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The date and time when the product was last modified. A product's updatedAt value can change for different reasons. For example, if an order is placed for a product that has inventory tracking set up, then the inventory adjustment is counted as an update.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "products", - # "COLUMN_NAME": "variants", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "A list of variants associated with the product. 
If querying a single product at the root, you can fetch up to 2048 variants.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "variantsCount", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The number of variants that are associated with the product.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "vendor", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The name of the product's vendor.", - "IS_NULLABLE": False, - }, -] diff --git a/mindsdb/integrations/handlers/shopify_handler/models/staff_members.py b/mindsdb/integrations/handlers/shopify_handler/models/staff_members.py deleted file mode 100644 index 22a1ead1901..00000000000 --- a/mindsdb/integrations/handlers/shopify_handler/models/staff_members.py +++ /dev/null @@ -1,125 +0,0 @@ -from .common import AliasesEnum - - -class StaffMembers(AliasesEnum): - """A class to represent a Shopify GraphQL staff member. - Reference: https://shopify.dev/docs/api/admin-graphql/latest/objects/StaffMember - Require `read_users` permission. Also the app must be a finance embedded app or installed on a Shopify Plus or Advanced store. 
- """ - - accountType = "accountType" - active = "active" - # avatar = "avatar" - email = "email" - exists = "exists" - firstName = "firstName" - id = "id" - initials = "initials" - isShopOwner = "isShopOwner" - lastName = "lastName" - locale = "locale" - name = "name" - phone = "phone" - # privateData = "privateData" - - -columns = [ - { - "TABLE_NAME": "staff_members", - "COLUMN_NAME": "accountType", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The type of account the staff member has.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "staff_members", - "COLUMN_NAME": "active", - "DATA_TYPE": "BOOL", - "COLUMN_DESCRIPTION": "Whether the staff member is active.", - "IS_NULLABLE": False, - }, - # { - # "TABLE_NAME": "staff_members", - # "COLUMN_NAME": "avatar", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The image used as the staff member's avatar in the Shopify admin.", - # "IS_NULLABLE": False - # }, - { - "TABLE_NAME": "staff_members", - "COLUMN_NAME": "email", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The staff member's email address.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "staff_members", - "COLUMN_NAME": "exists", - "DATA_TYPE": "BOOL", - "COLUMN_DESCRIPTION": "Whether the staff member's account exists.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "staff_members", - "COLUMN_NAME": "firstName", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The staff member's first name.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "staff_members", - "COLUMN_NAME": "id", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "A globally-unique ID.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "staff_members", - "COLUMN_NAME": "initials", - "DATA_TYPE": "JSON", - "COLUMN_DESCRIPTION": "The staff member's initials, if available.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "staff_members", - "COLUMN_NAME": "isShopOwner", - "DATA_TYPE": "BOOL", - "COLUMN_DESCRIPTION": "Whether the staff member is the shop owner.", - "IS_NULLABLE": False, 
- }, - { - "TABLE_NAME": "staff_members", - "COLUMN_NAME": "lastName", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The staff member's last name.", - "IS_NULLABLE": None, - }, - { - "TABLE_NAME": "staff_members", - "COLUMN_NAME": "locale", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The staff member's preferred locale. Locale values use the format language or language-COUNTRY, where language is a two-letter language code, and COUNTRY is a two-letter country code. For example: en or en-US", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "staff_members", - "COLUMN_NAME": "name", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The staff member's full name.", - "IS_NULLABLE": False, - }, - { - "TABLE_NAME": "staff_members", - "COLUMN_NAME": "phone", - "DATA_TYPE": "TEXT", - "COLUMN_DESCRIPTION": "The staff member's phone number.", - "IS_NULLABLE": None, - }, - # { - # "TABLE_NAME": "staff_members", - # "COLUMN_NAME": "privateData", - # "DATA_TYPE": "JSON", - # "COLUMN_DESCRIPTION": "The data used to customize the Shopify admin experience for the staff member.", - # "IS_NULLABLE": False - # } -] diff --git a/mindsdb/integrations/handlers/shopify_handler/models/utils.py b/mindsdb/integrations/handlers/shopify_handler/models/utils.py deleted file mode 100644 index 6eba8ae3721..00000000000 --- a/mindsdb/integrations/handlers/shopify_handler/models/utils.py +++ /dev/null @@ -1,25 +0,0 @@ -from enum import Enum - - -class Nodes: - def __init__(self, enum: Enum) -> None: - self.enum = enum - - -class Extract: - def __init__(self, obj, key: str): - self.obj = obj - self.key = key - - -class DeepExtract: - def __init__(self, path: list[str], mysql_data_type: str, description: str = None): - if len(path) < 2: - raise ValueError(f"Minimum length of path for DeepExtract is 2: {path}") - self.path = path - self.mysql_data_type = mysql_data_type - if description is None: - path_str = path[0] + "".join(f'["{p}"]' for p in path[1:]) - self.description = f"Value is extracted 
from {path_str}" - else: - self.description = description diff --git a/mindsdb/integrations/handlers/shopify_handler/requirements.txt b/mindsdb/integrations/handlers/shopify_handler/requirements.txt deleted file mode 100644 index 0bbd28f33a7..00000000000 --- a/mindsdb/integrations/handlers/shopify_handler/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -ShopifyAPI diff --git a/mindsdb/integrations/handlers/shopify_handler/shopify_handler.py b/mindsdb/integrations/handlers/shopify_handler/shopify_handler.py deleted file mode 100644 index 98bc6602f22..00000000000 --- a/mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +++ /dev/null @@ -1,183 +0,0 @@ -import json - -import shopify -import requests -import pandas as pd - -from mindsdb.integrations.handlers.shopify_handler.shopify_tables import ( - ProductsTable, - ProductVariantsTable, - CustomersTable, - OrdersTable, - MarketingEventsTable, - InventoryItemsTable, - StaffMembersTable, - GiftCardsTable, -) -from mindsdb.integrations.libs.api_handler import MetaAPIHandler -from mindsdb.integrations.libs.passthrough import PassthroughMixin -from mindsdb.integrations.libs.passthrough_types import PassthroughRequest -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - HandlerResponse as Response, - RESPONSE_TYPE, -) - -from mindsdb.utilities import log -from mindsdb.integrations.libs.api_handler_exceptions import ( - InvalidNativeQuery, - ConnectionFailed, - MissingConnectionParams, -) - -from .connection_args import connection_args - -logger = log.getLogger(__name__) - - -class ShopifyHandler(MetaAPIHandler, PassthroughMixin): - """ - The Shopify handler implementation. - """ - - name = "shopify" - - # REST passthrough configuration. Shopify sends the Admin API token in - # `X-Shopify-Access-Token`, not `Authorization: Bearer`, so we override - # the default auth header. 
v1 requires the caller to pre-supply the - # access token in connection_data — the existing client_id/client_secret - # OAuth dance runs inside `connect()` and isn't surfaced to the mixin. - _bearer_token_arg = "access_token" - _auth_header_name = "X-Shopify-Access-Token" - _auth_header_format = "{token}" - _auth_mode = "custom" - _base_url_default = None - # Version-less path — Shopify redirects this to the current stable - # Admin API version, so the probe survives quarterly API releases. - _test_request = PassthroughRequest(method="GET", path="/admin/shop.json") - - def _build_base_url(self) -> str | None: - data = self._get_connection_data() - shop = data.get("shop_url") - if not shop: - return None - shop = str(shop) - if not shop.startswith(("http://", "https://")): - shop = f"https://{shop}" - return shop.rstrip("/") - - def __init__(self, name: str, **kwargs): - """ - Initialize the handler. - Args: - name (str): name of particular handler instance - **kwargs: arbitrary keyword arguments. 
- """ - super().__init__(name) - - if kwargs.get("connection_data") is None: - raise MissingConnectionParams("Incomplete parameters passed to Shopify Handler") - - connection_data = kwargs.get("connection_data", {}) - - required_args = [arg_name for arg_name, arg_meta in connection_args.items() if arg_meta.get("required") is True] - missed_args = set(required_args) - set(connection_data) - if missed_args: - raise MissingConnectionParams( - f"Required parameters are not found in the connection data: {', '.join(list(missed_args))}" - ) - - self.connection_data = connection_data - self.kwargs = kwargs - - self.connection = None - self.is_connected = False - - self._register_table("products", ProductsTable(self)) - self._register_table("customers", CustomersTable(self)) - self._register_table("orders", OrdersTable(self)) - self._register_table("product_variants", ProductVariantsTable(self)) - self._register_table("marketing_events", MarketingEventsTable(self)) - self._register_table("inventory_items", InventoryItemsTable(self)) - self._register_table("staff_members", StaffMembersTable(self)) - self._register_table("gift_cards", GiftCardsTable(self)) - - def connect(self): - """ - Set up the connection required by the handler. 
- Returns - ------- - StatusResponse - connection object - """ - if self.is_connected is True: - return self.connection - - shop_url = self.connection_data["shop_url"] - client_id = self.connection_data["client_id"] - client_secret = self.connection_data["client_secret"] - - response = requests.post( - f"https://{shop_url}/admin/oauth/access_token", - data={"grant_type": "client_credentials", "client_id": client_id, "client_secret": client_secret}, - headers={"Content-Type": "application/x-www-form-urlencoded"}, - timeout=10, - ) - response.raise_for_status() - result = response.json() - access_token = result.get("access_token") - if not access_token: - raise ConnectionFailed("Unable to get an access token") - - api_session = shopify.Session(shop_url, "2025-10", access_token) - - self.connection = api_session - self.is_connected = True - - return self.connection - - def check_connection(self) -> StatusResponse: - """ - Check connection to the handler. - Returns: - HandlerStatusResponse - """ - - response = StatusResponse(False) - - try: - api_session = self.connect() - shopify.ShopifyResource.activate_session(api_session) - shopify.Shop.current() - response.success = True - except Exception as e: - logger.error("Error connecting to Shopify!") - response.error_message = str(e) - - self.is_connected = response.success - - return response - - def native_query(self, query: str) -> Response: - """process a raw query - - Args: - query (str): query in a native format (graphql) - Returns: - Response: The query result. 
- """ - api_session = self.connect() - shopify.ShopifyResource.activate_session(api_session) - try: - result = shopify.GraphQL().execute(query) - except Exception as e: - raise InvalidNativeQuery(f"An error occurred when executing the query: {e}") - - try: - result = json.loads(result) - data = result.get("data") - df = pd.DataFrame(data) - except Exception as e: - raise InvalidNativeQuery(f"An error occurred when parsing the query result into a DataFrame: {e}") - - return Response(RESPONSE_TYPE.TABLE, data_frame=df) diff --git a/mindsdb/integrations/handlers/shopify_handler/shopify_tables.py b/mindsdb/integrations/handlers/shopify_handler/shopify_tables.py deleted file mode 100644 index 0bcb960cd1f..00000000000 --- a/mindsdb/integrations/handlers/shopify_handler/shopify_tables.py +++ /dev/null @@ -1,755 +0,0 @@ -import json -from typing import List, Dict - -import shopify -import pandas as pd - -from mindsdb.integrations.libs.api_handler import MetaAPIResource -from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator, SortColumn -from mindsdb.utilities import log - -from .utils import query_graphql_nodes, get_graphql_columns, _format_error -from .models.products import Products, columns as products_columns -from .models.product_variants import ProductVariants, columns as product_variants_columns -from .models.customers import Customers, columns as customers_columns -from .models.orders import Orders, columns as orders_columns -from .models.marketing_events import MarketingEvents, columns as marketing_events_columns -from .models.inventory_items import InventoryItems, columns as inventory_items_columns -from .models.staff_members import StaffMembers, columns as staff_members_columns -from .models.gift_cards import GiftCards, columns as gift_cards_columns - -logger = log.getLogger(__name__) - - -class ShopifyMetaAPIResource(MetaAPIResource): - """A class to represent a Shopify Meta API resource.""" - - def list( - self, - conditions: 
list[FilterCondition] | None = None, - limit: int | None = None, - sort: list[SortColumn] | None = None, - targets: list[str] | None = None, - **kwargs, - ): - """Query the Shopify API to get the resources data. - - Args: - conditions: The conditions to apply to the query. - limit: The limit of the resources to return. - sort: The sort to apply to the query. - targets: The columns to include in the query. - - Returns: - pd.DataFrame: The data of the resources. - """ - sort_key, sort_reverse = self._get_sort(sort) - query_conditions = self._get_query_conditions(conditions) - - api_session = self.handler.connect() - shopify.ShopifyResource.activate_session(api_session) - - # region Validate that all requested target fields exist in the table schema - if isinstance(targets, list): - lower_names = [el[0].lower() for el in self.model.aliases()] - missed_targets = [t for t in targets if t.lower() not in lower_names] - if len(missed_targets) > 0: - raise ValueError( - f"The specified fields were not found in the table schema: {', '.join(missed_targets)}" - ) - # endregion - - columns = get_graphql_columns(self.model, targets) - data = query_graphql_nodes( - self.model_name, - self.model, - columns, - sort_key=sort_key, - sort_reverse=sort_reverse, - query=query_conditions, - limit=limit, - ) - - if len(data) == 0: - df_columns = targets - if targets is None or len(targets) == 0: - df_columns = [column.name for column in self.model] - products_df = pd.DataFrame(data, columns=df_columns) - else: - products_df = pd.DataFrame(data) - - return products_df - - def _get_sort(self, sort: List[SortColumn] | None) -> tuple[str, bool]: - """Get the sort key and reverse from the sort list. - - Args: - sort: The sort list. - - Returns: - tuple[str, bool]: The sort key and reverse flag. 
- """ - sort_key = None - sort_reverse = None - sort_map = self.sort_map or {} - if sort: - order_by = sort[0].column.lower() - asc = sort[0].ascending - if order_by not in sort_map: - logger.info( - f"Used unsupported column for order by: {order_by}, available columns are: {list(self.sort_map.keys())}." - ) - return None, None - - sort_key = sort_map[order_by] - sort_reverse = not asc - sort[0].applied = True - return sort_key, sort_reverse - - def _get_query_conditions(self, conditions: List[FilterCondition] | None) -> str: - """Get the GraphQL query conditions from the conditions list. - - Args: - conditions: The conditions list. - - Returns: - str: The query conditions. - """ - query_conditions = [] - conditions_op_map = self.conditions_op_map or {} - for condition in conditions or []: - op = condition.op - column = condition.column.lower() - mapped_op = conditions_op_map.get((column, op)) - if mapped_op: - value = condition.value - if isinstance(value, list): - value = ",".join(value) - elif isinstance(value, bool): - value = f"{value}".lower() - query_conditions.append(f"{mapped_op}{value}") - condition.applied = True - query_conditions = " AND ".join(query_conditions) - return query_conditions - - def get_columns(self) -> List[str]: - """Get the table columns names. - - Returns: - List[str]: The columns names. - """ - return [column["COLUMN_NAME"] for column in self.columns] - - def meta_get_columns(self, *args, **kwargs) -> List[dict]: - """Get the table columns metadata. - - Returns: - List[dict]: The columns metadata. - """ - return self.columns - - def query_graphql(self, query: str) -> dict: - """Query the GraphQL API. - - Args: - query: The GraphQL query to execute. - - Returns: - dict: The result of the GraphQL query. 
- """ - api_session = self.handler.connect() - shopify.ShopifyResource.activate_session(api_session) - result = shopify.GraphQL().execute(query) - result = json.loads(result) - if "errors" in result: - raise Exception(_format_error(result["errors"])) - return result - - -class ProductsTable(ShopifyMetaAPIResource): - """The Shopify Products Table implementation - Reference: https://shopify.dev/docs/api/admin-graphql/latest/queries/products - """ - - def __init__(self, *args, **kwargs): - self.name = "products" - self.model = Products - self.model_name = "products" - self.columns = products_columns - - sort_map = { - Products.createdAt: "CREATED_AT", - Products.id: "ID", - Products.totalInventory: "INVENTORY_TOTAL", - Products.productType: "PRODUCT_TYPE", - Products.publishedAt: "PUBLISHED_AT", - Products.title: "TITLE", - Products.updatedAt: "UPDATED_AT", - Products.vendor: "VENDOR", - } - self.sort_map = {key.name.lower(): value for key, value in sort_map.items()} - - self.conditions_op_map = { - ("createdat", FilterOperator.GREATER_THAN): "created_at:>", - ("createdat", FilterOperator.GREATER_THAN_OR_EQUAL): "created_at:>=", - ("createdat", FilterOperator.LESS_THAN): "created_at:<", - ("createdat", FilterOperator.LESS_THAN_OR_EQUAL): "created_at:<=", - ("createdat", FilterOperator.EQUAL): "created_at:", - ("id", FilterOperator.GREATER_THAN): "id:>", - ("id", FilterOperator.GREATER_THAN_OR_EQUAL): "id:>=", - ("id", FilterOperator.LESS_THAN): "id:<", - ("id", FilterOperator.LESS_THAN_OR_EQUAL): "id:<=", - ("id", FilterOperator.EQUAL): "id:", - ("isgiftcard", FilterOperator.EQUAL): "gift_card:", - ("handle", FilterOperator.EQUAL): "handle:", - ("handle", FilterOperator.IN): "handle:", - ("totalinventory", FilterOperator.EQUAL): "inventory_total:", - ("producttype", FilterOperator.EQUAL): "product_type:", - ("producttype", FilterOperator.IN): "product_type:", - ("publishedat", FilterOperator.GREATER_THAN): "published_at:>", - ("publishedat", 
FilterOperator.GREATER_THAN_OR_EQUAL): "published_at:>=", - ("publishedat", FilterOperator.LESS_THAN): "published_at:<", - ("publishedat", FilterOperator.LESS_THAN_OR_EQUAL): "published_at:<=", - ("publishedat", FilterOperator.EQUAL): "published_at:", - ("status", FilterOperator.EQUAL): "status:", - ("status", FilterOperator.IN): "status:", - ("title", FilterOperator.EQUAL): "title:", - ("updatedat", FilterOperator.GREATER_THAN): "updated_at:>", - ("updatedat", FilterOperator.GREATER_THAN_OR_EQUAL): "updated_at:>=", - ("updatedat", FilterOperator.LESS_THAN): "updated_at:<", - ("updatedat", FilterOperator.LESS_THAN_OR_EQUAL): "updated_at:<=", - ("updatedat", FilterOperator.EQUAL): "updated_at:", - ("vendor", FilterOperator.EQUAL): "vendor:", - } - super().__init__(*args, **kwargs) - - def meta_get_tables(self, *args, **kwargs) -> dict: - response = self.query_graphql("""{ - productsCount(limit:null) { - count - } }""") - row_count = response["data"]["productsCount"]["count"] - - return { - "table_name": self.name, - "table_type": "BASE TABLE", - "table_description": "List of products. 
Products are the items that merchants can sell in their store.", - "row_count": row_count, - } - - def meta_get_primary_keys(self, table_name: str) -> list[Dict]: - return [ - { - "table_name": table_name, - "column_name": "id", - } - ] - - def meta_get_foreign_keys(self, table_name: str, all_tables: list[str]) -> list[Dict]: - return [] - - -class ProductVariantsTable(ShopifyMetaAPIResource): - """The Shopify Product Variants Table implementation - Reference: https://shopify.dev/docs/api/admin-graphql/latest/queries/productvariants - """ - - def __init__(self, *args, **kwargs): - self.name = "product_variants" - self.model = ProductVariants - self.model_name = "productVariants" - self.columns = product_variants_columns - - sort_map = { - ProductVariants.id: "ID", - ProductVariants.inventoryQuantity: "INVENTORY_QUANTITY", - ProductVariants.displayName: "NAME", - ProductVariants.position: "POSITION", - ProductVariants.sku: "SKU", - ProductVariants.title: "TITLE", - } - self.sort_map = {key.name.lower(): value for key, value in sort_map.items()} - - self.conditions_op_map = { - ("barcode", FilterOperator.EQUAL): "barcode:", - ("id", FilterOperator.GREATER_THAN): "id:>", - ("id", FilterOperator.GREATER_THAN_OR_EQUAL): "id:>=", - ("id", FilterOperator.LESS_THAN): "id:<", - ("id", FilterOperator.LESS_THAN_OR_EQUAL): "id:<=", - ("id", FilterOperator.EQUAL): "id:", - ("inventoryquantity", FilterOperator.GREATER_THAN): "inventoryquantity:>", - ("inventoryquantity", FilterOperator.GREATER_THAN_OR_EQUAL): "inventoryquantity:>=", - ("inventoryquantity", FilterOperator.LESS_THAN): "inventoryquantity:<", - ("inventoryquantity", FilterOperator.LESS_THAN_OR_EQUAL): "inventoryquantity:<=", - ("inventoryquantity", FilterOperator.EQUAL): "inventoryquantity:", - ("productid", FilterOperator.GREATER_THAN): "product_id:>", - ("productid", FilterOperator.GREATER_THAN_OR_EQUAL): "product_id:>=", - ("productid", FilterOperator.LESS_THAN): "product_id:<", - ("productid", 
FilterOperator.LESS_THAN_OR_EQUAL): "product_id:<=", - ("productid", FilterOperator.EQUAL): "product_id:", - ("productid", FilterOperator.IN): "toproduct_ids:", - ("sku", FilterOperator.EQUAL): "sku:", - ("title", FilterOperator.EQUAL): "title:", - ("updatedat", FilterOperator.GREATER_THAN): "updated_at:>", - ("updatedat", FilterOperator.GREATER_THAN_OR_EQUAL): "updated_at:>=", - ("updatedat", FilterOperator.LESS_THAN): "updated_at:<", - ("updatedat", FilterOperator.LESS_THAN_OR_EQUAL): "updated_at:<=", - ("updatedat", FilterOperator.EQUAL): "updated_at:", - } - super().__init__(*args, **kwargs) - - def meta_get_tables(self, *args, **kwargs) -> dict: - response = self.query_graphql("""{ - productVariantsCount(limit:null) { - count - } }""") - row_count = response["data"]["productVariantsCount"]["count"] - - return { - "table_name": self.name, - "table_type": "BASE TABLE", - "table_description": "List of product variants. A product variant is a specific version of a product that comes in more than one option, such as size or color.", - "row_count": row_count, - } - - def meta_get_primary_keys(self, table_name: str) -> list[Dict]: - return [ - { - "table_name": table_name, - "column_name": "id", - } - ] - - def meta_get_foreign_keys(self, table_name: str, all_tables: list[str]) -> list[Dict]: - return [ - { - "PARENT_TABLE_NAME": table_name, - "PARENT_COLUMN_NAME": "productId", - "CHILD_TABLE_NAME": "products", - "CHILD_COLUMN_NAME": "id", - } - ] - - -class CustomersTable(ShopifyMetaAPIResource): - """The Shopify Customers Table implementation - Reference: https://shopify.dev/docs/api/admin-graphql/latest/queries/customers - """ - - def __init__(self, *args, **kwargs): - self.name = "customers" - self.model = Customers - self.model_name = "customers" - self.columns = customers_columns - - sort_map = { - Customers.createdAt: "CREATED_AT", - Customers.id: "ID", - Customers.updatedAt: "UPDATED_AT", - } - self.sort_map = {key.name.lower(): value for key, value in 
sort_map.items()} - - self.conditions_op_map = { - ("country", FilterOperator.EQUAL): "country:", - ("createdat", FilterOperator.GREATER_THAN): "customer_date:>", - ("createdat", FilterOperator.GREATER_THAN_OR_EQUAL): "customer_date:>=", - ("createdat", FilterOperator.LESS_THAN): "customer_date:<", - ("createdat", FilterOperator.LESS_THAN_OR_EQUAL): "customer_date:<=", - ("createdat", FilterOperator.EQUAL): "customer_date:", - ("email", FilterOperator.EQUAL): "email:", - ("firstname", FilterOperator.EQUAL): "first_name:", - ("id", FilterOperator.GREATER_THAN): "id:>", - ("id", FilterOperator.GREATER_THAN_OR_EQUAL): "id:>=", - ("id", FilterOperator.LESS_THAN): "id:<", - ("id", FilterOperator.LESS_THAN_OR_EQUAL): "id:<=", - ("id", FilterOperator.EQUAL): "id:", - ("lastname", FilterOperator.EQUAL): "last_name:", - ("phonenumber", FilterOperator.EQUAL): "phone:", - ("updatedat", FilterOperator.GREATER_THAN): "updated_at:>", - ("updatedat", FilterOperator.GREATER_THAN_OR_EQUAL): "updated_at:>=", - ("updatedat", FilterOperator.LESS_THAN): "updated_at:<", - ("updatedat", FilterOperator.LESS_THAN_OR_EQUAL): "updated_at:<=", - ("updatedat", FilterOperator.EQUAL): "updated_at:", - } - super().__init__(*args, **kwargs) - - def meta_get_tables(self, *args, **kwargs) -> dict: - response = self.query_graphql("""{ - customersCount(limit:null) { - count - } }""") - row_count = response["data"]["customersCount"]["count"] - - return { - "table_name": self.name, - "table_type": "BASE TABLE", - "table_description": "List of customers in your Shopify store, including key information such as name, email, location, and purchase history", - "row_count": row_count, - } - - def meta_get_primary_keys(self, table_name: str) -> List[Dict]: - return [ - { - "table_name": table_name, - "column_name": "id", - } - ] - - def meta_get_foreign_keys(self, table_name: str, all_tables: List[str]) -> List[Dict]: - return [] - - -class OrdersTable(ShopifyMetaAPIResource): - """The Shopify Orders Table 
implementation - Reference: https://shopify.dev/docs/api/admin-graphql/latest/queries/orders - """ - - def __init__(self, *args, **kwargs): - self.name = "orders" - self.model = Orders - self.model_name = "orders" - self.columns = orders_columns - - sort_map = { - Orders.createdAt: "CREATED_AT", - Orders.id: "ID", - Orders.number: "ORDER_NUMBER", - Orders.poNumber: "PO_NUMBER", - Orders.processedAt: "PROCESSED_AT", - Orders.updatedAt: "UPDATED_AT", - } - self.sort_map = {key.name.lower(): value for key, value in sort_map.items()} - - self.conditions_op_map = { - ("confirmationnumber", FilterOperator.EQUAL): "confirmation_number:", - ("createdat", FilterOperator.GREATER_THAN): "created_at:>", - ("createdat", FilterOperator.GREATER_THAN_OR_EQUAL): "created_at:>=", - ("createdat", FilterOperator.LESS_THAN): "created_at:<", - ("createdat", FilterOperator.LESS_THAN_OR_EQUAL): "created_at:<=", - ("createdat", FilterOperator.EQUAL): "created_at:", - ("customerid", FilterOperator.EQUAL): "customer_id:", - ("discountcode", FilterOperator.EQUAL): "discount_code:", - ("email", FilterOperator.EQUAL): "email:", - ("id", FilterOperator.GREATER_THAN): "id:>", - ("id", FilterOperator.GREATER_THAN_OR_EQUAL): "id:>=", - ("id", FilterOperator.LESS_THAN): "id:<", - ("id", FilterOperator.LESS_THAN_OR_EQUAL): "id:<=", - ("id", FilterOperator.EQUAL): "id:", - ("name", FilterOperator.EQUAL): "name:", - ("ponumber", FilterOperator.EQUAL): "po_number:", - ("processedat", FilterOperator.GREATER_THAN): "processed_at:>", - ("processedat", FilterOperator.GREATER_THAN_OR_EQUAL): "processed_at:>=", - ("processedat", FilterOperator.LESS_THAN): "processed_at:<", - ("processedat", FilterOperator.LESS_THAN_OR_EQUAL): "processed_at:<=", - ("processedat", FilterOperator.EQUAL): "processed_at:", - ("returnstatus", FilterOperator.EQUAL): "return_status:", - ("sourceidentifier", FilterOperator.EQUAL): "source_identifier:", - ("sourcename", FilterOperator.EQUAL): "source_name:", - ("test", 
FilterOperator.EQUAL): "test:", - ("totalweight", FilterOperator.GREATER_THAN): "total_weight:>", - ("totalweight", FilterOperator.GREATER_THAN_OR_EQUAL): "total_weight:>=", - ("totalweight", FilterOperator.LESS_THAN): "total_weight:<", - ("totalweight", FilterOperator.LESS_THAN_OR_EQUAL): "total_weight:<=", - ("totalweight", FilterOperator.EQUAL): "total_weight:", - ("updatedat", FilterOperator.GREATER_THAN): "updated_at:>", - ("updatedat", FilterOperator.GREATER_THAN_OR_EQUAL): "updated_at:>=", - ("updatedat", FilterOperator.LESS_THAN): "updated_at:<", - ("updatedat", FilterOperator.LESS_THAN_OR_EQUAL): "updated_at:<=", - ("updatedat", FilterOperator.EQUAL): "updated_at:", - } - super().__init__(*args, **kwargs) - - def meta_get_tables(self, *args, **kwargs) -> dict: - response = self.query_graphql("""{ - ordersCount(limit:null) { - count - } }""") - row_count = response["data"]["ordersCount"]["count"] - - return { - "table_name": self.name, - "table_type": "BASE TABLE", - "table_description": "List of orders placed in the store, including data such as order status, customer, and line item details.", - "row_count": row_count, - } - - def meta_get_primary_keys(self, table_name: str) -> List[Dict]: - return [ - { - "table_name": table_name, - "column_name": "id", - } - ] - - def meta_get_foreign_keys(self, table_name: str, all_tables: List[str]) -> List[Dict]: - return [ - { - "PARENT_TABLE_NAME": table_name, - "PARENT_COLUMN_NAME": "customerId", - "CHILD_TABLE_NAME": "customers", - "CHILD_COLUMN_NAME": "id", - } - ] - - -class MarketingEventsTable(ShopifyMetaAPIResource): - """The Shopify MarketingEvents table implementation - Reference: https://shopify.dev/docs/api/admin-graphql/latest/queries/marketingevents - """ - - def __init__(self, *args, **kwargs): - self.name = "marketing_events" - self.model = MarketingEvents - self.model_name = "marketingEvents" - self.columns = marketing_events_columns - - sort_map = { - MarketingEvents.id: "ID", - 
MarketingEvents.startedAt: "STARTED_AT", - } - self.sort_map = {key.name.lower(): value for key, value in sort_map.items()} - - self.conditions_op_map = { - ("id", FilterOperator.GREATER_THAN): "id:>", - ("id", FilterOperator.GREATER_THAN_OR_EQUAL): "id:>=", - ("id", FilterOperator.LESS_THAN): "id:<", - ("id", FilterOperator.LESS_THAN_OR_EQUAL): "id:<=", - ("id", FilterOperator.EQUAL): "id:", - ("startedat", FilterOperator.GREATER_THAN): "started_at:>", - ("startedat", FilterOperator.GREATER_THAN_OR_EQUAL): "started_at:>=", - ("startedat", FilterOperator.LESS_THAN): "started_at:<", - ("startedat", FilterOperator.LESS_THAN_OR_EQUAL): "started_at:<=", - ("startedat", FilterOperator.EQUAL): "started_at:", - ("description", FilterOperator.EQUAL): "description:", - ("description", FilterOperator.LIKE): "description:", - ("type", FilterOperator.EQUAL): "type:", - ("type", FilterOperator.LIKE): "type:", - } - super().__init__(*args, **kwargs) - - def meta_get_tables(self, *args, **kwargs) -> dict: - data = query_graphql_nodes( - self.model_name, - self.model, - "id", - ) - row_count = len(data) - - return { - "table_name": self.name, - "table_type": "BASE TABLE", - "table_description": "A list of marketing events.", - "row_count": row_count, - } - - def meta_get_primary_keys(self, table_name: str) -> List[Dict]: - return [ - { - "table_name": table_name, - "column_name": "id", - } - ] - - def meta_get_foreign_keys(self, table_name: str, all_tables: List[str]) -> List[Dict]: - return [] - - -class InventoryItemsTable(ShopifyMetaAPIResource): - """The Shopify InventoryItems table implementation - Reference: https://shopify.dev/docs/api/admin-graphql/latest/queries/inventoryitems - """ - - def __init__(self, *args, **kwargs): - self.name = "inventory_items" - self.model = InventoryItems - self.model_name = "inventoryItems" - self.columns = inventory_items_columns - - self.sort_map = {} - - self.conditions_op_map = { - ("id", FilterOperator.GREATER_THAN): "id:>", - ("id", 
FilterOperator.GREATER_THAN_OR_EQUAL): "id:>=", - ("id", FilterOperator.LESS_THAN): "id:<", - ("id", FilterOperator.LESS_THAN_OR_EQUAL): "id:<=", - ("id", FilterOperator.EQUAL): "id:", - ("createdat", FilterOperator.GREATER_THAN): "created_at:>", - ("createdat", FilterOperator.GREATER_THAN_OR_EQUAL): "created_at:>=", - ("createdat", FilterOperator.LESS_THAN): "created_at:<", - ("createdat", FilterOperator.LESS_THAN_OR_EQUAL): "created_at:<=", - ("createdat", FilterOperator.EQUAL): "created_at:", - ("sku", FilterOperator.EQUAL): "sku:", - ("updatedat", FilterOperator.GREATER_THAN): "updated_at:>", - ("updatedat", FilterOperator.GREATER_THAN_OR_EQUAL): "updated_at:>=", - ("updatedat", FilterOperator.LESS_THAN): "updated_at:<", - ("updatedat", FilterOperator.LESS_THAN_OR_EQUAL): "updated_at:<=", - ("updatedat", FilterOperator.EQUAL): "updated_at:", - } - super().__init__(*args, **kwargs) - - def meta_get_tables(self, *args, **kwargs) -> dict: - data = query_graphql_nodes( - self.model_name, - self.model, - "id", - ) - row_count = len(data) - - return { - "table_name": self.name, - "table_type": "BASE TABLE", - "table_description": "A list of inventory items.", - "row_count": row_count, - } - - def meta_get_primary_keys(self, table_name: str) -> List[Dict]: - return [ - { - "table_name": table_name, - "column_name": "id", - } - ] - - def meta_get_foreign_keys(self, table_name: str, all_tables: List[str]) -> List[Dict]: - return [] - - -class StaffMembersTable(ShopifyMetaAPIResource): - """The Shopify StaffMembers table implementation - Reference: https://shopify.dev/docs/api/admin-graphql/latest/queries/staffmembers - """ - - def __init__(self, *args, **kwargs): - self.name = "staff_members" - self.model = StaffMembers - self.model_name = "staffMembers" - self.columns = staff_members_columns - - sort_map = { - StaffMembers.id: "ID", - StaffMembers.email: "EMAIL", - StaffMembers.firstName: "FIRST_NAME", - StaffMembers.lastName: "LAST_NAME", - } - self.sort_map = 
{key.name.lower(): value for key, value in sort_map.items()} - - self.conditions_op_map = { - ("accounttype", FilterOperator.EQUAL): "account_type:", - ("email", FilterOperator.EQUAL): "email:", - ("firstname", FilterOperator.EQUAL): "first_name:", - ("firstname", FilterOperator.LIKE): "first_name:", - ("lastname", FilterOperator.EQUAL): "last_name:", - ("lastname", FilterOperator.LIKE): "last_name:", - ("id", FilterOperator.GREATER_THAN): "id:>", - ("id", FilterOperator.GREATER_THAN_OR_EQUAL): "id:>=", - ("id", FilterOperator.LESS_THAN): "id:<", - ("id", FilterOperator.LESS_THAN_OR_EQUAL): "id:<=", - ("id", FilterOperator.EQUAL): "id:", - } - super().__init__(*args, **kwargs) - - def meta_get_tables(self, *args, **kwargs) -> dict: - data = query_graphql_nodes( - self.model_name, - self.model, - "id", - ) - row_count = len(data) - - return { - "table_name": self.name, - "table_type": "BASE TABLE", - "table_description": "The shop staff members.", - "row_count": row_count, - } - - def meta_get_primary_keys(self, table_name: str) -> List[Dict]: - return [ - { - "table_name": table_name, - "column_name": "id", - } - ] - - def meta_get_foreign_keys(self, table_name: str, all_tables: List[str]) -> List[Dict]: - return [] - - -class GiftCardsTable(ShopifyMetaAPIResource): - """The Shopify GiftCards table implementation - Reference: https://shopify.dev/docs/api/admin-graphql/latest/queries/giftcards - """ - - def __init__(self, *args, **kwargs): - self.name = "gift_cards" - self.model = GiftCards - self.model_name = "giftCards" - self.columns = gift_cards_columns - - sort_map = { - GiftCards.balance: "BALANCE", - GiftCards.createdAt: "CREATED_AT", - GiftCards.deactivatedAt: "DISABLED_AT", - GiftCards.expiresOn: "EXPIRES_ON", - GiftCards.id: "ID", - GiftCards.initialValue: "INITIAL_VALUE", - GiftCards.updatedAt: "UPDATED_AT", - } - self.sort_map = {key.name.lower(): value for key, value in sort_map.items()} - - self.conditions_op_map = { - ("createdat", 
FilterOperator.GREATER_THAN): "created_at:>", - ("createdat", FilterOperator.GREATER_THAN_OR_EQUAL): "created_at:>=", - ("createdat", FilterOperator.LESS_THAN): "created_at:<", - ("createdat", FilterOperator.LESS_THAN_OR_EQUAL): "created_at:<=", - ("createdat", FilterOperator.EQUAL): "created_at:", - ("expireson", FilterOperator.GREATER_THAN): "expires_on:>", - ("expireson", FilterOperator.GREATER_THAN_OR_EQUAL): "expires_on:>=", - ("expireson", FilterOperator.LESS_THAN): "expires_on:<", - ("expireson", FilterOperator.LESS_THAN_OR_EQUAL): "expires_on:<=", - ("expireson", FilterOperator.EQUAL): "expires_on:", - ("id", FilterOperator.GREATER_THAN): "id:>", - ("id", FilterOperator.GREATER_THAN_OR_EQUAL): "id:>=", - ("id", FilterOperator.LESS_THAN): "id:<", - ("id", FilterOperator.LESS_THAN_OR_EQUAL): "id:<=", - ("id", FilterOperator.EQUAL): "id:", - } - super().__init__(*args, **kwargs) - - def meta_get_tables(self, *args, **kwargs) -> dict: - response = self.query_graphql("""{ - giftCardsCount(limit:null) { - count - } }""") - row_count = response["data"]["giftCardsCount"]["count"] - - return { - "table_name": self.name, - "table_type": "BASE TABLE", - "table_description": "List of gift cards.", - "row_count": row_count, - } - - def meta_get_primary_keys(self, table_name: str) -> List[Dict]: - return [ - { - "table_name": table_name, - "column_name": "id", - } - ] - - def meta_get_foreign_keys(self, table_name: str, all_tables: List[str]) -> List[Dict]: - return [ - { - "PARENT_TABLE_NAME": table_name, - "PARENT_COLUMN_NAME": "customerId", - "CHILD_TABLE_NAME": "customers", - "CHILD_COLUMN_NAME": "id", - }, - { - "PARENT_TABLE_NAME": table_name, - "PARENT_COLUMN_NAME": "orderId", - "CHILD_TABLE_NAME": "orders", - "CHILD_COLUMN_NAME": "id", - }, - ] diff --git a/mindsdb/integrations/handlers/shopify_handler/tests/__init__.py b/mindsdb/integrations/handlers/shopify_handler/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git 
a/mindsdb/integrations/handlers/shopify_handler/utils.py b/mindsdb/integrations/handlers/shopify_handler/utils.py deleted file mode 100644 index e58e782af7d..00000000000 --- a/mindsdb/integrations/handlers/shopify_handler/utils.py +++ /dev/null @@ -1,225 +0,0 @@ -import json -import inspect -from enum import Enum -from dataclasses import dataclass - -import shopify - -from mindsdb.utilities import log - -from .models.utils import Nodes, Extract, DeepExtract -from .models.common import AliasesEnum - -logger = log.getLogger(__name__) - -MAX_PAGE_LIMIT = 250 -PAGE_INFO = "pageInfo { hasNextPage endCursor }" - - -def _format_error(errors_list: list[dict]) -> str: - """Format shopify's GraphQL error list into a single string. - - Args: - errors_list: The list of errors. - - Returns: - str: The formatted error string. - """ - errors_text = [record.get("message", "undescribed") for record in errors_list] - if len(errors_list) == 0: - errors_text = errors_text[0] - return f"An error occurred when executing the query: {errors_text}" - errors_text = "\n".join(errors_text) - return f"Error occurred when executing the query:\n{errors_text}" - - -def _build_nested_graphql(path: list[str]) -> str: - """Build a nested GraphQL query from a path. - - Args: - path: The path to build the query from (e.g., ["totalPriceSet", "presentmentMoney", "amount"]). - - Returns: - str: The nested GraphQL query (e.g., "totalPriceSet { presentmentMoney { amount } }"). - """ - if len(path) == 1: - return path[0] - return f"{path[0]} {{ {_build_nested_graphql(path[1:])} }}" - - -def get_graphql_columns(root: AliasesEnum, targets: list[str] | None = None) -> str: - """Get the GraphQL columns for a given object. - - Args: - root: The object to get the GraphQL columns for. - targets: The list of columns to include in the query. - - Returns: - str: The GraphQL columns string. 
- """ - acc = [] - if targets: - targets = [name.lower() for name in targets] - for name, value in root.aliases(): - if targets and name.lower() not in targets: - continue - if isinstance(value, Nodes): - sub_fields = get_graphql_columns(value.enum) - acc.append(f"{name}(first: {MAX_PAGE_LIMIT}) {{ nodes {{{sub_fields}}} {PAGE_INFO} }}") - elif isinstance(value, Extract): - acc.append(f"{name}:{value.obj} {{ {value.key} }}") - elif isinstance(value, DeepExtract): - nested_query = _build_nested_graphql(value.path) - acc.append(f"{name}:{nested_query}") - elif inspect.isclass(value) and issubclass(value, Enum): - sub_fields = get_graphql_columns(value) - acc.append(f"{name} {{{sub_fields}}}") - else: - acc.append(value) - return " ".join(acc) - - -@dataclass(slots=True, kw_only=True) -class ShopifyQuery: - """A class to represent a Shopify GraphQL query. - - Args: - operation_name: The name of the operation to execute. - columns: The columns to include in the query. - limit: The limit of the query. - cursor: The cursor to use for pagination. - sort_key: The key to use for sorting. - reverse: Whether to reverse the sort. - query: The query to execute. - """ - - operation_name: str - columns: str - limit: int | None = None - cursor: str | None = None - sort_key: str | None = None - reverse: bool = False - query: str | None = None - - def to_string(self) -> str: - """Convert the query to a string. - - Returns: - str: The string representation of the query. - """ - items = [f"first: {self.limit or MAX_PAGE_LIMIT}"] - if self.cursor: - items.append(f'after: "{self.cursor}"') - if self.sort_key: - items.append(f"sortKey: {self.sort_key}, reverse: {'true' if self.reverse else 'false'}") - if self.query: - items.append(f'query: "{self.query}"') - return f"{{ {self.operation_name} ({', '.join(items)}) {{ nodes {{ {self.columns} }} {PAGE_INFO} }} }}" - - def execute(self) -> list[dict]: - """Execute the query. - - Returns: - list[dict]: The result of the query. 
- """ - result = shopify.GraphQL().execute(self.to_string()) - return json.loads(result) - - -def query_graphql_nodes( - root_name: str, - root_class: type, - columns: str, - cursor: str | None = None, - limit: int | None = None, - sort_key: str | None = None, - sort_reverse: bool = False, - query: str | None = None, - depth: int = 1, -): - """Query the GraphQL API for nodes. - - Args: - root_name: The name of the root object. - root_class: The root object. - columns: The columns to include in the query. - cursor: The cursor to use for pagination. - limit: The limit of the query. - sort_key: The key to use for sorting. - sort_reverse: Whether to reverse the sort. - query: The query to execute. - depth: The depth of the nodes to fetch. Default is 1: fetch the first level of nested nodes. - - Returns: - list[dict]: The list of nodes. - """ - result_data = [] - hasNextPage = True - while hasNextPage: - if limit is None: - query_limit = MAX_PAGE_LIMIT - else: - remaining = limit - len(result_data) - query_limit = min(max(remaining, 1), MAX_PAGE_LIMIT) - result = ShopifyQuery( - operation_name=root_name, - columns=columns, - limit=query_limit, - cursor=cursor, - sort_key=sort_key, - reverse=sort_reverse, - query=query, - ).execute() - if "errors" in result: - raise Exception(_format_error(result["errors"])) - hasNextPage = result["data"][root_name]["pageInfo"]["hasNextPage"] - cursor = result["data"][root_name]["pageInfo"]["endCursor"] - result_data += result["data"][root_name]["nodes"] - if limit is not None and len(result_data) >= limit: - break - - fetched_fields = [] - if len(result_data) > 0: - fetched_fields = [name.lower() for name in result_data[0].keys()] - - nodes_name = [ - name for name, value in root_class.aliases() if isinstance(value, Nodes) if name.lower() in fetched_fields - ] - extracts_names = [ - name for name, value in root_class.aliases() if isinstance(value, Extract) if name.lower() in fetched_fields - ] - deep_extracts_names = [ - name for name, 
value in root_class.aliases() if isinstance(value, DeepExtract) if name.lower() in fetched_fields - ] - - for row in result_data: - for name in nodes_name: - value = root_class[name].value - node_data = row[name]["nodes"] - hasNextPage = row[name]["pageInfo"]["hasNextPage"] - if depth > 0 and hasNextPage: - cursor = row[name]["pageInfo"]["endCursor"] - result = query_graphql_nodes( - root_name=name, - cursor=cursor, - root_class=value.enum, - columns=get_graphql_columns(value.enum), - depth=depth - 1, - ) - node_data += result - row[name] = node_data - for name in extracts_names: - value = root_class[name].value - row[name] = (row[name] or {}).get(value.key) - for name in deep_extracts_names: - # Extract value by traversing the path (skip first element as it is the top-level key) - value = root_class[name].value - data = row[name] - for key in value.path[1:]: - data = (data or {}).get(key) - row[name] = data - - if limit: - result_data = result_data[:limit] - - return result_data diff --git a/mindsdb/integrations/handlers/snowflake_handler/README.md b/mindsdb/integrations/handlers/snowflake_handler/README.md deleted file mode 100644 index 2adbe736c38..00000000000 --- a/mindsdb/integrations/handlers/snowflake_handler/README.md +++ /dev/null @@ -1,202 +0,0 @@ ---- -title: Snowflake -sidebarTitle: Snowflake ---- - -This documentation describes the integration of MindsDB with [Snowflake](https://www.snowflake.com/en/), a cloud data warehouse used to store and analyze data. -The integration allows MindsDB to access data stored in the Snowflake database and enhance it with AI capabilities. - - -**Important!** - -When querying data from Snowflake, MindsDB automatically converts column names to lower-case. To prevent this, users can provide an alias name as shown below. - -**This update is introduced with the MindsDB version 25.3.4.1. It is not backward-compatible and has the following implications:** - -1. 
Queries to Snowflake will return column names in lower-case from now on. -2. The models created with Snowflake as a data source must be recreated. - -**How it works** - -The below query presents how Snowflake columns are output when queried from MindsDB. - -```sql -SELECT - CC_NAME, -- converted to lower-case - CC_CLASS AS `CC_CLASS`, -- provided alias name in upper-case - CC_EMPLOYEES, - cc_employees -FROM snowflake_data.TPCDS_SF100TCL.CALL_CENTER; -``` - -Here is the output: - -```sql -+--------------+----------+--------------+--------------+ -| cc_name | CC_CLASS | cc_employees | cc_employees | -+--------------+----------+--------------+--------------+ -| NY Metro | large | 597159671 | 597159671 | -| Mid Atlantic | medium | 944879074 | 944879074 | -+--------------+----------+--------------+--------------+ -``` - - - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect Snowflake to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - -## Connection - -The Snowflake handler supports two authentication methods: - -### 1. Password Authentication (Legacy) - -Establish a connection using username and password: - -```sql -CREATE DATABASE snowflake_datasource -WITH - ENGINE = 'snowflake', - PARAMETERS = { - "account": "tvuibdy-vm85921", - "user": "your_username", - "password": "your_password", - "database": "test_db", - "auth_type": "password" - }; -``` - -### 2. 
Key Pair Authentication (Recommended) - -Key pair authentication is more secure and is the recommended method by Snowflake: - -```sql -CREATE DATABASE snowflake_datasource -WITH - ENGINE = 'snowflake', - PARAMETERS = { - "account": "tvuibdy-vm85921", - "user": "your_username", - "private_key_path": "/path/to/your/private_key.pem", - "database": "test_db", - "auth_type": "key_pair" - }; -``` - -If the private key cannot be accesed from disk (for example when running MindsDB on Cloud), provide the PEM content directly: - -```sql -CREATE DATABASE snowflake_datasource -WITH - ENGINE = 'snowflake', - PARAMETERS = { - "account": "tvuibdy-vm85921", - "user": "your_username", - "private_key": "-----BEGIN PRIVATE KEY-----\\n...\\n-----END PRIVATE KEY-----", - "database": "test_db", - "auth_type": "key_pair" - }; -``` - -With encrypted private key (passphrase protected): - -```sql -CREATE DATABASE snowflake_datasource -WITH - ENGINE = 'snowflake', - PARAMETERS = { - "account": "tvuibdy-vm85921", - "user": "your_username", - "private_key_path": "/path/to/your/private_key.pem", - "private_key_passphrase": "your_passphrase", - "database": "test_db", - "auth_type": "key_pair" - }; -``` - -### Connection Parameters - -Required parameters: - -* `account`: The Snowflake account identifier. This [guide](https://docs.snowflake.com/en/user-guide/admin-account-identifier) will help you find your account identifier. -* `user`: The username for the Snowflake account. -* `database`: The name of the Snowflake database to connect to. -* `auth_type`: The authentication type to use. Options: `"password"` or `"key_pair"`. -* `schema`: The database schema to use within the Snowflake database. - -Authentication parameters (one method required): - -* `password`: The password for the Snowflake account (password authentication). -* `private_key_path`: Path to the private key file for key pair authentication. -* `private_key`: PEM-formatted private key content for key pair authentication. 
Use when the key cannot be stored on disk. -* `private_key_passphrase`: Optional passphrase for encrypted private key (key pair authentication). - -Optional parameters: - -* `warehouse`: The Snowflake warehouse to use for running queries. -* `schema`: The database schema to use within the Snowflake database. Default is `PUBLIC`. -* `role`: The Snowflake role to use. - - -For detailed instructions on setting up key pair authentication, please refer to [AUTHENTICATION.md](AUTHENTICATION.md) or the [Snowflake Key Pair Authentication documentation](https://docs.snowflake.com/en/user-guide/key-pair-auth.html). - - -## Usage - -Retrieve data from a specified table by providing the integration name, schema, and table name: - -```sql -SELECT * -FROM snowflake_datasource.schema_name.table_name -LIMIT 10; -``` - -Run Snowflake SQL queries directly on the connected Snowflake database: - -```sql -SELECT * FROM snowflake_datasource ( - - --Native Query Goes Here - SELECT - employee_table.* EXCLUDE department_id, - department_table.* RENAME department_name AS department - FROM employee_table INNER JOIN department_table - ON employee_table.department_id = department_table.department_id - ORDER BY department, last_name, first_name; - -); -``` - - -The above examples utilize `snowflake_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command. - - -## Troubleshooting Guide - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the Snowflake account. -* **Checklist**: - 1. Make sure the Snowflake is active. - 2. Confirm that account, user, password and database are correct. Try a direct Snowflake connection using a client like DBeaver. - 3. Ensure a stable network between MindsDB and Snowflake. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters. -* **Checklist**: - 1. 
Ensure table names with spaces or special characters are enclosed in backticks. - 2. Examples: - * Incorrect: SELECT * FROM integration.travel data - * Incorrect: SELECT * FROM integration.'travel data' - * Correct: SELECT * FROM integration.\`travel data\` - - -This [troubleshooting guide](https://community.snowflake.com/s/article/Snowflake-Client-Connectivity-Troubleshooting) provided by Snowflake might also be helpful. diff --git a/mindsdb/integrations/handlers/snowflake_handler/__about__.py b/mindsdb/integrations/handlers/snowflake_handler/__about__.py deleted file mode 100644 index e816c00d23b..00000000000 --- a/mindsdb/integrations/handlers/snowflake_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Snowflake handler' -__package_name__ = 'mindsdb_snowflake_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for Snowflake" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/snowflake_handler/__init__.py b/mindsdb/integrations/handlers/snowflake_handler/__init__.py deleted file mode 100644 index 817c7c0ded5..00000000000 --- a/mindsdb/integrations/handlers/snowflake_handler/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL - -from .__about__ import __version__ as version, __description__ as description -from .connection_args import connection_args, connection_args_example - -try: - from .snowflake_handler import SnowflakeHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - - -title = "Snowflake" -name = "snowflake" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.MINDSDB - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "support_level", - 
"connection_args", - "connection_args_example", - "import_error", - "icon_path", -] diff --git a/mindsdb/integrations/handlers/snowflake_handler/auth_types.py b/mindsdb/integrations/handlers/snowflake_handler/auth_types.py deleted file mode 100644 index 12c928057f5..00000000000 --- a/mindsdb/integrations/handlers/snowflake_handler/auth_types.py +++ /dev/null @@ -1,79 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Dict, Any, Union -from pathlib import Path - -from cryptography.hazmat.primitives import serialization -from cryptography.hazmat.backends import default_backend - - -class SnowflakeAuthType(ABC): - @abstractmethod - def get_config(self, **kwargs) -> Dict[str, Any]: - pass - - -class PasswordAuthType(SnowflakeAuthType): - def get_config(self, **kwargs) -> Dict[str, Any]: - required_keys = ["account", "user", "database", "schema"] - if not all(kwargs.get(key) for key in required_keys): - raise ValueError("Required parameters (account, user, database, schema) must be provided.") - - if not kwargs.get("password"): - raise ValueError("Password must be provided when auth_type is 'password'.") - return { - "account": kwargs.get("account"), - "user": kwargs.get("user"), - "password": kwargs.get("password"), - "database": kwargs.get("database"), - "schema": kwargs.get("schema"), - "role": kwargs.get("role"), - "warehouse": kwargs.get("warehouse"), - "auth_type": "password", - } - - -class KeyPairAuthType(SnowflakeAuthType): - def get_config(self, **kwargs) -> Dict[str, Any]: - if not all(kwargs.get(key) for key in ["account", "user", "database", "schema"]): - raise ValueError("Required parameters (account, user, database, schema) must be provided.") - - private_key_value = kwargs.get("private_key") - private_key_path = kwargs.get("private_key_path") - - if not private_key_value and not private_key_path: - raise ValueError("Either private_key or private_key_path must be provided when auth_type is 'key_pair'.") - - config = { - "account": 
kwargs.get("account"), - "user": kwargs.get("user"), - "database": kwargs.get("database"), - "schema": kwargs.get("schema"), - "role": kwargs.get("role"), - "warehouse": kwargs.get("warehouse"), - "authenticator": "SNOWFLAKE_JWT", - "auth_type": "key_pair", - } - - if private_key_value: - config["private_key"] = self._load_private_key(private_key_value, kwargs.get("private_key_passphrase")) - else: - if not Path(private_key_path).exists(): - raise ValueError(f"Private key file not found: {private_key_path}") - config["private_key_file"] = private_key_path - if kwargs.get("private_key_passphrase"): - config["private_key_file_pwd"] = kwargs.get("private_key_passphrase") - return config - - def _load_private_key(self, private_key: Union[str, bytes], passphrase: str = None): - if isinstance(private_key, str): - private_key = private_key.replace("\\n", "\n").encode() - elif isinstance(private_key, bytes) is False: - raise ValueError("private_key must be a string or bytes.") - - password = passphrase.encode() if passphrase else None - try: - return serialization.load_pem_private_key(private_key, password=password, backend=default_backend()) - except Exception as exc: - raise ValueError( - "Failed to load private_key. Ensure it is a valid PEM-encoded key and the passphrase is correct." 
- ) from exc diff --git a/mindsdb/integrations/handlers/snowflake_handler/connection_args.py b/mindsdb/integrations/handlers/snowflake_handler/connection_args.py deleted file mode 100644 index 6c6c7497115..00000000000 --- a/mindsdb/integrations/handlers/snowflake_handler/connection_args.py +++ /dev/null @@ -1,80 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -connection_args = OrderedDict( - account={ - "type": ARG_TYPE.STR, - "description": "The Snowflake account identifier.", - "required": True, - "label": "Account", - }, - user={ - "type": ARG_TYPE.STR, - "description": "The user name used to authenticate with the Snowflake account.", - "required": True, - "label": "User", - }, - password={ - "type": ARG_TYPE.PWD, - "description": "The password to authenticate the user with the Snowflake account. Required for password authentication.", - "required": False, - "label": "Password", - "secret": True, - }, - private_key_path={ - "type": ARG_TYPE.PATH, - "description": "Path to the private key file for key pair authentication. Required for key pair authentication.", - "required": False, - "label": "Private Key Path", - }, - private_key={ - "type": ARG_TYPE.PWD, - "description": "PEM-formatted private key content for key pair authentication. 
Use when the key cannot be stored on disk.", - "required": False, - "label": "Private Key", - "secret": True, - }, - private_key_passphrase={ - "type": ARG_TYPE.PWD, - "description": "Optional passphrase for the encrypted private key.", - "required": False, - "label": "Private Key Passphrase", - "secret": True, - }, - database={ - "type": ARG_TYPE.STR, - "description": "The database to use when connecting to the Snowflake account.", - "required": True, - "label": "Database", - }, - schema={ - "type": ARG_TYPE.STR, - "description": "The schema to use when connecting to the Snowflake account.", - "required": True, - "label": "Schema", - }, - warehouse={ - "type": ARG_TYPE.STR, - "description": "The warehouse to use when executing queries on the Snowflake account.", - "required": False, - "label": "Warehouse", - }, - role={ - "type": ARG_TYPE.STR, - "description": "The role to use when executing queries on the Snowflake account.", - "required": False, - "label": "Role", - }, - auth_type={ - "type": ARG_TYPE.STR, - "description": 'Required authentication type. 
Options: "password" or "key_pair".', - "required": True, - "label": "Auth Type", - }, -) - -connection_args_example = OrderedDict( - account="abcxyz-1234567", user="user", password="password", database="test", auth_type="password" -) diff --git a/mindsdb/integrations/handlers/snowflake_handler/icon.svg b/mindsdb/integrations/handlers/snowflake_handler/icon.svg deleted file mode 100644 index ede5f906069..00000000000 --- a/mindsdb/integrations/handlers/snowflake_handler/icon.svg +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/snowflake_handler/requirements.txt b/mindsdb/integrations/handlers/snowflake_handler/requirements.txt deleted file mode 100644 index b267c6e302d..00000000000 --- a/mindsdb/integrations/handlers/snowflake_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -snowflake-connector-python[pandas]==4.4.0 -snowflake-sqlalchemy==1.9.0 diff --git a/mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py b/mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py deleted file mode 100644 index 04898c3df63..00000000000 --- a/mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +++ /dev/null @@ -1,731 +0,0 @@ -from typing import Any, Optional, List, Generator - -import pandas -from pandas import DataFrame -from pandas.api import types as pd_types -from snowflake.sqlalchemy import snowdialect -from snowflake import connector -from snowflake.connector.errors import NotSupportedError -from snowflake.connector.cursor import ResultMetadata - -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb_sql_parser.ast import Select, Identifier - -from mindsdb.integrations.libs.base import MetaDatabaseHandler -from mindsdb.utilities import log -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.utilities.types.column import Column -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as 
StatusResponse, - TableResponse, - OkResponse, - ErrorResponse, - DataHandlerResponse, -) - -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE - -from .auth_types import ( - PasswordAuthType, - KeyPairAuthType, -) - -try: - import pyarrow as pa - - memory_pool = pa.default_memory_pool() -except Exception: - memory_pool = None - - -logger = log.getLogger(__name__) - - -def _map_type(internal_type_name: str) -> MYSQL_DATA_TYPE: - """Map Snowflake types to MySQL types. - - Args: - internal_type_name (str): The name of the Snowflake type to map. - - Returns: - MYSQL_DATA_TYPE: The MySQL type that corresponds to the Snowflake type. - """ - internal_type_name = internal_type_name.upper() - types_map = { - ("NUMBER", "DECIMAL", "DEC", "NUMERIC"): MYSQL_DATA_TYPE.DECIMAL, - ("INT , INTEGER , BIGINT , SMALLINT , TINYINT , BYTEINT"): MYSQL_DATA_TYPE.INT, - ("FLOAT", "FLOAT4", "FLOAT8", "FIXED"): MYSQL_DATA_TYPE.FLOAT, - ("DOUBLE", "DOUBLE PRECISION", "REAL"): MYSQL_DATA_TYPE.DOUBLE, - ("VARCHAR",): MYSQL_DATA_TYPE.VARCHAR, - ("CHAR", "CHARACTER", "NCHAR"): MYSQL_DATA_TYPE.CHAR, - ("STRING", "TEXT", "NVARCHAR"): MYSQL_DATA_TYPE.TEXT, - ("NVARCHAR2", "CHAR VARYING", "NCHAR VARYING"): MYSQL_DATA_TYPE.VARCHAR, - ("BINARY", "VARBINARY"): MYSQL_DATA_TYPE.BINARY, - ("BOOLEAN",): MYSQL_DATA_TYPE.BOOL, - ("TIMESTAMP_NTZ", "DATETIME"): MYSQL_DATA_TYPE.DATETIME, - ("DATE",): MYSQL_DATA_TYPE.DATE, - ("TIME",): MYSQL_DATA_TYPE.TIME, - ("TIMESTAMP_LTZ",): MYSQL_DATA_TYPE.DATETIME, - ("TIMESTAMP_TZ",): MYSQL_DATA_TYPE.DATETIME, - ("OBJECT", "ARRAY"): MYSQL_DATA_TYPE.JSON, - ("VECTOR",): MYSQL_DATA_TYPE.VECTOR, - ("VARIANT", "MAP", "GEOGRAPHY", "GEOMETRY", "VECTOR"): MYSQL_DATA_TYPE.VARCHAR, - } - - for db_types_list, mysql_data_type in types_map.items(): - if internal_type_name in db_types_list: - return mysql_data_type - - logger.debug(f"Snowflake handler type mapping: unknown type: {internal_type_name}, use VARCHAR as fallback.") - return 
MYSQL_DATA_TYPE.VARCHAR - - -def _get_columns(description: list[ResultMetadata], sample: pandas.DataFrame = None) -> list[Column]: - """Get columns from Snowflake cursor description. - - Args: - description (list[ResultMetadata]): cursor description metadata. - sample (pandas.DataFrame): data sample - - Returns: - list[Column]: list of columns with mapped MySQL types. - """ - result = [] - for column in description: - mysql_type = None - sf_type_name = connector.constants.FIELD_ID_TO_NAME.get(column.type_code) - if sf_type_name is None: - logger.warning(f"Snowflake handler: unknown type code: {column.type_code}") - mysql_type = MYSQL_DATA_TYPE.VARCHAR - - if sample is not None: - column_dtype = sample[column.name].dtype - - if pd_types.is_integer_dtype(column_dtype): - column_dtype_name = column_dtype.name - if column_dtype_name in ("int8", "Int8"): - mysql_type = MYSQL_DATA_TYPE.TINYINT - elif column_dtype in ("int16", "Int16"): - mysql_type = MYSQL_DATA_TYPE.SMALLINT - elif column_dtype in ("int32", "Int32"): - mysql_type = MYSQL_DATA_TYPE.MEDIUMINT - elif column_dtype in ("int64", "Int64"): - mysql_type = MYSQL_DATA_TYPE.BIGINT - else: - mysql_type = MYSQL_DATA_TYPE.INT - - elif pd_types.is_float_dtype(column_dtype): - column_dtype_name = column_dtype.name - if column_dtype_name in ("float16", "Float16"): # Float16 does not exists so far - mysql_type = MYSQL_DATA_TYPE.FLOAT - elif column_dtype_name in ("float32", "Float32"): - mysql_type = MYSQL_DATA_TYPE.FLOAT - elif column_dtype_name in ("float64", "Float64"): - mysql_type = MYSQL_DATA_TYPE.DOUBLE - else: - mysql_type = MYSQL_DATA_TYPE.FLOAT - - elif pd_types.is_bool_dtype(column_dtype): - mysql_type = MYSQL_DATA_TYPE.BOOLEAN - - elif pd_types.is_datetime64_any_dtype(column_dtype): - mysql_type = MYSQL_DATA_TYPE.DATETIME - series = sample[column.name] - # snowflake use pytz.timezone - if series.dt.tz is not None and getattr(series.dt.tz, "zone", "UTC") != "UTC": - series = series.dt.tz_convert("UTC") - 
sample[column.name] = series.dt.tz_localize(None) - - elif pd_types.is_object_dtype(column_dtype): - if sf_type_name == "TEXT": - # we can also check column.internal_size, if == 16777216 then it is TEXT, else VARCHAR(internal_size) - mysql_type = MYSQL_DATA_TYPE.TEXT - elif sf_type_name == "BINARY": - # if column.internal_size == 8388608 then BINARY, else VARBINARY(internal_size) - mysql_type = MYSQL_DATA_TYPE.BINARY - elif sf_type_name == "DATE": - mysql_type = MYSQL_DATA_TYPE.DATE - elif sf_type_name == "TIME": - mysql_type = MYSQL_DATA_TYPE.TIME - elif sf_type_name == "FIXED": - if getattr(column, "scale", None) == 0: - mysql_type = MYSQL_DATA_TYPE.INT - else: - # It is NUMBER, DECIMAL or NUMERIC with scale > 0 - mysql_type = MYSQL_DATA_TYPE.FLOAT - - if mysql_type is None: - mysql_type = _map_type(sf_type_name) - - result.append(Column(name=column.name, type=mysql_type, original_type=sf_type_name)) - return result - - -class SnowflakeHandler(MetaDatabaseHandler): - """ - This handler handles connection and execution of the Snowflake statements. - """ - - name = "snowflake" - stream_response = True - - _auth_types = { - "key_pair": KeyPairAuthType(), - "password": PasswordAuthType(), - } - - def __init__(self, name, **kwargs): - super().__init__(name) - self.connection_data = kwargs.get("connection_data") - self.renderer = SqlalchemyRender(snowdialect.dialect) - - self.is_connected = False - self.connection = None - - def connect(self): - """ - Establishes a connection to a Snowflake account. - - Supports two authentication methods: - 1. User/password authentication (legacy) - 2. Key pair authentication (recommended) - - Raises: - ValueError: If the required connection parameters are not provided. - snowflake.connector.errors.Error: If an error occurs while connecting to the Snowflake account. - - Returns: - snowflake.connector.connection.SnowflakeConnection: A connection object to the Snowflake account. 
- """ - - if self.is_connected is True: - return self.connection - - auth_type_key = self.connection_data.get("auth_type", "password") - if auth_type_key is None: - supported = ", ".join(self._auth_types.keys()) - raise ValueError(f"auth_type is required. Supported values: {supported}.") - - auth_type = self._auth_types.get(auth_type_key) - if not auth_type: - supported = ", ".join(self._auth_types.keys()) - raise ValueError(f"Invalid auth_type '{auth_type_key}'. Supported values: {supported}.") - - config = auth_type.get_config(**self.connection_data) - - try: - self.connection = connector.connect(**config) - self.connection.telemetry_enabled = False - self.is_connected = True - return self.connection - except connector.errors.Error as e: - logger.error(f"Error connecting to Snowflake, {e}!") - raise - - def disconnect(self): - """ - Closes the connection to the Snowflake account if it's currently open. - """ - - if self.is_connected is False: - return - self.connection.close() - self.is_connected = False - - def check_connection(self) -> StatusResponse: - """ - Checks the status of the connection to the Snowflake account. - - Returns: - StatusResponse: An object containing the success status and an error message if an error occurs. 
- """ - response = StatusResponse(False) - need_to_close = not self.is_connected - - try: - connection = self.connect() - - # Execute a simple query to test the connection - with connection.cursor() as cur: - cur.execute("select 1;") - response.success = True - except (connector.errors.Error, ValueError) as e: - logger.error(f"Error connecting to Snowflake, {e}!") - response.error_message = str(e) - - if response.success and need_to_close: - self.disconnect() - - elif not response.success and self.is_connected: - self.is_connected = False - - return response - - def native_query(self, query: str, stream: bool = True, **kwargs) -> TableResponse | OkResponse | ErrorResponse: - """Executes a SQL query on the Snowflake account and returns the result. - - Args: - query (str): The SQL query to be executed. - stream (bool): If True - return TableResponse with generator inside. - - Returns: - DataHandlerResponse: A response object containing the result of the query or an error message. - """ - generator = self._execute_fetch_batches(query) - try: - response: TableResponse = next(generator) - response.data_generator = generator - if stream is False: - response.fetchall() - except StopIteration as e: - response = e.value - if isinstance(response, DataHandlerResponse) is False: - raise - - return response - - def _execute_fetch_batches( - self, query: str - ) -> Generator[TableResponse | pandas.DataFrame, None, OkResponse | ErrorResponse]: - """Execute a SQL query and yield results in batches. - - Args: - query (str): The SQL query to execute. - - Yields: - TableResponse: First yield — response with column metadata and affected row count. - pandas.DataFrame: Subsequent yields — batches of query results. - - Returns: - OkResponse: For DML statements (INSERT/DELETE/UPDATE) with affected row count. - ErrorResponse: If an exception occurs during query execution. 
- """ - connection = self.connect() - with connection.cursor(connector.DictCursor) as cursor: - try: - cursor.execute(query) - try: - try: - batches_iter = cursor.fetch_pandas_batches() - except ValueError: - # duplicated columns raises ValueError - raise NotSupportedError() - try: - sample_df = next(batches_iter) - except StopIteration: - sample_df = None - columns = _get_columns(cursor.description, sample=sample_df) - yield TableResponse(data=sample_df, affected_rows=cursor.rowcount, columns=columns) - for batch_df in batches_iter: - yield batch_df - except NotSupportedError: - # Fallback for CREATE/DELETE/UPDATE. These commands returns table with single column, - # but it cannot be retrieved as pandas DataFrame. - result = cursor.fetchall() - match result: - case ( - [{"number of rows inserted": affected_rows}] - | [{"number of rows deleted": affected_rows}] - | [{"number of rows updated": affected_rows, "number of multi-joined rows updated": _}] - ): - response = OkResponse(affected_rows=affected_rows) - case list(): - response = TableResponse(data=DataFrame(result, columns=[x[0] for x in cursor.description])) - case _: - # Looks like SnowFlake always returns something in response, so this is suspicious - logger.warning("Snowflake did not return any data in response.") - response = OkResponse() - return response - except Exception as e: - logger.error(f"Error running query: {query} on {self.connection_data.get('database')}, {e}!") - return ErrorResponse(error_code=0, error_message=str(e)) - - if memory_pool is not None and memory_pool.backend_name == "jemalloc": - # This reduce memory consumption, but will slow down next query slightly. - # Except pool type 'jemalloc': memory consumption do not change significantly - # and next query processing time may be even lower. - memory_pool.release_unused() - - def query(self, query: ASTNode) -> DataHandlerResponse: - """ - Executes a SQL query represented by an ASTNode and retrieves the data. 
- - Args: - query (ASTNode): An ASTNode representing the SQL query to be executed. - - Returns: - DataHandlerResponse: The response from the `native_query` method, containing the result of the SQL query execution. - """ - - query_str = self.renderer.get_string(query, with_failback=True) - logger.debug(f"Executing SQL query: {query_str}") - result = self.native_query(query_str) - return self.lowercase_columns(result, query) - - def lowercase_columns(self, result, query): - if not isinstance(query, Select) or not isinstance(result, TableResponse): - return result - - quoted_columns = [] - if query.targets is not None: - for column in query.targets: - if hasattr(column, "alias") and column.alias is not None: - if column.alias.is_quoted[-1]: - quoted_columns.append(column.alias.parts[-1]) - elif isinstance(column, Identifier): - if column.is_quoted[-1]: - quoted_columns.append(column.parts[-1]) - - for col in result.columns: - col_name = col.alias or col.name - if col_name.isupper() and col_name not in quoted_columns: - col.alias = col_name.lower() - - return result - - def get_tables(self) -> DataHandlerResponse: - """ - Retrieves a list of all non-system tables and views in the current schema of the Snowflake account. - - Returns: - DataHandlerResponse: A response object containing the list of tables and views. - """ - - query = """ - SELECT TABLE_NAME, TABLE_SCHEMA, TABLE_TYPE - FROM INFORMATION_SCHEMA.TABLES - WHERE TABLE_TYPE IN ('BASE TABLE', 'VIEW') - AND TABLE_SCHEMA = current_schema() - """ - return self.native_query(query) - - def get_columns(self, table_name) -> DataHandlerResponse: - """ - Retrieves column details for a specified table in the Snowflake account. - - Args: - table_name (str): The name of the table for which to retrieve column information. - - Returns: - DataHandlerResponse: A response object containing the column details. - - Raises: - ValueError: If the 'table_name' is not a valid string. 
- """ - - if not table_name or not isinstance(table_name, str): - raise ValueError("Invalid table name provided.") - - query = f""" - SELECT - COLUMN_NAME, - DATA_TYPE, - ORDINAL_POSITION, - COLUMN_DEFAULT, - IS_NULLABLE, - CHARACTER_MAXIMUM_LENGTH, - CHARACTER_OCTET_LENGTH, - NUMERIC_PRECISION, - NUMERIC_SCALE, - DATETIME_PRECISION, - CHARACTER_SET_NAME, - COLLATION_NAME - FROM INFORMATION_SCHEMA.COLUMNS - WHERE TABLE_NAME = '{table_name}' - AND TABLE_SCHEMA = current_schema() - """ - result = self.native_query(query) - result.to_columns_table_response(map_type_fn=_map_type) - - return result - - def meta_get_tables(self, table_names: Optional[List[str]] = None) -> DataHandlerResponse: - """ - Retrieves metadata information about the tables in the Snowflake database to be stored in the data catalog. - - Args: - table_names (list): A list of table names for which to retrieve metadata information. - - Returns: - DataHandlerResponse: A response object containing the metadata information. - """ - query = """ - SELECT - TABLE_CATALOG, - TABLE_SCHEMA, - TABLE_NAME, - TABLE_TYPE, - COMMENT AS TABLE_DESCRIPTION, - ROW_COUNT, - CREATED, - LAST_ALTERED - FROM INFORMATION_SCHEMA.TABLES - WHERE TABLE_SCHEMA = current_schema() - AND TABLE_TYPE IN ('BASE TABLE', 'VIEW') - """ - - if table_names is not None and len(table_names) > 0: - table_names_str = ", ".join([f"'{t.upper()}'" for t in table_names]) - query += f" AND TABLE_NAME IN ({table_names_str})" - - result = self.native_query(query) - if result.data_frame is not None and "ROW_COUNT" in result.data_frame.columns: - # Snowflake can return NULL for ROW_COUNT (e.g., for views); preserve as . - result.data_frame["ROW_COUNT"] = result.data_frame["ROW_COUNT"].astype("Int64") - return result - - def meta_get_columns(self, table_names: Optional[List[str]] = None) -> DataHandlerResponse: - """ - Retrieves column metadata for the specified tables (or all tables if no list is provided). 
- - Args: - table_names (list): A list of table names for which to retrieve column metadata. - - Returns: - DataHandlerResponse: A response object containing the column metadata. - """ - query = """ - SELECT - TABLE_NAME, - COLUMN_NAME, - DATA_TYPE, - COMMENT AS COLUMN_DESCRIPTION, - COLUMN_DEFAULT, - (IS_NULLABLE = 'YES') AS IS_NULLABLE, - CHARACTER_MAXIMUM_LENGTH, - CHARACTER_OCTET_LENGTH, - NUMERIC_PRECISION, - NUMERIC_SCALE, - DATETIME_PRECISION, - CHARACTER_SET_NAME, - COLLATION_NAME - FROM INFORMATION_SCHEMA.COLUMNS - WHERE TABLE_SCHEMA = current_schema() - """ - - if table_names is not None and len(table_names) > 0: - table_names_str = ", ".join([f"'{t.upper()}'" for t in table_names]) - query += f" AND TABLE_NAME IN ({table_names_str})" - - result = self.native_query(query) - return result - - def meta_get_column_statistics(self, table_names: Optional[List[str]] = None) -> DataHandlerResponse: - """ - Retrieves basic column statistics: null %, distinct count. - Due to Snowflake limitations, this runs per-table not per-column. 
- TODO: Add most_common_values and most_common_frequencies - """ - columns_query = """ - SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME, DATA_TYPE - FROM INFORMATION_SCHEMA.COLUMNS - WHERE TABLE_SCHEMA = current_schema() - """ - if table_names: - table_names_str = ", ".join([f"'{t.upper()}'" for t in table_names]) - columns_query += f" AND TABLE_NAME IN ({table_names_str})" - - columns_result = self.native_query(columns_query) - if ( - isinstance(columns_result, ErrorResponse) - or columns_result.data_frame is None - or columns_result.data_frame.empty - ): - return ErrorResponse(error_message="No columns found.") - - columns_df = columns_result.data_frame - grouped = columns_df.groupby(["TABLE_SCHEMA", "TABLE_NAME"]) - all_stats = [] - - for (table_schema, table_name), group in grouped: - select_parts = [] - for _, row in group.iterrows(): - col = row["COLUMN_NAME"] - data_type = row["DATA_TYPE"] - # Ensure column names in the query are properly quoted if they contain special characters or are case-sensitive - quoted_col = f'"{col}"' - select_parts.extend( - [ - f'COUNT_IF({quoted_col} IS NULL) AS "nulls_{col}"', - f'APPROX_COUNT_DISTINCT({quoted_col}) AS "distincts_{col}"', - ] - ) - # We can sort and find min/max for array but is expensive for large tables, avoid for now - if data_type not in {"ARRAY", "OBJECT", "VARIANT"}: - select_parts.extend( - [ - f'MIN({quoted_col}) AS "min_{col}"', - f'MAX({quoted_col}) AS "max_{col}"', - ] - ) - - quoted_table_name = f'"{table_schema}"."{table_name}"' - stats_query = f""" - SELECT COUNT(*) AS "total_rows", {", ".join(select_parts)} - FROM {quoted_table_name} - """ - try: - stats_res = self.native_query(stats_query) - if ( - not isinstance(stats_res, TableResponse) - or stats_res.data_frame is None - or stats_res.data_frame.empty - ): - logger.warning( - f"Could not retrieve stats for table {table_name}. 
Query returned no data or an error: {stats_res.error_message if isinstance(stats_res, ErrorResponse) else 'No data'}" - ) - # Add placeholder stats if query fails or returns empty - for _, row in group.iterrows(): - all_stats.append( - { - "table_name": table_name, - "column_name": row["COLUMN_NAME"], - "null_percentage": None, - "distinct_values_count": None, - "most_common_values": [], - "most_common_frequencies": [], - "minimum_value": None, - "maximum_value": None, - } - ) - continue - - stats_data = stats_res.data_frame.iloc[0] - total_rows = stats_data.get("total_rows", 0) - - for _, row in group.iterrows(): - col = row["COLUMN_NAME"] - # Keys for stats_data should match the aliases in stats_query (e.g., "nulls_COLNAME") - nulls = stats_data.get(f"nulls_{col}", 0) - distincts = stats_data.get(f"distincts_{col}", None) - min_val = stats_data.get(f"min_{col}", None) - max_val = stats_data.get(f"max_{col}", None) - null_pct = (nulls / total_rows) * 100 if total_rows is not None and total_rows > 0 else None - - all_stats.append( - { - "table_name": table_name, - "column_name": col, - "null_percentage": null_pct, - "distinct_values_count": distincts, - "most_common_values": [], - "most_common_frequencies": [], - "minimum_value": min_val, - "maximum_value": max_val, - } - ) - except Exception as e: - logger.error(f"Exception while fetching statistics for table {table_name}: {e}") - for _, row in group.iterrows(): - all_stats.append( - { - "table_name": table_name, - "column_name": row["COLUMN_NAME"], - "null_percentage": None, - "distinct_values_count": None, - "most_common_values": [], - "most_common_frequencies": [], - "minimum_value": None, - "maximum_value": None, - } - ) - - if not all_stats: - return TableResponse(data=pandas.DataFrame()) - - return TableResponse(data=pandas.DataFrame(all_stats)) - - def meta_get_primary_keys(self, table_names: Optional[List[str]] = None) -> DataHandlerResponse: - """ - Retrieves primary key information for the specified 
tables (or all tables if no list is provided). - - Args: - table_names (list): A list of table names for which to retrieve primary key information. - - Returns: - DataHandlerResponse: A response object containing the primary key information. - """ - try: - query = """ - SHOW PRIMARY KEYS IN TABLE; - """ - - response = self.native_query(query) - if isinstance(response, ErrorResponse): - logger.error(f"Query error in meta_get_primary_keys: {response.error_message}\nQuery:\n{query}") - - df = response.data_frame - if not df.empty: - if table_names: - df = df[df["table_name"].isin(table_names)] - - df = df[["table_name", "column_name", "key_sequence", "constraint_name"]] - df = df.rename(columns={"key_sequence": "ordinal_position"}) - - response.data_frame = df - - return response - - except Exception as e: - logger.error(f"Exception in meta_get_primary_keys: {e!r}") - return ErrorResponse(error_message=f"Exception querying primary keys: {e!r}") - - def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None) -> DataHandlerResponse: - """ - Retrieves foreign key information for the specified tables (or all tables if no list is provided). - - Args: - table_names (list): A list of table names for which to retrieve foreign key information. - - Returns: - DataHandlerResponse: A response object containing the foreign key information. 
- """ - try: - query = """ - SHOW IMPORTED KEYS IN TABLE; - """ - - response = self.native_query(query) - if isinstance(response, ErrorResponse): - logger.error(f"Query error in meta_get_primary_keys: {response.error_message}\nQuery:\n{query}") - - df = response.data_frame - if not df.empty: - if table_names: - df = df[df["pk_table_name"].isin(table_names) & df["fk_table_name"].isin(table_names)] - - df = df[["pk_table_name", "pk_column_name", "fk_table_name", "fk_column_name"]] - df = df.rename( - columns={ - "pk_table_name": "parent_table_name", - "pk_column_name": "parent_column_name", - "fk_table_name": "child_table_name", - "fk_column_name": "child_column_name", - } - ) - - response.data_frame = df - - return response - - except Exception as e: - logger.error(f"Exception in meta_get_primary_keys: {e!r}") - return ErrorResponse(error_message=f"Exception querying primary keys: {e!r}") - - def meta_get_handler_info(self, **kwargs: Any) -> str: - """ - Retrieves information about the design and implementation of the database handler. - This should include, but not be limited to, the following: - - The type of SQL queries and operations that the handler supports. - - etc. - - Args: - kwargs: Additional keyword arguments that may be used in generating the handler information. - - Returns: - str: A string containing information about the database handler's design and implementation. - """ - return ( - "To query columns that contain special characters, use ticks around the column name, e.g. `column name`.\n" - "DO NOT use double quotes for this purpose." 
- ) diff --git a/mindsdb/integrations/handlers/timescaledb_handler/README.md b/mindsdb/integrations/handlers/timescaledb_handler/README.md deleted file mode 100644 index 005dce0e845..00000000000 --- a/mindsdb/integrations/handlers/timescaledb_handler/README.md +++ /dev/null @@ -1,99 +0,0 @@ ---- -title: TimescaleDB -sidebarTitle: TimescaleDB ---- - -This documentation describes the integration of MindsDB with [TimescaleDB](https://docs.timescale.com). - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To connect TimescaleDB to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - -## Connection - -Establish a connection to TimescaleDB from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/timescaledb_handler) as an engine. - -```sql -CREATE DATABASE timescaledb_datasource -WITH - engine = 'timescaledb', - parameters = { - "host": "examplehost.timescaledb.com", - "port": 5432, - "user": "example_user", - "password": "my_password", - "database": "tsdb" - }; -``` - - -Required connection parameters include the following: - -* `user`: The username for the TimescaleDB database. -* `password`: The password for the TimescaleDB database. -* `host`: The hostname, IP address, or URL of the TimescaleDB server. -* `port`: The port number for connecting to the TimescaleDB server. -* `database`: The name of the TimescaleDB database to connect to. - -Optional connection parameters include the following: - -* `schema`: The database schema to use. Default is public. 
- - -## Usage - -Before attempting to connect to a TimescaleDB server using MindsDB, ensure that it accepts incoming connections using [this guide](https://docs.timescale.com/latest/getting-started/setup/remote-connections/). - -The following usage examples utilize the connection to TimescaleDB made via the `CREATE DATABASE` statement and named `timescaledb_datasource`. - -Retrieve data from a specified table by providing the integration and table name. - - -You can use this established connection to query your table as follows, - -```sql -SELECT * -FROM timescaledb_datasource.sensor; -``` - -Run PostgreSQL-native queries directly on the connected TimescaleDB database: - -```sql -SELECT * FROM timescaledb_datasource ( - - --Native Query Goes Here - SELECT - model, - COUNT(*) OVER (PARTITION BY model, year) AS units_to_sell, - ROUND((CAST(tax AS decimal) / price), 3) AS tax_div_price - FROM used_car_price - -); -``` - -## Troubleshooting - - -`Database Connection Error` - -* **Symptoms**: Failure to connect MindsDB with the TimescaleDB database. -* **Checklist**: - 1. Make sure the TimescaleDB server is active. - 2. Confirm that host, port, user, schema, and password are correct. Try a direct TimescaleDB connection. - 3. Ensure a stable network between MindsDB and TimescaleDB. - - - -`SQL statement cannot be parsed by mindsdb_sql` - -* **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters. -* **Checklist**: - 1. Ensure table names with spaces or special characters are enclosed in backticks. - 2. 
Examples: - * Incorrect: SELECT * FROM integration.travel data - * Incorrect: SELECT * FROM integration.'travel data' - * Correct: SELECT * FROM integration.\`travel data\` - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/timescaledb_handler/__about__.py b/mindsdb/integrations/handlers/timescaledb_handler/__about__.py deleted file mode 100644 index 3b3417d274c..00000000000 --- a/mindsdb/integrations/handlers/timescaledb_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB TimeScaleDB handler' -__package_name__ = 'mindsdb_timescaledb_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for TimeScaleDB" -__author__ = 'Parthiv MAkwana' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022- mindsdb' diff --git a/mindsdb/integrations/handlers/timescaledb_handler/__init__.py b/mindsdb/integrations/handlers/timescaledb_handler/__init__.py deleted file mode 100644 index 189cb89b856..00000000000 --- a/mindsdb/integrations/handlers/timescaledb_handler/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE, HANDLER_SUPPORT_LEVEL - -from .__about__ import __version__ as version, __description__ as description - -try: - from .timescaledb_handler import PostgresHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = "TimescaleDB" -name = "timescaledb" -type = HANDLER_TYPE.DATA -icon_path = "icon.svg" -support_level = HANDLER_SUPPORT_LEVEL.MINDSDB - -__all__ = [ - "Handler", - "version", - "name", - "type", - "title", - "description", - "import_error", - "icon_path", - "support_level", -] diff --git a/mindsdb/integrations/handlers/timescaledb_handler/icon.svg b/mindsdb/integrations/handlers/timescaledb_handler/icon.svg deleted file mode 100644 index 18a2c37d30c..00000000000 --- 
a/mindsdb/integrations/handlers/timescaledb_handler/icon.svg +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/timescaledb_handler/timescaledb_handler.py b/mindsdb/integrations/handlers/timescaledb_handler/timescaledb_handler.py deleted file mode 100644 index 9cd2dd280e1..00000000000 --- a/mindsdb/integrations/handlers/timescaledb_handler/timescaledb_handler.py +++ /dev/null @@ -1,51 +0,0 @@ -from mindsdb.integrations.handlers.postgres_handler import Handler as PostgresHandler -from collections import OrderedDict -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -class TimeScaleDBHandler(PostgresHandler): - name = 'timescaledb' - - def __init__(self, name, **kwargs): - super().__init__(name, **kwargs) - - -connection_args = OrderedDict( - host={ - 'type': ARG_TYPE.STR, - 'description': 'The host name or IP address of the TimeScaleDB server/database.' - }, - database={ - 'type': ARG_TYPE.STR, - 'description': """ - The database name to use when connecting with the TimeScaleDB server. - """ - }, - user={ - 'type': ARG_TYPE.STR, - 'description': 'The user name used to authenticate with the TimeScaleDB server.' - }, - password={ - 'type': ARG_TYPE.STR, - 'description': 'The password to authenticate the user with the TimeScaleDB server.' 
- }, - schema={ - 'type': ARG_TYPE.STR, - 'description': 'The schema in which objects are searched first.', - 'required': False, - 'label': 'Schema' - }, - port={ - 'type': ARG_TYPE.INT, - 'description': 'Specify port to connect TimeScaleDB ' - } -) - -connection_args_example = OrderedDict( - host='127.0.0.1', - port=5432, - password='password', - user='root', - database="timescaledb", - schema='public' -) diff --git a/mindsdb/integrations/handlers/web_handler/README.md b/mindsdb/integrations/handlers/web_handler/README.md deleted file mode 100644 index 2aa75a097e7..00000000000 --- a/mindsdb/integrations/handlers/web_handler/README.md +++ /dev/null @@ -1,113 +0,0 @@ ---- -title: Web Crawler -sidebarTitle: Web Crawler ---- - -In this section, we present how to use a web crawler within MindsDB. - -A web crawler is an automated script designed to systematically browse and index content on the internet. Within MindsDB, you can utilize a web crawler to efficiently collect data from various websites. - -## Prerequisites - -Before proceeding, ensure the following prerequisites are met: - -1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). -2. To use Web Crawler with MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). - -## Connection - -This handler does not require any connection parameters. - -Here is how to initialize a web crawler: - -```sql -CREATE DATABASE my_web -WITH ENGINE = 'web'; -``` - -The above query creates a database called `my_web`. This database by default has a table called `crawler` that we can use to crawl data from a given url/urls. - - -## Usage - - -Specifying a `LIMIT` clause is required. To crawl all pages on a site, consider setting the limit to a high value, such as 10,000, which exceeds the expected number of pages. Be aware that setting a higher limit may result in longer response times. 
- - -### Get Websites Content - -The following usage examples demonstrate how to retrieve content from `docs.mindsdb.com`: - -```sql -SELECT * -FROM my_web.crawler -WHERE url = 'docs.mindsdb.com' -LIMIT 1; -``` - -You can also retrieve content from internal pages. The following query fetches the content from 10 internal pages: - -```sql -SELECT * -FROM my_web.crawler -WHERE url = 'docs.mindsdb.com' -LIMIT 10; -``` - -In order to get the content from multiple websites, use the `UNION` operator: - -```sql -SELECT * -FROM my_web.crawler -WHERE url = 'docs.mindsdb.com' -LIMIT 5 -UNION -SELECT * -FROM my_web.crawler -WHERE url = 'docs.python.org' -LIMIT 5; -``` - -### Get PDF Content - -MindsDB accepts [file uploads](/sql/create/file) of `csv`, `xlsx`, `xls`, `sheet`, `json`, and `parquet`. However, you can also configure the web crawler to fetch data from PDF files accessible via URLs. - -```sql -SELECT * -FROM my_web.crawler -WHERE url = '' -LIMIT 1; -``` -### Configuring Web Handler for Specific Domains - -The Web Handler can be configured to interact only with specific domains by using the `web_crawling_allowed_sites` setting in the `config.json` file. -This feature allows you to restrict the handler to crawl and process content only from the domains you specify, enhancing security and control over web interactions. - -To configure this, simply list the allowed domains under the `web_crawling_allowed_sites` key in `config.json`. For example: - -```json -"web_crawling_allowed_sites": [ - "https://docs.mindsdb.com", - "https://another-allowed-site.com" -] -``` - -## Troubleshooting - - -`Web crawler encounters character encoding issues` - -* **Symptoms**: Extracted text appears garbled or contains strange characters instead of the expected text. -* **Checklist**: - 1. Open a GitHub Issue: If you encounter a bug or a repeatable error with encoding, - report it on the [MindsDB GitHub](https://github.com/mindsdb/mindsdb/issues) repository by opening an issue. 
- - - - -`Web crawler times out while trying to fetch content` - -* **Symptoms**: The crawler fails to retrieve data from a website, resulting in timeout errors. -* **Checklist**: - 1. Check the network connection to ensure the target site is reachable. - \ No newline at end of file diff --git a/mindsdb/integrations/handlers/web_handler/__about__.py b/mindsdb/integrations/handlers/web_handler/__about__.py deleted file mode 100644 index 12fdd95dcaf..00000000000 --- a/mindsdb/integrations/handlers/web_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = 'MindsDB Web crawl handler' -__package_name__ = 'mindsdb_web_handler' -__version__ = '0.0.1' -__description__ = "MindsDB handler for crawling web-sites" -__author__ = 'MindsDB Inc' -__github__ = 'https://github.com/mindsdb/mindsdb' -__pypi__ = 'https://pypi.org/project/mindsdb/' -__license__ = 'MIT' -__copyright__ = 'Copyright 2022 - MindsDB' diff --git a/mindsdb/integrations/handlers/web_handler/__init__.py b/mindsdb/integrations/handlers/web_handler/__init__.py deleted file mode 100644 index 7f8d52cd01d..00000000000 --- a/mindsdb/integrations/handlers/web_handler/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -try: - from .web_handler import ( - WebHandler as Handler - ) - import_error = None -except Exception as e: - Handler = None - import_error = e - - -title = 'web' -name = 'web' -type = HANDLER_TYPE.DATA -icon_path = 'icon.svg' - -permanent = False - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', - 'import_error', 'icon_path' -] diff --git a/mindsdb/integrations/handlers/web_handler/icon.svg b/mindsdb/integrations/handlers/web_handler/icon.svg deleted file mode 100644 index 1330df8af55..00000000000 --- a/mindsdb/integrations/handlers/web_handler/icon.svg +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - diff --git 
a/mindsdb/integrations/handlers/web_handler/requirements.txt b/mindsdb/integrations/handlers/web_handler/requirements.txt deleted file mode 100644 index 7aef81c3742..00000000000 --- a/mindsdb/integrations/handlers/web_handler/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -html2text -# bs4 # in main dependencies list \ No newline at end of file diff --git a/mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py b/mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py deleted file mode 100644 index 621f2ca9a5d..00000000000 --- a/mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +++ /dev/null @@ -1,359 +0,0 @@ -import concurrent.futures -import io -import re -import traceback -from threading import Lock -from typing import List -from urllib.parse import urljoin, urlparse, urlunparse - -import html2text -import fitz # PyMuPDF -import pandas as pd -import requests -from bs4 import BeautifulSoup -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -def pdf_to_markdown(response, gap_threshold=10): - """ - Convert a PDF document to Markdown text. - - Args: - response: the response object containing the PDF data - gap_threshold (int): the vertical gap size that triggers a new line in the output (default 10) - - Returns: - A string containing the converted Markdown text. - - Raises: - Exception -- if the PDF data cannot be processed. 
- """ - - try: - file_stream = io.BytesIO(response.content) - document = fitz.open(stream=file_stream, filetype="pdf") - except Exception as e: - raise Exception("Failed to process PDF data: " + str(e)) - - markdown_lines = [] - for page_num in range(len(document)): - page = document.load_page(page_num) - - blocks = page.get_text("blocks") - - blocks.sort(key=lambda block: (block[1], block[0])) - - previous_block_bottom = 0 - for block in blocks: - y0 = block[1] - y1 = block[3] - block_text = block[4] - - # Check if there's a large vertical gap between this block and the previous one - if y0 - previous_block_bottom > gap_threshold: - markdown_lines.append("") - - markdown_lines.append(block_text) - previous_block_bottom = y1 - - markdown_lines.append("") - - document.close() - - return "\n".join(markdown_lines) - - -def is_valid(url) -> bool: - """ - Check if a URL is valid. - - Args: - url: the URL to check - - Returns: - bool: True if the URL is valid, False otherwise. - """ - parsed = urlparse(url) - return bool(parsed.netloc) and bool(parsed.scheme) - - -def parallel_get_all_website_links(urls) -> dict: - """ - Fetch all website links from a list of URLs. - - Args: - urls (list): a list of URLs to fetch links from - - Returns: - A dictionary mapping each URL to a list of links found on that URL. - - Raises: - Exception: if an error occurs while fetching links from a URL. 
- """ - url_contents = {} - - if len(urls) <= 10: - for url in urls: - url_contents[url] = get_all_website_links(url) - return url_contents - - with concurrent.futures.ProcessPoolExecutor() as executor: - future_to_url = {executor.submit(get_all_website_links, url): url for url in urls} - for future in concurrent.futures.as_completed(future_to_url): - url = future_to_url[future] - try: - url_contents[url] = future.result() - except Exception as exc: - logger.error(f"{url} generated an exception: {exc}") - # don't raise the exception, just log it, continue processing other urls - - return url_contents - - -def get_all_website_links(url, headers: dict = None) -> dict: - """ - Fetch all website links from a URL. - - Args: - url (str): the URL to fetch links from - headers (dict): a dictionary of headers to use when fetching links - - Returns: - A dictionary containing the URL, the extracted links, the HTML content, the text content, and any error that occurred. - """ - logger.info("rawling: {url} ...".format(url=url)) - urls = set() - - domain_name = urlparse(url).netloc - try: - session = requests.Session() - - # Add headers to mimic a real browser request - if headers is None: - headers = {} - if "User-Agent" not in headers: - headers["User-Agent"] = ( - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.3" - ) - - response = session.get(url, headers=headers) - if "cookie" in response.request.headers: - session.cookies.update(response.cookies) - - content_type = response.headers.get("Content-Type", "").lower() - - if "application/pdf" in content_type: - content_html = "PDF" - content_text = pdf_to_markdown(response) - else: - content_html = response.text - - # Parse HTML content with BeautifulSoup - soup = BeautifulSoup(content_html, "html.parser") - content_text = get_readable_text_from_soup(soup) - for a_tag in soup.find_all("a"): - href = a_tag.attrs.get("href") - if href == "" or href is None: - continue - href = 
urljoin(url, href) - parsed_href = urlparse(href) - href = urlunparse((parsed_href.scheme, parsed_href.netloc, parsed_href.path, "", "", "")) - if not is_valid(href): - continue - if href in urls: - continue - if domain_name != parsed_href.netloc: - continue - - href = href.rstrip("/") - urls.add(href) - - except Exception: - error_message = traceback.format_exc().splitlines()[-1] - logger.exception("An exception occurred:") - return { - "url": url, - "urls": urls, - "html_content": "", - "text_content": "", - "error": str(error_message), - } - - return { - "url": url, - "urls": urls, - "html_content": content_html, - "text_content": content_text, - "error": None, - } - - -def get_readable_text_from_soup(soup) -> str: - """ - Extract readable text from a BeautifulSoup object and convert it to Markdown. - - Args: - soup (BeautifulSoup): a BeautifulSoup object - - Returns: - The extracted text in Markdown format. - """ - html_converter = html2text.HTML2Text() - html_converter.ignore_links = False - return html_converter.handle(str(soup)) - - -def get_all_website_links_recursively( - url, - reviewed_urls, - limit=None, - crawl_depth: int = 1, - current_depth: int = 0, - filters: List[str] = None, - headers=None, -): - """ - Recursively gathers all links from a given website up to a specified limit. - - Args: - url (str): The starting URL to fetch links from. - reviewed_urls (dict): A dictionary to keep track of reviewed URLs and associated data. - limit (int, optional): The maximum number of URLs to process. - crawl_depth: How deep to crawl from each base URL. 0 = scrape given URLs only - current_depth: How deep we are currently crawling from the base URL. - filters (List[str]): Crawl URLs that only match these regex patterns. 
- - TODO: Refactor this function to use a iterative aproach instead of recursion - """ - if limit is not None: - if len(reviewed_urls) >= limit: - return reviewed_urls - - if not filters: - matches_filter = True - else: - matches_filter = any(re.match(f, url) is not None for f in filters) - if url not in reviewed_urls and matches_filter: - try: - reviewed_urls[url] = get_all_website_links(url, headers=headers) - except Exception: - error_message = traceback.format_exc().splitlines()[-1] - logger.exception("An exception occurred:") - reviewed_urls[url] = { - "url": url, - "urls": [], - "html_content": "", - "text_content": "", - "error": str(error_message), - } - - if crawl_depth is not None and crawl_depth == current_depth: - return reviewed_urls - - to_rev_url_list = [] - - # create a list of new urls to review that don't exist in the already reviewed ones - for new_url in reviewed_urls[url]["urls"]: - if not filters: - matches_filter = True - else: - matches_filter = any(re.match(f, new_url) is not None for f in filters) - if not matches_filter: - continue - # if this is already in the urls, then no need to go and crawl for it - if new_url in reviewed_urls or new_url in to_rev_url_list: - continue - - # insert immediately to count limit between threads. 
fill later - url_list_lock = Lock() - with url_list_lock: - if limit is None or len(reviewed_urls) < limit: - reviewed_urls[new_url] = {} - to_rev_url_list.append(new_url) - else: - break - - if len(to_rev_url_list) > 0: - new_revised_urls = parallel_get_all_website_links(to_rev_url_list) - - reviewed_urls.update(new_revised_urls) - - for new_url in new_revised_urls: - get_all_website_links_recursively( - new_url, reviewed_urls, limit, crawl_depth=crawl_depth, current_depth=current_depth + 1, filters=filters - ) - - -def get_all_websites( - urls, limit=1, html=False, crawl_depth: int = 1, filters: List[str] = None, headers: dict = None -) -> pd.DataFrame: - """ - Crawl a list of websites and return a DataFrame containing the results. - - Args: - urls (list): a list of URLs to crawl - limit (int): Absolute max number of web pages to crawl, regardless of crawl depth. - crawl_depth (int): Crawl depth for URLs. - html (bool): a boolean indicating whether to include the HTML content in the results - filters (List[str]): Crawl URLs that only match these regex patterns. - headers (dict): headers of request - - Returns: - A DataFrame containing the results. - """ - reviewed_urls = {} - - def fetch_url(url, crawl_depth: int = 1, filters: List[str] = None): - # Allow URLs to be passed wrapped in quotation marks so they can be used - # directly from the SQL editor. - if url.startswith("'") and url.endswith("'"): - url = url[1:-1] - url = url.rstrip("/") - if urlparse(url).scheme == "": - # Try HTTPS first - url = "https://" + url - get_all_website_links_recursively( - url, reviewed_urls, limit, crawl_depth=crawl_depth, filters=filters, headers=headers - ) - - # Use a ThreadPoolExecutor to run the helper function in parallel. 
- with concurrent.futures.ThreadPoolExecutor() as executor: - future_to_url = {executor.submit(fetch_url, url, crawl_depth=crawl_depth, filters=filters): url for url in urls} - - for future in concurrent.futures.as_completed(future_to_url): - future.result() - - columns_to_ignore = ["urls"] - if html is False: - columns_to_ignore += ["html_content"] - df = dict_to_dataframe(reviewed_urls, columns_to_ignore=columns_to_ignore, index_name="url") - - if not df.empty and df[df.error.isna()].empty: - raise Exception(str(df.iloc[0].error)) - return df - - -def dict_to_dataframe(dict_of_dicts, columns_to_ignore=None, index_name=None) -> pd.DataFrame: - """ - Convert a dictionary of dictionaries to a DataFrame. - - Args: - dict_of_dicts (dict): a dictionary of dictionaries - columns_to_ignore (list): a list of columns to ignore - index_name (str): the name of the index column - Returns: - A DataFrame containing the data. - """ - df = pd.DataFrame.from_dict(dict_of_dicts, orient="index") - - if columns_to_ignore: - for column in columns_to_ignore: - if column in df.columns: - df = df.drop(column, axis=1) - - if index_name: - df.index.name = index_name - - return df diff --git a/mindsdb/integrations/handlers/web_handler/web_handler.py b/mindsdb/integrations/handlers/web_handler/web_handler.py deleted file mode 100644 index 4956d2f054f..00000000000 --- a/mindsdb/integrations/handlers/web_handler/web_handler.py +++ /dev/null @@ -1,98 +0,0 @@ -from typing import List - -import pandas as pd -from mindsdb.integrations.libs.response import HandlerStatusResponse -from mindsdb.utilities.config import config -from mindsdb.utilities.security import validate_urls -from .urlcrawl_helpers import get_all_websites - -from mindsdb.integrations.libs.api_handler import APIResource, APIHandler -from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator - - -class CrawlerTable(APIResource): - def list(self, conditions: List[FilterCondition] = None, limit: int = None, 
**kwargs) -> pd.DataFrame: - """ - Selects data from the provided websites - - Returns: - dataframe: Dataframe containing the crawled data - - Raises: - NotImplementedError: If the query is not supported - """ - urls = [] - crawl_depth = None - per_url_limit = None - headers = {} - for condition in conditions: - if condition.column == "url": - if condition.op == FilterOperator.IN: - urls = condition.value - elif condition.op == FilterOperator.EQUAL: - urls = [condition.value] - condition.applied = True - if condition.column == "crawl_depth" and condition.op == FilterOperator.EQUAL: - crawl_depth = condition.value - condition.applied = True - if condition.column == "per_url_limit" and condition.op == FilterOperator.EQUAL: - per_url_limit = condition.value - condition.applied = True - if condition.column.lower() == "user_agent" and condition.op == FilterOperator.EQUAL: - headers["User-Agent"] = condition.value - condition.applied = True - - if len(urls) == 0: - raise NotImplementedError( - 'You must specify what url you want to crawl, for example: SELECT * FROM web.crawler WHERE url = "someurl"' - ) - - allowed_urls = config.get("web_crawling_allowed_sites", []) - if allowed_urls and not validate_urls(urls, allowed_urls): - raise ValueError( - f"The provided URL is not allowed for web crawling. Please use any of {', '.join(allowed_urls)}." 
- ) - - if limit is None and per_url_limit is None and crawl_depth is None: - per_url_limit = 1 - if per_url_limit is not None: - # crawl every url separately - results = [] - for url in urls: - results.append(get_all_websites([url], per_url_limit, crawl_depth=crawl_depth, headers=headers)) - result = pd.concat(results) - else: - result = get_all_websites(urls, limit, crawl_depth=crawl_depth, headers=headers) - - if limit is not None and len(result) > limit: - result = result[:limit] - - return result - - def get_columns(self): - """ - Returns the columns of the crawler table - """ - return ["url", "text_content", "error"] - - -class WebHandler(APIHandler): - """ - Web handler, handling crawling content from websites. - """ - - def __init__(self, name=None, **kwargs): - super().__init__(name) - crawler = CrawlerTable(self) - self._register_table("crawler", crawler) - - def check_connection(self) -> HandlerStatusResponse: - """ - Checks the connection to the web handler - @TODO: Implement a better check for the connection - - Returns: - HandlerStatusResponse: Response containing the status of the connection. Hardcoded to True for now. 
- """ - response = HandlerStatusResponse(True) - return response diff --git a/mindsdb/integrations/libs/__init__.py b/mindsdb/integrations/libs/__init__.py deleted file mode 100644 index 9377578263b..00000000000 --- a/mindsdb/integrations/libs/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from mindsdb.integrations.libs import api_handler_exceptions - -__all__ = ['api_handler_exceptions'] diff --git a/mindsdb/integrations/libs/api_handler.py b/mindsdb/integrations/libs/api_handler.py deleted file mode 100644 index 9e46d6447a8..00000000000 --- a/mindsdb/integrations/libs/api_handler.py +++ /dev/null @@ -1,946 +0,0 @@ -from typing import Any, List, Optional -import ast as py_ast - -import pandas as pd -from pandas.api import types as pd_types -from mindsdb_sql_parser.ast import ASTNode, Select, Insert, Update, Delete, Star, BinaryOperation, Function -from mindsdb_sql_parser.ast.select.identifier import Identifier -from mindsdb_sql_parser.ast.select.constant import Constant - -from mindsdb.integrations.utilities.sql_utils import ( - extract_comparison_conditions, - has_aggregate_function, - filter_dataframe, - FilterCondition, - FilterOperator, - SortColumn, -) -from mindsdb.integrations.libs.base import BaseHandler -from mindsdb.integrations.libs.api_handler_exceptions import TableAlreadyExists, TableNotFound -from mindsdb.integrations.libs.response import HandlerResponse as Response, RESPONSE_TYPE -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) - - -def _infer_data_type_from_value(value: Any) -> str: - """Infer SQL data type from Python value. - - Args: - value: Python value to infer type from - - Returns: - str: SQL data type string (varchar, integer, decimal, date, datetime, etc.) 
- Uses lowercase to match infer_mysql_type expectations - """ - if value is None or pd.isna(value): - return "varchar" - elif isinstance(value, bool): - return "boolean" - elif isinstance(value, int): - return "integer" - elif isinstance(value, float): - return "decimal" - elif isinstance(value, str): - # Check if it looks like a timestamp (ISO format with T and timezone) - if "T" in value and ("Z" in value or "+" in value or "-" in value[-6:]): - return "timestamp" - # Check if it looks like a date/datetime - try: - pd.to_datetime(value) - if len(value) == 10: # Just date, no time (YYYY-MM-DD) - return "date" - return "datetime" - except (ValueError, TypeError): - pass - return "varchar" - elif pd_types.is_datetime64_any_dtype(type(value)) or isinstance(value, pd.Timestamp): - return "datetime" - else: - return "varchar" - - -def _infer_data_type_from_samples(values: List[Any]) -> str: - """Infer data type from multiple sample values for better accuracy. - - Args: - values: List of sample values from a column - - Returns: - str: SQL data type string (lowercase to match infer_mysql_type expectations) - """ - non_null_values = [v for v in values if v is not None and pd.notna(v)] - - if not non_null_values: - return "varchar" - - # Analyze types across all samples - type_counts = {} - for value in non_null_values: - inferred_type = _infer_data_type_from_value(value) - type_counts[inferred_type] = type_counts.get(inferred_type, 0) + 1 - - # Return the most common type, but prefer more specific types over varchar - if type_counts: - # If we have a specific type (not varchar), prefer it - specific_types = {k: v for k, v in type_counts.items() if k != "varchar"} - if specific_types: - return max(specific_types.items(), key=lambda x: x[1])[0] - return max(type_counts.items(), key=lambda x: x[1])[0] - - return "varchar" - - -def _pandas_dtype_to_sql_type(dtype) -> str: - """Convert pandas dtype to SQL data type string. 
- - Args: - dtype: pandas dtype object - - Returns: - str: SQL data type string (lowercase to match infer_mysql_type expectations) - """ - # Handle string dtypes - if pd_types.is_string_dtype(dtype): - return "varchar" - - # Handle integer dtypes - if pd_types.is_integer_dtype(dtype): - return "integer" - - # Handle float/numeric dtypes - if pd_types.is_float_dtype(dtype) or pd_types.is_numeric_dtype(dtype): - return "decimal" - - # Handle boolean dtypes - if pd_types.is_bool_dtype(dtype): - return "boolean" - - # Handle datetime dtypes - if pd_types.is_datetime64_any_dtype(dtype): - return "datetime" - - # Handle date dtypes - if pd_types.is_date_dtype(dtype): - return "date" - - # Default to varchar for object and unknown types - return "varchar" - - -class FuncParser: - def from_string(self, query_string): - body = py_ast.parse(query_string.strip(), mode="eval").body - - if not isinstance(body, py_ast.Call): - raise RuntimeError(f"Api function not found {query_string}") - - fnc_name = body.func.id - - params = {} - for keyword in body.keywords: - name = keyword.arg - value = self.process(keyword.value) - - params[name] = value - - return fnc_name, params - - def process(self, node): - if isinstance(node, py_ast.List): - elements = [] - for node2 in node.elts: - elements.append(self.process(node2)) - return elements - - if isinstance(node, py_ast.Dict): - keys = [] - for node2 in node.keys: - if isinstance(node2, py_ast.Constant): - value = node2.value - elif isinstance(node2, py_ast.Str): # py37 - value = node2.s - else: - raise NotImplementedError(f"Unknown dict key {node2}") - - keys.append(value) - - values = [] - for node2 in node.values: - values.append(self.process(node2)) - - return dict(zip(keys, values)) - - if isinstance(node, py_ast.Name): - # special attributes - name = node.id - if name == "true": - return True - elif name == "false": - return False - elif name == "null": - return None - - if isinstance(node, py_ast.Constant): - return node.value - 
- # ---- python 3.7 objects ----- - if isinstance(node, py_ast.Str): - return node.s - - if isinstance(node, py_ast.Num): - return node.n - - # ----------------------------- - - if isinstance(node, py_ast.UnaryOp): - if isinstance(node.op, py_ast.USub): - value = self.process(node.operand) - return -value - - raise NotImplementedError(f"Unknown node {node}") - - -class APITable: - def __init__(self, handler): - self.handler = handler - - def select(self, query: Select) -> pd.DataFrame: - """Receive query as AST (abstract syntax tree) and act upon it. - - Args: - query (ASTNode): sql query represented as AST. Usually it should be ast.Select - - Returns: - pd.DataFrame - """ - - raise NotImplementedError() - - def insert(self, query: Insert) -> None: - """Receive query as AST (abstract syntax tree) and act upon it somehow. - - Args: - query (ASTNode): sql query represented as AST. Usually it should be ast.Insert - - Returns: - None - """ - raise NotImplementedError() - - def update(self, query: ASTNode) -> None: - """Receive query as AST (abstract syntax tree) and act upon it somehow. - - Args: - query (ASTNode): sql query represented as AST. Usually it should be ast.Update - Returns: - None - """ - raise NotImplementedError() - - def delete(self, query: ASTNode) -> None: - """Receive query as AST (abstract syntax tree) and act upon it somehow. - - Args: - query (ASTNode): sql query represented as AST. Usually it should be ast.Delete - - Returns: - None - """ - raise NotImplementedError() - - def get_columns(self) -> list: - """Maps the columns names from the API call resource - - Returns: - List - """ - raise NotImplementedError() - - -def extract_targets(targets: list[ASTNode]) -> list[str]: - """Recursive function to extract target column names from the query. - - Args: - targets (list[ASTNode]): The list of AST nodes representing the targets. - - Returns: - list[str]: The list of target column names. 
- """ - result = [] - for target in targets: - if isinstance(target, Identifier): - result.append(target.parts[-1]) - elif isinstance(target, (Function, BinaryOperation)): - result += extract_targets(target.args) - return list(set(result)) - - -class APIResource(APITable): - def __init__(self, *args, table_name=None, **kwargs): - self.table_name = table_name - super().__init__(*args, **kwargs) - - def select(self, query: Select) -> pd.DataFrame: - """Receive query as AST (abstract syntax tree) and act upon it. - - Args: - query (ASTNode): sql query represented as AST. Usually it should be ast.Select - - Returns: - pd.DataFrame - """ - - api_conditions, raw_conditions = self._extract_conditions(query.where, strict=False) - - limit = None - if query.limit is not None: - limit = query.limit.value - - sort = None - if query.order_by and len(query.order_by) > 0: - sort = [] - for an_order in query.order_by: - if isinstance(an_order.field, Identifier): - sort.append(SortColumn(an_order.field.parts[-1], an_order.direction.upper() != "DESC")) - - targets = extract_targets(query.targets) - - # Check if query has aggregation functions (like COUNT, SUM, etc.) 
- has_aggregation = has_aggregate_function(query.targets) - - # If we have aggregation or GROUP BY without a LIMIT, we need all rows to compute correctly - # Pass a very large limit instead of None, since many handlers default to a small limit (e.g., 20) - # when limit is None, which would cause incorrect aggregation results - # However, if there's a LIMIT in the query, use it to limit the input rows before aggregation - if has_aggregation or query.group_by: - if limit is not None: - # LIMIT applies to input rows before aggregation - list_limit = limit - else: - # No LIMIT, fetch all rows for aggregation - list_limit = 999999 # Very large number to fetch all rows - else: - list_limit = limit - - kwargs = {"conditions": api_conditions, "limit": list_limit, "sort": sort, "targets": targets} - if self.table_name is not None: - kwargs["table_name"] = self.table_name - - result = self.list(**kwargs) - - filters = [] - for cond in api_conditions: - if not cond.applied: - filters.append([cond.op.value, cond.column, cond.value]) - - result = filter_dataframe(result, filters, raw_conditions=raw_conditions) - - if limit is not None and len(result) > limit: - result = result[: int(limit)] - - return result - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - **kwargs, - ): - """ - List items based on specified conditions, limits, sorting, and targets. - - Args: - conditions (List[FilterCondition]): Optional. A list of conditions to filter the items. Each condition - should be an instance of the FilterCondition class. - limit (int): Optional. An integer to limit the number of items to be listed. - sort (List[SortColumn]): Optional. A list of sorting criteria - targets (List[str]): Optional. A list of strings representing specific fields - - Raises: - NotImplementedError: This is an abstract method and should be implemented in a subclass. 
- """ - raise NotImplementedError() - - def insert(self, query: Insert) -> None: - """Receive query as AST (abstract syntax tree) and act upon it somehow. - - Args: - query (ASTNode): sql query represented as AST. Usually it should be ast.Insert - - Returns: - None - """ - - columns = [col.name for col in query.columns] - - data = [dict(zip(columns, a_row)) for a_row in query.values] - kwargs = {} - if self.table_name is not None: - kwargs["table_name"] = self.table_name - - self.add(data, **kwargs) - - def add(self, row: List[dict], **kwargs) -> None: - """ - Add a single item to the dataa collection - - Args: - r ow (dict): A dictionary representing the item to be added. - - Raises: - NotImplementedError: This is an abstract method and should be implemented in a subclass. - """ - raise NotImplementedError() - - def update(self, query: Update) -> None: - """Receive query as AST (abstract syntax tree) and act upon it somehow. - - Args: - query (ASTNode): sql query represented as AST. Usually it should be ast.Update - - Returns: - None - """ - conditions, _ = self._extract_conditions(query.where) - - values = {key: val.value for key, val in query.update_columns.items()} - - self.modify(conditions, values) - - def modify(self, conditions: List[FilterCondition], values: dict): - """ - Modify items based on specified conditions and values. - - Args: - conditions (List[FilterCondition]): A list of conditions to filter the items. Each condition - should be an instance of the FilterCondition class. - values (dict): A dictionary of values to be updated. - - Raises: - NotImplementedError: This is an abstract method and should be implemented in a subclass. - """ - raise NotImplementedError - - def delete(self, query: Delete) -> None: - """Receive query as AST (abstract syntax tree) and act upon it somehow. - - Args: - query (ASTNode): sql query represented as AST. 
Usually it should be ast.Delete - - Returns: - None - """ - conditions, _ = self._extract_conditions(query.where) - - self.remove(conditions) - - def remove(self, conditions: List[FilterCondition]): - """ - Remove items based on specified conditions. - - Args: - conditions (List[FilterCondition]): A list of conditions to filter the items. Each condition - should be an instance of the FilterCondition class. - - Raises: - NotImplementedError: This is an abstract method and should be implemented in a subclass. - """ - raise NotImplementedError() - - def _extract_conditions(self, where: ASTNode, strict=True): - from mindsdb.integrations.utilities.sql_utils import _extract_date_from_raw_condition - import datetime as dt - - api_conditions, raw_conditions = [], [] - for item in extract_comparison_conditions(where, strict=strict): - if isinstance(item, BinaryOperation): - # Try to extract date value from INTERVAL expressions for API pushdown - date_info = _extract_date_from_raw_condition(item) - if date_info: - column_name, op, date_value = date_info - logger.info( - f"[API Handler] Extracted date from INTERVAL: column={column_name}, op={op}, original_date_value={date_value} (type={type(date_value).__name__})" - ) - - # Format date value as string for API compatibility - if isinstance(date_value, dt.date): - date_value_str = date_value.isoformat() - elif isinstance(date_value, dt.datetime): - date_value_str = date_value.isoformat() - else: - date_value_str = str(date_value) - - logger.info(f"[API Handler] Converted date to string: {date_value_str}") - - # Convert to FilterCondition for API pushdown - try: - filter_op = FilterOperator(op.upper()) - filter_condition = FilterCondition(column_name, filter_op, date_value_str) - api_conditions.append(filter_condition) - logger.info( - f"[API Handler] Created FilterCondition: column={column_name}, op={filter_op}, value={date_value_str}" - ) - # Don't add to raw_conditions - let FilterCondition handle it - # If it can't be pushed 
down, it will be filtered in-memory via filter_dataframe - continue - except (ValueError, KeyError) as e: - # If operator not supported, fall through to raw condition - logger.warning(f"[API Handler] Failed to create FilterCondition: {e}") - pass - - # Keep as raw condition if we couldn't extract date value - raw_conditions.append(item) - else: - api_conditions.append(FilterCondition(item[1], FilterOperator(item[0].upper()), item[2])) - - return api_conditions, raw_conditions - - -class MetaAPIResource(APIResource): - # TODO: Add a meta_table_info() method in case metadata cannot be retrieved as expected below? - - def meta_get_tables(self, table_name: str, **kwargs) -> dict: - """ - Retrieves table metadata for the API resource. - - Args: - table_name (str): The name given to the table that represents the API resource. This is required because the name for the APIResource is given by the handler. - kwargs: Additional keyword arguments that may be used by the specific API resource implementation. - - Returns: - Dict: The dictionary should contain the following fields: - - TABLE_NAME (str): Name of the table. - - TABLE_TYPE (str): Type of the table, e.g. 'BASE TABLE', 'VIEW', etc. (optional). - - TABLE_SCHEMA (str): Schema of the table (optional). - - TABLE_DESCRIPTION (str): Description of the table (optional). - - ROW_COUNT (int): Estimated number of rows in the table (optional). - """ - pass - - def meta_get_columns(self, table_name: str, **kwargs) -> List[dict]: - """ - Retrieves column metadata for the API resource. - - Args: - table_name (str): The name given to the table that represents the API resource. This is required because the name for the APIResource is given by the handler. - kwargs: Additional keyword arguments that may be used by the specific API resource implementation. - - Returns: - List[dict]: The list should contain dictionaries with the following fields: - - TABLE_NAME (str): Name of the table. - - COLUMN_NAME (str): Name of the column. 
- - DATA_TYPE (str): Data type of the column, e.g. 'VARCHAR', 'INT', etc. - - COLUMN_DESCRIPTION (str): Description of the column (optional). - - IS_NULLABLE (bool): Whether the column can contain NULL values (optional). - - COLUMN_DEFAULT (str): Default value of the column (optional). - """ - pass - - def meta_get_column_statistics(self, table_name: str, **kwargs) -> List[dict]: - """ - Retrieves column statistics for the API resource. - - Args: - table_name (str): The name given to the table that represents the API resource. This is required because the name for the APIResource is given by the handler. - kwargs: Additional keyword arguments that may be used by the specific API resource implementation. - - Returns: - List[dict]: The list should contain dictionaries with the following fields: - - TABLE_NAME (str): Name of the table. - - COLUMN_NAME (str): Name of the column. - - MOST_COMMON_VALUES (List[str]): Most common values in the column (optional). - - MOST_COMMON_FREQUENCIES (List[str]): Frequencies of the most common values in the column (optional). - - NULL_PERCENTAGE: Percentage of NULL values in the column (optional). - - MINIMUM_VALUE (str): Minimum value in the column (optional). - - MAXIMUM_VALUE (str): Maximum value in the column (optional). - - DISTINCT_VALUES_COUNT (int): Count of distinct values in the column (optional). - """ - pass - - def meta_get_primary_keys(self, table_name: str, **kwargs) -> List[dict]: - """ - Retrieves primary key metadata for the API resource. - - Args: - table_name (str): The name given to the table that represents the API resource. This is required because the name for the APIResource is given by the handler. - kwargs: Additional keyword arguments that may be used by the specific API resource implementation. - - Returns: - List[dict]: The list should contain dictionaries with the following fields: - - TABLE_NAME (str): Name of the table. - - COLUMN_NAME (str): Name of the column that is part of the primary key. 
- - ORDINAL_POSITION (int): Position of the column in the primary key (optional). - - CONSTRAINT_NAME (str): Name of the primary key constraint (optional). - """ - pass - - def meta_get_foreign_keys(self, table_name: str, all_tables: List[str], **kwargs) -> List[dict]: - """ - Retrieves foreign key metadata for the API resource. - - Args: - table_name (str): The name given to the table that represents the API resource. This is required because the name for the APIResource is given by the handler. - all_tables (List[str]): A list of all table names in the API resource. This is used to identify relationships between tables. - kwargs: Additional keyword arguments that may be used by the specific API resource implementation. - - Returns: - List[dict]: The list should contain dictionaries with the following fields: - - PARENT_TABLE_NAME (str): Name of the parent table. - - PARENT_COLUMN_NAME (str): Name of the parent column that is part of the foreign key. - - CHILD_TABLE_NAME (str): Name of the child table. - - CHILD_COLUMN_NAME (str): Name of the child column that is part of the foreign key. - - CONSTRAINT_NAME (str): Name of the foreign key constraint (optional). - """ - pass - - -class APIHandler(BaseHandler): - """ - Base class for handlers associated to the applications APIs (e.g. twitter, slack, discord etc.) - """ - - def __init__(self, name: str): - super().__init__(name) - """ constructor - Args: - name (str): the handler name - """ - self._tables = {} - - def _register_table(self, table_name: str, table_class: Any): - """ - Register the data resource. For e.g if you are using Twitter API it registers the `tweets` resource from `/api/v2/tweets`. 
- """ - if table_name.lower() in self._tables: - raise TableAlreadyExists(f"Table with name {table_name} already exists for this handler") - self._tables[table_name.lower()] = table_class - - def _get_table(self, name: Identifier): - """ - Check if the table name was added to the _register_table - Args: - name (Identifier): the table name - """ - name = name.parts[-1].lower() - if name in self._tables: - return self._tables[name] - raise TableNotFound(f"Table not found: {name}") - - def query(self, query: ASTNode): - if isinstance(query, Select): - # If the list method exists, it should be overridden in the child class. - # The APIResource class could be used as a base class by overriding the select method, but not the list method. - table = self._get_table(query.from_table) - list_method = getattr(table, "list", None) - - # Check if query has aggregations - if so, don't modify targets - # Aggregations like COUNT(*) should be preserved and handled post-fetch - has_aggregation = has_aggregate_function(query.targets) - - if not list_method or (list_method and list_method.__func__ is APIResource.list): - # for back compatibility, targets wasn't passed in previous version - # BUT: don't modify targets if we have aggregations - they need to be preserved - if not has_aggregation: - query.targets = [Star()] - result = self._get_table(query.from_table).select(query) - elif isinstance(query, Update): - result = self._get_table(query.table).update(query) - elif isinstance(query, Insert): - result = self._get_table(query.table).insert(query) - elif isinstance(query, Delete): - result = self._get_table(query.table).delete(query) - else: - raise NotImplementedError - - if result is None: - return Response(RESPONSE_TYPE.OK) - elif isinstance(result, pd.DataFrame): - return Response(RESPONSE_TYPE.TABLE, result) - else: - raise NotImplementedError - - def get_columns(self, table_name: str) -> Response: - """ - Returns a list of entity columns with inferred types from sampled data. 
- Args: - table_name (str): the table name - Returns: - RESPONSE_TYPE.TABLE - """ - - table = self._get_table(Identifier(table_name)) - column_names = table.get_columns() - - # Sample data to infer types (sample up to 50 rows for better accuracy) - column_types = {} - try: - # Create a SELECT query to sample data - sample_query = Select(targets=[Star()], from_table=Identifier(table_name), limit=Constant(50)) - - # Execute the query to get sample data - sample_df = table.select(sample_query) - - if not sample_df.empty and len(sample_df) > 0: - # Infer types from sampled data for each column - for col_name in column_names: - if col_name in sample_df.columns: - column_values = sample_df[col_name].tolist() - inferred_type = _infer_data_type_from_samples(column_values) - column_types[col_name] = inferred_type - else: - # Column not in sample, default to varchar - column_types[col_name] = "varchar" - else: - # No data available, try to infer from pandas dtypes if possible - if hasattr(sample_df, "dtypes"): - for col_name in column_names: - if col_name in sample_df.columns: - dtype = sample_df[col_name].dtype - column_types[col_name] = _pandas_dtype_to_sql_type(dtype) - else: - column_types[col_name] = "varchar" - else: - # Fallback: all varchar - column_types = {col: "varchar" for col in column_names} - except Exception as e: - # If sampling fails, log and fallback to varchar for all columns - logger.warning( - f"Failed to sample data for type inference in table '{table_name}': {e}. Using varchar for all columns." 
- ) - column_types = {col: "varchar" for col in column_names} - - # Build response DataFrame - df = pd.DataFrame(column_names, columns=["Field"]) - df["Type"] = df["Field"].map(lambda x: column_types.get(x, "varchar")) - - return Response(RESPONSE_TYPE.TABLE, df) - - def get_tables(self) -> Response: - """ - Return list of entities - Returns: - RESPONSE_TYPE.TABLE - """ - result = list(self._tables.keys()) - - df = pd.DataFrame(result, columns=["table_name"]) - df["table_type"] = "BASE TABLE" - - return Response(RESPONSE_TYPE.TABLE, df) - - -class MetaAPIHandler(APIHandler): - """ - Base class for handlers associated to the applications APIs (e.g. twitter, slack, discord etc.) - - This class is used when the handler is also needed to store information in the data catalog. - """ - - def meta_get_handler_info(self, **kwargs) -> str: - """ - Retrieves information about the design and implementation of the API handler. - This should include, but not be limited to, the following: - - The type of SQL queries and operations that the handler supports. - - etc. - - Args: - kwargs: Additional keyword arguments that may be used in generating the handler information. - - Returns: - str: A string containing information about the API handler's design and implementation. - """ - pass - - def meta_get_tables(self, table_names: Optional[List[str]] = None, **kwargs) -> Response: - """ - Retrieves metadata for the specified tables (or all tables if no list is provided). - - Args: - table_names (List): A list of table names for which to retrieve metadata. - kwargs: Additional keyword arguments that may be used by the specific API resource implementation. - - Returns: - Response: A response object containing the table metadata. 
- """ - df = pd.DataFrame() - for table_name, table_class in self._tables.items(): - if table_names is None or table_name in table_names: - try: - if hasattr(table_class, "meta_get_tables"): - table_metadata = table_class.meta_get_tables(table_name, **kwargs) - df = pd.concat([df, pd.DataFrame([table_metadata])], ignore_index=True) - except Exception: - logger.exception(f"Error retrieving metadata for table {table_name}:") - - if len(df.columns) == 0: - df = pd.DataFrame( - columns=[ - "TABLE_NAME", - "TABLE_TYPE", - "TABLE_SCHEMA", - "TABLE_DESCRIPTION", - "ROW_COUNT", - ] - ) - - return Response(RESPONSE_TYPE.TABLE, df) - - def meta_get_columns(self, table_names: Optional[List[str]] = None, **kwargs) -> Response: - """ - Retrieves column metadata for the specified tables (or all tables if no list is provided). - - Args: - table_names (List): A list of table names for which to retrieve column metadata. - - Returns: - Response: A response object containing the column metadata. - """ - df = pd.DataFrame() - for table_name, table_class in self._tables.items(): - if table_names is None or table_name in table_names: - try: - if hasattr(table_class, "meta_get_columns"): - column_metadata = table_class.meta_get_columns(table_name, **kwargs) - df = pd.concat([df, pd.DataFrame(column_metadata)], ignore_index=True) - except Exception: - logger.exception(f"Error retrieving column metadata for table {table_name}:") - - if len(df.columns) == 0: - df = pd.DataFrame( - columns=[ - "TABLE_NAME", - "COLUMN_NAME", - "DATA_TYPE", - "COLUMN_DESCRIPTION", - "IS_NULLABLE", - "COLUMN_DEFAULT", - ] - ) - - return Response(RESPONSE_TYPE.TABLE, df) - - def meta_get_column_statistics(self, table_names: Optional[List[str]] = None, **kwargs) -> Response: - """ - Retrieves column statistics for the specified tables (or all tables if no list is provided). - - Args: - table_names (List): A list of table names for which to retrieve column statistics. 
- - Returns: - Response: A response object containing the column statistics. - """ - df = pd.DataFrame() - for table_name, table_class in self._tables.items(): - if table_names is None or table_name in table_names: - try: - if hasattr(table_class, "meta_get_column_statistics"): - column_statistics = table_class.meta_get_column_statistics(table_name, **kwargs) - df = pd.concat([df, pd.DataFrame(column_statistics)], ignore_index=True) - except Exception: - logger.exception(f"Error retrieving column statistics for table {table_name}:") - - if len(df.columns) == 0: - df = pd.DataFrame( - columns=[ - "TABLE_NAME", - "COLUMN_NAME", - "MOST_COMMON_VALUES", - "MOST_COMMON_FREQUENCIES", - "NULL_PERCENTAGE", - "MINIMUM_VALUE", - "MAXIMUM_VALUE", - "DISTINCT_VALUES_COUNT", - ] - ) - - return Response(RESPONSE_TYPE.TABLE, df) - - def meta_get_primary_keys(self, table_names: Optional[List[str]] = None, **kwargs) -> Response: - """ - Retrieves primary key metadata for the specified tables (or all tables if no list is provided). - - Args: - table_names (List): A list of table names for which to retrieve primary key metadata. - - Returns: - Response: A response object containing the primary key metadata. 
- """ - df = pd.DataFrame() - for table_name, table_class in self._tables.items(): - if table_names is None or table_name in table_names: - try: - if hasattr(table_class, "meta_get_primary_keys"): - primary_key_metadata = table_class.meta_get_primary_keys(table_name, **kwargs) - df = pd.concat([df, pd.DataFrame(primary_key_metadata)], ignore_index=True) - except Exception: - logger.exception(f"Error retrieving primary keys for table {table_name}:") - - if len(df.columns) == 0: - df = pd.DataFrame( - columns=[ - "TABLE_NAME", - "COLUMN_NAME", - "ORDINAL_POSITION", - "CONSTRAINT_NAME", - ] - ) - - return Response(RESPONSE_TYPE.TABLE, df) - - def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None, **kwargs) -> Response: - """ - Retrieves foreign key metadata for the specified tables (or all tables if no list is provided). - - Args: - table_names (List): A list of table names for which to retrieve foreign key metadata. - - Returns: - Response: A response object containing the foreign key metadata. 
- """ - df = pd.DataFrame() - all_tables = list(self._tables.keys()) - for table_name, table_class in self._tables.items(): - if table_names is None or table_name in table_names: - try: - if hasattr(table_class, "meta_get_foreign_keys"): - foreign_key_metadata = table_class.meta_get_foreign_keys( - table_name, all_tables=table_names if table_names else all_tables, **kwargs - ) - df = pd.concat([df, pd.DataFrame(foreign_key_metadata)], ignore_index=True) - except Exception: - logger.exception(f"Error retrieving foreign keys for table {table_name}:") - - if len(df.columns) == 0: - df = pd.DataFrame( - columns=[ - "PARENT_TABLE_NAME", - "PARENT_COLUMN_NAME", - "CHILD_TABLE_NAME", - "CHILD_COLUMN_NAME", - "CONSTRAINT_NAME", - ] - ) - - return Response(RESPONSE_TYPE.TABLE, df) - - -class APIChatHandler(APIHandler): - def get_chat_config(self): - """Return configuration to connect to chatbot - - Returns: - Dict - """ - raise NotImplementedError() - - def get_my_user_name(self) -> list: - """Return configuration to connect to chatbot - - Returns: - Dict - """ - raise NotImplementedError() diff --git a/mindsdb/integrations/libs/api_handler_exceptions.py b/mindsdb/integrations/libs/api_handler_exceptions.py deleted file mode 100644 index d306130e89a..00000000000 --- a/mindsdb/integrations/libs/api_handler_exceptions.py +++ /dev/null @@ -1,18 +0,0 @@ -class TableNotFound(Exception): - pass - - -class ConnectionFailed(Exception): - pass - - -class InvalidNativeQuery(Exception): - pass - - -class TableAlreadyExists(Exception): - pass - - -class MissingConnectionParams(Exception): - pass diff --git a/mindsdb/integrations/libs/api_handler_generator.py b/mindsdb/integrations/libs/api_handler_generator.py deleted file mode 100644 index c629d846225..00000000000 --- a/mindsdb/integrations/libs/api_handler_generator.py +++ /dev/null @@ -1,583 +0,0 @@ -from dataclasses import dataclass -import re -from io import StringIO -import json -from typing import Dict, List, Any -import yaml 
-try: - from yaml import CLoader as Loader -except ImportError: - from yaml import Loader - - -import pandas as pd -import requests -from requests.auth import HTTPBasicAuth - -from mindsdb.integrations.utilities.sql_utils import ( - FilterCondition, FilterOperator, SortColumn -) -from mindsdb.integrations.libs.api_handler import APIResource - - -class ApiRequestException(Exception): - pass - - -class ApiResponseException(Exception): - pass - - -@dataclass -class APIInfo: - """ - A class to store the information about the API. - """ - base_url: str = None - auth: dict = None - - -@dataclass -class APIEndpoint: - url: str - method: str - params: dict - response: dict - - -@dataclass -class APIResourceType: - type_name: str - sub_type: str = None - properties: dict[str, str] = None - - -@dataclass -class APIEndpointParam: - name: str - type: APIResourceType - where: str = None - default: Any = None - - -def find_common_url_prefix(urls): - if len(urls) == 0: - return '' - urls = [ - url.split('/') - for url in urls - ] - - min_len = min(len(s) for s in urls) - - for i in range(min_len): - for j in range(1, len(urls)): - if urls[j][i] != urls[0][i]: - return '/'.join(urls[0][:i]) - - return '/'.join(urls[0][:min_len]) - - -class OpenAPISpecParser: - """ - A class to parse the OpenAPI specification. - """ - def __init__(self, openapi_spec_path: str) -> None: - if openapi_spec_path.startswith('http://') or openapi_spec_path.startswith('https://'): - response = requests.get(openapi_spec_path) - response.raise_for_status() - - if openapi_spec_path.endswith('.json'): - self.openapi_spec = response.json() - else: - stream = StringIO(response.text) - self.openapi_spec = yaml.load(stream, Loader=Loader) - else: - raise ApiRequestException('URL is required') - - def get_security_schemes(self) -> dict: - """ - Returns the security schemes defined in the OpenAPI specification. - - Returns: - dict: A dictionary containing the security schemes defined in the OpenAPI specification. 
- """ - return self.openapi_spec.get('components', {}).get('securitySchemes', {}) - - def get_schemas(self) -> dict: - """ - Returns the schemas defined in the OpenAPI specification. - - Returns: - dict: A dictionary containing the schemas defined in the OpenAPI specification. - """ - return self.openapi_spec.get('components', {}).get('schemas', {}) - - def get_paths(self) -> dict: - """ - Returns the paths defined in the OpenAPI specification. - - Returns: - dict: A dictionary containing the paths defined in the OpenAPI specification. - """ - return self.openapi_spec.get('paths', {}) - - def get_specs(self) -> dict: - return self.openapi_spec - - -class APIResourceGenerator: - """ - A class to generate API resources based on the OpenAPI specification. - """ - def __init__(self, url, connection_data, url_base=None, options=None) -> None: - self.openapi_spec_parser = OpenAPISpecParser(url) - self.connection_data = connection_data - self.url_base = url_base - self.options = options or {} - self.resources = {} - - def check_connection(self): - if 'check_connection_table' in self.options: - table = self.resources.get(self.options['check_connection_table']) - if table: - table.list(targets=[], limit=1, conditions=[]) - - def generate_api_resources(self, handler, table_name_format='{url}') -> Dict[str, APIResource]: - """ - Generates an API resource based on the OpenAPI specification. - - Returns: - Type[APIResource]: The generated API resource class. 
- """ - paths = self.openapi_spec_parser.get_paths() - schemas = self.openapi_spec_parser.get_schemas() - self.security_schemes = self.openapi_spec_parser.get_security_schemes() - - self.resource_types = self.process_resource_types(schemas) - endpoints = self.process_endpoints(paths) - - prefix_len = len(find_common_url_prefix([i.url for i in endpoints])) - - for endpoint in endpoints: - url = endpoint.url[prefix_len:] - # replace placehoders with x - url = re.sub(r"{(\w+)}", 'x', url) - url = url.replace('/', '_').strip('_') - table_name = table_name_format.format(url=url, method=endpoint.method).lower() - self.resources[table_name] = RestApiTable(handler, endpoint=endpoint, resource_gen=self) - - return self.resources - - def process_resource_types(self, schemas: dict) -> dict: - resource_types = {} - for name, schema_info in schemas.items(): - resource_types[name] = self._convert_to_resource_type(schema_info) - - return resource_types - - def process_endpoints(self, paths: dict) -> List[APIEndpoint]: - """ - Processes the endpoints defined in the OpenAPI specification. - - Args: - endpoints (Dict): A dictionary containing the endpoints defined in the OpenAPI specification. - - Returns: - Dict: A dictionary containing the processed endpoints. 
- """ - endpoints = [] - for path, path_info in paths.items(): - # filter endpoints by url base - if self.url_base is not None and (not path.startswith(self.url_base) or path == self.url_base): - continue - - for http_method, method_info in path_info.items(): - if http_method != 'get': - continue - - parameters = self._process_endpoint_parameters(method_info['parameters']) if 'parameters' in method_info else {} - - response = self._process_endpoint_response(method_info['responses']) - if response['type'] is None: - continue - - endpoint = APIEndpoint( - url=path, - method=http_method, - params=parameters, - response=response - ) - - endpoints.append(endpoint) - - return endpoints - - def get_ref_object(self, ref): - # get object by $ref link - el = self.openapi_spec_parser.get_specs() - for path in ref.lstrip('#').split('/'): - if path: - el = el[path] - return el - - def _process_endpoint_parameters(self, parameters: list) -> Dict[str, APIEndpointParam]: - """ - Processes the parameters defined in the OpenAPI specification. - - Args: - parameters (Dict): A dictionary containing the parameters defined in the OpenAPI specification. - - Returns: - Dict: A dictionary containing the processed parameters. 
- """ - endpoint_parameters = {} - for parameter in parameters: - if '$ref' in parameter: - parameter = self.get_ref_object(parameter['$ref']) - - type_name = self.get_resource_type(parameter['schema']) - - endpoint_parameters[parameter['name']] = APIEndpointParam( - name=parameter['name'], - type=type_name, - default=parameter['schema'].get('default'), - where=parameter['in'], - ) - - return endpoint_parameters - - def _process_endpoint_response(self, responses: dict): - response = None - response_path = [] # used to find list in response - - if '200' not in responses: - return {'type': None} - - view = 'table' - - resp_success = responses['200'] - if '$ref' in resp_success: - resp_success = self.get_ref_object(responses['200']['$ref']) - - for content_type, resp_info in resp_success['content'].items(): - if content_type != 'application/json': - continue - - # type_name=get_type(resp_info['schema']) - if 'schema' not in resp_info: - continue - - resource_type = self._convert_to_resource_type(resp_info['schema']) - - # resolve type - type_name = None - if resource_type.type_name in self.resource_types: - type_name = resource_type.type_name - resource_type = self.resource_types[resource_type.type_name] - - if resource_type.type_name == 'array': - response = resource_type.sub_type - elif resource_type.type_name == 'object': - if resource_type.properties is None: - raise NotImplementedError - - # if it is a table find property with list - is_table = False - if 'total_column' in self.options: - for col in self.options['total_column']: - if col in resource_type.properties: - is_table = True - - if is_table: - for k, v in resource_type.properties.items(): - if v.type_name == 'array': - - response = v.sub_type - response_path.append(k) - break - else: - response = type_name - view = 'record' - break - - return { - 'type': response, - 'path': response_path, - 'view': view - } - - def _convert_to_resource_type(self, schema: dict) -> APIResourceType: - """ - Converts the 
schema information to a resource type. - - Args: - schema (Dict): A dictionary containing the schema information. - - Returns: - APIResourceType: An object containing the resource type information. - """ - type_name = self.get_resource_type(schema) - # type_name= info['type'] - - kwargs = { - # 'name': name, - 'type_name': type_name, - } - - if type_name == 'object': - properties = {} - if 'properties' in schema: - for k, v in schema['properties'].items(): - # type_name2 = get_type(v) - properties[k] = self._convert_to_resource_type(v) - elif 'additionalProperties' in schema: - if isinstance(schema['additionalProperties'], dict) and 'type' in schema['additionalProperties']: - type_name = schema['additionalProperties']['type'] - else: - type_name = 'string' - - kwargs['properties'] = properties - if type_name == 'array' and 'items' in schema: - kwargs['sub_type'] = self.get_resource_type(schema['items']) - - return APIResourceType(**kwargs) - - def get_resource_type(self, schema: dict) -> str: - if 'type' in schema: - return schema['type'] - - elif '$ref' in schema: - return schema['$ref'].split('/')[-1] - - elif 'allOf' in schema: - # TODO Get only the first type. 
- return self.get_resource_type(schema['allOf'][0]) - - -class RestApiTable(APIResource): - def __init__(self, *args, endpoint: APIEndpoint = None, resource_gen=None, **kwargs): - self.endpoint = endpoint - resource_types = resource_gen.resource_types - self.connection_data = resource_gen.connection_data - self.security_schemes = resource_gen.security_schemes - self.options = resource_gen.options - - self.output_columns = {} - response_type = endpoint.response['type'] - if response_type in resource_types: - self.output_columns = resource_types[response_type].properties - else: - # let it be single column with this type - self.output_columns = {'value': response_type} - - # check params: - self.params, self.list_params = [], [] - for name, param in endpoint.params.items(): - self.params.append(name) - if param.type == 'array': - self.list_params.append(name) - - super().__init__(*args, **kwargs) - - def repr_value(self, value): - # convert dict and lists to strings to show it response table - - if isinstance(value, dict): - # remove empty keys - value = { - k: v - for k, v in value.items() - if v is not None - } - value = json.dumps(value) - elif isinstance(value, list): - value = ",".join([str(i) for i in value]) - return value - - def _handle_auth(self) -> dict: - """ - Processes the authentication mechanism defined in the OpenAPI specification. - Args: - security_schemes (Dict): A dictionary containing the security schemes defined in the OpenAPI specification. - Returns: - Dict: A dictionary containing the authentication information required to connect to the API. - """ - # API key authentication will be given preference over other mechanisms. - # NOTE: If the API supports multiple authentication mechanisms, should they be supported? Which one should be given preference? 
- - security_schemes = self.security_schemes - - if 'token' in self.connection_data: - headers = {'Authorization': f'Bearer {self.connection_data["token"]}'} - - return { - "headers": headers - } - - elif 'basicAuth' in security_schemes: - # For basic authentication, the username and password are required. - if not all( - key in self.connection_data - for key in ["username", "password"] - ): - raise ApiRequestException( - "The username and password are required for basic authentication." - ) - return { - "auth": HTTPBasicAuth( - self.connection_data["username"], - self.connection_data["password"], - ), - } - return {} - - def get_columns(self) -> List[str]: - return list(self.output_columns.keys()) - - def get_setting_param(self, setting_name: str) -> str: - # find input param name for specific setting - - if setting_name in self.options: - for col in self.options[setting_name]: - if col in self.endpoint.params: - return col - - def get_user_params(self): - params = {} - for k, v in self.connection_data.items(): - if k not in ('username', 'password', 'token', 'api_base'): - params[k] = v - return params - - def _api_request(self, filters): - query, body, path_vars = {}, {}, {} - for name, value in filters.items(): - param = self.endpoint.params[name] - if param.where == 'query': - query[name] = value - elif param.where == 'path': - path_vars[name] = value - else: - body[name] = value - - url = self.connection_data['api_base'] + self.endpoint.url - if path_vars: - url = url.format(**path_vars) - # check empty placeholders - placeholders = re.findall(r"{(\w+)}", url) - if placeholders: - raise ApiRequestException('Parameters are required: ' + ', '.join(placeholders)) - - kwargs = self._handle_auth() - req = requests.request(self.endpoint.method, url, params=query, data=body, **kwargs) - - if req.status_code != 200: - raise ApiResponseException(req.text) - resp = req.json() - - total = None - if 'total_column' in self.options and isinstance(resp, dict): - for col in 
self.options['total_column']: - if col in resp: - total = resp[col] - break - - for item in self.endpoint.response['path']: - resp = resp[item] - - if self.endpoint.response['view'] == 'record': - # response is one record, make table - resp = [resp] - return resp, total - - def list( - self, - conditions: List[FilterCondition] = None, - limit: int = None, - sort: List[SortColumn] = None, - targets: List[str] = None, - **kwargs - ) -> pd.DataFrame: - - if limit is None: - limit = 20 - - filters = {} - if conditions: - for condition in conditions: - if condition.column not in self.params: - continue - - if condition.column in self.list_params: - if condition.op == FilterOperator.IN: - filters[condition.column] = condition.value - elif condition.op == FilterOperator.EQUAL: - filters[condition.column] = [condition] - condition.applied = True - else: - filters[condition.column] = condition.value - condition.applied = True - - # user params - params = self.get_user_params() - if params: - filters.update(params) - - page_size_param = self.get_setting_param('page_size_param') - page_size = None - if page_size_param is not None: - # use default value for page size - page_size = self.endpoint.params[page_size_param].default - if page_size: - filters[page_size_param] = page_size - resp, total = self._api_request(filters) - - # pagination - offset_param = self.get_setting_param('offset_param') - page_num_param = self.get_setting_param('page_num_param') - if offset_param is not None or page_num_param is not None: - page_num = 1 - while True: - count = len(resp) - if limit <= count: - break - - if total is not None and total <= count: - # total is reached - break - - if page_size is not None and page_size > count: - # number of results are more than page, don't go to next page - break - - # download more pages - if offset_param: - filters[offset_param] = count - else: - page_num += 1 - filters[page_num_param] = page_num - resp2, total = self._api_request(filters) - if len(resp2) 
== 0: - # no results from next page - break - resp.extend(resp2) - - resp = resp[:limit] - - data = [] - - columns = self.get_columns() - for record in resp: - item = {} - - if isinstance(record, dict): - for name, value in record.items(): - item[name] = self.repr_value(value) - - data.append(item) - elif len(columns) > 0: - # response is value - item[columns[0]] = self.repr_value(record) - - return pd.DataFrame(data, columns=columns) diff --git a/mindsdb/integrations/libs/base.py b/mindsdb/integrations/libs/base.py deleted file mode 100644 index 2757b7ba594..00000000000 --- a/mindsdb/integrations/libs/base.py +++ /dev/null @@ -1,568 +0,0 @@ -import ast -import concurrent.futures -import functools -import inspect -import textwrap -from _ast import AnnAssign, AugAssign -from typing import Any, Dict, List, Optional, get_type_hints, get_args, Union, get_origin - -import pandas as pd -from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb.utilities import log - -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse, - RESPONSE_TYPE, - DataHandlerResponse, - normalize_response, - ErrorResponse, - TableResponse, -) - -logger = log.getLogger(__name__) - - -class BaseHandler: - """Base class for database handlers - - Base class for handlers that associate a source of information with the - broader MindsDB ecosystem via SQL commands. - """ - - stream_response = False - - def __init_subclass__(cls, **kwargs): - """Automatically wrap handler methods to normalize their responses. - - When a subclass is defined, this method checks if any of the methods - in _methods_to_normalize are overridden and wraps them to convert - legacy HandlerResponse to new response types (TableResponse, OkResponse, - ErrorResponse). 
- """ - super().__init_subclass__(**kwargs) - - # Methods whose return values should be normalized to new response types - _methods_to_normalize = ( - "native_query", - "query", - "insert", - "get_tables", - "get_columns", - "meta_get_tables", - "meta_get_columns", - "meta_get_column_statistics", - "meta_get_column_statistics_for_table", - "meta_get_primary_keys", - "meta_get_foreign_keys", - ) - for method_name in _methods_to_normalize: - # Only wrap if method is defined directly in this class (not inherited) - if method_name not in cls.__dict__: - continue - - original_method = cls.__dict__[method_name] - - return_type = get_type_hints(original_method).get("return") - if return_type is DataHandlerResponse or ( - get_origin(return_type) is Union and issubclass(get_args(return_type)[0], DataHandlerResponse) - ): - # this is already new style response - continue - - # Skip if already wrapped - if getattr(original_method, "_response_normalized", False): - continue - - # Create wrapper that normalizes response - @functools.wraps(original_method) - def wrapper(self, *args, _orig=original_method, **kwargs): - result = _orig(self, *args, **kwargs) - return normalize_response(result) - - wrapper._response_normalized = True - setattr(cls, method_name, wrapper) - - def __init__(self, name: str): - """constructor - Args: - name (str): the handler name - """ - self.is_connected: bool = False - self.name = name - - def connect(self): - """Set up any connections required by the handler - - Should return connection - - """ - raise NotImplementedError() - - def disconnect(self): - """Close any existing connections - - Should switch self.is_connected. 
- """ - self.is_connected = False - return - - def check_connection(self) -> HandlerStatusResponse: - """Check connection to the handler - - Returns: - HandlerStatusResponse - """ - raise NotImplementedError() - - def native_query(self, query: Any, stream: bool = False, **kwargs) -> DataHandlerResponse: - """Receive raw query and act upon it somehow. - - Args: - query (Any): query in native format (str for sql databases, etc) - stream (bool): Whether to stream the results of the query - **kwargs: Additional keyword arguments. - Returns: - DataHandlerResponse - """ - raise NotImplementedError() - - def query(self, query: ASTNode) -> DataHandlerResponse: - """Receive query as AST (abstract syntax tree) and act upon it somehow. - - Args: - query (ASTNode): sql query represented as AST. May be any kind - of query: SELECT, INSERT, DELETE, etc - - Returns: - DataHandlerResponse - """ - raise NotImplementedError() - - def get_tables(self) -> DataHandlerResponse: - """Return list of entities - - Return list of entities that will be accesible as tables. - - Returns: - DataHandlerResponse: shoud have same columns as information_schema.tables - (https://dev.mysql.com/doc/refman/8.0/en/information-schema-tables-table.html) - Column 'TABLE_NAME' is mandatory, other is optional. - """ - raise NotImplementedError() - - def get_columns(self, table_name: str) -> DataHandlerResponse: - """Returns a list of entity columns - - Args: - table_name (str): name of one of tables returned by self.get_tables() - - Returns: - DataHandlerResponse: shoud have same columns as information_schema.columns - (https://dev.mysql.com/doc/refman/8.0/en/information-schema-columns-table.html) - Column 'COLUMN_NAME' is mandatory, other is optional. Hightly - recomended to define also 'DATA_TYPE': it should be one of - python data types (by default it str). - """ - raise NotImplementedError() - - -class DatabaseHandler(BaseHandler): - """ - Base class for handlers associated to data storage systems (e.g. 
databases, data warehouses, streaming services, etc.) - """ - - def __init__(self, name: str): - super().__init__(name) - - -class MetaDatabaseHandler(DatabaseHandler): - """ - Base class for handlers associated to data storage systems (e.g. databases, data warehouses, streaming services, etc.) - - This class is used when the handler is also needed to store information in the data catalog. - This information is typically avaiable in the information schema or system tables of the database. - """ - - def __init__(self, name: str): - super().__init__(name) - - def meta_get_tables(self, table_names: Optional[List[str]]) -> DataHandlerResponse: - """ - Returns metadata information about the tables to be stored in the data catalog. - - Returns: - DataHandlerResponse: The response should consist of the following columns: - - TABLE_NAME (str): Name of the table. - - TABLE_TYPE (str): Type of the table, e.g. 'BASE TABLE', 'VIEW', etc. (optional). - - TABLE_SCHEMA (str): Schema of the table (optional). - - TABLE_DESCRIPTION (str): Description of the table (optional). - - ROW_COUNT (int): Estimated number of rows in the table (optional). - """ - raise NotImplementedError() - - def meta_get_columns(self, table_names: Optional[List[str]]) -> DataHandlerResponse: - """ - Returns metadata information about the columns in the tables to be stored in the data catalog. - - Returns: - DataHandlerResponse: The response should consist of the following columns: - - TABLE_NAME (str): Name of the table. - - COLUMN_NAME (str): Name of the column. - - DATA_TYPE (str): Data type of the column, e.g. 'VARCHAR', 'INT', etc. - - COLUMN_DESCRIPTION (str): Description of the column (optional). - - IS_NULLABLE (bool): Whether the column can contain NULL values (optional). - - COLUMN_DEFAULT (str): Default value of the column (optional). 
- """ - raise NotImplementedError() - - def meta_get_column_statistics(self, table_names: Optional[List[str]]) -> DataHandlerResponse: - """ - Returns metadata statisical information about the columns in the tables to be stored in the data catalog. - Either this method should be overridden in the handler or `meta_get_column_statistics_for_table` should be implemented. - - Returns: - DataHandlerResponse: The response should consist of the following columns: - - TABLE_NAME (str): Name of the table. - - COLUMN_NAME (str): Name of the column. - - MOST_COMMON_VALUES (List[str]): Most common values in the column (optional). - - MOST_COMMON_FREQUENCIES (List[str]): Frequencies of the most common values in the column (optional). - - NULL_PERCENTAGE: Percentage of NULL values in the column (optional). - - MINIMUM_VALUE (str): Minimum value in the column (optional). - - MAXIMUM_VALUE (str): Maximum value in the column (optional). - - DISTINCT_VALUES_COUNT (int): Count of distinct values in the column (optional). 
- """ - method = getattr(self, "meta_get_column_statistics_for_table") - if method.__func__ is not MetaDatabaseHandler.meta_get_column_statistics_for_table: - meta_columns = self.meta_get_columns(table_names) - grouped_columns = ( - meta_columns.data_frame.groupby("table_name") - .agg( - { - "column_name": list, - } - ) - .reset_index() - ) - - executor = concurrent.futures.ThreadPoolExecutor(max_workers=5) - futures = [] - - results = [] - with executor: - for _, row in grouped_columns.iterrows(): - table_name = row["table_name"] - columns = row["column_name"] - futures.append(executor.submit(self.meta_get_column_statistics_for_table, table_name, columns)) - - for future in concurrent.futures.as_completed(futures): - try: - result = future.result(timeout=120) - if result.resp_type == RESPONSE_TYPE.TABLE: - results.append(result.data_frame) - else: - logger.error( - f"Error retrieving column statistics for table {table_name}: {result.error_message}" - ) - except Exception: - logger.exception(f"Exception occurred while retrieving column statistics for table {table_name}:") - - if not results: - logger.warning("No column statistics could be retrieved for the specified tables.") - return ErrorResponse(error_message="No column statistics could be retrieved.") - return TableResponse(data=pd.concat(results, ignore_index=True) if results else pd.DataFrame()) - else: - raise NotImplementedError() - - def meta_get_column_statistics_for_table( - self, table_name: str, column_names: Optional[List[str]] = None - ) -> DataHandlerResponse: - """ - Returns metadata statistical information about the columns in a specific table to be stored in the data catalog. - Either this method should be implemented in the handler or `meta_get_column_statistics` should be overridden. - - Args: - table_name (str): Name of the table. - column_names (Optional[List[str]]): List of column names to retrieve statistics for. If None, statistics for all columns will be returned. 
- - Returns: - DataHandlerResponse: The response should consist of the following columns: - - TABLE_NAME (str): Name of the table. - - COLUMN_NAME (str): Name of the column. - - MOST_COMMON_VALUES (List[str]): Most common values in the column (optional). - - MOST_COMMON_FREQUENCIES (List[str]): Frequencies of the most common values in the column (optional). - - NULL_PERCENTAGE: Percentage of NULL values in the column (optional). - - MINIMUM_VALUE (str): Minimum value in the column (optional). - - MAXIMUM_VALUE (str): Maximum value in the column (optional). - - DISTINCT_VALUES_COUNT (int): Count of distinct values in the column (optional). - """ - pass - - def meta_get_primary_keys(self, table_names: Optional[List[str]]) -> DataHandlerResponse: - """ - Returns metadata information about the primary keys in the tables to be stored in the data catalog. - - Returns: - DataHandlerResponse: The response should consist of the following columns: - - TABLE_NAME (str): Name of the table. - - COLUMN_NAME (str): Name of the column that is part of the primary key. - - ORDINAL_POSITION (int): Position of the column in the primary key (optional). - - CONSTRAINT_NAME (str): Name of the primary key constraint (optional). - """ - raise NotImplementedError() - - def meta_get_foreign_keys(self, table_names: Optional[List[str]]) -> DataHandlerResponse: - """ - Returns metadata information about the foreign keys in the tables to be stored in the data catalog. - - Returns: - DataHandlerResponse: The response should consist of the following columns: - - PARENT_TABLE_NAME (str): Name of the parent table. - - PARENT_COLUMN_NAME (str): Name of the parent column that is part of the foreign key. - - CHILD_TABLE_NAME (str): Name of the child table. - - CHILD_COLUMN_NAME (str): Name of the child column that is part of the foreign key. - - CONSTRAINT_NAME (str): Name of the foreign key constraint (optional). 
- """ - raise NotImplementedError() - - def meta_get_handler_info(self, **kwargs) -> str: - """ - Retrieves information about the design and implementation of the database handler. - This should include, but not be limited to, the following: - - The type of SQL queries and operations that the handler supports. - - etc. - - Args: - kwargs: Additional keyword arguments that may be used in generating the handler information. - - Returns: - str: A string containing information about the database handler's design and implementation. - """ - pass - - -class ArgProbeMixin: - """ - A mixin class that provides probing of arguments that - are needed by a handler during creation and prediction time - by running the static analysis on the source code of the handler. - """ - - class ArgProbeVisitor(ast.NodeVisitor): - def __init__(self): - self.arg_keys = [] - self.var_names_to_track = {"args"} - - def visit_Assign(self, node): - # track if args['using'] get assigned to any variable - # if so, we should track the variable by adding it to - # self.var_names_to_track - # E.g., using_args = args['using'] - # we should track using_args as well - if ( - isinstance(node.value, ast.Subscript) - and isinstance(node.value.value, ast.Name) - and node.value.value.id == "args" - ): - if ( - isinstance(node.value.slice, ast.Index) - and isinstance(node.value.slice.value, ast.Str) - and node.value.slice.value.s == "using" - ): - self.var_names_to_track.add(node.targets[0].id) - - # for an assignment like `self.args['name'] = 'value'`, we should ignore - # the left side of the assignment - self.visit(node.value) - - def visit_AnnAssign(self, node: AnnAssign) -> Any: - self.visit(node.value) - - def visit_AugAssign(self, node: AugAssign) -> Any: - self.visit(node.value) - - def visit_Subscript(self, node): - if isinstance(node.value, ast.Name) and node.value.id in self.var_names_to_track: - if isinstance(node.slice, ast.Index) and isinstance(node.slice.value, ast.Str): - 
self.arg_keys.append({"name": node.slice.value.s, "required": True}) - self.generic_visit(node) - - def visit_Call(self, node): - if isinstance(node.func, ast.Attribute) and node.func.attr == "get": - if isinstance(node.func.value, ast.Name) and node.func.value.id in self.var_names_to_track: - if isinstance(node.args[0], ast.Str): - self.arg_keys.append({"name": node.args[0].s, "required": False}) - self.generic_visit(node) - - @classmethod - def probe_function(self, method_name: str) -> List[Dict]: - """ - Probe the source code of the method with name method_name. - Specifically, trace how the argument `args`, which is a dict is used in the method. - - Find all places where a key of the dict is used, and return a list of all keys that are used. - E.g., - args["key1"] -> "key1" is accessed, and it is required - args.get("key2", "default_value") -> "key2" is accessed, and it is optional (default value is provided) - - Return a list of dict - where each dict looks like - { - "name": "key1", - "required": True - } - """ - try: - source_code = self.get_source_code(method_name) - except Exception: - logger.exception(f"Failed to get source code of method {method_name} in {self.__class__.__name__}. 
Reason:") - return [] - - # parse the source code - # fix the indentation - source_code = textwrap.dedent(source_code) - # parse the source code - tree = ast.parse(source_code) - - # find all places where a key in args is accessed - # and if it is accessed using args["key"] or args.get("key", "default_value") - - visitor = self.ArgProbeVisitor() - visitor.visit(tree) - - # deduplicate the keys - # if there two records with the same name but different required status - # we should keep the one with required == True - unique_arg_keys = {} - for r in visitor.arg_keys: - if r["name"] in unique_arg_keys: - if r["required"]: - unique_arg_keys[r["name"]] = r["required"] - else: - unique_arg_keys[r["name"]] = r["required"] - - # convert back to list - visitor.arg_keys = [{"name": k, "required": v} for k, v in unique_arg_keys.items()] - - # filter out record where name == "using" - return [r for r in visitor.arg_keys if r["name"] != "using"] - - @classmethod - def get_source_code(self, method_name: str): - """ - Get the source code of the method specified by method_name - """ - method = getattr(self, method_name) - if method is None: - raise Exception(f"Method {method_name} does not exist in {self.__class__.__name__}") - source_code = inspect.getsource(method) - return source_code - - @classmethod - def prediction_args(self): - """ - Get the arguments that are needed by the prediction method - """ - return self.probe_function("predict") - - @classmethod - def creation_args(self): - """ - Get the arguments that are needed by the creation method - """ - return self.probe_function("create") - - -class BaseMLEngine(ArgProbeMixin): - """ - Base class for integration engines to connect with other machine learning libraries/frameworks. - - This class will be instanced when interacting with the underlying framework. 
For compliance with the interface - that MindsDB core expects, instances of this class will be wrapped with the `BaseMLEngineExec` class defined - in `libs/ml_exec_base`. - - Broadly speaking, the flow is as follows: - - A SQL statement is sent to the MindsDB executor - - The statement is parsed, and a sequential plan is generated by `mindsdb_sql` - - If any step in the plan involves an ML framework, a wrapped engine that inherits from this class will be called for the respective action - - For example, creating a new model would call `create()` - - Any output produced by the ML engine is then formatted by the wrapper and passed back into the MindsDB executor, which can then morph the data to comply with the original SQL query - """ # noqa - - def __init__(self, model_storage, engine_storage, **kwargs) -> None: - """ - Warning: This method should not be overridden. - - Initialize storage objects required by the ML engine. - - - engine_storage: persists global engine-related internals or artifacts that may be used by all models from the engine. - - model_storage: stores artifacts for any single given model. - """ - self.model_storage = model_storage - self.engine_storage = engine_storage - self.generative = False # if True, the target column name does not have to be specified at creation time - - if kwargs.get("base_model_storage"): - self.base_model_storage = kwargs["base_model_storage"] # available when updating a model - else: - self.base_model_storage = None - - def create(self, target: str, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None: - """ - Saves a model inside the engine registry for later usage. - - Normally, an input dataframe is required to train the model. - However, some integrations may merely require registering the model instead of training, in which case `df` can be omitted. - - Any other arguments required to register the model can be passed in an `args` dictionary. 
- """ - raise NotImplementedError - - def predict(self, df: pd.DataFrame, args: Optional[Dict] = None) -> pd.DataFrame: - """ - Calls a model with some input dataframe `df`, and optionally some arguments `args` that may modify the model behavior. - - The expected output is a dataframe with the predicted values in the target-named column. - Additional columns can be present, and will be considered row-wise explanations if their names finish with `_explain`. - """ - raise NotImplementedError - - def finetune(self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None: - """ - Optional. - - Used to fine-tune a pre-existing model without resetting its internal state (e.g. weights). - - Availability will depend on underlying integration support, as not all ML models can be partially updated. - """ - raise NotImplementedError - - def describe(self, attribute: Optional[str] = None) -> pd.DataFrame: - """Optional. - - When called, this method provides global model insights, e.g. framework-level parameters used in training. - """ - raise NotImplementedError - - def update(self, args: dict) -> None: - """Optional. - - Update model. - """ - raise NotImplementedError - - def create_engine(self, connection_args: dict): - """Optional. - - Used to connect with external sources (e.g. a REST API) that the engine will require to use any other methods. - """ - raise NotImplementedError - - def update_engine(self, connection_args: dict): - """Optional. 
- - Used when need to change connection args or do any make any other changes to the engine - """ - raise NotImplementedError - - def close(self): - pass diff --git a/mindsdb/integrations/libs/const.py b/mindsdb/integrations/libs/const.py deleted file mode 100644 index 01749c4ce0a..00000000000 --- a/mindsdb/integrations/libs/const.py +++ /dev/null @@ -1,34 +0,0 @@ -class HANDLER_TYPE: - __slots__ = () - DATA = "data" - ML = "ml" - - -HANDLER_TYPE = HANDLER_TYPE() - - -class HANDLER_CONNECTION_ARG_TYPE: - __slots__ = () - STR = "str" - INT = "int" - BOOL = "bool" - URL = "url" - PATH = "path" - DICT = "dict" - PWD = "pwd" - LIST = "list" - - -HANDLER_CONNECTION_ARG_TYPE = HANDLER_CONNECTION_ARG_TYPE() - - -class HANDLER_SUPPORT_LEVEL: - __slots__ = () - MINDSDB = "mindsdb" - COMMUNITY = "community" # default - - -HANDLER_SUPPORT_LEVEL = HANDLER_SUPPORT_LEVEL() - - -from mindsdb.interfaces.storage.db import PREDICTOR_STATUS # noqa diff --git a/mindsdb/integrations/libs/keyword_search_base.py b/mindsdb/integrations/libs/keyword_search_base.py deleted file mode 100644 index d515764ba2a..00000000000 --- a/mindsdb/integrations/libs/keyword_search_base.py +++ /dev/null @@ -1,41 +0,0 @@ -from mindsdb_sql_parser.ast import Select -from typing import List -import pandas as pd - -from mindsdb.integrations.utilities.sql_utils import FilterCondition, KeywordSearchArgs - - -class KeywordSearchBase: - """ - Base class for keyword search integrations. - This class provides a common interface for keyword search functionality. 
- """ - - def __init__(self, *args, **kwargs): - pass - - def dispatch_keyword_select( - self, query: Select, conditions: List[FilterCondition] = None, keyword_search_args: KeywordSearchArgs = None - ): - """Dispatches a keyword search select query to the appropriate method.""" - raise NotImplementedError() - - def keyword_select( - self, - table_name: str, - columns: List[str] = None, - conditions: List[FilterCondition] = None, - offset: int = None, - limit: int = None, - ) -> pd.DataFrame: - """Select data from table - - Args: - table_name (str): table name - columns (List[str]): columns to select - conditions (List[FilterCondition]): conditions to select - - Returns: - pd.DataFrame - """ - raise NotImplementedError() diff --git a/mindsdb/integrations/libs/llm/__init__.py b/mindsdb/integrations/libs/llm/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/libs/llm/config.py b/mindsdb/integrations/libs/llm/config.py deleted file mode 100644 index 61af973bc36..00000000000 --- a/mindsdb/integrations/libs/llm/config.py +++ /dev/null @@ -1,130 +0,0 @@ -from typing import Any, Dict, List, Optional - -from pydantic import BaseModel, ConfigDict, Field - - -class BaseLLMConfig(BaseModel): - # Remove 'model_' prefix from protected namespaces since Langchain constructor - # kwargs share the same prefix. - model_config = ConfigDict(protected_namespaces=()) - - -# See https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.openai.ChatOpenAI.html#langchain_community.chat_models.openai.ChatOpenAI -# This config does not have to be exclusively used with Langchain. -class OpenAIConfig(BaseLLMConfig): - model_name: str - temperature: Optional[float] - max_retries: Optional[int] - max_tokens: Optional[int] - openai_api_base: Optional[str] - # Inferred from OPENAI_API_KEY if not provided. 
- openai_api_key: Optional[str] - openai_organization: Optional[str] - request_timeout: Optional[float] - - -# See https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.anthropic.ChatAnthropic.html -# This config does not have to be exclusively used with Langchain. -class AnthropicConfig(BaseLLMConfig): - model: str - temperature: Optional[float] - max_tokens: Optional[int] - top_p: Optional[float] - top_k: Optional[int] - default_request_timeout: Optional[float] - # Inferred from ANTHROPIC_API_KEY if not provided. - anthropic_api_key: Optional[str] - anthropic_api_url: Optional[str] - - -# See https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.litellm.ChatLiteLLM.html -# This config does not have to be exclusively used with Langchain. -class LiteLLMConfig(BaseLLMConfig): - model: str - api_base: Optional[str] - max_retries: Optional[int] - max_tokens: Optional[int] - top_p: Optional[float] - top_k: Optional[int] - temperature: Optional[float] - custom_llm_provider: Optional[str] - model_kwargs: Optional[Dict[str, Any]] - - -# See https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.ollama.ChatOllama.html -# This config does not have to be exclusively used with Langchain. 
-class OllamaConfig(BaseLLMConfig): - base_url: str - model: str - temperature: Optional[float] - top_p: Optional[float] - top_k: Optional[int] - timeout: Optional[int] - format: Optional[str] - headers: Optional[Dict] - num_predict: Optional[int] - num_ctx: Optional[int] - num_gpu: Optional[int] - repeat_penalty: Optional[float] - stop: Optional[List[str]] - template: Optional[str] - - -class NvidiaNIMConfig(BaseLLMConfig): - base_url: str - model: str - temperature: Optional[float] - top_p: Optional[float] - timeout: Optional[int] - format: Optional[str] - headers: Optional[Dict] - num_predict: Optional[int] - num_ctx: Optional[int] - num_gpu: Optional[int] - repeat_penalty: Optional[float] - stop: Optional[List[str]] - template: Optional[str] - nvidia_api_key: Optional[str] - - -class MindsdbConfig(BaseLLMConfig): - model_name: str - project_name: str - - -# See https://python.langchain.com/api_reference/google_genai/chat_models/langchain_google_genai.chat_models.ChatGoogleGenerativeAI.html -class GoogleConfig(BaseLLMConfig): - model: str = Field(description="Gemini model name to use (e.g., 'gemini-1.5-pro')") - temperature: Optional[float] = Field(default=None, description="Controls randomness in responses") - top_p: Optional[float] = Field(default=None, description="Nucleus sampling parameter") - top_k: Optional[int] = Field(default=None, description="Number of highest probability tokens to consider") - max_output_tokens: Optional[int] = Field(default=None, description="Maximum number of tokens to generate") - google_api_key: Optional[str] = Field(default=None, description="API key for Google Generative AI") - - -# See https://api.python.langchain.com/en/latest/llms/langchain_community.llms.writer.Writer.html -class WriterConfig(BaseLLMConfig): - model_name: str = Field(default="palmyra-x5", alias="model_id") - temperature: Optional[float] = Field(default=0.7) - max_tokens: Optional[int] = Field(default=None) - top_p: Optional[float] = Field(default=None) - 
stop: Optional[List[str]] = Field(default=None) - best_of: Optional[int] = Field(default=None) - writer_api_key: Optional[str] = Field(default=None) - writer_org_id: Optional[str] = Field(default=None) - base_url: Optional[str] = Field(default=None) - - -# https://api.python.langchain.com/en/latest/llms/langchain_aws.llms.bedrock.BedrockLLM.html#langchain_aws.llms.bedrock.BedrockLLM -class BedrockConfig(BaseLLMConfig): - model_id: str - aws_access_key_id: Optional[str] = Field(default=None) - aws_secret_access_key: Optional[str] = Field(default=None) - aws_session_token: Optional[str] = Field(default=None) - region_name: Optional[str] = Field(default=None) - credentials_profile_name: Optional[str] = Field(default=None) - endpoint_url: Optional[str] = Field(default=None) - stop: Optional[List[str]] = Field(default=None) - temperature: Optional[float] = Field(default=0.7) - max_tokens: Optional[int] = Field(default=None) - model_kwargs: Optional[Dict[str, Any]] = Field(default=None) diff --git a/mindsdb/integrations/libs/llm/utils.py b/mindsdb/integrations/libs/llm/utils.py deleted file mode 100644 index da01454142e..00000000000 --- a/mindsdb/integrations/libs/llm/utils.py +++ /dev/null @@ -1,238 +0,0 @@ -import re -from typing import Dict, List, Tuple - -import numpy as np -import pandas as pd - -from mindsdb.integrations.libs.llm.config import ( - AnthropicConfig, - BaseLLMConfig, - GoogleConfig, - LiteLLMConfig, - OllamaConfig, - OpenAIConfig, - NvidiaNIMConfig, - MindsdbConfig, - WriterConfig, - BedrockConfig, -) -from mindsdb.utilities.config import config - - -# Default to latest GPT-4 model (https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) -DEFAULT_OPENAI_MODEL = "gpt-4o" -# Requires more than vanilla OpenAI due to ongoing summarization and 3rd party input. 
-DEFAULT_OPENAI_MAX_TOKENS = 8096 -DEFAULT_OPENAI_MAX_RETRIES = 3 - -DEFAULT_ANTHROPIC_MODEL = "claude-3-haiku-20240307" - -DEFAULT_GOOGLE_MODEL = "gemini-2.5-pro-preview-03-25" - -DEFAULT_LITELLM_MODEL = "gpt-3.5-turbo" -DEFAULT_LITELLM_PROVIDER = "openai" -DEFAULT_LITELLM_BASE_URL = "https://ai.dev.mindsdb.com" - -DEFAULT_OLLAMA_BASE_URL = "http://localhost:11434" -DEFAULT_OLLAMA_MODEL = "llama2" - -DEFAULT_NVIDIA_NIM_BASE_URL = "http://localhost:8000/v1" # Assumes local port forwarding through ssh -DEFAULT_NVIDIA_NIM_MODEL = "meta/llama-3_1-8b-instruct" -DEFAULT_VLLM_SERVER_URL = "http://localhost:8000/v1" - - -def get_completed_prompts(base_template: str, df: pd.DataFrame, strict=True) -> Tuple[List[str], np.ndarray]: - """ - Helper method that produces formatted prompts given a template and data in a Pandas DataFrame. - It also returns the ID of any empty templates that failed to be filled due to missing data. - - :param base_template: string with placeholders for each column in the DataFrame. Placeholders should follow double curly braces format, e.g. `{{column_name}}`. All placeholders should have matching columns in `df`. - :param df: pd.DataFrame to generate full prompts. Each placeholder in `base_template` must exist as a column in the DataFrame. If a column is not in the template, it is ignored entirely. - :param strict: raise exception if base_template doesn't contain placeholders - - :return prompts: list of in-filled prompts using `base_template` and relevant columns from `df` - :return empty_prompt_ids: np.int numpy array (shape (n_missing_rows,)) with the row indexes where in-fill failed due to missing data. 
- """ # noqa - columns = [] - spans = [] - matches = list(re.finditer("{{(.*?)}}", base_template)) - - if len(matches) == 0: - # no placeholders - if strict: - raise AssertionError("No placeholders found in the prompt, please provide a valid prompt template.") - prompts = [base_template] * len(df) - return prompts, np.ndarray(0) - - first_span = matches[0].start() - last_span = matches[-1].end() - - for m in matches: - columns.append(m[0].replace("{", "").replace("}", "")) - spans.extend((m.start(), m.end())) - - spans = spans[1:-1] # omit first and last, they are added separately - template = [ - base_template[s:e] for s, e in list(zip(spans, spans[1:]))[::2] - ] # take every other to skip placeholders # noqa - template.insert(0, base_template[0:first_span]) # add prompt start - template.append(base_template[last_span:]) # add prompt end - - empty_prompt_ids = np.where(df[columns].isna().all(axis=1).values)[0] - - df["__mdb_prompt"] = "" - for i in range(len(template)): - atom = template[i] - if i < len(columns): - col = df[columns[i]].replace(to_replace=[None], value="") # add empty quote if data is missing - df["__mdb_prompt"] = df["__mdb_prompt"].apply(lambda x: x + atom) + col.astype("string") - else: - df["__mdb_prompt"] = df["__mdb_prompt"].apply(lambda x: x + atom) - prompts = list(df["__mdb_prompt"]) - - return prompts, empty_prompt_ids - - -def get_llm_config(provider: str, args: Dict) -> BaseLLMConfig: - """ - Helper method that returns the configuration for a given LLM provider. - - :param provider: string with the name of the provider. - :param config: dictionary with the configuration for the provider. - - :return: LLMConfig object with the configuration for the provider. - """ - temperature = min(1.0, max(0.0, args.get("temperature", 0.0))) - if provider == "openai": - if any(x in args.get("model_name", "") for x in ["o1", "o3"]): - # for o1 and 03, 'temperature' does not support 0.0 with this model. 
Only the default (1) value is supported - temperature = 1 - - return OpenAIConfig( - model_name=args.get("model_name", DEFAULT_OPENAI_MODEL), - temperature=temperature, - max_retries=args.get("max_retries", DEFAULT_OPENAI_MAX_RETRIES), - max_tokens=args.get("max_tokens", DEFAULT_OPENAI_MAX_TOKENS), - openai_api_base=args.get("base_url", None), - openai_api_key=args["api_keys"].get("openai", None), - openai_organization=args.get("api_organization", None), - request_timeout=args.get("request_timeout", None), - ) - if provider == "anthropic": - return AnthropicConfig( - model=args.get("model_name", DEFAULT_ANTHROPIC_MODEL), - temperature=temperature, - max_tokens=args.get("max_tokens", None), - top_p=args.get("top_p", None), - top_k=args.get("top_k", None), - default_request_timeout=args.get("default_request_timeout", None), - anthropic_api_key=args["api_keys"].get("anthropic", None), - anthropic_api_url=args.get("base_url", None), - ) - if provider == "litellm": - model_kwargs = { - "api_key": args["api_keys"].get("litellm", None), - "top_p": args.get("top_p", None), - "request_timeout": args.get("request_timeout", None), - "frequency_penalty": args.get("frequency_penalty", None), - "presence_penalty": args.get("presence_penalty", None), - "logit_bias": args.get("logit_bias", None), - } - return LiteLLMConfig( - model=args.get("model_name", DEFAULT_LITELLM_MODEL), - temperature=temperature, - api_base=args.get("base_url", DEFAULT_LITELLM_BASE_URL), - max_retries=args.get("max_retries", DEFAULT_OPENAI_MAX_RETRIES), - max_tokens=args.get("max_tokens", DEFAULT_OPENAI_MAX_TOKENS), - top_p=args.get("top_p", None), - top_k=args.get("top_k", None), - custom_llm_provider=args.get("custom_llm_provider", DEFAULT_LITELLM_PROVIDER), - model_kwargs=model_kwargs, - ) - if provider == "ollama": - return OllamaConfig( - base_url=args.get("base_url", DEFAULT_OLLAMA_BASE_URL), - model=args.get("model_name", DEFAULT_OLLAMA_MODEL), - temperature=temperature, - top_p=args.get("top_p", 
None), - top_k=args.get("top_k", None), - timeout=args.get("request_timeout", None), - format=args.get("format", None), - headers=args.get("headers", None), - num_predict=args.get("num_predict", None), - num_ctx=args.get("num_ctx", None), - num_gpu=args.get("num_gpu", None), - repeat_penalty=args.get("repeat_penalty", None), - stop=args.get("stop", None), - template=args.get("template", None), - ) - if provider == "nvidia_nim": - return NvidiaNIMConfig( - base_url=args.get("base_url", DEFAULT_NVIDIA_NIM_BASE_URL), - model=args.get("model_name", DEFAULT_NVIDIA_NIM_MODEL), - temperature=temperature, - top_p=args.get("top_p", None), - timeout=args.get("request_timeout", None), - format=args.get("format", None), - headers=args.get("headers", None), - num_predict=args.get("num_predict", None), - num_ctx=args.get("num_ctx", None), - num_gpu=args.get("num_gpu", None), - repeat_penalty=args.get("repeat_penalty", None), - stop=args.get("stop", None), - template=args.get("template", None), - nvidia_api_key=args["api_keys"].get("nvidia_nim", None), - ) - if provider == "mindsdb": - return MindsdbConfig( - model_name=args["model_name"], - project_name=args.get("project_name", config.get("default_project")), - ) - if provider == "vllm": - return OpenAIConfig( - model_name=args.get("model_name"), - temperature=temperature, - max_retries=args.get("max_retries", DEFAULT_OPENAI_MAX_RETRIES), - max_tokens=args.get("max_tokens", DEFAULT_OPENAI_MAX_TOKENS), - openai_api_base=args.get("base_url", DEFAULT_VLLM_SERVER_URL), - openai_api_key=args["api_keys"].get("vllm", "EMPTY`"), - openai_organization=args.get("api_organization", None), - request_timeout=args.get("request_timeout", None), - ) - if provider == "google": - return GoogleConfig( - model=args.get("model_name", DEFAULT_GOOGLE_MODEL), - temperature=temperature, - top_p=args.get("top_p", None), - top_k=args.get("top_k", None), - max_output_tokens=args.get("max_tokens", None), - google_api_key=args["api_keys"].get("google", 
None), - ) - if provider == "writer": - return WriterConfig( - model_name=args.get("model_name", "palmyra-x5"), - temperature=temperature, - max_tokens=args.get("max_tokens", None), - top_p=args.get("top_p", None), - stop=args.get("stop", None), - best_of=args.get("best_of", None), - writer_api_key=args["api_keys"].get("writer", None), - writer_org_id=args.get("writer_org_id", None), - base_url=args.get("base_url", None), - ) - if provider == "bedrock": - return BedrockConfig( - model_id=args.get("model_name"), - temperature=temperature, - max_tokens=args.get("max_tokens", None), - stop=args.get("stop", None), - base_url=args.get("endpoint_url", None), - aws_access_key_id=args.get("aws_access_key_id", None), - aws_secret_access_key=args.get("aws_secret_access_key", None), - aws_session_token=args.get("aws_session_token", None), - region_name=args.get("aws_region_name", None), - credentials_profile_name=args.get("credentials_profile_name", None), - model_kwargs=args.get("model_kwargs", None), - ) - - raise ValueError(f"Provider {provider} is not supported.") diff --git a/mindsdb/integrations/libs/ml_exec_base.py b/mindsdb/integrations/libs/ml_exec_base.py deleted file mode 100644 index abac27d75de..00000000000 --- a/mindsdb/integrations/libs/ml_exec_base.py +++ /dev/null @@ -1,447 +0,0 @@ -""" -This module defines the wrapper for ML engines which abstracts away a lot of complexity. - -In particular, three big components are included: - - - `BaseMLEngineExec` class: this class wraps any object that inherits from `BaseMLEngine` and exposes some endpoints - normally associated with a DB handler (e.g. `native_query`, `get_tables`), as well as other ML-specific behaviors, - like `learn()` or `predict()`. Note that while these still have to be implemented at the engine level, the burden - on that class is lesser given that it only needs to return a pandas DataFrame. 
It's this class that will take said - output and format it into the DataHandlerResponse instance that MindsDB core expects. - - - `learn_process` method: handles async dispatch of the `learn` method in an engine, as well as registering all - models inside of the internal MindsDB registry. - - - `predict_process` method: handles async dispatch of the `predict` method in an engine. - -""" - -import socket -import contextlib -import datetime as dt -from types import ModuleType -from typing import Optional, Union - -import pandas as pd -from sqlalchemy import func, null -from sqlalchemy.sql.functions import coalesce - -from mindsdb.utilities.config import Config -import mindsdb.interfaces.storage.db as db -from mindsdb.__about__ import __version__ as mindsdb_version -from mindsdb.utilities.hooks import after_predict as after_predict_hook -from mindsdb.interfaces.model.functions import get_model_record -from mindsdb.integrations.libs.const import PREDICTOR_STATUS -from mindsdb.interfaces.database.database import DatabaseController -from mindsdb.utilities.context import context as ctx -from mindsdb.interfaces.model.functions import get_model_records -from mindsdb.utilities.functions import mark_process -import mindsdb.utilities.profiler as profiler -from mindsdb.utilities.ml_task_queue.producer import MLTaskProducer -from mindsdb.utilities.ml_task_queue.const import ML_TASK_TYPE -from mindsdb.integrations.libs.process_cache import process_cache, empty_callback, MLProcessException - -try: - import torch.multiprocessing as mp -except Exception: - import multiprocessing as mp -mp_ctx = mp.get_context("spawn") - - -class MLEngineException(Exception): - pass - - -class BaseMLEngineExec: - def __init__(self, name: str, integration_id: int, handler_module: ModuleType): - """ML handler interface - - Args: - name (str): name of the ml_engine - integration_id (int): id of the ml_engine - handler_module (ModuleType): module of the ml_engine - """ - self.name = name - self.config = 
Config() - self.integration_id = integration_id - self.engine = handler_module.name - self.handler_module = handler_module - - self.database_controller = DatabaseController() - - self.base_ml_executor = process_cache - if self.config["ml_task_queue"]["type"] == "redis": - self.base_ml_executor = MLTaskProducer() - - @profiler.profile() - def learn( - self, - model_name, - project_name, - data_integration_ref=None, - fetch_data_query=None, - problem_definition=None, - join_learn_process=False, - label=None, - is_retrain=False, - set_active=True, - ): - """Trains a model given some data-gathering SQL statement.""" - - # may or may not be provided (e.g. 0-shot models do not need it), so engine will handle it - target = problem_definition.get("target", [""]) # db.Predictor expects Column(Array(String)) - - project = self.database_controller.get_project(name=project_name) - - self.create_validation(target, problem_definition, self.integration_id) - - predictor_record = db.Predictor( - company_id=ctx.company_id, - user_id=ctx.user_id, - name=model_name, - integration_id=self.integration_id, - data_integration_ref=data_integration_ref, - fetch_data_query=fetch_data_query, - mindsdb_version=mindsdb_version, - to_predict=target, - learn_args=problem_definition, - data={"name": model_name}, - project_id=project.id, - training_data_columns_count=None, - training_data_rows_count=None, - training_start_at=dt.datetime.now(), - status=PREDICTOR_STATUS.GENERATING, - label=label, - version=( - db.session.query(coalesce(func.max(db.Predictor.version), 1) + (1 if is_retrain else 0)) - .filter_by( - company_id=ctx.company_id, - user_id=ctx.user_id, - name=model_name, - project_id=project.id, - deleted_at=null(), - ) - .scalar_subquery() - ), - active=(not is_retrain), # if create then active - training_metadata={"hostname": socket.gethostname(), "reason": "retrain" if is_retrain else "learn"}, - ) - - db.serializable_insert(predictor_record) - - with self._catch_exception(model_name): 
- task = self.base_ml_executor.apply_async( - task_type=ML_TASK_TYPE.LEARN, - model_id=predictor_record.id, - payload={ - "handler_meta": { - "module_path": self.handler_module.__package__, - "engine": self.engine, - "integration_id": self.integration_id, - }, - "context": ctx.dump(), - "problem_definition": problem_definition, - "set_active": set_active, - "data_integration_ref": data_integration_ref, - "fetch_data_query": fetch_data_query, - "project_name": project_name, - }, - ) - - if join_learn_process is True: - task.result() - predictor_record = db.Predictor.query.get(predictor_record.id) - db.session.refresh(predictor_record) - else: - # to prevent memory leak need to add any callback - task.add_done_callback(empty_callback) - - return predictor_record - - def describe(self, model_id: int, attribute: Optional[str] = None) -> pd.DataFrame: - with self._catch_exception(model_id): - task = self.base_ml_executor.apply_async( - task_type=ML_TASK_TYPE.DESCRIBE, - model_id=model_id, - payload={ - "handler_meta": { - "module_path": self.handler_module.__package__, - "engine": self.engine, - "integration_id": self.integration_id, - }, - "attribute": attribute, - "context": ctx.dump(), - }, - ) - result = task.result() - return result - - def function_call(self, func_name, args): - with self._catch_exception(): - task = self.base_ml_executor.apply_async( - task_type=ML_TASK_TYPE.FUNC_CALL, - model_id=0, # can not be None - payload={ - "context": ctx.dump(), - "name": func_name, - "args": args, - "handler_meta": { - "module_path": self.handler_module.__package__, - "engine": self.engine, - "integration_id": self.integration_id, - }, - }, - ) - result = task.result() - return result - - @profiler.profile() - @mark_process(name="predict") - def predict( - self, - model_name: str, - df: pd.DataFrame, - pred_format: str = "dict", - project_name: str = None, - version=None, - params: dict = None, - ): - """Generates predictions with some model and input data.""" - - kwargs 
= {"name": model_name, "ml_handler_name": self.name, "project_name": project_name} - if version is None: - kwargs["active"] = True - else: - kwargs["active"] = None - kwargs["version"] = version - predictor_record = get_model_record(**kwargs) - if predictor_record is None: - if version is not None: - model_name = f"{model_name}.{version}" - raise Exception(f"Error: model '{model_name}' does not exists!") - if predictor_record.status != PREDICTOR_STATUS.COMPLETE: - raise Exception("Error: model creation not completed") - - using = {} if params is None else params - args = {"pred_format": pred_format, "predict_params": using, "using": using} - - with self._catch_exception(model_name): - task = self.base_ml_executor.apply_async( - task_type=ML_TASK_TYPE.PREDICT, - model_id=predictor_record.id, - payload={ - "handler_meta": { - "module_path": self.handler_module.__package__, - "engine": self.engine, - "integration_id": self.integration_id, - }, - "context": ctx.dump(), - "predictor_record": predictor_record, - "args": args, - }, - dataframe=df, - ) - predictions = task.result() - - # mdb indexes - if "__mindsdb_row_id" not in predictions.columns and "__mindsdb_row_id" in df.columns: - predictions["__mindsdb_row_id"] = df["__mindsdb_row_id"] - - after_predict_hook( - company_id=ctx.company_id, - user_id=ctx.user_id, - predictor_id=predictor_record.id, - rows_in_count=df.shape[0], - columns_in_count=df.shape[1], - rows_out_count=len(predictions), - ) - return predictions - - def create_validation(self, target, args, integration_id): - with self._catch_exception(): - task = self.base_ml_executor.apply_async( - task_type=ML_TASK_TYPE.CREATE_VALIDATION, - model_id=0, # can not be None - payload={ - "context": ctx.dump(), - "target": target, - "args": args, - "handler_meta": { - "module_path": self.handler_module.__package__, - "engine": self.engine, - "integration_id": integration_id, - }, - }, - ) - result = task.result() - return result - - def update(self, args: dict, 
model_id: int): - with self._catch_exception(model_id): - task = self.base_ml_executor.apply_async( - task_type=ML_TASK_TYPE.UPDATE, - model_id=model_id, - payload={ - "context": ctx.dump(), - "args": args, - "handler_meta": { - "module_path": self.handler_module.__package__, - "engine": self.engine, - "integration_id": self.integration_id, - }, - }, - ) - result = task.result() - return result - - def update_engine(self, connection_args): - with self._catch_exception(): - task = self.base_ml_executor.apply_async( - task_type=ML_TASK_TYPE.UPDATE_ENGINE, - model_id=0, # can not be None - payload={ - "context": ctx.dump(), - "connection_args": connection_args, - "handler_meta": { - "module_path": self.handler_module.__package__, - "engine": self.engine, - "integration_id": self.integration_id, - }, - }, - ) - result = task.result() - return result - - def create_engine(self, connection_args: dict, integration_id: int) -> None: - with self._catch_exception(): - task = self.base_ml_executor.apply_async( - task_type=ML_TASK_TYPE.CREATE_ENGINE, - model_id=0, # can not be None - payload={ - "context": ctx.dump(), - "connection_args": connection_args, - "handler_meta": { - "module_path": self.handler_module.__package__, - "engine": self.engine, - "integration_id": integration_id, - }, - }, - ) - result = task.result() - return result - - @profiler.profile() - def finetune( - self, - model_name, - project_name, - base_model_version: int, - data_integration_ref=None, - fetch_data_query=None, - join_learn_process=False, - label=None, - set_active=True, - args: Optional[dict] = None, - ): - # generate new record from latest version as starting point - project = self.database_controller.get_project(name=project_name) - - search_args = {"active": None, "name": model_name, "status": PREDICTOR_STATUS.COMPLETE} - if base_model_version is not None: - search_args["version"] = base_model_version - else: - search_args["active"] = True - predictor_records = 
get_model_records(**search_args) - if len(predictor_records) == 0: - raise Exception("Can't find suitable base model") - - predictor_records.sort(key=lambda x: x.training_stop_at, reverse=True) - predictor_records = [x for x in predictor_records if x.training_stop_at is not None] - base_predictor_record = predictor_records[0] - - learn_args = base_predictor_record.learn_args - learn_args["using"] = args if not learn_args.get("using", False) else {**learn_args["using"], **args} - - self.create_validation( - target=base_predictor_record.to_predict, args=learn_args, integration_id=self.integration_id - ) - - predictor_record = db.Predictor( - company_id=ctx.company_id, - user_id=ctx.user_id, - name=model_name, - integration_id=self.integration_id, - data_integration_ref=data_integration_ref, - fetch_data_query=fetch_data_query, - mindsdb_version=mindsdb_version, - to_predict=base_predictor_record.to_predict, - learn_args=learn_args, - data={"name": model_name}, - project_id=project.id, - training_data_columns_count=None, - training_data_rows_count=None, - training_start_at=dt.datetime.now(), - status=PREDICTOR_STATUS.GENERATING, - label=label, - version=( - db.session.query(coalesce(func.max(db.Predictor.version), 1) + 1) - .filter_by( - company_id=ctx.company_id, - user_id=ctx.user_id, - name=model_name, - project_id=project.id, - deleted_at=null(), - ) - .scalar_subquery() - ), - active=False, - training_metadata={"hostname": socket.gethostname(), "reason": "finetune"}, - ) - db.serializable_insert(predictor_record) - - with self._catch_exception(model_name): - task = self.base_ml_executor.apply_async( - task_type=ML_TASK_TYPE.FINETUNE, - model_id=predictor_record.id, - payload={ - "handler_meta": { - "module_path": self.handler_module.__package__, - "engine": self.engine, - "integration_id": self.integration_id, - }, - "context": ctx.dump(), - "model_id": predictor_record.id, - "problem_definition": predictor_record.learn_args, - "set_active": set_active, - 
"base_model_id": base_predictor_record.id, - "data_integration_ref": data_integration_ref, - "fetch_data_query": fetch_data_query, - "project_name": project_name, - }, - ) - - if join_learn_process is True: - task.result() - predictor_record = db.Predictor.query.get(predictor_record.id) - db.session.refresh(predictor_record) - else: - # to prevent memory leak need to add any callback - task.add_done_callback(empty_callback) - - return predictor_record - - @contextlib.contextmanager - def _catch_exception(self, model_identifier: Optional[Union[int, str]] = None): - try: - yield - except (ImportError, ModuleNotFoundError): - raise - except Exception as e: - if type(e) is MLProcessException: - e = e.base_exception - msg = str(e).strip() - if msg == "": - msg = e.__class__.__name__ - model_identifier = "" if model_identifier is None else f"/{model_identifier}" - msg = f"[{self.name}{model_identifier}]: {msg}" - raise MLEngineException(msg) from e diff --git a/mindsdb/integrations/libs/ml_handler_process/__init__.py b/mindsdb/integrations/libs/ml_handler_process/__init__.py deleted file mode 100644 index cc918049508..00000000000 --- a/mindsdb/integrations/libs/ml_handler_process/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from mindsdb.integrations.libs.ml_handler_process.create_validation_process import create_validation_process -from mindsdb.integrations.libs.ml_handler_process.create_engine_process import create_engine_process -from mindsdb.integrations.libs.ml_handler_process.update_engine_process import update_engine_process -from mindsdb.integrations.libs.ml_handler_process.describe_process import describe_process -from mindsdb.integrations.libs.ml_handler_process.predict_process import predict_process -from mindsdb.integrations.libs.ml_handler_process.update_process import update_process -from mindsdb.integrations.libs.ml_handler_process.learn_process import learn_process -from mindsdb.integrations.libs.ml_handler_process.func_call_process import func_call_process 
diff --git a/mindsdb/integrations/libs/ml_handler_process/create_engine_process.py b/mindsdb/integrations/libs/ml_handler_process/create_engine_process.py deleted file mode 100644 index 24977ca9c7e..00000000000 --- a/mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +++ /dev/null @@ -1,25 +0,0 @@ -import importlib - -from pandas import DataFrame - -from mindsdb.interfaces.storage.model_fs import HandlerStorage - - -def create_engine_process(connection_args: dict, integration_id: int, module_path: str) -> DataFrame: - module = importlib.import_module(module_path) - - if module.import_error is not None: - raise module.import_error - - result = None - - if hasattr(module.Handler, "create_engine"): - engine_storage = HandlerStorage(integration_id) - try: - result = module.Handler(engine_storage=engine_storage, model_storage=None).create_engine( - connection_args=connection_args - ) - except NotImplementedError: - return None - - return result diff --git a/mindsdb/integrations/libs/ml_handler_process/create_validation_process.py b/mindsdb/integrations/libs/ml_handler_process/create_validation_process.py deleted file mode 100644 index a90a140be92..00000000000 --- a/mindsdb/integrations/libs/ml_handler_process/create_validation_process.py +++ /dev/null @@ -1,14 +0,0 @@ -import importlib - -from mindsdb.interfaces.storage.model_fs import HandlerStorage - - -def create_validation_process(target: str, args: dict, integration_id: int, module_path: str) -> None: - module = importlib.import_module(module_path) - - if hasattr(module.Handler, 'create_validation'): - module.Handler.create_validation( - target, - args=args, - handler_storage=HandlerStorage(integration_id) - ) diff --git a/mindsdb/integrations/libs/ml_handler_process/describe_process.py b/mindsdb/integrations/libs/ml_handler_process/describe_process.py deleted file mode 100644 index 4f3c44b402f..00000000000 --- a/mindsdb/integrations/libs/ml_handler_process/describe_process.py +++ /dev/null @@ 
-1,117 +0,0 @@ -import importlib -from textwrap import dedent -from types import ModuleType -from typing import Optional, Union - -from pandas import DataFrame - -import mindsdb.interfaces.storage.db as db -from mindsdb.utilities.config import Config -from mindsdb.interfaces.storage.model_fs import ModelStorage, HandlerStorage -from mindsdb.interfaces.model.model_controller import ModelController - - -def get_module_import_error_str(module: ModuleType) -> str: - '''Make a str with human-readable module import error message - - Atrs: - module (ModuleType): module with import error - - Returns: - str: error message - ''' - is_cloud = Config().get('cloud', False) - - msg = dedent(f'''\ - ML engine '{module.name}' cannot be used. Reason is: - {module.import_error} - ''') - - if is_cloud is False: - msg += '\n' - msg += dedent(f'''\ - If error is related to missing dependencies, then try to run command in shell and restart mindsdb: - pip install mindsdb[{module.name}] - ''') - - return msg - - -def describe_process(integration_id: int, attribute: Optional[Union[str, list]], - model_id: int, module_path: str) -> DataFrame: - '''get a model description - - Args: - model_id (int): id of the model - integration_id (int): id of the integration - attribute (Optional[Union[str, list]]): attribute, or list model attributes to describe - module_path: (str): path integration module - - Returns: - DataFrame: usually 1-row dataframe with model description - ''' - module = importlib.import_module(module_path) - - handlerStorage = HandlerStorage(integration_id) - modelStorage = ModelStorage(model_id) - - model_record = db.Predictor.query.get(model_id) - if model_record is None: - return DataFrame(['The model does not exist'], columns=['error']) - - if isinstance(attribute, str) and attribute.lower() == 'import_error': - return DataFrame([get_module_import_error_str(module)], columns=['error']) - - if attribute is not None: - if module.import_error is not None: - return 
DataFrame([get_module_import_error_str(module)], columns=['error']) - - try: - ml_handler = module.Handler( - engine_storage=handlerStorage, - model_storage=modelStorage - ) - return ml_handler.describe(attribute) - except NotImplementedError: - return DataFrame() - except Exception as e: - return DataFrame( - [f'{e.__class__.__name__}: {e}'], - columns=['error'] - ) - else: - model_info = ModelController.get_model_info(model_record) - - attrs_df = DataFrame() - if module.import_error is not None: - attrs_df = DataFrame(['import_error'], columns=['error']) - model_error = model_info['ERROR'][0] or '-' - model_info['ERROR'][0] = 'ML engine error:\n\n' - model_info['ERROR'][0] += get_module_import_error_str(module) - model_info['ERROR'][0] += '\nModel error:\n\n' - model_info['ERROR'][0] += model_error - else: - try: - ml_handler = module.Handler( - engine_storage=handlerStorage, - model_storage=modelStorage - ) - attrs_df = ml_handler.describe(attribute) - except NotImplementedError: - pass - except Exception as e: - model_error = model_info['ERROR'][0] or '-' - model_info['ERROR'][0] = 'ML engine error:\n\n' - model_info['ERROR'][0] += f'{e.__class__.__name__}: {e}\n' - model_info['ERROR'][0] += '\nModel error:\n\n' - model_info['ERROR'][0] += model_error - - attributes = [] - if len(attrs_df) > 0 and len(attrs_df.columns) > 0: - attributes = list(attrs_df[attrs_df.columns[0]]) - if len(attributes) == 1 and isinstance(attributes[0], list): - # first cell already has a list - attributes = attributes[0] - - model_info.insert(0, 'TABLES', [attributes]) - return model_info diff --git a/mindsdb/integrations/libs/ml_handler_process/func_call_process.py b/mindsdb/integrations/libs/ml_handler_process/func_call_process.py deleted file mode 100644 index f8b49f685ce..00000000000 --- a/mindsdb/integrations/libs/ml_handler_process/func_call_process.py +++ /dev/null @@ -1,21 +0,0 @@ -import importlib - -from mindsdb.interfaces.storage.model_fs import HandlerStorage - - -def 
func_call_process(name: str, args: dict, integration_id: int, module_path: str) -> None: - module = importlib.import_module(module_path) - - if module.import_error is not None: - raise module.import_error - - result = None - - if hasattr(module.Handler, "function_call"): - engine_storage = HandlerStorage(integration_id) - try: - result = module.Handler(engine_storage=engine_storage, model_storage=None).function_call(name, args) - except NotImplementedError: - return None - - return result diff --git a/mindsdb/integrations/libs/ml_handler_process/handlers_cacher.py b/mindsdb/integrations/libs/ml_handler_process/handlers_cacher.py deleted file mode 100644 index 5ab5295cd84..00000000000 --- a/mindsdb/integrations/libs/ml_handler_process/handlers_cacher.py +++ /dev/null @@ -1,28 +0,0 @@ -import time -from collections import UserDict - - -class HandlersCache(UserDict): - def __init__(self, max_size: int = 5) -> None: - self._max_size = max_size - super().__init__() - - def __setitem__(self, key, value) -> None: - if len(self.data) > self._max_size: - sorted_elements = sorted( - self.data.items(), - key=lambda x: x[1]['last_usage_at'] - ) - del self.data[sorted_elements[0][0]] - self.data[key] = { - 'last_usage_at': time.time(), - 'handler': value - } - - def __getitem__(self, key: int) -> object: - el = super().__getitem__(key) - el['last_usage_at'] = time.time() - return el['handler'] - - -handlers_cacher = HandlersCache() diff --git a/mindsdb/integrations/libs/ml_handler_process/learn_process.py b/mindsdb/integrations/libs/ml_handler_process/learn_process.py deleted file mode 100644 index 9e451b70ca5..00000000000 --- a/mindsdb/integrations/libs/ml_handler_process/learn_process.py +++ /dev/null @@ -1,151 +0,0 @@ -import os -import importlib -import datetime as dt - -from sqlalchemy.orm.attributes import flag_modified - -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast import Identifier, Select, Star, NativeQuery - -from 
mindsdb.api.executor.sql_query import SQLQuery -import mindsdb.utilities.profiler as profiler -from mindsdb.utilities.functions import mark_process -from mindsdb.utilities.config import Config -from mindsdb.utilities.context import context as ctx -from mindsdb.utilities import log -import mindsdb.interfaces.storage.db as db -from mindsdb.interfaces.storage.model_fs import ModelStorage, HandlerStorage -from mindsdb.interfaces.model.functions import get_model_records -from mindsdb.integrations.utilities.utils import format_exception_error -from mindsdb.integrations.utilities.sql_utils import make_sql_session -from mindsdb.integrations.libs.const import PREDICTOR_STATUS -from mindsdb.integrations.libs.ml_handler_process.handlers_cacher import handlers_cacher - -logger = log.getLogger(__name__) - - -@mark_process(name="learn") -def learn_process( - data_integration_ref: dict, - problem_definition: dict, - fetch_data_query: str, - project_name: str, - model_id: int, - integration_id: int, - base_model_id: int, - set_active: bool, - module_path: str, -): - ctx.profiling = {"level": 0, "enabled": True, "pointer": None, "tree": None} - profiler.set_meta(query="learn_process", api="http", environment=Config().get("environment")) - with profiler.Context("learn_process"): - from mindsdb.interfaces.database.database import DatabaseController - - try: - predictor_record = db.Predictor.query.with_for_update().get(model_id) - predictor_record.training_metadata["process_id"] = os.getpid() - flag_modified(predictor_record, "training_metadata") - db.session.commit() - - target = problem_definition.get("target", None) - training_data_df = None - if data_integration_ref is not None: - database_controller = DatabaseController() - sql_session = make_sql_session() - if data_integration_ref["type"] == "integration": - integration_name = database_controller.get_integration(data_integration_ref["id"])["name"] - query = Select( - targets=[Star()], - 
from_table=NativeQuery(integration=Identifier(integration_name), query=fetch_data_query), - ) - sqlquery = SQLQuery(query, session=sql_session) - if data_integration_ref["type"] == "system": - query = Select( - targets=[Star()], from_table=NativeQuery(integration=Identifier("log"), query=fetch_data_query) - ) - sqlquery = SQLQuery(query, session=sql_session) - elif data_integration_ref["type"] == "view": - project = database_controller.get_project(project_name) - query_ast = parse_sql(fetch_data_query) - view_meta = project.get_view_meta(query_ast) - sqlquery = SQLQuery(view_meta["query_ast"], session=sql_session) - elif data_integration_ref["type"] == "project": - query_ast = parse_sql(fetch_data_query) - sqlquery = SQLQuery(query_ast, session=sql_session) - - training_data_df = sqlquery.fetched_data.to_df() - - training_data_columns_count, training_data_rows_count = 0, 0 - if training_data_df is not None: - training_data_columns_count = len(training_data_df.columns) - training_data_rows_count = len(training_data_df) - - predictor_record.training_data_columns_count = training_data_columns_count - predictor_record.training_data_rows_count = training_data_rows_count - db.session.commit() - - module = importlib.import_module(module_path) - - # check if module is imported successfully and raise exception if not - if module.import_error is not None: - raise module.import_error - - handlerStorage = HandlerStorage(integration_id) - modelStorage = ModelStorage(model_id) - modelStorage.fileStorage.push() # FIXME - - kwargs = {} - if base_model_id is not None: - kwargs["base_model_storage"] = ModelStorage(base_model_id) - kwargs["base_model_storage"].fileStorage.pull() - ml_handler = module.Handler(engine_storage=handlerStorage, model_storage=modelStorage, **kwargs) - handlers_cacher[predictor_record.id] = ml_handler - - if not ml_handler.generative and target is not None: - if training_data_df is not None and target not in training_data_df.columns: - # is the case 
different? convert column case in input dataframe - col_names = {c.lower(): c for c in training_data_df.columns} - target_found = col_names.get(target.lower()) - if target_found: - training_data_df.rename(columns={target_found: target}, inplace=True) - else: - raise Exception( - f'Prediction target "{target}" not found in training dataframe: {list(training_data_df.columns)}' - ) - - # create new model - if base_model_id is None: - with profiler.Context("create"): - ml_handler.create(target, df=training_data_df, args=problem_definition) - - # fine-tune (partially train) existing model - else: - # load model from previous version, use it as starting point - with profiler.Context("finetune"): - problem_definition["base_model_id"] = base_model_id - ml_handler.finetune(df=training_data_df, args=problem_definition) - - predictor_record.status = PREDICTOR_STATUS.COMPLETE - predictor_record.active = set_active - db.session.commit() - # if retrain and set_active after success creation - if set_active is True: - models = get_model_records( - name=predictor_record.name, project_id=predictor_record.project_id, active=None - ) - for model in models: - model.active = False - models = [x for x in models if x.status == PREDICTOR_STATUS.COMPLETE] - models.sort(key=lambda x: x.created_at) - models[-1].active = True - except Exception as e: - logger.exception("Error during 'learn' process:") - error_message = format_exception_error(e) - - predictor_record = db.Predictor.query.with_for_update().get(model_id) - predictor_record.data = {"error": error_message} - predictor_record.status = PREDICTOR_STATUS.ERROR - db.session.commit() - - predictor_record.training_stop_at = dt.datetime.now() - db.session.commit() diff --git a/mindsdb/integrations/libs/ml_handler_process/predict_process.py b/mindsdb/integrations/libs/ml_handler_process/predict_process.py deleted file mode 100644 index daf9636a2a5..00000000000 --- a/mindsdb/integrations/libs/ml_handler_process/predict_process.py +++ 
/dev/null @@ -1,35 +0,0 @@ -import importlib - -from pandas import DataFrame - -import mindsdb.interfaces.storage.db as db -from mindsdb.interfaces.storage.model_fs import ModelStorage, HandlerStorage -from mindsdb.integrations.libs.ml_handler_process.handlers_cacher import handlers_cacher -from mindsdb.utilities.functions import mark_process - - -@mark_process(name="learn") -def predict_process( - integration_id: int, - predictor_record: db.Predictor, - args: dict, - module_path: str, - ml_engine_name: str, - dataframe: DataFrame, -) -> DataFrame: - module = importlib.import_module(module_path) - - if predictor_record.id not in handlers_cacher: - handlerStorage = HandlerStorage(integration_id) - modelStorage = ModelStorage(predictor_record.id) - ml_handler = module.Handler( - engine_storage=handlerStorage, - model_storage=modelStorage, - ) - handlers_cacher[predictor_record.id] = ml_handler - else: - ml_handler = handlers_cacher[predictor_record.id] - - predictions = ml_handler.predict(dataframe, args) - ml_handler.close() - return predictions diff --git a/mindsdb/integrations/libs/ml_handler_process/update_engine_process.py b/mindsdb/integrations/libs/ml_handler_process/update_engine_process.py deleted file mode 100644 index ee6072e1e28..00000000000 --- a/mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +++ /dev/null @@ -1,25 +0,0 @@ -import importlib - -from pandas import DataFrame - -from mindsdb.interfaces.storage.model_fs import HandlerStorage - - -def update_engine_process(connection_args: dict, integration_id: int, module_path: str) -> DataFrame: - module = importlib.import_module(module_path) - - if module.import_error is not None: - raise module.import_error - - result = None - - if hasattr(module.Handler, "update_engine"): - engine_storage = HandlerStorage(integration_id) - try: - result = module.Handler(engine_storage=engine_storage, model_storage=None).update_engine( - connection_args=connection_args - ) - except 
NotImplementedError: - return None - - return result diff --git a/mindsdb/integrations/libs/ml_handler_process/update_process.py b/mindsdb/integrations/libs/ml_handler_process/update_process.py deleted file mode 100644 index 060f4a7b052..00000000000 --- a/mindsdb/integrations/libs/ml_handler_process/update_process.py +++ /dev/null @@ -1,22 +0,0 @@ -import importlib - -from mindsdb.interfaces.storage.model_fs import HandlerStorage, ModelStorage - - -def update_process(args: dict, integration_id: int, module_path: str, model_id: int) -> None: - module = importlib.import_module(module_path) - - if module.import_error is not None: - raise module.import_error - - result = None - - if hasattr(module.Handler, "upgate"): - engine_storage = HandlerStorage(integration_id) - model_storage = ModelStorage(model_id) - try: - result = module.Handler(engine_storage=engine_storage, model_storage=model_storage).upgate(args=args) - except NotImplementedError: - return None - - return result diff --git a/mindsdb/integrations/libs/passthrough.py b/mindsdb/integrations/libs/passthrough.py deleted file mode 100644 index f535d3dfeb8..00000000000 --- a/mindsdb/integrations/libs/passthrough.py +++ /dev/null @@ -1,477 +0,0 @@ -""" -PassthroughMixin — generic HTTP passthrough for authenticated REST APIs. - -A handler opts in by declaring three class attributes: - - class MyHandler(APIHandler, PassthroughMixin): - _bearer_token_arg = "api_key" # key in connection_data - _base_url_default = "https://api.example.com" # fallback if user omits - _test_request = PassthroughRequest("GET", "/me") - -The mixin defaults to ``Authorization: Bearer ``. Handlers using a -different auth scheme (e.g. Shopify's ``X-Shopify-Access-Token``) override -``_auth_header_name`` and ``_auth_header_format`` — see CHANGE 3. - -The mixin reads ``self.connection_data`` (a dict populated from -integration setup) to pull the token, resolve the base URL, and enforce -the host allowlist. 
Handlers that need custom URL composition (e.g. -``http://{host}:{port}``) override ``_build_base_url``. - -``PassthroughProtocol`` is a structural type describing the two public -methods (``api_passthrough`` and ``test_passthrough``). The HTTP layer -checks against the protocol rather than the mixin class, so a handler -can satisfy the contract without inheriting the default implementation. -""" - -import ipaddress -import os -import time -from typing import Any, Protocol, runtime_checkable -from urllib.parse import urlparse - -import requests - -from mindsdb.integrations.libs.passthrough_types import ( - ALLOWED_METHODS, - FORBIDDEN_REQUEST_HEADERS, - HOP_BY_HOP_RESPONSE_HEADERS, - HostNotAllowedError, - PassthroughConfigError, - PassthroughRequest, - PassthroughResponse, - PassthroughValidationError, -) -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -PASSTHROUGH_TIMEOUT_SECONDS = int(os.getenv("MINDSDB_PASSTHROUGH_TIMEOUT_SECONDS", "30")) -PASSTHROUGH_MAX_REQUEST_BYTES = int(os.getenv("MINDSDB_PASSTHROUGH_MAX_REQUEST_BYTES", str(1 * 1024 * 1024))) -PASSTHROUGH_MAX_RESPONSE_BYTES = int(os.getenv("MINDSDB_PASSTHROUGH_MAX_RESPONSE_BYTES", str(10 * 1024 * 1024))) - -REDACTED_SENTINEL = "[REDACTED_API_KEY]" - - -def _is_private_host(hostname: str) -> bool: - """Return True if `hostname` resolves to a private/loopback/link-local IP literal. - - Only IP literals are checked; DNS resolution is intentionally not performed - (handlers may legitimately point at an internal DNS name the operator has - approved via `allowed_hosts`). The IP-literal check prevents a caller from - smuggling `http://127.0.0.1/` or `http://10.0.0.1/` through a typo'd base_url. 
- """ - try: - ip = ipaddress.ip_address(hostname) - except ValueError: - return False - return ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_multicast or ip.is_reserved - - -def _host_matches(host: str, allowlist: list[str]) -> bool: - if not host: - return False - host = host.lower() - return any(host == entry.lower() for entry in allowlist) - - -@runtime_checkable -class PassthroughProtocol(Protocol): - """Structural contract for handlers that expose HTTP passthrough. - - The HTTP namespace checks against this Protocol rather than the - `PassthroughMixin` class, which lets future handlers satisfy the - contract without inheriting the default implementation. - """ - - def api_passthrough(self, req: PassthroughRequest) -> PassthroughResponse: ... - - def test_passthrough(self) -> dict[str, Any]: ... - - -class PassthroughMixin: - # Required overrides - _bearer_token_arg: str = "" - - # Optional overrides - _base_url_arg: str = "base_url" - _base_url_default: str | None = None - _allowed_hosts_arg: str = "allowed_hosts" - _default_headers_arg: str = "default_headers" - - # Auth header. Defaults to bearer-compatible; handlers using a custom - # scheme (e.g. Shopify's `X-Shopify-Access-Token: `) override - # both attrs. The value from `_get_bearer_token()` is formatted into - # `{token}` — the method name is retained for backwards compat but - # now represents "the auth secret" regardless of scheme. - _auth_header_name: str = "Authorization" - _auth_header_format: str = "Bearer {token}" - - # Declarative auth mode surfaced to /capabilities. One handler instance - # has exactly one auth mode, so this is a single string; the API - # response still wraps it in a list because a future contract may - # surface handlers supporting multiple configurations. Known values: - # "bearer", "custom", "oauth_refresh". 
Handlers that use a non-bearer - # header scheme or a refresh-aware mixin should set this explicitly — - # don't infer it from _auth_header_format, since OAuth-refresh also - # uses "Bearer {token}" but is a distinct mode. - _auth_mode: str = "bearer" - - # Canonical sanity-check request for `test_passthrough()`. Handlers MUST - # set this if they want the /passthrough/test endpoint to do anything - # useful. `None` means "test endpoint returns 'not implemented'". - _test_request: PassthroughRequest | None = None - - # Stamped on every upstream request so the upstream can identify our - # traffic for support/debugging. See design §13 (q3). - _upstream_marker_header: str = "X-Minds-Passthrough" - - # Hook: override when URL composition is more than "take a string" - # (e.g. strapi composes from host+port). - def _build_base_url(self) -> str | None: - data = self._get_connection_data() - value = data.get(self._base_url_arg) if self._base_url_arg else None - if value: - return str(value).rstrip("/") - if self._base_url_default is not None: - return self._base_url_default.rstrip("/") - return None - - def _get_connection_data(self) -> dict[str, Any]: - """Return the handler's stored connection_data dict. - - Handlers store this differently; we check the common attribute names - so most handlers don't need to override. - """ - for attr in ("connection_data", "_connection_data"): - value = getattr(self, attr, None) - if isinstance(value, dict): - return value - return {} - - def _get_bearer_token(self) -> str: - if not self._bearer_token_arg: - raise PassthroughConfigError("handler did not declare _bearer_token_arg") - token = self._get_connection_data().get(self._bearer_token_arg) - if not token: - raise PassthroughConfigError(f"bearer token ('{self._bearer_token_arg}') is missing from connection_data") - return str(token) - - def _resolve_url(self, path: str) -> tuple[str, str]: - """Return ``(url, hostname)`` for the outgoing request. 
- - `path` is appended to the base URL verbatim. After joining we parse - the result and compare the hostname against the allowlist — path - injection tricks like ``@evil.com`` or ``//evil.com`` are rejected - at the hostname-comparison step, not by string matching. - """ - if not path.startswith("/"): - raise PassthroughValidationError("path must start with '/'") - base = self._build_base_url() - if not base: - raise PassthroughConfigError("base_url is not configured for this datasource") - - url = f"{base}{path}" - parsed = urlparse(url) - if parsed.scheme not in ("http", "https") or not parsed.hostname: - raise PassthroughValidationError(f"resolved URL is not valid: {url}") - return url, parsed.hostname - - def _allowed_hosts(self, default_host: str) -> list[str]: - data = self._get_connection_data() - allowed = data.get(self._allowed_hosts_arg) - if isinstance(allowed, list) and allowed: - return [str(h) for h in allowed] - return [default_host] - - def _check_host_allowed(self, hostname: str) -> None: - allowlist = self._allowed_hosts(hostname) - if allowlist == ["*"]: - return - if not _host_matches(hostname, allowlist): - raise HostNotAllowedError(f"host '{hostname}' is not in the datasource allowlist") - if _is_private_host(hostname): - raise HostNotAllowedError( - f"host '{hostname}' resolves to a private/loopback address; " - "set allowed_hosts=['*'] to bypass this check (explicit " - "listing is ignored for private IPs)" - ) - - def _build_outgoing_headers(self, caller_headers: dict[str, str], bearer: str) -> dict[str, str]: - """Merge caller headers (filtered) + default_headers + Authorization.""" - out: dict[str, str] = {} - data = self._get_connection_data() - defaults = data.get(self._default_headers_arg) or {} - if isinstance(defaults, dict): - out.update({str(k): str(v) for k, v in defaults.items()}) - for name, value in (caller_headers or {}).items(): - if name.lower() in FORBIDDEN_REQUEST_HEADERS: - continue - if 
name.lower().startswith("proxy-"): - continue - out[name] = value - out[self._auth_header_name] = self._auth_header_format.format(token=bearer) - out[self._upstream_marker_header] = "1" - return out - - def _secrets_for_scrub(self) -> list[str]: - """Values that must not appear in the response returned to the caller.""" - secrets: list[str] = [] - try: - secrets.append(self._get_bearer_token()) - except PassthroughConfigError: - pass - data = self._get_connection_data() - defaults = data.get(self._default_headers_arg) or {} - if isinstance(defaults, dict): - for value in defaults.values(): - s = str(value) - if len(s) >= 16: - secrets.append(s) - return secrets - - def _scrub(self, text: str, secrets: list[str]) -> str: - for s in secrets: - if s: - text = text.replace(s, REDACTED_SENTINEL) - return text - - def _scrub_bytes(self, data: bytes, secrets: list[str]) -> bytes: - """Byte-level secret scrub (spec §7.6). - - Replacing on raw bytes before decoding prevents U+FFFD substitutions - from `errors="replace"` from fragmenting a secret and letting part of - it survive the scrub. 
- """ - sentinel = REDACTED_SENTINEL.encode("utf-8") - for s in secrets: - if s: - data = data.replace(s.encode("utf-8"), sentinel) - return data - - def _filter_response_headers(self, headers: dict[str, str], secrets: list[str]) -> dict[str, str]: - filtered: dict[str, str] = {} - for name, value in headers.items(): - if name.lower() in HOP_BY_HOP_RESPONSE_HEADERS: - continue - filtered[name] = self._scrub(str(value), secrets) - return filtered - - def _read_capped_body(self, response: requests.Response) -> bytes: - """Read the response body in chunks, abort if it exceeds the cap.""" - chunks: list[bytes] = [] - total = 0 - try: - for chunk in response.iter_content(chunk_size=64 * 1024): - if not chunk: - continue - total += len(chunk) - if total > PASSTHROUGH_MAX_RESPONSE_BYTES: - raise PassthroughValidationError(f"response body exceeded {PASSTHROUGH_MAX_RESPONSE_BYTES} bytes") - chunks.append(chunk) - finally: - response.close() - return b"".join(chunks) - - # ------------------------------------------------------------------ - # Public API - # ------------------------------------------------------------------ - - def api_passthrough(self, req: PassthroughRequest) -> PassthroughResponse: - method = (req.method or "").upper() - if method not in ALLOWED_METHODS: - raise PassthroughValidationError(f"method '{req.method}' is not allowed") - - connection_data = self._get_connection_data() - allowed_methods_cfg = connection_data.get("allowed_methods") - if allowed_methods_cfg is not None: - if not isinstance(allowed_methods_cfg, list): - raise PassthroughConfigError("'allowed_methods' must be a list of HTTP method strings") - if not all(isinstance(m, str) for m in allowed_methods_cfg): - raise PassthroughConfigError("'allowed_methods' must be a list of HTTP method strings") - allowed_upper = {m.upper() for m in allowed_methods_cfg} - unknown = sorted(allowed_upper - ALLOWED_METHODS) - if unknown: - raise PassthroughConfigError( - f"'allowed_methods' contains 
unsupported verbs: {unknown}. Allowed: {sorted(ALLOWED_METHODS)}" - ) - if method not in allowed_upper: - raise PassthroughValidationError( - f"method '{method}' is not permitted by this datasource", - error_code="method_not_allowed", - http_status=405, - ) - - request_bytes = 0 - if req.body is not None: - # requests will serialize dict bodies to JSON; we cap on the - # serialized length. For raw strings / bytes we cap directly. - import json as _json - - if isinstance(req.body, (dict, list)): - body_bytes_for_size = _json.dumps(req.body).encode("utf-8") - elif isinstance(req.body, (bytes, bytearray)): - body_bytes_for_size = bytes(req.body) - else: - body_bytes_for_size = str(req.body).encode("utf-8") - if len(body_bytes_for_size) > PASSTHROUGH_MAX_REQUEST_BYTES: - raise PassthroughValidationError(f"request body exceeded {PASSTHROUGH_MAX_REQUEST_BYTES} bytes") - request_bytes = len(body_bytes_for_size) - - url, hostname = self._resolve_url(req.path) - self._check_host_allowed(hostname) - bearer = self._get_bearer_token() - headers = self._build_outgoing_headers(req.headers or {}, bearer) - - request_kwargs: dict[str, Any] = { - "headers": headers, - "params": req.query or None, - "timeout": PASSTHROUGH_TIMEOUT_SECONDS, - "stream": True, - } - if req.body is not None: - if isinstance(req.body, (dict, list)): - request_kwargs["json"] = req.body - else: - request_kwargs["data"] = req.body - - datasource_name = getattr(self, "name", None) or "?" 
- start = time.monotonic() - response = requests.request(method, url, **request_kwargs) - body_bytes = self._read_capped_body(response) - duration_ms = int((time.monotonic() - start) * 1000) - - secrets = self._secrets_for_scrub() - body_bytes = self._scrub_bytes(body_bytes, secrets) - content_type = response.headers.get("Content-Type", "") or "" - out_headers = self._filter_response_headers(dict(response.headers), secrets) - - body: Any - if "application/json" in content_type.lower(): - try: - text = body_bytes.decode("utf-8", errors="replace") - import json as _json - - body = _json.loads(text) if text else None - except ValueError: - body = body_bytes.decode("utf-8", errors="replace") - else: - body = body_bytes.decode("utf-8", errors="replace") - - self._log_passthrough_call( - method=method, - path=req.path, - datasource_name=datasource_name, - upstream_status_code=response.status_code, - request_bytes=request_bytes, - response_bytes=len(body_bytes), - duration_ms=duration_ms, - ) - - return PassthroughResponse( - status_code=response.status_code, - headers=out_headers, - body=body, - content_type=content_type.split(";", 1)[0].strip() or None, - ) - - def _log_passthrough_call( - self, - *, - method: str, - path: str, - datasource_name: str, - upstream_status_code: int, - request_bytes: int, - response_bytes: int, - duration_ms: int, - ) -> None: - """Emit one audit line per passthrough call (spec §7.8). - - Never logs headers or bodies. user_id / org_id are pulled from the - MindsDB request context when available; in test/dev invocations - where the context is not populated, they are omitted. 
- """ - fields: dict[str, Any] = { - "method": method, - "path": path, - "datasource_name": datasource_name, - "upstream_status_code": upstream_status_code, - "request_bytes": request_bytes, - "response_bytes": response_bytes, - "duration_ms": duration_ms, - } - # TODO: org_id lives in Minds; when the passthrough is called via the - # Minds gateway the org scope should be propagated and logged here. - try: - from mindsdb.utilities.context import context as _ctx - - user_id = getattr(_ctx, "user_id", None) - company_id = getattr(_ctx, "company_id", None) - if user_id is not None: - fields["user_id"] = user_id - if company_id is not None: - fields["company_id"] = company_id - except Exception: - pass - # DEBUG level per team decision: per-request audit logging at - # info level happens in Minds at the HTTP layer. This log is - # intended for mindsdb-side troubleshooting only. - logger.debug("passthrough %s", fields) - - def test_passthrough(self) -> dict[str, Any]: - """Run the handler's canonical sanity-check call (see §6.1a). - - Returns a structured dict the HTTP layer forwards to the caller: - { "ok": bool, "status_code": int?, "host": str?, "latency_ms": int?, - "error_code": str?, "message": str? 
} - """ - if self._test_request is None: - return { - "ok": False, - "error_code": "not_implemented", - "message": "this handler does not define a passthrough test request", - } - - start = time.monotonic() - try: - resp = self.api_passthrough(self._test_request) - except HostNotAllowedError as e: - return {"ok": False, "error_code": e.error_code, "message": str(e)} - except PassthroughConfigError as e: - return {"ok": False, "error_code": e.error_code, "message": str(e)} - except PassthroughValidationError as e: - return {"ok": False, "error_code": e.error_code, "message": str(e)} - except requests.exceptions.Timeout as e: - return {"ok": False, "error_code": "timeout", "message": str(e)} - except requests.exceptions.ConnectionError as e: - return {"ok": False, "error_code": "network", "message": str(e)} - except Exception as e: # noqa: BLE001 - logger.exception("passthrough test failed unexpectedly") - return {"ok": False, "error_code": "unknown", "message": str(e)} - - latency_ms = int((time.monotonic() - start) * 1000) - try: - _, host = self._resolve_url(self._test_request.path) - except Exception: - host = None - - if 200 <= resp.status_code < 300: - return {"ok": True, "status_code": resp.status_code, "host": host, "latency_ms": latency_ms} - if resp.status_code in (401, 403): - return { - "ok": False, - "status_code": resp.status_code, - "host": host, - "latency_ms": latency_ms, - "error_code": "auth_failed", - "message": "upstream rejected credentials; base URL and allowlist look correct", - } - return { - "ok": False, - "status_code": resp.status_code, - "host": host, - "latency_ms": latency_ms, - "error_code": "upstream_error", - "message": f"upstream returned {resp.status_code}", - } diff --git a/mindsdb/integrations/libs/passthrough_types.py b/mindsdb/integrations/libs/passthrough_types.py deleted file mode 100644 index 63d1cb14524..00000000000 --- a/mindsdb/integrations/libs/passthrough_types.py +++ /dev/null @@ -1,94 +0,0 @@ -""" -Request/response 
dataclasses and error types for the REST passthrough path. - -These are the payloads exchanged between the HTTP layer and -`PassthroughMixin`. They are intentionally framework-agnostic so the -mixin can be unit-tested without Flask. -""" - -from dataclasses import dataclass, field -from typing import Any - - -ALLOWED_METHODS = frozenset({"GET", "POST", "PUT", "PATCH", "DELETE"}) - -# Hop-by-hop and auth-related headers that must never come from the caller. -FORBIDDEN_REQUEST_HEADERS = frozenset( - h.lower() - for h in ( - "authorization", - "host", - "cookie", - "content-length", - "connection", - ) -) - -# Hop-by-hop response headers stripped before returning to the caller. -HOP_BY_HOP_RESPONSE_HEADERS = frozenset( - h.lower() - for h in ( - "connection", - "keep-alive", - "proxy-authenticate", - "proxy-authorization", - "te", - "trailers", - "transfer-encoding", - "upgrade", - "content-length", - ) -) - - -@dataclass -class PassthroughRequest: - method: str - path: str - query: dict[str, Any] = field(default_factory=dict) - headers: dict[str, str] = field(default_factory=dict) - body: Any = None - - -@dataclass -class PassthroughResponse: - status_code: int - headers: dict[str, str] - body: Any - content_type: str | None = None - - -class PassthroughError(Exception): - """Base class for passthrough failures that should not be leaked as 500s.""" - - error_code: str = "passthrough_error" - http_status: int = 400 - - def __init__(self, message: str, *, error_code: str | None = None, http_status: int | None = None): - super().__init__(message) - if error_code is not None: - self.error_code = error_code - if http_status is not None: - self.http_status = http_status - - -class PassthroughConfigError(PassthroughError): - error_code = "config_error" - http_status = 500 - - -class HostNotAllowedError(PassthroughError): - error_code = "host_not_allowed" - http_status = 400 - - -class PassthroughValidationError(PassthroughError): - error_code = "invalid_request" - 
http_status = 400 - - -class PassthroughNotSupportedError(PassthroughError): - """Raised when a handler does not implement the mixin.""" - - error_code = "passthrough_not_supported" - http_status = 501 diff --git a/mindsdb/integrations/libs/process_cache.py b/mindsdb/integrations/libs/process_cache.py deleted file mode 100644 index 8948a6e30d8..00000000000 --- a/mindsdb/integrations/libs/process_cache.py +++ /dev/null @@ -1,418 +0,0 @@ -import time -import threading -import traceback -from typing import Optional, Callable -from concurrent.futures import ProcessPoolExecutor, Future - -from pandas import DataFrame - -import mindsdb.interfaces.storage.db as db -from mindsdb.utilities.config import Config -from mindsdb.utilities.context import context as ctx -from mindsdb.utilities.ml_task_queue.const import ML_TASK_TYPE -from mindsdb.integrations.libs.ml_handler_process import ( - learn_process, - update_process, - predict_process, - describe_process, - create_engine_process, - update_engine_process, - create_validation_process, - func_call_process, -) - - -def init_ml_handler(module_path): - import importlib # noqa - - import mindsdb.integrations.libs.ml_handler_process # noqa - - db.init() - importlib.import_module(module_path) - - -def dummy_task(): - return None - - -def empty_callback(_task): - return None - - -class MLProcessException(Exception): - """Wrapper for exception to safely send it back to the main process. - - If exception can not be pickled (pickle.loads(pickle.dumps(e))) then it may lead to termination of the ML process. - Also in this case, the error sent to the user will not be relevant. This wrapper should prevent it. 
- """ - - base_exception_bytes: bytes = None - - def __init__(self, base_exception: Exception, message: str = None) -> None: - super().__init__(message) - traceback_text = "\n".join(traceback.format_exception(base_exception)) - self.message = f"{base_exception.__class__.__name__}: {base_exception}\n{traceback_text}" - - @property - def base_exception(self) -> Exception: - return RuntimeError(self.message) - - -class WarmProcess: - """Class-wrapper for a process that persist for a long time. The process - may be initialized with any handler requirements. Current implimentation - is based on ProcessPoolExecutor just because of multiprocessing.pool - produce daemon processes, which can not be used for learning. That - bahaviour may be changed only using inheritance. - """ - - def __init__(self, initializer: Optional[Callable] = None, initargs: tuple = ()): - """create and init new process - - Args: - initializer (Callable): the same as ProcessPoolExecutor initializer - initargs (tuple): the same as ProcessPoolExecutor initargs - """ - self.pool = ProcessPoolExecutor(1, initializer=initializer, initargs=initargs) - self.last_usage_at = time.time() - self._markers = set() - # region bacause of ProcessPoolExecutor does not start new process - # untill it get a task, we need manually run dummy task to force init. 
- self.task = self.pool.submit(dummy_task) - self._init_done = False - self.task.add_done_callback(self._init_done_callback) - # endregion - - def __del__(self): - self.shutdown() - - def shutdown(self, wait: bool = False) -> None: - """Like ProcessPoolExecutor.shutdown - - Args: - wait (bool): If True then shutdown will not return until all running futures have finished executing - """ - self.pool.shutdown(wait=wait) - - def _init_done_callback(self, _task): - """callback for initial task""" - self._init_done = True - - def _update_last_usage_at_callback(self, _task): - self.last_usage_at = time.time() - - def ready(self) -> bool: - """check is process ready to get a task or not - - Returns: - bool - """ - if self._init_done is False: - self.task.result() - self._init_done = True - if self.task is None or self.task.done(): - return True - return False - - def add_marker(self, marker: tuple): - """remember that that process processed task for that model - - Args: - marker (tuple): identifier of model - """ - if marker is not None: - self._markers.add(marker) - - def has_marker(self, marker: tuple) -> bool: - """check if that process processed task for model - - Args: - marker (tuple): identifier of model - - Returns: - bool - """ - if marker is None: - return False - return marker in self._markers - - def is_marked(self) -> bool: - """check if process has any marker - - Returns: - bool - """ - return len(self._markers) > 0 - - def apply_async(self, func: Callable, *args: tuple, **kwargs: dict) -> Future: - """Run new task - - Args: - func (Callable): function to run - args (tuple): args to be passed to function - kwargs (dict): kwargs to be passed to function - - Returns: - Future - """ - if not self.ready(): - raise Exception("Process task is not ready") - self.task = self.pool.submit(func, *args, **kwargs) - self.task.add_done_callback(self._update_last_usage_at_callback) - self.last_usage_at = time.time() - return self.task - - -def warm_function(func, context: 
str, *args, **kwargs): - ctx.load(context) - try: - return func(*args, **kwargs) - except Exception as e: - if type(e) in (ImportError, ModuleNotFoundError): - raise - raise MLProcessException(base_exception=e) - - -class ProcessCache: - """simple cache for WarmProcess-es""" - - def __init__(self, ttl: int = 120): - """Args: - ttl (int) time to live for unused process - """ - self.cache = {} - self._init = False - self._lock = threading.Lock() - self._ttl = ttl - self._keep_alive = {} - self._stop_event = threading.Event() - self.cleaner_thread = None - - def __del__(self): - self._stop_clean() - - def _start_clean(self) -> None: - """start worker that close connections after ttl expired""" - if isinstance(self.cleaner_thread, threading.Thread) and self.cleaner_thread.is_alive(): - return - self._stop_event.clear() - self.cleaner_thread = threading.Thread(target=self._clean, name="ProcessCache.clean") - self.cleaner_thread.daemon = True - self.cleaner_thread.start() - - def _stop_clean(self) -> None: - """stop clean worker""" - self._stop_event.set() - - def init(self): - """run processes for specified handlers""" - from mindsdb.interfaces.database.integrations import integration_controller - - preload_handlers = {} - config = Config() - is_cloud = config.get("cloud", False) # noqa - - if config["ml_task_queue"]["type"] != "redis": - if is_cloud: - huggingface_handler = integration_controller.get_handler_module("huggingface") - if huggingface_handler is not None and huggingface_handler.Handler is not None: - preload_handlers[huggingface_handler.Handler] = 1 - - openai_handler = integration_controller.get_handler_module("openai") - if openai_handler is not None and openai_handler.Handler is not None: - preload_handlers[openai_handler.Handler] = 1 - - with self._lock: - if self._init is False: - self._init = True - for handler in preload_handlers: - self._keep_alive[handler.name] = preload_handlers[handler] - self.cache[handler.name] = { - "last_usage_at": 
time.time(), - "handler_module": handler.__module__, - "processes": [ - WarmProcess(init_ml_handler, (handler.__module__,)) - for _x in range(preload_handlers[handler]) - ], - } - - def apply_async( - self, task_type: ML_TASK_TYPE, model_id: Optional[int], payload: dict, dataframe: Optional[DataFrame] = None - ) -> Future: - """run new task. If possible - do it in existing process, if not - start new one. - - Args: - task_type (ML_TASK_TYPE): type of the task (learn, predict, etc) - model_id (int): id of the model - payload (dict): any 'lightweight' data that needs to be send in the process - dataframe (DataFrame): DataFrame to be send in the process - - Returns: - Future - """ - self._start_clean() - handler_module_path = payload["handler_meta"]["module_path"] - integration_id = payload["handler_meta"]["integration_id"] - if task_type in (ML_TASK_TYPE.LEARN, ML_TASK_TYPE.FINETUNE): - func = learn_process - kwargs = { - "data_integration_ref": payload["data_integration_ref"], - "problem_definition": payload["problem_definition"], - "fetch_data_query": payload["fetch_data_query"], - "project_name": payload["project_name"], - "model_id": model_id, - "base_model_id": payload.get("base_model_id"), - "set_active": payload["set_active"], - "integration_id": integration_id, - "module_path": handler_module_path, - } - elif task_type == ML_TASK_TYPE.PREDICT: - func = predict_process - kwargs = { - "predictor_record": payload["predictor_record"], - "ml_engine_name": payload["handler_meta"]["engine"], - "args": payload["args"], - "dataframe": dataframe, - "integration_id": integration_id, - "module_path": handler_module_path, - } - elif task_type == ML_TASK_TYPE.DESCRIBE: - func = describe_process - kwargs = { - "attribute": payload.get("attribute"), - "model_id": model_id, - "integration_id": integration_id, - "module_path": handler_module_path, - } - elif task_type == ML_TASK_TYPE.CREATE_VALIDATION: - func = create_validation_process - kwargs = { - "target": 
payload.get("target"), - "args": payload.get("args"), - "integration_id": integration_id, - "module_path": handler_module_path, - } - elif task_type == ML_TASK_TYPE.CREATE_ENGINE: - func = create_engine_process - kwargs = { - "connection_args": payload["connection_args"], - "integration_id": integration_id, - "module_path": handler_module_path, - } - elif task_type == ML_TASK_TYPE.UPDATE_ENGINE: - func = update_engine_process - kwargs = { - "connection_args": payload["connection_args"], - "integration_id": integration_id, - "module_path": handler_module_path, - } - elif task_type == ML_TASK_TYPE.UPDATE: - func = update_process - kwargs = { - "args": payload["args"], - "integration_id": integration_id, - "model_id": model_id, - "module_path": handler_module_path, - } - elif task_type == ML_TASK_TYPE.FUNC_CALL: - func = func_call_process - kwargs = { - "name": payload["name"], - "args": payload["args"], - "integration_id": integration_id, - "module_path": handler_module_path, - } - else: - raise Exception(f"Unknown ML task type: {task_type}") - - ml_engine_name = payload["handler_meta"]["engine"] - model_marker = (model_id, payload["context"]["company_id"], payload["context"]["user_id"]) - with self._lock: - if ml_engine_name not in self.cache: - warm_process = WarmProcess(init_ml_handler, (handler_module_path,)) - self.cache[ml_engine_name] = { - "last_usage_at": None, - "handler_module": handler_module_path, - "processes": [warm_process], - } - else: - warm_process = None - if model_marker is not None: - try: - warm_process = next( - p - for p in self.cache[ml_engine_name]["processes"] - if p.ready() and p.has_marker(model_marker) - ) - except StopIteration: - pass - if warm_process is None: - try: - warm_process = next(p for p in self.cache[ml_engine_name]["processes"] if p.ready()) - except StopIteration: - pass - if warm_process is None: - warm_process = WarmProcess(init_ml_handler, (handler_module_path,)) - 
self.cache[ml_engine_name]["processes"].append(warm_process) - - task = warm_process.apply_async(warm_function, func, payload["context"], **kwargs) - self.cache[ml_engine_name]["last_usage_at"] = time.time() - warm_process.add_marker(model_marker) - return task - - def _clean(self) -> None: - """worker that stop unused processes""" - while self._stop_event.wait(timeout=10) is False: - with self._lock: - for handler_name in self.cache.keys(): - processes = self.cache[handler_name]["processes"] - processes.sort(key=lambda x: x.is_marked()) - - expected_count = 0 - if handler_name in self._keep_alive: - expected_count = self._keep_alive[handler_name] - - # stop processes which was used, it needs to free memory - for i, process in enumerate(processes): - if ( - process.ready() - and process.is_marked() - and (time.time() - process.last_usage_at) > self._ttl - ): - processes.pop(i) - # del process - process.shutdown() - break - - while expected_count > len(processes): - processes.append(WarmProcess(init_ml_handler, (self.cache[handler_name]["handler_module"],))) - - def shutdown(self, wait: bool = True) -> None: - """Call 'shutdown' for each process cache - - wait (bool): like ProcessPoolExecutor.shutdown wait arg. - """ - with self._lock: - for handler_name in self.cache: - for process in self.cache[handler_name]["processes"]: - process.shutdown(wait=wait) - self.cache[handler_name]["processes"] = [] - - def remove_processes_for_handler(self, handler_name: str) -> None: - """ - Remove all warm processes for a given handler. - This is useful when the previous processes use an outdated instance of the handler. - A good example is when the dependencies for a handler are installed after attempting to use the handler. - - Args: - handler_name (str): name of the handler. 
- """ - with self._lock: - if handler_name in self.cache: - for process in self.cache[handler_name]["processes"]: - process.shutdown() - - self.cache[handler_name]["processes"] = [] - - -process_cache = ProcessCache() diff --git a/mindsdb/integrations/libs/realtime_chat_handler.py b/mindsdb/integrations/libs/realtime_chat_handler.py deleted file mode 100644 index 375655c5b67..00000000000 --- a/mindsdb/integrations/libs/realtime_chat_handler.py +++ /dev/null @@ -1,33 +0,0 @@ -from typing import Callable - -from mindsdb.interfaces.chatbot.chatbot_message import ChatBotMessage -from mindsdb.interfaces.chatbot.chatbot_response import ChatBotResponse - - -class RealtimeChatHandler: - """Interface to send and receive messages over a chat application (Slack, RocketChat, etc)""" - - def __init__(self, name: str, on_message: Callable[[ChatBotMessage], None]): - self.name = name - # Should be called every time a message is received. - self.on_message = on_message - - def connect(self): - """Connects to chat application and starts listening for messages.""" - raise NotImplementedError() - - def disconnect(self): - """Disconnects from the chat application and stops listening for messages.""" - raise NotImplementedError() - - def send_message(self, message: ChatBotMessage) -> ChatBotResponse: - """ - Sends a message through the chat application - - Parameters: - message (ChatBotMessage): Message to send - - Returns: - response (ChatBotResponse): Response indicating whether the message was sent successfully - """ - raise NotImplementedError() diff --git a/mindsdb/integrations/libs/response.py b/mindsdb/integrations/libs/response.py deleted file mode 100644 index 3af33b444fa..00000000000 --- a/mindsdb/integrations/libs/response.py +++ /dev/null @@ -1,609 +0,0 @@ -import sys -from abc import ABC -from typing import Callable, Generator, ClassVar -from dataclasses import dataclass, fields - -import numpy -import pandas -import psutil - -from mindsdb.utilities import log -from 
mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE -from mindsdb_sql_parser.ast import ASTNode -from mindsdb.utilities.types.column import Column - - -logger = log.getLogger(__name__) - - -@dataclass(frozen=True) -class _INFORMATION_SCHEMA_COLUMNS_NAMES: - """Set of DataFrame columns that must be returned when calling `handler.get_columns(...)`. - These column names match the standard INFORMATION_SCHEMA.COLUMNS structure - used in SQL databases to describe table metadata. - """ - - COLUMN_NAME: str = "COLUMN_NAME" - DATA_TYPE: str = "DATA_TYPE" - ORDINAL_POSITION: str = "ORDINAL_POSITION" - COLUMN_DEFAULT: str = "COLUMN_DEFAULT" - IS_NULLABLE: str = "IS_NULLABLE" - CHARACTER_MAXIMUM_LENGTH: str = "CHARACTER_MAXIMUM_LENGTH" - CHARACTER_OCTET_LENGTH: str = "CHARACTER_OCTET_LENGTH" - NUMERIC_PRECISION: str = "NUMERIC_PRECISION" - NUMERIC_SCALE: str = "NUMERIC_SCALE" - DATETIME_PRECISION: str = "DATETIME_PRECISION" - CHARACTER_SET_NAME: str = "CHARACTER_SET_NAME" - COLLATION_NAME: str = "COLLATION_NAME" - MYSQL_DATA_TYPE: str = "MYSQL_DATA_TYPE" - - -INF_SCHEMA_COLUMNS_NAMES = _INFORMATION_SCHEMA_COLUMNS_NAMES() -INF_SCHEMA_COLUMNS_NAMES_SET = set(f.name for f in fields(INF_SCHEMA_COLUMNS_NAMES)) - - -class HandlerStatusResponse: - def __init__( - self, - success: bool = True, - error_message: str = None, - redirect_url: str = None, - copy_storage: str = None, - ) -> None: - self.success = success - self.error_message = error_message - self.redirect_url = redirect_url - self.copy_storage = copy_storage - - def to_json(self): - data = {"success": self.success, "error": self.error_message} - if self.redirect_url is not None: - data["redirect_url"] = self.redirect_url - if self.copy_storage is not None: - data["copy_storage"] = self.copy_storage - return data - - def __repr__(self): - return ( - f"{self.__class__.__name__}(" - f"success={self.success}, " - 
f"error={self.error_message}, " - f"redirect_url={self.redirect_url}, " - f"copy_storage={self.copy_storage})" - ) - - -class DataHandlerResponse(ABC): - """Base class for all data handler responses.""" - - type: ClassVar[str] - - @property - def resp_type(self): - # For back compatibility with old code, use the type attribute instead of resp_type - return self.type - - -class ErrorResponse(DataHandlerResponse): - """Response for error cases. - - Attributes: - type: RESPONSE_TYPE.ERROR - error_code: int - error_message: str | None - is_expected_error: bool - exception: Exception | None - """ - - type: ClassVar[str] = RESPONSE_TYPE.ERROR - error_code: int - error_message: str | None - is_expected_error: bool - exception: Exception | None - - def __init__(self, error_code: int = 0, error_message: str | None = None, is_expected_error: bool = False): - self.error_code = error_code - self.error_message = error_message - self.is_expected_error = is_expected_error - self.exception = None - current_exception = sys.exc_info() - if current_exception[0] is not None: - self.exception = current_exception[1] - - def to_columns_table_response(self, map_type_fn: Callable) -> None: - raise ValueError( - f"Cannot convert {self.type} to {RESPONSE_TYPE.COLUMNS_TABLE}, the error is: {self.error_message}" - ) - - -class OkResponse(DataHandlerResponse): - """Response for successful cases without data (e.g. CREATE TABLE, DROP TABLE, etc.). - - Attributes: - type: RESPONSE_TYPE.OK - affected_rows: int - how many rows were affected by the query - """ - - type: ClassVar[str] = RESPONSE_TYPE.OK - affected_rows: int - - def __init__(self, affected_rows: int = None): - self.affected_rows = affected_rows - - -def _safe_pandas_concat(pieces: list[pandas.DataFrame]) -> pandas.DataFrame: - """Safely concatenates multiple pandas DataFrames while checking available memory. 
- If the estimated memory required for concatenation (with a safety multiplier of 2.5x) - exceeds the available memory, it raises a MemoryError. - - Args: - pieces (list[pandas.DataFrame]): A list of pandas DataFrames to concatenate. - - Returns: - pandas.DataFrame: The concatenated DataFrame. - - Raises: - MemoryError: If there is insufficient memory to perform the concatenation safely. - """ - if len(pieces) == 1: - return pieces[0] - available_memory_kb = psutil.virtual_memory().available >> 10 - pieces_size_kb = sum([(x.memory_usage(index=True, deep=True).sum() >> 10) for x in pieces]) - if (pieces_size_kb * 2.5) > available_memory_kb: - raise MemoryError() - return pandas.concat(pieces) - - -class TableResponse(DataHandlerResponse): - """Response for successful cases with data (e.g. SELECT, SHOW, etc.). - - Attributes: - type: RESPONSE_TYPE.TABLE | RESPONSE_TYPE.COLUMNS_TABLE - type of data in the response - affected_rows: int | None - how many rows were affected by the query - data_generator: Generator[pandas.DataFrame, None, None] | None - generator of data for lazy loading - _columns: list[Column] | None - list of columns - _data: pandas.DataFrame | None - loaded data - _fetched: bool - if data was already fetched (data_generator is consumed) - _invalid: bool - if data has already been fetched and cannot be iterated over - _last_data_piece: pandas.DataFrame | None - last data piece fetched - rows_fetched: int - how many rows were fetched - """ - - type: str - affected_rows: int | None - _data_generator: Generator[pandas.DataFrame, None, None] | None - _columns: list[Column] | None - _data: pandas.DataFrame | None - _fetched: bool - _invalid: bool - _last_data_piece: pandas.DataFrame | None - rows_fetched: int - - def __init__( - self, - data: pandas.DataFrame | None = None, - data_generator: Generator[pandas.DataFrame, None, None] | None = None, - affected_rows: int | None = None, - columns: list[Column] = None, - ): - """ - Either data and/or 
data_generator must be provided. - Args: - data (pandas.DataFrame): initial data - data_generator (Generator[pandas.DataFrame, None, None]): generator of data - affected_rows (int): total data rowcount - can be None depending on the handler - NOTE: name affected_rows for compatibility with OKResponse - columns (list[Column]): list of columns - """ - self.type = RESPONSE_TYPE.TABLE - self._data_generator = data_generator - self._columns = columns - self.affected_rows = affected_rows - self._data = data - self._fetched = False if data_generator else True - self._invalid = False - self._last_data_piece = None - self.rows_fetched = len(data) if data is not None else 0 - - @property - def data_generator(self) -> Generator[pandas.DataFrame, None, None]: - return self._data_generator - - @data_generator.setter - def data_generator(self, value): - self._fetched = False if value else True - self._data_generator = value - - def fetchall(self) -> pandas.DataFrame: - """Fetch all data and store it in the _data attribute. - - Returns: - pandas.DataFrame: Data frame. - """ - self._raise_if_invalid() - if self._data_generator is None or self._fetched: - return self._data - - pieces = list(self._iterate_with_memory_check()) - if self._data is None: - if len(pieces) == 1: - self._data = pieces[0] - elif len(pieces) == 0: - self._data = pandas.DataFrame([], columns=[column.name for column in self._columns]) - else: - self._data = _safe_pandas_concat(pieces) - elif len(pieces) > 0: - self._data = _safe_pandas_concat([self._data, *pieces]) - - self._fetched = True - self._data_generator = None - - return self._data - - def _raise_if_low_memory(self) -> None: - """Check if there is enough available memory to load the next data chunk. - - Estimates the memory required for the next chunk based on the size of the last - fetched chunk. If `affected_rows` (fetched rows) is known, the estimate is capped at the - number of remaining rows (but no more than one chunk). 
Otherwise, assumes the next chunk will - be the same size as the previous one. - - Does nothing when no data has been fetched yet. - - Raises: - MemoryError: If estimated memory for the next chunk exceeds available memory. - """ - if self._last_data_piece is None or len(self._last_data_piece) == 0: - return - - data_piece_size_kb = self._last_data_piece.memory_usage(index=True, deep=True).sum() >> 10 - if isinstance(self.affected_rows, int) and self.affected_rows > 0: - row_size_kb = data_piece_size_kb / len(self._last_data_piece) - rows_expected = min(self.affected_rows - self.rows_fetched, len(self._last_data_piece)) - if rows_expected > 0: - available_memory_kb = psutil.virtual_memory().available >> 10 - if available_memory_kb < (row_size_kb * rows_expected * 1.1): - raise MemoryError( - f"Not enough memory to load remaining data. " - f"Available: {available_memory_kb}KB, estimated need: {int(row_size_kb * rows_expected * 1.1)}KB" - ) - else: - # assume that next piece is the same size - available_memory_kb = psutil.virtual_memory().available >> 10 - if available_memory_kb < (data_piece_size_kb * 1.1): - raise MemoryError( - f"Not enough memory to load remaining data. " - f"Available: {available_memory_kb}KB, estimated need: {int(data_piece_size_kb * 1.1)}KB" - ) - - def _iterate_with_memory_check(self) -> Generator[pandas.DataFrame, None, None]: - """Iterate over `_data_generator` with memory safety checks. - - Yields: - pandas.DataFrame: The next chunk from the underlying data generator. - - Raises: - MemoryError: Propagated from `_raise_if_low_memory` if available - memory is insufficient for the next chunk. - """ - if self._data_generator is None: - return - - self._raise_if_low_memory() - - for piece in self._data_generator: - self._last_data_piece = piece - self.rows_fetched += len(piece) - yield piece - self._raise_if_low_memory() - - def fetchmany(self) -> pandas.DataFrame | None: - """Fetch one piece of data and store it in the _data attribute. 
- - Returns: - pandas.DataFrame: Data frame, piece of data. - """ - self._raise_if_invalid() - try: - piece = next(self._iterate_with_memory_check()) - if self._data is None: - self._data = piece - else: - self._data = _safe_pandas_concat([self._data, piece]) - except StopIteration: - self._fetched = True - self._data_generator = None - return None - return piece - - def iterate_no_save(self) -> Generator[pandas.DataFrame, None, None]: - """Iterate over the data and yield each piece of data. Do not save the data to the _data attribute. - NOTE: do it only once, before return result to the user - - Returns: - Generator[pandas.DataFrame, None, None]: Generator of data frames. - """ - self._raise_if_invalid() - if self._data is not None: - yield self._data - if self._data_generator: - self._invalid = True - for piece in self._iterate_with_memory_check(): - yield piece - - def _raise_if_invalid(self): - if self._invalid: - raise ValueError("Data has already been fetched and cannot be iterated over.") - - @property - def data_frame(self) -> pandas.DataFrame: - """Get the data frame. Represents the entire dataset. - - Returns: - pandas.DataFrame: Data frame. - """ - self.fetchall() - return self._data - - @data_frame.setter - def data_frame(self, value): - """for back compatibility""" - self._data = value - - @property - def columns(self) -> list[Column]: - """Get the columns. - - Returns: - list[Column]: List of columns. - """ - self._resolve_columns() - return self._columns - - def _resolve_columns(self): - if self._columns is not None: - return - self.fetchall() - self._columns = [Column(name=c) for c in self._data.columns] - - def set_columns_attrs(self, table_name: str | None, table_alias: str | None, database: str | None): - """Set the attributes of the columns. - - Args: - table_name (str | None): Table name. - table_alias (str | None): Table alias. - database (str | None): Database name. 
- """ - self._resolve_columns() - for column in self._columns: - if table_name: - column.table_name = table_name - if table_alias: - column.table_alias = table_alias - if database: - column.database = database - - def to_columns_table_response(self, map_type_fn: Callable) -> None: - """Transform the response to a `columns table` response. - NOTE: original dataframe will be mutated - - Args: - map_type_fn (Callable): Function to map the data type to the MySQL data type. - """ - if self.type == RESPONSE_TYPE.COLUMNS_TABLE: - return - if self.type != RESPONSE_TYPE.TABLE: - raise ValueError( - f"Cannot convert handler response with type '{self.type}' to '{RESPONSE_TYPE.COLUMNS_TABLE}'" - ) - - self.fetchall() - self._resolve_columns() - self.type = RESPONSE_TYPE.COLUMNS_TABLE - - if self._data is None: - return - self._data.columns = [name.upper() for name in self._data.columns] - - for required_column in (INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME, INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE): - if required_column not in self._data.columns: - raise ValueError( - f"Missed required for INFORMATION_SCHEMA.COLUMNS column {required_column}. 
" - f"Columns set: {self._data.columns}" - ) - for column_name in INF_SCHEMA_COLUMNS_NAMES_SET: - if column_name not in self._data.columns: - self._data[column_name] = None - - self._data[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE] = self._data[INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE].apply( - map_type_fn - ) - - self._data = self._data.astype( - { - INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME: "string", - INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE: "string", - INF_SCHEMA_COLUMNS_NAMES.ORDINAL_POSITION: "Int32", - INF_SCHEMA_COLUMNS_NAMES.COLUMN_DEFAULT: "string", - INF_SCHEMA_COLUMNS_NAMES.IS_NULLABLE: "string", - INF_SCHEMA_COLUMNS_NAMES.CHARACTER_MAXIMUM_LENGTH: "Int32", - INF_SCHEMA_COLUMNS_NAMES.CHARACTER_OCTET_LENGTH: "Int32", - INF_SCHEMA_COLUMNS_NAMES.NUMERIC_PRECISION: "Int32", - INF_SCHEMA_COLUMNS_NAMES.NUMERIC_SCALE: "Int32", - INF_SCHEMA_COLUMNS_NAMES.DATETIME_PRECISION: "Int32", - INF_SCHEMA_COLUMNS_NAMES.CHARACTER_SET_NAME: "string", - INF_SCHEMA_COLUMNS_NAMES.COLLATION_NAME: "string", - } - ) - self._data.replace([numpy.nan, pandas.NA], None, inplace=True) - - -def normalize_response(response) -> TableResponse | OkResponse | ErrorResponse: - """Convert legacy HandlerResponse to new response types. - - If response is already a new type (TableResponse, OkResponse, ErrorResponse), - return it as-is. If response is a legacy HandlerResponse, convert it based - on its resp_type. 
- - Args: - response: Either a new response type or legacy HandlerResponse - - Returns: - TableResponse | OkResponse | ErrorResponse: Normalized response - """ - # Already new format - return as-is - if isinstance(response, (TableResponse, OkResponse, ErrorResponse)): - return response - - # Legacy HandlerResponse - convert based on type - if isinstance(response, HandlerResponse): - if response.resp_type == RESPONSE_TYPE.ERROR: - err = ErrorResponse( - error_code=response.error_code, - error_message=response.error_message, - is_expected_error=response.is_expected_error, - ) - err.exception = response.exception - return err - - if response.resp_type == RESPONSE_TYPE.OK: - return OkResponse(affected_rows=response.affected_rows) - - # TABLE or COLUMNS_TABLE - if response.data_frame is not None: - columns = list(response.data_frame.columns) - else: - columns = [] - - mysql_types = response.mysql_types - if mysql_types is None: - mysql_types = [None] * len(columns) - - table_response = TableResponse( - data=response.data_frame, - columns=[ - Column(name=column_name, type=mysql_type) for column_name, mysql_type in zip(columns, mysql_types) - ], - data_generator=iter([]), # empty generator for legacy responses - ) - if response.resp_type == RESPONSE_TYPE.COLUMNS_TABLE: - table_response.type = RESPONSE_TYPE.COLUMNS_TABLE - return table_response - - # Unknown type - return as-is (shouldn't happen normally) - return response - - -# ! deprecated -class HandlerResponse: - """Legacy response class for compatibility with old code. 
- NOTE: do not use this class directly, use DataHandlerResponse instead - """ - - def __init__( - self, - resp_type: RESPONSE_TYPE, - data_frame: pandas.DataFrame = None, - query: ASTNode = 0, - error_code: int = 0, - error_message: str | None = None, - affected_rows: int | None = None, - mysql_types: list[MYSQL_DATA_TYPE] | None = None, - is_expected_error: bool = False, - ) -> None: - self.resp_type = resp_type - self.query = query - self.data_frame = data_frame - self.error_code = error_code - self.error_message = error_message - self.affected_rows = affected_rows - if isinstance(self.affected_rows, int) is False or self.affected_rows < 0: - self.affected_rows = 0 - self.mysql_types = mysql_types - self.is_expected_error = is_expected_error - self.exception = None - current_exception = sys.exc_info() - if current_exception[0] is not None: - self.exception = current_exception[1] - - @property - def type(self): - return self.resp_type - - def to_columns_table_response(self, map_type_fn: Callable) -> None: - """Transform the response to a `columns table` response. - NOTE: original dataframe will be mutated - """ - if self.resp_type == RESPONSE_TYPE.COLUMNS_TABLE: - return - if self.resp_type != RESPONSE_TYPE.TABLE: - if self.resp_type == RESPONSE_TYPE.ERROR: - raise ValueError( - f"Cannot convert {self.resp_type} to {RESPONSE_TYPE.COLUMNS_TABLE}, " - f"the error is: {self.error_message}" - ) - raise ValueError(f"Cannot convert {self.resp_type} to {RESPONSE_TYPE.COLUMNS_TABLE}") - - self.data_frame.columns = [name.upper() for name in self.data_frame.columns] - - for required_column in (INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME, INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE): - if required_column not in self.data_frame.columns: - raise ValueError( - f"Missed required for INFORMATION_SCHEMA.COLUMNS column {required_column}. 
" - f"Columns set: {self.data_frame.columns}" - ) - for column_name in INF_SCHEMA_COLUMNS_NAMES_SET: - if column_name not in self.data_frame.columns: - self.data_frame[column_name] = None - - self.data_frame[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE] = self.data_frame[ - INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE - ].apply(map_type_fn) - - self.data_frame = self.data_frame.astype( - { - INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME: "string", - INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE: "string", - INF_SCHEMA_COLUMNS_NAMES.ORDINAL_POSITION: "Int32", - INF_SCHEMA_COLUMNS_NAMES.COLUMN_DEFAULT: "string", - INF_SCHEMA_COLUMNS_NAMES.IS_NULLABLE: "string", - INF_SCHEMA_COLUMNS_NAMES.CHARACTER_MAXIMUM_LENGTH: "Int32", - INF_SCHEMA_COLUMNS_NAMES.CHARACTER_OCTET_LENGTH: "Int32", - INF_SCHEMA_COLUMNS_NAMES.NUMERIC_PRECISION: "Int32", - INF_SCHEMA_COLUMNS_NAMES.NUMERIC_SCALE: "Int32", - INF_SCHEMA_COLUMNS_NAMES.DATETIME_PRECISION: "Int32", - INF_SCHEMA_COLUMNS_NAMES.CHARACTER_SET_NAME: "string", - INF_SCHEMA_COLUMNS_NAMES.COLLATION_NAME: "string", - } - ) - self.data_frame.replace([numpy.nan, pandas.NA], None, inplace=True) - - self.resp_type = RESPONSE_TYPE.COLUMNS_TABLE - - def to_json(self): - try: - data = None - if self.data_frame is not None: - data = self.data_frame.to_json(orient="split", index=False, date_format="iso") - except Exception as e: - logger.error("%s.to_json: error - %s", self.__class__.__name__, e) - data = None - return { - "type": self.resp_type, - "query": self.query, - "data_frame": data, - "error_code": self.error_code, - "error": self.error_message, - } - - def __repr__(self): - return "%s: resp_type=%s, query=%s, data_frame=\n%s\nerr_code=%s, error=%s, affected_rows=%s" % ( - self.__class__.__name__, - self.resp_type, - self.query, - self.data_frame, - self.error_code, - self.error_message, - self.affected_rows, - ) diff --git a/mindsdb/integrations/libs/storage_handler.py b/mindsdb/integrations/libs/storage_handler.py deleted file mode 100644 index 
e9f2470f49c..00000000000 --- a/mindsdb/integrations/libs/storage_handler.py +++ /dev/null @@ -1,88 +0,0 @@ -import os -import dill -import pickle -from typing import Dict -from hashlib import md5 - -import redis -import sqlite3 - - -class KVStorageHandler: - """ - Simple key-value store. Instances of this handler shall store any information required by other handlers. - Context should store anything relevant to the storage handler, e.g. CompanyID, UserID, parent handler name, among others. - """ # noqa - def __init__(self, context: Dict, config=None): - self.config = config if config else os.getenv('MDB_STORAGE_HANDLER_CONFIG') - self.serializer = pickle if config.get('serializer', '') == 'pickle' else dill - self.context = self.serializer.dumps(context) # store serialized - - def _get_context_key(self, key: str): - serialized_key = self.serializer.dumps(key) - return md5(serialized_key).hexdigest() + md5(self.context).hexdigest() - - def get(self, key, default_value=None): - serialized_value = self._get(self._get_context_key(key)) - if serialized_value: - return self.serializer.loads(serialized_value) - elif default_value is not None: - return default_value - else: - raise KeyError(f"Key not found: {key}") - - def set(self, key: str, value: object): - serialized_value = self.serializer.dumps(value) - self._set(self._get_context_key(key), serialized_value) - - def _get(self, serialized_key): - raise NotImplementedError() - - def _set(self, serialized_key, serialized_value): - raise NotImplementedError() - - -class SqliteStorageHandler(KVStorageHandler): - """ StorageHandler that uses SQLite as backend. 
""" # noqa - def __init__(self, context: Dict, config=None): - super().__init__(context, config) - name = self.config["name"] if self.config["name"][-3:] == '.db' else self.config["name"] + '.db' - path = os.path.join(self.config.get("path", "./"), name) - self.connection = sqlite3.connect(path) - self._setup_connection() - - def _setup_connection(self): - """ Checks that a key-value table exists, otherwise creates it. """ # noqa - cur = self.connection.cursor() - if ('store',) not in list(cur.execute("SELECT name FROM sqlite_master WHERE type='table';")): - cur.execute("""create table store (key text PRIMARY KEY, value text)""") - self.connection.commit() - - def _get(self, serialized_key): - cur = self.connection.cursor() - results = list(cur.execute(f"""select value from store where key='{serialized_key}'""")) - if results: - return results[0][0] # should always be a single match, hence the [0]s - else: - return None - - def _set(self, serialized_key, serialized_value): - cur = self.connection.cursor() - cur.execute("insert or replace into store values (?, ?)", (serialized_key, serialized_value)) - self.connection.commit() - - -class RedisStorageHandler(KVStorageHandler): - """ StorageHandler that uses Redis as backend. 
""" # noqa - def __init__(self, context: Dict, config=None): - super().__init__(context, config) - assert self.config.get('host', False) - assert self.config.get('port', False) - - self.connection = redis.Redis(host=self.config['host'], port=self.config['port']) - - def _get(self, serialized_key): - return self.connection.get(serialized_key) - - def _set(self, serialized_key, serialized_value): - self.connection.set(serialized_key, serialized_value) diff --git a/mindsdb/integrations/libs/vectordatabase_handler.py b/mindsdb/integrations/libs/vectordatabase_handler.py deleted file mode 100644 index 1c8b9074b2c..00000000000 --- a/mindsdb/integrations/libs/vectordatabase_handler.py +++ /dev/null @@ -1,663 +0,0 @@ -import ast -import copy -import hashlib -from enum import Enum -from typing import List, Optional -import datetime as dt - -import pandas as pd -from mindsdb_sql_parser.ast import ( - BinaryOperation, - Constant, - CreateTable, - Delete, - DropTables, - Insert, - Select, - Star, - Tuple, - Update, - Function, - Identifier, -) -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.integrations.libs.response import DataHandlerResponse, OkResponse, TableResponse -from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator, KeywordSearchArgs -from mindsdb.integrations.utilities.query_traversal import query_traversal -from mindsdb.utilities import log -from .base import BaseHandler - -LOG = log.getLogger(__name__) - - -class VectorHandlerException(Exception): ... - - -class TableField(Enum): - """ - Enum for table fields. 
- """ - - ID = "id" - CONTENT = "content" - EMBEDDINGS = "embeddings" - METADATA = "metadata" - SEARCH_VECTOR = "search_vector" - DISTANCE = "distance" - RELEVANCE = "relevance" - - -class DistanceFunction(Enum): - SQUARED_EUCLIDEAN_DISTANCE = ("<->",) - NEGATIVE_DOT_PRODUCT = ("<#>",) - COSINE_DISTANCE = "<=>" - - -class VectorStoreHandler(BaseHandler): - """ - Base class for handlers associated to vector databases. - """ - - SCHEMA = [ - { - "name": TableField.ID.value, - "data_type": "string", - }, - { - "name": TableField.CONTENT.value, - "data_type": "string", - }, - { - "name": TableField.EMBEDDINGS.value, - "data_type": "list", - }, - { - "name": TableField.METADATA.value, - "data_type": "json", - }, - { - "name": TableField.DISTANCE.value, - "data_type": "float", - }, - ] - - def validate_connection_parameters(self, name, **kwargs): - """Create validation for input parameters.""" - - return NotImplementedError() - - def __del__(self): - if self.is_connected is True: - self.disconnect() - - def disconnect(self): - pass - - def _value_or_self(self, value): - if isinstance(value, Constant): - return value.value - else: - return value - - def extract_conditions(self, where_statement) -> Optional[List[FilterCondition]]: - conditions = [] - # parse conditions - if where_statement is not None: - # dfs to get all binary operators in the where statement - def _extract_comparison_conditions(node, **kwargs): - if isinstance(node, BinaryOperation): - # if the op is and, continue - # TODO: need to handle the OR case - if node.op.upper() == "AND": - return - op = FilterOperator(node.op.upper()) - - arg1, arg2 = node.args - if isinstance(arg1, Function): - if arg1.op.lower() in ("lower", "lower") and len(arg1.args) == 1: - func_arg = arg1.args[0] - if isinstance(func_arg, Identifier) and len(func_arg.parts) == 1: - if func_arg.parts[0].lower() in ("chunk_content", "content"): - arg1 = func_arg - - if not isinstance(arg1, Identifier): - raise ValueError(f"Not supported 
condition: {node}") - - # unquote the left hand side - left_hand = arg1.parts[-1].strip("`") - if isinstance(arg2, Constant): - if left_hand == TableField.SEARCH_VECTOR.value: - right_hand = ast.literal_eval(arg2.value) - else: - right_hand = arg2.value - elif isinstance(arg2, Tuple): - # Constant could be actually a list i.e. [1.2, 3.2] - right_hand = [item.value for item in arg2.items] - else: - raise Exception(f"Unsupported right hand side: {arg2}") - conditions.append(FilterCondition(column=left_hand, op=op, value=right_hand)) - - query_traversal(where_statement, _extract_comparison_conditions) - - else: - conditions = None - - return conditions - - def _convert_metadata_filters(self, conditions, allowed_metadata_columns=None): - if conditions is None: - return - # try to treat conditions that are not in TableField as metadata conditions - for condition in conditions: - if self._is_metadata_condition(condition): - # check restriction - if allowed_metadata_columns is not None: - # system columns are underscored, skip them - if condition.column not in allowed_metadata_columns and not condition.column.startswith("_"): - raise ValueError(f"Column is not found: {condition.column}") - - # convert if required - if not condition.column.startswith(TableField.METADATA.value): - condition.column = TableField.METADATA.value + "." + condition.column - - def _is_columns_allowed(self, columns: List[str]) -> bool: - """ - Check if columns are allowed. - """ - allowed_columns = set([col["name"] for col in self.SCHEMA]) - return set(columns).issubset(allowed_columns) - - def _is_metadata_condition(self, condition: FilterCondition) -> bool: - allowed_field_values = set([field.value for field in TableField]) - if condition.column in allowed_field_values: - return False - return True - - def _dispatch_create_table(self, query: CreateTable): - """ - Dispatch create table query to the appropriate method. 
- """ - # parse key arguments - table_name = query.name.parts[-1] - if_not_exists = getattr(query, "if_not_exists", False) - return self.create_table(table_name, if_not_exists=if_not_exists) - - def _dispatch_drop_table(self, query: DropTables): - """ - Dispatch drop table query to the appropriate method. - """ - table_name = query.tables[0].parts[-1] - if_exists = getattr(query, "if_exists", False) - - return self.drop_table(table_name, if_exists=if_exists) - - def _dispatch_insert(self, query: Insert): - """ - Dispatch insert query to the appropriate method. - """ - # parse key arguments - table_name = query.table.parts[-1] - columns = [column.name for column in query.columns] - - if not self._is_columns_allowed(columns): - raise Exception(f"Columns {columns} not allowed.Allowed columns are {[col['name'] for col in self.SCHEMA]}") - - # get content column if it is present - if TableField.CONTENT.value in columns: - content_col_index = columns.index("content") - content = [self._value_or_self(row[content_col_index]) for row in query.values] - else: - content = None - - # get id column if it is present - ids = None - if TableField.ID.value in columns: - id_col_index = columns.index("id") - ids = [self._value_or_self(row[id_col_index]) for row in query.values] - elif TableField.CONTENT.value is None: - raise Exception("Content or id is required!") - - # get embeddings column if it is present - if TableField.EMBEDDINGS.value in columns: - embeddings_col_index = columns.index("embeddings") - embeddings = [ast.literal_eval(self._value_or_self(row[embeddings_col_index])) for row in query.values] - else: - raise Exception("Embeddings column is required!") - - if TableField.METADATA.value in columns: - metadata_col_index = columns.index("metadata") - metadata = [ast.literal_eval(self._value_or_self(row[metadata_col_index])) for row in query.values] - else: - metadata = None - - # create dataframe - data = { - TableField.CONTENT.value: content, - 
TableField.EMBEDDINGS.value: embeddings, - TableField.METADATA.value: metadata, - } - if ids is not None: - data[TableField.ID.value] = ids - - return self.do_upsert(table_name, pd.DataFrame(data)) - - def dispatch_update(self, query: Update, conditions: List[FilterCondition] = None): - """ - Dispatch update query to the appropriate method. - """ - table_name = query.table.parts[-1] - - row = {} - for k, v in query.update_columns.items(): - k = k.lower() - if isinstance(v, Constant): - v = v.value - if k == TableField.EMBEDDINGS.value and isinstance(v, str): - # it could be embeddings in string - try: - v = ast.literal_eval(v) - except Exception: - pass - row[k] = v - - if conditions is None: - where_statement = query.where - conditions = self.extract_conditions(where_statement) - - for condition in conditions: - if condition.op != FilterOperator.EQUAL: - raise NotImplementedError - - row[condition.column] = condition.value - - # checks - if TableField.EMBEDDINGS.value not in row: - raise Exception("Embeddings column is required!") - - if TableField.CONTENT.value not in row: - raise Exception("Content is required!") - - # store - df = pd.DataFrame([row]) - - return self.do_upsert(table_name, df) - - def set_metadata_cur_time(self, df, col_name): - metadata_col = TableField.METADATA.value - cur_date = dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S") - - def set_time(meta): - meta[col_name] = cur_date - - df[metadata_col].apply(set_time) - - def do_upsert(self, table_name, df): - """Upsert data into table, handling document updates and deletions. - - Args: - table_name (str): Name of the table - df (pd.DataFrame): DataFrame containing the data to upsert - - The function handles three cases: - 1. New documents: Insert them - 2. 
Updated documents: Delete old chunks and insert new ones - """ - id_col = TableField.ID.value - metadata_col = TableField.METADATA.value - content_col = TableField.CONTENT.value - - def gen_hash(v): - return hashlib.md5(str(v).encode()).hexdigest() - - if id_col not in df.columns: - # generate for all - df[id_col] = df[content_col].apply(gen_hash) - else: - # generate for empty - for i in range(len(df)): - if pd.isna(df.loc[i, id_col]): - df.loc[i, id_col] = gen_hash(df.loc[i, content_col]) - - # remove duplicated ids - df = df.drop_duplicates([TableField.ID.value]) - - # id is string TODO is it ok? - df[id_col] = df[id_col].apply(str) - - # set updated_at - self.set_metadata_cur_time(df, "_updated_at") - - if hasattr(self, "upsert"): - self.upsert(table_name, df) - return - - original_doc_id = "_original_doc_id" - - def get_original_ids(metadata): - return metadata.apply(lambda m: m.get(original_doc_id)) - - df["orig_id"] = get_original_ids(df[metadata_col]) - - df_original_ids = df[~df["orig_id"].isna()] - df_chunk_ids = df[df["orig_id"].isna()] - - if not df_original_ids.empty: - # data has original ids - find all related records - - all_ids = list(df_original_ids["orig_id"]) - - # find existing original_ids - df_existed = self.select( - table_name, - columns=[id_col, metadata_col], - conditions=[FilterCondition(column=f"metadata.{original_doc_id}", op=FilterOperator.IN, value=all_ids)], - ) - - # split into groups: - # - to update: records that match by `chunk_id`+`original_id` in `df_existed` and `df` - # - to delete: all chunk_ids from `df_existed` that don't match by `chunk_id`+`original_id` - # - to insert: all records from `df` that don't match by `chunk_id`+`original_id` - - if not df_existed.empty: - df_existed["orig_id"] = get_original_ids(df_existed[metadata_col]) - df_existed["match"] = 1 - - df_common = df_original_ids.merge( - df_existed[["id", "orig_id", "match"]], on=["id", "orig_id"], how="left" - ) - - df_update = 
df_common[~df_common["match"].isna()].drop("orig_id", axis=1).drop("match", axis=1) - df_insert = df_common[df_common["match"].isna()].drop("match", axis=1) - - ids_to_remove = set(df_existed["id"]) - set(df_update["id"]) - else: - df_insert = df_original_ids - ids_to_remove = [] - df_update = pd.DataFrame() - df_insert = df_insert.drop("orig_id", axis=1) - self._apply_diff_changes(table_name, ids_to_remove, df_update, df_insert, df_existed) - - if not df_chunk_ids.empty: - df_chunk_ids = df_chunk_ids.drop("orig_id", axis=1) - - # records have only chunk_ids - update/insert only them - df_existed = self.select( - table_name, - columns=[id_col, metadata_col], - conditions=[FilterCondition(column=id_col, op=FilterOperator.IN, value=list(df_chunk_ids[id_col]))], - ) - existed_ids = list(df_existed[id_col]) - - # update existed - df_update = df_chunk_ids[df_chunk_ids[id_col].isin(existed_ids)] - df_insert = df_chunk_ids[~df_chunk_ids[id_col].isin(existed_ids)] - - self._apply_diff_changes(table_name, [], df_update, df_insert, df_existed) - - def _apply_diff_changes(self, table_name, ids_to_remove, df_update, df_insert, df_existed): - # -- apply changes -- - - id_col = TableField.ID.value - metadata_col = TableField.METADATA.value - original_doc_id = "_original_doc_id" - - if ids_to_remove: - conditions = [FilterCondition(column=id_col, op=FilterOperator.IN, value=list(ids_to_remove))] - self.delete(table_name, conditions) - - if not df_update.empty: - # get values of existed `created_at` and return them to metadata - - created_dates, ids = {}, {} - for _, row in df_existed.iterrows(): - chunk_id = row[id_col] - created_dates[chunk_id] = row[metadata_col].get("_created_at") - ids[chunk_id] = row[metadata_col].get(original_doc_id) - - def keep_created_at(row): - val = created_dates.get(row[id_col]) - if val: - row[metadata_col]["_created_at"] = val - # keep id column - if original_doc_id not in row[metadata_col]: - row[metadata_col][original_doc_id] = 
ids.get(row[id_col]) - return row - - df_update = df_update - df_update.apply(keep_created_at, axis=1) - - if hasattr(self, "update"): - self.update(table_name, df_update, [id_col]) - else: - # no update method in vector db: just remove old records before insert - - ids_to_remove = df_update[id_col] - conditions = [FilterCondition(column=id_col, op=FilterOperator.IN, value=list(ids_to_remove))] - self.delete(table_name, conditions) - self.insert(table_name, df_update) - if not df_insert.empty: - # set created_at - self.set_metadata_cur_time(df_insert, "_created_at") - df_insert = df_insert - self.insert(table_name, df_insert) - - def dispatch_delete(self, query: Delete, conditions: List[FilterCondition] = None): - """ - Dispatch delete query to the appropriate method. - """ - # parse key arguments - table_name = query.table.parts[-1] - if conditions is None: - where_statement = query.where - conditions = self.extract_conditions(where_statement) - self._convert_metadata_filters(conditions) - - # dispatch delete - return self.delete(table_name, conditions=conditions) - - def dispatch_select( - self, - query: Select, - conditions: Optional[List[FilterCondition]] = None, - allowed_metadata_columns: List[str] = None, - keyword_search_args: Optional[KeywordSearchArgs] = None, - ): - """ - Dispatches a select query to the appropriate method, handling both - standard selections and keyword searches based on the provided arguments. - """ - # 1. Parse common query arguments - table_name = query.from_table.parts[-1] - - # If targets are a star (*), select all schema columns - if isinstance(query.targets[0], Star): - columns = [col["name"] for col in self.SCHEMA] - else: - columns = [col.parts[-1] for col in query.targets] - - # 2. Validate columns - if not self._is_columns_allowed(columns): - allowed_cols = [col["name"] for col in self.SCHEMA] - raise Exception(f"Columns {columns} not allowed. Allowed columns are {allowed_cols}") - - # 3. 
Extract and process conditions - if conditions is None: - where_statement = query.where - conditions = self.extract_conditions(where_statement) - else: - # it is mutated - conditions = copy.deepcopy(conditions) - self._convert_metadata_filters(conditions, allowed_metadata_columns=allowed_metadata_columns) - - # 4. Get offset and limit - offset = query.offset.value if query.offset is not None else None - limit = query.limit.value if query.limit is not None else None - - # 5. Conditionally dispatch to the correct select method - if keyword_search_args: - # It's a keyword search - return self.keyword_select( - table_name, - columns=columns, - conditions=conditions, - offset=offset, - limit=limit, - keyword_search_args=keyword_search_args, - ) - else: - # It's a standard select - try: - return self.select( - table_name, - columns=columns, - conditions=conditions, - offset=offset, - limit=limit, - ) - - except Exception as e: - handler_engine = self.__class__.name - raise VectorHandlerException(f"Error in {handler_engine} database: {e}") - - def _dispatch(self, query: ASTNode) -> DataHandlerResponse: - """ - Parse and Dispatch query to the appropriate method. - """ - dispatch_router = { - CreateTable: self._dispatch_create_table, - DropTables: self._dispatch_drop_table, - Insert: self._dispatch_insert, - Update: self.dispatch_update, - Delete: self.dispatch_delete, - Select: self.dispatch_select, - } - if type(query) in dispatch_router: - resp = dispatch_router[type(query)](query) - if resp is not None: - return TableResponse(data=resp) - else: - return OkResponse() - - else: - raise NotImplementedError(f"Query type {type(query)} not implemented.") - - def query(self, query: ASTNode) -> DataHandlerResponse: - """ - Receive query as AST (abstract syntax tree) and act upon it somehow. - - Args: - query (ASTNode): sql query represented as AST. 
May be any kind - of query: SELECT, INSERT, DELETE, etc - - Returns: - DataHandlerResponse - """ - return self._dispatch(query) - - def create_table(self, table_name: str, if_not_exists=True) -> DataHandlerResponse: - """Create table - - Args: - table_name (str): table name - if_not_exists (bool): if True, do nothing if table exists - - Returns: - DataHandlerResponse - """ - raise NotImplementedError() - - def drop_table(self, table_name: str, if_exists=True) -> DataHandlerResponse: - """Drop table - - Args: - table_name (str): table name - if_exists (bool): if True, do nothing if table does not exist - - Returns: - DataHandlerResponse - """ - raise NotImplementedError() - - def insert(self, table_name: str, data: pd.DataFrame) -> DataHandlerResponse: - """Insert data into table - - Args: - table_name (str): table name - data (pd.DataFrame): data to insert - columns (List[str]): columns to insert - - Returns: - DataHandlerResponse - """ - raise NotImplementedError() - - def delete(self, table_name: str, conditions: List[FilterCondition] = None) -> DataHandlerResponse: - """Delete data from table - - Args: - table_name (str): table name - conditions (List[FilterCondition]): conditions to delete - - Returns: - DataHandlerResponse - """ - raise NotImplementedError() - - def select( - self, - table_name: str, - columns: List[str] = None, - conditions: List[FilterCondition] = None, - offset: int = None, - limit: int = None, - ) -> DataHandlerResponse: - """Select data from table - - Args: - table_name (str): table name - columns (List[str]): columns to select - conditions (List[FilterCondition]): conditions to select - - Returns: - DataHandlerResponse - """ - raise NotImplementedError() - - def get_columns(self, table_name: str) -> TableResponse: - # return a fixed set of columns - data = pd.DataFrame(self.SCHEMA) - data.columns = ["COLUMN_NAME", "DATA_TYPE"] - return TableResponse(data=data) - - def check_existing_ids(self, table_name: str, ids: List[str]) -> 
List[str]: - """ - Check which IDs from the provided list already exist in the table. - - Args: - table_name (str): Name of the table to check - ids (List[str]): List of IDs to check for existence - - Returns: - List[str]: List of IDs that already exist in the table - """ - if not ids: - return [] - - try: - # Query existing IDs - df_existing = self.select( - table_name, - columns=[TableField.ID.value], - conditions=[FilterCondition(column=TableField.ID.value, op=FilterOperator.IN, value=ids)], - ) - return list(df_existing[TableField.ID.value]) if not df_existing.empty else [] - except Exception: - # If select fails for any reason, return empty list to be safe - return [] - - def create_index(self, *args, **kwargs): - """ - Create an index on the specified table. - """ - raise NotImplementedError(f"create_index not supported for VectorStoreHandler {self.name}") diff --git a/mindsdb/integrations/utilities/__init__.py b/mindsdb/integrations/utilities/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/utilities/community_handler_fetcher.py b/mindsdb/integrations/utilities/community_handler_fetcher.py deleted file mode 100644 index 2f66640a035..00000000000 --- a/mindsdb/integrations/utilities/community_handler_fetcher.py +++ /dev/null @@ -1,265 +0,0 @@ -import base64 -import json -import os -import shutil -import threading -from pathlib import Path -from typing import Optional - -import requests - -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - -# GitHub API configuration -# It can be replaced later with making the repo public. -GITHUB_API_BASE = "https://api.github.com" -DEFAULT_REPO = "mindsdb/mindsdb-community-handlers" -DEFAULT_BRANCH = "main" -DEFAULT_PATH_PREFIX = "community_handlers" -_fetch_locks: dict = {} -_fetch_locks_lock = threading.Lock() - - -def _get_fetch_lock(handler_dir_name: str) -> threading.Lock: - """ - Get and create if needed a threading. 
- Lock for the given handler directory. - This ensures that concurrent fetches for the same handler_dir_name are - serializedlly, preventing race conditions on disk. - """ - with _fetch_locks_lock: - if handler_dir_name not in _fetch_locks: - _fetch_locks[handler_dir_name] = threading.Lock() - return _fetch_locks[handler_dir_name] - - -def _github_headers() -> dict: - """ - Return headers for GitHub API requests, including optional auth if GITHUB_TOKEN is set in the environment. - TODO: Remove this after repository is set to public. - """ - headers = {"Accept": "application/vnd.github.v3+json"} - token = os.environ.get("GITHUB_TOKEN") - if token: - headers["Authorization"] = f"token {token}" - return headers - - -# It can be removed later with making the repo public. TBD -def _get_repo_config() -> tuple: - """Returns (repo, branch, path_prefix).""" - repo = os.environ.get("COMMUNITY_HANDLERS_REPO", DEFAULT_REPO) - branch = os.environ.get("COMMUNITY_HANDLERS_BRANCH", DEFAULT_BRANCH) - path_prefix = os.environ.get("COMMUNITY_HANDLERS_PATH", DEFAULT_PATH_PREFIX) - return repo, branch, path_prefix - - -def _resolve_tree_sha(repo: str, branch: str, dir_path: str, headers: dict) -> Optional[str]: - """Return the Git tree SHA for dir_path by inspecting its parent directory listing. - - Calls the Contents API on the parent of dir_path, then finds the matching - directory entry and returns its SHA. Returns None if the path does not exist - (404) or if the directory name is not found in the parent listing. - - Raises: - RuntimeError: On network errors or unexpected GitHub API responses. 
- """ - parent_path, _, dir_name = dir_path.rstrip("/").rpartition("/") - api_url = f"{GITHUB_API_BASE}/repos/{repo}/contents/{parent_path}" - params = {"ref": branch} - try: - resp = requests.get(api_url, params=params, headers=headers, timeout=30) - except requests.RequestException as e: - raise RuntimeError(f"Network error resolving tree SHA for '{dir_path}': {e}") from e - if resp.status_code == 404: - return None - if resp.status_code != 200: - raise RuntimeError( - f"GitHub API error resolving tree SHA for '{dir_path}': HTTP {resp.status_code} — {resp.text[:300]}" - ) - try: - entries = resp.json() - except json.JSONDecodeError as e: - raise RuntimeError(f"Invalid JSON resolving tree SHA for '{dir_path}': {e}") from e - for entry in entries: - if entry.get("name") == dir_name and entry.get("type") == "dir": - return entry.get("sha") - return None - - -def _fetch_tree_recursive( - repo: str, - branch: str, - tree_sha: str, - remote_prefix: str, - dest_dir: Path, - headers: dict, - max_depth: int = 4, -) -> int: - """Fetch all files in a Git tree recursively, preserving directory structure. - - Uses the Git Trees API with ?recursive=1 to obtain the full file listing in - a single API call, then downloads each blob from raw.githubusercontent.com. - - Args: - repo: GitHub repository in "owner/repo" format. - branch: Branch or ref name used to build raw download URLs. - tree_sha: SHA of the Git tree to fetch. - remote_prefix: Path within the repo to the handler directory - Used to construct raw download URLs. - dest_dir: Local directory where files will be written. - headers: HTTP headers (auth, Accept) for GitHub API requests. - max_depth: Maximum allowed directory nesting depth. Entries whose - relative path contains >= max_depth slashes are skipped. - - Returns: - Number of files downloaded. - - Raises: - RuntimeError: On network errors or unexpected API responses. 
- """ - api_url = f"{GITHUB_API_BASE}/repos/{repo}/git/trees/{tree_sha}" - params = {"recursive": "1"} - try: - resp = requests.get(api_url, params=params, headers=headers, timeout=30) - resp.raise_for_status() - except requests.RequestException as e: - raise RuntimeError(f"Network error fetching tree '{tree_sha}': {e}") from e - try: - tree_data = resp.json() - except json.JSONDecodeError as e: - raise RuntimeError(f"Invalid JSON from Git Trees API for tree '{tree_sha}': {e}") from e - - if tree_data.get("truncated"): - logger.warning("Tree for handler '%s' was truncated; some files may be missing", remote_prefix) - - file_count = 0 - for entry in tree_data.get("tree", []): - if entry.get("type") != "blob": - continue - path = entry["path"] - if path.count("/") >= max_depth: - logger.debug("Skipping deeply nested path '%s' (max_depth=%d)", path, max_depth) - continue - local_path = dest_dir / path - local_path.parent.mkdir(parents=True, exist_ok=True) - raw_url = f"https://raw.githubusercontent.com/{repo}/{branch}/{remote_prefix}/{path}" - try: - file_resp = requests.get(raw_url, headers=headers, timeout=30) - file_resp.raise_for_status() - except requests.RequestException as e: - raise RuntimeError(f"Failed to download '{path}' for handler '{remote_prefix}': {e}") from e - local_path.write_bytes(file_resp.content) - logger.debug("Downloaded %s (%d bytes)", path, entry.get("size", 0)) - file_count += 1 - return file_count - - -def fetch_handler(handler_dir_name: str, storage_dir: Path) -> Optional[Path]: - """ - Fetch a single community handler directory from GitHub into storage_dir. - - Downloads the full directory tree for the requested handler using the - GitHub Git Trees API, preserving subdirectory structure. - - Args: - handler_dir_name: The directory name of the handler (e.g. 
"github_handler") - storage_dir: Root directory where community handlers are stored - - Returns: - Path to the fetched handler directory, or None if the handler does not - exist in the remote repository. - - Raises: - RuntimeError: On network errors or unexpected GitHub API responses. - """ - lock = _get_fetch_lock(handler_dir_name) - with lock: - dest_dir = storage_dir / handler_dir_name - - if dest_dir.is_dir() and (dest_dir / "__init__.py").exists(): - logger.debug("Community handler '%s' already on disk at %s", handler_dir_name, dest_dir) - return dest_dir - - repo, branch, path_prefix = _get_repo_config() - headers = _github_headers() - remote_prefix = f"{path_prefix}/{handler_dir_name}" - - logger.debug("Fetching community handler '%s' from %s@%s", handler_dir_name, repo, branch) - - tree_sha = _resolve_tree_sha(repo, branch, remote_prefix, headers) - if tree_sha is None: - logger.error("Community handler '%s' not found in repo '%s'", handler_dir_name, repo) - return None - - # Use a temporary directory for downloading files before moving to the final location. - # This prevents leaving a partially downloaded handler on disk if something goes wrong. - # As a fail-safe measure, we remove any existing temp directory before starting, and ensure cleanup on exceptions. - tmp_dir = storage_dir / f".tmp_{handler_dir_name}" - if tmp_dir.exists(): - shutil.rmtree(tmp_dir) - tmp_dir.mkdir(parents=True, exist_ok=True) - - try: - file_count = _fetch_tree_recursive(repo, branch, tree_sha, remote_prefix, tmp_dir, headers) - logger.debug("Fetched %d files for handler '%s'", file_count, handler_dir_name) - - # Atomic rename. - # If dest_dir already exists, remove it first. - # This ensures that we don't end up with a mix of old and new files if the handler is updated. 
- if dest_dir.exists(): - shutil.rmtree(dest_dir) - tmp_dir.rename(dest_dir) - - except Exception: - if tmp_dir.exists(): - shutil.rmtree(tmp_dir) - raise - - logger.debug("Community handler '%s' fetched successfully to %s", handler_dir_name, dest_dir) - return dest_dir - - -def community_handlers_enabled() -> bool: - """Returns True if community handlers are enabled via env var. - - Set MINDSDB_COMMUNITY_HANDLERS=true to opt in. - Community handlers are disabled by default. - """ - val = os.environ.get("MINDSDB_COMMUNITY_HANDLERS", "false").lower() - return val in ("1", "true", "yes", "enabled") - - -def get_community_handlers_storage_dir(storage_root: Path) -> Path: - """Returns (and creates if needed) the community handlers storage directory.""" - community_dir = storage_root / "community_handlers" - # Creating the directory, maybe can be done on init? - community_dir.mkdir(parents=True, exist_ok=True) - return community_dir - - -def list_available_handlers() -> list: - """ - Return handler metadata from the community index.json. - - Each dict has keys: name, title, folder, type, support_level, - icon_path, description. 
- """ - repo, branch, _ = _get_repo_config() - api_url = f"{GITHUB_API_BASE}/repos/{repo}/contents/index.json" - params = {"ref": branch} - - try: - logger.debug("Fetching community handlers index from GitHub: %s", api_url) - resp = requests.get(api_url, params=params, headers=_github_headers(), timeout=30) - if resp.status_code == 200: - entry = resp.json() - raw = base64.b64decode(entry["content"]).decode("utf-8") - data = json.loads(raw) - return data.get("handlers", []) - logger.warning("Could not fetch community index: HTTP %s", resp.status_code) - except Exception as e: - logger.warning("Could not fetch community handlers index: %s", e) - return [] diff --git a/mindsdb/integrations/utilities/datasets/__init__.py b/mindsdb/integrations/utilities/datasets/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/utilities/datasets/dataset.py b/mindsdb/integrations/utilities/datasets/dataset.py deleted file mode 100644 index 1a06bd3a623..00000000000 --- a/mindsdb/integrations/utilities/datasets/dataset.py +++ /dev/null @@ -1,73 +0,0 @@ -from pathlib import Path - -import pandas as pd - -DATASETS_BASE_PATH = Path(__file__).parent - -SUPPORTED_TASK_TYPES = ("question_answering",) - - -class DatasetNameMissing(Exception): - pass - - -class MLTaskTypeMissing(Exception): - pass - - -class DatasetNotFound(Exception): - pass - - -class UnsupportedMLTaskType(Exception): - pass - - -class MissingColumns(Exception): - pass - - -def validate_dataset(ml_task_type=None, dataset_name=None): - - if ml_task_type is None: - raise MLTaskTypeMissing( - "ML Task type is missing. Please provide a valid 'ml_task_type'." - ) - - if dataset_name is None: - raise DatasetNameMissing( - "Dataset name is missing. Please provide a valid 'dataset_name'." - ) - - if ml_task_type not in SUPPORTED_TASK_TYPES: - raise UnsupportedMLTaskType( - f"ML Task type '{ml_task_type}' is not supported. 
Supported types are: {SUPPORTED_TASK_TYPES}" - ) - - dataset_path = DATASETS_BASE_PATH / ml_task_type / f"{dataset_name}.csv" - - if not dataset_path.exists(): - raise DatasetNotFound( - f"Dataset '{dataset_name}' for ML Task type '{ml_task_type}' not found '{dataset_path}'." - ) - - return dataset_path - - -def load_dataset(ml_task_type=None, dataset_name=None): - - dataset_path = validate_dataset(ml_task_type, dataset_name) - - return pd.read_csv(dataset_path) - - -def validate_dataframe(df, mandatory_columns): - - columns_exist = all([col in df.columns for col in mandatory_columns]) - - if not columns_exist: - raise MissingColumns( - f"Columns {mandatory_columns} are missing from the dataframe." - ) - - return df diff --git a/mindsdb/integrations/utilities/datasets/question_answering/fda_style_qa.csv b/mindsdb/integrations/utilities/datasets/question_answering/fda_style_qa.csv deleted file mode 100644 index 74e8fe2508a..00000000000 --- a/mindsdb/integrations/utilities/datasets/question_answering/fda_style_qa.csv +++ /dev/null @@ -1,21 +0,0 @@ -question,context,answers -what is the treatment for cold,"For adults and children age 5 and older, OTC decongestants, antihistamines and pain relievers might offer some symptom relief. However, they won't prevent a cold or shorten its duration, and most have some side effects.","{'text': [""Try OTC cold and cough medications.""]}" -what is the best treatment for type 2 diabetes,"Metformin (Fortamet, Glumetza, others) is generally the first medicine prescribed for type 2 diabetes. It works mainly by lowering glucose production in the liver and improving the body's sensitivity to insulin so it uses insulin more effectively. Some people experience B-12 deficiency and may need to take supplements.","{'text': [""Metaformin""]}" -What is the best treatment for high cholesterol?,"Statins are one of the best-studied classes of medications and the most commonly used drugs for lowering LDL cholesterol. 
They are the most effective drugs for prevention of coronary heart disease, heart attack, stroke, and death","{'text': [""statins""]}" -what is the best treatment for ashma?,"There's currently no cure for asthma, but treatment can help control the symptoms so you're able to live a normal, active life. Inhalers, which are devices that let you breathe in medicine, are the main treatment. Tablets and other treatments may also be needed if your asthma is severe.","{'text': [""inhalers""]}" -what is the best treatment for acid reflux,Treatment of Gastroesophageal Reflux Disease GERD adults and pediatric patients Symptomatic GERD Omeprazole delayedrelease capsules USP are indicated for the treatment of heartburn and other symptoms associated with GERD in pediatric patients and adults Erosive Esophagitis Omeprazole delayedrelease capsules USP are indicated for the shortterm treatment 4 to 8 weeks of erosive esophagitis that has been,"{'text': [""omeprazole""]}" -What is the best treatment for hepatitis C infections?,Hepatitis C is treated using direct-acting antiviral (DAA) tablets. DAA tablets are the safest and most effective medicines for treating hepatitis C. They're highly effective at clearing the infection in more than 90% of people.,"{'text': [""direct-acting antiviral (DAA) tablets""]}" -What is the best treatment for rheumatoid arthritis pain?,There is no cure for rheumatoid arthritis. But clinical studies indicate that remission of symptoms is more likely when treatment begins early with medications known as disease-modifying antirheumatic drugs (DMARDs).,"{'text': [""disease-modifying antirheumatic drugs (DMARDs)""]}" -What is the best treatment for ADHD symptoms?,Stimulants are typically the first medication used for ADHD in both children and adults. 
Stimulants have been shown in multiple studies to be more effective than other medications in the treatment of ADHD.,"{'text': [""stimulants""]}" -What is the best treatment for hypothyroidism?,"An underactive thyroid (hypothyroidism) is usually treated by taking daily hormone replacement tablets called levothyroxine. Levothyroxine replaces the thyroxine hormone, which your thyroid does not make enough of. You'll initially have regular blood tests until the correct dose of levothyroxine is reached.","{'text': [""Levothyroxine""]}" -What is the best treatment for COPD symptoms?,"Several kinds of medications are used to treat the symptoms and complications of COPD. You may take some medications on a regular basis and others as needed. Bronchodilators are medications that usually come in inhalers — they relax the muscles around your airways. This can help relieve coughing and shortness of breath and make breathing easier. Depending on the severity of your disease, you may need a short-acting bronchodilator before activities, a long-acting bronchodilator that you use every day or both.","{'text': [""bronchodilator""]}" -What is the best treatment for type 1 diabetes?,Anyone who has type 1 diabetes needs insulin therapy throughout their life,"{'text': [""insulin therapy""]}" -What is the best treatment for Crohn's disease?,"For some people, a combination of these drugs works better than one drug alone. Immune system suppressors include: Azathioprine (Azasan, Imuran) and mercaptopurine (Purinethol, Purixan). These are the most widely used immunosuppressants for treatment of inflammatory bowel disease.","{'text': ["" Immune system suppressors include: Azathioprine (Azasan, Imuran) and mercaptopurine (Purinethol, Purixan)""]}" -What is the best treatment for epilepsy seizures?,"Most people with epilepsy can become seizure-free by taking one anti-seizure medicine, which is also called anti-epileptic medicine. 
Others may be able to decrease the frequency and intensity of their seizures by taking a combination of medicines.","{'text': [""anti-seizure medicine. Also called anti-epileptic medicine""]}" -What is the best treatment for skin cancer?,"Surgery is the primary treatment for most skin cancers. For patients with basal cell or squamous cell carcinomas, a dermatologist or other qualified doctor may perform an outpatient procedure using a local anesthetic.","{'text': [""surgery""]}" -What is the best treatment for ulcerative colitis?,"Ulcerative colitis treatment usually involves either medication therapy or surgery. Several categories of medications may be effective in treating ulcerative colitis. The type you take will depend on the severity of your condition. The medications that work well for some people may not work for others. It may take time to find a medication that helps you. In addition, because some medications have serious side effects, you'll need to weigh the benefits and risks of any treatment.","{'text': [""medication or surgery""]}" -What is the best treatment for osteoporosis?,Bisphosphonates are usually the first choice for osteoporosis treatment,"{'text': [""Bisphosphonates ""]}" -what is the best treatment for MS?,"For primary-progressive MS , ocrelizumab (Ocrevus) is the only FDA-approved disease-modifying therapy (DMT). Those who receive this treatment are slightly less likely to progress than those who are untreated","{'text': [""ocrelizumab (Ocrevus)""]}" -what is the best treatment for osteoarthritis?,Exercise and weight loss are the best ways to beat osteoarthritis (OA) pain.,"{'text': [""Exercise and weight loss ""]}" -what is the best treatment for flu?,"To treat flu, oseltamivir or inhaled zanamivir are usually prescribed for five days","{'text': [""oseltamivir or inhaled zanamivir ""]}" -what is the best treatment for DVT?,Treatment for DVT usually involves taking anticoagulant medicines. 
These reduce the blood's ability to clot and stop existing clots getting bigger. Heparin and warfarin are 2 types of anticoagulant often used to treat DVT. Heparin is usually prescribed first because it works immediately to prevent further clotting.,"{'text': ["" anticoagulant drugs. Heparin and warfarin are two examples""]}" diff --git a/mindsdb/integrations/utilities/datasets/question_answering/squad_v2_val_100_sample.csv b/mindsdb/integrations/utilities/datasets/question_answering/squad_v2_val_100_sample.csv deleted file mode 100644 index 928a7c7166e..00000000000 --- a/mindsdb/integrations/utilities/datasets/question_answering/squad_v2_val_100_sample.csv +++ /dev/null @@ -1,106 +0,0 @@ -id,title,context,question,answers -571156152419e3140095559d,Steam_engine,"The historical measure of a steam engine's energy efficiency was its ""duty"". The concept of duty was first introduced by Watt in order to illustrate how much more efficient his engines were over the earlier Newcomen designs. Duty is the number of foot-pounds of work delivered by burning one bushel (94 pounds) of coal. The best examples of Newcomen designs had a duty of about 7 million, but most were closer to 5 million. Watt's original low-pressure designs were able to deliver duty as high as 25 million, but averaged about 17. This was a three-fold improvement over the average Newcomen design. Early Watt engines equipped with high-pressure steam improved this to 65 million.",What is the weight of a bushel of coal in pounds?,"{'text': ['94', '94 pounds', '94 pounds'], 'answer_start': [300, 300, 300]}" -57339eb9d058e614000b5ef6,Warsaw,"As interesting examples of expositions the most notable are: the world's first Museum of Posters boasting one of the largest collections of art posters in the world, Museum of Hunting and Riding and the Railway Museum. 
From among Warsaw's 60 museums, the most prestigious ones are National Museum with a collection of works whose origin ranges in time from antiquity till the present epoch as well as one of the best collections of paintings in the country including some paintings from Adolf Hitler's private collection, and Museum of the Polish Army whose set portrays the history of arms.",What does the world's first Museum of Posters have one of the largest collections of in the world?,"{'text': ['art posters', 'art posters', 'art posters'], 'answer_start': [140, 140, 140]}" -5728855d3acd2414000dfa90,Yuan_dynasty,"When the Mongols placed the Uighurs of the Kingdom of Qocho over the Koreans at the court the Korean King objected, then the Mongol Emperor Kublai Khan rebuked the Korean King, saying that the Uighur King of Qocho was ranked higher than the Karluk Kara-Khanid ruler, who in turn was ranked higher than the Korean King, who was ranked last, because the Uighurs surrendered to the Mongols first, the Karluks surrendered after the Uighurs, and the Koreans surrendered last, and that the Uighurs surrendered peacefully without violently resisting.",Why were the Uighurs ranked higher by the Mongols?,"{'text': ['the Uighurs surrendered peacefully without violently resisting', 'Uighurs surrendered peacefully without violently resisting', 'surrendered peacefully without violently resisting'], 'answer_start': [480, 484, 492]}" -57269aa65951b619008f77ab,European_Union_law,"While constitutional law concerns the European Union's governance structure, administrative law binds EU institutions and member states to follow the law. Both member states and the Commission have a general legal right or ""standing"" (locus standi) to bring claims against EU institutions and other member states for breach of the treaties. 
From the EU's foundation, the Court of Justice also held that the Treaties allowed citizens or corporations to bring claims against EU and member state institutions for violation of the Treaties and Regulations, if they were properly interpreted as creating rights and obligations. However, under Directives, citizens or corporations were said in 1986 to not be allowed to bring claims against other non-state parties. This meant courts of member states were not bound to apply an EU law where a national rule conflicted, even though the member state government could be sued, if it would impose an obligation on another citizen or corporation. These rules on ""direct effect"" limit the extent to which member state courts are bound to administer EU law. All actions by EU institutions can be subject to judicial review, and judged by standards of proportionality, particularly where general principles of law, or fundamental rights are engaged. The remedy for a claimant where there has been a breach of the law is often monetary damages, but courts can also require specific performance or will grant an injunction, in order to ensure the law is effective as possible.",Which type of law makes EU institutions and its member states follow the law?,"{'text': ['administrative law', 'administrative law', 'administrative law'], 'answer_start': [77, 77, 77]}" -5705f7c875f01819005e77df,Southern_California,"Southern California consists of a heavily developed urban environment, home to some of the largest urban areas in the state, along with vast areas that have been left undeveloped. It is the third most populated megalopolis in the United States, after the Great Lakes Megalopolis and the Northeastern megalopolis. Much of southern California is famous for its large, spread-out, suburban communities and use of automobiles and highways. 
The dominant areas are Los Angeles, Orange County, San Diego, and Riverside-San Bernardino, each of which is the center of its respective metropolitan area, composed of numerous smaller cities and communities. The urban area is also host to an international metropolitan region in the form of San Diego–Tijuana, created by the urban area spilling over into Baja California.","Outside of its use of automobiles, what else is southern California famous for using?","{'text': ['highways', 'highways'], 'answer_start': [426, 426]}" -56e1f10ee3433e1400423225,Computational_complexity_theory,"Similarly, it is not known if L (the set of all problems that can be solved in logarithmic space) is strictly contained in P or equal to P. Again, there are many complexity classes between the two, such as NL and NC, and it is not known if they are distinct or equal classes.",What are two complexity classes between L and P?,"{'text': ['NL and NC', 'NL and NC', 'NL and NC'], 'answer_start': [206, 206, 206]}" -572726c9708984140094da7d,Civil_disobedience,"It has been argued that the term ""civil disobedience"" has always suffered from ambiguity and in modern times, become utterly debased. Marshall Cohen notes, ""It has been used to describe everything from bringing a test-case in the federal courts to taking aim at a federal official. Indeed, for Vice President Agnew it has become a code-word describing the activities of muggers, arsonists, draft evaders, campaign hecklers, campus militants, anti-war demonstrators, juvenile delinquents and political assassins.""",More in the present prevalence of civil disobedience has turned and said to be?,"{'text': ['utterly debased', 'debased', 'everything from bringing a test-case in the federal courts to taking aim at a federal official', 'utterly debased'], 'answer_start': [117, 125, 186, 117]}" -5730a0778ab72b1400f9c60e,Imperialism,"The concept environmental determinism served as a moral justification for domination of certain territories and peoples. 
It was believed that a certain person's behaviours were determined by the environment in which they lived and thus validated their domination. For example, people living in tropical environments were seen as ""less civilized"" therefore justifying colonial control as a civilizing mission. Across the three waves of European colonialism (first in the Americas, second in Asia and lastly in Africa), environmental determinism was used to categorically place indigenous people in a racial hierarchy. This takes two forms, orientalism and tropicality.",What were the two forms of environmental determinism?,"{'text': ['orientalism and tropicality', 'orientalism and tropicality', 'orientalism and tropicality', 'orientalism and tropicality', 'orientalism and tropicality.'], 'answer_start': [639, 639, 639, 639, 639]}" -57293f8a6aef051400154bde,Intergovernmental_Panel_on_Climate_Change,"In addition to climate assessment reports, the IPCC is publishing Special Reports on specific topics. The preparation and approval process for all IPCC Special Reports follows the same procedures as for IPCC Assessment Reports. In the year 2011 two IPCC Special Report were finalized, the Special Report on Renewable Energy Sources and Climate Change Mitigation (SRREN) and the Special Report on Managing Risks of Extreme Events and Disasters to Advance Climate Change Adaptation (SREX). Both Special Reports were requested by governments.",How does the IPCC prepare Special Reports?,"{'text': ['the same procedures as for IPCC Assessment Reports', 'follows the same procedures as for IPCC Assessment Reports', 'the same procedures as for IPCC Assessment Reports'], 'answer_start': [176, 168, 176]}" -56e1bc3ae3433e1400423104,Computational_complexity_theory,"To classify the computation time (or similar resources, such as space consumption), one is interested in proving upper and lower bounds on the minimum amount of time required by the most efficient algorithm solving a given problem. 
The complexity of an algorithm is usually taken to be its worst-case complexity, unless specified otherwise. Analyzing a particular algorithm falls under the field of analysis of algorithms. To show an upper bound T(n) on the time complexity of a problem, one needs to show only that there is a particular algorithm with running time at most T(n). However, proving lower bounds is much more difficult, since lower bounds make a statement about all possible algorithms that solve a given problem. The phrase ""all possible algorithms"" includes not just the algorithms known today, but any algorithm that might be discovered in the future. To show a lower bound of T(n) for a problem requires showing that no algorithm can have time complexity lower than T(n).",Classification of resources is contingent on determining the upper and lower bounds of minimum time required by what? ,"{'text': ['the most efficient algorithm', 'the most efficient algorithm', 'the most efficient algorithm solving a given problem'], 'answer_start': [178, 178, 178]}" -5728202c4b864d19001644f0,Civil_disobedience,"Non-revolutionary civil disobedience is a simple disobedience of laws on the grounds that they are judged ""wrong"" by an individual conscience, or as part of an effort to render certain laws ineffective, to cause their repeal, or to exert pressure to get one's political wishes on some other issue. Revolutionary civil disobedience is more of an active attempt to overthrow a government (or to change cultural traditions, social customs, religious beliefs, etc...revolution doesn't have to be political, i.e. ""cultural revolution"", it simply implies sweeping and widespread change to a section of the social fabric). Gandhi's acts have been described as revolutionary civil disobedience. It has been claimed that the Hungarians under Ferenc Deák directed revolutionary civil disobedience against the Austrian government. 
Thoreau also wrote of civil disobedience accomplishing ""peaceable revolution."" Howard Zinn, Harvey Wheeler, and others have identified the right espoused in The Declaration of Independence to ""alter or abolish"" an unjust government to be a principle of civil disobedience. ",What other topics can Civil disobedience pertain to?,"{'text': ['cultural traditions, social customs, religious beliefs', 'revolutionary civil disobedience', 'change cultural traditions, social customs, religious beliefs, etc', 'cultural traditions, social customs, religious beliefs', 'peaceable revolution'], 'answer_start': [400, 653, 393, 400, 876]}" -57263eaa38643c19005ad375,Ctenophora,"Ctenophora (/tᵻˈnɒfərə/; singular ctenophore, /ˈtɛnəfɔːr/ or /ˈtiːnəfɔːr/; from the Greek κτείς kteis 'comb' and φέρω pherō 'carry'; commonly known as comb jellies) is a phylum of animals that live in marine waters worldwide. Their most distinctive feature is the ‘combs’ – groups of cilia which they use for swimming – they are the largest animals that swim by means of cilia. Adults of various species range from a few millimeters to 1.5 m (4 ft 11 in) in size. Like cnidarians, their bodies consist of a mass of jelly, with one layer of cells on the outside and another lining the internal cavity. In ctenophores, these layers are two cells deep, while those in cnidarians are only one cell deep. Some authors combined ctenophores and cnidarians in one phylum, Coelenterata, as both groups rely on water flow through the body cavity for both digestion and respiration. Increasing awareness of the differences persuaded more recent authors to classify them as separate phyla.",Where do ctenophora live?,"{'text': ['marine waters', 'marine waters worldwide', 'marine waters'], 'answer_start': [201, 201, 201]}" -5728e3c33acd2414000e0132,Civil_disobedience,"The earliest recorded incidents of collective civil disobedience took place during the Roman Empire[citation needed]. 
Unarmed Jews gathered in the streets to prevent the installation of pagan images in the Temple in Jerusalem.[citation needed][original research?] In modern times, some activists who commit civil disobedience as a group collectively refuse to sign bail until certain demands are met, such as favorable bail conditions, or the release of all the activists. This is a form of jail solidarity.[page needed] There have also been many instances of solitary civil disobedience, such as that committed by Thoreau, but these sometimes go unnoticed. Thoreau, at the time of his arrest, was not yet a well-known author, and his arrest was not covered in any newspapers in the days, weeks and months after it happened. The tax collector who arrested him rose to higher political office, and Thoreau's essay was not published until after the end of the Mexican War.",What was the goal of this Roman disobedience?,"{'text': ['prevent the installation of pagan images', 'prevent the installation of pagan images in the Temple in Jerusalem', 'prevent the installation of pagan images in the Temple in Jerusalem', 'to prevent the installation of pagan images in the Temple in Jerusalem', 'prevent the installation of pagan images'], 'answer_start': [158, 158, 158, 155, 158]}" -572ffabf04bcaa1900d76f9f,Islamism,"Islamists have asked the question, ""If Islam is a way of life, how can we say that those who want to live by its principles in legal, social, political, economic, and political spheres of life are not Muslims, but Islamists and believe in Islamism, not [just] Islam?"" Similarly, a writer for the International Crisis Group maintains that ""the conception of 'political Islam'"" is a creation of Americans to explain the Iranian Islamic Revolution and apolitical Islam was a historical fluke of the ""short-lived era of the heyday of secular Arab nationalism between 1945 and 1970"", and it is quietist/non-political Islam, not Islamism, that requires explanation.",What term do Islamists think should 
be applied to them?,"{'text': ['Muslims', 'Muslims'], 'answer_start': [201, 201]}" -5729a3716aef05140015506c,Prime_number,"Prime numbers have influenced many artists and writers. The French composer Olivier Messiaen used prime numbers to create ametrical music through ""natural phenomena"". In works such as La Nativité du Seigneur (1935) and Quatre études de rythme (1949–50), he simultaneously employs motifs with lengths given by different prime numbers to create unpredictable rhythms: the primes 41, 43, 47 and 53 appear in the third étude, ""Neumes rythmiques"". According to Messiaen this way of composing was ""inspired by the movements of nature, movements of free and unequal durations"".",What is another piece created by Olivier Messiaen?,"{'text': ['Quatre études de rythme', 'Quatre études de rythme', 'Quatre études de rythme', 'Quatre études de rythme'], 'answer_start': [219, 219, 219, 219]}" -57377ec7c3c5551400e51f09,Force,"In modern particle physics, forces and the acceleration of particles are explained as a mathematical by-product of exchange of momentum-carrying gauge bosons. With the development of quantum field theory and general relativity, it was realized that force is a redundant concept arising from conservation of momentum (4-momentum in relativity and momentum of virtual particles in quantum electrodynamics). The conservation of momentum can be directly derived from the homogeneity or symmetry of space and so is usually considered more fundamental than the concept of a force. Thus the currently known fundamental forces are considered more accurately to be ""fundamental interactions"".:199–128 When particle A emits (creates) or absorbs (annihilates) virtual particle B, a momentum conservation results in recoil of particle A making impression of repulsion or attraction between particles A A' exchanging by B. This description applies to all forces arising from fundamental interactions. 
While sophisticated mathematical descriptions are needed to predict, in full detail, the accurate result of such interactions, there is a conceptually simple way to describe such interactions through the use of Feynman diagrams. In a Feynman diagram, each matter particle is represented as a straight line (see world line) traveling through time, which normally increases up or to the right in the diagram. Matter and anti-matter particles are identical except for their direction of propagation through the Feynman diagram. World lines of particles intersect at interaction vertices, and the Feynman diagram represents any force arising from an interaction as occurring at the vertex with an associated instantaneous change in the direction of the particle world lines. Gauge bosons are emitted away from the vertex as wavy lines and, in the case of virtual particle exchange, are absorbed at an adjacent vertex.",Matter particles are shown as what kind of lines in a Feynman diagram?,"{'text': ['straight', 'straight line', 'straight', 'straight'], 'answer_start': [1280, 1280, 1280, 1280]}" -57265700dd62a815002e820e,Black_Death,"In the first half of the 17th century, a plague claimed some 1.7 million victims in Italy, or about 14% of the population. In 1656, the plague killed about half of Naples' 300,000 inhabitants. More than 1.25 million deaths resulted from the extreme incidence of plague in 17th-century Spain. The plague of 1649 probably reduced the population of Seville by half. In 1709–13, a plague epidemic that followed the Great Northern War (1700–21, Sweden v. Russia and allies) killed about 100,000 in Sweden, and 300,000 in Prussia. The plague killed two-thirds of the inhabitants of Helsinki, and claimed a third of Stockholm's population. 
Europe's last major epidemic occurred in 1720 in Marseille.",How many were killed by plague in Italy in the 17th century?,"{'text': ['some 1.7 million victims', '1.7 million', '1.7 million'], 'answer_start': [56, 61, 61]}" -572754dd708984140094dc3b,Private_school,"In Sweden, pupils are free to choose a private school and the private school gets paid the same amount as municipal schools. Over 10% of Swedish pupils were enrolled in private schools in 2008. Sweden is internationally known for this innovative school voucher model that provides Swedish pupils with the opportunity to choose the school they prefer. For instance, the biggest school chain, Kunskapsskolan (“The Knowledge School”), offers 30 schools and a web-based environment, has 700 employees and teaches nearly 10,000 pupils. The Swedish system has been recommended to Barack Obama.","As of 2008, about what percentage of Swedish students attended private schools?","{'text': ['10', '10%', '10'], 'answer_start': [130, 130, 130]}" -5725e748ec44d21400f3d735,"Fresno,_California","The area is also known for its early twentieth century homes, many of which have been restored in recent decades. The area includes many California Bungalow and American Craftsman style homes, Spanish Colonial Revival Style architecture, Mediterranean Revival Style architecture, Mission Revival Style architecture, and many Storybook houses designed by Fresno architects, Hilliard, Taylor & Wheeler. The residential architecture of the Tower District contrasts with the newer areas of tract homes urban sprawl in north and east areas of Fresno.",Does the residential architecture of the Tower District compare or contrast with other part of Fresno?,"{'text': ['contrasts', 'contrasts', 'contrasts'], 'answer_start': [452, 452, 452]}" -5730b8ca8ab72b1400f9c705,Imperialism,"One key figure in the plans for what would come to be known as American Empire, was a geographer named Isiah Bowman. 
Bowman was the director of the American Geographical Society in 1914. Three years later in 1917, he was appointed to then President Woodrow Wilson's inquiry in 1917. The inquiry was the idea of President Wilson and the American delegation from the Paris Peace Conference. The point of this inquiry was to build a premise that would allow for U.S authorship of a 'new world' which was to be characterized by geographical order. As a result of his role in the inquiry, Isiah Bowman would come to be known as Wilson's geographer. ",When was Isiah Bowman appointed to President Wilson's Inquiry?,"{'text': ['1917', '1917', '1917', '1917', '1917'], 'answer_start': [208, 277, 208, 208, 208]}" -572fcc43b2c2fd1400568480,Scottish_Parliament,"Reserved matters are subjects that are outside the legislative competence of the Scotland Parliament. The Scottish Parliament is unable to legislate on such issues that are reserved to, and dealt with at, Westminster (and where Ministerial functions usually lie with UK Government ministers). These include abortion, broadcasting policy, civil service, common markets for UK goods and services, constitution, electricity, coal, oil, gas, nuclear energy, defence and national security, drug policy, employment, foreign policy and relations with Europe, most aspects of transport safety and regulation, National Lottery, protection of borders, social security and stability of UK's fiscal, economic and monetary system.",Most aspects of transport safety is a subject dealt with by whom?,"{'text': ['UK Government ministers', 'UK Government ministers', 'Westminster'], 'answer_start': [267, 267, 205]}" -572fc043a23a5019007fc960,Scottish_Parliament,"The first item of business on Wednesdays is usually Time for Reflection, at which a speaker addresses members for up to four minutes, sharing a perspective on issues of faith. This contrasts with the formal style of ""Prayers"", which is the first item of business in meetings of the House of Commons. 
Speakers are drawn from across Scotland and are chosen to represent the balance of religious beliefs according to the Scottish census. Invitations to address Parliament in this manner are determined by the Presiding Officer on the advice of the parliamentary bureau. Faith groups can make direct representations to the Presiding Officer to nominate speakers.",Who decides who gets to address the members of Parliament to share their thoughts on issues of faith?,"{'text': ['Presiding Officer', 'Presiding Officer', 'the Presiding Officer'], 'answer_start': [506, 619, 502]}" -57286ead2ca10214002da347,Yuan_dynasty,"Following the conquest of Dali in 1253, the former ruling Duan dynasty were appointed as governors-general, recognized as imperial officials by the Yuan, Ming, and Qing-era governments, principally in the province of Yunnan. Succession for the Yuan dynasty, however, was an intractable problem, later causing much strife and internal struggle. This emerged as early as the end of Kublai's reign. Kublai originally named his eldest son, Zhenjin, as the Crown Prince, but he died before Kublai in 1285. Thus, Zhenjin's third son, with the support of his mother Kökejin and the minister Bayan, succeeded the throne and ruled as Temür Khan, or Emperor Chengzong, from 1294 to 1307. Temür Khan decided to maintain and continue much of the work begun by his grandfather. He also made peace with the western Mongol khanates as well as neighboring countries such as Vietnam, which recognized his nominal suzerainty and paid tributes for a few decades. However, the corruption in the Yuan dynasty began during the reign of Temür Khan.",Who had Kublai wanted to succeed him?,"{'text': ['his eldest son, Zhenjin', 'Zhenjin', 'Zhenjin'], 'answer_start': [420, 436, 436]}" -5737898f1c45671900574498,Force,"It was only the orbit of the planet Mercury that Newton's Law of Gravitation seemed not to fully explain. 
Some astrophysicists predicted the existence of another planet (Vulcan) that would explain the discrepancies; however, despite some early indications, no such planet could be found. When Albert Einstein formulated his theory of general relativity (GR) he turned his attention to the problem of Mercury's orbit and found that his theory added a correction, which could account for the discrepancy. This was the first time that Newton's Theory of Gravity had been shown to be less correct than an alternative.",Who came up with the theory of relativity?,"{'text': ['Albert Einstein', 'Albert Einstein', 'Albert Einstein', 'Albert Einstein'], 'answer_start': [293, 293, 293, 293]}" -56de3ebc4396321400ee26e7,Normans,"In 1096, Crusaders passing by the siege of Amalfi were joined by Bohemond of Taranto and his nephew Tancred with an army of Italo-Normans. Bohemond was the de facto leader of the Crusade during its passage through Asia Minor. After the successful Siege of Antioch in 1097, Bohemond began carving out an independent principality around that city. Tancred was instrumental in the conquest of Jerusalem and he worked for the expansion of the Crusader kingdom in Transjordan and the region of Galilee.[citation needed]",What was the name of Bohemond's nephew?,"{'text': ['Tancred', 'Tancred', 'Tancred'], 'answer_start': [100, 100, 100]}" -5729d36b1d04691400779607,Economic_inequality,"A study by the World Institute for Development Economics Research at United Nations University reports that the richest 1% of adults alone owned 40% of global assets in the year 2000. The three richest people in the world possess more financial assets than the lowest 48 nations combined. The combined wealth of the ""10 million dollar millionaires"" grew to nearly $41 trillion in 2008. A January 2014 report by Oxfam claims that the 85 wealthiest individuals in the world have a combined wealth equal to that of the bottom 50% of the world's population, or about 3.5 billion people. 
According to a Los Angeles Times analysis of the report, the wealthiest 1% owns 46% of the world's wealth; the 85 richest people, a small part of the wealthiest 1%, own about 0.7% of the human population's wealth, which is the same as the bottom half of the population. More recently, in January 2015, Oxfam reported that the wealthiest 1 percent will own more than half of the global wealth by 2016. An October 2014 study by Credit Suisse also claims that the top 1% now own nearly half of the world's wealth and that the accelerating disparity could trigger a recession. In October 2015, Credit Suisse published a study which shows global inequality continues to increase, and that half of the world's wealth is now in the hands of those in the top percentile, whose assets each exceed $759,900. A 2016 report by Oxfam claims that the 62 wealthiest individuals own as much wealth as the poorer half of the global population combined. Oxfam's claims have however been questioned on the basis of the methodology used: by using net wealth (adding up assets and subtracting debts), the Oxfam report, for instance, finds that there are more poor people in the United States and Western Europe than in China (due to a greater tendency to take on debts).[unreliable source?][unreliable source?] Anthony Shorrocks, the lead author of the Credit Suisse report which is one of the sources of Oxfam's data, considers the criticism about debt to be a ""silly argument"" and ""a non-issue . . . a diversion.""",What percent of the global assets in 2000 were owned by just 1% of adults?,"{'text': ['40%', '40%', '40%', '40'], 'answer_start': [145, 145, 145, 145]}" -57339eb9d058e614000b5ef8,Warsaw,"As interesting examples of expositions the most notable are: the world's first Museum of Posters boasting one of the largest collections of art posters in the world, Museum of Hunting and Riding and the Railway Museum. 
From among Warsaw's 60 museums, the most prestigious ones are National Museum with a collection of works whose origin ranges in time from antiquity till the present epoch as well as one of the best collections of paintings in the country including some paintings from Adolf Hitler's private collection, and Museum of the Polish Army whose set portrays the history of arms.",Warsaw's National Museum is one of the most what?,"{'text': ['prestigious', 'prestigious', 'prestigious'], 'answer_start': [260, 260, 260]}" -572643de5951261400b5195c,Packet_switching,There were two kinds of X.25 networks. Some such as DATAPAC and TRANSPAC were initially implemented with an X.25 external interface. Some older networks such as TELENET and TYMNET were modified to provide a X.25 host interface in addition to older host connection schemes. DATAPAC was developed by Bell Northern Research which was a joint venture of Bell Canada (a common carrier) and Northern Telecom (a telecommunications equipment supplier). Northern Telecom sold several DATAPAC clones to foreign PTTs including the Deutsche Bundespost. X.75 and X.121 allowed the interconnection of national X.25 networks. A user or host could call a host on a foreign network by including the DNIC of the remote network as part of the destination address.[citation needed],WHat did foreign clones of DATAPAC allow for ,"{'text': ['A user or host could call a host on a foreign network by including the DNIC of the remote network as part of the destination address', 'the interconnection of national X.25 networks', 'interconnection of national X.25 networks'], 'answer_start': [611, 564, 568]}" -5726449f1125e71900ae1928,Ctenophora,"Despite their soft, gelatinous bodies, fossils thought to represent ctenophores, apparently with no tentacles but many more comb-rows than modern forms, have been found in lagerstätten as far back as the early Cambrian, about 515 million years ago. 
The position of the ctenophores in the evolutionary family tree of animals has long been debated, and the majority view at present, based on molecular phylogenetics, is that cnidarians and bilaterians are more closely related to each other than either is to ctenophores. A recent molecular phylogenetics analysis concluded that the common ancestor of all modern ctenophores was cydippid-like, and that all the modern groups appeared relatively recently, probably after the Cretaceous–Paleogene extinction event 66 million years ago. Evidence accumulating since the 1980s indicates that the ""cydippids"" are not monophyletic, in other words do not include all and only the descendants of a single common ancestor, because all the other traditional ctenophore groups are descendants of various cydippids.",Fossils found that were believed to be ctenophores were how old?,"{'text': ['515 million years', '66 million years', '515 million years'], 'answer_start': [226, 760, 226]}" -57264cc6dd62a815002e80e6,Black_Death,"The historian Francis Aidan Gasquet wrote about the 'Great Pestilence' in 1893 and suggested that ""it would appear to be some form of the ordinary Eastern or bubonic plague"". He was able to adopt the epidemiology of the bubonic plague for the Black Death for the second edition in 1908, implicating rats and fleas in the process, and his interpretation was widely accepted for other ancient and medieval epidemics, such as the Justinian plague that was prevalent in the Eastern Roman Empire from 541 to 700 CE.",When did the second edition of Gasquet's book come out?,"{'text': ['1908', '1908', '1908'], 'answer_start': [281, 281, 281]}" -57263b1638643c19005ad335,Packet_switching,"Both X.25 and Frame Relay provide connection-oriented operations. But X.25 does it at the network layer of the OSI Model. Frame Relay does it at level two, the data link layer. 
Another major difference between X.25 and Frame Relay is that X.25 requires a handshake between the communicating parties before any user packets are transmitted. Frame Relay does not define any such handshakes. X.25 does not define any operations inside the packet network. It only operates at the user-network-interface (UNI). Thus, the network provider is free to use any procedure it wishes inside the network. X.25 does specify some limited re-transmission procedures at the UNI, and its link layer protocol (LAPB) provides conventional HDLC-type link management procedures. Frame Relay is a modified version of ISDN's layer two protocol, LAPD and LAPB. As such, its integrity operations pertain only between nodes on a link, not end-to-end. Any retransmissions must be carried out by higher layer protocols. The X.25 UNI protocol is part of the X.25 protocol suite, which consists of the lower three layers of the OSI Model. It was widely used at the UNI for packet switching networks during the 1980s and early 1990s, to provide a standardized interface into and out of packet networks. Some implementations used X.25 within the network as well, but its connection-oriented features made this setup cumbersome and inefficient. Frame relay operates principally at layer two of the OSI Model. However, its address field (the Data Link Connection ID, or DLCI) can be used at the OSI network layer, with a minimum set of procedures. Thus, it rids itself of many X.25 layer 3 encumbrances, but still has the DLCI as an ID beyond a node-to-node layer two link protocol. The simplicity of Frame Relay makes it faster and more efficient than X.25. Because Frame relay is a data link layer protocol, like X.25 it does not define internal network routing operations. For X.25 its packet IDs---the virtual circuit and virtual channel numbers have to be correlated to network addresses. The same is true for Frame Relays DLCI. How this is done is up to the network provider. 
Frame Relay, by virtue of having no network layer procedures is connection-oriented at layer two, by using the HDLC/LAPD/LAPB Set Asynchronous Balanced Mode (SABM). X.25 connections are typically established for each communication session, but it does have a feature allowing a limited amount of traffic to be passed across the UNI without the connection-oriented handshake. For a while, Frame Relay was used to interconnect LANs across wide area networks. However, X.25 and well as Frame Relay have been supplanted by the Internet Protocol (IP) at the network layer, and the Asynchronous Transfer Mode (ATM) and or versions of Multi-Protocol Label Switching (MPLS) at layer two. A typical configuration is to run IP over ATM or a version of MPLS. < Uyless Black, ATM, Volume I, Prentice Hall, 1995>",What supplanted Frame Relay and X.25 ,"{'text': ['supplanted by the Internet Protocol (IP) at the network layer, and the Asynchronous Transfer Mode (ATM) and or versions of Multi-Protocol Label Switching', 'Internet Protocol (IP)', 'Internet Protocol'], 'answer_start': [2652, 2670, 2670]}" -56de3f784396321400ee26fa,Normans,"In April 1191 Richard the Lion-hearted left Messina with a large fleet in order to reach Acre. But a storm dispersed the fleet. After some searching, it was discovered that the boat carrying his sister and his fiancée Berengaria was anchored on the south coast of Cyprus, together with the wrecks of several other ships, including the treasure ship. Survivors of the wrecks had been taken prisoner by the island's despot Isaac Komnenos. On 1 May 1191, Richard's fleet arrived in the port of Limassol on Cyprus. He ordered Isaac to release the prisoners and the treasure. 
Isaac refused, so Richard landed his troops and took Limassol.",What ruined Richard's plans to reach Acre?,"{'text': ['a storm', 'a storm', 'a storm'], 'answer_start': [99, 99, 99]}" -57111b95a58dae1900cd6c53,Huguenot,"Frederick William, Elector of Brandenburg, invited Huguenots to settle in his realms, and a number of their descendants rose to positions of prominence in Prussia. Several prominent German military, cultural, and political figures were ethnic Huguenot, including poet Theodor Fontane, General Hermann von François, the hero of the First World War Battle of Tannenberg, Luftwaffe General and fighter ace Adolf Galland, Luftwaffe flying ace Hans-Joachim Marseille, and famed U-boat captain Lothar von Arnauld de la Perière. The last Prime Minister of the (East) German Democratic Republic, Lothar de Maizière, is also a descendant of a Huguenot family, as is the German Federal Minister of the Interior, Thomas de Maizière.",Who was the final Prime Minister of East Germany?,"{'text': ['Lothar de Maizière', 'Lothar de Maizière', 'Lothar de Maizière'], 'answer_start': [588, 588, 588]}" -57096f37200fba1400367fe7,Sky_(United_Kingdom),"BSkyB has no veto over the presence of channels on their EPG, with open access being an enforced part of their operating licence from Ofcom. Any channel which can get carriage on a suitable beam of a satellite at 28° East is entitled to access to BSkyB's EPG for a fee, ranging from £15–100,000. Third-party channels which opt for encryption receive discounts ranging from reduced price to free EPG entries, free carriage on a BSkyB leased transponder, or actual payment for being carried. 
However, even in this case, BSkyB does not carry any control over the channel's content or carriage issues such as picture quality.",Can BSkyB veto the presence of channels on their EPG?,"{'text': ['no', 'no', 'Third-party channels'], 'answer_start': [10, 10, 296]}" -5727ee372ca10214002d99f0,Economic_inequality,"On the other hand, higher economic inequality tends to increase entrepreneurship rates at the individual level (self-employment). However, most of it is often based on necessity rather than opportunity. Necessity-based entrepreneurship is motivated by survival needs such as income for food and shelter (""push"" motivations), whereas opportunity-based entrepreneurship is driven by achievement-oriented motivations (""pull"") such as vocation and more likely to involve the pursue of new products, services, or underserved market needs. The economic impact of the former type of entrepreneurialism tends to be redistributive while the latter is expected to foster technological progress and thus have a more positive impact on economic growth.",What type of entrepreneurship leads to advancements in technology?,"{'text': ['opportunity-based entrepreneurship', 'opportunity-based entrepreneurship', 'opportunity-based'], 'answer_start': [333, 333, 333]}" -5725b76389a1e219009abd4d,1973_oil_crisis,"On August 15, 1971, the United States unilaterally pulled out of the Bretton Woods Accord. The US abandoned the Gold Exchange Standard whereby the value of the dollar had been pegged to the price of gold and all other currencies were pegged to the dollar, whose value was left to ""float"" (rise and fall according to market demand). Shortly thereafter, Britain followed, floating the pound sterling. The other industrialized nations followed suit with their respective currencies. Anticipating that currency values would fluctuate unpredictably for a time, the industrialized nations increased their reserves (by expanding their money supplies) in amounts far greater than before. 
The result was a depreciation of the dollar and other industrialized nations' currencies. Because oil was priced in dollars, oil producers' real income decreased. In September 1971, OPEC issued a joint communiqué stating that, from then on, they would price oil in terms of a fixed amount of gold.",When did oil start getting priced in the terms of gold?,"{'text': ['In September 1971', 'September 1971', 'September 1971', 'September 1971', 'September 1971'], 'answer_start': [843, 846, 846, 846, 846]}" -572991943f37b319004784a5,Prime_number,"A third type of conjectures concerns aspects of the distribution of primes. It is conjectured that there are infinitely many twin primes, pairs of primes with difference 2 (twin prime conjecture). Polignac's conjecture is a strengthening of that conjecture, it states that for every positive integer n, there are infinitely many pairs of consecutive primes that differ by 2n. It is conjectured there are infinitely many primes of the form n2 + 1. These conjectures are special cases of the broad Schinzel's hypothesis H. Brocard's conjecture says that there are always at least four primes between the squares of consecutive primes greater than 2. Legendre's conjecture states that there is a prime number between n2 and (n + 1)2 for every positive integer n. It is implied by the stronger Cramér's conjecture.",What conjecture holds that there are always a minimum of 4 primes between the squares of consecutive primes greater than 2?,"{'text': [""Brocard's conjecture"", ""Brocard's"", ""Brocard's conjecture"", ""Brocard's""], 'answer_start': [521, 521, 521, 521]}" -5725cbb289a1e219009abed4,Amazon_rainforest,"The first European to travel the length of the Amazon River was Francisco de Orellana in 1542. The BBC's Unnatural Histories presents evidence that Orellana, rather than exaggerating his claims as previously thought, was correct in his observations that a complex civilization was flourishing along the Amazon in the 1540s. 
It is believed that the civilization was later devastated by the spread of diseases from Europe, such as smallpox. Since the 1970s, numerous geoglyphs have been discovered on deforested land dating between AD 0–1250, furthering claims about Pre-Columbian civilizations. Ondemar Dias is accredited with first discovering the geoglyphs in 1977 and Alceu Ranzi with furthering their discovery after flying over Acre. The BBC's Unnatural Histories presented evidence that the Amazon rainforest, rather than being a pristine wilderness, has been shaped by man for at least 11,000 years through practices such as forest gardening and terra preta.",What was believed to be the cause of devastation to the civilization?,"{'text': ['diseases from Europe', 'the spread of diseases from Europe', 'spread of diseases from Europe'], 'answer_start': [399, 385, 389]}" -572673f5708984140094c69b,Geology,"The addition of new rock units, both depositionally and intrusively, often occurs during deformation. Faulting and other deformational processes result in the creation of topographic gradients, causing material on the rock unit that is increasing in elevation to be eroded by hillslopes and channels. These sediments are deposited on the rock unit that is going down. Continual motion along the fault maintains the topographic gradient in spite of the movement of sediment, and continues to create accommodation space for the material to deposit. Deformational events are often also associated with volcanism and igneous activity. Volcanic ashes and lavas accumulate on the surface, and igneous intrusions enter from below. Dikes, long, planar igneous intrusions, enter along cracks, and therefore often form in large numbers in areas that are being actively deformed. 
This can result in the emplacement of dike swarms, such as those that are observable across the Canadian shield, or rings of dikes around the lava tube of a volcano.","What is another word for long, planar igneous intrusions?","{'text': ['Dikes', 'Dikes', 'Dikes'], 'answer_start': [724, 724, 724]}" -5727526cdd62a815002e9b0e,Construction,"There is also a growing number of new forms of procurement that involve relationship contracting where the emphasis is on a co-operative relationship between the principal and contractor and other stakeholders within a construction project. New forms include partnering such as Public-Private Partnering (PPPs) aka private finance initiatives (PFIs) and alliances such as ""pure"" or ""project"" alliances and ""impure"" or ""strategic"" alliances. The focus on co-operation is to ameliorate the many problems that arise from the often highly competitive and adversarial practices within the construction industry.",A growing number of new forms of procurement involves what?,"{'text': ['relationship contracting where the emphasis is on a co-operative relationship', 'relationship contracting', 'relationship contracting'], 'answer_start': [72, 72, 72]}" -5726431d271a42140099d7f9,Ctenophora,"Ctenophores may be abundant during the summer months in some coastal locations, but in other places they are uncommon and difficult to find. In bays where they occur in very high numbers, predation by ctenophores may control the populations of small zooplanktonic organisms such as copepods, which might otherwise wipe out the phytoplankton (planktonic plants), which are a vital part of marine food chains. One ctenophore, Mnemiopsis, has accidentally been introduced into the Black Sea, where it is blamed for causing fish stocks to collapse by eating both fish larvae and organisms that would otherwise have fed the fish. 
The situation was aggravated by other factors, such as over-fishing and long-term environmental changes that promoted the growth of the Mnemiopsis population. The later accidental introduction of Beroe helped to mitigate the problem, as Beroe preys on other ctenophores.",What was done to counteract the overpopulation of mnemiopsis in The Black Sea?,"{'text': ['introduction of Beroe', 'accidental introduction of Beroe'], 'answer_start': [805, 794]}" -57280e1aff5b5019007d9bec,"Jacksonville,_Florida","The area of the modern city of Jacksonville has been inhabited for thousands of years. On Black Hammock Island in the national Timucuan Ecological and Historic Preserve, a University of North Florida team discovered some of the oldest remnants of pottery in the United States, dating to 2500 BC. In the 16th century, the beginning of the historical era, the region was inhabited by the Mocama, a coastal subgroup of the Timucua people. At the time of contact with Europeans, all Mocama villages in present-day Jacksonville were part of the powerful chiefdom known as the Saturiwa, centered around the mouth of the St. Johns River. One early map shows a village called Ossachite at the site of what is now downtown Jacksonville; this may be the earliest recorded name for that area.",The area where Jacksonville currently sits has been inhabited for how many years?,"{'text': ['thousands', 'thousands of years', 'thousands of years'], 'answer_start': [67, 67, 67]}" -5706074552bb8914006897d7,Southern_California,"Southern California consists of one Combined Statistical Area, eight Metropolitan Statistical Areas, one international metropolitan area, and multiple metropolitan divisions. The region is home to two extended metropolitan areas that exceed five million in population. These are the Greater Los Angeles Area at 17,786,419, and San Diego–Tijuana at 5,105,768. 
Of these metropolitan areas, the Los Angeles-Long Beach-Santa Ana metropolitan area, Riverside-San Bernardino-Ontario metropolitan area, and Oxnard-Thousand Oaks-Ventura metropolitan area form Greater Los Angeles; while the El Centro metropolitan area and San Diego-Carlsbad-San Marcos metropolitan area form the Southern Border Region. North of Greater Los Angeles are the Santa Barbara, San Luis Obispo, and Bakersfield metropolitan areas.",What does the El Centro metropolitan area and San Diego-Carslbad-San Marcos metropolitan area form?,"{'text': ['Southern Border Region', 'the Southern Border Region', 'Southern Border Region'], 'answer_start': [672, 668, 672]}" -57263c78ec44d21400f3dc7c,Packet_switching,"ARPANET and SITA HLN became operational in 1969. Before the introduction of X.25 in 1973, about twenty different network technologies had been developed. Two fundamental differences involved the division of functions and tasks between the hosts at the edge of the network and the network core. In the datagram system, the hosts have the responsibility to ensure orderly delivery of packets. The User Datagram Protocol (UDP) is an example of a datagram protocol. In the virtual call system, the network guarantees sequenced delivery of data to the host. This results in a simpler host interface with less functionality than in the datagram model. 
The X.25 protocol suite uses this network type.",2 differences betwen X.25 and ARPNET CITA technologies ,"{'text': ['Two fundamental differences involved the division of functions and tasks between the hosts at the edge of the network and the network core', 'the division of functions and tasks between the hosts at the edge of the network and the network core.', 'division of functions and tasks between the hosts at the edge of the network and the network core'], 'answer_start': [154, 191, 195]}" -5729efab3f37b319004785d1,Immune_system,"Dendritic cells (DC) are phagocytes in tissues that are in contact with the external environment; therefore, they are located mainly in the skin, nose, lungs, stomach, and intestines. They are named for their resemblance to neuronal dendrites, as both have many spine-like projections, but dendritic cells are in no way connected to the nervous system. Dendritic cells serve as a link between the bodily tissues and the innate and adaptive immune systems, as they present antigens to T cells, one of the key cell types of the adaptive immune system.",What are one of the key cell types of the adaptive immune system?,"{'text': ['T cells', 'T cells', 'T cells'], 'answer_start': [484, 484, 484]}" -57264c42dd62a815002e80c7,Black_Death,"The dominant explanation for the Black Death is the plague theory, which attributes the outbreak to Yersinia pestis, also responsible for an epidemic that began in southern China in 1865, eventually spreading to India. The investigation of the pathogen that caused the 19th-century plague was begun by teams of scientists who visited Hong Kong in 1894, among whom was the French-Swiss bacteriologist Alexandre Yersin, after whom the pathogen was named Yersinia pestis. The mechanism by which Y. pestis was usually transmitted was established in 1898 by Paul-Louis Simond and was found to involve the bites of fleas whose midguts had become obstructed by replicating Y. pestis several days after feeding on an infected host. 
This blockage results in starvation and aggressive feeding behaviour by the fleas, which repeatedly attempt to clear their blockage by regurgitation, resulting in thousands of plague bacteria being flushed into the feeding site, infecting the host. The bubonic plague mechanism was also dependent on two populations of rodents: one resistant to the disease, which act as hosts, keeping the disease endemic, and a second that lack resistance. When the second population dies, the fleas move on to other hosts, including people, thus creating a human epidemic.",Where and when did the investigation of the plague pathogen begin?,"{'text': ['Hong Kong in 1894', 'Hong Kong', 'Hong Kong in 1894'], 'answer_start': [334, 334, 334]}" -572820512ca10214002d9e74,"Jacksonville,_Florida","According to the United States Census Bureau, the city has a total area of 874.3 square miles (2,264 km2), making Jacksonville the largest city in land area in the contiguous United States; of this, 86.66% (757.7 sq mi or 1,962 km2) is land and ; 13.34% (116.7 sq mi or 302 km2) is water. Jacksonville surrounds the town of Baldwin. Nassau County lies to the north, Baker County lies to the west, and Clay and St. Johns County lie to the south; the Atlantic Ocean lies to the east, along with the Jacksonville Beaches. The St. Johns River divides the city. The Trout River, a major tributary of the St. Johns River, is located entirely within Jacksonville.",What is the name of the river that is completely contained inside Jacksonville?,"{'text': ['The Trout River', 'Trout River', 'The Trout River'], 'answer_start': [557, 561, 557]}" -5729f60caf94a219006aa6f2,Economic_inequality,"Effects of inequality researchers have found include higher rates of health and social problems, and lower rates of social goods, a lower level of economic utility in society from resources devoted on high-end consumption, and even a lower level of economic growth when human capital is neglected for high-end consumption. 
For the top 21 industrialised countries, counting each person equally, life expectancy is lower in more unequal countries (r = -.907). A similar relationship exists among US states (r = -.620).",What is a lower rate of social goods an effect of?,"{'text': ['inequality', 'inequality', 'inequality'], 'answer_start': [11, 11, 11]}" -57337ea24776f41900660bd2,Warsaw,"Demographically, it was the most diverse city in Poland, with significant numbers of foreign-born inhabitants. In addition to the Polish majority, there was a significant Jewish minority in Warsaw. According to Russian census of 1897, out of the total population of 638,000, Jews constituted 219,000 (around 34% percent). Warsaw's prewar Jewish population of more than 350,000 constituted about 30 percent of the city's total population. In 1933, out of 1,178,914 inhabitants 833,500 were of Polish mother tongue. World War II changed the demographics of the city, and to this day there is much less ethnic diversity than in the previous 300 years of Warsaw's history. Most of the modern day population growth is based on internal migration and urbanisation.",What percentage of the population of Warsaw was Jewish in 1897?,"{'text': ['around 34%', '34', '34%'], 'answer_start': [301, 308, 308]}" -57263677ec44d21400f3dc4c,Packet_switching,"Baran developed the concept of distributed adaptive message block switching during his research at the RAND Corporation for the US Air Force into survivable communications networks, first presented to the Air Force in the summer of 1961 as briefing B-265, later published as RAND report P-2626 in 1962, and finally in report RM 3420 in 1964. Report P-2626 described a general architecture for a large-scale, distributed, survivable communications network. 
The work focuses on three key ideas: use of a decentralized network with multiple paths between any two points, dividing user messages into message blocks, later called packets, and delivery of these messages by store and forward switching.",What delivery message was used ,"{'text': ['by store and forward switching', 'packets', 'store and forward switching'], 'answer_start': [665, 625, 668]}" -57284142ff5b5019007da00e,University_of_Chicago,"The University of Chicago was created and incorporated as a coeducational, secular institution in 1890 by the American Baptist Education Society and a donation from oil magnate and philanthropist John D. Rockefeller on land donated by Marshall Field. While the Rockefeller donation provided money for academic operations and long-term endowment, it was stipulated that such money could not be used for buildings. The original physical campus was financed by donations from wealthy Chicagoans like Silas B. Cobb who provided the funds for the campus' first building, Cobb Lecture Hall, and matched Marshall Field's pledge of $100,000. Other early benefactors included businessmen Charles L. Hutchinson (trustee, treasurer and donor of Hutchinson Commons), Martin A. Ryerson (president of the board of trustees and donor of the Ryerson Physical Laboratory) Adolphus Clay Bartlett and Leon Mandel, who funded the construction of the gymnasium and assembly hall, and George C. Walker of the Walker Museum, a relative of Cobb who encouraged his inaugural donation for facilities.",What is the name of the donor who helped establish the Hutchinson Commons?,"{'text': ['Charles L. Hutchinson', 'Charles L. Hutchinson', 'Charles L. Hutchinson', 'Charles L. Hutchinson'], 'answer_start': [679, 679, 679, 679]}" -56e1dc62cd28a01900c67bca,Computational_complexity_theory,"The complexity class P is often seen as a mathematical abstraction modeling those computational tasks that admit an efficient algorithm. This hypothesis is called the Cobham–Edmonds thesis. 
The complexity class NP, on the other hand, contains many problems that people would like to solve efficiently, but for which no efficient algorithm is known, such as the Boolean satisfiability problem, the Hamiltonian path problem and the vertex cover problem. Since deterministic Turing machines are special non-deterministic Turing machines, it is easily observed that each problem in P is also member of the class NP.",What complexity class is characterized by a computational tasks and efficient algorithms?,"{'text': ['P', 'P', 'P'], 'answer_start': [21, 21, 21]}" -573060b48ab72b1400f9c4c6,Imperialism,"Imperialism is a type of advocacy of empire. Its name originated from the Latin word ""imperium"", which means to rule over large territories. Imperialism is ""a policy of extending a country's power and influence through colonization, use of military force, or other means"". Imperialism has greatly shaped the contemporary world. It has also allowed for the rapid spread of technologies and ideas. The term imperialism has been applied to Western (and Japanese) political and economic dominance especially in Asia and Africa in the 19th and 20th centuries. Its precise meaning continues to be debated by scholars. Some writers, such as Edward Said, use the term more broadly to describe any system of domination and subordination organised with an imperial center and a periphery.",The word imperialism has it's origins in which ancient language? ,"{'text': ['Latin', 'Latin', 'Latin', 'Latin', 'Latin'], 'answer_start': [74, 74, 74, 74, 74]}" -571126dfa58dae1900cd6cb2,Steam_engine,"The first commercially successful true engine, in that it could generate power and transmit it to a machine, was the atmospheric engine, invented by Thomas Newcomen around 1712. It was an improvement over Savery's steam pump, using a piston as proposed by Papin. Newcomen's engine was relatively inefficient, and in most cases was used for pumping water. 
It worked by creating a partial vacuum by condensing steam under a piston within a cylinder. It was employed for draining mine workings at depths hitherto impossible, and also for providing a reusable water supply for driving waterwheels at factories sited away from a suitable ""head"". Water that had passed over the wheel was pumped back up into a storage reservoir above the wheel.",What was the first true engine that was commercially successful?,"{'text': ['atmospheric engine', 'atmospheric engine', 'atmospheric engine', 'the atmospheric engine'], 'answer_start': [117, 117, 117, 113]}" -571c3a685efbb31900334db6,Oxygen,"Oxygen is a chemical element with symbol O and atomic number 8. It is a member of the chalcogen group on the periodic table and is a highly reactive nonmetal and oxidizing agent that readily forms compounds (notably oxides) with most elements. By mass, oxygen is the third-most abundant element in the universe, after hydrogen and helium. At standard temperature and pressure, two atoms of the element bind to form dioxygen, a colorless and odorless diatomic gas with the formula O -2. Diatomic oxygen gas constitutes 20.8% of the Earth's atmosphere. However, monitoring of atmospheric oxygen levels show a global downward trend, because of fossil-fuel burning. Oxygen is the most abundant element by mass in the Earth's crust as part of oxide compounds such as silicon dioxide, making up almost half of the crust's mass.","Under normal conditions, what do two atoms of oxygen form?","{'text': ['dioxygen', 'diatomic gas', 'dioxygen', 'dioxygen', 'dioxygen'], 'answer_start': [415, 450, 415, 415, 415]}" -57265aaf5951b619008f706c,Ctenophora,"The Lobata have a pair of lobes, which are muscular, cuplike extensions of the body that project beyond the mouth. Their inconspicuous tentacles originate from the corners of the mouth, running in convoluted grooves and spreading out over the inner surface of the lobes (rather than trailing far behind, as in the Cydippida). 
Between the lobes on either side of the mouth, many species of lobates have four auricles, gelatinous projections edged with cilia that produce water currents that help direct microscopic prey toward the mouth. This combination of structures enables lobates to feed continuously on suspended planktonic prey.",What are auricles?,"{'text': ['gelatinous projections edged with cilia that produce water currents', 'gelatinous projections edged with cilia', 'gelatinous projections edged with cilia'], 'answer_start': [417, 417, 417]}" -572fb059947a6a140053cb80,Scottish_Parliament,"In addition to the General Assembly Hall, the Parliament also used buildings rented from the City of Edinburgh Council. The former administrative building of Lothian Regional Council on George IV Bridge was used for the MSP's offices. Following the move to Holyrood in 2004 this building was demolished. The former Midlothian County Buildings facing Parliament Square, High Street and George IV Bridge in Edinburgh (originally built as the headquarters of the pre-1975 Midlothian County Council) housed the Parliament's visitors' centre and shop, whilst the main hall was used as the Parliament's principal committee room.",Who did the Parliament rent additional buildings from?,"{'text': ['City of Edinburgh Council', 'City of Edinburgh Council', 'the City of Edinburgh Council'], 'answer_start': [93, 93, 89]}" -5727ec062ca10214002d99b7,Economic_inequality,"In a purely capitalist mode of production (i.e. where professional and labor organizations cannot limit the number of workers) the workers wages will not be controlled by these organizations, or by the employer, but rather by the market. Wages work in the same way as prices for any other good. Thus, wages can be considered as a function of market price of skill. And therefore, inequality is driven by this price. 
Under the law of supply and demand, the price of skill is determined by a race between the demand for the skilled worker and the supply of the skilled worker. ""On the other hand, markets can also concentrate wealth, pass environmental costs on to society, and abuse workers and consumers."" ""Markets, by themselves, even when they are stable, often lead to high levels of inequality, outcomes that are widely viewed as unfair."" Employers who offer a below market wage will find that their business is chronically understaffed. Their competitors will take advantage of the situation by offering a higher wage the best of their labor. For a businessman who has the profit motive as the prime interest, it is a losing proposition to offer below or above market wages to workers.",Under what law is value of a worker determined?,"{'text': ['supply and demand', 'law of supply and demand', 'supply and demand'], 'answer_start': [433, 426, 433]}" -57299a6f6aef051400155016,Prime_number,"The concept of prime number is so important that it has been generalized in different ways in various branches of mathematics. Generally, ""prime"" indicates minimality or indecomposability, in an appropriate sense. For example, the prime field is the smallest subfield of a field F containing both 0 and 1. It is either Q or the finite field with p elements, whence the name. Often a second, additional meaning is intended by using the word prime, namely that any object can be, essentially uniquely, decomposed into its prime components. For example, in knot theory, a prime knot is a knot that is indecomposable in the sense that it cannot be written as the knot sum of two nontrivial knots. Any knot can be uniquely expressed as a connected sum of prime knots. 
Prime models and prime 3-manifolds are other examples of this type.",What does the word prime generally suggest?,"{'text': ['indecomposability', 'minimality', 'minimality or indecomposability', 'minimality or indecomposability'], 'answer_start': [170, 156, 156, 156]}" -573011de04bcaa1900d770fd,Islamism,"While Qutb's ideas became increasingly radical during his imprisonment prior to his execution in 1966, the leadership of the Brotherhood, led by Hasan al-Hudaybi, remained moderate and interested in political negotiation and activism. Fringe or splinter movements inspired by the final writings of Qutb in the mid-1960s (particularly the manifesto Milestones, a.k.a. Ma'alim fi-l-Tariq) did, however, develop and they pursued a more radical direction. By the 1970s, the Brotherhood had renounced violence as a means of achieving its goals.",When had the Brotherhood renounced violence as a means of achieving its goals?,"{'text': ['By the 1970s', 'the 1970s', '1970s'], 'answer_start': [452, 455, 459]}" -57309564069b5314008321a6,Imperialism,"During the 20th century, historians John Gallagher (1919–1980) and Ronald Robinson (1920–1999) constructed a framework for understanding European imperialism. They claim that European imperialism was influential, and Europeans rejected the notion that ""imperialism"" required formal, legal control by one government over another country. ""In their view, historians have been mesmerized by formal empire and maps of the world with regions colored red. The bulk of British emigration, trade, and capital went to areas outside the formal British Empire. 
Key to their thinking is the idea of empire 'informally if possible and formally if necessary.'""[attribution needed] Because of the resources made available by imperialism, the world's economy grew significantly and became much more interconnected in the decades before World War I, making the many imperial powers rich and prosperous.",When did Ronald Robinson die?,"{'text': ['1999', '1999', '1999', '1999', '1999'], 'answer_start': [89, 89, 89, 89, 89]}" -5725f8f5ec44d21400f3d7b2,"Fresno,_California","There were 158,349 households, of which 68,511 (43.3%) had children under the age of 18 living in them, 69,284 (43.8%) were opposite-sex married couples living together, 30,547 (19.3%) had a female householder with no husband present, 11,698 (7.4%) had a male householder with no wife present. There were 12,843 (8.1%) unmarried opposite-sex partnerships, and 1,388 (0.9%) same-sex married couples or partnerships. 35,064 households (22.1%) were made up of individuals and 12,344 (7.8%) had someone living alone who was 65 years of age or older. The average household size was 3.07. There were 111,529 families (70.4% of all households); the average family size was 3.62.",What was the percentage of a female householder with no husband present?,"{'text': ['19.3%', '19.3%', '19.3%'], 'answer_start': [178, 178, 178]}" -572a058aaf94a219006aa754,Immune_system,"When a T-cell encounters a foreign pathogen, it extends a vitamin D receptor. This is essentially a signaling device that allows the T-cell to bind to the active form of vitamin D, the steroid hormone calcitriol. T-cells have a symbiotic relationship with vitamin D. Not only does the T-cell extend a vitamin D receptor, in essence asking to bind to the steroid hormone version of vitamin D, calcitriol, but the T-cell expresses the gene CYP27B1, which is the gene responsible for converting the pre-hormone version of vitamin D, calcidiol into the steroid hormone version, calcitriol. 
Only after binding to calcitriol can T-cells perform their intended function. Other immune system cells that are known to express CYP27B1 and thus activate vitamin D calcidiol, are dendritic cells, keratinocytes and macrophages.",What gene is responsible for converting calcidiol into calcitriol?,"{'text': ['gene CYP27B1', 'CYP27B1', 'gene CYP27B1'], 'answer_start': [433, 438, 433]}" -5737a0acc3c5551400e51f4a,Force,"Newton's laws and Newtonian mechanics in general were first developed to describe how forces affect idealized point particles rather than three-dimensional objects. However, in real life, matter has extended structure and forces that act on one part of an object might affect other parts of an object. For situations where lattice holding together the atoms in an object is able to flow, contract, expand, or otherwise change shape, the theories of continuum mechanics describe the way forces affect the material. For example, in extended fluids, differences in pressure result in forces being directed along the pressure gradients as follows:",What may a force on one part of an object affect?,"{'text': ['other parts', 'other parts of an object', 'other parts of an object', 'other parts of an object'], 'answer_start': [276, 276, 276, 276]}" -572a07c11d046914007796d9,Amazon_rainforest,"The use of remote sensing for the conservation of the Amazon is also being used by the indigenous tribes of the basin to protect their tribal lands from commercial interests. Using handheld GPS devices and programs like Google Earth, members of the Trio Tribe, who live in the rainforests of southern Suriname, map out their ancestral lands to help strengthen their territorial claims. 
Currently, most tribes in the Amazon do not have clearly defined boundaries, making it easier for commercial ventures to target their territories.",Why do some tribes use remote sensing technology?,"{'text': ['to protect their tribal lands from commercial interests', 'to protect their tribal lands from commercial interests', 'protect their tribal lands from commercial interests'], 'answer_start': [118, 118, 121]}" -5725fb8138643c19005acf40,"Fresno,_California","To avoid interference with existing VHF television stations in the San Francisco Bay Area and those planned for Chico, Sacramento, Salinas, and Stockton, the Federal Communications Commission decided that Fresno would only have UHF television stations. The very first Fresno television station to begin broadcasting was KMJ-TV, which debuted on June 1, 1953. KMJ is now known as NBC affiliate KSEE. Other Fresno stations include ABC O&O KFSN, CBS affiliate KGPE, CW affiliate KFRE, FOX affiliate KMPH, MNTV affiliate KAIL, PBS affiliate KVPT, Telemundo O&O KNSO, Univision O&O KFTV, and MundoFox and Azteca affiliate KGMC-DT.",What was the very first television station to broadcast in Fresno?,"{'text': ['KMJ-TV', 'KMJ-TV', 'KMJ-TV'], 'answer_start': [320, 320, 320]}" -573085ea8ab72b1400f9c550,Imperialism,"Imperialism and colonialism both dictate the political and economic advantage over a land and the indigenous populations they control, yet scholars sometimes find it difficult to illustrate the difference between the two. Although imperialism and colonialism focus on the suppression of an other, if colonialism refers to the process of a country taking physical control of another, imperialism refers to the political and monetary dominance, either formally or informally. Colonialism is seen to be the architect deciding how to start dominating areas and then imperialism can be seen as creating the idea behind conquest cooperating with colonialism. 
Colonialism is when the imperial nation begins a conquest over an area and then eventually is able to rule over the areas the previous nation had controlled. Colonialism's core meaning is the exploitation of the valuable assets and supplies of the nation that was conquered and the conquering nation then gaining the benefits from the spoils of the war. The meaning of imperialism is to create an empire, by conquering the other state's lands and therefore increasing its own dominance. Colonialism is the builder and preserver of the colonial possessions in an area by a population coming from a foreign region. Colonialism can completely change the existing social structure, physical structure and economics of an area; it is not unusual that the characteristics of the conquering peoples are inherited by the conquered indigenous populations.",What is colonialism's core meaning?,"{'text': ['exploitation', 'the exploitation of the valuable assets and supplies', 'the exploitation of the valuable assets and supplies of the nation that was conquered', 'exploitation of the valuable assets and supplies of the nation that was conquered and the conquering nation then gaining the benefits', 'exploitation of the valuable assets and supplies of the nation that was conquered'], 'answer_start': [845, 841, 841, 845, 845]}" -571cb27fdd7acb1400e4c134,Oxygen,"Paleoclimatologists measure the ratio of oxygen-18 and oxygen-16 in the shells and skeletons of marine organisms to determine what the climate was like millions of years ago (see oxygen isotope ratio cycle). Seawater molecules that contain the lighter isotope, oxygen-16, evaporate at a slightly faster rate than water molecules containing the 12% heavier oxygen-18; this disparity increases at lower temperatures. During periods of lower global temperatures, snow and rain from that evaporated water tends to be higher in oxygen-16, and the seawater left behind tends to be higher in oxygen-18. 
Marine organisms then incorporate more oxygen-18 into their skeletons and shells than they would in a warmer climate. Paleoclimatologists also directly measure this ratio in the water molecules of ice core samples that are up to several hundreds of thousands of years old.",How much heavier is oxygen 18 than oxygen 16?,"{'text': ['12%', '12%', '12%', '12%', '12%'], 'answer_start': [344, 344, 344, 344, 344]}" -573796edc3c5551400e51f36,Force,"The strong force only acts directly upon elementary particles. However, a residual of the force is observed between hadrons (the best known example being the force that acts between nucleons in atomic nuclei) as the nuclear force. Here the strong force acts indirectly, transmitted as gluons, which form part of the virtual pi and rho mesons, which classically transmit the nuclear force (see this topic for more). The failure of many searches for free quarks has shown that the elementary particles affected are not directly observable. This phenomenon is called color confinement.",How are nuclear forces transmitted?,"{'text': ['as gluons', 'as gluons', 'as gluons', 'as gluons'], 'answer_start': [282, 282, 282, 282]}" -57268220f1498d1400e8e219,Geology,"In the laboratory, biostratigraphers analyze rock samples from outcrop and drill cores for the fossils found in them. These fossils help scientists to date the core and to understand the depositional environment in which the rock units formed. Geochronologists precisely date rocks within the stratigraphic section in order to provide better absolute bounds on the timing and rates of deposition. Magnetic stratigraphers look for signs of magnetic reversals in igneous rock units within the drill cores. 
Other scientists perform stable isotope studies on the rocks to gain information about past climate.",What types of scientists looks for signs of magnetic reversals in igneous rocks within the drill cores?,"{'text': ['Magnetic stratigraphers', 'Magnetic stratigraphers', 'Magnetic stratigraphers'], 'answer_start': [397, 397, 397]}" -5705edcd52bb8914006896cb,Southern_California,"""Southern California"" is not a formal geographic designation, and definitions of what constitutes southern California vary. Geographically, California's north-south midway point lies at exactly 37° 9' 58.23"" latitude, around 11 miles (18 km) south of San Jose; however, this does not coincide with popular use of the term. When the state is divided into two areas (northern and southern California), the term ""southern California"" usually refers to the ten southern-most counties of the state. This definition coincides neatly with the county lines at 35° 47′ 28″ north latitude, which form the northern borders of San Luis Obispo, Kern, and San Bernardino counties. Another definition for southern California uses Point Conception and the Tehachapi Mountains as the northern boundary.",How many miles south of San Jose is the north - south midway point located?,"{'text': ['11', '11', '11'], 'answer_start': [225, 225, 225]}" -5726db5add62a815002e92d7,Pharmacy,"In Ancient Greece, Diocles of Carystus (4th century BC) was one of several men studying the medicinal properties of plants. He wrote several treatises on the topic. The Greek physician Pedanius Dioscorides is famous for writing a five volume book in his native Greek Περί ύλης ιατρικής in the 1st century AD. The Latin translation De Materia Medica (Concerning medical substances) was used a basis for many medieval texts, and was built upon by many middle eastern scientists during the Islamic Golden Age. 
The title coined the term materia medica.",What term resulted from Dioscorides' book?,"{'text': ['materia medica', 'materia medica', 'materia medica'], 'answer_start': [533, 533, 533]}" -57297d421d046914007794e6,Prime_number,"Modern primality tests for general numbers n can be divided into two main classes, probabilistic (or ""Monte Carlo"") and deterministic algorithms. Deterministic algorithms provide a way to tell for sure whether a given number is prime or not. For example, trial division is a deterministic algorithm because, if performed correctly, it will always identify a prime number as prime and a composite number as composite. Probabilistic algorithms are normally faster, but do not completely prove that a number is prime. These tests rely on testing a given number in a partly random way. For example, a given test might pass all the time if applied to a prime number, but pass only with probability p if applied to a composite number. If we repeat the test n times and pass every time, then the probability that our number is composite is 1/(1-p)n, which decreases exponentially with the number of tests, so we can be as sure as we like (though never perfectly sure) that the number is prime. On the other hand, if the test ever fails, then we know that the number is composite.",What is the name of one type of modern primality test?,"{'text': ['probabilistic (or ""Monte Carlo"")', 'probabilistic (or ""Monte Carlo"")', 'probabilistic', 'probabilistic', 'probabilistic'], 'answer_start': [83, 83, 83, 83, 83]}" -5729fd111d046914007796a5,Immune_system,"Unlike animals, plants lack phagocytic cells, but many plant immune responses involve systemic chemical signals that are sent through a plant. Individual plant cells respond to molecules associated with pathogens known as Pathogen-associated molecular patterns or PAMPs. 
When a part of a plant becomes infected, the plant produces a localized hypersensitive response, whereby cells at the site of infection undergo rapid apoptosis to prevent the spread of the disease to other parts of the plant. Systemic acquired resistance (SAR) is a type of defensive response used by plants that renders the entire plant resistant to a particular infectious agent. RNA silencing mechanisms are particularly important in this systemic response as they can block virus replication.",Plants lack what kind of immune cells?,"{'text': ['phagocytic cells', 'phagocytic', 'phagocytic cells'], 'answer_start': [28, 28, 28]}" -5730a40f396df91900096234,Imperialism,"Britain's imperialist ambitions can be seen as early as the sixteenth century. In 1599 the British East India Company was established and was chartered by Queen Elizabeth in the following year. With the establishment of trading posts in India, the British were able to maintain strength relative to others empires such as the Portuguese who already had set up trading posts in India. In 1767 political activity caused exploitation of the East India Company causing the plundering of the local economy, almost bringing the company into bankruptcy.",When is the earliest Britain had an imperialist policy?,"{'text': ['the sixteenth century', 'sixteenth century', 'sixteenth century', 'sixteenth century', 'sixteenth century'], 'answer_start': [56, 60, 60, 60, 60]}" -573003dd947a6a140053cf44,Rhine,"Since 7500 yr ago, a situation with tides and currents, very similar to present has existed. Rates of sea-level rise had dropped so far, that natural sedimentation by the Rhine and coastal processes together, could compensate the transgression by the sea; in the last 7000 years, the coast line was roughly at the same location. 
In the southern North Sea, due to ongoing tectonic subsidence, the sea level is still rising, at the rate of about 1–3 cm (0.39–1.18 in) per century (1 metre or 39 inches in last 3000 years).",How long has the Rhine coastline been in the same location?,"{'text': ['last 7000 years', '7000 years', 'last 7000 years'], 'answer_start': [263, 268, 263]}" -57108c95b654c5140001f979,Huguenot,"Huguenot immigrants did not disperse or settle in different parts of the country, but rather, formed three societies or congregations; one in the city of New York, another 21 miles north of New York in a town which they named New Rochelle, and a third further upstate in New Paltz. The ""Huguenot Street Historic District"" in New Paltz has been designated a National Historic Landmark site and contains the oldest street in the United States of America. A small group of Huguenots also settled on the south shore of Staten Island along the New York Harbor, for which the current neighborhood of Huguenot was named.",What city north of New York was settled by Huguenots?,"{'text': ['New Rochelle', 'New Rochelle', 'New Rochelle'], 'answer_start': [226, 226, 226]}" -573796edc3c5551400e51f37,Force,"The strong force only acts directly upon elementary particles. However, a residual of the force is observed between hadrons (the best known example being the force that acts between nucleons in atomic nuclei) as the nuclear force. Here the strong force acts indirectly, transmitted as gluons, which form part of the virtual pi and rho mesons, which classically transmit the nuclear force (see this topic for more). The failure of many searches for free quarks has shown that the elementary particles affected are not directly observable. 
This phenomenon is called color confinement.",What is the term for the lack of obsevable free quarks?,"{'text': ['color confinement', 'color confinement', 'color confinement', 'color confinement'], 'answer_start': [564, 564, 564, 564]}" -57267947f1498d1400e8e0ec,Geology,"In addition to identifying rocks in the field, petrologists identify rock samples in the laboratory. Two of the primary methods for identifying rocks in the laboratory are through optical microscopy and by using an electron microprobe. In an optical mineralogy analysis, thin sections of rock samples are analyzed through a petrographic microscope, where the minerals can be identified through their different properties in plane-polarized and cross-polarized light, including their birefringence, pleochroism, twinning, and interference properties with a conoscopic lens. In the electron microprobe, individual locations are analyzed for their exact chemical compositions and variation in composition within individual crystals. Stable and radioactive isotope studies provide insight into the geochemical evolution of rock units.",What do petrologists use electron microprobes in the laboratory for?,"{'text': ['identifying rocks', 'identifying rocks', 'identifying rocks'], 'answer_start': [15, 132, 132]}" -572fcd86947a6a140053ccdc,Scottish_Parliament,Bills can be introduced to Parliament in a number of ways; the Scottish Government can introduce new laws or amendments to existing laws as a bill; a committee of the Parliament can present a bill in one of the areas under its remit; a member of the Scottish Parliament can introduce a bill as a private member; or a private bill can be submitted to Parliament by an outside proposer. Most draft laws are government bills introduced by ministers in the governing party. 
Bills pass through Parliament in a number of stages:,An MSP may introduce a bill as what?,"{'text': ['a private member', 'a private member', 'private member'], 'answer_start': [294, 294, 296]}" -5728fd206aef05140015494f,Immune_system,"Within the genitourinary and gastrointestinal tracts, commensal flora serve as biological barriers by competing with pathogenic bacteria for food and space and, in some cases, by changing the conditions in their environment, such as pH or available iron. This reduces the probability that pathogens will reach sufficient numbers to cause illness. However, since most antibiotics non-specifically target bacteria and do not affect fungi, oral antibiotics can lead to an ""overgrowth"" of fungi and cause conditions such as a vaginal candidiasis (a yeast infection). There is good evidence that re-introduction of probiotic flora, such as pure cultures of the lactobacilli normally found in unpasteurized yogurt, helps restore a healthy balance of microbial populations in intestinal infections in children and encouraging preliminary data in studies on bacterial gastroenteritis, inflammatory bowel diseases, urinary tract infection and post-surgical infections.",Commensal flora can change what specific conditions of their environment in the gastrointestinal tract?,"{'text': ['pH or available iron', 'pH or available iron', 'balance of microbial populations'], 'answer_start': [233, 233, 733]}" -572995d46aef051400154feb,Prime_number,"Giuga's conjecture says that this equation is also a sufficient condition for p to be prime. Another consequence of Fermat's little theorem is the following: if p is a prime number other than 2 and 5, 1/p is always a recurring decimal, whose period is p − 1 or a divisor of p − 1. The fraction 1/p expressed likewise in base q (rather than base 10) has similar effect, provided that p is not a prime factor of q. Wilson's theorem says that an integer p > 1 is prime if and only if the factorial (p − 1)! + 1 is divisible by p. 
Moreover, an integer n > 4 is composite if and only if (n − 1)! is divisible by n.","According to Wilson's theorem, what factorial must be divisible by n if some integer n > 4 is to be considered composite?","{'text': ['(n − 1)!', '(n − 1)!', '(n − 1)!', '(n − 1)!', '(n − 1)!'], 'answer_start': [582, 582, 582, 582, 582]}" -573750f61c4567190057446b,Force,"Newton's Second Law asserts the direct proportionality of acceleration to force and the inverse proportionality of acceleration to mass. Accelerations can be defined through kinematic measurements. However, while kinematics are well-described through reference frame analysis in advanced physics, there are still deep questions that remain as to what is the proper definition of mass. General relativity offers an equivalence between space-time and mass, but lacking a coherent theory of quantum gravity, it is unclear as to how or whether this connection is relevant on microscales. With some justification, Newton's second law can be taken as a quantitative definition of mass by writing the law as an equality; the relative units of force and mass then are fixed.","In Newton's second law, what are the units of mass and force in relation to microscales?","{'text': ['fixed', 'an equality', 'fixed', 'fixed', 'unclear'], 'answer_start': [760, 701, 760, 760, 511]}" -572a11663f37b31900478697,Economic_inequality,"In 1993, Galor and Zeira showed that inequality in the presence of credit market imperfections has a long lasting detrimental effect on human capital formation and economic development. A 1996 study by Perotti examined the channels through which inequality may affect economic growth. He showed that, in accordance with the credit market imperfection approach, inequality is associated with lower level of human capital formation (education, experience, and apprenticeship) and higher level of fertility, and thereby lower levels of growth. 
He found that inequality is associated with higher levels of redistributive taxation, which is associated with lower levels of growth from reductions in private savings and investment. Perotti concluded that, ""more equal societies have lower fertility rates and higher rates of investment in education. Both are reflected in higher rates of growth. Also, very unequal societies tend to be politically and socially unstable, which is reflected in lower rates of investment and therefore growth.""",What do extremely unequal societies tend to be?,"{'text': ['politically and socially unstable', 'politically and socially unstable', 'politically and socially unstable'], 'answer_start': [930, 930, 930]}" -571ce3745efbb31900334e24,Oxygen,"Oxygen toxicity to the lungs and central nervous system can also occur in deep scuba diving and surface supplied diving. Prolonged breathing of an air mixture with an O -2 partial pressure more than 60 kPa can eventually lead to permanent pulmonary fibrosis. Exposure to a O -2 partial pressures greater than 160 kPa (about 1.6 atm) may lead to convulsions (normally fatal for divers). Acute oxygen toxicity (causing seizures, its most feared effect for divers) can occur by breathing an air mixture with 21% O -2 at 66 m or more of depth; the same thing can occur by breathing 100% O -2 at only 6 m.",What physical condition can acute oxygen toxicity cause?,"{'text': ['seizures', 'seizures', 'seizures', 'seizures', 'seizures'], 'answer_start': [415, 415, 415, 415, 415]}" -57265e11708984140094c3be,1973_oil_crisis,"OPEC soon lost its preeminent position, and in 1981, its production was surpassed by that of other countries. Additionally, its own member nations were divided. Saudi Arabia, trying to recover market share, increased production, pushing prices down, shrinking or eliminating profits for high-cost producers. 
The world price, which had peaked during the 1979 energy crisis at nearly $40 per barrel, decreased during the 1980s to less than $10 per barrel. Adjusted for inflation, oil briefly fell back to pre-1973 levels. This ""sale"" price was a windfall for oil-importing nations, both developing and developed.","In 1979, during the oil crisis, what was the highest price of oil?","{'text': ['nearly $40 per barrel', 'nearly $40 per barrel', '$40 per barrel', '$40 per barrel', '$40 per barrel'], 'answer_start': [375, 375, 382, 382, 382]}" -57268c01dd62a815002e8915,Ctenophora,"On the other hand, in the late 1980s the Western Atlantic ctenophore Mnemiopsis leidyi was accidentally introduced into the Black Sea and Sea of Azov via the ballast tanks of ships, and has been blamed for causing sharp drops in fish catches by eating both fish larvae and small crustaceans that would otherwise feed the adult fish. Mnemiopsis is well equipped to invade new territories (although this was not predicted until after it so successfully colonized the Black Sea), as it can breed very rapidly and tolerate a wide range of water temperatures and salinities. The impact was increased by chronic overfishing, and by eutrophication that gave the entire ecosystem a short-term boost, causing the Mnemiopsis population to increase even faster than normal – and above all by the absence of efficient predators on these introduced ctenophores. Mnemiopsis populations in those areas were eventually brought under control by the accidental introduction of the Mnemiopsis-eating North American ctenophore Beroe ovata, and by a cooling of the local climate from 1991 to 1993, which significantly slowed the animal's metabolism. 
However the abundance of plankton in the area seems unlikely to be restored to pre-Mnemiopsis levels.",How was the population of mnemiopsis in The black Sea and the Sea of Azov brought under control?,"{'text': ['by the accidental introduction of the Mnemiopsis-eating North American ctenophore Beroe ovata,', 'the accidental introduction of the Mnemiopsis-eating North American ctenophore Beroe ovata', 'the accidental introduction of the Mnemiopsis-eating North American ctenophore Beroe ovata, and by a cooling of the local climate from 1991 to 1993'], 'answer_start': [925, 928, 928]}" -572ff07304bcaa1900d76ef7,Rhine,"The Rhine is the longest river in Germany. It is here that the Rhine encounters some more of its main tributaries, such as the Neckar, the Main and, later, the Moselle, which contributes an average discharge of more than 300 m3/s (11,000 cu ft/s). Northeastern France drains to the Rhine via the Moselle; smaller rivers drain the Vosges and Jura Mountains uplands. Most of Luxembourg and a very small part of Belgium also drain to the Rhine via the Moselle. As it approaches the Dutch border, the Rhine has an annual mean discharge of 2,290 m3/s (81,000 cu ft/s) and an average width of 400 m (1,300 ft).",Which of the tributaries in Germany contributes most? ,"{'text': ['Moselle', 'Neckar', 'Neckar'], 'answer_start': [160, 127, 127]}" -56e1e9dfe3433e14004231ff,Computational_complexity_theory,"The graph isomorphism problem is the computational problem of determining whether two finite graphs are isomorphic. An important unsolved problem in complexity theory is whether the graph isomorphism problem is in P, NP-complete, or NP-intermediate. The answer is not known, but it is believed that the problem is at least not NP-complete. If graph isomorphism is NP-complete, the polynomial time hierarchy collapses to its second level. 
Since it is widely believed that the polynomial hierarchy does not collapse to any finite level, it is believed that graph isomorphism is not NP-complete. The best algorithm for this problem, due to Laszlo Babai and Eugene Luks has run time 2O(√(n log(n))) for graphs with n vertices.",To what level would the polynomial time hierarchy collapse if graph isomorphism is NP-complete?,"{'text': ['second level', 'second', 'second'], 'answer_start': [424, 424, 424]}" -57294baaaf94a219006aa26b,Intergovernmental_Panel_on_Climate_Change,"Michael Oppenheimer, a long-time participant in the IPCC and coordinating lead author of the Fifth Assessment Report conceded in Science Magazine's State of the Planet 2008-2009 some limitations of the IPCC consensus approach and asks for concurring, smaller assessments of special problems instead of the large scale approach as in the previous IPCC assessment reports. It has become more important to provide a broader exploration of uncertainties. Others see as well mixed blessings of the drive for consensus within the IPCC process and ask to include dissenting or minority positions or to improve statements about uncertainties.",What role did Michael Oppenheimer have in the IPCC's reports?,"{'text': ['coordinating lead author of the Fifth Assessment Report', 'participant in the IPCC and coordinating lead author of the Fifth Assessment Report', 'coordinating lead author of the Fifth Assessment Report'], 'answer_start': [61, 33, 61]}" -572ffb02b2c2fd14005686b9,Rhine,"From the Eocene onwards, the ongoing Alpine orogeny caused a N–S rift system to develop in this zone. The main elements of this rift are the Upper Rhine Graben, in southwest Germany and eastern France and the Lower Rhine Embayment, in northwest Germany and the southeastern Netherlands. By the time of the Miocene, a river system had developed in the Upper Rhine Graben, that continued northward and is considered the first Rhine river. 
At that time, it did not yet carry discharge from the Alps; instead, the watersheds of the Rhone and Danube drained the northern flanks of the Alps.",What time did a river system develop in the Upper Rhine Graben?,"{'text': ['Miocene', 'By the time of the Miocene', 'time of the Miocene'], 'answer_start': [306, 287, 294]}" -56de16ca4396321400ee25c7,Normans,"In 1066, Duke William II of Normandy conquered England killing King Harold II at the Battle of Hastings. The invading Normans and their descendants replaced the Anglo-Saxons as the ruling class of England. The nobility of England were part of a single Normans culture and many had lands on both sides of the channel. Early Norman kings of England, as Dukes of Normandy, owed homage to the King of France for their land on the continent. They considered England to be their most important holding (it brought with it the title of King—an important status symbol).",When was the Battle of Hastings?,"{'text': ['1066', 'In 1066', '1066'], 'answer_start': [3, 0, 3]}" -5725d42a89a1e219009abf59,"Fresno,_California","In the 1970s, the city was the subject of a song, ""Walking Into Fresno"", written by Hall Of Fame guitarist Bill Aken and recorded by Bob Gallion of the world-famous ""WWVA Jamboree"" radio and television show in Wheeling, West Virginia. Aken, adopted by Mexican movie actress Lupe Mayorga, grew up in the neighboring town of Madera and his song chronicled the hardships faced by the migrant farm workers he saw as a child. Aken also made his first TV appearance playing guitar on the old country-western show at The Fresno Barn.","Who recorded ""Walking in Fresno?""","{'text': ['Bob Gallion', 'Bob Gallion', 'Bob Gallion'], 'answer_start': [133, 133, 133]}" -5705fc3a52bb89140068976c,Southern_California,"Southern California contains a Mediterranean climate, with infrequent rain and many sunny days. Summers are hot and dry, while winters are a bit warm or mild and wet. Serious rain can occur unusually. 
In the summers, temperature ranges are 90-60's while as winters are 70-50's, usually all of Southern California have Mediterranean climate. But snow is very rare in the Southwest of the state, it occurs on the Southeast of the state.",What is the low end of the temperature range in summer?,"{'text': [""60's"", ""60's"", ""60's""], 'answer_start': [243, 243, 243]}" -5705e63175f01819005e7723,Southern_California,"Within southern California are two major cities, Los Angeles and San Diego, as well as three of the country's largest metropolitan areas. With a population of 3,792,621, Los Angeles is the most populous city in California and the second most populous in the United States. To the south and with a population of 1,307,402 is San Diego, the second most populous city in the state and the eighth most populous in the nation.",What is the eighth most populous city in the nation?,"{'text': ['San Diego', 'San Diego', 'San Diego'], 'answer_start': [324, 324, 324]}" -572a0ce11d04691400779701,Immune_system,"An evasion strategy used by several pathogens to avoid the innate immune system is to hide within the cells of their host (also called intracellular pathogenesis). Here, a pathogen spends most of its life-cycle inside host cells, where it is shielded from direct contact with immune cells, antibodies and complement. Some examples of intracellular pathogens include viruses, the food poisoning bacterium Salmonella and the eukaryotic parasites that cause malaria (Plasmodium falciparum) and leishmaniasis (Leishmania spp.). Other bacteria, such as Mycobacterium tuberculosis, live inside a protective capsule that prevents lysis by complement. Many pathogens secrete compounds that diminish or misdirect the host's immune response. Some bacteria form biofilms to protect themselves from the cells and proteins of the immune system. 
Such biofilms are present in many successful infections, e.g., the chronic Pseudomonas aeruginosa and Burkholderia cenocepacia infections characteristic of cystic fibrosis. Other bacteria generate surface proteins that bind to antibodies, rendering them ineffective; examples include Streptococcus (protein G), Staphylococcus aureus (protein A), and Peptostreptococcus magnus (protein L).",What protein does Staphylococcus aureus produce to make antibodies ineffective?,"{'text': ['protein A', 'G', 'Streptococcus (protein G)'], 'answer_start': [1166, 1139, 1116]}" -57113f83b654c5140001fc2a,Steam_engine,"Near the end of the 19th century compound engines came into widespread use. Compound engines exhausted steam in to successively larger cylinders to accommodate the higher volumes at reduced pressures, giving improved efficiency. These stages were called expansions, with double and triple expansion engines being common, especially in shipping where efficiency was important to reduce the weight of coal carried. Steam engines remained the dominant source of power until the early 20th century, when advances in the design of electric motors and internal combustion engines gradually resulted in the replacement of reciprocating (piston) steam engines, with shipping in the 20th-century relying upon the steam turbine.",What are the stages in a compound engine called?,"{'text': ['expansions', 'expansions', 'expansions'], 'answer_start': [254, 254, 254]}" -5705fd8475f01819005e7841,Southern_California,"Southern California consists of one of the more varied collections of geologic, topographic, and natural ecosystem landscapes in a diversity outnumbering other major regions in the state and country. 
The region spans from Pacific Ocean islands, shorelines, beaches, and coastal plains, through the Transverse and Peninsular Ranges with their peaks, into the large and small interior valleys, to the vast deserts of California.",The region spans starting at islands found in which body of water?,"{'text': ['Pacific Ocean', 'Pacific Ocean', 'Pacific Ocean'], 'answer_start': [222, 222, 222]}" -57264a8cdd62a815002e808e,European_Union_law,"The European Commission is the main executive body of the European Union. Article 17(1) of the Treaty on European Union states the Commission should ""promote the general interest of the Union"" while Article 17(3) adds that Commissioners should be ""completely independent"" and not ""take instructions from any Government"". Under article 17(2), ""Union legislative acts may only be adopted on the basis of a Commission proposal, except where the Treaties provide otherwise."" This means that the Commission has a monopoly on initiating the legislative procedure, although the Council is the ""de facto catalyst of many legislative initiatives"". The Parliament can also formally request the Commission to submit a legislative proposal but the Commission can reject such a suggestion, giving reasons. The Commission's President (currently an ex-Luxembourg Prime Minister, Jean-Claude Juncker) sets the agenda for the EU's work. Decisions are taken by a simple majority vote, usually through a ""written procedure"" of circulating the proposals and adopting if there are no objections.[citation needed] Since Ireland refused to consent to changes in the Treaty of Lisbon 2007, there remains one Commissioner for each of the 28 member states, including the President and the High Representative for Foreign and Security Policy (currently Federica Mogherini). The Commissioners (and most importantly, the portfolios they will hold) are bargained over intensively by the member states. 
The Commissioners, as a block, are then subject to a qualified majority vote of the Council to approve, and majority approval of the Parliament. The proposal to make the Commissioners be drawn from the elected Parliament, was not adopted in the Treaty of Lisbon. This means Commissioners are, through the appointment process, the unelected subordinates of member state governments.",Which authority figure is designated to schedule and set the work of the EU?,"{'text': [""The Commission's President"", ""The Commission's President"", ""The Commission's President"", ""The Commission's President (""], 'answer_start': [793, 793, 793, 793]}" -572847dd4b864d19001648bf,Amazon_rainforest,"For a long time, it was thought that the Amazon rainforest was only ever sparsely populated, as it was impossible to sustain a large population through agriculture given the poor soil. Archeologist Betty Meggers was a prominent proponent of this idea, as described in her book Amazonia: Man and Culture in a Counterfeit Paradise. She claimed that a population density of 0.2 inhabitants per square kilometre (0.52/sq mi) is the maximum that can be sustained in the rainforest through hunting, with agriculture needed to host a larger population. However, recent anthropological findings have suggested that the region was actually densely populated. Some 5 million people may have lived in the Amazon region in AD 1500, divided between dense coastal settlements, such as that at Marajó, and inland dwellers. 
By 1900 the population had fallen to 1 million and by the early 1980s it was less than 200,000.",In what book did Betty Meggers describe the idea of the Amazon being sparsely populated?,"{'text': ['Amazonia: Man and Culture in a Counterfeit Paradise', 'Amazonia: Man and Culture in a Counterfeit Paradise', 'Amazonia: Man and Culture in a Counterfeit Paradise'], 'answer_start': [277, 277, 277]}" diff --git a/mindsdb/integrations/utilities/date_utils.py b/mindsdb/integrations/utilities/date_utils.py deleted file mode 100644 index d7c71daa4d5..00000000000 --- a/mindsdb/integrations/utilities/date_utils.py +++ /dev/null @@ -1,79 +0,0 @@ -import datetime as dt -import pytz -import re - - -def parse_local_date(date_str: str) -> dt.datetime: - """Parses common date string formats to local datetime objects.""" - if isinstance(date_str, dt.datetime): - return date_str - date_formats = ['%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S', '%Y-%m-%d'] - - date = None - for date_format in date_formats: - try: - date = dt.datetime.strptime(date_str, date_format) - except ValueError: - pass - if date is None: - raise ValueError(f"Can't parse date: {date_str}") - return date - - -def parse_utc_date_with_limit(date_str: str, max_window_in_days: int = None) -> dt.datetime: - """Parses common date string formats to UTC datetime objects.""" - date = parse_local_date(date_str) - - # Convert date to UTC - date_utc = date.astimezone(pytz.utc) - - # If max_window_in_days is provided, apply the logic - if max_window_in_days is not None: - # Get the current UTC time - now_utc = dt.datetime.utcnow().replace(tzinfo=pytz.utc) - # Check if the parsed date is earlier than the maximum window allowed - max_window_date = now_utc - dt.timedelta(days=max_window_in_days) - if date_utc < max_window_date: - return max_window_date - return date_utc - - -def parse_utc_date(date_str: str) -> dt.datetime: - """Parses common date string formats to UTC datetime objects.""" - date = parse_local_date(date_str) - return 
date.astimezone(pytz.utc) - - -def utc_date_str_to_timestamp_ms(date_str: str) -> int: - """Parses common date string formats into ms since the Unix epoch in UTC.""" - date = parse_local_date(date_str) - # `timestamp` method doesn't work as expected unless we replace the timezone info this way. - date = date.replace(tzinfo=pytz.UTC) - return int(date.timestamp() * 1000) - - -def interval_str_to_duration_ms(interval_str: str) -> int: - """Parses interval strings into how long they represent in ms. - Supported intervals: - - seconds (e.g. 1s) - - minutes (e.g. 5m) - - hours (e.g. 1h) - - days (e.g. 5d) - - weeks (e.g. 1w) - """ - duration_match = re.search(r'^\d+', interval_str) - time_unit_match = re.search('[smhdw]', interval_str) - if not duration_match or not time_unit_match: - raise ValueError('Invalid interval {}'.format(interval_str)) - duration = int(duration_match.group()) - time_unit = time_unit_match.group() - if time_unit == 's': - return duration * 1000 - if time_unit == 'm': - return duration * 1000 * 60 - if time_unit == 'h': - return duration * 1000 * 60 * 60 - if time_unit == 'd': - return duration * 1000 * 60 * 60 * 24 - if time_unit == 'w': - return duration * 1000 * 60 * 60 * 24 * 7 diff --git a/mindsdb/integrations/utilities/files/__init__.py b/mindsdb/integrations/utilities/files/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/utilities/files/file_reader.py b/mindsdb/integrations/utilities/files/file_reader.py deleted file mode 100644 index ab88dbfc486..00000000000 --- a/mindsdb/integrations/utilities/files/file_reader.py +++ /dev/null @@ -1,398 +0,0 @@ -import csv -import json -import codecs -from io import BytesIO, StringIO, IOBase -from typing import List, Generator -from pathlib import Path -from dataclasses import dataclass, astuple - -import filetype -import pandas as pd -from charset_normalizer import from_bytes - -from mindsdb.interfaces.knowledge_base.preprocessing.text_splitter 
import TextSplitter -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - -DEFAULT_CHUNK_SIZE = 500 -DEFAULT_CHUNK_OVERLAP = 250 - - -class FileProcessingError(Exception): ... - - -@dataclass(frozen=True, slots=True) -class _SINGLE_PAGE_FORMAT: - CSV: str = "csv" - JSON: str = "json" - TXT: str = "txt" - PDF: str = "pdf" - PARQUET: str = "parquet" - - -SINGLE_PAGE_FORMAT = _SINGLE_PAGE_FORMAT() - - -@dataclass(frozen=True, slots=True) -class _MULTI_PAGE_FORMAT: - XLSX: str = "xlsx" - XLS: str = "xls" - - -MULTI_PAGE_FORMAT = _MULTI_PAGE_FORMAT() - - -def decode(file_obj: IOBase) -> StringIO: - file_obj.seek(0) - byte_str = file_obj.read() - # Move it to StringIO - try: - # Handle Microsoft's BOM "special" UTF-8 encoding - if byte_str.startswith(codecs.BOM_UTF8): - data_str = StringIO(byte_str.decode("utf-8-sig")) - else: - file_encoding_meta = from_bytes( - byte_str[: 32 * 1024], - steps=32, # Number of steps/block to extract from my_byte_str - chunk_size=1024, # Set block size of each extraction) - explain=False, - ) - best_meta = file_encoding_meta.best() - errors = "strict" - if best_meta is not None: - encoding = file_encoding_meta.best().encoding - - try: - data_str = StringIO(byte_str.decode(encoding, errors)) - except UnicodeDecodeError: - encoding = "utf-8" - errors = "replace" - - data_str = StringIO(byte_str.decode(encoding, errors)) - else: - encoding = "utf-8" - errors = "replace" - - data_str = StringIO(byte_str.decode(encoding, errors)) - except Exception as e: - logger.exception("Error during file decode:") - raise FileProcessingError("Could not load into string") from e - - return data_str - - -class FormatDetector: - supported_formats = astuple(SINGLE_PAGE_FORMAT) + astuple(MULTI_PAGE_FORMAT) - multipage_formats = astuple(MULTI_PAGE_FORMAT) - - def __init__( - self, - path: str | None = None, - name: str | None = None, - file: IOBase | None = None, - ): - """ - File format detector - One of these arguments has to be passed: 
`path` or `file` - - :param path: path to the file - :param name: name of the file - :param file: file descriptor (via open(...), of BytesIO(...)) - """ - if path is not None: - file = open(path, "rb") - - elif file is not None: - if name is None: - if hasattr(file, "name"): - path = file.name - else: - path = "file" - else: - raise FileProcessingError("Wrong arguments: path or file is required") - - if name is None: - name = Path(path).name - - self.name = name - self.file_obj = file - self.format = None - - self.parameters = {} - - def close(self): - if self.file_obj is not None: - self.file_obj.close() - - def get_format(self) -> str: - if self.format is not None: - return self.format - - format = self.get_format_by_name() - if format is not None: - if format not in self.supported_formats: - raise FileProcessingError(f"Not supported format: {format}") - - if format is None and self.file_obj is not None: - format = self.get_format_by_content() - self.file_obj.seek(0) - - if format is None: - raise FileProcessingError(f"Unable to detect format: {self.name}") - - self.format = format - return format - - def get_format_by_name(self): - extension = Path(self.name).suffix.strip(".").lower() - if extension == "tsv": - extension = "csv" - self.parameters["delimiter"] = "\t" - - return extension or None - - def get_format_by_content(self): - if self.is_parquet(self.file_obj): - return SINGLE_PAGE_FORMAT.PARQUET - - file_type = filetype.guess(self.file_obj) - if file_type is not None: - if file_type.mime in { - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - }: - return MULTI_PAGE_FORMAT.XLSX - if file_type.mime == "application/vnd.ms-excel": - return MULTI_PAGE_FORMAT.XLS - - if file_type.mime == "application/pdf": - return SINGLE_PAGE_FORMAT.PDF - - file_obj = decode(self.file_obj) - - if self.is_json(file_obj): - return SINGLE_PAGE_FORMAT.JSON - - if self.is_csv(file_obj): - return SINGLE_PAGE_FORMAT.CSV - - @staticmethod - def is_json(data_obj: 
StringIO) -> bool: - # see if its JSON - data_obj.seek(0) - text = data_obj.read(100).strip() - data_obj.seek(0) - if len(text) > 0: - # it looks like a json, then try to parse it - if text.startswith("{") or text.startswith("["): - try: - json.loads(data_obj.read()) - return True - except Exception: - return False - return False - - @classmethod - def is_csv(cls, data_obj: StringIO) -> bool: - data_obj.seek(0) - sample = data_obj.readline() # trying to get dialect from header - try: - data_obj.seek(0) - csv.Sniffer().sniff(sample) - - # Avoid a false-positive for json files - if cls.is_json(data_obj): - return False - return True - except Exception: - return False - - @staticmethod - def is_parquet(data: IOBase) -> bool: - # Check first and last 4 bytes equal to PAR1. - # Refer: https://parquet.apache.org/docs/file-format/ - parquet_sig = b"PAR1" - data.seek(0, 0) - start_meta = data.read(4) - data.seek(-4, 2) - end_meta = data.read() - data.seek(0) - if start_meta == parquet_sig and end_meta == parquet_sig: - return True - return False - - -def format_column_names(df: pd.DataFrame): - df.columns = [column.strip(" \t") for column in df.columns] - if len(df.columns) != len(set(df.columns)) or any(len(column_name) == 0 for column_name in df.columns): - raise FileProcessingError("Each column should have a unique and non-empty name.") - - -class FileReader(FormatDetector): - def _get_fnc(self): - format = self.get_format() - func = getattr(self, f"read_{format}", None) - if func is None: - raise FileProcessingError(f"Unsupported format: {format}") - - if format in astuple(MULTI_PAGE_FORMAT): - - def format_multipage(*args, **kwargs): - for page_number, df in func(*args, **kwargs): - format_column_names(df) - yield page_number, df - - return format_multipage - - def format_singlepage(*args, **kwargs) -> pd.DataFrame: - """Check that the columns have unique not-empty names""" - df = func(*args, **kwargs) - format_column_names(df) - return df - - return format_singlepage 
- - def get_pages(self, **kwargs) -> List[str]: - """ - Get list of tables in file - """ - format = self.get_format() - if format not in self.multipage_formats: - # only one table - return ["main"] - - func = self._get_fnc() - self.file_obj.seek(0) - - return [name for name, _ in func(self.file_obj, only_names=True, **kwargs)] - - def get_contents(self, **kwargs) -> dict[str, pd.DataFrame]: - """ - Get all info(pages with content) from file as dict: {tablename, content} - """ - func = self._get_fnc() - self.file_obj.seek(0) - - format = self.get_format() - if format not in self.multipage_formats: - # only one table - return {"main": func(self.file_obj, name=self.name, **kwargs)} - - return {name: df for name, df in func(self.file_obj, **kwargs)} - - def get_page_content(self, page_name: str | None = None, **kwargs) -> pd.DataFrame: - """ - Get content of a single table - """ - func = self._get_fnc() - self.file_obj.seek(0) - - format = self.get_format() - if format not in self.multipage_formats: - # only one table - return func(self.file_obj, name=self.name, **kwargs) - - for _, df in func(self.file_obj, name=self.name, page_name=page_name, **kwargs): - return df - - @staticmethod - def _get_csv_dialect(buffer, delimiter: str | None = None) -> csv.Dialect | None: - sample = buffer.readline() # trying to get dialect from header - buffer.seek(0) - try: - if isinstance(sample, bytes): - sample = sample.decode() - - if delimiter is not None: - accepted_csv_delimiters = [delimiter] - else: - accepted_csv_delimiters = [",", "\t", ";"] - try: - dialect = csv.Sniffer().sniff(sample, delimiters=accepted_csv_delimiters) - dialect.doublequote = True # assume that all csvs have " as string escape - except Exception: - dialect = csv.reader(sample).dialect - if dialect.delimiter not in accepted_csv_delimiters: - raise FileProcessingError(f"CSV delimeter '{dialect.delimiter}' is not supported") - - except csv.Error: - dialect = None - return dialect - - @classmethod - def 
read_csv(cls, file_obj: BytesIO, delimiter: str | None = None, **kwargs) -> pd.DataFrame: - file_obj = decode(file_obj) - dialect = cls._get_csv_dialect(file_obj, delimiter=delimiter) - return pd.read_csv(file_obj, sep=dialect.delimiter, index_col=False) - - @staticmethod - def read_txt(file_obj: BytesIO, name: str | None = None, **kwargs) -> pd.DataFrame: - # the lib is heavy, so import it only when needed - - file_obj = decode(file_obj) - - text = file_obj.read() - - text_splitter = TextSplitter(chunk_size=DEFAULT_CHUNK_SIZE, chunk_overlap=DEFAULT_CHUNK_OVERLAP) - - docs = text_splitter.split_text(text) - return pd.DataFrame([{"content": doc, "metadata": {"source_file": name, "file_format": "txt"}} for doc in docs]) - - @staticmethod - def read_pdf(file_obj: BytesIO, name: str | None = None, **kwargs) -> pd.DataFrame: - # the libs are heavy, so import it only when needed - import fitz # pymupdf - - with fitz.open(stream=file_obj.read()) as pdf: # open pdf - text = chr(12).join([page.get_text() for page in pdf]) - - text_splitter = TextSplitter(chunk_size=DEFAULT_CHUNK_SIZE, chunk_overlap=DEFAULT_CHUNK_OVERLAP) - - split_text = text_splitter.split_text(text) - - return pd.DataFrame( - { - "content": split_text, - "metadata": [{"file_format": "pdf", "source_file": name}] * len(split_text), - } - ) - - @staticmethod - def read_json(file_obj: BytesIO, **kwargs) -> pd.DataFrame: - file_obj = decode(file_obj) - file_obj.seek(0) - json_doc = json.loads(file_obj.read()) - return pd.json_normalize(json_doc, max_level=0) - - @staticmethod - def read_parquet(file_obj: BytesIO, **kwargs) -> pd.DataFrame: - return pd.read_parquet(file_obj) - - @staticmethod - def read_xlsx( - file_obj: BytesIO, - page_name: str | None = None, - only_names: bool = False, - **kwargs, - ) -> Generator[tuple[str, pd.DataFrame | None], None, None]: - with pd.ExcelFile(file_obj) as xls: - if page_name is not None: - # return specific page - yield page_name, pd.read_excel(xls, sheet_name=page_name) 
- - for page_name in xls.sheet_names: - if only_names: - # extract only pages names - df = None - else: - df = pd.read_excel(xls, sheet_name=page_name) - yield page_name, df - - @staticmethod - def read_xls( - file_obj: BytesIO, - page_name: str | None = None, - only_names: bool = False, - **kwargs, - ): - return FileReader.read_xlsx(file_obj, page_name=page_name, only_names=only_names, **kwargs) diff --git a/mindsdb/integrations/utilities/handler_utils.py b/mindsdb/integrations/utilities/handler_utils.py deleted file mode 100644 index 44bf5427a51..00000000000 --- a/mindsdb/integrations/utilities/handler_utils.py +++ /dev/null @@ -1,121 +0,0 @@ -import os -from typing import Dict - -from mindsdb.interfaces.storage.model_fs import HandlerStorage -from mindsdb.utilities.config import Config - -"""Contains utilities to be used by handlers.""" - - -def get_api_key( - api_name: str, - create_args: Dict[str, str], - engine_storage: HandlerStorage = None, - strict: bool = True, -): - """Gets the API key needed to use an ML Handler. - - Args: - api_name (str): Name of the API (e.g. openai, anthropic) - create_args (Dict[str, str]): Args user passed to the created model with USING keyword - engine_storage (HandlerStorage): Engine storage for the ML handler - strict (bool): Whether or not to require the API key - - Returns: - api_key (str): The API key - - API_KEY preference order: - 1. provided at inference - 2. provided at model creation - 3. provided at engine creation - 4. api key env variable - 5. 
api_key setting in config.json - """ - # Special case for vLLM - always return dummy key - if api_name == "vllm": - return "EMPTY" - - # 1 - if "using" in create_args and f"{api_name.lower()}_api_key" in create_args["using"]: - api_key = create_args["using"][f"{api_name.lower()}_api_key"] - if api_key: - return api_key - - # 1.5 - Check for generic api_key in using - if "using" in create_args and "api_key" in create_args["using"]: - api_key = create_args["using"]["api_key"] - if api_key: - return api_key - - # 2 - if f"{api_name.lower()}_api_key" in create_args: - api_key = create_args[f"{api_name.lower()}_api_key"] - if api_key: - return api_key - - # 2.5 - Check for generic api_key - if "api_key" in create_args: - api_key = create_args["api_key"] - if api_key: - return api_key - - # 3 - Check in params dictionary if it exists (for agents) - if "params" in create_args and create_args["params"] is not None: - if f"{api_name.lower()}_api_key" in create_args["params"]: - api_key = create_args["params"][f"{api_name.lower()}_api_key"] - if api_key: - return api_key - # 3.5 - Check for generic api_key in params - if "api_key" in create_args["params"]: - api_key = create_args["params"]["api_key"] - if api_key: - return api_key - - # 4 - if engine_storage is not None: - connection_args = engine_storage.get_connection_args() - if f"{api_name.lower()}_api_key" in connection_args: - api_key = connection_args[f"{api_name.lower()}_api_key"] - if api_key: - return api_key - # 4.5 - Check for generic api_key in connection_args - if "api_key" in connection_args: - api_key = connection_args["api_key"] - if api_key: - return api_key - - # 5 - api_key = os.getenv(f"{api_name.lower()}_api_key") - if api_key: - return api_key - api_key = os.getenv(f"{api_name.upper()}_API_KEY") - if api_key: - return api_key - - # 6 - config = Config() - api_cfg = config.get(api_name, {}) - if f"{api_name.lower()}_api_key" in api_cfg: - api_key = api_cfg[f"{api_name.lower()}_api_key"] - if api_key: - 
return api_key - - # 7 - if "api_keys" in create_args and api_name in create_args["api_keys"]: - api_key = create_args["api_keys"][api_name] - if api_key: - return api_key - - if strict: - provider_upper = api_name.upper() - api_key_env_var = f"{provider_upper}_API_KEY" - api_key_arg = f"{api_name.lower()}_api_key" - error_message = ( - f"API key for {api_name} not found. Please provide it using one of the following methods:\n" - f"1. Set the {api_key_env_var} environment variable\n" - f"2. Provide it as '{api_key_arg}' parameter or 'api_key' parameter when creating an agent using the CREATE AGENT syntax\n" - f" Example: CREATE AGENT my_agent USING model='gpt-4', provider='{api_name}', {api_key_arg}='your-api-key';\n" - f" Or: CREATE AGENT my_agent USING model='gpt-4', provider='{api_name}', api_key='your-api-key';\n" - ) - raise Exception(error_message) - return None diff --git a/mindsdb/integrations/utilities/handlers/__init__.py b/mindsdb/integrations/utilities/handlers/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/utilities/handlers/api_utilities/__init__.py b/mindsdb/integrations/utilities/handlers/api_utilities/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/utilities/handlers/api_utilities/microsoft/__init__.py b/mindsdb/integrations/utilities/handlers/api_utilities/microsoft/__init__.py deleted file mode 100644 index 21a69e7f7b4..00000000000 --- a/mindsdb/integrations/utilities/handlers/api_utilities/microsoft/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .ms_graph_api_utilities import MSGraphAPIBaseClient \ No newline at end of file diff --git a/mindsdb/integrations/utilities/handlers/api_utilities/microsoft/ms_graph_api_utilities.py b/mindsdb/integrations/utilities/handlers/api_utilities/microsoft/ms_graph_api_utilities.py deleted file mode 100644 index 0e4781d8d27..00000000000 --- 
a/mindsdb/integrations/utilities/handlers/api_utilities/microsoft/ms_graph_api_utilities.py +++ /dev/null @@ -1,166 +0,0 @@ -import requests -import time -from typing import Dict, Generator, List, Optional, Text, Union - -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class MSGraphAPIBaseClient: - """ - The base class for the Microsoft Graph API clients. - This class contains common methods for accessing the Microsoft Graph API. - - Attributes: - MICROSOFT_GRAPH_BASE_API_URL (Text): The base URL of the Microsoft Graph API. - MICROSOFT_GRAPH_API_VERSION (Text): The version of the Microsoft Graph API. - PAGINATION_COUNT (Optional[int]): The number of items to retrieve per request. - """ - MICROSOFT_GRAPH_BASE_API_URL: Text = "https://graph.microsoft.com/" - MICROSOFT_GRAPH_API_VERSION: Text = "v1.0" - PAGINATION_COUNT: Optional[int] = 20 - - def __init__(self, access_token: Text) -> None: - """ - Initializes the Microsoft Graph API client. - - Args: - access_token (Text): The access token for authenticating the requests to the Microsoft Graph API. - """ - self.access_token = access_token - self._group_ids = None - - def _get_api_url(self, endpoint: Text) -> Text: - """ - Constructs the API URL for the specified endpoint. - - Args: - endpoint (Text): The endpoint of the Microsoft Graph API. - - Returns: - Text: The fully constructed API URL. - """ - api_url = f"{self.MICROSOFT_GRAPH_BASE_API_URL}{self.MICROSOFT_GRAPH_API_VERSION}/{endpoint}/" - return api_url - - def _make_request( - self, - api_url: Text, - params: Optional[Dict] = None, - data: Optional[Dict] = None, - method: Text = "GET" - ) -> Union[Dict, object]: - """ - Makes a request to the Microsoft Graph API. - - Args: - api_url (Text): The API URL to make the request to. - params (Optional[Dict]): The parameters to include in the request. - data (Optional[Dict]): The data to include in the request. - method (Text): The HTTP method to use for the request. 
- - Returns: - Union[Dict, object]: The response content of the request. - """ - headers = {"Authorization": f"Bearer {self.access_token}"} - - # Make the request to the Microsoft Graph API based on the method. - if method == "GET": - response = requests.get(api_url, headers=headers, params=params) - elif method == "POST": - response = requests.post(api_url, headers=headers, json=data) - else: - raise NotImplementedError(f"Method {method} not implemented") - - # Process the response. - # If the response is a 429 (rate limit exceeded), wait for the specified time and retry. - if response.status_code == 429: - if "Retry-After" in response.headers: - pause_time = float(response.headers["Retry-After"]) - time.sleep(pause_time) - response = requests.get(api_url, headers=headers, params=params) - - # If the response is not successful, raise an exception. - if response.status_code not in [200, 201]: - raise requests.exceptions.RequestException(response.text) - - return response - - def fetch_paginated_data(self, endpoint: Text, params: Optional[Dict] = None) -> Generator: - """ - Fetches data from the Microsoft Graph API by making the specified request and handling pagination. - - Args: - endpoint (str): The endpoint of the Microsoft Graph API to fetch data from. - params (Optional[Dict]): The parameters to include in the request. - - Yields: - List: The data fetched from the Microsoft Graph API. - """ - if params is None: - params = {} - api_url = self._get_api_url(endpoint) - - # Add the pagination count to the request parameters. - if "$top" not in params: - params["$top"] = self.PAGINATION_COUNT - - while api_url: - # Make the initial request to the Microsoft Graph API. - response = self._make_request(api_url, params) - response_json = response.json() - value = response.json()["value"] - - # Get the next page of data if pagination is enabled. 
- params = None - api_url = response_json.get("@odata.nextLink", "") - yield value - - def _fetch_data(self, endpoint: str, params: Optional[Dict] = {}) -> Union[List, Dict, bytes]: - """ - Fetches data from the Microsoft Graph API by making the specified request. - - Args: - endpoint (str): The endpoint of the Microsoft Graph API to fetch data from. - params (Optional[Dict]): The parameters to include in the request. - - Returns: - Union[List, Dict, bytes]: The data fetched from the Microsoft Graph API. - """ - api_url = self._get_api_url(endpoint) - - response = self._make_request(api_url, params) - return response - - def fetch_data_content(self, endpoint: str, params: Optional[Dict] = {}) -> bytes: - """ - Fetches data content from the Microsoft Graph API by making the specified request. - - Args: - endpoint (str): The endpoint of the Microsoft Graph API to fetch data from. - params (Optional[Dict]): The parameters to include in the request. - - Returns: - bytes: The data content fetched from the Microsoft Graph API. - """ - response = self._fetch_data(endpoint, params) - return response.content - - def fetch_data_json(self, endpoint: str, params: Optional[Dict] = {}) -> Union[List, Dict]: - """ - Fetches data from the Microsoft Graph API by making the specified request and returns the JSON response. - - Args: - endpoint (str): The endpoint of the Microsoft Graph API to fetch data from. - params (Optional[Dict]): The parameters to include in the request. - - Returns: - Union[List, Dict]: The JSON response fetched from the Microsoft Graph API. 
- """ - response = self._fetch_data(endpoint, params) - response_json = response.json() - - if "value" in response_json: - return response_json["value"] - return response_json diff --git a/mindsdb/integrations/utilities/handlers/auth_utilities/__init__.py b/mindsdb/integrations/utilities/handlers/auth_utilities/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/utilities/handlers/auth_utilities/exceptions.py b/mindsdb/integrations/utilities/handlers/auth_utilities/exceptions.py deleted file mode 100644 index f4bfa65d819..00000000000 --- a/mindsdb/integrations/utilities/handlers/auth_utilities/exceptions.py +++ /dev/null @@ -1,11 +0,0 @@ - -class AuthException(Exception): - def __init__(self, message, auth_url=None): - super().__init__(message) - - self.auth_url = auth_url - - -class NoCredentialsException(Exception): - def __init__(self, message): - super().__init__(message) diff --git a/mindsdb/integrations/utilities/handlers/auth_utilities/google/__init__.py b/mindsdb/integrations/utilities/handlers/auth_utilities/google/__init__.py deleted file mode 100644 index 3469b4d6a40..00000000000 --- a/mindsdb/integrations/utilities/handlers/auth_utilities/google/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .google_user_oauth_utilities import GoogleUserOAuth2Manager -from .google_service_account_oauth_utilities import GoogleServiceAccountOAuth2Manager \ No newline at end of file diff --git a/mindsdb/integrations/utilities/handlers/auth_utilities/google/google_service_account_oauth_utilities.py b/mindsdb/integrations/utilities/handlers/auth_utilities/google/google_service_account_oauth_utilities.py deleted file mode 100644 index 5f9f163ac1c..00000000000 --- a/mindsdb/integrations/utilities/handlers/auth_utilities/google/google_service_account_oauth_utilities.py +++ /dev/null @@ -1,59 +0,0 @@ -import json -import requests -from typing import Union -from google.oauth2 import service_account - -from mindsdb.utilities 
import log - -from ..exceptions import NoCredentialsException, AuthException - - -logger = log.getLogger(__name__) - - -class GoogleServiceAccountOAuth2Manager: - def __init__(self, credentials_url: str = None, credentials_file: str = None, credentials_json: Union[dict, str] = None) -> None: - # if no credentials provided, raise an exception - if not any([credentials_url, credentials_file, credentials_json]): - raise NoCredentialsException('No valid Google Service Account credentials provided.') - self.credentials_url = credentials_url - self.credentials_file = credentials_file - if credentials_json: - self.credentials_json = self._parse_credentials_json(credentials_json) - else: - self.credentials_json = None - - def get_oauth2_credentials(self): - try: - if self.credentials_url: - creds = service_account.Credentials.from_service_account_info(self._download_credentials_file()) - return creds - - if self.credentials_file: - creds = service_account.Credentials.from_service_account_file(self.credentials_file) - return creds - - if self.credentials_json: - creds = service_account.Credentials.from_service_account_info(self.credentials_json) - return creds - except Exception as e: - raise AuthException(f"Authentication failed: {e}") - - def _download_credentials_file(self): - response = requests.get(self.credentials_url) - # raise a HTTPError if the status is 4xx or 5xx - response.raise_for_status() - - return self._parse_credentials_json(response.json()) - - def _parse_credentials_json(self, credentials_json: str) -> dict: - if isinstance(credentials_json, str): - try: - # attempt to convert to JSON - return json.loads(credentials_json) - except json.JSONDecodeError: - raise ValueError("Failed to parse credentials provided. 
Please provide a valid service account key.") - else: - # unescape new lines in private_key - credentials_json['private_key'] = credentials_json['private_key'].replace('\\n', '\n') - return credentials_json diff --git a/mindsdb/integrations/utilities/handlers/auth_utilities/google/google_user_oauth_utilities.py b/mindsdb/integrations/utilities/handlers/auth_utilities/google/google_user_oauth_utilities.py deleted file mode 100644 index c1e374cb0f5..00000000000 --- a/mindsdb/integrations/utilities/handlers/auth_utilities/google/google_user_oauth_utilities.py +++ /dev/null @@ -1,99 +0,0 @@ -import json -from pathlib import Path -import requests -import datetime as dt -from flask import request - -from mindsdb.utilities import log - -from ..exceptions import AuthException - -from google_auth_oauthlib.flow import Flow - -from google.oauth2.credentials import Credentials -from google.auth.transport.requests import Request - -logger = log.getLogger(__name__) - - -class GoogleUserOAuth2Manager: - def __init__(self, handler_stroage: str, scopes: list, credentials_file: str = None, credentials_url: str = None, code: str = None): - self.handler_storage = handler_stroage - self.scopes = scopes - self.credentials_file = credentials_file - self.credentials_url = credentials_url - self.code = code - - def get_oauth2_credentials(self): - creds = None - - if self.credentials_file or self.credentials_url: - oauth_user_info = self.handler_storage.encrypted_json_get('oauth_user_info') - - if oauth_user_info: - creds = Credentials.from_authorized_user_info(oauth_user_info, self.scopes) - - if not creds or not creds.valid: - logger.debug("Credentials do not exist or are invalid, attempting to authorize again") - - oauth_user_info = self._download_oauth_user_info() - - if creds and creds.expired and creds.refresh_token: - creds.refresh(Request()) - logger.debug("Credentials refreshed successfully") - else: - creds = self._execute_google_auth_flow(oauth_user_info) - logger.debug("New 
credentials obtained") - - self.handler_storage.encrypted_json_set('oauth_user_info', self._convert_credentials_to_dict(creds)) - logger.debug("Saving credentials to storage") - - return creds - - def _download_oauth_user_info(self): - # if credentials_url is set, attempt to download the contents of the files - # this will be given preference over credentials_file - if self.credentials_url: - response = requests.get(self.credentials_url) - if response.status_code == 200: - return response.json() - else: - logger.error("Failed to get credentials from URL", response.status_code) - - # if credentials_file is set, attempt to read the contents of the file - if self.credentials_file: - path = Path(self.credentials_file).expanduser() - if path.exists(): - with open(path, 'r') as f: - return json.load(f) - else: - logger.error("Credentials file does not exist") - - raise ValueError('OAuth2 credentials could not be found') - - def _execute_google_auth_flow(self, oauth_user_info: dict): - flow = Flow.from_client_config( - oauth_user_info, - scopes=self.scopes - ) - - flow.redirect_uri = request.headers['ORIGIN'] + '/verify-auth' - - if self.code: - flow.fetch_token(code=self.code) - creds = flow.credentials - return creds - else: - auth_url = flow.authorization_url()[0] - raise AuthException(f'Authorisation required. 
Please follow the url: {auth_url}', auth_url=auth_url) - - def _convert_credentials_to_dict(self, credentials): - return { - 'token': credentials.token, - 'refresh_token': credentials.refresh_token, - 'token_uri': credentials.token_uri, - 'client_id': credentials.client_id, - 'client_secret': credentials.client_secret, - 'scopes': credentials.scopes, - 'expiry': dt.datetime.strftime(credentials.expiry, '%Y-%m-%dT%H:%M:%S') - } diff --git a/mindsdb/integrations/utilities/handlers/auth_utilities/google/requirements.txt b/mindsdb/integrations/utilities/handlers/auth_utilities/google/requirements.txt deleted file mode 100644 index 5075b059c90..00000000000 --- a/mindsdb/integrations/utilities/handlers/auth_utilities/google/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -google-auth -google-auth-oauthlib \ No newline at end of file diff --git a/mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/__init__.py b/mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/__init__.py deleted file mode 100644 index 5a6e85c7bf5..00000000000 --- a/mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .ms_graph_api_auth_utilities import MSGraphAPIApplicationPermissionsManager, MSGraphAPIDelegatedPermissionsManager \ No newline at end of file diff --git a/mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/ms_graph_api_auth_utilities.py b/mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/ms_graph_api_auth_utilities.py deleted file mode 100644 index 88da250fa02..00000000000 --- a/mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/ms_graph_api_auth_utilities.py +++ /dev/null @@ -1,212 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Dict, List, Text - -from flask import request -import msal - -from mindsdb.integrations.utilities.handlers.auth_utilities.exceptions import AuthException -from mindsdb.utilities import log - - -logger = 
log.getLogger(__name__) - - -class MSGraphAPIPermissionsManager(ABC): - """ - The base class for managing the delegated permissions for the Microsoft Graph API. - """ - def __init__( - self, - client_id: Text, - client_secret: Text, - tenant_id: Text, - cache: msal.SerializableTokenCache, - scopes: List = ["https://graph.microsoft.com/.default"], - ) -> None: - """ - Initializes the permissions manager. - - Args: - client_id (Text): The client ID of the application registered in Microsoft Entra ID. - client_secret (Text): The client secret of the application registered in Microsoft Entra ID. - tenant_id (Text): The tenant ID of the application registered in Microsoft Entra ID. - cache (msal.SerializableTokenCache): The token cache for storing the access token. - scopes (List): The scopes for the Microsoft Graph API. - code (Text): The authentication code for acquiring the access token. - """ - self.client_id = client_id - self.client_secret = client_secret - self.tenant_id = tenant_id - self.cache = cache - self.scopes = scopes - - @abstractmethod - def get_access_token(self) -> Text: - """ - Retrieves an access token for the Microsoft Graph API. - - Returns: - Text: The access token for the Microsoft Graph API. - """ - pass - - def _get_msal_app(self) -> msal.ConfidentialClientApplication: - """ - Returns an instance of the MSAL ConfidentialClientApplication. - - Returns: - msal.ConfidentialClientApplication: An instance of the MSAL ConfidentialClientApplication. - """ - return msal.ConfidentialClientApplication( - self.client_id, - authority=f"https://login.microsoftonline.com/{self.tenant_id}", - client_credential=self.client_secret, - token_cache=self.cache, - ) - - -class MSGraphAPIDelegatedPermissionsManager(MSGraphAPIPermissionsManager): - """ - The class for managing the delegated permissions for the Microsoft Graph API. 
- """ - def __init__( - self, - client_id: Text, - client_secret: Text, - tenant_id: Text, - cache: msal.SerializableTokenCache, - scopes: List = ["https://graph.microsoft.com/.default"], - code: Text = None, - ) -> None: - """ - Initializes the delegated permissions manager. - - Args: - client_id (Text): The client ID of the application registered in Microsoft Entra ID. - client_secret (Text): The client secret of the application registered in Microsoft Entra ID. - tenant_id (Text): The tenant ID of the application registered in Microsoft Entra ID. - cache (msal.SerializableTokenCache): The token cache for storing the access token. - scopes (List): The scopes for the Microsoft Graph API. - code (Text): The authentication code for acquiring the access token. - """ - super().__init__(client_id, client_secret, tenant_id, cache, scopes) - self.code = code - self.redirect_uri = None - self._set_redirect_uri() - - def _set_redirect_uri(self) -> None: - """ - Sets the redirect URI based on the request origin. - - Raises: - AuthException: If the request origin could not be determined. - """ - # Set the redirect URI based on the request origin. - # If the request origin is 127.0.0.1 (localhost), replace it with localhost. - # This is done because the only HTTP origin allowed in Microsoft Entra ID app registration is localhost. - try: - request_origin = request.headers.get('ORIGIN') or (request.scheme + '://' + request.host) - if not request_origin: - raise AuthException('Request origin could not be determined!') - except RuntimeError: - # if it is outside of request context (streaming in agent) - request_origin = '' - - request_origin = request_origin.replace('127.0.0.1', 'localhost') if 'http://127.0.0.1' in request_origin else request_origin - self.redirect_uri = request_origin + '/verify-auth' - - def get_access_token(self) -> Text: - """ - Retrieves an access token for the Microsoft Graph API. - If a valid access token is found in the cache, it is returned. 
- Otherwise, the authentication flow is executed. - - Returns: - Text: The access token for the Microsoft Graph API. - """ - # Check if a valid access token is already in the cache for the signed-in user. - msal_app = self._get_msal_app() - accounts = msal_app.get_accounts() - - if accounts: - response = msal_app.acquire_token_silent(self.scopes, account=accounts[0]) - if "access_token" in response: - return response['access_token'] - - # If no valid access token is found in the cache, run the authentication flow. - response = self._execute_ms_graph_api_auth_flow() - - if "access_token" in response: - return response['access_token'] - # If no access token is returned, raise an exception. - # This is the expected behaviour when the user attempts to authenticate for the first time. - else: - raise AuthException( - f'Error getting access token: {response.get("error_description")}', - auth_url=response.get('auth_url') - ) - - def _execute_ms_graph_api_auth_flow(self) -> Dict: - """ - Executes the authentication flow for the Microsoft Graph API. - If the authentication code is provided, the token is acquired by authorization code. - Otherwise, the authorization request URL is returned. - - Raises: - AuthException: If the authentication code is not provided - - Returns: - Dict: The response from the Microsoft Graph API authentication flow. - """ - msal_app = self._get_msal_app() - - # If the authentication code is provided, acquire the token by authorization code. - if self.code: - response = msal_app.acquire_token_by_authorization_code( - code=self.code, - scopes=self.scopes, - redirect_uri=self.redirect_uri - ) - - return response - - # If the authentication code is not provided, get the authorization request URL. - else: - auth_url = msal_app.get_authorization_request_url( - scopes=self.scopes, - redirect_uri=self.redirect_uri - ) - - raise AuthException(f'Authorisation required. 
Please follow the url: {auth_url}', auth_url=auth_url) - - -class MSGraphAPIApplicationPermissionsManager(MSGraphAPIPermissionsManager): - """ - The class for managing application permissions for the Microsoft Graph API. - """ - - def get_access_token(self) -> Text: - """ - Retrieves an access token for the Microsoft Graph API using the client credentials flow. - - Returns: - Text: The access token for the Microsoft Graph API. - """ - msal_app = self._get_msal_app() - - # Check if a valid access token is already in the cache. - accounts = msal_app.get_accounts() - if accounts: - response = msal_app.acquire_token_silent(self.scopes, account=accounts[0]) - if "access_token" in response: - return response["access_token"] - - # If no valid access token is found in the cache, acquire a new token using client credentials. - response = msal_app.acquire_token_for_client(scopes=self.scopes) - - if "access_token" in response: - return response["access_token"] - else: - raise AuthException( - f"Error getting access token: {response.get('error_description')}" - ) diff --git a/mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/requirements.txt b/mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/requirements.txt deleted file mode 100644 index 5d86e67ca54..00000000000 --- a/mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -msal \ No newline at end of file diff --git a/mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/__init__.py b/mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/__init__.py deleted file mode 100644 index ffff904881d..00000000000 --- a/mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .snowflake_jwt_gen import get_validated_jwt as get_validated_jwt diff --git a/mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/snowflake_jwt_gen.py 
b/mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/snowflake_jwt_gen.py deleted file mode 100644 index a1dc4c8c21f..00000000000 --- a/mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/snowflake_jwt_gen.py +++ /dev/null @@ -1,151 +0,0 @@ -# Based on https://docs.snowflake.com/en/developer-guide/sql-api/authenticating - -import time -import base64 -import hashlib -import logging -from datetime import timedelta, timezone, datetime - -from cryptography.hazmat.primitives.serialization import load_pem_private_key -from cryptography.hazmat.primitives.serialization import Encoding -from cryptography.hazmat.primitives.serialization import PublicFormat -from cryptography.hazmat.backends import default_backend -import jwt - -logger = logging.getLogger(__name__) - -ISSUER = "iss" -EXPIRE_TIME = "exp" -ISSUE_TIME = "iat" -SUBJECT = "sub" - - -class JWTGenerator(object): - """ - Creates and signs a JWT with the specified private key file, username, and account identifier. The JWTGenerator keeps the - generated token and only regenerates the token if a specified period of time has passed. - """ - - LIFETIME = timedelta(minutes=60) # The tokens will have a 59 minute lifetime - ALGORITHM = "RS256" # Tokens will be generated using RSA with SHA256 - - def __init__(self, account: str, user: str, private_key: str, lifetime: timedelta = LIFETIME): - """ - __init__ creates an object that generates JWTs for the specified user, account identifier, and private key. - :param account: Your Snowflake account identifier. See https://docs.snowflake.com/en/user-guide/admin-account-identifier.html. Note that if you are using the account locator, exclude any region information from the account locator. - :param user: The Snowflake username. - :param private_key: The private key file used for signing the JWTs. - :param lifetime: The number of minutes (as a timedelta) during which the key will be valid. 
- """ - - logger.info( - """Creating JWTGenerator with arguments - account : %s, user : %s, lifetime : %s""", - account, - user, - lifetime, - ) - - # Construct the fully qualified name of the user in uppercase. - self.account = self.prepare_account_name_for_jwt(account) - self.user = user.upper() - self.qualified_username = self.account + "." + self.user - - self.lifetime = lifetime - self.renew_time = datetime.now(timezone.utc) - self.token = None - - self.private_key = load_pem_private_key(private_key.encode(), None, default_backend()) - - def prepare_account_name_for_jwt(self, raw_account: str) -> str: - """ - Prepare the account identifier for use in the JWT. - For the JWT, the account identifier must not include the subdomain or any region or cloud provider information. - :param raw_account: The specified account identifier. - :return: The account identifier in a form that can be used to generate JWT. - """ - account = raw_account - if ".global" not in account: - # Handle the general case. - idx = account.find(".") - if idx > 0: - account = account[0:idx] - else: - # Handle the replication case. - idx = account.find("-") - if idx > 0: - account = account[0:idx] - # Use uppercase for the account identifier. - return account.upper() - - def get_token(self) -> str: - """ - Generates a new JWT. - :return: the new token - """ - now = datetime.now(timezone.utc) # Fetch the current time - - # Prepare the fields for the payload. - # Generate the public key fingerprint for the issuer in the payload. - public_key_fp = self.calculate_public_key_fingerprint(self.private_key) - - # Create our payload - payload = { - # Set the issuer to the fully qualified username concatenated with the public key fingerprint. - ISSUER: self.qualified_username + "." + public_key_fp, - # Set the subject to the fully qualified username. - SUBJECT: self.qualified_username, - # Set the issue time to now. 
- ISSUE_TIME: now, - # Set the expiration time, based on the lifetime specified for this object. - EXPIRE_TIME: now + self.lifetime, - } - - # Regenerate the actual token - token = jwt.encode(payload, key=self.private_key, algorithm=JWTGenerator.ALGORITHM) - # If you are using a version of PyJWT prior to 2.0, jwt.encode returns a byte string, rather than a string. - # If the token is a byte string, convert it to a string. - if isinstance(token, bytes): - token = token.decode("utf-8") - self.token = token - - return self.token - - def calculate_public_key_fingerprint(self, private_key: str) -> str: - """ - Given a private key in PEM format, return the public key fingerprint. - :param private_key: private key string - :return: public key fingerprint - """ - # Get the raw bytes of public key. - public_key_raw = private_key.public_key().public_bytes(Encoding.DER, PublicFormat.SubjectPublicKeyInfo) - - # Get the sha256 hash of the raw bytes. - sha256hash = hashlib.sha256() - sha256hash.update(public_key_raw) - - # Base64-encode the value and prepend the prefix 'SHA256:'. - public_key_fp = "SHA256:" + base64.b64encode(sha256hash.digest()).decode("utf-8") - logger.info("Public key fingerprint is %s", public_key_fp) - - return public_key_fp - - -def get_validated_jwt(token: str, account: str, user: str, private_key: str) -> str: - try: - content = jwt.decode(token, algorithms=[JWTGenerator.ALGORITHM], options={"verify_signature": False}) - - expired = content.get("exp", 0) - # add 5 seconds before limit - if expired - 5 > time.time(): - # keep the same - return token - - except jwt.DecodeError: - # wrong key - ... 
- - # generate new token - if private_key is None: - raise ValueError("Private key is missing") - return JWTGenerator(account, user, private_key).get_token() diff --git a/mindsdb/integrations/utilities/handlers/query_utilities/__init__.py b/mindsdb/integrations/utilities/handlers/query_utilities/__init__.py deleted file mode 100644 index 53ff8561282..00000000000 --- a/mindsdb/integrations/utilities/handlers/query_utilities/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .select_query_utilities import SELECTQueryParser, SELECTQueryExecutor -from .insert_query_utilities import INSERTQueryParser -from .update_query_utilities import UPDATEQueryParser, UPDATEQueryExecutor -from .delete_query_utilities import DELETEQueryParser, DELETEQueryExecutor diff --git a/mindsdb/integrations/utilities/handlers/query_utilities/base_query_utilities.py b/mindsdb/integrations/utilities/handlers/query_utilities/base_query_utilities.py deleted file mode 100644 index 3eb6653fe3f..00000000000 --- a/mindsdb/integrations/utilities/handlers/query_utilities/base_query_utilities.py +++ /dev/null @@ -1,63 +0,0 @@ -import pandas as pd -from typing import Text, List -from mindsdb_sql_parser import ast -from abc import ABC, abstractmethod -from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions, filter_dataframe - - -class BaseQueryParser(ABC): - """ - Parses a SQL query into its component parts. - - Parameters - ---------- - query : ast - Given SQL query. - """ - - def __init__(self, query: ast): - self.query = query - - @abstractmethod - def parse_query(self): - """ - Parses a SQL statement into its components. - """ - pass - - def parse_where_clause(self) -> List[List[Text]]: - """ - Parses the WHERE clause of the query. - """ - where_conditions = extract_comparison_conditions(self.query.where) - return where_conditions - - -class BaseQueryExecutor(): - """ - Executes a SQL query. - - Parameters - ---------- - query : ast - Given SQL query. 
- """ - - def __init__(self, df: pd.DataFrame, where_conditions: List[List[Text]]): - self.df = df - self.where_conditions = where_conditions - - def execute_query(self): - """ - Executes the SQL query. - """ - self.execute_where_clause() - - return self.df - - def execute_where_clause(self): - """ - Executes the where clause of the query. - """ - if len(self.where_conditions) > 0: - self.df = filter_dataframe(self.df, self.where_conditions) diff --git a/mindsdb/integrations/utilities/handlers/query_utilities/delete_query_utilities.py b/mindsdb/integrations/utilities/handlers/query_utilities/delete_query_utilities.py deleted file mode 100644 index 036ae3d7d6e..00000000000 --- a/mindsdb/integrations/utilities/handlers/query_utilities/delete_query_utilities.py +++ /dev/null @@ -1,44 +0,0 @@ -from mindsdb_sql_parser import ast - -from mindsdb.integrations.utilities.handlers.query_utilities.base_query_utilities import BaseQueryParser -from mindsdb.integrations.utilities.handlers.query_utilities.base_query_utilities import BaseQueryExecutor - - -class DELETEQueryParser(BaseQueryParser): - """ - Parses a DELETE query into its component parts. - - Parameters - ---------- - query : ast.Delete - Given SQL DELETE query. - """ - def __init__(self, query: ast.Delete): - super().__init__(query) - - def parse_query(self): - """ - Parses a SQL DELETE statement into its components: WHERE. - """ - where_conditions = self.parse_where_clause() - - return where_conditions - - -class DELETEQueryExecutor(BaseQueryExecutor): - """ - Executes a DELETE query. - - Parameters - ---------- - df : pd.DataFrame - Given table. - where_conditions : List[List[Text]] - WHERE conditions of the query. - - NOTE: This class DOES NOT delete the relevant records of the entity for you, it will simply return the records that need to be deleted based on the WHERE conditions. 
- - This class expects all of the entities to be passed in as a DataFrane and filters out the relevant records based on the WHERE conditions. - Because all of the records need to be extracted to be passed in as a DataFrame, this class is not very computationally efficient. - Therefore, DO NOT use this class if the API/SDK that you are using supports deleting records in bulk. - """ diff --git a/mindsdb/integrations/utilities/handlers/query_utilities/exceptions.py b/mindsdb/integrations/utilities/handlers/query_utilities/exceptions.py deleted file mode 100644 index 8ee132b377d..00000000000 --- a/mindsdb/integrations/utilities/handlers/query_utilities/exceptions.py +++ /dev/null @@ -1,17 +0,0 @@ - -class UnsupportedColumnException(Exception): - """ - Exception raised when a column that is not supported is used in a query. - """ - - -class MandatoryColumnException(Exception): - """ - Exception raised when a mandatory column is missing from a query. - """ - - -class ColumnCountMismatchException(Exception): - """ - Exception raised when the number of columns in the query does not match the number of values. - """ diff --git a/mindsdb/integrations/utilities/handlers/query_utilities/insert_query_utilities.py b/mindsdb/integrations/utilities/handlers/query_utilities/insert_query_utilities.py deleted file mode 100644 index 47d1f5dad46..00000000000 --- a/mindsdb/integrations/utilities/handlers/query_utilities/insert_query_utilities.py +++ /dev/null @@ -1,71 +0,0 @@ -from mindsdb_sql_parser import ast -from typing import Text, List, Dict, Any, Optional - -from .exceptions import UnsupportedColumnException, MandatoryColumnException, ColumnCountMismatchException - - -class INSERTQueryParser: - """ - Parses a INSERT query into its component parts. - - Parameters - ---------- - query : ast.Insert - Given SQL INSERT query. - supported_columns : List[Text], Optional - List of columns supported by the table for inserting. 
- mandatory_columns : List[Text], Optional - List of columns that must be present in the query for inserting. - all_mandatory : Optional[Any], Optional (default=True) - Whether all mandatory columns must be present in the query. If False, only one of the mandatory columns must be present. - """ - def __init__(self, query: ast.Insert, supported_columns: Optional[List[Text]] = None, mandatory_columns: Optional[List[Text]] = None, all_mandatory: Optional[Any] = True): - self.query = query - self.supported_columns = supported_columns - self.mandatory_columns = mandatory_columns - self.all_mandatory = all_mandatory - - def parse_query(self) -> List[Dict[Text, Any]]: - """ - Parses a SQL INSERT statement into its components: columns, values and returns a list of dictionaries with the values to insert. - """ - columns = self.parse_columns() - values = self.parse_values() - - values_to_insert = [] - for value in values: - if len(columns) != len(value): - raise ColumnCountMismatchException("Number of columns does not match the number of values") - else: - values_to_insert.append(dict(zip(columns, value))) - - return values_to_insert - - def parse_columns(self): - """ - Parses the columns in the query. Raises an exception if the columns are not supported or if mandatory columns are missing. 
- """ - columns = [col.name for col in self.query.columns] - - if self.supported_columns: - if not set(columns).issubset(self.supported_columns): - unsupported_columns = set(columns).difference(self.supported_columns) - raise UnsupportedColumnException(f"Unsupported columns: {', '.join(unsupported_columns)}") - - if self.mandatory_columns: - if self.all_mandatory: - if not set(self.mandatory_columns).issubset(columns): - missing_mandatory_columns = set(self.mandatory_columns).difference(columns) - raise MandatoryColumnException(f"Mandatory columns missing: {', '.join(missing_mandatory_columns)}") - else: - if not set(self.mandatory_columns).intersection(columns): - missing_mandatory_columns = set(self.mandatory_columns).difference(columns) - raise MandatoryColumnException(f"Mandatory columns missing: {', '.join(missing_mandatory_columns)}") - - return columns - - def parse_values(self): - """ - Parses the values in the query. - """ - return self.query.values diff --git a/mindsdb/integrations/utilities/handlers/query_utilities/select_query_utilities.py b/mindsdb/integrations/utilities/handlers/query_utilities/select_query_utilities.py deleted file mode 100644 index fda7ec78a39..00000000000 --- a/mindsdb/integrations/utilities/handlers/query_utilities/select_query_utilities.py +++ /dev/null @@ -1,145 +0,0 @@ -from typing import Text, List, Dict, Tuple - -import pandas as pd -from mindsdb_sql_parser import ast - -from mindsdb.integrations.utilities.sql_utils import sort_dataframe -from mindsdb.integrations.utilities.handlers.query_utilities.base_query_utilities import BaseQueryParser -from mindsdb.integrations.utilities.handlers.query_utilities.base_query_utilities import BaseQueryExecutor - - -class SELECTQueryParser(BaseQueryParser): - """ - Parses a SELECT query into its component parts. - - Parameters - ---------- - query : ast.Select - Given SQL SELECT query. - table : Text - Name of the table to query. - columns : List[Text] - List of columns in the table. 
- """ - - def __init__(self, query: ast.Select, table: Text, columns: List[Text]): - super().__init__(query) - self.table = table - self.columns = columns - - def parse_query(self) -> Tuple[List[Text], List[List[Text]], Dict[Text, List[Text]], int]: - """ - Parses a SQL SELECT statement into its components: SELECT, WHERE, ORDER BY, LIMIT. - """ - selected_columns = self.parse_select_clause() - where_conditions = self.parse_where_clause() - order_by_conditions = self.parse_order_by_clause() - result_limit = self.parse_limit_clause() - - return selected_columns, where_conditions, order_by_conditions, result_limit - - def parse_select_clause(self) -> List[Text]: - """ - Parses the SELECT (column selection) clause of the query. - """ - selected_columns = [] - for target in self.query.targets: - if isinstance(target, ast.Star): - selected_columns = self.columns - break - elif isinstance(target, ast.Identifier): - selected_columns.append(target.parts[-1]) - elif isinstance(target, ast.Function): - selected_columns += [arg.parts[-1] for arg in target.args if isinstance(arg, ast.Identifier)] - else: - raise ValueError(f"Unknown query target {type(target)}") - - return selected_columns - - def parse_order_by_clause(self) -> Dict[Text, List[Text]]: - """ - Parses the ORDER BY clause of the query. - """ - if self.query.order_by and len(self.query.order_by) > 0: - return self.query.order_by - else: - return [] - - def parse_limit_clause(self) -> int: - """ - Parses the LIMIT clause of the query. - """ - if self.query.limit: - result_limit = self.query.limit.value - else: - result_limit = 20 - - return result_limit - - -class SELECTQueryExecutor(BaseQueryExecutor): - """ - Executes a SELECT query. - - Parameters - ---------- - df : pd.DataFrame - Dataframe to query. - selected_columns : List[Text] - List of columns to select. - where_conditions : List[List[Text]] - List of where conditions. - order_by_conditions : Dict[Text, List[Text]] - Dictionary of order by conditions. 
- result_limit : int - Number of results to return. - """ - - def __init__( - self, - df: pd.DataFrame, - selected_columns: List[Text], - where_conditions: List[List[Text]], - order_by_conditions: List, - result_limit: int = None, - ): - super().__init__(df, where_conditions) - self.selected_columns = selected_columns - self.order_by_conditions = order_by_conditions - self.result_limit = result_limit - - def execute_query(self): - """ - Execute the query. - """ - self.execute_limit_clause() - - self.execute_where_clause() - - self.execute_select_clause() - - self.execute_order_by_clause() - - return self.df - - def execute_select_clause(self): - """ - Execute the select clause of the query. - """ - if len(self.df) == 0: - self.df = pd.DataFrame([], columns=self.selected_columns) - else: - self.df = self.df[self.selected_columns] - - def execute_order_by_clause(self): - """ - Execute the order by clause of the query. - """ - self.df = sort_dataframe(self.df, self.order_by_conditions) - - def execute_limit_clause(self): - """ - Execute the limit clause of the query. - """ - if self.result_limit: - self.df = self.df.head(self.result_limit) diff --git a/mindsdb/integrations/utilities/handlers/query_utilities/update_query_utilities.py b/mindsdb/integrations/utilities/handlers/query_utilities/update_query_utilities.py deleted file mode 100644 index 0a0e81865f5..00000000000 --- a/mindsdb/integrations/utilities/handlers/query_utilities/update_query_utilities.py +++ /dev/null @@ -1,67 +0,0 @@ -from mindsdb_sql_parser import ast -from typing import Text, List, Optional - -from .exceptions import UnsupportedColumnException - -from mindsdb.integrations.utilities.handlers.query_utilities.base_query_utilities import BaseQueryParser -from mindsdb.integrations.utilities.handlers.query_utilities.base_query_utilities import BaseQueryExecutor - - -class UPDATEQueryParser(BaseQueryParser): - """ - Parses an UPDATE query into its component parts. 
- - Parameters - ---------- - query : ast.Update - Given SQL UPDATE query. - supported_columns : List[Text], Optional - List of columns supported by the table for updating. - """ - def __init__(self, query: ast.Update, supported_columns: Optional[List[Text]] = None): - super().__init__(query) - self.supported_columns = supported_columns - - def parse_query(self): - """ - Parses a SQL UPDATE statement into its components: the columns and values to update as a dictionary, and the WHERE conditions. - """ - values_to_update = self.parse_set_clause() - where_conditions = self.parse_where_clause() - - return values_to_update, where_conditions - - def parse_set_clause(self): - """ - Parses the SET clause of the query and returns a dictionary of columns and values to update. - """ - values = list(self.query.update_columns.items()) - - values_to_update = {} - for value in values: - if self.supported_columns: - if value[0] not in self.supported_columns: - raise UnsupportedColumnException(f"Unsupported column: {value[0]}") - - values_to_update[value[0]] = value[1].value - - return values_to_update - - -class UPDATEQueryExecutor(BaseQueryExecutor): - """ - Executes an UPDATE query. - - Parameters - ---------- - df : pd.DataFrame - Given table. - where_conditions : List[List[Text]] - WHERE conditions of the query. - - NOTE: This class DOES NOT update the relevant records of the entity for you, it will simply return the records that need to be updated based on the WHERE conditions. - - This class expects all of the entities to be passed in as a DataFrane and filters out the relevant records based on the WHERE conditions. - Because all of the records need to be extracted to be passed in as a DataFrame, this class is not very computationally efficient. - Therefore, DO NOT use this class if the API/SDK that you are using supports updating records in bulk. 
- """ diff --git a/mindsdb/integrations/utilities/handlers/validation_utilities/__init__.py b/mindsdb/integrations/utilities/handlers/validation_utilities/__init__.py deleted file mode 100644 index 65b409693fc..00000000000 --- a/mindsdb/integrations/utilities/handlers/validation_utilities/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .parameter_validation_utilities import ParameterValidationUtilities \ No newline at end of file diff --git a/mindsdb/integrations/utilities/handlers/validation_utilities/parameter_validation_utilities.py b/mindsdb/integrations/utilities/handlers/validation_utilities/parameter_validation_utilities.py deleted file mode 100644 index 0f5ebf1ac98..00000000000 --- a/mindsdb/integrations/utilities/handlers/validation_utilities/parameter_validation_utilities.py +++ /dev/null @@ -1,18 +0,0 @@ -import difflib - - -class ParameterValidationUtilities: - @staticmethod - def validate_parameter_spelling(handler_cls, parameters): - expected_params = handler_cls.model_fields.keys() - for key in parameters.keys(): - if key not in expected_params: - close_matches = difflib.get_close_matches( - key, expected_params, cutoff=0.4 - ) - if close_matches: - raise ValueError( - f"Unexpected parameter '{key}'. Did you mean '{close_matches[0]}'?" - ) - else: - raise ValueError(f"Unexpected parameter '{key}'.") diff --git a/mindsdb/integrations/utilities/install.py b/mindsdb/integrations/utilities/install.py deleted file mode 100644 index 9a56b2e4ae4..00000000000 --- a/mindsdb/integrations/utilities/install.py +++ /dev/null @@ -1,145 +0,0 @@ -import os -import sys -import subprocess -from enum import Enum -from typing import Text, List - - -class InstallTool(Enum): - pip = (sys.executable, "-m", "pip") - uv = ("uv", "pip") - - -def install_dependencies(dependencies: List[Text], tool: InstallTool = InstallTool.pip) -> dict: - """ - Installs the dependencies for a handler by calling the `pip install` command via subprocess. 
- - Args: - dependencies (List[Text]): List of dependencies for the handler. - tool (InstallTool): tool the tool that will be used to install dependencies - - Returns: - dict: A dictionary containing the success status and an error message if an error occurs. - """ - outs = b"" - errs = b"" - result = {"success": False, "error_message": None} - code = None - - try: - # Split the dependencies by parsing the contents of the requirements.txt file. - split_dependencies = parse_dependencies(dependencies) - except FileNotFoundError as file_not_found_error: - result["error_message"] = f"Error parsing dependencies, file not found: {str(file_not_found_error)}" - return result - except Exception as unknown_error: - result["error_message"] = f"Unknown error parsing dependencies: {str(unknown_error)}" - return result - - try: - # Install the dependencies using the selected tool. - sp = subprocess.Popen( - [*tool.value, "install", *split_dependencies], stdout=subprocess.PIPE, stderr=subprocess.PIPE - ) - code = sp.wait() - outs, errs = sp.communicate(timeout=1) - except subprocess.TimeoutExpired as timeout_error: - sp.kill() - result["error_message"] = f"Timeout error while installing dependencies: {str(timeout_error)}" - return result - except FileNotFoundError as e: - if e.filename == "uv": - result["error_message"] = "The 'pip' and 'uv' tools are not found. Please install them." - else: - result["error_message"] = f"FileNotFoundError error while installing dependencies: {str(e)}" - return result - except Exception as unknown_error: - result["error_message"] = f"Unknown error while installing dependencies: {str(unknown_error)}" - return result - - # Return the result of the installation if successful, otherwise return an error message. 
- if code != 0: - output = "" - if isinstance(outs, bytes) and len(outs) > 0: - output = output + "Output: " + outs.decode() - if isinstance(errs, bytes) and len(errs) > 0: - if len(output) > 0: - output = output + "\n" - output = output + "Errors: " + errs.decode() - if "no module named pip" in output.lower() and tool is InstallTool.pip: - # try with uv - return install_dependencies(dependencies, InstallTool.uv) - result["error_message"] = output - else: - result["success"] = True - - return result - - -def parse_dependencies(dependencies: List[Text]) -> List[Text]: - """ - Recursively parses dependencies from a list of dependencies given in a requirements.txt file for a handler. - This function will perform the following: - 1. Ignore standalone comments. - 2. Remove inline comments. - 3. Check if the dependency is a path to a requirements file and recursively parse the dependencies from that file. - - Args: - dependencies (List[Text]): List of dependencies for a handler as read from the requirements.txt file. - - Returns: - List[Text]: List of parsed dependencies for the handler. 
- """ - # get the path to this script - script_path = os.path.dirname(os.path.realpath(__file__)) - - split_dependencies = [] - for dependency in dependencies: - # ignore standalone comments - if dependency.startswith("#"): - continue - - # remove inline comments - if "#" in dependency: - dependency = dependency.split("#")[0].strip() - - # check if the dependency is a path to a requirements file - if dependency.startswith("-r"): - # get the path to the requirements file - req_path = dependency.split(" ")[1] - # create the absolute path to the requirements file - abs_req_path = os.path.abspath(os.path.join(script_path, req_path.replace("mindsdb/integrations", ".."))) - # check if the file exists - if os.path.exists(abs_req_path): - inner_dependencies, inner_split_dependencies = [], [] - # read the dependencies from the file - inner_dependencies = read_dependencies(abs_req_path) - # recursively split the dependencies - inner_split_dependencies = parse_dependencies(inner_dependencies) - # add the inner dependencies to the split dependencies - split_dependencies.extend(inner_split_dependencies) - else: - raise FileNotFoundError(f"Requirements file not found: {req_path}") - - else: - split_dependencies.append(dependency) - - return split_dependencies - - -def read_dependencies(path: Text) -> List[Text]: - """ - Reads the dependencies for a handler from the relevant requirements.txt file and returns them as a list. - - Args: - path (Text): Path to the requirements.txt file for the handler. - - Returns: - List[Text]: List of dependencies for the handler. 
- """ - dependencies = [] - # read the dependencies from the file - with open(str(path), "rt") as f: - dependencies = [x.strip(" \t\n") for x in f.readlines()] - dependencies = [x for x in dependencies if len(x) > 0] - return dependencies diff --git a/mindsdb/integrations/utilities/pydantic_utils.py b/mindsdb/integrations/utilities/pydantic_utils.py deleted file mode 100644 index 0e913cf13b6..00000000000 --- a/mindsdb/integrations/utilities/pydantic_utils.py +++ /dev/null @@ -1,208 +0,0 @@ -import pprint - -pydantic_schema_description = """## Understanding Pydantic Schemas for JSON Formatting - -Pydantic schemas provide a framework for defining the structure and validation rules for JSON output. Below is an overview of key components commonly found in a Pydantic schema: - -### Key Components - -Each object in the schema represents a Pydantic model in JSON format. Typical fields in a Pydantic model description include: - -- **`anyOf`**: - - A list describing possible values for a Pydantic model field. - -- **`additionalProperties`**: - - Describes the keys of a dictionary. Keys are always of type `string` due to this being a JSON Pydantic schema. The corresponding key types supported by Pydantic are: - - `string`: a text string - - `integer`: an integer number - - `number`: a floating-point number - -- **`items`**: - - Describes the items contained within an `array` (list). - -- **`type`**: - - Specifies the Pydantic type assigned to the field, defining the expected data type. Common types include: - - `string`: a text string - - `integer`: an integer number - - `number`: a floating-point number - - `array`: a list - - `object`: a dictionary - - `null`: the python null value None. Indicates the field is optional. - -- **`description`**: - - Provides a textual narrative explaining the purpose and details of the output JSON field. - -- **`title`**: - - A Pydantic-generated, human-readable title for the field. 
- -- **`default`**: - - The default value for this field if no value is provided by the user. - -### Schema - -Below is the Pydantic schema: - -{schema} - -### Examples - -Below is an example of well-formed output adhering to this schema. - -- Dummy text strings are represented as "lorem ipsum." - -{example} -""" - - -def get_dummy_value(field_value): - """A function to return a dummy value of a Pydantic model field.""" - type_str = field_value["type"] - example_dict = { - "string": "lorem ipsum", - "int": 3, - "number": 42.0, - "null": None, - "object": {"lorem ipsum": "lorem_ipsum"}, - } - - if type_str in example_dict: - return example_dict[type_str] - else: - return None - - -def get_dummy_array(field_value): - """A function to return a dummy array of a Pydantic model field.""" - items = field_value["items"] - - if "type" in items: - if items["type"] == "null": # skip if null - pass - elif items["type"] == "array": # is it an array? - array_value = get_dummy_array(items) - elif ( - items["type"] == "object" and "additionalProperties" in items - ): # is it a dict? - array_value = get_dummy_dict(items) - else: # it is a regular value! - array_value = get_dummy_value(items) - return [array_value for _ in range(2)] - - elif "AnyOf" in field_value["items"]: - array_value = get_any_of(field_value["items"]) # can be one of many types - return [array_value for _ in range(2)] - - else: # is it a pydantic class? - array_value = example_generator(items) - return [array_value for _ in range(2)] - - -def get_dummy_dict(field_value): - """A function to return a dummy dictionary of a Pydantic model field.""" - return get_dummy_value(field_value) - - -def get_any_of(field_value): - """A function to return the first viable pydantic type of an Any() Pydantic model field.""" - for any_of in field_value["anyOf"]: - if "type" in any_of: - if any_of["type"] == "null": # skip if null - continue - elif any_of["type"] == "array": # is it an array? 
- out = get_dummy_array(any_of) - return out - elif ( - any_of["type"] == "object" and "additionalProperties" in any_of - ): # is it a dict? - out = get_dummy_dict(any_of) - return out - else: # it is a regular value! - out = get_dummy_value(any_of) - return out - else: # is it a pydantic class? - out = example_generator(any_of) - return out - - -def example_generator(pydantic_json_schema): - """dynamically parse a pydantic object and generate an example of it's formatting.""" - - example_dict = {} - for schema_name, schema in pydantic_json_schema.items(): - - for field_name, field_value in schema.items(): - if "type" in field_value: - - if field_value["type"] == "array": # is it an array? - example_dict[field_name] = get_dummy_array(field_value) - - elif ( - field_value["type"] == "object" - and "additionalProperties" in field_value - ): # is it a dict? - example_dict[field_name] = get_dummy_dict(field_value) - - else: # it is a regular value! - example_dict[field_name] = get_dummy_value(field_value) - - elif "anyOf" in field_value: - example_dict[field_name] = get_any_of(field_value) - - else: # it is a pydantic class - example_dict[field_name] = example_generator(field_value) - return example_dict - - -def search_and_replace_refs(schema, defs, ref_skip={}, n=0): - """Dynamically substitute subclass references in a Pydantic object schema.""" - for key, value in schema.items(): - if key in ref_skip: - continue - if type(value) is dict: - if "$ref" in value: - definition_key = value["$ref"].split("/")[-1] - if definition_key in ref_skip: - schema[key] = {"type": "null"} - else: - schema[key] = {definition_key: defs[definition_key]["properties"]} - else: - search_and_replace_refs(value, defs, ref_skip, n + 1) - elif type(value) is list: - for val in value: - search_and_replace_refs(val, defs, ref_skip, n + 1) - - -def remove_extraneous_fields(schema, ref_skip): - """Remove extraneous fields from object descriptions.""" - reduced_schema = schema["properties"] - - for 
ref in ref_skip.keys(): - if ref in reduced_schema: - del reduced_schema[ref] - - for key, value in reduced_schema.items(): - if "title" in value: - del value["title"] - if "$defs" in value: - del value["$defs"] - if "required" in value: - del value["required"] - - return reduced_schema - - -def format_for_prompt(pydantic_object, ref_skip={}): - """Format a Pydantic object description for prompting an LLM.""" - schema = {k: v for k, v in pydantic_object.schema().items()} - - search_and_replace_refs( - schema=schema["properties"], defs=schema["$defs"], ref_skip=ref_skip, n=0 - ) - - reduced_schema = remove_extraneous_fields(schema, ref_skip) - - reduced_schema = {schema["title"]: reduced_schema} - - out = pprint.pformat(reduced_schema) - - return out, reduced_schema diff --git a/mindsdb/integrations/utilities/query_traversal.py b/mindsdb/integrations/utilities/query_traversal.py deleted file mode 100644 index a52b64860c2..00000000000 --- a/mindsdb/integrations/utilities/query_traversal.py +++ /dev/null @@ -1,248 +0,0 @@ -from mindsdb_sql_parser import ast - - -def query_traversal(node, callback, is_table=False, is_target=False, parent_query=None, stack=None): - """ - :param node: element - :param callback: function applied to every element - :param is_table: it is table in query - :param is_target: it is the target in select - :param parent_query: current query (select/update/create/...) 
where we are now - :return: - new element if it is needed to be replaced - or None to keep element and traverse over it - - Usage: - Create callback function to check or replace nodes - Example: - ```python - def remove_predictors(node, is_table, **kwargs): - if is_table and isinstance(node, Identifier): - if is_predictor(node): - return Constant(None) - - utils.query_traversal(ast_query, remove_predictors) - ``` - - """ - - if stack is None: - stack = [] - - res = callback(node, is_table=is_table, is_target=is_target, parent_query=parent_query, callstack=stack) - stack2 = [node] + stack - - if res is not None: - # node is going to be replaced - return res - - if isinstance(node, ast.Select): - if node.from_table is not None: - node_out = query_traversal(node.from_table, callback, is_table=True, parent_query=node, stack=stack2) - if node_out is not None: - node.from_table = node_out - - array = [] - for node2 in node.targets: - node_out = query_traversal(node2, callback, parent_query=node, is_target=True, stack=stack2) or node2 - if isinstance(node_out, list): - array.extend(node_out) - else: - array.append(node_out) - node.targets = array - - if node.cte is not None: - array = [] - for cte in node.cte: - node_out = query_traversal(cte.query, callback, parent_query=node, stack=stack2) or cte - array.append(node_out) - node.cte = array - - if node.where is not None: - node_out = query_traversal(node.where, callback, parent_query=node, stack=stack2) - if node_out is not None: - node.where = node_out - - if node.group_by is not None: - array = [] - for node2 in node.group_by: - node_out = query_traversal(node2, callback, parent_query=node, stack=stack2) or node2 - array.append(node_out) - node.group_by = array - - if node.having is not None: - node_out = query_traversal(node.having, callback, parent_query=node, stack=stack2) - if node_out is not None: - node.having = node_out - - if node.order_by is not None: - array = [] - for node2 in node.order_by: - node_out = 
query_traversal(node2, callback, parent_query=node, stack=stack2) or node2 - array.append(node_out) - node.order_by = array - - elif isinstance(node, (ast.Union, ast.Intersect, ast.Except)): - node_out = query_traversal(node.left, callback, parent_query=node, stack=stack2) - if node_out is not None: - node.left = node_out - node_out = query_traversal(node.right, callback, parent_query=node, stack=stack2) - if node_out is not None: - node.right = node_out - - elif isinstance(node, ast.Join): - node_out = query_traversal(node.right, callback, is_table=True, parent_query=parent_query, stack=stack2) - if node_out is not None: - node.right = node_out - node_out = query_traversal(node.left, callback, is_table=True, parent_query=parent_query, stack=stack2) - if node_out is not None: - node.left = node_out - if node.condition is not None: - node_out = query_traversal(node.condition, callback, parent_query=parent_query, stack=stack2) - if node_out is not None: - node.condition = node_out - - elif isinstance( - node, (ast.Function, ast.BinaryOperation, ast.UnaryOperation, ast.BetweenOperation, ast.Exists, ast.NotExists) - ): - array = [] - for arg in node.args: - node_out = query_traversal(arg, callback, parent_query=parent_query, stack=stack2) or arg - array.append(node_out) - node.args = array - - if isinstance(node, ast.Function): - if node.from_arg is not None: - node_out = query_traversal(node.from_arg, callback, parent_query=parent_query, stack=stack2) - if node_out is not None: - node.from_arg = node_out - - elif isinstance(node, ast.WindowFunction): - query_traversal(node.function, callback, parent_query=parent_query, stack=stack2) - if node.partition is not None: - array = [] - for node2 in node.partition: - node_out = query_traversal(node2, callback, parent_query=parent_query, stack=stack2) or node2 - array.append(node_out) - node.partition = array - if node.order_by is not None: - array = [] - for node2 in node.order_by: - node_out = query_traversal(node2, 
callback, parent_query=parent_query, stack=stack2) or node2 - array.append(node_out) - node.order_by = array - - elif isinstance(node, ast.TypeCast): - node_out = query_traversal(node.arg, callback, parent_query=parent_query, stack=stack2) - if node_out is not None: - node.arg = node_out - - elif isinstance(node, ast.Tuple): - array = [] - for node2 in node.items: - node_out = query_traversal(node2, callback, parent_query=parent_query, stack=stack2) or node2 - array.append(node_out) - node.items = array - - elif isinstance(node, ast.Insert): - if node.table is not None: - node_out = query_traversal(node.table, callback, is_table=True, parent_query=node, stack=stack2) - if node_out is not None: - node.table = node_out - - if node.values is not None: - rows = [] - for row in node.values: - items = [] - for item in row: - item2 = query_traversal(item, callback, parent_query=node, stack=stack2) or item - items.append(item2) - rows.append(items) - node.values = rows - - if node.from_select is not None: - node_out = query_traversal(node.from_select, callback, parent_query=node, stack=stack2) - if node_out is not None: - node.from_select = node_out - - elif isinstance(node, ast.Update): - if node.table is not None: - node_out = query_traversal(node.table, callback, is_table=True, parent_query=node, stack=stack2) - if node_out is not None: - node.table = node_out - - if node.where is not None: - node_out = query_traversal(node.where, callback, parent_query=node, stack=stack2) - if node_out is not None: - node.where = node_out - - if node.update_columns is not None: - changes = {} - for k, v in node.update_columns.items(): - v2 = query_traversal(v, callback, parent_query=node, stack=stack2) - if v2 is not None: - changes[k] = v2 - if changes: - node.update_columns.update(changes) - - if node.from_select is not None: - node_out = query_traversal(node.from_select, callback, parent_query=node, stack=stack2) - if node_out is not None: - node.from_select = node_out - - elif 
isinstance(node, ast.CreateTable): - array = [] - if node.columns is not None: - for node2 in node.columns: - node_out = query_traversal(node2, callback, parent_query=node, stack=stack2) or node2 - array.append(node_out) - node.columns = array - - if node.name is not None: - node_out = query_traversal(node.name, callback, is_table=True, parent_query=node, stack=stack2) - if node_out is not None: - node.name = node_out - - if node.from_select is not None: - node_out = query_traversal(node.from_select, callback, parent_query=node, stack=stack2) - if node_out is not None: - node.from_select = node_out - - elif isinstance(node, ast.Delete): - if node.where is not None: - node_out = query_traversal(node.where, callback, parent_query=node, stack=stack2) - if node_out is not None: - node.where = node_out - - elif isinstance(node, ast.OrderBy): - if node.field is not None: - node_out = query_traversal(node.field, callback, parent_query=parent_query, stack=stack2) - if node_out is not None: - node.field = node_out - - elif isinstance(node, ast.Case): - rules = [] - for condition, result in node.rules: - condition2 = query_traversal(condition, callback, parent_query=parent_query, stack=stack2) - result2 = query_traversal(result, callback, parent_query=parent_query, stack=stack2) - - condition = condition if condition2 is None else condition2 - result = result if result2 is None else result2 - rules.append([condition, result]) - node.rules = rules - default = query_traversal(node.default, callback, parent_query=parent_query, stack=stack2) - if default is not None: - node.default = default - arg = query_traversal(node.arg, callback, parent_query=parent_query, stack=stack2) - if arg is not None: - node.arg = arg - - elif isinstance(node, list): - array = [] - for node2 in node: - node_out = query_traversal(node2, callback, parent_query=parent_query, stack=stack2) or node2 - array.append(node_out) - return array - - # keep original node - return None diff --git 
a/mindsdb/integrations/utilities/rag/__init__.py b/mindsdb/integrations/utilities/rag/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/utilities/rag/loaders/__init__.py b/mindsdb/integrations/utilities/rag/loaders/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/utilities/rag/loaders/document_loaders.py b/mindsdb/integrations/utilities/rag/loaders/document_loaders.py deleted file mode 100644 index 3a3d6a4f813..00000000000 --- a/mindsdb/integrations/utilities/rag/loaders/document_loaders.py +++ /dev/null @@ -1,268 +0,0 @@ -"""Custom document loaders to replace langchain document loaders""" - -from typing import Iterator - -import pandas as pd - -from mindsdb.interfaces.knowledge_base.preprocessing.document_types import SimpleDocument -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class BaseDocumentLoader: - """Base class for document loaders""" - - def __init__(self, path: str): - self.path = path - - def lazy_load(self) -> Iterator[SimpleDocument]: - """Load documents lazily""" - raise NotImplementedError("Subclasses must implement lazy_load") - - -class CSVDocumentLoader(BaseDocumentLoader): - """Load CSV files and convert rows to documents""" - - def lazy_load(self) -> Iterator[SimpleDocument]: - """Load CSV file and yield each row as a document""" - try: - df = pd.read_csv(self.path) - - for idx, row in df.iterrows(): - # Convert row to text representation - row_text = ", ".join([f"{col}: {val}" for col, val in row.items() if pd.notna(val)]) - - metadata = { - "source": str(self.path), - "row_index": int(idx), - "total_rows": len(df), - } - - yield SimpleDocument(page_content=row_text, metadata=metadata) - except Exception: - logger.exception(f"Error loading CSV file {self.path}:") - raise - - -class PDFDocumentLoader(BaseDocumentLoader): - """Load PDF files using pymupdf (fitz)""" - - def lazy_load(self) -> 
Iterator[SimpleDocument]: - """Load PDF file and extract text from all pages""" - try: - import fitz # pymupdf - - with fitz.open(self.path) as pdf: - all_text = [] - for page_num, page in enumerate(pdf): - page_text = page.get_text() - all_text.append(page_text) - - # Yield each page as a separate document - metadata = { - "source": str(self.path), - "page": page_num + 1, - "total_pages": len(pdf), - } - yield SimpleDocument(page_content=page_text, metadata=metadata) - except ImportError: - raise ImportError("pymupdf (fitz) is required for PDF loading. Install it with: pip install pymupdf") - except Exception: - logger.exception(f"Error loading PDF file {self.path}:") - raise - - -class TextDocumentLoader(BaseDocumentLoader): - """Load plain text files with encoding detection""" - - def lazy_load(self) -> Iterator[SimpleDocument]: - """Load text file with proper encoding detection""" - try: - from charset_normalizer import from_bytes - - # Read file as bytes first - with open(self.path, "rb") as f: - byte_str = f.read() - - # Detect encoding - encoding_meta = from_bytes( - byte_str[: 32 * 1024], # Sample first 32KB - steps=32, - chunk_size=1024, - explain=False, - ) - - best_meta = encoding_meta.best() - if best_meta is not None: - encoding = best_meta.encoding - else: - encoding = "utf-8" - - # Decode with detected encoding - try: - text = byte_str.decode(encoding) - except UnicodeDecodeError: - # Fallback to utf-8 with error handling - text = byte_str.decode("utf-8", errors="replace") - - metadata = { - "source": str(self.path), - "encoding": encoding, - } - - yield SimpleDocument(page_content=text, metadata=metadata) - except ImportError: - # Fallback to utf-8 if charset_normalizer not available - try: - with open(self.path, "r", encoding="utf-8") as f: - text = f.read() - metadata = { - "source": str(self.path), - "encoding": "utf-8", - } - yield SimpleDocument(page_content=text, metadata=metadata) - except UnicodeDecodeError: - # Try with error replacement - 
with open(self.path, "r", encoding="utf-8", errors="replace") as f: - text = f.read() - metadata = { - "source": str(self.path), - "encoding": "utf-8 (with replacement)", - } - yield SimpleDocument(page_content=text, metadata=metadata) - except Exception: - logger.exception(f"Error loading text file {self.path}:") - raise - - -class HTMLDocumentLoader(BaseDocumentLoader): - """Load HTML files and extract text content""" - - def lazy_load(self) -> Iterator[SimpleDocument]: - """Load HTML file and extract text content""" - try: - from bs4 import BeautifulSoup - - with open(self.path, "r", encoding="utf-8", errors="replace") as f: - html_content = f.read() - - soup = BeautifulSoup(html_content, "html.parser") - - # Remove script and style elements - for script in soup(["script", "style"]): - script.decompose() - - # Get text - text = soup.get_text(separator="\n", strip=True) - - metadata = { - "source": str(self.path), - "title": soup.title.string if soup.title else None, - } - - yield SimpleDocument(page_content=text, metadata=metadata) - except ImportError: - # Fallback to html.parser from standard library - try: - from html.parser import HTMLParser - - class TextExtractor(HTMLParser): - def __init__(self): - super().__init__() - self.text = [] - self.skip = False - - def handle_starttag(self, tag, attrs): - if tag in ["script", "style"]: - self.skip = True - - def handle_endtag(self, tag): - if tag in ["script", "style"]: - self.skip = False - - def handle_data(self, data): - if not self.skip: - self.text.append(data) - - with open(self.path, "r", encoding="utf-8", errors="replace") as f: - html_content = f.read() - - parser = TextExtractor() - parser.feed(html_content) - text = "\n".join(parser.text) - - metadata = { - "source": str(self.path), - } - - yield SimpleDocument(page_content=text, metadata=metadata) - except Exception: - logger.exception(f"Error loading HTML file {self.path}:") - raise - except Exception: - logger.exception(f"Error loading HTML file 
{self.path}:") - raise - - -class MarkdownDocumentLoader(BaseDocumentLoader): - """Load Markdown files as text""" - - def lazy_load(self) -> Iterator[SimpleDocument]: - """Load markdown file as plain text""" - try: - from charset_normalizer import from_bytes - - # Read file as bytes first - with open(self.path, "rb") as f: - byte_str = f.read() - - # Detect encoding - encoding_meta = from_bytes( - byte_str[: 32 * 1024], # Sample first 32KB - steps=32, - chunk_size=1024, - explain=False, - ) - - best_meta = encoding_meta.best() - if best_meta is not None: - encoding = best_meta.encoding - else: - encoding = "utf-8" - - # Decode with detected encoding - try: - text = byte_str.decode(encoding) - except UnicodeDecodeError: - # Fallback to utf-8 with error handling - text = byte_str.decode("utf-8", errors="replace") - - metadata = { - "source": str(self.path), - "encoding": encoding, - } - - yield SimpleDocument(page_content=text, metadata=metadata) - except ImportError: - # Fallback to utf-8 if charset_normalizer not available - try: - with open(self.path, "r", encoding="utf-8") as f: - text = f.read() - metadata = { - "source": str(self.path), - "encoding": "utf-8", - } - yield SimpleDocument(page_content=text, metadata=metadata) - except UnicodeDecodeError: - # Try with error replacement - with open(self.path, "r", encoding="utf-8", errors="replace") as f: - text = f.read() - metadata = { - "source": str(self.path), - "encoding": "utf-8 (with replacement)", - } - yield SimpleDocument(page_content=text, metadata=metadata) - except Exception: - logger.exception(f"Error loading markdown file {self.path}:") - raise diff --git a/mindsdb/integrations/utilities/rag/loaders/file_loader.py b/mindsdb/integrations/utilities/rag/loaders/file_loader.py deleted file mode 100644 index 23a3f44c46f..00000000000 --- a/mindsdb/integrations/utilities/rag/loaders/file_loader.py +++ /dev/null @@ -1,51 +0,0 @@ -import pathlib -from typing import Iterator, List - -from 
mindsdb.interfaces.knowledge_base.preprocessing.document_types import SimpleDocument -from mindsdb.integrations.utilities.rag.loaders.document_loaders import ( - BaseDocumentLoader, - CSVDocumentLoader, - PDFDocumentLoader, - TextDocumentLoader, - HTMLDocumentLoader, - MarkdownDocumentLoader, -) - - -class FileLoader: - """Loads files of various types into vector database document representation""" - - def __init__(self, path: str): - self.path = path - - def _get_loader_from_extension(self, extension: str, path: str) -> BaseDocumentLoader: - """Get appropriate loader based on file extension""" - if extension == ".pdf": - return PDFDocumentLoader(path) - if extension == ".csv": - return CSVDocumentLoader(path) - if extension == ".html": - return HTMLDocumentLoader(path) - if extension == ".md": - return MarkdownDocumentLoader(path) - return TextDocumentLoader(path) - - def _lazy_load_documents_from_file(self, path: str) -> Iterator[SimpleDocument]: - """Load documents from file based on extension""" - file_extension = pathlib.Path(path).suffix - loader = self._get_loader_from_extension(file_extension, path) - - for doc in loader.lazy_load(): - # Ensure extension is in metadata - if "extension" not in doc.metadata: - doc.metadata["extension"] = file_extension - yield doc - - def load(self) -> List[SimpleDocument]: - """Loads a file and converts the contents into a vector database Document representation""" - return list(self.lazy_load()) - - def lazy_load(self) -> Iterator[SimpleDocument]: - """Loads a file and converts the contents into a vector database Document representation""" - for doc in self._lazy_load_documents_from_file(self.path): - yield doc diff --git a/mindsdb/integrations/utilities/rag/rerankers/__init__.py b/mindsdb/integrations/utilities/rag/rerankers/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/utilities/rag/rerankers/base_reranker.py 
b/mindsdb/integrations/utilities/rag/rerankers/base_reranker.py deleted file mode 100644 index 1af44388872..00000000000 --- a/mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +++ /dev/null @@ -1,632 +0,0 @@ -from __future__ import annotations - -import re -import os -import json -import math -import asyncio -import logging -import random -from typing import Any, List, Optional, Tuple - -from openai import AsyncOpenAI, AsyncAzureOpenAI -from pydantic import BaseModel - -from mindsdb.integrations.utilities.rag.settings import ( - DEFAULT_RERANKING_MODEL, - DEFAULT_LLM_ENDPOINT, - DEFAULT_RERANKER_N, - DEFAULT_RERANKER_LOGPROBS, - DEFAULT_RERANKER_TOP_LOGPROBS, - DEFAULT_RERANKER_MAX_TOKENS, - DEFAULT_VALID_CLASS_TOKENS, - RerankerMode, -) - -from mindsdb.interfaces.knowledge_base.providers.bedrock import AsyncBedrockClient -from mindsdb.interfaces.knowledge_base.providers.gemini import GeminiClient -from mindsdb.interfaces.knowledge_base.providers.snowflake import SnowflakeClient - - -log = logging.getLogger(__name__) - - -def get_event_loop(): - try: - loop = asyncio.get_running_loop() - except RuntimeError: - # If no running loop exists, create a new one - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - return loop - - -class BaseLLMReranker(BaseModel): - filtering_threshold: float = 0.0 # Default threshold for filtering - provider: str = "openai" - model: str = DEFAULT_RERANKING_MODEL # Model to use for reranking - temperature: float = 0.0 # Temperature for the model - api_key: Optional[str] = None - base_url: Optional[str] = None - api_version: Optional[str] = None - num_docs_to_keep: Optional[int] = None # How many of the top documents to keep after reranking & compressing. 
- method: str = "no-logprobs" # Scoring method: 'multi-class' or 'no-logprobs' - mode: RerankerMode = RerankerMode.POINTWISE - _api_key_var: str = "OPENAI_API_KEY" - client: Optional[AsyncOpenAI | AsyncBedrockClient | GeminiClient | SnowflakeClient] = None - _semaphore: Optional[asyncio.Semaphore] = None - max_concurrent_requests: int = 20 - max_retries: int = 4 - retry_delay: float = 1.0 - request_timeout: float = 20.0 # Timeout for API requests - early_stop: bool = True # Whether to enable early stopping - early_stop_threshold: float = 0.8 # Confidence threshold for early stopping - n: int = DEFAULT_RERANKER_N # Number of completions to generate - logprobs: bool = DEFAULT_RERANKER_LOGPROBS # Whether to include log probabilities - top_logprobs: int = DEFAULT_RERANKER_TOP_LOGPROBS # Number of top log probabilities to include - max_tokens: int = DEFAULT_RERANKER_MAX_TOKENS # Maximum tokens to generate - valid_class_tokens: List[str] = DEFAULT_VALID_CLASS_TOKENS - - class Config: - arbitrary_types_allowed = True - extra = "allow" - - def __init__(self, **kwargs): - super().__init__(**kwargs) - self._semaphore = asyncio.Semaphore(self.max_concurrent_requests) - self._init_client() - - def _init_client(self): - if self.client is None: - if self.provider == "google": - self.provider = "gemini" - - if self.provider == "azure_openai": - azure_api_key = self.api_key or os.getenv("AZURE_OPENAI_API_KEY") - azure_api_endpoint = self.base_url or os.environ.get("AZURE_OPENAI_ENDPOINT") - azure_api_version = self.api_version or os.environ.get("AZURE_OPENAI_API_VERSION") - self.client = AsyncAzureOpenAI( - api_key=azure_api_key, - azure_endpoint=azure_api_endpoint, - api_version=azure_api_version, - timeout=self.request_timeout, - max_retries=2, - ) - self.method = "multi-class" - elif self.provider == "bedrock": - kwargs = self.model_extra.copy() - self.client = AsyncBedrockClient(**kwargs) - elif self.provider == "gemini": - self.client = GeminiClient(api_key=self.api_key) - 
elif self.provider == "snowflake": - kwargs = self.model_extra.copy() - self.client = SnowflakeClient(api_key=self.api_key, **kwargs) - elif self.provider in ("openai", "ollama"): - if self.provider == "ollama": - if self.api_key is None: - self.api_key = "n/a" - else: - self.method = "multi-class" - - api_key_var: str = "OPENAI_API_KEY" - openai_api_key = self.api_key or os.getenv(api_key_var) - if not openai_api_key: - raise ValueError(f"OpenAI API key not found in environment variable {api_key_var}") - - base_url = self.base_url or DEFAULT_LLM_ENDPOINT - self.client = AsyncOpenAI( - api_key=openai_api_key, base_url=base_url, timeout=self.request_timeout, max_retries=2 - ) - else: - raise NotImplementedError(f'Provider "{self.provider}" is not supported') - - async def _call_llm(self, messages) -> str: - if self.provider in ("azure_openai", "openai", "ollama"): - response = await self.client.chat.completions.create( - model=self.model, - messages=messages, - ) - return response.choices[0].message.content - else: - return await self.client.acompletion(model_name=self.model, messages=messages) - - async def _rank(self, query_document_pairs: List[Tuple[str, str]], rerank_callback=None) -> List[Tuple[str, float]]: - ranked_results = [] - - # Process in larger batches for better throughput - batch_size = min(self.max_concurrent_requests * 2, len(query_document_pairs)) - for i in range(0, len(query_document_pairs), batch_size): - batch = query_document_pairs[i : i + batch_size] - - results = await asyncio.gather( - *[ - self._backoff_wrapper(query=query, document=document, rerank_callback=rerank_callback) - for (query, document) in batch - ], - return_exceptions=True, - ) - - for idx, result in enumerate(results): - if isinstance(result, Exception): - log.error(f"Error processing document {i + idx}: {str(result)}") - raise RuntimeError(f"Error during reranking: {result}") from result - - score = result["relevance_score"] - - ranked_results.append((batch[idx][1], 
score)) - - # Check if we should stop early - try: - high_scoring_docs = [r for r in ranked_results if r[1] >= self.filtering_threshold] - can_stop_early = ( - self.early_stop # Early stopping is enabled - and self.num_docs_to_keep # We have a target number of docs - and len(high_scoring_docs) >= self.num_docs_to_keep # Found enough good docs - and score >= self.early_stop_threshold # Current doc is good enough - ) - - if can_stop_early: - log.info(f"Early stopping after finding {self.num_docs_to_keep} documents with high confidence") - return ranked_results - except Exception as e: - # Don't let early stopping errors stop the whole process - log.warning(f"Error in early stopping check: {e}") - - return ranked_results - - async def _backoff_wrapper(self, query: str, document: str, rerank_callback=None) -> Any: - async with self._semaphore: - for attempt in range(self.max_retries): - try: - if self.method == "multi-class": - rerank_data = await self.search_relevancy_score(query, document) - elif self.method == "no-logprobs": - rerank_data = await self.search_relevancy_no_logprob(query, document) - else: - rerank_data = await self.search_relevancy(query, document) - if rerank_callback is not None: - rerank_callback(rerank_data) - return rerank_data - - except Exception as e: - if attempt == self.max_retries - 1: - log.error(f"Failed after {self.max_retries} attempts: {str(e)}") - raise - # Exponential backoff with jitter - retry_delay = self.retry_delay * (2**attempt) + random.uniform(0, 0.1) - await asyncio.sleep(retry_delay) - - async def search_relevancy(self, query: str, document: str) -> Any: - response = await self.client.chat.completions.create( - model=self.model, - messages=[ - { - "role": "system", - "content": "Rate the relevance of the document to the query. 
Respond with 'yes' or 'no'.", - }, - {"role": "user", "content": f"Query: {query}\nDocument: {document}\nIs this document relevant?"}, - ], - temperature=self.temperature, - n=1, - logprobs=True, - max_completion_tokens=1, - ) - - # Extract response and logprobs - answer = response.choices[0].message.content - logprob = response.choices[0].logprobs.content[0].logprob - - # Convert answer to score using the model's confidence - if answer.lower().strip() == "yes": - score = logprob # If yes, use the model's confidence - elif answer.lower().strip() == "no": - score = 1 - logprob # If no, invert the confidence - else: - score = 0.5 * logprob # For unclear answers, reduce confidence - - rerank_data = { - "document": document, - "relevance_score": score, - } - - return rerank_data - - async def search_relevancy_no_logprob(self, query: str, document: str) -> Any: - prompt = ( - f"Score the relevance between search query and user message on scale between 0 and 100 per cents. " - f"Consider semantic meaning, key concepts, and contextual relevance. " - f"Return ONLY a numerical score between 0 and 100 per cents. No other text. Stop after sending a number. " - f"Search query: {query}" - ) - - answer = await self._call_llm( - messages=[{"role": "system", "content": prompt}, {"role": "user", "content": document}], - ) - - try: - value = re.findall(r"[\d]+", answer)[0] - score = float(value) / 100 - score = max(0.0, min(score, 1.0)) - except (ValueError, IndexError): - score = 0.0 - - rerank_data = { - "document": document, - "relevance_score": score, - } - - return rerank_data - - async def search_relevancy_score(self, query: str, document: str) -> Any: - """ - This method is used to score the relevance of a document to a query. - - Args: - query: The query to score the relevance of. - document: The document to score the relevance of. - - Returns: - A dictionary with the document and the relevance score. 
- """ - - log.debug("Start search_relevancy_score") - log.debug(f"Reranker query: {query[:5]}") - log.debug(f"Reranker document: {document[:50]}") - log.debug(f"Reranker model: {self.model}") - log.debug(f"Reranker temperature: {self.temperature}") - log.debug(f"Reranker n: {self.n}") - log.debug(f"Reranker logprobs: {self.logprobs}") - log.debug(f"Reranker top_logprobs: {self.top_logprobs}") - log.debug(f"Reranker max_tokens: {self.max_tokens}") - log.debug(f"Reranker valid_class_tokens: {self.valid_class_tokens}") - - response = await self.client.chat.completions.create( - model=self.model, - messages=[ - { - "role": "system", - "content": """ - You are an intelligent assistant that evaluates how relevant a given document chunk is to a user's search query. - Your task is to analyze the similarity between the search query and the document chunk, and return **only the class label** that best represents the relevance: - - - "class_1": Not relevant or conflicting (document has nothing to do with the query or there are contradictions) - - "class_2": Slightly relevant (document has a little to do with the query and there are no contradictions) - - "class_3": Moderately relevant (document has something to do with the query but it may not be relevant to the user's intent) - - "class_4": Highly relevant (document has everything to do with the query and it is very relevant) - - Respond with only one of: "class_1", "class_2", "class_3", or "class_4". - - Examples: - - Search query: "How to reset a router to factory settings?" - Document chunk: "Computers often come with customizable parental control settings." - Score: class_1 - - Search query: "Symptoms of vitamin D deficiency" - Document chunk: "Vitamin D deficiency has been linked to fatigue, bone pain, and muscle weakness." - Score: class_4 - - Search query: "Best practices for onboarding remote employees" - Document chunk: "An employee handbook can be useful for new hires, outlining company policies and benefits." 
- Score: class_2 - - Search query: "Benefits of mindfulness meditation" - Document chunk: "Practicing mindfulness has shown to reduce stress and improve focus in multiple studies." - Score: class_3 - - Search query: "What is Kubernetes used for?" - Document chunk: "Kubernetes is an open-source system for automating deployment, scaling, and management of containerized applications." - Score: class_4 - - Search query: "Books about in French but not history" - Document chunk: "Book about The French history began in 1789 and radically transformed society." - Score: class_1 - - Search query: "Machine learning algorithms for image classification" - Document chunk: "Convolutional Neural Networks (CNNs) are particularly effective in image classification tasks." - Score: class_4 - - Search query: "How to improve focus while working remotely" - Document chunk: "Creating a dedicated workspace and setting a consistent schedule can significantly improve focus during remote work." - Score: class_4 - - Search query: "Carbon emissions from electric vehicles vs gas cars" - Document chunk: "Electric vehicles produce zero emissions while driving, but battery production has environmental impacts." - Score: class_3 - - Search query: "Time zones in the United States" - Document chunk: "The U.S. is divided into six primary time zones: Eastern, Central, Mountain, Pacific, Alaska, and Hawaii-Aleutian." - Score: class_4 - """, - }, - { - "role": "user", - "content": f""" - Now evaluate the following pair: - - Search query: {query} - Document chunk: {document} - - Which class best represents the relevance? 
- """, - }, - ], - temperature=self.temperature, - n=self.n, - logprobs=self.logprobs, - top_logprobs=self.top_logprobs, - max_completion_tokens=self.max_tokens, - ) - - # Extract response and logprobs - token_logprobs = response.choices[0].logprobs.content - - # Find the token that contains the class number - # Instead of just taking the last token, search for the actual class number token - class_token_logprob = None - for token_logprob in reversed(token_logprobs): - if token_logprob.token in self.valid_class_tokens: - class_token_logprob = token_logprob - break - - # If we couldn't find a class token, fall back to the last non-empty token - if class_token_logprob is None: - log.warning("No class token logprob found, using the last token as fallback") - class_token_logprob = token_logprobs[-1] - - top_logprobs = class_token_logprob.top_logprobs - - # Create a map of 'class_1' -> probability, using token combinations - class_probs = {} - for top_token in top_logprobs: - full_label = f"class_{top_token.token}" - prob = math.exp(top_token.logprob) - class_probs[full_label] = prob - # Optional: normalize in case some are missing - total_prob = sum(class_probs.values()) - class_probs = {k: v / total_prob for k, v in class_probs.items()} - # Assign weights to classes - class_weights = {"class_1": 0.25, "class_2": 0.5, "class_3": 0.75, "class_4": 1.0} - # Compute the final smooth score - score = sum(class_weights.get(class_label, 0) * prob for class_label, prob in class_probs.items()) - if score is not None: - if score > 1.0: - score = 1.0 - elif score < 0.0: - score = 0.0 - - rerank_data = {"document": document, "relevance_score": score} - log.debug(f"Reranker score: {score}") - log.debug("End search_relevancy_score") - return rerank_data - - def get_scores(self, query: str, documents: list[str]): - query_document_pairs = [(query, doc) for doc in documents] - # Create event loop and run async code - - documents_and_scores = 
get_event_loop().run_until_complete(self._rank(query_document_pairs)) - - scores = [score for _, score in documents_and_scores] - return scores - - -def _strip_code_fences(text: str) -> str: - """Strip code fences from text, handling cases where first line has content after fence.""" - stripped = text.strip() - if stripped.startswith("```") and stripped.endswith("```"): - lines = stripped.splitlines() - # Check if first line has content after the fence (e.g., ```json) - first_line = lines[0] if lines else "" - if first_line.strip() == "```" or (first_line.startswith("```") and len(first_line.strip()) > 3): - # Drop first fence line (with or without language specifier) - lines = lines[1:] - # Drop trailing fence lines - while lines and lines[-1].strip().startswith("```"): - lines.pop() - stripped = "\n".join(lines).strip() - return stripped - - -class ListwiseLLMReranker(BaseLLMReranker): - mode: RerankerMode = RerankerMode.LISTWISE - max_document_characters: int = 3000 - max_documents_per_batch: int = 50 # Maximum documents to rank in a single LLM call - document_separator: str = "\n---DOCUMENT_SEPARATOR---\n" # Unique separator to avoid conflicts - - async def _rank(self, query_document_pairs: List[Tuple[str, str]], rerank_callback=None) -> List[Tuple[str, float]]: - if not query_document_pairs: - return [] - - query = query_document_pairs[0][0] - documents = [document for _, document in query_document_pairs] - - # Handle large document sets by batching - if len(documents) > self.max_documents_per_batch: - log.info(f"Batching {len(documents)} documents into groups of {self.max_documents_per_batch}") - return await self._rank_with_batching(query, documents, rerank_callback) - - # Use _rank_single_batch for consistency - return await self._rank_single_batch(query_document_pairs, rerank_callback) - - async def _rank_with_batching( - self, query: str, documents: List[str], rerank_callback=None - ) -> List[Tuple[str, float]]: - """Rank documents in batches to avoid 
overwhelming the LLM with too many documents.""" - batch_size = self.max_documents_per_batch - num_batches = (len(documents) + batch_size - 1) // batch_size - - all_results: List[Tuple[str, float]] = [] - - for batch_idx in range(num_batches): - start_idx = batch_idx * batch_size - end_idx = min(start_idx + batch_size, len(documents)) - batch_docs = documents[start_idx:end_idx] - - # Create query-document pairs for this batch - batch_pairs = [(query, doc) for doc in batch_docs] - - # Rank this batch - batch_results = await self._rank_single_batch(batch_pairs, rerank_callback) - all_results.extend(batch_results) - - # Sort all results by score to get final ranking - all_results.sort(key=lambda item: item[1], reverse=True) - return all_results - - async def _rank_single_batch( - self, query_document_pairs: List[Tuple[str, str]], rerank_callback=None - ) -> List[Tuple[str, float]]: - """Rank a single batch of documents.""" - query = query_document_pairs[0][0] - documents = [document for _, document in query_document_pairs] - - messages = self._build_messages(query, documents) - - for attempt in range(self.max_retries): - try: - content = await self._call_llm(messages) - - scores = self._extract_scores(content, len(documents)) - return list(zip(documents, scores)) - except Exception as exc: - if attempt == self.max_retries - 1: - log.error(f"Failed listwise reranking batch after {self.max_retries} attempts: {exc}") - raise - retry_delay = self.retry_delay * (2**attempt) + random.uniform(0, 0.1) - await asyncio.sleep(retry_delay) - - return [] - - def _build_messages(self, query: str, documents: List[str]) -> List[dict]: - document_blocks = [] - for idx, document in enumerate(documents, start=1): - # Remove any existing 'Document [N]:' prefix from content - cleaned_doc = self._clean_document_prefix(document) - truncated = self._truncate_document(cleaned_doc) - document_blocks.append(f"Document {idx}:\n{truncated}") - - docs_text = 
self.document_separator.join(document_blocks) - system_prompt = ( - "You are an expert reranker. Given a user query and a list of candidate " - "documents, you must rank the documents from most to least relevant. " - 'Only respond with JSON following the schema: {"ranking": [' - '{"doc_index": <1-based document index>, "score": }]}.' - ) - - user_prompt = ( - f""" - Query: - {query} - - Documents: - {docs_text} - - Return the ranking as JSON. Make sure every document appears once. Scores must be between 0 and 1. - """ - ).strip() - - return [ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_prompt}, - ] - - def _clean_document_prefix(self, document: str) -> str: - """Remove 'Document [N]:' prefix if present in the document content.""" - pattern = r"^Document\s+\d+:\s*" - return re.sub(pattern, "", document, count=1) - - def _truncate_document(self, document: str) -> str: - if len(document) <= self.max_document_characters: - return document - return document[: self.max_document_characters] + "..." - - def _extract_scores(self, content: str, num_documents: int) -> List[float]: - sanitized = _strip_code_fences(content) - fallback_scores = self._fallback_scores(num_documents) - parsed_scores = fallback_scores.copy() - - try: - parsed = json.loads(sanitized) - except json.JSONDecodeError as exc: - log.warning(f"Failed to parse listwise reranker response as JSON: {exc}. Using fallback scores.") - return parsed_scores - - ranking = parsed.get("ranking", []) if isinstance(parsed, dict) else parsed - if not isinstance(ranking, list): - log.warning("Listwise reranker response missing 'ranking' list. 
Using fallback scores.") - return parsed_scores - - assignment_order = 0 - assigned: dict[int, float] = {} - - for rank_position, entry in enumerate(ranking): - doc_index: Optional[int] = None - score: Optional[float] = None - - if isinstance(entry, dict): - doc_index = entry.get("doc_index") - score = entry.get("score") - elif isinstance(entry, (list, tuple)) and entry: - doc_index = entry[0] - if len(entry) > 1: - score = entry[1] - elif isinstance(entry, int): - doc_index = entry - - if doc_index is None: - continue - - if isinstance(doc_index, str) and doc_index.isdigit(): - doc_index = int(doc_index) - - if not isinstance(doc_index, int): - continue - - # Accept either 0-based or 1-based indices - if doc_index <= 0: - adjusted_index = doc_index - else: - adjusted_index = doc_index - 1 - - if adjusted_index < 0 or adjusted_index >= num_documents: - continue - - normalized_score = self._normalize_score(score) - if normalized_score is None: - normalized_score = fallback_scores[min(rank_position, num_documents - 1)] - - assigned[adjusted_index] = normalized_score - assignment_order = max(assignment_order, rank_position + 1) - - next_rank = assignment_order - for doc_idx in range(num_documents): - if doc_idx in assigned: - parsed_scores[doc_idx] = assigned[doc_idx] - else: - parsed_scores[doc_idx] = fallback_scores[min(next_rank, num_documents - 1)] - next_rank += 1 - - return parsed_scores - - def _normalize_score(self, score: Any) -> Optional[float]: - if score is None: - return None - try: - value = float(score) - except (TypeError, ValueError): - return None - - if math.isnan(value) or math.isinf(value): - return None - - if value > 1: - value = 1.0 - elif value < 0: - value = 0.0 - - return value - - def _fallback_scores(self, length: int) -> List[float]: - if length <= 0: - return [] - return [max(0.0, (length - idx) / length) for idx in range(length)] diff --git a/mindsdb/integrations/utilities/rag/settings.py b/mindsdb/integrations/utilities/rag/settings.py 
deleted file mode 100644 index c4eb9a6a162..00000000000 --- a/mindsdb/integrations/utilities/rag/settings.py +++ /dev/null @@ -1,63 +0,0 @@ -from enum import Enum -from typing import List, Any, Optional, Dict - -from pydantic import BaseModel, Field, ConfigDict - - -DEFAULT_CHUNK_SIZE = 1000 -DEFAULT_CHUNK_OVERLAP = 200 -DEFAULT_LLM_MODEL = "gpt-4o" -DEFAULT_LLM_ENDPOINT = "https://api.openai.com/v1" -DEFAULT_LLM_MODEL_PROVIDER = "openai" -DEFAULT_RERANKING_MODEL = "gpt-4o" -DEFAULT_RERANKER_N = 1 -DEFAULT_RERANKER_LOGPROBS = True -DEFAULT_RERANKER_TOP_LOGPROBS = 4 -DEFAULT_RERANKER_MAX_TOKENS = 100 -DEFAULT_VALID_CLASS_TOKENS = ["1", "2", "3", "4"] - - -class LLMConfig(BaseModel): - model_name: str = Field(default=DEFAULT_LLM_MODEL, description="LLM model to use for generation") - provider: str = Field( - default=DEFAULT_LLM_MODEL_PROVIDER, - description="LLM model provider to use for generation", - ) - params: Dict[str, Any] = Field(default_factory=dict) - model_config = ConfigDict(protected_namespaces=()) - - -class RerankerMode(str, Enum): - POINTWISE = "pointwise" - LISTWISE = "listwise" - - @classmethod - def _missing_(cls, value): - if isinstance(value, str): - value = value.lower() - for member in cls: - if member.value == value: - return member - return None - - -class RerankerConfig(BaseModel): - model: str = DEFAULT_RERANKING_MODEL - base_url: Optional[str] = None - filtering_threshold: float = 0.5 - num_docs_to_keep: Optional[int] = None - mode: RerankerMode = Field( - default=RerankerMode.POINTWISE, - description="Reranking mode to use. 
'pointwise' for individual scoring, '" - "listwise' for joint scoring of all documents.", - ) - max_concurrent_requests: int = 20 - max_retries: int = 3 - retry_delay: float = 1.0 - early_stop: bool = True # Whether to enable early stopping - early_stop_threshold: float = 0.8 # Confidence threshold for early stopping - n: int = DEFAULT_RERANKER_N # Number of completions to generate - logprobs: bool = DEFAULT_RERANKER_LOGPROBS # Whether to include log probabilities - top_logprobs: int = DEFAULT_RERANKER_TOP_LOGPROBS # Number of top log probabilities to include - max_tokens: int = DEFAULT_RERANKER_MAX_TOKENS # Maximum tokens to generate - valid_class_tokens: List[str] = DEFAULT_VALID_CLASS_TOKENS # Valid class tokens to look for in the response diff --git a/mindsdb/integrations/utilities/rag/splitters/__init__.py b/mindsdb/integrations/utilities/rag/splitters/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/integrations/utilities/rag/splitters/custom_splitters.py b/mindsdb/integrations/utilities/rag/splitters/custom_splitters.py deleted file mode 100644 index 525bcafc99b..00000000000 --- a/mindsdb/integrations/utilities/rag/splitters/custom_splitters.py +++ /dev/null @@ -1,383 +0,0 @@ -"""Custom text splitter implementations to replace langchain splitters""" - -import re -from typing import List, Callable, Optional, Tuple, Any -from html.parser import HTMLParser - -from mindsdb.interfaces.knowledge_base.preprocessing.document_types import SimpleDocument -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class RecursiveCharacterTextSplitter: - """ - Custom implementation of RecursiveCharacterTextSplitter to replace langchain's version. - Splits text recursively by trying different separators in order. 
- """ - - def __init__( - self, - chunk_size: int = 1000, - chunk_overlap: int = 200, - length_function: Callable[[str], int] = len, - separators: Optional[List[str]] = None, - ): - """ - Initialize RecursiveCharacterTextSplitter - - Args: - chunk_size: Maximum size of chunks (measured by length_function) - chunk_overlap: Overlap between chunks - length_function: Function to measure text length (default: len) - separators: List of separators to try, in order of priority - """ - if separators is None: - separators = ["\n\n", "\n", ". ", " ", ""] - self.chunk_size = chunk_size - self.chunk_overlap = chunk_overlap - self.length_function = length_function - self.separators = separators - - def split_text(self, text: str) -> List[str]: - """ - Split text into chunks - - Args: - text (str): Text to split - - Returns: - List of text chunks - """ - if self.length_function(text) <= self.chunk_size: - return [text] - - chunks = [] - start_idx = 0 - - while start_idx < len(text): - # Get the next chunk - end_idx = min(start_idx + self.chunk_size, len(text)) - - # If we're at the end, add the remaining text - if end_idx >= len(text): - chunks.append(text[start_idx:end_idx]) - break - - # Try to find a good split point using separators - split_idx = None - for separator in self.separators: - if separator == "": - # Last resort: split at chunk_size - split_idx = end_idx - break - - # Look for separator near the end of the chunk - # Search in the second half of the chunk for a good break point - search_start = max(start_idx, start_idx + self.chunk_size // 2) - pos = text.rfind(separator, search_start, end_idx) - - if pos != -1 and pos > start_idx: - split_idx = pos + len(separator) - break - - if split_idx is None: - # Fallback: split at chunk_size - split_idx = end_idx - - # Extract chunk - chunks.append(text[start_idx:split_idx]) - - # Move start_idx forward, accounting for overlap - start_idx = max(start_idx + 1, split_idx - self.chunk_overlap) - - return chunks - - def 
split_documents(self, documents: List[SimpleDocument]) -> List[SimpleDocument]: - """ - Split documents into chunks - - Args: - documents: List of SimpleDocument objects to split - - Returns: - List of SimpleDocument chunks with preserved metadata - """ - split_docs = [] - for doc in documents: - chunks = self.split_text(doc.page_content) - for chunk in chunks: - # Preserve metadata from original document - split_docs.append( - SimpleDocument(page_content=chunk, metadata=doc.metadata.copy() if doc.metadata else {}) - ) - return split_docs - - def create_documents(self, texts: List[str], metadatas: Optional[List[dict]] = None) -> List[SimpleDocument]: - """ - Create documents from a list of texts (compatible with langchain interface) - This method splits each text into chunks and creates a document for each chunk. - - Args: - texts: List of text strings - metadatas: Optional list of metadata dicts (one per text) - - Returns: - List of SimpleDocument objects - """ - if metadatas is None: - metadatas = [{}] * len(texts) - - docs = [] - for text, metadata in zip(texts, metadatas): - chunks = self.split_text(text) - for chunk in chunks: - docs.append(SimpleDocument(page_content=chunk, metadata=metadata.copy() if metadata else {})) - - return docs - - @classmethod - def from_language( - cls, language: Any, chunk_size: int = 1000, chunk_overlap: int = 200, **kwargs - ) -> "RecursiveCharacterTextSplitter": - """ - Create a RecursiveCharacterTextSplitter with language-specific separators - - Args: - language: Language enum or string (e.g., Language.PYTHON or "python") - chunk_size: Maximum size of chunks - chunk_overlap: Overlap between chunks - **kwargs: Additional arguments - - Returns: - RecursiveCharacterTextSplitter instance with language-specific separators - """ - # Get language name as string (handle both enum and string) - if hasattr(language, "value"): - lang_name = language.value.lower() - elif hasattr(language, "name"): - lang_name = language.name.lower() - 
else: - lang_name = str(language).lower() - - # Language-specific separators (based on langchain's implementation) - language_separators = { - "python": ["\n\n", "\n", "def ", "class ", " ", " ", ""], - "javascript": ["\n\n", "\n", "function ", "class ", " ", " ", ""], - "typescript": ["\n\n", "\n", "function ", "class ", " ", " ", ""], - "java": ["\n\n", "\n", "public ", "private ", "class ", " ", " ", ""], - "cpp": ["\n\n", "\n", "namespace ", "class ", " ", " ", ""], - "c": ["\n\n", "\n", " ", " ", ""], - "go": ["\n\n", "\n", "func ", " ", " ", ""], - "rust": ["\n\n", "\n", "fn ", " ", " ", ""], - "ruby": ["\n\n", "\n", "def ", "class ", " ", " ", ""], - "php": ["\n\n", "\n", "function ", "class ", " ", " ", ""], - "swift": ["\n\n", "\n", "func ", "class ", " ", " ", ""], - "kotlin": ["\n\n", "\n", "fun ", "class ", " ", " ", ""], - "scala": ["\n\n", "\n", "def ", "class ", " ", " ", ""], - } - - # Get separators for language, or use default - separators = language_separators.get(lang_name, ["\n\n", "\n", ". ", " ", ""]) - - return cls(chunk_size=chunk_size, chunk_overlap=chunk_overlap, separators=separators, **kwargs) - - -class MarkdownHeaderTextSplitter: - """ - Custom implementation of MarkdownHeaderTextSplitter to replace langchain's version. - Splits markdown text by headers. 
- """ - - def __init__(self, headers_to_split_on: List[Tuple[str, str]]): - """ - Initialize MarkdownHeaderTextSplitter - - Args: - headers_to_split_on: List of tuples (header_marker, header_name) - e.g., [("#", "Header 1"), ("##", "Header 2")] - """ - self.headers_to_split_on = headers_to_split_on - # Sort by header level (more # = higher level) in reverse order - # to match from most specific to least specific - self.headers_to_split_on = sorted(headers_to_split_on, key=lambda x: len(x[0]), reverse=True) - - def split_text(self, text: str) -> List[SimpleDocument]: - """ - Split markdown text by headers - - Args: - text: Markdown text to split - - Returns: - List of SimpleDocument objects, each containing a section - """ - lines = text.split("\n") - documents = [] - current_chunk_lines = [] - current_metadata = {} - current_header_stack = [] # Track header hierarchy - - i = 0 - while i < len(lines): - line = lines[i] - matched_header = None - - # Check if this line matches any header pattern - for header_marker, header_name in self.headers_to_split_on: - # Match header pattern: optional whitespace, header marker, space, header text - pattern = rf"^\s*{re.escape(header_marker)}\s+(.+)$" - match = re.match(pattern, line) - if match: - matched_header = (header_marker, header_name, match.group(1)) - break - - if matched_header: - # Save previous chunk if it has content - if current_chunk_lines: - chunk_text = "\n".join(current_chunk_lines).strip() - if chunk_text: - documents.append(SimpleDocument(page_content=chunk_text, metadata=current_metadata.copy())) - - # Start new chunk - header_marker, header_name, header_text = matched_header - current_chunk_lines = [line] # Include header in chunk - - # Update header stack - remove headers at same or lower level - header_level = len(header_marker) - current_header_stack = [h for h in current_header_stack if len(h[0]) < header_level] - current_header_stack.append((header_marker, header_name, header_text)) - - # Build 
metadata with header hierarchy - current_metadata = {} - for idx, (marker, name, text) in enumerate(current_header_stack): - current_metadata[f"{name.lower().replace(' ', '_')}_{idx}"] = text - # Also add the most recent header - if idx == len(current_header_stack) - 1: - current_metadata["header"] = text - current_metadata["header_level"] = len(marker) - else: - # Add line to current chunk - current_chunk_lines.append(line) - - i += 1 - - # Add final chunk - if current_chunk_lines: - chunk_text = "\n".join(current_chunk_lines).strip() - if chunk_text: - documents.append(SimpleDocument(page_content=chunk_text, metadata=current_metadata.copy())) - - # If no headers found, return entire text as one document - if not documents: - documents.append(SimpleDocument(page_content=text, metadata={})) - - return documents - - -class HTMLHeaderParser(HTMLParser): - """HTML parser to extract header tags and their positions""" - - def __init__(self, headers_to_split_on: List[Tuple[str, str]]): - super().__init__() - self.headers_to_split_on = {tag.lower() for tag, _ in headers_to_split_on} - self.header_positions = [] # List of (position, tag, text) - self.current_position = 0 - self.current_tag = None - self.current_text = "" - - def handle_starttag(self, tag, attrs): - if tag.lower() in self.headers_to_split_on: - self.current_tag = tag.lower() - self.current_text = "" - - def handle_endtag(self, tag): - if tag.lower() == self.current_tag: - text = self.current_text.strip() - if text: - self.header_positions.append((self.current_position, tag.lower(), text)) - self.current_tag = None - self.current_text = "" - - def handle_data(self, data): - if self.current_tag: - self.current_text += data - self.current_position += len(data) - - -class HTMLHeaderTextSplitter: - """ - Custom implementation of HTMLHeaderTextSplitter to replace langchain's version. - Splits HTML text by header tags. 
- """ - - def __init__(self, headers_to_split_on: List[Tuple[str, str]]): - """ - Initialize HTMLHeaderTextSplitter - - Args: - headers_to_split_on: List of tuples (tag_name, header_name) - e.g., [("h1", "Header 1"), ("h2", "Header 2")] - """ - self.headers_to_split_on = headers_to_split_on - # Create mapping from tag to header name - self.tag_to_name = {tag.lower(): name for tag, name in headers_to_split_on} - - def split_text(self, text: str) -> List[SimpleDocument]: - """ - Split HTML text by header tags - - Args: - text: HTML text to split - - Returns: - List of SimpleDocument objects, each containing a section - """ - # Parse HTML to find header positions - parser = HTMLHeaderParser(self.headers_to_split_on) - parser.feed(text) - header_positions = parser.header_positions - - if not header_positions: - # No headers found, return entire text as one document - return [SimpleDocument(page_content=text, metadata={})] - - documents = [] - current_pos = 0 - current_metadata = {} - current_header_stack = [] # Track header hierarchy - - for header_pos, tag, header_text in header_positions: - # Extract content before this header - if header_pos > current_pos: - chunk_text = text[current_pos:header_pos].strip() - if chunk_text: - documents.append(SimpleDocument(page_content=chunk_text, metadata=current_metadata.copy())) - - # Update header stack - remove headers at same or lower level - header_level = int(tag[1]) if tag[1].isdigit() else 6 # h1=1, h2=2, etc. 
- current_header_stack = [h for h in current_header_stack if int(h[0][1]) < header_level] - current_header_stack.append((tag, self.tag_to_name.get(tag, tag), header_text)) - - # Build metadata with header hierarchy - current_metadata = {} - for idx, (tag_name, header_name, text) in enumerate(current_header_stack): - current_metadata[f"{header_name.lower().replace(' ', '_')}_{idx}"] = text - # Also add the most recent header - if idx == len(current_header_stack) - 1: - current_metadata["header"] = text - current_metadata["header_level"] = header_level - - current_pos = header_pos - - # Add final chunk - if current_pos < len(text): - chunk_text = text[current_pos:].strip() - if chunk_text: - documents.append(SimpleDocument(page_content=chunk_text, metadata=current_metadata.copy())) - - # If no documents created, return entire text - if not documents: - documents.append(SimpleDocument(page_content=text, metadata={})) - - return documents diff --git a/mindsdb/integrations/utilities/rag/splitters/file_splitter.py b/mindsdb/integrations/utilities/rag/splitters/file_splitter.py deleted file mode 100644 index dbcc1473f08..00000000000 --- a/mindsdb/integrations/utilities/rag/splitters/file_splitter.py +++ /dev/null @@ -1,120 +0,0 @@ -from dataclasses import dataclass -from typing import Callable, List - -from mindsdb.integrations.utilities.rag.splitters.custom_splitters import ( - RecursiveCharacterTextSplitter, - MarkdownHeaderTextSplitter, - HTMLHeaderTextSplitter, -) -from mindsdb.interfaces.knowledge_base.preprocessing.document_types import SimpleDocument -from mindsdb.interfaces.knowledge_base.preprocessing.models import TextChunkingConfig - -from mindsdb.utilities import log - -DEFAULT_CHUNK_SIZE = 1000 -DEFAULT_CHUNK_OVERLAP = 50 -DEFAULT_MARKDOWN_HEADERS_TO_SPLIT_ON = [ - ("#", "Header 1"), - ("##", "Header 2"), - ("###", "Header 3"), -] -DEFAULT_HTML_HEADERS_TO_SPLIT_ON = [ - ("h1", "Header 1"), - ("h2", "Header 2"), - ("h3", "Header 3"), - ("h4", "Header 4"), -] - 
- -logger = log.getLogger(__name__) - - -@dataclass -class FileSplitterConfig: - """Represents configuration needed to split a file into chunks for retrieval.""" - - # Target chunk size in characters. Not all splitters will adhere exactly to this (it's more of a guideline) - chunk_size: int = DEFAULT_CHUNK_SIZE - # How many characters each chunk should overlap. Not all splitters will adhere exactly to this (it's more of a guideline) - chunk_overlap: int = DEFAULT_CHUNK_OVERLAP - # Chunking parameters are passed as a TextChunkingConfig - text_chunking_config: TextChunkingConfig = None - # Default recursive splitter to use for text files, or unsupported files - recursive_splitter: RecursiveCharacterTextSplitter = None - # Splitter to use for MD splitting - markdown_splitter: MarkdownHeaderTextSplitter = MarkdownHeaderTextSplitter( - headers_to_split_on=DEFAULT_MARKDOWN_HEADERS_TO_SPLIT_ON - ) - # Splitter to use for HTML splitting - html_splitter: HTMLHeaderTextSplitter = HTMLHeaderTextSplitter(headers_to_split_on=DEFAULT_HTML_HEADERS_TO_SPLIT_ON) - - def __post_init__(self): - if self.text_chunking_config is None: - self.text_chunking_config = TextChunkingConfig(chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap) - - if self.recursive_splitter is None: - self.recursive_splitter = RecursiveCharacterTextSplitter( - chunk_size=self.text_chunking_config.chunk_size, - chunk_overlap=self.text_chunking_config.chunk_overlap, - length_function=self.text_chunking_config.length_function, - separators=self.text_chunking_config.separators, - ) - - -class FileSplitter: - """Splits Documents that represent various file types into chunks for retrieval.""" - - def __init__(self, config: FileSplitterConfig): - """ - Args: - config (FileSplitterConfig): Configuration for the file splitter. 
- """ - self.config = config - self._extension_map = { - ".pdf": self._recursive_splitter_fn, - ".md": self._markdown_splitter_fn, - ".html": self._html_splitter_fn, - } - self.default_splitter = self._recursive_splitter_fn - - def _split_func_by_extension(self, extension) -> Callable: - return self._extension_map.get(extension, self.default_splitter)() - - def split_documents(self, documents: List[SimpleDocument], default_failover: bool = True) -> List[SimpleDocument]: - """Splits a list of documents representing files using the appropriate splitting & chunking strategies - - Args: - documents (List[SimpleDocument]): List of documents representing files to split. - default_failover (bool, optional): Whether to use the default splitter as a fallback if the file type is not supported. Defaults to True. - - Returns: - List[SimpleDocument]: List of documents representing the split files. - """ - split_documents = [] - for document in documents: - extension = document.metadata.get("extension") if document.metadata else None - split_func = self._split_func_by_extension(extension=extension) - try: - split_documents += split_func(document.page_content) - except Exception as e: - logger.exception(f"Error splitting document with extension {extension}:") - if not default_failover: - raise ValueError(f"Error splitting document with extension {extension}") from e - # Try default splitter as a failover, if enabled. - split_func = self._split_func_by_extension(extension=None) - split_documents += split_func(document.page_content) - return split_documents - - def _markdown_splitter_fn(self) -> Callable: - return self.config.markdown_splitter.split_text - - def _html_splitter_fn(self) -> Callable: - return self.config.html_splitter.split_text - - def _recursive_splitter_fn(self) -> Callable: - # Recursive splitter is a TextSplitter where split_text returns List[str]. 
- def recursive_split(content: str) -> List[SimpleDocument]: - split_content = self.config.recursive_splitter.split_text(content) - return [SimpleDocument(page_content=c, metadata={}) for c in split_content] - - return recursive_split diff --git a/mindsdb/integrations/utilities/sql_utils.py b/mindsdb/integrations/utilities/sql_utils.py deleted file mode 100644 index e123b9ed837..00000000000 --- a/mindsdb/integrations/utilities/sql_utils.py +++ /dev/null @@ -1,541 +0,0 @@ -from enum import Enum -from typing import Any, Optional -import pandas as pd -import datetime as dt - -from mindsdb.api.executor.utilities.sql import query_df -from mindsdb_sql_parser import ast -from mindsdb_sql_parser.ast.base import ASTNode - -from mindsdb.integrations.utilities.query_traversal import query_traversal -from mindsdb.utilities.config import config -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -AGGREGATE_FUNCTIONS = {"count", "sum", "avg", "min", "max", "first", "last", "median"} - - -def is_aggregate_function(node: ast.ASTNode) -> bool: - """Check if AST node is aggregate function - - Args: - node: AST node to check - - Returns: - bool: True if node is aggregate function - """ - return isinstance(node, ast.Function) and hasattr(node, "op") and node.op.lower() in AGGREGATE_FUNCTIONS - - -def has_aggregate_function(targets: list[ast.ASTNode]) -> bool: - """Check if any of AST node in the list is aggregate function - - Args: - targets: list of AST nodes to check - - Returns: - bool: True if node is aggregate function - """ - return any(is_aggregate_function(target) for target in targets) - - -class FilterOperator(Enum): - """ - Enum for filter operators. 
- """ - - EQUAL = "=" - NOT_EQUAL = "!=" - LESS_THAN = "<" - LESS_THAN_OR_EQUAL = "<=" - GREATER_THAN = ">" - GREATER_THAN_OR_EQUAL = ">=" - IN = "IN" - NOT_IN = "NOT IN" - BETWEEN = "BETWEEN" - NOT_BETWEEN = "NOT BETWEEN" - LIKE = "LIKE" - NOT_LIKE = "NOT LIKE" - IS_NULL = "IS NULL" - IS_NOT_NULL = "IS NOT NULL" - IS = "IS" - IS_NOT = "IS NOT" - - -class FilterCondition: - """ - Base class for filter conditions. - """ - - def __init__(self, column: str, op: FilterOperator, value: Any): - self.column = column - self.op = op - self.value = value - self.applied = False - - def __eq__(self, __value: object) -> bool: - if isinstance(__value, FilterCondition): - return self.column == __value.column and self.op == __value.op and self.value == __value.value - else: - return False - - def __repr__(self) -> str: - return f""" - FilterCondition( - column={self.column}, - op={self.op}, - value={self.value} - ) - """ - - -class KeywordSearchArgs: - def __init__(self, column: str, query: str): - """ - Args: - column: The column to search in. - query: The search query string. - """ - self.column = column - self.query = query - - -class SortColumn: - def __init__(self, column: str, ascending: bool = True): - self.column = column - self.ascending = ascending - self.applied = False - - -def make_sql_session(): - from mindsdb.api.executor.controllers.session_controller import SessionController - - sql_session = SessionController() - sql_session.database = config.get("default_project") - return sql_session - - -def conditions_to_filter(binary_op: ASTNode): - conditions = extract_comparison_conditions(binary_op) - - filters = {} - for op, arg1, arg2 in conditions: - if op != "=": - raise NotImplementedError - filters[arg1] = arg2 - return filters - - -def extract_comparison_conditions(binary_op: ASTNode, ignore_functions=False, strict=True): - """Extracts all simple comparison conditions that must be true from an AST node. - Does NOT support 'or' conditions. 
- """ - conditions = [] - captured_nodes = set() # Track nodes we've captured as raw conditions to avoid processing their children - - def _extract_comparison_conditions(node: ASTNode, **kwargs): - # Check if this node is a child of an already-captured node - callstack = kwargs.get("callstack", []) - for parent in callstack: - if id(parent) in captured_nodes: - # Skip processing children of captured nodes - return - - if isinstance(node, ast.BinaryOperation): - op = node.op.lower() - if op == "and": - # Want to separate individual conditions, not include 'and' as its own condition. - return - - arg1, arg2 = node.args - if ignore_functions and isinstance(arg1, ast.Function): - # handle lower/upper - if arg1.op.lower() in ("lower", "upper"): - if isinstance(arg1.args[0], ast.Identifier): - arg1 = arg1.args[0] - - if not isinstance(arg1, ast.Identifier): - # Only support [identifier] =//>=/<=/etc [constant] comparisons. - if strict: - raise NotImplementedError(f"Not implemented arg1: {arg1}") - else: - conditions.append(node) - captured_nodes.add(id(node)) # Mark this node as captured - return - - if isinstance(arg2, ast.Constant): - value = arg2.value - elif isinstance(arg2, ast.Tuple): - value = [i.value for i in arg2.items] - else: - # When strict=False, return the entire node as a raw condition - # (similar to how arg1 non-identifier case is handled) - if strict: - raise NotImplementedError(f"Not implemented arg2: {arg2}") - else: - conditions.append(node) - captured_nodes.add(id(node)) # Mark this node as captured - return - - conditions.append([op, arg1.parts[-1], value]) - if isinstance(node, ast.BetweenOperation): - var, up, down = node.args - if not ( - isinstance(var, ast.Identifier) and isinstance(up, ast.Constant) and isinstance(down, ast.Constant) - ): - raise NotImplementedError(f"Not implemented: {node}") - - op = node.op.lower() - conditions.append([op, var.parts[-1], (up.value, down.value)]) - - query_traversal(binary_op, 
_extract_comparison_conditions) - return conditions - - -def project_dataframe(df, targets, table_columns): - """ - case-insensitive projection - 'select A' and 'select a' return different column case but with the same content - """ - - columns = [] - df_cols_idx = {col.lower(): col for col in df.columns} - df_col_rename = {} - - for target in targets: - if isinstance(target, ast.Star): - for col in table_columns: - col_df = df_cols_idx.get(col.lower()) - if col_df is not None: - df_col_rename[col_df] = col - columns.append(col) - - break - elif isinstance(target, ast.Identifier): - col = target.parts[-1] - col_df = df_cols_idx.get(col.lower()) - if col_df is not None: - if hasattr(target, "alias") and isinstance(target.alias, ast.Identifier): - df_col_rename[col_df] = target.alias.parts[0] - else: - df_col_rename[col_df] = col - columns.append(col) - else: - raise NotImplementedError - - if len(df) == 0: - df = pd.DataFrame([], columns=columns) - else: - # add absent columns - for col in set(columns) & set(df.columns) ^ set(columns): - df[col] = None - - # filter by columns - df = df[columns] - - # adapt column names to projection - if len(df_col_rename) > 0: - df.rename(columns=df_col_rename, inplace=True) - return df - - -def _evaluate_interval_expression(node: ASTNode) -> Optional[dt.date]: - """Evaluate INTERVAL expressions to get actual date values. 
- - Args: - node: AST node that may contain INTERVAL expressions - - Returns: - date if the expression can be evaluated, None otherwise - """ - if isinstance(node, ast.BinaryOperation): - op = node.op.lower() - arg1, arg2 = node.args - - # Handle CURRENT_DATE - INTERVAL 'X day' - if op == "-": - # Check for CURRENT_DATE function - if isinstance(arg1, ast.Function) and arg1.op.upper() == "CURRENT_DATE": - if isinstance(arg2, ast.Interval): - interval_value = arg2.args[0] if arg2.args else None - # Handle both Constant node and direct string value - if interval_value: - if isinstance(interval_value, ast.Constant): - interval_str = str(interval_value.value).lower() - elif isinstance(interval_value, str): - interval_str = interval_value.lower() - else: - interval_str = str(interval_value).lower() - - # Parse interval like '90 day', '30 days', etc. - import re - - match = re.search(r"(\d+)\s*(day|days|d)", interval_str) - if match: - days = int(match.group(1)) - result = dt.date.today() - dt.timedelta(days=days) - return result - # Also handle if arg1 is CURRENT_DATE as an identifier/constant - elif isinstance(arg1, ast.Identifier) and arg1.parts[-1].upper() == "CURRENT_DATE": - if isinstance(arg2, ast.Interval): - interval_value = arg2.args[0] if arg2.args else None - # Handle both Constant node and direct string value - if interval_value: - if isinstance(interval_value, ast.Constant): - interval_str = str(interval_value.value).lower() - elif isinstance(interval_value, str): - interval_str = interval_value.lower() - else: - interval_str = str(interval_value).lower() - - import re - - match = re.search(r"(\d+)\s*(day|days|d)", interval_str) - if match: - days = int(match.group(1)) - result = dt.date.today() - dt.timedelta(days=days) - return result - - # Handle CURRENT_DATE + INTERVAL 'X day' - elif op == "+": - if isinstance(arg1, ast.Function) and arg1.op.upper() == "CURRENT_DATE": - if isinstance(arg2, ast.Interval): - interval_value = arg2.args[0] if arg2.args else 
None - if interval_value and isinstance(interval_value, ast.Constant): - interval_str = str(interval_value.value).lower() - import re - - match = re.search(r"(\d+)\s*(day|days|d)", interval_str) - if match: - days = int(match.group(1)) - return dt.date.today() + dt.timedelta(days=days) - elif isinstance(arg1, ast.Identifier) and arg1.parts[-1].upper() == "CURRENT_DATE": - if isinstance(arg2, ast.Interval): - interval_value = arg2.args[0] if arg2.args else None - if interval_value and isinstance(interval_value, ast.Constant): - interval_str = str(interval_value.value).lower() - import re - - match = re.search(r"(\d+)\s*(day|days|d)", interval_str) - if match: - days = int(match.group(1)) - return dt.date.today() + dt.timedelta(days=days) - - return None - - -def _extract_date_from_raw_condition(condition: ASTNode) -> Optional[tuple]: - """Try to extract a date value and column from a raw condition for API pushdown. - - Args: - condition: Raw condition AST node - - Returns: - Tuple of (column_name, operator, date_value) if extractable, None otherwise - """ - if isinstance(condition, ast.BinaryOperation): - op = condition.op.lower() - arg1, arg2 = condition.args - - # Handle CAST(column AS DATE) >= CURRENT_DATE - INTERVAL 'X day' - if isinstance(arg1, ast.TypeCast): - # type_name might be Identifier or string, handle both - if isinstance(arg1.type_name, ast.Identifier): - type_name_str = arg1.type_name.parts[-1].upper() - else: - type_name_str = str(arg1.type_name).upper() - - if type_name_str in ("DATE", "DATETIME", "TIMESTAMP"): - if isinstance(arg1.arg, ast.Identifier): - column_name = arg1.arg.parts[-1] - date_value = _evaluate_interval_expression(arg2) - if date_value: - return (column_name, op, date_value) - - # Handle column >= CURRENT_DATE - INTERVAL 'X day' - elif isinstance(arg1, ast.Identifier): - column_name = arg1.parts[-1] - date_value = _evaluate_interval_expression(arg2) - if date_value: - return (column_name, op, date_value) - - return None - - -def 
_is_date_expression_static(node): - """Check if a node represents a date expression (CURRENT_DATE, INTERVAL, etc.) - This is a static version that can be used outside filter_dataframe. - """ - if isinstance(node, ast.BinaryOperation): - op = node.op.lower() - arg1, arg2 = node.args - - # Check for CURRENT_DATE - INTERVAL or CURRENT_DATE + INTERVAL - if op in ("-", "+"): - # Check if one side is CURRENT_DATE (function or identifier) - is_current_date = False - if isinstance(arg1, ast.Function) and arg1.op.upper() == "CURRENT_DATE": - is_current_date = True - elif isinstance(arg1, ast.Identifier) and arg1.parts[-1].upper() == "CURRENT_DATE": - is_current_date = True - - # Check if other side is INTERVAL - is_interval = isinstance(arg2, ast.Interval) - - if is_current_date and is_interval: - return True - - # Also check if we can evaluate it (for validation) - if _evaluate_interval_expression(node) is not None: - return True - elif isinstance(node, ast.Function) and node.op.upper() == "CURRENT_DATE": - return True - elif isinstance(node, ast.Identifier) and node.parts[-1].upper() == "CURRENT_DATE": - return True - return False - - -def _get_date_columns_from_raw_condition(condition: ASTNode) -> list: - """Extract column names that should be cast to date/datetime from raw conditions. 
- - Args: - condition: Raw condition AST node - - Returns: - List of column names that need date casting - """ - date_columns = [] - - def _traverse_for_date_columns(node): - if isinstance(node, ast.BinaryOperation): - arg1, arg2 = node.args - - # Check if arg2 is a date expression using the same logic - is_date_expr = _is_date_expression_static(arg2) - - # If comparing to a date expression, the column needs casting - if is_date_expr: - if isinstance(arg1, ast.Identifier): - date_columns.append(arg1.parts[-1]) - elif isinstance(arg1, ast.TypeCast) and isinstance(arg1.arg, ast.Identifier): - # Already has CAST, but ensure the column is cast in dataframe - date_columns.append(arg1.arg.parts[-1]) - - _traverse_for_date_columns(condition) - return date_columns - - -def filter_dataframe(df: pd.DataFrame, conditions: list, raw_conditions=None, order_by=None): - # convert list of conditions to ast. - # assumes that list was got from extract_comparison_conditions - where_query = None - date_columns_to_cast = set() - - for op, arg1, arg2 in conditions: - op = op.lower() - - if op == "between": - item = ast.BetweenOperation(args=[ast.Identifier(arg1), ast.Constant(arg2[0]), ast.Constant(arg2[1])]) - else: - if isinstance(arg2, (tuple, list)): - arg2 = ast.Tuple(arg2) - - # Check if arg2 is a date string (ISO format) and arg1 might be a date column - # If so, we need to cast the column to timestamp for proper comparison - arg1_identifier = ast.Identifier(arg1) - if isinstance(arg2, str) and len(arg2) >= 10 and arg2[4] == "-" and arg2[7] == "-": - # Looks like a date string (YYYY-MM-DD format) - # Check if the column exists and might be a date column - if arg1 in df.columns: - # Cast column to TIMESTAMP for proper date comparison - cast_column = ast.TypeCast(arg=arg1_identifier, type_name="TIMESTAMP") - # Cast the date string to TIMESTAMP as well - cast_value = ast.TypeCast(arg=ast.Constant(arg2), type_name="TIMESTAMP") - item = ast.BinaryOperation(op=op, args=[cast_column, 
cast_value]) - date_columns_to_cast.add(arg1) - else: - item = ast.BinaryOperation(op=op, args=[arg1_identifier, ast.Constant(arg2)]) - else: - if isinstance(arg2, ASTNode): - item = ast.BinaryOperation(op=op, args=[arg1_identifier, arg2]) - else: - item = ast.BinaryOperation(op=op, args=[arg1_identifier, ast.Constant(arg2)]) - - if where_query is None: - where_query = item - else: - where_query = ast.BinaryOperation(op="and", args=[where_query, item]) - - # Process raw conditions - detect date columns and cast them, also add CAST to condition if needed - processed_raw_conditions = [] - # date_columns_to_cast is already initialized above, continue using it - - for condition in raw_conditions or []: - # Extract date columns that need casting - date_cols = _get_date_columns_from_raw_condition(condition) - date_columns_to_cast.update(date_cols) - - # If condition has a column without CAST but comparing to date, add CAST - if isinstance(condition, ast.BinaryOperation): - arg1, arg2 = condition.args - - # Check if arg2 is a date expression - is_date_expr = _is_date_expression_static(arg2) - - # If comparing to date expression and column doesn't have CAST, add it - if is_date_expr and isinstance(arg1, ast.Identifier): - # Wrap column in CAST to TIMESTAMP - # TypeCast expects type_name as a string (not Identifier) - cast_column = ast.TypeCast(arg=arg1, type_name="TIMESTAMP") - condition = ast.BinaryOperation(op=condition.op, args=[cast_column, arg2]) - - processed_raw_conditions.append(condition) - - # Cast date columns in dataframe (from both raw conditions and regular conditions) - for col_name in date_columns_to_cast: - if col_name in df.columns: - if not pd.api.types.is_datetime64_any_dtype(df[col_name]): - try: - df[col_name] = pd.to_datetime(df[col_name], errors="coerce") - except Exception as e: - logger.warning(f"[SQL Utils] Failed to cast column {col_name} to datetime: {e}") - pass # If casting fails, let DuckDB handle it via CAST in query - - if 
processed_raw_conditions: - for condition in processed_raw_conditions: - if where_query is None: - where_query = condition - else: - where_query = ast.BinaryOperation(op="and", args=[where_query, condition]) - - query = ast.Select(targets=[ast.Star()], from_table=ast.Identifier("df"), where=where_query) - - if order_by: - query.order_by = order_by - - result = query_df(df, query) - if len(result) == 0 and len(df) > 0: - logger.warning( - f"[SQL Utils] Filter returned 0 rows from {len(df)} input rows - this might indicate a filtering issue" - ) - return result - - -def sort_dataframe(df, order_by: list): - cols = [] - ascending = [] - for order in order_by: - if not isinstance(order, ast.OrderBy): - continue - - col = order.field.parts[-1] - if col not in df.columns: - continue - - cols.append(col) - ascending.append(False if order.direction.lower() == "desc" else True) - if len(cols) > 0: - df = df.sort_values(by=cols, ascending=ascending) - return df diff --git a/mindsdb/integrations/utilities/test_utils.py b/mindsdb/integrations/utilities/test_utils.py deleted file mode 100644 index e1c710b6e8e..00000000000 --- a/mindsdb/integrations/utilities/test_utils.py +++ /dev/null @@ -1,30 +0,0 @@ -from mindsdb.integrations.handlers.postgres_handler import Handler as PGHandler - - -PG_HANDLER_NAME = 'test_handler' -PG_CONNECTION_DATA = { - "user": "demo_user", - "password": "demo_password", - "host": "samples.mindsdb.com", - "port": "5432", - "database": "demo" -} - - -class HandlerControllerMock: - def __init__(self): - self.handlers = { - PG_HANDLER_NAME: PGHandler( - PG_HANDLER_NAME, - **{"connection_data": PG_CONNECTION_DATA} - ) - } - - def get_handler(self, name): - return self.handlers[name] - - def get(self, name): - return { - 'id': 0, - 'name': PG_HANDLER_NAME - } diff --git a/mindsdb/integrations/utilities/time_series_utils.py b/mindsdb/integrations/utilities/time_series_utils.py deleted file mode 100644 index a98b3050865..00000000000 --- 
a/mindsdb/integrations/utilities/time_series_utils.py +++ /dev/null @@ -1,206 +0,0 @@ -import numpy as np -import pandas as pd -from pandas.tseries.frequencies import to_offset - -# handle optional dependency -try: - import hierarchicalforecast # noqa: F401 - from hierarchicalforecast.core import HierarchicalReconciliation - from hierarchicalforecast.methods import BottomUp - from hierarchicalforecast.utils import aggregate -except (ImportError, SystemError): - HierarchicalReconciliation = None - BottomUp = None - aggregate = None - -from mindsdb.utilities import log - -DEFAULT_FREQUENCY = "D" -DEFAULT_RECONCILER = BottomUp - - -def transform_to_nixtla_df(df, settings_dict, exog_vars=[]): - """Transform dataframes into the specific format required by StatsForecast. - - Nixtla packages require dataframes to have the following columns: - unique_id -> the grouping column. If multiple groups are specified then - we join them into one name using a / char. - ds -> the date series - y -> the target variable for prediction - - You can optionally include exogenous regressors after these three columns, but - they must be numeric. 
- """ - nixtla_df = df.copy() - - # Resample every group - freq = settings_dict["frequency"] - resampled_df = pd.DataFrame(columns=nixtla_df.columns) - if settings_dict["group_by"] and settings_dict["group_by"] != ["__group_by"]: - for group, groupdf in nixtla_df.groupby(by=settings_dict["group_by"]): - groupdf.index = pd.to_datetime(groupdf.pop(settings_dict["order_by"])) - resampled_groupdf = pd.DataFrame(groupdf[settings_dict["target"]].resample(freq).mean()) - for k, v in zip(settings_dict["group_by"], group): - resampled_groupdf[k] = v - resampled_groupdf = resampled_groupdf.reset_index() - resampled_df = pd.concat([resampled_df, resampled_groupdf]) - nixtla_df = resampled_df - - # Transform group columns into single unique_id column - if len(settings_dict["group_by"]) > 1: - for col in settings_dict["group_by"]: - nixtla_df[col] = nixtla_df[col].astype(str) - nixtla_df["unique_id"] = nixtla_df[settings_dict["group_by"]].agg("/".join, axis=1) - group_col = "ignore this" - else: - group_col = settings_dict["group_by"][0] - - # Rename columns to statsforecast names - nixtla_df = nixtla_df.rename( - {settings_dict["target"]: "y", settings_dict["order_by"]: "ds", group_col: "unique_id"}, axis=1 - ) - - if "unique_id" not in nixtla_df.columns: - # add to dataframe as it is expected by statsforecast - nixtla_df["unique_id"] = "1" - - columns_to_keep = ["unique_id", "ds", "y"] + exog_vars - nixtla_df["ds"] = pd.to_datetime(nixtla_df["ds"]) - return nixtla_df[columns_to_keep] - - -def get_results_from_nixtla_df(nixtla_df, model_args): - """Transform dataframes generated by StatsForecast back to their original format. - - This will return the dataframe to the original format supplied by the MindsDB query. 
- """ - return_df = nixtla_df.reset_index(drop=True if "unique_id" in nixtla_df.columns else False) - if len(model_args["group_by"]) > 0: - if len(model_args["group_by"]) > 1: - for i, group in enumerate(model_args["group_by"]): - return_df[group] = return_df["unique_id"].apply(lambda x: x.split("/")[i]) - else: - group_by_col = model_args["group_by"][0] - return_df[group_by_col] = return_df["unique_id"] - - return return_df.drop(["unique_id"], axis=1).rename({"ds": model_args["order_by"]}, axis=1) - - -def infer_frequency(df, time_column, default=DEFAULT_FREQUENCY): - try: # infer frequency from time column - date_series = pd.to_datetime(df.sort_values(by=time_column)[time_column]).unique() - inferred_freq = pd.infer_freq(date_series) # call this first to get e.g. months & other irregular periods right - if inferred_freq is None: - values, counts = np.unique(np.diff(date_series), return_counts=True) - delta = values[np.argmax(counts)] - inferred_freq = to_offset(pd.to_timedelta(delta)).freqstr - except TypeError: - inferred_freq = default - return inferred_freq if inferred_freq is not None else default - - -def get_model_accuracy_dict(nixtla_results_df, metric=None): - """Calculates accuracy for each model in the nixtla results df.""" - if metric is None: - try: - from sklearn.metrics import r2_score - except ImportError as e: - raise ImportError("sklearn is not installed, please install it with `pip install scikit-learn`") from e - metric = r2_score - accuracy_dict = {} - for column in nixtla_results_df.columns: - if column in ["unique_id", "ds", "y", "cutoff"]: - continue - model_error = metric(nixtla_results_df["y"], nixtla_results_df[column]) - accuracy_dict[column] = model_error - return accuracy_dict - - -def get_best_model_from_results_df(nixtla_results_df, metric=None): - """Gets the best model based, on lowest error, from a results df - with a column for each nixtla model. 
- """ - if metric is None: - try: - from sklearn.metrics import r2_score - except ImportError as e: - raise ImportError("sklearn is not installed, please install it with `pip install scikit-learn`") from e - metric = r2_score - best_model, current_accuracy = None, 0 - accuracy_dict = get_model_accuracy_dict(nixtla_results_df, metric) - for model, accuracy in accuracy_dict.items(): - if accuracy > current_accuracy: - best_model, current_accuracy = model, accuracy - return best_model - - -def spec_hierarchy_from_list(col_list): - """Gets the hierarchy spec from the list of hierarchy cols""" - spec = [["Total"]] - for i in range(len(col_list)): - spec.append(["Total"] + col_list[: i + 1]) - return spec - - -def get_hierarchy_from_df(df, model_args): - """Extracts hierarchy from the raw df, using the provided spec and args. - - The "hierarchy" model arg is a list of format - [, , ..., ] - where each element is a level in the hierarchy. - - We return a tuple (nixtla_df, hier_df, hier_dict) where: - nixtla_df is a dataframe in the format nixtla packages uses for training - hier_df is a matrix of 0s and 1s showing the hierarchical structure - hier_dict is a dictionary with the hierarchical structure. See the unit test - in tests/unit/ml_handlers/test_time_series_utils.py for an example. - """ - if HierarchicalReconciliation is not None: - spec = spec_hierarchy_from_list(model_args["hierarchy"]) - - nixtla_df = df.rename({model_args["order_by"]: "ds", model_args["target"]: "y"}, axis=1) - nixtla_df["ds"] = pd.to_datetime(nixtla_df["ds"]) - for col in model_args["group_by"]: - nixtla_df[col] = nixtla_df[col].astype(str) # grouping columns need to be string format - nixtla_df.insert(0, "Total", "total") - - nixtla_df, hier_df, hier_dict = aggregate(nixtla_df, spec) # returns (nixtla_df, hierarchy_df, hierarchy_dict) - return nixtla_df, hier_df, hier_dict - else: - log.logger.warning( - "HierarchicalForecast is not installed, but `get_hierarchy_from_df` has been called. 
This should never happen." - ) # noqa - - -def reconcile_forecasts(nixtla_df, forecast_df, hierarchy_df, hierarchy_dict): - """Reconciles forecast results according to the hierarchy.""" - if HierarchicalReconciliation is not None: - reconcilers = [DEFAULT_RECONCILER()] - hrec = HierarchicalReconciliation(reconcilers=reconcilers) - reconciled_df = hrec.reconcile(Y_hat_df=forecast_df, Y_df=nixtla_df, S=hierarchy_df, tags=hierarchy_dict) - return get_results_from_reconciled_df(reconciled_df, hierarchy_df) - else: - log.logger.warning( - "HierarchicalForecast is not installed, but `reconcile_forecasts` has been called. This should never happen." - ) # noqa - - -def get_results_from_reconciled_df(reconciled_df, hierarchy_df): - """Formats the reconciled df into a normal Nixtla results df. - - First drops the model output columns that haven't been reconciled. - Then drops rows corresponding to higher level predictions that were not - in the original dataframe, e.g. the total for each grouping. 
- """ - # Drop unnecessary columns - for col in reconciled_df.columns: - if col not in ["ds", "y"]: - if "BottomUp" not in col: - results_df = reconciled_df.drop(col, axis=1) # removes original forecast column - break - - # Drop higher-level rows - lowest_level_ids = hierarchy_df.columns - results_df = results_df[results_df.index.isin(lowest_level_ids)] - results_df.index = results_df.index.str.replace("total/", "") - return results_df diff --git a/mindsdb/integrations/utilities/utils.py b/mindsdb/integrations/utilities/utils.py deleted file mode 100644 index f45c29d3446..00000000000 --- a/mindsdb/integrations/utilities/utils.py +++ /dev/null @@ -1,32 +0,0 @@ -from typing import Any - -import sys - - -def format_exception_error(exception): - try: - exception_type, _exception_object, exception_traceback = sys.exc_info() - filename = exception_traceback.tb_frame.f_code.co_filename - line_number = exception_traceback.tb_lineno - error_message = f"{exception_type.__name__}: {exception}, raised at: {filename}#{line_number}" - except Exception: - error_message = str(exception) - return error_message - - -def dict_to_yaml(d, indent=0): - yaml_str = "" - for k, v in d.items(): - yaml_str += " " * indent + str(k) + ": " - if isinstance(v, dict): - yaml_str += "\n" + dict_to_yaml(v, indent + 2) - else: - yaml_str += str(v) + "\n" - return yaml_str - - -# Mocks won't always have 'name' attribute. 
-def get_class_name(instance: Any, default: str = "unknown"): - if hasattr(instance.__class__, "name"): - return instance.__class__.name - return default diff --git a/mindsdb/interfaces/__init__.py b/mindsdb/interfaces/__init__.py deleted file mode 100644 index 8b137891791..00000000000 --- a/mindsdb/interfaces/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/mindsdb/interfaces/agents/AGENT_FLOW_DIAGRAM.md b/mindsdb/interfaces/agents/AGENT_FLOW_DIAGRAM.md deleted file mode 100644 index f247eb0ba45..00000000000 --- a/mindsdb/interfaces/agents/AGENT_FLOW_DIAGRAM.md +++ /dev/null @@ -1,1086 +0,0 @@ -# MindsDB Agent Flow Diagram - -## Overview -This diagram shows the complete flow of how the MindsDB agent processes a question and returns an answer. - -```mermaid -flowchart TD - Start([User Question/Messages]) --> Controller[AgentsController.get_completion] - - Controller --> GetLLMParams[Get Agent LLM Parameters
Combine default config with agent params] - GetLLMParams --> CreateAgent[Create PydanticAIAgent Instance] - - CreateAgent --> InitAgent[Initialize Agent] - InitAgent --> InitModel[Initialize LLM Model Instance
from llm_params] - InitAgent --> InitSQLToolkit[Initialize SQL Toolkit
with tables & knowledge bases] - InitAgent --> GetSystemPrompt[Get System Prompt
from agent params] - - InitModel --> ExtractMessages[Extract Current Prompt & Message History] - InitSQLToolkit --> ExtractMessages - GetSystemPrompt --> ExtractMessages - - ExtractMessages --> ConvertMessages[Convert Messages to Pydantic AI Format
role/content or question/answer] - - ConvertMessages --> SetupTrace[Setup Langfuse Trace
for observability] - - SetupTrace --> BuildDataCatalog[Build Data Catalog] - BuildDataCatalog --> GetTables[Get Usable Table Names] - BuildDataCatalog --> GetKBs[Get Usable Knowledge Base Names] - - GetTables --> BuildTableCatalog[For Each Table:
- Get sample data LIMIT 5
- Get metadata SHOW COLUMNS] - GetKBs --> BuildKBCatalog[For Each KB:
- Get sample data LIMIT 3
- Get metadata from information_schema] - - BuildTableCatalog --> CombineCatalog[Combine All Catalog Entries] - BuildKBCatalog --> CombineCatalog - - CombineCatalog --> PlanningStep[PLANNING STEP] - PlanningStep --> CreatePlanningAgent[Create Planning Agent
with PlanResponse output type] - CreatePlanningAgent --> BuildPlanningPrompt[Build Planning Prompt
Data Catalog + Question + Planning Instructions] - BuildPlanningPrompt --> GeneratePlan[Generate Execution Plan
via LLM] - - GeneratePlan --> ValidatePlan{Plan Steps
> MAX_EXPLORATORY_QUERIES?} - ValidatePlan -->|Yes| AdjustPlan[Adjust Plan to Max Steps] - ValidatePlan -->|No| MainLoop - AdjustPlan --> MainLoop[MAIN AGENT LOOP] - - MainLoop --> BuildBasePrompt[Build Base Prompt
Data Catalog + SQL Instructions + Plan + Question] - BuildBasePrompt --> CheckExploratoryResults{Previous
Exploratory
Results?} - CheckExploratoryResults -->|Yes| AddExploratoryContext[Add Exploratory Query Results
to Prompt Context] - CheckExploratoryResults -->|No| CheckErrorContext{Error
Context?} - AddExploratoryContext --> CheckErrorContext - CheckErrorContext -->|Yes| AddErrorContext[Add Error Context
to Prompt] - CheckErrorContext -->|No| CheckMaxQueries{Exploratory
Queries >=
MAX?} - AddErrorContext --> CheckMaxQueries - - CheckMaxQueries -->|Yes| AddMaxWarning[Add Max Queries Warning] - CheckMaxQueries -->|No| RunAgent - AddMaxWarning --> RunAgent[Run Agent with Prompt
Generate AgentResponse] - - RunAgent --> ParseResponse[Parse AgentResponse
sql_query, type, short_description] - ParseResponse --> CheckResponseType{Response
Type?} - - CheckResponseType -->|FINAL_TEXT| ReturnText[Return Text Response
to User] - CheckResponseType -->|FINAL_QUERY| ExecuteFinalSQL[Execute Final SQL Query] - CheckResponseType -->|EXPLORATORY| ExecuteExploratorySQL[Execute Exploratory SQL Query] - - ExecuteFinalSQL --> CheckFinalError{Execution
Error?} - CheckFinalError -->|Yes| RetryFinal{Retry Count
< MAX_RETRIES?} - RetryFinal -->|Yes| IncrementRetry[Increment Retry Count
Add Error to Context] - IncrementRetry --> BuildBasePrompt - RetryFinal -->|No| RaiseError[Raise RuntimeError] - CheckFinalError -->|No| ReturnFinalData[Return Final Query Results
as DataFrame] - - ExecuteExploratorySQL --> CheckExploratoryError{Execution
Error?} - CheckExploratoryError -->|Yes| RetryExploratory{Retry Count
< MAX_RETRIES?} - RetryExploratory -->|Yes| IncrementExploratoryRetry[Increment Retry Count
Add Error to Context] - IncrementExploratoryRetry --> BuildBasePrompt - RetryExploratory -->|No| AddErrorToResults[Add Error to
Exploratory Results] - CheckExploratoryError -->|No| FormatResults[Format Query Results
as Markdown Table] - - AddErrorToResults --> CheckMaxExploratory{Exploratory
Count >=
MAX?} - FormatResults --> IncrementExploratoryCount[Increment Exploratory Query Count] - IncrementExploratoryCount --> CheckMaxExploratory - - CheckMaxExploratory -->|Yes| ForceFinal[Force Final Query
in Next Iteration] - CheckMaxExploratory -->|No| AddToContext[Add Results to
Exploratory Context] - ForceFinal --> AddToContext - AddToContext --> BuildBasePrompt - - ReturnText --> ValidateSelectTargets{Select Targets
Specified?} - ReturnFinalData --> ValidateSelectTargets - - ValidateSelectTargets -->|Yes| EnsureColumns[Ensure All Expected Columns
Exist in Result
Add Missing with Nulls] - ValidateSelectTargets -->|No| ReturnDataFrame - EnsureColumns --> ReturnDataFrame[Return DataFrame
with answer column] - - ReturnDataFrame --> End([End: Return to User]) - RaiseError --> End - - style Start fill:#e1f5ff - style End fill:#e1f5ff - style PlanningStep fill:#fff4e1 - style MainLoop fill:#fff4e1 - style ReturnText fill:#e8f5e9 - style ReturnFinalData fill:#e8f5e9 - style RaiseError fill:#ffebee -``` - -## Key Components - -### 1. **AgentsController** (`agents_controller.py`) -- Entry point for agent operations -- Handles CRUD operations for agents -- `get_completion()` method orchestrates the flow -- Combines default LLM config with agent-specific params - -### 2. **PydanticAIAgent** (`pydantic_ai_agent.py`) -- Core agent implementation using Pydantic AI framework -- Manages LLM model instance -- Handles message conversion and history -- Orchestrates planning and execution loops - -### 3. **Data Catalog Builder** (`data_catalog_builder.py`) -- Builds comprehensive data catalog of available tables and knowledge bases -- For each table: fetches sample data (5 rows) and metadata (column info) -- For each KB: fetches sample data (3 rows) and metadata -- Caches catalog entries for performance - -### 4. **SQL Toolkit** (`sql_toolkit.py`) -- Executes SQL queries with permission checking -- Validates table/knowledge base access -- Handles query parsing and execution via MindsDB command executor -- Enforces read-only operations (SELECT, SHOW, DESCRIBE, etc.) - -### 5. **Agent Modes** (`modes/`) -- **SQL Mode** (`sql.py`): Returns SQL queries only (final_query or exploratory_query) -- **Text-SQL Mode** (`text_sql.py`): Can return text responses, SQL queries, or both -- Both modes use structured `AgentResponse` with Pydantic models - -## Flow Details - -### Planning Phase -1. Agent receives question and message history -2. Builds data catalog of available data sources -3. Creates a planning agent with `PlanResponse` output type -4. LLM generates step-by-step execution plan -5. Plan includes estimated number of steps - -### Execution Phase -1. 
Agent enters main loop with plan context -2. For each iteration: - - Builds prompt with data catalog, SQL instructions, plan, and question - - Includes previous exploratory query results if any - - Includes error context if retrying - - LLM generates `AgentResponse` with: - - `sql_query`: The SQL to execute - - `type`: final_query, exploratory_query, or final_text - - `short_description`: Description of query purpose -3. Handles the response according to its type: - - **FINAL_QUERY**: Execute and return results immediately - - **EXPLORATORY_QUERY**: Execute, format results, add to context, continue loop - - **FINAL_TEXT**: Return text response without SQL execution -4. Error handling with retry logic (up to MAX_RETRIES) -5. Maximum exploratory queries limit (MAX_EXPLORATORY_QUERIES = 20) - -### Response Formatting -1. Validates select targets if specified (from original query) -2. Ensures all expected columns exist in result -3. Returns DataFrame with answer column -4. Includes trace_id for observability - -## Configuration - -- **MAX_EXPLORATORY_QUERIES**: 20 (maximum exploratory queries before forcing final) -- **MAX_RETRIES**: 3 (maximum retries per query on error) -- **System Prompt**: Configurable via agent params (`prompt_template`) -- **LLM Model**: Configurable via agent params or default_llm config -- **Data Sources**: Specified in agent params (`data.tables`, `data.knowledge_bases`) - -## Observability - -- Langfuse integration for tracing -- Trace includes metadata (user_id, session_id, company_id, model_name) -- Status updates streamed during execution -- Error tracking and reporting - -## Simplified Text Flow - -``` -1. USER QUESTION - ↓ -2. AgentsController.get_completion() - - Gets agent from database - - Combines LLM params (default + agent-specific) - ↓ -3. Create PydanticAIAgent - - Initialize LLM model instance - - Initialize SQL toolkit (tables & knowledge bases) - - Get system prompt - ↓ -4. 
Extract & Convert Messages - - Extract current prompt (last user message) - - Convert message history to Pydantic AI format - ↓ -5. Build Data Catalog - - For each table: sample data (5 rows) + metadata - - For each KB: sample data (3 rows) + metadata - ↓ -6. PLANNING PHASE - - Create planning agent - - Generate execution plan via LLM - - Validate plan (max steps = 20) - ↓ -7. MAIN EXECUTION LOOP - ↓ - a. Build Prompt - - Data catalog - - SQL instructions - - Execution plan - - Current question - - Previous exploratory results (if any) - - Error context (if retrying) - ↓ - b. Run Agent → Get AgentResponse - - sql_query: SQL to execute - - type: final_query | exploratory_query | final_text - - short_description: Query description - ↓ - c. Handle Response Type: - - IF final_text: - → Return text response to user - → END - - IF final_query: - → Execute SQL - → IF error: retry (max 3 times) - → IF success: Return results as DataFrame - → END - - IF exploratory_query: - → Execute SQL - → IF error: retry or add to error context - → IF success: Format results as markdown - → Add to exploratory context - → Increment exploratory count - → IF count >= 20: Force final query next - → Loop back to step 7a - ↓ -8. Validate & Format Response - - Check select targets (if specified) - - Ensure all expected columns exist - - Return DataFrame with answer column - ↓ -9. RETURN TO USER -``` - -## Key Decision Points - -1. **Planning vs Direct Execution**: Agent always creates a plan first to structure the approach -2. **Exploratory vs Final Queries**: Agent can explore data before generating final answer -3. **SQL vs Text Response**: Agent can return SQL results or text explanation -4. **Error Handling**: Retry logic (3 attempts) with error context accumulation -5. 
**Query Limits**: Maximum 20 exploratory queries to prevent infinite loops - ---- - -# Simplified Flow Diagram - -## Core Flow: Input → Processing → Catalog → Plan → Loop → Query Logic - -```mermaid -flowchart TD - Input([INPUT
User Question/Messages]) --> InputProcessing[INPUT PROCESSING] - - InputProcessing --> ExtractPrompt[Extract Current Prompt
from messages] - InputProcessing --> ConvertHistory[Convert Message History
to Pydantic AI format] - InputProcessing --> InitComponents[Initialize Components
LLM Model, SQL Toolkit, System Prompt] - - ExtractPrompt --> RealTimeCatalog[REAL-TIME DATA CATALOG] - ConvertHistory --> RealTimeCatalog - InitComponents --> RealTimeCatalog - - RealTimeCatalog --> FetchTables[Fetch Table Metadata
Sample Data LIMIT 5
Column Info SHOW COLUMNS] - RealTimeCatalog --> FetchKBs[Fetch Knowledge Base Metadata
Sample Data LIMIT 3
KB Schema Info] - - FetchTables --> CombineCatalog[Combine Catalog
Tables + Knowledge Bases] - FetchKBs --> CombineCatalog - - CombineCatalog --> Plan[PLAN] - Plan --> PlanningAgent[Planning Agent
PlanResponse Output] - PlanningAgent --> GeneratePlan[Generate Execution Plan
Step-by-step approach
Estimated steps] - - GeneratePlan --> MainLoop[MAIN AGENT LOOP] - - MainLoop --> KnowledgeBuffer[KNOWLEDGE BUFFER] - KnowledgeBuffer --> AccumulatedResults[Accumulated Exploratory Results
Previous Query Results
Error Context] - - AccumulatedResults --> BuildPrompt[Build Prompt] - BuildPrompt --> PromptComponents[Prompt Components:
• Data Catalog
• SQL Instructions
• Execution Plan
• Current Question
• Knowledge Buffer
• Error Context] - - PromptComponents --> BuildQuery[BUILD QUERY] - BuildQuery --> RunLLM[Run LLM Agent
Generate AgentResponse] - - RunLLM --> QueryType{QUERY TYPE LOGIC} - - QueryType -->|FINAL_TEXT| TextResponse[Return Text Response
No SQL Execution] - QueryType -->|FINAL_QUERY| ExecuteFinal[Execute Final SQL
Return Results
END] - QueryType -->|EXPLORATORY_QUERY| ExecuteExploratory[Execute Exploratory SQL
Format Results as Markdown] - - ExecuteExploratory --> UpdateBuffer[Update Knowledge Buffer
Add Results to Context] - UpdateBuffer --> CheckMax{Exploratory
Count >= 20?} - CheckMax -->|No| MainLoop - CheckMax -->|Yes| ForceFinal[Force Final Query
Next Iteration] - ForceFinal --> MainLoop - - TextResponse --> Output([OUTPUT
DataFrame with Answer]) - ExecuteFinal --> Output - - style Input fill:#e1f5ff - style InputProcessing fill:#fff3e0 - style RealTimeCatalog fill:#f3e5f5 - style Plan fill:#fff4e1 - style MainLoop fill:#e8f5e9 - style KnowledgeBuffer fill:#e3f2fd - style BuildQuery fill:#fce4ec - style QueryType fill:#fff9c4 - style Output fill:#e1f5ff -``` - -## Simplified Stage Breakdown - -### 1. **INPUT** -- User question/messages -- Can be: list of dicts (role/content), DataFrame, or legacy format - -### 2. **INPUT PROCESSING** -- Extract current prompt (last user message) -- Convert message history to Pydantic AI format -- Initialize LLM model, SQL toolkit, system prompt - -### 3. **REAL-TIME DATA CATALOG** -- **For Tables**: Query sample data (LIMIT 5) + metadata (SHOW COLUMNS) -- **For Knowledge Bases**: Query sample data (LIMIT 3) + schema info -- Built dynamically on each request (with caching) -- Provides context about available data sources - -### 4. **PLAN** -- Planning agent generates execution plan -- Step-by-step approach to answer the question -- Estimates number of steps needed -- Validated against max exploratory queries (20) - -### 5. **MAIN AGENT LOOP → KNOWLEDGE BUFFER** -- **Knowledge Buffer** accumulates: - - Results from previous exploratory queries (formatted as markdown) - - Error context from failed queries - - Query descriptions and outcomes -- Buffer grows with each iteration -- Used to inform next query generation - -### 6. 
**BUILD QUERY → TYPE OF QUERY LOGIC** - -**Build Query:** -- Combines: Data Catalog + SQL Instructions + Plan + Question + Knowledge Buffer -- Sends to LLM agent -- Receives structured `AgentResponse` - -**Query Type Logic:** - -``` -┌─────────────────────────────────────────┐ -│ QUERY TYPE DECISION │ -└─────────────────────────────────────────┘ - │ - ┌───────────┼───────────┐ - │ │ │ - ▼ ▼ ▼ - FINAL_TEXT FINAL_QUERY EXPLORATORY - │ │ │ - │ │ │ - Return Text Execute SQL Execute SQL - (No SQL) Return Data Add to Buffer - │ │ │ - │ │ └───► Loop Back - │ │ - └───────────┴───────────┐ - │ - ▼ - OUTPUT -``` - -**FINAL_TEXT:** -- Agent determines answer can be given without SQL -- Returns text response immediately -- END - -**FINAL_QUERY:** -- Agent generates final SQL query -- Executes query -- Returns results as DataFrame -- END - -**EXPLORATORY_QUERY:** -- Agent needs to explore data first -- Executes SQL query -- Formats results as markdown table -- Adds to Knowledge Buffer -- Increments exploratory count -- Loops back to Main Agent Loop -- If count >= 20: Forces final query next iteration - -## Key Concepts - -### Knowledge Buffer -The knowledge buffer is a growing context that accumulates: -- **Exploratory Query Results**: Formatted as "Query: {sql}\nDescription: {desc}\nResult: {markdown_table}" -- **Error Context**: Failed queries with error messages (last 3 errors) -- **Query History**: All previous queries and their outcomes - -This buffer allows the agent to: -- Learn from previous queries -- Avoid repeating failed queries -- Build upon discovered information -- Make informed decisions about next steps - -### Real-Time Data Catalog -Built fresh on each request (with caching for performance): -- **Tables**: Sample rows + column metadata -- **Knowledge Bases**: Sample rows + schema information -- Provides LLM with current state of available data -- Enables accurate query generation - -### Query Type Logic -The agent intelligently decides query type: -- 
**Exploratory**: When more information is needed -- **Final Query**: When ready to answer the question -- **Final Text**: When SQL is not needed - -This decision happens at each iteration based on: -- Current question -- Knowledge buffer contents -- Data catalog information -- Execution plan - ---- - -# Ultra-Simplified Flow - -## Core Stages: Input Processing → Planning → Exploration Loop → Final Answer - -```mermaid -flowchart LR - Input([INPUT
Question]) --> InputProcessing[INPUT PROCESSING
Extract prompt
Build data catalog
Initialize components] - - InputProcessing --> Planning[PLANNING
Generate execution plan
Step-by-step approach] - - Planning --> ExplorationLoop[AGENT EXPLORATION LOOP] - - ExplorationLoop --> GenerateQuery[Generate Query
via LLM] - GenerateQuery --> ExecuteQuery[Execute SQL Query] - ExecuteQuery --> CheckType{Query Type?} - - CheckType -->|Exploratory| AddToContext[Add Results
to Context] - AddToContext --> ExplorationLoop - - CheckType -->|Final Query| FinalAnswer[FINAL ANSWER
Return Results] - CheckType -->|Final Text| FinalAnswer - - FinalAnswer --> Output([OUTPUT
DataFrame]) - - style Input fill:#e1f5ff - style InputProcessing fill:#fff3e0 - style Planning fill:#fff4e1 - style ExplorationLoop fill:#e8f5e9 - style FinalAnswer fill:#c8e6c9 - style Output fill:#e1f5ff -``` - -## Ultra-Simplified Stage Description - -### 1. **INPUT PROCESSING** -- Extract user question from messages -- Build real-time data catalog (tables + knowledge bases) -- Initialize LLM model and SQL toolkit -- Prepare system prompt - -### 2. **PLANNING** -- Generate execution plan via planning agent -- Create step-by-step approach to answer question -- Estimate number of steps needed - -### 3. **AGENT EXPLORATION LOOP** -``` -LOOP: - ├─ Generate Query (LLM creates SQL based on plan + context) - ├─ Execute Query - ├─ Check Query Type: - │ ├─ EXPLORATORY → Add results to context → LOOP - │ ├─ FINAL_QUERY → Exit loop → Final Answer - │ └─ FINAL_TEXT → Exit loop → Final Answer - └─ (Max 20 exploratory queries) -``` - -### 4. **FINAL ANSWER** -- Return results as DataFrame -- Format with expected columns -- Include trace_id for observability - -## Flow Summary - -``` -INPUT (Question) - ↓ -INPUT PROCESSING - • Extract prompt - • Build data catalog - • Initialize components - ↓ -PLANNING - • Generate execution plan - ↓ -AGENT EXPLORATION LOOP - • Generate query → Execute → Check type - • If exploratory: Add to context → Loop - • If final: Exit loop - ↓ -FINAL ANSWER - • Return DataFrame -``` - -**Key Points:** -- **Input Processing**: Prepares everything needed (catalog, model, toolkit) -- **Planning**: Creates a structured approach before execution -- **Exploration Loop**: Iteratively explores data until ready to answer -- **Final Answer**: Returns formatted results to user - ---- - -# Context Window Limitations & Missing RAG/Summarization - -## Current Context Accumulation Points (No Limits/Compression) - -```mermaid -flowchart TD - Start([User Question]) --> InputProcessing[INPUT PROCESSING] - - InputProcessing --> BuildCatalog[Build Data Catalog] - 
BuildCatalog --> CatalogSize{Data Catalog Size} - CatalogSize -->|No Limit| FullCatalog[Full Data Catalog
⚠️ ALL tables/KBs
⚠️ ALL sample data 5 rows
⚠️ ALL metadata
⚠️ NO summarization] - - InputProcessing --> MessageHistory[Message History] - MessageHistory --> HistorySize{History Size} - HistorySize -->|No Limit| FullHistory[Full Message History
⚠️ ALL previous messages
⚠️ NO summarization
⚠️ NO truncation] - - FullCatalog --> PlanningPrompt[PLANNING PROMPT] - FullHistory --> PlanningPrompt - PlanningPrompt --> PlanningContext[Planning Context Size
⚠️ Data Catalog + History
⚠️ Can exceed context window] - - PlanningPrompt --> GeneratePlan[Generate Plan] - GeneratePlan --> MainLoop[MAIN AGENT LOOP] - - MainLoop --> BuildBasePrompt[Build Base Prompt] - FullCatalog --> BuildBasePrompt - BuildBasePrompt --> BasePromptSize{Base Prompt Size} - BasePromptSize -->|No Limit| FullBasePrompt[Base Prompt Contains:
⚠️ FULL Data Catalog
⚠️ FULL Execution Plan
⚠️ SQL Instructions
⚠️ Current Question] - - MainLoop --> AccumulateExploratory[Accumulate Exploratory Results] - AccumulateExploratory --> ExploratorySize{Exploratory Results} - ExploratorySize -->|No Limit| FullExploratory[All Exploratory Results
⚠️ ALL query results
⚠️ ALL markdown tables
⚠️ NO summarization
⚠️ Grows with each iteration] - - MainLoop --> AccumulateErrors[Accumulate Errors] - AccumulateErrors --> ErrorSize{Error Context} - ErrorSize -->|Limited to 3| Last3Errors[Last 3 Errors Only
✅ Limited] - - FullBasePrompt --> CombinePrompt[Combine Prompt for LLM] - FullExploratory --> CombinePrompt - Last3Errors --> CombinePrompt - FullHistory --> CombinePrompt - - CombinePrompt --> FinalContextSize{⚠️ FINAL CONTEXT SIZE
⚠️ NO LIMITS
⚠️ NO COMPRESSION} - FinalContextSize -->|Can Exceed| ContextWindow[Context Window Limit
❌ May Fail or Truncate] - FinalContextSize -->|Within Limit| LLMCall[LLM Call] - - LLMCall --> NextIteration{Query Type?} - NextIteration -->|Exploratory| AccumulateExploratory - NextIteration -->|Final| End([End]) - - style FullCatalog fill:#ffebee - style FullHistory fill:#ffebee - style FullBasePrompt fill:#ffebee - style FullExploratory fill:#ffebee - style PlanningContext fill:#fff3e0 - style FinalContextSize fill:#ffcdd2 - style ContextWindow fill:#f44336,color:#fff - style Last3Errors fill:#c8e6c9 -``` - -## Context Window Bottlenecks - -### 1. **Data Catalog** 🔴 **NO LIMITS** -**Location**: Built once, used in planning and main loop prompts - -**Current Behavior:** -- Includes ALL tables and knowledge bases -- For each table: 5 sample rows + full column metadata -- For each KB: 3 sample rows + full schema info -- **No size limits** -- **No summarization** -- **No RAG/retrieval** - everything included regardless of relevance - -**Potential Size:** -- 10 tables × ~500-1,000 tokens each (5 sample rows + column metadata) -- 5 KBs × ~300-500 tokens each (3 sample rows + schema) -- **Total: 5,000-15,000+ tokens** (can be much larger) - -**Where Used:** -- Planning prompt (line 436) -- Base prompt in main loop (line 468) -- **Included in EVERY LLM call** - -**Missing Solutions:** -- ❌ No RAG to retrieve only relevant tables/KBs -- ❌ No summarization of catalog -- ❌ No size-based filtering -- ❌ No relevance scoring - ---- - -### 2. 
**Message History** 🔴 **NO LIMITS** -**Location**: Passed to agent in main loop (line 505) - -**Current Behavior:** -- Includes ALL previous messages in conversation -- **No truncation** -- **No summarization** -- **No compression** - -**Potential Size:** -- Long conversations: 50+ messages × ~200 tokens = **10,000+ tokens** -- Can grow indefinitely - -**Where Used:** -- Main loop agent call (line 503-505) -- **Included in EVERY iteration** - -**Missing Solutions:** -- ❌ No message history summarization -- ❌ No sliding window truncation -- ❌ No compression of old messages -- ❌ No relevance-based filtering - ---- - -### 3. **Exploration Loop Context** 🔴 **NO LIMITS** -**Location**: Accumulated in `exploratory_query_results` list (line 427, 490-492) - -**Current Behavior:** -- Accumulates ALL exploratory query results -- Each result includes: SQL query + description + full markdown table -- **No size limits** -- **No summarization** -- **Grows with each iteration** (up to 20 queries) - -**Potential Size:** -- 20 exploratory queries × (query + description + markdown table) -- Each markdown table: 100-500 rows × 10 columns = **2,000-10,000 tokens per result** -- **Total: 40,000-200,000+ tokens** (worst case) - -**Where Used:** -- Added to prompt in every iteration (line 489-492) -- **Included in EVERY subsequent LLM call** - -**Missing Solutions:** -- ❌ No summarization of query results -- ❌ No compression of markdown tables -- ❌ No size limits on accumulated results -- ❌ No selective retention (keep only relevant results) -- ❌ No RAG to retrieve only relevant past queries - ---- - -### 4. 
**Base Prompt** 🔴 **NO LIMITS** -**Location**: Built once, used in every iteration (line 468) - -**Current Behavior:** -- Includes FULL data catalog -- Includes FULL execution plan -- Includes SQL instructions -- Includes current question -- **No compression** - -**Potential Size:** -- Data catalog: 5,000-15,000 tokens -- Plan: 500-1,000 tokens -- SQL instructions: 1,000-2,000 tokens -- Question: 100-500 tokens -- **Total: 6,600-18,500 tokens** (before adding exploratory results) - -**Where Used:** -- Every iteration of main loop (line 488) - -**Missing Solutions:** -- ❌ No compression of base prompt -- ❌ No dynamic catalog filtering -- ❌ No plan summarization - ---- - -### 5. **Error Context** 🟡 **PARTIALLY LIMITED** -**Location**: Accumulated errors (line 426, 497-500, 553) - -**Current Behavior:** -- Keeps last 3 errors only (line 553) -- ✅ **Has limit** (better than others) -- But still adds to context each iteration - -**Potential Size:** -- 3 errors × ~500 tokens = **1,500 tokens** - ---- - -## Total Context Window Usage (Worst Case) - -``` -Data Catalog: 15,000 tokens -Message History: 10,000 tokens -Base Prompt: 18,500 tokens -Exploratory Results: 200,000 tokens (20 queries × 10k each) -Error Context: 1,500 tokens -───────────────────────────────────── -TOTAL: 245,000 tokens -``` - -**Note:** The Base Prompt figure already includes the Data Catalog, so the two rows overlap by up to 15,000 tokens; counting the catalog once, the worst case is still roughly 230,000 tokens. - -**Typical Context Windows:** -- GPT-4 (original): 8k tokens (32k variant) -- GPT-4 Turbo: 128k tokens -- Claude 3.5: 200k tokens -- **Risk: Exceeds context window in worst case** - ---- - -## Missing RAG/Summarization Solutions - -### Where RAG Could Help: - -1. **Data Catalog RAG** 🔴 **NOT IMPLEMENTED** - - Use semantic search to retrieve only relevant tables/KBs - - Filter catalog based on question relevance - - Reduce catalog from 15k → 2k tokens - -2. 
**Message History Summarization** 🔴 **NOT IMPLEMENTED** - - Summarize old messages (>10 messages ago) - - Keep recent messages verbatim - - Reduce history from 10k → 2k tokens - -3. 
**Exploratory Results Summarization** 🔴 **NOT IMPLEMENTED** - - Summarize query results instead of full markdown - - Keep only key insights - - Use RAG to retrieve relevant past queries - - Reduce results from 200k → 10k tokens - -4. **Dynamic Catalog Filtering** 🔴 **NOT IMPLEMENTED** - - Filter catalog based on current question - - Remove irrelevant tables/KBs - - Use relevance scoring - -5. **Plan Compression** 🔴 **NOT IMPLEMENTED** - - Summarize plan after first few iterations - - Keep only active steps - ---- - -## Recommended Improvements - -### Priority 1: Exploratory Results Compression -- **Impact**: High (can reduce 200k → 10k tokens) -- **Implementation**: Summarize markdown tables, keep only key insights -- **Location**: After each exploratory query execution (line 578-583) - -### Priority 2: Data Catalog RAG -- **Impact**: High (can reduce 15k → 2k tokens) -- **Implementation**: Semantic search to retrieve relevant tables/KBs -- **Location**: Before building planning prompt (line 420) - -### Priority 3: Message History Summarization -- **Impact**: Medium (can reduce 10k → 2k tokens) -- **Implementation**: Summarize messages older than N turns -- **Location**: Before passing to agent (line 505) - -### Priority 4: Dynamic Base Prompt Compression -- **Impact**: Medium (can reduce 18k → 8k tokens) -- **Implementation**: Compress plan, filter catalog dynamically -- **Location**: Before building base prompt (line 468) - ---- - -# Recommended Best Practices: Views & Catalog Limits - -## Recommended Approach: Scoped Views + 10 Object Limit - -```mermaid -flowchart TD - Start([User Problem/Question]) --> AnalyzeScope[Analyze Problem Scope] - - AnalyzeScope --> CreateViews[CREATE VIEWS
for Problem Scope] - CreateViews --> View1[View 1: Filtered Data
Only relevant columns
Only relevant rows] - CreateViews --> View2[View 2: Aggregated Data
Pre-computed metrics
Summary tables] - CreateViews --> View3[View 3: Joined Data
Pre-joined relationships
Denormalized for query] - - View1 --> LimitCatalog[LIMIT CATALOG
Max 10 Objects] - View2 --> LimitCatalog - View3 --> LimitCatalog - - LimitCatalog --> CountObjects{Object Count?} - CountObjects -->|> 10| ReduceObjects[Reduce to 10 Objects
✅ Prioritize views
✅ Remove redundant tables
✅ Keep only essential KBs] - CountObjects -->|<= 10| ConfigureAgent[Configure Agent] - ReduceObjects --> ConfigureAgent - - ConfigureAgent --> AgentConfig["Agent Configuration
data.tables: view1, view2, view3
data.knowledge_bases: kb1, kb2
Total: ≤ 10 objects"] - - AgentConfig --> BuildCatalog[Build Data Catalog] - BuildCatalog --> CatalogSize{Catalog Size} - CatalogSize -->|Small| EfficientCatalog[Efficient Catalog
✅ 10 objects max
✅ Views = scoped data
✅ ~2,000-5,000 tokens
✅ Fits in context window] - - EfficientCatalog --> Planning[PLANNING] - Planning --> EfficientPlan[Efficient Plan
✅ Focused on scope
✅ Fewer steps needed
✅ Clearer context] - - EfficientPlan --> MainLoop[MAIN AGENT LOOP] - MainLoop --> EfficientExecution[Efficient Execution
✅ Smaller prompts
✅ Faster responses
✅ Lower costs] - - EfficientExecution --> Success([Success
Within Context Limits]) - - style CreateViews fill:#c8e6c9 - style LimitCatalog fill:#fff9c4 - style EfficientCatalog fill:#c8e6c9 - style EfficientPlan fill:#c8e6c9 - style EfficientExecution fill:#c8e6c9 - style Success fill:#4caf50,color:#fff - style ReduceObjects fill:#fff3e0 -``` - -## Best Practices: Views for Problem Scoping - -### 1. **Create Scoped Views** ✅ **RECOMMENDED** - -**Purpose**: Limit data to only what's relevant for the problem - -**Example Workflow:** -``` -Original Problem: "Analyze sales performance by region for Q4 2023" - -❌ BAD: Give agent access to: - - sales_raw (1M rows, 50 columns) - - customers (500k rows, 30 columns) - - products (10k rows, 20 columns) - - orders (2M rows, 40 columns) - - inventory, shipping, returns, etc. - Total: 20+ tables, millions of rows - -✅ GOOD: Create scoped views: - CREATE VIEW sales_q4_2023 AS - SELECT - region, - SUM(amount) as total_sales, - COUNT(*) as order_count, - AVG(amount) as avg_order_value - FROM sales_raw - WHERE date >= '2023-10-01' - AND date < '2024-01-01' - GROUP BY region; - - Then give agent access to: - - sales_q4_2023 (pre-filtered, pre-aggregated) - Total: 1 view, ~100 rows -``` - -**Benefits:** -- ✅ Reduces data catalog size dramatically -- ✅ Pre-filters irrelevant data -- ✅ Pre-aggregates for faster queries -- ✅ Clearer context for LLM -- ✅ Faster query execution - ---- - -### 2. 
**Limit to 10 Objects Maximum** ✅ **RECOMMENDED** - -**Rule**: Agent catalog should contain **≤ 10 objects** (tables + views + knowledge bases) - -**Why 10?** -- Each object: ~500-1,000 tokens (sample data + metadata) -- 10 objects: ~5,000-10,000 tokens (manageable) -- Fits comfortably in context window -- Keeps agent focused on relevant data - -**Object Counting:** -``` -✅ GOOD Examples: - -Example 1: Sales Analysis Agent - - sales_summary_view (1) - - sales_by_region_view (2) - - sales_by_product_view (3) - - sales_by_month_view (4) - - products_kb (5) - Total: 5 objects ✅ - -Example 2: Customer Support Agent - - customer_tickets_view (1) - - customer_history_view (2) - - product_docs_kb (3) - - faq_kb (4) - Total: 4 objects ✅ - -Example 3: Financial Reporting Agent - - revenue_summary_view (1) - - expenses_summary_view (2) - - budget_view (3) - - financial_kb (4) - Total: 4 objects ✅ - -❌ BAD Examples: - -Example 1: Too Many Tables - - table1, table2, table3, ..., table15 - Total: 15 objects ❌ (exceeds limit) - -Example 2: Unscoped Access - - sales_raw (1) - - customers (2) - - products (3) - - orders (4) - - inventory (5) - - shipping (6) - - returns (7) - - payments (8) - - reviews (9) - - analytics (10) - - logs (11) - Total: 11 objects ❌ (exceeds limit) -``` - ---- - -## Recommended Agent Setup Pattern - -### Step-by-Step Guide - -```mermaid -flowchart LR - Step1[1. Understand Problem
Scope & Requirements] --> Step2[2. Identify Data Needs
What tables/columns?] - Step2 --> Step3[3. Create Scoped Views
Filter, aggregate, join] - Step3 --> Step4[4. Count Objects
Tables + Views + KBs] - Step4 --> Step5{Count <= 10?} - Step5 -->|No| Step6[6. Reduce Objects
Combine views
Remove redundant] - Step6 --> Step4 - Step5 -->|Yes| Step7["7. Configure Agent
data.tables: views
data.knowledge_bases: kbs"] - Step7 --> Step8[8. Test Agent
Verify context size
Check performance] - - style Step1 fill:#e3f2fd - style Step3 fill:#c8e6c9 - style Step4 fill:#fff9c4 - style Step7 fill:#c8e6c9 - style Step8 fill:#4caf50,color:#fff -``` - -### Example: Sales Analysis Agent Setup - -```sql --- Step 1-3: Create scoped views -CREATE VIEW sales_q4_summary AS -SELECT - region, - product_category, - SUM(amount) as total_sales, - COUNT(DISTINCT customer_id) as unique_customers, - AVG(amount) as avg_order_value -FROM sales_raw s -JOIN products p ON s.product_id = p.id -WHERE s.date >= '2023-10-01' AND s.date < '2024-01-01' -GROUP BY region, product_category; - -CREATE VIEW top_customers_q4 AS -SELECT - customer_id, - customer_name, - SUM(amount) as total_spent, - COUNT(*) as order_count -FROM sales_raw s -JOIN customers c ON s.customer_id = c.id -WHERE s.date >= '2023-10-01' AND s.date < '2024-01-01' -GROUP BY customer_id, customer_name -ORDER BY total_spent DESC -LIMIT 100; - --- Step 4-7: Configure agent (≤ 10 objects) -CREATE AGENT sales_analyst -USING - model_name = 'gpt-4', - data = { - 'tables': ['sales_q4_summary', 'top_customers_q4'], - 'knowledge_bases': ['product_docs'] - }; - --- Total: 3 objects ✅ (well under 10 limit) -``` - ---- - -## Context Size Comparison - -### Without Best Practices ❌ -``` -Data Catalog: - - 20 tables × 1,000 tokens = 20,000 tokens - - 5 KBs × 500 tokens = 2,500 tokens - Total: 22,500 tokens - -Message History: 10,000 tokens -Base Prompt: 18,500 tokens -Exploratory Results: 50,000 tokens -───────────────────────────────── -TOTAL: 101,000 tokens -⚠️ Risk of exceeding context window -``` - -### With Best Practices ✅ -``` -Data Catalog: - - 8 views × 600 tokens = 4,800 tokens - - 2 KBs × 400 tokens = 800 tokens - Total: 5,600 tokens (75% reduction) - -Message History: 10,000 tokens -Base Prompt: 8,000 tokens (smaller catalog) -Exploratory Results: 15,000 tokens (fewer queries needed) -───────────────────────────────── -TOTAL: 38,600 tokens -✅ Well within context window -✅ Faster responses -✅ Lower costs 
-``` - ---- - -## Key Recommendations Summary - -### ✅ DO: -1. **Create views** to scope data to problem domain -2. **Limit to 10 objects** maximum in agent catalog -3. **Pre-filter and pre-aggregate** data in views -4. **Use views instead of raw tables** when possible -5. **Combine related data** into single views -6. **Test catalog size** before deploying agent - -### ❌ DON'T: -1. **Don't give agent access to all tables** in database -2. **Don't exceed 10 objects** in catalog -3. **Don't use raw tables** when views would work -4. **Don't include redundant data** (multiple tables with same info) -5. **Don't skip scoping** - always create problem-specific views - ---- - -## Benefits of This Approach - -1. **Context Window Management** ✅ - - Keeps total context under 50k tokens - - Fits comfortably in all modern LLM context windows - - Reduces risk of truncation - -2. **Performance** ✅ - - Faster query execution (pre-aggregated views) - - Faster LLM responses (smaller prompts) - - Lower token costs - -3. **Accuracy** ✅ - - Agent focuses on relevant data - - Less confusion from irrelevant tables - - Clearer context for better queries - -4. 
**Maintainability** ✅ - - Views document data scope - - Easy to update when problem changes - - Clear separation of concerns - diff --git a/mindsdb/interfaces/agents/__init__.py b/mindsdb/interfaces/agents/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/interfaces/agents/agents_controller.py b/mindsdb/interfaces/agents/agents_controller.py deleted file mode 100644 index 504c2891af6..00000000000 --- a/mindsdb/interfaces/agents/agents_controller.py +++ /dev/null @@ -1,410 +0,0 @@ -import datetime -from typing import Dict, Iterator, List, Union, Tuple, Optional, Any, Text -import copy - -from enum import Enum -from pydantic import BaseModel -from sqlalchemy.orm.attributes import flag_modified -from sqlalchemy import null -import pandas as pd - -from mindsdb.interfaces.storage import db -from mindsdb.interfaces.storage.db import Predictor -from mindsdb.utilities.context import context as ctx -from mindsdb.interfaces.database.projects import ProjectController -from mindsdb.interfaces.model.functions import PredictorRecordNotFound -from mindsdb.interfaces.model.model_controller import ModelController -from mindsdb.utilities.config import config -from mindsdb.utilities.utils import validate_pydantic_params -from mindsdb.utilities import log -from mindsdb.interfaces.agents.utils.sql_toolkit import MindsDBQuery - -from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError - -from .utils.constants import ASSISTANT_COLUMN, SUPPORTED_PROVIDERS, PROVIDER_TO_MODELS -from .utils.pydantic_ai_model_factory import get_llm_provider -from .pydantic_ai_agent import check_agent_llm - -logger = log.getLogger(__name__) - -default_project = config.get("default_project") - - -def check_agent_data(data): - tables = data.get("tables", []) - knowledge_bases = data.get("knowledge_bases", []) - if tables or knowledge_bases: - sql_toolkit = MindsDBQuery(tables=tables, knowledge_bases=knowledge_bases) - - if tables and 
len(sql_toolkit.get_usable_table_names(lazy=False)) == 0: - raise ValueError(f"No tables found: {tables}") - - if knowledge_bases and len(sql_toolkit.get_usable_knowledge_base_names(lazy=False)) == 0: - raise ValueError(f"No knowledge bases found: {knowledge_bases}") - - -class AgentParamsData(BaseModel): - knowledge_bases: List[str] | None = None - tables: List[str] | None = None - - class Config: - extra = "forbid" - - -class AgentMode(Enum): - TEXT = "text" - SQL = "sql" - - -class AgentParams(BaseModel): - prompt_template: str | None = None - model: Dict[Text, Any] | None = None - data: AgentParamsData | None = None - timeout: int | None = None - mode: AgentMode = AgentMode.TEXT - - class Config: - extra = "forbid" - - -class AgentsController: - """Handles CRUD operations at the database level for Agents""" - - assistant_column = ASSISTANT_COLUMN - - def __init__( - self, - project_controller: ProjectController = None, - model_controller: ModelController = None, - ): - if project_controller is None: - project_controller = ProjectController() - if model_controller is None: - model_controller = ModelController() - self.project_controller = project_controller - self.model_controller = model_controller - - def check_model_provider(self, model_name: str, provider: str = None) -> Tuple[Optional[str], str]: - """ - Checks if a model exists, and gets the provider of the model. - - The provider is either the provider of the model or the provider given as an argument. 
- - Parameters: - model_name (str): The name of the model - provider (str): The provider to check - - Returns: - model (dict): The model object - provider (str): The provider of the model - """ - model = None - - # Handle the case when model_name is None (using default LLM) - if model_name is None: - return model, provider - - try: - model_name_no_version, model_version = Predictor.get_name_and_version(model_name) - model = self.model_controller.get_model(model_name_no_version, version=model_version) - provider = "mindsdb" if model.get("provider") is None else model.get("provider") - except PredictorRecordNotFound: - if not provider: - # If provider is not given, get it from the model name - provider = get_llm_provider({"model_name": model_name}) - - elif provider not in SUPPORTED_PROVIDERS and model_name not in PROVIDER_TO_MODELS.get(provider, []): - raise ValueError(f"Model with name does not exist for provider {provider}: {model_name}") - - return model, provider - - def get_agent(self, agent_name: str, project_name: str = default_project) -> Optional[db.Agents]: - """ - Gets an agent by name. - - Parameters: - agent_name (str): The name of the agent - project_name (str): The name of the containing project - must exist - - Returns: - agent (Optional[db.Agents]): The database agent object - """ - - project = self.project_controller.get(name=project_name) - agent_query = db.Agents.query.filter( - db.Agents.name == agent_name, - db.Agents.project_id == project.id, - db.Agents.company_id == ctx.company_id, - db.Agents.deleted_at == null(), - ) - if ctx.enforce_user_id: - agent_query = agent_query.filter(db.Agents.user_id == ctx.user_id) - return agent_query.first() - - def get_agent_by_id(self, id: int, project_name: str = default_project) -> db.Agents: - """ - Gets an agent by id. 
- - Parameters: - id (int): The id of the agent - project_name (str): The name of the containing project - must exist - - Returns: - agent (db.Agents): The database agent object - """ - - project = self.project_controller.get(name=project_name) - agent_query = db.Agents.query.filter( - db.Agents.id == id, - db.Agents.project_id == project.id, - db.Agents.company_id == ctx.company_id, - db.Agents.deleted_at == null(), - ) - if ctx.enforce_user_id: - agent_query = agent_query.filter(db.Agents.user_id == ctx.user_id) - return agent_query.first() - - def get_agents(self, project_name: str) -> List[dict]: - """ - Gets all agents in a project. - - Parameters: - project_name (str): The name of the containing project - must exist - - Returns: - all-agents (List[db.Agents]): List of database agent object - """ - - all_agents = db.Agents.query.filter(db.Agents.company_id == ctx.company_id, db.Agents.deleted_at == null()) - if ctx.enforce_user_id: - all_agents = all_agents.filter(db.Agents.user_id == ctx.user_id) - - if project_name is not None: - project = self.project_controller.get(name=project_name) - - all_agents = all_agents.filter(db.Agents.project_id == project.id) - - return all_agents.all() - - def add_agent( - self, - name: str, - project_name: str = None, - model: dict = None, - params: Dict[str, Any] = None, - ) -> db.Agents: - """ - Adds an agent to the database. - - Parameters: - name (str): The name of the new agent - project_name (str): The containing project - model: Dict, parameters for the model to use - - provider: The provider of the model (e.g., 'openai', 'google') - - Other model-specific parameters like 'api_key', 'model_name', etc. 
- - params (Dict[str, str]): Parameters to use when running the agent - data: Dict, data sources for an agent, keys: - - knowledge_bases: List of KBs to use - - tables: list of tables to use - _api_key: API key for the provider (e.g., openai_api_key) - - Returns: - agent (db.Agents): The created agent - - Raises: - EntityExistsError: Agent with given name already exists, or model with given name does not exist. - """ - if project_name is None: - project_name = default_project - project = self.project_controller.get(name=project_name) - - agent = self.get_agent(name, project_name) - - if agent is not None: - raise EntityExistsError("Agent already exists", name) - - # No need to copy params since we're not preserving the original reference - params = params or {} - params["model"] = model - - # check agent params - validate_pydantic_params(params, AgentParams, "agent") - - # check llm works - llm_params = self.get_agent_llm_params(model) - check_agent_llm(llm_params) - - # check data - data = params.get("data", {}) - if data: - check_agent_data(data) - - agent = db.Agents( - name=name, - project_id=project.id, - company_id=ctx.company_id, - user_id=ctx.user_id, - user_class=ctx.user_class, - params=params, - ) - - db.session.add(agent) - db.session.commit() - - return agent - - def update_agent( - self, - agent_name: str, - project_name: str = default_project, - name: str = None, - model: dict = None, - params: Dict[str, Any] = None, - ): - """ - Updates an agent in the database. 
- - Parameters: - agent_name (str): The name of the new agent, or existing agent to update - project_name (str): The containing project - name (str): The updated name of the agent - model dict: model parameters - params: (Dict[str, str]): Parameters to use when running the agent - - Returns: - agent (db.Agents): The created or updated agent - - Raises: - EntityExistsError: if agent with new name already exists - EntityNotExistsError: if agent with name not found - """ - - existing_agent = self.get_agent(agent_name, project_name=project_name) - if existing_agent is None: - raise EntityNotExistsError(f"Agent with name not found: {agent_name}") - existing_params = existing_agent.params or {} - - is_demo = (existing_agent.params or {}).get("is_demo", False) - if is_demo: - raise ValueError("It is forbidden to change properties of the demo object") - - if name is not None and name != agent_name: - # Check to see if updated name already exists - agent_with_new_name = self.get_agent(name, project_name=project_name) - if agent_with_new_name is not None: - raise EntityExistsError(f"Agent with updated name already exists: {name}") - existing_agent.name = name - - params = params or {} - - if model: - params["model"] = model - - if params: - validate_pydantic_params(params, AgentParams, "agent") - else: - # do nothing - return existing_agent - - if model: - # check llm works - llm_params = self.get_agent_llm_params(model) - check_agent_llm(llm_params) - - data = params.get("data", {}) - if data: - check_agent_data(data) - - # Merge params on update - existing_params.update(params) - # Remove None values entirely. - params = {k: v for k, v in existing_params.items() if v is not None} - existing_agent.params = params - # Some versions of SQL Alchemy won't handle JSON updates correctly without this. 
- # See: https://docs.sqlalchemy.org/en/20/orm/session_api.html#sqlalchemy.orm.attributes.flag_modified - flag_modified(existing_agent, "params") - db.session.commit() - - return existing_agent - - def delete_agent(self, agent_name: str, project_name: str = default_project): - """ - Deletes an agent by name. - - Parameters: - agent_name (str): The name of the agent to delete - project_name (str): The name of the containing project - - Raises: - ValueError: Agent does not exist. - """ - - agent = self.get_agent(agent_name, project_name) - if agent is None: - raise ValueError(f"Agent with name does not exist: {agent_name}") - if isinstance(agent.params, dict) and agent.params.get("is_demo") is True: - raise ValueError("Unable to delete demo object") - - agent.deleted_at = datetime.datetime.now() - db.session.commit() - - def get_agent_llm_params(self, model_params): - """ - Get agent LLM parameters by combining default config with user provided parameters. - Uses the same pattern as knowledge bases get_model_params function. - """ - - combined_model_params = copy.deepcopy(config.get("default_llm", {})) - - if model_params: - # If provider mismatches - don't use default values (same as knowledge bases) - if "provider" in model_params and model_params["provider"] != combined_model_params.get("provider"): - return model_params - - combined_model_params.update(model_params) - - # Remove use_default_llm flag if present - combined_model_params.pop("use_default_llm", None) - - return combined_model_params - - def get_completion( - self, - agent: db.Agents, - messages: list[Dict[str, str]], - project_name: str = default_project, - tools: list[Any] = None, - stream: bool = False, - params: dict | None = None, - ) -> Union[Iterator[object], pd.DataFrame]: - """ - Queries an agent to get a completion. 
- - Parameters: - agent (db.Agents): Existing agent to get completion from - messages (list[Dict[str, str]]): Chat history to send to the agent - project_name (str): Project the agent belongs to (default mindsdb) - tools (list[Any]): Tools to use while getting the completion - stream (bool): Whether to stream the response - params (dict | None): params to redefine agent params - - Returns: - response (Union[Iterator[object], pd.DataFrame]): Completion as a DataFrame or iterator of completion chunks - - Raises: - ValueError: Agent's model does not exist. - """ - # Extract SQL context from params if present - - from .pydantic_ai_agent import PydanticAIAgent - - # Get agent parameters and combine with default LLM parameters at runtime - llm_params = self.get_agent_llm_params(agent.params.get("model")) - - pydantic_agent = PydanticAIAgent(agent, llm_params=llm_params) - - if stream: - return pydantic_agent.get_completion(messages, stream=True, params=params) - else: - return pydantic_agent.get_completion(messages, params=params) diff --git a/mindsdb/interfaces/agents/chart_agent.py b/mindsdb/interfaces/agents/chart_agent.py deleted file mode 100644 index 0a609adcfc4..00000000000 --- a/mindsdb/interfaces/agents/chart_agent.py +++ /dev/null @@ -1,339 +0,0 @@ -"""Chart generation agent using Pydantic AI""" - -from typing import Optional, Dict, Any -import copy -import pandas as pd - -from pydantic_ai import Agent -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast import Identifier, Constant, OrderBy - -from mindsdb.utilities import log -from mindsdb.interfaces.agents.utils.chart_toolkit import ChartConfig -from mindsdb.interfaces.agents.utils.pydantic_ai_model_factory import get_model_instance_from_kwargs -from mindsdb.interfaces.agents.modes import prompts -from mindsdb.utilities.exception import QueryError -from mindsdb.integrations.utilities.query_traversal import query_traversal -from mindsdb.api.executor.utilities.sql import query_dfs - - -logger 
= log.getLogger(__name__) - - -def _replace_table_refs_with_df(query_ast): - """Replace all table references in the query AST with the identifier 'df' (in-place). - Chart-generated queries may use subqueries and reference the original table; we run - against a single dataframe, so all table refs must point to 'df'. - """ - - def replace_table(node, is_table, **kwargs): - if is_table and isinstance(node, Identifier): - node.parts = ["df"] - - query_traversal(query_ast, replace_table) - - -def _fix_order_by_string_literals(query_ast): - """Replace ORDER BY string literals (Constant) with column identifiers (Identifier) in-place. - DuckDB rejects ORDER BY non-integer literals; the LLM often generates ORDER BY 'column alias'. - Converting to Identifier ensures the rendered SQL uses ORDER BY "column alias" (valid). - """ - - def fix_order_by(node, **kwargs): - if isinstance(node, OrderBy) and node.field is not None and isinstance(node.field, Constant): - node.field = Identifier(parts=[str(node.field.value)]) - - query_traversal(query_ast, fix_order_by) - - -def _prepare_chart_data_query(data_query_string: str, df: pd.DataFrame): - """Parse chart-generated SQL, fix common LLM mistakes (table refs, ORDER BY literals), run on df.""" - query_ast = parse_sql(data_query_string) - _replace_table_refs_with_df(query_ast) - _fix_order_by_string_literals(query_ast) - return query_dfs({"df": df}, query_ast, session=None) - - -class ChartAgent: - """Lightweight agent for generating Chart.js configurations""" - - MAX_RETRIES = 3 - - def __init__(self, executor=None): - """ - Initialize Chart Agent using system default LLM configuration. - - Args: - executor: FakeMysqlProxy instance for executing queries. If None, will create one when needed. 
- """ - self.args = self._initialize_args() - - # Provider model instance - self.model_instance = get_model_instance_from_kwargs(self.args) - - # System prompt for chart generation - self.system_prompt = self.args.get( - "prompt_template", "You are an expert at generating Chart.js configurations from SQL queries" - ) - - # Executor for query execution - self.executor = executor - - def _initialize_args(self) -> dict: - """ - Initialize the arguments for agent execution using system default LLM config. - - Returns: - dict: Final parameters for agent execution - """ - from mindsdb.utilities.config import config - - # Get default LLM config from system config - args = copy.deepcopy(config.get("default_llm", {})) - - # Remove use_default_llm flag if present - args.pop("use_default_llm", None) - - if "model_name" not in args: - raise ValueError("No model name provided for chart agent. Please configure default_llm in system settings.") - - return args - - def _generate_data_catalog( - self, - query: str, - df: pd.DataFrame, - ) -> str: - """ - Generate a data catalog by executing the query with LIMIT 5 and analyzing the results. - - Args: - query: SQL query string - df: dataframe with all data that got by executing query - - Returns: - str: Data catalog string with sample data and column analysis - """ - sample_df = df.head(5) - if sample_df.empty: - return "Data Catalog:\nNote: Query returned no rows." 
- - # Build data catalog string - catalog_parts = ["=== DATA CATALOG ==="] - catalog_parts.append(f"\nSample Query:\n{query}") - - # Column analysis using column metadata from result_set - catalog_parts.append("\nColumn Analysis:") - - for col_name in sample_df.columns: - col_type = str(sample_df[col_name].dtype) - # Map pandas dtypes to more readable types - if "int" in col_type: - type_desc = "integer" - elif "float" in col_type: - type_desc = "float" - elif "bool" in col_type: - type_desc = "boolean" - elif "datetime" in col_type or "timestamp" in col_type: - type_desc = "datetime/timestamp" - elif "object" in col_type or "string" in col_type: - type_desc = "string/text" - else: - type_desc = col_type - - # Get sample values (non-null) - sample_values = sample_df[col_name].dropna().head(3).tolist() - sample_str = ", ".join(str(v) for v in sample_values[:3]) - - catalog_parts.append(f" - {col_name}: {type_desc}") - if sample_str: - catalog_parts.append(f" Sample values: {sample_str}") - - # Sample data as CSV-like format - catalog_parts.append("\nSample Data (first 5 rows):") - # Convert DataFrame to CSV string - catalog_parts.append(sample_df.to_csv(index=False, lineterminator="\n")) - - return "\n".join(catalog_parts) - - def generate_chart_config( - self, - query: str, - df: pd.DataFrame, - prompt: Optional[str] = None, - error_context: Optional[str] = None, - retry_count: Optional[int] = None, - ) -> ChartConfig: - """ - Generate Chart.js configuration and data query from SQL query. 
- - Args: - query: SQL query string - df: dataframe with all data that got by executing query - prompt: Optional prompt describing chart intent - error_context: Optional error context from previous failed attempts - retry_count: Optional retry attempt number for error messages - - Returns: - ChartConfig: Pydantic model with chartjs_config and data_query_string - """ - # Generate data catalog - data_catalog = self._generate_data_catalog(query, df) - - # Create agent with ChartConfig as output type - agent = Agent(self.model_instance, system_prompt=self.system_prompt, output_type=ChartConfig) - - # Build prompt for chart generation - chart_prompt = f"""Given the following SQL query and optional intent, generate a Chart.js configuration and data transformation query. - -SQL Query: -{query} -""" - - if prompt: - chart_prompt += f"\nChart Intent: {prompt}\n" - - chart_prompt += f"\nSample Data Catalog:\n{data_catalog}\nInstructions:\n{prompts.sql_description}\n\n{prompts.chart_generation_prompt}" - - # Add error context if provided (for retry attempts) - if error_context: - chart_prompt += f"\n\nPrevious query errors:\n{error_context}" - if retry_count is not None: - chart_prompt += f"\n\nPlease fix the query and try again. This is retry attempt {retry_count} of {self.MAX_RETRIES}." 
- - logger.debug("ChartAgent.generate_chart_config: Sending prompt to LLM") - - try: - # Generate chart config - result = agent.run_sync(chart_prompt) - chart_config = result.output - - logger.debug( - f"ChartAgent.generate_chart_config: Received chart config with type: {chart_config.chartjs_config.get('type', 'unknown')}" - ) - - return chart_config - except Exception as e: - logger.error(f"ChartAgent.generate_chart_config: Error generating chart config: {e}") - # Log the raw result if available for debugging - if hasattr(e, "result") and hasattr(e.result, "output"): - logger.debug(f"ChartAgent.generate_chart_config: Raw output: {e.result.output}") - raise - - def generate_chart_with_data( - self, - query: str, - df: pd.DataFrame, - prompt: Optional[str] = None, - ) -> Dict[str, Any]: - """ - Generate Chart.js configuration, execute data query, and populate datasets. - Args: - query: SQL query string - df: dataframe with all data that got by executing query - prompt: Optional prompt describing chart intent - context: Optional context dict for query execution - params: Optional params dict for query execution - Returns: - dict: Dictionary with 'data_query_string' and populated 'chartjs_config' - Raises: - QueryError: If query execution fails - ValueError: If data structure is invalid - """ - - # Initialize retry tracking - retry_count = 0 - accumulated_errors = [] - # Use provided executor or create a new one - - # Retry loop for query execution - while retry_count <= self.MAX_RETRIES: - # Generate chart configuration (with data catalog and error context if retrying) - error_context = None - if accumulated_errors: - # Format error context from accumulated errors (keep last 3) - error_context = "\n---\n".join(accumulated_errors[-3:]) - logger.debug( - f"ChartAgent.generate_chart_with_data: Regenerating chart config with error context (attempt {retry_count}/{self.MAX_RETRIES})" - ) - - chart_config = self.generate_chart_config( - query, df, prompt, 
error_context=error_context, retry_count=retry_count if retry_count > 0 else None - ) - try: - logger.debug( - f"ChartAgent.generate_chart_with_data: Executing transformed query on provided dataframe: {chart_config.data_query_string[:100]}..." - ) - data_df = _prepare_chart_data_query(chart_config.data_query_string, df) - - if data_df.empty: - raise ValueError( - "Data query returned no rows. Please check your query filters or data availability." - ) - # Validate DataFrame structure - if len(data_df.columns) < 2: - raise ValueError( - f"Data query must return at least 2 columns (labels and at least one dataset). Got {len(data_df.columns)} column(s)." - ) - # Populate Chart.js config with data - chartjs_config = chart_config.chartjs_config.copy() - # First column is labels - labels = data_df.iloc[:, 0].tolist() - chartjs_config["labels"] = labels - # Remaining columns are datasets - existing_datasets = chartjs_config.get("datasets", []) - num_data_columns = len(data_df.columns) - 1 # Excluding labels column - # If datasets is empty or doesn't match column count, create datasets from columns - if not existing_datasets or len(existing_datasets) != num_data_columns: - datasets = [] - for col_idx in range(1, len(data_df.columns)): - col_name = data_df.columns[col_idx] - dataset = {"label": str(col_name), "data": []} - # Try to preserve properties from existing dataset if available - dataset_idx = col_idx - 1 - if dataset_idx < len(existing_datasets): - existing_dataset = existing_datasets[dataset_idx] - # Copy properties like backgroundColor, borderColor, etc. 
- for key in ["backgroundColor", "borderColor", "borderWidth", "fill"]: - if key in existing_dataset: - dataset[key] = existing_dataset[key] - datasets.append(dataset) - else: - # Use existing datasets structure, just populate data - datasets = existing_datasets - # Populate data arrays - for dataset_idx, dataset in enumerate(datasets): - col_idx = dataset_idx + 1 - if col_idx < len(data_df.columns): - dataset["data"] = data_df.iloc[:, col_idx].tolist() - chartjs_config["datasets"] = datasets - # Return response - return {"data_query_string": chart_config.data_query_string, "chartjs_config": chartjs_config} - except (QueryError, ValueError) as e: - # Extract error message - if isinstance(e, QueryError): - error_message = e.db_error_msg or str(e) - failed_query = ( - e.failed_query - if hasattr(e, "failed_query") - else (chart_config.data_query_string if "chart_config" in locals() else query) - ) - else: - error_message = str(e) - failed_query = chart_config.data_query_string if "chart_config" in locals() else query - # Accumulate error for context - accumulated_errors.append(f"Query: {failed_query}\nError: {error_message}") - # Check if we should retry - if retry_count < self.MAX_RETRIES: - logger.warning( - f"ChartAgent.generate_chart_with_data: Query execution failed (retry {retry_count + 1}/{self.MAX_RETRIES}): {error_message}" - ) - retry_count += 1 - # Continue loop to retry with new chart config - continue - else: - # All retries exhausted, raise the error - logger.error( - f"ChartAgent.generate_chart_with_data: Query execution failed after {self.MAX_RETRIES} retries. 
Last error: {error_message}" - ) - raise e diff --git a/mindsdb/interfaces/agents/modes/__init__.py b/mindsdb/interfaces/agents/modes/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/interfaces/agents/modes/base.py b/mindsdb/interfaces/agents/modes/base.py deleted file mode 100644 index 1ec13d9242c..00000000000 --- a/mindsdb/interfaces/agents/modes/base.py +++ /dev/null @@ -1,18 +0,0 @@ -from pydantic import BaseModel, Field - - -class PlanResponse(BaseModel): - plan: str = Field( - ..., description="A step-by-step plan for solving the question, identifying data sources and steps needed" - ) - estimated_steps: int = Field(..., description="Estimated number of steps needed to solve the question") - - -class TestResponse(BaseModel): - text: str = Field(..., description="Text response to the user") - - -class ResponseType: - FINAL_QUERY = "final_query" # this is the final query - EXPLORATORY = "exploratory_query" # this is a query to explore and collect info to solve the challenge (e.g., distinct values of a categorical column, schema inference, etc.) - FINAL_TEXT = "final_text" # this is the final query diff --git a/mindsdb/interfaces/agents/modes/prompts.py b/mindsdb/interfaces/agents/modes/prompts.py deleted file mode 100644 index bab009e1b30..00000000000 --- a/mindsdb/interfaces/agents/modes/prompts.py +++ /dev/null @@ -1,280 +0,0 @@ -sql_description = """ -MindsDB SQL is mostly compatible with MySQL and DuckDB syntax. - -- ONLY use tables, views, and predictors that appear in the Data Catalog provided to you. Never reference a table or model (e.g. mindsdb.sentiment_analyzer) that is not listed in the catalog—referencing a non-existent table causes "X not found. Available tables: [...]". If you need sentiment or other analysis, use only the tables from the catalog and express the logic in SQL. -- When writing the SQL query, make sure the select explicit names for the columns accordingly to the question. 
- -Example: -SELECT movie_id, movie_description, age, name FROM someschema.movies WHERE whatever...; -Instead of: -SELECT * FROM somedb.movies WHERE whatever...; - -- When composing JOIN queries, qualify every referenced column with its table (or table alias) (e.g., `movies.title`) so it is always clear which table provides each column. - -- Date math & windows - -Prefer simple interval arithmetic over dialect-specific functions. -To subtract months: max_ts - INTERVAL 8 MONTH -To subtract days: max_ts - INTERVAL 30 DAY - -Date types might be stored in string format, if you have an error related to it (e.g. `No operator matches the given name and argument types`), use explicit type cast: -CAST(max_ts AS TIMESTAMP) - INTERVAL 30 DAY - -Use DATE_TRUNC('month', timestamp_expression) for month bucketing. - -- Monthly aggregation pattern - -When asked for “last N months from the most recent date in column X”, follow this structure: - -SELECT - DATE_TRUNC('month', CAST(X AS TIMESTAMP)) AS month, - COUNT(*) AS total -FROM . -WHERE CAST(X AS TIMESTAMP) >= ( - SELECT DATE_TRUNC( - 'month', - MAX(CAST(X AS TIMESTAMP)) - INTERVAL MONTH - ) - FROM .
-) -GROUP BY month -ORDER BY month; - - -Replace with the number of months to go back excluding the current one (for “past 9 months including current”, use 8). - -- COUNT and aggregates - -Use COUNT(*) or COUNT(column). Never use COUNT() with empty parentheses. - -If you change the SELECT list, update GROUP BY accordingly (or use GROUP BY 1, 2, ...). - -- Ensure that all columns in the SELECT clause are either aggregated (using functions like SUM, COUNT, etc.) or explicitly included in the GROUP BY clause. -- Error handling behavior - -When you see an error like “function X does not exist”, do not try random alternative names (e.g., dateadd → DATE_ADD). - -Instead, rewrite the logic using: - -Intervals (ts - INTERVAL 8 MONTH), or - -Simpler built-ins that you know are valid (e.g., just DATE_TRUNC with interval arithmetic). - -- If an error says “requires 2 positional arguments, but 3 were provided”, remove the extra argument rather than reshuffling parameter order. -- If a MySQL function is not supported by MindsDB, try the DuckDB equivalent function. -- If you are unsure of the values of a possible categorical column, you can always write a query to explore the distinct values of that column to understand the data. -- If Metadata about a table is unknown, assume that all columns are of type varchar. -- When casting varchars to something else simply use the CAST function, for example: CAST(year AS INTEGER), or CAST(year AS FLOAT), or CAST(year AS DATE), or CAST(year AS BOOLEAN), etc. -- ALWAYS: When writing queries that involve time, use the time functions in MindsDB SQL, or duckdb functions. -- ALWAYS: Include the name of the schema/database in query, for example, instead of `SELECT * FROM movies WHERE ...` write `SELECT * FROM somedb.movies WHERE ...`; -- ALWAYS: When columns contain spaces, special characters or are reserved words, use backticks (`) to quote the column name, for example, `column name` instead of [column name]. 
-- `ILIKE` is only supported with some data sources; for portable case-insensitive matching use LOWER(column) LIKE LOWER('%pattern%') instead of column ILIKE '%pattern%'. -- ALWAYS: Use the exact same letter case for table and column names as they appear in the Data Catalog. For example, if the catalog lists a column as `CreatedAt`, write `CreatedAt` in your query — not `createdat` or `CREATEDAT`. Mismatched case may cause "column not found" errors. -""" - -sql_with_kb_description = """ - -MindsDB SQL is compatible with MySQL and DuckDB syntax, with additional features for knowledge bases. - -When the question requires to filter by something semantically, use the knowledge bases available when possible. -You can determine what knowledge bases are relevant given the data catalog. - -Example: -Knowledge Base Metadata: -kb,kb_insert_query,parent_query_id_column,parent_query_content_columns,parent_query_metadata_columns -mindsdb.somekb,"INSERT INTO somekb SELECT movie_id AS `id`, movie_description AS content FROM somedb.movies",id,content,age, name - -This tells you that somekb is a knowledge base that was created from the movies table -and that you can use it so search information about the movies descriptions, which is better than -trying to do keyword search on the movies table. For that here is a detailed description of how to use knowledge bases. - -**Knowledge Base Queries:** -Knowledge bases are semantic search tables that allow you to find relevant entries using semantic text search and metadata filtering. 
- -Example queries: -- Semantic search: - `SELECT * FROM mindsdb.kb_name WHERE content LIKE 'your semantic search query' AND relevance >= 0.5` -- Metadata filtering: - `SELECT * FROM mindsdb.kb_name WHERE metadata_column = 'value'` -- Combined semantic and metadata filtering: - `SELECT * FROM mindsdb.kb_name WHERE content LIKE 'your semantic search query' AND metadata_column = 'value' AND relevance >= 0.5` - -Where output columns will be: id,chunk_id,chunk_content,,relevance,distance -use `relevance > 0.5` to filter for relevant results. - -From the knowledge base, you can identify where id came from, and what content comes from, so when you SELECT, you can rename columns accordingly. - -For example, to find up to 10,000 movies that are excellent, not horror, and have an age group of PG-13 or higher (assuming a metadata column named "age"): -instead of searching the movies table semantically SELECT * FROM somedb.movies WHERE age >= 13 AND description LIKE '%excellent%' LIMIT 10000; -which is prone to missing results as it is likely to miss results where people write similar things to excellent and horror, but in a different way. -as such, you can search the knowledge base, which does not require any exact matches, it will filter by most relevant results. 
-``` -SELECT - id as movie_id, - chunk_id as movie_description_chunk_id, - chunk_content as movie_description_chunk, - age as rated_age, -- assuming age is a metadata column - name as movie_name, -- assuming name is a metadata column - relevance -FROM mindsdb.movies_kb -WHERE content LIKE 'excellent, not horror' AND age >= 13 AND relevance >= 0.5 -LIMIT 10000; -``` - - -or since there can be multiple chunks for a movie that match the query you can aggregate results: - -``` -SELECT - id AS movie_id, - LIST(chunk_id) AS movie_description_chunk_ids, - LIST(chunk_content) AS movie_description_chunks, - MAX(relevance) AS max_relevance, -- the most relevant chunk - age AS rated_age, -- assuming age is a metadata column, same for each id - name AS movie_name -- assuming name is a metadata column, same for each id -FROM mindsdb.movies_kb -WHERE content LIKE 'excellent, not horror' AND age >= 13 AND relevance >= 0.5 -GROUP BY id, age, name -ORDER BY max_relevance DESC -LIMIT 10000; -``` - - -Suppose you have a table `movies_data` from which the knowledge base was created. To answer: "Show the count of excellent, non-horror, PG-13+ movies by release year": - -``` -SELECT release_year, COUNT(*) FROM db.movies_data -WHERE movie_id IN ( - SELECT DISTINCT id FROM mindsdb.movies_kb - WHERE content LIKE 'excellent, not horror' AND age >= 13 AND relevance >= 0.5 LIMIT 10000 -) -GROUP BY release_year; -``` - -- NEVER use `ILIKE` in knowledge base `content` condition (ILIKE is not supported in knowledge bases) only LIKE is supported. - -- AVOID direct joins between tables and knowledge bases. Instead, use `WHERE IN (SELECT DISTINCT id FROM knowledge_base) ...` -OR use the knowledge base as a subquery and join on that, for example: -`SELECT * FROM
t JOIN (SELECT id, LIST(chunk_content) FROM knowledge_base WHERE content LIKE 'your semantic search query' AND metadata_col=something ... GROUP BY id LIMIT 10000 ) AS kb ON t. = kb.id WHERE ... ` -The WHERE clause for the tables conditions must come AFTER all JOINs are completed - -- ALWAYS: It is important to set an appropriate LIMIT on knowledge base queries to avoid missing results; the default limit is 10, so if you need more than 10, set it accordingly. When unsure LIMIT 10000 is recommended. - -- When writing the SQL query, make sure the select renames the columns accordingly to the question. - -""" - -markdown_instructions = """ -**IMPORTANT FORMATTING REQUIREMENTS:** -- Always format your responses in Markdown -- When presenting tabular data or query results, organize them as Markdown tables -- Use proper Markdown table syntax with headers and aligned columns -- For example: -| Column1 | Column2 | Column3 | -|---------|---------|---------| -| Value1 | Value2 | Value3 | -- Use other Markdown formatting (headers, lists, code blocks) as appropriate to make responses clear and well-structured -""" - -planning_prompt_base = """ -Before writing any SQL queries, create a plan for how to solve the question. - -The plan should: -1. Identify what data sources (tables or knowledge bases) are relevant to answer the question -1.1 When referring to tables or knowledge bases in the plan. Always include the name of the schema/database in the plan along with the table name. For example: database_name.table_name or database_name.knowledge_base_name -2. Outline the steps needed to solve the question, each step may correspond to some exploration query that you may need to do, describe the exploratory step, but do not write the query. 
-2.1 {response_instruction} -2.2 Note: exploratory steps, can be for example: -- if we can see that we will filter by the value of one column that is categorical we may need to first explore the DISTINCT values of that column to understand what values to filter by in WHERE col= -3. Specify what information might need to be explored or collected -4. Keep the number of steps to a minimum (try to solve with as few steps as possible) -5. Maximum number of steps should not exceed - -Do NOT write any SQL queries in the plan. Just describe: -- What data you will use -- What steps you will take -- What information you might need to explore first - -Example plan format: -Step 1: Sample table X to understand available columns and data -Step 2: Check distinct values in column Y to understand the data -Step 3: Query table X with filters based on the question requirements -Step 4: Run a test query with a LIMIT 10 to make sure the query is working as expected -Step 5: Aggregate results as needed - -Keep steps concise and focused on solving the question efficiently. -""" - -chart_generation_prompt = """ -You are an expert at generating Chart.js configurations from SQL queries. Your task is to: - -IMPORTANT CONTEXT - HOW THE CHART WILL BE USED: -The chart configuration you generate will be used directly in the frontend with Chart.js. The frontend code will be: -```javascript -const ctx = document.getElementById('myChart'); -const chart = new Chart(ctx, chartConfig); -``` -Where `chartConfig` is the exact dictionary you generate in the `chartjs_config` field. - -This means: -- Your configuration must be a complete, valid Chart.js configuration object -- It must be compatible with Chart.js v3+ API -- It will be passed directly to `new Chart(ctx, chartConfig)` without any modifications -- The configuration must include all required fields (especially 'type') and be ready for immediate use - -1. Analyze the provided SQL query to understand the data structure and relationships -2. 
Determine the most appropriate chart type from: 'line', 'bar', 'pie', or 'doughnut' -3. Generate a Chart.js configuration dictionary with the following REQUIRED structure: - - `type`: REQUIRED - MUST always be included. One of 'line', 'bar', 'pie', or 'doughnut'. This field is MANDATORY and must never be omitted. - - `options`: Chart.js options object (e.g., responsive, plugins.title, scales for line/bar charts) - - `labels`: Empty array [] (will be populated from the first column of query results) - - `datasets`: Array of dataset objects, each with: - - `label`: The column name (from the data query, excluding the first column) - - `data`: Empty array [] (will be populated programmatically from query results) - - Additional dataset-specific properties (e.g., `backgroundColor`, `borderColor` for line/bar charts) - - IMPORTANT: The 'type' field is REQUIRED and MUST always be present in the chartjs_config. You MUST include it in every response. Choose the most appropriate chart type for the data unless the user explicitly asks for a different chart type. - - Make sure you specify if needed the axis scales and types - -4. Generate a data transformation SQL query string with the following format: - SELECT labels, , , ... - FROM ( -
order by desc limit 1" - """ - last_values = {} - for query, info in l_query.get_init_queries(): - response = dn.query(query=query, session=session) - data = response.data_frame - columns_info = response.columns - - if len(data) == 0: - value = None - else: - row = list(data.iloc[0]) - - idx = None - for i, col in enumerate(columns_info): - if col.name.upper() == info["column_name"].upper(): - idx = i - break - - if idx is None or len(row) == 1: - value = row[0] - else: - value = row[idx] - - if value is not None: - last_values[info["table_name"]] = {info["column_name"]: value} - - return last_values - - # Context - - def get_current_context(self) -> str: - """ - returns current context name - """ - try: - context_stack = ctx.context_stack or [] - except AttributeError: - context_stack = [] - if len(context_stack) > 0: - return context_stack[-1] - else: - return "" - - def set_context(self, object_type: str = None, object_id: int = None): - """ - Updates current context name, using object name and id - Previous context names are stored on lower levels of stack - """ - try: - context_stack = ctx.context_stack or [] - except AttributeError: - context_stack = [] - context_stack.append(self.gen_context_name(object_type, object_id)) - ctx.context_stack = context_stack - - def release_context(self, object_type: str = None, object_id: int = None): - """ - Removed current context (defined by object type and id) and restored previous one - """ - try: - context_stack = ctx.context_stack or [] - except AttributeError: - context_stack = [] - if len(context_stack) == 0: - return - context_name = self.gen_context_name(object_type, object_id) - if context_stack[-1] == context_name: - context_stack.pop() - ctx.context_stack = context_stack - - def gen_context_name(self, object_type: str, object_id: int) -> str: - """ - Generated name of the context according to object type and name - :return: context name - """ - - if object_type is None: - return "" - if object_id is not None: - 
object_type += "-" + str(object_id) - return object_type - - def get_context_vars(self, object_type: str, object_id: int) -> List[dict]: - """ - Return variables stored in context (defined by object type and id) - - :return: list of all context variables related to context name how they stored in context table - """ - context_name = self.gen_context_name(object_type, object_id) - vars = [] - query = db.session.query(db.QueryContext).filter_by(context_name=context_name, company_id=ctx.company_id) - if ctx.enforce_user_id: - query = query.filter(db.QueryContext.user_id == ctx.user_id) - for rec in query: - if rec.values is not None: - vars.append(rec.values) - - return vars - - # DB - def _get_context_record(self, context_name: str, query_str: str) -> db.QueryContext: - """ - Find and return record for context and query string - """ - - query = db.session.query(db.QueryContext).filter_by( - query=query_str, - context_name=context_name, - company_id=ctx.company_id, - ) - if ctx.enforce_user_id: - query = query.filter(db.QueryContext.user_id == ctx.user_id) - return query.first() - - def __add_context_record(self, context_name: str, query_str: str, values: dict) -> db.QueryContext: - """ - Creates record (for context and query string) with values and returns it - """ - rec = db.QueryContext( - query=query_str, - context_name=context_name, - company_id=ctx.company_id, - user_id=ctx.user_id, - values=values, - ) - db.session.add(rec) - return rec - - def __update_context_record(self, context_name: str, query_str: str, values: dict): - """ - Updates context record with new values - """ - rec = self._get_context_record(context_name, query_str) - rec.values = values - db.session.commit() - - def get_query(self, query_id: int) -> RunningQuery: - """ - Get running query by id - """ - - query = db.Queries.query.filter(db.Queries.id == query_id, db.Queries.company_id == ctx.company_id) - if ctx.enforce_user_id: - query = query.filter(db.Queries.user_id == ctx.user_id) - rec = 
query.first() - - if rec is None: - raise RuntimeError(f"Query not found: {query_id}") - return RunningQuery(rec) - - def create_query(self, query: ASTNode, database: str = None) -> RunningQuery: - """ - Create a new running query from AST query - """ - - # remove old queries - filters = [ - db.Queries.company_id == ctx.company_id, - db.Queries.finished_at < (dt.datetime.now() - dt.timedelta(days=1)), - ] - if ctx.enforce_user_id: - filters.append(db.Queries.user_id == ctx.user_id) - remove_query = db.session.query(db.Queries).filter(*filters) - for rec in remove_query.all(): - self.get_query(rec.id).remove_from_task() - db.session.delete(rec) - - rec = db.Queries( - sql=str(query), - database=database, - company_id=ctx.company_id, - user_id=ctx.user_id, - ) - - db.session.add(rec) - db.session.commit() - return RunningQuery(rec) - - def list_queries(self) -> List[dict]: - """ - Get list of all running queries with metadata - """ - - query = db.session.query(db.Queries).filter(db.Queries.company_id == ctx.company_id) - if ctx.enforce_user_id: - query = query.filter(db.Queries.user_id == ctx.user_id) - return [RunningQuery(record).get_info() for record in query] - - def cancel_query(self, query_id: int): - """ - Cancels running query by id - """ - query = db.Queries.query.filter(db.Queries.id == query_id, db.Queries.company_id == ctx.company_id) - if ctx.enforce_user_id: - query = query.filter(db.Queries.user_id == ctx.user_id) - rec = query.first() - if rec is None: - raise RuntimeError(f"Query not found: {query_id}") - - self.get_query(rec.id).remove_from_task() - - # the query in progress will fail when it tries to update status - db.session.delete(rec) - db.session.commit() - - -query_context_controller = QueryContextController() diff --git a/mindsdb/interfaces/query_context/last_query.py b/mindsdb/interfaces/query_context/last_query.py deleted file mode 100644 index 7e00a08c846..00000000000 --- a/mindsdb/interfaces/query_context/last_query.py +++ /dev/null @@ 
-1,291 +0,0 @@ -from typing import Union, List -import copy -from collections import defaultdict - -from mindsdb_sql_parser.ast import ( - Identifier, - Select, - BinaryOperation, - Last, - Constant, - Star, - ASTNode, - NullConstant, - OrderBy, - Function, - TypeCast, -) -from mindsdb.integrations.utilities.query_traversal import query_traversal - - -class LastQuery: - """ - Wrapper for AST query. - Intended to ind, track, update last values in query - """ - - def __init__(self, query: ASTNode): - self.query_orig = None - self.query = None - - # check query type - if not isinstance(query, Select): - # just skip it - return - - self.last_idx = defaultdict(list) - last_tables = self._find_last_columns(query) - if last_tables is None: - return - - self.query = query - - self.last_tables = last_tables - - def _find_last_columns(self, query: ASTNode) -> Union[dict, None]: - """ - This function: - - Searches LAST column in the input query - - Replaces it with constants and memorises link to these constants - - Link to constants will be used to inject values to query instead of LAST - - Provide checks: - - if it is possible to find the table for column - - if column in select target - - Generates and returns last_column variable which is dict - last_columns[table_name] = { - 'table':
, - 'column': , - 'links': [, ... ], - 'target_idx': , - 'gen_init_query': if true: to generate query to initial values for LAST - } - """ - - # index last variables in query - tables_idx = defaultdict(dict) - conditions = [] - - def replace_last_in_tree(node: ASTNode, injected: Constant): - """ - Recursively searches LAST in AST tree. Goes only into functions and type casts - When LAST is found - it is replaced with injected constant - """ - # go into functions and type casts - if isinstance(node, TypeCast): - if isinstance(node.arg, Last): - node.arg = injected - return injected - return replace_last_in_tree(node.arg, injected) - if isinstance(node, Function): - for i, arg in enumerate(node.args): - if isinstance(arg, Last): - node.args[i] = injected - return injected - found = replace_last_in_tree(arg, injected) - if found: - return found - - def index_query(node, is_table, parent_query, **kwargs): - parent_query_id = id(parent_query) - last = None - if is_table and isinstance(node, Identifier): - # memorize table - tables_idx[parent_query_id][node.parts[-1]] = node - if node.alias is not None: - tables_idx[parent_query_id][node.alias.parts[-1]] = node - - # find last in where - if isinstance(node, BinaryOperation): - if isinstance(node.args[0], Identifier): - col = node.args[0] - gen_init_query = True - - # col > last - if isinstance(node.args[1], Last): - last = Constant(None) - # inject constant - node.args[1] = last - - # col > coalesce(last, 0) OR col > cast(coalense(last ...)) - else: - injected = Constant(None) - last = replace_last_in_tree(node.args[1], injected) - gen_init_query = False - - if last is not None: - # memorize - conditions.append( - { - "query_id": parent_query_id, - "condition": node, - "last": last, - "column": col, - "gen_init_query": gen_init_query, # generate query to fetch initial last values from table - } - ) - - # find lasts - query_traversal(query, index_query) - - if len(conditions) == 0: - return - - self.query_orig = 
copy.deepcopy(query) - - for info in conditions: - self.last_idx[info["query_id"]].append(info) - - # index query targets - query_id = id(query) - tables = tables_idx[query_id] - is_star_in_target = False - target_idx = {} - for i, target in enumerate(query.targets): - if isinstance(target, Star): - is_star_in_target = True - continue - elif not isinstance(target, Identifier): - continue - - col_name = target.parts[-1] - if len(target.parts) > 1: - table_name = target.parts[-2] - table = tables.get(table_name) - elif len(tables) == 1: - table = list(tables.values())[0] - else: - continue - - target_idx[(table.parts[-1], col_name)] = i - - # make info about query - - last_columns = {} - for parent_query_id, items in self.last_idx.items(): - for info in items: - col = info["column"] - last = info["last"] - tables = tables_idx[parent_query_id] - - uniq_tables = len(set([id(v) for v in tables.values()])) - if len(col.parts) > 1: - table = tables.get(col.parts[-2]) - if table is None: - raise ValueError("cant find table") - elif uniq_tables == 1: - table = list(tables.values())[0] - else: - # or just skip it? - raise ValueError("cant find table") - - col_name = col.parts[-1] - - table_name = table.parts[-1] - if table_name not in last_columns: - # check column in target - target_idx = target_idx.get((table_name, col_name)) - if target_idx is None: - if is_star_in_target: - # will try to get by name - ... 
- else: - raise ValueError("Last value should be in query target") - - last_columns[table_name] = { - "table": table, - "column": col_name, - "links": [last], - "target_idx": target_idx, - "gen_init_query": info["gen_init_query"], - } - - elif last_columns[table_name]["column"] == col_name: - last_columns[table_name]["column"].append(last) - else: - raise ValueError("possible to use only one column") - - return last_columns - - def to_string(self) -> str: - """ - String representation of the query - Used to identify query in query_context table - """ - query = self.query_orig - if isinstance(query.from_table, Select) and query.targets == [Star()]: - # simplify nested query - if ( - query.group_by is None - and query.order_by is None - and query.having is None - and query.distinct is False - and query.where is None - and query.limit is None - and query.offset is None - and query.cte is None - ): - query = copy.deepcopy(query.from_table) - query.parentheses = False - query.alias = None - - return query.to_string() - - def get_last_columns(self) -> List[dict]: - """ - Return information about LAST columns in query - :return: - """ - return [ - { - "table": info["table"], - "table_name": table_name, - "column_name": info["column"], - "target_idx": info["target_idx"], - "gen_init_query": info["gen_init_query"], - } - for table_name, info in self.last_tables.items() - ] - - def apply_values(self, values: dict) -> ASTNode: - """ - Fills query with new values and return it - """ - for table_name, info in self.last_tables.items(): - value = values.get(table_name, {}).get(info["column"]) - for last in info["links"]: - last.value = value - - return self.query - - def get_init_queries(self): - """ - A generator of queries to get initial value of the last - """ - - back_up_values = [] - # replace values - for items in self.last_idx.values(): - for info in items: - node = info["condition"] - back_up_values.append([node.op, node.args[1]]) - node.op = "is not" - node.args[1] = 
NullConstant() - - query2 = copy.deepcopy(self.query) - - # return values - for items in self.last_idx.values(): - for info in items: - node = info["condition"] - op, arg1 = back_up_values.pop(0) - node.op = op - node.args[1] = arg1 - - for info in self.get_last_columns(): - if not info["gen_init_query"]: - continue - col = Identifier(info["column_name"]) - query2.targets = [col] - query2.order_by = [OrderBy(col, direction="DESC")] - query2.limit = Constant(1) - yield query2, info diff --git a/mindsdb/interfaces/query_context/query_task.py b/mindsdb/interfaces/query_context/query_task.py deleted file mode 100644 index 97cbbdcbf26..00000000000 --- a/mindsdb/interfaces/query_context/query_task.py +++ /dev/null @@ -1,18 +0,0 @@ -from mindsdb.api.executor.sql_query import SQLQuery -from mindsdb.interfaces.query_context.context_controller import query_context_controller -from mindsdb.api.executor.controllers.session_controller import SessionController -from mindsdb.interfaces.tasks.task import BaseTask - - -class QueryTask(BaseTask): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.query_id = self.object_id - - def run(self, stop_event): - try: - session = SessionController() - SQLQuery(None, query_id=self.query_id, session=session, stop_event=stop_event) - finally: - # clear task - query_context_controller.get_query(self.query_id).remove_from_task() diff --git a/mindsdb/interfaces/storage/__init__.py b/mindsdb/interfaces/storage/__init__.py deleted file mode 100644 index 8b137891791..00000000000 --- a/mindsdb/interfaces/storage/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/mindsdb/interfaces/storage/db.py b/mindsdb/interfaces/storage/db.py deleted file mode 100644 index f235068d578..00000000000 --- a/mindsdb/interfaces/storage/db.py +++ /dev/null @@ -1,658 +0,0 @@ -import json -import orjson -import datetime -import os -from typing import Dict, List, Optional - -import numpy as np -from sqlalchemy import ( - JSON, - Boolean, - 
Column, - DateTime, - Index, - Integer, - LargeBinary, - Numeric, - String, - UniqueConstraint, - create_engine, - text, - types, -) -from sqlalchemy.exc import OperationalError -from sqlalchemy.orm import ( - Mapped, - mapped_column, - declarative_base, - relationship, - scoped_session, - sessionmaker, -) -from sqlalchemy.sql.schema import ForeignKey -from mind_castle.sqlalchemy_type import SecretData - -from mindsdb.utilities.json_encoder import CustomJSONEncoder -from mindsdb.utilities.config import config -from mindsdb.utilities.constants import DEFAULT_COMPANY_ID, DEFAULT_USER_ID - - -class Base: - __allow_unmapped__ = True - - -Base = declarative_base(cls=Base) - -session, engine = None, None - - -def init(connection_str: str = None): - global Base, session, engine - if connection_str is None: - connection_str = config["storage_db"] - # Use orjson with our CustomJSONEncoder.default for JSON serialization - _default_json = CustomJSONEncoder().default - - def _json_serializer(value): - return orjson.dumps( - value, - default=_default_json, - option=orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_PASSTHROUGH_DATETIME, - ).decode("utf-8") - - base_args = { - "pool_size": 30, - "max_overflow": 200, - "json_serializer": _json_serializer, - } - engine = create_engine(connection_str, echo=False, **base_args) - session = scoped_session(sessionmaker(bind=engine, autoflush=True)) - Base.query = session.query_property() - - -def serializable_insert(record: Base, try_count: int = 100): - """Do serializeble insert. If fail - repeat it {try_count} times. 
- - Args: - record (Base): sqlalchey record to insert - try_count (int): count of tryes to insert record - """ - commited = False - while not commited: - session.connection(execution_options={"isolation_level": "SERIALIZABLE"}) - if engine.name == "postgresql": - session.execute(text("LOCK TABLE PREDICTOR IN EXCLUSIVE MODE")) - session.add(record) - try: - session.commit() - except OperationalError: - # catch 'SerializationFailure' (it should be in str(e), but it may depend on engine) - session.rollback() - try_count += -1 - if try_count == 0: - raise - else: - commited = True - - -# Source: https://stackoverflow.com/questions/26646362/numpy-array-is-not-json-serializable -class NumpyEncoder(json.JSONEncoder): - """Special json encoder for numpy types""" - - def default(self, obj): - if isinstance(obj, np.integer): - return int(obj) - elif isinstance(obj, np.floating): - return float(obj) - elif isinstance(obj, np.ndarray): - return obj.tolist() - return json.JSONEncoder.default(self, obj) - - -class Array(types.TypeDecorator): - """Float Type that replaces commas with dots on input""" - - impl = types.String - - def process_bind_param(self, value, dialect): # insert - if isinstance(value, str): - return value - elif value is None: - return value - else: - return ",|,|,".join(value) - - def process_result_value(self, value, dialect): # select - return value.split(",|,|,") if value is not None else None - - -class Json(types.TypeDecorator): - """Float Type that replaces commas with dots on input""" - - impl = types.String - - def process_bind_param(self, value, dialect): # insert - return json.dumps(value, cls=NumpyEncoder) if value is not None else None - - def process_result_value(self, value, dialect): # select - if isinstance(value, dict): - return value - return json.loads(value) if value is not None else None - - -# Use MindsDB's "Json" column type as a backend for mind-castle -class SecretDataJson(SecretData): - impl = Json - - -class PREDICTOR_STATUS: - 
__slots__ = () - COMPLETE = "complete" - TRAINING = "training" - FINETUNING = "finetuning" - GENERATING = "generating" - ERROR = "error" - VALIDATION = "validation" - DELETED = "deleted" # TODO remove it? - - -PREDICTOR_STATUS = PREDICTOR_STATUS() - - -class Predictor(Base): - __tablename__ = "predictor" - - id = Column(Integer, primary_key=True) - updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) - created_at = Column(DateTime, default=datetime.datetime.now) - deleted_at = Column(DateTime) - name = Column(String) - data = Column(Json) # A JSON -- should be everything returned by `get_model_data`, I think - to_predict = Column(Array) - company_id = Column(String, default=DEFAULT_COMPANY_ID, nullable=False) - user_id = Column(String, default=DEFAULT_USER_ID, nullable=False) - mindsdb_version = Column(String) - integration_id = Column(ForeignKey("integration.id", name="fk_integration_id")) - data_integration_ref = Column(Json) - fetch_data_query = Column(String) - learn_args = Column(Json) - update_status = Column(String, default="up_to_date") - status = Column(String) - active = Column(Boolean, default=True) - training_data_columns_count = Column(Integer) - training_data_rows_count = Column(Integer) - training_start_at = Column(DateTime) - training_stop_at = Column(DateTime) - label = Column(String, nullable=True) - version = Column(Integer, default=1) - code = Column(String, nullable=True) - dtype_dict = Column(Json, nullable=True) - project_id = Column(Integer, ForeignKey("project.id", name="fk_project_id"), nullable=False) - training_phase_current = Column(Integer) - training_phase_total = Column(Integer) - training_phase_name = Column(String) - training_metadata = Column(JSON, default={}, nullable=False) - - @staticmethod - def get_name_and_version(full_name): - name_no_version = full_name - version = None - parts = full_name.split(".") - if len(parts) > 1 and parts[-1].isdigit(): - version = int(parts[-1]) - 
name_no_version = ".".join(parts[:-1]) - return name_no_version, version - - -Index( - "predictor_index", - Predictor.company_id, - Predictor.user_id, - Predictor.name, - Predictor.version, - Predictor.active, - Predictor.deleted_at, # would be good to have here nullsfirst(Predictor.deleted_at) - unique=True, -) - - -class Project(Base): - __tablename__ = "project" - - id = Column(Integer, primary_key=True) - created_at = Column(DateTime, default=datetime.datetime.now) - updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) - deleted_at = Column(DateTime) - name = Column(String, nullable=False) - company_id = Column(String, default=DEFAULT_COMPANY_ID, nullable=False) - user_id = Column(String, default=DEFAULT_USER_ID, nullable=False) - metadata_: dict = Column("metadata", JSON, nullable=True) - __table_args__ = (UniqueConstraint("name", "company_id", "user_id", name="unique_project_name_company_id_user_id"),) - - -class Integration(Base): - __tablename__ = "integration" - id = Column(Integer, primary_key=True) - updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) - created_at = Column(DateTime, default=datetime.datetime.now) - name = Column(String, nullable=False) - engine = Column(String, nullable=False) - data = Column(SecretDataJson(os.environ.get("MINDSDB_DATA_ENCRYPTION_TYPE", "none"))) - company_id = Column(String, default=DEFAULT_COMPANY_ID, nullable=False) - user_id = Column(String, default=DEFAULT_USER_ID, nullable=False) - - __table_args__ = ( - UniqueConstraint("name", "company_id", "user_id", name="unique_integration_name_company_id_user_id"), - ) - - -class File(Base): - __tablename__ = "file" - id = Column(Integer, primary_key=True) - name = Column(String, nullable=False) - company_id = Column(String, default=DEFAULT_COMPANY_ID, nullable=False) - user_id = Column(String, default=DEFAULT_USER_ID, nullable=False) - source_file_path = Column(String, nullable=False) - 
file_path = Column(String, nullable=False) - row_count = Column(Integer, nullable=False) - columns = Column(Json, nullable=False) - created_at = Column(DateTime, default=datetime.datetime.now) - metadata_: dict = Column("metadata", JSON, nullable=True) - updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) - __table_args__ = (UniqueConstraint("name", "company_id", "user_id", name="unique_file_name_company_id_user_id"),) - - -class View(Base): - __tablename__ = "view" - id = Column(Integer, primary_key=True) - name = Column(String, nullable=False) - company_id = Column(String, default=DEFAULT_COMPANY_ID, nullable=False) - user_id = Column(String, default=DEFAULT_USER_ID, nullable=False) - query = Column(String, nullable=False) - project_id = Column(Integer, ForeignKey("project.id", name="fk_project_id"), nullable=False) - __table_args__ = (UniqueConstraint("name", "company_id", "user_id", name="unique_view_name_company_id_user_id"),) - - -class JsonStorage(Base): - __tablename__ = "json_storage" - id = Column(Integer, primary_key=True) - resource_group = Column(String) - resource_id = Column(Integer) - name = Column(String) - content = Column(JSON) - encrypted_content = Column(LargeBinary, nullable=True) - company_id = Column(String, default=DEFAULT_COMPANY_ID, nullable=False) - user_id = Column(String, default=DEFAULT_USER_ID, nullable=False) - - def to_dict(self) -> Dict: - return { - "id": self.id, - "resource_group": self.resource_group, - "resource_id": self.resource_id, - "name": self.name, - "content": self.content, - "encrypted_content": self.encrypted_content, - "company_id": self.company_id, - "user_id": self.user_id, - } - - -class Jobs(Base): - __tablename__ = "jobs" - id = Column(Integer, primary_key=True) - company_id = Column(String, default=DEFAULT_COMPANY_ID, nullable=False) - user_id = Column(String, default=DEFAULT_USER_ID, nullable=False) - user_class = Column(Integer, nullable=True) - active = 
Column(Boolean, default=True) - - name = Column(String, nullable=False) - project_id = Column(Integer, nullable=False) - query_str = Column(String, nullable=False) - if_query_str = Column(String, nullable=True) - start_at = Column(DateTime, default=datetime.datetime.now) - end_at = Column(DateTime) - next_run_at = Column(DateTime) - schedule_str = Column(String) - - deleted_at = Column(DateTime) - updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) - created_at = Column(DateTime, default=datetime.datetime.now) - - -class JobsHistory(Base): - __tablename__ = "jobs_history" - id = Column(Integer, primary_key=True) - company_id = Column(String, default=DEFAULT_COMPANY_ID, nullable=False) - user_id = Column(String, default=DEFAULT_USER_ID, nullable=False) - - job_id = Column(Integer) - - query_str = Column(String) - start_at = Column(DateTime) - end_at = Column(DateTime) - - error = Column(String) - created_at = Column(DateTime, default=datetime.datetime.now) - updated_at = Column(DateTime, default=datetime.datetime.now) - - __table_args__ = (UniqueConstraint("job_id", "start_at", name="uniq_job_history_job_id_start"),) - - -class ChatBots(Base): - __tablename__ = "chat_bots" - id = Column(Integer, primary_key=True) - - name = Column(String, nullable=False) - project_id = Column(Integer, nullable=False) - agent_id = Column(ForeignKey("agents.id", name="fk_agent_id")) - - # To be removed when existing chatbots are backfilled with newly created Agents. 
- model_name = Column(String) - database_id = Column(Integer) - params = Column(JSON) - - updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) - created_at = Column(DateTime, default=datetime.datetime.now) - webhook_token = Column(String) - - def as_dict(self) -> Dict: - return { - "id": self.id, - "name": self.name, - "project_id": self.project_id, - "agent_id": self.agent_id, - "model_name": self.model_name, - "params": self.params, - "webhook_token": self.webhook_token, - "created_at": self.created_at, - "database_id": self.database_id, - } - - -class ChatBotsHistory(Base): - __tablename__ = "chat_bots_history" - id = Column(Integer, primary_key=True) - chat_bot_id = Column(Integer, nullable=False) - type = Column(String) # TODO replace to enum - text = Column(String) - user = Column(String) - destination = Column(String) - sent_at = Column(DateTime, default=datetime.datetime.now) - error = Column(String) - - -class Triggers(Base): - __tablename__ = "triggers" - id = Column(Integer, primary_key=True) - - name = Column(String, nullable=False) - project_id = Column(Integer, nullable=False) - - database_id = Column(Integer, nullable=False) - table_name = Column(String, nullable=False) - query_str = Column(String, nullable=False) - columns = Column(String) # list of columns separated by delimiter - - updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) - created_at = Column(DateTime, default=datetime.datetime.now) - - -class Tasks(Base): - __tablename__ = "tasks" - id = Column(Integer, primary_key=True) - company_id = Column(String, default=DEFAULT_COMPANY_ID, nullable=False) - user_id = Column(String, default=DEFAULT_USER_ID, nullable=False) - user_class = Column(Integer, nullable=True) - - # trigger, chatbot - object_type = Column(String, nullable=False) - object_id = Column(Integer, nullable=False) - - last_error = Column(String) - active = Column(Boolean, default=True) - reload = 
Column(Boolean, default=False) - - # for running in concurrent processes - run_by = Column(String) - alive_time = Column(DateTime(timezone=True)) - - updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) - created_at = Column(DateTime, default=datetime.datetime.now) - - -class AgentSkillsAssociation(Base): - __tablename__ = "agent_skills" - - agent_id: Mapped[int] = mapped_column(ForeignKey("agents.id"), primary_key=True) - skill_id: Mapped[int] = mapped_column(ForeignKey("skills.id"), primary_key=True) - parameters: Mapped[dict] = mapped_column(JSON, default={}, nullable=True) - - agent = relationship("Agents", back_populates="skills_relationships") - skill = relationship("Skills", back_populates="agents_relationships") - - -class Skills(Base): - __tablename__ = "skills" - id = Column(Integer, primary_key=True) - agents_relationships: Mapped[List["Agents"]] = relationship(AgentSkillsAssociation, back_populates="skill") - name = Column(String, nullable=False) - project_id = Column(Integer, nullable=False) - type = Column(String, nullable=False) - params = Column(JSON) - - created_at = Column(DateTime, default=datetime.datetime.now) - updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) - deleted_at = Column(DateTime) - - def as_dict(self) -> Dict: - return { - "id": self.id, - "name": self.name, - "project_id": self.project_id, - "agent_ids": [rel.agent.id for rel in self.agents_relationships], - "type": self.type, - "params": self.params, - "created_at": self.created_at, - } - - -class Agents(Base): - __tablename__ = "agents" - id = Column(Integer, primary_key=True) - skills_relationships: Mapped[List["Skills"]] = relationship(AgentSkillsAssociation, back_populates="agent") - company_id = Column(String, default=DEFAULT_COMPANY_ID, nullable=False) - user_id = Column(String, default=DEFAULT_USER_ID, nullable=False) - user_class = Column(Integer, nullable=True) - - name = Column(String, 
nullable=False) - project_id = Column(Integer, nullable=False) - - model_name = Column(String, nullable=True) - provider = Column(String, nullable=True) - params = Column(JSON) - - updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) - created_at = Column(DateTime, default=datetime.datetime.now) - deleted_at = Column(DateTime) - - def as_dict(self) -> Dict: - skills = [] - skills_extra_parameters = {} - for rel in self.skills_relationships: - skill = rel.skill - # Skip auto-generated SQL skills - if skill.params.get("description", "").startswith("Auto-generated SQL skill for agent"): - continue - skills.append(skill.as_dict()) - skills_extra_parameters[skill.name] = rel.parameters or {} - - params = self.params.copy() - - agent_dict = { - "id": self.id, - "name": self.name, - "project_id": self.project_id, - "updated_at": self.updated_at, - "created_at": self.created_at, - } - - if self.model_name: - agent_dict["model_name"] = self.model_name - - if self.provider: - agent_dict["provider"] = self.provider - - # Since skills were depreciated, they are only used with Minds - # Minds expects the parameters to be provided as is without breaking them down - if skills: - agent_dict["skills"] = skills - agent_dict["skills_extra_parameters"] = skills_extra_parameters - agent_dict["params"] = params - else: - data = params.pop("data", {}) - model = params.pop("model", {}) - prompt_template = params.pop("prompt_template", None) - if data: - agent_dict["data"] = data - if model: - agent_dict["model"] = model - if prompt_template: - agent_dict["prompt_template"] = prompt_template - if params: - agent_dict["params"] = params - - return agent_dict - - -class KnowledgeBase(Base): - __tablename__ = "knowledge_base" - id = Column(Integer, primary_key=True) - name = Column(String, nullable=False) - project_id = Column(Integer, nullable=False) - params = Column(JSON) - - vector_database_id = Column( - ForeignKey("integration.id", 
name="fk_knowledge_base_vector_database_id"), - doc="fk to the vector database integration", - ) - vector_database = relationship( - "Integration", - foreign_keys=[vector_database_id], - doc="vector database integration", - ) - - vector_database_table = Column(String, doc="table name in the vector database") - - embedding_model_id = Column( - ForeignKey("predictor.id", name="fk_knowledge_base_embedding_model_id"), - doc="fk to the embedding model", - ) - - embedding_model = relationship("Predictor", foreign_keys=[embedding_model_id], doc="embedding model") - query_id = Column(Integer, nullable=True) - - created_at = Column(DateTime, default=datetime.datetime.now) - updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) - - __table_args__ = (UniqueConstraint("name", "project_id", name="unique_knowledge_base_name_project_id"),) - - def as_dict(self, with_secrets: Optional[bool] = True) -> Dict: - params = self.params.copy() - embedding_model = params.pop("embedding_model", None) - reranking_model = params.pop("reranking_model", None) - - if not with_secrets: - for key in ("api_key", "private_key"): - for el in (embedding_model, reranking_model): - if el and key in el: - el[key] = "******" - - return { - "id": self.id, - "name": self.name, - "project_id": self.project_id, - "vector_database": None if self.vector_database is None else self.vector_database.name, - "vector_database_table": self.vector_database_table, - "updated_at": self.updated_at, - "created_at": self.created_at, - "query_id": self.query_id, - "embedding_model": embedding_model, - "reranking_model": reranking_model, - "metadata_columns": params.pop("metadata_columns", None), - "content_columns": params.pop("content_columns", None), - "id_column": params.pop("id_column", None), - "params": params, - } - - -class QueryContext(Base): - __tablename__ = "query_context" - id: int = Column(Integer, primary_key=True) - company_id: str = Column(String, 
default=DEFAULT_COMPANY_ID, nullable=False) - user_id: str = Column(String, default=DEFAULT_USER_ID, nullable=False) - - query: str = Column(String, nullable=False) - context_name: str = Column(String, nullable=False) - values: dict = Column(JSON) - - updated_at: datetime.datetime = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) - created_at: datetime.datetime = Column(DateTime, default=datetime.datetime.now) - - -class Queries(Base): - __tablename__ = "queries" - id: int = Column(Integer, primary_key=True) - company_id: str = Column(String, default=DEFAULT_COMPANY_ID, nullable=False) - user_id: str = Column(String, default=DEFAULT_USER_ID, nullable=False) - - sql: str = Column(String, nullable=False) - database: str = Column(String, nullable=True) - - started_at: datetime.datetime = Column(DateTime) - finished_at: datetime.datetime = Column(DateTime) - - parameters = Column(JSON, default={}) - context = Column(JSON, default={}) - processed_rows = Column(Integer, default=0) - error: str = Column(String, nullable=True) - - updated_at: datetime.datetime = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) - created_at: datetime.datetime = Column(DateTime, default=datetime.datetime.now) - - -class LLMLog(Base): - __tablename__ = "llm_log" - id: int = Column(Integer, primary_key=True) - company_id: str = Column(String, default=DEFAULT_COMPANY_ID, nullable=False) - user_id: str = Column(String, default=DEFAULT_USER_ID, nullable=False) - api_key: str = Column(String, nullable=True) - model_id: int = Column(Integer, nullable=True) - model_group: str = Column(String, nullable=True) - input: str = Column(JSON, nullable=True) - output: str = Column(JSON, nullable=True) - start_time: datetime = Column(DateTime, nullable=False) - end_time: datetime = Column(DateTime, nullable=True) - cost: float = Column(Numeric(5, 2), nullable=True) - prompt_tokens: int = Column(Integer, nullable=True) - completion_tokens: int = 
Column(Integer, nullable=True) - total_tokens: int = Column(Integer, nullable=True) - success: bool = Column(Boolean, nullable=False, default=True) - exception: str = Column(String, nullable=True) - traceback: str = Column(String, nullable=True) - stream: bool = Column(Boolean, default=False, comment="Is this completion done in 'streaming' mode") - metadata_: dict = Column("metadata", JSON, nullable=True) - - -class LLMData(Base): - """ - Stores the question/answer pairs of an LLM call so examples can be used - for self improvement with DSPy - """ - - __tablename__ = "llm_data" - id: int = Column(Integer, primary_key=True) - input: str = Column(String, nullable=False) - output: str = Column(String, nullable=False) - model_id: int = Column(Integer, nullable=False) - created_at: datetime = Column(DateTime, default=datetime.datetime.now) - updated_at: datetime = Column(DateTime, onupdate=datetime.datetime.now) diff --git a/mindsdb/interfaces/storage/fs.py b/mindsdb/interfaces/storage/fs.py deleted file mode 100644 index dae393e8edc..00000000000 --- a/mindsdb/interfaces/storage/fs.py +++ /dev/null @@ -1,609 +0,0 @@ -import os -import io -import shutil -import filecmp -import tarfile -import hashlib -from pathlib import Path -from abc import ABC, abstractmethod -from typing import Union, Optional -from dataclasses import dataclass -from datetime import datetime -import threading - -if os.name == "posix": - import fcntl - -import psutil - -from mindsdb.utilities.config import Config - -if Config()["permanent_storage"]["location"] == "s3": - import boto3 - from botocore.exceptions import ClientError as S3ClientError -else: - S3ClientError = FileNotFoundError - -from mindsdb.utilities.context import context as ctx -import mindsdb.utilities.profiler as profiler -from mindsdb.utilities import log -from mindsdb.utilities.fs import safe_extract - -logger = log.getLogger(__name__) - - -@dataclass(frozen=True) -class RESOURCE_GROUP: - PREDICTOR = "predictor" - INTEGRATION = 
"integration" - TAB = "tab" - SYSTEM = "system" - - -RESOURCE_GROUP = RESOURCE_GROUP() - - -DIR_LOCK_FILE_NAME = "dir.lock" -DIR_LAST_MODIFIED_FILE_NAME = "last_modified.txt" -SERVICE_FILES_NAMES = (DIR_LOCK_FILE_NAME, DIR_LAST_MODIFIED_FILE_NAME) - - -def compare_recursive(comparison: filecmp.dircmp) -> bool: - """Check output of dircmp and return True if the directories do not differ - - Args: - comparison (filecmp.dircmp): dirs comparison - - Returns: - bool: True if dirs do not differ - """ - if comparison.left_only or comparison.right_only or comparison.diff_files: - return False - for sub_comparison in comparison.subdirs.values(): - if compare_recursive(sub_comparison) is False: - return False - return True - - -def compare_directories(dir1: str, dir2: str) -> bool: - """Compare two directories - - Args: - dir1 (str): dir to compare - dir2 (str): dir to compare - - Returns: - bool: True if dirs do not differ - """ - dcmp = filecmp.dircmp(dir1, dir2) - return compare_recursive(dcmp) - - -def copy(src, dst): - if os.path.isdir(src): - if os.path.exists(dst): - if compare_directories(src, dst): - return - shutil.rmtree(dst, ignore_errors=True) - shutil.copytree(src, dst, dirs_exist_ok=True) - else: - if os.path.exists(dst): - if hashlib.md5(open(src, "rb").read()).hexdigest() == hashlib.md5(open(dst, "rb").read()).hexdigest(): - return - try: - os.remove(dst) - except Exception: - pass - shutil.copy2(src, dst) - - -class BaseFSStore(ABC): - """Base class for file storage""" - - def __init__(self): - self.config = Config() - self.storage = self.config["paths"]["storage"] - - @abstractmethod - def get(self, local_name, base_dir): - """Copy file/folder from storage to {base_dir} - - Args: - local_name (str): name of resource (file/folder) - base_dir (str): path to copy the resource - """ - pass - - @abstractmethod - def put(self, local_name, base_dir): - """Copy file/folder from {base_dir} to storage - - Args: - local_name (str): name of resource (file/folder) - 
base_dir (str): path to folder with the resource - """ - pass - - @abstractmethod - def delete(self, remote_name): - """Delete file/folder from storage - - Args: - remote_name (str): name of resource - """ - pass - - -def get_dir_size(path: str): - total = 0 - with os.scandir(path) as it: - for entry in it: - if entry.is_file(): - total += entry.stat().st_size - elif entry.is_dir(): - total += get_dir_size(entry.path) - return total - - -class AbsentFSStore(BaseFSStore): - """Storage class that does not store anything. It is just a dummy.""" - - def get(self, *args, **kwargs): - pass - - def put(self, *args, **kwargs): - pass - - def delete(self, *args, **kwargs): - pass - - -class LocalFSStore(BaseFSStore): - """Storage that stores files locally""" - - def __init__(self): - super().__init__() - - def get(self, local_name, base_dir): - remote_name = local_name - src = os.path.join(self.storage, remote_name) - dest = os.path.join(base_dir, local_name) - if not os.path.exists(dest) or get_dir_size(src) != get_dir_size(dest): - copy(src, dest) - - def put(self, local_name, base_dir, compression_level=9): - remote_name = local_name - copy(os.path.join(base_dir, local_name), os.path.join(self.storage, remote_name)) - - def delete(self, remote_name): - path = Path(self.storage).joinpath(remote_name) - try: - if path.is_file(): - path.unlink(missing_ok=True) - else: - shutil.rmtree(path) - except FileNotFoundError: - pass - - -class FileLock: - """file lock to make safe concurrent access to directory - works as context - """ - - @staticmethod - def lock_folder_path(relative_path: Path) -> Path: - """Args: - relative_path (Path): path to resource directory relative to storage root - - Returns: - Path: abs path to folder with lock file - """ - config = Config() - root_storage_path = Path(config.paths["root"]) - return config.paths["locks"] / relative_path.relative_to(root_storage_path) - - def __init__(self, relative_path: Path, mode: str = "w"): - """Args: - relative_path 
(Path): path to resource directory relative to storage root - mode (str): lock for read (r) or write (w) - """ - if os.name != "posix": - return - - self._local_path = FileLock.lock_folder_path(relative_path) - self._lock_file_name = DIR_LOCK_FILE_NAME - self._lock_file_path = self._local_path / self._lock_file_name - self._mode = fcntl.LOCK_EX if mode == "w" else fcntl.LOCK_SH - - if self._lock_file_path.is_file() is False: - self._local_path.mkdir(parents=True, exist_ok=True) - try: - self._lock_file_path.write_text("") - except Exception: - pass - - def __enter__(self): - if os.name != "posix": - return - - try: - # On at least some systems, LOCK_EX can only be used if the file - # descriptor refers to a file opened for writing. - self._lock_fd = os.open(self._lock_file_path, os.O_RDWR | os.O_CREAT) - fcntl.lockf(self._lock_fd, self._mode | fcntl.LOCK_NB) - except (ValueError, FileNotFoundError): - # file probably was deleted between open and lock - logger.error(f"Cant accure lock on {self._local_path}") - raise FileNotFoundError - except BlockingIOError: - logger.error(f"Directory is locked by another process: {self._local_path}") - fcntl.lockf(self._lock_fd, self._mode) - - def __exit__(self, exc_type, exc_value, traceback): - if os.name != "posix": - return - - try: - fcntl.lockf(self._lock_fd, fcntl.LOCK_UN) - os.close(self._lock_fd) - except Exception: - pass - - -class S3FSStore(BaseFSStore): - """Storage that stores files in amazon s3""" - - dt_format = "%d.%m.%y %H:%M:%S.%f" - - def __init__(self): - super().__init__() - if "s3_credentials" in self.config["permanent_storage"]: - self.s3 = boto3.client("s3", **self.config["permanent_storage"]["s3_credentials"]) - else: - self.s3 = boto3.client("s3") - self.bucket = self.config["permanent_storage"]["bucket"] - self._thread_lock = threading.Lock() - - def _get_remote_last_modified(self, object_name: str) -> datetime: - """get time when object was created/modified - - Args: - object_name (str): name if file 
in bucket - - Returns: - datetime - """ - last_modified = self.s3.get_object_attributes( - Bucket=self.bucket, Key=object_name, ObjectAttributes=["Checksum"] - )["LastModified"] - last_modified = last_modified.replace(tzinfo=None) - return last_modified - - @profiler.profile() - def _get_local_last_modified(self, base_dir: str, local_name: str) -> datetime: - """get 'last_modified' that saved locally - - Args: - base_dir (str): path to base folder - local_name (str): folder name - - Returns: - datetime | None - """ - last_modified_file_path = Path(base_dir) / local_name / DIR_LAST_MODIFIED_FILE_NAME - if last_modified_file_path.is_file() is False: - return None - try: - last_modified_text = last_modified_file_path.read_text() - last_modified_datetime = datetime.strptime(last_modified_text, self.dt_format) - except Exception: - return None - return last_modified_datetime - - @profiler.profile() - def _save_local_last_modified(self, base_dir: str, local_name: str, last_modified: datetime): - """Save 'last_modified' to local folder - - Args: - base_dir (str): path to base folder - local_name (str): folder name - last_modified (datetime) - """ - last_modified_file_path = Path(base_dir) / local_name / DIR_LAST_MODIFIED_FILE_NAME - last_modified_text = last_modified.strftime(self.dt_format) - last_modified_file_path.write_text(last_modified_text) - - @profiler.profile() - def _download(self, base_dir: str, remote_ziped_name: str, local_ziped_path: str, last_modified: datetime = None): - """download file to s3 and unarchive it - - Args: - base_dir (str) - remote_ziped_name (str) - local_ziped_path (str) - last_modified (datetime, optional) - """ - os.makedirs(base_dir, exist_ok=True) - - remote_size = self.s3.get_object_attributes( - Bucket=self.bucket, Key=remote_ziped_name, ObjectAttributes=["ObjectSize"] - )["ObjectSize"] - if (remote_size * 2) > psutil.virtual_memory().available: - fh = io.BytesIO() - self.s3.download_fileobj(self.bucket, remote_ziped_name, fh) - with 
tarfile.open(fileobj=fh) as tar: - safe_extract(tar, path=base_dir) - else: - self.s3.download_file(self.bucket, remote_ziped_name, local_ziped_path) - shutil.unpack_archive(local_ziped_path, base_dir) - os.remove(local_ziped_path) - - # os.system(f'chmod -R 777 {base_dir}') - - if last_modified is None: - last_modified = self._get_remote_last_modified(remote_ziped_name) - self._save_local_last_modified(base_dir, remote_ziped_name.replace(".tar.gz", ""), last_modified) - - @profiler.profile() - def get(self, local_name, base_dir): - remote_name = local_name - remote_ziped_name = f"{remote_name}.tar.gz" - local_ziped_name = f"{local_name}.tar.gz" - local_ziped_path = os.path.join(base_dir, local_ziped_name) - - folder_path = Path(base_dir) / local_name - with FileLock(folder_path, mode="r"): - local_last_modified = self._get_local_last_modified(base_dir, local_name) - remote_last_modified = self._get_remote_last_modified(remote_ziped_name) - if local_last_modified is not None and local_last_modified == remote_last_modified: - return - - with FileLock(folder_path, mode="w"): - self._download(base_dir, remote_ziped_name, local_ziped_path, last_modified=remote_last_modified) - - @profiler.profile() - def put(self, local_name, base_dir, compression_level=9): - # NOTE: This `make_archive` function is implemente poorly and will create an empty archive file even if - # the file/dir to be archived doesn't exist or for some other reason can't be archived - remote_name = local_name - remote_zipped_name = f"{remote_name}.tar.gz" - - dir_path = Path(base_dir) / remote_name - dir_size = sum(f.stat().st_size for f in dir_path.glob("**/*") if f.is_file()) - if (dir_size * 2) < psutil.virtual_memory().available: - old_cwd = os.getcwd() - fh = io.BytesIO() - with self._thread_lock: - os.chdir(base_dir) - with tarfile.open(fileobj=fh, mode="w:gz", compresslevel=compression_level) as tar: - for path in dir_path.iterdir(): - if path.is_file() and path.name in SERVICE_FILES_NAMES: - 
continue - tar.add(path.relative_to(base_dir)) - os.chdir(old_cwd) - fh.seek(0) - - self.s3.upload_fileobj(fh, self.bucket, remote_zipped_name) - else: - shutil.make_archive(os.path.join(base_dir, remote_name), "gztar", root_dir=base_dir, base_dir=local_name) - - self.s3.upload_file(os.path.join(base_dir, remote_zipped_name), self.bucket, remote_zipped_name) - os.remove(os.path.join(base_dir, remote_zipped_name)) - - last_modified = self._get_remote_last_modified(remote_zipped_name) - self._save_local_last_modified(base_dir, local_name, last_modified) - - @profiler.profile() - def delete(self, remote_name): - self.s3.delete_object(Bucket=self.bucket, Key=remote_name) - - -def FsStore(): - storage_location = Config()["permanent_storage"]["location"] - if storage_location == "absent": - return AbsentFSStore() - if storage_location == "local": - return LocalFSStore() - if storage_location == "s3": - return S3FSStore() - raise Exception(f"Location: '{storage_location}' not supported") - - -class FileStorage: - def __init__(self, resource_group: str, resource_id: int, root_dir: str = "content", sync: bool = True): - """ - Args: - resource_group (str) - resource_id (int) - root_dir (str) - sync (bool) - """ - - self.resource_group = resource_group - self.resource_id = resource_id - self.root_dir = root_dir - self.sync = sync - - # Folder naming: {resource_group}_{company_id}_{user_id}_{resource_id} - self.folder_name = f"{resource_group}_{ctx.company_id}_{ctx.user_id}_{resource_id}" - - config = Config() - self.fs_store = FsStore() - self.content_path = Path(config["paths"][root_dir]) - self.resource_group_path = self.content_path / resource_group - - self.folder_path = self.resource_group_path / self.folder_name - if self.folder_path.exists() is False: - self.folder_path.mkdir(parents=True, exist_ok=True) - - @profiler.profile() - def push(self, compression_level: int = 9): - with FileLock(self.folder_path, mode="r"): - 
self._push_no_lock(compression_level=compression_level) - - @profiler.profile() - def _push_no_lock(self, compression_level: int = 9): - self.fs_store.put(str(self.folder_name), str(self.resource_group_path), compression_level=compression_level) - - @profiler.profile() - def push_path(self, path, compression_level: int = 9): - # TODO implement push per element - self.push(compression_level=compression_level) - - @profiler.profile() - def pull(self): - try: - self.fs_store.get(str(self.folder_name), str(self.resource_group_path)) - except (FileNotFoundError, S3ClientError): - pass - - @profiler.profile() - def pull_path(self, path): - # TODO implement pull per element - self.pull() - - @profiler.profile() - def file_set(self, name, content): - if self.sync is True: - self.pull() - - with FileLock(self.folder_path, mode="w"): - dest_abs_path = self.folder_path / name - - with open(dest_abs_path, "wb") as fd: - fd.write(content) - - if self.sync is True: - self._push_no_lock() - - @profiler.profile() - def file_get(self, name): - if self.sync is True: - self.pull() - dest_abs_path = self.folder_path / name - with FileLock(self.folder_path, mode="r"): - with open(dest_abs_path, "rb") as fd: - return fd.read() - - @profiler.profile() - def add(self, path: Union[str, Path], dest_rel_path: Optional[Union[str, Path]] = None): - """Copy file/folder to persist storage - - Examples: - Copy file 'args.json' to '{storage}/args.json' - >>> fs.add('/path/args.json') - - Copy file 'args.json' to '{storage}/folder/opts.json' - >>> fs.add('/path/args.json', 'folder/opts.json') - - Copy folder 'folder' to '{storage}/folder' - >>> fs.add('/path/folder') - - Copy folder 'folder' to '{storage}/path/folder' - >>> fs.add('/path/folder', 'path/folder') - - Args: - path (Union[str, Path]): path to the resource - dest_rel_path (Optional[Union[str, Path]]): relative path in storage to file or folder - """ - if self.sync is True: - self.pull() - with FileLock(self.folder_path, mode="w"): - 
path = Path(path) - if isinstance(dest_rel_path, str): - dest_rel_path = Path(dest_rel_path) - - if dest_rel_path is None: - dest_abs_path = self.folder_path / path.name - else: - dest_abs_path = self.folder_path / dest_rel_path - - copy(str(path), str(dest_abs_path)) - - if self.sync is True: - self._push_no_lock() - - @profiler.profile() - def get_path(self, relative_path: Union[str, Path]) -> Path: - """Return path to file or folder - - Examples: - get path to 'opts.json': - >>> fs.get_path('folder/opts.json') - ... /path/{storage}/folder/opts.json - - Args: - relative_path (Union[str, Path]): Path relative to the storage folder - - Returns: - Path: path to requested file or folder - """ - if self.sync is True: - self.pull() - - with FileLock(self.folder_path, mode="r"): - if isinstance(relative_path, str): - relative_path = Path(relative_path) - # relative_path = relative_path.resolve() - - if relative_path.is_absolute(): - raise TypeError("FSStorage.get_path() got absolute path as argument") - - ret_path = self.folder_path / relative_path - if not ret_path.exists(): - # raise Exception('Path does not exists') - os.makedirs(ret_path) - - return ret_path - - def delete(self, relative_path: Union[str, Path] = "."): - path = (self.folder_path / relative_path).resolve() - if isinstance(relative_path, str): - relative_path = Path(relative_path) - - if relative_path.is_absolute(): - raise TypeError("FSStorage.delete() got absolute path as argument") - - # complete removal - if path == self.folder_path.resolve(): - with FileLock(self.folder_path, mode="w"): - self.fs_store.delete(self.folder_name) - # NOTE on some fs .rmtree is not working if any file is open - shutil.rmtree(str(self.folder_path)) - - # region del file lock - lock_folder_path = FileLock.lock_folder_path(self.folder_path) - try: - shutil.rmtree(lock_folder_path) - except FileNotFoundError: - logger.warning("Tried to delete file not found: %s", lock_folder_path) - # endregion - return - - if self.sync 
is True: - self.pull() - - with FileLock(self.folder_path, mode="w"): - if path.exists() is False: - raise Exception("Path does not exists") - - if path.is_file(): - path.unlink(missing_ok=True) - else: - path.rmdir() - - if self.sync is True: - self._push_no_lock() - - -class FileStorageFactory: - def __init__(self, resource_group: str, root_dir: str = "content", sync: bool = True): - self.resource_group = resource_group - self.root_dir = root_dir - self.sync = sync - - def __call__(self, resource_id: int): - return FileStorage( - resource_group=self.resource_group, root_dir=self.root_dir, sync=self.sync, resource_id=resource_id - ) diff --git a/mindsdb/interfaces/storage/json.py b/mindsdb/interfaces/storage/json.py deleted file mode 100644 index 565d2467898..00000000000 --- a/mindsdb/interfaces/storage/json.py +++ /dev/null @@ -1,169 +0,0 @@ -from mindsdb.utilities.functions import decrypt_json, encrypt_json -from mindsdb.utilities.config import config -from mindsdb.interfaces.storage import db -from mindsdb.interfaces.storage.fs import RESOURCE_GROUP -from mindsdb.utilities.context import context as ctx -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class JsonStorage: - def __init__(self, resource_group: str, resource_id: int): - self.resource_group = resource_group - self.resource_id = resource_id - - def __setitem__(self, key, value): - if isinstance(value, dict) is False: - raise TypeError(f"got {type(value)} instead of dict") - existing_record = self.get_record(key) - if existing_record is None: - record = db.JsonStorage( - name=key, - resource_group=self.resource_group, - resource_id=self.resource_id, - company_id=ctx.company_id, - user_id=ctx.user_id, - content=value, - ) - db.session.add(record) - else: - existing_record.content = value - db.session.commit() - - def set(self, key, value): - self[key] = value - - def __getitem__(self, key): - record = self.get_record(key) - if record is None: - return None - return 
record.content - - def get(self, key): - return self[key] - - def get_record(self, key): - record = ( - db.session.query(db.JsonStorage) - .filter_by( - name=key, - resource_group=self.resource_group, - resource_id=self.resource_id, - company_id=ctx.company_id, - user_id=ctx.user_id, - ) - .first() - ) - return record - - def get_all_records(self): - records = ( - db.session.query(db.JsonStorage) - .filter_by( - resource_group=self.resource_group, - resource_id=self.resource_id, - company_id=ctx.company_id, - user_id=ctx.user_id, - ) - .all() - ) - return records - - def __repr__(self): - records = self.get_all_records() - names = [x.name for x in records] - return f"json_storage({names})" - - def __len__(self): - records = self.get_all_records() - return len(records) - - def __delitem__(self, key): - record = self.get_record(key) - if record is not None: - try: - db.session.delete(record) - db.session.commit() - except Exception: - db.session.rollback() - logger.exception("cant delete record from JSON storage:") - - def delete(self, key): - del self[key] - - def clean(self): - json_records = self.get_all_records() - for record in json_records: - db.session.delete(record) - try: - db.session.commit() - except Exception: - db.session.rollback() - logger.exception("cant delete records from JSON storage:") - - -class EncryptedJsonStorage(JsonStorage): - def __init__(self, resource_group: str, resource_id: int): - super().__init__(resource_group, resource_id) - self.secret_key = config.get("secret_key", "dummy-key") - - def __setitem__(self, key: str, value: dict) -> None: - if isinstance(value, dict) is False: - raise TypeError(f"got {type(value)} instead of dict") - - encrypted_value = encrypt_json(value, self.secret_key) - - existing_record = self.get_record(key) - if existing_record is None: - record = db.JsonStorage( - name=key, - resource_group=self.resource_group, - resource_id=self.resource_id, - company_id=ctx.company_id, - user_id=ctx.user_id, - 
encrypted_content=encrypted_value, - ) - db.session.add(record) - else: - existing_record.encrypted_content = encrypted_value - db.session.commit() - - def set_bytes(self, key: str, encrypted_value: bytes): - existing_record = self.get_record(key) - if existing_record is None: - record = db.JsonStorage( - name=key, - resource_group=self.resource_group, - resource_id=self.resource_id, - company_id=ctx.company_id, - user_id=ctx.user_id, - encrypted_content=encrypted_value, - ) - db.session.add(record) - else: - existing_record.encrypted_content = encrypted_value - db.session.commit() - - def set_str(self, key: str, encrypted_value: str): - self.set_bytes(key, encrypted_value.encode()) - - def __getitem__(self, key: str) -> dict: - record = self.get_record(key) - if record is None: - return None - return decrypt_json(record.encrypted_content, self.secret_key) - - -def get_json_storage(resource_id: int, resource_group: str = RESOURCE_GROUP.PREDICTOR): - return JsonStorage( - resource_group=resource_group, - resource_id=resource_id, - ) - - -def get_encrypted_json_storage(resource_id: int, resource_group: str = RESOURCE_GROUP.PREDICTOR): - return EncryptedJsonStorage( - resource_group=resource_group, - resource_id=resource_id, - ) diff --git a/mindsdb/interfaces/storage/model_fs.py b/mindsdb/interfaces/storage/model_fs.py deleted file mode 100644 index 67c628e0720..00000000000 --- a/mindsdb/interfaces/storage/model_fs.py +++ /dev/null @@ -1,326 +0,0 @@ -import os -import re -import json -import io -import zipfile -from typing import Union - -import mindsdb.interfaces.storage.db as db - -from .fs import RESOURCE_GROUP, FileStorageFactory, SERVICE_FILES_NAMES -from .json import get_json_storage, get_encrypted_json_storage - - -JSON_STORAGE_FILE = "json_storage.json" - - -class ModelStorage: - """ - This class deals with all model-related storage requirements, from setting status to storing artifacts. 
- """ - - def __init__(self, predictor_id): - storageFactory = FileStorageFactory(resource_group=RESOURCE_GROUP.PREDICTOR, sync=True) - self.fileStorage = storageFactory(predictor_id) - self.predictor_id = predictor_id - - # -- fields -- - - def _get_model_record(self, model_id: int, check_exists: bool = False) -> Union[db.Predictor, None]: - """Get model record by id - - Args: - model_id (int): model id - check_exists (bool): true if need to check that model exists - - Returns: - Union[db.Predictor, None]: model record - - Raises: - KeyError: if `check_exists` is True and model does not exists - """ - model_record = db.Predictor.query.get(self.predictor_id) - if check_exists is True and model_record is None: - raise KeyError("Model does not exists") - return model_record - - def get_info(self): - rec = self._get_model_record(self.predictor_id) - return dict(status=rec.status, to_predict=rec.to_predict, data=rec.data, learn_args=rec.learn_args) - - def status_set(self, status, status_info=None): - rec = self._get_model_record(self.predictor_id) - rec.status = status - if status_info is not None: - rec.data = status_info - db.session.commit() - - def training_state_set(self, current_state_num=None, total_states=None, state_name=None): - rec = self._get_model_record(self.predictor_id) - if current_state_num is not None: - rec.training_phase_current = current_state_num - if total_states is not None: - rec.training_phase_total = total_states - if state_name is not None: - rec.training_phase_name = state_name - db.session.commit() - - def training_state_get(self): - rec = self._get_model_record(self.predictor_id) - return [rec.training_phase_current, rec.training_phase_total, rec.training_phase_name] - - def columns_get(self): - rec = self._get_model_record(self.predictor_id) - return rec.dtype_dict - - def columns_set(self, columns): - # columns: {name: dtype} - - rec = self._get_model_record(self.predictor_id) - rec.dtype_dict = columns - db.session.commit() - - # 
files - - def file_get(self, name): - return self.fileStorage.file_get(name) - - def file_set(self, name, content): - self.fileStorage.file_set(name, content) - - def folder_get(self, name): - # pull folder and return path - name = name.lower().replace(" ", "_") - name = re.sub(r"([^a-z^A-Z^_\d]+)", "_", name) - - self.fileStorage.pull_path(name) - return str(self.fileStorage.get_path(name)) - - def folder_sync(self, name): - # sync abs path - name = name.lower().replace(" ", "_") - name = re.sub(r"([^a-z^A-Z^_\d]+)", "_", name) - - self.fileStorage.push_path(name) - - def file_list(self): ... - - def file_del(self, name): ... - - # jsons - - def json_set(self, name, data): - json_storage = get_json_storage(resource_id=self.predictor_id, resource_group=RESOURCE_GROUP.PREDICTOR) - return json_storage.set(name, data) - - def encrypted_json_set(self, name: str, data: dict) -> None: - json_storage = get_encrypted_json_storage( - resource_id=self.predictor_id, resource_group=RESOURCE_GROUP.PREDICTOR - ) - return json_storage.set(name, data) - - def json_get(self, name): - json_storage = get_json_storage(resource_id=self.predictor_id, resource_group=RESOURCE_GROUP.PREDICTOR) - return json_storage.get(name) - - def encrypted_json_get(self, name: str) -> dict: - json_storage = get_encrypted_json_storage( - resource_id=self.predictor_id, resource_group=RESOURCE_GROUP.PREDICTOR - ) - return json_storage.get(name) - - def json_list(self): ... - - def json_del(self, name): ... - - def delete(self): - self.fileStorage.delete() - json_storage = get_json_storage(resource_id=self.predictor_id, resource_group=RESOURCE_GROUP.PREDICTOR) - json_storage.clean() - - -class HandlerStorage: - """ - This class deals with all handler-related storage requirements, from storing metadata to synchronizing folders - across instances. 
- """ - - def __init__(self, integration_id: int, root_dir: str = None, is_temporal=False): - args = {} - if root_dir is not None: - args["root_dir"] = root_dir - storageFactory = FileStorageFactory(resource_group=RESOURCE_GROUP.INTEGRATION, sync=False, **args) - self.fileStorage = storageFactory(integration_id) - self.integration_id = integration_id - self.is_temporal = is_temporal - # do not sync with remote storage - - def __convert_name(self, name): - name = name.lower().replace(" ", "_") - return re.sub(r"([^a-z^A-Z^_\d]+)", "_", name) - - def is_empty(self): - """check if storage directory is empty - - Returns: - bool: true if dir is empty - """ - for path in self.fileStorage.folder_path.iterdir(): - if path.is_file() and path.name in SERVICE_FILES_NAMES: - continue - return False - return True - - def get_connection_args(self): - rec = db.Integration.query.get(self.integration_id) - return rec.data - - def update_connection_args(self, connection_args: dict) -> None: - """update integration connection args - - Args: - connection_args (dict): new connection args - """ - rec = db.Integration.query.get(self.integration_id) - if rec is None: - raise KeyError("Can't find integration") - rec.data = connection_args - db.session.commit() - - # files - - def file_get(self, name): - self.fileStorage.pull_path(name) - return self.fileStorage.file_get(name) - - def file_set(self, name, content): - self.fileStorage.file_set(name, content) - if not self.is_temporal: - self.fileStorage.push_path(name) - - def file_list(self): ... - - def file_del(self, name): ... 
- - # folder - - def folder_get(self, name): - """Copies folder from remote to local file system and returns its path - - :param name: name of the folder - """ - name = self.__convert_name(name) - - self.fileStorage.pull_path(name) - return str(self.fileStorage.get_path(name)) - - def folder_sync(self, name): - # sync abs path - if self.is_temporal: - return - name = self.__convert_name(name) - self.fileStorage.push_path(name) - - # jsons - - def json_set(self, name, content): - json_storage = get_json_storage(resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION) - return json_storage.set(name, content) - - def encrypted_json_set(self, name: str, content: dict) -> None: - json_storage = get_encrypted_json_storage( - resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION - ) - return json_storage.set(name, content) - - def json_get(self, name): - json_storage = get_json_storage(resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION) - return json_storage.get(name) - - def encrypted_json_get(self, name: str) -> dict: - json_storage = get_encrypted_json_storage( - resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION - ) - return json_storage.get(name) - - def json_list(self): ... - - def json_del(self, name): ... - - def export_files(self) -> bytes: - json_storage = self.export_json_storage() - - if self.is_empty() and not json_storage: - return None - - folder_path = self.folder_get("") - - zip_fd = io.BytesIO() - - with zipfile.ZipFile(zip_fd, "w", zipfile.ZIP_DEFLATED) as zipf: - for root, dirs, files in os.walk(folder_path): - for file_name in files: - if file_name in SERVICE_FILES_NAMES: - continue - abs_path = os.path.join(root, file_name) - zipf.write(abs_path, os.path.relpath(abs_path, folder_path)) - - # If JSON storage is not empty, add it to the zip file. 
- if json_storage: - json_str = json.dumps(json_storage) - zipf.writestr(JSON_STORAGE_FILE, json_str) - - zip_fd.seek(0) - return zip_fd.read() - - def import_files(self, content: bytes): - folder_path = self.folder_get("") - - zip_fd = io.BytesIO() - zip_fd.write(content) - zip_fd.seek(0) - - with zipfile.ZipFile(zip_fd, "r") as zip_ref: - for name in zip_ref.namelist(): - # If JSON storage file is in the zip file, import the content to the JSON storage. - # Thereafter, remove the file from the folder. - if name == JSON_STORAGE_FILE: - json_storage = zip_ref.read(JSON_STORAGE_FILE) - self.import_json_storage(json_storage) - - else: - zip_ref.extract(name, folder_path) - - self.folder_sync("") - - def export_json_storage(self) -> list[dict]: - json_storage = get_json_storage(resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION) - - records = [] - for record in json_storage.get_all_records(): - record_dict = record.to_dict() - if record_dict.get("encrypted_content"): - record_dict["encrypted_content"] = record_dict["encrypted_content"].decode() - records.append(record_dict) - - return records - - def import_json_storage(self, records: bytes) -> None: - json_storage = get_json_storage(resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION) - - encrypted_json_storage = get_encrypted_json_storage( - resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION - ) - - records = json.loads(records.decode()) - - for record in records: - if record["encrypted_content"]: - encrypted_json_storage.set_str(record["name"], record["encrypted_content"]) - else: - json_storage.set(record["name"], record["content"]) - - def delete(self): - self.fileStorage.delete() - json_storage = get_json_storage(resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION) - json_storage.clean() diff --git a/mindsdb/interfaces/tabs/__init__.py b/mindsdb/interfaces/tabs/__init__.py deleted file mode 100644 index 
e69de29bb2d..00000000000 diff --git a/mindsdb/interfaces/tabs/tabs_controller.py b/mindsdb/interfaces/tabs/tabs_controller.py deleted file mode 100644 index e4b89a4926e..00000000000 --- a/mindsdb/interfaces/tabs/tabs_controller.py +++ /dev/null @@ -1,304 +0,0 @@ -import json -from json import JSONDecodeError -from typing import Dict, List -from pathlib import Path - -from mindsdb.utilities import log -from mindsdb.utilities.context import context as ctx -from mindsdb.utilities.exception import EntityNotExistsError -from mindsdb.interfaces.storage.fs import FileStorageFactory, RESOURCE_GROUP, FileStorage - - -logger = log.getLogger(__name__) - - -TABS_FILENAME = "tabs" - - -def get_storage(): - # deprecated - - storageFactory = FileStorageFactory(resource_group=RESOURCE_GROUP.TAB, sync=True) - - # resource_id is useless for 'tabs' - # use constant - return storageFactory(0) - - -class TabsController: - """Tool for adding, editing, and deleting user's tabs - - Attributes: - storage_factory (FileStorageFactory): callable object which returns tabs file storage - """ - - def __init__(self) -> None: - self.storage_factory = FileStorageFactory(resource_group=RESOURCE_GROUP.TAB, sync=True) - - def _get_file_storage(self) -> FileStorage: - """Get user's tabs file storage - NOTE: file storage depend is company_id and user_id sensitive, so need to recreate it each time - - Returns: - FileStorage - """ - return self.storage_factory(0) - - def _get_next_tab_id(self) -> int: - """Get next free tab id - - Returns: - int: id for next tab - """ - tabs_files = self._get_tabs_files() - tabs_ids = list(tabs_files.keys()) - if len(tabs_ids) == 0: - return 1 - return max(tabs_ids) + 1 - - def _get_tabs_files(self) -> Dict[int, Path]: - """Get list of paths to each tab file - - Returns: - Dict[int, Path] - """ - tabs = {} - for child in self._get_file_storage().folder_path.iterdir(): - if (child.is_file() and child.name.startswith("tab_")) is False: - continue - tab_id = 
child.name.replace("tab_", "") - if tab_id.isnumeric() is False: - continue - tabs[int(tab_id)] = child - return tabs - - def _get_tabs_meta(self) -> List[Dict]: - """Get tabs info without content - - Returns: - List[Dict] - """ - all_tabs = self.get_all() - for tab in all_tabs: - del tab["content"] - return all_tabs - - def _load_tab_data(self, tab_id: int, raw_data) -> Dict: - """Load tab JSON while handling trailing garbage.""" - if isinstance(raw_data, bytes): - raw_data_str = raw_data.decode("utf-8") - else: - raw_data_str = raw_data - - try: - return json.loads(raw_data_str) - except JSONDecodeError: - decoder = json.JSONDecoder() - stripped = raw_data_str.lstrip() - data, idx = decoder.raw_decode(stripped) - - trailing = stripped[idx:].strip() - if trailing: - logger.warning( - "Detected trailing data in tab %s/%s/%s, attempting to sanitize", - ctx.company_id, - ctx.user_id, - tab_id, - ) - try: - sanitized_bytes = json.dumps(data).encode("utf-8") - self._get_file_storage().file_set(f"tab_{tab_id}", sanitized_bytes) - except Exception as rewrite_error: - logger.warning( - "Failed to rewrite sanitized tab %s/%s/%s: %s", - ctx.company_id, - ctx.user_id, - tab_id, - rewrite_error, - ) - return data - - def _migrate_legacy(self) -> None: - """Convert old single-file tabs storage to multiple files""" - file_storage = self._get_file_storage() - try: - file_data = file_storage.file_get(TABS_FILENAME) - except FileNotFoundError: - return - except Exception: - file_storage.delete() - return - - try: - data = json.loads(file_data) - except Exception: - file_storage.delete() - return - - if isinstance(data, dict) is False or isinstance(data.get("tabs"), str) is False: - file_storage.delete() - return - - try: - tabs_list = json.loads(data["tabs"]) - except Exception: - file_storage.delete() - return - - if isinstance(tabs_list, list) is False: - file_storage.delete() - return - - for tab in tabs_list: - tab_id = self._get_next_tab_id() - - b_types = json.dumps( - 
{"index": tab.get("index", 0), "name": tab.get("name", "undefined"), "content": tab.get("value", "")} - ).encode("utf-8") - file_storage.file_set(f"tab_{tab_id}", b_types) - - file_storage.delete(TABS_FILENAME) - - def get_all(self) -> List[Dict]: - """Get list of all tabs - - Returns: - List[Dict]: all tabs data - """ - self._get_file_storage().pull() - self._migrate_legacy() - - tabs_files = self._get_tabs_files() - tabs_list = [] - for tab_id, tab_path in tabs_files.items(): - try: - data = self._load_tab_data(tab_id, tab_path.read_text()) - except Exception as e: - logger.error(f"Can't read data of tab {ctx.company_id}/{ctx.user_id}/{tab_id}: {e}") - continue - tabs_list.append({"id": tab_id, **data}) - - tabs_list.sort(key=lambda x: x["index"]) - return tabs_list - - def get(self, tab_id: int) -> Dict: - """Get data of single tab - - Args: - tab_id (int): id of the tab - - Returns: - dict: tabs data - """ - if isinstance(tab_id, int) is False: - raise ValueError("Tab id must be integer") - - try: - raw_tab_data = self._get_file_storage().file_get(f"tab_{tab_id}") - except FileNotFoundError as e: - raise EntityNotExistsError(f"tab {tab_id}") from e - - try: - data = self._load_tab_data(tab_id, raw_tab_data) - except Exception as e: - logger.error(f"Can't read data of tab {ctx.company_id}/{ctx.user_id}/{tab_id}: {e}") - raise Exception(f"Can't read data of tab: {e}") from e - - return {"id": tab_id, **data} - - def add(self, index: int = None, name: str = "undefined", content: str = "") -> Dict: - """Add new tab - - Args: - index (int, optional): index of new tab - name (str, optional): name of new tab - content (str, optional): content of new tab - - Returns: - dict: new tab meta info: id, name and index - """ - file_storage = self._get_file_storage() - tab_id = self._get_next_tab_id() - - reorder_required = index is not None - if index is None: - all_tabs = self.get_all() - if len(all_tabs) == 0: - index = 0 - else: - index = max([x.get("index", 0) for x in 
all_tabs]) + 1 - - data_bytes = json.dumps({"index": index, "name": name, "content": content}).encode("utf-8") - file_storage.file_set(f"tab_{tab_id}", data_bytes) - - if reorder_required: - all_tabs = self.get_all() - all_tabs.sort(key=lambda x: (x["index"], 0 if x["id"] == tab_id else 1)) - file_storage.sync = False - for tab_index, tab in enumerate(all_tabs): - tab["index"] = tab_index - data_bytes = json.dumps(tab).encode("utf-8") - file_storage.file_set(f"tab_{tab['id']}", data_bytes) - file_storage.sync = True - file_storage.push() - - return {"id": tab_id, "index": index, "name": name} - - def modify(self, tab_id: int, index: int = None, name: str = None, content: str = None) -> Dict: - """Modify the tab - - Args: - tab_id (int): if of the tab to modify - index (int, optional): tab's new index - name (str, optional): tab's new name - content (str, optional): tab's new content - - Returns: - dict: new tab meta info: id, name and index - """ - file_storage = self._get_file_storage() - current_data = self.get(tab_id) - - # region modify index - if index is not None and current_data["index"] != index: - current_data["index"] = index - all_tabs = [x for x in self.get_all() if x["id"] != tab_id] - all_tabs.sort(key=lambda x: x["index"]) - file_storage.sync = False - for tab_index, tab in enumerate(all_tabs): - if tab_index < index: - tab["index"] = tab_index - else: - tab["index"] = tab_index + 1 - data_bytes = json.dumps(tab).encode("utf-8") - file_storage.file_set(f"tab_{tab['id']}", data_bytes) - file_storage.sync = True - file_storage.push() - # endregion - - # region modify name - if name is not None and current_data["name"] != name: - current_data["name"] = name - # endregion - - # region modify content - if content is not None and current_data["content"] != content: - current_data["content"] = content - # endregion - - data_bytes = json.dumps(current_data).encode("utf-8") - file_storage.file_set(f"tab_{tab_id}", data_bytes) - - return {"id": 
current_data["id"], "index": current_data["index"], "name": current_data["name"]} - - def delete(self, tab_id: int): - file_storage = self._get_file_storage() - try: - file_storage.file_get(f"tab_{tab_id}") - except FileNotFoundError as e: - raise EntityNotExistsError(f"tab {tab_id}") from e - - file_storage.delete(f"tab_{tab_id}") - - -tabs_controller = TabsController() diff --git a/mindsdb/interfaces/tasks/__init__.py b/mindsdb/interfaces/tasks/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/interfaces/tasks/task.py b/mindsdb/interfaces/tasks/task.py deleted file mode 100644 index a0717e28065..00000000000 --- a/mindsdb/interfaces/tasks/task.py +++ /dev/null @@ -1,16 +0,0 @@ -from mindsdb.interfaces.storage import db - - -class BaseTask: - - def __init__(self, task_id, object_id): - self.task_id = task_id - self.object_id = object_id - - def run(self, stop_event): - raise NotImplementedError - - def set_error(self, message): - task_record = db.Tasks.query.get(self.task_id) - task_record.last_error = str(message) - db.session.commit() diff --git a/mindsdb/interfaces/tasks/task_monitor.py b/mindsdb/interfaces/tasks/task_monitor.py deleted file mode 100644 index 6e47da3ea89..00000000000 --- a/mindsdb/interfaces/tasks/task_monitor.py +++ /dev/null @@ -1,148 +0,0 @@ -import datetime as dt -import os -import socket -import time -from threading import Event - -import sqlalchemy as sa - -from mindsdb.interfaces.storage import db -from mindsdb.utilities import log -from mindsdb.utilities.config import Config - -from .task_thread import TaskThread - -logger = log.getLogger(__name__) - - -class TaskMonitor: - MONITOR_INTERVAL_SECONDS = 2 - LOCK_EXPIRED_SECONDS = MONITOR_INTERVAL_SECONDS * 30 - - def __init__(self): - self._active_tasks = {} - - def start(self, stop_event: Event = None): - config = Config() - db.init() - self.config = config - - while True: - try: - self.check_tasks() - - db.session.rollback() # disable cache - 
time.sleep(self.MONITOR_INTERVAL_SECONDS) - - except (SystemExit, KeyboardInterrupt): - self.stop_all_tasks() - return - - except Exception: - logger.exception("Error in TaskMonitor.start") - db.session.rollback() - - if stop_event is not None and stop_event.is_set(): - return - - def stop_all_tasks(self): - active_tasks = list(self._active_tasks.keys()) - for task_id in active_tasks: - self.stop_task(task_id) - - def check_tasks(self): - allowed_tasks = set() - - for task in db.session.query(db.Tasks).filter(db.Tasks.active == True): # noqa - allowed_tasks.add(task.id) - - # start new tasks - if task.id not in self._active_tasks: - self.start_task(task) - - # Check active tasks - active_tasks = list(self._active_tasks.items()) - for task_id, task in active_tasks: - if task_id not in allowed_tasks: - # old task - self.stop_task(task_id) - - elif not task.is_alive(): - # dead task - self.stop_task(task_id) - - else: - # need to be reloaded ? - record = db.Tasks.query.get(task_id) - if record.reload: - record.reload = False - self.stop_task(task_id) - else: - # set alive time of running tasks - self._set_alive(task_id) - - def _lock_task(self, task): - run_by = f"{socket.gethostname()} {os.getpid()}" - db_date = db.session.query(sa.func.current_timestamp()).first()[0] - if task.run_by == run_by: - # already locked - task.alive_time = db_date - - elif task.alive_time is None: - # not locked yet - task.run_by = run_by - task.alive_time = db_date - - elif db_date - task.alive_time > dt.timedelta(seconds=self.LOCK_EXPIRED_SECONDS): - # lock expired - task.run_by = run_by - task.alive_time = db_date - - else: - return False - - db.session.commit() - return True - - def _set_alive(self, task_id): - db_date = db.session.query(sa.func.current_timestamp()).first()[0] - task = db.Tasks.query.get(task_id) - task.alive_time = db_date - db.session.commit() - - def _unlock_task(self, task_id): - task = db.Tasks.query.get(task_id) - if task is not None: - task.alive_time = None - 
db.session.commit() - - def start_task(self, task): - if not self._lock_task(task): - # can't lock, skip - return - - thread = TaskThread(task.id) - - thread.start() - self._active_tasks[task.id] = thread - - def stop_task(self, task_id: int): - thread = self._active_tasks[task_id] - thread.stop() - thread.join(1) - - if thread.is_alive(): - # don't delete task, wait next circle - return - - del self._active_tasks[task_id] - self._unlock_task(task_id) - - -def start(verbose=False): - monitor = TaskMonitor() - monitor.start() - - -if __name__ == "__main__": - start() diff --git a/mindsdb/interfaces/tasks/task_thread.py b/mindsdb/interfaces/tasks/task_thread.py deleted file mode 100644 index 8b9eb7ca9e5..00000000000 --- a/mindsdb/interfaces/tasks/task_thread.py +++ /dev/null @@ -1,63 +0,0 @@ -import traceback -import threading -from mindsdb.utilities.context import context as ctx -from mindsdb.interfaces.storage import db -from mindsdb.utilities import log - -from mindsdb.interfaces.triggers.trigger_task import TriggerTask -from mindsdb.interfaces.chatbot.chatbot_task import ChatBotTask -from mindsdb.interfaces.query_context.query_task import QueryTask - -logger = log.getLogger(__name__) - - -class TaskThread(threading.Thread): - def __init__(self, task_id): - threading.Thread.__init__(self) - self.task_id = task_id - self._stop_event = threading.Event() - self.object_type = None - self.object_id = None - - def run(self): - # create context and session - - task_record = db.Tasks.query.get(self.task_id) - if task_record is None: - logger.error(f"Task record not found: {self.task_id}") - return - - ctx.set_default() - ctx.company_id = task_record.company_id - ctx.user_id = task_record.user_id - if task_record.user_class is not None: - ctx.user_class = task_record.user_class - ctx.task_id = task_record.id - - self.object_type = task_record.object_type - self.object_id = task_record.object_id - - logger.info(f"Task starting: {self.object_type}.{self.object_id}") - try: - 
if self.object_type == "trigger": - trigger = TriggerTask(self.task_id, self.object_id) - trigger.run(self._stop_event) - - elif self.object_type == "chatbot": - bot = ChatBotTask(self.task_id, self.object_id) - bot.run(self._stop_event) - - elif self.object_type == "query": - query = QueryTask(self.task_id, self.object_id) - query.run(self._stop_event) - - except Exception: - logger.exception("Error during task processing:") - task_record.last_error = traceback.format_exc() - - db.session.commit() - - def stop(self): - logger.info(f"Task stopping: {self.object_type}.{self.object_id}") - - self._stop_event.set() diff --git a/mindsdb/interfaces/triggers/__init__.py b/mindsdb/interfaces/triggers/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/interfaces/triggers/trigger_task.py b/mindsdb/interfaces/triggers/trigger_task.py deleted file mode 100644 index a329c10a109..00000000000 --- a/mindsdb/interfaces/triggers/trigger_task.py +++ /dev/null @@ -1,89 +0,0 @@ -import copy -import traceback -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast import Data, Identifier -from mindsdb.integrations.utilities.query_traversal import query_traversal - -from mindsdb.interfaces.storage import db - -from mindsdb.api.executor.controllers.session_controller import SessionController -from mindsdb.api.executor.command_executor import ExecuteCommands - -from mindsdb.interfaces.database.projects import ProjectController -from mindsdb.utilities import log -from mindsdb.interfaces.tasks.task import BaseTask -from mindsdb.utilities.context import context as ctx - -logger = log.getLogger(__name__) - - -class TriggerTask(BaseTask): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.command_executor = None - self.query = None - - # callback might be without context - self._ctx_dump = ctx.dump() - - def run(self, stop_event): - trigger = db.Triggers.query.get(self.object_id) - - # parse query - self.query 
= parse_sql(trigger.query_str) - - session = SessionController() - - # prepare executor - project_controller = ProjectController() - project = project_controller.get(trigger.project_id) - - session.database = project.name - - self.command_executor = ExecuteCommands(session) - - # subscribe - database = session.integration_controller.get_by_id(trigger.database_id) - data_handler = session.integration_controller.get_data_handler(database["name"]) - - columns = trigger.columns - if columns is not None: - if columns == "": - columns = None - else: - columns = columns.split("|") - - data_handler.subscribe(stop_event, self._callback, trigger.table_name, columns=columns) - - def _callback(self, row, key=None): - logger.debug(f"trigger call: {row}, {key}") - - # set up environment - ctx.load(self._ctx_dump) - - try: - if key is not None: - row.update(key) - table = [row] - - # inject data to query - query = copy.deepcopy(self.query) - - def find_table(node, is_table, **kwargs): - if is_table: - if isinstance(node, Identifier) and len(node.parts) == 1 and node.parts[0] == "TABLE_DELTA": - # replace with data - return Data(table, alias=node.alias) - - query_traversal(query, find_table) - - # exec query - ret = self.command_executor.execute_command(query) - if ret.error_code is not None: - self.set_error(ret.error_message) - - except Exception: - logger.exception("Error during trigger call processing") - self.set_error(str(traceback.format_exc())) - - db.session.commit() diff --git a/mindsdb/interfaces/triggers/triggers_controller.py b/mindsdb/interfaces/triggers/triggers_controller.py deleted file mode 100644 index 3822502a133..00000000000 --- a/mindsdb/interfaces/triggers/triggers_controller.py +++ /dev/null @@ -1,171 +0,0 @@ -from mindsdb_sql_parser.ast import Identifier - -from mindsdb_sql_parser import parse_sql, ParsingException - -from mindsdb.interfaces.storage import db -from mindsdb.interfaces.database.projects import ProjectController -from 
mindsdb.utilities.context import context as ctx -from mindsdb.utilities.config import config - -from mindsdb.api.executor.controllers.session_controller import SessionController - - -class TriggersController: - OBJECT_TYPE = "trigger" - - def add(self, name, project_name, table, query_str, columns=None): - if project_name is None: - project_name = config.get("default_project") - project_controller = ProjectController() - project = project_controller.get(name=project_name) - - from mindsdb.api.executor.controllers.session_controller import ( - SessionController, - ) - - session = SessionController() - - # check exists - trigger = self.get_trigger_record(name, project_name) - if trigger is not None: - raise Exception(f"Trigger already exists: {name}") - - # check table - if len(table.parts) < 2: - raise Exception(f"Database or table not found: {table}") - - table_name = Identifier(parts=table.parts[1:]).to_string() - db_name = table.parts[0] - - db_integration = session.integration_controller.get(db_name) - db_handler = session.integration_controller.get_data_handler(db_name) - - if not hasattr(db_handler, "subscribe"): - raise Exception(f"Handler {db_integration['engine']} doest support subscription") - - df = db_handler.get_tables().data_frame - column = "table_name" - if column not in df.columns: - column = df.columns[0] - tables = list(df[column]) - - # check only if tables are visible - if len(tables) > 0 and table_name not in tables: - raise Exception(f"Table {table_name} not found in {db_name}") - - columns_str = None - if columns is not None and len(columns) > 0: - # join to string with delimiter - columns_str = "|".join([col.parts[-1] for col in columns]) - - # check sql - try: - parse_sql(query_str) - except ParsingException as e: - raise ParsingException(f"Unable to parse: {query_str}: {e}") from e - - # create job record - record = db.Triggers( - name=name, - project_id=project.id, - database_id=db_integration["id"], - table_name=table_name, - 
query_str=query_str, - columns=columns_str, - ) - db.session.add(record) - db.session.flush() - - task_record = db.Tasks( - company_id=ctx.company_id, - user_id=ctx.user_id, - user_class=ctx.user_class, - object_type=self.OBJECT_TYPE, - object_id=record.id, - ) - db.session.add(task_record) - db.session.commit() - - def delete(self, name, project_name): - # check exists - - trigger = self.get_trigger_record(name, project_name) - if trigger is None: - raise Exception(f"Trigger doesn't exist: {name}") - - task_query = db.Tasks.query.filter( - db.Tasks.object_type == self.OBJECT_TYPE, - db.Tasks.object_id == trigger.id, - db.Tasks.company_id == ctx.company_id, - ) - if ctx.enforce_user_id: - task_query = task_query.filter(db.Tasks.user_id == ctx.user_id) - task = task_query.first() - - if task is not None: - db.session.delete(task) - - db.session.delete(trigger) - - db.session.commit() - - def get_trigger_record(self, name, project_name): - project_controller = ProjectController() - project = project_controller.get(name=project_name) - - filters = [ - db.Triggers.project_id == project.id, - db.Triggers.name == name, - db.Tasks.object_type == self.OBJECT_TYPE, - db.Tasks.company_id == ctx.company_id, - ] - if ctx.enforce_user_id: - filters.append(db.Tasks.user_id == ctx.user_id) - query = db.session.query(db.Triggers).join(db.Tasks, db.Triggers.id == db.Tasks.object_id).filter(*filters) - return query.first() - - def get_list(self, project_name=None): - session = SessionController() - - filters = [ - db.Tasks.object_type == self.OBJECT_TYPE, - db.Tasks.company_id == ctx.company_id, - ] - if ctx.enforce_user_id: - filters.append(db.Tasks.user_id == ctx.user_id) - query = ( - db.session.query( - db.Tasks.object_id, - db.Triggers.project_id, - db.Triggers.name, - db.Triggers.database_id, - db.Triggers.table_name, - db.Triggers.query_str, - db.Tasks.last_error, - ) - .join(db.Triggers, db.Triggers.id == db.Tasks.object_id) - .filter(*filters) - ) - - project_controller = 
ProjectController() - if project_name is not None: - project = project_controller.get(name=project_name) - query = query.filter(db.Triggers.project_id == project.id) - - database_names = {i["id"]: i["name"] for i in session.database_controller.get_list()} - - project_names = {i.id: i.name for i in project_controller.get_list()} - data = [] - for record in query: - data.append( - { - "id": record.object_id, - "project": project_names[record.project_id], - "name": record.name, - "database": database_names.get(record.database_id, "?"), - "table": record.table_name, - "query": record.query_str, - "last_error": record.last_error, - } - ) - return data diff --git a/mindsdb/interfaces/variables/__init__.py b/mindsdb/interfaces/variables/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/interfaces/variables/variables_controller.py b/mindsdb/interfaces/variables/variables_controller.py deleted file mode 100644 index 0ce0b8238cb..00000000000 --- a/mindsdb/interfaces/variables/variables_controller.py +++ /dev/null @@ -1,91 +0,0 @@ -import os -from typing import Callable - -from mindsdb_sql_parser import Function, Constant, Variable - -from mindsdb.utilities import log -from mindsdb.interfaces.storage.fs import RESOURCE_GROUP -from mindsdb.interfaces.storage.json import get_json_storage -from mindsdb.utilities.context import context as ctx - - -logger = log.getLogger(__name__) - - -ENV_VAR_PREFIX = "MDB_" - - -class VariablesController: - def __init__(self) -> None: - self._storage = get_json_storage(resource_id=0, resource_group=RESOURCE_GROUP.SYSTEM) - self._store_key = "variables" - self._data = None - - def _get_data(self) -> dict: - if self._data is None: - self._data = self._storage.get(self._store_key) - if self._data is None: - self._data = {} - return {**self._data, **ctx.params} - - def get_value(self, name: str): - data = self._get_data() - if name not in data: - raise ValueError(f"Variable {name} is not defined") - return 
data[name] - - def set_value(self, name: str, value): - data = self._get_data() - data[name] = value - self._storage.set(self._store_key, data) - - def _from_env(self, name: Constant) -> str: - # gets variable value from environment. - # available names are restricted by ENV_VAR_PREFIX to don't provide access to arbitrary venv variable - - var_name = name.value - if not var_name.startswith(ENV_VAR_PREFIX): - raise ValueError(f"Can access only to variable names starting with {ENV_VAR_PREFIX}") - if var_name not in os.environ: - raise ValueError(f"Environment variable {var_name} is not defined") - return os.environ[var_name] - - def _get_function(self, name: str) -> Callable: - if name == "from_env": - return self._from_env - raise ValueError(f"Function {name} is not found") - - def set_variable(self, name: str, value): - # store new value for variable in database - # if value is a function - extract value using this function - - name = name.lower() - if isinstance(value, Function): - fnc = self._get_function(value.op) - value = fnc(*value.args) - - elif isinstance(value, Constant): - value = value.value - - else: - # ignore - return - - self.set_value(name, value) - - def fill_parameters(self, var): - # recursively check input and fill Variables if they exist there - - if isinstance(var, Variable): - return self.get_value(var.value.lower()) - if isinstance(var, Function): - fnc = self._get_function(var.op) - return fnc(*var.args) - elif isinstance(var, dict): - return {key: self.fill_parameters(value) for key, value in var.items()} - elif isinstance(var, list): - return [self.fill_parameters(value) for value in var] - return var - - -variables_controller = VariablesController() diff --git a/mindsdb/metrics/__init__.py b/mindsdb/metrics/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/metrics/metrics.py b/mindsdb/metrics/metrics.py deleted file mode 100644 index 8bbdbca44d6..00000000000 --- a/mindsdb/metrics/metrics.py +++ 
/dev/null @@ -1,50 +0,0 @@ -from http import HTTPStatus -import functools -import time -import os - -from prometheus_client import Histogram, Summary - - -INTEGRATION_HANDLER_QUERY_TIME = Summary( - 'mindsdb_integration_handler_query_seconds', - 'How long integration handlers take to answer queries', - ('integration', 'response_type') -) - -INTEGRATION_HANDLER_RESPONSE_SIZE = Summary( - 'mindsdb_integration_handler_response_size', - 'How many rows are returned by an integration handler query', - ('integration', 'response_type') -) - -_REST_API_LATENCY = Histogram( - 'mindsdb_rest_api_latency_seconds', - 'How long REST API requests take to complete, grouped by method, endpoint, and status', - ('method', 'endpoint', 'status') -) - - -def api_endpoint_metrics(method: str, uri: str): - def decorator_metrics(endpoint_func): - @functools.wraps(endpoint_func) - def wrapper_metrics(*args, **kwargs): - if os.environ.get('PROMETHEUS_MULTIPROC_DIR', None) is None: - return endpoint_func(*args, **kwargs) - time_before_query = time.perf_counter() - try: - response = endpoint_func(*args, **kwargs) - except Exception as e: - # Still record metrics for unexpected exceptions. 
- elapsed_seconds = time.perf_counter() - time_before_query - api_latency_with_labels = _REST_API_LATENCY.labels( - method, uri, HTTPStatus.INTERNAL_SERVER_ERROR.value) - api_latency_with_labels.observe(elapsed_seconds) - raise e - elapsed_seconds = time.perf_counter() - time_before_query - status = response.status_code if hasattr(response, 'status_code') else HTTPStatus.OK.value - api_latency_with_labels = _REST_API_LATENCY.labels(method, uri, status) - api_latency_with_labels.observe(elapsed_seconds) - return response - return wrapper_metrics - return decorator_metrics diff --git a/mindsdb/metrics/server.py b/mindsdb/metrics/server.py deleted file mode 100644 index d138928c6d5..00000000000 --- a/mindsdb/metrics/server.py +++ /dev/null @@ -1,26 +0,0 @@ -import os - -from flask import Flask, Response -from prometheus_client import generate_latest, multiprocess, CollectorRegistry - -from mindsdb.utilities import log - -_CONTENT_TYPE_LATEST = str('text/plain; version=0.0.4; charset=utf-8') -logger = log.getLogger(__name__) - - -def init_metrics(app: Flask): - prometheus_dir = os.environ.get('PROMETHEUS_MULTIPROC_DIR', None) - if prometheus_dir is None: - logger.info("PROMETHEUS_MULTIPROC_DIR environment variable is not set. Metrics server won't be started.") - return - elif not os.path.isdir(prometheus_dir): - os.makedirs(prometheus_dir) - # See: https://prometheus.github.io/client_python/multiprocess/ - registry = CollectorRegistry() - multiprocess.MultiProcessCollector(registry) - - # It's important that the PROMETHEUS_MULTIPROC_DIR env variable is set, and the dir is empty. 
- @app.route('/metrics') - def metrics(): - return Response(generate_latest(registry), mimetype=_CONTENT_TYPE_LATEST) diff --git a/mindsdb/migrations/README.md b/mindsdb/migrations/README.md deleted file mode 100644 index a158bfc4709..00000000000 --- a/mindsdb/migrations/README.md +++ /dev/null @@ -1,38 +0,0 @@ - -Execution alembic should perform in mindsdb/migrations directory -with adding mindsdb folder to python path. Example: - -`cd mindsdb/migrations` - -`env PYTHONPATH=../../ alembic upgrade head` - -## Create migration - -- alembic revision --autogenerate -m - -Creating migration is required after changing database models during development process. - -## Create 'checkpoint' migration - -During the development of the app, more and more migrations are added. When running the app in a new environment, significant time is required to apply all migrations one-by-one. To avoid this, it's useful to create a new 'base' migration which: - - completely creates the current state of the DB if migrations have never been applied before (app running in a new environment) - - does nothing otherwise (app running in an existing environment) - -To create new 'checkpoint' migration: - - Create a new 'base' migration - - Edit the beginning of the `upgrade` method to prevent it from running in existing environments (see previous 'checkpoint' migrations for examples) - - Add statements at the end of the `upgrade` method to initialize required data (such as the default project) - - Clear the body of the `downgrade` method - - Set `down_revision` to the ID of the previous migration - - Set the ID of the created 'checkpoint' migration to `current_checkpoint` in `migrate.py` - -## Apply all migrations - -Migrations are applying automatically at start of application -In case when database version is newer than backend then not perform migrations. 
- -## Manual manage database version -- alembic upgrade +2 -- alembic downgrade -1 -- alembic upgrade -- and other alembic commands diff --git a/mindsdb/migrations/__init__.py b/mindsdb/migrations/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/migrations/alembic.ini b/mindsdb/migrations/alembic.ini deleted file mode 100644 index e113ad78d37..00000000000 --- a/mindsdb/migrations/alembic.ini +++ /dev/null @@ -1,65 +0,0 @@ -# A generic, single database configuration. - -[alembic] -# path to migration scripts -script_location = ./ - -# template used to generate migration files -file_template = %%(year)d-%%(month).2d-%%(day).2d_%%(rev)s_%%(slug)s - -# sys.path path, will be prepended to sys.path if present. -# defaults to the current working directory. -prepend_sys_path = . - -# timezone to use when rendering the date within the migration file -# as well as the filename. -# If specified, requires the python-dateutil library that can be -# installed by adding `alembic[tz]` to the pip requirements -# string value is passed to dateutil.tz.gettz() -# leave blank for localtime -# timezone = - -# max length of characters to apply to the -# "slug" field -# truncate_slug_length = 40 - -# set to 'true' to run the environment during -# the 'revision' command, regardless of autogenerate -# revision_environment = false - -# set to 'true' to allow .pyc and .pyo files without -# a source .py file to be detected as revisions in the -# versions/ directory -# sourceless = false - -# version location specification; This defaults -# to migrations1/versions. When using multiple version -# directories, initial revisions must be specified with --version-path. -# The path separator used here should be the separator specified by "version_path_separator" -# version_locations = %(here)s/bar:%(here)s/bat:migrations1/versions - -# version path separator; As mentioned above, this is the character used to split -# version_locations. 
Valid values are: -# -# version_path_separator = : -# version_path_separator = ; -# version_path_separator = space -version_path_separator = os # default: use os.pathsep - -# the output encoding used when revision files -# are written from script.py.mako -# output_encoding = utf-8 - -#sqlalchemy.url = driver://user:pass@localhost/dbname - - -[post_write_hooks] -# post_write_hooks defines scripts or Python functions that are run -# on newly generated revision scripts. See the documentation for further -# detail and examples - -# format using "black" - use the console_scripts runner, against the "black" entrypoint -# hooks = black -# black.type = console_scripts -# black.entrypoint = black -# black.options = -l 79 REVISION_SCRIPT_FILENAME diff --git a/mindsdb/migrations/env.py b/mindsdb/migrations/env.py deleted file mode 100644 index 4cd1de0ed85..00000000000 --- a/mindsdb/migrations/env.py +++ /dev/null @@ -1,79 +0,0 @@ -from alembic import context -from sqlalchemy import engine_from_config, pool - -from mindsdb.interfaces.storage import db -from mindsdb.utilities.config import config as app_config - -# this is the Alembic Config object, which provides -# access to the values within the .ini file in use. -config = context.config - -# add your model's MetaData object here -# for 'autogenerate' support -# from myapp import mymodel -# target_metadata = mymodel.Base.metadata - -# initialize - -db.init() - -target_metadata = db.Base.metadata - -config.set_main_option("sqlalchemy.url", app_config["storage_db"]) - - -# other values from the config, defined by the needs of env.py, -# can be acquired: -# my_important_option = config.get_main_option("my_important_option") -# ... etc. - - -def run_migrations_offline(): - """Run migrations in 'offline' mode. - - This configures the context with just a URL - and not an Engine, though an Engine is acceptable - here as well. By skipping the Engine creation - we don't even need a DBAPI to be available. 
- - Calls to context.execute() here emit the given string to the - script output. - - """ - url = config.get_main_option("sqlalchemy.url") - context.configure( - url=url, - target_metadata=target_metadata, - literal_binds=True, - dialect_opts={"paramstyle": "named"}, - render_as_batch=True, - ) - - with context.begin_transaction(): - context.run_migrations() - - -def run_migrations_online(): - """Run migrations in 'online' mode. - - In this scenario we need to create an Engine - and associate a connection with the context. - - """ - connectable = engine_from_config( - config.get_section(config.config_ini_section), - prefix="sqlalchemy.", - poolclass=pool.NullPool, - ) - - with connectable.connect() as connection: - context.configure(connection=connection, target_metadata=target_metadata, render_as_batch=True) - - with context.begin_transaction(): - context.run_migrations() - - -if context.is_offline_mode(): - run_migrations_offline() -else: - run_migrations_online() diff --git a/mindsdb/migrations/migrate.py b/mindsdb/migrations/migrate.py deleted file mode 100644 index 2ac55b127a2..00000000000 --- a/mindsdb/migrations/migrate.py +++ /dev/null @@ -1,91 +0,0 @@ -from pathlib import Path - -from alembic.command import upgrade, autogen # noqa -from alembic.config import Config -from alembic.script import ScriptDirectory -from alembic.script.revision import ResolutionError -from alembic.operations import Operations -from alembic.migration import MigrationContext -from alembic import util - -import mindsdb.interfaces.storage.db as db -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - -# This is a migration that is like a 'base version'. Applying only this -# migration to a fresh DB is equivalent to applying all previous migrations. 
-current_checkpoint = "9f150e4f9a05" - - -def apply_checkpoint_migration(script) -> None: - """Apply the checkpoint migration to the database.""" - with db.engine.begin() as connection: - context = MigrationContext.configure( - connection, - opts={ - "as_sql": False, - "starting_rev": None, # ignore current version - "destination_rev": current_checkpoint, - }, - ) - revision = script.get_revision(current_checkpoint) - if not revision: - raise util.CommandError(f"Migration {current_checkpoint} not found.") - - op = Operations(context) - revision.module.upgrade(op) - context.stamp(script, current_checkpoint) - connection.commit() - - -def get_current_revision() -> str | None: - """Get the current revision of the database. - - Returns: - str | None: The current revision of the database. - """ - with db.engine.begin() as conn: - mc = MigrationContext.configure(conn) - return mc.get_current_revision() - - -def migrate_to_head(): - """Trying to update database to head revision. - If alembic unable to recognize current revision (In case when database version is newer than backend) - then do nothing. 
- """ - logger.debug("Applying database migrations") - - config_file = Path(__file__).parent / "alembic.ini" - config = Config(config_file) - - # mindsdb can runs not from project directory - script_location_abc = config_file.parent / config.get_main_option("script_location") - config.set_main_option("script_location", str(script_location_abc)) - - script = ScriptDirectory.from_config(config) - cur_revision = get_current_revision() - if cur_revision is None: - apply_checkpoint_migration(script) - cur_revision = get_current_revision() - - try: - script.revision_map.get_revision(cur_revision) - except ResolutionError: - raise Exception("Database version higher than application.") - - head_rev = script.get_current_head() - if cur_revision == head_rev: - logger.debug("The database is in its current state, no updates are required.") - return - - logger.info("Migrations are available. Applying updates to the database.") - upgrade(config=config, revision="head") - - -if __name__ == "__main__": - # have to import this because - # all env initialization happens here - db.init() - migrate_to_head() diff --git a/mindsdb/migrations/script.py.mako b/mindsdb/migrations/script.py.mako deleted file mode 100644 index 25a7a78c4d4..00000000000 --- a/mindsdb/migrations/script.py.mako +++ /dev/null @@ -1,26 +0,0 @@ -"""${message} - -Revision ID: ${up_revision} -Revises: ${down_revision | comma,n} -Create Date: ${create_date} - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - -${imports if imports else ""} - -# revision identifiers, used by Alembic. 
-revision = ${repr(up_revision)} -down_revision = ${repr(down_revision)} -branch_labels = ${repr(branch_labels)} -depends_on = ${repr(depends_on)} - - -def upgrade(): - ${upgrades if upgrades else "pass"} - - -def downgrade(): - ${downgrades if downgrades else "pass"} diff --git a/mindsdb/migrations/versions/2021-11-30_17c3d2384711_init.py b/mindsdb/migrations/versions/2021-11-30_17c3d2384711_init.py deleted file mode 100644 index 0d6f7e1cd30..00000000000 --- a/mindsdb/migrations/versions/2021-11-30_17c3d2384711_init.py +++ /dev/null @@ -1,178 +0,0 @@ -import datetime - -from alembic.autogenerate import produce_migrations, render, api -from alembic import context -from sqlalchemy import UniqueConstraint -from sqlalchemy.orm import declarative_base -from sqlalchemy import Column, Integer, String, DateTime, Boolean, Index - -# required for code execution -from alembic import op # noqa -import sqlalchemy as sa # noqa - -import mindsdb.interfaces.storage.db # noqa -from mindsdb.interfaces.storage.db import Json, Array -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - -# revision identifiers, used by Alembic. 
-revision = '17c3d2384711' -down_revision = None -branch_labels = None -depends_on = None - -# ========================================== current database state ======================================== - - -class Base: - __allow_unmapped__ = True - - -Base = declarative_base(cls=Base) - -# Source: https://stackoverflow.com/questions/26646362/numpy-array-is-not-json-serializable - - -class Semaphor(Base): - __tablename__ = 'semaphor' - - id = Column(Integer, primary_key=True) - updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) - created_at = Column(DateTime, default=datetime.datetime.now) - entity_type = Column('entity_type', String) - entity_id = Column('entity_id', Integer) - action = Column(String) - company_id = Column(Integer) - uniq_const = UniqueConstraint('entity_type', 'entity_id') - - -class Datasource(Base): - __tablename__ = 'datasource' - - id = Column(Integer, primary_key=True) - updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) - created_at = Column(DateTime, default=datetime.datetime.now) - name = Column(String) - data = Column(String) # Including, e.g. 
the query used to create it and even the connection info when there's no integration associated with it -- A JSON - creation_info = Column(String) - analysis = Column(String) # A JSON - company_id = Column(Integer) - mindsdb_version = Column(String) - datasources_version = Column(String) - integration_id = Column(Integer) - - -class Predictor(Base): - __tablename__ = 'predictor' - - id = Column(Integer, primary_key=True) - updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) - created_at = Column(DateTime, default=datetime.datetime.now) - name = Column(String) - data = Column(Json) # A JSON -- should be everything returned by `get_model_data`, I think - to_predict = Column(Array) - company_id = Column(Integer) - mindsdb_version = Column(String) - native_version = Column(String) - datasource_id = Column(Integer) - is_custom = Column(Boolean) # to del - learn_args = Column(Json) - update_status = Column(String, default='up_to_date') - - json_ai = Column(Json, nullable=True) - code = Column(String, nullable=True) - lightwood_version = Column(String, nullable=True) - dtype_dict = Column(Json, nullable=True) - - -class AITable(Base): - __tablename__ = 'ai_table' - id = Column(Integer, primary_key=True) - updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) - created_at = Column(DateTime, default=datetime.datetime.now) - name = Column(String) - integration_name = Column(String) - integration_query = Column(String) - query_fields = Column(Json) - predictor_name = Column(String) - predictor_columns = Column(Json) - company_id = Column(Integer) - - -class Log(Base): - __tablename__ = 'log' - - id = Column(Integer, primary_key=True) - created_at = Column(DateTime, default=datetime.datetime.now) - log_type = Column(String) # log, info, warning, traceback etc - source = Column(String) # file + line - company_id = Column(Integer) - payload = Column(String) - created_at_index = Index("some_index", 
"created_at_index") - - -class Integration(Base): - __tablename__ = 'integration' - id = Column(Integer, primary_key=True) - updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) - created_at = Column(DateTime, default=datetime.datetime.now) - name = Column(String, nullable=False) - data = Column(Json) - company_id = Column(Integer) - - -class Stream(Base): - __tablename__ = 'stream' - id = Column(Integer, primary_key=True) - name = Column(String, nullable=False) - stream_in = Column(String, nullable=False) - stream_out = Column(String, nullable=False) - anomaly_stream = Column(String) - integration = Column(String) - predictor = Column(String, nullable=False) - company_id = Column(Integer) - updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) - created_at = Column(DateTime, default=datetime.datetime.now) - type = Column(String, default='unknown') - connection_info = Column(Json, default={}) - learning_params = Column(Json, default={}) - learning_threshold = Column(Integer, default=0) - - -# ==================================================================================================== - - -def upgrade(): - ''' - First migration. 
- Generates a migration script by difference between model and database and executes it - ''' - - target_metadata = Base.metadata - - mc = context.get_context() - - migration_script = produce_migrations(mc, target_metadata) - - autogen_context = api.AutogenContext( - mc, autogenerate=True - ) - - # Seems to be the only way to apply changes to the database - template_args = {} - render._render_python_into_templatevars( - autogen_context, migration_script, template_args - ) - - code = template_args['upgrades'] - code = code.replace('\n ', '\n') - logger.info('\nPerforming database changes:') - logger.info(code) - exec(code) - - -def downgrade(): - - # We don't know state to downgrade - raise NotImplementedError() diff --git a/mindsdb/migrations/versions/2022-01-26_47f97b83cee4_views.py b/mindsdb/migrations/versions/2022-01-26_47f97b83cee4_views.py deleted file mode 100644 index 5dae5258f96..00000000000 --- a/mindsdb/migrations/versions/2022-01-26_47f97b83cee4_views.py +++ /dev/null @@ -1,38 +0,0 @@ -"""views - -Revision ID: 47f97b83cee4 -Revises: 17c3d2384711 -Create Date: 2022-01-26 12:07:05.075977 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. -revision = '47f97b83cee4' -down_revision = '17c3d2384711' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - 'view', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('query', sa.String(), nullable=False), - sa.Column('datasource_id', sa.Integer(), nullable=False), - sa.ForeignKeyConstraint(['datasource_id'], ['datasource.id'], ), - sa.PrimaryKeyConstraint('id') - ) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_table('view') - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2022-02-09_27c5aca9e47e_db_files.py b/mindsdb/migrations/versions/2022-02-09_27c5aca9e47e_db_files.py deleted file mode 100644 index 566298a77cf..00000000000 --- a/mindsdb/migrations/versions/2022-02-09_27c5aca9e47e_db_files.py +++ /dev/null @@ -1,256 +0,0 @@ -"""db files - -Revision ID: 27c5aca9e47e -Revises: 47f97b83cee4 -Create Date: 2022-02-09 10:43:29.854671 - -""" -import json -import datetime - -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db -from sqlalchemy.sql import text - - -# revision identifiers, used by Alembic. -revision = '27c5aca9e47e' -down_revision = '47f97b83cee4' -branch_labels = None -depends_on = None - - -def upgrade(): - op.drop_table('ai_table') - - conn = op.get_bind() - - # views was created with unnamed fk. Therefore need recreate it - op.create_table( - 'view_tmp', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('query', sa.String(), nullable=False), - sa.Column('integration_id', sa.Integer(), nullable=False), - sa.ForeignKeyConstraint(['integration_id'], ['integration.id'], name='fk_integration_id'), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('name', 'company_id', name='unique_view_name_company_id') - ) - conn.execute(text(""" - insert into view_tmp (id, name, company_id, query, integration_id) - select id, name, company_id, query, datasource_id from view; - """)) - op.drop_table('view') - op.rename_table('view_tmp', 'view') - - op.create_table( - 'analysis', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('analysis', mindsdb.interfaces.storage.db.Json(), nullable=False), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - - with 
op.batch_alter_table('datasource', schema=None) as batch_op: - batch_op.add_column(sa.Column('analysis_id', sa.Integer(), nullable=True)) - batch_op.create_foreign_key('fk_ds_analysis_id', 'analysis', ['analysis_id'], ['id']) - batch_op.add_column(sa.Column('ds_class', sa.String(), nullable=True)) - - session = sa.orm.Session(bind=conn) - dsatasources = conn.execute(sa.text('select id, analysis from datasource')).fetchall() - for row in dsatasources: - if row['analysis'] is not None: - # NOTE 'returning' is relatively new in sqlite, so better will be use select after insert. - conn.execute( - text(""" - insert into analysis (analysis) select analysis from datasource where id = :id; - """), { - 'id': row['id'] - } - ) - analysis_id = conn.execute(text(""" - select id from analysis order by id desc limit 1; - """)).fetchall() - conn.execute( - text(""" - update datasource set analysis_id = :analysis_id where id = :id - """), { - 'analysis_id': analysis_id[0][0], - 'id': row['id'] - } - ) - - with op.batch_alter_table('datasource', schema=None) as batch_op: - batch_op.drop_column('analysis') - - op.create_table( - 'file', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('source_file_path', sa.String(), nullable=False), - sa.Column('file_path', sa.String(), nullable=False), - sa.Column('row_count', sa.Integer(), nullable=False), - sa.Column('columns', mindsdb.interfaces.storage.db.Json(), nullable=False), - sa.Column('created_at', sa.DateTime(), nullable=True, server_default=sa.func.current_timestamp()), - sa.Column('updated_at', sa.DateTime(), nullable=True, server_default=sa.func.current_timestamp(), server_onupdate=sa.func.current_timestamp()), - sa.Column('analysis_id', sa.Integer(), nullable=True), - sa.ForeignKeyConstraint(['analysis_id'], ['analysis.id'], name='fk_analysis_id'), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('name', 
'company_id', name='unique_file_name_company_id') - ) - - # delete ds where data is none - dsatasources = conn.execute(text('select * from datasource')).fetchall() - for ds in dsatasources: - if ds['data'] is None: - conn.execute(text('delete from datasource where id = :id'), {'id': ds['id']}) - continue - ds_data = json.loads(ds['data']) - creation_info = json.loads(ds['creation_info']) - datasource_name = ds_data.get('source_type') - if datasource_name == 'file': - created_at = None - if isinstance(ds['created_at'], str): - created_at = datetime.datetime.fromisoformat(ds['created_at']) - elif isinstance(ds['created_at'], [float, int]): - created_at = datetime.fromtimestamp(ds['created_at']) - - updated_at = None - if isinstance(ds['updated_at'], str): - updated_at = datetime.datetime.fromisoformat(ds['updated_at']) - elif isinstance(ds['updated_at'], [float, int]): - updated_at = datetime.fromtimestamp(ds['updated_at']) - - file = mindsdb.interfaces.storage.db.File( - name=ds['name'], - company_id=ds['company_id'], - source_file_path=ds_data['source'], - file_path=creation_info['args'][0], - row_count=ds_data['row_count'], - columns=ds_data['columns'], - created_at=created_at, - updated_at=updated_at, - analysis_id=ds['analysis_id'] - ) - session.add(file) - session.flush() - # ds_data['file_id'] = file.id - ds_data['source'] = { - 'mindsdb_file_name': ds['name'] - # 'source': ds_data['source'] - } - conn.execute( - text(""" - update datasource set data = :ds_data where id = :id; - """), { - 'id': ds['id'], - 'ds_data': json.dumps(ds_data) - } - ) - - conn.execute( - text(""" - update datasource - set integration_id = (select id from integration where name = :datasource_name and company_id = :company_id), - ds_class = :ds_class - where id = :id - """), { - 'datasource_name': datasource_name, - 'company_id': ds['company_id'], - 'ds_class': creation_info['class'], - 'id': ds['id'] - } - ) - - session.commit() - - op.rename_table('datasource', 'dataset') - - with 
op.batch_alter_table('dataset', schema=None) as batch_op: - batch_op.create_foreign_key('fk_integration_id', 'integration', ['integration_id'], ['id']) - - # NOTE two different 'batch' is necessary, in other way FK is not creating - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.alter_column('datasource_id', new_column_name='dataset_id') - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.create_foreign_key('fk_predictor_dataset_id', 'dataset', ['dataset_id'], ['id']) - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.create_unique_constraint('unique_predictor_name_company_id', ['name', 'company_id']) - - with op.batch_alter_table('integration', schema=None) as batch_op: - batch_op.create_unique_constraint('unique_integration_name_company_id', ['name', 'company_id']) - - with op.batch_alter_table('dataset', schema=None) as batch_op: - batch_op.create_unique_constraint('unique_dataset_name_company_id', ['name', 'company_id']) - - -def downgrade(): - with op.batch_alter_table('integration', schema=None) as batch_op: - batch_op.drop_constraint('unique_integration_name_company_id', type_='unique') - - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.drop_constraint('unique_predictor_name_company_id', type_='unique') - - with op.batch_alter_table('dataset', schema=None) as batch_op: - batch_op.drop_constraint('unique_dataset_name_company_id', type_='unique') - - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.drop_constraint('fk_predictor_dataset_id', type_='foreignkey') - batch_op.alter_column('dataset_id', new_column_name='datasource_id') - - with op.batch_alter_table('dataset', schema=None) as batch_op: - batch_op.drop_constraint('fk_integration_id', type_='foreignkey') - batch_op.add_column(sa.Column('analysis', sa.VARCHAR(), nullable=True)) - batch_op.drop_constraint('fk_ds_analysis_id', type_='foreignkey') - 
batch_op.drop_column('ds_class') - - op.rename_table('dataset', 'datasource') - - op.drop_table('file') - - conn = op.get_bind() - conn.execute(text(""" - update datasource set analysis = (select analysis from analysis where id = analysis_id) - """)) - - with op.batch_alter_table('datasource', schema=None) as batch_op: - batch_op.drop_column('analysis_id') - - op.drop_table('analysis') - - op.create_table( - 'ai_table', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.Column('name', sa.String(), nullable=False), - sa.Column('integration_name', sa.String(), nullable=False), - sa.Column('integration_query', sa.String(), nullable=False), - sa.Column('query_fields', mindsdb.interfaces.storage.db.Json(), nullable=False), - sa.Column('predictor_name', sa.String(), nullable=False), - sa.Column('predictor_columns', mindsdb.interfaces.storage.db.Json(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - - op.create_table( - 'view_tmp', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('query', sa.String(), nullable=False), - sa.Column('datasource_id', sa.Integer(), nullable=False), - sa.ForeignKeyConstraint(['datasource_id'], ['integration.id'], name='fk_datasource_id'), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('name', 'company_id', name='unique_name_company_id') - ) - conn.execute(text(""" - insert into view_tmp (id, name, company_id, query, datasource_id) - select id, name, company_id, query, integration_id from view; - """)) - op.drop_table('view') - op.rename_table('view_tmp', 'view') diff --git a/mindsdb/migrations/versions/2022-05-25_d74c189b87e6_predictor_integration.py b/mindsdb/migrations/versions/2022-05-25_d74c189b87e6_predictor_integration.py deleted file 
mode 100644 index 03124d449ff..00000000000 --- a/mindsdb/migrations/versions/2022-05-25_d74c189b87e6_predictor_integration.py +++ /dev/null @@ -1,156 +0,0 @@ -"""predictor-integration - -Revision ID: d74c189b87e6 -Revises: 27c5aca9e47e -Create Date: 2022-05-25 15:00:16.284158 - -""" -import json - -from alembic import op -import sqlalchemy as sa -from sqlalchemy.sql import text - - -# revision identifiers, used by Alembic. -revision = 'd74c189b87e6' -down_revision = '27c5aca9e47e' -branch_labels = None -depends_on = None - - -def upgrade(): - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.add_column(sa.Column('integration_id', sa.Integer(), nullable=True)) - batch_op.add_column(sa.Column('fetch_data_query', sa.String(), nullable=True)) - batch_op.create_foreign_key('fk_integration_id', 'integration', ['integration_id'], ['id']) - - conn = op.get_bind() - - conn.execute(text(''' - insert into integration (name, data, company_id, created_at, updated_at) - select - 'files' as name, - '{}' as data, - company_id, - '2022-05-01 00:00:00.000000' as created_at, - '2022-05-01 00:00:00.000000' as updated_at - from (select distinct company_id from integration) t1 - ''')) - - predictors = conn.execute(text(''' - select t1.id, t1.company_id, t2.data - from predictor t1 left join dataset t2 on t1.id = t2.id - where dataset_id is not null and t2.data is not null - ''')).fetchall() - for row in predictors: - data = row['data'] - try: - data = json.loads(data) - except Exception: - continue - - if 'source_type' not in data: - continue - integration_name = data.get('source_type') - if isinstance(integration_name, str) is False or len(integration_name) == 0: - continue - if integration_name.lower() == 'file': - integration_name = 'files' - - fetch_data_query = data.get('source') - if isinstance(fetch_data_query, dict) is False: - continue - - if integration_name == 'files': - file_name = fetch_data_query.get('mindsdb_file_name') - if isinstance(file_name, 
str) is False or len(file_name) == 0: - continue - fetch_data_query = f'select * from {file_name}' - else: - fetch_data_query = fetch_data_query.get('query') - if isinstance(fetch_data_query, str) is False or len(fetch_data_query) == 0: - continue - - query = ''' - select id - from integration - where company_id = :company_id and lower(name) = lower(:name) - ''' - if row['company_id'] is None: - query = ''' - select id - from integration - where company_id is null and lower(name) = lower(:name) - ''' - integration = conn.execute(text(query), { - 'company_id': row['company_id'], - 'name': integration_name - }).fetchone() - if integration is None: - continue - - conn.execute(text(''' - update predictor - set integration_id = :integration_id, fetch_data_query = :fetch_data_query - where id = :predictor_id - '''), { - 'integration_id': integration.id, - 'fetch_data_query': fetch_data_query, - 'predictor_id': row['id'] - }) - - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.drop_column('dataset_id') - with op.batch_alter_table('file', schema=None) as batch_op: - batch_op.drop_constraint('unique_file_name_company_id', type_='unique') - batch_op.drop_constraint('fk_analysis_id', type_='foreignkey') - batch_op.drop_column('analysis_id') - with op.batch_alter_table('dataset', schema=None) as batch_op: - batch_op.drop_constraint('fk_ds_analysis_id', type_='foreignkey') - batch_op.drop_column('analysis_id') - op.drop_table('analysis') - op.drop_table('dataset') - - -def downgrade(): - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.add_column(sa.Column('dataset_id', sa.INTEGER(), nullable=True)) - batch_op.drop_constraint('fk_integration_id', type_='foreignkey') - batch_op.create_foreign_key('fk_predictor_dataset_id', 'dataset', ['dataset_id'], ['id']) - batch_op.drop_column('fetch_data_query') - batch_op.drop_column('integration_id') - - op.create_table( - 'analysis', - sa.Column('id', sa.INTEGER(), nullable=False), - 
sa.Column('analysis', sa.VARCHAR(), nullable=False), - sa.Column('created_at', sa.DATETIME(), nullable=True), - sa.Column('updated_at', sa.DATETIME(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - - op.create_table( - 'dataset', - sa.Column('id', sa.INTEGER(), nullable=False), - sa.Column('updated_at', sa.DATETIME(), nullable=True), - sa.Column('created_at', sa.DATETIME(), nullable=True), - sa.Column('name', sa.VARCHAR(), nullable=True), - sa.Column('data', sa.VARCHAR(), nullable=True), - sa.Column('creation_info', sa.VARCHAR(), nullable=True), - sa.Column('company_id', sa.INTEGER(), nullable=True), - sa.Column('mindsdb_version', sa.VARCHAR(), nullable=True), - sa.Column('datasources_version', sa.VARCHAR(), nullable=True), - sa.Column('integration_id', sa.INTEGER(), nullable=True), - sa.Column('analysis_id', sa.INTEGER(), nullable=True), - sa.Column('ds_class', sa.VARCHAR(), nullable=True), - sa.ForeignKeyConstraint(['analysis_id'], ['analysis.id'], name='fk_ds_analysis_id'), - sa.ForeignKeyConstraint(['integration_id'], ['integration.id'], name='fk_integration_id'), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('name', 'company_id', name='unique_dataset_name_company_id') - ) - - with op.batch_alter_table('file', schema=None) as batch_op: - batch_op.add_column(sa.Column('analysis_id', sa.INTEGER(), nullable=True)) - batch_op.create_foreign_key('fk_analysis_id', 'analysis', ['analysis_id'], ['id']) - batch_op.create_unique_constraint('unique_file_name_company_id', ['name', 'company_id']) diff --git a/mindsdb/migrations/versions/2022-07-08_999bceb904df_integration_args.py b/mindsdb/migrations/versions/2022-07-08_999bceb904df_integration_args.py deleted file mode 100644 index f4ccb539e75..00000000000 --- a/mindsdb/migrations/versions/2022-07-08_999bceb904df_integration_args.py +++ /dev/null @@ -1,106 +0,0 @@ -"""integration-args - -Revision ID: 999bceb904df -Revises: d74c189b87e6 -Create Date: 2022-07-08 10:58:19.822618 - -""" -import json - -from 
alembic import op -import sqlalchemy as sa -from sqlalchemy.sql import text - -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. -revision = '999bceb904df' -down_revision = 'd74c189b87e6' -branch_labels = None -depends_on = None - - -def upgrade(): - conn = op.get_bind() - session = sa.orm.Session(bind=conn) - - with op.batch_alter_table('integration', schema=None) as batch_op: - batch_op.add_column(sa.Column('engine', sa.String())) - - integrations = conn.execute(text(''' - select id, name, data from integration - ''')).fetchall() - - for row in integrations: - try: - data = json.loads(row['data']) - except Exception: - data = {} - - if 'test' in data: - del data['test'] - if 'publish' in data: - del data['publish'] - if 'enabled' in data: - del data['enabled'] - if 'database_name' in data: - if row['name'] is None: - row['name'] = data['database_name'] - del data['database_name'] - integration_type = data.get('type') - if integration_type is None: - if row['name'] == 'files': - integration_type = 'files' - if row['name'] == 'views': - integration_type = 'views' - if 'type' in data: - del data['type'] - - conn.execute( - text(""" - update integration - set engine = :integration_type, - data = :integration_data - where id = :integration_id - """), { - 'integration_type': integration_type, - 'integration_data': json.dumps(data), - 'integration_id': row['id'] - } - ) - - session.commit() - - -def downgrade(): - conn = op.get_bind() - session = sa.orm.Session(bind=conn) - - integrations = conn.execute(sa.text(''' - select id, name, type, data from integration - ''')).fetchall() - - for row in integrations: - try: - data = json.loads(row['data']) - except Exception: - data = {} - if row['engine'] is not None: - data['type'] = row['engine'] - - conn.execute( - text(""" - update integration - set data = :integration_data - where id = :integration_id - """), { - 'integration_data': json.dumps(data), - 'integration_id': row['id'] - } 
- ) - - with op.batch_alter_table('integration', schema=None) as batch_op: - batch_op.drop_column('engine') - - session.commit() diff --git a/mindsdb/migrations/versions/2022-07-15_b5b53e0ea7f8_training_data_rows_columns_count.py b/mindsdb/migrations/versions/2022-07-15_b5b53e0ea7f8_training_data_rows_columns_count.py deleted file mode 100644 index dd1c6aca899..00000000000 --- a/mindsdb/migrations/versions/2022-07-15_b5b53e0ea7f8_training_data_rows_columns_count.py +++ /dev/null @@ -1,29 +0,0 @@ -"""training_data_rows_columns_count - -Revision ID: b5b53e0ea7f8 -Revises: 999bceb904df -Create Date: 2022-07-15 12:21:40.523039 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. -revision = 'b5b53e0ea7f8' -down_revision = '999bceb904df' -branch_labels = None -depends_on = None - - -def upgrade(): - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.add_column(sa.Column('training_data_columns_count', sa.Integer(), nullable=True)) - batch_op.add_column(sa.Column('training_data_rows_count', sa.Integer(), nullable=True)) - - -def downgrade(): - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.drop_column('training_data_rows_count') - batch_op.drop_column('training_data_columns_count') diff --git a/mindsdb/migrations/versions/2022-07-22_6e834843e7e9_training_time.py b/mindsdb/migrations/versions/2022-07-22_6e834843e7e9_training_time.py deleted file mode 100644 index f79c54170c2..00000000000 --- a/mindsdb/migrations/versions/2022-07-22_6e834843e7e9_training_time.py +++ /dev/null @@ -1,33 +0,0 @@ -"""training_time - -Revision ID: 6e834843e7e9 -Revises: b5b53e0ea7f8 -Create Date: 2022-07-22 13:32:34.796604 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. 
-revision = '6e834843e7e9' -down_revision = 'b5b53e0ea7f8' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.add_column(sa.Column('training_start_at', sa.DateTime(), nullable=True)) - batch_op.add_column(sa.Column('training_stop_at', sa.DateTime(), nullable=True)) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.drop_column('training_stop_at') - batch_op.drop_column('training_start_at') - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2022-08-19_976f15a37e6a_predictors_versioning.py b/mindsdb/migrations/versions/2022-08-19_976f15a37e6a_predictors_versioning.py deleted file mode 100644 index a941888ee69..00000000000 --- a/mindsdb/migrations/versions/2022-08-19_976f15a37e6a_predictors_versioning.py +++ /dev/null @@ -1,42 +0,0 @@ -"""predictors-versioning - -Revision ID: 976f15a37e6a -Revises: 6e834843e7e9 -Create Date: 2022-08-19 11:22:52.085339 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.sql import text - -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. 
-revision = '976f15a37e6a' -down_revision = '6e834843e7e9' -branch_labels = None -depends_on = None - - -def upgrade(): - conn = op.get_bind() - session = sa.orm.Session(bind=conn) - - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.add_column(sa.Column('deleted_at', sa.DateTime(), nullable=True)) - batch_op.add_column(sa.Column('active', sa.Boolean(), nullable=True)) - - conn.execute(text(''' - update predictor set active = :val; - '''), { - 'val': True - }) - - session.commit() - - -def downgrade(): - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.drop_column('active') - batch_op.drop_column('deleted_at') diff --git a/mindsdb/migrations/versions/2022-08-25_6a54ba55872e_view_integration.py b/mindsdb/migrations/versions/2022-08-25_6a54ba55872e_view_integration.py deleted file mode 100644 index 8f6b423d4c4..00000000000 --- a/mindsdb/migrations/versions/2022-08-25_6a54ba55872e_view_integration.py +++ /dev/null @@ -1,36 +0,0 @@ -"""view_integration - -Revision ID: 6a54ba55872e -Revises: 6e834843e7e9 -Create Date: 2022-08-25 14:52:44.269082 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. -revision = '6a54ba55872e' -down_revision = '976f15a37e6a' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - - with op.batch_alter_table('view', schema=None) as batch_op: - batch_op.drop_constraint('fk_integration_id', type_='foreignkey') - batch_op.drop_column('integration_id') - - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - with op.batch_alter_table('view', schema=None) as batch_op: - batch_op.add_column(sa.Column('integration_id', sa.INTEGER(), autoincrement=False, nullable=False)) - batch_op.create_foreign_key('fk_integration_id', 'integration', ['integration_id'], ['id']) - - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2022-08-29_473e8f239481_straighten.py b/mindsdb/migrations/versions/2022-08-29_473e8f239481_straighten.py deleted file mode 100644 index d9937352e0d..00000000000 --- a/mindsdb/migrations/versions/2022-08-29_473e8f239481_straighten.py +++ /dev/null @@ -1,81 +0,0 @@ -"""straighten - -Revision ID: 473e8f239481 -Revises: 6a54ba55872e -Create Date: 2022-08-29 11:12:11.307317 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.sql import text - -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. -revision = '473e8f239481' -down_revision = '6a54ba55872e' -branch_labels = None -depends_on = None - - -def upgrade(): - conn = op.get_bind() - session = sa.orm.Session(bind=conn) - conn.execute(text(''' - delete from file - where exists ( - select 1 from file as t2 - where file.name = t2.name - and file.company_id = t2.company_id - and file.id < t2.id - ) - ''')) - session.commit() - - with op.batch_alter_table('file', schema=None) as batch_op: - batch_op.create_unique_constraint('unique_file_name_company_id', ['name', 'company_id']) - - conn.execute(text(''' - delete from integration where engine is null - ''')) - session.commit() - - with op.batch_alter_table('integration', schema=None) as batch_op: - batch_op.alter_column( - 'engine', - existing_type=sa.VARCHAR(), - nullable=False - ) - - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.drop_constraint('unique_predictor_name_company_id', type_='unique') - - conn.execute(text(''' - delete from semaphor - ''')) - session.commit() - - try: - with op.batch_alter_table('semaphor', schema=None) as batch_op: - 
batch_op.create_unique_constraint('uniq_const', ['entity_type', 'entity_id']) - except Exception: - pass - - -def downgrade(): - with op.batch_alter_table('semaphor', schema=None) as batch_op: - batch_op.drop_constraint('uniq_const', type_='unique') - - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.create_unique_constraint('unique_predictor_name_company_id', ['name', 'company_id']) - - with op.batch_alter_table('integration', schema=None) as batch_op: - batch_op.alter_column( - 'engine', - existing_type=sa.VARCHAR(), - nullable=True - ) - - with op.batch_alter_table('file', schema=None) as batch_op: - batch_op.drop_constraint('unique_file_name_company_id', type_='unique') diff --git a/mindsdb/migrations/versions/2022-09-06_96d5fef10caa_data_integration_id.py b/mindsdb/migrations/versions/2022-09-06_96d5fef10caa_data_integration_id.py deleted file mode 100644 index ec44f2a175b..00000000000 --- a/mindsdb/migrations/versions/2022-09-06_96d5fef10caa_data_integration_id.py +++ /dev/null @@ -1,69 +0,0 @@ -"""data_integration_id - -Revision ID: 96d5fef10caa -Revises: 473e8f239481 -Create Date: 2022-09-06 15:20:02.382203 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.sql import text - -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. 
-revision = '96d5fef10caa' -down_revision = '473e8f239481' -branch_labels = None -depends_on = None - - -def upgrade(): - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.add_column(sa.Column('data_integration_id', sa.Integer(), nullable=True)) - - conn = op.get_bind() - session = sa.orm.Session(bind=conn) - result = conn.execute(text(''' - select 1 from integration where name = 'lightwood'; - ''')).fetchall() - if len(result) == 0: - conn.execute(text(''' - insert into integration (name, engine, data) - values ('lightwood', 'lightwood', '{}') - ''')) - conn.execute(text(''' - update predictor set data_integration_id = integration_id - where exists (select 1 from integration where integration.id = predictor.integration_id); - ''')) - conn.execute(text(''' - update predictor set integration_id = (select id from integration where name = 'lightwood'); - ''')) - session.commit() - - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.alter_column( - 'integration_id', - existing_type=sa.INTEGER(), - nullable=False - ) - batch_op.create_foreign_key('fk_data_integration_id', 'integration', ['data_integration_id'], ['id']) - - -def downgrade(): - conn = op.get_bind() - session = sa.orm.Session(bind=conn) - conn.execute(text(''' - update predictor set integration_id = data_integration_id; - ''')) - session.commit() - - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.drop_constraint('fk_data_integration_id', type_='foreignkey') - batch_op.alter_column( - 'integration_id', - existing_type=sa.INTEGER(), - nullable=True - ) - batch_op.drop_column('data_integration_id') diff --git a/mindsdb/migrations/versions/2022-09-08_87b2df2b83e1_predictor_status.py b/mindsdb/migrations/versions/2022-09-08_87b2df2b83e1_predictor_status.py deleted file mode 100644 index d1ffea6ab99..00000000000 --- a/mindsdb/migrations/versions/2022-09-08_87b2df2b83e1_predictor_status.py +++ /dev/null @@ -1,74 +0,0 @@ 
-"""predictor-status - -Revision ID: 87b2df2b83e1 -Revises: 96d5fef10caa -Create Date: 2022-09-08 14:47:45.238710 - -""" -import json - -from alembic import op -import sqlalchemy as sa -from sqlalchemy.sql import text - -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. -revision = '87b2df2b83e1' -down_revision = '96d5fef10caa' -branch_labels = None -depends_on = None - - -def upgrade(): - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.add_column(sa.Column('status', sa.String(), nullable=True)) - - conn = op.get_bind() - session = sa.orm.Session(bind=conn) - predictors = conn.execute(text(''' - select id, data, update_status, json_ai, code from predictor - ''')).fetchall() - - for row in predictors: - try: - data = json.loads(row['data']) - except Exception: - data = None - - status = None - # assume older models are complete, only temporary - if 'status' in (data or {}): - status = data['status'] - elif 'error' in (data or {}): - status = 'error' - elif row['update_status'] == 'available': - status = 'complete' - elif row['json_ai'] is None and row['code'] is None: - status = 'generating' - elif data is None: - status = 'error' - elif 'training_log' in (data or {}): - status = 'training' - elif 'error' not in (data or {}): - status = 'complete' - else: - status = 'error' - - conn.execute( - text(""" - update predictor - set status = :status - where id = :predictor_id - """), { - 'status': status, - 'predictor_id': row['id'] - } - ) - session.commit() - - -def downgrade(): - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.drop_column('status') diff --git a/mindsdb/migrations/versions/2022-09-19_3d5e70105df7_content_storage.py b/mindsdb/migrations/versions/2022-09-19_3d5e70105df7_content_storage.py deleted file mode 100644 index af68209d0b6..00000000000 --- a/mindsdb/migrations/versions/2022-09-19_3d5e70105df7_content_storage.py +++ /dev/null @@ -1,59 +0,0 @@ 
-"""content_storage - -Revision ID: 3d5e70105df7 -Revises: 87b2df2b83e1 -Create Date: 2022-09-19 11:30:09.182435 - -""" -from pathlib import Path -import shutil - -from mindsdb.utilities.config import Config - - -# revision identifiers, used by Alembic. -revision = '3d5e70105df7' -down_revision = '87b2df2b83e1' -branch_labels = None -depends_on = None - - -def upgrade(): - config = Config() - is_cloud = config.get('cloud', False) - if is_cloud is True: - return - - storage_path = Path(config.paths['storage']) - for item in storage_path.iterdir(): - if item.is_file() and item.name.startswith('predictor_'): - original_name = item.name - temp_file_path = item.parent / f'{original_name}_temp' - item.replace(temp_file_path) - item.mkdir(exist_ok=True) - temp_file_path.replace(item / original_name) - - root_path = Path(config.paths['root']) - if (root_path / 'datasources').is_dir(): - shutil.rmtree(root_path / 'datasources') - if (root_path / 'integrations').is_dir(): - shutil.rmtree(root_path / 'integrations') - if (root_path / 'predictors').is_dir(): - shutil.rmtree(root_path / 'predictors') - - -def downgrade(): - config = Config() - is_cloud = config.get('cloud', False) - if is_cloud is True: - return - - storage_path = Path(config.paths['storage']) - for item in storage_path.iterdir(): - if item.is_dir() and item.name.startswith('predictor_'): - original_name = item.name - if (item / original_name).is_file(): - temp_dir_path = item.parent / f'{original_name}_temp' - item.replace(temp_dir_path) - (temp_dir_path / original_name).replace(item.parent / original_name) - shutil.rmtree(temp_dir_path) diff --git a/mindsdb/migrations/versions/2022-09-29_cada7d2be947_json_storage.py b/mindsdb/migrations/versions/2022-09-29_cada7d2be947_json_storage.py deleted file mode 100644 index 486a81e7811..00000000000 --- a/mindsdb/migrations/versions/2022-09-29_cada7d2be947_json_storage.py +++ /dev/null @@ -1,90 +0,0 @@ -"""json_storage - -Revision ID: cada7d2be947 -Revises: 
3d5e70105df7 -Create Date: 2022-09-29 15:52:32.695026 - -""" -import json - -from alembic import op -import sqlalchemy as sa - -import mindsdb.interfaces.storage.db as db -from mindsdb.interfaces.storage.fs import RESOURCE_GROUP - - -# revision identifiers, used by Alembic. -revision = 'cada7d2be947' -down_revision = '3d5e70105df7' -branch_labels = None -depends_on = None - - -def upgrade(): - op.create_table( - 'json_storage', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('resource_group', sa.String(), nullable=True), - sa.Column('resource_id', sa.Integer(), nullable=True), - sa.Column('name', sa.String(), nullable=True), - sa.Column('content', sa.JSON(), nullable=True), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - - conn = op.get_bind() - session = sa.orm.Session(bind=conn) - predictors = conn.execute(sa.text(''' - select id, json_ai from predictor - ''')).fetchall() - - for row in predictors: - try: - jai = json.loads(row['json_ai']) - except Exception: - continue - - if jai is None or len(jai) == 0: - continue - - record = db.JsonStorage( - resource_group=RESOURCE_GROUP.PREDICTOR, - resource_id=row['id'], - name='json_ai', - content=jai, - company_id=None - ) - session.add(record) - - session.commit() - - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.drop_column('json_ai') - - -def downgrade(): - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.add_column(sa.Column('json_ai', sa.JSON, nullable=True)) - - conn = op.get_bind() - session = sa.orm.Session(bind=conn) - jsons = conn.execute(sa.text(''' - select resource_id, name, content - from json_storage - where resource_group = 'predictor' and name = 'json_ai' - ''')).fetchall() - - for row in jsons: - predicrtor_record = ( - session.query(db.Predictor) - .filter_by(company_id=None, id=row['resource_id']) - .first() - ) - if predicrtor_record is None: - continue - predicrtor_record.json_ai = 
row['content'] - - session.commit() - - op.drop_table('json_storage') diff --git a/mindsdb/migrations/versions/2022-10-14_43c52d23845a_projects.py b/mindsdb/migrations/versions/2022-10-14_43c52d23845a_projects.py deleted file mode 100644 index e616cd6e7fc..00000000000 --- a/mindsdb/migrations/versions/2022-10-14_43c52d23845a_projects.py +++ /dev/null @@ -1,129 +0,0 @@ -"""projects - -Revision ID: 43c52d23845a -Revises: cada7d2be947 -Create Date: 2022-10-14 09:59:44.589745 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.sql import text - -import mindsdb.interfaces.storage.db as db - - -# revision identifiers, used by Alembic. -revision = '43c52d23845a' -down_revision = 'cada7d2be947' -branch_labels = None -depends_on = None - - -def upgrade(): - op.create_table( - 'project', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.Column('deleted_at', sa.DateTime(), nullable=True), - sa.Column('name', sa.String(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('name', 'company_id', name='unique_project_name_company_id') - ) - - project_table = sa.Table( - 'project', - sa.MetaData(), - sa.Column('id', sa.Integer()), - sa.Column('name', sa.String()), - sa.Column('company_id', sa.Integer()), - ) - - conn = op.get_bind() - session = sa.orm.Session(bind=conn) - - conn.execute( - project_table.insert().values( - name='mindsdb' - ) - ) - - project_record = conn.execute( - project_table.select().where(project_table.c.name == 'mindsdb') - ).fetchone() - - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.add_column(sa.Column('project_id', sa.Integer())) - batch_op.create_foreign_key('fk_project_id', 'project', ['project_id'], ['id']) - - conn.execute(sa.sql.text(''' - update predictor set project_id = :project_id - '''), {'project_id': 
project_record.id}) - - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.alter_column( - 'project_id', - existing_type=sa.INTEGER(), - nullable=False - ) - - with op.batch_alter_table('view', schema=None) as batch_op: - batch_op.add_column(sa.Column('project_id', sa.Integer())) - batch_op.create_foreign_key('fk_project_id', 'project', ['project_id'], ['id']) - - conn.execute(sa.sql.text(''' - update view set project_id = :project_id - '''), {'project_id': project_record.id}) - - with op.batch_alter_table('view', schema=None) as batch_op: - batch_op.alter_column( - 'project_id', - existing_type=sa.INTEGER(), - nullable=False - ) - - views = conn.execute(sa.text(''' - select id, name from view - where exists (select 1 from predictor where view.name = predictor.name) - ''')).fetchall() - - for row in views: - conn.execute( - text(""" - update view - set name = :name - where id = :view_id - """), { - 'name': f"{row['name']}_view", - 'view_id': row['id'] - } - ) - - session.commit() - - -def downgrade(): - conn = op.get_bind() - session = sa.orm.Session(bind=conn) - - view_integration = db.Integration.query.filter_by(name='views').first() - if view_integration is None: - views_integration = db.Integration( - name='views', - data={}, - engine='views', - company_id=None - ) - session.add(views_integration) - session.commit() - - with op.batch_alter_table('view', schema=None) as batch_op: - batch_op.drop_constraint('fk_project_id', type_='foreignkey') - batch_op.drop_column('project_id') - - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.drop_constraint('fk_project_id', type_='foreignkey') - batch_op.drop_column('project_id') - - op.drop_table('project') diff --git a/mindsdb/migrations/versions/2022-11-07_1e60096fc817_predictor_version.py b/mindsdb/migrations/versions/2022-11-07_1e60096fc817_predictor_version.py deleted file mode 100644 index 313fefa5390..00000000000 --- 
a/mindsdb/migrations/versions/2022-11-07_1e60096fc817_predictor_version.py +++ /dev/null @@ -1,66 +0,0 @@ -"""predictor_version - -Revision ID: 1e60096fc817 -Revises: 43c52d23845a -Create Date: 2022-11-07 16:43:47.301692 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.sql import text - - -# revision identifiers, used by Alembic. -revision = '1e60096fc817' -down_revision = '43c52d23845a' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.add_column(sa.Column('label', sa.String(), nullable=True)) - batch_op.add_column(sa.Column('version', sa.Integer(), nullable=True)) - - # update current predictor versions - conn = op.get_bind() - session = sa.orm.Session(bind=conn) - - key0 = (None, None, None) - models = conn.execute(sa.text(''' - select company_id, project_id, name, id - from predictor - order by company_id, project_id, name, created_at - ''')).fetchall() - for model in models: - key = (model['company_id'], model['project_id'], model['name'].lower()) - - # it is different name or project or company - if key != key0: - version = 1 - key0 = key - - conn.execute( - text(""" - update predictor - set version = :version - where id = :id - """), { - 'version': version, - 'id': model['id'] - } - ) - version += 1 - - session.commit() - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.drop_column('version') - batch_op.drop_column('label') - - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2022-11-11_d429095b570f_data_integration_id.py b/mindsdb/migrations/versions/2022-11-11_d429095b570f_data_integration_id.py deleted file mode 100644 index 166577a5faa..00000000000 --- a/mindsdb/migrations/versions/2022-11-11_d429095b570f_data_integration_id.py +++ /dev/null @@ -1,106 +0,0 @@ -"""data-integration-id - -Revision ID: d429095b570f -Revises: 1e60096fc817 -Create Date: 2022-11-11 14:00:58.386307 - -""" -import json - -from alembic import op -import sqlalchemy as sa -from sqlalchemy.sql import text - -import mindsdb.interfaces.storage.db as db - - -# revision identifiers, used by Alembic. -revision = 'd429095b570f' -down_revision = '1e60096fc817' -branch_labels = None -depends_on = None - - -def upgrade(): - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.add_column(sa.Column('data_integration_ref', db.Json(), nullable=True)) - - conn = op.get_bind() - session = sa.orm.Session(bind=conn) - - view_integration = conn.execute(text(''' - select id from integration where name = 'views' - ''')).fetchone() - if view_integration is not None: - views_integration_id = view_integration['id'] - - predictors = conn.execute(text(''' - select id, data_integration_id from predictor - ''')).fetchall() - - for predictor in predictors: - data_integration_ref = None - if predictor['data_integration_id'] is not None: - data_integration_ref = {'type': 'integration', 'id': predictor['data_integration_id']} - if predictor['data_integration_id'] == views_integration_id: - data_integration_ref = {'type': 'view'} - if isinstance(data_integration_ref, dict): - data_integration_ref = json.dumps(data_integration_ref) - conn.execute(text(''' - update predictor set data_integration_ref = :data_integration_ref where id = :id - '''), { - 
'data_integration_ref': data_integration_ref, - 'id': predictor['id'] - }) - - session.commit() - - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.drop_constraint('fk_data_integration_id', type_='foreignkey') - batch_op.drop_column('data_integration_id') - - conn.execute(text(''' - delete from integration where name = 'views' - ''')) - session.commit() - - -def downgrade(): - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.add_column(sa.Column('data_integration_id', sa.INTEGER(), nullable=True)) - - conn = op.get_bind() - session = sa.orm.Session(bind=conn) - - views_integration = db.Integration( - name='views', - data={}, - engine='views', - company_id=None - ) - session.add(views_integration) - session.commit() - - predictors = conn.execute(text(''' - select id, data_integration_ref from predictor - ''')).fetchall() - - for predictor in predictors: - data_integration_ref = predictor['data_integration_ref'] - if data_integration_ref is None: - continue - data_integration_ref = json.loads(data_integration_ref) - data_integration_id = data_integration_ref.get('id') - if data_integration_ref['type'] == 'view': - data_integration_id = views_integration.id - - conn.execute(text(''' - update predictor set data_integration_id = :data_integration_id where id = :id - '''), { - 'data_integration_id': data_integration_id, - 'id': predictor['id'] - }) - - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.create_foreign_key('fk_data_integration_id', 'integration', ['data_integration_id'], ['id']) - batch_op.drop_column('data_integration_ref') diff --git a/mindsdb/migrations/versions/2022-12-26_459218b0844c_fix_unique_constraint.py b/mindsdb/migrations/versions/2022-12-26_459218b0844c_fix_unique_constraint.py deleted file mode 100644 index 4f71e12765b..00000000000 --- a/mindsdb/migrations/versions/2022-12-26_459218b0844c_fix_unique_constraint.py +++ /dev/null @@ -1,34 +0,0 @@ 
-"""fix_unique_constraint - -Revision ID: 459218b0844c -Revises: d429095b570f -Create Date: 2022-12-26 13:40:57.141241 - -""" -from alembic import op - -revision = '459218b0844c' -down_revision = 'd429095b570f' -branch_labels = None -depends_on = None - - -def upgrade(): - - # try - for sqlite database - try: - op.execute("ALTER TABLE project DROP CONSTRAINT IF EXISTS unique_integration_name_company_id") - op.execute("ALTER TABLE project DROP CONSTRAINT IF EXISTS unique_project_name_company_id") - except Exception: - pass - - try: - with op.batch_alter_table('project', schema=None) as batch_op: - batch_op.create_unique_constraint('unique_project_name_company_id', ['name', 'company_id']) - except Exception: - pass - - -def downgrade(): - # do nothing - ... diff --git a/mindsdb/migrations/versions/2023-02-02_b6d0a47294ac_jobs.py b/mindsdb/migrations/versions/2023-02-02_b6d0a47294ac_jobs.py deleted file mode 100644 index 12628c6486e..00000000000 --- a/mindsdb/migrations/versions/2023-02-02_b6d0a47294ac_jobs.py +++ /dev/null @@ -1,56 +0,0 @@ -"""jobs - -Revision ID: b6d0a47294ac -Revises: 459218b0844c -Create Date: 2023-02-02 11:33:06.283055 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - -# revision identifiers, used by Alembic. -revision = 'b6d0a47294ac' -down_revision = '459218b0844c' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.create_table( - 'jobs_history', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('job_id', sa.Integer(), nullable=True), - sa.Column('start_at', sa.DateTime(), nullable=True), - sa.Column('end_at', sa.DateTime(), nullable=True), - sa.Column('error', sa.String(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('job_id', 'start_at', name='uniq_job_history_job_id_start') - ) - op.create_table( - 'jobs', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('name', sa.String(), nullable=False), - sa.Column('project_id', sa.Integer(), nullable=False), - sa.Column('query_str', sa.String(), nullable=False), - sa.Column('start_at', sa.DateTime(), nullable=True), - sa.Column('end_at', sa.DateTime(), nullable=True), - sa.Column('next_run_at', sa.DateTime(), nullable=True), - sa.Column('schedule_str', sa.String(), nullable=True), - sa.Column('deleted_at', sa.DateTime(), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_table('jobs') - op.drop_table('jobs_history') - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2023-02-17_ee63d868fa84_predictor_integration_null.py b/mindsdb/migrations/versions/2023-02-17_ee63d868fa84_predictor_integration_null.py deleted file mode 100644 index ee4fa76a64a..00000000000 --- a/mindsdb/migrations/versions/2023-02-17_ee63d868fa84_predictor_integration_null.py +++ /dev/null @@ -1,42 +0,0 @@ -"""predictor_integration_null - -Revision ID: ee63d868fa84 -Revises: b6d0a47294ac -Create Date: 2023-02-17 13:48:49.464644 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. -revision = 'ee63d868fa84' -down_revision = 'b6d0a47294ac' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.alter_column( - 'integration_id', - existing_type=sa.INTEGER(), - nullable=True - ) - - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.alter_column( - 'integration_id', - existing_type=sa.INTEGER(), - nullable=False - ) - - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2023-02-25_3154382dab17_training_progress.py b/mindsdb/migrations/versions/2023-02-25_3154382dab17_training_progress.py deleted file mode 100644 index ac5e8749313..00000000000 --- a/mindsdb/migrations/versions/2023-02-25_3154382dab17_training_progress.py +++ /dev/null @@ -1,36 +0,0 @@ -"""training_progress - -Revision ID: 3154382dab17 -Revises: ee63d868fa84 -Create Date: 2023-02-25 15:12:02.828938 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. 
-revision = '3154382dab17' -down_revision = 'ee63d868fa84' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.add_column(sa.Column('training_phase_current', sa.Integer(), nullable=True)) - batch_op.add_column(sa.Column('training_phase_total', sa.Integer(), nullable=True)) - batch_op.add_column(sa.Column('training_phase_name', sa.String(), nullable=True)) - - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.drop_column('training_phase_name') - batch_op.drop_column('training_phase_total') - batch_op.drop_column('training_phase_current') - - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2023-02-27_ef04cdbe51ed_jobs_user_class.py b/mindsdb/migrations/versions/2023-02-27_ef04cdbe51ed_jobs_user_class.py deleted file mode 100644 index 25f35507e24..00000000000 --- a/mindsdb/migrations/versions/2023-02-27_ef04cdbe51ed_jobs_user_class.py +++ /dev/null @@ -1,33 +0,0 @@ -"""jobs_user_class - -Revision ID: ef04cdbe51ed -Revises: 3154382dab17 -Create Date: 2023-02-27 11:32:35.298375 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. -revision = 'ef04cdbe51ed' -down_revision = '3154382dab17' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('jobs', schema=None) as batch_op: - batch_op.add_column(sa.Column('user_class', sa.Integer(), nullable=True)) - - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - with op.batch_alter_table('jobs', schema=None) as batch_op: - batch_op.drop_column('user_class') - - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2023-04-11_b8be148dbc85_jobs_history_query.py b/mindsdb/migrations/versions/2023-04-11_b8be148dbc85_jobs_history_query.py deleted file mode 100644 index c53d853936c..00000000000 --- a/mindsdb/migrations/versions/2023-04-11_b8be148dbc85_jobs_history_query.py +++ /dev/null @@ -1,35 +0,0 @@ -"""jobs_history_query - -Revision ID: b8be148dbc85 -Revises: ef04cdbe51ed -Create Date: 2023-04-11 17:35:16.273293 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - -# revision identifiers, used by Alembic. -revision = 'b8be148dbc85' -down_revision = 'ef04cdbe51ed' -branch_labels = None -depends_on = None - - -def upgrade(): - - with op.batch_alter_table('jobs_history', schema=None) as batch_op: - batch_op.add_column(sa.Column('query_str', sa.String(), nullable=True)) - batch_op.add_column(sa.Column('updated_at', sa.DateTime(), nullable=True, - server_default=sa.func.current_timestamp())) - - # ### end Alembic commands ### - - -def downgrade(): - - with op.batch_alter_table('jobs_history', schema=None) as batch_op: - batch_op.drop_column('updated_at') - batch_op.drop_column('query_str') - - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2023-05-24_6d748f2c7b0b_remove_streams.py b/mindsdb/migrations/versions/2023-05-24_6d748f2c7b0b_remove_streams.py deleted file mode 100644 index ed8af146e88..00000000000 --- a/mindsdb/migrations/versions/2023-05-24_6d748f2c7b0b_remove_streams.py +++ /dev/null @@ -1,46 +0,0 @@ -"""remove streams - -Revision ID: 6d748f2c7b0b -Revises: b8be148dbc85 -Create Date: 2023-05-24 15:22:11.969791 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. 
-revision = '6d748f2c7b0b' -down_revision = 'b8be148dbc85' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('stream') - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - 'stream', - sa.Column('id', sa.INTEGER(), nullable=False), - sa.Column('name', sa.VARCHAR(), nullable=False), - sa.Column('stream_in', sa.VARCHAR(), nullable=False), - sa.Column('stream_out', sa.VARCHAR(), nullable=False), - sa.Column('anomaly_stream', sa.VARCHAR(), nullable=True), - sa.Column('integration', sa.VARCHAR(), nullable=True), - sa.Column('predictor', sa.VARCHAR(), nullable=False), - sa.Column('company_id', sa.INTEGER(), nullable=True), - sa.Column('updated_at', sa.DATETIME(), nullable=True), - sa.Column('created_at', sa.DATETIME(), nullable=True), - sa.Column('type', sa.VARCHAR(), nullable=True), - sa.Column('connection_info', sa.VARCHAR(), nullable=True), - sa.Column('learning_params', sa.VARCHAR(), nullable=True), - sa.Column('learning_threshold', sa.INTEGER(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2023-05-31_aaecd7012a78_chatbot.py b/mindsdb/migrations/versions/2023-05-31_aaecd7012a78_chatbot.py deleted file mode 100644 index 238a503bc3d..00000000000 --- a/mindsdb/migrations/versions/2023-05-31_aaecd7012a78_chatbot.py +++ /dev/null @@ -1,41 +0,0 @@ -"""chatbot - -Revision ID: aaecd7012a78 -Revises: b8be148dbc85 -Create Date: 2023-05-18 10:53:35.256508 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. -revision = 'aaecd7012a78' -down_revision = '6d748f2c7b0b' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.create_table( - 'chat_bots', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('user_class', sa.Integer(), nullable=True), - sa.Column('name', sa.String(), nullable=False), - sa.Column('project_id', sa.Integer(), nullable=False), - sa.Column('model_name', sa.String(), nullable=False), - sa.Column('database_id', sa.Integer()), - sa.Column('params', sa.JSON(), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - - -def downgrade(): - - op.drop_table('chat_bots') - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2023-06-16_9d6271bb2c38_update_chat_bots_table.py b/mindsdb/migrations/versions/2023-06-16_9d6271bb2c38_update_chat_bots_table.py deleted file mode 100644 index f04b8d13b06..00000000000 --- a/mindsdb/migrations/versions/2023-06-16_9d6271bb2c38_update_chat_bots_table.py +++ /dev/null @@ -1,28 +0,0 @@ -"""update_chat_bots_table - -Revision ID: 9d6271bb2c38 -Revises: aaecd7012a78 -Create Date: 2023-06-16 12:08:50.170809 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. 
-revision = '9d6271bb2c38' -down_revision = 'aaecd7012a78' -branch_labels = None -depends_on = None - - -def upgrade(): - with op.batch_alter_table('chat_bots', schema=None) as batch_op: - batch_op.add_column(sa.Column('chat_engine', sa.String(), nullable=True)) - batch_op.add_column(sa.Column('is_running', sa.Boolean(), default=True)) - - -def downgrade(): - with op.batch_alter_table('chat_bots', schema=None) as batch_op: - batch_op.drop_column('chat_engine') - batch_op.drop_column('is_running') diff --git a/mindsdb/migrations/versions/2023-06-19_b5bf593ba659_create_chat_bots_history_table.py b/mindsdb/migrations/versions/2023-06-19_b5bf593ba659_create_chat_bots_history_table.py deleted file mode 100644 index 73f42bec086..00000000000 --- a/mindsdb/migrations/versions/2023-06-19_b5bf593ba659_create_chat_bots_history_table.py +++ /dev/null @@ -1,35 +0,0 @@ -"""create_chat_bots_history_table - -Revision ID: b5bf593ba659 -Revises: 9d6271bb2c38 -Create Date: 2023-06-19 09:56:37.108680 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. 
-revision = 'b5bf593ba659' -down_revision = '9d6271bb2c38' -branch_labels = None -depends_on = None - - -def upgrade(): - op.create_table( - 'chat_bots_history', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('chat_bot_id', sa.Integer(), nullable=False), - sa.Column('type', sa.String()), - sa.Column('text', sa.String()), - sa.Column('user', sa.String()), - sa.Column('destination', sa.String()), - sa.Column('sent_at', sa.DateTime()), - sa.Column('error', sa.String()), - sa.PrimaryKeyConstraint('id'), - ) - - -def downgrade(): - op.drop_table('chat_bots_history') diff --git a/mindsdb/migrations/versions/2023-06-27_607709e1615b_update_project_names.py b/mindsdb/migrations/versions/2023-06-27_607709e1615b_update_project_names.py deleted file mode 100644 index 722b4938ff4..00000000000 --- a/mindsdb/migrations/versions/2023-06-27_607709e1615b_update_project_names.py +++ /dev/null @@ -1,60 +0,0 @@ -"""update_project_names - -Revision ID: 607709e1615b -Revises: 4c26ad04eeaa -Create Date: 2023-06-27 18:33:29.436607 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. 
-revision = '607709e1615b' -down_revision = 'b5bf593ba659' -branch_labels = None -depends_on = None - - -def upgrade(): - - def _rename(table): - conn = op.get_bind() - - data = conn.execute( - table - .select() - .where(table.c.name.like("%.%")) - ).fetchall() - - for row in data: - name = row[0] - name2 = name.replace('.', '_') - - op.execute( - table - .update() - .where(table.c.name == name) - .values({'name': name2}) - ) - - projects = sa.Table( - 'project', - sa.MetaData(), - sa.Column('name', sa.String()), - ) - - _rename(projects) - - integrations = sa.Table( - 'integration', - sa.MetaData(), - sa.Column('name', sa.String()), - ) - - _rename(integrations) - - -def downgrade(): - pass diff --git a/mindsdb/migrations/versions/2023-07-13_a57506731839_triggers.py b/mindsdb/migrations/versions/2023-07-13_a57506731839_triggers.py deleted file mode 100644 index c8f30da17f4..00000000000 --- a/mindsdb/migrations/versions/2023-07-13_a57506731839_triggers.py +++ /dev/null @@ -1,54 +0,0 @@ -"""triggers - -Revision ID: a57506731839 -Revises: 607709e1615b -Create Date: 2023-07-13 18:24:59.186856 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - -revision = 'a57506731839' -down_revision = '607709e1615b' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.create_table( - 'tasks', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('user_class', sa.Integer(), nullable=True), - sa.Column('object_type', sa.String(), nullable=False), - sa.Column('object_id', sa.Integer(), nullable=False), - sa.Column('last_error', sa.String(), nullable=True), - sa.Column('active', sa.Boolean(), nullable=True), - sa.Column('reload', sa.Boolean(), nullable=True), - sa.Column('run_by', sa.String(), nullable=True), - sa.Column('alive_time', sa.DateTime(timezone=True), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - op.create_table( - 'triggers', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('project_id', sa.Integer(), nullable=False), - sa.Column('database_id', sa.Integer(), nullable=False), - sa.Column('table_name', sa.String(), nullable=False), - sa.Column('columns', sa.String(), nullable=True), - sa.Column('query_str', sa.String(), nullable=False), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - - -def downgrade(): - op.drop_table('triggers') - op.drop_table('tasks') - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2023-07-19_ad04ee0bd385_chatbot_to_task.py b/mindsdb/migrations/versions/2023-07-19_ad04ee0bd385_chatbot_to_task.py deleted file mode 100644 index 6fc0a2a57d7..00000000000 --- a/mindsdb/migrations/versions/2023-07-19_ad04ee0bd385_chatbot_to_task.py +++ /dev/null @@ -1,38 +0,0 @@ -"""chatbot_to_task - -Revision ID: ad04ee0bd385 -Revises: a57506731839 -Create Date: 2023-07-19 16:18:14.791761 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - -# revision identifiers, used by Alembic. 
-revision = 'ad04ee0bd385' -down_revision = 'a57506731839' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('chat_bots', schema=None) as batch_op: - batch_op.drop_column('chat_engine') - batch_op.drop_column('user_class') - batch_op.drop_column('company_id') - batch_op.drop_column('is_running') - - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('chat_bots', schema=None) as batch_op: - batch_op.add_column(sa.Column('is_running', sa.BOOLEAN(), nullable=True)) - batch_op.add_column(sa.Column('company_id', sa.INTEGER(), nullable=True)) - batch_op.add_column(sa.Column('user_class', sa.INTEGER(), nullable=True)) - batch_op.add_column(sa.Column('chat_engine', sa.VARCHAR(), nullable=True)) - - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2023-08-29_b0382f5be48d_predictor_hostname.py b/mindsdb/migrations/versions/2023-08-29_b0382f5be48d_predictor_hostname.py deleted file mode 100644 index 6c12d3860eb..00000000000 --- a/mindsdb/migrations/versions/2023-08-29_b0382f5be48d_predictor_hostname.py +++ /dev/null @@ -1,26 +0,0 @@ -"""predictor-hostname - -Revision ID: b0382f5be48d -Revises: 011e6f2dd9c2 -Create Date: 2023-08-29 17:19:55.372394 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. 
-revision = 'b0382f5be48d' -down_revision = '011e6f2dd9c2' -branch_labels = None -depends_on = None - - -def upgrade(): - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.add_column(sa.Column('hostname', sa.String(), nullable=True)) - - -def downgrade(): - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.drop_column('hostname') diff --git a/mindsdb/migrations/versions/2023-08-31_4c26ad04eeaa_add_skills_table.py b/mindsdb/migrations/versions/2023-08-31_4c26ad04eeaa_add_skills_table.py deleted file mode 100644 index 2759f4ca046..00000000000 --- a/mindsdb/migrations/versions/2023-08-31_4c26ad04eeaa_add_skills_table.py +++ /dev/null @@ -1,32 +0,0 @@ -"""add_skills_table - -Revision ID: 4c26ad04eeaa -Revises: b5bf593ba659 -Create Date: 2023-08-31 17:04:26.898015 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = '4c26ad04eeaa' -down_revision = 'd44ab65a6a35' -branch_labels = None -depends_on = None - - -def upgrade(): - op.create_table( - 'skills', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('project_id', sa.Integer(), nullable=False), - sa.Column('type', sa.String(), nullable=False), - sa.Column('params', sa.JSON(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - - -def downgrade(): - op.drop_table('skills') diff --git a/mindsdb/migrations/versions/2023-09-06_d44ab65a6a35_add_agents_table.py b/mindsdb/migrations/versions/2023-09-06_d44ab65a6a35_add_agents_table.py deleted file mode 100644 index 0a7c980ab4e..00000000000 --- a/mindsdb/migrations/versions/2023-09-06_d44ab65a6a35_add_agents_table.py +++ /dev/null @@ -1,35 +0,0 @@ -"""add_agents_table - -Revision ID: d44ab65a6a35 -Revises: ad04ee0bd385 -Create Date: 2023-09-06 11:32:08.777661 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. 
-revision = 'd44ab65a6a35' -down_revision = 'ad04ee0bd385' -branch_labels = None -depends_on = None - - -def upgrade(): - op.create_table( - 'agents', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('user_class', sa.Integer(), nullable=True), - sa.Column('name', sa.String(), nullable=False), - sa.Column('project_id', sa.Integer(), nullable=False), - sa.Column('model_name', sa.String(), nullable=False), - sa.Column('params', sa.JSON(), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id')) - - -def downgrade(): - op.drop_table('agents') diff --git a/mindsdb/migrations/versions/2023-09-06_e187961e844a_add_agent_skills_table.py b/mindsdb/migrations/versions/2023-09-06_e187961e844a_add_agent_skills_table.py deleted file mode 100644 index 15d11754e05..00000000000 --- a/mindsdb/migrations/versions/2023-09-06_e187961e844a_add_agent_skills_table.py +++ /dev/null @@ -1,29 +0,0 @@ -"""add agent_skills_table - -Revision ID: e187961e844a -Revises: 4c26ad04eeaa -Create Date: 2023-09-06 13:56:17.803484 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. 
-revision = 'e187961e844a' -down_revision = '4c26ad04eeaa' -branch_labels = None -depends_on = None - - -def upgrade(): - op.create_table( - 'agent_skills', - sa.Column('agent_id', sa.Integer(), nullable=False), - sa.Column('skill_id', sa.Integer(), nullable=False), - sa.PrimaryKeyConstraint('agent_id', 'skill_id') - ) - - -def downgrade(): - op.drop_table('agent_skills') diff --git a/mindsdb/migrations/versions/2023-09-18_011e6f2dd9c2_backfill_agent_id.py b/mindsdb/migrations/versions/2023-09-18_011e6f2dd9c2_backfill_agent_id.py deleted file mode 100644 index 3dd73da5559..00000000000 --- a/mindsdb/migrations/versions/2023-09-18_011e6f2dd9c2_backfill_agent_id.py +++ /dev/null @@ -1,82 +0,0 @@ -"""backfill_agent_id - -Revision ID: 011e6f2dd9c2 -Revises: f16d4ab03091 -Create Date: 2023-09-18 11:02:36.795544 - -""" -from alembic import op -import datetime -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = '011e6f2dd9c2' -down_revision = 'f16d4ab03091' -branch_labels = None -depends_on = None - - -def upgrade(): - conn = op.get_bind() - chatbots_table = sa.Table( - 'chat_bots', - sa.MetaData(), - sa.Column('id', sa.Integer()), - sa.Column('project_id', sa.Integer()), - sa.Column('agent_id', sa.Integer()), - sa.Column('name', sa.String()), - sa.Column('model_name', sa.String()) - ) - - agents_table = sa.Table( - 'agents', - sa.MetaData(), - sa.Column('id', sa.Integer()), - sa.Column('company_id', sa.Integer()), - sa.Column('user_class', sa.Integer()), - sa.Column('name', sa.String()), - sa.Column('project_id', sa.Integer()), - sa.Column('model_name', sa.String()), - sa.Column('updated_at', sa.DateTime()), - sa.Column('created_at', sa.DateTime()) - ) - - tasks_table = sa.Table( - 'tasks', - sa.MetaData(), - sa.Column('company_id', sa.Integer()), - sa.Column('user_class', sa.Integer()), - sa.Column('object_type', sa.String()), - sa.Column('object_id', sa.Integer()) - ) - - all_chatbots = conn.execute(chatbots_table.select()).fetchall() - 
for chatbot_row in all_chatbots: - id, project_id, _, name, model_name = chatbot_row - - # Get the corresponding task. - task_select = tasks_table.select().where(tasks_table.c.object_type == 'chatbot').where(tasks_table.c.object_id == id) - task_row = conn.execute(task_select).first() - if task_row is None: - continue - company_id, user_class, _, _ = task_row - # Create the new agent. - op.execute(agents_table.insert().values( - company_id=company_id, - user_class=user_class, - name=name, - project_id=project_id, - model_name=model_name, - updated_at=datetime.datetime.now(), - created_at=datetime.datetime.now() - )) - - # Get the new agent and associate the chatbot with it. - created_agent = conn.execute(agents_table.select().where(agents_table.c.name == name)).first() - agent_id = created_agent[0] - op.execute(chatbots_table.update().where(chatbots_table.c.id == id).values(agent_id=agent_id)) - - -def downgrade(): - pass diff --git a/mindsdb/migrations/versions/2023-09-18_f16d4ab03091_add_agent_id.py b/mindsdb/migrations/versions/2023-09-18_f16d4ab03091_add_agent_id.py deleted file mode 100644 index 956fc0adba2..00000000000 --- a/mindsdb/migrations/versions/2023-09-18_f16d4ab03091_add_agent_id.py +++ /dev/null @@ -1,28 +0,0 @@ -"""add_agent_id - -Revision ID: f16d4ab03091 -Revises: e187961e844a -Create Date: 2023-09-18 10:49:36.290319 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. 
-revision = 'f16d4ab03091' -down_revision = 'e187961e844a' -branch_labels = None -depends_on = None - - -def upgrade(): - with op.batch_alter_table('chat_bots') as batch_op: - batch_op.add_column(sa.Column('agent_id', sa.Integer())) - batch_op.create_foreign_key('fk_agent_id', 'agents', ['agent_id'], ['id']) - - -def downgrade(): - with op.batch_alter_table('chat_bots') as batch_op: - batch_op.drop_constraint('fk_agent_id', type_='foreignkey') - batch_op.drop_column('agent_id') diff --git a/mindsdb/migrations/versions/2023-09-20_309db3d07cf4_add_knowledge_base.py b/mindsdb/migrations/versions/2023-09-20_309db3d07cf4_add_knowledge_base.py deleted file mode 100644 index 03fb0b8ccbd..00000000000 --- a/mindsdb/migrations/versions/2023-09-20_309db3d07cf4_add_knowledge_base.py +++ /dev/null @@ -1,54 +0,0 @@ -"""add knowledge base - -Revision ID: 309db3d07cf4 -Revises: 6cb02dfd7f61 -Create Date: 2023-09-20 13:48:39.422306 - -""" -import sqlalchemy as sa -from alembic import op - -import mindsdb.interfaces.storage.db # noqa - -# revision identifiers, used by Alembic. -revision = "309db3d07cf4" -down_revision = "6cb02dfd7f61" -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.create_table( - "knowledge_base", - sa.Column("id", sa.Integer(), nullable=False), - sa.Column("name", sa.String(), nullable=False), - sa.Column("project_id", sa.Integer(), nullable=False), - sa.Column("params", sa.JSON(), nullable=True), - sa.Column("vector_database_id", sa.Integer(), nullable=True), - sa.Column("vector_database_table", sa.String(), nullable=True), - sa.Column("embedding_model_id", sa.Integer(), nullable=True), - sa.Column("created_at", sa.DateTime(), nullable=True), - sa.Column("updated_at", sa.DateTime(), nullable=True), - sa.ForeignKeyConstraint( - ["embedding_model_id"], - ["predictor.id"], - name="fk_knowledge_base_embedding_model_id", - ), - sa.ForeignKeyConstraint( - ["vector_database_id"], - ["integration.id"], - name="fk_knowledge_base_vector_database_id", - ), - sa.PrimaryKeyConstraint("id"), - sa.UniqueConstraint( - "name", "project_id", name="unique_knowledge_base_name_project_id" - ), - ) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_table("knowledge_base") - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2023-10-03_6cb02dfd7f61_query_context.py b/mindsdb/migrations/versions/2023-10-03_6cb02dfd7f61_query_context.py deleted file mode 100644 index 42bd8b45c50..00000000000 --- a/mindsdb/migrations/versions/2023-10-03_6cb02dfd7f61_query_context.py +++ /dev/null @@ -1,38 +0,0 @@ -"""query_context - -Revision ID: 6cb02dfd7f61 -Revises: b0382f5be48d -Create Date: 2023-10-03 12:36:00.008731 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. -revision = '6cb02dfd7f61' -down_revision = 'b0382f5be48d' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.create_table( - 'query_context', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('query', sa.String(), nullable=False), - sa.Column('context_name', sa.String(), nullable=False), - sa.Column('values', sa.JSON(), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - - -def downgrade(): - - op.drop_table('query_context') - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2023-11-01_c67822e96833_jobs_active.py b/mindsdb/migrations/versions/2023-11-01_c67822e96833_jobs_active.py deleted file mode 100644 index 34c090f1236..00000000000 --- a/mindsdb/migrations/versions/2023-11-01_c67822e96833_jobs_active.py +++ /dev/null @@ -1,33 +0,0 @@ -"""jobs_active - -Revision ID: c67822e96833 -Revises: 309db3d07cf4 -Create Date: 2023-11-01 15:42:53.249859 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - -# revision identifiers, used by Alembic. -revision = 'c67822e96833' -down_revision = '309db3d07cf4' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - - with op.batch_alter_table('jobs', schema=None) as batch_op: - batch_op.add_column(sa.Column('active', sa.Boolean(), nullable=True)) - - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - with op.batch_alter_table('jobs', schema=None) as batch_op: - batch_op.drop_column('active') - - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2023-12-25_4b3c9d63e89c_predictor_index.py b/mindsdb/migrations/versions/2023-12-25_4b3c9d63e89c_predictor_index.py deleted file mode 100644 index 94697224e4b..00000000000 --- a/mindsdb/migrations/versions/2023-12-25_4b3c9d63e89c_predictor_index.py +++ /dev/null @@ -1,44 +0,0 @@ -"""predictor_index - -Revision ID: 4b3c9d63e89c -Revises: c67822e96833 -Create Date: 2023-12-25 20:50:08.275299 - -""" -from alembic import op -import sqlalchemy as sa # noqa - - -# revision identifiers, used by Alembic. -revision = '4b3c9d63e89c' -down_revision = 'c67822e96833' -branch_labels = None -depends_on = None - - -def upgrade(): - with op.batch_alter_table('agent_skills', schema=None) as batch_op: - try: - batch_op.create_foreign_key('agent_skills_agent_id_fk', 'agents', ['agent_id'], ['id']) - batch_op.create_foreign_key('agent_skills_skill_id_fk', 'skills', ['skill_id'], ['id']) - except Exception: - pass - - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.create_index( - 'predictor_index', - ['company_id', 'name', 'version', 'active', 'deleted_at'], - unique=True - ) - - -def downgrade(): - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.drop_index('predictor_index') - - with op.batch_alter_table('agent_skills', schema=None) as batch_op: - try: - batch_op.drop_constraint('agent_skills_agent_id_fk', type_='foreignkey') - batch_op.drop_constraint('agent_skills_skill_id_fk', type_='foreignkey') - except Exception: - pass diff --git a/mindsdb/migrations/versions/2024-02-02_5a5c49313e52_job_condition.py b/mindsdb/migrations/versions/2024-02-02_5a5c49313e52_job_condition.py deleted file mode 100644 index 944f59c22f2..00000000000 --- a/mindsdb/migrations/versions/2024-02-02_5a5c49313e52_job_condition.py +++ /dev/null @@ -1,32 +0,0 @@ -"""job_condition - 
-Revision ID: 5a5c49313e52 -Revises: 4b3c9d63e89c -Create Date: 2024-02-02 11:17:07.246330 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - -# revision identifiers, used by Alembic. -revision = '5a5c49313e52' -down_revision = '4b3c9d63e89c' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('jobs', schema=None) as batch_op: - batch_op.add_column(sa.Column('if_query_str', sa.String(), nullable=True)) - - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('jobs', schema=None) as batch_op: - batch_op.drop_column('if_query_str') - - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2024-02-12_9461892bd889_llm_log.py b/mindsdb/migrations/versions/2024-02-12_9461892bd889_llm_log.py deleted file mode 100644 index d86f643a010..00000000000 --- a/mindsdb/migrations/versions/2024-02-12_9461892bd889_llm_log.py +++ /dev/null @@ -1,44 +0,0 @@ -"""llm_log - -Revision ID: 9461892bd889 -Revises: 5a5c49313e52 -Create Date: 2024-02-12 19:25:32.302526 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. -revision = '9461892bd889' -down_revision = '5a5c49313e52' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.create_table( - 'llm_log', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('api_key', sa.String(), nullable=True), - sa.Column('model_id', sa.Integer(), nullable=False), - sa.Column('input', sa.String(), nullable=True), - sa.Column('output', sa.String(), nullable=True), - sa.Column('start_time', sa.DateTime(), nullable=False), - sa.Column('end_time', sa.DateTime(), nullable=True), - sa.Column('prompt_tokens', sa.Integer(), nullable=True), - sa.Column('completion_tokens', sa.Integer(), nullable=True), - sa.Column('total_tokens', sa.Integer(), nullable=True), - sa.Column('success', sa.Boolean(), nullable=False), - sa.PrimaryKeyConstraint('id') - ) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('llm_log') - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2024-04-25_2958416fbe75_drop_semaphor.py b/mindsdb/migrations/versions/2024-04-25_2958416fbe75_drop_semaphor.py deleted file mode 100644 index 6e0419398b0..00000000000 --- a/mindsdb/migrations/versions/2024-04-25_2958416fbe75_drop_semaphor.py +++ /dev/null @@ -1,36 +0,0 @@ -"""drop-semaphor - -Revision ID: 2958416fbe75 -Revises: 9461892bd889 -Create Date: 2024-04-25 18:30:54.051212 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. 
-revision = '2958416fbe75' -down_revision = '9461892bd889' -branch_labels = None -depends_on = None - - -def upgrade(): - op.drop_table('semaphor') - - -def downgrade(): - op.create_table( - 'semaphor', - sa.Column('id', sa.INTEGER(), nullable=False), - sa.Column('updated_at', sa.DATETIME(), nullable=True), - sa.Column('created_at', sa.DATETIME(), nullable=True), - sa.Column('entity_type', sa.VARCHAR(), nullable=True), - sa.Column('entity_id', sa.INTEGER(), nullable=True), - sa.Column('action', sa.VARCHAR(), nullable=True), - sa.Column('company_id', sa.INTEGER(), nullable=True), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('entity_type', 'entity_id', name='uniq_const') - ) diff --git a/mindsdb/migrations/versions/2024-06-06_cbedc4968d5d_store_llm_data.py b/mindsdb/migrations/versions/2024-06-06_cbedc4968d5d_store_llm_data.py deleted file mode 100644 index 5a03ca88e20..00000000000 --- a/mindsdb/migrations/versions/2024-06-06_cbedc4968d5d_store_llm_data.py +++ /dev/null @@ -1,34 +0,0 @@ -"""store llm data - -Revision ID: cbedc4968d5d -Revises: 2958416fbe75 -Create Date: 2024-06-06 13:59:45.158089 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. 
-revision = 'cbedc4968d5d' -down_revision = '2958416fbe75' -branch_labels = None -depends_on = None - - -def upgrade(): - op.create_table( - 'llm_data', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('input', sa.String(), nullable=False), - sa.Column('output', sa.String(), nullable=False), - sa.Column('model_id', sa.String(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()), - sa.Column('updated_at', sa.DateTime(), server_default=sa.func.now(), onupdate=sa.func.now()), - sa.PrimaryKeyConstraint('id') - ) - - -def downgrade(): - op.drop_table('llm_data') diff --git a/mindsdb/migrations/versions/2024-07-09_bfc6f44f5bc9_agent_model_optional.py b/mindsdb/migrations/versions/2024-07-09_bfc6f44f5bc9_agent_model_optional.py deleted file mode 100644 index 967b4555c72..00000000000 --- a/mindsdb/migrations/versions/2024-07-09_bfc6f44f5bc9_agent_model_optional.py +++ /dev/null @@ -1,50 +0,0 @@ -"""agent_model_optional - -Revision ID: bfc6f44f5bc9 -Revises: 2958416fbe75 -Create Date: 2024-07-09 14:50:40.193842 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - -# revision identifiers, used by Alembic. -revision = 'bfc6f44f5bc9' -down_revision = 'cbedc4968d5d' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - with op.batch_alter_table('agents', schema=None) as batch_op: - batch_op.alter_column( - 'model_name', - existing_type=sa.VARCHAR(), - nullable=True - ) - - with op.batch_alter_table('chat_bots', schema=None) as batch_op: - batch_op.alter_column( - 'model_name', - existing_type=sa.VARCHAR(), - nullable=True - ) - - -def downgrade(): - - with op.batch_alter_table('chat_bots', schema=None) as batch_op: - batch_op.alter_column( - 'model_name', - existing_type=sa.VARCHAR(), - nullable=False - ) - - with op.batch_alter_table('agents', schema=None) as batch_op: - batch_op.alter_column( - 'model_name', - existing_type=sa.VARCHAR(), - nullable=False - ) diff --git a/mindsdb/migrations/versions/2024-07-19_45eb2eb61f70_add_provider_to_agent.py b/mindsdb/migrations/versions/2024-07-19_45eb2eb61f70_add_provider_to_agent.py deleted file mode 100644 index 7400a73df2f..00000000000 --- a/mindsdb/migrations/versions/2024-07-19_45eb2eb61f70_add_provider_to_agent.py +++ /dev/null @@ -1,47 +0,0 @@ -"""add provider to agent - -Revision ID: 45eb2eb61f70 -Revises: 459a4cd24933 -Create Date: 2024-07-19 00:48:47.629700 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.sql import table, select, update -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. -revision = '45eb2eb61f70' -down_revision = 'bfc6f44f5bc9' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - with op.batch_alter_table('agents', schema=None) as batch_op: - batch_op.add_column(sa.Column('provider', sa.String(), nullable=True)) - - # code for migrating 'provider' from 'params' to its own column - agents = table('agents', - sa.Column('id', sa.Integer, primary_key=True), - sa.Column('params', sa.JSON), - sa.Column('model_name', sa.String()), - sa.Column('provider', sa.String())) - - conn = op.get_bind() - for agent in conn.execute(select(agents)): - if agent.params and 'provider' in agent.params: - conn.execute(update(agents).where(agents.c.id == agent.id).values(provider=agent.params['provider'])) - if agent.model_name is None: - conn.execute(update(agents).where(agents.c.id == agent.id).values(model_name=agent.params['model_name'])) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('agents', schema=None) as batch_op: - batch_op.drop_column('provider') - - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2024-08-12_8e17ff6b75e9_agents_deleted_at.py b/mindsdb/migrations/versions/2024-08-12_8e17ff6b75e9_agents_deleted_at.py deleted file mode 100644 index 39cad18f8e0..00000000000 --- a/mindsdb/migrations/versions/2024-08-12_8e17ff6b75e9_agents_deleted_at.py +++ /dev/null @@ -1,42 +0,0 @@ -"""agents-deleted-at - -Revision ID: 8e17ff6b75e9 -Revises: 45eb2eb61f70 -Create Date: 2024-08-12 19:13:44.327111 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. -revision = '8e17ff6b75e9' -down_revision = '45eb2eb61f70' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - with op.batch_alter_table('agents', schema=None) as batch_op: - batch_op.add_column(sa.Column('deleted_at', sa.DateTime(), nullable=True)) - - with op.batch_alter_table('skills', schema=None) as batch_op: - batch_op.add_column(sa.Column('created_at', sa.DateTime(), nullable=True)) - batch_op.add_column(sa.Column('updated_at', sa.DateTime(), nullable=True)) - batch_op.add_column(sa.Column('deleted_at', sa.DateTime(), nullable=True)) - - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('skills', schema=None) as batch_op: - batch_op.drop_column('deleted_at') - batch_op.drop_column('updated_at') - batch_op.drop_column('created_at') - - with op.batch_alter_table('agents', schema=None) as batch_op: - batch_op.drop_column('deleted_at') - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2024-10-07_6c57ed39a82b_added_webhook_token_to_chat_bots.py b/mindsdb/migrations/versions/2024-10-07_6c57ed39a82b_added_webhook_token_to_chat_bots.py deleted file mode 100644 index 2189edbc259..00000000000 --- a/mindsdb/migrations/versions/2024-10-07_6c57ed39a82b_added_webhook_token_to_chat_bots.py +++ /dev/null @@ -1,27 +0,0 @@ -"""added webhook_token to chat_bots - -Revision ID: 6c57ed39a82b -Revises: 8e17ff6b75e9 -Create Date: 2024-10-07 16:40:14.141878 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. 
-revision = '6c57ed39a82b' -down_revision = '8e17ff6b75e9' -branch_labels = None -depends_on = None - - -def upgrade(): - with op.batch_alter_table('chat_bots', schema=None) as batch_op: - batch_op.add_column(sa.Column('webhook_token', sa.VARCHAR(), nullable=True)) - - -def downgrade(): - with op.batch_alter_table('chat_bots', schema=None) as batch_op: - batch_op.drop_column('webhook_token') diff --git a/mindsdb/migrations/versions/2024-11-15_9d559f68d535_add_llm_log_columns.py b/mindsdb/migrations/versions/2024-11-15_9d559f68d535_add_llm_log_columns.py deleted file mode 100644 index adf726145f7..00000000000 --- a/mindsdb/migrations/versions/2024-11-15_9d559f68d535_add_llm_log_columns.py +++ /dev/null @@ -1,41 +0,0 @@ -"""add_llm_log_columns - -Revision ID: 9d559f68d535 -Revises: 6c57ed39a82b -Create Date: 2024-11-15 11:24:28.808881 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. -revision = '9d559f68d535' -down_revision = '6c57ed39a82b' -branch_labels = None -depends_on = None - - -def upgrade(): - with op.batch_alter_table('llm_log', schema=None) as batch_op: - batch_op.alter_column('company_id', nullable=False) - batch_op.alter_column('model_id', nullable=True) - batch_op.add_column(sa.Column('model_group', sa.String(), nullable=True)) - batch_op.add_column(sa.Column('cost', sa.Numeric(), nullable=True)) - batch_op.add_column(sa.Column('exception', sa.String(), nullable=True)) - batch_op.add_column(sa.Column('traceback', sa.String(), nullable=True)) - batch_op.add_column(sa.Column('stream', sa.Boolean(), default=False)) - batch_op.add_column(sa.Column('metadata', sa.JSON(), nullable=True)) - - -def downgrade(): - with op.batch_alter_table('llm_log', schema=None) as batch_op: - batch_op.alter_column('company_id', nullable=True) - batch_op.alter_column('model_id', nullable=False) - batch_op.drop_column('model_group') - batch_op.drop_column('cost') - 
batch_op.drop_column('exception') - batch_op.drop_column('traceback') - batch_op.drop_column('stream') - batch_op.drop_column('metadata') diff --git a/mindsdb/migrations/versions/2024-11-19_0f89b523f346_agent_skills_parameters.py b/mindsdb/migrations/versions/2024-11-19_0f89b523f346_agent_skills_parameters.py deleted file mode 100644 index b3aa172912c..00000000000 --- a/mindsdb/migrations/versions/2024-11-19_0f89b523f346_agent_skills_parameters.py +++ /dev/null @@ -1,28 +0,0 @@ -"""agent_skills_parameters - -Revision ID: 0f89b523f346 -Revises: 9d559f68d535 -Create Date: 2024-11-13 15:24:39.796947 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - -from sqlalchemy.dialects import sqlite # noqa - -# revision identifiers, used by Alembic. -revision = '0f89b523f346' -down_revision = '9d559f68d535' -branch_labels = None -depends_on = None - - -def upgrade(): - with op.batch_alter_table('agent_skills', schema=None) as batch_op: - batch_op.add_column(sa.Column('parameters', sa.JSON(), nullable=True)) - - -def downgrade(): - with op.batch_alter_table('agent_skills', schema=None) as batch_op: - batch_op.drop_column('parameters') diff --git a/mindsdb/migrations/versions/2024-11-28_a8a3fac369e7_llm_log_json_in_out.py b/mindsdb/migrations/versions/2024-11-28_a8a3fac369e7_llm_log_json_in_out.py deleted file mode 100644 index 6ec8e05b07d..00000000000 --- a/mindsdb/migrations/versions/2024-11-28_a8a3fac369e7_llm_log_json_in_out.py +++ /dev/null @@ -1,103 +0,0 @@ -"""llm_log_json_in_out - -Revision ID: a8a3fac369e7 -Revises: 0f89b523f346 -Create Date: 2024-11-28 17:19:20.798803 - -""" -import json - -from alembic import op -import sqlalchemy as sa -from sqlalchemy.sql import table -import mindsdb.interfaces.storage.db # noqa - -# revision identifiers, used by Alembic. 
-revision = 'a8a3fac369e7' -down_revision = '0f89b523f346' -branch_labels = None -depends_on = None - - -def upgrade(): - llm_log_table = table( - 'llm_log', - sa.Column('id', sa.Integer), - sa.Column('input', sa.String), - sa.Column('output', sa.String), - sa.Column('input_json', sa.JSON), - sa.Column('output_json', sa.JSON) - ) - - with op.batch_alter_table('llm_log', schema=None) as batch_op: - batch_op.add_column(sa.Column('input_json', sa.JSON(), nullable=True)) - batch_op.add_column(sa.Column('output_json', sa.JSON(), nullable=True)) - - connection = op.get_bind() - for row in connection.execute(llm_log_table.select()): - try: - input_json = json.loads(row.input) - except Exception: - input_json = None - - output_json = None - try: - if row.output is not None: - output_json = [str(row.output)] - except Exception: - pass - - connection.execute( - llm_log_table.update().where( - llm_log_table.c.id == row.id - ).values(input_json=input_json, output_json=output_json) - ) - - with op.batch_alter_table('llm_log', schema=None) as batch_op: - batch_op.drop_column('input') - batch_op.alter_column('input_json', new_column_name='input') - batch_op.drop_column('output') - batch_op.alter_column('output_json', new_column_name='output') - - -def downgrade(): - llm_log_table = table( - 'llm_log', - sa.Column('id', sa.Integer), - sa.Column('input', sa.JSON), - sa.Column('output', sa.JSON), - sa.Column('input_str', sa.String), - sa.Column('output_str', sa.String) - ) - - with op.batch_alter_table('llm_log', schema=None) as batch_op: - batch_op.add_column(sa.Column('input_str', sa.String(), nullable=True)) - batch_op.add_column(sa.Column('output_str', sa.String(), nullable=True)) - - connection = op.get_bind() - for row in connection.execute(llm_log_table.select()): - input_str = None - if row.input is not None: - try: - input_str = json.dumps(row.input) - except Exception: - pass - - output_str = None - if isinstance(row.output, list): - try: - output_str = 
'\n'.join(row.output) - except Exception: - pass - - connection.execute( - llm_log_table.update().where( - llm_log_table.c.id == row.id - ).values(input_str=input_str, output_str=output_str) - ) - - with op.batch_alter_table('llm_log', schema=None) as batch_op: - batch_op.drop_column('input') - batch_op.alter_column('input_str', new_column_name='input') - batch_op.drop_column('output') - batch_op.alter_column('output_str', new_column_name='output') diff --git a/mindsdb/migrations/versions/2024-11-29_f6dc924079fa_predictor_training_metadata.py b/mindsdb/migrations/versions/2024-11-29_f6dc924079fa_predictor_training_metadata.py deleted file mode 100644 index 655bc0866aa..00000000000 --- a/mindsdb/migrations/versions/2024-11-29_f6dc924079fa_predictor_training_metadata.py +++ /dev/null @@ -1,44 +0,0 @@ -"""predictor_training_metadata - -Revision ID: f6dc924079fa -Revises: a8a3fac369e7 -Create Date: 2024-11-29 15:06:47.269229 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - -# revision identifiers, used by Alembic. 
-revision = 'f6dc924079fa' -down_revision = 'a8a3fac369e7' -branch_labels = None -depends_on = None - - -def upgrade(): - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.add_column(sa.Column('training_metadata', sa.JSON(), nullable=True)) - batch_op.drop_column('is_custom') - batch_op.drop_column('hostname') - - predictor_table = sa.Table( - 'predictor', - sa.MetaData(), - sa.Column('id', sa.Integer()), - sa.Column('training_metadata', sa.JSON()) - ) - - op.execute(predictor_table.update().values(training_metadata={})) - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.alter_column( - 'training_metadata', - nullable=False - ) - - -def downgrade(): - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.add_column(sa.Column('hostname', sa.VARCHAR(), nullable=True)) - batch_op.add_column(sa.Column('is_custom', sa.BOOLEAN(), nullable=True)) - batch_op.drop_column('training_metadata') diff --git a/mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py b/mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py deleted file mode 100644 index cda75c31947..00000000000 --- a/mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +++ /dev/null @@ -1,88 +0,0 @@ -"""project-company - -Revision ID: c06c35f7e8e1 -Revises: f6dc924079fa -Create Date: 2025-01-15 14:14:29.295834 - -""" -from collections import defaultdict - -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa -from mindsdb.utilities import log - -# revision identifiers, used by Alembic. 
-revision = 'c06c35f7e8e1' -down_revision = 'f6dc924079fa' -branch_labels = None -depends_on = None - - -logger = log.getLogger(__name__) - - -def upgrade(): - - """ - convert company_id from null to 0 to make constrain works - duplicated names are renamed - """ - - conn = op.get_bind() - table = sa.Table( - 'project', - sa.MetaData(), - sa.Column('id', sa.Integer()), - sa.Column('name', sa.String()), - sa.Column('company_id', sa.Integer()), - ) - - data = conn.execute( - table - .select() - .where(table.c.company_id == sa.null()) - ).fetchall() - - names = defaultdict(list) - for id, name, _ in data: - names[name].append(id) - - # get duplicated - for name, ids in names.items(): - if len(ids) == 1: - continue - - # rename all except first - for id in ids[1:]: - new_name = f'{name}__{id}' - - op.execute( - table - .update() - .where(table.c.id == id) - .values({'name': new_name}) - ) - logger.warning(f'Found duplicated project name: {name}, renamed to: {new_name}') - - op.execute( - table - .update() - .where(table.c.company_id == sa.null()) - .values({'company_id': 0}) - ) - - -def downgrade(): - table = sa.Table( - 'project', - sa.MetaData(), - sa.Column('company_id', sa.Integer()) - ) - - op.execute( - table - .update() - .where(table.c.company_id == 0) - .values({'company_id': sa.null()}) - ) diff --git a/mindsdb/migrations/versions/2025-02-09_4943359e354a_file_metadata.py b/mindsdb/migrations/versions/2025-02-09_4943359e354a_file_metadata.py deleted file mode 100644 index 3f7a4880805..00000000000 --- a/mindsdb/migrations/versions/2025-02-09_4943359e354a_file_metadata.py +++ /dev/null @@ -1,31 +0,0 @@ -"""file_metadata - -Revision ID: 4943359e354a -Revises: c06c35f7e8e1 -Create Date: 2025-02-09 10:10:55.577407 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. 
-revision = '4943359e354a' -down_revision = 'c06c35f7e8e1' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('file', schema=None) as batch_op: - batch_op.add_column(sa.Column('metadata', sa.JSON(), nullable=True)) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('file', schema=None) as batch_op: - batch_op.drop_column('metadata') - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2025-02-10_6ab9903fc59a_del_log_table.py b/mindsdb/migrations/versions/2025-02-10_6ab9903fc59a_del_log_table.py deleted file mode 100644 index 4552a13c592..00000000000 --- a/mindsdb/migrations/versions/2025-02-10_6ab9903fc59a_del_log_table.py +++ /dev/null @@ -1,33 +0,0 @@ -"""del_log_table - -Revision ID: 6ab9903fc59a -Revises: 4943359e354a -Create Date: 2025-02-10 16:50:27.186697 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - -# revision identifiers, used by Alembic. 
-revision = '6ab9903fc59a' -down_revision = '4943359e354a' -branch_labels = None -depends_on = None - - -def upgrade(): - op.drop_table('log') - - -def downgrade(): - op.create_table( - 'log', - sa.Column('id', sa.INTEGER(), nullable=False), - sa.Column('created_at', sa.DATETIME(), nullable=True), - sa.Column('log_type', sa.VARCHAR(), nullable=True), - sa.Column('source', sa.VARCHAR(), nullable=True), - sa.Column('company_id', sa.INTEGER(), nullable=True), - sa.Column('payload', sa.VARCHAR(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) diff --git a/mindsdb/migrations/versions/2025-02-14_4521dafe89ab_added_encrypted_content_to_json_storage.py b/mindsdb/migrations/versions/2025-02-14_4521dafe89ab_added_encrypted_content_to_json_storage.py deleted file mode 100644 index 0725283cf13..00000000000 --- a/mindsdb/migrations/versions/2025-02-14_4521dafe89ab_added_encrypted_content_to_json_storage.py +++ /dev/null @@ -1,29 +0,0 @@ -"""added_encrypted_content_to_json_storage - -Revision ID: 4521dafe89ab -Revises: 6ab9903fc59a -Create Date: 2025-02-14 12:05:13.102594 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. 
-revision = '4521dafe89ab' -down_revision = '6ab9903fc59a' -branch_labels = None -depends_on = None - - -def upgrade(): - with op.batch_alter_table('json_storage', schema=None) as batch_op: - batch_op.add_column(sa.Column('encrypted_content', sa.LargeBinary(), nullable=True)) - batch_op.alter_column('resource_id', existing_type=sa.Integer(), type_=sa.BigInteger()) - - -def downgrade(): - with op.batch_alter_table('json_storage', schema=None) as batch_op: - batch_op.drop_column('encrypted_content') - batch_op.alter_column('resource_id', existing_type=sa.BigInteger(), type_=sa.Integer()) diff --git a/mindsdb/migrations/versions/2025-02-19_11347c213b36_added_metadata_to_projects.py b/mindsdb/migrations/versions/2025-02-19_11347c213b36_added_metadata_to_projects.py deleted file mode 100644 index d32226a543c..00000000000 --- a/mindsdb/migrations/versions/2025-02-19_11347c213b36_added_metadata_to_projects.py +++ /dev/null @@ -1,41 +0,0 @@ -"""added_metadata_to_projects - -Revision ID: 11347c213b36 -Revises: 4521dafe89ab -Create Date: 2025-02-19 18:46:24.014843 - -""" -from alembic import op -from sqlalchemy.orm.attributes import flag_modified -import sqlalchemy as sa - -import mindsdb.interfaces.storage.db as db -from mindsdb.utilities.config import config - - -# revision identifiers, used by Alembic. 
-revision = '11347c213b36' -down_revision = '4521dafe89ab' -branch_labels = None -depends_on = None - - -def upgrade(): - with op.batch_alter_table('project', schema=None) as batch_op: - batch_op.add_column(sa.Column('metadata', sa.JSON(), nullable=True)) - - conn = op.get_bind() - session = sa.orm.Session(bind=conn) - session.commit() - - project = session.query(db.Project).filter_by(name='mindsdb').first() - if project: - project.name = config.get('default_project') - project.metadata_ = {"is_default": True} - flag_modified(project, 'metadata_') - session.commit() - - -def downgrade(): - with op.batch_alter_table('project', schema=None) as batch_op: - batch_op.drop_column('metadata') diff --git a/mindsdb/migrations/versions/2025-03-21_fda503400e43_queries.py b/mindsdb/migrations/versions/2025-03-21_fda503400e43_queries.py deleted file mode 100644 index 94d3636fb3e..00000000000 --- a/mindsdb/migrations/versions/2025-03-21_fda503400e43_queries.py +++ /dev/null @@ -1,45 +0,0 @@ -"""queries - -Revision ID: fda503400e43 -Revises: 11347c213b36 -Create Date: 2025-03-21 18:50:20.795930 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. 
-revision = 'fda503400e43' -down_revision = '11347c213b36' -branch_labels = None -depends_on = None - - -def upgrade(): - op.create_table( - 'queries', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('sql', sa.String(), nullable=False), - sa.Column('started_at', sa.DateTime(), nullable=True), - sa.Column('finished_at', sa.DateTime(), nullable=True), - sa.Column('parameters', sa.JSON(), nullable=True), - sa.Column('context', sa.JSON(), nullable=True), - sa.Column('processed_rows', sa.Integer(), nullable=True), - sa.Column('error', sa.String(), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - - with op.batch_alter_table('knowledge_base', schema=None) as batch_op: - batch_op.add_column(sa.Column('query_id', sa.INTEGER(), nullable=True)) - - -def downgrade(): - with op.batch_alter_table('knowledge_base', schema=None) as batch_op: - batch_op.drop_column('query_id') - - op.drop_table('queries') diff --git a/mindsdb/migrations/versions/2025-04-22_53502b6d63bf_query_database.py b/mindsdb/migrations/versions/2025-04-22_53502b6d63bf_query_database.py deleted file mode 100644 index eaae069429f..00000000000 --- a/mindsdb/migrations/versions/2025-04-22_53502b6d63bf_query_database.py +++ /dev/null @@ -1,27 +0,0 @@ -"""query_database - -Revision ID: 53502b6d63bf -Revises: fda503400e43 -Create Date: 2025-04-22 16:30:15.139978 - -""" -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. 
-revision = '53502b6d63bf' -down_revision = 'fda503400e43' -branch_labels = None -depends_on = None - - -def upgrade(): - with op.batch_alter_table('queries', schema=None) as batch_op: - batch_op.add_column(sa.Column('database', sa.String(), nullable=True)) - - -def downgrade(): - with op.batch_alter_table('queries', schema=None) as batch_op: - batch_op.drop_column('database') diff --git a/mindsdb/migrations/versions/2025-05-21_9f150e4f9a05_checkpoint_1.py b/mindsdb/migrations/versions/2025-05-21_9f150e4f9a05_checkpoint_1.py deleted file mode 100644 index 9384679e4ac..00000000000 --- a/mindsdb/migrations/versions/2025-05-21_9f150e4f9a05_checkpoint_1.py +++ /dev/null @@ -1,360 +0,0 @@ -"""checkpoint_1 - -Revision ID: 9f150e4f9a05 -Revises: 53502b6d63bf -Create Date: 2025-05-21 12:25:55.556388 - -""" -import datetime - -from alembic.operations import Operations -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. -revision = '9f150e4f9a05' -down_revision = '53502b6d63bf' -branch_labels = None -depends_on = None - - -def upgrade(op: Operations = None): - # region skip migration if it is existing app, apply if it is new app - if op is None: - # 'op' is passed only from migrate.py when applying checkpoin migration - return - connection = op.get_bind() - inspector = sa.inspect(connection) - if 'alembic_version' in inspector.get_table_names(): - # If version_num exists, then it is existing app - result = connection.execute(sa.text("SELECT version_num FROM alembic_version")) - current_version = result.scalar() - if current_version is not None: - return - # endregion - - op.create_table( - 'agents', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('user_class', sa.Integer(), nullable=True), - sa.Column('name', sa.String(), nullable=False), - sa.Column('project_id', sa.Integer(), nullable=False), - sa.Column('model_name', sa.String(), 
nullable=True), - sa.Column('provider', sa.String(), nullable=True), - sa.Column('params', sa.JSON(), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.Column('deleted_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - op.create_table( - 'chat_bots_history', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('chat_bot_id', sa.Integer(), nullable=False), - sa.Column('type', sa.String(), nullable=True), - sa.Column('text', sa.String(), nullable=True), - sa.Column('user', sa.String(), nullable=True), - sa.Column('destination', sa.String(), nullable=True), - sa.Column('sent_at', sa.DateTime(), nullable=True), - sa.Column('error', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - op.create_table( - 'file', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('source_file_path', sa.String(), nullable=False), - sa.Column('file_path', sa.String(), nullable=False), - sa.Column('row_count', sa.Integer(), nullable=False), - sa.Column('columns', mindsdb.interfaces.storage.db.Json(), nullable=False), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.Column('metadata', sa.JSON(), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('name', 'company_id', name='unique_file_name_company_id') - ) - op.create_table( - 'integration', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.Column('name', sa.String(), nullable=False), - sa.Column('engine', sa.String(), nullable=False), - sa.Column('data', mindsdb.interfaces.storage.db.Json(), nullable=True), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.PrimaryKeyConstraint('id'), - 
sa.UniqueConstraint('name', 'company_id', name='unique_integration_name_company_id') - ) - op.create_table( - 'jobs', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('user_class', sa.Integer(), nullable=True), - sa.Column('active', sa.Boolean(), nullable=True), - sa.Column('name', sa.String(), nullable=False), - sa.Column('project_id', sa.Integer(), nullable=False), - sa.Column('query_str', sa.String(), nullable=False), - sa.Column('if_query_str', sa.String(), nullable=True), - sa.Column('start_at', sa.DateTime(), nullable=True), - sa.Column('end_at', sa.DateTime(), nullable=True), - sa.Column('next_run_at', sa.DateTime(), nullable=True), - sa.Column('schedule_str', sa.String(), nullable=True), - sa.Column('deleted_at', sa.DateTime(), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - op.create_table( - 'jobs_history', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('job_id', sa.Integer(), nullable=True), - sa.Column('query_str', sa.String(), nullable=True), - sa.Column('start_at', sa.DateTime(), nullable=True), - sa.Column('end_at', sa.DateTime(), nullable=True), - sa.Column('error', sa.String(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('job_id', 'start_at', name='uniq_job_history_job_id_start') - ) - op.create_table( - 'json_storage', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('resource_group', sa.String(), nullable=True), - sa.Column('resource_id', sa.Integer(), nullable=True), - sa.Column('name', sa.String(), nullable=True), - sa.Column('content', sa.JSON(), nullable=True), - sa.Column('encrypted_content', sa.LargeBinary(), nullable=True), - 
sa.Column('company_id', sa.Integer(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - op.create_table( - 'llm_data', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('input', sa.String(), nullable=False), - sa.Column('output', sa.String(), nullable=False), - sa.Column('model_id', sa.Integer(), nullable=False), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - op.create_table( - 'llm_log', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=False), - sa.Column('api_key', sa.String(), nullable=True), - sa.Column('model_id', sa.Integer(), nullable=True), - sa.Column('model_group', sa.String(), nullable=True), - sa.Column('input', sa.JSON(), nullable=True), - sa.Column('output', sa.JSON(), nullable=True), - sa.Column('start_time', sa.DateTime(), nullable=False), - sa.Column('end_time', sa.DateTime(), nullable=True), - sa.Column('cost', sa.Numeric(precision=5, scale=2), nullable=True), - sa.Column('prompt_tokens', sa.Integer(), nullable=True), - sa.Column('completion_tokens', sa.Integer(), nullable=True), - sa.Column('total_tokens', sa.Integer(), nullable=True), - sa.Column('success', sa.Boolean(), nullable=False), - sa.Column('exception', sa.String(), nullable=True), - sa.Column('traceback', sa.String(), nullable=True), - sa.Column('stream', sa.Boolean(), nullable=True, comment="Is this completion done in 'streaming' mode"), - sa.Column('metadata', sa.JSON(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - op.create_table( - 'project', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.Column('deleted_at', sa.DateTime(), nullable=True), - sa.Column('name', sa.String(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('metadata', sa.JSON(), 
nullable=True), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('name', 'company_id', name='unique_project_name_company_id') - ) - op.create_table( - 'queries', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('sql', sa.String(), nullable=False), - sa.Column('database', sa.String(), nullable=True), - sa.Column('started_at', sa.DateTime(), nullable=True), - sa.Column('finished_at', sa.DateTime(), nullable=True), - sa.Column('parameters', sa.JSON(), nullable=True), - sa.Column('context', sa.JSON(), nullable=True), - sa.Column('processed_rows', sa.Integer(), nullable=True), - sa.Column('error', sa.String(), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - op.create_table( - 'query_context', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('query', sa.String(), nullable=False), - sa.Column('context_name', sa.String(), nullable=False), - sa.Column('values', sa.JSON(), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - op.create_table( - 'skills', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('project_id', sa.Integer(), nullable=False), - sa.Column('type', sa.String(), nullable=False), - sa.Column('params', sa.JSON(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.Column('deleted_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - op.create_table( - 'tasks', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('user_class', sa.Integer(), nullable=True), - 
sa.Column('object_type', sa.String(), nullable=False), - sa.Column('object_id', sa.Integer(), nullable=False), - sa.Column('last_error', sa.String(), nullable=True), - sa.Column('active', sa.Boolean(), nullable=True), - sa.Column('reload', sa.Boolean(), nullable=True), - sa.Column('run_by', sa.String(), nullable=True), - sa.Column('alive_time', sa.DateTime(timezone=True), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - op.create_table( - 'triggers', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('project_id', sa.Integer(), nullable=False), - sa.Column('database_id', sa.Integer(), nullable=False), - sa.Column('table_name', sa.String(), nullable=False), - sa.Column('query_str', sa.String(), nullable=False), - sa.Column('columns', sa.String(), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - op.create_table( - 'agent_skills', - sa.Column('agent_id', sa.Integer(), nullable=False), - sa.Column('skill_id', sa.Integer(), nullable=False), - sa.Column('parameters', sa.JSON(), nullable=True), - sa.ForeignKeyConstraint(['agent_id'], ['agents.id'], ), - sa.ForeignKeyConstraint(['skill_id'], ['skills.id'], ), - sa.PrimaryKeyConstraint('agent_id', 'skill_id') - ) - op.create_table( - 'chat_bots', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('project_id', sa.Integer(), nullable=False), - sa.Column('agent_id', sa.Integer(), nullable=True), - sa.Column('model_name', sa.String(), nullable=True), - sa.Column('database_id', sa.Integer(), nullable=True), - sa.Column('params', sa.JSON(), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - 
sa.Column('webhook_token', sa.String(), nullable=True), - sa.ForeignKeyConstraint(['agent_id'], ['agents.id'], name='fk_agent_id'), - sa.PrimaryKeyConstraint('id') - ) - op.create_table( - 'predictor', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.Column('deleted_at', sa.DateTime(), nullable=True), - sa.Column('name', sa.String(), nullable=True), - sa.Column('data', mindsdb.interfaces.storage.db.Json(), nullable=True), - sa.Column('to_predict', mindsdb.interfaces.storage.db.Array(), nullable=True), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('mindsdb_version', sa.String(), nullable=True), - sa.Column('native_version', sa.String(), nullable=True), - sa.Column('integration_id', sa.Integer(), nullable=True), - sa.Column('data_integration_ref', mindsdb.interfaces.storage.db.Json(), nullable=True), - sa.Column('fetch_data_query', sa.String(), nullable=True), - sa.Column('learn_args', mindsdb.interfaces.storage.db.Json(), nullable=True), - sa.Column('update_status', sa.String(), nullable=True), - sa.Column('status', sa.String(), nullable=True), - sa.Column('active', sa.Boolean(), nullable=True), - sa.Column('training_data_columns_count', sa.Integer(), nullable=True), - sa.Column('training_data_rows_count', sa.Integer(), nullable=True), - sa.Column('training_start_at', sa.DateTime(), nullable=True), - sa.Column('training_stop_at', sa.DateTime(), nullable=True), - sa.Column('label', sa.String(), nullable=True), - sa.Column('version', sa.Integer(), nullable=True), - sa.Column('code', sa.String(), nullable=True), - sa.Column('lightwood_version', sa.String(), nullable=True), - sa.Column('dtype_dict', mindsdb.interfaces.storage.db.Json(), nullable=True), - sa.Column('project_id', sa.Integer(), nullable=False), - sa.Column('training_phase_current', sa.Integer(), nullable=True), - sa.Column('training_phase_total', sa.Integer(), 
nullable=True), - sa.Column('training_phase_name', sa.String(), nullable=True), - sa.Column('training_metadata', sa.JSON(), nullable=False), - sa.ForeignKeyConstraint(['integration_id'], ['integration.id'], name='fk_integration_id'), - sa.ForeignKeyConstraint(['project_id'], ['project.id'], name='fk_project_id'), - sa.PrimaryKeyConstraint('id') - ) - with op.batch_alter_table('predictor', schema=None) as batch_op: - batch_op.create_index('predictor_index', ['company_id', 'name', 'version', 'active', 'deleted_at'], unique=True) - - op.create_table( - 'view', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('company_id', sa.Integer(), nullable=True), - sa.Column('query', sa.String(), nullable=False), - sa.Column('project_id', sa.Integer(), nullable=False), - sa.ForeignKeyConstraint(['project_id'], ['project.id'], name='fk_project_id'), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('name', 'company_id', name='unique_view_name_company_id') - ) - op.create_table( - 'knowledge_base', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('project_id', sa.Integer(), nullable=False), - sa.Column('params', sa.JSON(), nullable=True), - sa.Column('vector_database_id', sa.Integer(), nullable=True), - sa.Column('vector_database_table', sa.String(), nullable=True), - sa.Column('embedding_model_id', sa.Integer(), nullable=True), - sa.Column('query_id', sa.Integer(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.ForeignKeyConstraint(['embedding_model_id'], ['predictor.id'], name='fk_knowledge_base_embedding_model_id'), - sa.ForeignKeyConstraint(['vector_database_id'], ['integration.id'], name='fk_knowledge_base_vector_database_id'), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('name', 'project_id', name='unique_knowledge_base_name_project_id') - ) - - # 
Insert default project - op.bulk_insert( - sa.table( - 'project', - sa.Column('name', sa.String()), - sa.Column('company_id', sa.Integer()), - sa.Column('metadata', sa.JSON()), - sa.Column('created_at', sa.DateTime()) - ), - [{ - 'name': 'mindsdb', - 'company_id': 0, - 'metadata': {'is_default': True}, - 'created_at': datetime.datetime.now() - }] - ) - - -def downgrade(): - # do nothging, since it is checkpoint migration - pass diff --git a/mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py b/mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py deleted file mode 100644 index bbc6c3fb9b0..00000000000 --- a/mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +++ /dev/null @@ -1,118 +0,0 @@ -"""added data catalog tables - -Revision ID: a44643042fe8 -Revises: 9f150e4f9a05 -Create Date: 2025-05-28 17:20:57.300313 - -""" - -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa -from mindsdb.interfaces.storage.db import Array - - -# revision identifiers, used by Alembic. 
-revision = "a44643042fe8" -down_revision = "9f150e4f9a05" -branch_labels = None -depends_on = None - - -def upgrade(): - op.create_table( - "meta_tables", - sa.Column("id", sa.Integer(), primary_key=True), - sa.Column( - "integration_id", - sa.Integer(), - sa.ForeignKey("integration.id"), - nullable=False, - ), - sa.Column("name", sa.String(), nullable=False), - sa.Column("schema", sa.String(), nullable=True), - sa.Column("description", sa.String(), nullable=True), - sa.Column("type", sa.String(), nullable=True), - sa.Column("row_count", sa.Integer(), nullable=True), - ) - - op.create_table( - "meta_columns", - sa.Column("id", sa.Integer(), primary_key=True), - sa.Column("table_id", sa.Integer(), sa.ForeignKey("meta_tables.id"), nullable=False), - sa.Column("name", sa.String(), nullable=False), - sa.Column("data_type", sa.String(), nullable=False), - sa.Column("default_value", sa.String(), nullable=True), - sa.Column("description", sa.String(), nullable=True), - sa.Column("is_nullable", sa.Boolean(), nullable=True), - ) - - op.create_table( - "meta_column_statistics", - sa.Column( - "column_id", - sa.Integer(), - sa.ForeignKey("meta_columns.id"), - primary_key=True, - ), - sa.Column("most_common_values", Array(), nullable=True), - sa.Column("most_common_frequencies", Array(), nullable=True), - sa.Column("null_percentage", sa.Numeric(5, 2), nullable=True), - sa.Column("distinct_values_count", sa.Integer(), nullable=True), - sa.Column("minimum_value", sa.String(), nullable=True), - sa.Column("maximum_value", sa.String(), nullable=True), - ) - - op.create_table( - "meta_primary_keys", - sa.Column("table_id", sa.Integer(), sa.ForeignKey("meta_tables.id"), primary_key=True), - sa.Column( - "column_id", - sa.Integer(), - sa.ForeignKey("meta_columns.id"), - primary_key=True, - ), - sa.Column("ordinal_position", sa.Integer(), nullable=True), - sa.Column("constraint_name", sa.String(), nullable=True), - ) - - op.create_table( - "meta_foreign_keys", - sa.Column( - 
"parent_table_id", - sa.Integer(), - sa.ForeignKey("meta_tables.id"), - primary_key=True, - ), - sa.Column( - "parent_column_id", - sa.Integer(), - sa.ForeignKey("meta_columns.id"), - primary_key=True, - ), - sa.Column( - "child_table_id", - sa.Integer(), - sa.ForeignKey("meta_tables.id"), - primary_key=True, - ), - sa.Column( - "child_column_id", - sa.Integer(), - sa.ForeignKey("meta_columns.id"), - primary_key=True, - ), - sa.Column("constraint_name", sa.String(), nullable=True), - ) - - -def downgrade(): - op.drop_table("meta_tables") - - op.drop_table("meta_columns") - - op.drop_table("meta_column_statistics") - - op.drop_table("meta_primary_keys") - - op.drop_table("meta_foreign_keys") diff --git a/mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py b/mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py deleted file mode 100644 index 824123d756f..00000000000 --- a/mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +++ /dev/null @@ -1,58 +0,0 @@ -"""updated data catalog data types - -Revision ID: 608e376c19a7 -Revises: a44643042fe8 -Create Date: 2025-06-09 23:20:34.739735 - -""" - -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. 
-revision = "608e376c19a7" -down_revision = "a44643042fe8" -branch_labels = None -depends_on = None - - -def upgrade(): - with op.batch_alter_table("meta_tables", schema=None) as batch_op: - batch_op.alter_column( - "row_count", - type_=sa.BigInteger(), - existing_type=sa.Integer(), - existing_nullable=True, - existing_server_default=None, - ) - - with op.batch_alter_table("meta_column_statistics", schema=None) as batch_op: - batch_op.alter_column( - "distinct_values_count", - type_=sa.BigInteger(), - existing_type=sa.Integer(), - existing_nullable=True, - existing_server_default=None, - ) - - -def downgrade(): - with op.batch_alter_table("meta_tables", schema=None) as batch_op: - batch_op.alter_column( - "row_count", - type_=sa.Integer(), - existing_type=sa.BigInteger(), - existing_nullable=True, - existing_server_default=None, - ) - - with op.batch_alter_table("meta_column_statistics", schema=None) as batch_op: - batch_op.alter_column( - "distinct_values_count", - type_=sa.Integer(), - existing_type=sa.BigInteger(), - existing_nullable=True, - existing_server_default=None, - ) diff --git a/mindsdb/migrations/versions/2025-10-24_54ed56beb47a_convert_company_id_to_string.py b/mindsdb/migrations/versions/2025-10-24_54ed56beb47a_convert_company_id_to_string.py deleted file mode 100644 index c504e24bd8c..00000000000 --- a/mindsdb/migrations/versions/2025-10-24_54ed56beb47a_convert_company_id_to_string.py +++ /dev/null @@ -1,106 +0,0 @@ -"""convert_company_id_to_string - -Revision ID: 54ed56beb47a -Revises: 608e376c19a7 -Create Date: 2025-10-24 15:05:30.187143 - -""" - -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - - -# revision identifiers, used by Alembic. -revision = "54ed56beb47a" -down_revision = "608e376c19a7" -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - with op.batch_alter_table("agents", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.INTEGER(), type_=sa.String(), existing_nullable=True) - - with op.batch_alter_table("file", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.INTEGER(), type_=sa.String(), existing_nullable=True) - - with op.batch_alter_table("integration", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.INTEGER(), type_=sa.String(), existing_nullable=True) - - with op.batch_alter_table("jobs", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.INTEGER(), type_=sa.String(), existing_nullable=True) - - with op.batch_alter_table("jobs_history", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.INTEGER(), type_=sa.String(), existing_nullable=True) - - with op.batch_alter_table("json_storage", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.INTEGER(), type_=sa.String(), existing_nullable=True) - - with op.batch_alter_table("llm_log", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.INTEGER(), type_=sa.String(), existing_nullable=False) - - with op.batch_alter_table("predictor", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.INTEGER(), type_=sa.String(), existing_nullable=True) - - with op.batch_alter_table("project", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.INTEGER(), type_=sa.String(), existing_nullable=True) - - with op.batch_alter_table("queries", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.INTEGER(), type_=sa.String(), existing_nullable=True) - - with op.batch_alter_table("query_context", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.INTEGER(), type_=sa.String(), existing_nullable=True) - - with op.batch_alter_table("tasks", 
schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.INTEGER(), type_=sa.String(), existing_nullable=True) - - with op.batch_alter_table("view", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.INTEGER(), type_=sa.String(), existing_nullable=True) - - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table("view", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.String(), type_=sa.INTEGER(), existing_nullable=True) - - with op.batch_alter_table("tasks", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.String(), type_=sa.INTEGER(), existing_nullable=True) - - with op.batch_alter_table("query_context", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.String(), type_=sa.INTEGER(), existing_nullable=True) - - with op.batch_alter_table("queries", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.String(), type_=sa.INTEGER(), existing_nullable=True) - - with op.batch_alter_table("project", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.String(), type_=sa.INTEGER(), existing_nullable=True) - - with op.batch_alter_table("predictor", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.String(), type_=sa.INTEGER(), existing_nullable=True) - - with op.batch_alter_table("llm_log", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.String(), type_=sa.INTEGER(), existing_nullable=False) - - with op.batch_alter_table("json_storage", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.String(), type_=sa.INTEGER(), existing_nullable=True) - - with op.batch_alter_table("jobs_history", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.String(), 
type_=sa.INTEGER(), existing_nullable=True) - - with op.batch_alter_table("jobs", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.String(), type_=sa.INTEGER(), existing_nullable=True) - - with op.batch_alter_table("integration", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.String(), type_=sa.INTEGER(), existing_nullable=True) - - with op.batch_alter_table("file", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.String(), type_=sa.INTEGER(), existing_nullable=True) - - with op.batch_alter_table("agents", schema=None) as batch_op: - batch_op.alter_column("company_id", existing_type=sa.String(), type_=sa.INTEGER(), existing_nullable=True) - - # ### end Alembic commands ### diff --git a/mindsdb/migrations/versions/2025-10-27_86b172b78a5b_removed_data_catalog_tables.py b/mindsdb/migrations/versions/2025-10-27_86b172b78a5b_removed_data_catalog_tables.py deleted file mode 100644 index 6438103da75..00000000000 --- a/mindsdb/migrations/versions/2025-10-27_86b172b78a5b_removed_data_catalog_tables.py +++ /dev/null @@ -1,114 +0,0 @@ -"""removed_data_catalog_tables - -Revision ID: 86b172b78a5b -Revises: 54ed56beb47a -Create Date: 2025-10-27 11:14:14.671837 - -""" - -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa -from mindsdb.interfaces.storage.db import Array - - -# revision identifiers, used by Alembic. 
-revision = "86b172b78a5b" -down_revision = "54ed56beb47a" -branch_labels = None -depends_on = None - - -def upgrade(): - op.drop_table("meta_foreign_keys") - op.drop_table("meta_primary_keys") - op.drop_table("meta_column_statistics") - op.drop_table("meta_columns") - op.drop_table("meta_tables") - - -def downgrade(): - op.create_table( - "meta_tables", - sa.Column("id", sa.Integer(), primary_key=True), - sa.Column( - "integration_id", - sa.Integer(), - sa.ForeignKey("integration.id"), - nullable=False, - ), - sa.Column("name", sa.String(), nullable=False), - sa.Column("schema", sa.String(), nullable=True), - sa.Column("description", sa.String(), nullable=True), - sa.Column("type", sa.String(), nullable=True), - sa.Column("row_count", sa.BigInteger(), nullable=True), # Updated data type - ) - - op.create_table( - "meta_columns", - sa.Column("id", sa.Integer(), primary_key=True), - sa.Column("table_id", sa.Integer(), sa.ForeignKey("meta_tables.id"), nullable=False), - sa.Column("name", sa.String(), nullable=False), - sa.Column("data_type", sa.String(), nullable=False), - sa.Column("default_value", sa.String(), nullable=True), - sa.Column("description", sa.String(), nullable=True), - sa.Column("is_nullable", sa.Boolean(), nullable=True), - ) - - op.create_table( - "meta_column_statistics", - sa.Column( - "column_id", - sa.Integer(), - sa.ForeignKey("meta_columns.id"), - primary_key=True, - ), - sa.Column("most_common_values", Array(), nullable=True), - sa.Column("most_common_frequencies", Array(), nullable=True), - sa.Column("null_percentage", sa.Numeric(5, 2), nullable=True), - sa.Column("distinct_values_count", sa.BigInteger(), nullable=True), # Updated data type - sa.Column("minimum_value", sa.String(), nullable=True), - sa.Column("maximum_value", sa.String(), nullable=True), - ) - - op.create_table( - "meta_primary_keys", - sa.Column("table_id", sa.Integer(), sa.ForeignKey("meta_tables.id"), primary_key=True), - sa.Column( - "column_id", - sa.Integer(), - 
sa.ForeignKey("meta_columns.id"), - primary_key=True, - ), - sa.Column("ordinal_position", sa.Integer(), nullable=True), - sa.Column("constraint_name", sa.String(), nullable=True), - ) - - op.create_table( - "meta_foreign_keys", - sa.Column( - "parent_table_id", - sa.Integer(), - sa.ForeignKey("meta_tables.id"), - primary_key=True, - ), - sa.Column( - "parent_column_id", - sa.Integer(), - sa.ForeignKey("meta_columns.id"), - primary_key=True, - ), - sa.Column( - "child_table_id", - sa.Integer(), - sa.ForeignKey("meta_tables.id"), - primary_key=True, - ), - sa.Column( - "child_column_id", - sa.Integer(), - sa.ForeignKey("meta_columns.id"), - primary_key=True, - ), - sa.Column("constraint_name", sa.String(), nullable=True), - ) diff --git a/mindsdb/migrations/versions/2026-01-12_f64112749455_add_user_id_field.py b/mindsdb/migrations/versions/2026-01-12_f64112749455_add_user_id_field.py deleted file mode 100644 index b42a14d1c5a..00000000000 --- a/mindsdb/migrations/versions/2026-01-12_f64112749455_add_user_id_field.py +++ /dev/null @@ -1,497 +0,0 @@ -"""add user_id field and make company_id non-nullable - -Revision ID: f64112749455 -Revises: 86b172b78a5b -Create Date: 2026-01-12 14:52:20.431290 - -""" - -import re -import shutil -from pathlib import Path - -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa -from mindsdb.utilities import log -from mindsdb.utilities.config import Config -from mindsdb.utilities.constants import DEFAULT_COMPANY_ID, DEFAULT_USER_ID - -logger = log.getLogger(__name__) - -# revision identifiers, used by Alembic. 
-revision = "f64112749455" -down_revision = "86b172b78a5b" -branch_labels = None -depends_on = None - -# Old company_id patterns that need to be migrated to DEFAULT_COMPANY_ID -# These patterns represent legacy company_id values that should now use the default -OLD_COMPANY_ID_PATTERNS = ["0", "None"] - -# Resource groups that have folders on disk that may need renaming -RESOURCE_GROUPS_WITH_FOLDERS = ["file", "tab", "predictor", "integration", "system"] - - -def _get_new_folder_name(old_name: str) -> str: - """Convert old folder name to new format with DEFAULT_COMPANY_ID and DEFAULT_USER_ID. - - Old formats (company_id was None or 0, no user_id): - - {resource_group}_None_{id} - - {resource_group}_0_{id} - - {resource_group}_{company_id}_{id} (real company_id, no user_id) - - New format: - - {resource_group}_{company_id}_{user_id}_{id} - - Args: - old_name: The old folder name - - Returns: - The new folder name with company_id and user_id, or None if no change needed - """ - # First, handle old patterns with None or 0 as company_id - for pattern in OLD_COMPANY_ID_PATTERNS: - # Match patterns like "file_None_123" or "tab_0_0" - regex = rf"^([a-z]+)_{re.escape(pattern)}_(.+)$" - match = re.match(regex, old_name) - if match: - resource_group = match.group(1) - resource_id = match.group(2) - return f"{resource_group}_{DEFAULT_COMPANY_ID}_{DEFAULT_USER_ID}_{resource_id}" - - # Then, handle old format with real company_id but no user_id - # Match patterns like "file_9_123" (company_id_resource_id, where company_id is not a UUID) - # but NOT patterns that already have user_id like "file_9_abc-def_123" - regex = r"^([a-z]+)_([^_]+)_(\d+)$" - match = re.match(regex, old_name) - if match: - resource_group = match.group(1) - company_id = match.group(2) - resource_id = match.group(3) - # Skip if company_id looks like a UUID (already in new format or DEFAULT_COMPANY_ID) - if len(company_id) == 36 and company_id.count("-") == 4: - return None - # Skip if this is an old pattern 
we already handled - if company_id in OLD_COMPANY_ID_PATTERNS: - return None - return f"{resource_group}_{company_id}_{DEFAULT_USER_ID}_{resource_id}" - - return None - - -def _migrate_storage_folders(): - """Rename storage folders from old company_id patterns to DEFAULT_COMPANY_ID. - - This handles folders like: - - file_None_123 -> file_{DEFAULT_COMPANY_ID}_123 - - tab_None_0 -> tab_{DEFAULT_COMPANY_ID}_0 - - predictor_0_456 -> predictor_{DEFAULT_COMPANY_ID}_456 - """ - try: - config = Config() - content_path = Path(config["paths"]["content"]) - - if not content_path.exists(): - logger.info("Content path does not exist, skipping folder migration") - return - - for resource_group in RESOURCE_GROUPS_WITH_FOLDERS: - resource_group_path = content_path / resource_group - - if not resource_group_path.exists(): - continue - - for folder in resource_group_path.iterdir(): - if not folder.is_dir(): - continue - - new_name = _get_new_folder_name(folder.name) - if new_name is None: - continue - - new_path = resource_group_path / new_name - - if new_path.exists(): - logger.warning( - "Target folder already exists, merging: %s -> %s", - folder, - new_path, - ) - # Merge contents: copy files from old folder to new, then remove old - for item in folder.iterdir(): - dest = new_path / item.name - if not dest.exists(): - if item.is_dir(): - shutil.copytree(item, dest) - else: - shutil.copy2(item, dest) - shutil.rmtree(folder) - else: - logger.info("Renaming folder: %s -> %s", folder.name, new_name) - folder.rename(new_path) - - except Exception as e: - logger.warning("Error migrating storage folders: %s", e) - # Don't fail the migration if folder rename fails - # The data is still accessible, just at the old path - - -def _migrate_file_paths(): - """Update file_path column in the file table to new format with company_id and user_id. 
- - Old formats: - - file_None_123 - - file_0_123 - - file_{company_id}_123 (real company_id, no user_id) - - New format: - - file_{company_id}_{user_id}_123 - """ - connection = op.get_bind() - - # Step 1: Update file_path for files with None or 0 company_id pattern - # These get DEFAULT_COMPANY_ID and DEFAULT_USER_ID - for pattern in OLD_COMPANY_ID_PATTERNS: - # For each file record matching the old pattern, update to new format - # We need to extract the file_id and construct the new path - if connection.dialect.name == "sqlite": - # SQLite: Use substr to extract file_id and construct new path - old_prefix = f"file_{pattern}_" - old_prefix_filter = old_prefix.replace("_", "\\_") - new_prefix = f"file_{DEFAULT_COMPANY_ID}_{DEFAULT_USER_ID}_" - connection.execute( - sa.text( - f"UPDATE file SET file_path = '{new_prefix}' || substr(file_path, {len(old_prefix) + 1}) " - f"WHERE file_path LIKE '{old_prefix_filter}%'" - ) - ) - else: - # PostgreSQL/MySQL: Use CONCAT and SUBSTRING - old_prefix = f"file_{pattern}_" - old_prefix_filter = old_prefix.replace("_", "\\_") - new_prefix = f"file_{DEFAULT_COMPANY_ID}_{DEFAULT_USER_ID}_" - connection.execute( - sa.text( - f"UPDATE file SET file_path = CONCAT('{new_prefix}', SUBSTRING(file_path, {len(old_prefix) + 1})) " - f"WHERE file_path LIKE '{old_prefix_filter}%'" - ) - ) - - # Step 2: Update file_path for files with real company_id but no user_id - # These keep their company_id and get DEFAULT_USER_ID added - # Pattern: file_{company_id}_{file_id} where company_id is NOT a UUID - # We update these by inserting DEFAULT_USER_ID between company_id and file_id - - # First, get all file records that need updating (those without user_id in path) - # A path without user_id looks like: file_{company_id}_{file_id} - # A path with user_id looks like: file_{company_id}_{user_id}_{file_id} - # We can identify old format by checking if file_path matches the expected new format - - # Get records where file_path doesn't contain 
DEFAULT_USER_ID or any other UUID-like user_id - # This is tricky with just SQL, so we'll update based on the file record's company_id and id - result = connection.execute(sa.text("SELECT id, company_id, file_path FROM file")) - rows = result.fetchall() - - for row in rows: - file_id, company_id, file_path = row - expected_new_path = f"file_{company_id}_{DEFAULT_USER_ID}_{file_id}" - - # Skip if already in new format - if file_path == expected_new_path: - continue - - # Skip if file_path already has 4 parts (already has user_id) - parts = file_path.split("_") if file_path else [] - if len(parts) >= 4: - # Already has user_id in path, might need company_id update only - # Check if it starts with file_{company_id}_{user_id}_ - if parts[0] == "file" and parts[1] == company_id: - continue - - # Update to new format - connection.execute( - sa.text("UPDATE file SET file_path = :new_path WHERE id = :file_id"), - {"new_path": expected_new_path, "file_id": file_id}, - ) - - logger.info("Migrated file_path column to new format with user_id") - - -def _revert_file_paths(): - """Revert file_path column to old format without user_id (for downgrade). 
- - New format: file_{company_id}_{user_id}_{file_id} - Old format: file_{company_id}_{file_id} - - For records with DEFAULT_COMPANY_ID, revert to file_None_{file_id} - """ - connection = op.get_bind() - - # Get all file records and revert their paths - result = connection.execute(sa.text("SELECT id, company_id, file_path FROM file")) - rows = result.fetchall() - - for row in rows: - file_id, company_id, file_path = row - - # Determine old format company_id - if company_id == DEFAULT_COMPANY_ID: - old_company_id = "None" - else: - old_company_id = company_id - - old_path = f"file_{old_company_id}_{file_id}" - - # Skip if already in old format - if file_path == old_path: - continue - - connection.execute( - sa.text("UPDATE file SET file_path = :old_path WHERE id = :file_id"), - {"old_path": old_path, "file_id": file_id}, - ) - - logger.info("Reverted file_path column to old format without user_id") - - -def _revert_storage_folders(): - """Revert storage folders to old naming without user_id (for downgrade). 
- - New format: {resource_group}_{company_id}_{user_id}_{resource_id} - Old format: {resource_group}_{company_id}_{resource_id} - - For folders with DEFAULT_COMPANY_ID, revert to {resource_group}_None_{resource_id} - """ - try: - config = Config() - content_path = Path(config["paths"]["content"]) - - if not content_path.exists(): - return - - for resource_group in RESOURCE_GROUPS_WITH_FOLDERS: - resource_group_path = content_path / resource_group - - if not resource_group_path.exists(): - continue - - for folder in resource_group_path.iterdir(): - if not folder.is_dir(): - continue - - # Match new format: {resource_group}_{company_id}_{user_id}_{resource_id} - # where user_id is DEFAULT_USER_ID (UUID format) - parts = folder.name.split("_") - - # Need at least 4 parts for new format with user_id - if len(parts) < 4: - continue - - # Check if this looks like new format with user_id - # Pattern: resource_group_company_id_user_id_resource_id - # For UUIDs with dashes, they get split, so we need to handle that - prefix_with_user = f"{resource_group}_{DEFAULT_COMPANY_ID}_{DEFAULT_USER_ID}_" - if folder.name.startswith(prefix_with_user): - # Revert to old format with 'None' company_id - resource_id = folder.name[len(prefix_with_user) :] - old_name = f"{resource_group}_None_{resource_id}" - old_path = resource_group_path / old_name - - if not old_path.exists(): - logger.info("Reverting folder: %s -> %s", folder.name, old_name) - folder.rename(old_path) - continue - - # Check for real company_id with DEFAULT_USER_ID - # Pattern: {resource_group}_{company_id}_{DEFAULT_USER_ID}_{resource_id} - # This is harder to detect, try to match by finding DEFAULT_USER_ID in the name - user_id_pattern = f"_{DEFAULT_USER_ID}_" - if user_id_pattern in folder.name: - # Extract company_id and resource_id - before_user_id = folder.name.split(user_id_pattern)[0] - after_user_id = folder.name.split(user_id_pattern)[1] - - # before_user_id should be "{resource_group}_{company_id}" - if 
before_user_id.startswith(f"{resource_group}_"): - company_id = before_user_id[len(f"{resource_group}_") :] - resource_id = after_user_id - old_name = f"{resource_group}_{company_id}_{resource_id}" - old_path = resource_group_path / old_name - - if not old_path.exists(): - logger.info("Reverting folder: %s -> %s", folder.name, old_name) - folder.rename(old_path) - - except Exception as e: - logger.warning("Error reverting storage folders: %s", e) - - -# Tables that need user_id column added (considering only the tables that already have company_id column) -TABLES_WITH_USER_ID = [ - "predictor", - "project", - "integration", - "file", - "view", - "json_storage", - "jobs", - "jobs_history", - "tasks", - "agents", - "query_context", - "queries", - "llm_log", -] - -# Old unique constraints to drop (name -> table) (considering only the constraints that already have company_id column) -OLD_CONSTRAINTS = { - "unique_project_name_company_id": "project", - "unique_integration_name_company_id": "integration", - "unique_file_name_company_id": "file", - "unique_view_name_company_id": "view", -} - -# New unique constraints to create (name -> (table, columns)) (considering only the constraints that already have company_id column) -NEW_CONSTRAINTS = { - "unique_project_name_company_id_user_id": ("project", ["name", "company_id", "user_id"]), - "unique_integration_name_company_id_user_id": ("integration", ["name", "company_id", "user_id"]), - "unique_file_name_company_id_user_id": ("file", ["name", "company_id", "user_id"]), - "unique_view_name_company_id_user_id": ("view", ["name", "company_id", "user_id"]), -} - - -def _is_sqlite(): - """Check if the current database is SQLite.""" - bind = op.get_bind() - return bind.dialect.name == "sqlite" - - -def upgrade(): - # First, update any NULL, empty, or legacy '0' company_id values to DEFAULT_COMPANY_ID before making the column non-nullable - # Note: '0' was the legacy integer value that got converted to string by a previous 
migration - # Also handle 'None' string which may have been stored when Python None was converted to string - for table_name in TABLES_WITH_USER_ID: - op.execute( - f"UPDATE {table_name} SET company_id = '{DEFAULT_COMPANY_ID}' WHERE company_id IS NULL OR company_id = '' OR company_id = '0' OR company_id = 'None'" - ) - - # Migrate file paths in the database before schema changes - # This updates file_path column from patterns like 'file_None_123' to 'file_{DEFAULT_COMPANY_ID}_123' - _migrate_file_paths() - - # Add user_id column and make company_id non-nullable with default DEFAULT_COMPANY_ID for all tables - for table_name in TABLES_WITH_USER_ID: - with op.batch_alter_table(table_name, schema=None) as batch_op: - batch_op.add_column(sa.Column("user_id", sa.String(), nullable=False, server_default=DEFAULT_USER_ID)) - # Make company_id non-nullable with default DEFAULT_COMPANY_ID - batch_op.alter_column( - "company_id", existing_type=sa.String(), nullable=False, server_default=DEFAULT_COMPANY_ID - ) - - # Drop old unique constraints and create new ones with user_id - # For SQLite, we need to use batch_alter_table which recreates the table - # For PostgreSQL/MySQL, we can use DROP CONSTRAINT directly - if _is_sqlite(): - # SQLite: Use batch_alter_table to drop and recreate constraints - # batch_alter_table handles this by recreating the table without the constraint - for constraint_name, table_name in OLD_CONSTRAINTS.items(): - try: - with op.batch_alter_table(table_name, schema=None) as batch_op: - batch_op.drop_constraint(constraint_name, type_="unique") - except Exception: - # Constraint might not exist or have a different name in SQLite - logger.warning(f"Could not drop constraint {constraint_name} from table {table_name}, it may not exist") - else: - # PostgreSQL/MySQL: Use standard SQL - for constraint_name, table_name in OLD_CONSTRAINTS.items(): - try: - op.drop_constraint(constraint_name, table_name, type_="unique") - except Exception: - 
logger.warning(f"Could not drop constraint {constraint_name} from table {table_name}, it may not exist") - - # Create new constraints with user_id - for constraint_name, (table_name, columns) in NEW_CONSTRAINTS.items(): - try: - with op.batch_alter_table(table_name, schema=None) as batch_op: - batch_op.create_unique_constraint(constraint_name, columns) - except Exception: - logger.exception(f"Failed to create constraint {constraint_name} for table {table_name}") - raise - - # Update predictor_index to include user_id - with op.batch_alter_table("predictor", schema=None) as batch_op: - try: - batch_op.drop_index("predictor_index") - except Exception: - logger.exception("Failed to drop index predictor_index from table predictor") - raise - - batch_op.create_index( - "predictor_index", ["company_id", "user_id", "name", "version", "active", "deleted_at"], unique=True - ) - - # Migrate storage folders on disk - # This renames folders like 'file_None_123' to 'file_{DEFAULT_COMPANY_ID}_123' - # and 'tab_None_0' to 'tab_{DEFAULT_COMPANY_ID}_0' - _migrate_storage_folders() - - -def downgrade(): - # Revert storage folders first (before database changes) - _revert_storage_folders() - - # Revert file paths in the database - _revert_file_paths() - - # Restore original predictor_index without user_id - with op.batch_alter_table("predictor", schema=None) as batch_op: - try: - batch_op.drop_index("predictor_index") - except Exception: - logger.exception("Failed to drop index predictor_index from table predictor") - raise - - batch_op.create_index("predictor_index", ["company_id", "name", "version", "active", "deleted_at"], unique=True) - - # Drop new unique constraints and restore old ones - if _is_sqlite(): - for constraint_name, (table_name, _) in NEW_CONSTRAINTS.items(): - try: - with op.batch_alter_table(table_name, schema=None) as batch_op: - batch_op.drop_constraint(constraint_name, type_="unique") - except Exception: - logger.warning(f"Could not drop constraint 
{constraint_name} from table {table_name}, it may not exist") - else: - for constraint_name, (table_name, _) in NEW_CONSTRAINTS.items(): - try: - op.drop_constraint(constraint_name, table_name, type_="unique") - except Exception: - logger.warning(f"Could not drop constraint {constraint_name} from table {table_name}, it may not exist") - - # Restore old constraints without user_id - old_constraint_columns = { - "unique_project_name_company_id": ("project", ["name", "company_id"]), - "unique_integration_name_company_id": ("integration", ["name", "company_id"]), - "unique_file_name_company_id": ("file", ["name", "company_id"]), - "unique_view_name_company_id": ("view", ["name", "company_id"]), - } - - for constraint_name, (table_name, columns) in old_constraint_columns.items(): - try: - with op.batch_alter_table(table_name, schema=None) as batch_op: - batch_op.create_unique_constraint(constraint_name, columns) - except Exception: - logger.exception(f"Failed to create constraint {constraint_name} for table {table_name}") - raise - - # Remove user_id column and revert company_id to nullable for all tables - for table_name in TABLES_WITH_USER_ID: - with op.batch_alter_table(table_name, schema=None) as batch_op: - batch_op.drop_column("user_id") - # Revert company_id to nullable without default - batch_op.alter_column("company_id", existing_type=sa.String(), nullable=True, server_default=None) - - # Set company_id back to legacy 'None' value for records that had DEFAULT_COMPANY_ID - for table_name in TABLES_WITH_USER_ID: - op.execute(f"UPDATE {table_name} SET company_id = '0' WHERE company_id = '{DEFAULT_COMPANY_ID}'") diff --git a/mindsdb/migrations/versions/2026-01-20_6c840e4668bd_del_lightwood_version_field.py b/mindsdb/migrations/versions/2026-01-20_6c840e4668bd_del_lightwood_version_field.py deleted file mode 100644 index 29a30a1714e..00000000000 --- a/mindsdb/migrations/versions/2026-01-20_6c840e4668bd_del_lightwood_version_field.py +++ /dev/null @@ -1,29 +0,0 @@ 
-"""del_lightwood_version_field - -Revision ID: 6c840e4668bd -Revises: f64112749455 -Create Date: 2026-01-20 18:05:30.706658 - -""" - -from alembic import op -import sqlalchemy as sa -import mindsdb.interfaces.storage.db # noqa - -# revision identifiers, used by Alembic. -revision = "6c840e4668bd" -down_revision = "f64112749455" -branch_labels = None -depends_on = None - - -def upgrade(): - with op.batch_alter_table("predictor", schema=None) as batch_op: - batch_op.drop_column("native_version") - batch_op.drop_column("lightwood_version") - - -def downgrade(): - with op.batch_alter_table("predictor", schema=None) as batch_op: - batch_op.add_column(sa.Column("lightwood_version", sa.VARCHAR(), nullable=True)) - batch_op.add_column(sa.Column("native_version", sa.VARCHAR(), nullable=True)) diff --git a/mindsdb/migrations/versions/__init__.py b/mindsdb/migrations/versions/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/utilities/__init__.py b/mindsdb/utilities/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/utilities/api_status.py b/mindsdb/utilities/api_status.py deleted file mode 100644 index 5af7f863625..00000000000 --- a/mindsdb/utilities/api_status.py +++ /dev/null @@ -1,58 +0,0 @@ -import os -import json - -from mindsdb.utilities.config import config -from mindsdb.utilities import log - - -logger = log.getLogger(__name__) -_api_status_file = None - - -def _get_api_status_file(): - global _api_status_file - if _api_status_file is None: - # Use a temporary file that can be shared across processes. - temp_dir = config["paths"]["tmp"] - _api_status_file = os.path.join(temp_dir, "mindsdb_api_status.json") - # Overwrite the file if it exists. 
- if os.path.exists(_api_status_file): - try: - os.remove(_api_status_file) - except OSError: - logger.exception(f"Error removing existing API status file: {_api_status_file}") - - return _api_status_file - - -def get_api_status(): - """Get the current API status from the shared file.""" - status_file = _get_api_status_file() - try: - if os.path.exists(status_file): - with open(status_file, "r") as f: - return json.load(f) - except (json.JSONDecodeError, IOError): - pass - return {} - - -def set_api_status(api_name: str, status: bool): - """Set the status of an API in the shared file.""" - status_file = _get_api_status_file() - current_status = get_api_status() - current_status[api_name] = status - - # Write atomically to avoid race conditions. - temp_file = status_file + ".tmp" - try: - with open(temp_file, "w") as f: - json.dump(current_status, f) - os.replace(temp_file, status_file) - except IOError: - # Clean up temp file if it exists. - if os.path.exists(temp_file): - try: - os.remove(temp_file) - except OSError: - pass diff --git a/mindsdb/utilities/auth.py b/mindsdb/utilities/auth.py deleted file mode 100644 index af8ea710699..00000000000 --- a/mindsdb/utilities/auth.py +++ /dev/null @@ -1,77 +0,0 @@ -import requests -import textwrap - -from mindsdb.utilities.config import Config - - -def get_aws_meta_data() -> dict: - ''' returns aws metadata for current instance - - Returns: - dict: aws metadata - ''' - aws_meta_data = { - 'public-hostname': None, - 'ami-id': None, - 'instance-id': None - } - aws_token = requests.put("http://169.254.169.254/latest/api/token", headers={'X-aws-ec2-metadata-token-ttl-seconds': '30'}).text - for key in aws_meta_data.keys(): - resp = requests.get( - f'http://169.254.169.254/latest/meta-data/{key}', - headers={'X-aws-ec2-metadata-token': aws_token}, - timeout=1 - ) - if resp.status_code != 200: - continue - aws_meta_data[key] = resp.text - if aws_meta_data['instance-id'] is None: - raise Exception('That is not an AWS 
environment') - return aws_meta_data - - -def register_oauth_client(): - ''' register new oauth client if it is not existed - ''' - config = Config() - aws_meta_data = get_aws_meta_data() - - current_aws_meta_data = config.get('aws_meta_data', {}) - oauth_meta = config.get('auth', {}).get('oauth') - if oauth_meta is None: - return - - public_hostname = aws_meta_data['public-hostname'] - if ( - current_aws_meta_data.get('public-hostname') != public_hostname - or oauth_meta.get('client_id') is None - ): - resp = requests.post( - f'https://{oauth_meta["server_host"]}/auth/register_client', - json={ - 'client_name': f'aws_marketplace_{public_hostname}', - 'client_uri': public_hostname, - 'grant_types': 'authorization_code', - 'redirect_uris': textwrap.dedent(f''' - https://{public_hostname}/api/auth/callback - https://{public_hostname}/api/auth/callback/cloud_home - '''), - 'response_types': 'code', - 'scope': 'openid profile aws_marketplace', - 'token_endpoint_auth_method': 'client_secret_basic' - }, - timeout=10 - ) - - if resp.status_code != 200: - raise Exception(f'Wrong answer from auth server: {resp.status_code}, {resp.text}') - keys = resp.json() - Config().update({ - 'aws_meta_data': aws_meta_data, - 'auth': { - 'oauth': { - 'client_id': keys['client_id'], - 'client_secret': keys['client_secret'] - } - } - }) diff --git a/mindsdb/utilities/cache.py b/mindsdb/utilities/cache.py deleted file mode 100644 index 9157e20d224..00000000000 --- a/mindsdb/utilities/cache.py +++ /dev/null @@ -1,287 +0,0 @@ -""" -How to use it: - - from mindsdb.utilities.cache import get_cache, dataframe_checksum, json_checksum - - # namespace of cache - cache = get_cache('predict') - - key = dataframe_checksum(df) # or json_checksum, depends on object type - df_predict = cache(key) - - if df_predict is None: - # no cache, save it - df_predict = predictor.predict(df) - cache.set(key, df_predict) - - - -Configuration: - -- max_size size of cache in count of records, default is 500 -- 
serializer, module for serialization, default is dill - -It can be set via: -- get_cache function: - cache = get_cache('predict', max_size=2) -- using specific cache class: - cache = FileCache('predict', max_size=2) -- using mindsdb config file: - "cache": { - "type": "redis", - "max_size": 2 - } - -Cache engines: - -Can be specified in mindsdb config json. Possible values: -- local - for FileCache, default -- redis - for RedisCache -By default is used local redis server. You can specify - "cache": { - "type": "redis", - "connection": { - "host": "127.0.0.1", - "port": 6379 - } - } - -How to test: - - env PYTHONPATH=./ pytest tests/unit/test_cache.py - -""" - -import os -import time -from abc import ABC -from pathlib import Path -import re -import hashlib -import typing as t - -import pandas as pd -import walrus - -from mindsdb.utilities.config import Config -from mindsdb.utilities.json_encoder import CustomJSONEncoder -from mindsdb.interfaces.storage.fs import FileLock -from mindsdb.utilities.context import context as ctx - -_CACHE_MAX_SIZE = 500 - - -def dataframe_checksum(df: pd.DataFrame): - original_columns = df.columns - df.columns = list(range(len(df.columns))) - result = hashlib.sha256(str(df.values).encode()).hexdigest() - df.columns = original_columns - return result - - -def json_checksum(obj: t.Union[dict, list]): - checksum = str_checksum(CustomJSONEncoder().encode(obj)) - return checksum - - -def str_checksum(obj: str): - checksum = hashlib.sha256(obj.encode()).hexdigest() - return checksum - - -class BaseCache(ABC): - def __init__(self, max_size=None, serializer=None): - self.config = Config() - if max_size is None: - max_size = self.config["cache"].get("max_size", _CACHE_MAX_SIZE) - self.max_size = max_size - if serializer is None: - serializer_module = self.config["cache"].get("serializer") - if serializer_module == "pickle": - import pickle as s_module - else: - import dill as s_module - self.serializer = s_module - - # default functions - - def 
set_df(self, name, df): - return self.set(name, df) - - def get_df(self, name): - return self.get(name) - - def serialize(self, value): - return self.serializer.dumps(value) - - def deserialize(self, value): - return self.serializer.loads(value) - - -class FileCache(BaseCache): - def __init__(self, category, path=None, **kwargs): - super().__init__(**kwargs) - - if path is None: - path = self.config["paths"]["cache"] - - cache_path = Path(path) / category - - company_id = ctx.company_id - user_id = ctx.user_id - if company_id is not None and user_id is not None: - cache_path = cache_path / str(company_id) / str(user_id) - cache_path.mkdir(parents=True, exist_ok=True) - - self.path = cache_path - - def clear_old_cache(self): - with FileLock(self.path): - # buffer to delete, to not run delete on every adding - buffer_size = 5 - - if self.max_size is None: - return - - cur_count = len(os.listdir(self.path)) - - if cur_count > self.max_size + buffer_size: - try: - files = sorted(Path(self.path).iterdir(), key=os.path.getmtime) - for file in files[: cur_count - self.max_size]: - self.delete_file(file) - except FileNotFoundError: - pass - - def file_path(self, name): - # Sanitize the key to avoid table (file) names with backticks and slashes. 
- sanitized_name = re.sub(r"[^\w\-.]", "_", name) - return self.path / sanitized_name - - def set_df(self, name, df): - path = self.file_path(name) - df.to_pickle(path) - self.clear_old_cache() - - def set(self, name, value): - path = self.file_path(name) - value = self.serialize(value) - - with open(path, "wb") as fd: - fd.write(value) - self.clear_old_cache() - - def get_df(self, name): - path = self.file_path(name) - with FileLock(self.path): - if not os.path.exists(path): - return None - value = pd.read_pickle(path) - return value - - def get(self, name): - path = self.file_path(name) - - with FileLock(self.path): - if not os.path.exists(path): - return None - with open(path, "rb") as fd: - value = fd.read() - value = self.deserialize(value) - return value - - def delete(self, name): - path = self.file_path(name) - self.delete_file(path) - - def delete_file(self, path): - os.unlink(path) - - -class RedisCache(BaseCache): - def __init__(self, category, connection_info=None, **kwargs): - super().__init__(**kwargs) - - self.category = category - - if connection_info is None: - # if no params will be used local redis - connection_info = self.config["cache"].get("connection", {}) - self.client = walrus.Database(**connection_info) - - def clear_old_cache(self, key_added): - if self.max_size is None: - return - - # buffer to delete, to not run delete on every adding - buffer_size = 5 - - cur_count = self.client.hlen(self.category) - - # remove oldest - if cur_count > self.max_size + buffer_size: - # 5 is buffer to delete, to not run delete on every adding - - keys = self.client.hgetall(self.category) - # to list - keys = list(keys.items()) - # sort by timestamp - keys.sort(key=lambda x: x[1]) - - for key, _ in keys[: cur_count - self.max_size]: - self.delete_key(key) - - def redis_key(self, name): - return f"{self.category}_{name}" - - def set(self, name, value): - key = self.redis_key(name) - value = self.serialize(value) - - self.client.set(key, value) - # using key 
with category name to store all keys with modify time - self.client.hset(self.category, key, int(time.time() * 1000)) - - self.clear_old_cache(key) - - def get(self, name): - key = self.redis_key(name) - value = self.client.get(key) - if value is None: - # no value in cache - return None - return self.deserialize(value) - - def delete(self, name): - key = self.redis_key(name) - - self.delete_key(key) - - def delete_key(self, key): - self.client.delete(key) - self.client.hdel(self.category, key) - - -class NoCache: - """ - class for no cache mode - """ - - def __init__(self, *args, **kwargs): - pass - - def get(self, name): - return None - - def set(self, name, value): - pass - - -def get_cache(category, **kwargs): - config = Config() - if config.get("cache")["type"] == "redis": - return RedisCache(category, **kwargs) - if config.get("cache")["type"] == "none": - return NoCache(category, **kwargs) - else: - return FileCache(category, **kwargs) diff --git a/mindsdb/utilities/config.py b/mindsdb/utilities/config.py deleted file mode 100644 index e3fff3bf023..00000000000 --- a/mindsdb/utilities/config.py +++ /dev/null @@ -1,762 +0,0 @@ -import os -import sys -import json -import argparse -import datetime -import dataclasses -from pathlib import Path -from copy import deepcopy -from urllib.parse import urlparse - -from appdirs import user_data_dir - -# NOTE do not `import from mindsdb` here - - -def get_bool_env_var(env_name: str) -> bool: - """Read an environment variable and return its value as a boolean. - - Args: - env_name (str): name of the environment variable to read. - - Returns: - bool: True or False, or None if the variable is not set or empty. - - Raises: - ValueError: if the value is set but does not match any known boolean representation. 
- """ - value = os.environ.get(env_name) - if value is None or value == "": - return None - match value.lower(): - case "1" | "true" | "on" | "yes" | "y": - value = True - case "0" | "false" | "off" | "no" | "n": - value = False - case _: - raise ValueError(f"Expected a boolean value for the environment variable '{env_name}', but got '{value}'") - return value - - -def get_list_env_var(env_name: str) -> list[str]: - """Read an environment variable and return its value as a list of strings. - - The value is expected to be a comma-separated string. Whitespace around - each item is stripped, and empty items are ignored. - - Args: - env_name (str): name of the environment variable to read. - - Returns: - list[str]: list of non-empty strings, or None if the variable is not set or empty. - """ - value = os.environ.get(env_name) - if value is None or value.strip() == "": - return None - return [item.strip() for item in value.split(",") if item.strip()] - - -def _merge_key_recursive(target_dict, source_dict, key): - if key not in target_dict: - target_dict[key] = source_dict[key] - elif not isinstance(target_dict[key], dict) or not isinstance(source_dict[key], dict): - target_dict[key] = source_dict[key] - else: - for k in list(source_dict[key].keys()): - _merge_key_recursive(target_dict[key], source_dict[key], k) - - -def _merge_configs(original_config: dict, override_config: dict) -> dict: - for key in list(override_config.keys()): - _merge_key_recursive(original_config, override_config, key) - return original_config - - -def _overwrite_configs(original_config: dict, override_config: dict) -> dict: - """Overwrite original config with override config.""" - for key in list(override_config.keys()): - original_config[key] = override_config[key] - return original_config - - -def create_data_dir(path: Path) -> None: - """Create a directory and checks that it is writable. 
- - Args: - path (Path): path to create and check - - Raises: - NotADirectoryError: if path exists, but it is not a directory - PermissionError: if path exists/created, but it is not writable - Exception: if directory could not be created - """ - if path.exists() and not path.is_dir(): - raise NotADirectoryError(f"The path is not a directory: {path}") - - try: - path.mkdir(mode=0o777, exist_ok=True, parents=True) - except Exception as e: - raise Exception("MindsDB storage directory could not be created") from e - - if not os.access(path, os.W_OK): - raise PermissionError(f"The directory is not allowed for writing: {path}") - - -@dataclasses.dataclass(frozen=True) -class HTTP_AUTH_TYPE: - SESSION: str = "session" - TOKEN: str = "token" - SESSION_OR_TOKEN: str = "session_or_token" - - -HTTP_AUTH_TYPE = HTTP_AUTH_TYPE() - - -class Config: - """Application config. Singletone, initialized just once. Re-initialyze if `config.auto.json` is changed. - The class loads multiple configs and merge then in one. If a config option defined in multiple places (config file, - env var, cmd arg, etc), then it will be resolved in following order of priority: - - default config values (lowest priority) - - `config.json` provided by the user - - `config.auto.json` - - config values collected from env vars - - values from cmd args (most priority) - - Attributes: - __instance (Config): instance of 'Config' to make it singleton - _config (dict): application config, the result of merging other configs - _user_config (dict): config provided by the user (usually with cmd arg `--config=config.json`) - _env_config (dict): config collected from different env vars - _auto_config (dict): config that is editd by the app itself (e.g. 
when you change values in GUI) - _default_config (dict): config with default values - config_path (Path): path to the `config.json` provided by the user - storage_root_path (Path): path to storage root folder - auto_config_path (Path): path to `config.auto.json` - auto_config_mtime (float): mtime of `config.auto.json` when it was loaded to `self._auto_config` - _cmd_args (argparse.Namespace): cmd args - use_docker_env (bool): is the app run in docker env - """ - - __instance: "Config" = None - - _config: dict = None - _user_config: dict = None - _env_config: dict = None - _auto_config: dict = None - _default_config: dict = None - config_path: Path = None - storage_root_path: Path = None - auto_config_path: Path = None - auto_config_mtime: float = 0 - _cmd_args: argparse.Namespace = None - use_docker_env: bool = os.environ.get("MINDSDB_DOCKER_ENV", False) is not False - - def __new__(cls, *args, **kwargs) -> "Config": - """Make class singletone and initialize config.""" - if cls.__instance is not None: - return cls.__instance - - self = super().__new__(cls, *args, **kwargs) - cls.__instance = self - - self.fetch_user_config() - - # region determine root path - if self.storage_root_path is None: - if isinstance(os.environ.get("MINDSDB_STORAGE_DIR"), str): - self.storage_root_path = os.environ["MINDSDB_STORAGE_DIR"] - elif "root" in self._user_config.get("paths", {}): - self.storage_root_path = self.user_config["paths"]["root"] - else: - self.storage_root_path = os.path.join(user_data_dir("mindsdb", "mindsdb"), "var/") - self.storage_root_path = Path(self.storage_root_path) - create_data_dir(self.storage_root_path) - # endregion - - # region prepare default config - api_host = "127.0.0.1" if not self.use_docker_env else "0.0.0.0" - self._default_config = { - "permanent_storage": {"location": "absent"}, - "storage_db": ( - "sqlite:///" - + str(self.storage_root_path / "mindsdb.sqlite3.db") - + "?check_same_thread=False&timeout=30" - ), - "paths": { - "root": 
self.storage_root_path, - "content": self.storage_root_path / "content", - "storage": self.storage_root_path / "storage", - "static": self.storage_root_path / "static", - "tmp": self.storage_root_path / "tmp", - "log": self.storage_root_path / "log", - "cache": self.storage_root_path / "cache", - "locks": self.storage_root_path / "locks", - }, - "auth": { - "http_auth_type": HTTP_AUTH_TYPE.SESSION_OR_TOKEN, # token | session | session_or_token - "http_auth_enabled": False, - "http_permanent_session_lifetime": datetime.timedelta(days=31), - "username": "mindsdb", - "password": "", - "token": None, # MINDSDB_AUTH_TOKEN - }, - "logging": { - "handlers": { - "console": { - "enabled": True, - "formatter": "default", - "level": "INFO", # MINDSDB_CONSOLE_LOG_LEVEL or MINDSDB_LOG_LEVEL (obsolete) - }, - "file": { - "enabled": False, - "level": "INFO", # MINDSDB_FILE_LOG_LEVEL - "filename": "app.log", - "maxBytes": 1 << 19, # 0.5 Mb - "backupCount": 3, - }, - }, - "resources_log": {"enabled": False, "level": "INFO", "interval": 60}, - }, - "gui": {"open_on_start": True, "autoupdate": True}, - "debug": False, - "environment": "local", - "integrations": {}, - "api": { - "http": { - "host": api_host, - "port": "47334", - "restart_on_failure": True, - "max_restart_count": 1, - "max_restart_interval_seconds": 60, - "a2wsgi": {"workers": 10, "send_queue_size": 10}, - }, - "mysql": { - "host": api_host, - "port": "47335", - "database": "mindsdb", - "ssl": True, - "restart_on_failure": True, - "max_restart_count": 1, - "max_restart_interval_seconds": 60, - }, - "litellm": { - "host": "0.0.0.0", # API server binds to all interfaces by default - "port": "8000", - }, - "mcp": { - "cors": { - "enabled": True, - "allow_origins": [], - "allow_origin_regex": r"https?://(localhost|127\.0\.0\.1)(:\d+)?", - "allow_headers": ["*"], - }, - "rate_limit": { - "enabled": False, - "requests_per_minute": 60, - }, - "oauth": { - "enabled": False, # MINDSDB_MCP_OAUTH_ENABLED - "issuer_url": "", # 
MINDSDB_MCP_OAUTH_ISSUER_URL - "client_id": "", # MINDSDB_MCP_OAUTH_CLIENT_ID - "client_secret": "", # MINDSDB_MCP_OAUTH_CLIENT_SECRET - "scope": "mcp:tools", # MINDSDB_MCP_OAUTH_SCOPE - "public_url": "", # MINDSDB_MCP_OAUTH_PUBLIC_URL - }, - "dns_rebinding_protection": False, # MINDSDB_MCP_DNS_REBINDING_PROTECTION - }, - }, - "cache": {"type": "local"}, - "ml_task_queue": {"type": "local"}, - "url_file_upload": {"enabled": True, "allowed_origins": [], "disallowed_origins": []}, - "file_upload_domains": [], # deprecated, use config[url_file_upload][allowed_origins] instead - "web_crawling_allowed_sites": [], - "cloud": False, - "jobs": {"disable": False}, - "tasks": {"disable": False}, - "default_project": "mindsdb", - "default_llm": {}, - "default_embedding_model": {}, - "default_reranking_model": {}, - "data_catalog": { - "enabled": False, - }, - "data_stream": { - "fetch_size": 10000, - }, - "byom": { - "enabled": False, - }, - "pid_file_content": None, - "default_ml_engine": None, - "knowledge_bases": { - "disable_autobatch": False, - "disable_pgvector_autobatch": True, - "storage": None, - }, - } - # endregion - - # region find 'auto' config file, create if not exists - auto_config_name = "config.auto.json" - auto_config_path = self.storage_root_path.joinpath(auto_config_name) - if not auto_config_path.is_file(): - auto_config_path.write_text("{}") - self.auto_config_path = auto_config_path - # endregion - - self.prepare_env_config() - - self.fetch_auto_config() - self.merge_configs() - - return cls.__instance - - def prepare_env_config(self) -> None: - """Collect config values from env vars to self._env_config""" - self._env_config = { - "logging": {"handlers": {"console": {}, "file": {}}}, - "api": { - "http": {}, - "mcp": {"cors": {}, "rate_limit": {}, "oauth": {}}, - }, - "auth": {}, - "paths": {}, - "permanent_storage": {}, - "ml_task_queue": {}, - "gui": {}, - "byom": {}, - "knowledge_bases": {}, - } - - # region storage root path - if 
os.environ.get("MINDSDB_STORAGE_DIR", "") != "": - storage_root_path = Path(os.environ["MINDSDB_STORAGE_DIR"]) - self._env_config["paths"] = { - "root": storage_root_path, - "content": storage_root_path / "content", - "storage": storage_root_path / "storage", - "static": storage_root_path / "static", - "tmp": storage_root_path / "tmp", - "log": storage_root_path / "log", - "cache": storage_root_path / "cache", - "locks": storage_root_path / "locks", - } - # endregion - - # region vars: permanent storage disabled? - if os.environ.get("MINDSDB_STORAGE_BACKUP_DISABLED", "").lower() in ( - "1", - "true", - ): - self._env_config["permanent_storage"] = {"location": "absent"} - # endregion - - # region vars: ml queue - if os.environ.get("MINDSDB_ML_QUEUE_TYPE", "").lower() == "redis": - self._env_config["ml_task_queue"] = { - "type": "redis", - "host": os.environ.get("MINDSDB_ML_QUEUE_HOST", "localhost"), - "port": int(os.environ.get("MINDSDB_ML_QUEUE_PORT", 6379)), - "db": int(os.environ.get("MINDSDB_ML_QUEUE_DB", 0)), - "username": os.environ.get("MINDSDB_ML_QUEUE_USERNAME"), - "password": os.environ.get("MINDSDB_ML_QUEUE_PASSWORD"), - } - # endregion - - # region vars: username and password - http_username = os.environ.get("MINDSDB_USERNAME") - http_password = os.environ.get("MINDSDB_PASSWORD") - - if bool(http_username) != bool(http_password): - raise ValueError( - "Both MINDSDB_USERNAME and MINDSDB_PASSWORD must be set together and must be non-empty strings." - ) - - # If both username and password are set, enable HTTP auth. 
- if http_username and http_password: - self._env_config["auth"]["http_auth_enabled"] = True - self._env_config["auth"]["username"] = http_username - self._env_config["auth"]["password"] = http_password - # endregion - - http_auth_type = os.environ.get("MINDSDB_HTTP_AUTH_TYPE", "").lower() - if http_auth_type in dataclasses.astuple(HTTP_AUTH_TYPE): - self._env_config["auth"]["http_auth_type"] = http_auth_type - elif http_auth_type != "": - raise ValueError(f"Wrong value of env var MINDSDB_HTTP_AUTH_TYPE={http_auth_type}") - - mindsdb_auth_token = os.environ.get("MINDSDB_AUTH_TOKEN", "") - if mindsdb_auth_token != "": - self._env_config["auth"]["token"] = mindsdb_auth_token - - # region logging - if os.environ.get("MINDSDB_LOG_LEVEL", "") != "": - self._env_config["logging"]["handlers"]["console"]["level"] = os.environ["MINDSDB_LOG_LEVEL"] - self._env_config["logging"]["handlers"]["console"]["enabled"] = True - if os.environ.get("MINDSDB_CONSOLE_LOG_LEVEL", "") != "": - self._env_config["logging"]["handlers"]["console"]["level"] = os.environ["MINDSDB_CONSOLE_LOG_LEVEL"] - self._env_config["logging"]["handlers"]["console"]["enabled"] = True - if os.environ.get("MINDSDB_FILE_LOG_LEVEL", "") != "": - self._env_config["logging"]["handlers"]["file"]["level"] = os.environ["MINDSDB_FILE_LOG_LEVEL"] - self._env_config["logging"]["handlers"]["file"]["enabled"] = True - # endregion - - if os.environ.get("MINDSDB_DB_CON", "") != "": - self._env_config["storage_db"] = os.environ["MINDSDB_DB_CON"] - url = urlparse(self._env_config["storage_db"]) - is_valid = url.scheme and (url.netloc or url.scheme == "sqlite") - if not is_valid: - raise ValueError( - f"Invalid MINDSDB_DB_CON value: {os.environ['MINDSDB_DB_CON']!r}\n" - f"Expected format: scheme://user:password@host:port/database\n" - "Examples:\n" - " - postgresql://user:pass@localhost:5432/database\n" - " - sqlite:///path/to/database.db" - ) - - if os.environ.get("MINDSDB_DEFAULT_PROJECT", "") != "": - 
self._env_config["default_project"] = os.environ["MINDSDB_DEFAULT_PROJECT"].lower() - - if os.environ.get("MINDSDB_DEFAULT_LLM_API_KEY", "") != "": - self._env_config["default_llm"] = {"api_key": os.environ["MINDSDB_DEFAULT_LLM_API_KEY"]} - if os.environ.get("MINDSDB_DEFAULT_EMBEDDING_MODEL_API_KEY", "") != "": - self._env_config["default_embedding_model"] = { - "api_key": os.environ["MINDSDB_DEFAULT_EMBEDDING_MODEL_API_KEY"] - } - if os.environ.get("MINDSDB_DEFAULT_RERANKING_MODEL_API_KEY", "") != "": - self._env_config["default_reranking_model"] = { - "api_key": os.environ["MINDSDB_DEFAULT_RERANKING_MODEL_API_KEY"] - } - - # Reranker configuration from environment variables - reranker_config = {} - if os.environ.get("MINDSDB_RERANKER_N", "") != "": - try: - reranker_config["n"] = int(os.environ["MINDSDB_RERANKER_N"]) - except ValueError: - raise ValueError(f"MINDSDB_RERANKER_N must be an integer, got: {os.environ['MINDSDB_RERANKER_N']}") - - if os.environ.get("MINDSDB_RERANKER_LOGPROBS", "") != "": - logprobs_value = os.environ["MINDSDB_RERANKER_LOGPROBS"].lower() - if logprobs_value in ("true", "1", "yes", "y"): - reranker_config["logprobs"] = True - elif logprobs_value in ("false", "0", "no", "n"): - reranker_config["logprobs"] = False - else: - raise ValueError( - f"MINDSDB_RERANKER_LOGPROBS must be a boolean value, got: {os.environ['MINDSDB_RERANKER_LOGPROBS']}" - ) - - if os.environ.get("MINDSDB_RERANKER_TOP_LOGPROBS", "") != "": - try: - reranker_config["top_logprobs"] = int(os.environ["MINDSDB_RERANKER_TOP_LOGPROBS"]) - except ValueError: - raise ValueError( - f"MINDSDB_RERANKER_TOP_LOGPROBS must be an integer, got: {os.environ['MINDSDB_RERANKER_TOP_LOGPROBS']}" - ) - - if os.environ.get("MINDSDB_RERANKER_MAX_TOKENS", "") != "": - try: - reranker_config["max_tokens"] = int(os.environ["MINDSDB_RERANKER_MAX_TOKENS"]) - except ValueError: - raise ValueError( - f"MINDSDB_RERANKER_MAX_TOKENS must be an integer, got: {os.environ['MINDSDB_RERANKER_MAX_TOKENS']}" 
- ) - - if os.environ.get("MINDSDB_RERANKER_VALID_CLASS_TOKENS", "") != "": - try: - reranker_config["valid_class_tokens"] = os.environ["MINDSDB_RERANKER_VALID_CLASS_TOKENS"].split(",") - except ValueError: - raise ValueError( - f"MINDSDB_RERANKER_VALID_CLASS_TOKENS must be a comma-separated list of strings, got: {os.environ['MINDSDB_RERANKER_VALID_CLASS_TOKENS']}" - ) - - if reranker_config: - if "default_reranking_model" not in self._env_config: - self._env_config["default_reranking_model"] = {} - self._env_config["default_reranking_model"].update(reranker_config) - if get_bool_env_var("MINDSDB_DATA_CATALOG_ENABLED") is True: - self._env_config["data_catalog"] = {"enabled": True} - - if get_bool_env_var("MINDSDB_NO_STUDIO") is True: - self._env_config["gui"]["open_on_start"] = False - self._env_config["gui"]["autoupdate"] = False - - mindsdb_gui_autoupdate = get_bool_env_var("MINDSDB_GUI_AUTOUPDATE") - if mindsdb_gui_autoupdate is not None: - self._env_config["gui"]["autoupdate"] = mindsdb_gui_autoupdate - - if os.environ.get("MINDSDB_PID_FILE_CONTENT", "") != "": - try: - self._env_config["pid_file_content"] = json.loads(os.environ["MINDSDB_PID_FILE_CONTENT"]) - except json.JSONDecodeError as e: - raise ValueError(f"MINDSDB_PID_FILE_CONTENT contains invalid JSON: {e}") - - mindsdb_byom_enabled = os.environ.get("MINDSDB_BYOM_ENABLED", "").lower() - if mindsdb_byom_enabled in ("0", "false"): - self._env_config["byom"]["enabled"] = False - elif mindsdb_byom_enabled in ("1", "true"): - self._env_config["byom"]["enabled"] = True - elif mindsdb_byom_enabled != "": - raise ValueError(f"Wrong value of env var MINDSDB_BYOM_ENABLED={mindsdb_byom_enabled}") - - # region MCP config - mindsdb_mcp_enabled = get_bool_env_var("MINDSDB_MCP_CORS_ENABLED") - if mindsdb_mcp_enabled is not None: - self._env_config["api"]["mcp"]["cors"]["enabled"] = mindsdb_mcp_enabled - mindsdb_mcp_allow_origins = get_list_env_var("MINDSDB_MCP_ALLOW_ORIGINS") - if 
isinstance(mindsdb_mcp_allow_origins, list): - self._env_config["api"]["mcp"]["cors"]["allow_origins"] = mindsdb_mcp_allow_origins - mindsdb_mcp_allow_headers = get_list_env_var("MINDSDB_MCP_ALLOW_HEADERS") - if isinstance(mindsdb_mcp_allow_headers, list): - self._env_config["api"]["mcp"]["cors"]["allow_headers"] = mindsdb_mcp_allow_headers - mindsdb_mcp_allow_origin_regex = os.environ.get("MINDSDB_MCP_ALLOW_ORIGIN_REGEXP", "") - if mindsdb_mcp_allow_origin_regex != "": - self._env_config["api"]["mcp"]["cors"]["allow_origin_regex"] = mindsdb_mcp_allow_origin_regex - mindsdb_mcp_rate_limit_enabled = get_bool_env_var("MINDSDB_MCP_RATE_LIMIT_ENABLED") - if mindsdb_mcp_rate_limit_enabled is not None: - self._env_config["api"]["mcp"]["rate_limit"]["enabled"] = mindsdb_mcp_rate_limit_enabled - mindsdb_mcp_rate_limit_rpm = os.environ.get("MINDSDB_MCP_RATE_LIMIT_RPM", "") - if mindsdb_mcp_rate_limit_rpm != "": - self._env_config["api"]["mcp"]["rate_limit"]["requests_per_minute"] = int(mindsdb_mcp_rate_limit_rpm) - - mindsdb_mcp_oauth_enabled = get_bool_env_var("MINDSDB_MCP_OAUTH_ENABLED") - if mindsdb_mcp_oauth_enabled is not None: - self._env_config["api"]["mcp"]["oauth"]["enabled"] = mindsdb_mcp_oauth_enabled - mindsdb_mcp_oauth_issuer_url = os.environ.get("MINDSDB_MCP_OAUTH_ISSUER_URL", "") - if mindsdb_mcp_oauth_issuer_url != "": - self._env_config["api"]["mcp"]["oauth"]["issuer_url"] = mindsdb_mcp_oauth_issuer_url - mindsdb_mcp_oauth_client_id = os.environ.get("MINDSDB_MCP_OAUTH_CLIENT_ID", "") - if mindsdb_mcp_oauth_client_id != "": - self._env_config["api"]["mcp"]["oauth"]["client_id"] = mindsdb_mcp_oauth_client_id - mindsdb_mcp_oauth_client_secret = os.environ.get("MINDSDB_MCP_OAUTH_CLIENT_SECRET", "") - if mindsdb_mcp_oauth_client_secret != "": - self._env_config["api"]["mcp"]["oauth"]["client_secret"] = mindsdb_mcp_oauth_client_secret - mindsdb_mcp_oauth_scope = os.environ.get("MINDSDB_MCP_OAUTH_SCOPE", "") - if mindsdb_mcp_oauth_scope != "": - 
self._env_config["api"]["mcp"]["oauth"]["scope"] = mindsdb_mcp_oauth_scope - mindsdb_mcp_oauth_public_url = os.environ.get("MINDSDB_MCP_OAUTH_PUBLIC_URL", "") - if mindsdb_mcp_oauth_public_url != "": - self._env_config["api"]["mcp"]["oauth"]["public_url"] = mindsdb_mcp_oauth_public_url - mindsdb_mcp_dns_rebinding_protection = get_bool_env_var("MINDSDB_MCP_DNS_REBINDING_PROTECTION") - if mindsdb_mcp_dns_rebinding_protection is not None: - self._env_config["api"]["mcp"]["dns_rebinding_protection"] = mindsdb_mcp_dns_rebinding_protection - # endregion - - # Keep env-based KB defaults out of config.auto.json overrides. - - def fetch_auto_config(self) -> bool: - """Load dict readed from config.auto.json to `auto_config`. - Do it only if `auto_config` was not loaded before or config.auto.json been changed. - - Returns: - bool: True if config was loaded or updated - """ - try: - if ( - self.auto_config_path.is_file() - and self.auto_config_path.read_text() != "" - and self.auto_config_mtime != self.auto_config_path.stat().st_mtime - ): - self._auto_config = json.loads(self.auto_config_path.read_text()) - self.auto_config_mtime = self.auto_config_path.stat().st_mtime - return True - except json.JSONDecodeError as e: - raise ValueError( - f"The 'auto' configuration file ({self.auto_config_path}) contains invalid JSON: {e}\nFile content: {self.auto_config_path.read_text()}" - ) - except FileNotFoundError: - # this shouldn't happen during normal work, but it looks like it happens - # when using `prefect` as a result of race conditions or something else. - return False - return False - - def fetch_user_config(self) -> bool: - """Read config provided by the user to `user_config`. Do it only if `user_config` was not loaded before. 
- - Returns: - bool: True if config was loaded - """ - if self._user_config is None: - cmd_args_config = self.cmd_args.config - if isinstance(os.environ.get("MINDSDB_CONFIG_PATH"), str): - self.config_path = os.environ["MINDSDB_CONFIG_PATH"] - elif isinstance(cmd_args_config, str): - self.config_path = cmd_args_config - - if isinstance(self.config_path, str): - self.config_path = Path(self.config_path) - if not self.config_path.is_file(): - raise FileNotFoundError(f"The configuration file was not found at the path: {self.config_path}") - try: - self._user_config = json.loads(self.config_path.read_text()) - except json.JSONDecodeError as e: - raise ValueError(f"The configuration file ({self.config_path}) contains invalid JSON: {e}") - else: - self._user_config = {} - return True - return False - - def ensure_auto_config_is_relevant(self) -> None: - """Check if auto config has not been changed. If changed - reload main config.""" - updated = self.fetch_auto_config() - if updated: - self.merge_configs() - - def merge_configs(self) -> None: - """Merge multiple configs to one.""" - new_config = deepcopy(self._default_config) - _merge_configs(new_config, self._user_config) - - if getattr(self.cmd_args, "no_studio", None) is True: - new_config["gui"]["open_on_start"] = False - new_config["gui"]["autoupdate"] = False - - _merge_configs(new_config, self._auto_config or {}) - _merge_configs(new_config, self._env_config or {}) - - # region create dirs - for key, value in new_config["paths"].items(): - if isinstance(value, str): - new_config["paths"][key] = Path(value) - elif isinstance(value, Path) is False: - raise ValueError(f"Unexpected path value: {value}") - create_data_dir(new_config["paths"][key]) - # endregion - - self._config = new_config - - def __getitem__(self, key): - self.ensure_auto_config_is_relevant() - return self._config[key] - - def get(self, key, default=None): - self.ensure_auto_config_is_relevant() - return self._config.get(key, default) - - def 
get_all(self): - self.ensure_auto_config_is_relevant() - return self._config - - def update(self, data: dict, overwrite: bool = False) -> None: - """ - Update values in `auto` config. - Args: - data (dict): data to update in `auto` config. - overwrite (bool): if True, overwrite existing keys, otherwise merge them. - - False (default): Merge recursively. Existing nested dictionaries are preserved - and only the specified keys in `data` are updated. - - True: Overwrite completely. Existing keys are replaced entirely with values - from `data`, discarding any nested structure not present in `data`. - """ - self.ensure_auto_config_is_relevant() - - if overwrite: - _overwrite_configs(self._auto_config, data) - else: - _merge_configs(self._auto_config, data) - - self.auto_config_path.write_text(json.dumps(self._auto_config, indent=4)) - - self.auto_config_mtime = self.auto_config_path.stat().st_mtime - - self.merge_configs() - - def raise_warnings(self, logger) -> None: - """Show warnings about config options""" - - if "storage_dir" in self._config: - logger.warning("The 'storage_dir' config option is no longer supported. Use 'paths.root' instead.") - - if "log" in self._config: - logger.warning("The 'log' config option is no longer supported. Use 'logging' instead.") - - file_upload_domains = self._config.get("file_upload_domains") - if isinstance(file_upload_domains, list) and len(file_upload_domains) > 0: - allowed_origins = self._config["url_file_upload"]["allowed_origins"] - if isinstance(allowed_origins, list) and len(allowed_origins) == 0: - self._config["url_file_upload"]["allowed_origins"] = file_upload_domains - logger.warning( - 'Config option "file_upload_domains" is deprecated, ' - 'use config["url_file_upload"]["allowed_origins"] instead.' 
- ) - - @property - def cmd_args(self): - if self._cmd_args is None: - self.parse_cmd_args() - return self._cmd_args - - def parse_cmd_args(self) -> None: - """Collect cmd args to self._cmd_args (accessable as self.cmd_args)""" - if self._cmd_args is not None: - return - - # if it is not mindsdb run, then set args to empty - if (sys.modules["__main__"].__package__ or "").lower() != "mindsdb" and os.environ.get( - "MINDSDB_RUNTIME" - ) != "1": - self._cmd_args = argparse.Namespace( - api=None, - config=None, - install_handlers=None, - verbose=False, - no_studio=False, - version=False, - ml_task_queue_consumer=None, - agent=None, - project=None, - update_gui=False, - mcp_stdio=False, - ) - return - - parser = argparse.ArgumentParser(description="CL argument for mindsdb server") - parser.add_argument("--api", type=str, default=None) - parser.add_argument("--config", type=str, default=None) - parser.add_argument("--install-handlers", type=str, default=None) - parser.add_argument("--verbose", action="store_true") - parser.add_argument("--no_studio", action="store_true") - parser.add_argument("-v", "--version", action="store_true") - parser.add_argument("--ml_task_queue_consumer", action="store_true", default=None) - parser.add_argument( - "--agent", - type=str, - default=None, - help="Name of the agent to use with litellm APIs", - ) - parser.add_argument( - "--project", - type=str, - default=None, - help="Project containing the agent (default: mindsdb)", - ) - - parser.add_argument("--project-name", type=str, default=None, help="MindsDB project name") - parser.add_argument("--update-gui", action="store_true", default=False, help="Update GUI and exit") - parser.add_argument("--mcp-stdio", action="store_true", default=False, help="Run MCP with STDIO transport") - - self._cmd_args = parser.parse_args() - - @property - def paths(self): - return self._config["paths"] - - @property - def user_config(self): - return self._user_config - - @property - def auto_config(self): - 
return self._auto_config - - @property - def env_config(self): - return self._env_config - - @property - def is_cloud(self): - return self._config.get("cloud", False) - - -config = Config() diff --git a/mindsdb/utilities/constants.py b/mindsdb/utilities/constants.py deleted file mode 100644 index 261ec719f45..00000000000 --- a/mindsdb/utilities/constants.py +++ /dev/null @@ -1,8 +0,0 @@ -""" -Global constants for MindsDB. -These values should not be user-configurable. -""" - -# Default identifiers for single-tenant/local deployments -DEFAULT_COMPANY_ID = "00000000-0000-0000-0000-000000000000" -DEFAULT_USER_ID = "00000000-0000-0000-0000-000000000000" diff --git a/mindsdb/utilities/context.py b/mindsdb/utilities/context.py deleted file mode 100644 index 73dedb4e60a..00000000000 --- a/mindsdb/utilities/context.py +++ /dev/null @@ -1,88 +0,0 @@ -from contextvars import ContextVar -from typing import Any -from copy import deepcopy -from contextlib import contextmanager - -from mindsdb.utilities.constants import DEFAULT_COMPANY_ID, DEFAULT_USER_ID - - -class Context: - """Thread independent storage""" - - __slots__ = ("_storage",) - - def __init__(self, storage) -> None: - object.__setattr__(self, "_storage", storage) - self.set_default() - - def set_default(self) -> None: - self._storage.set( - { - "company_id": DEFAULT_COMPANY_ID, - "user_id": DEFAULT_USER_ID, - # When True, DB queries should be scoped by ctx.user_id (in addition to company_id). - # Services can intentionally disable this to perform company-wide reads and apply - # their own permissioning layer on top. 
- "enforce_user_id": True, - "session_id": "", - "task_id": None, - "user_class": 0, - "profiling": { - "level": 0, - "enabled": False, - "pointer": None, - "tree": None, - }, - "used_handlers": set(), - "params": {}, - } - ) - - @contextmanager - def without_user_id_scope(self): - """Temporarily disable user_id scoping in this context.""" - previous = getattr(self, "enforce_user_id", True) - self.enforce_user_id = False - try: - yield - finally: - self.enforce_user_id = previous - - def __getattr__(self, name: str) -> Any: - storage = self._storage.get({}) - if name not in storage: - raise AttributeError(name) - return storage[name] - - def __setattr__(self, name: str, value: Any) -> None: - storage = deepcopy(self._storage.get({})) - storage[name] = value - self._storage.set(storage) - - def __delattr__(self, name: str) -> None: - storage = deepcopy(self._storage.get({})) - if name not in storage: - raise AttributeError(name) - del storage[name] - self._storage.set(storage) - - def dump(self) -> dict: - storage = deepcopy(self._storage.get({})) - return storage - - def load(self, storage: dict) -> None: - self._storage.set(storage) - - def get_metadata(self, **kwargs) -> dict: - return { - "company_id": self.company_id if self.company_id else DEFAULT_COMPANY_ID, - "user_id": self.user_id if self.user_id else DEFAULT_USER_ID, - "session_id": self.session_id, - "enforce_user_id": self.enforce_user_id, - "user_class": self.user_class, - **kwargs, - } - - -_context_var = ContextVar("mindsdb.context") -context = Context(_context_var) diff --git a/mindsdb/utilities/context_executor.py b/mindsdb/utilities/context_executor.py deleted file mode 100644 index b75a8b9c78e..00000000000 --- a/mindsdb/utilities/context_executor.py +++ /dev/null @@ -1,59 +0,0 @@ -import time -import types -from concurrent.futures import ThreadPoolExecutor -import contextvars - - -class ContextThreadPoolExecutor(ThreadPoolExecutor): - '''Handles copying context variables to threads created by 
ThreadPoolExecutor''' - def __init__(self, max_workers=None): - self.context = contextvars.copy_context() - # ThreadPoolExecutor does not propagate context to threads by default, so we need a custom initializer. - super().__init__(max_workers=max_workers, initializer=self._set_child_context) - - def _set_child_context(self): - for var, value in self.context.items(): - var.set(value) - - -def execute_in_threads(func, tasks, thread_count=3, queue_size_k=1.5): - """ - Should be used as generator. - Can accept input tasks as generator and keep queue size the same to not overflow the RAM - - :param func: callable, function to execute in threads - :param tasks: generator or iterable, list of input for function - :param thread_count: number of threads - :param queue_size_k: how a queue for workers is bigger than count of threads - :return: yield results - """ - executor = ContextThreadPoolExecutor(max_workers=thread_count) - - queue_size = int(thread_count * queue_size_k) - - if not isinstance(tasks, types.GeneratorType): - tasks = iter(tasks) - - futures = None - while futures is None or len(futures) > 0: - if futures is None: - futures = [] - - # add new portion - for i in range(queue_size): - try: - args = next(tasks) - futures.append(executor.submit(func, args)) - except StopIteration: - break - - # save results - for task in futures: - if task.done(): - yield task.result() - - # remove completed tasks - futures[:] = [t for t in futures if not t.done()] - - time.sleep(0.1) - executor.shutdown(wait=False) diff --git a/mindsdb/utilities/exception.py b/mindsdb/utilities/exception.py deleted file mode 100644 index 39f3adbaea0..00000000000 --- a/mindsdb/utilities/exception.py +++ /dev/null @@ -1,119 +0,0 @@ -from textwrap import indent - - -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import ERR - - -class MindsDBError(Exception): - pass - - -class BaseEntityException(MindsDBError): - """Base exception for entitys errors - - Attributes: - message (str): error 
message - entity_name (str): entity name - """ - - def __init__(self, message: str, entity_name: str = None) -> None: - self.message = message - self.entity_name = entity_name or "unknown" - - def __str__(self) -> str: - return f"{self.message}: {self.entity_name}" - - -class EntityExistsError(BaseEntityException): - """Raise when entity exists, but should not""" - - def __init__(self, message: str = None, entity_name: str = None) -> None: - if message is None: - message = "Entity exists error" - super().__init__(message, entity_name) - - -class EntityNotExistsError(BaseEntityException): - """Raise when entity not exists, but should""" - - def __init__(self, message: str = None, entity_name: str = None) -> None: - if message is None: - message = "Entity does not exists error" - super().__init__(message, entity_name) - - -class ParsingError(MindsDBError): - pass - - -class QueryError(MindsDBError): - def __init__( - self, - db_name: str | None = None, - db_type: str | None = None, - db_error_msg: str | None = None, - failed_query: str | None = None, - is_external: bool = True, - is_expected: bool = False, - ) -> None: - self.mysql_error_code = ERR.ER_UNKNOWN_ERROR - self.db_name = db_name - self.db_type = db_type - self.db_error_msg = db_error_msg - self.failed_query = failed_query - self.is_external = is_external - self.is_expected = is_expected - - def __str__(self) -> str: - return format_db_error_message( - db_name=self.db_name, - db_type=self.db_type, - db_error_msg=self.db_error_msg, - failed_query=self.failed_query, - is_external=self.is_external, - ) - - -def format_db_error_message( - db_name: str | None = None, - db_type: str | None = None, - db_error_msg: str | None = None, - failed_query: str | None = None, - is_external: bool = True, -) -> str: - """Format the error message for the database query. - - Args: - db_name (str | None): The name of the database. - db_type (str | None): The type of the database. - db_error_msg (str | None): The error message. 
- failed_query (str | None): The failed query. - is_external (bool): True if error appeared in external database, False if in internal duckdb - - Returns: - str: The formatted error message. - """ - error_message = "Failed to execute external database query during query processing." - if is_external: - error_message = ( - "An error occurred while executing a derived query on the external " - "database during processing of your original SQL query." - ) - else: - error_message = ( - "An error occurred while processing an internally generated query derived from your original SQL statement." - ) - if db_name is not None or db_type is not None: - error_message += "\n\nDatabase Details:" - if db_name is not None: - error_message += f"\n- Name: {db_name}" - if db_type is not None: - error_message += f"\n- Type: {db_type}" - - if db_error_msg is not None: - error_message += f"\n\nError:\n{indent(db_error_msg, ' ')}" - - if failed_query is not None: - error_message += f"\n\nFailed Query:\n{indent(failed_query, ' ')}" - - return error_message diff --git a/mindsdb/utilities/fs.py b/mindsdb/utilities/fs.py deleted file mode 100644 index dbed99cc8be..00000000000 --- a/mindsdb/utilities/fs.py +++ /dev/null @@ -1,345 +0,0 @@ -import os -import sys -import json -import time -import tempfile -import threading -from pathlib import Path -from typing import Generator - -import tarfile -import zipfile - -import psutil - -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -def get_tmp_dir() -> Path: - return Path(tempfile.gettempdir()).joinpath("mindsdb") - - -def _get_process_mark_id(unified: bool = False) -> str: - """Creates a text that can be used to identify process+thread - Args: - unified: bool, if True then result will be same for same process+thread - Returns: - mark of process+thread - """ - mark = f"{os.getpid()}-{threading.get_native_id()}" - if unified is True: - return mark - return f"{mark}-{str(time.time()).replace('.', '')}" - - -def 
create_process_mark(folder="learn"): - p = get_tmp_dir().joinpath(f"processes/{folder}/") - p.mkdir(parents=True, exist_ok=True) - mark = _get_process_mark_id() - p.joinpath(mark).touch() - return mark - - -def set_process_mark(folder: str, mark: str) -> None: - """touch new file which will be process mark - - Args: - folder (str): where create the file - mark (str): file name - - Returns: - str: process mark - """ - p = get_tmp_dir().joinpath(f"processes/{folder}/") - p.mkdir(parents=True, exist_ok=True) - mark = f"{os.getpid()}-{threading.get_native_id()}-{mark}" - p.joinpath(mark).touch() - return mark - - -def delete_process_mark(folder: str = "learn", mark: str | None = None): - if mark is None: - mark = _get_process_mark_id() - p = get_tmp_dir().joinpath(f"processes/{folder}/").joinpath(mark) - p.unlink(missing_ok=True) - - -def clean_process_marks(): - """delete all existing processes marks""" - logger.debug("Deleting PIDs..") - p = get_tmp_dir().joinpath("processes/") - if p.exists() is False: - return - for path in p.iterdir(): - if path.is_dir() is False: - return - for file in path.iterdir(): - file.unlink(missing_ok=True) - - -def get_processes_dir_files_generator() -> Generator[tuple[Path, int, int], None, None]: - """Get files from processes dir - - Yields: - tuple(Path, int, int): file object, process id and thread id - """ - p = get_tmp_dir().joinpath("processes/") - if p.exists() is False: - return - for path in p.iterdir(): - if path.is_dir() is False: - continue - for file in path.iterdir(): - parts = file.name.split("-") - process_id = int(parts[0]) - thread_id = int(parts[1]) - yield file, process_id, thread_id - - -def clean_unlinked_process_marks() -> list[int]: - """delete marks that does not have corresponded processes/threads - - Returns: - list[int]: list with ids of unexisting processes - """ - deleted_pids = [] - - for file, process_id, thread_id in get_processes_dir_files_generator(): - try: - process = psutil.Process(process_id) - if 
process.status() in (psutil.STATUS_ZOMBIE, psutil.STATUS_DEAD): - raise psutil.NoSuchProcess(process_id) - - threads = process.threads() - try: - next(t for t in threads if t.id == thread_id) - except StopIteration: - logger.warning(f"We have mark for process/thread {process_id}/{thread_id} but it does not exists") - deleted_pids.append(process_id) - file.unlink(missing_ok=True) - - except psutil.AccessDenied: - logger.warning(f"access to {process_id} denied") - continue - - except psutil.NoSuchProcess: - logger.warning(f"We have mark for process/thread {process_id}/{thread_id} but it does not exists") - deleted_pids.append(process_id) - file.unlink(missing_ok=True) - return deleted_pids - - -class PidFileLock: - """Cross-platform exclusive file lock context manager. - Uses fcntl.flock on Unix and msvcrt.locking on Windows. - - Attributes: - _lock_file_path (Path): path to lock file - _blocking (bool): if True, waits until the lock becomes available, otherwise raises OSError immediately if lock is held - _fh (int): lock file descriptor - """ - - def __init__(self, lock_file_path: Path, blocking: bool = True): - self._lock_file_path = lock_file_path - self._blocking = blocking - self._fh = None - - def __enter__(self): - self._lock_file_path.parent.mkdir(parents=True, exist_ok=True) - self._fh = open(self._lock_file_path, "a+") - try: - if sys.platform == "win32": - import msvcrt - - # NOTE if file is locked, LK_LOCK will raise OSError after 10 seconds, LK_NBLCK immediately - mode = msvcrt.LK_LOCK if self._blocking else msvcrt.LK_NBLCK - self._fh.seek(0) - msvcrt.locking(self._fh.fileno(), mode, 1) - else: - import fcntl - - flags = fcntl.LOCK_EX - if not self._blocking: - flags |= fcntl.LOCK_NB - fcntl.flock(self._fh.fileno(), flags) - except (OSError, IOError): - self._fh.close() - self._fh = None - logger.error(f"Failed to acquire lock on {self._lock_file_path}") - raise - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - if self._fh is None: - 
return False - try: - if sys.platform == "win32": - import msvcrt - - self._fh.seek(0) - msvcrt.locking(self._fh.fileno(), msvcrt.LK_UNLCK, 1) - else: - import fcntl - - fcntl.flock(self._fh.fileno(), fcntl.LOCK_UN) - except (OSError, IOError): - pass - finally: - try: - self._fh.close() - except (OSError, IOError): - pass - self._fh = None - return False - - -def create_pid_file(config): - """ - Create mindsdb process pid file. Check if previous process exists and is running - If pid_file_content is provided, it will be used to create the pid file with the content as key-value pairs. - If pid_file_content is not provided, the pid file will be created with the pid number only. - """ - - if os.environ.get("USE_PIDFILE") != "1": - return - - p = get_tmp_dir() - p.mkdir(parents=True, exist_ok=True) - pid_file = p.joinpath("pid") - lock_file = p.joinpath("pid.lock") - - with PidFileLock(lock_file): - if pid_file.exists(): - pid_file_data_str = pid_file.read_text().strip() - pid = None - try: - pid_file_data = json.loads(pid_file_data_str) - if isinstance(pid_file_data, dict): - pid = pid_file_data.get("pid") - else: - pid = pid_file_data - except json.JSONDecodeError: - try: - pid = int(pid_file_data_str) - except Exception: - pass - logger.warning(f"Found existing PID file {pid_file} but it is not a valid JSON, removing") - - if pid is not None: - try: - psutil.Process(int(pid)) - raise Exception(f"Found PID file with existing process: {pid} {pid_file}") - except (psutil.Error, ValueError): - pass - logger.warning(f"Found existing PID file {pid_file}({pid}), removing") - - pid_file.unlink(missing_ok=True) - - pid_file_content = config["pid_file_content"] - if pid_file_content is None or len(pid_file_content) == 0: - pid_file_data_str = str(os.getpid()) - else: - pid_file_data = {"pid": os.getpid()} - for key, value in pid_file_content.items(): - value_path = value.split(".") - value_obj = config - for path_part in value_path: - value_obj = value_obj.get(path_part) if 
value_obj else None - pid_file_data[key] = value_obj - - pid_file_data_str = json.dumps(pid_file_data) - pid_file.write_text(pid_file_data_str) - - -def delete_pid_file(): - """ - Remove existing process pid file if it matches current process - """ - - if os.environ.get("USE_PIDFILE") != "1": - return - - pid_file = get_tmp_dir().joinpath("pid") - lock_file = get_tmp_dir().joinpath("pid.lock") - - with PidFileLock(lock_file): - if not pid_file.exists(): - return - - pid_file_data_str = pid_file.read_text().strip() - pid = None - try: - pid_file_data = json.loads(pid_file_data_str) - if isinstance(pid_file_data, dict): - pid = pid_file_data.get("pid") - else: - # It's a simple number (old format or pid_file_content=None format) - pid = pid_file_data - except json.JSONDecodeError: - logger.warning(f"Found existing PID file {pid_file} but it is not a valid JSON") - - if pid is not None and str(pid) != str(os.getpid()): - logger.warning(f"Process id in PID file ({pid_file}) doesn't match mindsdb pid") - return - - pid_file.unlink(missing_ok=True) - - -def __is_within_directory(directory, target): - abs_directory = os.path.realpath(directory) - abs_target = os.path.realpath(target) - try: - return os.path.commonpath([abs_directory, abs_target]) == abs_directory - except ValueError: - # can be raised on windows - return False - - -def __get_tar_members(archivefile, members): - if members is None: - return archivefile.getmembers() - - resolved_members = [] - for member in members: - if isinstance(member, tarfile.TarInfo): - resolved_members.append(member) - else: - resolved_members.append(archivefile.getmember(member)) - return resolved_members - - -def safe_extract(archivefile, path=".", members=None, *, numeric_owner=False): - """ - Safely extract an archivefile, preventing path traversal attacks. 
- """ - if isinstance(archivefile, zipfile.ZipFile): - for member in archivefile.namelist(): - member_path = os.path.join(path, member) - if not __is_within_directory(path, member_path): - raise Exception("Attempted Path Traversal in Zip File") - archivefile.extractall(path, members) - return - - if isinstance(archivefile, tarfile.TarFile): - # for py >= 3.12 - if hasattr(archivefile, "data_filter"): - archivefile.extractall(path, members=members, numeric_owner=numeric_owner, filter="data") - return - - # for py < 3.12 - for member in __get_tar_members(archivefile, members): - if member.issym() or member.islnk(): - raise Exception(f"Security Alert: Link entries are not allowed in tar file: {member.name}") - - if not (member.isfile() or member.isdir()): - raise Exception(f"Security Alert: Unsupported tar member type detected for member: {member.name}") - - member_path = os.path.join(path, member.name) - if not __is_within_directory(path, member_path): - raise Exception( - f"Security Alert: Attempted path traversal in tar file detected for member: {member.name}" - ) - - archivefile.extract(member, path=path, numeric_owner=numeric_owner) diff --git a/mindsdb/utilities/functions.py b/mindsdb/utilities/functions.py deleted file mode 100644 index 11967a1b292..00000000000 --- a/mindsdb/utilities/functions.py +++ /dev/null @@ -1,181 +0,0 @@ -import os -import base64 -import hashlib -import json -import datetime -import textwrap -from contextlib import ContextDecorator - -from cryptography.fernet import Fernet -from mindsdb_sql_parser.ast import Identifier - -from mindsdb.utilities.fs import create_process_mark, delete_process_mark, set_process_mark -from mindsdb.utilities import log -from mindsdb.utilities.config import Config - - -logger = log.getLogger(__name__) - - -def get_handler_install_message(handler_name): - if Config().use_docker_env: - container_id = os.environ.get("HOSTNAME", "") - return textwrap.dedent(f"""\ - To install the {handler_name} handler, run the 
following in your terminal outside the docker container - ({container_id} is the ID of this container): - - docker exec {container_id} pip install 'mindsdb[{handler_name}]'""") - else: - return textwrap.dedent(f"""\ - To install the {handler_name} handler, run the following in your terminal: - - pip install 'mindsdb[{handler_name}]' # If you installed mindsdb via pip - pip install '.[{handler_name}]' # If you installed mindsdb from source""") - - -def cast_row_types(row, field_types): - """ """ - keys = [x for x in row.keys() if x in field_types] - for key in keys: - t = field_types[key] - if t == "Timestamp" and isinstance(row[key], (int, float)): - timestamp = datetime.datetime.fromtimestamp(row[key], datetime.timezone.utc) - row[key] = timestamp.strftime("%Y-%m-%d %H:%M:%S") - elif t == "Date" and isinstance(row[key], (int, float)): - timestamp = datetime.datetime.fromtimestamp(row[key], datetime.timezone.utc) - row[key] = timestamp.strftime("%Y-%m-%d") - elif t == "Int" and isinstance(row[key], (int, float, str)): - try: - logger.debug(f"cast {row[key]} to {int(row[key])}") - row[key] = int(row[key]) - except Exception: - pass - - -class mark_process(ContextDecorator): - def __init__(self, name: str, custom_mark: str = None): - self.name = name - self.custom_mark = custom_mark - self.mark = None - - def __enter__(self): - if self.custom_mark is None: - self.mark = create_process_mark(self.name) - else: - self.mark = set_process_mark(self.name, self.custom_mark) - - def __exit__(self, exc_type, exc, tb): - delete_process_mark(self.name, self.mark) - return False - - -def init_lexer_parsers(): - from mindsdb_sql_parser.lexer import MindsDBLexer - from mindsdb_sql_parser.parser import MindsDBParser - - return MindsDBLexer(), MindsDBParser() - - -def resolve_table_identifier(identifier: Identifier, default_database: str = None) -> tuple: - parts = identifier.parts - - parts_count = len(parts) - if parts_count == 1: - return (None, parts[0]) - elif parts_count == 2: 
- return (parts[0], parts[1]) - else: - raise Exception(f"Table identifier must contain max 2 parts: {parts}") - - -def resolve_model_identifier(identifier: Identifier) -> tuple: - """ - Splits a model identifier into its database, model name, and version components. - - The identifier may contain one, two, or three parts. - The function supports both quoted and unquoted identifiers, and normalizes names to lowercase if unquoted. - - Examples: - >>> resolve_model_identifier(Identifier(parts=['a', 'b'])) - ('a', 'b', None) - >>> resolve_model_identifier(Identifier(parts=['a', '1'])) - (None, 'a', 1) - >>> resolve_model_identifier(Identifier(parts=['a'])) - (None, 'a', None) - >>> resolve_model_identifier(Identifier(parts=['a', 'b', 'c'])) - (None, None, None) # not found - - Args: - identifier (Identifier): The identifier object containing parts and is_quoted attributes. - - Returns: - tuple: (database_name, model_name, model_version) - - database_name (str or None): The name of the database/project, or None if not specified. - - model_name (str or None): The name of the model, or None if not found. - - model_version (int or None): The model version as an integer, or None if not specified. - """ - model_name = None - db_name = None - version = None - model_name_quoted = None - db_name_quoted = None - - match identifier.parts, identifier.is_quoted: - case [model_name], [model_name_quoted]: - ... - case [model_name, str(version)], [model_name_quoted, _] if version.isdigit(): - ... - case [model_name, int(version)], [model_name_quoted, _]: - ... - case [db_name, model_name], [db_name_quoted, model_name_quoted]: - ... - case [db_name, model_name, str(version)], [db_name_quoted, model_name_quoted, _] if version.isdigit(): - ... - case [db_name, model_name, int(version)], [db_name_quoted, model_name_quoted, _]: - ... - case [db_name, model_name, str(version)], [db_name_quoted, model_name_quoted, _]: - # for back compatibility. May be delete? 
- return (None, None, None) - case _: - ... # may be raise ValueError? - - if model_name_quoted is False: - model_name = model_name.lower() - - if db_name_quoted is False: - db_name = db_name.lower() - - if isinstance(version, int) or isinstance(version, str) and version.isdigit(): - version = int(version) - else: - version = None - - return db_name, model_name, version - - -def encrypt(string: bytes, key: str) -> bytes: - hashed_string = hashlib.sha256(key.encode()).digest() - - fernet_key = base64.urlsafe_b64encode(hashed_string) - - cipher = Fernet(fernet_key) - return cipher.encrypt(string) - - -def decrypt(encripted: bytes, key: str) -> bytes: - hashed_string = hashlib.sha256(key.encode()).digest() - - fernet_key = base64.urlsafe_b64encode(hashed_string) - - cipher = Fernet(fernet_key) - return cipher.decrypt(encripted) - - -def encrypt_json(data: dict, key: str) -> bytes: - json_str = json.dumps(data) - return encrypt(json_str.encode(), key) - - -def decrypt_json(encrypted_data: bytes, key: str) -> dict: - decrypted = decrypt(encrypted_data, key) - return json.loads(decrypted) diff --git a/mindsdb/utilities/hooks/__init__.py b/mindsdb/utilities/hooks/__init__.py deleted file mode 100644 index b644a739971..00000000000 --- a/mindsdb/utilities/hooks/__init__.py +++ /dev/null @@ -1,34 +0,0 @@ -def empty_fn(*args, **kwargs): - pass - - -try: - from mindsdb.utilities.hooks.after_predict import after_predict -except ImportError: - after_predict = empty_fn - - -try: - from mindsdb.utilities.hooks.after_api_query import after_api_query -except ImportError: - after_api_query = empty_fn - after_palm_query = empty_fn - - -try: - from mindsdb.utilities.hooks.profiling import send_profiling_results -except ImportError: - send_profiling_results = empty_fn - - -try: - from mindsdb.utilities.hooks.openai_query import before_openai_query -except ImportError: - before_openai_query = empty_fn - before_palm_query = empty_fn - -try: - from mindsdb.utilities.hooks.openai_query 
import after_openai_query -except ImportError: - after_openai_query = empty_fn - after_palm_query = empty_fn diff --git a/mindsdb/utilities/hooks/profiling.py b/mindsdb/utilities/hooks/profiling.py deleted file mode 100644 index a8512cac46f..00000000000 --- a/mindsdb/utilities/hooks/profiling.py +++ /dev/null @@ -1,78 +0,0 @@ -import os -import json - -import psycopg - -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -MINDSDB_PROFILING_ENABLED = os.environ.get("MINDSDB_PROFILING_ENABLED") in ("1", "true") -MINDSDB_PROFILING_DB_HOST = os.environ.get("MINDSDB_PROFILING_DB_HOST") -MINDSDB_PROFILING_DB_USER = os.environ.get("MINDSDB_PROFILING_DB_USER") -MINDSDB_PROFILING_DB_PASSWORD = os.environ.get("MINDSDB_PROFILING_DB_PASSWORD") - - -def set_level(node, level, internal_id): - internal_id["id"] += 1 - node["level"] = level - node["value"] = node["stop_at"] - node["start_at"] - node["value_thread"] = node["stop_at_thread"] - node["start_at_thread"] - node["value_process"] = node["stop_at_process"] - node["start_at_process"] - node["internal_id"] = internal_id["id"] - - accum = 0 - for child_node in node["children"]: - set_level(child_node, level + 1, internal_id) - accum += child_node["value"] - node["self"] = node["value"] - accum - - -def send_profiling_results(profiling_data: dict): - if MINDSDB_PROFILING_ENABLED is False: - return - - profiling = profiling_data - set_level(profiling["tree"], 0, {"id": 0}) - - time_start_at = profiling["tree"]["time_start_at"] - del profiling["tree"]["time_start_at"] - - try: - connection = psycopg.connect( - host=MINDSDB_PROFILING_DB_HOST, - port=5432, - user=MINDSDB_PROFILING_DB_USER, - password=MINDSDB_PROFILING_DB_PASSWORD, - dbname="postgres", - connect_timeout=5, - ) - except Exception: - logger.warning("cant get acceess to profiling database") - return - cur = connection.cursor() - cur.execute( - """ - insert into profiling - (data, query, time, hostname, environment, api, total_time, company_id, 
user_id, instance_id) - values - (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) - """, - ( - json.dumps(profiling["tree"]), - profiling.get("query", "?"), - time_start_at, - profiling["hostname"], - profiling.get("environment", "?"), - profiling.get("api", "?"), - profiling["tree"]["value"], - profiling["company_id"], - profiling["user_id"], - profiling["instance_id"], - ), - ) - - connection.commit() - cur.close() - connection.close() diff --git a/mindsdb/utilities/json_encoder.py b/mindsdb/utilities/json_encoder.py deleted file mode 100644 index 7a08825d3a7..00000000000 --- a/mindsdb/utilities/json_encoder.py +++ /dev/null @@ -1,47 +0,0 @@ -from datetime import datetime, date, timedelta -from decimal import Decimal -import pandas as pd -import numpy as np -import orjson -from flask.json.provider import DefaultJSONProvider - - -class CustomJSONEncoder: - def default(self, obj): - if isinstance(obj, timedelta): - return str(obj) - if isinstance(obj, datetime): - return obj.strftime("%Y-%m-%d %H:%M:%S.%f") - if isinstance(obj, date): - return obj.strftime("%Y-%m-%d") - if isinstance(obj, Decimal): - return float(obj) - if isinstance(obj, np.bool_): - return bool(obj) - if isinstance(obj, np.ndarray): - return obj.tolist() - if pd.isnull(obj): - return None - - return str(obj) - - -class ORJSONProvider(DefaultJSONProvider): - """ - Use orjson to serialize data instead of flask json provider. 
- """ - - def dumps(self, obj, **kwargs): - return orjson.dumps( - obj, - option=( - orjson.OPT_SERIALIZE_NUMPY - | orjson.OPT_NON_STR_KEYS - # keep this for using CustomJSON encoder - | orjson.OPT_PASSTHROUGH_DATETIME - ), - default=CustomJSONEncoder().default, - ).decode("utf-8") - - def loads(self, s, **kwargs): - return orjson.loads(s) diff --git a/mindsdb/utilities/langfuse.py b/mindsdb/utilities/langfuse.py deleted file mode 100644 index 92320c48d5e..00000000000 --- a/mindsdb/utilities/langfuse.py +++ /dev/null @@ -1,293 +0,0 @@ -import os -import typing -from typing import TYPE_CHECKING - -from mindsdb.utilities import log - -if TYPE_CHECKING: - from langfuse._client.span import LangfuseSpan - from langfuse.langchain import CallbackHandler - -logger = log.getLogger(__name__) - -# Define Langfuse public key. -LANGFUSE_PUBLIC_KEY = os.getenv("LANGFUSE_PUBLIC_KEY", "langfuse_public_key") - -# Define Langfuse secret key. -LANGFUSE_SECRET_KEY = os.getenv("LANGFUSE_SECRET_KEY", "langfuse_secret_key") - -# Define Langfuse host. -LANGFUSE_HOST = os.getenv("LANGFUSE_HOST", "http://localhost:3000") - -# Define Langfuse environment. -LANGFUSE_ENVIRONMENT = os.getenv("LANGFUSE_ENVIRONMENT", "local") - -# Define Langfuse release. -LANGFUSE_RELEASE = os.getenv("LANGFUSE_RELEASE", "local") - -# Define Langfuse debug mode. -LANGFUSE_DEBUG = os.getenv("LANGFUSE_DEBUG", "false").lower() == "true" - -# Define Langfuse timeout. -LANGFUSE_TIMEOUT = int(os.getenv("LANGFUSE_TIMEOUT", 10)) - -# Define Langfuse sample rate. -LANGFUSE_SAMPLE_RATE = float(os.getenv("LANGFUSE_SAMPLE_RATE", 1.0)) - -# Define if Langfuse is disabled. -LANGFUSE_DISABLED = os.getenv("LANGFUSE_DISABLED", "false").lower() == "true" or LANGFUSE_ENVIRONMENT == "local" -LANGFUSE_FORCE_RUN = os.getenv("LANGFUSE_FORCE_RUN", "false").lower() == "true" - - -class LangfuseClientWrapper: - """ - Langfuse client wrapper. Defines Langfuse client configuration and initializes Langfuse client. 
- """ - - def __init__( - self, - public_key: str = LANGFUSE_PUBLIC_KEY, - secret_key: str = LANGFUSE_SECRET_KEY, - host: str = LANGFUSE_HOST, - environment: str = LANGFUSE_ENVIRONMENT, - release: str = LANGFUSE_RELEASE, - debug: bool = LANGFUSE_DEBUG, - timeout: int = LANGFUSE_TIMEOUT, - sample_rate: float = LANGFUSE_SAMPLE_RATE, - disable: bool = LANGFUSE_DISABLED, - force_run: bool = LANGFUSE_FORCE_RUN, - ) -> None: - """ - Initialize Langfuse client. - - Args: - public_key (str): Langfuse public key. - secret_key (str): Langfuse secret key. - host (str): Langfuse host. - release (str): Langfuse release. - timeout (int): Langfuse timeout. - sample_rate (float): Langfuse sample rate. - """ - - self.metadata = None - self.public_key = public_key - self.secret_key = secret_key - self.host = host - self.environment = environment - self.release = release - self.debug = debug - self.timeout = timeout - self.sample_rate = sample_rate - self.disable = disable - self.force_run = force_run - - self.client = None - self.trace = None - self.metadata = None - self.tags = None - - # Check if Langfuse is disabled. - if LANGFUSE_DISABLED and not LANGFUSE_FORCE_RUN: - logger.info("Langfuse is disabled.") - return - - logger.info("Langfuse enabled") - logger.debug(f"LANGFUSE_PUBLIC_KEY: {LANGFUSE_PUBLIC_KEY}") - logger.debug(f"LANGFUSE_SECRET_KEY: {'*' * len(LANGFUSE_SECRET_KEY)}") - logger.debug(f"LANGFUSE_HOST: {LANGFUSE_HOST}") - logger.debug(f"LANGFUSE_ENVIRONMENT: {LANGFUSE_ENVIRONMENT}") - logger.debug(f"LANGFUSE_RELEASE: {LANGFUSE_RELEASE}") - logger.debug(f"LANGFUSE_DEBUG: {LANGFUSE_DEBUG}") - logger.debug(f"LANGFUSE_TIMEOUT: {LANGFUSE_TIMEOUT}") - logger.debug(f"LANGFUSE_SAMPLE_RATE: {LANGFUSE_SAMPLE_RATE * 100}%") - - try: - from langfuse import Langfuse - except ImportError: - logger.error("Langfuse is not installed. 
Please install it with `pip install langfuse`.") - return - - self.client = Langfuse( - public_key=public_key, - secret_key=secret_key, - host=host, - environment=environment, - release=release, - debug=debug, - timeout=timeout, - sample_rate=sample_rate, - ) - - def setup_trace( - self, - name: str, - input: typing.Optional[typing.Any] = None, - tags: typing.Optional[typing.List] = None, - metadata: typing.Optional[typing.Dict] = None, - user_id: str = None, - session_id: str = None, - ) -> None: - """ - Setup trace. If Langfuse is disabled, nothing will be done. - Args: - name (str): Trace name. - input (dict): Trace input. - tags (dict): Trace tags. - metadata (dict): Trace metadata. - user_id (str): User ID. - session_id (str): Session ID. - """ - - if self.client is None: - logger.debug("Langfuse is disabled.") - return - - self.set_metadata(metadata) - self.set_tags(tags) - - try: - # SDK v3+: root observation is a span; trace attributes are set via update_trace. - self.trace = self.client.start_span(name=name, input=input, metadata=self.metadata) - self.trace.update_trace(tags=self.tags, user_id=user_id, session_id=session_id) - except Exception: - logger.exception("Something went wrong while creating Langfuse trace") - return - - logger.info(f"Langfuse trace configured with ID: {self.trace.trace_id}") - - def get_trace_id(self) -> typing.Optional[str]: - """ - Get trace ID. If Langfuse is disabled, returns None. - """ - - if self.client is None: - logger.debug("Langfuse is disabled.") - return "" - - if self.trace is None: - logger.debug("Langfuse trace is not setup.") - return "" - - return self.trace.trace_id - - def start_span(self, name: str, input: typing.Optional[typing.Any] = None) -> typing.Optional["LangfuseSpan"]: - """ - Create span. If Langfuse is disabled, nothing will be done. - - Args: - name (str): Span name. - input (dict): Span input. 
- """ - - if self.client is None: - logger.debug("Langfuse is disabled.") - return None - - return self.trace.start_span(name=name, input=input) - - def end_span_stream(self, span: typing.Optional["LangfuseSpan"] = None) -> None: - """ - End span. If Langfuse is disabled, nothing will happen. - Args: - span (Any): Span object. - """ - - if self.client is None: - logger.debug("Langfuse is disabled.") - return - - span.end() - self.client.flush() - - def end_span( - self, span: typing.Optional["LangfuseSpan"] = None, output: typing.Optional[typing.Any] = None - ) -> None: - """ - End trace. If Langfuse is disabled, nothing will be done. - - Args: - span (Any): Span object. - output (Any): Span output. - """ - - if self.client is None: - logger.debug("Langfuse is disabled.") - return - - if span is None: - logger.debug("Langfuse span is not created.") - return - - if output is not None: - span.update(output=output) - span.end() - self.trace.update_trace(output=output) - - metadata = self.metadata or {} - - try: - # Ensure all batched traces are sent before fetching. - self.client.flush() - metadata["tool_usage"] = self._get_tool_usage() - self.trace.update_trace(metadata=metadata) - except Exception: - logger.exception(f"Something went wrong while processing Langfuse trace {self.trace.trace_id}:") - - def get_langchain_handler(self) -> typing.Optional["CallbackHandler"]: - """ - Get Langchain handler. If Langfuse is disabled, returns None. - """ - - if self.client is None: - logger.debug("Langfuse is disabled.") - return None - - try: - from langfuse.langchain import CallbackHandler - except ImportError: - logger.debug("langfuse.langchain CallbackHandler is not available (install langchain extra if needed).") - return None - - return CallbackHandler(public_key=self.public_key) - - def set_metadata(self, custom_metadata: dict = None) -> None: - """ - Get default metadata. 
- """ - self.metadata = custom_metadata or {} - - self.metadata["environment"] = self.environment - self.metadata["release"] = self.release - - def set_tags(self, custom_tags: typing.Optional[typing.List] = None) -> None: - """ - Get default tags. - """ - self.tags = custom_tags or [] - - self.tags.append(self.environment) - self.tags.append(self.release) - - def _get_tool_usage(self) -> typing.Dict: - """Retrieves tool usage information from a langfuse trace. - Note: assumes trace marks an action with string `AgentAction` - """ - from langfuse.api.resources.commons.errors.not_found_error import NotFoundError as TraceNotFoundError - - tool_usage = {} - - try: - fetched_trace = self.client.api.trace.get(self.trace.trace_id) - steps = [s.name for s in fetched_trace.observations if s.name] - for step in steps: - if "AgentAction" in step: - tool_name = step.split("-")[1] - if tool_name not in tool_usage: - tool_usage[tool_name] = 0 - tool_usage[tool_name] += 1 - except TraceNotFoundError: - logger.warning(f"Langfuse trace {self.trace.trace_id} not found") - except Exception: - logger.exception(f"Something went wrong while processing Langfuse trace {self.trace.trace_id}:") - - return tool_usage diff --git a/mindsdb/utilities/log.py b/mindsdb/utilities/log.py deleted file mode 100644 index 2ae311a61da..00000000000 --- a/mindsdb/utilities/log.py +++ /dev/null @@ -1,642 +0,0 @@ -import re -import os -import json -import logging -import threading -from typing import Any -import warnings -from logging.config import dictConfig - -from mindsdb.utilities.config import config as app_config - -# Suppress Pydantic warnings for third-party libraries -# TODO: Work on a better solution to this -warnings.filterwarnings("ignore", message="Field.*has conflict with protected namespace.*", category=UserWarning) - - -logging_initialized = False - - -class JsonFormatter(logging.Formatter): - def format(self, record): - record_message = super().format(record) - log_record = { - 
"process_name": record.processName, - "name": record.name, - "message": record_message, - "level": record.levelname, - "time": record.created, - } - return json.dumps(log_record) - - -class ColorFormatter(logging.Formatter): - green = "\x1b[32;20m" - default = "\x1b[39;20m" - yellow = "\x1b[33;20m" - red = "\x1b[31;20m" - bold_red = "\x1b[31;1m" - reset = "\x1b[0m" - format = "%(asctime)s %(processName)15s %(levelname)-8s %(name)s: %(message)s" - - FORMATS = { - logging.DEBUG: logging.Formatter(green + format + reset), - logging.INFO: logging.Formatter(default + format + reset), - logging.WARNING: logging.Formatter(yellow + format + reset), - logging.ERROR: logging.Formatter(red + format + reset), - logging.CRITICAL: logging.Formatter(bold_red + format + reset), - } - - def format(self, record): - log_fmt = self.FORMATS.get(record.levelno) - return log_fmt.format(record) - - -FORMATTERS = { - "default": {"()": ColorFormatter}, - "json": {"()": JsonFormatter}, - "file": {"format": "%(asctime)s %(processName)15s %(levelname)-8s %(name)s: %(message)s"}, -} - - -class LogSanitizer: - """Log Sanitizer""" - - SENSITIVE_KEYS = { - "password", - "passwd", - "pwd", - "token", - "access_token", - "refresh_token", - "bearer_token", - "api_key", - "apikey", - "api-key", - "openai_api_key", - "secret", - "secret_key", - "client_secret", - "credentials", - "auth", - "authorization", - "private_key", - "private-key", - "session_id", - "sessionid", - "credit_card", - "card_number", - "cvv", - } - - def __init__(self, mask: str | None = None): - self.mask = mask or "********" - self._compile_patterns() - - def _compile_patterns(self): - self.search_pattern = re.compile( - r"\b(" + "|".join(re.escape(key) for key in self.SENSITIVE_KEYS) + r")\b", re.IGNORECASE - ) - self.patterns = [] - for key in self.SENSITIVE_KEYS: - # Patterns for: key=value, key: value, "key": "value", 'key': 'value' - # Note: negative lookahead (?!%) excludes Python format placeholders like %s, %d, etc. 
- patterns = [ - re.compile(f'{key}["\s]*[:=]["\s]*(?!%)([^\s,}}\\]"\n]+)', re.IGNORECASE), - re.compile(f'"{key}"["\s]*:["\s]*"([^"]+)"', re.IGNORECASE), - re.compile(f"'{key}'['\s]*:['\s]*'([^']+)'", re.IGNORECASE), - ] - self.patterns.extend(patterns) - - def _replace(self, m) -> str: - return m.group(0).replace(m.group(1), self.mask) - - def sanitize_text(self, text: str) -> str: - if self.search_pattern.search(text): - for pattern in self.patterns: - text = pattern.sub(self._replace, text) - return text - - def sanitize_dict(self, data: dict) -> dict: - if not isinstance(data, dict): - return data - - sanitized = {} - for key, value in data.items(): - if any(sensitive in str(key).lower() for sensitive in self.SENSITIVE_KEYS): - sanitized[key] = self.mask - elif isinstance(value, dict): - sanitized[key] = self.sanitize_dict(value) - elif isinstance(value, list): - sanitized[key] = [self.sanitize_dict(item) if isinstance(item, dict) else item for item in value] - else: - sanitized[key] = value - return sanitized - - def sanitize(self, data: Any) -> Any: - if isinstance(data, dict): - return self.sanitize_dict(data) - elif isinstance(data, str): - return self.sanitize_text(data) - elif isinstance(data, (list, tuple)): - return type(data)(self.sanitize(item) for item in data) - return data - - -class SanitizingMixin: - """Mixin for sanitizing log records.""" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.sanitizer = LogSanitizer() - - def sanitize_record(self, record): - """Sanitize a log record before emitting.""" - if ( - hasattr(record, "args") - and isinstance(record.args, (list, tuple)) - and len(record.args) > 0 - and isinstance(record.msg, str) - ): - record.msg = record.msg % record.args - record.args = [] - - if isinstance(record.msg, str): - record.msg = self.sanitizer.sanitize_text(record.msg) - elif isinstance(record.msg, dict): - record.msg = self.sanitizer.sanitize_dict(record.msg) - - if hasattr(record, "args") 
and record.args: - record.args = self.sanitizer.sanitize(record.args) - - return record - - -class StreamSanitizingHandler(SanitizingMixin, logging.StreamHandler): - def emit(self, record): - record = self.sanitize_record(record) - super().emit(record) - - -class FileSanitizingHandler(SanitizingMixin, logging.handlers.RotatingFileHandler): - def emit(self, record): - record = self.sanitize_record(record) - super().emit(record) - - -def get_console_handler_config_level() -> int: - console_handler_config = app_config["logging"]["handlers"]["console"] - return getattr(logging, console_handler_config["level"]) - - -def get_file_handler_config_level() -> int: - file_handler_config = app_config["logging"]["handlers"]["file"] - return getattr(logging, file_handler_config["level"]) - - -def get_mindsdb_log_level() -> int: - console_handler_config_level = get_console_handler_config_level() - file_handler_config_level = get_file_handler_config_level() - - return min(console_handler_config_level, file_handler_config_level) - - -def get_handlers_config(process_name: str) -> dict: - handlers_config = {} - console_handler_config = app_config["logging"]["handlers"]["console"] - console_handler_config_level = getattr(logging, console_handler_config["level"]) - if console_handler_config["enabled"] is True: - handlers_config["console"] = { - "class": "mindsdb.utilities.log.StreamSanitizingHandler", - "formatter": console_handler_config.get("formatter", "default"), - "level": console_handler_config_level, - "stream": console_handler_config.get("stream", "ext://sys.stderr"), - } - - file_handler_config = app_config["logging"]["handlers"]["file"] - file_handler_config_level = getattr(logging, file_handler_config["level"]) - if file_handler_config["enabled"] is True: - file_name = file_handler_config["filename"] - if process_name is not None: - if "." 
in file_name: - parts = file_name.rpartition(".") - file_name = f"{parts[0]}_{process_name}.{parts[2]}" - else: - file_name = f"{file_name}_{process_name}" - handlers_config["file"] = { - "class": "mindsdb.utilities.log.FileSanitizingHandler", - "formatter": "file", - "level": file_handler_config_level, - "filename": app_config.paths["log"] / file_name, - "maxBytes": file_handler_config["maxBytes"], # 0.5 Mb - "backupCount": file_handler_config["backupCount"], - } - return handlers_config - - -def configure_logging(process_name: str = None): - handlers_config = get_handlers_config(process_name) - mindsdb_log_level = get_mindsdb_log_level() - - logging_config = dict( - version=1, - formatters=FORMATTERS, - handlers=handlers_config, - loggers={ - "": { # root logger - "handlers": list(handlers_config.keys()), - "level": mindsdb_log_level, - }, - "__main__": { - "level": mindsdb_log_level, - }, - "mindsdb": { - "level": mindsdb_log_level, - }, - "alembic": { - "level": mindsdb_log_level, - }, - }, - ) - - dictConfig(logging_config) - - -def initialize_logging(process_name: str = None) -> None: - """Initialyze logging""" - global logging_initialized - if not logging_initialized: - configure_logging(process_name) - logging_initialized = True - - -# I would prefer to leave code to use logging.getLogger(), but there are a lot of complicated situations -# in MindsDB with processes being spawned that require logging to be configured again in a lot of cases. -# Using a custom logger-getter like this lets us do that logic here, once. -def getLogger(name=None): - """ - Get a new logger, configuring logging first if it hasn't been done yet. - """ - initialize_logging() - return logging.getLogger(name) - - -def log_ram_info(logger: logging.Logger) -> None: - """Log RAM/memory information to the provided logger. - - This function logs memory usage information: total, available, used memory in GB and memory - usage percentage. 
The logging only occurs if the logger is enabled for DEBUG level. - - Args: - logger (logging.Logger): The logger instance to use for outputting memory information. - """ - if logger.isEnabledFor(logging.DEBUG) is False: - return - - try: - import psutil - - memory = psutil.virtual_memory() - total_memory_gb = memory.total / (1024**3) - available_memory_gb = memory.available / (1024**3) - used_memory_gb = memory.used / (1024**3) - memory_percent = memory.percent - logger.debug( - f"Memory: {total_memory_gb:.1f}GB total, {available_memory_gb:.1f}GB available, {used_memory_gb:.1f}GB used ({memory_percent:.1f}%)" - ) - except Exception as e: - logger.debug(f"Failed to get memory information: {e}") - - -def log_system_info(logger: logging.Logger) -> None: - """Log detailed system information for debugging purposes. - - The function only logs system information (if the logger is configured for DEBUG level): - - Operating system details (OS type, version, distribution, architecture) - - CPU information (processor type, physical and logical core counts) - - Memory information (total, available, used memory in GB and percentage) - - GPU information (NVIDIA, AMD, Intel graphics cards with memory details) - - Args: - logger (logging.Logger): The logger instance to use for outputting system information. - Must be configured for DEBUG level to see the output. 
- - Returns: - None - - Note: - - For Linux systems, attempts to detect distribution via /etc/os-release, /etc/issue, or lsb_release - - For Windows systems, uses wmic commands to get detailed OS and GPU information - - For macOS systems, uses sw_vers and system_profiler commands - - GPU detection supports NVIDIA (via nvidia-smi), AMD (via rocm-smi), and fallback methods - - All subprocess calls have timeout protection to prevent hanging - - If any system information gathering fails, it logs the error and continues - """ - if logger.isEnabledFor(logging.DEBUG) is False: - return - - try: - import os - import shutil - import psutil - import platform - import subprocess - - # region OS information - os_system = platform.system() - os_release = platform.release() - os_machine = platform.machine() - - os_details = [] - - if os_system == "Linux": - # Try to detect Linux distribution - distro_info = "Unknown Linux" - try: - # Check for /etc/os-release (most modern distributions) - if os.path.exists("/etc/os-release"): - with open("/etc/os-release", "r") as f: - os_release_data = {} - for line in f: - if "=" in line: - key, value = line.strip().split("=", 1) - os_release_data[key] = value.strip('"') - - if "PRETTY_NAME" in os_release_data: - distro_info = os_release_data["PRETTY_NAME"] - elif "NAME" in os_release_data and "VERSION" in os_release_data: - distro_info = f"{os_release_data['NAME']} {os_release_data['VERSION']}" - elif "ID" in os_release_data: - distro_info = os_release_data["ID"].title() - # Fallback to /etc/issue - elif os.path.exists("/etc/issue"): - with open("/etc/issue", "r") as f: - issue_content = f.read().strip() - if issue_content: - distro_info = issue_content.split("\n")[0] - # Fallback to lsb_release - else: - try: - result = subprocess.run(["lsb_release", "-d"], capture_output=True, text=True, timeout=2) - if result.returncode == 0: - distro_info = result.stdout.split(":")[-1].strip() - except (subprocess.TimeoutExpired, FileNotFoundError, 
OSError): - pass - except Exception: - pass - - os_details.append(f"{distro_info} (kernel {os_release})") - - elif os_system == "Windows": - os_name = "Windows" - os_version = "unknown" - try: - result = subprocess.run( - ["wmic", "os", "get", "Caption,Version", "/format:list"], capture_output=True, text=True, timeout=3 - ) - if result.returncode == 0: - windows_info = {} - for line in result.stdout.strip().split("\n"): - if "=" in line: - key, value = line.strip().split("=", 1) - windows_info[key] = value.strip() - - if "Caption" in windows_info and "Version" in windows_info: - os_name = windows_info["Caption"] - os_version = windows_info["Version"] - except Exception: - pass - os_details.append(f"{os_name} {os_release} (version {os_version})") - - elif os_system == "Darwin": # macOS - os_name = "macOS" - os_version = "unknown" - try: - result = subprocess.run( - ["sw_vers", "-productName", "-productVersion"], capture_output=True, text=True, timeout=3 - ) - if result.returncode == 0: - lines = result.stdout.strip().split("\n") - if len(lines) >= 2: - os_name = lines[0].strip() - os_version = lines[1].strip() - except Exception: - pass - os_details.append(f"{os_name} {os_release} (version {os_version})") - else: - os_details.append(f"{os_system} {os_release}") - - os_details.append(f"({os_machine})") - os_info = " ".join(os_details) - logger.debug(f"Operating System: {os_info}") - # endregion - - # region CPU information - cpu_info = platform.processor() - if not cpu_info or cpu_info == "": - cpu_info = platform.machine() - cpu_count = psutil.cpu_count(logical=False) - cpu_count_logical = psutil.cpu_count(logical=True) - logger.debug(f"CPU: {cpu_info} ({cpu_count} physical cores, {cpu_count_logical} logical cores)") - # endregion - - # memory information - log_ram_info(logger) - - # region GPU information - gpu_info = [] - try: - # Check for NVIDIA GPU (works on Linux, Windows, macOS) - nvidia_smi_path = shutil.which("nvidia-smi") - if nvidia_smi_path: - try: - 
result = subprocess.run( - [nvidia_smi_path, "--query-gpu=name,memory.total", "--format=csv,noheader,nounits"], - capture_output=True, - text=True, - timeout=3, - ) - if result.returncode == 0: - for line in result.stdout.strip().split("\n"): - if line.strip(): - parts = line.split(", ") - if len(parts) >= 2: - gpu_name = parts[0].strip() - gpu_memory = parts[1].strip() - gpu_info.append(f"{gpu_name} ({gpu_memory}MB)") - except (subprocess.TimeoutExpired, FileNotFoundError, OSError): - pass - - # Check for AMD GPU (rocm-smi on Linux, wmic on Windows) - if not gpu_info: # Only check AMD if no NVIDIA GPU found - if platform.system() == "Windows": - # Use wmic on Windows to detect AMD GPU - try: - result = subprocess.run( - ["wmic", "path", "win32_VideoController", "get", "name"], - capture_output=True, - text=True, - timeout=3, - ) - if result.returncode == 0: - for line in result.stdout.strip().split("\n"): - line = line.strip() - if line and line != "Name" and "AMD" in line.upper(): - gpu_info.append(line) - except (subprocess.TimeoutExpired, FileNotFoundError, OSError): - pass - else: - # Use rocm-smi on Linux/macOS - rocm_smi_path = shutil.which("rocm-smi") - if rocm_smi_path: - try: - result = subprocess.run( - [rocm_smi_path, "--showproductname"], capture_output=True, text=True, timeout=3 - ) - if result.returncode == 0: - for line in result.stdout.strip().split("\n"): - if "Product Name" in line: - gpu_name = line.split(":")[-1].strip() - gpu_info.append(gpu_name) - except (subprocess.TimeoutExpired, FileNotFoundError, OSError): - pass - - # Fallback: Try to detect any GPU using platform-specific methods - if not gpu_info: - if platform.system() == "Windows": - try: - # Use wmic to get all video controllers - result = subprocess.run( - ["wmic", "path", "win32_VideoController", "get", "name"], - capture_output=True, - text=True, - timeout=3, - ) - if result.returncode == 0: - for line in result.stdout.strip().split("\n"): - line = line.strip() - if ( - line - 
and line != "Name" - and any( - keyword in line.upper() - for keyword in ["NVIDIA", "AMD", "INTEL", "RADEON", "GEFORCE"] - ) - ): - gpu_info.append(line) - except (subprocess.TimeoutExpired, FileNotFoundError, OSError): - pass - elif platform.system() == "Darwin": # macOS - try: - # Use system_profiler on macOS - result = subprocess.run( - ["system_profiler", "SPDisplaysDataType"], capture_output=True, text=True, timeout=3 - ) - if result.returncode == 0: - for line in result.stdout.strip().split("\n"): - if "Chipset Model:" in line: - gpu_name = line.split(":")[-1].strip() - gpu_info.append(gpu_name) - except (subprocess.TimeoutExpired, FileNotFoundError, OSError): - pass - - except Exception: - pass - - if gpu_info: - logger.debug(f"GPU: {', '.join(gpu_info)}") - else: - logger.debug("GPU: Not detected or not supported") - # endregion - - except Exception as e: - logger.debug(f"Failed to get system information: {e}") - - -def resources_log_thread(stop_event: threading.Event, interval: int = 60): - """Log resources information to the logger - - Args: - stop_event (Event): Event to stop the thread - interval (int): Interval in seconds to log resources information - - Returns: - None - - Note: - Output shows: - - RAM: total, available, used memory in GB and memory usage percentage - - Consumed RAM: sum of rss, and percentage of total memory used - - CPU usage: average CPU usage for last period - - Active queries: number of active SQL queries - """ - from mindsdb.utilities.fs import get_tmp_dir - - logger = getLogger(__name__) - while stop_event.wait(timeout=interval) is False: - try: - import psutil - - main_process = psutil.Process(os.getpid()) - children = main_process.children(recursive=True) - - total_memory_info = { - "main_process": { - "pid": main_process.pid, - "name": main_process.name(), - "memory_info": main_process.memory_info(), - "memory_percent": main_process.memory_percent(), - }, - "children": [], - "total_memory": {"rss": 0, "vms": 0, "percent": 
0}, - } - - for child in children: - try: - child_info = { - "pid": child.pid, - "name": child.name(), - "memory_info": child.memory_info(), - "memory_percent": child.memory_percent(), - } - total_memory_info["children"].append(child_info) - - total_memory_info["total_memory"]["rss"] += child.memory_info().rss - total_memory_info["total_memory"]["vms"] += child.memory_info().vms - total_memory_info["total_memory"]["percent"] += child.memory_percent() - except (psutil.NoSuchProcess, psutil.AccessDenied): - continue - - total_memory_info["total_memory"]["rss"] += main_process.memory_info().rss - total_memory_info["total_memory"]["vms"] += main_process.memory_info().vms - total_memory_info["total_memory"]["percent"] += main_process.memory_percent() - - memory = psutil.virtual_memory() - total_memory_gb = memory.total / (1024**3) - available_memory_gb = memory.available / (1024**3) - used_memory_gb = memory.used / (1024**3) - memory_percent = memory.percent - cpu_usage = psutil.cpu_percent() - - active_http_queries = 0 - p = get_tmp_dir().joinpath("processes/http_query/") - if p.exists() and p.is_dir(): - for _ in p.iterdir(): - active_http_queries += 1 - - active_mysql_queries = 0 - p = get_tmp_dir().joinpath("processes/mysql_query/") - if p.exists() and p.is_dir(): - for _ in p.iterdir(): - active_mysql_queries += 1 - - level = app_config["logging"]["resources_log"]["level"] - logger.log( - logging.getLevelName(level), - f"RAM: {total_memory_gb:.1f}GB total, {available_memory_gb:.1f}GB available, {used_memory_gb:.1f}GB used ({memory_percent:.1f}%)\n" - f"Consumed RAM: {total_memory_info['total_memory']['rss'] / (1024**2):.1f}Mb, {total_memory_info['total_memory']['percent']:.2f}%\n" - f"CPU usage: {cpu_usage}% {interval}s\n" - f"Active queries: {active_http_queries}/HTTP {active_mysql_queries}/MySQL", - ) - except Exception as e: - logger.debug(f"Failed to get memory information: {e}") diff --git a/mindsdb/utilities/ml_task_queue/__init__.py 
b/mindsdb/utilities/ml_task_queue/__init__.py deleted file mode 100644 index d1e147238bb..00000000000 --- a/mindsdb/utilities/ml_task_queue/__init__.py +++ /dev/null @@ -1,57 +0,0 @@ -""" - Tasks queue allows to limit load by ordering tasks through a queue. - Current implementation use Redis as backend for the queue. To run MindsDB with tasks queue need: - 1. config mindsdb to use tasks queue by one of: - - fill 'ml_task_queue' key in config.json: - { - "ml_task_queue": { - "type": "redis", // required - "host": "...", - "port": "...", - "db": "...", - "username": "...", - "password": "..." - } - } - - or set env vars: - MINDSDB_ML_QUEUE_TYPE=redis # required - MINDSDB_ML_QUEUE_HOST=... - MINDSDB_ML_QUEUE_PORT=... - MINDSDB_ML_QUEUE_DB=... - MINDSDB_ML_QUEUE_USERNAME=... - MINDSDB_ML_QUEUE_PASSWORD=... - 2. run mindsdb with arg --ml_task_queue_consumer - - In redis there is two types of entities used: streams (for distributing tasks) and regular - key-value storage with ttl (to transfer dataframes and some other data). Dataframes are not - transfer via streams to make stream messages lightweight. - - Taks queue may work in single instnace to limit load on it, ot it may work in distributed - system. In that case mindsdb may be splitted into two modules: parser/planner/executioner (PPE) - and ML. 
- - ┌─────────────┐ ┌─────────────┐ - │ │ │ │ - │ MindsDB PPE │ │ MindsDB PPE │ - │ │ │ │ - └───────────┬─┘ └─┬─────┬─▲───┘ - │ │ │ │ - ┌▼─────▼┐ │ │ - │ │ ┌─▼─┴─────────┐ - │ Queue │ │ │ - │ │ │ Cache │ - ├───────┤ │ │ - │ Task │ ├─────────────┤ - ├───────┤ │ Dataframe │ - │ Task │ ├─────────────┤ - ├───────┤ │ Status │ - │ Task │ └─┬─▲─────────┘ - └┬─────┬┘ │ │ - │ │ │ │ - │ │ │ │ - ┌───────────▼─┐ ┌─▼─────▼─┴───┐ - │ │ │ │ - │ MindsDB ML │ │ MindsDB ML │ - │ │ │ │ - └─────────────┘ └─────────────┘ -""" \ No newline at end of file diff --git a/mindsdb/utilities/ml_task_queue/base.py b/mindsdb/utilities/ml_task_queue/base.py deleted file mode 100644 index 28844bcbea4..00000000000 --- a/mindsdb/utilities/ml_task_queue/base.py +++ /dev/null @@ -1,14 +0,0 @@ -from mindsdb.utilities.ml_task_queue.utils import wait_redis_ping - - -class BaseRedisQueue: - def wait_redis_ping(self, timeout: int = 30) -> None: - """ wait when redis.ping return True - - Args: - timeout (int): seconds to wait for success ping - - Raises: - RedisConnectionError: if `ping` did not return `True` within `timeout` seconds - """ - return wait_redis_ping(self.db, timeout) diff --git a/mindsdb/utilities/ml_task_queue/const.py b/mindsdb/utilities/ml_task_queue/const.py deleted file mode 100644 index f2efabde06f..00000000000 --- a/mindsdb/utilities/ml_task_queue/const.py +++ /dev/null @@ -1,26 +0,0 @@ -from enum import Enum - - -TASKS_STREAM_NAME = b'ml-tasks' -TASKS_STREAM_CONSUMER_GROUP_NAME = 'ml_executors' -TASKS_STREAM_CONSUMER_NAME = 'ml_executor' - - -class ML_TASK_TYPE(Enum): - LEARN = b'learn' - PREDICT = b'predict' - FINETUNE = b'finetune' - DESCRIBE = b'describe' - CREATE_VALIDATION = b'create_validation' - CREATE_ENGINE = b'create_engine' - UPDATE_ENGINE = b'update_engine' - UPDATE = b'update' - FUNC_CALL = b'func_call' - - -class ML_TASK_STATUS(Enum): - WAITING = b'waiting' - PROCESSING = b'processing' - COMPLETE = b'complete' - ERROR = b'error' - TIMEOUT = b'timeout' diff --git 
a/mindsdb/utilities/ml_task_queue/consumer.py b/mindsdb/utilities/ml_task_queue/consumer.py deleted file mode 100644 index 2b7cde022a1..00000000000 --- a/mindsdb/utilities/ml_task_queue/consumer.py +++ /dev/null @@ -1,246 +0,0 @@ -import os -import time -import signal -import tempfile -import threading -from pathlib import Path -from functools import wraps -from collections.abc import Callable - -import psutil -from walrus import Database -from pandas import DataFrame -from redis.exceptions import ConnectionError as RedisConnectionError - -from mindsdb.utilities.config import Config -from mindsdb.utilities.context import context as ctx -from mindsdb.integrations.libs.process_cache import process_cache -from mindsdb.utilities.ml_task_queue.utils import RedisKey, StatusNotifier, to_bytes, from_bytes -from mindsdb.utilities.ml_task_queue.base import BaseRedisQueue -from mindsdb.utilities.fs import clean_unlinked_process_marks -from mindsdb.utilities.functions import mark_process -from mindsdb.utilities.ml_task_queue.const import ( - ML_TASK_TYPE, - ML_TASK_STATUS, - TASKS_STREAM_NAME, - TASKS_STREAM_CONSUMER_NAME, - TASKS_STREAM_CONSUMER_GROUP_NAME, -) -from mindsdb.utilities import log -from mindsdb.utilities.sentry import sentry_sdk # noqa: F401 - -logger = log.getLogger(__name__) - - -def _save_thread_link(func: Callable) -> Callable: - """Decorator for MLTaskConsumer. - Save thread in which func is executed to a list. - """ - - @wraps(func) - def wrapper(self, *args, **kwargs) -> None: - current_thread = threading.current_thread() - self._listen_message_threads.append(current_thread) - try: - result = func(self, *args, **kwargs) - finally: - self._listen_message_threads.remove(current_thread) - return result - - return wrapper - - -class MLTaskConsumer(BaseRedisQueue): - """Listener of ML tasks queue and tasks executioner. - Each new message waited and executed in separate thread. 
- - Attributes: - _ready_event (Event): set if ready to start new queue listen thread - _stop_event (Event): set if need to stop all threads/processes - cpu_stat (list[float]): CPU usage statistic. Each value is 0-100 float representing CPU usage in % - _collect_cpu_stat_thread (Thread): pointer to thread that collecting CPU usage statistic - _listen_message_threads (list[Thread]): list of pointers to threads where queue messages are listening/processing - db (Redis): database object - cache: redis cache abstrtaction - consumer_group: redis consumer group object - """ - - def __init__(self) -> None: - self._ready_event = threading.Event() - self._ready_event.set() - - self._stop_event = threading.Event() - self._stop_event.clear() - - process_cache.init() - - # region collect cpu usage statistic - self.cpu_stat = [0] * 10 - self._collect_cpu_stat_thread = threading.Thread( - target=self._collect_cpu_stat, name="MLTaskConsumer._collect_cpu_stat" - ) - self._collect_cpu_stat_thread.start() - # endregion - - self._listen_message_threads = [] - - # region connect to redis - config = Config().get("ml_task_queue", {}) - self.db = Database( - host=config.get("host", "localhost"), - port=config.get("port", 6379), - db=config.get("db", 0), - username=config.get("username"), - password=config.get("password"), - protocol=3, - ) - self.wait_redis_ping(60) - - self.db.Stream(TASKS_STREAM_NAME) - self.cache = self.db.cache() - self.consumer_group = self.db.consumer_group(TASKS_STREAM_CONSUMER_GROUP_NAME, [TASKS_STREAM_NAME]) - self.consumer_group.create() - self.consumer_group.consumer(TASKS_STREAM_CONSUMER_NAME) - # endregion - - def _collect_cpu_stat(self) -> None: - """Collect CPU usage statistic. 
Executerd in thread.""" - while self._stop_event.is_set() is False: - self.cpu_stat = self.cpu_stat[1:] - self.cpu_stat.append(psutil.cpu_percent()) - time.sleep(1) - - def get_avg_cpu_usage(self) -> float: - """get average CPU usage for last period (10s by default) - - Returns: - float: 0-100 value, average CPU usage - """ - return sum(self.cpu_stat) / len(self.cpu_stat) - - def wait_free_resources(self) -> None: - """Sleep in thread untill there are free resources. Checks: - - avg CPU usage is less than 60% - - current CPU usage is less than 60% - - current tasks count is less than (N CPU cores) / 8 - """ - config = Config() - is_cloud = config.get("cloud", False) - processes_dir = Path(tempfile.gettempdir()).joinpath("mindsdb/processes/learn/") - while True: - while self.get_avg_cpu_usage() > 60 or max(self.cpu_stat[-3:]) > 60: - time.sleep(1) - if is_cloud and processes_dir.is_dir(): - clean_unlinked_process_marks() - while (len(list(processes_dir.iterdir())) * 8) >= os.cpu_count(): - time.sleep(1) - clean_unlinked_process_marks() - if (self.get_avg_cpu_usage() > 60 or max(self.cpu_stat[-3:]) > 60) is False: - return - - @_save_thread_link - def _listen(self) -> None: - """Listen message queue untill get new message. 
Execute task.""" - message = None - while message is None: - self.wait_free_resources() - self.wait_redis_ping() - if self._stop_event.is_set(): - return - - try: - message = self.consumer_group.read(count=1, block=1000, consumer=TASKS_STREAM_CONSUMER_NAME) - except RedisConnectionError: - logger.exception("Can't connect to Redis:") - self._stop_event.set() - return - except Exception: - self._stop_event.set() - raise - - if message.get(TASKS_STREAM_NAME) is None or len(message.get(TASKS_STREAM_NAME)) == 0: - message = None - - try: - message = message[TASKS_STREAM_NAME][0][0] - message_id = message[0].decode() - message_content = message[1] - self.consumer_group.streams[TASKS_STREAM_NAME].ack(message_id) - self.consumer_group.streams[TASKS_STREAM_NAME].delete(message_id) - - payload = from_bytes(message_content[b"payload"]) - task_type = ML_TASK_TYPE(message_content[b"task_type"]) - model_id = int(message_content[b"model_id"]) - - redis_key = RedisKey(message_content.get(b"redis_key")) - - # region read dataframe - dataframe_bytes = self.cache.get(redis_key.dataframe) - dataframe = None - if dataframe_bytes is not None: - dataframe = from_bytes(dataframe_bytes) - self.cache.delete(redis_key.dataframe) - # endregion - - ctx.load(payload["context"]) - finally: - self._ready_event.set() - - try: - task = process_cache.apply_async( - task_type=task_type, model_id=model_id, payload=payload, dataframe=dataframe - ) - status_notifier = StatusNotifier(redis_key, ML_TASK_STATUS.PROCESSING, self.db, self.cache) - status_notifier.start() - result = task.result() - except Exception as e: - self.wait_redis_ping() - status_notifier.stop() - exception_bytes = to_bytes(e) - self.cache.set(redis_key.exception, exception_bytes, 10) - self.db.publish(redis_key.status, ML_TASK_STATUS.ERROR.value) - self.cache.set(redis_key.status, ML_TASK_STATUS.ERROR.value, 180) - else: - self.wait_redis_ping() - status_notifier.stop() - if isinstance(result, DataFrame): - dataframe_bytes = 
to_bytes(result) - self.cache.set(redis_key.dataframe, dataframe_bytes, 10) - self.db.publish(redis_key.status, ML_TASK_STATUS.COMPLETE.value) - self.cache.set(redis_key.status, ML_TASK_STATUS.COMPLETE.value, 180) - - def run(self) -> None: - """Start new listen thread each time when _ready_event is set""" - self._ready_event.set() - while self._stop_event.is_set() is False: - self._ready_event.wait(timeout=1) - if self._ready_event.is_set() is False: - continue - self._ready_event.clear() - threading.Thread(target=self._listen, name="MLTaskConsumer._listen").start() - self.stop() - - def stop(self) -> None: - """Stop all executing threads""" - self._stop_event.set() - for thread in (*self._listen_message_threads, self._collect_cpu_stat_thread): - try: - if thread.is_alive(): - thread.join() - except Exception: - pass - - -@mark_process(name="internal", custom_mark="ml_task_consumer") -def start(verbose: bool) -> None: - """Create task queue consumer and start listen the queue""" - consumer = MLTaskConsumer() - signal.signal(signal.SIGTERM, lambda _x, _y: consumer.stop()) - try: - consumer.run() - except Exception as e: - consumer.stop() - logger.error(f"Got exception: {e}", flush=True) - raise - finally: - logger.info("Consumer process stopped", flush=True) diff --git a/mindsdb/utilities/ml_task_queue/producer.py b/mindsdb/utilities/ml_task_queue/producer.py deleted file mode 100644 index 52dc669e5f5..00000000000 --- a/mindsdb/utilities/ml_task_queue/producer.py +++ /dev/null @@ -1,78 +0,0 @@ -import pickle - -from walrus import Database -from pandas import DataFrame - -from mindsdb.utilities.context import context as ctx -from mindsdb.utilities.config import Config -from mindsdb.utilities.ml_task_queue.utils import RedisKey, to_bytes -from mindsdb.utilities.ml_task_queue.task import Task -from mindsdb.utilities.ml_task_queue.base import BaseRedisQueue -from mindsdb.utilities.ml_task_queue.const import TASKS_STREAM_NAME, ML_TASK_TYPE, ML_TASK_STATUS -from 
mindsdb.utilities import log -from mindsdb.utilities.sentry import sentry_sdk # noqa: F401 - -logger = log.getLogger(__name__) - - -class MLTaskProducer(BaseRedisQueue): - """Interface around the redis for putting tasks to the queue - - Attributes: - db (Redis): database object - stream - cache - pubsub - """ - - def __init__(self) -> None: - config = Config().get("ml_task_queue", {}) - - self.db = Database( - host=config.get("host", "localhost"), - port=config.get("port", 6379), - db=config.get("db", 0), - username=config.get("username"), - password=config.get("password"), - protocol=3, - ) - self.wait_redis_ping(60) - - self.stream = self.db.Stream(TASKS_STREAM_NAME) - self.cache = self.db.cache() - self.pubsub = self.db.pubsub() - - def apply_async(self, task_type: ML_TASK_TYPE, model_id: int, payload: dict, dataframe: DataFrame = None) -> Task: - """Add tasks to the queue - - Args: - task_type (ML_TASK_TYPE): type of the task - model_id (int): model identifier - payload (dict): lightweight model data that will be added to stream message - dataframe (DataFrame): dataframe will be transfered via regular redis storage - - Returns: - Task: object representing the task - """ - try: - payload = pickle.dumps(payload, protocol=5) - redis_key = RedisKey.new() - message = { - "task_type": task_type.value, - "company_id": ctx.company_id, - "user_id": ctx.user_id, - "model_id": model_id, - "payload": payload, - "redis_key": redis_key.base, - } - - self.wait_redis_ping() - if dataframe is not None: - self.cache.set(redis_key.dataframe, to_bytes(dataframe), 180) - self.cache.set(redis_key.status, ML_TASK_STATUS.WAITING, 180) - - self.stream.add(message) - return Task(self.db, redis_key) - except ConnectionError: - logger.exception("Cant send message to redis: connect failed") - raise diff --git a/mindsdb/utilities/ml_task_queue/task.py b/mindsdb/utilities/ml_task_queue/task.py deleted file mode 100644 index e7e771c6524..00000000000 --- 
a/mindsdb/utilities/ml_task_queue/task.py +++ /dev/null @@ -1,79 +0,0 @@ -from collections.abc import Callable - -import redis -from pandas import DataFrame - -from mindsdb.utilities.ml_task_queue.utils import RedisKey, from_bytes -from mindsdb.utilities.ml_task_queue.const import ML_TASK_STATUS - - -class Task: - """Abstraction for ML task. Should have interface similat to concurrent.futures.Future - - Attributes: - db (Redis): database object - redis_key (RedisKey): redis keys associated with task - dataframe (DataFrame): task result - exception (Exception): task exeuton runtime exception - _timeout (int): max time without status updating - """ - - def __init__(self, connection: redis.Redis, redis_key: RedisKey) -> None: - self.db = connection - self.redis_key = redis_key - self.dataframe = None - self.exception = None - self._timeout = 60 - - def subscribe(self) -> ML_TASK_STATUS: - """return tasks status untill it is not done or failed""" - pubsub = self.db.pubsub() - cache = self.db.cache() - pubsub.subscribe(self.redis_key.status) - while msg := pubsub.get_message(timeout=self._timeout): - if msg["type"] not in pubsub.PUBLISH_MESSAGE_TYPES: - continue - ml_task_status = ML_TASK_STATUS(msg["data"]) - if ml_task_status == ML_TASK_STATUS.COMPLETE: - dataframe_bytes = cache.get(self.redis_key.dataframe) - if dataframe_bytes is not None: - self.dataframe = from_bytes(dataframe_bytes) - cache.delete(self.redis_key.dataframe) - elif ml_task_status == ML_TASK_STATUS.ERROR: - exception_bytes = cache.get(self.redis_key.exception) - if exception_bytes is not None: - self.exception = from_bytes(exception_bytes) - yield ml_task_status - else: - # there is no mesasges, timeout - ml_task_status = ML_TASK_STATUS.TIMEOUT - yield ml_task_status - - def wait(self, status: ML_TASK_STATUS = ML_TASK_STATUS.COMPLETE) -> None: - """block threasd untill task is not done or failed""" - for status in self.subscribe(): - if status in (ML_TASK_STATUS.WAITING, ML_TASK_STATUS.PROCESSING): 
- continue - if status == ML_TASK_STATUS.ERROR: - if self.exception is not None: - raise self.exception - else: - raise Exception("Unknown error during ML task execution") - if status == ML_TASK_STATUS.TIMEOUT: - raise Exception(f"Can't get answer in {self._timeout} seconds") - if status == ML_TASK_STATUS.COMPLETE: - return - raise KeyError("Unknown task status") - - def result(self) -> DataFrame: - """wait task is done and return result - - Returns: - DataFrame: task result - """ - self.wait() - return self.dataframe - - def add_done_callback(self, fn: Callable) -> None: - """need for compatability with concurrent.futures.Future interface""" - pass diff --git a/mindsdb/utilities/ml_task_queue/utils.py b/mindsdb/utilities/ml_task_queue/utils.py deleted file mode 100644 index 504f52fdb29..00000000000 --- a/mindsdb/utilities/ml_task_queue/utils.py +++ /dev/null @@ -1,121 +0,0 @@ -import time -import pickle -import socket -import threading - -from walrus import Database -from redis.exceptions import ConnectionError as RedisConnectionError - -from mindsdb.utilities.context import context as ctx -from mindsdb.utilities.ml_task_queue.const import ML_TASK_STATUS -from mindsdb.utilities.sentry import sentry_sdk # noqa: F401 - - -def to_bytes(obj: object) -> bytes: - """dump object into bytes - - Args: - obj (object): object to convert - - Returns: - bytes - """ - return pickle.dumps(obj, protocol=5) - - -def from_bytes(b: bytes) -> object: - """load object from bytes - - Args: - b (bytes): - - Returns: - object - """ - return pickle.loads(b) - - -def wait_redis_ping(db: Database, timeout: int = 30): - """Wait when redis.ping return True - - Args: - db (Database): redis db object - timeout (int): seconds to wait for success ping - - Raises: - RedisConnectionError: if `ping` did not return `True` within `timeout` seconds - """ - end_time = time.time() + timeout - while time.time() <= end_time: - try: - if db.ping() is True: - break - except RedisConnectionError: - pass - 
time.sleep(2) - else: - raise RedisConnectionError - - -class RedisKey: - """The class responsible for unique task keys in redis - - Attributes: - _base_key (bytes): prefix for keys - """ - - @staticmethod - def new() -> "RedisKey": - timestamp = str(time.time()).replace(".", "") - return RedisKey(f"{timestamp}-{ctx.company_id}-{ctx.user_id}-{socket.gethostname()}".encode()) - - def __init__(self, base_key: bytes) -> None: - self._base_key = base_key - - @property - def base(self) -> bytes: - return self._base_key - - @property - def status(self) -> str: - return (self._base_key + b"-status").decode() - - @property - def dataframe(self) -> str: - return (self._base_key + b"-dataframe").decode() - - @property - def exception(self) -> str: - return (self._base_key + b"-exception").decode() - - -class StatusNotifier(threading.Thread): - """Worker that updates task status in redis with fixed frequency""" - - def __init__(self, redis_key: RedisKey, ml_task_status: ML_TASK_STATUS, db, cache) -> None: - threading.Thread.__init__(self) - self.redis_key = redis_key - self.ml_task_status = ml_task_status - self.db = db - self.cache = cache - self._stop_event = threading.Event() - - def set_status(self, ml_task_status: ML_TASK_STATUS): - """change status - - Args: - ml_task_status (ML_TASK_STATUS): new status - """ - self.ml_task_status = ml_task_status - - def stop(self) -> None: - """stop status updating""" - self._stop_event.set() - - def run(self): - """start update status with fixed frequency""" - while not self._stop_event.is_set(): - wait_redis_ping(self.db) - self.db.publish(self.redis_key.status, self.ml_task_status.value) - self.cache.set(self.redis_key.status, self.ml_task_status.value, 180) - time.sleep(5) diff --git a/mindsdb/utilities/otel/__init__.py b/mindsdb/utilities/otel/__init__.py deleted file mode 100644 index c381f0fe561..00000000000 --- a/mindsdb/utilities/otel/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -import os - -# By default, we have Open 
Telemetry SDK enabled on all envs, except for local which is disabled by default. -OTEL_SDK_DISABLED = (os.getenv("OTEL_SDK_DISABLED", "false").lower() == "true" - or os.getenv("OTEL_SERVICE_ENVIRONMENT", "local").lower() == "local") - -# If you want to enable Open Telemetry on local for some reason please set OTEL_SDK_FORCE_RUN to true -OTEL_SDK_FORCE_RUN = os.getenv("OTEL_SDK_FORCE_RUN", "false").lower() == "true" - -OTEL_ENABLED = not OTEL_SDK_DISABLED or OTEL_SDK_FORCE_RUN - -def increment_otel_query_request_counter(metadata: dict) -> None: - pass - -trace = None -if OTEL_ENABLED: - try: - from mindsdb.utilities.otel.prepare import trace - from mindsdb.utilities.otel.metric_handlers import increment_otel_query_request_counter - except Exception: - pass - diff --git a/mindsdb/utilities/otel/logger.py b/mindsdb/utilities/otel/logger.py deleted file mode 100644 index 5836fb3179a..00000000000 --- a/mindsdb/utilities/otel/logger.py +++ /dev/null @@ -1,25 +0,0 @@ -import logging - -from opentelemetry._logs import set_logger_provider -from opentelemetry.sdk._logs._internal.export import LogExporter -from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler -from opentelemetry.sdk._logs.export import BatchLogRecordProcessor -from opentelemetry.sdk.resources import Resource - -from mindsdb.utilities.log import get_mindsdb_log_level - - -def setup_logger(resource: Resource, exporter: LogExporter) -> None: - """ - Setup OpenTelemetry logging - """ - mindsdb_log_level = get_mindsdb_log_level() - - logger_provider = LoggerProvider(resource=resource) - set_logger_provider(logger_provider) - - logger_provider.add_log_record_processor(BatchLogRecordProcessor(exporter)) - handler = LoggingHandler(level=mindsdb_log_level, logger_provider=logger_provider) - - # Attach OTLP handler to root logger - logging.getLogger().addHandler(handler) diff --git a/mindsdb/utilities/otel/meter.py b/mindsdb/utilities/otel/meter.py deleted file mode 100644 index 645c2048fe1..00000000000 
--- a/mindsdb/utilities/otel/meter.py +++ /dev/null @@ -1,19 +0,0 @@ -from opentelemetry import metrics -from opentelemetry.sdk.metrics import MeterProvider -from opentelemetry.sdk.resources import Resource -from opentelemetry.sdk.metrics.export import ( - MetricExporter, - PeriodicExportingMetricReader, -) - - -def setup_meter(resource: Resource, exporter: MetricExporter) -> None: - """ - Setup OpenTelemetry metrics - """ - - metric_reader = PeriodicExportingMetricReader(exporter=exporter) - provider = MeterProvider(resource=resource, metric_readers=[metric_reader]) - - # Sets the global default meter provider - metrics.set_meter_provider(provider) diff --git a/mindsdb/utilities/otel/metric_handlers/__init__.py b/mindsdb/utilities/otel/metric_handlers/__init__.py deleted file mode 100644 index cb616e0182a..00000000000 --- a/mindsdb/utilities/otel/metric_handlers/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from mindsdb.utilities.otel.prepare import metrics, OTEL_SERVICE_NAME - -_query_request_counter = None - - -def get_query_request_counter(): - """ - Get the query request counter - """ - global _query_request_counter - - # Create the counter if it does not exist - if _query_request_counter is None: - meter_name = f"{OTEL_SERVICE_NAME}.query_service_meter" - - # Get the meter from the main metrics object - meter = metrics.get_meter(meter_name) - - _query_request_counter = meter.create_counter( - name="query_request_count", - description="Counts the number of times the SQL query is called", - unit="1", - ) - - return _query_request_counter - -def increment_otel_query_request_counter(metadata: dict) -> None: - query_request_counter = get_query_request_counter() - query_request_counter.add(1, metadata) diff --git a/mindsdb/utilities/otel/prepare.py b/mindsdb/utilities/otel/prepare.py deleted file mode 100644 index 579aa803d41..00000000000 --- a/mindsdb/utilities/otel/prepare.py +++ /dev/null @@ -1,198 +0,0 @@ -import os -import typing - -from opentelemetry import 
trace # noqa: F401 -from opentelemetry import metrics # noqa: F401 -from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter as OTLPLogExporterGRPC -from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter as OTLPLogExporterHTTP -from opentelemetry.sdk._logs._internal.export import LogExporter -from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter as OTLPMetricExporterGRPC -from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as OTLPMetricExporterHTTP -from opentelemetry.sdk.metrics.export import MetricExporter, ConsoleMetricExporter -from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter as OTLPSpanExporterGRPC -from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as OTLPSpanExporterHTTP -from opentelemetry.sdk.trace.export import SpanExporter, ConsoleSpanExporter -from opentelemetry.sdk.resources import Resource -from opentelemetry.sdk.trace.sampling import TraceIdRatioBased - -from mindsdb.utilities.otel.logger import setup_logger -from mindsdb.utilities.otel.meter import setup_meter -from mindsdb.utilities.otel.tracer import setup_tracer -from mindsdb.utilities.utils import parse_csv_attributes -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - -# Check OpenTelemetry exporter type -OTEL_EXPORTER_TYPE = os.getenv("OTEL_EXPORTER_TYPE", "console") # console or otlp - -# Define OpenTelemetry exporter protocol -OTEL_EXPORTER_PROTOCOL = os.getenv("OTEL_EXPORTER_PROTOCOL", "grpc") # grpc or http - -# Define OTLP endpoint. If not set, the default OTLP endpoint will be used -OTEL_OTLP_ENDPOINT = os.getenv("OTEL_OTLP_ENDPOINT", "http://localhost:4317") - -# Define OTLP logging endpoint. If not set, the default OTLP logging endpoint will be used -OTEL_OTLP_LOGGING_ENDPOINT = os.getenv("OTEL_OTLP_LOGGING_ENDPOINT", OTEL_OTLP_ENDPOINT) - -# Define OTLP tracing endpoint. 
If not set, the default OTLP tracing endpoint will be used -OTEL_OTLP_TRACING_ENDPOINT = os.getenv("OTEL_OTLP_TRACING_ENDPOINT", OTEL_OTLP_ENDPOINT) - -# Define OTLP metrics endpoint. If not set, the default OTLP metrics endpoint will be used -OTEL_OTLP_METRICS_ENDPOINT = os.getenv("OTEL_OTLP_METRICS_ENDPOINT", OTEL_OTLP_ENDPOINT) - -# Define service name -OTEL_SERVICE_NAME = os.getenv("OTEL_SERVICE_NAME", "mindsdb") - -# Define service instace ID -OTEL_SERVICE_INSTANCE_ID = os.getenv("OTEL_SERVICE_INSTANCE_ID", "mindsdb-instance") - -# The name of the environment we"re on, by default local for development, this is set differently per-env in our Helm -# chart values files -OTEL_SERVICE_ENVIRONMENT = os.getenv("OTEL_SERVICE_ENVIRONMENT", "local").lower() - -# Define service release -OTEL_SERVICE_RELEASE = os.getenv("OTEL_SERVICE_RELEASE", "local").lower() - -# Define how often to capture traces -OTEL_TRACE_SAMPLE_RATE = float(os.getenv("OTEL_TRACE_SAMPLE_RATE", "1.0")) - -# Define extra attributes -OTEL_EXTRA_ATTRIBUTES = os.getenv("OTEL_EXTRA_ATTRIBUTES", "") - -# Define if OpenTelemetry logging is disabled. By default, it is disabled. -OTEL_LOGGING_DISABLED = os.getenv("OTEL_LOGGING_DISABLED", "true").lower() == "true" - -# Define if OpenTelemetry tracing is disabled. By default, it is enabled. -OTEL_TRACING_DISABLED = os.getenv("OTEL_TRACING_DISABLED", "false").lower() == "true" - -# Define if OpenTelemetry metrics is disabled. By default, it is disabled. 
-OTEL_METRICS_DISABLED = os.getenv("OTEL_METRICS_DISABLED", "true").lower() == "true" - - -def get_otel_attributes() -> dict: - """ - Get OpenTelemetry attributes - - Returns: - dict: OpenTelemetry attributes - """ - - base_attributes = { - "service.name": OTEL_SERVICE_NAME, - "service.instance.id": OTEL_SERVICE_INSTANCE_ID, - "environment": OTEL_SERVICE_ENVIRONMENT, - "release": OTEL_SERVICE_RELEASE, - } - - extra_attributes = {} - try: - extra_attributes = parse_csv_attributes(OTEL_EXTRA_ATTRIBUTES) - except Exception as e: - logger.error(f"Failed to parse OTEL_EXTRA_ATTRIBUTES: {e}") - - attributes = {**extra_attributes, **base_attributes} # Base attributes take precedence over extra attributes - - return attributes - - -def get_logging_exporter() -> typing.Optional[LogExporter]: - """ - Get OpenTelemetry logging exporter. - - Returns: - OTLPLogExporter: OpenTelemetry logging exporter - """ - - if OTEL_EXPORTER_TYPE == "otlp": - - if OTEL_EXPORTER_PROTOCOL == "grpc": - return OTLPLogExporterGRPC( - endpoint=OTEL_OTLP_LOGGING_ENDPOINT, - insecure=True - ) - - elif OTEL_EXPORTER_PROTOCOL == "http": - return OTLPLogExporterHTTP( - endpoint=OTEL_OTLP_LOGGING_ENDPOINT - ) - - return None - - -def get_span_exporter() -> SpanExporter: - """ - Get OpenTelemetry span exporter - - Returns: - OTLPSpanExporter: OpenTelemetry span exporter - """ - - if OTEL_EXPORTER_TYPE == "otlp": - - if OTEL_EXPORTER_PROTOCOL == "grpc": - return OTLPSpanExporterGRPC( - endpoint=OTEL_OTLP_TRACING_ENDPOINT, - insecure=True - ) - - elif OTEL_EXPORTER_PROTOCOL == "http": - return OTLPSpanExporterHTTP( - endpoint=OTEL_OTLP_TRACING_ENDPOINT - ) - - return ConsoleSpanExporter() - - -def get_metrics_exporter() -> typing.Optional[MetricExporter]: - """ - Get OpenTelemetry metrics exporter - - Returns: - OTLPLogExporter: OpenTelemetry metrics exporter - """ - - if OTEL_EXPORTER_TYPE == "otlp": - - if OTEL_EXPORTER_PROTOCOL == "grpc": - return OTLPMetricExporterGRPC( - 
endpoint=OTEL_OTLP_METRICS_ENDPOINT, - insecure=True - ) - - elif OTEL_EXPORTER_PROTOCOL == "http": - return OTLPMetricExporterHTTP( - endpoint=OTEL_OTLP_METRICS_ENDPOINT - ) - - return ConsoleMetricExporter() - - -logger.info("OpenTelemetry enabled") -logger.info(f"OpenTelemetry exporter type: {OTEL_EXPORTER_TYPE}") -logger.info(f"OpenTelemetry service name: {OTEL_SERVICE_NAME}") -logger.info(f"OpenTelemetry service environment: {OTEL_SERVICE_ENVIRONMENT}") -logger.info(f"OpenTelemetry service release: {OTEL_SERVICE_RELEASE}") -logger.info(f"OpenTelemetry trace sample rate: {OTEL_TRACE_SAMPLE_RATE}") -logger.info(f"OpenTelemetry extra attributes: {OTEL_EXTRA_ATTRIBUTES}") - -# Define OpenTelemetry resources (e.g., service name) -attributes = get_otel_attributes() - -# Define OpenTelemetry sampler -sampler = TraceIdRatioBased(OTEL_TRACE_SAMPLE_RATE) - -# Define OpenTelemetry resources (e.g., service name) -resource = Resource(attributes=attributes) - -if not OTEL_LOGGING_DISABLED: - logger.info("OpenTelemetry Logging is enabled") - setup_logger(resource, get_logging_exporter()) - -if not OTEL_TRACING_DISABLED: - logger.info("OpenTelemetry Tracing is enabled") - setup_tracer(resource, sampler, get_span_exporter()) - -if not OTEL_METRICS_DISABLED: - logger.info("OpenTelemetry Metrics is enabled") - setup_meter(resource, get_metrics_exporter()) diff --git a/mindsdb/utilities/otel/tracer.py b/mindsdb/utilities/otel/tracer.py deleted file mode 100644 index e3893d5e37b..00000000000 --- a/mindsdb/utilities/otel/tracer.py +++ /dev/null @@ -1,16 +0,0 @@ -from opentelemetry import trace -from opentelemetry.sdk.resources import Resource -from opentelemetry.sdk.trace import TracerProvider -from opentelemetry.sdk.trace.export import BatchSpanProcessor, SpanExporter -from opentelemetry.sdk.trace.sampling import Sampler - - -def setup_tracer(resource: Resource, sampler: Sampler, exporter: SpanExporter) -> None: - """ - Setup OpenTelemetry tracing - """ - # Set the tracer provider 
with the custom resource - trace.set_tracer_provider(TracerProvider(resource=resource, sampler=sampler)) - - # Replace the default span processor with the custom one - trace.get_tracer_provider().add_span_processor(BatchSpanProcessor(exporter)) diff --git a/mindsdb/utilities/partitioning.py b/mindsdb/utilities/partitioning.py deleted file mode 100644 index 013432abf8f..00000000000 --- a/mindsdb/utilities/partitioning.py +++ /dev/null @@ -1,67 +0,0 @@ -import os -from typing import Iterable, Callable -import pandas as pd - -from mindsdb.utilities.config import Config -from mindsdb.utilities.context_executor import execute_in_threads - - -def get_max_thread_count() -> int: - """ - Calculate the maximum number of threads allowed for the system. - """ - # workers count - is_cloud = Config().is_cloud - if is_cloud: - max_threads = int(os.getenv('MINDSDB_MAX_PARTITIONING_THREADS', 10)) - else: - max_threads = os.cpu_count() - 3 - - if max_threads < 1: - max_threads = 1 - - return max_threads - - -def split_data_frame(df: pd.DataFrame, partition_size: int) -> Iterable[pd.DataFrame]: - """ - Split data frame into chunks with partition_size and yield them out - """ - chunk = 0 - while chunk * partition_size < len(df): - # create results with partition - df1 = df.iloc[chunk * partition_size: (chunk + 1) * partition_size] - chunk += 1 - yield df1 - - -def process_dataframe_in_partitions(df: pd.DataFrame, callback: Callable, partition_size: int) -> Iterable: - """ - Splits dataframe into partitions and apply callback on each partition - - :param df: input dataframe - :param callback: function to apply on each partition - :param partition_size: size of each partition - :return: yield results - """ - - # tasks - - tasks = split_data_frame(df, partition_size) - - max_threads = get_max_thread_count() - - chunk_count = int(len(df) / partition_size) - # don't exceed chunk_count - if chunk_count > 0: - max_threads = min(max_threads, chunk_count) - - if max_threads == 1: - # don't 
spawn threads - - for task in tasks: - yield callback(task) - - else: - for result in execute_in_threads(callback, tasks, thread_count=max_threads): - yield result diff --git a/mindsdb/utilities/profiler/README.md b/mindsdb/utilities/profiler/README.md deleted file mode 100644 index 4098079ed11..00000000000 --- a/mindsdb/utilities/profiler/README.md +++ /dev/null @@ -1,68 +0,0 @@ -## Overview - -The module allows you to analyze performance of individual query. - -It stores in context structure: -``` -{ - 'enabled': True, - 'tree': { - 'start_at': 123456, - 'stop_at': None, - 'name': 'root node', - 'children': [{ - 'start_at': 234567, - 'stop_at': None, - 'name': 'child node', - 'children': [] - }], - }, - 'pointer': [1], - 'level': 0 -} -``` - - enabled - is profiling enabled at the moment or not - - tree - nested dict with tree nodes - - pointer - list of integers which indicates index of node chiled on each level. Using that list is possible to get current node. - - level - interer, indicates how deep in the tree we are at the moment. This value is changing even if `enabled is False`. It required because `enabled` may be cahnged at any moment. If `enabled is True` then we start to collect nodes only if `level == 1`. - -Also initial profiling structure may be expanded with additional keys using `.set_meta` method. - -Tree node structure: -``` -{ - 'start_at': timestamp, - 'stop_at': timestamp, - 'name': str, - 'children': [list of nodes] -} -``` - -To start/stop collect info need to do: -``` -set profiling=true; --- execute investigated queries -set profiling=false; -``` - -## API - -There are 3 ways to use: - -1. manually add start/stop in the code: -``` -start('my tag') -function() -stop() -``` -2. context: -``` -with Context('my tag'): - function() -``` -3. decorator: -``` -@profile('my tag') -def function(): - ... 
-``` diff --git a/mindsdb/utilities/profiler/__init__.py b/mindsdb/utilities/profiler/__init__.py deleted file mode 100644 index b03311ca2a4..00000000000 --- a/mindsdb/utilities/profiler/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindsdb.utilities.profiler.profiler import ( - start, - stop, - Context, - profile, - enable, - disable, - set_meta -) - -__all__ = [ - 'start', - 'stop', - 'Context', - 'profile', - 'enable', - 'disable', - 'set_meta' -] diff --git a/mindsdb/utilities/profiler/profiler.py b/mindsdb/utilities/profiler/profiler.py deleted file mode 100644 index d84c459d443..00000000000 --- a/mindsdb/utilities/profiler/profiler.py +++ /dev/null @@ -1,143 +0,0 @@ -import time -from datetime import datetime, timezone -from functools import wraps - -import mindsdb.utilities.hooks as hooks -from mindsdb.utilities.config import Config -from mindsdb.utilities.context import context as ctx - - -def _get_current_node(profiling: dict) -> dict: - """return the node that the pointer points to - - Args: - profiling (dict): whole profiling data - - Returns: - dict: current node - """ - current_node = profiling["tree"] - for child_index in profiling["pointer"]: - current_node = current_node["children"][child_index] - return current_node - - -def start_node(tag: str): - """Add a new node to profiling - - Args: - tag (str): name of new node - """ - profiling = ctx.profiling - new_node = { - "start_at": time.perf_counter(), - "start_at_thread": time.thread_time(), - "start_at_process": time.process_time(), - "stop_at": None, - "name": tag, - "children": [], - } - if profiling["pointer"] is None: - if profiling["level"] != 1: - # profiling was activated not in the root of nodes tree - return - profiling["pointer"] = [] - profiling["tree"] = new_node - profiling["tree"]["time_start_at"] = datetime.now(timezone.utc) - else: - current_node = _get_current_node(profiling) - profiling["pointer"].append(len(current_node["children"])) - current_node["children"].append(new_node) 
- - -def stop_current_node(): - """Mark current node as completed and move pointer up""" - profiling = ctx.profiling - if profiling["pointer"] is None: - # profiling was activated not in the root of nodes tree - return - current_node = _get_current_node(profiling) - current_node["stop_at"] = time.perf_counter() - current_node["stop_at_thread"] = time.thread_time() - current_node["stop_at_process"] = time.process_time() - if len(profiling["pointer"]) > 0: - profiling["pointer"] = profiling["pointer"][:-1] - else: - if ctx.profiling["enabled"] is True: - _send_profiling_results() - profiling["pointer"] = None - - -def set_meta(**kwargs): - """Add any additional info to profiling data - - Args: - **kwargs (dict): metadata to add - """ - if profiling_enabled() is True: - ctx.profiling.update(kwargs) - - -def _send_profiling_results(): - """Send profiling results to storage""" - ctx.profiling["company_id"] = ctx.company_id - ctx.profiling["user_id"] = ctx.user_id - ctx.profiling["hostname"] = Config().get("aws_meta_data", {}).get("public-hostname", "?") - ctx.profiling["instance_id"] = Config().get("aws_meta_data", {}).get("instance-id", "?") - hooks.send_profiling_results(ctx.profiling) - - -def enable(): - ctx.profiling["enabled"] = True - - -def disable(): - ctx.profiling["enabled"] = False - - -def profiling_enabled(): - try: - return ctx.profiling["enabled"] is True - except AttributeError: - return False - - -def start(tag): - """add new node to profiling data""" - ctx.profiling["level"] += 1 - if profiling_enabled() is True: - start_node(tag) - - -def stop(): - """finalize current node and move pointer up""" - ctx.profiling["level"] -= 1 - if profiling_enabled() is True: - stop_current_node() - - -class Context: - def __init__(self, tag): - self.tag = tag - - def __enter__(self): - start(self.tag) - - def __exit__(self, exc_type, exc_value, traceback): - stop() - - -def profile(tag: str = None): - def decorator(function): - @wraps(function) - def wrapper(*args, 
**kwargs): - if profiling_enabled() is True: - with Context(tag or f"{function.__name__}|{function.__module__}"): - result = function(*args, **kwargs) - else: - result = function(*args, **kwargs) - return result - - return wrapper - - return decorator diff --git a/mindsdb/utilities/ps.py b/mindsdb/utilities/ps.py deleted file mode 100644 index 1c7c44a8bb5..00000000000 --- a/mindsdb/utilities/ps.py +++ /dev/null @@ -1,78 +0,0 @@ -import sys -import time -from collections import namedtuple -import psutil - - -def get_child_pids(pid): - p = psutil.Process(pid=pid) - return p.children(recursive=True) - - -def net_connections(): - """Cross-platform psutil.net_connections like interface""" - if sys.platform.lower().startswith("linux"): - return psutil.net_connections() - - all_connections = [] - Pconn = None - for p in psutil.process_iter(["pid"]): - try: - process = psutil.Process(p.pid) - connections = process.net_connections() - if connections: - for conn in connections: - # Adding pid to the returned instance - # for consistency with psutil.net_connections() - if Pconn is None: - fields = list(conn._fields) - fields.append("pid") - _conn = namedtuple("Pconn", fields) - for attr in conn._fields: - setattr(_conn, attr, getattr(conn, attr)) - _conn.pid = p.pid - all_connections.append(_conn) - - except (psutil.AccessDenied, psutil.ZombieProcess, psutil.NoSuchProcess): - pass - return all_connections - - -def is_port_in_use(port_num): - """Check does any of child process uses specified port.""" - parent_process = psutil.Process() - child_pids = [x.pid for x in parent_process.children(recursive=True)] - conns = net_connections() - portsinuse = [x.laddr[1] for x in conns if x.pid in child_pids and x.status == "LISTEN"] - portsinuse.sort() - return int(port_num) in portsinuse - - -def wait_func_is_true(func, timeout, *args, **kwargs): - start_time = time.time() - - result = func(*args, **kwargs) - while result is False and (time.time() - start_time) < timeout: - 
time.sleep(2) - result = func(*args, **kwargs) - - return result - - -def wait_port(port_num, timeout): - return wait_func_is_true(func=is_port_in_use, timeout=timeout, port_num=port_num) - - -def get_listen_ports(pid): - try: - p = psutil.Process(pid) - cons = p.net_connections() - cons = [x.laddr.port for x in cons] - except Exception: - return [] - return cons - - -def is_pid_listen_port(pid, port): - ports = get_listen_ports(pid) - return int(port) in ports diff --git a/mindsdb/utilities/render/README.md b/mindsdb/utilities/render/README.md deleted file mode 100644 index 39e515a9826..00000000000 --- a/mindsdb/utilities/render/README.md +++ /dev/null @@ -1,44 +0,0 @@ - -# Render - -Renderer is using to convert AST-query to sql string using different sql dialects. - -## How to use - -```python -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender - -renderer = SqlalchemyRender('mysql') # select dialect -sql = renderer.get_string(ast_query, with_failback=True) -``` - -If with_failback==True: in case if sqlalchemy unable to render query -string will be returned from sql representation of AST-tree (with method to_string) - -## Parameterized queries - -For getting query with parametes use `get_exec_params` function of sqlachemy render (as alternative to get_string) -It doesn't inject params to query but returned them separated -```python -query_str, params = renderer.get_exec_params(ast_query) -``` -- query_str: insert into table values (%s, %s) -- params: [[1,2], [3,4]] - -In handler this function could be used for bulk insert (for example executemany in postgres) - -## Architecture - -Only one renderer is available at the moment: SqlalchemyRender. -- It converts AST-query to sqlalchemy query. 
-It uses [imperative](https://docs.sqlalchemy.org/en/14/orm/mapping_styles.html#orm-imperative-mapping) mapping for this -- Then created sqlalchemy object is compiled inside sqlalchemy using chosen dialect - -Supported dialects at the moment: mysql, postgresql, sqlite, mssql, oracle - -Notes: -- it is not possible to use more than 2 part in table name - - it can be (integration.table) or (schema.table) - - but can't be (integration.schema.table) -- sometimes conditions in rendered sql can be slightly changed, for example 'not a=b' to 'a!=b' - diff --git a/mindsdb/utilities/render/__init__.py b/mindsdb/utilities/render/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/utilities/render/sqlalchemy_render.py b/mindsdb/utilities/render/sqlalchemy_render.py deleted file mode 100644 index b07c34296d8..00000000000 --- a/mindsdb/utilities/render/sqlalchemy_render.py +++ /dev/null @@ -1,902 +0,0 @@ -import re -import datetime as dt - -import sqlalchemy as sa -from sqlalchemy.exc import SQLAlchemyError -from sqlalchemy.ext.compiler import compiles -from sqlalchemy.orm import aliased -from sqlalchemy.engine.interfaces import Dialect -from sqlalchemy.dialects import mysql, postgresql, sqlite, mssql, oracle -from sqlalchemy.schema import CreateTable, DropTable -from sqlalchemy.sql import operators, ColumnElement, functions as sa_fnc -from sqlalchemy.sql.expression import ClauseElement - -from mindsdb_sql_parser import ast - - -RESERVED_WORDS = {"collation"} - -sa_type_names = [ - key - for key, val in sa.types.__dict__.items() - if hasattr(val, "__module__") and val.__module__ in ("sqlalchemy.sql.sqltypes", "sqlalchemy.sql.type_api") -] - -types_map = {} -for type_name in sa_type_names: - types_map[type_name.upper()] = getattr(sa.types, type_name) -types_map["BOOL"] = types_map["BOOLEAN"] -types_map["DEC"] = types_map["DECIMAL"] - - -class RenderError(Exception): ... 
- - -# https://github.com/sqlalchemy/sqlalchemy/discussions/9483?sort=old#discussioncomment-5312979 -class INTERVAL(ColumnElement): - def __init__(self, info): - self.info = info - self.type = sa.Interval() - - -@compiles(INTERVAL) -def _compile_interval(element, compiler, **kw): - items = element.info.split(" ", maxsplit=1) - if compiler.dialect.name == "oracle" and len(items) == 2: - # replace to singular names (remove leading S if exists) - if items[1].upper().endswith("S"): - items[1] = items[1][:-1] - - if getattr(compiler.dialect, "driver", None) == "snowflake" or compiler.dialect.name == "postgresql": - # quote all - args = " ".join(map(str, items)) - args = f"'{args}'" - else: - # quote first element - items[0] = f"'{items[0]}'" - args = " ".join(items) - return "INTERVAL " + args - - -# region definitions of custom clauses for GROUP BY ROLLUP -# This will work also in DuckDB, as it use postgres dialect -class GroupByRollup(ClauseElement): - def __init__(self, *columns): - self.columns = columns - - -@compiles(GroupByRollup) -def visit_group_by_rollup(element, compiler, **kw): - columns = ", ".join([compiler.process(col, **kw) for col in element.columns]) - if compiler.dialect.name in ("mysql", "default"): - return f"{columns} WITH ROLLUP" - else: - return f"ROLLUP({columns})" - - -# endregion - - -class AttributedStr(str): - """ - Custom str-like object to pass it to `_requires_quotes` method with `is_quoted` flag - """ - - def __new__(cls, string, is_quoted: bool): - obj = str.__new__(cls, string) - obj.is_quoted = is_quoted - return obj - - def replace(self, *args): - obj = super().replace(*args) - return AttributedStr(obj, self.is_quoted) - - -def get_is_quoted(identifier: ast.Identifier): - quoted = getattr(identifier, "is_quoted", []) - # len can be different - quoted = quoted + [None] * (len(identifier.parts) - len(quoted)) - return quoted - - -dialects = { - "mysql": mysql, - "postgresql": postgresql, - "postgres": postgresql, - "sqlite": sqlite, - 
"mssql": mssql, - "oracle": oracle, -} - - -class SqlalchemyRender: - def __init__(self, dialect_name: str | Dialect): - if isinstance(dialect_name, str): - dialect = dialects[dialect_name].dialect - else: - dialect = dialect_name - - # override dialect's preparer - if hasattr(dialect, "preparer") and dialect.preparer.__name__ != "MDBPreparer": - - class MDBPreparer(dialect.preparer): - def _requires_quotes(self, value: str) -> bool: - # check force-quote flag - if isinstance(value, AttributedStr): - if value.is_quoted: - return True - - lc_value = value.lower() - return ( - lc_value in self.reserved_words - or value[0] in self.illegal_initial_characters - or not self.legal_characters.match(str(value)) - # Override sqlalchemy behavior: don't require to quote mixed- or upper-case - # or (lc_value != value) - ) - - dialect.preparer = MDBPreparer - - # remove double percent signs - # https://docs.sqlalchemy.org/en/14/faq/sqlexpressions.html#why-are-percent-signs-being-doubled-up-when-stringifying-sql-statements - self.dialect = dialect(paramstyle="named") - self.dialect.div_is_floordiv = False - - self.selects_stack = [] - - if dialect_name == "mssql": - # update version to MS_2012_VERSION for supports_multivalues_insert and offset - self.dialect.server_version_info = (12,) - self.dialect._setup_version_attributes() - elif dialect_name == "mysql": - # update version for support float cast - self.dialect.server_version_info = (8, 0, 17) - - def to_column(self, identifier: ast.Identifier) -> sa.Column: - # because sqlalchemy doesn't allow columns consist from parts therefore we do it manually - - parts2 = [] - - quoted = get_is_quoted(identifier) - for i, is_quoted in zip(identifier.parts, quoted): - if isinstance(i, ast.Star): - part = "*" - elif is_quoted or i.lower() in RESERVED_WORDS: - # quote anyway - part = self.dialect.identifier_preparer.quote_identifier(i) - else: - # quote if required - part = self.dialect.identifier_preparer.quote(i) - - parts2.append(part) 
- text = ".".join(parts2) - if identifier.is_outer and self.dialect.name == "oracle": - text += "(+)" - return sa.column(text, is_literal=True) - - def get_alias(self, alias): - if alias is None or len(alias.parts) == 0: - return None - if len(alias.parts) > 1: - raise NotImplementedError(f"Multiple alias {alias.parts}") - - if self.selects_stack: - self.selects_stack[-1]["aliases"].append(alias) - - is_quoted = get_is_quoted(alias)[0] - return AttributedStr(alias.parts[0], is_quoted) - - def make_unique_alias(self, name): - if self.selects_stack: - aliases = self.selects_stack[-1]["aliases"] - for i in range(10): - name2 = f"{name}_{i}" - if name2 not in aliases: - aliases.append(name2) - return name2 - - def to_expression(self, t): - # simple type - if isinstance(t, str) or isinstance(t, int) or isinstance(t, float) or t is None: - t = ast.Constant(t) - - if isinstance(t, ast.Star): - col = sa.text("*") - elif isinstance(t, ast.Last): - col = self.to_column(ast.Identifier(parts=["last"])) - elif isinstance(t, ast.Constant): - col = sa.literal(t.value) - if t.alias: - alias = self.get_alias(t.alias) - else: - if t.value is None: - alias = "NULL" - else: - alias = str(t.value) - col = col.label(alias) - elif isinstance(t, ast.Identifier): - # sql functions - col = None - if len(t.parts) == 1: - if isinstance(t.parts[0], str): - name = t.parts[0].upper() - if name == "CURRENT_DATE": - col = sa_fnc.current_date() - elif name == "CURRENT_TIME": - col = sa_fnc.current_time() - elif name == "CURRENT_TIMESTAMP": - col = sa_fnc.current_timestamp() - elif name == "CURRENT_USER": - col = sa_fnc.current_user() - if col is None: - col = self.to_column(t) - if t.alias: - alias_name = self.get_alias(t.alias) - # Skip self-referencing aliases (e.g., "column AS column") - if len(t.parts) == 1 and t.parts[0] == alias_name: - pass # Don't add alias if it matches the column name - else: - col = col.label(alias_name) - elif isinstance(t, ast.Select): - sub_stmt = 
self.prepare_select(t) - col = sub_stmt.scalar_subquery() - if t.alias: - alias = self.get_alias(t.alias) - col = col.label(alias) - elif isinstance(t, ast.Function): - col = self.to_function(t) - if t.alias: - alias = self.get_alias(t.alias) - col = col.label(alias) - else: - alias = self.make_unique_alias(str(t.op)) - if alias: - col = col.label(alias) - - elif isinstance(t, ast.BinaryOperation): - ops = { - "+": operators.add, - "-": operators.sub, - "*": operators.mul, - "/": operators.truediv, - "%": operators.mod, - "=": operators.eq, - "!=": operators.ne, - "<>": operators.ne, - ">": operators.gt, - "<": operators.lt, - ">=": operators.ge, - "<=": operators.le, - "is": operators.is_, - "is not": operators.is_not, - "like": operators.like_op, - "not like": operators.not_like_op, - "in": operators.in_op, - "not in": operators.not_in_op, - "||": operators.concat_op, - } - functions = { - "and": sa.and_, - "or": sa.or_, - } - - arg0 = self.to_expression(t.args[0]) - arg1 = self.to_expression(t.args[1]) - - op = t.op.lower() - if op in ("in", "not in"): - if t.args[1].parentheses: - arg1 = [arg1] - if isinstance(arg1, sa.sql.selectable.ColumnClause): - raise NotImplementedError(f"Required list argument for: {op}") - - sa_op = ops.get(op) - - if sa_op is not None: - if isinstance(arg0, sa.TextClause): - # text doesn't have operate method, reverse operator - col = arg1.reverse_operate(sa_op, arg0) - elif isinstance(arg1, sa.TextClause): - # both args are text, return text - col = sa.text(f"{arg0.compile(dialect=self.dialect)} {op} {arg1.compile(dialect=self.dialect)}") - else: - col = arg0.operate(sa_op, arg1) - - elif t.op.lower() in functions: - func = functions[t.op.lower()] - col = func(arg0, arg1) - else: - # for unknown operators wrap arguments into parens - if isinstance(t.args[0], ast.BinaryOperation): - arg0 = arg0.self_group() - if isinstance(t.args[1], ast.BinaryOperation): - arg1 = arg1.self_group() - - col = arg0.op(t.op)(arg1) - - if t.alias: - alias 
= self.get_alias(t.alias) - col = col.label(alias) - - elif isinstance(t, ast.UnaryOperation): - # not or munus - opmap = { - "NOT": "__invert__", - "-": "__neg__", - } - arg = self.to_expression(t.args[0]) - - method = opmap[t.op.upper()] - col = getattr(arg, method)() - if t.alias: - alias = self.get_alias(t.alias) - col = col.label(alias) - - elif isinstance(t, ast.BetweenOperation): - col0 = self.to_expression(t.args[0]) - lim_down = self.to_expression(t.args[1]) - lim_up = self.to_expression(t.args[2]) - - col = sa.between(col0, lim_down, lim_up) - elif isinstance(t, ast.Interval): - col = INTERVAL(t.args[0]) - if t.alias: - alias = self.get_alias(t.alias) - col = col.label(alias) - - elif isinstance(t, ast.WindowFunction): - func = self.to_expression(t.function) - - partition = None - if t.partition is not None: - partition = [self.to_expression(i) for i in t.partition] - - order_by = None - if t.order_by is not None: - order_by = [] - for f in t.order_by: - col0 = self.to_expression(f.field) - if f.direction == "DESC": - col0 = col0.desc() - order_by.append(col0) - - rows, range_ = None, None - if t.modifier is not None: - words = t.modifier.lower().split() - if words[1] == "between" and words[4] == "and": - # frame options - # rows/groups BETWEEN <> <> AND <> <> - # https://docs.sqlalchemy.org/en/20/core/sqlelement.html#sqlalchemy.sql.expression.over - items = [] - for word1, word2 in (words[2:4], words[5:7]): - if word1 == "unbounded": - items.append(None) - elif (word1, word2) == ("current", "row"): - items.append(0) - elif word1.isdigits(): - val = int(word1) - if word2 == "preceding": - val = -val - elif word2 != "following": - continue - items.append(val) - if len(items) == 2: - if words[0] == "rows": - rows = tuple(items) - elif words[0] == "range": - range_ = tuple(items) - - col = sa.over(func, partition_by=partition, order_by=order_by, range_=range_, rows=rows) - - if t.alias: - col = col.label(self.get_alias(t.alias)) - elif isinstance(t, 
ast.TypeCast): - arg = self.to_expression(t.arg) - type = self.get_type(t.type_name) - if t.precision is not None: - type = type(*t.precision) - col = sa.cast(arg, type) - - if t.alias: - alias = self.get_alias(t.alias) - col = col.label(alias) - else: - alias = self.make_unique_alias("cast") - if alias: - col = col.label(alias) - elif isinstance(t, ast.Parameter): - col = sa.column(t.value, is_literal=True) - if t.alias: - raise RenderError("Parameter aliases are not supported in the renderer") - elif isinstance(t, ast.Tuple): - col = sa.tuple_(*[self.to_expression(i) for i in t.items]) - elif isinstance(t, ast.Variable): - col = sa.column(t.to_string(), is_literal=True) - elif isinstance(t, ast.Latest): - col = sa.column(t.to_string(), is_literal=True) - elif isinstance(t, ast.Exists): - sub_stmt = self.prepare_select(t.query) - col = sub_stmt.exists() - elif isinstance(t, ast.NotExists): - sub_stmt = self.prepare_select(t.query) - col = ~sub_stmt.exists() - elif isinstance(t, ast.Case): - col = self.prepare_case(t) - else: - # some other complex object? 
- raise NotImplementedError(f"Column {t}") - - return col - - def prepare_case(self, t: ast.Case): - conditions = [] - for condition, result in t.rules: - conditions.append((self.to_expression(condition), self.to_expression(result))) - default = None - if t.default is not None: - default = self.to_expression(t.default) - - value = None - if t.arg is not None: - value = self.to_expression(t.arg) - - col = sa.case(*conditions, else_=default, value=value) - if t.alias: - col = col.label(self.get_alias(t.alias)) - return col - - def to_function(self, t): - if t.namespace is not None: - op = getattr(sa.func, t.namespace) - else: - op = sa.func - op = getattr(op, t.op) - if t.from_arg is not None: - arg = t.args[0].to_string() - from_arg = self.to_expression(t.from_arg) - - fnc = op(arg, from_arg) - else: - args = [self.to_expression(i) for i in t.args] - if t.distinct: - # set first argument to distinct - args[0] = args[0].distinct() - fnc = op(*args) - return fnc - - def get_type(self, typename): - # TODO how to get type - if not isinstance(typename, str): - # sqlalchemy type - return typename - - typename = typename.upper() - if re.match(r"^INT[\d]+$", typename): - typename = "BIGINT" - if re.match(r"^FLOAT[\d]+$", typename): - typename = "FLOAT" - - return types_map[typename] - - def prepare_join(self, join): - # join tree to table list - - if isinstance(join.right, ast.Join): - raise NotImplementedError("Wrong join AST") - - items = [] - - if isinstance(join.left, ast.Join): - # dive to next level - items.extend(self.prepare_join(join.left)) - else: - # this is first table - items.append(dict(table=join.left)) - - # all properties set to right table - items.append( - dict(table=join.right, join_type=join.join_type, is_implicit=join.implicit, condition=join.condition) - ) - - return items - - def get_table_name(self, table_name): - schema = None - if isinstance(table_name, ast.Identifier): - parts = table_name.parts - quoted = get_is_quoted(table_name) - - if 
len(parts) > 2: - # TODO tests is failing - raise NotImplementedError(f"Path to long: {table_name.parts}") - - if len(parts) == 2: - schema = AttributedStr(parts[-2], quoted[-2]) - - table_name = AttributedStr(parts[-1], quoted[-1]) - - return schema, table_name - - def to_table(self, node, is_lateral=False): - if isinstance(node, ast.Identifier): - schema, table_name = self.get_table_name(node) - - table = sa.table(table_name, schema=schema) - - if node.alias: - table = aliased(table, name=self.get_alias(node.alias)) - - elif isinstance(node, (ast.Select, ast.Union, ast.Intersect, ast.Except)): - sub_stmt = self.prepare_select(node) - alias = None - if node.alias: - alias = self.get_alias(node.alias) - if is_lateral: - table = sub_stmt.lateral(alias) - else: - table = sub_stmt.subquery(alias) - - else: - # TODO tests are failing - raise NotImplementedError(f"Table {node.__name__}") - - return table - - def prepare_select(self, node): - if isinstance(node, (ast.Union, ast.Except, ast.Intersect)): - return self.prepare_union(node) - - cols = [] - - self.selects_stack.append({"aliases": []}) - - for t in node.targets: - col = self.to_expression(t) - cols.append(col) - - query = sa.select(*cols) - - if node.cte is not None: - for cte in node.cte: - if cte.columns is not None and len(cte.columns) > 0: - raise NotImplementedError("CTE columns") - - stmt = self.prepare_select(cte.query) - alias = cte.name - - query = query.add_cte(stmt.cte(self.get_alias(alias), nesting=True)) - - if node.distinct is True: - query = query.distinct() - elif isinstance(node.distinct, list): - columns = [self.to_expression(c) for c in node.distinct] - query = query.distinct(*columns) - - if node.from_table is not None: - from_table = node.from_table - - if isinstance(from_table, ast.Join): - join_list = self.prepare_join(from_table) - # first table - table = self.to_table(join_list[0]["table"]) - query = query.select_from(table) - - # other tables - has_explicit_join = False - for item in 
join_list[1:]: - join_type = item["join_type"] - table = self.to_table(item["table"], is_lateral=("LATERAL" in join_type)) - if item["is_implicit"]: - # add to from clause - if has_explicit_join: - # sqlalchemy doesn't support implicit join after explicit - # convert it to explicit - query = query.join(table, sa.text("1=1")) - else: - query = query.select_from(table) - else: - has_explicit_join = True - if item["condition"] is None: - # otherwise, sqlalchemy raises "Don't know how to join to ..." - condition = sa.text("1=1") - else: - condition = self.to_expression(item["condition"]) - - if "ASOF" in join_type or "RIGHT" in join_type: - raise NotImplementedError(f"Unsupported join type: {join_type}") - - is_full = False - is_outer = False - if join_type in ("LEFT JOIN", "LEFT OUTER JOIN"): - is_outer = True - if join_type == "FULL JOIN": - is_full = True - - # perform join - query = query.join(table, condition, isouter=is_outer, full=is_full) - elif isinstance(from_table, (ast.Union, ast.Intersect, ast.Except)): - alias = None - if from_table.alias: - alias = self.get_alias(from_table.alias) - table = self.prepare_union(from_table).subquery(alias) - query = query.select_from(table) - - elif isinstance(from_table, ast.Select): - table = self.to_table(from_table) - query = query.select_from(table) - - elif isinstance(from_table, ast.Identifier): - table = self.to_table(from_table) - query = query.select_from(table) - - elif isinstance(from_table, ast.NativeQuery): - alias = None - if from_table.alias: - alias = from_table.alias.parts[-1] - table = sa.text(from_table.query).columns().subquery(alias) - query = query.select_from(table) - else: - raise NotImplementedError(f"Select from {from_table}") - - if node.where is not None: - query = query.filter(self.to_expression(node.where)) - - if node.group_by is not None: - cols = [self.to_expression(i) for i in node.group_by] - if getattr(node.group_by[-1], "with_rollup", False): - query = 
query.group_by(GroupByRollup(*cols)) - else: - query = query.group_by(*cols) - - if node.having is not None: - query = query.having(self.to_expression(node.having)) - - if node.order_by is not None: - order_by = [] - for f in node.order_by: - col0 = self.to_expression(f.field) - if f.direction.upper() == "DESC": - col0 = col0.desc() - elif f.direction.upper() == "ASC": - col0 = col0.asc() - if f.nulls.upper() == "NULLS FIRST": - col0 = sa.nullsfirst(col0) - elif f.nulls.upper() == "NULLS LAST": - col0 = sa.nullslast(col0) - order_by.append(col0) - - query = query.order_by(*order_by) - - if node.limit is not None: - query = query.limit(node.limit.value) - - if node.offset is not None: - query = query.offset(node.offset.value) - - if node.mode is not None: - if node.mode == "FOR UPDATE": - query = query.with_for_update() - else: - raise NotImplementedError(f"Select mode: {node.mode}") - - self.selects_stack.pop() - - return query - - def prepare_union(self, from_table): - step1 = self.prepare_select(from_table.left) - step2 = self.prepare_select(from_table.right) - - if isinstance(from_table, ast.Except): - func = sa.except_ if from_table.unique else sa.except_all - elif isinstance(from_table, ast.Intersect): - func = sa.intersect if from_table.unique else sa.intersect_all - else: - func = sa.union if from_table.unique else sa.union_all - - return func(step1, step2) - - def prepare_create_table(self, ast_query): - columns = [] - - for col in ast_query.columns: - default = None - if col.default is not None: - if isinstance(col.default, str): - default = sa.text(col.default) - - if isinstance(col.type, str) and col.type.lower() == "serial": - col.is_primary_key = True - col.type = "INT" - - kwargs = { - "primary_key": col.is_primary_key, - "server_default": default, - } - if col.nullable is not None: - kwargs["nullable"] = col.nullable - - columns.append(sa.Column(col.name, self.get_type(col.type), **kwargs)) - - schema, table_name = self.get_table_name(ast_query.name) 
- - metadata = sa.MetaData() - table = sa.Table(table_name, metadata, schema=schema, *columns) - - return CreateTable(table) - - def prepare_drop_table(self, ast_query): - if len(ast_query.tables) != 1: - raise NotImplementedError("Only one table is supported") - - schema, table_name = self.get_table_name(ast_query.tables[0]) - - metadata = sa.MetaData() - table = sa.Table(table_name, metadata, schema=schema) - return DropTable(table, if_exists=ast_query.if_exists) - - def prepare_insert(self, ast_query, with_params=False): - params = None - schema, table_name = self.get_table_name(ast_query.table) - - names = [] - columns = [] - - if ast_query.columns is None: - raise NotImplementedError("Columns is required in insert query") - for col in ast_query.columns: - columns.append( - sa.Column( - col.name, - # self.get_type(col.type) - ) - ) - # check doubles - if col.name in names: - raise RenderError(f"Columns name double: {col.name}") - names.append(col.name) - - table = sa.table(table_name, schema=schema, *columns) - - if ast_query.values is not None: - values = [] - - if ast_query.is_plain and with_params: - for i in range(len(ast_query.columns)): - values.append(sa.column("%s", is_literal=True)) - - values = [values] - params = ast_query.values - else: - for row in ast_query.values: - row = [self.to_expression(val) for val in row] - values.append(row) - - stmt = table.insert().values(values) - else: - # is insert from subselect - subquery = self.prepare_select(ast_query.from_select) - stmt = table.insert().from_select(names, subquery) - - return stmt, params - - def prepare_update(self, ast_query): - if ast_query.from_select is not None: - raise NotImplementedError("Render of update with sub-select is not implemented") - - schema, table_name = self.get_table_name(ast_query.table) - - columns = [] - - to_update = {} - for col, value in ast_query.update_columns.items(): - columns.append( - sa.Column( - col, - ) - ) - - to_update[col] = self.to_expression(value) - - 
table = sa.table(table_name, schema=schema, *columns) - - stmt = table.update().values(**to_update) - - if ast_query.where is not None: - stmt = stmt.where(self.to_expression(ast_query.where)) - - return stmt - - def prepare_delete(self, ast_query: ast.Delete): - schema, table_name = self.get_table_name(ast_query.table) - - columns = [] - - table = sa.table(table_name, schema=schema, *columns) - - stmt = table.delete() - - if ast_query.where is not None: - stmt = stmt.where(self.to_expression(ast_query.where)) - - return stmt - - def get_query(self, ast_query, with_params=False): - params = None - if isinstance(ast_query, (ast.Select, ast.Union, ast.Except, ast.Intersect)): - stmt = self.prepare_select(ast_query) - elif isinstance(ast_query, ast.Insert): - stmt, params = self.prepare_insert(ast_query, with_params=with_params) - elif isinstance(ast_query, ast.Update): - stmt = self.prepare_update(ast_query) - elif isinstance(ast_query, ast.Delete): - stmt = self.prepare_delete(ast_query) - elif isinstance(ast_query, ast.CreateTable): - stmt = self.prepare_create_table(ast_query) - elif isinstance(ast_query, ast.DropTables): - stmt = self.prepare_drop_table(ast_query) - else: - raise NotImplementedError(f"Unknown statement: {ast_query.__class__.__name__}") - return stmt, params - - def get_string(self, ast_query, with_failback=True): - """ - Render query to sql string - - :param ast_query: query to render - :param with_failback: switch to standard render in case of error - :return: - """ - sql, _ = self.get_exec_params(ast_query, with_failback=with_failback, with_params=False) - return sql - - def get_exec_params(self, ast_query, with_failback=True, with_params=True): - """ - Render query with separated parameters and placeholders - :param ast_query: query to render - :param with_failback: switch to standard render in case of error - :return: sql query and parameters - """ - - if isinstance(ast_query, (ast.CreateTable, ast.DropTables)): - render_func = 
render_ddl_query - else: - render_func = render_dml_query - - try: - stmt, params = self.get_query(ast_query, with_params=with_params) - - sql = render_func(stmt, self.dialect) - - return sql, params - - except (SQLAlchemyError, NotImplementedError, AttributeError) as e: - if not with_failback: - raise e - - sql_query = str(ast_query) - if self.dialect.name == "postgresql": - sql_query = sql_query.replace("`", "") - return sql_query, None - - -def render_dml_query(statement, dialect): - class LiteralCompiler(dialect.statement_compiler): - def render_literal_value(self, value, type_): - if isinstance(value, (str, dt.date, dt.datetime, dt.timedelta)): - return "'{}'".format(str(value).replace("'", "''")) - - return super(LiteralCompiler, self).render_literal_value(value, type_) - - return str(LiteralCompiler(dialect, statement, compile_kwargs={"literal_binds": True})) - - -def render_ddl_query(statement, dialect): - class LiteralCompiler(dialect.ddl_compiler): - def render_literal_value(self, value, type_): - if isinstance(value, (str, dt.date, dt.datetime, dt.timedelta)): - return "'{}'".format(str(value).replace("'", "''")) - - return super(LiteralCompiler, self).render_literal_value(value, type_) - - return str(LiteralCompiler(dialect, statement, compile_kwargs={"literal_binds": True})) diff --git a/mindsdb/utilities/security.py b/mindsdb/utilities/security.py deleted file mode 100644 index dd8033fc309..00000000000 --- a/mindsdb/utilities/security.py +++ /dev/null @@ -1,97 +0,0 @@ -from urllib.parse import urlparse -import socket -import ipaddress - - -def is_private_url(url: str): - """ - Raises exception if url is private - - :param url: url to check - """ - - hostname = urlparse(url).hostname - if not hostname: - # Unable find hostname in url - return True - ip = socket.gethostbyname(hostname) - return ipaddress.ip_address(ip).is_private - - -def clear_filename(filename: str) -> str: - """ - Removes path symbols from filename which could be used for path 
injection - :param filename: input filename - :return: output filename - """ - - if not filename: - return filename - badchars = '\\/:*?"<>|' - for c in badchars: - filename = filename.replace(c, "") - return filename - - -def _split_url(url: str) -> tuple[str, str]: - """ - Splits the URL into scheme and netloc. - - Args: - url (str): The URL to split. - - Returns: - tuple[str, str]: The scheme and netloc of the URL. - - Raises: - ValueError: If the URL does not include protocol and host name. - """ - parsed_url = urlparse(url) - if not (parsed_url.scheme and parsed_url.netloc): - raise ValueError(f"URL must include protocol and host name: {url}") - return parsed_url.scheme.lower(), parsed_url.netloc.lower() - - -def validate_urls(urls: str | list[str], allowed_urls: list[str], disallowed_urls: list[str] | None = None) -> bool: - """ - Checks if the provided URL(s) is/are from an allowed host. - - This function parses the URL(s) and checks the origin (scheme + netloc) - against a list of allowed hosts. - - Examples: - validate_urls("http://site.com/file", ["site.com"]) -> Exception - validate_urls("https://site.com/file", ["https://site.com"]) -> True - validate_urls("http://site.com/file", ["https://site.com"]) -> False - validate_urls("https://site.com/file", ["https://example.com"]) -> False - validate_urls("site.com/file", ["https://site.com"]) -> Exception - - Args: - urls (str | list[str]): The URL(s) to check. Can be a single URL (str) or a list of URLs (list). - allowed_urls (list[str]): The list of allowed URLs. - disallowed_urls (list[str]): The list of disallowed URLs. If provided, the function - will return False if the URL is in the disallowed list. - - Returns: - bool: True if the URL(s) is/are from an allowed host and not in the disallowed list, False otherwise. 
- """ - if disallowed_urls is None: - disallowed_urls = [] - - allowed_origins = [_split_url(url) for url in allowed_urls] - disallowed_origins = [_split_url(url) for url in disallowed_urls] - - if isinstance(urls, str): - urls = [urls] - - if allowed_origins: - for url in urls: - if _split_url(url) not in allowed_origins: - return False - - if disallowed_origins: - for url in urls: - if _split_url(url) in disallowed_origins: - return False - - return True diff --git a/mindsdb/utilities/sentry.py b/mindsdb/utilities/sentry.py deleted file mode 100644 index 467a14c7f43..00000000000 --- a/mindsdb/utilities/sentry.py +++ /dev/null @@ -1,46 +0,0 @@ -# Prepare sentry.io for error and exception tracking -import sentry_sdk -import os -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - -# Provide your sentry.io DSN here -SENTRY_IO_DSN = os.environ.get("SENTRY_IO_DSN", "") -# Define the environment -SENTRY_IO_ENVIRONMENT = os.environ.get("SENTRY_IO_ENVIRONMENT", "local").lower() -# This is set to our SHA when deployed so we know what version this occurred in -SENTRY_IO_RELEASE = os.environ.get("SENTRY_IO_RELEASE", "local").lower() -# How often to capture traces, 1.0 means 100%. -SENTRY_IO_TRACE_SAMPLE_RATE = float(os.environ.get("SENTRY_IO_TRACE_SAMPLE_RATE", "1.0")) -# How often to capture profiling, 1.0 means 100%. 
-SENTRY_IO_PROFILING_SAMPLE_RATE = float(os.environ.get("SENTRY_IO_PROFILING_SAMPLE_RATE", "1.0")) -# By default we have sentry.io enabled on all envs, except for local which is disabled by default -# If you want to enable sentry.io on local for some reason (eg: profiling) please set SENTRY_IO_FORCE_RUN to true -SENTRY_IO_DISABLED = True if (os.environ.get("SENTRY_IO_DISABLED", "false").lower() == "true" or SENTRY_IO_ENVIRONMENT == "local") else False -SENTRY_IO_FORCE_RUN = True if os.environ.get("SENTRY_IO_FORCE_RUN", "false").lower() == "true" else False - - -# If we're not disabled, or if we have forced sentry to run -if SENTRY_IO_DSN and (not SENTRY_IO_DISABLED or SENTRY_IO_FORCE_RUN): - logger.info("Sentry.io enabled") - logger.info(f"SENTRY_IO_DSN: {SENTRY_IO_DSN}") - logger.info(f"SENTRY_IO_ENVIRONMENT: {SENTRY_IO_ENVIRONMENT}") - logger.info(f"SENTRY_IO_RELEASE: {SENTRY_IO_RELEASE}") - logger.info(f"SENTRY_IO_TRACE_SAMPLE_RATE: {SENTRY_IO_TRACE_SAMPLE_RATE * 100}%") - logger.info(f"SENTRY_IO_PROFILING_SAMPLE_RATE: {SENTRY_IO_PROFILING_SAMPLE_RATE * 100}%") - - sentry_sdk.init( - dsn=SENTRY_IO_DSN, - # Set traces_sample_rate to 1.0 to capture 100% - # of transactions for tracing. - traces_sample_rate=SENTRY_IO_TRACE_SAMPLE_RATE, - # Set profiles_sample_rate to 1.0 to profile 100% - # of sampled transactions. - # We recommend adjusting this value in production. - profiles_sample_rate=SENTRY_IO_PROFILING_SAMPLE_RATE, - # What environment we're on, by default development - environment=SENTRY_IO_ENVIRONMENT, - # What release/image/etc we're using, injected in Helm/Kubernetes to be the image tag - release=SENTRY_IO_RELEASE, - ) diff --git a/mindsdb/utilities/sql.py b/mindsdb/utilities/sql.py deleted file mode 100644 index b90c45812c1..00000000000 --- a/mindsdb/utilities/sql.py +++ /dev/null @@ -1,83 +0,0 @@ -def _is_in_quotes(pos: int, quote_positions: list[tuple[int, int]]) -> bool: - """ - Check if a position is within any quoted string. 
- - Args: - pos (int): The position to check. - quote_positions (list[tuple[int, int]]): A list of tuples, each containing the start and - end positions of a quoted string. - - Returns: - bool: True if the position is within any quoted string, False otherwise. - """ - return any(start < pos < end for start, end in quote_positions) - - -def clear_sql(sql: str) -> str: - '''Remove comments (--, /**/, and oracle-stype #) and trailing ';' from sql - Note: written mostly by LLM - - Args: - sql (str): The SQL query to clear. - - Returns: - str: The cleared SQL query. - ''' - if sql is None: - raise ValueError('sql query is None') - - # positions of (', ", `) - quote_positions = [] - for quote_char in ["'", '"', '`']: - i = 0 - while i < len(sql): - if sql[i] == quote_char and (i == 0 or sql[i - 1] != '\\'): - start = i - i += 1 - while i < len(sql) and (sql[i] != quote_char or sql[i - 1] == '\\'): - i += 1 - if i < len(sql): - quote_positions.append((start, i)) - i += 1 - - # del /* */ comments - result = [] - i = 0 - while i < len(sql): - if i + 1 < len(sql) and sql[i:i + 2] == '/*' and not _is_in_quotes(i, quote_positions): - # skip until */ - i += 2 - while i + 1 < len(sql) and sql[i:i + 2] != '*/': - i += 1 - if i + 1 < len(sql): - i += 2 # skip */ - else: - i += 1 - else: - result.append(sql[i]) - i += 1 - - sql = ''.join(result) - - # del -- and # comments - result = [] - i = 0 - while i < len(sql): - if i + 1 < len(sql) and sql[i:i + 2] == '--' and not _is_in_quotes(i, quote_positions): - while i < len(sql) and sql[i] != '\n': - i += 1 - elif sql[i] == '#' and not _is_in_quotes(i, quote_positions): - while i < len(sql) and sql[i] != '\n': - i += 1 - else: - result.append(sql[i]) - i += 1 - - sql = ''.join(result) - - # del ; at the end - sql = sql.rstrip() - if sql and sql[-1] == ';': - sql = sql[:-1].rstrip() - - return sql.strip(' \n\t') diff --git a/mindsdb/utilities/starters.py b/mindsdb/utilities/starters.py deleted file mode 100644 index 
e0f4fef1883..00000000000 --- a/mindsdb/utilities/starters.py +++ /dev/null @@ -1,59 +0,0 @@ -def start_http(*args, **kwargs): - from mindsdb.utilities.log import initialize_logging - - initialize_logging("http") - - from mindsdb.api.http.start import start - - start(*args, **kwargs) - - -def start_mysql(*args, **kwargs): - from mindsdb.utilities.log import initialize_logging - - initialize_logging("mysql") - - from mindsdb.api.mysql.start import start - - start(*args, **kwargs) - - -def start_tasks(*args, **kwargs): - from mindsdb.utilities.log import initialize_logging - - initialize_logging("tasks") - - from mindsdb.interfaces.tasks.task_monitor import start - - start(*args, **kwargs) - - -def start_ml_task_queue(*args, **kwargs): - from mindsdb.utilities.log import initialize_logging - - initialize_logging("ml_task_queue") - - from mindsdb.utilities.ml_task_queue.consumer import start - - start(*args, **kwargs) - - -def start_scheduler(*args, **kwargs): - from mindsdb.utilities.log import initialize_logging - - initialize_logging("scheduler") - - from mindsdb.interfaces.jobs.scheduler import start - - start(*args, **kwargs) - - -def start_litellm(*args, **kwargs): - """Start the LiteLLM server""" - from mindsdb.utilities.log import initialize_logging - - initialize_logging("litellm") - - from mindsdb.api.litellm.start import start - - start(*args, **kwargs) diff --git a/mindsdb/utilities/types/__init__.py b/mindsdb/utilities/types/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindsdb/utilities/types/column.py b/mindsdb/utilities/types/column.py deleted file mode 100644 index e8d258468d3..00000000000 --- a/mindsdb/utilities/types/column.py +++ /dev/null @@ -1,30 +0,0 @@ -from dataclasses import dataclass, field, MISSING - -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE - - -@dataclass(kw_only=True, slots=True) -class Column: - name: str = field(default=MISSING) - alias: str | None = None - 
table_name: str | None = None - table_alias: str | None = None - type: MYSQL_DATA_TYPE | None = None - database: str | None = None - flags: dict = None - charset: str | None = None - original_type: str | None = None - dtype: str | None = None - - def __post_init__(self): - if self.alias is None: - self.alias = self.name - if self.table_alias is None: - self.table_alias = self.table_name - - def get_hash_name(self, prefix): - table_name = self.table_name if self.table_alias is None else self.table_alias - name = self.name if self.alias is None else self.alias - - name = f"{prefix}_{table_name}_{name}" - return name diff --git a/mindsdb/utilities/utils.py b/mindsdb/utilities/utils.py deleted file mode 100644 index 160b03fe79c..00000000000 --- a/mindsdb/utilities/utils.py +++ /dev/null @@ -1,57 +0,0 @@ -import csv -import re -import typing - -from pydantic import BaseModel, ValidationError - - -def parse_csv_attributes(csv_attributes: typing.Optional[str] = "") -> typing.Dict[str, str]: - """ - Parse the raw_attributes variable, which uses the CSV format: - key=value,another=something_else - - Returns: - dict: Parsed key-value pairs as a dictionary. 
- """ - attributes = {} - - if not csv_attributes: - return attributes # Return empty dictionary if the variable is not set - - try: - # Use CSV reader to handle parsing the input - reader = csv.reader([csv_attributes]) - for row in reader: - for pair in row: - # Match key=value pattern - match = re.match(r"^\s*([^=]+?)\s*=\s*(.+?)\s*$", pair) - if match: - key, value = match.groups() - attributes[key.strip()] = value.strip() - else: - raise ValueError(f"Invalid attribute format: {pair}") - except Exception as e: - raise ValueError(f"Failed to parse csv_attributes='{csv_attributes}': {e}") from e - - return attributes - - -def validate_pydantic_params(params: dict, schema: type[BaseModel], subject: str): - # check names and types - try: - schema.model_validate(params) - except ValidationError as e: - problems = [] - for error in e.errors(): - parameter = ".".join([str(i) for i in error["loc"]]) - param_type = error["type"] - if param_type == "extra_forbidden": - msg = f"Parameter '{parameter}' is not allowed" - else: - msg = f"Error in '{parameter}' (type: {param_type}): {error['msg']}. 
Input: {repr(error['input'])}" - problems.append(msg) - - msg = "\n".join(problems) - if len(problems) > 1: - msg = "\n" + msg - raise ValueError(f"Problem with {subject} parameters: {msg}") from e diff --git a/mindsdb/utilities/wizards.py b/mindsdb/utilities/wizards.py deleted file mode 100644 index 5a7d10ce5da..00000000000 --- a/mindsdb/utilities/wizards.py +++ /dev/null @@ -1,49 +0,0 @@ -from datetime import datetime, timedelta - - -def make_ssl_cert(file_path): - from cryptography import x509 - from cryptography.x509.oid import NameOID - from cryptography.hazmat.backends import default_backend - from cryptography.hazmat.primitives import hashes - from cryptography.hazmat.primitives import serialization - from cryptography.hazmat.primitives.asymmetric import rsa - - key = rsa.generate_private_key( - public_exponent=65537, - key_size=2048, - backend=default_backend(), - ) - - name = x509.Name([ - x509.NameAttribute(NameOID.COMMON_NAME, 'mdb_autogen'), - x509.NameAttribute(NameOID.COUNTRY_NAME, 'US'), - x509.NameAttribute(NameOID.STATE_OR_PROVINCE_NAME, 'California'), - x509.NameAttribute(NameOID.LOCALITY_NAME, 'Berkeley'), - x509.NameAttribute(NameOID.ORGANIZATION_NAME, 'MindsDB') - ]) - - now = datetime.utcnow() - cert = ( - x509.CertificateBuilder() - .subject_name(name) - .issuer_name(name) - .public_key(key.public_key()) - .serial_number(1) - .not_valid_before(now - timedelta(days=10 * 365)) - .not_valid_after(now + timedelta(days=10 * 365)) - .add_extension( - x509.BasicConstraints(ca=True, path_length=0), - False - ) - .sign(key, hashes.SHA256(), default_backend()) - ) - cert_pem = cert.public_bytes(encoding=serialization.Encoding.PEM) - key_pem = key.private_bytes( - encoding=serialization.Encoding.PEM, - format=serialization.PrivateFormat.TraditionalOpenSSL, - encryption_algorithm=serialization.NoEncryption(), - ) - - with open(file_path, 'wb') as f: - f.write(key_pem + cert_pem) diff --git a/otel-collector-config.yaml b/otel-collector-config.yaml deleted 
file mode 100644 index 9e356cef18b..00000000000 --- a/otel-collector-config.yaml +++ /dev/null @@ -1,42 +0,0 @@ -receivers: - otlp: - protocols: - grpc: - endpoint: 0.0.0.0:4317 - http: - endpoint: 0.0.0.0:4318 - -exporters: - debug: - verbosity: normal - -processors: - batch: - -extensions: - health_check: - pprof: - zpages: - - -service: - extensions: [health_check, pprof, zpages] - telemetry: - logs: - level: "debug" - encoding: "console" - metrics: - level: "detailed" - pipelines: - logs: - receivers: [otlp] - processors: [batch] - exporters: [debug] - metrics: - receivers: [otlp] - processors: [batch] - exporters: [debug] - traces: - receivers: [otlp] - processors: [batch] - exporters: [debug] diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 8f57ac83c62..00000000000 --- a/pyproject.toml +++ /dev/null @@ -1,31 +0,0 @@ -[build-system] - -# @TODO: We should figure out version limitations for these -requires = [ - "setuptools", - "wheel", -] - - - - -[tool.ruff] -exclude = [ - ".venv", - "build", - "tests/unused" -] -line-length = 120 -target-version = "py310" - -[tool.ruff.lint] -ignore = [ - "E501", # Line too long - "C901", # Function is too complex - "E721" # Do not compare types, use 'isinstance()': https://www.flake8rules.com/rules/E721.html -] - -[tool.ruff.lint.per-file-ignores] -"mindsdb/__main__.py" = ["E402"] -"mindsdb/api/http/start.py" = ["E402"] -"mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py" = ["E241"] \ No newline at end of file diff --git a/requirements/requirements-agents.txt b/requirements/requirements-agents.txt deleted file mode 100644 index e96657bb724..00000000000 --- a/requirements/requirements-agents.txt +++ /dev/null @@ -1,14 +0,0 @@ -openai<3.0.0,>=2.11.0 - -# When using agents, some LLMs may require the 'transformers' library (like Ollama): -transformers==5.5.0 - -# Required for KB -mindsdb-evaluator == 0.0.21 -mcp~=1.26.0 # Required for MCP server - -# A2A requirements -httpx==0.28.1 -jwcrypto==1.5.6 
-# fastmcp (via pydantic-ai) requires typing-extensions>=4.15.0 (py-key-value-aio chain) -typing-extensions>=4.15.0,<5 diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt deleted file mode 100644 index 90573374e35..00000000000 --- a/requirements/requirements-dev.txt +++ /dev/null @@ -1,8 +0,0 @@ -pre-commit>=2.16.0 -watchfiles==0.19.0 -setuptools==78.1.1 -wheel -deptry==0.20.0 -twine -importlib_metadata==7.2.1 #fix twine bug -ruff==0.11.11 diff --git a/requirements/requirements-kb.txt b/requirements/requirements-kb.txt deleted file mode 100644 index 334e7c0f352..00000000000 --- a/requirements/requirements-kb.txt +++ /dev/null @@ -1,2 +0,0 @@ -lxml==5.3.0 # Is this transitive dependency? -faiss-cpu==1.13.2 # default vector storage diff --git a/requirements/requirements-langfuse.txt b/requirements/requirements-langfuse.txt deleted file mode 100644 index 7cd73e32d75..00000000000 --- a/requirements/requirements-langfuse.txt +++ /dev/null @@ -1 +0,0 @@ -langfuse==3.2.5 \ No newline at end of file diff --git a/requirements/requirements-opentelemetry.txt b/requirements/requirements-opentelemetry.txt deleted file mode 100644 index 0b262f9b35a..00000000000 --- a/requirements/requirements-opentelemetry.txt +++ /dev/null @@ -1,6 +0,0 @@ -opentelemetry-api==1.39.1 -opentelemetry-sdk==1.39.1 -opentelemetry-exporter-otlp==1.39.1 -opentelemetry-instrumentation-requests==0.60b1 -opentelemetry-instrumentation-flask==0.60b1 -opentelemetry-distro==0.60b1 \ No newline at end of file diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt deleted file mode 100644 index 60607799622..00000000000 --- a/requirements/requirements-test.txt +++ /dev/null @@ -1,24 +0,0 @@ -scipy==1.15.3 # MacOS has issue with higher versions -docker >= 5.0.3 -openai<3.0.0,>=2.9.0 -pytest >= 8.3.5, < 9.0.0 -pytest-subtests -pytest-xdist -pytest-cov -pytest-json-report==1.5.0 -pytest-metadata==3.1.1 -python-dotenv==1.1.1 -responses -coveralls -locust 
-ollama >= 0.1.7 # Langchain tests -anthropic >= 0.21.3 # Langchain tests -langchain-google-genai>=2.0.0 # Langchain tests -mindsdb-sdk -filelock==3.20.3 -mysql-connector-python==9.1.0 -walrus==0.9.3 -pymongo == 4.8.0 -pytest-json-report==1.5.0 -appdirs >= 1.0.0 -pgvector==0.3.6 # Required for knowledge bases tests diff --git a/requirements/requirements.txt b/requirements/requirements.txt deleted file mode 100644 index 3a1542cf0f7..00000000000 --- a/requirements/requirements.txt +++ /dev/null @@ -1,57 +0,0 @@ -packaging -flask == 3.1.3 -werkzeug == 3.1.6 -flask-restx >= 1.3.0, < 2.0.0 -pandas==2.3.1 -python-multipart == 0.0.26 -cryptography>=46.0.5 -psycopg[binary] -psutil~=7.0 -sqlalchemy >= 2.0.0, < 3.0.0 -psycopg2-binary # This is required for using sqlalchemy with postgres -alembic >= 1.3.3 -redis==6.4.0 -walrus==0.9.3 -flask-compress >= 1.0.0 -appdirs >= 1.0.0 -mindsdb-sql-parser ~= 0.13.8 -pydantic == 2.12.5 -duckdb == 1.3.0; sys_platform == "win32" -duckdb ~= 1.3.2; sys_platform != "win32" -requests == 2.33.0 -dateparser==1.2.0 -dill == 0.3.6 -numpy ~= 2.0 -pytz -botocore -boto3 >= 1.34.131 -python-dateutil -lark -prometheus-client==0.20.0 -sentry-sdk[flask] == 2.14.0 -pyaml==23.12.0 -uvicorn>=0.30.0, <1.0.0 # For all HTTP-based APIs -a2wsgi ~= 1.10.10 # WSGI wrapper for flask+starlette -starlette>=0.49.1 -sse-starlette==2.3.3 -pydantic_core>=2.33.2 -pyjwt==2.12.0 -# files reading -pymupdf==1.27.2 -filetype -charset-normalizer -openpyxl # used by pandas to read txt and xlsx files -xlrd>=2.0.1 # used by pandas to read legacy .xls files -aipdf==0.0.7.2 -pyarrow<=19.0.0 # used by pandas to read feather files in Files handler -orjson==3.11.6 - -mind-castle==0.5.0 -pydantic-ai==1.77.0 # Required for Pydantic AI agents - -bs4 # for rag HTMLDocumentLoader -urllib3>=2.6.3 # not directly required, pinned by Snyk to avoid a vulnerability - -# kb providers -aiobotocore==3.4.0 -google-genai==1.70.0 diff --git a/scripts/init-dbs.sh b/scripts/init-dbs.sh deleted file mode 
100755 index d490d85457e..00000000000 --- a/scripts/init-dbs.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# List the databases you want to ensure exist -DBS=( - "mindsdb" - "kb" - "langfuse" -) - -POSTGRES_USER="${POSTGRES_USER:-postgres}" -POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-postgres}" -POSTGRES_HOST="${POSTGRES_HOST:-postgres}" -POSTGRES_DB="${POSTGRES_DB:-postgres}" - -for db in "${DBS[@]}"; do - echo "Ensuring database '${db}' exists..." - PGPASSWORD=$POSTGRES_PASSWORD psql -v ON_ERROR_STOP=1 \ - -U "${POSTGRES_USER}" \ - -h "${POSTGRES_HOST}" \ - -d "${POSTGRES_DB}" < /dev/null; then - print_error "uv is not installed. Install it with: curl -LsSf https://astral.sh/uv/install.sh | sh" - exit 1 -fi -print_success "Found uv: $(uv --version)" - -# Check Python version -PYTHON_PATH=$(uv python find "$PYTHON_VERSION" 2>/dev/null || true) -if [[ -z "$PYTHON_PATH" ]]; then - print_warning "Python $PYTHON_VERSION not found, installing..." - uv python install "$PYTHON_VERSION" - PYTHON_PATH=$(uv python find "$PYTHON_VERSION") -fi -print_success "Using Python: $PYTHON_PATH" - -# Create/activate virtual environment if not already in one -if [[ -z "$VIRTUAL_ENV" ]]; then - if [[ ! -d "env" ]]; then - print_header "Creating virtual environment" - uv venv env --python "$PYTHON_VERSION" - fi - print_success "Activating virtual environment" - source env/bin/activate -fi - -# -# INSTALL DEPENDENCIES -# -print_header "Installing Dependencies" - -echo "Installing test requirements..." -uv pip install -r requirements/requirements-test.txt - -# Also install mindsdb itself for integration tests -echo "Installing mindsdb..." -uv pip install -e . - -print_success "Dependencies installed" - -# -# RUN INTEGRATION TESTS -# -print_header "Running Integration Tests" - -mkdir -p reports - -if [[ "$RUN_SLOW" == "true" ]]; then - echo "Running integration tests (with slow tests)..." 
- make integration_tests_slow -else - echo "Running integration tests (fast mode)..." - make integration_tests -fi - -print_success "Integration tests completed" - -print_header "Done!" -echo "Summary:" -echo " - Integration tests: completed" -echo "" -if [[ "$RUN_SLOW" == "true" ]]; then - echo "Ran with --runslow flag (full test suite)" -else - echo "Ran in fast mode (skipped slow tests)" -fi diff --git a/scripts/run_unit_tests.sh b/scripts/run_unit_tests.sh deleted file mode 100755 index 1764d33c231..00000000000 --- a/scripts/run_unit_tests.sh +++ /dev/null @@ -1,280 +0,0 @@ -#!/usr/bin/env bash -# -# Local Unit Test Runner for macOS -# Replicates the GitHub Actions workflow from .github/workflows/tests_unit.yml -# -# Usage: -# ./run_unit_tests.sh # Run all steps (checks + tests) -# ./run_unit_tests.sh --tests-only # Skip code checks, only run tests -# ./run_unit_tests.sh --checks-only # Only run static code checks -# ./run_unit_tests.sh --fast # Run tests without --runslow flag -# ./run_unit_tests.sh --help # Show this help -# - -set -e - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -# Default options -RUN_CHECKS=true -RUN_TESTS=true -RUN_SLOW=true -PYTHON_VERSION="${PYTHON_VERSION:-3.11}" - -# Handlers to install (from the workflow) -HANDLERS_TO_INSTALL=( - postgres - mysql - salesforce - snowflake - timescaledb - mssql - oracle - redshift - bigquery - web - databricks - statsforecast - chromadb - agents - kb -) - -print_header() { - echo -e "\n${BLUE}============================================${NC}" - echo -e "${BLUE} $1${NC}" - echo -e "${BLUE}============================================${NC}\n" -} - -print_success() { - echo -e "${GREEN}✓ $1${NC}" -} - -print_warning() { - echo -e "${YELLOW}⚠ $1${NC}" -} - -print_error() { - echo -e "${RED}✗ $1${NC}" -} - -show_help() { - echo "Local Unit Test Runner for macOS" - echo "" - echo "Usage: $0 [OPTIONS]" - echo "" - echo "Options:" 
- echo " --tests-only Skip static code checks, only run unit tests" - echo " --checks-only Only run static code checks (no tests)" - echo " --fast Run tests without --runslow flag (faster)" - echo " --help Show this help message" - echo "" - echo "Environment variables:" - echo " PYTHON_VERSION Python version to use (default: 3.11)" - echo "" - echo "Examples:" - echo " $0 # Run everything" - echo " $0 --tests-only # Just run tests" - echo " $0 --fast # Run fast tests only" - echo "" -} - -# Parse arguments -while [[ $# -gt 0 ]]; do - case $1 in - --tests-only) - RUN_CHECKS=false - shift - ;; - --checks-only) - RUN_TESTS=false - shift - ;; - --fast) - RUN_SLOW=false - shift - ;; - --help|-h) - show_help - exit 0 - ;; - *) - print_error "Unknown option: $1" - show_help - exit 1 - ;; - esac -done - -# Change to repo root -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -cd "$SCRIPT_DIR" - -print_header "MindsDB Unit Test Runner" - -# Check for uv -if ! command -v uv &> /dev/null; then - print_error "uv is not installed. Install it with: curl -LsSf https://astral.sh/uv/install.sh | sh" - exit 1 -fi -print_success "Found uv: $(uv --version)" - -# Check Python version -PYTHON_PATH=$(uv python find "$PYTHON_VERSION" 2>/dev/null || true) -if [[ -z "$PYTHON_PATH" ]]; then - print_warning "Python $PYTHON_VERSION not found, installing..." - uv python install "$PYTHON_VERSION" - PYTHON_PATH=$(uv python find "$PYTHON_VERSION") -fi -print_success "Using Python: $PYTHON_PATH" - -# Create/activate virtual environment if not already in one -if [[ -z "$VIRTUAL_ENV" ]]; then - if [[ ! -d "env" ]]; then - print_header "Creating virtual environment" - uv venv env --python "$PYTHON_VERSION" - fi - print_success "Activating virtual environment" - source env/bin/activate -fi - -# -# STATIC CODE CHECKS -# -if [[ "$RUN_CHECKS" == "true" ]]; then - print_header "Running Static Code Checks" - - # Check for print statements - echo "Checking for print statements..." 
- if uv run tests/scripts/check_print_statements.py; then - print_success "No forbidden print statements found" - else - print_error "Found forbidden print statements" - exit 1 - fi - - # Install dev requirements for pre-commit - echo "Installing dev requirements..." - uv pip install -r requirements/requirements-dev.txt - - # Run pre-commit on changed files - echo "Running pre-commit on changed files..." - # Get the base branch (usually main or staging) - BASE_BRANCH=$(git rev-parse --abbrev-ref HEAD@{upstream} 2>/dev/null | sed 's|origin/||' || echo "main") - - if git diff --quiet HEAD 2>/dev/null; then - # No uncommitted changes, run on commits since base - MERGE_BASE=$(git merge-base HEAD "origin/$BASE_BRANCH" 2>/dev/null || echo "HEAD~1") - if pre-commit run --show-diff-on-failure --color=always --from-ref "$MERGE_BASE" --to-ref HEAD; then - print_success "Pre-commit checks passed" - else - print_warning "Pre-commit checks had issues (continuing anyway)" - fi - else - # Has uncommitted changes, run on staged/unstaged files - if pre-commit run --show-diff-on-failure --color=always; then - print_success "Pre-commit checks passed" - else - print_warning "Pre-commit checks had issues (continuing anyway)" - fi - fi - - # Check requirements files - echo "Checking requirements files..." 
- if uv run tests/scripts/check_requirements.py; then - print_success "Requirements files are valid" - else - print_error "Requirements files have issues" - exit 1 - fi -fi - -# -# INSTALL DEPENDENCIES -# -if [[ "$RUN_TESTS" == "true" ]]; then - print_header "Installing Dependencies" - - # Build handler extras list -HANDLER_EXTRAS=() -for handler in "${HANDLERS_TO_INSTALL[@]}"; do - HANDLER_EXTRAS+=(".[${handler}]") - done - - echo "Installing mindsdb with handlers: ${HANDLERS_TO_INSTALL[*]}" - uv pip install ".[agents,kb]" \ - -r requirements/requirements-test.txt \ - "${HANDLER_EXTRAS[@]}" - - # Clone parser tests - PARSER_VERSION=$(uv pip show mindsdb_sql_parser | grep Version | cut -d ' ' -f 2) - if [[ ! -d "parser_tests" ]]; then - echo "Cloning mindsdb_sql_parser tests (v$PARSER_VERSION)..." - git clone --branch "v$PARSER_VERSION" https://github.com/mindsdb/mindsdb_sql_parser.git parser_tests - else - print_success "parser_tests already exists" - fi - - print_success "Dependencies installed" - - # - # RUN UNIT TESTS -# -print_header "Running Unit Tests" - -mkdir -p reports - -if [[ "$RUN_SLOW" == "true" ]]; then - echo "Running unit tests (with slow tests)..." - make unit_tests_slow -else - echo "Running unit tests (fast mode)..." - make unit_tests -fi - - print_success "Unit tests completed" - - # - # COVERAGE REPORT - # - print_header "Coverage Report" - - # Generate handler coverage report - echo "Generating handler coverage report..." - if uv run tests/scripts/check_handler_coverage.py > pytest-coverage-handlers.txt 2>/dev/null; then - print_success "Handler coverage report: pytest-coverage-handlers.txt" - else - print_warning "Could not generate handler coverage report" - fi - - # Generate HTML coverage report - echo "Generating HTML coverage report..." 
- if COVERAGE_FILE=.coverage.unit uv run coverage html -d reports/htmlcov 2>/dev/null; then - print_success "HTML coverage report: reports/htmlcov/index.html" - - # Open in browser on macOS - if [[ "$(uname)" == "Darwin" ]]; then - echo "Opening coverage report in browser..." - open reports/htmlcov/index.html - fi - else - print_warning "Could not generate HTML coverage report" - fi -fi - -print_header "Done!" -echo "Summary:" -[[ "$RUN_CHECKS" == "true" ]] && echo " - Static code checks: completed" -[[ "$RUN_TESTS" == "true" ]] && echo " - Unit tests: completed" -echo "" -echo "Generated files:" -[[ -f "pytest.xml" ]] && echo " - pytest.xml (JUnit test results)" -[[ -f "coverage.xml" ]] && echo " - coverage.xml (Coverage XML)" -[[ -f "pytest-coverage.txt" ]] && echo " - pytest-coverage.txt (Coverage summary)" -[[ -f "pytest-coverage-handlers.txt" ]] && echo " - pytest-coverage-handlers.txt (Handler coverage)" -[[ -d "reports/htmlcov" ]] && echo " - reports/htmlcov/index.html (HTML coverage report)" diff --git a/scripts/test-artifacts.sh b/scripts/test-artifacts.sh deleted file mode 100755 index f1769663beb..00000000000 --- a/scripts/test-artifacts.sh +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" -cd "${REPO_ROOT}" - -HANDLERS_TO_INSTALL=' -postgres -mysql -salesforce -snowflake -timescaledb -mssql -oracle -slack -redshift -bigquery -clickhouse -web -databricks -github -ms_teams -statsforecast -chromadb -confluence -' - -HANDLERS_TO_VERIFY=' -mysql -salesforce -postgres -snowflake -timescaledb -mssql -oracle -slack -file -redshift -bigquery -confluence -' - -INSTALL_HANDLERS=() -while IFS= read -r handler; do - handler=${handler//$'\r'/} - [[ -z "${handler}" || "${handler}" =~ ^[[:space:]]*# ]] && continue - INSTALL_HANDLERS+=("${handler}") -done <<< "${HANDLERS_TO_INSTALL}" - -HANDLER_EXTRAS=() -for handler in "${INSTALL_HANDLERS[@]}"; do - HANDLER_EXTRAS+=(".[${handler}]") -done - -uv pip install ".[agents,kb]" \ - -r requirements/requirements-test.txt \ - "${HANDLER_EXTRAS[@]}" - -uv pip install --force-reinstall onnxruntime==1.20.1 - -# Ensure parser tests are present (required for render tests when --runslow) -if [[ ! -d parser_tests ]]; then - git clone --branch v$(uv pip show mindsdb_sql_parser | grep Version | cut -d ' ' -f 2) \ - https://github.com/mindsdb/mindsdb_sql_parser.git parser_tests -fi - -# Run the exact test target used in CI -make unit_tests_slow - -# Generate the extra artifacts produced in CI -HANDLERS_TO_INSTALL="${HANDLERS_TO_INSTALL}" \ -HANDLERS_TO_VERIFY="${HANDLERS_TO_VERIFY}" \ -COVERAGE_FAIL_UNDER="80" \ -COVERAGE_FILE=.coverage.unit \ - uv run tests/scripts/check_handler_coverage.py > pytest-coverage-handlers.txt -COVERAGE_FILE=.coverage.unit uv run coverage html -d reports/htmlcov - -# Collect artifacts in a single directory -ARTIFACT_DIR="tests_artifacts" -mkdir -p "${ARTIFACT_DIR}" - -for artifact in pytest.xml coverage.xml .coverage.unit pytest-coverage.txt pytest-coverage-handlers.txt; do - if [[ -f "${artifact}" ]]; then - mv "${artifact}" "${ARTIFACT_DIR}/" - fi -done - -if [[ -d reports/htmlcov ]]; then - rm -rf "${ARTIFACT_DIR}/htmlcov" - mv reports/htmlcov "${ARTIFACT_DIR}/htmlcov" -fi diff --git a/setup.py 
b/setup.py deleted file mode 100644 index c85e4dad8b6..00000000000 --- a/setup.py +++ /dev/null @@ -1,146 +0,0 @@ -import os -import glob - -from setuptools import find_packages, setup - - -class Deps: - pkgs = [] - pkgs_exclude = ["tests", "tests.*"] - new_links = [] - extras = {} - - -about = {} -with open("mindsdb/__about__.py") as fp: - exec(fp.read(), about) - - -with open("README.md", "r", encoding="utf8") as fh: - long_description = fh.read() - - -def expand_requirements_links(requirements: list) -> list: - """Expand requirements that contain links to other requirement files""" - to_add = [] - to_remove = [] - - for requirement in requirements: - if requirement.startswith("-r "): - if os.path.exists(requirement.split()[1]): - with open(requirement.split()[1]) as fh: - to_add += expand_requirements_links([req.strip() for req in fh.read().splitlines()]) - to_remove.append(requirement) - - for req in to_remove: - requirements.remove(req) - for req in to_add: - requirements.append(req) - - return list(set(requirements)) # Remove duplicates - - -def define_deps(): - """Reads requirements.txt requirements-extra.txt files and preprocess it - to be feed into setuptools. - - This is the only possible way (we found) - how requirements.txt can be reused in setup.py - using dependencies from private github repositories. - - Links must be appendend by `-{StringWithAtLeastOneNumber}` - or something like that, so e.g. `-9231` works as well as - `1.1.0`. This is ignored by the setuptools, but has to be there. - - Warnings: - to make pip respect the links, you have to use - `--process-dependency-links` switch. So e.g.: - `pip install --process-dependency-links {git-url}` - - Returns: - list of packages, extras and dependency links. 
- """ - with open(os.path.normpath("requirements/requirements.txt")) as req_file: - defaults = [req.strip() for req in req_file.read().splitlines()] - - links = [] - requirements = [] - for r in defaults: - if "git+https" in r: - pkg = r.split("#")[-1] - links.append(r + "-9876543210") - requirements.append(pkg.replace("egg=", "")) - else: - requirements.append(r.strip()) - - extra_requirements = {} - full_requirements = [] - for fn in os.listdir(os.path.normpath("./requirements")): - if fn.startswith("requirements-") and fn.endswith(".txt"): - extra_name = fn.replace("requirements-", "").replace(".txt", "") - with open(os.path.normpath(f"./requirements/{fn}")) as fp: - extra = [req.strip() for req in fp.read().splitlines()] - extra_requirements[extra_name] = extra - full_requirements += extra - - extra_requirements["all_extras"] = list(set(full_requirements)) - extra_requirements["all"] = list(set(full_requirements)) - - full_handlers_requirements = [] - handlers_dir_path = os.path.normpath("./mindsdb/integrations/handlers") - for fn in os.listdir(handlers_dir_path): - if os.path.isdir(os.path.join(handlers_dir_path, fn)) and fn.endswith("_handler"): - base_extra_name = fn.replace("_handler", "") - extra_requirements[base_extra_name] = [] - for req_file_path in glob.glob(os.path.join(handlers_dir_path, fn, "requirements*.txt")): - extra_name = base_extra_name - file_name = os.path.basename(req_file_path) - if file_name != "requirements.txt": - extra_name += "-" + file_name.replace("requirements_", "").replace(".txt", "") - - # If requirements.txt in our handler folder, import them as our extra's requirements - with open(req_file_path) as fp: - extra = expand_requirements_links([req.strip() for req in fp.read().splitlines()]) - - extra_requirements[extra_name] = extra - full_handlers_requirements += extra - - extra_requirements["all_handlers_extras"] = list(set(full_handlers_requirements)) - - with 
open(os.path.normpath("requirements/requirements-opentelemetry.txt")) as req_file: - extra_requirements["opentelemetry"] = [req.strip() for req in req_file.read().splitlines()] - - with open(os.path.normpath("requirements/requirements-langfuse.txt")) as req_file: - extra_requirements["langfuse"] = [req.strip() for req in req_file.read().splitlines()] - - Deps.pkgs = requirements - Deps.extras = extra_requirements - Deps.new_links = links - - return Deps - - -deps = define_deps() - -setup( - name=about["__title__"], - version=about["__version__"], - url=about["__github__"], - download_url=about["__pypi__"], - license=about["__license__"], - author=about["__author__"], - author_email=about["__email__"], - description=about["__description__"], - long_description=long_description, - long_description_content_type="text/markdown", - packages=find_packages(exclude=deps.pkgs_exclude), - install_requires=deps.pkgs, - dependency_links=deps.new_links, - extras_require=deps.extras, - include_package_data=True, - classifiers=[ - "Programming Language :: Python :: 3", - "Operating System :: OS Independent", - ], - python_requires=">=3.10,<3.14", -) diff --git a/tests/README.md b/tests/README.md deleted file mode 100644 index e556e3c853e..00000000000 --- a/tests/README.md +++ /dev/null @@ -1,75 +0,0 @@ -## Test Structure - -Our tests are organized into several subdirectories, each focusing on different aspects of our application: - -* api: Contains tests related to the MindsDB's API endpoints. -* integration: Contains the integration tests -* load: Contains the load tests -* scripts: Scripts and utilitis used for tests -* unit: This directory contains the unit tests: - * handlers: A subset of unit tests specifically targeting data handlers. - * ml_handlers: A subset of unit tests specifically targeting ML handlers. - -## Installation - -To run the tests, you need to install the necessary dependencies. 
You can do this by running the following command: - -``` -pip install -r requirements/requirements.txt -r requirements/requirements-test.txt -``` - -This command will install all the required packages as listed in requirements-test.txt and the requirements.txt files. - -> Note: You will also need to install the dependencies required by any of the integrations that the tests you are running use. This can be done by running `pip install .[]` in the root directory of the repository. - -## Runing tests - -### Unit and Integration Tests - -To execute unit or integration tests, use the following pytest command: - -``` -pytest -vx -``` - -For example, to run the unit tests for the handlers, use the following command: - -``` -pytest -vx tests/unit/handlers -``` - -### Load Tests - -For load testing, we use `locust`: -``` -locust -f test_start.py --headless -u -r -``` -Options: - - * --headless: Runs Locust in headless mode without the web UI. - * -u: Specifies the total number of users to simulate. - * -r: Defines the spawn rate of users, i.e the number of users to spawn per second. - - > Note: The load tests require an environment variable called `INTEGRATIONS_CONFIG` to be set containing information related to the testing environments used. These environments will also need be pre-loaded with the necessary data for the tests to run successfully. - -## Generating Test Reports - -We use `pytest` for running tests and `coveralls` for generating test coverage reports. To run the tests and generate a coverage report, use the following command: - -``` -pytest --cov= && coveralls -``` - -For example, to run tests for the HTTP API, use the following command: - -``` -pytest --cov=mindsdb/api/http tests/unit/api/http && coveralls -``` - -For the above command to be successful, you need to either have the `COVERALLS_REPO_TOKEN` environment variable set to your Coveralls token or have a `.coveralls.yml` file in the root directory of the repository with the token. 
- -Otherwise, you simply run the following command: - -``` -pytest --cov= -``` \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index 3178a8fd583..00000000000 --- a/tests/conftest.py +++ /dev/null @@ -1,19 +0,0 @@ -import pytest - - -def pytest_addoption(parser): - parser.addoption("--runslow", action="store_true", default=False, help="run slow tests") - - -def pytest_configure(config): - config.addinivalue_line("markers", "slow: mark test as slow to run") - - -def pytest_collection_modifyitems(config, items): - if config.getoption("--runslow"): - # --runslow given in cli: do not skip slow tests - return - skip_slow = pytest.mark.skip(reason="need --runslow option to run") - for item in items: - if "slow" in item.keywords: - item.add_marker(skip_slow) diff --git a/tests/data/movies.csv b/tests/data/movies.csv deleted file mode 100644 index 4d2eb4174ad..00000000000 --- a/tests/data/movies.csv +++ /dev/null @@ -1,11 +0,0 @@ -movieId,title,genres -1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy -2,Jumanji (1995),Adventure|Children|Fantasy -3,Grumpier Old Men (1995),Comedy|Romance -4,Waiting to Exhale (1995),Comedy|Drama|Romance -5,Father of the Bride Part II (1995),Comedy -6,Heat (1995),Action|Crime|Thriller -7,Sabrina (1995),Comedy|Romance -8,Tom and Huck (1995),Adventure|Children -9,Sudden Death (1995),Action -10,GoldenEye (1995),Action|Adventure|Thriller diff --git a/tests/data/test.html b/tests/data/test.html deleted file mode 100644 index 1f410b54a67..00000000000 --- a/tests/data/test.html +++ /dev/null @@ -1,23 +0,0 @@ - - - -
-

Foo

-

Some intro text about Foo.

-
-

Bar main section

-

Some intro text about Bar.

-

Bar subsection 1

-

Some text about the first subtopic of Bar.

-

Bar subsection 2

-

Some text about the second subtopic of Bar.

-
-
-

Baz

-

Some text about Baz

-
-
-

Some concluding text about Foo

-
- - \ No newline at end of file diff --git a/tests/data/test.pdf b/tests/data/test.pdf deleted file mode 100644 index a9c693722ce..00000000000 Binary files a/tests/data/test.pdf and /dev/null differ diff --git a/tests/data/test.txt b/tests/data/test.txt deleted file mode 100644 index 94e7951eb98..00000000000 --- a/tests/data/test.txt +++ /dev/null @@ -1 +0,0 @@ -This is a test plaintext file! \ No newline at end of file diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py deleted file mode 100644 index 7a47f5c93e9..00000000000 --- a/tests/integration/conftest.py +++ /dev/null @@ -1,229 +0,0 @@ -import io -import os -import uuid -from textwrap import dedent - -import pytest -import requests -from filelock import FileLock - -from mindsdb.utilities.constants import DEFAULT_COMPANY_ID, DEFAULT_USER_ID - -int_url_split = os.environ.get("INTERNAL_URL", "localhost").split(":") - -INTERNAL_URL = int_url_split[0] -if len(int_url_split) == 1: - HTTP_PORT = "80" if "svc.cluster.local" in INTERNAL_URL else "47334" -else: - HTTP_PORT = int_url_split[1] - -HTTP_API_ROOT = f"http://{INTERNAL_URL}:{HTTP_PORT}/api" -MYSQL_API_ROOT = INTERNAL_URL - -lock = FileLock("train_finetune.lock") - -# Generate unique test session ID to avoid conflicts between test runs -TEST_SESSION_ID = os.environ.get("TEST_SESSION_ID", uuid.uuid4().hex[:8]) - - -def get_test_resource_name(base_name: str) -> str: - """Generate a unique resource name for this test session.""" - return f"{base_name}_{TEST_SESSION_ID}" - - -def get_test_company_id(base_id: int = 1) -> str: - """Generate a unique company ID for this test session. - - Uses underscores instead of hyphens to be SQL-safe when MindsDB - uses these IDs internally for table names or storage paths. 
- """ - return f"{TEST_SESSION_ID}_0000_0000_0000_{base_id:012d}" - - -def get_test_user_id(base_id: int = 1) -> str: - """Generate a unique user ID for this test session. - - Uses underscores instead of hyphens to be SQL-safe when MindsDB - uses these IDs internally for table names or storage paths. - """ - return f"{TEST_SESSION_ID}_0000_0000_0001_{base_id:012d}" - - -class TestResourceTracker: - """ - Tracks resources created during tests for cleanup. - Only tracks and cleans test-specific resources. - - IMPORTANT: We do NOT track or drop databases/integrations. - We only clean up the specific rows/resources we created: - - Models, Views, ML Engines, Knowledge Bases, Tabs, Files - """ - - def __init__(self): - self.models = set() # (project_name, model_name) tuples - self.views = set() # (project_name, view_name) tuples - self.ml_engines = set() # ML engine names - self.knowledge_bases = set() # KB names - self.tabs = [] # (company_id, user_id, tab_id) tuples - self.files = set() # File names - - def track_model(self, model_name: str, project_name: str = "mindsdb"): - """Track a model for cleanup.""" - self.models.add((project_name, model_name)) - - def track_view(self, view_name: str, project_name: str = "mindsdb"): - """Track a view for cleanup.""" - self.views.add((project_name, view_name)) - - def track_ml_engine(self, name: str): - """Track an ML engine for cleanup.""" - self.ml_engines.add(name) - - def track_knowledge_base(self, name: str): - """Track a knowledge base for cleanup.""" - self.knowledge_bases.add(name) - - def track_tab(self, company_id: str, user_id: str, tab_id: int): - """Track a tab for cleanup.""" - self.tabs.append((company_id, user_id, tab_id)) - - def track_file(self, name: str): - """Track a file for cleanup.""" - self.files.add(name) - - -# Global tracker instance -_resource_tracker = TestResourceTracker() - - -def get_resource_tracker() -> TestResourceTracker: - """Get the global resource tracker.""" - return _resource_tracker - - 
-@pytest.fixture(scope="session") -def test_session_id(): - """Unique identifier for this test session.""" - return TEST_SESSION_ID - - -@pytest.fixture(scope="session") -def resource_tracker(): - """Resource tracker for cleanup.""" - return _resource_tracker - - -@pytest.fixture(scope="session") -def train_finetune_lock(): - """ - Fixture to lock the training and fine-tuning process for the session. - Because mindsdb can't have multiple models training/fine-tuning at the same time, - """ - return lock - - -def _execute_cleanup_sql(sql: str, company_id: str = DEFAULT_COMPANY_ID, user_id: str = DEFAULT_USER_ID): - """Execute a SQL statement for cleanup, ignoring errors.""" - try: - headers = {"company-id": company_id, "user-id": user_id} - payload = {"query": sql, "context": {}} - response = requests.post(f"{HTTP_API_ROOT}/sql/query", json=payload, headers=headers, timeout=30) - return response.status_code == 200 - except Exception: - return False - - -def _cleanup_resources(tracker: TestResourceTracker): - """ - Clean up all tracked resources. - - Cleanup order respects dependencies: - 1. Tabs - independent user data - 2. Files - independent uploaded files - 3. Knowledge bases - self-contained - 4. Views - may reference data from integrations - 5. Models - depend on ML engines (must delete before ML engines) - 6. ML engines - used by models (delete after models) - - NOTE: We do NOT drop databases/integrations - only the specific - resources (rows) we created during tests. - """ - # 1. Clean up tabs (independent) - for company_id, user_id, tab_id in tracker.tabs: - try: - headers = {"company-id": company_id, "user-id": user_id} - requests.delete(f"{HTTP_API_ROOT}/tabs/{tab_id}", headers=headers, timeout=10) - except Exception: - pass - - # 2. Clean up files (independent) - for file_name in tracker.files: - try: - requests.delete(f"{HTTP_API_ROOT}/files/{file_name}", timeout=10) - except Exception: - pass - - # 3. 
Clean up knowledge bases (self-contained) - # Use backticks to quote identifiers in case they contain special chars - for kb_name in tracker.knowledge_bases: - _execute_cleanup_sql(f"DROP KNOWLEDGE BASE IF EXISTS `{kb_name}`") - - # 4. Clean up views (reference data, not models) - for project_name, view_name in tracker.views: - _execute_cleanup_sql(f"DROP VIEW IF EXISTS `{project_name}`.`{view_name}`") - - # 5. Clean up models (depend on ML engines - must delete BEFORE ML engines) - for project_name, model_name in tracker.models: - _execute_cleanup_sql(f"DROP MODEL IF EXISTS `{project_name}`.`{model_name}`") - - # 6. Clean up ML engines (used by models - delete AFTER models) - for engine_name in tracker.ml_engines: - _execute_cleanup_sql(f"DROP ML_ENGINE IF EXISTS `{engine_name}`") - - -@pytest.fixture(scope="session", autouse=True) -def session_cleanup(resource_tracker): - """ - Session-scoped fixture that cleans up test resources after all tests complete. - This runs automatically at the end of the test session. 
- """ - # Yield to run all tests first - yield - - # Cleanup after all tests - _cleanup_resources(resource_tracker) - - -def create_byom(name: str, target_column: str = "test", company_id=None, user_id=None): - headers = {} - if company_id is not None: - headers["company-id"] = str(company_id) - if user_id is not None: - headers["user-id"] = str(user_id) - - def get_file(): - return io.BytesIO( - dedent(f""" - import pandas as pd - class CustomPredictor(): - def train(self, df, target_column, args=None): - pass - def predict(self, df, *args, **kwargs): - return pd.DataFrame([[1]], columns=['{target_column}']) - def describe(self, model_state, attribute): - return 'x' - """).encode() - ) - - response = requests.put( - f"{HTTP_API_ROOT}/handlers/byom/{name}", - files={ - "code": ("test.py", get_file(), "text/x-python"), - }, - data={ - "type": "inhouse", - }, - headers=headers, - ) - if response.status_code not in (200, 409): - raise Exception(f"Error creating BYOM engine: {response.text}") diff --git a/tests/integration/flows/__init__.py b/tests/integration/flows/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/flows/test_company_independent.py b/tests/integration/flows/test_company_independent.py deleted file mode 100644 index c24b4a68168..00000000000 --- a/tests/integration/flows/test_company_independent.py +++ /dev/null @@ -1,271 +0,0 @@ -import json -import pytest - -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from tests.integration.utils.http_test_helpers import HTTPHelperMixin -from tests.integration.conftest import get_test_company_id, get_test_user_id, get_test_resource_name, create_byom - -# Use unique company/user IDs to avoid conflicts between test runs -CID_A = get_test_company_id(1) -CID_B = get_test_company_id(2) -USER_ID_A = get_test_user_id(1) -USER_ID_B = get_test_user_id(2) - -# Unique resource names -DB_NAME_A = get_test_resource_name("test_integration_a") -DB_NAME_B = 
get_test_resource_name("test_integration_b") -ML_ENGINE_NAME = get_test_resource_name("test_comp_ml") -VIEW_NAME_A = get_test_resource_name("test_view_a") -VIEW_NAME_B = get_test_resource_name("test_view_b") -MODEL_NAME_A = get_test_resource_name("model_a") -MODEL_NAME_B = get_test_resource_name("model_b") - - -class TestCompanyIndependent(HTTPHelperMixin): - def get_db_names(self, company_id: str, user_id: str): - response = self.sql_via_http( - "show databases", company_id=company_id, user_id=user_id, expected_resp_type=RESPONSE_TYPE.TABLE - ) - return [x[0].lower() for x in response["data"]] - - def get_tables_in(self, table, company_id: str, user_id: str): - response = self.sql_via_http( - f"SHOW TABLES FROM {table}", company_id=company_id, user_id=user_id, expected_resp_type=RESPONSE_TYPE.TABLE - ) - return [x[0].lower() for x in response["data"]] - - def get_ml_engines(self, company_id: str, user_id: str): - response = self.sql_via_http( - "SHOW ML_ENGINES", company_id=company_id, user_id=user_id, expected_resp_type=RESPONSE_TYPE.TABLE - ) - return [x[0].lower() for x in response["data"]] - - def assert_list(self, a, b): - a = set(a) - b = set(b) - assert len(a) == len(b) - assert a == b - - def test_add_data_db_http(self): - self.sql_via_http("CREATE PROJECT IF NOT EXISTS mindsdb;", company_id=CID_A, user_id=USER_ID_A) - self.sql_via_http("CREATE PROJECT IF NOT EXISTS mindsdb;", company_id=CID_B, user_id=USER_ID_B) - db_details = { - "type": "postgres", - "connection_data": { - "type": "postgres", - "host": "samples.mindsdb.com", - "port": "5432", - "user": "demo_user", - "password": "demo_password", - "database": "demo", - }, - } - - self.sql_via_http( - f"DROP DATABASE IF EXISTS {DB_NAME_A}", - company_id=CID_A, - user_id=USER_ID_A, - expected_resp_type=RESPONSE_TYPE.OK, - ) - self.sql_via_http( - f"DROP DATABASE IF EXISTS {DB_NAME_B}", - company_id=CID_B, - user_id=USER_ID_B, - expected_resp_type=RESPONSE_TYPE.OK, - ) - - self.sql_via_http( - f""" - 
CREATE DATABASE {DB_NAME_A} - ENGINE '{db_details["type"]}' - PARAMETERS {json.dumps(db_details["connection_data"])} - """, - company_id=CID_A, - user_id=USER_ID_A, - expected_resp_type=RESPONSE_TYPE.OK, - ) - - databases_names_a = self.get_db_names(CID_A, USER_ID_A) - assert "information_schema" in databases_names_a - assert "mindsdb" in databases_names_a - assert DB_NAME_A.lower() in databases_names_a - - databases_names_b = self.get_db_names(CID_B, USER_ID_B) - assert "information_schema" in databases_names_b - assert "mindsdb" in databases_names_b - - self.sql_via_http( - f"DROP DATABASE IF EXISTS {DB_NAME_B}", - company_id=CID_A, - user_id=USER_ID_A, - expected_resp_type=RESPONSE_TYPE.OK, - ) - self.sql_via_http( - f""" - CREATE DATABASE {DB_NAME_B} - ENGINE '{db_details["type"]}' - PARAMETERS {json.dumps(db_details["connection_data"])} - """, - company_id=CID_B, - user_id=USER_ID_B, - expected_resp_type=RESPONSE_TYPE.OK, - ) - - databases_names_a = self.get_db_names(CID_A, USER_ID_A) - assert DB_NAME_A.lower() in databases_names_a - - databases_names_b = self.get_db_names(CID_B, USER_ID_B) - assert DB_NAME_B.lower() in databases_names_b - - self.sql_via_http( - f"DROP DATABASE {DB_NAME_A}", company_id=CID_A, user_id=USER_ID_A, expected_resp_type=RESPONSE_TYPE.OK - ) - - databases_names_a = self.get_db_names(CID_A, USER_ID_A) - assert DB_NAME_A.lower() not in databases_names_a - - databases_names_b = self.get_db_names(CID_B, USER_ID_B) - assert DB_NAME_B.lower() in databases_names_b - - self.sql_via_http( - f""" - CREATE DATABASE {DB_NAME_A} - ENGINE '{db_details["type"]}' - PARAMETERS {json.dumps(db_details["connection_data"])} - """, - company_id=CID_A, - user_id=USER_ID_A, - expected_resp_type=RESPONSE_TYPE.OK, - ) - - databases_names_a = self.get_db_names(CID_A, USER_ID_A) - assert DB_NAME_A.lower() in databases_names_a - - databases_names_b = self.get_db_names(CID_B, USER_ID_B) - assert DB_NAME_B.lower() in databases_names_b - - response = 
self.sql_via_http( - f"select * from {DB_NAME_A}.demo_data.home_rentals limit 10", - company_id=CID_A, - user_id=USER_ID_A, - expected_resp_type=RESPONSE_TYPE.TABLE, - ) - assert len(response["data"]) == 10 - - response = self.sql_via_http( - f"select * from {DB_NAME_A}.demo_data.home_rentals limit 10", - company_id=CID_B, - user_id=USER_ID_B, - expected_resp_type=RESPONSE_TYPE.ERROR, - ) - - @pytest.mark.skip(reason="Requires ML handler (lightwood removed)") - def test_add_ml_engine(self): - tracker = self.get_resource_tracker() - - for cid, user_id in [(CID_A, USER_ID_A), (CID_B, USER_ID_B)]: - self.sql_via_http( - f"DROP ML_ENGINE IF EXISTS {ML_ENGINE_NAME}", - company_id=cid, - user_id=user_id, - expected_resp_type=RESPONSE_TYPE.OK, - ) - self.sql_via_http( - f"CREATE ML_ENGINE {ML_ENGINE_NAME} FROM test_ml_engine", - company_id=cid, - user_id=user_id, - expected_resp_type=RESPONSE_TYPE.OK, - ) - tracker.track_ml_engine(ML_ENGINE_NAME) - - assert ML_ENGINE_NAME.lower() in self.get_ml_engines(cid, user_id) - - def test_views(self): - tracker = self.get_resource_tracker() - - # Map of (company_id, user_id) -> (db_name, view_name, model_name) - test_configs = [ - (CID_A, USER_ID_A, DB_NAME_A, VIEW_NAME_A, MODEL_NAME_A), - (CID_B, USER_ID_B, DB_NAME_B, VIEW_NAME_B, MODEL_NAME_B), - ] - - for cid, user_id, db_name, view_name, model_name in test_configs: - self.sql_via_http(f"DROP VIEW IF EXISTS {view_name}", company_id=cid, user_id=user_id) - self.sql_via_http(f"DROP MODEL IF EXISTS {model_name}", company_id=cid, user_id=user_id) - self.sql_via_http( - f""" - CREATE VIEW {view_name} - FROM {db_name} ( - select * from demo_data.home_rentals limit 50 - ) - """, - company_id=cid, - user_id=user_id, - expected_resp_type=RESPONSE_TYPE.OK, - ) - tracker.track_view(view_name) - - tables = self.get_tables_in("mindsdb", cid, user_id) - assert "models" in tables - assert view_name.lower() in tables - - for cid, user_id, db_name, view_name, model_name in test_configs: - 
response = self.sql_via_http( - f"select * from {view_name}", - company_id=cid, - user_id=user_id, - expected_resp_type=RESPONSE_TYPE.TABLE, - ) - assert len(response["data"]) == 50 - - response = self.sql_via_http( - f"DROP VIEW {view_name}", company_id=cid, user_id=user_id, expected_resp_type=RESPONSE_TYPE.OK - ) - - tables = self.get_tables_in("mindsdb", cid, user_id) - assert "models" in tables - assert view_name.lower() not in tables - - self.sql_via_http( - f"select * from {view_name}", - company_id=cid, - user_id=user_id, - expected_resp_type=RESPONSE_TYPE.ERROR, - ) - - @pytest.mark.skip( - reason="Disabled after deleting lightwood. No suitable handler available and BYOM usage restricted." - ) - def test_model(self, train_finetune_lock): - tracker = self.get_resource_tracker() - - # Map of (company_id, user_id) -> (db_name, model_name) - test_configs = [ - (CID_A, USER_ID_A, DB_NAME_A, MODEL_NAME_A), - (CID_B, USER_ID_B, DB_NAME_B, MODEL_NAME_B), - ] - - for cid, user_id, db_name, model_name in test_configs: - create_byom("test_ml_engine", target_column="rental_price", company_id=cid, user_id=user_id) - with train_finetune_lock.acquire(timeout=600): - self.sql_via_http( - f""" - CREATE MODEL {model_name} - FROM {db_name} ( - select * from demo_data.home_rentals limit 50 - ) PREDICT rental_price - USING engine='test_ml_engine', join_learn_process=true - """, - company_id=cid, - user_id=user_id, - expected_resp_type=RESPONSE_TYPE.TABLE, - ) - tracker.track_model(model_name) - - response = self.sql_via_http( - f"select * from {model_name} where sqft = 100", - company_id=cid, - user_id=user_id, - expected_resp_type=RESPONSE_TYPE.TABLE, - ) - assert len(response["data"]), 1 diff --git a/tests/integration/flows/test_http.py b/tests/integration/flows/test_http.py deleted file mode 100644 index f50dca05f2e..00000000000 --- a/tests/integration/flows/test_http.py +++ /dev/null @@ -1,620 +0,0 @@ -import json -from typing import List - -import requests -import pytest - 
-from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from tests.integration.conftest import HTTP_API_ROOT, create_byom -from tests.integration.utils.http_test_helpers import HTTPHelperMixin - - -class TestHTTP(HTTPHelperMixin): - # Unique resource names for this test session - POSTGRES_DB_NAME = None - MARIADB_DB_NAME = None - MYSQL_DB_NAME = None - MODEL_NAME = None - FILE_NAME = None - - @staticmethod - def get_files_list(): - response = requests.request("GET", f"{HTTP_API_ROOT}/files/") - assert response.status_code == 200 - response_data = response.json() - assert isinstance(response_data, list) - return response_data - - @classmethod - def setup_class(cls): - cls._sql_via_http_context = {} - # Initialize unique resource names for this test session - cls.POSTGRES_DB_NAME = cls.get_unique_name("test_http_postgres") - cls.MARIADB_DB_NAME = cls.get_unique_name("test_http_mariadb") - cls.MYSQL_DB_NAME = cls.get_unique_name("test_http_mysql") - cls.MODEL_NAME = cls.get_unique_name("p_test_http") - cls.FILE_NAME = cls.get_unique_name("movies") - - def create_database(self, name, db_data): - db_type = db_data["type"] - # Drop any existing DB with this name to avoid conflicts - self.sql_via_http(f"DROP DATABASE IF EXISTS {name};", RESPONSE_TYPE.OK) - self.sql_via_http( - f"CREATE DATABASE {name} WITH ENGINE = '{db_type}', PARAMETERS = {json.dumps(db_data['connection_data'])};", - RESPONSE_TYPE.OK, - ) - - def validate_database_creation(self, name): - res = self.sql_via_http(f"SELECT name FROM information_schema.databases WHERE name='{name}';") - assert name in res["data"][0], f"Expected datasource is not found after creation - {name}: {res}" - - @pytest.mark.parametrize("util_uri", ["util/ping", "util/ping_native"]) - def test_utils(self, util_uri): - """ - Call utilities ping endpoint - THEN check the response is success - """ - - path = f"{HTTP_API_ROOT}/{util_uri}" - response = requests.get(path) - assert response.status_code == 200 - - def 
test_auth(self): - session = requests.Session() - - response = session.get(f"{HTTP_API_ROOT}/status") - assert response.status_code == 200 - assert response.json()["auth"]["http_auth_enabled"] is False - - response = session.get(f"{HTTP_API_ROOT}/config/") - assert response.status_code == 200 - config_payload = response.json() - assert config_payload["auth"]["http_auth_enabled"] is False - original_auth_config = config_payload["auth"].copy() - - test_failed = True - try: - response = session.get(f"{HTTP_API_ROOT}/tree/") - assert response.status_code == 200 - - response = session.put( - f"{HTTP_API_ROOT}/config/", json={"http_auth_enabled": True, "username": "", "password": ""} - ) - assert response.status_code == 400 - - response = session.put( - f"{HTTP_API_ROOT}/config/", - json={"auth": {"http_auth_enabled": True, "username": "mindsdb", "password": "mindsdb"}}, - ) - assert response.status_code == 200 - - response = session.get(f"{HTTP_API_ROOT}/status") - assert response.status_code == 200 - assert response.json()["auth"]["http_auth_enabled"] is True - - response = session.get(f"{HTTP_API_ROOT}/tree/") - assert response.status_code == 401 - - response = session.post(f"{HTTP_API_ROOT}/login", json={"username": "mindsdb", "password": "mindsdb"}) - assert response.status_code == 200 - - token = response.json().get("token") - session.headers.update({"Authorization": f"Bearer {token}"}) - - response = session.get(f"{HTTP_API_ROOT}/tree/") - assert response.status_code == 200 - - response = session.post(f"{HTTP_API_ROOT}/logout") - assert response.status_code == 200 - - response = session.get(f"{HTTP_API_ROOT}/tree/") - assert response.status_code == 401 - - response = session.post(f"{HTTP_API_ROOT}/login", json={"username": "mindsdb", "password": "mindsdb"}) - assert response.status_code == 200 - token = response.json().get("token") - session.headers.update({"Authorization": f"Bearer {token}"}) - - response = session.put( - f"{HTTP_API_ROOT}/config/", - 
json={"auth": {"http_auth_enabled": False, "username": "mindsdb", "password": ""}}, - ) - - response = session.get(f"{HTTP_API_ROOT}/status") - assert response.status_code == 200 - assert response.json()["auth"]["http_auth_enabled"] is False - test_failed = False - finally: - restore_response = session.put( - f"{HTTP_API_ROOT}/config/", - json={"auth": original_auth_config}, - ) - if not test_failed: - assert restore_response.status_code == 200, ( - f"Failed to restore auth config, received {restore_response.status_code}" - ) - - def test_gui_is_served(self): - """ - GUI downloaded and available - """ - response = requests.get(HTTP_API_ROOT.split("/api")[0]) - assert response.status_code == 200 - assert response.content.decode().find("") > 0 - - def test_files(self): - """sql-via-http: - upload file - delete file - upload file again - """ - file_name = self.FILE_NAME - self.sql_via_http(f"DROP TABLE IF EXISTS files.{file_name};", RESPONSE_TYPE.OK) - assert file_name not in [file["name"] for file in self.get_files_list()] - - with open("tests/data/movies.csv") as f: - files = {"file": (f"{file_name}.csv", f, "text/csv")} - - response = requests.request("PUT", f"{HTTP_API_ROOT}/files/{file_name}", files=files) - assert response.status_code == 200, f"Error uploading file. 
Response content: {response.content}" - - assert file_name in [file["name"] for file in self.get_files_list()] - # Track for cleanup - self.get_resource_tracker().track_file(file_name) - - response = requests.delete(f"{HTTP_API_ROOT}/files/{file_name}") - assert response.status_code == 200 - assert file_name not in [file["name"] for file in self.get_files_list()] - - # Upload the file again (to guard against bugs where we still think a deleted file exists) - with open("tests/data/movies.csv") as f: - files = {"file": (f"{file_name}.csv", f, "text/csv")} - - response = requests.request("PUT", f"{HTTP_API_ROOT}/files/{file_name}", files=files) - assert response.status_code == 200 - # Track for cleanup again - self.get_resource_tracker().track_file(file_name) - - def test_sql_select_from_file(self): - file_name = self.FILE_NAME - self.sql_via_http("use mindsdb", RESPONSE_TYPE.OK) - resp = self.sql_via_http(f"select * from files.{file_name}", RESPONSE_TYPE.TABLE) - assert len(resp["data"]) == 10 - assert len(resp["column_names"]) == 3 - - resp = self.sql_via_http( - f"select title, title as t1, title t2 from files.{file_name} limit 10", RESPONSE_TYPE.TABLE - ) - assert len(resp["data"]) == 10 - assert resp["column_names"] == ["title", "t1", "t2"] - assert resp["data"][0][0] == resp["data"][0][1] and resp["data"][0][0] == resp["data"][0][2] - - def test_sql_general_syntax(self): - """test sql in general""" - select_const_int = [ - "select 1", - "select 1;", - "SELECT 1", - "Select 1", - " select 1 ", - """ select - 1; - """, - ] - select_const_int_alias = [ - "select 1 as `2`", - # "select 1 as '2'", https://github.com/mindsdb/mindsdb_sql/issues/198 - 'select 1 as "2"', - "select 1 `2`", - # "select 1 '2'", https://github.com/mindsdb/mindsdb_sql/issues/198 - 'select 1 "2"', - ] - select_const_str = ['select "a"', "select 'a'"] - select_const_str_alias = [ - 'select "a" as b', - "select 'a' as b", - 'select "a" b', - # 'select "a" "b"', # => ab - 'select "a" `b`', - # 
"select 'a' 'b'" # => ab - ] - bunch = [ - {"queries": select_const_int, "result": 1, "alias": "1"}, - {"queries": select_const_int_alias, "result": 1, "alias": "2"}, - {"queries": select_const_str, "result": "a", "alias": "a"}, - {"queries": select_const_str_alias, "result": "a", "alias": "b"}, - ] - for group in bunch: - queries = group["queries"] - expected_result = group["result"] - expected_alias = group["alias"] - for query in queries: - print(query) - resp = self.sql_via_http(query, RESPONSE_TYPE.TABLE) - try: - assert len(resp["column_names"]) == 1 - assert resp["column_names"][0] == expected_alias - assert len(resp["data"]) == 1 - assert len(resp["data"][0]) == 1 - assert resp["data"][0][0] == expected_result - except Exception: - print(f"Error in query: {query}") - raise - - def test_context_changing(self): - file_name = self.FILE_NAME - resp = self.sql_via_http("use mindsdb", RESPONSE_TYPE.OK) - assert resp["context"]["db"] == "mindsdb" - - resp_1 = self.sql_via_http("show tables", RESPONSE_TYPE.TABLE) - table_names = [x[0] for x in resp_1["data"]] - assert file_name not in table_names - assert "models" in table_names - - resp = self.sql_via_http("use files", RESPONSE_TYPE.OK) - assert resp["context"]["db"] == "files" - - resp_4 = self.sql_via_http("show tables", RESPONSE_TYPE.TABLE) - table_names = [x[0] for x in resp_4["data"]] - assert file_name in table_names - assert "models" not in table_names - - @pytest.mark.parametrize( - "query", - [ - "show function status", - "show function status where db = 'mindsdb'", - "show procedure status", - "show procedure status where db = 'mindsdb'", - "show warnings", - ], - ) - def test_special_queries_empty_table(self, query): - resp = self.sql_via_http(query, RESPONSE_TYPE.TABLE) - assert len(resp["data"]) == 0 - - @pytest.mark.parametrize( - "query", - [ - "show databases", - "show schemas", - "show variables", - "show session status", - "show global variables", - "show engines", - "show charset", - "show 
collation", - ], - ) - def test_special_queries_not_empty_table(self, query): - resp = self.sql_via_http(query, RESPONSE_TYPE.TABLE) - assert len(resp["data"]) > 0 - - def test_special_queries_show_databases(self): - query = "show databases" - resp = self.sql_via_http(query, RESPONSE_TYPE.TABLE) - assert len(resp["column_names"]) == 1 - assert resp["column_names"][0] == "Database" - db_names = [x[0].lower() for x in resp["data"]] - assert "information_schema" in db_names - assert "mindsdb" in db_names - assert "files" in db_names - - def test_show_tables(self): - self.sql_via_http("use mindsdb", RESPONSE_TYPE.OK) - resp_1 = self.sql_via_http("show tables", RESPONSE_TYPE.TABLE) - resp_2 = self.sql_via_http("show tables from mindsdb", RESPONSE_TYPE.TABLE) - resp_3 = self.sql_via_http("show full tables from mindsdb", RESPONSE_TYPE.TABLE) - assert resp_1["data"].sort() == resp_2["data"].sort() - assert resp_1["data"].sort() == resp_3["data"].sort() - - def test_create_postgres_datasources(self): - db_details = { - "type": "postgres", - "connection_data": { - "host": "samples.mindsdb.com", - "port": "5432", - "user": "demo_user", - "password": "demo_password", - "database": "demo", - }, - } - self.create_database(self.POSTGRES_DB_NAME, db_details) - self.validate_database_creation(self.POSTGRES_DB_NAME) - - def test_create_mariadb_datasources(self): - db_details = { - "type": "mariadb", - "connection_data": { - "type": "mariadb", - "host": "samples.mindsdb.com", - "port": "3307", - "user": "demo_user", - "password": "demo_password", - "database": "test_data", - }, - } - self.create_database(self.MARIADB_DB_NAME, db_details) - self.validate_database_creation(self.MARIADB_DB_NAME) - - def test_create_mysql_datasources(self): - db_details = { - "type": "mysql", - "connection_data": { - "type": "mysql", - "host": "samples.mindsdb.com", - "port": "3306", - "user": "user", - "password": "MindsDBUser123!", - "database": "public", - }, - } - 
self.create_database(self.MYSQL_DB_NAME, db_details) - self.validate_database_creation(self.MYSQL_DB_NAME) - - @pytest.mark.skip( - reason="Disabled after deleting lightwood. No suitable handler available and BYOM usage restricted." - ) - def test_sql_create_predictor(self, train_finetune_lock): - model_name = self.MODEL_NAME - postgres_db = self.POSTGRES_DB_NAME - - self.sql_via_http("USE mindsdb;", RESPONSE_TYPE.OK) - self.sql_via_http(f"DROP MODEL IF EXISTS {model_name};", RESPONSE_TYPE.OK) - - create_byom("test_ml_engine", target_column="rental_price") - with train_finetune_lock.acquire(timeout=600): - self.sql_via_http( - f""" - create predictor {model_name} - from {postgres_db} (select sqft, location, rental_price from demo_data.home_rentals limit 30) - predict rental_price - using engine='test_ml_engine' - """, - RESPONSE_TYPE.TABLE, - ) - # Track for cleanup - self.get_resource_tracker().track_model(model_name) - status = self.await_model(model_name, timeout=120) - assert status == "complete" - - resp = self.sql_via_http( - f""" - select * from mindsdb.{model_name} where sqft = 1000 - """, - RESPONSE_TYPE.TABLE, - ) - sqft_index = resp["column_names"].index("sqft") - rental_price_index = resp["column_names"].index("rental_price") - assert len(resp["data"]) == 1 - assert resp["data"][0][sqft_index] == 1000 - assert resp["data"][0][rental_price_index] > 0 - - def test_list_projects(self): - project_name = "mindsdb" - response = self.api_request("get", "/projects") - assert response.status_code == 200, "Error to get list of projects" - - projects = [i["name"] for i in response.json()] - assert project_name in projects - - @pytest.mark.skip( - reason="Disabled after deleting lightwood. No suitable handler available and BYOM usage restricted." 
- ) - def test_list_models(self): - project_name = "mindsdb" - model_name = self.MODEL_NAME - response = self.api_request("get", f"/projects/{project_name}/models") - assert response.status_code == 200, "Error to get list of models" - models = [i["name"] for i in response.json()] - assert model_name in models - - @pytest.mark.skip( - reason="Disabled after deleting lightwood. No suitable handler available and BYOM usage restricted." - ) - def test_make_prediction(self): - project_name = "mindsdb" - model_name = self.MODEL_NAME - payload = {"data": [{"sqft": "1000"}, {"sqft": "500"}]} - response = self.api_request("post", f"/projects/{project_name}/models/{model_name}/predict", payload=payload) - assert response.status_code == 200, "Error to make prediction" - - # 1 prediction result (test byom return always 1 row) - assert len(response.json()) == 1 - - # 1st version of model - response = self.api_request("post", f"/projects/{project_name}/models/{model_name}.1/predict", payload=payload) - assert response.status_code == 200, "Error to make prediction" - - assert len(response.json()) == 1 - - def test_tabs(self): - # Use unique company/user IDs to avoid conflicts with other test runs - COMPANY_1_ID = self.get_unique_company_id(1) - COMPANY_2_ID = self.get_unique_company_id(2) - - USER_ID_1 = self.get_unique_user_id(1) - USER_ID_2 = self.get_unique_user_id(2) - - tracker = self.get_resource_tracker() - - def tabs_requets( - method: str, - url: str = "", - payload: dict = {}, - company_id: str = "0", - user_id: str = "0", - expected_status: int = 200, - ): - resp = self.api_request( - method, - f"/tabs/{url}", - payload=payload, - headers={"company-id": str(company_id), "user-id": str(user_id)}, - ) - assert resp.status_code == expected_status - return resp - - def compare_tabs(ta: dict, tb: dict) -> bool: - for key in ("id", "index", "name", "content"): - if ta.get(key) != tb.get(key): - return False - return True - - def compare_tabs_list(list_a: List[dict], list_b: 
List[dict]) -> bool: - if len(list_a) != len(list_b): - return False - for i in range(len(list_a)): - if compare_tabs(list_a[i], list_b[i]) is False: - return False - return True - - def tab(company_id: str, user_id: str, tab_number: int): - return { - "name": f"tab_name_{company_id}_{user_id}_{tab_number}", - "content": f"tab_content_{company_id}_{user_id}_{tab_number}", - } - - # users has empty tabs list - for company_id, user_id in ((COMPANY_1_ID, USER_ID_1), (COMPANY_2_ID, USER_ID_2)): - resp = tabs_requets("get", "?mode=new", company_id=company_id) - # Delete all tabs to begin with - for t in resp.json(): - tabs_requets("delete", str(t["id"]), company_id=company_id, user_id=user_id) - # Check that all tabs are deleted - resp = tabs_requets("get", company_id=company_id, user_id=user_id) - assert len(resp.json()) == 0 - - # add tab and check fields - tab_1_1 = tab(COMPANY_1_ID, USER_ID_1, 1) - tabs_requets("post", "?mode=new", payload=tab_1_1, company_id=COMPANY_1_ID, user_id=USER_ID_1) - resp_list = tabs_requets("get", "?mode=new", company_id=COMPANY_1_ID, user_id=USER_ID_1).json() - # Track tabs for cleanup - for t in resp_list: - tracker.track_tab(COMPANY_1_ID, USER_ID_1, t["id"]) - assert len(resp_list) == 1 - resp_1_1 = resp_list[0] - assert resp_1_1["name"] == tab_1_1["name"] - assert resp_1_1["content"] == tab_1_1["content"] - assert isinstance(resp_1_1["id"], int) - assert isinstance(resp_1_1["index"], int) - tab_1_1["id"] = resp_1_1["id"] - tab_1_1["index"] = resp_1_1["index"] - - # second list is empty - resp = tabs_requets("get", "?mode=new", company_id=COMPANY_2_ID, user_id=USER_ID_2).json() - assert len(resp) == 0 - - # add tab to second user - tab_2_1 = tab(COMPANY_2_ID, USER_ID_2, 1) - tabs_requets("post", "?mode=new", payload=tab_2_1, company_id=COMPANY_2_ID, user_id=USER_ID_2) - resp_list = tabs_requets("get", "?mode=new", company_id=COMPANY_2_ID, user_id=USER_ID_2).json() - assert len(resp_list) == 1 - resp_2_1 = resp_list[0] - assert 
resp_2_1["name"] == tab_2_1["name"] - assert resp_2_1["content"] == tab_2_1["content"] - tab_2_1["id"] = resp_2_1["id"] - tab_2_1["index"] = resp_2_1["index"] - # Track tab for cleanup - tracker.track_tab(COMPANY_2_ID, USER_ID_2, tab_2_1["id"]) - - # add few tabs for tests - tab_1_2 = tab(COMPANY_1_ID, USER_ID_1, 2) - tab_2_2 = tab(COMPANY_2_ID, USER_ID_2, 2) - for tab_dict, company_id, uid in ((tab_1_2, COMPANY_1_ID, USER_ID_1), (tab_2_2, COMPANY_2_ID, USER_ID_2)): - tab_meta = tabs_requets("post", "?mode=new", payload=tab_dict, company_id=company_id, user_id=uid).json()[ - "tab_meta" - ] - tab_dict["id"] = tab_meta["id"] - tab_dict["index"] = tab_meta["index"] - # Track tab for cleanup - tracker.track_tab(company_id, uid, tab_meta["id"]) - - resp_list = tabs_requets("get", "?mode=new", company_id=COMPANY_1_ID, user_id=USER_ID_1).json() - assert compare_tabs_list(resp_list, [tab_1_1, tab_1_2]) - - resp_list = tabs_requets("get", "?mode=new", company_id=COMPANY_2_ID, user_id=USER_ID_2).json() - assert compare_tabs_list(resp_list, [tab_2_1, tab_2_2]) - - # add tab to second index - tab_1_3 = tab(COMPANY_1_ID, USER_ID_1, 3) - tab_1_3["index"] = tab_1_1["index"] + 1 - tab_meta = tabs_requets( - "post", "?mode=new", payload=tab_1_3, company_id=COMPANY_1_ID, user_id=USER_ID_1 - ).json()["tab_meta"] - tab_1_3["id"] = tab_meta["id"] - # Track tab for cleanup - tracker.track_tab(COMPANY_1_ID, USER_ID_1, tab_meta["id"]) - tabs_list = tabs_requets("get", "?mode=new", company_id=COMPANY_1_ID, user_id=USER_ID_1).json() - assert len(tabs_list) == 3 - tab_1_1["index"] = tabs_list[0]["index"] - tab_1_3["index"] = tabs_list[1]["index"] - tab_1_2["index"] = tabs_list[2]["index"] - assert compare_tabs_list(tabs_list, [tab_1_1, tab_1_3, tab_1_2]) - assert tab_1_1["index"] < tab_1_3["index"] < tab_1_2["index"] - - # update tab content and index - tab_1_2["index"] = tab_1_1["index"] + 1 - tab_1_2["content"] = tab_1_2["content"] + "_new" - tab_meta = tabs_requets( - "put", - 
str(tab_1_2["id"]), - payload={"index": tab_1_2["index"], "content": tab_1_2["content"]}, - company_id=COMPANY_1_ID, - user_id=USER_ID_1, - ).json()["tab_meta"] - assert tab_meta["index"] == tab_1_2["index"] - assert tab_meta["name"] == tab_1_2["name"] - assert tab_meta["id"] == tab_1_2["id"] - tabs_list = tabs_requets("get", "?mode=new", company_id=COMPANY_1_ID, user_id=USER_ID_1).json() - tab_1_3["index"] = tab_1_2["index"] + 1 - assert compare_tabs_list(tabs_list, [tab_1_1, tab_1_2, tab_1_3]) - - # update tab content and name - tab_1_2["content"] = tab_1_2["content"] + "_new" - tab_1_2["name"] = tab_1_2["name"] + "_new" - tabs_requets( - "put", - str(tab_1_2["id"]), - payload={"name": tab_1_2["name"], "content": tab_1_2["content"]}, - company_id=COMPANY_1_ID, - user_id=USER_ID_1, - ) - tabs_list = tabs_requets("get", "?mode=new", company_id=COMPANY_1_ID, user_id=USER_ID_1).json() - assert compare_tabs_list(tabs_list, [tab_1_1, tab_1_2, tab_1_3]) - - # second list does not changed - tabs_list = tabs_requets("get", "?mode=new", company_id=COMPANY_2_ID, user_id=USER_ID_2).json() - assert compare_tabs_list(tabs_list, [tab_2_1, tab_2_2]) - - # get each tab one by one - for company_id, user_id, tabs in ( - (COMPANY_1_ID, USER_ID_1, [tab_1_1, tab_1_2, tab_1_3]), - (COMPANY_2_ID, USER_ID_2, [tab_2_1, tab_2_2]), - ): - for tab_dict in tabs: - tab_resp = tabs_requets("get", str(tab_dict["id"]), company_id=company_id, user_id=user_id).json() - assert compare_tabs(tab_resp, tab_dict) - - # check failures - tabs_requets("get", "99", company_id=COMPANY_1_ID, user_id=USER_ID_1, expected_status=404) - tabs_requets("delete", "99", company_id=COMPANY_1_ID, user_id=USER_ID_1, expected_status=404) - tabs_requets( - "post", - "?mode=new", - payload={"whaaat": "?", "name": "test"}, - company_id=COMPANY_1_ID, - user_id=USER_ID_1, - expected_status=400, - ) - tabs_requets( - "put", "99", payload={"name": "test"}, company_id=COMPANY_1_ID, user_id=USER_ID_1, expected_status=404 - ) - 
tabs_requets( - "put", - str(tab_1_1["id"]), - payload={"whaaat": "?"}, - company_id=COMPANY_1_ID, - user_id=USER_ID_1, - expected_status=400, - ) diff --git a/tests/integration/flows/test_knowledge_base.py b/tests/integration/flows/test_knowledge_base.py deleted file mode 100644 index 6088b0c58e1..00000000000 --- a/tests/integration/flows/test_knowledge_base.py +++ /dev/null @@ -1,437 +0,0 @@ -import json -import time -import datetime as dt -import os - -import pytest -import mindsdb_sdk - -from mindsdb.utilities import log -from tests.integration.conftest import HTTP_API_ROOT - - -logger = log.getLogger(__name__) - - -class HiddenVar(str): - """ - Doesn't show value of var in console - """ - - def __repr__(self): - return "..." - - -def get_configurations(): - storages = [ - # default storage - {"engine": "default"} - ] - - if "OPENAI_API_KEY" in os.environ: - embedding_model = { - "provider": "openai", - "model_name": "text-embedding-ada-002", - "api_key": HiddenVar(os.environ["OPENAI_API_KEY"]), - } - for storage in storages: - name = f"{storage['engine']}-{embedding_model['provider']}" - yield pytest.param(storage, embedding_model, id=name) - - # TODO: block for enabling bedrock llm provider (after defining AWS_ACCESS_KEY and AWS_SECRET_KEY) - # if "AWS_ACCESS_KEY" in os.environ and "AWS_SECRET_KEY" in os.environ: - # embedding_model = { - # "provider": "bedrock", - # "model_name": "amazon.titan-embed-text-v2:0", - # "aws_access_key_id": HiddenVar(os.environ["AWS_ACCESS_KEY"]), - # "aws_secret_access_key": HiddenVar(os.environ["AWS_SECRET_KEY"]), - # "aws_region_name": os.environ.get("AWS_REGION", "us-east-1"), - # } - # for storage in storages: - # name = f"{storage['engine']}-{embedding_model['provider']}" - # yield pytest.param(storage, embedding_model, id=name) - - -def get_rerank_configurations(): - # configurations with reranking model - - configurations = [] - for params in get_configurations(): - if isinstance(params, list): - storage, embedding_model 
= params - else: - # is pytest.param - storage, embedding_model = params.values - - # TODO: block for enabling gemini llm provider - # if "GEMINI_API_KEY" in os.environ: - # reranking_model = { - # "provider": "gemini", - # "model_name": "gemini-2.0-flash", - # "api_key": HiddenVar(os.environ["GEMINI_API_KEY"]), - # } - # configurations.append([storage, embedding_model, reranking_model]) - - if embedding_model["provider"] == "openai": - reranking_model = embedding_model.copy() - reranking_model["model_name"] = "gpt-4" - configurations.append([storage, embedding_model, reranking_model]) - elif embedding_model["provider"] == "bedrock": - reranking_model = embedding_model.copy() - reranking_model["model_name"] = "mistral.mistral-large-2402-v1:0" - configurations.append([storage, embedding_model, reranking_model]) - else: - configurations.append([storage, embedding_model, None]) - - for storage, embedding_model, reranking_model in configurations: - name = f"{storage['engine']}-{embedding_model['provider']}-{reranking_model.get('provider', 'x')}" - yield pytest.param(storage, embedding_model, reranking_model, id=name) - - -class KBTestBase: - @classmethod - def setup_class(cls): - cls.con = mindsdb_sdk.connect(HTTP_API_ROOT.removesuffix("/api")) - - cls.create_example_db() - - @classmethod - def create_example_db(cls): - name = "example_db" - - try: - cls.con.databases.get(name) - return name - - except AttributeError: - pass - - cls.con.databases.create( - name, - engine="postgres", - connection_args={ - "user": "demo_user", - "password": "demo_password", - "host": "samples.mindsdb.com", - "port": "5432", - "database": "demo", - "schema": "demo_data", - }, - ) - return name - - def create_vector_db(self, connection_args, name): - connection_args = connection_args.copy() - engine = connection_args.pop("engine") - - # TODO update database parameters. 
for now keep existing connection - # try: - # self.con.databases.drop(name) - # except RuntimeError as e: - # if "Database does not exists" not in str(e): - # raise e - - try: - self.con.databases.create(name, engine=engine, connection_args=connection_args) - except RuntimeError: - ... - - return name - - def run_sql(self, sql): - logger.debug(">>>", sql) - resp = self.con.query(sql).fetch() - logger.debug("--- response ---") - logger.debug(resp) - return resp - - def create_kb(self, name, storage, embedding_model, reranking_model=None, params=None): - # remove if exists - db_name = f"db_{name}" - table_name = "test_table" - - # -- drop kb -- - try: - kb = self.con.knowledge_bases.get(name) - db_name = kb.storage.db.name - table_name = kb.storage.name - - self.con.knowledge_bases.drop(name) - except Exception: - ... - - # -- drop db -- - - try: - db = self.con.databases.get(db_name) - except Exception: - db = None - - if db is not None: - try: - db.tables.drop(table_name) - except Exception: - ... - try: - self.con.databases.drop(db_name) - except Exception: - ... 
- - # -- create -- - - # prepare KB - kb_params = { - "embedding_model": embedding_model, - # "metadata_columns": ["status", "category"], - # "content_columns": ["message_body"], - # "id_column": "id", - } - if params is not None: - kb_params.update(params) - - if reranking_model is not None: - kb_params["reranking_model"] = reranking_model - - param_str = "" - if kb_params: - param_items = [] - for k, v in kb_params.items(): - param_items.append(f"{k}={json.dumps(v)}") - param_str = ",".join(param_items) - - if storage["engine"] != "default": - self.create_vector_db(storage, db_name) - - param_str += f", storage = {db_name}.{table_name}" - - # Clean up any existing KB with the same name before creating - self.run_sql(f"DROP KNOWLEDGE BASE IF EXISTS {name}") - - self.run_sql(f""" - create knowledge base {name} - using {param_str} - """) - - -class TestKB(KBTestBase): - @pytest.mark.parametrize("storage, embedding_model, reranking_model", get_rerank_configurations()) - def test_base_syntax(self, storage, embedding_model, reranking_model): - # --- Test data ingestion --- - kb_name = f"test_{storage['engine']}_kb_crm" - - def to_date(s): - return dt.datetime.strptime(s, "%Y-%m-%d %H:%M:%S.%f") - - # Create KB and start load in thread - self.create_kb( - kb_name, - storage, - embedding_model, - reranking_model, - params={ - "metadata_columns": ["status", "category"], - "content_columns": ["message_body"], - "id_column": "pk", - }, - ) - - logger.debug("start loading") - count_rows = 30 - ret = self.run_sql(f""" - insert into {kb_name} - select * from example_db.demo.crm_demo - order by pk - limit {count_rows} - using batch_size=10, track_column=pk - """) - - if "ID" not in ret.columns: - raise RuntimeError("Query is not partitioned") - - duration = None - for i in range(100): # 200 sec min max - time.sleep(1) - - ret = self.run_sql(f"describe knowledge base {kb_name}") - record = ret.iloc[0] - - if record["INSERT_FINISHED_AT"] is not None: - duration = 
(to_date(record["INSERT_FINISHED_AT"]) - to_date(record["INSERT_STARTED_AT"])).seconds - logger.debug(f"loading completed in {duration}s") - break - - if record["ERROR"] is not None: - raise RuntimeError(record["ERROR"]) - if duration is None: - raise RuntimeError("Timeout to finish query") - - ret = self.run_sql(f"select * from {kb_name}") - assert len(ret) == count_rows - - # --- test metadata --- - - # -- Metadata search - ret = self.run_sql(f""" - SELECT * - FROM {kb_name} - WHERE category = "Battery"; - """) - assert set(ret.metadata.apply(lambda x: x.get("category"))) == {"Battery"} - - ret = self.run_sql(f""" - SELECT * - FROM {kb_name} - WHERE status = "solving" AND category = "Battery" - """) - assert set(ret.metadata.apply(lambda x: x.get("category"))) == {"Battery"} - assert set(ret.metadata.apply(lambda x: x.get("status"))) == {"solving"} - - # -- Content + metadata search with limit - ret = self.run_sql(f""" - SELECT * - FROM {kb_name} - WHERE status = "solving" AND content = "noise" and reranking=false - LIMIT 5; - """) - assert set(ret.metadata.apply(lambda x: x.get("status"))) == {"solving"} - assert "noise" in ret.chunk_content[0] - assert len(ret) == 5 - - # -- Content + metadata search with limit and re-ranking threshold - ret = self.run_sql(f""" - SELECT * - FROM {kb_name} - WHERE status = "solving" - AND content = "noise" AND reranking=false AND relevance>=0.5 - """) - assert set(ret.metadata.apply(lambda x: x.get("status"))) == {"solving"} - assert "noise" in ret.chunk_content[0] # first line contents word - assert len(ret[ret.relevance < 0.5]) == 0 - - # checking columns - for column in ["id", "chunk_content", "metadata", "distance", "relevance"]: - assert column in ret.columns, f"Column {column} does not exist in response" - - if storage["engine"] == "pgvector": - # some operators don't work with chromadb - - # like / not like - ret = self.run_sql(f"select id, metadata, chunk_content from {kb_name} where category like '%ttery'") - assert 
len([row for _, row in ret.iterrows() if "Battery" not in str(row["metadata"])]) == 0 - - ret = self.run_sql(f"select id, metadata, chunk_content from {kb_name} where category not like '%ttery'") - assert len([row for _, row in ret.iterrows() if "Battery" in str(row["metadata"])]) == 0 - - # -------- insert values ------------- - - logger.debug("insert from values") - for i in range(2): - # do it twice second time it will be updated - self.run_sql(f""" - insert into {kb_name} (pk, message_body) values - (1000, 'Help'), (1001, 'Thank you'), (1002, 'Thank you') - """) - count_rows += 3 - - ret = self.run_sql(f"select * from {kb_name}") - assert len(ret) == count_rows - - # ---------- selecting by id -------- - - logger.debug("filter by id") - ret = self.run_sql(f"select id, chunk_content from {kb_name} where id = 1001") - assert len(ret) == 1 - assert ret["chunk_content"][0] == "Thank you" - - ret = self.run_sql(f"select id, chunk_content from {kb_name} where id != 1000 limit 4") - assert len(ret) == 4 - assert 1000 not in ret["id"] - - # in, not in - ret = self.run_sql(f"select id, chunk_content from {kb_name} where id in (1001, 1000)") - assert len(ret) == 2 - assert set(ret["id"]) == {1000, 1001} - - ret = self.run_sql(f"select id, chunk_content from {kb_name} where id not in ('1001', '1000') limit 4") - assert len(ret) == 4 - assert "1000" not in list(ret["id"]) - - logger.debug("outer query") - ret = self.run_sql( - f"select chunk_content, count(1) count, max(id) max from {kb_name} where id > 999 group by chunk_content order by max(id) desc" - ) - assert len(ret) == 2 - assert ret["max"][0] == 1002 - assert ret["count"][0] == 2 - assert ret["chunk_content"][0] == "Thank you" - - # ------------------- join with table ------------- - ret = self.run_sql(f""" - select k.chunk_content, t.message_body, k.id, t.pk - from {kb_name} k - join example_db.demo.crm_demo t on t.pk = k.id - where k.content = 'Help' and k.id not in (1000, 1001, 1002) - and k.reranking=false - 
limit 4 - """) - - row = ret.iloc[0] - assert row["chunk_content"] == row["message_body"] - assert row["id"] == row["pk"] - - # ----------------- delete/update data --------------- - - # delete - self.run_sql(f"delete from {kb_name} where id = 1") - ret = self.run_sql(f"select * from {kb_name} where id = 1") - assert len(ret) == 0 - - self.run_sql(f"delete from {kb_name} where id in (1001, 2)") - ret = self.run_sql(f"select * from {kb_name} where id in (1001, 2)") - assert len(ret) == 0 - - # update - ret = self.run_sql(f"select * from {kb_name} where id = 1002") - chunk_id = ret["chunk_id"][0] - - self.run_sql(f"update {kb_name} set content = 'FINE' where chunk_id = '{chunk_id}'") - ret = self.run_sql(f"select * from {kb_name} where id = 1002") - assert len(ret) == 1 - assert ret["chunk_content"][0] == "FINE" - - # TODO update by id don't work - # should it update all chunks? - - if reranking_model is None: - return - - # ----------------- search with reranking --------------- - - threshold = 0.4 - ret = self.run_sql(f""" - SELECT * - FROM {kb_name} - WHERE status = "solving" AND content = "noise" AND relevance>={threshold} - """) - assert set(ret.metadata.apply(lambda x: x.get("status"))) == {"solving"} - for item in ret.chunk_content: - assert "noise" in item # all lines line contents word - - assert len(ret[ret.relevance < threshold]) == 0 - - # --- evaluate --- - - ret = self.run_sql(f""" - Evaluate knowledge base {kb_name} - using - test_table = files.test_eval_{kb_name}, - generate_data = {{ - 'from_sql': 'SELECT message_body content, pk id FROM example_db.demo.crm_demo order by pk limit 30', - 'count': 2 - }}, - evaluate=true - """) - # at least one found - assert ret["total_found"][0] > 0 - test_df = self.run_sql(f"select * from files.test_eval_{kb_name}") - assert len(test_df) == ret["total"][0] diff --git a/tests/integration/flows/test_mysql_api.py b/tests/integration/flows/test_mysql_api.py deleted file mode 100644 index 70b716a49b3..00000000000 --- 
a/tests/integration/flows/test_mysql_api.py +++ /dev/null @@ -1,741 +0,0 @@ -import os -import time -import json -import tempfile -import datetime -from decimal import Decimal - -import pytest -import requests -import mysql.connector -import pandas as pd - -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import DATA_C_TYPE_MAP, MYSQL_DATA_TYPE, FIELD_FLAG - -from tests.integration.conftest import MYSQL_API_ROOT, HTTP_API_ROOT, get_test_resource_name, create_byom - -# pymysql.connections.DEBUG = True - - -ML_ENGINE_NAME = "my_byom_engine" - - -class Dlist(list): - """Service class for convenient work with list of dicts(db response). - Assumes keys are already normalized to lowercase.""" - - def __contains__(self, item): - if len(self) == 0: - return False - if item in self.__getitem__(0): - return True - return False - - def get_record(self, key, value): - if key in self: - for x in self: - if x[key] == value: - return x - return None - - -class BaseStuff: - """Contais some helpful set of methods and attributes for tests execution.""" - - predictor_name = "home_rentals" - file_datasource_name = "from_files" - - def query(self, _query, encoding="utf-8", with_description=False): - description = None - with mysql.connector.connect( - host=MYSQL_API_ROOT, - port=47335, - database="mindsdb", - user="mindsdb", - ) as cnx: - # Force mysql to use either the text or binary protocol - with cnx.cursor(prepared=self.use_binary) as cursor: - for subquery in _query.split(";"): - # multiple queries in one string - if subquery.strip() == "": - continue - cursor.execute(subquery) - - if cursor.description: - description = cursor.description - columns = [i[0].lower() for i in cursor.description] - data = cursor.fetchall() - - res = Dlist() - for row in data: - res.append(dict(zip(columns, row))) - - else: - res = {} - - if with_description: - return res, description - return res - - def create_database(self, name, db_data): - db_type = db_data["type"] - # Drop any existing DB 
with this name to avoid conflicts - self.query(f"DROP DATABASE IF EXISTS {name};") - self.query( - f"CREATE DATABASE {name} WITH ENGINE = '{db_type}', PARAMETERS = {json.dumps(db_data['connection_data'])};" - ) - - def upload_ds(self, df, name): - """Upload pandas df as csv file.""" - self.query(f"DROP TABLE IF EXISTS files.{name};") - with tempfile.NamedTemporaryFile(mode="w+", newline="", delete=False) as f: - df.to_csv(f, index=False) - filename = f.name - f.close() - - with open(filename, "r") as f: - url = f"{HTTP_API_ROOT}/files/{name}" - files = {"file": (f"{name}.csv", f, "text/csv")} - res = requests.put(url, files=files) - res.raise_for_status() - - def verify_file_ds(self, ds_name): - timeout = 10 - threshold = time.time() + timeout - res = "" - while time.time() < threshold: - res = self.query("SHOW tables from files;") - if "tables_in_files" in res and res.get_record("tables_in_files", ds_name): - return - time.sleep(0.3) - assert "tables_in_files" in res and res.get_record("tables_in_files", ds_name), ( - f"file datasource {ds_name} is not ready to use after {timeout} seconds" - ) - - def check_predictor_readiness(self, predictor_name): - timeout = 600 - threshold = time.time() + timeout - res = "" - model_not_found_threshold = time.time() + 30 - check_interval = 1 - while time.time() < threshold: - _query = "SELECT status, error FROM mindsdb.models WHERE name='{}';".format(predictor_name) - res = self.query(_query) - if "status" in res: - if res.get_record("status", "complete"): - break - elif res.get_record("status", "error"): - raise Exception(res[0]["error"]) - elif len(res) == 0 and time.time() > model_not_found_threshold: - raise Exception(f"Model {predictor_name} not found in models table after 30 seconds") - time.sleep(check_interval) - assert "status" in res and res.get_record("status", "complete"), ( - f"predictor {predictor_name} is not complete after {timeout} seconds. 
Last result: {res}" - ) - - def validate_database_creation(self, name): - res = self.query(f"SELECT name FROM information_schema.databases WHERE name='{name}';") - assert "name" in res and res.get_record("name", name), ( - f"Expected datasource is not found after creation - {name}: {res}" - ) - - -@pytest.fixture(scope="class") -def postgres_datasource(request): - """ - Class-scoped fixture to create the postgres datasource once. - Ensures the database exists for all tests that need it. - """ - helper = BaseStuff() - helper.use_binary = False - db_name = get_test_resource_name("test_demo_postgres") - db_details = { - "type": "postgres", - "connection_data": { - "host": "samples.mindsdb.com", - "port": "5432", - "user": "demo_user", - "password": "demo_password", - "database": "demo", - "schema": "demo", - }, - } - helper.create_database(db_name, db_details) - helper.validate_database_creation(db_name) - request.cls.POSTGRES_DB_NAME = db_name - yield db_name - # Cleanup - try: - helper.query(f"DROP DATABASE IF EXISTS {db_name};") - except Exception: - pass - - -@pytest.mark.parametrize("use_binary", [False, True], indirect=True) -@pytest.mark.usefixtures("postgres_datasource") -class TestMySqlApi(BaseStuff): - # Unique resource names for this test session (initialized in setup_class) - POSTGRES_DB_NAME = None - MARIADB_DB_NAME = None - MYSQL_DB_NAME = None - - @classmethod - def setup_class(cls): - """Initialize unique resource names for this test session.""" - # Note: POSTGRES_DB_NAME is set by the postgres_datasource fixture - cls.MARIADB_DB_NAME = get_test_resource_name("test_demo_mariadb") - cls.MYSQL_DB_NAME = get_test_resource_name("test_demo_mysql") - - @pytest.fixture - def use_binary(self, request): - self.use_binary = request.param - - def test_create_postgres_datasources(self, use_binary): - db_details = { - "type": "postgres", - "connection_data": { - "host": "samples.mindsdb.com", - "port": "5432", - "user": "demo_user", - "password": "demo_password", - 
"database": "demo", - "schema": "demo", - }, - } - self.create_database(self.POSTGRES_DB_NAME, db_details) - self.validate_database_creation(self.POSTGRES_DB_NAME) - - @pytest.mark.parametrize("table_name", ["types_test_data", "types_test_data_with_nulls"]) - def test_response_types(self, use_binary, table_name): - """Test that data conversion is correct. Postgres used as source database. - Used two tables: with and without nulls in rows. This is because pd.DataFrame cast data - with and without nulls in rows differently. - - Note: - - for 'data with nulls' big 'bigint' values returned wrong. This is because - pd.DataFrame cast 'bigint' values with nulls to 'float', therefore precision is lost. - - we cast datatypes in binary protocol only as signed, therefore precision for unsigned - may be lost - - sometimes dt_date returned as datetime instead of date. Most likely reason is mysql.connector. - - Test tables created using: - - create table types_test_data ( - t_char CHAR(10), - t_varchar VARCHAR(100), - t_text TEXT, - t_bytea BYTEA, - t_json JSON, - t_jsonb JSONB, - t_xml XML, - t_uuid UUID, - n_smallint SMALLINT, - n_integer INTEGER, - n_bigint BIGINT, - n_decimal DECIMAL(10,2), - n_numeric NUMERIC(10,4), - n_real REAL, - n_double_precision DOUBLE PRECISION, - n_smallserial SMALLSERIAL, - n_serial SERIAL, - n_bigserial BIGSERIAL, - n_money MONEY, - n_int2 INT2, -- alt for SMALLINT - n_int4 INT4, -- alt for INTEGER - n_int8 INT8, -- alt for BIGINT - n_float4 FLOAT4, -- alt for REAL - n_float8 FLOAT8, -- alt for DOUBLE precision - dt_date DATE, - dt_time TIME, - dt_time_tz TIME WITH TIME ZONE, - dt_timestamp TIMESTAMP, - dt_timestamp_tz TIMESTAMP WITH TIME ZONE, - dt_interval INTERVAL, - dt_timestamptz TIMESTAMPTZ, - dt_timetz TIMETZ - ); - - insert into types_test_data values ( - -- text - 'Test', - 'Test', - 'Test', - E'\\x44656D6F2062696E61727920646174612E', - '{"name": "test"}', - '{"name": "test"}', - 'test123', - 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11', - -- 
numeric - 32767, -- n_smallint (max value) - 2147483647, -- n_integer (max value) - 9223372036854775807, -- n_bigint (max value) - 1234.56, -- n_decimal - 12345.6789, -- n_numeric - 3.14159, -- n_real - 2.7182818284590452, -- n_double_precision - 1, 1, 1, -- serials - 10500.25, -- n_money ? - 255, -- n_int2 (min value) - 42, -- n_int4 - 123456789, -- n_int8 - 0.00123, -- n_float4 - 9.8765432109876, -- n_float8 - -- datetime - '2023-10-15', -- t_date - '14:30:45', -- t_time - '14:30:45+03:00', -- t_time_tz - '2023-10-15 14:30:45', -- t_timestamp - '2023-10-15 14:30:45+03:00', -- t_timestamp_tz - '2 years 3 months 15 days 12 hours 30 minutes 15 seconds', -- t_interval - '2023-10-15 14:30:45+03:00', -- t_timestamptz - '14:30:45+03:00' -- t_timetz - ); - - create table types_test_data_with_nulls (...); - insert into types_test_data_with_nulls values (same as above); - insert into types_test_data_with_nulls DEFAULT VALUES; -- insert nulls for each column, except serials - """ - # Test for response types - res, description = self.query( - f""" - SELECT - -- text types - t_char, t_varchar, t_text, t_bytea, t_json, t_jsonb, t_xml, t_uuid, - -- numeric types - n_smallint, n_integer, n_bigint, n_decimal, n_numeric, n_real, - n_double_precision, n_smallserial, n_serial, n_bigserial, n_money, - -- datetime types - dt_date, - dt_time, - dt_time_tz, - dt_timestamp, - dt_timestamp_tz, - dt_interval, - dt_timestamptz, - dt_timetz - FROM {self.POSTGRES_DB_NAME}.{table_name} order by n_integer NULLS last; - """, - with_description=True, - ) - expected_types = { - # text types - "t_char": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.TEXT], - "t_varchar": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.VARCHAR], - "t_text": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.TEXT], - "t_bytea": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.BINARY], - # text types: fallbacks to varchar - "t_xml": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.VARCHAR], - "t_uuid": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.VARCHAR], - # json types - "t_json": 
DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.JSON], - "t_jsonb": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.JSON], - # numeric types - "n_smallint": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.SMALLINT], - "n_integer": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.INT], - "n_bigint": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.BIGINT], - "n_decimal": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.DECIMAL], - "n_numeric": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.DECIMAL], - "n_real": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.FLOAT], - "n_double_precision": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.DOUBLE], - "n_smallserial": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.SMALLINT], - "n_serial": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.INT], - "n_bigserial": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.BIGINT], - "n_money": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.TEXT], - # datetime types - "dt_date": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.DATE], - "dt_time": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.TIME], - "dt_time_tz": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.TIME], - "dt_timestamp": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.DATETIME], - "dt_timestamp_tz": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.DATETIME], - "dt_interval": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.VARCHAR], - "dt_timestamptz": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.DATETIME], - "dt_timetz": DATA_C_TYPE_MAP[MYSQL_DATA_TYPE.TIME], - } - expected_values = { - "t_char": "Test ", - "t_varchar": "Test", - "t_text": "Test", - "t_bytea": "Demo binary data.", - "t_json": '{"name":"test"}', - "t_jsonb": '{"name":"test"}', - "t_xml": "test123", - "t_uuid": "a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11", - # numeric types - "n_smallint": 32767, - "n_integer": 2147483647, - "n_bigint": 9223372036854775807, - "n_decimal": Decimal("1234.56"), - "n_numeric": Decimal("12345.6789"), - "n_real": 3.14159, - "n_double_precision": 2.7182818284590452, - "n_smallserial": 1, - "n_serial": 1, - "n_bigserial": 1, - "n_money": "$10,500.25", - # datetime types - "dt_date": datetime.date(2023, 10, 15), - "dt_time": datetime.timedelta(seconds=52245), - "dt_time_tz": datetime.timedelta(seconds=52245 - (3 * 60 * 60)), - "dt_timestamp": 
datetime.datetime(2023, 10, 15, 14, 30, 45), - "dt_timestamp_tz": datetime.datetime(2023, 10, 15, 11, 30, 45), - "dt_interval": "835 days 12:30:15", - "dt_timestamptz": datetime.datetime(2023, 10, 15, 11, 30, 45), - "dt_timetz": datetime.timedelta(seconds=52245 - (3 * 60 * 60)), - } - num_types = [ - "n_smallint", - "n_integer", - "n_bigint", - "n_decimal", - "n_numeric", - "n_real", - "n_double_precision", - "n_smallserial", - "n_serial", - "n_bigserial", - "n_money", - ] - description_dict = {row[0]: {"type_code": row[1], "flags": row[-2]} for row in description} - row = res[0] - for column_name, expected_type in expected_types.items(): - column_description = description_dict[column_name] - - if column_name == "t_bytea" and isinstance(row[column_name], (bytearray, bytes)): - row[column_name] = row[column_name].decode() - elif column_name == "dt_date" and isinstance(row[column_name], datetime.datetime): - # NOTE sometime mysql.connector returns datetime instead of date for dt_date. This is suspicious, but ok - assert row[column_name].hour == 0 - assert row[column_name].minute == 0 - assert row[column_name].second == 0 - row[column_name] = row[column_name].date() - elif column_name in ("t_json", "t_jsonb") and self.use_binary: - # NOTE 'binary' protocol returns json as bytearray. 
- # by some reason, if use pytest then result is bytes instead of bytearray, but that is ok - row[column_name] = row[column_name].decode() - - if isinstance(expected_values[column_name], float): - assert abs(row[column_name] - expected_values[column_name]) < 1e-5, ( - f"Expected value {expected_values[column_name]} for column {column_name}, but got {row[column_name]}, use_binary={self.use_binary}, table_name={table_name}" - ) - elif column_name in ("t_json", "t_jsonb"): - assert json.loads(row[column_name]) == json.loads(expected_values[column_name]), ( - f"Expected value {expected_values[column_name]} for column {column_name}, but got {row[column_name]}, use_binary={self.use_binary}, table_name={table_name}" - ) - else: - assert row[column_name] == expected_values[column_name], ( - f"Expected value {expected_values[column_name]} for column {column_name}, but got {row[column_name]}, use_binary={self.use_binary}, table_name={table_name}" - ) - assert column_description["type_code"] == expected_type.code, ( - f"Expected type {expected_type.code} for column {column_name}, but got {column_description['type_code']}, use_binary={self.use_binary}, table_name={table_name}" - ) - - if os.uname().sysname == "Darwin": - # It seems that flags on macos may be modified by mysql.connector on the client side. 
- continue - assert ( - column_description["flags"] == sum(expected_type.flags) - or column_name in num_types - and column_description["flags"] == (sum(expected_type.flags) + FIELD_FLAG.NUM_FLAG) - ), ( - f"Expected flags {sum(expected_type.flags)} for column {column_name}, but got {column_description['flags']}, use_binary={self.use_binary}, table_name={table_name}" - ) - - def test_create_mariadb_datasources(self, use_binary): - db_details = { - "type": "mariadb", - "connection_data": { - "type": "mariadb", - "host": "samples.mindsdb.com", - "port": "3307", - "user": "demo_user", - "password": "demo_password", - "database": "test_data", - }, - } - self.create_database(self.MARIADB_DB_NAME, db_details) - self.validate_database_creation(self.MARIADB_DB_NAME) - - def test_create_mysql_datasources(self, use_binary): - db_details = { - "type": "mysql", - "connection_data": { - "type": "mysql", - "host": "samples.mindsdb.com", - "port": "3306", - "user": "user", - "password": "MindsDBUser123!", - "database": "public", - }, - } - self.create_database(self.MYSQL_DB_NAME, db_details) - self.validate_database_creation(self.MYSQL_DB_NAME) - - @pytest.mark.skip( - reason="Disabled after deleting lightwood. No suitable handler available and BYOM usage restricted." - ) - def test_create_predictor(self, use_binary): - create_byom(ML_ENGINE_NAME, target_column="rental_price") - - self.query(f"DROP MODEL IF EXISTS {self.predictor_name};") - # add file lock here - self.query(f""" - CREATE MODEL {self.predictor_name} - from {self.POSTGRES_DB_NAME} (select * from home_rentals limit 10) - PREDICT rental_price USING engine='{ML_ENGINE_NAME}' - """) - self.check_predictor_readiness(self.predictor_name) - - @pytest.mark.skip( - reason="Disabled after deleting lightwood. No suitable handler available and BYOM usage restricted." 
- ) - def test_making_prediction(self, use_binary): - _query = f""" - SELECT rental_price - FROM {self.predictor_name} - WHERE number_of_rooms = 2 and sqft = 400 and location = 'downtown' and days_on_market = 2 and initial_price= 2500 - USING engine='{ML_ENGINE_NAME}'; - """ - res = self.query(_query) - assert "rental_price" in res, f"error getting prediction from {self.predictor_name} - {res}" - - @pytest.mark.skip( - reason="Disabled after deleting lightwood. No suitable handler available and BYOM usage restricted." - ) - @pytest.mark.parametrize("describe_attr", ["model", "features", "ensemble"]) - def test_describe_predictor_attrs(self, describe_attr, use_binary): - self.query(f"describe mindsdb.{self.predictor_name}.{describe_attr};") - - @pytest.mark.parametrize( - "query", - [ - "show databases;", - "show schemas;", - "show tables;", - "show tables from mindsdb;", - "show tables in mindsdb;", - "show full tables from mindsdb;", - "show full tables in mindsdb;", - "show variables;", - "show session status;", - "show global variables;", - "show engines;", - "show warnings;", - "show charset;", - "show collation;", - "show models;", - "show function status where db = 'mindsdb';", - "show procedure status where db = 'mindsdb';", - ], - ) - def test_service_requests(self, query, use_binary): - self.query(query) - - def test_show_columns(self, use_binary): - ret = self.query(f""" - SELECT - * - FROM information_schema.columns - WHERE table_name = 'home_rentals' and table_schema='{self.POSTGRES_DB_NAME}' - """) - assert len(ret) == 8 - # TODO FIX STR->INT casting - # assert sorted([x['ordinal_position'] for x in ret]) == list(range(1, 9)) - - rental_price_column = next(x for x in ret if x["column_name"] == "rental_price") - assert rental_price_column["data_type"] == "int" - assert rental_price_column["column_type"] == "int" - assert rental_price_column["original_type"] == "integer" - assert rental_price_column["numeric_precision"] is not None - - location_column = 
next(x for x in ret if x["column_name"] == "location") - assert location_column["data_type"] == "varchar" - assert location_column["column_type"].startswith("varchar(") # varchar(###) - assert location_column["original_type"] == "character varying" - assert location_column["numeric_precision"] is None - assert location_column["character_maximum_length"] is not None - assert location_column["character_octet_length"] is not None - - def test_upload_file(self, use_binary): - df = pd.DataFrame( - { - "x1": [x for x in range(100, 210)] + [x for x in range(100, 210)], - "x2": [x * 2 for x in range(100, 210)] + [x * 3 for x in range(100, 210)], - "y": [x * 3 for x in range(100, 210)] + [x * 2 for x in range(100, 210)], - } - ) - self.upload_ds(df, self.file_datasource_name) - self.verify_file_ds(self.file_datasource_name) - - @pytest.mark.skip( - reason="Disabled after deleting lightwood. No suitable handler available and BYOM usage restricted." - ) - def test_train_model_from_files(self, use_binary): - file_predictor_name = "predictor_from_file" - self.query(f"DROP MODEL IF EXISTS mindsdb.{file_predictor_name};") - # add file lock here - _query = f""" - CREATE MODEL mindsdb.{file_predictor_name} - from files (select * from {self.file_datasource_name}) - predict y - USING engine='{ML_ENGINE_NAME}'; - """ - self.query(_query) - self.check_predictor_readiness(file_predictor_name) - - def test_select_from_files(self, use_binary): - _query = f"select * from files.{self.file_datasource_name};" - self.query(_query) - - @pytest.mark.slow - @pytest.mark.skip( - reason="Disabled after deleting lightwood. No suitable handler available and BYOM usage restricted." 
- ) - def test_ts_train_and_predict(self, subtests, use_binary): - train_df = pd.DataFrame( - { - "gby": ["A" for _ in range(100, 210)] + ["B" for _ in range(100, 210)], - "oby": [x for x in range(100, 210)] + [x for x in range(200, 310)], - "x1": [x for x in range(100, 210)] + [x for x in range(100, 210)], - "x2": [x * 2 for x in range(100, 210)] + [x * 3 for x in range(100, 210)], - "y": [x * 3 for x in range(100, 210)] + [x * 2 for x in range(100, 210)], - } - ) - - test_df = pd.DataFrame( - { - "gby": ["A" for _ in range(210, 220)] + ["B" for _ in range(210, 220)], - "oby": [x for x in range(210, 220)] + [x for x in range(310, 320)], - "x1": [x for x in range(210, 220)] + [x for x in range(210, 220)], - "x2": [x * 2 for x in range(210, 220)] + [x * 3 for x in range(210, 220)], - "y": [x * 3 for x in range(210, 220)] + [x * 2 for x in range(210, 220)], - } - ) - - train_ds_name = "train_ts_file_ds" - test_ds_name = "test_ts_file_ds" - for df, ds_name in [(train_df, train_ds_name), (test_df, test_ds_name)]: - self.upload_ds(df, ds_name) - self.verify_file_ds(ds_name) - - params = [ - ( - "with_group_by_hor1", - f"CREATE MODEL mindsdb.%s from files (select * from {train_ds_name}) PREDICT y ORDER BY oby GROUP BY gby WINDOW 10 HORIZON 1;", - f"SELECT res.gby, res.y as PREDICTED_RESULT FROM files.{test_ds_name} as source JOIN mindsdb.%s as res WHERE source.gby= 'A' LIMIT 1;", - 1, - ), - ( - "no_group_by_hor1", - f"CREATE MODEL mindsdb.%s from files (select * from {train_ds_name}) PREDICT y ORDER BY oby WINDOW 10 HORIZON 1;", - f"SELECT res.gby, res.y as PREDICTED_RESULT FROM files.{test_ds_name} as source JOIN mindsdb.%s as res LIMIT 1;", - 1, - ), - ( - "with_group_by_hor2", - f"CREATE MODEL mindsdb.%s from files (select * from {train_ds_name}) PREDICT y ORDER BY oby GROUP BY gby WINDOW 10 HORIZON 2;", - f"SELECT res.gby, res.y as PREDICTED_RESULT FROM files.{test_ds_name} as source JOIN mindsdb.%s as res WHERE source.gby= 'A' LIMIT 2;", - 2, - ), - ( - 
"no_group_by_hor2", - f"CREATE MODEL mindsdb.%s from files (select * from {train_ds_name}) PREDICT y ORDER BY oby WINDOW 10 HORIZON 2;", - f"SELECT res.gby, res.y as PREDICTED_RESULT FROM files.{test_ds_name} as source JOIN mindsdb.%s as res LIMIT 2;", - 2, - ), - ] - for predictor_name, create_query, select_query, res_len in params: - # add file lock here - with subtests.test( - msg=predictor_name, - predictor_name=predictor_name, - create_query=create_query, - select_query=select_query, - res_len=res_len, - ): - self.query(f"DROP MODEL IF EXISTS mindsdb.{predictor_name};") - self.query(create_query % predictor_name) - self.check_predictor_readiness(predictor_name) - res = self.query(select_query % predictor_name) - assert len(res) == res_len, f"prediction result {res} contains more that {res_len} records" - - @pytest.mark.slow - @pytest.mark.skip( - reason="Disabled after deleting lightwood. No suitable handler available and BYOM usage restricted." - ) - def test_tableau_queries(self, subtests, use_binary): - test_ds_name = self.file_datasource_name - predictor_name = "predictor_from_file" - integration = "files" - - queries = [ - f""" - SELECT TABLE_NAME,TABLE_COMMENT,IF(TABLE_TYPE='BASE TABLE', 'TABLE', TABLE_TYPE), - TABLE_SCHEMA FROM INFORMATION_SCHEMA.TABLES - WHERE TABLE_SCHEMA = '{integration}' - AND TABLE_TYPE='BASE TABLE' ORDER BY TABLE_SCHEMA, TABLE_NAME - """, - f""" - SELECT SUM(1) AS `cnt__0B4A4E8BD11C48FFB4730D4D2C32191A_ok`, - max(`Custom SQL Query`.`x1`) AS `sum_height_ok`, - max(`Custom SQL Query`.`y`) AS `sum_length1_ok` - FROM ( - SELECT res.x1, res.y - FROM files.{test_ds_name} as source - JOIN mindsdb.{predictor_name} as res - ) `Custom SQL Query` - HAVING (COUNT(1) > 0) - """, - f""" - SHOW FULL TABLES FROM {integration} - """, - """ - SELECT `table_name`, `column_name` - FROM `information_schema`.`columns` - WHERE `data_type`='enum' AND `table_schema`='views'; - """, - """ - SHOW KEYS FROM `mindsdb`.`predictors` - """, - """ - show full 
columns from `predictors` - """, - """ - SELECT `table_name`, `column_name` FROM `information_schema`.`columns` - WHERE `data_type`='enum' AND `table_schema`='mindsdb' - """, - f""" - SELECT `Custom SQL Query`.`x1` AS `height`, - `Custom SQL Query`.`y` AS `length1` - FROM ( - SELECT res.x1, res.y - FROM files.{test_ds_name} as source - JOIN mindsdb.{predictor_name} as res - ) `Custom SQL Query` - LIMIT 100 - """, - f""" - SELECT - `Custom SQL Query`.`x1` AS `x1`, - SUM(`Custom SQL Query`.`y2`) AS `sum_y2_ok` - FROM ( - SELECT res.x1, res.y as y2 - FROM files.{test_ds_name} as source - JOIN mindsdb.{predictor_name} as res - ) `Custom SQL Query` - GROUP BY 1 - """, - f""" - SELECT - `Custom SQL Query`.`x1` AS `x1`, - COUNT(DISTINCT TRUNCATE(`Custom SQL Query`.`y`,0)) AS `ctd_y_ok` - FROM ( - SELECT res.x1, res.y - FROM files.{test_ds_name} as source - JOIN mindsdb.{predictor_name} as res - ) `Custom SQL Query` - GROUP BY 1 - """, - ] - for i, _query in enumerate(queries): - with subtests.test(msg=i, _query=_query): - self.query(_query) diff --git a/tests/integration/flows/test_mysql_api_extended.py b/tests/integration/flows/test_mysql_api_extended.py deleted file mode 100644 index 6839b437e29..00000000000 --- a/tests/integration/flows/test_mysql_api_extended.py +++ /dev/null @@ -1,756 +0,0 @@ -import os -import pytest -import time -from .test_mysql_api import BaseStuff -import mysql.connector - - -@pytest.fixture(scope="module") -def setup_local_db(): - """Module-scoped fixture to create a writeable DB for table tests.""" - db_name = "test_db_local" - helper = BaseStuff() - helper.use_binary = False - - params = {"user": "postgres", "password": "postgres", "host": "postgres", "port": 5432, "database": "postgres"} - - print(f"\n--> [Fixture setup_local_db] Setting up local database: {db_name} on {params['host']}:{params['port']}") - try: - helper.query(f"DROP DATABASE IF EXISTS {db_name}") - create_datasource_sql_via_connector(helper, db_name, "postgres", params) - 
yield db_name - except (mysql.connector.Error, TimeoutError) as e: - pytest.skip( - f"\n\n--- FIXTURE SETUP FAILED ---\n" - f"Could not connect to the PostgreSQL container ('{params['host']}').\n" - f"Please ensure your Docker Compose environment is running correctly.\n" - f"Original Error: {e}\n" - ) - finally: - print(f"\n--> [Fixture setup_local_db] Tearing down database: {db_name}") - try: - helper.query(f"DROP DATABASE IF EXISTS {db_name};") - except mysql.connector.Error: - pass - - -def create_datasource_sql_via_connector(helper_instance, db_name, engine, parameters, poll_timeout=30, poll_interval=2): - """Helper to create a datasource via a CREATE DATABASE query.""" - params_list = [f'"{k}": "{v}"' if isinstance(v, str) else f'"{k}": {v}' for k, v in parameters.items()] - params_str = ", ".join(params_list) - query_str = f"CREATE DATABASE {db_name} WITH ENGINE = '{engine}', PARAMETERS = {{{params_str}}};" - print(f" [Helper create_datasource] Executing: CREATE DATABASE {db_name}...") - helper_instance.query(query_str) - start_time = time.time() - while True: - try: - helper_instance.validate_database_creation(db_name) - print(f" [Helper create_datasource] DATABASE {db_name} created and validated.") - break - except AssertionError as e: - elapsed_time = time.time() - start_time - if elapsed_time > poll_timeout: - print(f" [Helper create_datasource] ERROR: Timeout after {poll_timeout}s waiting for {db_name}.") - raise TimeoutError(f"Timed out waiting for database {db_name} to be created.") from e - time.sleep(poll_interval) - - -def wait_for_trigger_creation(query_fn, trigger_name, timeout=20, max_interval=5): - """ - Polls information_schema to see if a trigger is visible. - Does not raise an error, returns True (found) or False (not found). 
- """ - start = time.time() - interval = 1 - print(f"\n[DEBUG] Checking for trigger '{trigger_name}' in information_schema (timeout={timeout}s)...") - while time.time() - start < timeout: - try: - result = query_fn(f"SELECT 1 FROM information_schema.triggers WHERE trigger_name = '{trigger_name}';") - if result: - print(f"[DEBUG] Trigger '{trigger_name}' found in information_schema after {time.time() - start:.2f}s.") - return True - except Exception: - pass - - try: - result = query_fn("SHOW TRIGGERS;") - if result and trigger_name in [row.get("Trigger", row.get("TRIGGER")) for row in result]: - print(f"[DEBUG] Trigger '{trigger_name}' found in SHOW TRIGGERS after {time.time() - start:.2f}s.") - return True - except Exception: - pass - time.sleep(interval) - interval = min(interval * 1.5, max_interval) - - print( - f"[DEBUG] WARNING: Trigger '{trigger_name}' was not found in metadata after {timeout}s. Proceeding with functional test..." - ) - return False - - -def wait_for_trigger_to_fire( - query_fn, db_name, source_table_name, target_table_name, test_id, updated_message, timeout=120, max_interval=10 -): - """ - Polls for a trigger to fire by repeatedly sending the UPDATE command - and checking the target table. This is robust against trigger creation lag. - """ - start = time.time() - interval = 1 - - while time.time() - start < timeout: - print(f"[DEBUG] Firing trigger (elapsed={time.time() - start:.1f}s, interval={interval:.2f}s)...") - query_fn(f"UPDATE {db_name}.{source_table_name} SET message = '{updated_message}' WHERE id = {test_id};") - - time.sleep(interval) - - result = query_fn(f"SELECT id, message FROM {db_name}.{target_table_name} WHERE id = {test_id};") - if result: - elapsed = time.time() - start - print(f"[DEBUG] Trigger fired and verified after {elapsed:.2f}s → {result}") - return result - - print("[DEBUG] Trigger not fired yet. 
Retrying...") - interval = min(interval * 1.5, max_interval) - - raise TimeoutError(f"Trigger did not fire for id {test_id} within {timeout}s despite repeated attempts.") - - -def wait_for_kb_creation(query_fn, kb_name, timeout=90, poll_interval=1): - """Polls to check if a Knowledge Base has been created successfully.""" - start_time = time.time() - while True: - try: - result = query_fn(f"DESCRIBE KNOWLEDGE_BASE {kb_name};") - if result and result[0]["name"] == kb_name: - print(f" [Helper wait_for_kb] KB {kb_name} created and validated.") - return result - except Exception: - # KB might not be queryable at all yet - pass - - elapsed_time = time.time() - start_time - if elapsed_time > timeout: - print(f" [Helper wait_for_kb] ERROR: Timeout after {timeout}s waiting for {kb_name}.") - raise TimeoutError(f"Timed out waiting for Knowledge Base {kb_name} to be created.") - time.sleep(poll_interval) - - -@pytest.mark.parametrize("use_binary", [False, True], indirect=True) -class TestMySQLTables(BaseStuff): - """Test suite for Table operations.""" - - @pytest.fixture - def use_binary(self, request): - self.use_binary = request.param - - @pytest.mark.usefixtures("setup_local_db") - def test_table_lifecycle(self, setup_local_db, use_binary): - db_name = setup_local_db - table_name = "test_lifecycle_table" - try: - create_table_query = f"CREATE TABLE {db_name}.{table_name} (id INT, value VARCHAR(255));" - self.query(create_table_query) - result = self.query(f"SHOW TABLES FROM {db_name};") - assert table_name in [list(row.values())[0] for row in result] - replace_query = f"CREATE OR REPLACE TABLE {db_name}.{table_name} (SELECT 2 as id, 'new_data' as value);" - self.query(replace_query) - result = self.query(f"SELECT * FROM {db_name}.{table_name};") - assert result and result[0]["id"] == 2 and result[0]["value"] == "new_data" - finally: - self.query(f"DROP TABLE IF EXISTS {db_name}.{table_name};") - - -@pytest.mark.parametrize("use_binary", [False, True], indirect=True) 
-class TestMySQLTablesNegative(BaseStuff): - """Negative tests for Table operations.""" - - @pytest.fixture - def use_binary(self, request): - self.use_binary = request.param - - @pytest.mark.usefixtures("setup_local_db") - def test_create_duplicate_table(self, setup_local_db, use_binary): - db_name = setup_local_db - table_name = "test_duplicate_table" - create_query = f"CREATE TABLE {db_name}.{table_name} (id INT);" - try: - self.query(create_query) - with pytest.raises(Exception) as e: - self.query(create_query) - assert "already exists" in str(e.value).lower() - finally: - self.query(f"DROP TABLE IF EXISTS {db_name}.{table_name};") - - def test_create_table_in_missing_db_raises_error(self, use_binary): - create_query = "CREATE TABLE non_existent_db.non_existent_table (id INT);" - with pytest.raises(Exception) as e: - self.query(create_query) - assert "non_existent_db" or "Database not found" in str(e.value).lower() - - @pytest.mark.usefixtures("setup_local_db") - def test_drop_non_existent_table(self, setup_local_db, use_binary): - db_name = setup_local_db - table_name = "test_non_existent_table" - with pytest.raises(Exception) as e: - self.query(f"DROP TABLE {db_name}.{table_name};") - assert "does not exist" in str(e.value).lower() - - -@pytest.mark.parametrize("use_binary", [False, True], indirect=True) -class TestMySQLViews(BaseStuff): - """Test suite for View operations.""" - - @pytest.fixture - def use_binary(self, request): - self.use_binary = request.param - - def test_view_lifecycle(self, use_binary): - db_name = "test_sql_view_db" - view_name = "test_sql_view" - try: - self.query(f"DROP VIEW IF EXISTS {view_name};") - self.query(f"DROP DATABASE IF EXISTS {db_name};") - - create_db_query = f""" - CREATE DATABASE {db_name} - WITH ENGINE = 'postgres', PARAMETERS = {{"user": "demo_user", "password": "demo_password", "host": "samples.mindsdb.com", "port": "5432", "database": "demo", "schema": "demo"}}; - """ - self.query(create_db_query) - - 
create_view_query = ( - f"CREATE VIEW {view_name} AS (SELECT * FROM {db_name}.home_rentals WHERE number_of_rooms = 2);" - ) - self.query(create_view_query) - result = self.query("SHOW VIEWS;") - assert view_name in [row.get("name", row.get("Name", row.get("NAME"))) for row in result] - result = self.query(f"SELECT * FROM {view_name};") - assert len(result) > 0 and all(row["number_of_rooms"] == 2 for row in result) - alter_view_query = ( - f"ALTER VIEW {view_name} AS (SELECT * FROM {db_name}.home_rentals WHERE number_of_rooms = 1);" - ) - self.query(alter_view_query) - result_after_alter = self.query(f"SELECT * FROM {view_name};") - assert len(result_after_alter) > 0 and all(row["number_of_rooms"] == 1 for row in result_after_alter) - finally: - self.query(f"DROP VIEW IF EXISTS {view_name};") - self.query(f"DROP DATABASE IF EXISTS {db_name};") - - -@pytest.mark.parametrize("use_binary", [False, True], indirect=True) -class TestMySQLViewsNegative(BaseStuff): - """Negative tests for View operations.""" - - @pytest.fixture - def use_binary(self, request): - self.use_binary = request.param - - def test_create_duplicate_view(self, use_binary): - view_name = "test_duplicate_view" - create_query = f"CREATE VIEW {view_name} AS (SELECT 1);" - try: - self.query(f"DROP VIEW IF EXISTS {view_name};") - self.query(create_query) - with pytest.raises(Exception) as e: - self.query(create_query) - assert "already exists" in str(e.value).lower() - finally: - self.query(f"DROP VIEW IF EXISTS {view_name};") - - def test_create_view_on_non_existent_table(self, use_binary): - view_name = "test_bad_source_view" - create_query = f"CREATE VIEW {view_name} AS (SELECT * FROM non_existent_db.non_existent_table);" - with pytest.raises(Exception) as e: - self.query(create_query) - error_str = str(e.value).lower() - assert "not found in the database" in error_str or "table name should contain only one part" in error_str - - def test_drop_non_existent_view(self, use_binary): - view_name = 
"non_existent_view" - try: - self.query(f"DROP VIEW IF EXISTS {view_name};") - except Exception: - pass - - with pytest.raises(Exception) as e: - self.query(f"DROP VIEW {view_name};") - error_str = str(e.value).lower() - assert "view not found" in error_str or "unknown view" in error_str - - -@pytest.mark.parametrize("use_binary", [False, True], indirect=True) -class TestMySQLKnowledgeBases(BaseStuff): - """Test suite for Knowledge Base operations.""" - - @pytest.fixture - def use_binary(self, request): - self.use_binary = request.param - - @pytest.fixture - def basic_kb(self, request): - """ - Fixture to create a basic Knowledge Base for alteration tests. - Requires OPENAI_API_KEY. - """ - openai_api_key = os.environ.get("OPENAI_API_KEY") - if not openai_api_key: - pytest.skip("OPENAI_API_KEY environment variable not set. Skipping KB tests.") - - kb_name = "test_alter_kb_local" - embedding_model = "text-embedding-3-small" - - create_kb_query = f""" - CREATE KNOWLEDGE_BASE {kb_name} - USING embedding_model = {{"provider": "openai", "model_name": "{embedding_model}", "api_key": "{openai_api_key}"}}; - """ - try: - self.query(f"DROP KNOWLEDGE_BASE IF EXISTS {kb_name};") - self.query(create_kb_query) - result = wait_for_kb_creation(self.query, kb_name) - assert result and result[0]["name"] == kb_name - yield kb_name - finally: - self.query(f"DROP KNOWLEDGE_BASE IF EXISTS {kb_name};") - - def test_knowledge_base_full_lifecycle(self, use_binary): - openai_api_key = os.environ.get("OPENAI_API_KEY") - if not openai_api_key: - pytest.skip("OPENAI_API_KEY environment variable not set. Skipping Knowledge Base lifecycle test.") - - kb_name = "test_kb_sql" - content_to_insert = "MindsDB helps developers build AI-powered applications." 
- embedding_model = "text-embedding-3-small" - try: - self.query(f"DROP KNOWLEDGE_BASE IF EXISTS {kb_name};") - create_kb_query = f""" - CREATE KNOWLEDGE_BASE {kb_name} - USING embedding_model = {{"provider": "openai", "model_name": "{embedding_model}", "api_key": "{openai_api_key}"}}; - """ - self.query(create_kb_query) - result = wait_for_kb_creation(self.query, kb_name) - assert result and result[0]["name"] == kb_name and embedding_model in result[0]["embedding_model"] - self.query(f"INSERT INTO {kb_name} (content) VALUES ('{content_to_insert}');") - - # Give insertion a moment to process before querying - time.sleep(2) - - result = self.query(f"SELECT chunk_content FROM {kb_name} WHERE content = 'What is MindsDB?';") - assert result and "MindsDB" in result[0]["chunk_content"] - finally: - self.query(f"DROP KNOWLEDGE_BASE IF EXISTS {kb_name};") - - def test_create_kb_with_invalid_provider(self, use_binary): - kb_name = "test_invalid_provider" - create_query = ( - f'CREATE KNOWLEDGE_BASE {kb_name} USING embedding_model = {{"provider": "non_existent_provider"}};' - ) - with pytest.raises(Exception) as e: - self.query(create_query) - assert "wrong embedding provider" in str(e.value).lower() - - def test_create_kb_with_invalid_api_key(self, use_binary, request): - kb_name = "test_invalid_key_local" - create_query = f'CREATE KNOWLEDGE_BASE {kb_name} USING embedding_model = {{"provider": "openai", "api_key": "this_is_a_fake_key"}};' - try: - with pytest.raises(Exception) as e: - self.query(create_query) - assert ( - "problem with embedding model config" in str(e.value).lower() - or "invalid api key" in str(e.value).lower() - ) - finally: - # Ensure cleanup even if creation fails - self.query(f"DROP KNOWLEDGE_BASE IF EXISTS {kb_name};") - - def test_insert_into_non_existent_kb(self, use_binary): - kb_name = "non_existent_kb" - with pytest.raises(Exception) as e: - self.query(f"INSERT INTO {kb_name} (content) VALUES ('some data');") - error_str = str(e.value).lower() - 
assert "can't create table" in error_str or "doesn't exist" in error_str or "unknown table" in error_str - - def test_query_non_existent_kb(self, use_binary): - kb_name = "non_existent_kb" - with pytest.raises(Exception) as e: - self.query(f"SELECT * FROM {kb_name} WHERE content = 'some query';") - error_str = str(e.value).lower() - assert "not found in database" in error_str or "doesn't exist" in error_str or "unknown table" in error_str - - def test_create_duplicate_kb(self, use_binary, request): - openai_api_key = os.environ.get("OPENAI_API_KEY") - if not openai_api_key: - pytest.skip("OPENAI_API_KEY environment variable not set. Skipping duplicate KB test.") - - kb_name = "test_duplicate_kb" - embedding_model = "text-embedding-3-small" - create_query = f""" - CREATE KNOWLEDGE_BASE {kb_name} - USING embedding_model = {{"provider": "openai", "model_name": "{embedding_model}", "api_key": "{openai_api_key}"}}; - """ - try: - self.query(f"DROP KNOWLEDGE_BASE IF EXISTS {kb_name};") - self.query(create_query) - wait_for_kb_creation(self.query, kb_name) - with pytest.raises(Exception) as e: - self.query(create_query) - assert "already exists" in str(e.value).lower() - finally: - self.query(f"DROP KNOWLEDGE_BASE IF EXISTS {kb_name};") - - @pytest.mark.usefixtures("basic_kb") - def test_alter_kb_embedding_api_key(self, basic_kb, use_binary): - """Tests altering the api_key of the embedding_model.""" - kb_name = basic_kb - openai_api_key = os.environ.get("OPENAI_API_KEY") - if not openai_api_key: - pytest.skip("OPENAI_API_KEY needed for this alter test.") - - new_api_key = openai_api_key - - alter_query = f""" - ALTER KNOWLEDGE_BASE {kb_name} - USING - embedding_model = {{ 'api_key': '{new_api_key}' }}; - """ - self.query(alter_query) - - time.sleep(1) - - result = self.query(f"SELECT embedding_model FROM information_schema.knowledge_bases WHERE name = '{kb_name}';") - assert result - embedding_model_json = result[0].get("embedding_model") - assert embedding_model_json is 
not None - - assert '"provider": "openai"' in embedding_model_json - assert '"model_name": "text-embedding-3-small"' in embedding_model_json - assert '"api_key": "' in embedding_model_json - - @pytest.mark.xfail( - reason="Bug: ALTER KNOWLEDGE_BASE does not unset reranking_model. See LINEAR-TICKET-NUMBER: FQE-1716" - ) - @pytest.mark.usefixtures("basic_kb") - def test_alter_kb_reranking_model(self, basic_kb, use_binary): - """Tests adding and then disabling the reranking_model.""" - kb_name = basic_kb - openai_api_key = os.environ.get("OPENAI_API_KEY") - if not openai_api_key: - pytest.skip("OPENAI_API_KEY needed for this alter test.") - - alter_query_add = f""" - ALTER KNOWLEDGE_BASE {kb_name} - USING - reranking_model = {{ 'provider': 'openai', 'model_name': 'gpt-4o', 'api_key': '{openai_api_key}' }}; - """ - self.query(alter_query_add) - - result_add = self.query(f"DESCRIBE KNOWLEDGE_BASE {kb_name};") - assert result_add and '"provider": "openai"' in result_add[0]["RERANKING_MODEL"] - - alter_query_disable = f""" - ALTER KNOWLEDGE_BASE {kb_name} - USING - reranking_model = false; - """ - self.query(alter_query_disable) - - result_disable = self.query(f"DESCRIBE KNOWLEDGE_BASE {kb_name};") - assert result_disable - reranking_model_desc = result_disable[0].get("RERANKING_MODEL") - assert reranking_model_desc is None or reranking_model_desc == "{}" - - @pytest.mark.xfail(reason="Bug: information_schema.knowledge_bases.PARAMS is not updated on ALTER") - @pytest.mark.usefixtures("basic_kb") - def test_alter_kb_preprocessing(self, basic_kb, use_binary): - """Tests altering the preprocessing parameters by checking the PARAMS column.""" - kb_name = basic_kb - - alter_query = f""" - ALTER KNOWLEDGE_BASE {kb_name} - USING - preprocessing = {{ 'chunk_size': 300, 'chunk_overlap': 50 }}; - """ - self.query(alter_query) - - time.sleep(1) - - result = self.query(f"SELECT PARAMS FROM information_schema.knowledge_bases WHERE name = '{kb_name}';") - - assert result, "Query to 
information_schema returned no results." - - params_json = result[0].get("PARAMS") - assert params_json is not None, "PARAMS column is NULL." - - assert '"preprocessing":' in params_json, "The 'preprocessing' key was not added to the PARAMS column." - assert '"chunk_size": 300' in params_json, "chunk_size was not set correctly in PARAMS." - assert '"chunk_overlap": 50' in params_json, "chunk_overlap was not set correctly in PARAMS." - - -@pytest.fixture(scope="function") -def setup_trigger_db(request): - """Function-scoped fixture to ensure a clean DB for each trigger test.""" - - db_name = "trigger_test_db_local" - - source_table_name = "trigger_source_table" - target_table_name = "trigger_target_table" - - params = {"user": "postgres", "password": "postgres", "host": "postgres", "port": 5432, "database": "postgres"} - helper = BaseStuff() - helper.use_binary = False - - try: - print( - f"\n--> [Fixture setup_trigger_db] Setting up local database: {db_name} on {params['host']}:{params['port']}" - ) - helper.query(f"DROP DATABASE IF EXISTS {db_name}") - - create_datasource_sql_via_connector(helper, db_name, "postgres", params) - - helper.query(f"CREATE TABLE {db_name}.{source_table_name} (id INT, message VARCHAR(255));") - helper.query(f"CREATE TABLE {db_name}.{target_table_name} (id INT, message VARCHAR(255));") - helper.query(f"INSERT INTO {db_name}.{source_table_name} (id, message) VALUES (101, 'initial_update_message');") - helper.query(f"INSERT INTO {db_name}.{source_table_name} (id, message) VALUES (102, 'initial_delete_message');") - - yield db_name, source_table_name, target_table_name - - except (mysql.connector.Error, TimeoutError) as setup_err: - pytest.skip(f"Trigger fixture setup failed. Ensure Docker environment is running. 
Error: {setup_err}") - - finally: - print(f"\n--> [CLEANUP] Dropping tables and DATABASE: {db_name}") - try: - helper.query(f"DROP TABLE IF EXISTS {db_name}.{source_table_name};") - helper.query(f"DROP TABLE IF EXISTS {db_name}.{target_table_name};") - except mysql.connector.Error as e: - print(f"Warning: Error dropping tables during cleanup: {e}") - pass - - try: - helper.query(f"DROP DATABASE IF EXISTS {db_name};") - except mysql.connector.Error as e: - print(f"Warning: Error dropping database during cleanup: {e}") - pass - - -@pytest.mark.parametrize("use_binary", [False, True], indirect=True) -class TestMySQLTriggers(BaseStuff): - """Test suite for Trigger operations.""" - - @pytest.fixture - def use_binary(self, request): - self.use_binary = request.param - - @pytest.mark.usefixtures("setup_trigger_db") - def test_trigger_lifecycle(self, setup_trigger_db, use_binary): - db_name, source_table_name, target_table_name = setup_trigger_db - trigger_name = "test_insert_trigger" - test_id = 201 # Use a new ID that will be INSERTED - inserted_message = "this message was inserted" - try: - # Ensure the target table is clean - self.query(f"DELETE FROM {db_name}.{target_table_name};") - - # Pre-drop trigger to ensure clean state - try: - self.query(f"DROP TRIGGER {trigger_name};") - except Exception: - pass - - create_trigger_query = f""" - CREATE TRIGGER {trigger_name} - ON {db_name}.{source_table_name} - (INSERT INTO {db_name}.{target_table_name} (id, message) SELECT id, message FROM TABLE_DELTA); - """ - print("\n[DEBUG] Sending CREATE TRIGGER command...") - self.query(create_trigger_query) - - wait_for_trigger_creation(self.query, trigger_name, timeout=20) - - print("[DEBUG] Schema check complete. 
Proceeding to functional firing test...") - - # Activate Trigger with an INSERT, not an UPDATE - self.query( - f"INSERT INTO {db_name}.{source_table_name} (id, message) VALUES ({test_id}, '{inserted_message}');" - ) - - # Poll the target table for the new result - result = [] - max_wait_time = 60 - interval = 1 - max_interval = 8 - start_time = time.time() - while time.time() - start_time < max_wait_time: - result = self.query(f"SELECT id, message FROM {db_name}.{target_table_name} WHERE id = {test_id};") - if result: - break - time.sleep(interval) - interval = min(interval * 2, max_interval) - - # Verify - assert result, f"Trigger did not fire for id {test_id} within {max_wait_time}s." - assert result[0]["message"] == inserted_message - - finally: - try: - self.query(f"DROP TRIGGER {trigger_name};") - except Exception: - pass - - -@pytest.mark.parametrize("use_binary", [False, True], indirect=True) -class TestMySQLTriggersNegative(BaseStuff): - """Negative tests for Trigger operations.""" - - @pytest.fixture - def use_binary(self, request): - self.use_binary = request.param - - @pytest.mark.usefixtures("setup_trigger_db") - def test_create_duplicate_trigger(self, setup_trigger_db, use_binary): - db_name, source_table_name, _ = setup_trigger_db - trigger_name = "duplicate_trigger" - create_query = f"CREATE TRIGGER {trigger_name} ON {db_name}.{source_table_name} (SELECT 1);" - try: - try: - self.query(f"DROP TRIGGER {trigger_name};") - except Exception: - pass - - self.query(create_query) - wait_for_trigger_creation(self.query, trigger_name, timeout=20) - - with pytest.raises(Exception) as e: - self.query(create_query) - assert "already exists" in str(e.value).lower() - finally: - try: - self.query(f"DROP TRIGGER {trigger_name};") - except Exception: - pass - - def test_create_trigger_on_non_existent_table(self, use_binary, request): - trigger_name = "bad_trigger_local" - create_query = f"CREATE TRIGGER {trigger_name} ON non_existent_db.non_existent_table (SELECT 
1);" - try: - with pytest.raises(Exception) as e: - self.query(create_query) - error_str = str(e.value).lower() - assert "no integration with name" in error_str or "unknown database" in error_str - finally: - try: - self.query(f"DROP TRIGGER {trigger_name};") - except Exception: - pass - - def test_drop_non_existent_trigger(self, use_binary): - trigger_name = "non_existent_trigger" - try: - self.query(f"DROP TRIGGER {trigger_name};") - except Exception: - pass - - with pytest.raises(Exception) as e: - self.query(f"DROP TRIGGER {trigger_name};") - error_str = str(e.value).lower() - assert "doesn't exist" in error_str or "unknown trigger" in error_str - - -@pytest.mark.parametrize("use_binary", [False, True], indirect=True) -class TestMySQLQueryComposability(BaseStuff): - """Test suite for advanced query composability (CTEs, Subqueries, UNIONs).""" - - db_name = "test_composability_db" - - @pytest.fixture(scope="class") - def composability_db(self): - """Class-scoped fixture to create the postgres DB.""" - print(f"\n--> [Fixture composability_db] Setting up database: {self.db_name}") - db_details = { - "type": "postgres", - "connection_data": { - "host": "samples.mindsdb.com", - "port": "5432", - "user": "demo_user", - "password": "demo_password", - "database": "demo", - "schema": "demo", - }, - } - try: - self.create_database(self.db_name, db_details) - self.validate_database_creation(self.db_name) - yield self.db_name - finally: - print(f"\n--> [Fixture composability_db] Tearing down database: {self.db_name}") - self.query(f"DROP DATABASE IF EXISTS {self.db_name};") - - @pytest.fixture - def use_binary(self, request): - self.use_binary = request.param - - @pytest.mark.usefixtures("composability_db") - def test_common_table_expression_with(self, use_binary): - """ - Tests a query using a WITH clause (CTE). - tests that you can define a temporary result set (cte) and then query that result set. 
- """ - query = f""" - WITH cte AS ( - SELECT * FROM {self.db_name}.home_rentals WHERE number_of_rooms = 2 - ) - SELECT * FROM cte LIMIT 5; - """ - result = self.query(query) - assert len(result) > 0 - assert all(row["number_of_rooms"] == 2 for row in result) - - @pytest.mark.usefixtures("composability_db") - def test_union_operator(self, use_binary): - """Tests a query using the UNION set operator. - It tests that you can combine the results from two separate queries into one list. - """ - query = f""" - (SELECT sqft, location, number_of_rooms FROM {self.db_name}.home_rentals WHERE number_of_rooms = 1 LIMIT 5) - UNION - (SELECT sqft, location, number_of_rooms FROM {self.db_name}.home_rentals WHERE number_of_rooms = 2 LIMIT 5); - """ - result = self.query(query) - assert len(result) > 0 - assert all(row["number_of_rooms"] in (1, 2) for row in result) - - @pytest.mark.usefixtures("composability_db") - def test_subquery_with_join_and_cte(self, use_binary): - """ - Tests a subquery rewrite for the unsupported 'WHERE IN (SELECT...)' syntax. - This tests CTE, UNION, and JOIN composability. - """ - query = f""" - WITH allowed_rooms AS ( - SELECT 1 as room_num - UNION - SELECT 3 as room_num - ) - SELECT t1.* - FROM {self.db_name}.home_rentals AS t1 - JOIN allowed_rooms AS t2 ON t1.number_of_rooms = t2.room_num - LIMIT 10; - """ - result = self.query(query) - assert len(result) > 0 - assert all(row["number_of_rooms"] in (1, 3) for row in result) - - @pytest.mark.usefixtures("composability_db") - def test_from_subquery(self, use_binary): - """Tests a subquery in the FROM clause. - It tests that you can run a query inside the FROM clause and use its results - as the source table for an outer query. 
- """ - query = f""" - SELECT * FROM ( - SELECT * FROM {self.db_name}.home_rentals WHERE number_of_rooms = 2 - ) as sub_table - LIMIT 5; - """ - result = self.query(query) - assert len(result) > 0 - assert all(row["number_of_rooms"] == 2 for row in result) diff --git a/tests/integration/flows/test_ts_predictions.py b/tests/integration/flows/test_ts_predictions.py deleted file mode 100644 index 49eaecc4af8..00000000000 --- a/tests/integration/flows/test_ts_predictions.py +++ /dev/null @@ -1,145 +0,0 @@ -import datetime -import random - -import pytest - -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from tests.integration.utils.http_test_helpers import HTTPHelperMixin -from tests.integration.conftest import get_test_resource_name - - -def to_dicts(data): - data = [ - {"date": datetime.datetime.strptime(x[0].split(" ")[0], "%Y-%m-%d").date(), "group": x[1], "value": x[2]} - for x in data - ] - data.sort(key=lambda x: x["date"]) - return data - - -@pytest.mark.skip(reason="Disabled after deleting lightwood. 
No suitable handler available and BYOM usage restricted.") -class TestHTTP(HTTPHelperMixin): - # Unique resource names for this test session (initialized in setup_class) - POSTGRES_DB_NAME = None - VIEW_NAME = None - MODEL_NAME = None - - @classmethod - def setup_class(cls): - cls._sql_via_http_context = {} - # Initialize unique resource names for this test session - cls.POSTGRES_DB_NAME = get_test_resource_name("test_ts_demo_postgres") - cls.VIEW_NAME = get_test_resource_name("testv") - cls.MODEL_NAME = get_test_resource_name("tstest") - - def test_create_model(self, train_finetune_lock): - self.sql_via_http(f"DROP DATABASE IF EXISTS {self.POSTGRES_DB_NAME};", RESPONSE_TYPE.OK) - sql = f""" - CREATE DATABASE {self.POSTGRES_DB_NAME} - WITH ENGINE = "postgres", - PARAMETERS = {{ - "user": "demo_user", - "password": "demo_password", - "host": "samples.mindsdb.com", - "port": "5432", - "database": "demo" - }}; - """ - resp = self.sql_via_http(sql, RESPONSE_TYPE.OK) - - groups = ["a", "b"] - selects = [] - for i in range(30): - day_str = str(datetime.date.today() + datetime.timedelta(days=i)) - for group in groups: - value = random.randint(0, 10) - selects.append(f"select '{day_str}' as date, '{group}' as group, {value} as value") - selects = " union all ".join(selects) - - self.sql_via_http(f"DROP VIEW IF EXISTS {self.VIEW_NAME};", RESPONSE_TYPE.OK) - sql = f""" - create view {self.VIEW_NAME} as ( - select * from {self.POSTGRES_DB_NAME} ({selects}) - ) - """ - self.sql_via_http(sql, RESPONSE_TYPE.OK) - - self.sql_via_http(f"DROP MODEL IF EXISTS mindsdb.{self.MODEL_NAME};", RESPONSE_TYPE.OK) - with train_finetune_lock.acquire(timeout=600): - sql = f""" - CREATE MODEL - mindsdb.{self.MODEL_NAME} - FROM mindsdb (select * from {self.VIEW_NAME}) - PREDICT value - ORDER BY date - GROUP BY group - WINDOW 5 - HORIZON 3; - """ - resp = self.sql_via_http(sql, RESPONSE_TYPE.TABLE) - - assert len(resp["data"]) == 1 - status = resp["column_names"].index("STATUS") - assert 
resp["data"][0][status] == "generating" - - self.await_model(self.MODEL_NAME) - - def test_gt_latest_date(self): - sql = f""" - select p.date, p.group, p.value - from mindsdb.{self.VIEW_NAME} as t join mindsdb.{self.MODEL_NAME} as p - where t.date > LATEST - """ - resp = self.sql_via_http(sql, RESPONSE_TYPE.TABLE) - data = to_dicts(resp["data"]) - assert len(data) == 6 - assert len([x for x in data if x["group"] == "a"]) == 3 - assert data[0]["date"] == (datetime.date.today() + datetime.timedelta(days=30)) - - def test_gt_latest_date_empty_join(self): - sql = f""" - select p.date, p.group, p.value - from mindsdb.{self.VIEW_NAME} as t join mindsdb.{self.MODEL_NAME} as p - where t.date > LATEST and t.group = 'wrong' - """ - resp = self.sql_via_http(sql, RESPONSE_TYPE.TABLE) - data = to_dicts(resp["data"]) - assert len(data) == 0 - - def test_eq_latest_date(self): - sql = f""" - select p.date, p.group, p.value - from mindsdb.{self.VIEW_NAME} as t join mindsdb.{self.MODEL_NAME} as p - where t.date = LATEST - """ - resp = self.sql_via_http(sql, RESPONSE_TYPE.TABLE) - data = to_dicts(resp["data"]) - assert len(data) == 2 - assert len([x for x in data if x["group"] == "a"]) == 1 - assert data[0]["date"] == (datetime.date.today() + datetime.timedelta(days=29)) - - def test_gt_particular_date(self): - since = datetime.date.today() + datetime.timedelta(days=15) - sql = f""" - select p.date, p.group, p.value - from mindsdb.{self.VIEW_NAME} as t join mindsdb.{self.MODEL_NAME} as p - where t.date > '{since}' - """ - resp = self.sql_via_http(sql, RESPONSE_TYPE.TABLE) - data = to_dicts(resp["data"]) - assert len(data) == 34 # 14 * 2 + 6 (4 days, 2 groups, 2*3 horizon) - assert len([x for x in data if x["group"] == "a"]) == 17 # 14 + 3 - assert data[0]["date"] == (datetime.date.today() + datetime.timedelta(days=16)) - - def test_eq_particular_date(self): - since = datetime.date.today() + datetime.timedelta(days=15) - sql = f""" - select p.date, p.group, p.value - from 
mindsdb.{self.VIEW_NAME} as t join mindsdb.{self.MODEL_NAME} as p - where t.date = '{since}' - """ - resp = self.sql_via_http(sql, RESPONSE_TYPE.TABLE) - data = to_dicts(resp["data"]) - assert len(data) == 6 # 2 groups * 3 horizon - assert len([x for x in data if x["group"] == "a"]) == 3 - assert data[0]["date"] == (datetime.date.today() + datetime.timedelta(days=16)) diff --git a/tests/integration/handlers/.env.example b/tests/integration/handlers/.env.example deleted file mode 100644 index 341547bc7c0..00000000000 --- a/tests/integration/handlers/.env.example +++ /dev/null @@ -1,42 +0,0 @@ -# ------------------------------------- -# MindsDB Connection Details -# ------------------------------------- -# Local development connection (default) -MINDSDB_PROTOCOL=http -MINDSDB_HOST=127.0.0.1 -MINDSDB_PORT=47334 -MINDSDB_USER= -MINDSDB_PASSWORD= - -# Example for a remote, authenticated connection -# MINDSDB_PROTOCOL=https -# MINDSDB_HOST=cloud.mindsdb.com -# MINDSDB_PORT=443 -# MINDSDB_USER=your_email@example.com -# MINDSDB_PASSWORD=your_mindsdb_password - -# ------------------------------------- -# Data Source Credentials -# ------------------------------------- - -# PostgreSQL -PG_SOURCE_HOST=samples.mindsdb.com -PG_SOURCE_PORT=5432 -PG_SOURCE_DATABASE=demo -PG_SOURCE_USER=demo_user -PG_SOURCE_PASSWORD=demo_password -PG_SOURCE_SCHEMA=sample_data - -# Databricks -DATABRICKS_HOST= -DATABRICKS_WAREHOUSE_ID= -DATABRICKS_TOKEN= -DATABRICKS_CATALOG=samples -DATABRICKS_SCHEMA=tpch - -# Snowflake -SNOWFLAKE_HOST= -SNOWFLAKE_ACCOUNT= -SNOWFLAKE_USER= -SNOWFLAKE_PASSWORD= -SNOWFLAKE_DATABASE=SNOWFLAKE_ \ No newline at end of file diff --git a/tests/integration/handlers/README.md b/tests/integration/handlers/README.md deleted file mode 100644 index d8cd2dc41b4..00000000000 --- a/tests/integration/handlers/README.md +++ /dev/null @@ -1,125 +0,0 @@ -# MindsDB Data Source Integration (DSI) Testing Framework - -This is a configuration-driven, automated testing framework for MindsDB 
data source integrations using pytest. - -## Core Features - -* **Dynamic Test Generation**: Automatically discovers tables for any connected data source and generates a standard set of health-check tests using pytest's parametrization. -* **Universal Handler Support**: Uses the MindsDB-native `SHOW TABLES` command to ensure autodiscovery works for both SQL databases and API-based handlers (like GitHub). -* **Configuration-Driven**: Easily add new data sources by providing credentials in a .env file without changing any test code. -* **Extensible Custom Tests**: Add complex, custom queries for any data source via simple JSON configuration files. -* **Comprehensive Logging**: utomatically generates a detailed JSON log of every query executed, including duration, response, and errors. - -## Setup - -1. **Create and activate a virtual environment:** - ```bash - uv venv - source .venv/bin/activate # On Windows, use `.venv\Scripts\activate` - ``` - -2. **Install dependencies:** - ```bash - uv pip install -r requirements/requirements-test.txt - ``` - -3. **Configure Environment:** - * Copy the `.env.example` to a new file named `.env`. - * Fill in the .env file with your MindsDB server details and the credentials for any data sources you wish to test. - * **(Optional)**: Fill in the PG_LOG_* variables if you wish to use the script to ingest results into a PostgreSQL database. - -## Running Tests - -To run the full test suite, execute the following command from the project's root directory: - -```bash -# using make -make datasource_integration_tests -``` - -## Architectural Overview -The framework operates on a dynamic test collection model powered by pytest.parametrize. This is a clean and modern approach that avoids generating temporary test files. - -### High-Level Workflow: -1. Configuration: A developer provides credentials and lists the target handlers in the .env file. - -2. 
Test Collection: When pytest starts, the generate_test_cases_for_parametrization() function in test_data_sources.py runs. It connects to MindsDB, discovers tables for the configured handlers, and builds a list of test case definitions in memory. - -3. Test Execution: pytest uses this in-memory list to dynamically generate and run a test for each definition. - -4. Automatic Logging: A patched version of the MindsDB SDK automatically logs every query's duration, response, and errors to reports/all_handlers_query_log.json. - -## Core Components -- tests/integration/handlers/conftest.py: The main entry point for the DSI framework. It contains all the core fixtures and hooks, including mindsdb_server (which connects to the SDK and patches it for automatic logging) and session_databases (which creates and tears down databases). - -- tests/integration/handlers/test_data_sources.py: The engine of the framework. It contains the logic for discovering handlers, generating test case definitions, and the main test_handler_integrations function that executes the tests. - -- tests/integration/handlers/utils/: Contains helpers for configuration (config.py) and building database connection parameters (helpers.py). - -## Extending the Framework - -### Adding a New Data Handler -This is a configuration-only change. - -1. Add Credentials to .env: Add the necessary environment variables for the new handler (e.g., CLICKHOUSE_HOST). - -2. Update config.py: In tests/integration/handlers/utils/config.py, add a new HANDLERNAME_CREDS dictionary to load these environment variables. - -3. Update HANDLERS_TO_TEST: Add the name of the new handler to the HANDLERS_TO_TEST variable in your .env file. - -The framework will now automatically include the new handler in its test discovery process. - -### Adding Custom Tests -To test specific queries, you can add a custom test configuration. - -1. 
Create a JSON File: In tests/integration/handlers/configs/, create a new JSON file named after the handler (e.g., postgres.json). - -2. Define Your Queries: Define custom queries, expected columns, and negative tests. - - -``` -{ - "queries": { - "select_with_join": { - "query": "SELECT a.col1, b.col2 FROM {db_name}.table_a a JOIN {db_name}.table_b b ON a.id = b.id", - "expected_columns": ["col1", "col2"], - "min_rows": 1 - } - }, - "negative_tests": [ - { - "query": "SELECT * FROM {db_name}.non_existent_table;", - "expected_error": "does not exist" - } - ] - ``` - -## Reporting and Data Ingestion -The framework automatically creates a detailed query log at reports/all_handlers_query_log.json. - -Additionally, you can ingest these results into a PostgreSQL database for analysis using the provided script: - -``` -python tests/scripts/ingest_report.py -``` - -### Our Priority List for New Data Sources -Here is the prioritized list of the next data sources we plan to integrate into the framework. The checkmarks indicate the data sources that are already tested. 
- -- Postgres: ✅ - -- GitHub: ✅ - --Databricks: ✅ - -- Parquet in S3 ✅ - -- Jira ✅ ( there is currently a bug in the jira handler) - -- SQL Server ✅ - -- MariaDB ✅ - -- MySQL ✅ - -- Redshift ❌ \ No newline at end of file diff --git a/tests/integration/handlers/__init__.py b/tests/integration/handlers/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/handlers/configs/example.jira.json b/tests/integration/handlers/configs/example.jira.json deleted file mode 100644 index afd8ed2d493..00000000000 --- a/tests/integration/handlers/configs/example.jira.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "name": "Jira Read-Only Test", - "test_type": "read_only", - "queries": { - "select_projects": { - "query": "SELECT * FROM {db_name}.projects LIMIT 5;", - "expected_columns": ["id", "name", "key"], - "min_rows": 1 - } - }, - "negative_tests": [ - { - "query": "SELECT * FROM {db_name}.non_existent_table;", - "expected_error": "Table not found: non_existent_table" - } - ] -} \ No newline at end of file diff --git a/tests/integration/handlers/configs/example.mariadb.json b/tests/integration/handlers/configs/example.mariadb.json deleted file mode 100644 index 7dc5b8b40c0..00000000000 --- a/tests/integration/handlers/configs/example.mariadb.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "name": "Mariadb Read-Only Test", - "test_type": "read_only", - "queries": { - "select_users": { - "query": "SELECT * FROM {db_name}.users;", - "expected_columns": ["id", "username", "email"], - "min_rows": 1 - }, - "select_by_id": { - "query": "SELECT * FROM {db_name}.users where id = 2;", - "expected_columns": ["id", "username", "email"], - "exact_rows": 1 - } - }, - "negative_tests": [ - { - "query": "SELECT * FROM {db_name}.non_existent_table;", - "expected_error": "Table 'test_db.non_existent_table' doesn't exist" - } - ] -} \ No newline at end of file diff --git a/tests/integration/handlers/configs/github.json b/tests/integration/handlers/configs/github.json 
deleted file mode 100644 index 17bed010f0c..00000000000 --- a/tests/integration/handlers/configs/github.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "name": "GitHub Read-Only Test", - "test_type": "read_only", - "queries": { - "select_issues": { - "query": "SELECT * FROM {db_name}.issues WHERE state = 'closed' LIMIT 5;", - "expected_columns": ["number", "title", "state", "created", "closed"], - "exact_rows": 5 - }, - "select_commits": { - "query": "SELECT * FROM {db_name}.commits LIMIT 10;", - "expected_columns": ["sha", "commit"], - "min_rows": 5 - } - }, - "negative_tests": [ - { - "query": "SELECT * FROM {db_name}.non_existent_table;", - "expected_error": "Table not found: non_existent_table" - } - ] -} diff --git a/tests/integration/handlers/configs/postgres.json b/tests/integration/handlers/configs/postgres.json deleted file mode 100644 index 70e2a1f04bc..00000000000 --- a/tests/integration/handlers/configs/postgres.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "name": "Postgres Read-Only Test", - "test_type": "read_only", - "queries": { - "select_product_join": { - "query": "select sp.product_name, sp.price, sp.number_sold, si.lead_times, si.stock_levels from {db_name}.supplychain_products sp JOIN {db_name}.supplychain_inventory si ON sp.sku = si.sku", - "expected_columns": ["product_name", "price", "number_sold", "lead_times", "stock_levels"], - "min_rows": 1 - } - }, - "negative_tests": [ - { - "query": "SELECT * FROM {db_name}.non_existent_table;", - "expected_error": "relation \"non_existent_table\" does not exist\n" - } - ] -} \ No newline at end of file diff --git a/tests/integration/handlers/conftest.py b/tests/integration/handlers/conftest.py deleted file mode 100644 index 3f0067f020b..00000000000 --- a/tests/integration/handlers/conftest.py +++ /dev/null @@ -1,153 +0,0 @@ -import pytest -import json -import logging -import time -import mindsdb_sdk -from pathlib import Path -from dotenv import load_dotenv -from typing import Generator, Any -from functools 
import wraps -from tests.integration.handlers.utils import config -from tests.integration.handlers.utils.helpers import get_handlers_info, build_parameters_clause - - -project_root = Path(__file__).parent.parent.parent.parent - - -def pytest_addoption(parser): - """ - Adds command-line options specific to DSI tests. - """ - parser.addoption("--run-dsi-tests", action="store_true", default=False, help="run DSI integration tests") - - -def pytest_configure(config): - """ - Registers the custom 'dsi' mark and loads the .env file. - """ - config.addinivalue_line("markers", "dsi: mark test as part of the DSI framework") - if config.getoption("--run-dsi-tests"): - logging.info("--- DSI: Loading environment variables ---") - if not load_dotenv(override=True): - logging.warning("DSI: Could not find .env file. Using system variables.") - else: - logging.info("DSI: Successfully loaded environment variables from .env file.") - - -@pytest.fixture(scope="session") -def query_log_data(): - """A session-scoped dictionary to store all query logs.""" - return {} - - -def pytest_collection_modifyitems(config, items): - """ - Modifies DSI test items after collection to skip them if the flag is not provided. - """ - if not config.getoption("--run-dsi-tests"): - skip_dsi = pytest.mark.skip(reason="need --run-dsi-tests option to run") - for item in items: - if "dsi" in item.keywords: - item.add_marker(skip_dsi) - - -@pytest.fixture(scope="session") -def mindsdb_server(query_log_data) -> Generator[Any, None, None]: - """ - Establishes a connection to the MindsDB SDK and patches query objects - to automatically log when .fetch() is called. 
- """ - logging.info("--- DSI: Attempting to connect to SDK ---") - url = f"{config.MINDSDB_PROTOCOL}://{config.MINDSDB_HOST}:{config.MINDSDB_PORT}" - - try: - if config.MINDSDB_USER and config.MINDSDB_PASSWORD: - server = mindsdb_sdk.connect(url=url, login=config.MINDSDB_USER, password=config.MINDSDB_PASSWORD) - else: - server = mindsdb_sdk.connect(url) - logging.info("DSI: Successfully connected to MindsDB via SDK.") - except ConnectionError as e: - pytest.fail(f"DSI: Failed to connect to MindsDB. Error: {e}", pytrace=False) - - original_query_method = server.query - - def patched_query_constructor(sql_query: str): - query_object = original_query_method(sql_query) - original_fetch = query_object.fetch - - @wraps(original_fetch) - def logged_fetch(*args, **kwargs): - handler_name = "dsi_test" - start_time = time.time() - actual_response = None - error = None - try: - response_df = original_fetch(*args, **kwargs) - if response_df is not None: - actual_response = response_df.to_json(orient="records") if not response_df.empty else "[]" - return response_df - except RuntimeError as e: - error = str(e) - raise - finally: - duration = time.time() - start_time - if handler_name not in query_log_data: - query_log_data[handler_name] = [] - query_log_data[handler_name].append( - { - "query": sql_query, - "duration": round(duration, 4), - "actual_response": actual_response, - "error": error, - } - ) - - query_object.fetch = logged_fetch - return query_object - - server.query = patched_query_constructor - yield server - - # Teardown: write the log file. 
- reports_dir = project_root / "reports" - reports_dir.mkdir(exist_ok=True) - log_filepath = reports_dir / "all_handlers_query_log.json" - with open(log_filepath, "w") as f: - json.dump(query_log_data, f, indent=4) - logging.info(f"DSI: Full query log saved to {log_filepath}") - - -@pytest.fixture(scope="session") -def session_databases(mindsdb_server): - """Creates and tears down databases for each handler for the test session.""" - created_dbs = {} - all_handlers, _ = get_handlers_info(mindsdb_server) - - for handler_info in all_handlers: - handler_name = handler_info["name"] - db_name = f"test_session_{handler_name}" - params_clause, skip_reason = build_parameters_clause(handler_name, handler_info["connection_args"]) - - if skip_reason: - logging.warning(f"DSI: Skipping database creation for {handler_name}: {skip_reason}") - continue - - try: - mindsdb_server.query(f"DROP DATABASE IF EXISTS {db_name};").fetch() - create_query = f"CREATE DATABASE {db_name} WITH ENGINE = '{handler_name}', PARAMETERS = {params_clause};" - mindsdb_server.query(create_query).fetch() - created_dbs[handler_name] = db_name - logging.info(f"DSI: Successfully created database '{db_name}' for handler '{handler_name}'.") - except RuntimeError as e: - logging.exception(f"DSI: Failed to create database for {handler_name}: {e}") - raise - - yield created_dbs - - logging.info("--- DSI: Tearing down session databases ---") - for db_name in created_dbs.values(): - try: - mindsdb_server.query(f"DROP DATABASE IF EXISTS {db_name};").fetch() - logging.info(f"DSI: Successfully dropped database {db_name}.") - except RuntimeError as e: - logging.exception(f"DSI: Failed to drop database {db_name}: {e}") diff --git a/tests/integration/handlers/test_data_sources.py b/tests/integration/handlers/test_data_sources.py deleted file mode 100644 index effa138e0c2..00000000000 --- a/tests/integration/handlers/test_data_sources.py +++ /dev/null @@ -1,119 +0,0 @@ -import pytest -import logging -import json -import 
os -from pathlib import Path -from typing import Dict, Any, List - -# --- Test Case Generation (I/O-Free, Filtered, and Deterministic) --- - - -def get_handlers_to_test() -> List[str]: - """ - Determines which handlers to test. - 1. If the HANDLERS_TO_TEST environment variable is set, it uses that list. - 2. Otherwise, it discovers handlers by looking for non-example config files. - """ - env_handlers = os.environ.get("HANDLERS_TO_TEST") - if env_handlers: - return [h.strip() for h in env_handlers.split(",")] - - # Fallback to discovering from config files, excluding examples - configs_path = Path(__file__).parent / "configs" - return sorted([p.stem for p in configs_path.glob("*.json") if not p.name.startswith("example.")]) - - -def generate_test_cases_for_parametrization() -> List[Dict[str, Any]]: - """ - Generates a deterministic list of test cases for the specified handlers. - This function is I/O-free and safe for pytest-xdist collection. - """ - test_cases = [] - configs_path = Path(__file__).parent / "configs" - handlers_to_run = get_handlers_to_test() - - for name in handlers_to_run: - config_path = configs_path / f"{name}.json" - - # Add a placeholder for autodiscovery for every handler - test_cases.append({"handler_name": name, "test_type": "autodiscovery"}) - - # Generate specific tests from the handler's config file - if config_path.is_file(): - with open(config_path, "r") as f: - test_config = json.load(f) - - if "queries" in test_config: - for query_name in sorted(test_config["queries"].keys()): - test_cases.append({"handler_name": name, "test_type": "custom", "query_name": query_name}) - - if "negative_tests" in test_config: - for i in range(len(test_config["negative_tests"])): - test_cases.append({"handler_name": name, "test_type": "negative", "test_index": i}) - - return test_cases - - -# --- Main Parametrized Test Function --- - - -def idfn(test_case): - """Generates a unique and descriptive ID for each test case.""" - name = 
test_case["handler_name"] - ttype = test_case["test_type"] - if ttype == "custom": - return f"{name}-{ttype}-{test_case['query_name']}" - if ttype == "negative": - return f"{name}-{ttype}-negative_{test_case['test_index']}" - return f"{name}-{ttype}" - - -@pytest.mark.dsi -@pytest.mark.parametrize("test_case", generate_test_cases_for_parametrization(), ids=idfn) -def test_handler_integrations(mindsdb_server, session_databases, test_case): - """ - This test function is parametrized with individual test cases, restoring the - original test reporting structure in a way that is safe for parallel execution. - """ - handler_name = test_case["handler_name"] - test_type = test_case["test_type"] - - if handler_name not in session_databases: - pytest.skip(f"Database for handler '{handler_name}' was not set up successfully.") - - db_name = session_databases[handler_name] - config_path = Path(__file__).parent / "configs" / f"{handler_name}.json" - - # --- Test Execution Logic --- - - if test_type == "autodiscovery": - tables_df = mindsdb_server.query(f"SHOW TABLES FROM {db_name};").fetch() - assert not tables_df.empty, "Autodiscovery failed: SHOW TABLES returned no results." - - elif test_type == "custom": - with open(config_path, "r") as f: - test_config = json.load(f) - query_name = test_case["query_name"] - query_details = test_config["queries"][query_name] - query = query_details["query"].format(db_name=db_name) - - logging.info(f"Running custom query '{query_name}': {query}") - select_df = mindsdb_server.query(query).fetch() - assert not select_df.empty, f"Custom query '{query_name}' returned no results." 
- - elif test_type == "negative": - with open(config_path, "r") as f: - test_config = json.load(f) - test_index = test_case["test_index"] - neg_test = test_config["negative_tests"][test_index] - query = neg_test["query"].format(db_name=db_name) - - logging.info(f"Running negative test #{test_index}: {query}") - with pytest.raises(RuntimeError) as excinfo: - mindsdb_server.query(query).fetch() - - error_str = str(excinfo.value) - expected_error = neg_test["expected_error"] - assert expected_error in error_str, ( - f'Negative test failed. Expected error substring "{expected_error}" not found in actual error: {error_str}' - ) diff --git a/tests/integration/handlers/utils/config.py b/tests/integration/handlers/utils/config.py deleted file mode 100644 index 905ada36864..00000000000 --- a/tests/integration/handlers/utils/config.py +++ /dev/null @@ -1,97 +0,0 @@ -import os -import logging -from dotenv import load_dotenv -from typing import Dict, Optional, Any - -# When tests are run from the project root, python-dotenv finds the .env file automatically. -if load_dotenv(override=True): - logging.info("DSI: Successfully loaded and overrode env variables from .env file.") -else: - logging.warning("DSI: Could not find .env file. Using system variables or default credentials.") - -# --- MindsDB Connection Details --- -MINDSDB_PROTOCOL: str = os.getenv("MINDSDB_PROTOCOL", "http") -MINDSDB_HOST: str = os.getenv("MINDSDB_HOST", "127.0.0.1") -MINDSDB_PORT: str = os.getenv("MINDSDB_PORT", "47334") -MINDSDB_USER: Optional[str] = os.getenv("MINDSDB_USER") -MINDSDB_PASSWORD: Optional[str] = os.getenv("MINDSDB_PASSWORD") - -# --- Test Execution Configuration --- -# Default to handlers that have public credentials for a better out-of-the-box experience. 
-HANDLERS_TO_TEST: str = os.getenv("HANDLERS_TO_TEST", "postgres,mariadb,mysql,mssql") - -# --- Data Source Credentials (Convention: HANDLERNAME_CREDS) --- -# Handlers with public, default credentials -POSTGRES_CREDS: Dict[str, Any] = { - "host": os.getenv("PG_SOURCE_HOST", "samples.mindsdb.com"), - "port": int(os.getenv("PG_SOURCE_PORT", 5432)), - "database": os.getenv("PG_SOURCE_DATABASE", "demo"), - "user": os.getenv("PG_SOURCE_USER", "demo_user"), - "password": os.getenv("PG_SOURCE_PASSWORD", "demo_password"), - "schema": os.getenv("PG_SOURCE_SCHEMA", "sample_data"), -} - -MARIADB_CREDS: Dict[str, Any] = { - "host": os.getenv("MARIADB_HOST", "samples.mindsdb.com"), - "port": int(os.getenv("MARIADB_PORT", 3307)), - "database": os.getenv("MARIADB_DATABASE", "test_data"), - "user": os.getenv("MARIADB_USER", "demo_user"), - "password": os.getenv("MARIADB_PASSWORD", "demo_password"), -} - -MYSQL_CREDS: Dict[str, Any] = { - "host": os.getenv("MYSQL_HOST", "samples.mindsdb.com"), - "port": int(os.getenv("MYSQL_PORT", 3306)), - "database": os.getenv("MYSQL_DATABASE", "public"), - "user": os.getenv("MYSQL_USER", "user"), - "password": os.getenv("MYSQL_PASSWORD", "MindsDBUser123!"), -} - -MSSQL_CREDS: Dict[str, Any] = { - "host": os.getenv("SQLSERVER_HOST", "samples.mindsdb.com"), - "port": int(os.getenv("SQLSERVER_PORT", 1433)), - "database": os.getenv("SQLSERVER_DATABASE", "demo"), - "user": os.getenv("SQLSERVER_USER", "demo_user"), - "password": os.getenv("SQLSERVER_SA_PASSWORD", "D3mo_Passw0rd"), -} - -# --- Handlers that require .env configuration --- -DATABRICKS_CREDS: Dict[str, Any] = { - "server_hostname": os.getenv("DATABRICKS_HOST"), - "http_path": f"/sql/1.0/warehouses/{os.getenv('DATABRICKS_WAREHOUSE_ID')}", - "access_token": os.getenv("DATABRICKS_TOKEN"), - "catalog": os.getenv("DATABRICKS_CATALOG", "workspace"), - "schema": os.getenv("DATABRICKS_SCHEMA"), -} - -SNOWFLAKE_CREDS: Dict[str, Any] = { - "host": os.getenv("SNOWFLAKE_HOST"), - "user": 
os.getenv("SNOWFLAKE_USER"), - "password": os.getenv("SNOWFLAKE_PASSWORD"), - "database": os.getenv("SNOWFLAKE_DATABASE"), - "warehouse": os.getenv("SNOWFLAKE_WAREHOUSE"), -} - -GITHUB_CREDS: Dict[str, Any] = {"token": os.getenv("GITHUB_TOKEN"), "repository": os.getenv("GITHUB_REPOSITORY")} - -BIGQUERY_CREDS: Dict[str, Any] = { - "project_id": os.getenv("BIGQUERY_PROJECT_ID"), - "dataset": os.getenv("BIGQUERY_DATASET"), - "service_account_json": os.getenv("BIGQUERY_SERVICE_ACCOUNT_JSON"), -} - -S3_CREDS: Dict[str, Any] = { - "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"), - "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"), - "bucket": os.getenv("S3_BUCKET_NAME"), -} - -# --- PostgreSQL Logging Database Credentials --- -PG_LOG_HOST: Optional[str] = os.getenv("PG_LOG_HOST") -PG_LOG_PORT: Optional[str] = os.getenv("PG_LOG_PORT", "5432") -PG_LOG_DATABASE: Optional[str] = os.getenv("PG_LOG_DATABASE") -PG_LOG_USER: Optional[str] = os.getenv("PG_LOG_USER") -PG_LOG_PASSWORD: Optional[str] = os.getenv("PG_LOG_PASSWORD") - -logging.info(f"DSI: Configuration loaded for MindsDB host: {MINDSDB_HOST}") -logging.info(f"DSI: E2E tests will run for: {HANDLERS_TO_TEST}") diff --git a/tests/integration/handlers/utils/helpers.py b/tests/integration/handlers/utils/helpers.py deleted file mode 100644 index e4fa042fa48..00000000000 --- a/tests/integration/handlers/utils/helpers.py +++ /dev/null @@ -1,84 +0,0 @@ -import logging -import json -from typing import Dict, Any, List, Tuple - -from tests.integration.handlers.utils import config - - -def get_handlers_info(mindsdb_server: Any) -> Tuple[List[Dict[str, Any]], List[str]]: - """ - Discovers connection arguments for specified handlers and identifies which are not installed. - """ - # Let any RuntimeError from the SDK propagate naturally for a better traceback. 
- installed_handlers_df = mindsdb_server.query( - "SELECT NAME, IMPORT_SUCCESS FROM information_schema.handlers WHERE type = 'data'" - ).fetch() - if installed_handlers_df.empty: - logging.warning("DSI: Did not discover any installed data handlers on the MindsDB server.") - installed_handlers = set() - else: - installed_handlers = set(installed_handlers_df[installed_handlers_df["IMPORT_SUCCESS"]]["NAME"].str.lower()) - - target_handlers_str = config.HANDLERS_TO_TEST - target_handlers_list = [h.strip().lower() for h in target_handlers_str.split(",") if h.strip()] - - uninstalled_handlers = [h for h in target_handlers_list if h not in installed_handlers] - handlers_to_test = [h for h in target_handlers_list if h in installed_handlers] - - if not handlers_to_test: - return [], uninstalled_handlers - - in_clause = " AND LOWER(NAME) IN (" + ", ".join(f"'{h}'" for h in handlers_to_test) + ")" - query = "SELECT NAME, CONNECTION_ARGS FROM information_schema.handlers WHERE type = 'data'" + in_clause - - result_df = mindsdb_server.query(query).fetch() - - handlers = [] - if not result_df.empty: - for _, row in result_df.iterrows(): - handlers.append( - { - "name": row["NAME"], - "connection_args": json.loads(row["CONNECTION_ARGS"]) if row["CONNECTION_ARGS"] else {}, - } - ) - return handlers, uninstalled_handlers - - -def build_parameters_clause(handler_name: str, connection_args: Dict[str, Any]) -> Tuple[str, str]: - """ - Builds the PARAMETERS clause for a CREATE DATABASE query using credentials from config. 
- """ - creds_variable_name = f"{handler_name.upper()}_CREDS" - creds = getattr(config, creds_variable_name, None) - - if creds is None: - return None, f"No credential variable named '{creds_variable_name}' found in config.py" - - params_dict = {} - missing_creds = [] - - all_possible_keys = set(connection_args.keys()) | set(creds.keys()) - - for key in all_possible_keys: - details = connection_args.get(key, {}) - is_required = details.get("required", False) - value = creds.get(key) - - if is_required and value is None: - missing_creds.append(key.upper()) - continue - - if value is not None: - if handler_name.lower() == "bigquery" and key == "service_account_json": - try: - params_dict[key] = json.loads(value) - except (json.JSONDecodeError, TypeError): - return None, "The BIGQUERY_SERVICE_ACCOUNT_JSON is not a valid JSON string." - else: - params_dict[key] = value - - if missing_creds: - return None, f"Missing required .env variables for {handler_name}: {', '.join(missing_creds)}" - - return json.dumps(params_dict), None diff --git a/tests/integration/rag/__init__.py b/tests/integration/rag/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/tutorials/__init__.py b/tests/integration/tutorials/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/tutorials/test_predict_text_sentiment_openai_tutorial.py b/tests/integration/tutorials/test_predict_text_sentiment_openai_tutorial.py deleted file mode 100644 index cc38bbc74d9..00000000000 --- a/tests/integration/tutorials/test_predict_text_sentiment_openai_tutorial.py +++ /dev/null @@ -1,107 +0,0 @@ -import os - -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from tests.integration.utils.http_test_helpers import HTTPHelperMixin - - -OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") - - -class QueryStorage: - create_db = """ -CREATE DATABASE example_sentiment_openai_db -WITH ENGINE = "postgres", 
-PARAMETERS = { - "user": "demo_user", - "password": "demo_password", - "host": "samples.mindsdb.com", - "port": "5432", - "database": "demo", - "schema": "demo_data" - }; -""" - check_db_created = """ -SELECT * -FROM example_sentiment_openai_db.amazon_reviews LIMIT 3; -""" - delete_db = """ -DROP DATABASE IF EXISTS example_sentiment_openai_db; -""" - create_engine = """ -CREATE ML_ENGINE openai2 -FROM openai USING openai_api_key='%s'; -""" - delete_engine = """ -DROP ML_ENGINE IF EXISTS openai2; -""" - create_model = """ -CREATE MODEL sentiment_classifier_gpt3 -PREDICT sentiment -USING -engine = 'openai2', -prompt_template = 'describe the sentiment of the reviews -strictly as "positive", "neutral", or "negative". -"I love the product":positive -"It is a scam":negative -"{{review}}.":', -openai_api_key = '%s'; -""" - check_status = """ -SELECT * FROM models -WHERE name = 'sentiment_classifier_gpt3'; -""" - delete_model = """ -DROP MODEL IF EXISTS - mindsdb.sentiment_classifier_gpt3; -""" - prediction = """ -SELECT review, sentiment -FROM sentiment_classifier_gpt3 -WHERE review = 'It is ok.'; -""" - bulk_prediction = """ -SELECT input.review, output.sentiment -FROM example_sentiment_openai_db.amazon_reviews AS input -JOIN sentiment_classifier_gpt3 AS output -LIMIT 5; -""" - - -class TestPredictTextSentimentOpenAI(HTTPHelperMixin): - def setup_class(self): - self.sql_via_http(self, QueryStorage.delete_db, RESPONSE_TYPE.OK) - self.sql_via_http(self, QueryStorage.delete_model, RESPONSE_TYPE.OK) - self.sql_via_http(self, QueryStorage.delete_engine, RESPONSE_TYPE.OK) - - def test_create_db(self): - sql = QueryStorage.create_db - self.sql_via_http(sql, RESPONSE_TYPE.OK) - - def test_db_created(self): - sql = QueryStorage.check_db_created - resp = self.sql_via_http(sql, RESPONSE_TYPE.TABLE) - assert len(resp["data"]) >= 3 - - def test_create_engine(self): - sql = QueryStorage.create_engine % OPENAI_API_KEY - self.sql_via_http(sql, RESPONSE_TYPE.OK) - - def 
test_create_model(self, train_finetune_lock): - with train_finetune_lock.acquire(timeout=600): - sql = QueryStorage.create_model % OPENAI_API_KEY - resp = self.sql_via_http(sql, RESPONSE_TYPE.TABLE) - assert len(resp["data"]) == 1 - status = resp["column_names"].index("STATUS") - assert resp["data"][0][status] == "generating" - status = self.await_model_by_query(QueryStorage.check_status, timeout=600) - assert status == "complete" - - def test_prediction(self): - sql = QueryStorage.prediction - resp = self.sql_via_http(sql, RESPONSE_TYPE.TABLE) - assert len(resp["data"]) == 1 - - def test_bulk_prediciton(self): - sql = QueryStorage.bulk_prediction - resp = self.sql_via_http(sql, RESPONSE_TYPE.TABLE) - assert len(resp["data"]) == 5 diff --git a/tests/integration/utils/__init__.py b/tests/integration/utils/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/utils/http_test_helpers.py b/tests/integration/utils/http_test_helpers.py deleted file mode 100644 index 2302914cc50..00000000000 --- a/tests/integration/utils/http_test_helpers.py +++ /dev/null @@ -1,165 +0,0 @@ -import requests -import time - -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from mindsdb.utilities.constants import DEFAULT_COMPANY_ID, DEFAULT_USER_ID -from tests.integration.conftest import ( - HTTP_API_ROOT, - get_resource_tracker, - get_test_resource_name, - get_test_company_id, - get_test_user_id, -) - - -class HTTPHelperMixin: - _sql_via_http_context = {} - _resource_tracker = None - - @classmethod - def get_resource_tracker(cls): - """Get or create the resource tracker.""" - if cls._resource_tracker is None: - cls._resource_tracker = get_resource_tracker() - return cls._resource_tracker - - @classmethod - def get_unique_name(cls, base_name: str) -> str: - """Generate a unique resource name for this test session.""" - return get_test_resource_name(base_name) - - @classmethod - def get_unique_company_id(cls, base_id: int = 
1) -> str: - """Generate a unique company ID for this test session.""" - return get_test_company_id(base_id) - - @classmethod - def get_unique_user_id(cls, base_id: int = 1) -> str: - """Generate a unique user ID for this test session.""" - return get_test_user_id(base_id) - - @staticmethod - def api_request(method, url, payload=None, headers=None): - method = method.lower() - - fnc = getattr(requests, method) - - url = f"{HTTP_API_ROOT}/{url.lstrip('/')}" - response = fnc(url, json=payload, headers=headers) - - return response - - def sql_via_http( - self, - request: str, - expected_resp_type: str = None, - context: dict = None, - headers: dict = None, - company_id: str = DEFAULT_COMPANY_ID, - user_id: str = DEFAULT_USER_ID, - ) -> dict: - if context is None: - context = self._sql_via_http_context - - if headers is None: - headers = {} - - headers["company-id"] = str(company_id) - headers["user-id"] = str(user_id) - - payload = {"query": request, "context": context} - response = self.api_request("post", "/sql/query", payload, headers) - - assert response.status_code == 200, f"sql/query is not accessible - {response.text}" - response = response.json() - if expected_resp_type is not None: - assert response.get("type") == expected_resp_type, response - else: - assert response.get("type") in [RESPONSE_TYPE.OK, RESPONSE_TYPE.TABLE, RESPONSE_TYPE.ERROR], response - assert isinstance(response.get("context"), dict) - if response["type"] == "table": - assert isinstance(response.get("data"), list) - assert isinstance(response.get("column_names"), list) - elif response["type"] == "error": - assert isinstance(response.get("error_code"), int) - assert isinstance(response.get("error_message"), str) - self._sql_via_http_context = response["context"] - return response - - def await_model(self, model_name: str, project_name: str = "mindsdb", version_number: int = 1, timeout: int = 60): - start = time.time() - status = None - while (time.time() - start) < timeout: - response = 
self.sql_via_http( - f""" - SELECT status - FROM {project_name}.models - WHERE name='{model_name}' and version = {version_number} - """, - RESPONSE_TYPE.TABLE, - ) - status = response["data"][0][0] - if status in ["complete", "error"]: - break - time.sleep(1) - return status - - def await_model_by_query(self, query, timeout=60): - start = time.time() - status = None - while (time.time() - start) < timeout: - resp = self.sql_via_http(query, RESPONSE_TYPE.TABLE) - status_index = [x.lower() for x in resp["column_names"]].index("status") - status = resp["data"][0][status_index] - if status in ["complete", "error"]: - break - time.sleep(1) - return status - - -def get_predictors_list(company_id: str = DEFAULT_COMPANY_ID, user_id: str = DEFAULT_USER_ID): - headers = {} - headers["company-id"] = str(company_id) - headers["user-id"] = str(user_id) - res = requests.get(f"{HTTP_API_ROOT}/predictors/", headers=headers) - assert res.status_code == 200 - return res.json() - - -def get_predictors_names_list(company_id: str = DEFAULT_COMPANY_ID, user_id: str = DEFAULT_USER_ID): - predictors = get_predictors_list(company_id=company_id, user_id=user_id) - return [x["name"] for x in predictors] - - -def check_predictor_exists(name): - assert name in get_predictors_names_list() - - -def check_predictor_not_exists(name): - assert name not in get_predictors_names_list() - - -def get_predictor_data(name): - predictors = get_predictors_list() - for p in predictors: - if p["name"] == name: - return p - return None - - -def wait_predictor_learn(predictor_name): - start_time = time.time() - learn_done = False - while learn_done is False and (time.time() - start_time) < 180: - learn_done = get_predictor_data(predictor_name)["status"] == "complete" - time.sleep(1) - assert learn_done - - -def get_integrations_names(company_id: str = DEFAULT_COMPANY_ID, user_id: str = DEFAULT_USER_ID): - headers = {} - headers["company-id"] = str(company_id) - headers["user-id"] = str(user_id) - res = 
requests.get(f"{HTTP_API_ROOT}/config/integrations", headers=headers) - assert res.status_code == 200 - return res.json()["integrations"] diff --git a/tests/integration/utils/query_generator.py b/tests/integration/utils/query_generator.py deleted file mode 100644 index 13a83ea6a27..00000000000 --- a/tests/integration/utils/query_generator.py +++ /dev/null @@ -1,74 +0,0 @@ -class QueryGenerator: - """ - A helper class to generate SQL queries for MindsDB Actions. - """ - - @staticmethod - def create_database_query(database_name: str, engine: str, parameters: dict) -> str: - """ - Generate a CREATE DATABASE query with the given parameters. - - :param database_name: The name of the database to create. - :param engine: The database engine to use. - :param parameters: A dictionary of parameters for the engine. - :return: The generated SQL query as a string. - """ - parameter_str = ",\n ".join([f'"{key}": "{value}"' if type(value) == str else f'"{key}": {value}' for key, value in parameters.items()]) - query = f"""CREATE DATABASE {database_name} - WITH ENGINE = '{engine}', - PARAMETERS = {{ - {parameter_str} - }};""" - return query - - @staticmethod - def create_ml_engine_query(ml_engine_name: str, engine: str, parameters: dict) -> str: - """ - Generate a CREATE ML ENGINE query with the given parameters. - - :param ml_engine_name: The name of the ML Engine to create. - :param engine: The ML Engine to use. - :param parameters: A dictionary of parameters for the engine. - :return: The generated SQL query as a string. - """ - parameters = parameters or {} - parameter_str = ",\n ".join([f"{key} = '{value}'" for key, value in parameters.items()]) - using_clause = f"\nUSING\n\t{parameter_str};" if parameters else "" - - return f"""CREATE ML_ENGINE {ml_engine_name} - FROM {engine}{using_clause}; - """ - - @staticmethod - def create_model(model_name: str, target_var: str, parameters: dict) -> str: - """ - Generate a CREATE ML ENGINE query with the given parameters. 
- - :param ml_engine_name: The name of the ML Engine to create. - :param engine: The ML Engine to use. - :param parameters: A dictionary of parameters for the engine. - :return: The generated SQL query as a string. - """ - parameter_str = ",\n ".join([f'"{key}": "{value}"' for key, value in parameters.items()]) - query = f"""CREATE MODEL {model_name} - PREDICT {target_var} - USING - {parameter_str} - ;""" - return query - - @staticmethod - def simple_select_query(table_name: str, columns: list = None, limit: int = 10) -> str: - """ - Generate a simple SELECT query with the given parameters. - - :param table_name: The name of the table to query. - :param columns: A list of columns to SELECT. - :param limit: The number of rows to LIMIT the query to. - :return: The generated SQL query as a string. - """ - columns = columns or ["*"] - column_str = ", ".join(columns) - limit_str = f" LIMIT {limit}" if limit else "" - query = f"""SELECT {column_str} FROM {table_name}{limit_str};""" - return query diff --git a/tests/integration/utils/test_json_encoder.py b/tests/integration/utils/test_json_encoder.py deleted file mode 100644 index 41200700585..00000000000 --- a/tests/integration/utils/test_json_encoder.py +++ /dev/null @@ -1,61 +0,0 @@ -import numpy as np -import pandas as pd -from datetime import datetime, date, timedelta -import pytest -from flask import Flask - -from mindsdb.utilities.json_encoder import ORJSONProvider - - -@pytest.fixture() -def prov(): - app = Flask(__name__) - app.json = ORJSONProvider(app) - with app.app_context(): - yield app.json - - -def test_dates_and_timedelta_serialization(prov): - payload = { - "d": date(2024, 1, 2), - "dt": datetime(2024, 1, 2, 3, 4, 5), - "td": timedelta(hours=1, minutes=2, seconds=3), - } - s = prov.dumps(payload) - assert '"2024-01-02"' in s - assert '"td":"1:02:03"' in s - - -def test_numpy_scalars_and_arrays(prov): - payload = { - "b": np.bool_(True), - "i": np.int64(42), - "f": np.float64(3.14), - "arr": np.array([1, 
2, 3], dtype=np.int32), - } - s = prov.dumps(payload) - # orjson with OPT_SERIALIZE_NUMPY should serialize these - assert '"arr":[1,2,3]' in s - - -def test_pandas_na_to_none(prov): - """ - Test if it calls our CustomJSONEncoder.default - """ - payload = {"x": pd.NA} - s = prov.dumps(payload) - assert '"x":null' in s - - -def test_date_serialization_format(prov): - payload = {"d": date(2024, 7, 9)} - s = prov.dumps(payload) - assert '"d":"2024-07-09"' in s - - -def test_datetime_serialization_format(prov): - dt = datetime(2024, 7, 9, 1, 2, 3, 0) - payload = {"dt": dt} - s = prov.dumps(payload) - # Expect "%Y-%m-%d %H:%M:%S.%f" per CustomJSONEncoder - assert '"dt":"2024-07-09 01:02:03.000000"' in s diff --git a/tests/scripts/check_handler_coverage.py b/tests/scripts/check_handler_coverage.py deleted file mode 100644 index 15f25f6786b..00000000000 --- a/tests/scripts/check_handler_coverage.py +++ /dev/null @@ -1,144 +0,0 @@ -import json -import os -import sys -import subprocess -from typing import Dict, List - - -def parse_handlers_env(value: str | None) -> List[str]: - """Parse newline-separated handler names from an env var.""" - if not value: - return [] - handlers: List[str] = [] - for line in value.splitlines(): - stripped = line.strip() - if not stripped or stripped.startswith("#"): - continue - handlers.append(stripped) - return handlers - - -def run(cmd: list[str], capture: bool = False, env: dict | None = None) -> subprocess.CompletedProcess: - print("[coverage]", " ".join(cmd)) - kwargs: dict = {"text": True} - if env is not None: - kwargs["env"] = env - if capture: - kwargs["stdout"] = subprocess.PIPE - kwargs["stderr"] = subprocess.STDOUT - return subprocess.run(cmd, **kwargs) - - -def build_handler_metrics(handlers: list[str], coverage_file: str) -> Dict[str, Dict[str, float]]: - """Generate coverage metrics per handler directory from an existing coverage data file.""" - env = os.environ.copy() - env["COVERAGE_FILE"] = coverage_file - - # `coverage json` 
reads the .coverage data created by pytest - result = run(["coverage", "json", "-o", "coverage.json"], env=env) - if result.returncode != 0: - print("[coverage] Failed to produce coverage.json", file=sys.stderr) - sys.exit(result.returncode) - - try: - with open("coverage.json", "r", encoding="utf-8") as fh: - coverage_data = json.load(fh) - except (OSError, json.JSONDecodeError) as exc: - print(f"[coverage] Unable to read coverage.json: {exc}", file=sys.stderr) - sys.exit(1) - - files: Dict[str, dict] = coverage_data.get("files", {}) - metrics: Dict[str, Dict[str, float]] = {} - - for handler in handlers: - prefix = f"mindsdb/integrations/handlers/{handler}_handler" - statements = 0 - missing = 0 - - for path, info in files.items(): - if not (path == f"{prefix}.py" or path.startswith(f"{prefix}/")): - continue - summary = info.get("summary", {}) - statements += summary.get("num_statements", 0) - missing += summary.get("missing_lines", 0) - - metrics[handler] = { - "statements": statements, - "missing": missing, - "coverage": 0.0 if statements == 0 else (1 - missing / statements) * 100, - } - - return metrics - - -def enforce_per_handler_directory_threshold(metrics: Dict[str, Dict[str, float]], threshold: float) -> None: - """Ensure each handler directory meets the threshold when all its files are considered together.""" - failed = False - - for handler, info in metrics.items(): - statements = info["statements"] - coverage_pct = info["coverage"] - if statements == 0: - print( - f"[coverage] No executable statements detected for handler '{handler}'. 
" - "Ensure tests import the handler package.", - file=sys.stderr, - ) - failed = True - continue - - print( - f"[coverage] Handler '{handler}' coverage: {coverage_pct:.2f}% " - f"({statements - info['missing']}/{statements} statements)" - ) - if coverage_pct < threshold: - print( - f"[coverage] Handler '{handler}' coverage below threshold {threshold:.2f}%.", - file=sys.stderr, - ) - failed = True - - if failed: - print("[coverage] One or more handlers are below the coverage threshold.", file=sys.stderr) - sys.exit(1) - - print("[coverage] All handlers meet the coverage threshold.") - - -def main() -> int: - # Prefer HANDLERS_TO_VERIFY, fallback to HANDLERS_TO_INSTALL - handlers_env = os.environ.get("HANDLERS_TO_VERIFY") or os.environ.get("HANDLERS_TO_INSTALL", "") - handlers = parse_handlers_env(handlers_env) - - print("[coverage] Verifying configured handlers only (not full-suite coverage).") - - if not handlers: - print( - "[coverage] No handlers configured in HANDLERS_TO_VERIFY or HANDLERS_TO_INSTALL; failing.", - file=sys.stderr, - ) - return 1 - - threshold_str = os.environ.get("COVERAGE_FAIL_UNDER", "80") - try: - threshold = float(threshold_str) - except ValueError: - print(f"[coverage] Invalid COVERAGE_FAIL_UNDER={threshold_str!r}", file=sys.stderr) - return 1 - - coverage_file = os.environ.get("COVERAGE_FILE") or ".coverage" - if not os.path.exists(coverage_file): - print( - f"[coverage] Coverage data file {coverage_file!r} not found. 
" - "Run pytest with coverage before executing this script.", - file=sys.stderr, - ) - return 1 - - metrics = build_handler_metrics(handlers, coverage_file) - enforce_per_handler_directory_threshold(metrics, threshold) - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/tests/scripts/check_print_statements.py b/tests/scripts/check_print_statements.py deleted file mode 100644 index fff291ea276..00000000000 --- a/tests/scripts/check_print_statements.py +++ /dev/null @@ -1,37 +0,0 @@ -import glob -import re -import sys - -success = True - - -def check_for_print_statements(): - global success - - files = list( - set(glob.glob("mindsdb/**/*.py", recursive=True)) - - set(glob.glob("**/tests/**", recursive=True)) - - set(glob.glob("docker/**", recursive=True)) - - set(["mindsdb/__main__.py"]) - ) - - pattern = re.compile("\sprint\(") # noqa: W605 - - failed_files = [] - - for file in files: - with open(file, "r") as fh: - if len(re.findall(pattern, fh.read())) > 0: - failed_files.append(file) - - if failed_files: - success = False - print("-- The following files contain print statements. 
Please remove them: --") - print() - for file in failed_files: - print(file) - - -check_for_print_statements() - -sys.exit(0 if success else 1) diff --git a/tests/scripts/check_requirements.py b/tests/scripts/check_requirements.py deleted file mode 100644 index f3ec6de303e..00000000000 --- a/tests/scripts/check_requirements.py +++ /dev/null @@ -1,489 +0,0 @@ -import glob -import re -import sys -import subprocess -import os -import json - -pattern = "\=|~|>|<| |\n|#|\[" # noqa: W605 - - -def get_requirements_from_file(path, with_snyk: bool = True): - """Takes a requirements file path and extracts only the package names from it""" - - with open(path, "r") as main_f: - reqs = [] - for line in main_f.readlines(): - if with_snyk is False and "pinned by Snyk to avoid a vulnerability" in line: - continue - parts = re.split(pattern, line) - if parts and parts[0]: - reqs.append(parts[0]) - return reqs - - -def get_requirements_with_DEP002(path): - """Extract package names that have 'pinned by Snyk' comment from requirements file""" - no_check_packages = [] - - with open(path, "r") as f: - for line in f.readlines(): - line = line.strip() - if ( - line - and not line.startswith("#") - and ("pinned by Snyk to avoid a vulnerability" in line or "ignore-DEP002" in line) - ): - package_name = re.split(pattern, line)[0] - if package_name: - no_check_packages.append(package_name) - - return no_check_packages - - -MAIN_REQS_PATH = "requirements/requirements.txt" -DEV_REQS_PATH = "requirements/requirements-dev.txt" -TEST_REQS_PATH = "requirements/requirements-test.txt" - - -# Utilities that have their own requirements.txt files. -# These are used only within handlers. 
-UTILITIES_REQS_PATHS = [ - "mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/requirements.txt", - "mindsdb/integrations/utilities/handlers/auth_utilities/google/requirements.txt", -] - -EXTRA_REQS_PATHS = [ - "requirements/requirements-agents.txt", - "requirements/requirements-kb.txt", -] - - -HANDLER_REQS_PATHS = list( - set(glob.glob("**/requirements*.txt", recursive=True)) - set(glob.glob("requirements/requirements*.txt")) -) - -MAIN_EXCLUDE_PATHS = ["mindsdb/integrations/handlers/.*_handler", "pryproject.toml"] - -# Torch.multiprocessing is imported in a 'try'. Falls back to multiprocessing so we dont NEED it. -# Psycopg2 is needed in core codebase for sqlalchemy. -# lark is required for auto retrieval (RAG utilities). It is used by langchain -# and not explicitly imported in mindsdb. -# transformers is required for langchain_core and not explicitly imported by mindsdb. -# dataprep_ml is for optional features that aren't required. -# opentelemetry and langfuse are metrics/tracing libraries that are only used in the cloud images (they're installed there as extras) -# langchain_aws is used to create agent with bedrock provider; -# if is not installed - error message will be shown, but it is possible to use other providers with agent -# pyodbc is used in mssql but as optional dependency -# litellm is used in KB but as optional dependency in case of using : snowflake, bedrock, gemini llm providers -MAIN_RULE_IGNORES = { - "DEP003": [ - "torch", - "pyarrow", - "langfuse", - "dataprep_ml", - "hierarchicalforecast", # optional dependency in mindsdb/integrations/utilities/time_series_utils.py - ], - "DEP001": [ - "torch", - "pgvector", - "pyarrow", - "openai", - "dataprep_ml", - "opentelemetry", - "langfuse", - "langchain_aws", - "pyodbc", - "sklearn", # optional dependency in mindsdb/integrations/utilities/time_series_utils.py - "hierarchicalforecast", # optional dependency in mindsdb/integrations/utilities/time_series_utils.py - ], - "DEP002": [ - 
"psycopg2-binary", - "lark", - "transformers", - "langchain-experimental", - "lxml", - "openpyxl", - "xlrd", - "onnxruntime", - "litellm", - "numba", # required in a few files for the hierarchicalforecast. Otherwise, uv may install an old version. - "urllib3", # pinned by Snyk to avoid a vulnerability - "faiss-cpu", - "pyopenssl", - ], -} - - -# The following packages need exceptions. -# Either because 1) they are optional deps of some other packages. E.g.: -# - langchain CAN use openai -# - pypdf and openpyxl are optional deps of langchain, that are used for the file handler -# Or 2) because they are imported in an unusual way. E.g.: -# - pysqlite3 in the chromadb handler -# - dspy-ai in langchain handler - -# The `pyarrow` package used for DataFrame serialization. -# It is not explicitly imported in the code and used as follows: -# modules.append('pyarrow==19.0.0') -BYOM_DEP002_IGNORE_HANLDER_DEPS = ["pyarrow", "scikit-learn"] - -# The `thrift-sasl` package is required establish a connection via to Hive via `pyhive`, but it is not explicitly imported in the code. -HIVE_DEP002_IGNORE_HANDLER_DEPS = ["thrift-sasl"] - -# The `gcsfs` package is required to interact with GCS as a file system. -GCS_DEP002_IGNORE_HANDLER_DEPS = ["gcsfs"] - -LINDORM_DEP002_IGNORE_HANDLER_DEPS = ["protobuf"] - -HUGGINGFACE_DEP002_IGNORE_HANDLER_DEPS = ["torch"] - -RAG_DEP002_IGNORE_HANDLER_DEPS = ["sentence-transformers"] - -SOLR_DEP002_IGNORE_HANDLER_DEPS = ["sqlalchemy-solr"] - -OPENAI_DEP002_IGNORE_HANDLER_DEPS = ["tiktoken"] - -CHROMADB_EP002_IGNORE_HANDLER_DEPS = ["onnxruntime"] - -FRESHDESK_EP002_IGNORE_HANDLER_DEPS = ["python-freshdesk"] - -# The `pyarrow` package is used only if it is installed. -# The handler can work without it. 
-SNOWFLAKE_DEP003_IGNORE_HANDLER_DEPS = ["pyarrow"] - -DEP002_IGNORE_HANDLER_DEPS = list( - set( - BYOM_DEP002_IGNORE_HANLDER_DEPS - + HIVE_DEP002_IGNORE_HANDLER_DEPS - + GCS_DEP002_IGNORE_HANDLER_DEPS - + LINDORM_DEP002_IGNORE_HANDLER_DEPS - + HUGGINGFACE_DEP002_IGNORE_HANDLER_DEPS - + RAG_DEP002_IGNORE_HANDLER_DEPS - + SOLR_DEP002_IGNORE_HANDLER_DEPS - + OPENAI_DEP002_IGNORE_HANDLER_DEPS - + CHROMADB_EP002_IGNORE_HANDLER_DEPS - + FRESHDESK_EP002_IGNORE_HANDLER_DEPS - ) -) - -DEP003_IGNORE_HANDLER_DEPS = list(set(SNOWFLAKE_DEP003_IGNORE_HANDLER_DEPS)) - -# List of rules we can ignore for specific packages -# Here we ignore any packages in the main requirements.txt for "listed but not used" errors, because they will be used for the core code but not necessarily in a given handler -MAIN_REQUIREMENTS_DEPS = get_requirements_from_file(MAIN_REQS_PATH) + get_requirements_from_file(TEST_REQS_PATH) - -HANDLER_RULE_IGNORES = { - "DEP002": DEP002_IGNORE_HANDLER_DEPS + MAIN_REQUIREMENTS_DEPS, - "DEP001": [ - "tests", - "pyarrow", - "IfxPyDbi", - "ingres_sa_dialect", - "pyodbc", - "freshdesk", - ], # 'tests' is the mindsdb tests folder in the repo root, 'pyarrow' used in snowflake handler - "DEP003": DEP003_IGNORE_HANDLER_DEPS, -} - -PACKAGE_NAME_MAP = { - "azure-storage-blob": ["azure"], - "scylla-driver": ["cassandra"], - "mysql-connector-python": ["mysql"], - "snowflake-connector-python": ["snowflake"], - "snowflake-sqlalchemy": ["snowflake"], - "auto-sklearn": ["autosklearn"], - "google-cloud-aiplatform": ["google"], - "google-cloud-bigquery": ["google"], - "google-cloud-spanner": ["google"], - "sqlalchemy-spanner": ["google"], - "google-auth-httplib2": ["google"], - "google-generativeai": ["google"], - "google-analytics-admin": ["google"], - "google-auth": ["google"], - "google-cloud-storage": ["google"], - "google-genai": ["google"], - "google-auth-oauthlib": ["google_auth_oauthlib"], - "google-api-python-client": ["googleapiclient"], - "ibm-cos-sdk": ["ibm_boto3", 
"ibm_botocore"], - "binance-connector": ["binance"], - "pysqlite3": ["pysqlite3"], - "atlassian-python-api": ["atlassian"], - "databricks-sql-connector": ["databricks"], - "elasticsearch-dbapi": ["es"], - "pygithub": ["github"], - "python-gitlab": ["gitlab"], - "impyla": ["impala"], - "IfxPy": ["IfxPyDbi"], - "salesforce-merlion": ["merlion"], - "newsapi-python": ["newsapi"], - "pinecone-client": ["pinecone"], - "plaid-python": ["plaid"], - "faiss-cpu": ["faiss"], - "writerai": ["writer"], - "rocketchat_API": ["rocketchat_API"], - "ShopifyAPI": ["shopify"], - "solace-pubsubplus": ["solace"], - "taospy": ["taosrest"], - "weaviate-client": ["weaviate"], - "pymupdf": ["fitz"], - "ibm-db": ["ibm_db_dbi"], - "python-dateutil": ["dateutil"], - "sqlalchemy-redshift": ["redshift_sqlalchemy"], - "sqlalchemy-vertica-python": ["sqla_vertica_python"], - "psycopg2-binary": ["psycopg2"], - "psycopg-binary": ["psycopg"], - "pymongo": ["pymongo", "bson"], - "python-multipart": ["multipart"], - "pydateinfer": ["dateinfer"], - "scikit-learn": ["sklearn"], - "influxdb3-python": ["influxdb_client_3"], - "hubspot-api-client": ["hubspot"], - "eventbrite-python": ["eventbrite"], - "clickhouse-sqlalchemy": ["clickhouse_sqlalchemy"], - "pillow": ["PIL"], - "auto-ts": ["auto_ts"], - "llama-index-readers-web": ["llama_index"], - "llama-index-embeddings-openai": ["llama_index"], - "unifyai": ["unify"], - "botframework-connector": ["botframework"], - "botbuilder-schema": ["botbuilder"], - "opentelemetry-api": ["opentelemetry"], - "opentelemetry-sdk": ["opentelemetry"], - "opentelemetry-exporter-otlp": ["opentelemetry"], - "opentelemetry-instrumentation-requests": ["opentelemetry"], - "opentelemetry-instrumentation-flask": ["opentelemetry"], - "opentelemetry-distro": ["opentelemetry"], - "sqlalchemy-ingres": ["ingres_sa_dialect"], - "pyaml": ["yaml"], - "pydantic_core": ["pydantic"], - "python-dotenv": ["dotenv"], - "pyjwt": ["jwt"], - "sklearn": ["scikit-learn"], - "types-aioboto3": 
["aioboto3"], - "ag2": ["autogen"], -} - -# We use this to exit with a non-zero status code if any check fails -# so that when this is running in CI the job will fail -success = True - - -def print_errors(file, errors): - global success - if len(errors) > 0: - success = False - print(f"- {file}") - for line in errors: - print(" " + line) - print() - - -def get_ignores_str(ignores_dict: dict, dep002_ignore: list[str] = None) -> str: - """Get a list of rule ignores for deptry - - Args: - ignores_dict: A dictionary of rule ignores for deptry - dep002_ignore: Additional list of packages to ignore for DEP002 - - Returns: - A string of rule ignores for deptry - """ - - rules = [] - for k, v in ignores_dict.items(): - rules.append(f"{k}={'|'.join(v)}") - if k == "DEP002" and dep002_ignore: - rules[-1] += "|" + "|".join(dep002_ignore) - - return ",".join(rules) - - -def run_deptry(reqs, rule_ignores, path, extra_args=""): - """Run a dependency check with deptry. Return a list of error messages""" - - errors = [] - # Get the full path to deptry executable from the current Python environment - deptry_path = os.path.join(os.path.dirname(sys.executable), "deptry") - try: - result = subprocess.run( - f'{deptry_path} -o deptry.json --no-ansi --known-first-party mindsdb --requirements-files "{reqs}" --per-rule-ignores "{rule_ignores}" --package-module-name-map "{get_ignores_str(PACKAGE_NAME_MAP)}" {extra_args} {path}', - shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.PIPE, - ) - if not os.path.exists("deptry.json"): - if result.returncode != 0: - # There was some issue with running deptry - errors.append(f"Error running deptry: {result.stderr.decode('utf-8')}") - else: - errors.append("Error running deptry: deptry.json was not generated.") - return errors - - with open("deptry.json", "r") as f: - deptry_results = json.loads(f.read()) - for r in deptry_results: - errors.append( - f"{r['location']['line']}:{r['location']['column']}: {r['error']['code']} 
{r['error']['message']}" - ) - finally: - if os.path.exists("deptry.json"): - os.remove("deptry.json") - return errors - - -def check_for_requirements_duplicates(): - """Checks that handler requirements.txt and the main requirements.txt don't contain any of the same packages""" - - global success - main_reqs = get_requirements_from_file(MAIN_REQS_PATH, with_snyk=False) - - for file in HANDLER_REQS_PATHS: - handler_reqs = get_requirements_from_file(file, with_snyk=False) - - for req in handler_reqs: - if req in main_reqs: - print(f"{req} is in {file} and also in main requirements file.") - success = False - - -def check_relative_reqs(): - """ - Check that relationships between handlers are defined correctly. - - If a parent handler imports another handler in code, we should define that dependency - in the parent handler's requirements.txt like: - - -R mindsdb/integrations/handlers/child_handler/requirements.txt - - This is important to ensure that "pip install mindsdb[parent_handler]" works correctly. - This function checks that for each handler imported from another handler, there is a - corresponding entry in a requirements.txt. 
- """ - - # regex for finding relative imports of handlers like "from ..file_handler import FileHandler" - # we're going to treat these as errors (and suggest using absolute imports instead) - relative_import_pattern = re.compile("(?:\s|^)(?:from|import) \.\.\w+_handler") # noqa: W605 - - def get_relative_requirements(files): - """Find entries in a requirements.txt that are including another requirements.txt""" - entries = {} - for file in files: - with open(file, "r") as fh: - for line in fh.readlines(): - line = line.lower().strip() - if line.startswith("-r mindsdb/integrations/handlers/"): - entries[line.split("mindsdb/integrations/handlers/")[1].split("/")[0]] = line - - return entries - - for handler_dir in glob.glob("mindsdb/integrations/handlers/*/"): - handler_name = handler_dir.split("/")[-2].split("_handler")[0] - - # regex for finding imports of other handlers like "from mindsdb.integrations.handlers.file_handler import FileHandler" - # excludes the current handler importing parts of itself - import_pattern = re.compile( - f"(?:\s|^)(?:from|import) mindsdb\.integrations\.handlers\.(?!{handler_name}_handler)\w+_handler" - ) # noqa: W605 - - # requirements entries for this handler that point to another handler's requirements file - required_handlers = get_relative_requirements( - [file for file in HANDLER_REQS_PATHS if file.startswith(handler_dir)] - ) - - all_imported_handlers = [] - - # for every python file in this handler's code - for file in glob.glob(f"{handler_dir}/**/*.py", recursive=True): - errors = [] - - # find all the imports of handlers - with open(file, "r") as f: - file_content = f.read() - relative_imported_handlers = [ - match.strip() for match in re.findall(relative_import_pattern, file_content) - ] - handler_import_lines = [match.strip() for match in re.findall(import_pattern, file_content)] - - imported_handlers = { - line: line.split("_handler")[0].split(".")[-1] + "_handler" for line in handler_import_lines - } - 
all_imported_handlers += imported_handlers.values() - - # Report on relative imports (like "from ..file_handler import FileHandler") - for line in relative_imported_handlers: - errors.append(f"{line} <- Relative import of handler. Use absolute import instead") - - # Report on imports of other handlers that are missing a corresponding requirements.txt entry - for line, imported_handler_name in imported_handlers.items(): - # Check if the imported handler has a requirements.txt file. - imported_handler_req_file = f"mindsdb/integrations/handlers/{imported_handler_name}/requirements.txt" - if os.path.exists(imported_handler_req_file): - if imported_handler_name not in required_handlers.keys(): - errors.append( - f'{line} <- {imported_handler_name} not in handler requirements.txt. Add it like: "-r {imported_handler_req_file}"' - ) - - # Print all the errors for this .py file - print_errors(file, errors) - - # Report on requirements.txt entries that point to a handler that isn't used - requirements_errors = [ - required_handler_name + " in requirements.txt but not used in code" - for required_handler_name in required_handlers.keys() - if required_handler_name not in all_imported_handlers - ] - print_errors(handler_dir, requirements_errors) - - # Report on requirements.txt entries that point to a handler requirements file that doesn't exist - errors = [] - for _, required_handler_line in required_handlers.items(): - if not os.path.exists(required_handler_line.split("-r ")[1]): - errors.append(f"{required_handler_line} <- this requirements file doesn't exist.") - - print_errors(handler_dir, errors) - - -def check_requirements_imports(): - """ - Use deptry to find issues with dependencies. - - Runs deptry on the core codebase (excluding handlers) + the main requirements.txt file. - Then runs it on each handler codebase and requirements.txt individually. 
- """ - - # Run against the main codebase - errors = run_deptry( - ",".join([MAIN_REQS_PATH] + UTILITIES_REQS_PATHS + EXTRA_REQS_PATHS), - get_ignores_str(MAIN_RULE_IGNORES), - ".", - f'--extend-exclude "{"|".join(MAIN_EXCLUDE_PATHS)}"', - ) - print_errors(MAIN_REQS_PATH, errors) - - # Run on each handler - for file in HANDLER_REQS_PATHS: - handler_no_check = get_requirements_with_DEP002(file) - - ignore_str = get_ignores_str(HANDLER_RULE_IGNORES, dep002_ignore=handler_no_check) - - errors = run_deptry( - f"{file},{MAIN_REQS_PATH},{TEST_REQS_PATH}", - ignore_str, - os.path.dirname(file), - ) - print_errors(file, errors) - - -print("--- Checking requirements files for duplicates ---") -check_for_requirements_duplicates() -print() - -print("--- Checking that requirements match imports ---") -check_requirements_imports() -print() - -print("--- Checking handlers that require other handlers ---") -check_relative_reqs() - -sys.exit(0 if success else 1) diff --git a/tests/scripts/check_version.py b/tests/scripts/check_version.py deleted file mode 100644 index 86de4fedd52..00000000000 --- a/tests/scripts/check_version.py +++ /dev/null @@ -1,23 +0,0 @@ -import re -import sys - -from mindsdb.__about__ import __version__ - -# PEP440: https://www.python.org/dev/peps/pep-0440/ -RELEASE_PATTERN = "^\d+(\.\d+)*$" # noqa: W605 -PRERELEASE_PATTERN = "^\d+(\.\d+)*(a|b|rc)\d+$" # noqa: W605 - -version_str = sys.argv[1].replace('v', '') -is_prerelease = sys.argv[2] == "true" - -if is_prerelease: - if re.match(PRERELEASE_PATTERN, version_str) is None: - raise Exception("Invalid prerelease version: %s" % version_str) -elif re.match(RELEASE_PATTERN, version_str) is None: - raise Exception("Invalid release version: %s" % version_str) - - -if version_str != __version__: - raise Exception("Version mismatch between __about__.py and release tag: %s != %s" % (version_str, __version__)) - -print(version_str) diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py deleted file mode 100644 
index e69de29bb2d..00000000000 diff --git a/tests/unit/api/__init__.py b/tests/unit/api/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/unit/api/a2a/__init__.py b/tests/unit/api/a2a/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/unit/api/a2a/streaming_test_client.py b/tests/unit/api/a2a/streaming_test_client.py deleted file mode 100644 index 7f7d2d44c9d..00000000000 --- a/tests/unit/api/a2a/streaming_test_client.py +++ /dev/null @@ -1,154 +0,0 @@ -import time -import requests -from typing import List, Dict, Any, Tuple - - -class StreamingTestClient: - """A client that tracks timing information for streamed responses.""" - - def __init__(self): - self.session = requests.Session() - - def get_with_timing( - self, url: str, **kwargs - ) -> Tuple[List[Dict[str, Any]], List[float]]: - """ - Make a GET request and track when each chunk is received. - - Args: - url: The URL to request - **kwargs: Additional arguments to pass to requests.get - - Returns: - Tuple containing: - - List of chunks (parsed as JSON if possible) - - List of timestamps when each chunk was received - """ - chunks = [] - timestamps = [] - - with self.session.get(url, stream=True, **kwargs) as response: - # Verify we're getting a chunked response - if response.headers.get("Transfer-Encoding") != "chunked": - print("Warning: Response is not using chunked transfer encoding") - - for chunk in response.iter_content(chunk_size=None, decode_unicode=True): - if chunk: - timestamps.append(time.time()) - try: - # Try to parse as JSON - import json - - # The chunk might contain multiple JSON objects if they were sent together - # Split by newlines and parse each one - for line in chunk.strip().split("\n"): - if line: - chunks.append(json.loads(line)) - except (ValueError, TypeError, json.JSONDecodeError): - # If not JSON, store as string - chunks.append(chunk) - - return chunks, timestamps - - def post_with_timing( - self, url: str, 
**kwargs - ) -> Tuple[List[Dict[str, Any]], List[float]]: - """ - Make a POST request and track when each chunk is received. - - Args: - url: The URL to request - **kwargs: Additional arguments to pass to requests.post - - Returns: - Tuple containing: - - List of chunks (parsed as JSON if possible) - - List of timestamps when each chunk was received - """ - chunks = [] - timestamps = [] - - with self.session.post(url, stream=True, **kwargs) as response: - # Verify we're getting a chunked response - if response.headers.get("Transfer-Encoding") != "chunked": - print("Warning: Response is not using chunked transfer encoding") - - # Use a smaller chunk size to better detect streaming - buffer = "" - for chunk in response.iter_content(chunk_size=1, decode_unicode=True): - if not chunk: - continue - - # Add to buffer and process complete lines - buffer += chunk - - # Process complete lines if we have a newline - if "\n" in buffer: - lines = buffer.split("\n") - # Keep the last part (might be incomplete) - buffer = lines.pop() - - for line in lines: - if line.strip(): - timestamps.append(time.time()) - try: - # Try to parse as JSON - import json - - chunks.append(json.loads(line)) - except (ValueError, TypeError, json.JSONDecodeError): - # If not JSON, store as string - chunks.append(line) - - # Process any remaining content in the buffer - if buffer.strip(): - timestamps.append(time.time()) - try: - # Try to parse as JSON - import json - - chunks.append(json.loads(buffer)) - except (ValueError, TypeError, json.JSONDecodeError): - # If not JSON, store as string - chunks.append(buffer) - - return chunks, timestamps - - def analyze_timing(self, timestamps: List[float]) -> Dict[str, float]: - """ - Analyze the timing information from a streamed response. 
- - Args: - timestamps: List of timestamps when chunks were received - - Returns: - Dictionary with timing statistics - """ - if not timestamps or len(timestamps) < 2: - return {"error": "Not enough chunks to analyze"} - - intervals = [ - timestamps[i] - timestamps[i - 1] for i in range(1, len(timestamps)) - ] - - return { - "total_chunks": len(timestamps), - "total_time": timestamps[-1] - timestamps[0], - "average_interval": sum(intervals) / len(intervals), - "min_interval": min(intervals), - "max_interval": max(intervals), - "intervals": intervals, - } - - def post(self, url: str, **kwargs): - """ - Make a POST request and return the response. - - Args: - url: The URL to request - **kwargs: Additional arguments to pass to requests.post - - Returns: - The response object - """ - return self.session.post(url, **kwargs) diff --git a/tests/unit/api/a2a/test_streaming_verification.py b/tests/unit/api/a2a/test_streaming_verification.py deleted file mode 100644 index 686a6497086..00000000000 --- a/tests/unit/api/a2a/test_streaming_verification.py +++ /dev/null @@ -1,176 +0,0 @@ -import unittest -import time -import json -import threading -import pytest -from http.server import HTTPServer, BaseHTTPRequestHandler - -from tests.unit.api.a2a.streaming_test_client import StreamingTestClient - - -class StreamingHandler(BaseHTTPRequestHandler): - """ - A simple HTTP handler that streams responses with controlled timing. 
- """ - - def do_GET(self): - """Handle GET requests with streaming response.""" - self.send_response(200) - self.send_header("Content-Type", "application/json") - self.send_header("Transfer-Encoding", "chunked") - self.end_headers() - - # Stream 5 chunks with 0.5 second delay between them - for i in range(5): - # Format the chunk properly for chunked transfer encoding - # Format: [chunk size in hex]\r\n[chunk data]\r\n - chunk_data = json.dumps({"chunk": i, "timestamp": time.time()}) + "\n" - chunk_size = hex(len(chunk_data))[2:] # Convert to hex and remove '0x' prefix - - self.wfile.write(f"{chunk_size}\r\n".encode("utf-8")) - self.wfile.write(chunk_data.encode("utf-8")) - self.wfile.write(b"\r\n") - self.wfile.flush() # Ensure the chunk is sent immediately - time.sleep(0.5) # Delay between chunks - - # End the chunked response with a zero-length chunk - self.wfile.write(b"0\r\n\r\n") - self.wfile.flush() - - def do_POST(self): - """Handle POST requests with streaming response.""" - content_length = int(self.headers["Content-Length"]) - post_data = self.rfile.read(content_length).decode("utf-8") - - # Parse the request data - try: - data = json.loads(post_data) - # Get the delay parameter or use default - delay = data.get("delay", 0.5) - chunks = data.get("chunks", 5) - except (ValueError, TypeError, json.JSONDecodeError): - delay = 0.5 - chunks = 5 - - self.send_response(200) - self.send_header("Content-Type", "application/json") - self.send_header("Transfer-Encoding", "chunked") - self.end_headers() - - # Stream chunks with the specified delay - for i in range(chunks): - # Format the chunk properly for chunked transfer encoding - chunk_data = json.dumps({"chunk": i, "timestamp": time.time()}) + "\n" - chunk_size = hex(len(chunk_data))[2:] # Convert to hex and remove '0x' prefix - - self.wfile.write(f"{chunk_size}\r\n".encode("utf-8")) - self.wfile.write(chunk_data.encode("utf-8")) - self.wfile.write(b"\r\n") - self.wfile.flush() # Ensure the chunk is sent 
immediately - time.sleep(delay) # Delay between chunks - - # End the chunked response with a zero-length chunk - self.wfile.write(b"0\r\n\r\n") - self.wfile.flush() - - -class TestStreamingVerification(unittest.TestCase): - """ - Test cases to verify that responses are properly streamed. - """ - - @classmethod - def setUpClass(cls): - """Start a test server in a separate thread.""" - cls.server = HTTPServer(("localhost", 0), StreamingHandler) - cls.server_port = cls.server.server_port - cls.server_thread = threading.Thread(target=cls.server.serve_forever) - cls.server_thread.daemon = True - cls.server_thread.start() - cls.base_url = f"http://localhost:{cls.server_port}" - - @classmethod - def tearDownClass(cls): - """Shut down the test server.""" - cls.server.shutdown() - cls.server.server_close() - cls.server_thread.join() - - def test_get_streaming(self): - """Test that GET responses are properly streamed.""" - client = StreamingTestClient() - chunks, timestamps = client.get_with_timing(f"{self.base_url}/stream") - - # Verify we got the expected number of chunks - self.assertEqual(len(chunks), 5) - - # Analyze timing - timing = client.analyze_timing(timestamps) - - # Verify that chunks were received over time, not all at once - # The total time should be at least (chunks-1) * delay - self.assertGreaterEqual( - timing["total_time"], 0.5 * 4 * 0.9 - ) # 90% of expected time to account for timing variations - - # Verify that the average interval is close to our delay - self.assertGreaterEqual(timing["average_interval"], 0.5 * 0.8) # 80% of expected delay - - # Print timing information for debugging - print(f"Streaming GET timing: {timing}") - - @pytest.mark.slow - def test_post_streaming(self): - """Test that POST responses are properly streamed.""" - client = StreamingTestClient() - - # Test with different delays - for delay in [0.2, 0.5, 1.0]: - chunks, timestamps = client.post_with_timing(f"{self.base_url}/stream", json={"delay": delay, "chunks": 5}) - - # Verify 
we got the expected number of chunks - self.assertEqual(len(chunks), 5) - - # Analyze timing - timing = client.analyze_timing(timestamps) - - # Verify that chunks were received over time, not all at once - # The total time should be at least (chunks-1) * delay - self.assertGreaterEqual(timing["total_time"], delay * 4 * 0.9) # 90% of expected time - - # Verify that the average interval is close to our delay - self.assertGreaterEqual(timing["average_interval"], delay * 0.8) # 80% of expected delay - - # Print timing information for debugging - print(f"Streaming POST timing with delay {delay}: {timing}") - - def test_verify_not_batched(self): - """ - Test to specifically verify that chunks aren't being batched together. - - This test uses a longer delay to make it more obvious if batching occurs. - """ - client = StreamingTestClient() - chunks, timestamps = client.post_with_timing(f"{self.base_url}/stream", json={"delay": 1.0, "chunks": 5}) - - # Verify we got the expected number of chunks - self.assertEqual(len(chunks), 5) - - # Analyze timing - timing = client.analyze_timing(timestamps) - - # If chunks are batched, we'll see some very small intervals - # and some very large ones. Check that all intervals are reasonably close - # to our expected delay. 
- for interval in timing["intervals"]: - # Each interval should be at least 50% of the expected delay - # and not more than 150% of the expected delay - self.assertGreaterEqual(interval, 1.0 * 0.5) - self.assertLessEqual(interval, 1.0 * 1.5) - - # Print timing information for debugging - print(f"Batching verification timing: {timing}") - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/api/http/agents_test.py b/tests/unit/api/http/agents_test.py deleted file mode 100644 index bd2532bcd78..00000000000 --- a/tests/unit/api/http/agents_test.py +++ /dev/null @@ -1,442 +0,0 @@ -from http import HTTPStatus -from unittest.mock import patch - -import pandas as pd -import pytest - -from tests.unit.api.http.conftest import create_demo_db, create_dummy_ml - - -def test_prepare(client): - create_demo_db(client) - - create_dummy_ml(client) - # Create model to use in depreciated tests. - # The skill-based approach is only used via Minds. - create_query = """ - CREATE MODEL mindsdb.test_model - FROM example_db (SELECT location as answer, sqft FROM demo_data.home_rentals limit 10) - PREDICT answer - USING engine = 'dummy_ml', join_learn_process = true - """ - train_data = {"query": create_query} - response = client.post("/api/projects/mindsdb/models", json=train_data, follow_redirects=True) - assert response.status_code == HTTPStatus.CREATED - - -@pytest.mark.deprecated( - "MindsDB models are no longer used with agents. 
However, Minds still uses models, so this test is kept for now" -) -@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm") -def test_post_agent_depreciated(check_agent_llm, client): - create_request = { - "agent": { - "name": "test_post_agent_depreciated", - "model": {"provider": "openai", "model_name": "test_model"}, - "params": {"timeout": 10}, - } - } - - create_response = client.post("/api/projects/mindsdb/agents", json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - created_agent = create_response.get_json() - - expected_agent = { - "name": "test_post_agent_depreciated", - "model": {"provider": "openai", "model_name": "test_model"}, - "params": {"timeout": 10}, - "id": created_agent["id"], - "project_id": created_agent["project_id"], - "created_at": created_agent["created_at"], - "updated_at": created_agent["updated_at"], - } - - assert created_agent == expected_agent - - -@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm") -@patch("mindsdb.interfaces.agents.agents_controller.check_agent_data") -def test_post_agent(check_agent_data, check_agent_llm, client): - create_request = { - "agent": { - "name": "TEST_post_agent", - "model": {"model_name": "gpt-3.5-turbo", "provider": "openai", "api_key": "sk-..."}, - "data": {"tables": ["example_db.customers"]}, - "prompt_template": "example_db.customers stores customers data", - } - } - - create_response = client.post("/api/projects/mindsdb/agents", json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - created_agent = create_response.get_json() - - expected_agent = { - "name": "TEST_post_agent", - "model": {"model_name": "gpt-3.5-turbo", "provider": "openai", "api_key": "sk-..."}, - "data": {"tables": ["example_db.customers"]}, - "id": created_agent["id"], - "project_id": created_agent["project_id"], - "created_at": created_agent["created_at"], - "updated_at": 
created_agent["updated_at"], - "prompt_template": "example_db.customers stores customers data", - } - - assert created_agent == expected_agent - - -def test_post_agent_no_agent(client): - create_request = { - "name": "test_post_agent_no_agent", - "model": {"model_name": "gpt-3.5-turbo", "provider": "openai", "api_key": "sk-..."}, - "data": {"tables": ["example_db.customers"]}, - "prompt_template": "example_db.customers stores customers data", - } - create_response = client.post("/api/projects/mindsdb/agents", json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.BAD_REQUEST - - -def test_post_agent_no_name(client): - create_request = { - "agent": { - "model": {"model_name": "gpt-3.5-turbo", "provider": "openai", "api_key": "sk-..."}, - "data": {"tables": ["example_db.customers"]}, - "prompt_template": "example_db.customers stores customers data", - } - } - create_response = client.post("/api/projects/mindsdb/agents", json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.BAD_REQUEST - - -def test_post_agent_project_not_found(client): - create_request = { - "agent": { - "name": "test_post_agent_no_model_name", - "model": {"model_name": "gpt-3.5-turbo", "provider": "openai", "api_key": "sk-..."}, - "data": {"tables": ["example_db.customers"]}, - "prompt_template": "example_db.customers stores customers data", - } - } - create_response = client.post("/api/projects/womp/agents", json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.NOT_FOUND - - -@pytest.mark.deprecated( - "MindsDB models are no longer used with agents. 
However, Minds still uses models, so this test is kept for now" -) -def test_post_agent_model_not_found(client): - create_request = { - "agent": { - "name": "test_post_agent_model_not_found", - "model_name": "not_the_model_youre_looking_for", - "params": {"k1": "v1"}, - "provider": "mindsdb", - } - } - create_response = client.post("/api/projects/mindsdb/agents", json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.NOT_FOUND - - -def test_get_agents(client): - create_request = { - "agent": { - "name": "test_get_agents", - "model": {"model_name": "gpt-3.5-turbo", "provider": "openai", "api_key": "sk-..."}, - "data": {"tables": ["example_db.customers"]}, - "prompt_template": "example_db.customers stores customers data", - } - } - - client.post("/api/projects/mindsdb/agents", json=create_request, follow_redirects=True) - - get_response = client.get("/api/projects/mindsdb/agents", follow_redirects=True) - assert get_response.status_code == HTTPStatus.OK - all_agents = get_response.get_json() - assert len(all_agents) > 0 - - -def test_get_agents_project_not_found(client): - get_response = client.get("/api/projects/bloop/agents", follow_redirects=True) - assert get_response.status_code == HTTPStatus.NOT_FOUND - - -@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm") -@patch("mindsdb.interfaces.agents.agents_controller.check_agent_data") -def test_get_agent(check_agent_data, check_agent_llm, client): - create_request = { - "agent": { - "name": "test_get_agent", - "model": {"model_name": "gpt-3.5-turbo", "provider": "openai", "api_key": "sk-..."}, - "data": {"tables": ["example_db.customers"]}, - "prompt_template": "example_db.customers stores customers data", - } - } - - client.post("/api/projects/mindsdb/agents", json=create_request, follow_redirects=True) - - get_response = client.get("/api/projects/mindsdb/agents/test_get_agent", follow_redirects=True) - assert get_response.status_code == HTTPStatus.OK - agent 
= get_response.get_json() - - expected_agent = { - "name": "test_get_agent", - "model": {"model_name": "gpt-3.5-turbo", "provider": "openai", "api_key": "sk-..."}, - "data": {"tables": ["example_db.customers"]}, - "prompt_template": "example_db.customers stores customers data", - "id": agent["id"], - "project_id": agent["project_id"], - "created_at": agent["created_at"], - "updated_at": agent["updated_at"], - } - - assert agent == expected_agent - - -def test_get_agent_project_not_found(client): - get_response = client.get("/api/projects/bloop/agents/test_get_agent", follow_redirects=True) - assert get_response.status_code == HTTPStatus.NOT_FOUND - - -# At the moment creation via PUT is not allowed -# def test_put_agent_create(client): -# create_request = { -# 'agent': { -# 'name': 'test_put_agent_create', -# 'model_name': 'test_model', -# 'params': { -# 'k1': 'v1' -# }, -# 'provider': 'mindsdb', -# 'skills': ['test_skill'] -# } -# } - -# put_response = client.put('/api/projects/mindsdb/agents/test_put_agent_create', json=create_request, follow_redirects=True) -# assert put_response.status_code == HTTPStatus.CREATED - -# created_agent = put_response.get_json() - -# expected_agent = { -# 'name': 'test_put_agent_create', -# 'model_name': 'test_model', -# 'params': { -# 'k1': 'v1' -# }, -# 'provider': 'mindsdb', -# 'skills': created_agent['skills'], -# 'skills_extra_parameters': created_agent['skills_extra_parameters'], -# 'id': created_agent['id'], -# 'project_id': created_agent['project_id'], -# 'created_at': created_agent['created_at'], -# 'updated_at': created_agent['updated_at'] -# } - -# assert created_agent == expected_agent - - -@pytest.mark.deprecated( - "MindsDB models are no longer used with agents. 
However, Minds still uses models, so this test is kept for now" -) -@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm") -def test_put_agent_update_depreciated(check_agent_llm, client): - create_request = { - "agent": { - "name": "test_put_agent_update_depreciated", - "model": {"provider": "openai", "model_name": "test_model"}, - "params": {"timeout": 10}, - } - } - - response = client.post("/api/projects/mindsdb/agents", json=create_request, follow_redirects=True) - assert response.status_code == HTTPStatus.CREATED - - update_request = { - "agent": { - "params": {"timeout": 20}, - } - } - - update_response = client.put( - "/api/projects/mindsdb/agents/test_put_agent_update_depreciated", json=update_request, follow_redirects=True - ) - updated_agent = update_response.get_json() - - expected_agent = { - "name": "test_put_agent_update_depreciated", - "model": {"provider": "openai", "model_name": "test_model"}, - "params": {"timeout": 20}, - "id": updated_agent["id"], - "project_id": updated_agent["project_id"], - "created_at": updated_agent["created_at"], - "updated_at": updated_agent["updated_at"], - } - - assert updated_agent == expected_agent - - -@pytest.mark.deprecated( - "MindsDB models are no longer used with agents. 
However, Minds still uses models, so this test is kept for now" -) -@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm") -@patch("mindsdb.interfaces.agents.agents_controller.check_agent_data") -def test_put_agent_update(check_agent_data, check_agent_llm, client): - create_request = { - "agent": { - "name": "test_put_agent_update", - "model": {"model_name": "gpt-3.5-turbo", "provider": "openai", "api_key": "sk-..."}, - "data": {"tables": ["example_db.customers"]}, - "prompt_template": "example_db.customers stores customers data", - } - } - - response = client.post("/api/projects/mindsdb/agents", json=create_request, follow_redirects=True) - assert response.status_code == HTTPStatus.CREATED - - update_request = { - "agent": { - "params": {"timeout": 5}, - "data": { - "tables": ["example_db.customers", "example_db.orders"], - "knowledge_bases": ["example_kb"], - }, - } - } - - update_response = client.put( - "/api/projects/mindsdb/agents/test_put_agent_update", json=update_request, follow_redirects=True - ) - updated_agent = update_response.get_json() - - expected_agent = { - "name": "test_put_agent_update", - "params": {"timeout": 5}, - "id": updated_agent["id"], - "project_id": updated_agent["project_id"], - "created_at": updated_agent["created_at"], - "updated_at": updated_agent["updated_at"], - "data": { - "tables": ["example_db.customers", "example_db.orders"], - "knowledge_bases": ["example_kb"], - }, - "model": {"model_name": "gpt-3.5-turbo", "provider": "openai", "api_key": "sk-..."}, - "prompt_template": "example_db.customers stores customers data", - } - - assert updated_agent == expected_agent - - -def test_put_agent_no_agent(client): - create_request = { - "name": "test_put_agent_no_agent", - "model": {"model_name": "gpt-3.5-turbo", "provider": "openai", "api_key": "sk-..."}, - "data": {"tables": ["example_db.customers"]}, - "prompt_template": "example_db.customers stores customers data", - } - - response = client.put( - 
"/api/projects/mindsdb/agents/test_put_agent_no_agent", json=create_request, follow_redirects=True - ) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -# At the moment creation via PUT is not allowed -# def test_put_agent_model_not_found(client): -# create_request = { -# 'agent': { -# 'name': 'test_put_agent_model_not_found', -# 'model_name': 'oopsy_daisy', -# 'params': { -# 'k1': 'v1', -# 'k2': 'v2' -# }, -# 'provider': 'mindsdb', -# 'skills': ['test_skill'] -# } -# } - -# response = client.put('/api/projects/mindsdb/agents/test_put_agent_model_not_found', json=create_request, follow_redirects=True) -# assert '404' in response.status - - -@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm") -@patch("mindsdb.interfaces.agents.agents_controller.check_agent_data") -def test_delete_agent(check_agent_data, check_agent_llm, client): - create_request = { - "agent": { - "name": "test_delete_agent", - "model": {"model_name": "gpt-3.5-turbo", "provider": "openai", "api_key": "sk-..."}, - "data": {"tables": ["example_db.customers"]}, - "prompt_template": "example_db.customers stores customers data", - } - } - - client.post("/api/projects/mindsdb/agents", json=create_request, follow_redirects=True) - - delete_response = client.delete("/api/projects/mindsdb/agents/test_delete_agent", follow_redirects=True) - assert delete_response.status_code == HTTPStatus.NO_CONTENT - - get_response = client.get("/api/projects/mindsdb/agents/test_delete_agent", follow_redirects=True) - assert get_response.status_code == HTTPStatus.NOT_FOUND - - -def test_delete_agent_project_not_found(client): - delete_response = client.delete("/api/projects/doop/agents/test_post_agent", follow_redirects=True) - assert delete_response.status_code == HTTPStatus.NOT_FOUND - - -def test_delete_agent_not_found(client): - delete_response = client.delete("/api/projects/mindsdb/agents/test_delete_agent_not_found", follow_redirects=True) - assert delete_response.status_code == 
HTTPStatus.NOT_FOUND - - -@patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm") -def test_agent_completions(check_agent_llm, client): - create_request = { - "agent": { - "name": "test_agent", - "model_name": "test_model", - "provider": "mindsdb", - "params": {"prompt_template": "Test message!"}, - } - } - - create_response = client.post("/api/projects/mindsdb/agents", json=create_request, follow_redirects=True) - assert create_response.status_code == HTTPStatus.CREATED - - completions_request = {"messages": [{"role": "user", "content": "Test message!"}]} - - with patch("mindsdb.interfaces.agents.pydantic_ai_agent.PydanticAIAgent") as agent_mock: - agent_mock_instance = agent_mock.return_value - agent_mock_instance.get_completion.return_value = pd.DataFrame([{"answer": "beepboop", "trace_id": "---"}]) - completions_response = client.post( - "/api/projects/mindsdb/agents/test_agent/completions", json=completions_request, follow_redirects=True - ) - - assert completions_response.status_code == HTTPStatus.OK - message_json = completions_response.get_json()["message"] - assert message_json["content"] == "beepboop" - - -def test_agent_completions_project_not_found(client): - completions_request = {"messages": [{"role": "user", "content": "Test message!"}]} - completions_response = client.post( - "/api/projects/bloop/agents/test_agent/completions", json=completions_request, follow_redirects=True - ) - assert completions_response.status_code == HTTPStatus.NOT_FOUND - - -def test_agent_completions_bad_request(client): - completions_request = {"massagez": [{"role": "user", "content": "Test message!"}]} - completions_response = client.post( - "/api/projects/mindsdb/agents/test_agent/completions", json=completions_request, follow_redirects=True - ) - assert completions_response.status_code == HTTPStatus.BAD_REQUEST - - -def test_agent_completions_agent_not_found(client): - completions_request = {"messages": [{"role": "user", "content": "Test message!"}]} - 
completions_response = client.post( - "/api/projects/mindsdb/agents/zoopy_agent/completions", json=completions_request, follow_redirects=True - ) - assert completions_response.status_code == HTTPStatus.NOT_FOUND diff --git a/tests/unit/api/http/byom_test.py b/tests/unit/api/http/byom_test.py deleted file mode 100644 index 3259482ee1a..00000000000 --- a/tests/unit/api/http/byom_test.py +++ /dev/null @@ -1,84 +0,0 @@ -import io -import os.path -from http import HTTPStatus -from textwrap import dedent - -import pytest - -from mindsdb.utilities.config import config - - -def get_file(): - return io.BytesIO( - dedent(""" - class CustomPredictor(): - def train(self, df, target_col, args=None): - ... - def predict(self, df): - ... - """).encode() - ) - - -@pytest.mark.skipif(os.environ.get("MINDSDB_COMMUNITY_HANDLERS") != "true", reason="BYOM is not enabled") -def test_disabled_byom(client): - """Test disabled byom""" - config._config["byom"]["enabled"] = False - os.environ["MINDSDB_BYOM_ENABLED"] = "false" - response = client.put( - "/api/handlers/byom/model1", - data={ - "code": (get_file(), "/tmp/test_module.py"), - "modules": (io.BytesIO(b""), "req.txt"), - "mode": "custom_function", - }, - ) - assert response.status_code == HTTPStatus.FORBIDDEN - - -@pytest.mark.skipif(os.environ.get("MINDSDB_COMMUNITY_HANDLERS") != "true", reason="BYOM is not enabled") -def test_path_traversal(client): - """Test uploading a file""" - config._config["byom"]["enabled"] = True - os.environ["MINDSDB_BYOM_ENABLED"] = "true" - path = "../../../../../../../../../../tmp/test_module.py" - response = client.put( - "/api/handlers/byom/model1", - data={ - "code": (get_file(), path), - "modules": (io.BytesIO(b""), "req.txt"), - "mode": "custom_function", - }, - ) - assert response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR - assert not os.path.exists(path) - - -@pytest.mark.slow -@pytest.mark.skipif(os.environ.get("MINDSDB_COMMUNITY_HANDLERS") != "true", reason="BYOM is not enabled") -def 
test_conflict(client): - """Test that it is not possible to create two engins with the same name""" - config._config["byom"]["enabled"] = True - os.environ["MINDSDB_BYOM_ENABLED"] = "true" - path = "test_module.py" - response = client.put( - "/api/handlers/byom/model1", - data={ - "code": (get_file(), path), - "modules": (io.BytesIO(b""), "req.txt"), - "type": "inhouse", - }, - ) - assert response.status_code == HTTPStatus.OK - - response = client.put( - "/api/handlers/byom/model1", - data={ - "code": (get_file(), path), - "modules": (io.BytesIO(b""), "req.txt"), - "type": "inhouse", - }, - ) - - assert response.status_code == HTTPStatus.CONFLICT - assert not os.path.exists(path) diff --git a/tests/unit/api/http/chatbots_test.py b/tests/unit/api/http/chatbots_test.py deleted file mode 100644 index c5da06d57f3..00000000000 --- a/tests/unit/api/http/chatbots_test.py +++ /dev/null @@ -1,412 +0,0 @@ -import pytest -from http import HTTPStatus -from tests.unit.api.http.conftest import create_demo_db, create_dummy_ml - - -def test_prepare(client): - create_demo_db(client) - create_dummy_ml(client) - - # Create model to use in all tests. - create_query = """ - CREATE MODEL mindsdb.test_model - FROM example_db (SELECT * FROM demo_data.home_rentals) - PREDICT rental_price - USING engine = 'dummy_ml', join_learn_process = true - """ - train_data = {"query": create_query} - response = client.post("/api/projects/mindsdb/models", json=train_data, follow_redirects=True) - assert "201" in response.status - - -@pytest.fixture() -def test_db(client): - # Fetch all so we don't have to go through the pain of setting context attributes - # to fetch a single database. 
- all_databases_response = client.get("/api/databases", follow_redirects=True) - all_dbs = all_databases_response.get_json() - for database in all_dbs: - if database["name"] == "example_db": - return database - return None - - -def test_get_all_chatbots(client, test_db): - response = client.get("/api/projects/mindsdb/chatbots", follow_redirects=True) - assert response.status_code == HTTPStatus.OK - assert len(response.get_json()) == 0 - - chatbot_data = { - "chatbot": { - "name": "test_get_all_chatbots", - "model_name": "test_model", - "database_id": test_db["id"], - "is_running": True, - "params": {"param1": "value1"}, - } - } - client.post("/api/projects/mindsdb/chatbots", json=chatbot_data, follow_redirects=True) - - response = client.get("/api/projects/mindsdb/chatbots", follow_redirects=True) - assert "200" in response.status - all_chatbots = response.get_json() - assert len(all_chatbots) == 1 - actual_chatbot = all_chatbots[0] - - expected_chatbot = { - "name": "test_get_all_chatbots", - "model_name": "test_model", - "agent": actual_chatbot["agent"], - "database_id": test_db["id"], - "database": "example_db", - "last_error": None, - "is_running": True, - "params": {"param1": "value1"}, - "created_at": actual_chatbot["created_at"], - "id": actual_chatbot["id"], - "project": "mindsdb", - "webhook_token": None, - } - assert actual_chatbot == expected_chatbot - - -def test_get_all_chatbots_project_not_found(client): - response = client.get("/api/projects/glorp/chatbots", follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_get_chatbot(client, test_db): - chatbot_data = { - "chatbot": { - "name": "test_get_chatbot", - "model_name": "test_model", - "database_id": test_db["id"], - "is_running": True, - "params": {"param1": "value1"}, - } - } - client.post("/api/projects/mindsdb/chatbots", json=chatbot_data, follow_redirects=True) - - response = client.get("/api/projects/mindsdb/chatbots/test_get_chatbot", follow_redirects=True) 
- assert response.status_code == HTTPStatus.OK - actual_chatbot = response.get_json() - - expected_chatbot = { - "name": "test_get_chatbot", - "model_name": "test_model", - "agent": actual_chatbot["agent"], - "database_id": test_db["id"], - "database": "example_db", - "last_error": None, - "is_running": True, - "params": {"param1": "value1"}, - "created_at": actual_chatbot["created_at"], - "id": actual_chatbot["id"], - "project": "mindsdb", - "webhook_token": None, - } - assert actual_chatbot == expected_chatbot - - -def test_get_chatbot_not_found(client): - response = client.get("/api/projects/mindsdb/chatbots/test_get_chatbot_not_found", follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_get_chatbot_project_not_found(client): - response = client.get("/api/projects/zoop/chatbots/test_get_chatbot", follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_post_chatbot(client, test_db): - chatbot_data = { - "chatbot": { - "name": "TEST_post_chatbot", - "model_name": "test_model", - "database_id": test_db["id"], - "is_running": True, - "params": {"param1": "value1"}, - } - } - response = client.post("/api/projects/mindsdb/chatbots", json=chatbot_data, follow_redirects=True) - assert response.status_code == HTTPStatus.CREATED - created_chatbot = response.get_json() - - expected_chatbot = { - "name": "TEST_post_chatbot", - "model_name": "test_model", - "agent_id": created_chatbot["agent_id"], - "database_id": test_db["id"], - "params": {"param1": "value1"}, - "created_at": created_chatbot["created_at"], - "id": created_chatbot["id"], - "project_id": created_chatbot["project_id"], - "webhook_token": None, - } - assert created_chatbot == expected_chatbot - - -def test_post_chatbot_no_chatbot_fails(client, test_db): - chatbot_data = { - "name": "test_post_chatbot_no_chatbot_fails", - "model_name": "test_model", - "database_id": test_db["id"], - "is_running": True, - "params": {"param1": "value1"}, - 
} - response = client.post("/api/projects/mindsdb/chatbots", json=chatbot_data, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_post_chatbot_no_name_fails(client, test_db): - chatbot_data = { - "chatbot": { - "model_name": "test_model", - "database_id": test_db["id"], - "is_running": True, - "params": {"param1": "value1"}, - } - } - response = client.post("/api/projects/mindsdb/chatbots", json=chatbot_data, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_post_chatbot_no_model_name_fails(client, test_db): - chatbot_data = { - "chatbot": { - "name": "test_post_chatbot_no_model_name_fails", - "database_id": test_db["id"], - "is_running": True, - "params": {"param1": "value1"}, - } - } - response = client.post("/api/projects/mindsdb/chatbots", json=chatbot_data, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_post_chatbot_no_database_id_fails(client): - chatbot_data = { - "chatbot": { - "name": "test_post_chatbot_no_database_id_fails", - "model_name": "test_model", - "is_running": True, - "params": {"param1": "value1"}, - } - } - response = client.post("/api/projects/mindsdb/chatbots", json=chatbot_data, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_post_chatbot_model_does_not_exist_fails(client, test_db): - chatbot_data = { - "chatbot": { - "name": "test_post_chatbot_model_does_not_exist_fails", - "model_name": "nonexistent_model", - "database_id": test_db["id"], - "is_running": True, - "params": {"param1": "value1"}, - } - } - response = client.post("/api/projects/mindsdb/chatbots", json=chatbot_data, follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_post_chatbot_project_does_not_exist_fails(client, test_db): - chatbot_data = { - "chatbot": { - "name": "test_post_chatbot_project_does_not_exist_fails", - "model_name": "test_model", - "database_id": 
test_db["id"], - "is_running": True, - "params": {"param1": "value1"}, - } - } - response = client.post("/api/projects/bloop/chatbots", json=chatbot_data, follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_put_chatbot_create(client, test_db): - chatbot_data = { - "chatbot": { - "name": "test_put_chatbot_create", - "model_name": "test_model", - "database_id": test_db["id"], - "is_running": True, - "params": {"param1": "value1"}, - } - } - response = client.put( - "/api/projects/mindsdb/chatbots/test_put_chatbot_create", json=chatbot_data, follow_redirects=True - ) - assert response.status_code == HTTPStatus.CREATED - created_chatbot = response.get_json() - - expected_chatbot = { - "name": "test_put_chatbot_create", - "model_name": "test_model", - "agent_id": created_chatbot["agent_id"], - "database_id": test_db["id"], - "params": {"param1": "value1"}, - "created_at": created_chatbot["created_at"], - "id": created_chatbot["id"], - "project_id": created_chatbot["project_id"], - "webhook_token": None, - } - assert created_chatbot == expected_chatbot - - -def test_put_chatbot_update(client, test_db): - chatbot_data = { - "chatbot": { - "name": "test_put_chatbot_update", - "model_name": "test_model", - "database_id": test_db["id"], - "is_running": True, - "params": {"param1": "value1"}, - } - } - response = client.put( - "/api/projects/mindsdb/chatbots/test_put_chatbot_update", json=chatbot_data, follow_redirects=True - ) - assert response.status_code == HTTPStatus.CREATED - - updated_chatbot_data = {"chatbot": {"params": {"new_param": "new_value"}}} - response = client.put( - "/api/projects/mindsdb/chatbots/test_put_chatbot_update", json=updated_chatbot_data, follow_redirects=True - ) - assert response.status_code == HTTPStatus.OK - updated_chatbot = response.get_json() - - expected_chatbot = { - "name": "test_put_chatbot_update", - "model_name": "test_model", - "agent_id": updated_chatbot["agent_id"], - "database_id": 
test_db["id"], - "params": {"param1": "value1", "new_param": "new_value"}, - "created_at": updated_chatbot["created_at"], - "id": updated_chatbot["id"], - "project_id": updated_chatbot["project_id"], - "webhook_token": None, - } - assert updated_chatbot == expected_chatbot - - -def test_put_chatbot_no_chatbot_fails(client, test_db): - chatbot_data = { - "name": "test_put_chatbot_no_chatbot_fails", - "model_name": "test_model", - "database_id": test_db["id"], - "is_running": True, - "params": {"param1": "value1"}, - } - response = client.put( - "/api/projects/mindsdb/chatbots/test_put_chatbot_update", json=chatbot_data, follow_redirects=True - ) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_put_chatbot_project_not_found(client, test_db): - chatbot_data = { - "chatbot": { - "name": "test_put_chatbot_project_not_found", - "model_name": "test_model", - "database_id": test_db["id"], - "is_running": True, - "params": {"param1": "value1"}, - } - } - response = client.put( - "/api/projects/flumpus/chatbots/test_put_chatbot_project_not_found", json=chatbot_data, follow_redirects=True - ) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_put_chatbot_model_not_found(client, test_db): - chatbot_data = { - "chatbot": { - "name": "test_put_chatbot_model_not_found", - "model_name": "fake_model", - "database_id": test_db["id"], - "is_running": True, - "params": {"param1": "value1"}, - } - } - response = client.put( - "/api/projects/mindsdb/chatbots/test_put_chatbot_model_not_found", json=chatbot_data, follow_redirects=True - ) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_put_chatbot_create_no_name_fails(client, test_db): - chatbot_data = { - "chatbot": { - "model_name": "test_model", - "database_id": test_db["id"], - "is_running": True, - "params": {"param1": "value1"}, - } - } - response = client.put( - "/api/projects/mindsdb/chatbots/test_put_chatbot_create_no_name_fails", json=chatbot_data, follow_redirects=True - 
) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_put_chatbot_create_no_model_fails(client, test_db): - chatbot_data = { - "chatbot": { - "name": "test_put_chatbot_create_no_model_fails", - "database_id": test_db["id"], - "is_running": True, - "params": {"param1": "value1"}, - } - } - response = client.put( - "/api/projects/mindsdb/chatbots/test_put_chatbot_create_no_model_fails", - json=chatbot_data, - follow_redirects=True, - ) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_put_chatbot_create_no_database_id_fails(client): - chatbot_data = { - "chatbot": { - "name": "test_put_chatbot_create_no_database_id_fails", - "model_name": "test_model", - "is_running": True, - "params": {"param1": "value1"}, - } - } - response = client.put( - "/api/projects/mindsdb/chatbots/test_put_chatbot_create_no_database_id_fails", - json=chatbot_data, - follow_redirects=True, - ) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_delete_chatbot(client, test_db): - chatbot_data = { - "chatbot": { - "name": "test_delete_chatbot", - "model_name": "test_model", - "database_id": test_db["id"], - "is_running": True, - "params": {"param1": "value1"}, - } - } - client.post("/api/projects/mindsdb/chatbots", json=chatbot_data, follow_redirects=True) - - response = client.delete("/api/projects/mindsdb/chatbots/test_delete_chatbot", follow_redirects=True) - assert response.status_code == HTTPStatus.NO_CONTENT - - -def test_delete_chatbot_not_found(client): - response = client.delete("/api/projects/mindsdb/chatbots/test_delete_chatbot_not_found", follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_delete_chatbot_project_not_found(client): - response = client.delete("/api/projects/krombopulos/chatbots/test_post_chatbot", follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND diff --git a/tests/unit/api/http/config_test.py b/tests/unit/api/http/config_test.py deleted file mode 
100644 index 672d7d31cd0..00000000000 --- a/tests/unit/api/http/config_test.py +++ /dev/null @@ -1,9 +0,0 @@ -def test_get_config_returns_knowledge_bases_storage(client): - response = client.get("/api/config/") - - assert response.status_code == 200 - payload = response.get_json() - assert "knowledge_bases" in payload - assert "storage" in payload["knowledge_bases"] - assert "available_vector_engines" in payload["knowledge_bases"] - assert "pgvector_enabled" in payload["knowledge_bases"] diff --git a/tests/unit/api/http/conftest.py b/tests/unit/api/http/conftest.py deleted file mode 100644 index e8b09d33e6f..00000000000 --- a/tests/unit/api/http/conftest.py +++ /dev/null @@ -1,122 +0,0 @@ -import os -import sys -from http import HTTPStatus -from pathlib import Path -from tempfile import TemporaryDirectory - -import pytest -from flask.testing import FlaskClient -from flask.app import Flask - -from mindsdb.api.http.initialize import initialize_app -from mindsdb.migrations import migrate -from mindsdb.interfaces.storage import db -from mindsdb.utilities.config import config -from mindsdb.integrations.libs.process_cache import process_cache - - -@pytest.fixture(scope="module", autouse=True) -def app(): - old_minds_db_con = "" - if "MINDSDB_DB_CON" in os.environ: - old_minds_db_con = os.environ["MINDSDB_DB_CON"] - - try: - with TemporaryDirectory(prefix="test_tmp_") as temp_dir: - os.environ["MINDSDB_STORAGE_DIR"] = temp_dir - db_path = "sqlite:///" + os.path.join(temp_dir, "mindsdb.sqlite3.db") - # Need to change env variable for migrate module, since it calls db.init(). - os.environ["MINDSDB_DB_CON"] = db_path - config.prepare_env_config() - config.merge_configs() - config["gui"]["open_on_start"] = False - config["gui"]["autoupdate"] = False - db.init() - migrate.migrate_to_head() - app = initialize_app() - app._mindsdb_temp_dir = temp_dir - yield app - except PermissionError: - print("PermissionError when deleting temporary directory. 
MindsDBs temp dir is going to be left as is.") - - process_cache.shutdown() - os.environ["MINDSDB_DB_CON"] = old_minds_db_con - - -@pytest.fixture(scope="module") -def client(app: Flask) -> FlaskClient: - return app.test_client() - - -def create_dummy_db(client: FlaskClient, db_name: str): - temp_dir = client.application._mindsdb_temp_dir - dummy_data_db_path = os.path.join(temp_dir, "_dummy_data_db") - response = client.post( - "/api/sql/query", - json={ - "query": f''' - create database {db_name} - with ENGINE = "dummy_data" - PARAMETERS = {{"db_path": "{dummy_data_db_path}"}}''' - }, - ) - assert response.status_code == HTTPStatus.OK - assert response.json["type"] == "ok" - - -def create_demo_db(client: FlaskClient): - example_db_data = { - "database": { - "name": "example_db", - "engine": "postgres", - "parameters": { - "user": "demo_user", - "password": "demo_password", - "host": "samples.mindsdb.com", - "port": "5432", - "database": "demo", - "schema": "demo_data", - }, - } - } - response = client.post("/api/databases", json=example_db_data, follow_redirects=True) - assert "201" in response.status - - -def create_dummy_ml(client: FlaskClient): - from mindsdb.interfaces.database.integrations import integration_controller - - test_handler_path = Path(__file__).parents[2] - sys.path.append(str(test_handler_path)) - - handler_dir = Path(test_handler_path) / "dummy_ml_handler" - - handler_meta = { - "import": { - "success": None, - "error_message": None, - "folder": handler_dir.name, - "dependencies": [], - }, - "path": handler_dir, - "name": "dummy_ml", - "permanent": False, - } - integration_controller.handlers_import_status["dummy_ml"] = handler_meta - integration_controller.import_handler("dummy_ml", "") - - if not integration_controller.get_handler_meta("dummy_ml")["import"]["success"]: - error = integration_controller.handlers_import_status["dummy_ml"]["import"]["error_message"] - raise Exception(f"Can not import: {str(handler_dir)}: {error}") - - response = 
client.post( - "/api/sql/query", - json={ - "query": """ - create ml_engine dummy_ml - from dummy_ml - """ - }, - ) - assert response.status_code == HTTPStatus.OK - assert response.json["type"] == "ok" diff --git a/tests/unit/api/http/databases_test.py b/tests/unit/api/http/databases_test.py deleted file mode 100644 index 3a6b3a89e4d..00000000000 --- a/tests/unit/api/http/databases_test.py +++ /dev/null @@ -1,182 +0,0 @@ -from http import HTTPStatus - - -def test_get_databases(client): - response = client.get("/api/databases", follow_redirects=True) - all_databases = response.get_json() - # Should contain default project, log and information schema. - assert len(all_databases) == 3 - assert any(db["name"] == "information_schema" for db in all_databases) - assert any(db["name"] == "mindsdb" for db in all_databases) - assert any(db["name"] == "log" for db in all_databases) - - -def test_get_database(client): - # Get default mindsdb project. - response = client.get("/api/databases/mindsdb", follow_redirects=True) - mindsdb_database = response.get_json() - expected_db = {"name": "mindsdb", "engine": None, "type": "project", "id": mindsdb_database["id"]} - - assert mindsdb_database == expected_db - - response = client.get("/api/databases/MindsDB", follow_redirects=True) - mindsdb_database = response.get_json() - mindsdb_database["name"] = mindsdb_database["name"].lower() - assert mindsdb_database == expected_db - - # Get a newly created integration. 
- integration_data = { - "database": { - "name": "TEST_get_database", - "engine": "postgres", - "parameters": {"user": "ricky_sanchez", "password": "florpglorp"}, - } - } - response = client.post("/api/databases", json=integration_data, follow_redirects=True) - response = client.get("/api/databases/TEST_get_database", follow_redirects=True) - - integration_db = response.get_json() - expected_db = { - "name": "TEST_get_database", - "type": "data", - "engine": "postgres", - "connection_data": {"user": "ricky_sanchez", "password": "florpglorp"}, - "class_type": "sql", - "permanent": False, - "id": integration_db["id"], - "date_last_update": integration_db["date_last_update"], - } - - assert integration_db == expected_db - - -def test_create_database(client): - mindsdb_data = {"database": {"name": "test_postgres", "engine": "postgres", "parameters": {}}} - response = client.post("/api/databases", json=mindsdb_data, follow_redirects=True) - # Make sure we use the CREATED HTTP status code. - assert response.status_code == HTTPStatus.CREATED - new_db = response.get_json() - - expected_db = {"name": "test_postgres", "engine": "postgres", "type": "data", "id": new_db["id"]} - assert new_db == expected_db - - -def test_create_database_already_exists_abort(client): - mindsdb_data = {"database": {"name": "test_duplicate", "engine": "postgres", "parameters": {}}} - response = client.post("/api/databases", json=mindsdb_data, follow_redirects=True) - assert response.status_code == HTTPStatus.CREATED - create_duplicate_response = client.post("/api/databases", json=mindsdb_data, follow_redirects=True) - # Make sure we use CONFLICT status code. 
- assert create_duplicate_response.status_code == HTTPStatus.CONFLICT - - -def test_create_database_no_database_aborts(client): - mindsdb_data = {"name": "test_postgres", "engine": "postgres", "parameters": {}} - response = client.post("/api/databases", json=mindsdb_data, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_create_database_no_name_aborts(client): - mindsdb_data = {"database": {"engine": "postgres", "parameters": {}}} - response = client.post("/api/databases", json=mindsdb_data, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_create_database_no_engine_aborts(client): - mindsdb_data = {"database": {"name": "test_postgres", "parameters": {}}} - response = client.post("/api/databases", json=mindsdb_data, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_update_database_creates_database(client): - database_data = {"database": {"name": "test_update_creates", "engine": "postgres", "parameters": {}}} - response = client.put("/api/databases/test_update_creates", json=database_data, follow_redirects=True) - # Make sure we use the CREATED HTTP status code. 
- assert response.status_code == HTTPStatus.CREATED - new_db = response.get_json() - - expected_db = {"name": "test_update_creates", "engine": "postgres", "type": "data", "id": new_db["id"]} - assert new_db == expected_db - - -def test_update_database(client): - database_data = {"database": {"name": "test_update", "engine": "postgres", "parameters": {}}} - - updated_data = {"database": {"parameters": {"user": "bearO", "password": "destroydestroydestroy"}}} - client.post("/api/databases", json=database_data, follow_redirects=True) - response = client.put("/api/databases/test_update", json=updated_data, follow_redirects=True) - - assert response.status_code == HTTPStatus.OK - - updated_db = response.get_json() - expected_db = { - "name": "test_update", - "engine": "postgres", - "type": "data", - "connection_data": {"user": "bearO", "password": "destroydestroydestroy"}, - "class_type": "sql", - "permanent": False, - "id": updated_db["id"], - "date_last_update": updated_db["date_last_update"], - } - - assert updated_db == expected_db - - -def test_update_database_no_database_aborts(client): - mindsdb_data = {"name": "test_postgres", "engine": "postgres", "parameters": {}} - response = client.put("/api/databases/test_postgres", json=mindsdb_data, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_delete_database(client): - mindsdb_data = {"database": {"name": "test_delete", "engine": "postgres", "parameters": {}}} - # Delete newly created DB. - client.post("/api/databases", json=mindsdb_data, follow_redirects=True) - response = client.get("/api/databases/test_delete", follow_redirects=True) - - assert response.status_code == HTTPStatus.OK - - response = client.delete("/api/databases/test_delete", follow_redirects=True) - - # Make sure we return NO_CONTENT status since we don't return the deleted DB. 
- assert response.status_code == HTTPStatus.NO_CONTENT - - response = client.get("/api/databases/test_delete", follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_delete_database_does_not_exist(client): - response = client.delete("/api/databases/batadase", follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_delete_system_database(client): - response = client.delete("/api/databases/information_schema", follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_update_database_check_connection_with_invalid_parameters(client): - # Test that PUT with check_connection=true and invalid parameters returns an error - database_data = { - "database": { - "name": "test_invalid_connection", - "engine": "postgres", - "parameters": { - "host": "invalid_host_that_does_not_exist", - "port": 5432, - "user": "invalid_user", - "password": "invalid_password", - "database": "invalid_db", - }, - }, - "check_connection": True, - } - - response = client.put("/api/databases/test_invalid_connection", json=database_data, follow_redirects=True) - # Should return BAD_REQUEST due to connection failure - assert response.status_code == HTTPStatus.BAD_REQUEST - - response_data = response.get_json() - # Verify error message indicates connection error - assert response_data["title"] == "Connection error" diff --git a/tests/unit/api/http/files_test.py b/tests/unit/api/http/files_test.py deleted file mode 100644 index c2eecce5e81..00000000000 --- a/tests/unit/api/http/files_test.py +++ /dev/null @@ -1,234 +0,0 @@ -import io -import os.path -import os -from http import HTTPStatus - - -def test_get_files_list(client): - """Test getting list of all files""" - response = client.get("/api/files/", follow_redirects=True) - assert response.status_code == HTTPStatus.OK - files_list = response.get_json() - assert isinstance(files_list, list) - - -def test_put_file(client): - """Test uploading a 
file""" - file = io.BytesIO(b"Hello, World!") - - data = {"file": (file, "test.txt")} - response = client.put( - "/api/files/test", - data=data, - content_type="multipart/form-data", - follow_redirects=True, - ) - assert response.status_code == HTTPStatus.OK - - -def test_path_traversal(client): - """Test uploading a file""" - file = io.BytesIO(b"Hello, World!") - path = "../../../../../../../../../../tmp/test_test.txt" - data = {"file": (file, path)} - response = client.put( - "/api/files/my_file", - data=data, - content_type="multipart/form-data", - follow_redirects=True, - ) - assert response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR - assert not os.path.exists(path) - - -def test_delete_file(client): - """Test deleting a file""" - response = client.delete("/api/files/test", follow_redirects=True) - assert response.status_code == HTTPStatus.OK - - -def test_delete_nonexistent_file(client): - """Test deleting a nonexistent file""" - response = client.delete("/api/files/nonexistent.txt", follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - data = response.get_json() - assert "Error deleting file" in data["title"] - assert "There was an error while trying to delete file with name 'nonexistent.txt'" in data["detail"] - - -def test_put_file_invalid_url(client): - """Test uploading with an invalid URL""" - data = {"source_type": "url", "source": "not_a_url"} - response = client.put( - "/api/files/bad.txt", - json=data, - content_type="application/json", - follow_redirects=True, - ) - assert response.status_code == 400 - data = response.get_json() - assert "Invalid URL" in data["title"] - - -def test_put_file_url_with_file_param(client, monkeypatch): - """Test uploading from URL with "file" parameter, which forbidden if source_type!=file""" - monkeypatch.setattr( - "mindsdb.api.http.namespaces.file.config", - {"url_file_upload": {"enabled": True}}, - ) - data = { - "source_type": "url", - "source": "http://example.com/file.txt", - 
"file": "../path/test.txt", # forbidden param if source_type!=file - } - response = client.put( - "/api/files/remote.txt", - json=data, - content_type="application/json", - follow_redirects=True, - ) - assert response.status_code == 400 - data = response.get_json() - assert "Invalid request parameters" == data["title"] - - -def test_put_file_no_source_type_with_file_param(client, monkeypatch): - """Test uploading from without source_type, but with "file" param""" - monkeypatch.setattr( - "mindsdb.api.http.namespaces.file.config", - {"url_file_upload": {"enabled": True}}, - ) - data = { - "file": "../path/test.txt" # forbidden param if source_type!=file - } - response = client.put( - "/api/files/remote.txt", - json=data, - content_type="application/json", - follow_redirects=True, - ) - assert response.status_code == 400 - data = response.get_json() - assert "Invalid request parameters" == data["title"] - - -def test_put_file_url_upload_disabled(client, monkeypatch): - """Test uploading from URL when URL upload is disabled""" - # Patch config to disable URL upload - monkeypatch.setattr( - "mindsdb.api.http.namespaces.file.config", - {"url_file_upload": {"enabled": False}}, - ) - data = { - "source_type": "url", - "source": "http://example.com/file.txt", - } - response = client.put( - "/api/files/remote.txt", - json=data, - content_type="application/json", - follow_redirects=True, - ) - assert response.status_code == 400 - data = response.get_json() - assert "URL file upload is disabled" in data["detail"] - - -def test_extension_in_filename(client): - """Test uploading a file with an extension in the name""" - file = io.BytesIO(b"Hello, World!") - - data = {"file": (file, "test.txt")} - response = client.put( - "/api/files/test.txt", - data=data, - content_type="multipart/form-data", - follow_redirects=True, - ) - assert response.status_code == 400 - data = response.get_json() - assert "File name cannot contain extension." 
in data["detail"] - - -def test_archive_file_with_extension_upload(client): - """Test uploading a zip archive file with an extension in the name""" - import zipfile - import io - - # Create a zip file in memory - zip_buffer = io.BytesIO() - with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_DEFLATED) as zf: - zf.writestr("file1.txt", "This is the content of file 1.") - zip_buffer.seek(0) - - data = {"file": (zip_buffer, "archive.zip")} - response = client.put( - "/api/files/archive", - data=data, - content_type="multipart/form-data", - follow_redirects=True, - ) - assert response.status_code == 400 - data = response.get_json() - assert "File name cannot contain extension." in data["detail"] - - -def test_zipfile_traversal(client): - """Test uploading a zip archive with path traversal filenames""" - import zipfile - import io - - # Create a zip file in memory with a symlink - zip_buffer = io.BytesIO() - with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_DEFLATED) as zf: - zf.writestr("../../../../etc/passwd", "malicious content") - zip_buffer.seek(0) - data = {"file": (zip_buffer, "archive.zip")} - response = client.put( - "/api/files/archive", - data=data, - content_type="multipart/form-data", - follow_redirects=True, - ) - # Should fail due to path validation (ValueError is raised) - assert response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR - data = response.get_json() - assert "Attempted Path Traversal in Zip File" in data["detail"] - - -def test_put_file_with_invalid_parameters_multipart(client): - """Test uploading a file with invalid/unexpected parameters via multipart form data""" - file = io.BytesIO(b"Hello, World!") - - data = { - "file": (file, "test.txt"), - "source_type": "file", - "unexpected_param": "unexpected_value", # Invalid parameter - } - response = client.put( - "/api/files/testfile", - data=data, - content_type="multipart/form-data", - follow_redirects=True, - ) - assert response.status_code == 400 - data = response.get_json() - assert 
"Invalid request parameters" in data["title"] - - -def test_put_file_with_invalid_parameters_json(client): - """Test uploading a file with invalid/unexpected parameters via JSON""" - data = { - "source_type": "url", - "source": "http://example.com/file.txt", - "invalid_field": "some_value", # Invalid parameter - } - response = client.put( - "/api/files/testfile", - json=data, - content_type="application/json", - follow_redirects=True, - ) - assert response.status_code == 400 - response_data = response.get_json() - assert "Invalid request parameters" in response_data["title"] diff --git a/tests/unit/api/http/handlers_test.py b/tests/unit/api/http/handlers_test.py deleted file mode 100644 index 0b586bcd7e4..00000000000 --- a/tests/unit/api/http/handlers_test.py +++ /dev/null @@ -1,153 +0,0 @@ -import tempfile -from http import HTTPStatus -from pathlib import Path -from unittest.mock import patch - - -def test_icon_builtin_handler(client): - """ - A built-in handler with a registered icon and a valid local path must - return the icon file (HTTP 200). - """ - with tempfile.TemporaryDirectory() as tmp: - icon_file = Path(tmp) / "icon.svg" - icon_file.write_text("") - - meta = { - "path": Path(tmp), - "icon": {"name": "icon.svg", "type": "svg", "data": ""}, - "import": {"success": True, "error_message": None}, - } - - with patch.object( - client.application.integration_controller, - "get_handlers_metadata", - return_value={"mysql": meta}, - ): - response = client.get("/api/handlers/mysql/icon", follow_redirects=True) - - status_code = response.status_code - response.close() - - assert status_code == HTTPStatus.OK - - -def test_icon_community_stub_no_path(client): - """ - An unfetched community handler stub (path=None, no 'icon' key) must - return HTTP 404 cleanly — no exception should propagate. 
- """ - meta = { - "path": None, - "import": { - "success": None, - "error_message": None, - "folder": "github_handler", - }, - "name": "github", - "support_level": "community", - } - - with patch.object( - client.application.integration_controller, - "get_handlers_metadata", - return_value={"github": meta}, - ): - response = client.get("/api/handlers/github/icon", follow_redirects=True) - - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_icon_unknown_handler(client): - """ - A request for an icon of an unknown handler must return HTTP 404. - """ - with patch.object( - client.application.integration_controller, - "get_handlers_metadata", - return_value={}, - ): - response = client.get("/api/handlers/does_not_exist/icon", follow_redirects=True) - - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_handler_info_returns_404_when_not_found(client): - """ - GET /handlers/ must return HTTP 404 when get_handler_meta() returns - None (unknown handler or failed fetch) instead of crashing with TypeError. - """ - with patch.object( - client.application.integration_controller, - "get_handler_meta", - return_value=None, - ): - response = client.get("/api/handlers/nonexistent", follow_redirects=True) - - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_handler_info_returns_200_without_icon_key(client): - """ - GET /handlers/ must not raise KeyError when the handler metadata has - no 'icon' key (community stub or handler without an icon). 
- """ - meta = { - "path": None, - "import": {"success": None, "error_message": None, "folder": "github_handler"}, - "name": "github", - "title": "GitHub", - "description": "GitHub handler", - "permanent": False, - "connection_args": None, - "class_type": None, - "type": "data", - "support_level": "community", - } - - with patch.object( - client.application.integration_controller, - "get_handler_meta", - return_value=meta, - ): - response = client.get("/api/handlers/github", follow_redirects=True) - - assert response.status_code == HTTPStatus.OK - body = response.get_json() - assert body["name"] == "github" - assert "path" not in body - assert "icon" not in body - - -def test_handlers_list_skips_none_meta(client): - """ - The listing endpoint must not crash when get_handlers_import_status() - returns None for a handler (e.g. an unfetched community handler that - failed to load). The None entry is silently skipped and the remaining - handlers are returned normally. - """ - mysql_meta = { - "path": None, - "import": {"success": True, "error_message": None, "folder": "mysql_handler"}, - "name": "mysql", - "type": "data", - "title": "MySQL", - "description": "MySQL handler", - "permanent": False, - "connection_args": None, - "class_type": "sql", - "support_level": "community", - "icon": None, - } - - with patch.object( - client.application.integration_controller, - "get_handlers_import_status", - return_value={"broken_community": None, "mysql": mysql_meta}, - ): - response = client.get("/api/handlers/", follow_redirects=True) - - assert response.status_code == HTTPStatus.OK - names = [h["name"] for h in response.get_json()] - assert "mysql" in names - assert "broken_community" not in names diff --git a/tests/unit/api/http/jobs_test.py b/tests/unit/api/http/jobs_test.py deleted file mode 100644 index d16a7b4b1ca..00000000000 --- a/tests/unit/api/http/jobs_test.py +++ /dev/null @@ -1,64 +0,0 @@ -from http import HTTPStatus - -import datetime as dt - - -def 
test_jobs_flow(client): - # --- create --- - - date_format = "%Y-%m-%d %H:%M:%S" - start_at = (dt.datetime.now() + dt.timedelta(days=1)).strftime(date_format) - end_at = (dt.datetime.now() + dt.timedelta(days=2)).strftime(date_format) - job = { - "name": "TEST_job", - "query": "select 1", - "if_query": "select 2", - "start_at": start_at, - "end_at": end_at, - "schedule_str": "every hour", - } - request = {"job": job} - - response = client.post("/api/projects/mindsdb/jobs", json=request) - assert response.status_code == HTTPStatus.OK - created_job = response.json - - for field in ["name", "query", "if_query", "schedule_str"]: - assert created_job[field] == job[field] - - # dates, created date could have milliseconds, compare as substring - assert job["start_at"] in created_job["start_at"] - assert job["end_at"] in created_job["end_at"] - - # --- get created --- - - response = client.get("/api/projects/mindsdb/jobs/TEST_job") - assert response.status_code == HTTPStatus.OK - job_resp = response.json - assert job_resp["query"] == job["query"] - - # --- get history --- - - response = client.get("/api/projects/mindsdb/jobs/TEST_job/history") - assert response.status_code == HTTPStatus.OK - # no executions - assert len(response.get_json()) == 0 - - # --- get list --- - - response = client.get("/api/projects/mindsdb/jobs") - assert response.status_code == HTTPStatus.OK - assert len(response.get_json()) == 1 - - # check first job - job_resp = response.json[0] - assert job_resp["name"] == "TEST_job" - - # --- delete job --- - - response = client.delete("/api/projects/mindsdb/jobs/TEST_job") - assert response.status_code == HTTPStatus.NO_CONTENT - - # got deleted - response = client.get("/api/projects/mindsdb/jobs/TEST_job") - assert response.status_code == HTTPStatus.NOT_FOUND diff --git a/tests/unit/api/http/knowledge_bases_test.py b/tests/unit/api/http/knowledge_bases_test.py deleted file mode 100644 index 4ccfccfe7a7..00000000000 --- 
a/tests/unit/api/http/knowledge_bases_test.py +++ /dev/null @@ -1,58 +0,0 @@ -from http import HTTPStatus - -from unittest.mock import patch - - -@patch("mindsdb.integrations.handlers.duckdb_faiss_handler.duckdb_faiss_handler.DuckDBFaissHandler") -@patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") -def test_update_kb_embeddings(mock_embedding, handler, client): - # for test of embeddings - mock_embedding().embeddings.return_value = [{"embedding": [0.1, 0.2]}] - - integration_data = { - "database": { - "name": "kb_vector_db", - "engine": "duckdb_faiss", - "parameters": {}, - } - } - response = client.post("/api/databases", json=integration_data, follow_redirects=True) - assert response.status_code == HTTPStatus.CREATED - - create_response = client.post( - "/api/projects/mindsdb/knowledge_bases", - follow_redirects=True, - json={ - "knowledge_base": { - "name": "test_kb", - "storage": {"database": "kb_vector_db", "table": "default_collection"}, - "params": { - "embedding_model": { - "provider": "gemini", - "model_name": "dummy_model", - "api_key": "embed-key-1", - } - }, - } - }, - ) - assert create_response.status_code == HTTPStatus.CREATED - - mock_embedding.reset_mock() - update_response = client.put( - "/api/projects/mindsdb/knowledge_bases/test_kb", - json={ - "knowledge_base": { - "params": { - "embedding_model": { - "api_key": "embed-key-2", - } - } - } - }, - follow_redirects=True, - ) - - assert update_response.status_code == HTTPStatus.OK - kwargs = mock_embedding.call_args_list[0][0][0] - assert kwargs["api_key"] == "embed-key-2" diff --git a/tests/unit/api/http/models_test.py b/tests/unit/api/http/models_test.py deleted file mode 100644 index 01ccc2c3531..00000000000 --- a/tests/unit/api/http/models_test.py +++ /dev/null @@ -1,173 +0,0 @@ -from http import HTTPStatus -import pytest -import sys - -from tests.unit.api.http.conftest import create_demo_db, create_dummy_ml - - -TEST_DB_NAME = "dummy_db" - - -def test_prepare(client): - 
create_demo_db(client) - create_dummy_ml(client) - - -def test_train_model(client): - # Learning Hub home rentals model. - create_query = """ - CREATE MODEL mindsdb.home_rentals_model - FROM example_db (SELECT * FROM demo_data.home_rentals limit 10) - PREDICT rental_price - USING engine = 'dummy_ml', join_learn_process = true - """ - train_data = {"query": create_query} - response = client.post("/api/projects/mindsdb/models", json=train_data, follow_redirects=True) - assert response.status_code == HTTPStatus.CREATED - created_model = response.get_json() - - expected_model = { - "accuracy": created_model["accuracy"], - "active": True, - "error": None, - "fetch_data_query": "SELECT * FROM demo_data.home_rentals limit 10", - "mindsdb_version": created_model["mindsdb_version"], - "name": "home_rentals_model", - "predict": "rental_price", - "status": "complete", - "version": 1, - } - for key, value in expected_model.items(): - assert created_model[key] == value - assert "'target': 'rental_price'" in created_model["problem_definition"] - - -def test_train_model_no_query_aborts(client): - response = client.post("/api/projects/mindsdb/models", json={}, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_train_model_no_project_aborts(client): - response = client.post("/api/projects/nani/models", json={"query": ""}, follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_train_model_invalid_query_aborts(client): - invalid_create_query = """ - CREATE MAWDOL mindsdb.home_rentals_model - FRUM example_db (SELECT * FROM demo_data.home_rentals] - PRAYDICT rental_price - """ - train_data = {"query": invalid_create_query} - response = client.post("/api/projects/mindsdb/models", json=train_data, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_train_model_no_create_query_aborts(client): - invalid_create_query = """ - SELECT * FROM models - """ - train_data = 
{"query": invalid_create_query} - response = client.post("/api/projects/mindsdb/models", json=train_data, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -@pytest.mark.skipif(sys.platform == "win32", reason="Fixme: Fails on Windoows.") -def test_train_model_already_exists_aborts(client): - # Learning Hub home rentals model. - create_query = """ - CREATE MODEL mindsdb.home_rentals_model_duplicate - FROM example_db (SELECT * FROM demo_data.home_rentals limit 10) - PREDICT rental_price - USING engine = 'dummy_ml', join_learn_process = true - """ - train_data = {"query": create_query} - response = client.post("/api/projects/mindsdb/models", json=train_data, follow_redirects=True) - assert response.status_code == HTTPStatus.CREATED - response = client.post("/api/projects/mindsdb/models", json=train_data, follow_redirects=True) - assert response.status_code == HTTPStatus.CONFLICT - - -def test_train_model_no_ml_handler_aborts(client): - # Learning Hub home rentals model. 
- create_query = """ - CREATE MODEL mindsdb.home_rentals_model_no_handler - FROM example_db (SELECT * FROM demo_data.home_rentals) - PREDICT rental_price - USING engine = 'vroomvroom' - """ - train_data = {"query": create_query} - response = client.post("/api/projects/mindsdb/models", json=train_data, follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_get_model_by_version(client): - response = client.get("/api/projects/mindsdb/models/home_rentals_model.1", follow_redirects=True) - model_ver_1 = response.get_json() - response = client.get("/api/projects/mindsdb/models/home_rentals_model", follow_redirects=True) - model_active_ver = response.get_json() - - expected_model = model_ver_1.copy() - expected_model["active"] = True - expected_model["error"] = None - expected_model["fetch_data_query"] = "SELECT * FROM demo_data.home_rentals limit 10" - expected_model["name"] = "home_rentals_model" - expected_model["predict"] = "rental_price" - expected_model["version"] = 1 - - assert model_ver_1 == expected_model - assert model_active_ver == expected_model - - -def test_get_model_no_project_aborts(client): - response = client.get("/api/projects/mawp/models/home_rentals_model", follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_predict_model_no_project_aborts(client): - response = client.post("/api/projects/mawp/models/home_rentals_model/predict", follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_predict_model_no_model_aborts(client): - response = client.post("/api/projects/mindsdb/models/plumbus/predict", follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_describe_model_no_project_aborts(client): - response = client.get("/api/projects/mawp/models/home_rentals_model/describe", follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_describe_model_no_model_aborts(client): - response = 
client.get("/api/projects/mindsdb/models/plumbus/describe", follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_delete_model(client): - # Learning Hub home rentals model. - create_query = """ - CREATE MODEL mindsdb.home_rentals_model_delete - FROM example_db (SELECT * FROM demo_data.home_rentals limit 10) - PREDICT rental_price - USING engine = 'dummy_ml', join_learn_process = true - """ - train_data = {"query": create_query} - response = client.post("/api/projects/mindsdb/models", json=train_data, follow_redirects=True) - assert response.status_code == HTTPStatus.CREATED - - response = client.delete("/api/projects/mindsdb/models/home_rentals_model_delete", follow_redirects=True) - assert response.status_code == HTTPStatus.NO_CONTENT - - response = client.get("/api/projects/mindsdb/models/home_rentals_model_delete", follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_delete_model_no_project_aborts(client): - response = client.delete("/api/projects/mawp/models/home_rentals_model", follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_delete_model_no_model_aborts(client): - response = client.delete("/api/projects/mindsdb/models/meeseeks", follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND diff --git a/tests/unit/api/http/projects_test.py b/tests/unit/api/http/projects_test.py deleted file mode 100644 index a65c278b51c..00000000000 --- a/tests/unit/api/http/projects_test.py +++ /dev/null @@ -1,20 +0,0 @@ -from http import HTTPStatus - - -def test_get_projects(client): - response = client.get('/api/projects', follow_redirects=True) - all_projects = response.get_json() - # Should contain default project. 
- assert len(all_projects) == 1 - assert all_projects[0]['name'] == 'mindsdb' - - -def test_get_project(client): - response = client.get('/api/projects/mindsdb', follow_redirects=True) - default_project = response.get_json() - assert default_project['name'] == 'mindsdb' - - -def test_get_project_not_found(client): - response = client.get('/api/projects/zoop', follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND diff --git a/tests/unit/api/http/tables_test.py b/tests/unit/api/http/tables_test.py deleted file mode 100644 index 27e90cb102e..00000000000 --- a/tests/unit/api/http/tables_test.py +++ /dev/null @@ -1,194 +0,0 @@ -from http import HTTPStatus - -from tests.unit.api.http.conftest import create_demo_db, create_dummy_db - - -TEST_DB_NAME = 'dummy_db' - - -def test_prepare(client): - create_demo_db(client) - create_dummy_db(client, TEST_DB_NAME) - - -def test_get_tables(client): - # Get default mindsdb tables. - response = client.get('/api/databases/mindsdb/tables', follow_redirects=True) - all_tables = response.get_json() - assert any(db['name'] == 'models' for db in all_tables) - - -def test_get_table(client): - # Get default mindsdb models. - response = client.get('/api/databases/mindsdb/tables/models', follow_redirects=True) - table = response.get_json() - expected_table = { - 'name': 'models', - 'type': 'data' - } - assert table == expected_table - - -def test_get_table_not_found(client): - # Get default mindsdb models. - response = client.get('/api/databases/mindsdb/tables/bloop', follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_create_table(client): - - table_data = { - 'table': { - 'name': 'test_create_table_house_sales', - 'select': 'SELECT * FROM example_db.house_sales limit 10', - 'replace': True - } - } - response = client.post(f'/api/databases/{TEST_DB_NAME}/tables', json=table_data, follow_redirects=True) - # Make sure we use the CREATED HTTP status code. 
- assert response.status_code == HTTPStatus.CREATED - new_table = response.get_json() - - expected_table = { - 'name': 'test_create_table_house_sales', - 'type': 'data', - } - assert new_table == expected_table - - # Clean up. - client.delete(f'/api/databases/{TEST_DB_NAME}/tables/test_create_table_house_sales', follow_redirects=True) - - -def test_create_table_no_table_aborts(client): - table_data = { - 'name': 'test_create_table_house_sales', - 'select': 'SELECT * FROM example_db.house_sales limit 10', - 'replace': True - } - response = client.post(f'/api/databases/{TEST_DB_NAME}/tables', json=table_data, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_create_table_no_name_aborts(client): - table_data = { - 'table': { - 'select': 'SELECT * FROM example_db.house_sales limit 10', - 'replace': True - } - } - response = client.post(f'/api/databases/{TEST_DB_NAME}/tables', json=table_data, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_create_table_no_select_aborts(client): - table_data = { - 'table': { - 'name': 'test_create_table_house_sales', - 'replace': True - } - } - response = client.post(f'/api/databases/{TEST_DB_NAME}/tables', json=table_data, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_create_table_in_project_aborts(client): - table_data = { - 'table': { - 'name': 'test_create_table_house_sales', - 'select': 'SELECT * FROM example_db.house_sales limit 10', - 'replace': True - } - } - response = client.post('/api/databases/mindsdb/tables', json=table_data, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_create_table_already_exists_aborts(client): - table_data = { - 'table': { - 'name': 'test_create_table_already_exists_aborts', - 'select': 'SELECT * FROM example_db.house_sales limit 10', - 'replace': False - } - } - response = 
client.post(f'/api/databases/{TEST_DB_NAME}/tables', json=table_data, follow_redirects=True) - assert response.status_code == HTTPStatus.CREATED - response = client.post(f'/api/databases/{TEST_DB_NAME}/tables', json=table_data, follow_redirects=True) - assert response.status_code == HTTPStatus.CONFLICT - - # Replace table should work fine if it already exists. - table_data['table']['replace'] = True - response = client.post(f'/api/databases/{TEST_DB_NAME}/tables', json=table_data, follow_redirects=True) - assert response.status_code == HTTPStatus.CREATED - - # Clean up. - client.delete(f'/api/databases/{TEST_DB_NAME}/tables/test_create_table_already_exists_aborts', follow_redirects=True) - - -def test_create_table_invalid_select_aborts(client): - table_data = { - 'table': { - 'name': 'test_create_table_house_sales', - 'select': 'SELECT AN ERROR AWWW YEAH', - 'replace': True - } - } - response = client.post(f'/api/databases/{TEST_DB_NAME}/tables', json=table_data, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def create_table_database_not_found_aborts(client): - table_data = { - 'table': { - 'name': 'test_create_table_house_sales', - 'select': 'SELECT * FROM example_db.house_sales limit 10', - 'replace': True - } - } - response = client.post('/api/databases/missingdb/tables', json=table_data, follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_create_table_bad_select_aborts(client): - table_data = { - 'table': { - 'name': 'test_create_table_house_sales', - 'select': 'SELECT wattt FROM example_db.house_sales limit 10', - 'replace': True - } - } - response = client.post(f'/api/databases/{TEST_DB_NAME}/tables', json=table_data, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_delete_table(client): - table_data = { - 'table': { - 'name': 'test_delete_table', - 'select': 'SELECT * FROM example_db.house_sales limit 10', - 'replace': True - } - } - 
response = client.post(f'/api/databases/{TEST_DB_NAME}/tables', json=table_data, follow_redirects=True) - assert response.status_code == HTTPStatus.CREATED - - response = client.delete(f'/api/databases/{TEST_DB_NAME}/tables/test_delete_table', follow_redirects=True) - assert response.status_code == HTTPStatus.NO_CONTENT - - response = client.get(f'/api/databases/{TEST_DB_NAME}/tables/test_delete_table', follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_delete_project_table_aborts(client): - response = client.delete('/api/databases/mindsdb/tables/models', follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_delete_table_database_not_found_aborts(client): - response = client.delete('/api/databases/databb/tables/models', follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_delete_table_not_found_aborts(client): - response = client.delete('/api/databases/example_db/tables/nonexistent_table', follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND diff --git a/tests/unit/api/http/test_integrations_passthrough.py b/tests/unit/api/http/test_integrations_passthrough.py deleted file mode 100644 index 8785926fb78..00000000000 --- a/tests/unit/api/http/test_integrations_passthrough.py +++ /dev/null @@ -1,198 +0,0 @@ -"""HTTP-layer tests for the /api/integrations//passthrough routes. - -Exercises the Flask blueprint in isolation: the session's integration -controller is mocked to return handlers that satisfy -PassthroughProtocol, so these tests do not touch real handlers and do -not make network calls. 
-""" - -from http import HTTPStatus -from unittest.mock import MagicMock, patch - -from mindsdb.integrations.libs.passthrough import PassthroughMixin -from mindsdb.integrations.libs.passthrough_types import PassthroughResponse - - -class _StubPassthroughHandler(PassthroughMixin): - """Handler double: the HTTP layer checks the PassthroughProtocol, then - calls `api_passthrough`. We bypass all mixin internals by overriding - `api_passthrough` directly so the endpoint test does not depend on - connection_data, base_url resolution, or the requests library.""" - - def __init__(self, response: PassthroughResponse): - self._response = response - self.calls: list = [] - - def api_passthrough(self, req): # type: ignore[override] - self.calls.append(req) - return self._response - - def test_passthrough(self): - return {"ok": True, "status_code": self._response.status_code} - - -def _patch_handler(handler): - """Patch FakeMysqlProxy so the endpoint resolves `name` to `handler`.""" - proxy = MagicMock() - proxy.session.integration_controller.get_data_handler.return_value = handler - return patch( - "mindsdb.api.http.namespaces.integrations.FakeMysqlProxy", - return_value=proxy, - ) - - -def test_passthrough_happy_path_returns_200_and_serialized_body(client): - handler = _StubPassthroughHandler( - PassthroughResponse( - status_code=200, - headers={"X-Safe": "1"}, - body={"hello": "world"}, - content_type="application/json", - ) - ) - - with _patch_handler(handler): - response = client.post( - "/api/integrations/any_ds/passthrough", - json={"method": "GET", "path": "/me"}, - ) - - assert response.status_code == HTTPStatus.OK - payload = response.get_json() - assert payload == { - "status_code": 200, - "headers": {"X-Safe": "1"}, - "body": {"hello": "world"}, - "content_type": "application/json", - } - # Request actually reached the mixin with the parsed PassthroughRequest. 
- assert len(handler.calls) == 1 - assert handler.calls[0].method == "GET" - assert handler.calls[0].path == "/me" - - -def test_passthrough_returns_501_when_handler_does_not_support_mixin(client): - # A bare object does not satisfy PassthroughProtocol, so the endpoint - # should surface passthrough_not_supported (501) instead of a 500. - with _patch_handler(object()): - response = client.post( - "/api/integrations/mysql/passthrough", - json={"method": "GET", "path": "/anything"}, - ) - - assert response.status_code == HTTPStatus.NOT_IMPLEMENTED - payload = response.get_json() - assert payload["error_code"] == "passthrough_not_supported" - assert "mysql" in payload["message"] - - -def test_passthrough_returns_400_on_invalid_method(client): - handler = _StubPassthroughHandler(PassthroughResponse(status_code=200, headers={}, body=None, content_type=None)) - - with _patch_handler(handler): - response = client.post( - "/api/integrations/any_ds/passthrough", - json={"method": "TRACE", "path": "/me"}, - ) - - assert response.status_code == HTTPStatus.BAD_REQUEST - payload = response.get_json() - assert payload["error_code"] == "invalid_request" - # The handler must not have been invoked when validation fails up front. - assert handler.calls == [] - - -def _patch_handler_modules(modules: dict): - return patch( - "mindsdb.api.http.namespaces.integrations.integration_controller.handler_modules", - modules, - create=True, - ) - - -def test_capabilities_returns_handlers_dict_and_legacy_list(client): - # Two opted-in handlers covering both auth modes, one non-opt-in, and - # one broken module that lacks a Handler attribute. auth_modes is - # surfaced from the handler's declarative `_auth_mode` class attr — - # not inferred from header format. 
- class _BearerHandler(PassthroughMixin): - pass # inherits _auth_mode = "bearer" - - class _CustomHeaderHandler(PassthroughMixin): - _auth_header_name = "X-Shopify-Access-Token" - _auth_header_format = "{token}" - _auth_mode = "custom" - - class _NotOptedIn: - pass - - bearer_mod = MagicMock() - bearer_mod.Handler = _BearerHandler - custom_mod = MagicMock() - custom_mod.Handler = _CustomHeaderHandler - plain_mod = MagicMock() - plain_mod.Handler = _NotOptedIn - no_handler_mod = MagicMock(spec=[]) - - fake_modules = { - "hubspot": bearer_mod, - "shopify": custom_mod, - "mysql": plain_mod, - "broken": no_handler_mod, - } - - with _patch_handler_modules(fake_modules): - response = client.get("/api/integrations/capabilities") - - assert response.status_code == HTTPStatus.OK - payload = response.get_json() - - # New structured shape: every opted-in handler appears with auth_modes - # and operations metadata. - assert payload["handlers"] == { - "hubspot": {"auth_modes": ["bearer"], "operations": ["passthrough"]}, - "shopify": {"auth_modes": ["custom"], "operations": ["passthrough"]}, - } - - # Legacy flat list: only bearer-auth handlers (Minds migration compat). - assert payload["bearer_passthrough"] == ["hubspot"] - - -def test_capabilities_auth_mode_is_declarative_not_format_derived(client): - # Handler keeps the default "Bearer {token}" header format but flags - # itself as oauth_refresh. The old format-matching heuristic would - # have bucketed this as "bearer"; the new declarative path returns - # the explicit mode and correctly omits it from the legacy list. - class _OAuthRefreshHandler(PassthroughMixin): - _auth_mode = "oauth_refresh" - # _auth_header_format intentionally left as the default. 
- - oauth_mod = MagicMock() - oauth_mod.Handler = _OAuthRefreshHandler - - with _patch_handler_modules({"hubspot_oauth": oauth_mod}): - response = client.get("/api/integrations/capabilities") - - assert response.status_code == HTTPStatus.OK - payload = response.get_json() - assert payload["handlers"] == { - "hubspot_oauth": {"auth_modes": ["oauth_refresh"], "operations": ["passthrough"]}, - } - # oauth_refresh is NOT surfaced in the legacy bearer-only list even - # though the underlying header format is still "Bearer {token}". - assert payload["bearer_passthrough"] == [] - - -def test_capabilities_empty_when_no_handlers_opted_in(client): - class _NotOptedIn: - pass - - plain_mod = MagicMock() - plain_mod.Handler = _NotOptedIn - - with _patch_handler_modules({"mysql": plain_mod}): - response = client.get("/api/integrations/capabilities") - - assert response.status_code == HTTPStatus.OK - payload = response.get_json() - assert payload == {"handlers": {}, "bearer_passthrough": []} diff --git a/tests/unit/api/http/test_queries.py b/tests/unit/api/http/test_queries.py deleted file mode 100644 index 0fe4a2aaf34..00000000000 --- a/tests/unit/api/http/test_queries.py +++ /dev/null @@ -1,110 +0,0 @@ -import json - - -class TestParameters: - def test_query_parameters(self, client): - # test filter, target - response = client.post( - "/api/sql/query", - json={ - "query": "select NAME, :x from information_schema.databases where NAME=:db_name", - "params": {"db_name": "mindsdb", "x": 1, "not_used": "abc"}, - }, - ) - data = response.json["data"] - assert data[0] == ["mindsdb", 1] - - # tuples - response = client.post( - "/api/sql/query", - json={ - "query": "select NAME, :x from information_schema.databases where NAME in :db_name", - "params": {"db_name": ["mindsdb", "my_pg"], "x": None}, - }, - ) - data = response.json["data"] - assert data[0] == ["mindsdb", None] - print(response) - - def test_absent_param(self, client): - # absent - response = client.post( - 
"/api/sql/query", - json={ - "query": "select NAME, :x from information_schema.databases where NAME = :db_name", - "params": {}, - }, - ) - assert "Parameter is not set" in response.json["error_message"] - - def test_json_param(self, client): - # absent - response = client.post( - "/api/sql/query", - json={ - "query": """ - create database my_db - with ENGINE = "dummy_data" - PARAMETERS = { - "username": @my_user - } - """, - "params": {"my_user": "test"}, - }, - ) - assert response.json["type"] == "ok" - - # check - response = client.post( - "/api/sql/query", - json={"query": "SELECT CONNECTION_DATA FROM information_schema.DATABASES where name='my_db'"}, - ) - connection_args = response.json["data"][0][0] - assert json.loads(connection_args)["username"] == "test" - - def test_parameter_extract(self, client): - def req(query): - response = client.post( - "/api/sql/query/utils/parametrize_constants", - json={"query": query}, - ) - return response.json - - res = req( - "select 1 year, SUM(case when month = 1 then total_sales else 0 end) as January from pg_demo.sales where total_sales = 100" - ) - - expected = "SELECT :year, sum(CASE WHEN month = :month THEN total_sales ELSE :January END) AS January FROM pg_demo.sales WHERE total_sales = :total_sales" - assert res["query"] == expected - assert res["databases"] == {"pg_demo": ["sales"]} - assert res["parameters"] == [ - {"name": "year", "value": 1, "type": "int"}, - {"name": "month", "value": 1, "type": "int"}, - {"name": "January", "value": 0, "type": "int"}, - {"name": "total_sales", "value": 100, "type": "int"}, - ] - - res = req("INSERT INTO postgres.employees (employee_id, first_name, last_name) VALUES (101, 'John', 'Doe')") - expected = "INSERT INTO postgres.employees(employee_id, first_name, last_name) VALUES (:employee_id, :first_name, :last_name)" - assert res["query"] == expected - assert res["databases"] == {"postgres": ["employees"]} - assert res["parameters"] == [ - {"name": "employee_id", "value": 101, 
"type": "int"}, - {"name": "first_name", "value": "John", "type": "str"}, - {"name": "last_name", "value": "Doe", "type": "str"}, - ] - - res = req( - "UPDATE postgres.products SET price = 10, comments = 'test comment' WHERE price = 11 AND brand='CoverON'" - ) - expected = ( - "update postgres.products set price=:price, comments=:comments where price = :price2 AND brand = :brand" - ) - assert res["query"] == expected - assert res["databases"] == {"postgres": ["products"]} - assert res["parameters"] == [ - {"name": "price", "value": 10, "type": "int"}, - {"name": "comments", "value": "test comment", "type": "str"}, - {"name": "price2", "value": 11, "type": "int"}, - {"name": "brand", "value": "CoverON", "type": "str"}, - ] diff --git a/tests/unit/api/http/test_sql_query.py b/tests/unit/api/http/test_sql_query.py deleted file mode 100644 index b40096ecdcc..00000000000 --- a/tests/unit/api/http/test_sql_query.py +++ /dev/null @@ -1,145 +0,0 @@ -""" -Tests for POST /sql/query endpoint with different response_format values: -1. DEFAULT (None) - returns JSON response -2. SSE ("sse") - returns Server-Sent Events stream -3. 
JSONLINES ("jsonlines") - returns JSON Lines stream -""" - -import json -from http import HTTPStatus -from unittest.mock import patch, MagicMock - -import pandas as pd - -from mindsdb.api.executor.data_types.sql_answer import SQLAnswer -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from mindsdb.api.executor.sql_query.result_set import ResultSet -from mindsdb.utilities.types.column import Column - - -def create_mock_sql_answer(): - """Create a mock SQLAnswer with table data for testing.""" - columns = [ - Column(name="id", alias="id"), - Column(name="name", alias="name"), - Column(name="value", alias="value"), - ] - - df = pd.DataFrame( - [ - [1, "test1", 100], - [2, "test2", 200], - [3, "test3", 300], - ] - ) - - result_set = ResultSet(columns=columns, df=df) - - return SQLAnswer( - resp_type=RESPONSE_TYPE.TABLE, - result_set=result_set, - ) - - -def check_response(response_data: dict): - # Check response structure for default format - assert response_data["type"] == "table" - assert "data" in response_data - assert "column_names" in response_data - assert "context" in response_data - - # Check data content - assert response_data["column_names"] == ["id", "name", "value"] - assert len(response_data["data"]) == 3 - assert response_data["data"][0] == [1, "test1", 100] - assert response_data["data"][1] == [2, "test2", 200] - assert response_data["data"][2] == [3, "test3", 300] - - -def setup_mock_proxy(mock_proxy_class): - """Configure mock proxy with default behavior.""" - mock_proxy = MagicMock() - mock_proxy_class.return_value = mock_proxy - mock_proxy.process_query.return_value = create_mock_sql_answer() - mock_proxy.get_context.return_value = {} - return mock_proxy - - -class TestSQLQueryResponseFormat: - @patch("mindsdb.api.http.namespaces.sql.FakeMysqlProxy") - def test_query_default_format(self, mock_proxy_class, client): - """Test POST /sql/query with default response format (no response_format parameter).""" - 
setup_mock_proxy(mock_proxy_class) - - response = client.post( - "/api/sql/query", - json={"query": "SELECT * FROM table"}, - ) - - assert response.status_code == HTTPStatus.OK - response_data = response.json - check_response(response_data) - - @patch("mindsdb.api.http.namespaces.sql.FakeMysqlProxy") - def test_query_sse_format(self, mock_proxy_class, client): - """Test POST /sql/query with SSE response format (response_format="sse").""" - setup_mock_proxy(mock_proxy_class) - - response = client.post( - "/api/sql/query", - json={ - "query": "SELECT * FROM table", - "response_format": "sse", - }, - ) - - assert response.status_code == HTTPStatus.OK - assert "text/event-stream" in response.content_type - - # Parse SSE response and build unified response dict - response_text = response.get_data(as_text=True) - lines = [line.replace("data: ", "") for line in response_text.split("\n") if line.startswith("data: ")] - - assert len(lines) > 1 - header = json.loads(lines[0]) - data_rows = json.loads(lines[1]) - - response_data = { - "type": header["type"], - "column_names": header["column_names"], - "data": data_rows, - "context": {}, - } - check_response(response_data) - - @patch("mindsdb.api.http.namespaces.sql.FakeMysqlProxy") - def test_query_jsonlines_format(self, mock_proxy_class, client): - """Test POST /sql/query with JSONLINES response format (response_format="jsonlines").""" - setup_mock_proxy(mock_proxy_class) - - response = client.post( - "/api/sql/query", - json={ - "query": "SELECT * FROM table", - "response_format": "jsonlines", - }, - ) - - assert response.status_code == HTTPStatus.OK - assert response.content_type == "application/jsonlines" - - # Parse JSONLINES response and build unified response dict - response_text = response.get_data(as_text=True) - lines = [line for line in response_text.split("\n") if line.strip()] - - assert len(lines) > 1 - header = json.loads(lines[0]) - data_rows = json.loads(lines[1]) - - response_data = { - "type": 
header["type"], - "column_names": header["column_names"], - "data": data_rows, - "context": {}, - } - check_response(response_data) diff --git a/tests/unit/api/http/views_test.py b/tests/unit/api/http/views_test.py deleted file mode 100644 index 97f603fdbc8..00000000000 --- a/tests/unit/api/http/views_test.py +++ /dev/null @@ -1,121 +0,0 @@ -from http import HTTPStatus - - -def test_get_view_project_not_found_abort(client): - response = client.get("/api/projects/zoopy/views", follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_get_view_not_found(client): - response = client.get("/api/projects/mindsdb/views/vroom", follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_create_view(client): - view_data = {"view": {"name": "test_create_view", "query": "SELECT * FROM models"}} - response = client.post("/api/projects/mindsdb/views", json=view_data, follow_redirects=True) - # Make sure we use the CREATED HTTP status code. - assert response.status_code == HTTPStatus.CREATED - new_view = response.get_json() - - expected_view = {"name": "test_create_view", "query": "SELECT * FROM models", "id": new_view["id"]} - - assert new_view == expected_view - - -def test_create_view_project_not_found_abort(client): - view_data = {"view": {"name": "test_create_view", "query": "SELECT * FROM example_db.house_sales"}} - response = client.post("/api/projects/muhproject/views", json=view_data, follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_create_view_already_exists_abort(client): - view_data = {"view": {"name": "test_create_view_duplicate", "query": "SELECT * FROM models"}} - response = client.post("/api/projects/mindsdb/views", json=view_data, follow_redirects=True) - assert response.status_code == HTTPStatus.CREATED - create_duplicate_response = client.post("/api/projects/mindsdb/views", json=view_data, follow_redirects=True) - # Make sure we use CONFLICT status code. 
- assert create_duplicate_response.status_code == HTTPStatus.CONFLICT - - -def test_create_view_no_view_aborts(client): - view_data = {"name": "test_create_view", "query": "SELECT * FROM example_db.house_sales"} - response = client.post("/api/projects/mindsdb/views", json=view_data, follow_redirects=True) - assert "400" in response.status - - -def test_create_view_no_name_aborts(client): - view_data = {"view": {"query": "SELECT * FROM example_db.house_sales"}} - response = client.post("/api/projects/mindsdb/views", json=view_data, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_create_view_no_query_aborts(client): - view_data = {"view": {"name": "test_create_view"}} - response = client.post("/api/projects/mindsdb/views", json=view_data, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_update_view(client): - view_data = {"view": {"name": "test_update_view", "query": "SELECT * FROM models"}} - - updated_view = {"view": {"query": "SELECT * FROM information_schema.databases"}} - client.post("/api/projects/mindsdb/views", json=view_data, follow_redirects=True) - response = client.put("/api/projects/mindsdb/views/test_update_view", json=updated_view, follow_redirects=True) - - assert response.status_code == HTTPStatus.OK - - updated_view = response.get_json() - expected_view = { - "name": "test_update_view", - "query": "SELECT * FROM information_schema.databases", - "id": updated_view["id"], - } - - assert updated_view == expected_view - - -def test_update_view_creates(client): - view_data = {"view": {"query": "SELECT * FROM models"}} - - response = client.put("/api/projects/mindsdb/views/test_update_view_creates", json=view_data, follow_redirects=True) - - assert response.status_code == HTTPStatus.CREATED - - created_view = response.get_json() - expected_view = {"name": "test_update_view_creates", "query": "SELECT * FROM models", "id": created_view["id"]} - - assert created_view == 
expected_view - - -def test_update_view_no_view_aborts(client): - view_data = {"name": "test_update_view", "query": "SELECT * FROM example_db.house_sales"} - response = client.put("/api/projects/mindsdb/views/test_update_view", json=view_data, follow_redirects=True) - assert response.status_code == HTTPStatus.BAD_REQUEST - - -def test_delete_view(client): - view_data = {"view": {"name": "test_delete_view", "query": "SELECT * FROM models"}} - # Delete newly created DB. - client.post("/api/projects/mindsdb/views", json=view_data, follow_redirects=True) - response = client.get("/api/projects/mindsdb/views/test_delete_view", follow_redirects=True) - - assert response.status_code == HTTPStatus.OK - - response = client.delete("/api/projects/mindsdb/views/test_delete_view", follow_redirects=True) - - # Make sure we return NO_CONTENT status since we don't return the deleted DB. - assert response.status_code == HTTPStatus.NO_CONTENT - - response = client.get("/api/projects/mindsdb/views/test_delete_view", follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_delete_view_does_not_exist(client): - response = client.delete("/api/projects/mindsdb/views/florp", follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND - - -def test_delete_view_project_not_found(client): - response = client.delete("/api/projects/dindsmb/views/test_delete_view", follow_redirects=True) - assert response.status_code == HTTPStatus.NOT_FOUND diff --git a/tests/unit/api/mcp/__init__.py b/tests/unit/api/mcp/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/unit/api/mcp/test_completions.py b/tests/unit/api/mcp/test_completions.py deleted file mode 100644 index ab03ecff049..00000000000 --- a/tests/unit/api/mcp/test_completions.py +++ /dev/null @@ -1,135 +0,0 @@ -""" -Unit tests for the MCP completion handler (mindsdb/api/mcp/completions.py). 
-""" - -import asyncio -from unittest.mock import MagicMock, patch - -from mcp.types import PromptReference, ResourceTemplateReference -from mcp.shared.memory import create_connected_server_and_client_session - -from mindsdb.api.mcp.mcp_instance import mcp - -# --------------------------------------------------------------------------- -# Patch targets -# --------------------------------------------------------------------------- - -_PATCH_GET_DB_NAMES = "mindsdb.api.mcp.completions._get_database_names" -_PATCH_CTX = "mindsdb.api.mcp.completions.ctx" -_PATCH_SESSION = "mindsdb.api.mcp.completions.SessionController" - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - - -def _run(coro): - return asyncio.run(coro) - - -def _complete(ref, argument: dict, context_arguments: dict | None = None) -> list[str]: - """Run a completion request and return the list of completion values.""" - - async def _inner(): - async with create_connected_server_and_client_session(mcp) as client: - result = await client.complete( - ref=ref, - argument=argument, - context_arguments=context_arguments, - ) - return result.completion.values - - return _run(_inner()) - - -_PROMPT_REF = PromptReference(type="ref/prompt", name="sample_table") -_RESOURCE_REF = ResourceTemplateReference( - type="ref/resource", - uri="schema://databases/{database_name}/tables", -) - - -def _make_table_mock(name: str) -> MagicMock: - t = MagicMock() - t.TABLE_NAME = name - return t - - -class TestDatabaseNameCompletion: - def test_returns_matching_databases(self): - with patch(_PATCH_GET_DB_NAMES, return_value=["pg_prod", "pg_staging", "mysql_db"]): - values = _complete(_PROMPT_REF, {"name": "database_name", "value": "pg"}) - - assert values == ["pg_prod", "pg_staging"] - - def test_prefix_filters_case_sensitively(self): - with patch(_PATCH_GET_DB_NAMES, return_value=["Postgres", "postgres"]): - 
values = _complete(_PROMPT_REF, {"name": "database_name", "value": "post"}) - - assert values == ["postgres"] - - def test_empty_prefix_returns_all_databases(self): - db_names = ["pg", "mysql", "mongo"] - with patch(_PATCH_GET_DB_NAMES, return_value=db_names): - values = _complete(_PROMPT_REF, {"name": "database_name", "value": ""}) - - assert values == db_names - - def test_no_match_returns_empty_list(self): - with patch(_PATCH_GET_DB_NAMES, return_value=["pg", "mysql"]): - values = _complete(_PROMPT_REF, {"name": "database_name", "value": "oracle"}) - - assert values == [] - - -class TestTableNameCompletion: - def test_returns_matching_tables(self): - with patch(_PATCH_SESSION) as SC: - SC.return_value.datahub.get.return_value.get_tables.return_value = [ - _make_table_mock("orders"), - _make_table_mock("order_items"), - _make_table_mock("users"), - ] - - # match 2/3 - values = _complete( - _RESOURCE_REF, - {"name": "table_name", "value": "ord"}, - context_arguments={"database_name": "pg"}, - ) - - SC.return_value.datahub.get.assert_called_with("pg") - assert values == ["orders", "order_items"] - - # match all - values = _complete( - _RESOURCE_REF, - {"name": "table_name", "value": ""}, - context_arguments={"database_name": "pg"}, - ) - - assert values == ["orders", "order_items", "users"] - - # match 0 - values = _complete( - _RESOURCE_REF, - {"name": "table_name", "value": "qwerty"}, - context_arguments={"database_name": "pg"}, - ) - - assert values == [] - - def test_missing_database_name_context_returns_empty(self): - """When database_name is not in context_arguments, return empty.""" - with patch(_PATCH_SESSION): - values = _complete( - _RESOURCE_REF, - {"name": "table_name", "value": "ord"}, - context_arguments=None, - ) - - assert values == [] - - def test_unknown_argument_name_returns_empty(self): - values = _complete(_PROMPT_REF, {"name": "unknown_param", "value": "foo"}) - assert values == [] diff --git a/tests/unit/api/mcp/test_prompts.py 
b/tests/unit/api/mcp/test_prompts.py deleted file mode 100644 index 2e7ea7b5d60..00000000000 --- a/tests/unit/api/mcp/test_prompts.py +++ /dev/null @@ -1,45 +0,0 @@ -""" -Unit tests for MCP prompts (mindsdb/api/mcp/prompts/*). - -mcp.get_prompt() is async; tests run it with asyncio.run(). -""" - -import json -import asyncio - -from mindsdb.api.mcp.mcp_instance import mcp - - -def _run(coro): - return asyncio.run(coro) - - -def _get_sample_table_prompt(database_name: str, table_name: str): - """Call sample_table prompt and return the GetPromptResult.""" - return _run(mcp.get_prompt("sample_table", {"database_name": database_name, "table_name": table_name})) - - -def _get_first_message_text(prompt: object) -> str: - """Return the text content of the first message.""" - raw = prompt.messages[0].content.text - # FastMCP serialises the TextContent to JSON inside the PromptMessage - return json.loads(raw)["text"] - - -class TestPrompt: - def test_sample_table_exists(self): - # sample_table exists and has description - prompts = _run(mcp.list_prompts()) - prompt = next(p for p in prompts if p.name == "sample_table") - assert prompt.description # non-empty - - def test_sample_table_content(self): - # test content of the prompt - result = _get_sample_table_prompt("MyDB", "mytable") - assert len(result.messages) == 1 - assert result.messages[0].role == "user" - assert result.messages[0].content.type == "text" - - text = _get_first_message_text(result) - assert "`MyDB`.`mytable`" in text - assert "limit 5" in text.lower() diff --git a/tests/unit/api/mcp/test_query_tool.py b/tests/unit/api/mcp/test_query_tool.py deleted file mode 100644 index bd4d0bcd430..00000000000 --- a/tests/unit/api/mcp/test_query_tool.py +++ /dev/null @@ -1,129 +0,0 @@ -""" -Unit tests for the MCP tools (mindsdb/api/mcp/tools/*). 
-""" - -import asyncio -import json -from unittest.mock import patch - - -_PATCH_PROXY = "mindsdb.api.mcp.tools.query.FakeMysqlProxy" - - -def _run(coro): - """Run an async coroutine synchronously.""" - return asyncio.run(coro) - - -def _call_tool(sql: str, context=None): - """Call the MCP query tool synchronously and return parsed JSON.""" - args = {"query": sql} - if context is not None: - args["context"] = context - - from mindsdb.api.mcp.mcp_instance import mcp - - content, _ = _run(mcp.call_tool("query", args)) - return json.loads(content[0].text) - - -def _make_proxy_ok(mock_proxy_cls, affected_rows=0): - """Configure mock proxy to return an OK response.""" - mock_proxy_cls.return_value.process_query.return_value.dump_http_response.return_value = { - "type": "ok", - "affected_rows": affected_rows, - } - return mock_proxy_cls.return_value - - -def _make_proxy_table(mock_proxy_cls, column_names, data): - """Configure mock proxy to return a table response.""" - mock_proxy_cls.return_value.process_query.return_value.dump_http_response.return_value = { - "type": "table", - "column_names": column_names, - "data": data, - } - return mock_proxy_cls.return_value - - -def _make_proxy_error(mock_proxy_cls, error_message, error_code=0): - """Configure mock proxy to return an error response.""" - mock_proxy_cls.return_value.process_query.return_value.dump_http_response.return_value = { - "type": "error", - "error_code": error_code, - "error_message": error_message, - } - return mock_proxy_cls.return_value - - -class TestResponseTypes: - def test_select_returns_table_type(self): - expected_data = [[1, "alice"], [2, "bob"]] - columns_list = ["id", "name"] - with patch(_PATCH_PROXY) as MockProxy: - _make_proxy_table(MockProxy, columns_list, expected_data) - result = _call_tool("SELECT * FROM mydb.users") - - assert result["type"] == "table" - assert result["column_names"] == columns_list - assert result["data"] == expected_data - - def test_select_empty_result(self): - 
columns_list = ["id", "name"] - with patch(_PATCH_PROXY) as MockProxy: - _make_proxy_table(MockProxy, columns_list, []) - result = _call_tool("SELECT * FROM mydb.users WHERE 1=0") - - assert result["type"] == "table" - assert result["column_names"] == columns_list - assert result["data"] == [] - - def test_insert_returns_ok_type(self): - with patch(_PATCH_PROXY) as MockProxy: - _make_proxy_ok(MockProxy, affected_rows=1) - result = _call_tool("INSERT INTO mydb.t (id) VALUES (1)") - - assert result["type"] == "ok" - assert result["affected_rows"] == 1 - - def test_proxy_error_response_returns_error_type(self): - error_message = "Table 'x' doesn't exist" - with patch(_PATCH_PROXY) as MockProxy: - _make_proxy_error(MockProxy, error_message, error_code=123) - result = _call_tool("SELECT * FROM mydb.x") - - assert result["type"] == "error" - assert result["error_message"] == error_message - assert result["error_code"] == 123 - - def test_exception_in_process_query_returns_error_type(self): - error_message = "connection refused" - with patch(_PATCH_PROXY) as MockProxy: - MockProxy.return_value.process_query.side_effect = Exception(error_message) - result = _call_tool("SELECT 1") - - assert result["type"] == "error" - assert result["error_message"] == error_message - - -class TestContextParameter: - def test_context_is_passed_to_set_context(self): - with patch(_PATCH_PROXY) as MockProxy: - proxy = _make_proxy_ok(MockProxy) - _call_tool("SELECT 1", context={"db": "my_postgres"}) - - proxy.set_context.assert_called_once_with({"db": "my_postgres"}) - - def test_omitted_context_defaults_to_empty_dict(self): - with patch(_PATCH_PROXY) as MockProxy: - proxy = _make_proxy_ok(MockProxy) - _call_tool("SELECT 1") # no context argument - - proxy.set_context.assert_called_once_with({}) - - def test_explicit_none_context_defaults_to_empty_dict(self): - with patch(_PATCH_PROXY) as MockProxy: - proxy = _make_proxy_ok(MockProxy) - _call_tool("SELECT 1", context=None) - - 
proxy.set_context.assert_called_once_with({}) diff --git a/tests/unit/api/mcp/test_resources.py b/tests/unit/api/mcp/test_resources.py deleted file mode 100644 index 6bac3891875..00000000000 --- a/tests/unit/api/mcp/test_resources.py +++ /dev/null @@ -1,177 +0,0 @@ -""" -Unit tests for MCP resources (mindsdb/api/mcp/resources/*) -""" - -import asyncio -import json -from unittest.mock import MagicMock, patch - -import pandas as pd - -from mindsdb.integrations.libs.response import TableResponse as HandlerTableResponse -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE - - -_PATCH_SESSION = "mindsdb.api.mcp.resources.schema.SessionController" -_PATCH_TABLE_RESPONSE = "mindsdb.api.mcp.resources.schema.TableResponse" -_PATCH_RESPONSE_TYPE = "mindsdb.api.mcp.resources.schema.RESPONSE_TYPE" - - -def _run(coro): - return asyncio.run(coro) - - -def _read(uri: str) -> list: - """Read a resource and return parsed JSON payload.""" - from mindsdb.api.mcp.mcp_instance import mcp - - contents = list(_run(mcp.read_resource(uri))) - return json.loads(contents[0].content) - - -def _make_table_mock(name: str, table_type: str = "BASE TABLE", schema: str = "public") -> MagicMock: - t = MagicMock() - t.TABLE_NAME = name - t.TABLE_TYPE = table_type - t.TABLE_SCHEMA = schema - return t - - -def _make_columns_table_response(rows: list[dict]) -> MagicMock: - """Build a mock HandlerTableResponse with COLUMNS_TABLE type.""" - tr = MagicMock(spec=HandlerTableResponse) - tr.type = RESPONSE_TYPE.COLUMNS_TABLE - tr.fetchall.return_value = pd.DataFrame(rows) - return tr - - -def _make_kb(name, project, metadata_cols=None, content_cols=None, id_col="id"): - return { - "name": name, - "project": project, - "metadata_columns": metadata_cols or [], - "content_columns": content_cols or ["body"], - "id_column": id_col, - } - - -class TestListDatabases: - def test_returns_only_data_type_databases(self): - from mindsdb.api.mcp.mcp_instance import mcp - - with patch(_PATCH_SESSION) as 
SC: - SC.return_value.database_controller.get_list.return_value = [ - {"name": "pg_prod", "type": "data"}, - {"name": "mindsdb", "type": "project"}, - {"name": "mysql_db", "type": "data"}, - ] - - result = list(_run(mcp.read_resource("schema://databases"))) - - assert len(result) == 1 - assert json.loads(result[0].content) == ["pg_prod", "mysql_db"] - assert result[0].mime_type == "application/json" - - def test_filters_out_all_non_data_types(self): - with patch(_PATCH_SESSION) as SC: - SC.return_value.database_controller.get_list.return_value = [ - {"name": "mindsdb", "type": "project"}, - {"name": "files", "type": "files"}, - ] - result = _read("schema://databases") - - assert result == [] - - -class TestDbTables: - def test_returns_table_names(self): - with patch(_PATCH_SESSION) as SC: - SC.return_value.datahub.get.return_value.get_tables.return_value = [ - _make_table_mock("orders"), - _make_table_mock("users"), - ] - result = _read("schema://databases/mydb/tables") - - SC.return_value.datahub.get.assert_called_once_with("mydb") - - names = [t["TABLE_NAME"] for t in result] - assert names == ["orders", "users"] - assert set(result[0].keys()) == {"TABLE_NAME", "TABLE_TYPE", "TABLE_SCHEMA"} - - def test_returns_table_type_and_schema(self): - with patch(_PATCH_SESSION) as SC: - SC.return_value.datahub.get.return_value.get_tables.return_value = [ - _make_table_mock("orders", table_type="VIEW", schema="myschema"), - ] - result = _read("schema://databases/mydb/tables") - - assert result[0]["TABLE_TYPE"] == "VIEW" - assert result[0]["TABLE_SCHEMA"] == "myschema" - - def test_empty_database_returns_empty_list(self): - with patch(_PATCH_SESSION) as SC: - SC.return_value.datahub.get.return_value.get_tables.return_value = [] - result = _read("schema://databases/emptydb/tables") - - assert result == [] - - -class TestDbTableColumns: - def test_returns_column_names_and_types(self): - rows = [ - {"COLUMN_NAME": "id", "MYSQL_DATA_TYPE": "int"}, - {"COLUMN_NAME": "email", 
"MYSQL_DATA_TYPE": "varchar(255)"}, - ] - with ( - patch(_PATCH_SESSION) as SC, - patch(_PATCH_TABLE_RESPONSE, HandlerTableResponse), - patch(_PATCH_RESPONSE_TYPE, RESPONSE_TYPE), - ): - SC.return_value.integration_controller.get_data_handler.return_value.get_columns.return_value = ( - _make_columns_table_response(rows) - ) - - result = _read("schema://databases/mydb/tables/orders/columns") - SC.return_value.integration_controller.get_data_handler.assert_called_once_with("mydb") - SC.return_value.integration_controller.get_data_handler.return_value.get_columns.assert_called_once_with( - "orders" - ) - - assert result[0] == {"COLUMN_NAME": "id", "MYSQL_DATA_TYPE": "int"} - assert result[1] == {"COLUMN_NAME": "email", "MYSQL_DATA_TYPE": "varchar(255)"} - - -class TestListKnowledgeBases: - def test_returns_knowledge_bases_from_all_projects(self): - with patch(_PATCH_SESSION) as SC: - SC.return_value.datahub.get_projects_names.return_value = ["mindsdb", "my_project"] - SC.return_value.kb_controller.list.side_effect = [ - [_make_kb("kb1", "mindsdb")], - [_make_kb("kb2", "my_project")], - ] - result = _read("schema://knowledge_bases") - - assert len(result) == 2 - assert result[0]["name"] == "kb1" - assert result[1]["name"] == "kb2" - - def test_returns_correct_kb_fields(self): - kb = _make_kb( - "docs_kb", - "mindsdb", - metadata_cols=["source", "date"], - content_cols=["body"], - id_col="doc_id", - ) - with patch(_PATCH_SESSION) as SC: - SC.return_value.datahub.get_projects_names.return_value = ["mindsdb"] - SC.return_value.kb_controller.list.return_value = [kb] - result = _read("schema://knowledge_bases") - - assert result[0] == { - "name": "docs_kb", - "project": "mindsdb", - "metadata_columns": ["source", "date"], - "content_columns": ["body"], - "id_column": "doc_id", - } diff --git a/tests/unit/dummy_llm_handler/__about__.py b/tests/unit/dummy_llm_handler/__about__.py deleted file mode 100644 index da52d3bdb20..00000000000 --- 
a/tests/unit/dummy_llm_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = '' -__package_name__ = 'mindsdb_dummy_llm_handler' -__version__ = '0.0.1' -__description__ = "" -__author__ = '' -__github__ = '' -__pypi__ = '' -__license__ = '' -__copyright__ = '' diff --git a/tests/unit/dummy_llm_handler/__init__.py b/tests/unit/dummy_llm_handler/__init__.py deleted file mode 100644 index c9eec0c3836..00000000000 --- a/tests/unit/dummy_llm_handler/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description - -try: - from .dummy_llm_handler import DummyHandler as Handler - - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = '' -name = 'dummy_llm' -type = HANDLER_TYPE.ML -permanent = False - -__all__ = ['Handler', 'version', 'name', 'type', 'title', 'description', 'import_error'] diff --git a/tests/unit/dummy_llm_handler/dummy_llm_handler.py b/tests/unit/dummy_llm_handler/dummy_llm_handler.py deleted file mode 100644 index eb59ad45f88..00000000000 --- a/tests/unit/dummy_llm_handler/dummy_llm_handler.py +++ /dev/null @@ -1,28 +0,0 @@ -import pandas as pd -from mindsdb.integrations.libs.base import BaseMLEngine - - -class DummyHandler(BaseMLEngine): - name = 'dummy_llm' - - @staticmethod - def create_validation(target, args=None, **kwargs): - if args is not None: - args['target'] = target - if 'error' in args.get('using', {}): - raise RuntimeError() - - def create(self, target, args=None, **kwargs): - pass - - def predict(self, df, args=None): - df['answer'] = "random text answer" - df['predictor_id'] = self.model_storage.predictor_id - return df[['predicted', 'predictor_id']] - - def describe(self, attribute=None): - if attribute == 'info': - return pd.DataFrame([['dummy', 0]], columns=['type', 'version']) - else: - tables = ['info'] - return pd.DataFrame(tables, columns=['tables']) diff --git 
a/tests/unit/dummy_ml_handler/__about__.py b/tests/unit/dummy_ml_handler/__about__.py deleted file mode 100644 index 776383d2d12..00000000000 --- a/tests/unit/dummy_ml_handler/__about__.py +++ /dev/null @@ -1,9 +0,0 @@ -__title__ = '' -__package_name__ = 'mindsdb_dummy_handler' -__version__ = '0.0.1' -__description__ = "" -__author__ = '' -__github__ = '' -__pypi__ = '' -__license__ = '' -__copyright__ = '' diff --git a/tests/unit/dummy_ml_handler/__init__.py b/tests/unit/dummy_ml_handler/__init__.py deleted file mode 100644 index cec316195f1..00000000000 --- a/tests/unit/dummy_ml_handler/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from mindsdb.integrations.libs.const import HANDLER_TYPE - -from .__about__ import __version__ as version, __description__ as description -from .creation_args import creation_args -from .model_using_args import model_using_args -try: - from .dummy_ml_handler import DummyHandler as Handler - import_error = None -except Exception as e: - Handler = None - import_error = e - -title = '' -name = 'dummy_ml' -type = HANDLER_TYPE.ML -permanent = False - -__all__ = [ - 'Handler', 'version', 'name', 'type', 'title', 'description', 'import_error', - 'creation_args', 'model_using_args' -] diff --git a/tests/unit/dummy_ml_handler/creation_args.py b/tests/unit/dummy_ml_handler/creation_args.py deleted file mode 100644 index 57f1869b4e4..00000000000 --- a/tests/unit/dummy_ml_handler/creation_args.py +++ /dev/null @@ -1,14 +0,0 @@ -from collections import OrderedDict - -from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - - -creation_args = OrderedDict( - api_key={ - 'type': ARG_TYPE.STR, - 'description': 'Key', - 'required': False, - 'label': 'API key', - 'secret': True - } -) diff --git a/tests/unit/dummy_ml_handler/dummy_ml_handler.py b/tests/unit/dummy_ml_handler/dummy_ml_handler.py deleted file mode 100644 index 6d322cfb1fe..00000000000 --- a/tests/unit/dummy_ml_handler/dummy_ml_handler.py +++ /dev/null @@ -1,69 
+0,0 @@ -import pandas as pd -from mindsdb.integrations.libs.base import BaseMLEngine - - -class DummyHandler(BaseMLEngine): - name = 'dummy_ml' - - @staticmethod - def create_validation(target, args=None, **kwargs): - if args is not None: - args['target'] = target - if 'error' in args.get('using', {}): - raise RuntimeError() - - def create(self, target, args=None, **kwargs): - args = args['using'] - args['target'] = target - self.model_storage.json_set('args', args) - - def predict(self, df, args=None): - df['predicted'] = 42 - df['predictor_id'] = self.model_storage.predictor_id - df['row_id'] = self.model_storage.predictor_id * 100 + df.reset_index().index - - output_columns = ['predicted', 'predictor_id', 'row_id', 'engine_args'] - - if 'engine_args' in df.columns: - # could exist from previous model - df = df.drop('engine_args', axis=1) - - model_args = self.model_storage.json_get('args') - engine_args = self.engine_storage.json_get('engine_args') - - target = model_args['target'] - # check input - if 'output' in model_args: - df[target] = [model_args['output']] * len(df) - if target not in output_columns: - output_columns.append(target) - if 'input' in df.columns: - df['output'] = df['input'] - output_columns.append('output') - - df.insert(len(df.columns), 'engine_args', [engine_args] * len(df)) - - return df[output_columns] - - def _get_model_verison(self): - return self.model_storage._get_model_record( - self.model_storage.predictor_id - ).version - - def describe(self, attribute=None): - if attribute == 'info': - return pd.DataFrame( - [['dummy', self._get_model_verison()]], - columns=['type', 'version'] - ) - elif isinstance(attribute, list): - return pd.DataFrame( - [['.'.join(attribute), self._get_model_verison()]], - columns=['attribute', 'version'] - ) - else: - tables = ['info'] - return pd.DataFrame(tables, columns=['tables']) - - def create_engine(self, connection_args): - self.engine_storage.json_set('engine_args', connection_args) diff --git 
a/tests/unit/dummy_ml_handler/model_using_args.py b/tests/unit/dummy_ml_handler/model_using_args.py deleted file mode 100644 index f6947f8dcff..00000000000 --- a/tests/unit/dummy_ml_handler/model_using_args.py +++ /dev/null @@ -1,5 +0,0 @@ -model_using_args = { - 'api_key': { - 'secret': True - } -} diff --git a/tests/unit/executor/data/test.xlsx b/tests/unit/executor/data/test.xlsx deleted file mode 100644 index 5c8e944e931..00000000000 Binary files a/tests/unit/executor/data/test.xlsx and /dev/null differ diff --git a/tests/unit/executor/test_agent.py b/tests/unit/executor/test_agent.py deleted file mode 100644 index a41d36f0a6c..00000000000 --- a/tests/unit/executor/test_agent.py +++ /dev/null @@ -1,782 +0,0 @@ -import os -import json - -from unittest.mock import patch, AsyncMock - -from sqlalchemy.orm.attributes import flag_modified -import pandas as pd -import pytest -import sys -from openai.types.chat import ChatCompletion -from tests.unit.executor_test_base import BaseExecutorDummyML -from tests.unit.executor.test_knowledge_base import set_embedding - - -def action_response(type="final_query", sql="", text=""): - if text: - type = "final_text" - return json.dumps({"sql_query": sql, "type": type, "text": text, "short_description": "a tool"}) - - -def set_openai_completion(mock_openai, llm_response, add_planning=True): - if isinstance(llm_response, str): - llm_responses = [ - action_response(sql=f"select '{llm_response}' as answer"), - ] - elif not isinstance(llm_response, list): - llm_responses = [llm_response] - else: - llm_responses = llm_response - - if add_planning: - # add plan response - llm_responses.insert(0, '{"plan":"my plan is ...", "estimated_steps":3}') - - mock_openai.agent_calls = [] - calls = [] - responses = [] - - async def resp_f(messages, *args, **kwargs): - # return all responses in sequence, then yield only latest from list - if len(llm_responses) == 1: - resp = llm_responses[0] - else: - resp = llm_responses.pop(0) - - # log agent 
calls, exclude previous part of message - combined_message = "\n".join([m["content"] for m in messages if m["content"]]) - agent_call = combined_message - if len(calls) > 0: - # remove previous call - prev_call = calls[-1] - if agent_call.startswith(prev_call): - agent_call = agent_call[len(prev_call) :] - # remove previous agent response - prev_response = responses[-1] - pos = agent_call.find(prev_response) - if pos != -1: - agent_call = agent_call[pos + len(prev_response) :] - - mock_openai.agent_calls.append(agent_call) - calls.append(combined_message) - responses.append(resp) - - num = len(mock_openai.agent_calls) - data = { - "id": "chatcmpl-123", - "object": "chat.completion", - "created": 1234567890 + num, - "model": "gpt-3.5-turbo", - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "tool_calls": [ - { - "id": f"call_{num}", - "type": "function", - "function": {"name": "final_result", "arguments": resp}, - } - ], - }, - "finish_reason": "stop", - } - ], - "usage": {"prompt_tokens": 10, "completion_tokens": 2, "total_tokens": 12}, - } - - return ChatCompletion(**data) - - mock_openai().chat.completions.create = AsyncMock(side_effect=resp_f) - - -def get_dataset_planets(): - data = [ - ["1000", "Moon"], - ["1001", "Jupiter"], - ["1002", "Venus"], - ] - return pd.DataFrame(data, columns=["id", "planet_name"]) - - -class TestAgent(BaseExecutorDummyML): - def setup_method(self): - super().setup_method() - from mindsdb.utilities.config import config - - config["knowledge_bases"]["disable_autobatch"] = True - - @patch("pydantic_ai.providers.openai.AsyncOpenAI") - def test_openai_provider(self, mock_openai): - # test response - set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) - - self.run_sql(""" - CREATE AGENT my_agent - USING - model = { - 'provider': 'openai', - 'model_name': "gpt-3.5-turbo", - 'api_key': '-key-' - }, - prompt_template="Answer the user input in a helpful way" - """) - - agent_response = 
"how can I assist you today?" - set_openai_completion(mock_openai, agent_response) - - ret = self.run_sql("select * from my_agent where question = 'hi'") - - # check model params - assert mock_openai.call_args_list[-1][1]["api_key"] == "-key-" - assert mock_openai().chat.completions.create.call_args_list[-1][1]["model"] == "gpt-3.5-turbo" - - assert agent_response in ret.answer[0] - - # test join - df = pd.DataFrame( - [ - {"q": "hi"}, - ] - ) - self.save_file("questions", df) - - mock_openai.reset_mock() - set_openai_completion(mock_openai, agent_response) - - ret = self.run_sql(""" - select * from files.questions t - join my_agent a on a.question=t.q - """) - - assert agent_response in ret.answer[0] - - # empty query - mock_openai.reset_mock() - set_openai_completion(mock_openai, agent_response) - - ret = self.run_sql(""" - select * from files.questions t - join my_agent a on a.question=t.q - where t.q = '' - """) - assert len(ret) == 0 - - @patch("mindsdb.utilities.config.Config.get") - @patch("pydantic_ai.providers.openai.AsyncOpenAI") - def test_agent_with_default_llm_params(self, mock_openai, mock_config_get): - # Mock the config.get method to return default LLM parameters - def config_get_side_effect(key, default=None): - if key == "default_llm": - return { - "provider": "openai", - "model_name": "gpt-4o", - "api_key": "sk-abc123", - "base_url": "https://config-url.com/v1", - "api_version": "2024-02-01", - "method": "multi-class", - } - elif key == "default_project": - return "mindsdb" - elif key == "cache": - return {"type": "none"} - return default - - mock_config_get.side_effect = config_get_side_effect - - # Create an agent with only provider specified - should use default LLM params - set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) - self.run_sql(""" - CREATE AGENT default_params_agent - USING - model = { - 'provider': 'openai', - 'base_url': 'https://custom-url.com/', - 'model_name': "gpt-3" - }, - 
prompt_template="Answer the user input in a helpful way" - """) - agent_response = "how can I assist you today?" - set_openai_completion(mock_openai, agent_response) - - # Check that the agent was created with the default parameters - agent_info = self.run_sql("SELECT * FROM information_schema.agents WHERE name = 'default_params_agent'") - - # Verify the agent has the user-specified parameters but not default parameters - agent_params = json.loads(agent_info["PARAMS"].iloc[0]) - assert agent_params.get("prompt_template") == "Answer the user input in a helpful way" - assert "gpt-3" in agent_info["MODEL"][0] - - # Default parameters should NOT be stored in the database - # They will be applied at runtime via get_agent_llm_params - assert "base_url" not in agent_params - assert "api_version" not in agent_params - assert "method" not in agent_params - - ret = self.run_sql("select * from default_params_agent where question = 'hi'") - assert agent_response in ret.answer[0] - - assert mock_openai.call_args_list[-1][1]["api_key"] == "sk-abc123" - - assert mock_openai.call_args_list[-1][1]["base_url"] == "https://custom-url.com/" # from agent - assert mock_openai().chat.completions.create.call_args_list[-1][1]["model"] == "gpt-3" # from agent - - # --- Test that agent creation works with minimal syntax using default_llm config --- - - # Create an agent with minimal syntax - should use all default LLM params - set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) - self.run_sql(""" - CREATE AGENT minimal_syntax_agent - USING - data = { } - """) - - mock_openai.reset_mock() - agent_response = "how can I assist you today?" 
- set_openai_completion(mock_openai, agent_response) - - ret = self.run_sql("select * from minimal_syntax_agent where question = 'hi'") - assert agent_response in ret.answer[0] - - assert mock_openai.call_args_list[-1][1]["api_key"] == "sk-abc123" # from default - assert mock_openai.call_args_list[-1][1]["base_url"] == "https://config-url.com/v1" # from default - assert mock_openai().chat.completions.create.call_args_list[-1][1]["model"] == "gpt-4o" # from agent - - @pytest.mark.skipif(sys.platform == "darwin", reason="Fails on macOS") - @patch("pydantic_ai.providers.openai.AsyncOpenAI") - def test_agent_stream(self, mock_openai): - set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) - self.run_sql(""" - CREATE AGENT my_agent - USING - model={ - "model_name": "gpt-3.5-turbo", - "provider": "openai", - "api_key": "--" - }, - prompt_template="Answer the user input in a helpful way" - """) - - agent_response = "how can I assist you today?" - set_openai_completion(mock_openai, agent_response) - - agents_controller = self.command_executor.session.agents_controller - agent = agents_controller.get_agent("my_agent") - - messages = [{"question": "hi"}] - found = False - for chunk in agents_controller.get_completion(agent, messages, stream=True): - if agent_response in str(chunk.get("content")): - found = True - if not found: - raise AttributeError("Agent response is not found") - - def _create_kb_storage(self, kb_name): - self.run_sql(f""" - create database db_{kb_name} - with engine='duckdb_faiss' - """) - return f"db_{kb_name}.default_collection" - - def _drop_kb_storage(self, vector_table_name): - self.run_sql(f"drop table {vector_table_name}") - - db_name = vector_table_name.split(".")[0] - - self.run_sql(f"drop database {db_name}") - - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - @patch("pydantic_ai.providers.openai.AsyncOpenAI") - def test_agent_retrieval(self, mock_openai, mock_embedding): - 
set_embedding(mock_embedding) - - vector_table_name = self._create_kb_storage("kb_review") - self.run_sql(f""" - create knowledge base kb_review - using - storage={vector_table_name}, - embedding_model = {{ - "provider": "bedrock", - "model_name": "dummy_model", - "api_key": "dummy_key" - }} - """) - - os.environ["OPENAI_API_KEY"] = "--" - - set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) - self.run_sql(""" - create agent retrieve_agent - using - model={ - "model_name": "gpt-3.5-turbo", - "provider": "openai" - }, - prompt_template='Answer the user input in a helpful way using tools', - data = { - "knowledge_bases": ["kb_review"] - } - """) - - agent_response = "the answer is yes" - user_question = "answer my question" - - set_openai_completion( - mock_openai, - [ - action_response( - sql=f"select * from kb_review where content='{user_question}'", type="exploratory_query" - ), - action_response(sql=f"SELECT '{agent_response}' answer"), - ], - ) - - with patch("mindsdb.interfaces.knowledge_base.controller.KnowledgeBaseTable.select_query") as kb_select: - # kb response - kb_select.return_value = pd.DataFrame([{"id": 1, "content": "ok", "metadata": {}}]) - ret = self.run_sql(f""" - select * from retrieve_agent where question = '{user_question}' - """) - - # check agent output - assert agent_response in ret.answer[0] - - # check kb input - args, _ = kb_select.call_args - assert user_question in args[0].where.args[1].value - - self.run_sql("drop knowledge base kb_review") - self._drop_kb_storage(vector_table_name) - - # should not be possible to drop demo agent - @patch("pydantic_ai.providers.openai.AsyncOpenAI") - def test_drop_demo_agent(self, mock_openai): - """should not be possible to drop demo agent""" - from mindsdb.api.executor.exceptions import ExecutorException - - set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) - self.run_sql(""" - CREATE AGENT my_demo_agent - USING - model = { - 
'provider': 'openai', - 'model_name': "gpt-3.5-turbo", - 'api_key': '-key-' - }, - prompt_template="--" - """) - - # mark as demo in db - agent = self.db.Agents.query.filter_by(name="my_demo_agent").first() - agent.params["is_demo"] = True - flag_modified(agent, "params") - self.db.session.commit() - with pytest.raises(ExecutorException): - self.run_sql("drop agent my_demo_agent") - - @patch("pydantic_ai.providers.openai.AsyncOpenAI") - def test_agent_default_prompt_template(self, mock_openai): - """Test that agents work correctly with default prompt templates in different modes""" - - # Test non-retrieval mode with no prompt_template (should use default) - set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) - self.run_sql(""" - CREATE AGENT default_prompt_agent - USING - model={ - "model_name": "gpt-3.5-turbo", - "provider": "openai", - "api_key": "--" - } - """) - - agent_response = "default prompt template response" - set_openai_completion(mock_openai, agent_response) - - ret = self.run_sql("select * from default_prompt_agent where question = 'test question'") - assert agent_response in ret.answer[0] - - # Test retrieval mode with no prompt_template (should use default retrieval template) - set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) - self.run_sql(""" - CREATE AGENT default_retrieval_agent - USING - model={ - "model_name": "gpt-3.5-turbo", - "provider": "openai", - "api_key": "--" - } - """) - - mock_openai.reset_mock() - set_openai_completion(mock_openai, agent_response) - ret = self.run_sql("select * from default_retrieval_agent where question = 'test question'") - assert agent_response in ret.answer[0] - - @patch("pydantic_ai.providers.openai.AsyncOpenAI") - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_agent_permissions(self, mock_embedding, mock_openai): - set_embedding(mock_embedding) - - vector_table_name = self._create_kb_storage("kb_show") - - kb_sql = 
f""" - create knowledge base %s - using - storage={vector_table_name}, - embedding_model = {{"provider": "bedrock", "model_name": "titan"}} - """ - self.run_sql(kb_sql % "kb_show1") - self.run_sql(kb_sql % "kb_show2") - self.run_sql(kb_sql % "kb_hide") - - df = get_dataset_planets() - - self.save_file("show1", df) - self.save_file("show2", df) - self.save_file("hide", df) - - self.run_sql(""" - insert into kb_show1 - select id, planet_name content from files.show1 - """) - - set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) - self.run_sql(""" - CREATE AGENT my_agent - USING - model={ - "model_name": "gpt-3.5-turbo", - "api_key": '--' - }, - data = { - "knowledge_bases": ["kb_show*"], - "tables": ["files.show*"] - }; - """) - - # ===== Access to forbidden KBs ===== - - set_openai_completion( - mock_openai, - [ - action_response(sql="select * from kb_hide where content='Moon'", type="exploratory_query"), - action_response(sql="SELECT 'Hi!' answer"), - ], - ) - self.run_sql("select * from my_agent where question = 'test'") - - # result of query - assert "`kb_hide` not found" in mock_openai.agent_calls[2] - # it shows available KBs - assert "kb_show*" in mock_openai.agent_calls[2] - - # ===== Access to exposed KBs ===== - set_openai_completion( - mock_openai, - [ - action_response(sql="select * from kb_show1 where content='Moon' limit 1", type="exploratory_query"), - action_response(sql="SELECT 'Hi!' 
answer"), - ], - ) - self.run_sql("select * from my_agent where question = 'test'") - - # result of object info - assert "kb_hide" not in mock_openai.agent_calls[1] - assert "kb_show1" in mock_openai.agent_calls[1] - assert "kb_show2" in mock_openai.agent_calls[1] - assert "Sample Data" in mock_openai.agent_calls[1] - assert "Metadata" in mock_openai.agent_calls[1] - - # result of query - assert "Moon" in mock_openai.agent_calls[2] - - # ===== access to forbidden files ===== - - set_openai_completion( - mock_openai, - [ - action_response(sql="select * from files.hide", type="exploratory_query"), - action_response(sql="SELECT 'Hi!' answer"), - ], - ) - self.run_sql("select * from my_agent where question = 'test'") - # result of query - assert "`hide` not found" in mock_openai.agent_calls[2] - # it shows available tables - assert "show*" in mock_openai.agent_calls[2] - - # ===== access to exposed files ===== - - set_openai_completion( - mock_openai, - [ - action_response(sql="select * from files.show1 where id = '1001'", type="exploratory_query"), - action_response(sql="SELECT 'Hi!' 
answer"), - ], - ) - - self.run_sql("select * from my_agent where question = 'test'") - - # result of object info - assert "hide" not in mock_openai.agent_calls[1] - assert "show1" in mock_openai.agent_calls[1] - assert "show2" in mock_openai.agent_calls[1] - assert "Sample Data" in mock_openai.agent_calls[1] - assert "Metadata" in mock_openai.agent_calls[1] - - # result of query - assert "Jupiter" in mock_openai.agent_calls[2] - - self.run_sql("drop knowledge base kb_show1") - self.run_sql("drop knowledge base kb_show2") - self.run_sql("drop knowledge base kb_hide") - self._drop_kb_storage(vector_table_name) - - @patch("pydantic_ai.providers.openai.AsyncOpenAI") - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_agent_new_syntax(self, mock_embedding, mock_openai): - set_embedding(mock_embedding) - vector_table_name = self._create_kb_storage("kb") - df = get_dataset_planets() - # create 2 files and KBs - for i in (1, 2): - self.run_sql(f""" - create knowledge base kb{i} - using - storage={vector_table_name}, - embedding_model = {{"provider": "bedrock", "model_name": "titan"}} - """) - self.save_file(f"file{i}", df) - - self.run_sql(f""" - insert into kb{i} - select id, planet_name content from files.file{i} where id != 1000 - """) - - set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) - self.run_sql(""" - CREATE AGENT my_agent - USING - model = { - "provider": 'openai', - "model_name": "gpt-42", - "api_key": '-secret-' - }, - data = { - "knowledge_bases": ["kb1"], - "tables": ["files.file1"] - }, - prompt_template='important user instruction №42' - """) - - # exposed - set_openai_completion( - mock_openai, - [ - action_response(sql="SELECT * FROM kb1", type="exploratory_query"), - action_response(sql="SELECT * FROM files.file1", type="exploratory_query"), - action_response(sql="SELECT 'Hi!' 
answer"), - ], - ) - self.run_sql("select * from my_agent where question = 'test'") - assert "Jupiter" in mock_openai.agent_calls[2] - assert "Jupiter" in mock_openai.agent_calls[3] # column - - # not exposed - set_openai_completion( - mock_openai, - [ - action_response(sql="SELECT * FROM kb2", type="exploratory_query"), - action_response(sql="SELECT * FROM files.file2", type="exploratory_query"), - action_response(sql="SELECT 'Hi!' answer"), - ], - ) - ret = self.run_sql("select * from my_agent where question = 'test'") - assert "`kb2` not found" in mock_openai.agent_calls[2] - assert "`file2` not found" in mock_openai.agent_calls[3] - - # check model params - assert mock_openai.call_args_list[-1][1]["api_key"] == "-secret-" - assert mock_openai().chat.completions.create.call_args_list[-1][1]["model"] == "gpt-42" - - # check agent response - assert "Hi!" in ret.answer[0] - - # check prompt template - assert "important user instruction №42" in mock_openai.agent_calls[0] - - # --- ALTER AGENT --- - set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) - self.run_sql(""" - ALTER AGENT my_agent - USING - model = { - "provider": 'openai', - "model_name": "gpt-18", - "api_key": '-almost secret-' - }, - data = { - "knowledge_bases": ["kb2"], - "tables": ["files.file2"] - }, - prompt_template='important system prompt №37' - """) - - # check exposed - set_openai_completion( - mock_openai, - [ - action_response(sql="SELECT * FROM kb2", type="exploratory_query"), - action_response(sql="SELECT * FROM files.file2", type="exploratory_query"), - action_response(sql="SELECT 'Hi!' 
answer"), - ], - ) - self.run_sql("select * from my_agent where question = 'test'") - assert "Jupiter" in mock_openai.agent_calls[2] - assert "Jupiter" in mock_openai.agent_calls[3] # column - - # not exposed - set_openai_completion( - mock_openai, - [ - action_response(sql="SELECT * FROM kb1", type="exploratory_query"), - action_response(sql="SELECT * FROM files.file1", type="exploratory_query"), - action_response(sql="SELECT 'Hi!' answer"), - ], - ) - ret = self.run_sql("select * from my_agent where question = 'test'") - assert "`kb1` not found" in mock_openai.agent_calls[2] - assert "`file1` not found" in mock_openai.agent_calls[3] - - # check model params - assert mock_openai.call_args_list[-1][1]["api_key"] == "-almost secret-" - assert mock_openai().chat.completions.create.call_args_list[-1][1]["model"] == "gpt-18" - - # check agent response - assert "Hi!" in ret.answer[0] - - # check prompt template - assert "important system prompt №37" in mock_openai.agent_calls[0] - - self.run_sql("drop knowledge base kb1") - self.run_sql("drop knowledge base kb2") - self._drop_kb_storage(vector_table_name) - - @patch("pydantic_ai.providers.openai.AsyncOpenAI") - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_agent_accept_wrong_quoting(self, mock_embedding, mock_openai): - set_embedding(mock_embedding) - vector_table_name = self._create_kb_storage("kb1") - self.run_sql(f""" - create knowledge base kb1 - using - storage={vector_table_name}, - embedding_model = {{"provider": "bedrock", "model_name": "titan"}} - """) - df = get_dataset_planets() - - self.save_file("file1", df) - - set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) - self.run_sql(""" - CREATE AGENT my_agent - USING - model={ - "model_name": "gpt-3.5-turbo", - "api_key": '--' - }, - data = { - "knowledge_bases": ["kb1"], - "tables": ["files.file1", "files.file2.*"] - } - """) - self.run_sql(""" - insert into kb1 - select id, planet_name content from 
files.file1 - """) - - # # exposed - set_openai_completion( - mock_openai, - [ - action_response(sql="SELECT * FROM `mindsdb.kb1` WHERE id = '1001'", type="exploratory_query"), - action_response(sql="SELECT * FROM `files.file1` WHERE id = '1002'", type="exploratory_query"), - action_response(sql="SELECT 'Hi!' answer"), - ], - ) - self.run_sql("select * from my_agent where question = 'test'") - - assert "Jupiter" in mock_openai.agent_calls[2] - assert "Venus" in mock_openai.agent_calls[3] - - self.run_sql("drop knowledge base kb1") - self._drop_kb_storage(vector_table_name) - - @patch("pydantic_ai.providers.openai.AsyncOpenAI") - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_3_part_table(self, mock_pg, mock_openai): - df = get_dataset_planets() - self.set_handler(mock_pg, name="pg", tables={"planets": df}, schema="public") - - set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) - self.run_sql(""" - CREATE AGENT my_agent - USING - model={ - "model_name": "gpt-3.5-turbo", - "api_key": '--' - }, - data = { - "tables": ["pg.public.*"] - } - """) - - set_openai_completion( - mock_openai, - [ - action_response(sql="SELECT * FROM pg.public.planets WHERE id = '1000'", type="exploratory_query"), - # test getting table info - action_response(sql="SELECT * FROM `pg.public`.planets WHERE id = '1000'", type="exploratory_query"), - action_response(sql="SELECT * FROM `pg.public`.`planets` WHERE id = '1000'", type="exploratory_query"), - action_response(sql="SELECT 1"), - ], - ) - self.run_sql("select * from my_agent where question = 'test'") - - # results of sql_db_query - assert "Moon" in mock_openai.agent_calls[2] - assert "Moon" in mock_openai.agent_calls[3] - assert "Moon" in mock_openai.agent_calls[4] - - @patch("pydantic_ai.providers.openai.AsyncOpenAI") - @patch("mindsdb.interfaces.agents.pydantic_ai_agent.PydanticAIAgent._get_completion_stream") - def test_agent_query_param_override(self, mock_get_completion, 
mock_openai): - """ - Test that agent parameters can be overridden per-query using the USING clause in SELECT. - """ - mock_get_completion.return_value = [{"type": "data", "content": "-"}] - - set_openai_completion(mock_openai, action_response(text="hi"), add_planning=False) - self.run_sql( - """ - CREATE AGENT override_agent - USING - model={ - "model_name": "gpt-4o", - "api_key": 'sk-override' - }, - prompt_template = 'Answer questions', - timeout = 60; - """ - ) - - self.run_sql( - """ - SELECT * FROM override_agent - WHERE question = 'How are you?' - USING timeout=5; - """ - ) - assert mock_get_completion.call_args_list[0][0][1].get("timeout") == 5 diff --git a/tests/unit/executor/test_api_handler.py b/tests/unit/executor/test_api_handler.py deleted file mode 100644 index 2e0ee4e9582..00000000000 --- a/tests/unit/executor/test_api_handler.py +++ /dev/null @@ -1,190 +0,0 @@ -import sys -import types - -import datetime as dt -from unittest.mock import patch -from dataclasses import dataclass - -import pandas as pd - -from tests.unit.executor_test_base import BaseExecutorDummyML - - -# import modules virtually if it is not installed -try: - import github # noqa -except ImportError: - module = types.ModuleType("") - exec("Github=None", module.__dict__) - sys.modules["github"] = module - -try: - import chardet # noqa -except ImportError: - sys.modules["chardet"] = types.ModuleType("") - - -class TestApiHandler(BaseExecutorDummyML): - def setup_method(self): - super().setup_method() - self.setup_community_handler("github") - self.setup_community_handler("email") - - @patch("github.Github") - def test_github(self, Github): - """ - Test for APIResource - """ - - # --- create --- - self.run_sql(""" - CREATE DATABASE gh - WITH - ENGINE = 'github', - PARAMETERS = { - "repository": "mindsdb/mindsdb", - "api_key": "-" - } - """) - - # --- select --- - @dataclass - class User: - login: str = "user1" - - @dataclass - class Issue: - number: int - title: str - state = "open" - 
user = User() - labels = [] - assignees = [User()] - comments: int = 0 - body = "body" - created_at = dt.datetime.now() - updated_at = dt.datetime.now() - closed_at = dt.datetime.now() - closed_by = User() - - data = [ - [123, "bug", "open"], - [124, "feature", "open"], - [125, "feature", "open"], - ] - - get_issues = Github().get_repo().get_issues - - get_issues.return_value = [Issue(*row) for row in data] - - # - group without limit - - ret = self.run_sql(""" - select max(number) number, title from gh.issues - where state = 'open' - and number between 124 and 126 - group by title - """) - - # state was used for github - kwargs = get_issues.call_args_list[-1][1] - assert kwargs["state"] == "open" - - # between was used outside of handler, output is only one row with number=125 - assert len(ret) == 1 - assert ret["number"][0] == 125 - - # - group with limit - - ret = self.run_sql(""" - select count(*) items from gh.issues - where state = 'open' - """) - - # state was used for github - kwargs = get_issues.call_args_list[-1][1] - assert kwargs["state"] == "open" - - # between was used outside of handler, output is only one row with number=125 - assert len(ret) == 1 - assert ret["items"][0] == 3 - - # --- insert --- - self.run_sql(""" - insert into gh.issues (title, body) - values ('feature', 'do better') - """) - create_issue = Github().get_repo().create_issue - args = create_issue.call_args_list[0][0] - kwargs = create_issue.call_args_list[0][1] - - assert args[0] == "feature" - assert kwargs["body"] == "do better" - - @patch("mindsdb_community_handlers.email_handler.email_handler.EmailClient") - def test_email(self, EmailClient): - """ - Test for APITable - """ - - # --- create --- - self.run_sql(""" - CREATE DATABASE em - WITH ENGINE = 'email', - PARAMETERS = { - "email": "e@mail.com", - "password": "-" - } - """) - - # --- select --- - search_email = EmailClient().search_email - - mock_df = pd.DataFrame( - { - "date": [ - "Wed, 02 Feb 2022 15:30:00 +0000", - 
"Thu, 10 Mar 2022 10:45:15 +0530", - "Fri, 16 Dec 2022 20:15:30 -0400", - ], - "body_content_type": ["text", "text", "text"], - "body": ["info", "info", "info"], - "from_field": ["x1@m.com", "x2@m.com", "x3@m.com"], - "id": ["2", "3", "4"], - "to_field": ["a@m.com", "a@m.com", "b@m.com"], - "subject": ["info", "info", "info"], - } - ) - - search_email.return_value = mock_df - - ret = self.run_sql(""" - SELECT to_field, max(from_field) from_field - FROM em.emails - WHERE subject = 'info' - and id > 1 - group by to_field - order by to_field - """) - - args = search_email.call_args_list[0][0] - - # check input to search_email - assert args[0].subject == "info" - assert args[0].since_email_id == 2 - - # check response - assert len(ret) == 2 - assert ret["from_field"][0] == "x2@m.com" - - # --- insert --- - self.run_sql(""" - INSERT INTO em.emails(to_field, subject, body) - VALUES ("toemail@email.com", "MindsDB", "Hello from MindsDB!"); - """) - - func_call = EmailClient().send_email.call_args_list[0] - args = func_call[0] - kwargs = func_call[1] - - assert args[0] == "toemail@email.com" - assert kwargs["subject"] == "MindsDB" - assert kwargs["body"] == "Hello from MindsDB!" 
diff --git a/tests/unit/executor/test_base_queires.py b/tests/unit/executor/test_base_queires.py deleted file mode 100644 index 0a0e3c2ab79..00000000000 --- a/tests/unit/executor/test_base_queires.py +++ /dev/null @@ -1,964 +0,0 @@ -from unittest.mock import patch -import datetime as dt -import pytest - -import pandas as pd - -from tests.unit.executor_test_base import BaseExecutorDummyML, BaseExecutorTest - - -def get_stores_df(): - return pd.DataFrame( - columns=["id", "region_id", "format"], - data=[ - [1, 1, "c"], - [2, 2, "a"], - [3, 2, "a"], - [4, 2, "b"], - [5, 1, "b"], - [6, 2, "b"], - ], - ) - - -def get_regions_df(): - return pd.DataFrame( - columns=["id", "name"], - data=[ - [1, "asia"], - [2, "europe"], - ], - ) - - -class TestSelect(BaseExecutorDummyML): - def test_view(self): - df = pd.DataFrame( - [ - {"a": 1, "b": dt.datetime(2020, 1, 1)}, - {"a": 2, "b": dt.datetime(2020, 1, 2)}, - {"a": 1, "b": dt.datetime(2020, 1, 3)}, - ] - ) - self.save_file("tasks", df) - - self.run_sql(""" - create view mindsdb.vTasks ( - select * from files.tasks where a=1 - ) - """) - - # -- create model -- - self.run_sql( - """ - CREATE model mindsdb.task_model - from mindsdb (select * from Vtasks) - PREDICT a - using engine='dummy_ml' - """ - ) - self.wait_predictor("mindsdb", "task_model") - - # use model - ret = self.run_sql(""" - SELECT m.* - FROM mindsdb.vtasks as t - JOIN mindsdb.task_model as m - """) - - assert len(ret) == 2 - assert ret.predicted[0] == 42 - - # check case-insensitive in subselect step - ret = self.run_sql(""" - SELECT - m.predicted as lower, - m.PREDICTED as upper, - M.PREDIcted as varcase, - m.predicted as value, - m.PREDICTED as VALUE - FROM mindsdb.vtasks as t - JOIN mindsdb.task_model as m - """) - assert ret.lower[0] == ret.upper[0] == ret.varcase[0] - assert ret.value[0] == ret.VALUE[0] - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_view_conditions(self, data_handler): - # test view optimisations - df = 
pd.DataFrame( - [ - {"a": 1, "b": 1}, - {"a": 1, "b": 2}, - ] - ) - self.set_handler(data_handler, name="pg", tables={"tbl1": df}) - self.run_sql("create view v1 (select * from pg.tbl1 where a=1)") - - data_handler.reset_mock() - ret = self.run_sql("select * from v1 where b=2 limit 1") - assert len(ret) == 1 and ret["b"][0] == 2 - calls = data_handler().query.call_args_list - sql = calls[0][0][0].to_string() - - # both conditions are used in query to database - assert "a = 1" in sql and "b = 2" in sql and "LIMIT 1" in sql - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_complex_joins(self, data_handler): - df1 = pd.DataFrame( - [ - {"a": 1, "c": 1, "b": dt.datetime(2020, 1, 1)}, - {"a": 2, "c": 1, "b": dt.datetime(2020, 1, 2)}, - {"a": 1, "c": 3, "b": dt.datetime(2020, 1, 3)}, - {"a": 3, "c": 2, "b": dt.datetime(2020, 1, 2)}, - ] - ) - df2 = pd.DataFrame( - [ - {"a": 6, "c": 1}, - {"a": 4, "c": 2}, - {"a": 2, "c": 3}, - ] - ) - self.set_data("tbl1", df1) - self.set_data("tbl2", df2) - - self.run_sql( - """ - CREATE model mindsdb.pred - PREDICT p - using engine='dummy_ml', - join_learn_process=true - """ - ) - - self.run_sql(""" - create view mindsdb.view2 ( - select * from dummy_data.tbl2 where a!=4 - ) - """) - - # --- test join table-table-table --- - ret = self.run_sql(""" - SELECT t1.a as t1a, t3.a t3a - FROM dummy_data.tbl1 as t1 - JOIN dummy_data.tbl2 as t2 on t1.c=t2.c - LEFT JOIN dummy_data.tbl1 as t3 on t2.a=t3.a - where t1.a=1 - """) - - # must be 2 rows - assert len(ret) == 2 - - # all t1.a values are 1 - assert list(ret.t1a) == [1, 1] - - # t3.a has 2 and None - assert len(ret[ret.t3a == 2]) == 1 - assert len(ret[ret.t3a.isna()]) == 1 - - # --- test join table-predictor-view --- - ret = self.run_sql(""" - SELECT t1.a t1a, t3.a t3a, m.* - FROM dummy_data.tbl1 as t1 - JOIN mindsdb.pred m - LEFT JOIN mindsdb.view2 as t3 on t1.c=t3.c - where t1.a>1 - """) - - # must be 2 rows - assert len(ret) == 2 - - # t1.a > 1 - assert 
ret[ret.t1a <= 1].empty - - # view: a!=4 - assert ret[ret.t3a == 4].empty - - # t3.a has 6 and None - assert len(ret[ret.t3a == 6]) == 1 - assert len(ret[ret.t3a.isna()]) == 1 - - # contents predicted values - assert list(ret.predicted.unique()) == [42] - - # --- tests table-subselect-view --- - - ret = self.run_sql(""" - SELECT t1.a t1a, - t2.t1a t2t1a, t2.t3a t2t3a, - t3.c t3c, t3.a t3a - FROM dummy_data.tbl1 as t1 - JOIN ( - SELECT t1.a as t1a, t3.a t3a - FROM dummy_data.tbl1 as t1 - JOIN dummy_data.tbl2 as t2 on t1.c=t2.c - LEFT JOIN dummy_data.tbl1 as t3 on t2.a=t3.a - where t1.a=1 - ) t2 on t2.t3a = t1.a - LEFT JOIN mindsdb.view2 as t3 on t1.c=t3.c - where t1.a>1 - """) - - # 1 row - assert len(ret) == 1 - - # check row values - row = ret.iloc[0].to_dict() - assert row["t1a"] == 2 - assert row["t2t3a"] == 2 - - assert row["t2t1a"] == 1 - assert row["t3c"] == 1 - - assert row["t3a"] == 6 - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_joins_different_db(self, data_handler): - df1 = pd.DataFrame( - [ - {"a": 1, "c": 1}, - {"a": 3, "c": 2}, - ] - ) - df2 = pd.DataFrame( - [ - {"a": 6, "c": 1}, - {"a": 4, "c": 2}, - {"a": 2, "c": 3}, - ] - ) - - self.set_data("tbl1", df1) - self.set_handler(data_handler, name="pg", tables={"tbl2": df2}) - - # --- test join table-table --- - ret = self.run_sql(""" - SELECT * - FROM dummy_data.tbl1 as t1 - JOIN pg.tbl2 as t2 on t1.c=t2.c - """) - - # must be 2 rows - assert len(ret) == 2 - - # second table is called with filter - calls = data_handler().query.call_args_list - sql = calls[0][0][0].to_string() - assert sql.strip() in ( - # duckdb's `distinct` can return in different order - "SELECT * FROM tbl2 AS t2 WHERE c IN (1, 2)SELECT * FROM tbl2 AS t2 WHERE c IN (2, 1)" - ) - - # --- using alias in order - ret = self.run_sql(""" - SELECT t1.a + t2.a col1, min(t1.a) c - FROM dummy_data.tbl1 as t1 - JOIN pg.tbl2 as t2 on t1.c=t2.c - group by col1 - order by c - """) - assert ret["c"][0] == 1 # alias 
is the same as column - assert ret["col1"][0] == 7 - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_db_mixed_case(self, data_handler): - df = pd.DataFrame( - [ - {"a": 6, "c": 1}, - {"a": 4, "c": 2}, - {"a": 2, "c": 3}, - ] - ) - # mixed case - self.set_handler(data_handler, name="mixDb", tables={"tbl": df, "mixTbl": df}) - self.set_handler(data_handler, name="mixDb2", tables={"tbl": df, "mixTbl": df}) - - # --- works with right case (with quotes and without) - self.run_sql(""" - SELECT * FROM `mixDb`.tbl as t1 - JOIN `mixDb2`.tbl as t2 on t1.c=t2.c - """) - - self.run_sql(""" - SELECT * FROM mixDb.tbl as t1 - JOIN mixDb2.tbl as t2 on t1.c=t2.c - """) - - self.run_sql("SELECT * FROM mixDb.tbl") - - self.run_sql("SELECT * FROM `mixDb`.tbl") - - # --- doesn't work with wrong case - with pytest.raises(Exception): - self.run_sql(""" - SELECT * FROM mixdb.tbl as t1 - JOIN mixDb2.tbl as t2 on t1.c=t2.c - """) - - with pytest.raises(Exception): - self.run_sql(""" - SELECT * FROM `mixdb`.tbl as t1 - JOIN `mixDb2`.tbl as t2 on t1.c=t2.c - """) - - with pytest.raises(Exception): - self.run_sql("SELECT * FROM mixdb.tbl") - - with pytest.raises(Exception): - self.run_sql("SELECT * FROM `mixdb`.tbl") - - # lower case - self.set_handler(data_handler, name="low_db", tables={"tbl": df, "mixTbl": df}) - self.set_handler(data_handler, name="low_db2", tables={"tbl": df, "mixTbl": df}) - - # --- works with any case if not quoted - self.run_sql(""" - SELECT * FROM low_DB.tbl as t1 - JOIN low_DB2.tbl as t2 on t1.c=t2.c - """) - - self.run_sql("SELECT * FROM low_DB.tbl") - - # -- doesn't work quoted - with pytest.raises(Exception): - self.run_sql(""" - SELECT * FROM `low_DB`.tbl as t1 - JOIN `low_DB2`.tbl as t2 on t1.c=t2.c - """) - - with pytest.raises(Exception): - self.run_sql("SELECT * FROM `low_DB`.tbl") - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_implicit_join(self, data_handler): - df1 = pd.DataFrame( - [ - {"a": 
1, "c": 1}, - {"a": 3, "c": 2}, - ] - ) - df2 = pd.DataFrame( - [ - {"a": 6, "c": 1}, - {"a": 4, "c": 2}, - {"a": 2, "c": 3}, - ] - ) - - self.set_data("tbl1", df1) - self.set_handler(data_handler, name="pg", tables={"tbl2": df2}) - - # --- test join table-table --- - ret = self.run_sql(""" - SELECT * FROM dummy_data.tbl1 as t1, pg.tbl2 as t2 - where t1.c=t2.c - """) - - # must be 2 rows - assert len(ret) == 2 - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_federated_query(self, data_handler): - statuses = pd.DataFrame( - [ - [1, "new"], - [2, "progress"], - [3, "done"], - [4, "cancel"], - [5, "duplicate"], - [6, "docs"], - [7, "backlog"], - ], - columns=["id", "name"], - ) - - tasks = pd.DataFrame( - [ - [1, 1, "new1"], - [2, 7, "backlog2"], - [3, 7, "backlog3"], - [4, 7, "backlog4"], - [5, 7, "backlog5"], - [6, 7, "backlog6"], - ], - columns=["id", "status", "name"], - ) - - self.set_handler(data_handler, name="db", tables={"statuses": statuses}) - self.save_file("tasks", tasks) - - # test inner join - ret = self.run_sql(""" - SELECT * FROM db.statuses as t1 - JOIN files.tasks as t2 on t1.id=t2.status - limit 2 - """) - - assert len(ret) == 2 - tries = data_handler().query.call_args_list - assert len(tries) == 2 - query1 = tries[0][0][0] - # not all record were fetched in first query - assert query1.limit.value < 6 - - # test with order by 2nd table - data_handler.reset_mock() - - ret = self.run_sql(""" - SELECT * FROM db.statuses as t1 - JOIN files.tasks as t2 on t1.id=t2.status - order by t2.id - limit 2 - """) - - assert len(ret) == 2 - tries = data_handler().query.call_args_list - # the first table was used once without the limit - assert len(tries) == 1 - query1 = tries[0][0][0] - assert query1.limit is None - - # test left join - data_handler.reset_mock() - - ret = self.run_sql(""" - SELECT * FROM db.statuses as t1 - left join files.tasks as t2 on t1.id=t2.status - limit 2 - """) - - assert len(ret) == 2 - tries = 
data_handler().query.call_args_list - # the first table was used once with the limit - assert len(tries) == 1 - query1 = tries[0][0][0] - assert query1.limit.value == 2 - - def test_complex_queries(self): - # -- set up data -- - - self.save_file("stores", get_stores_df()) - self.save_file("regions", get_regions_df()) - - # -- create view -- - self.run_sql(""" - create view mindsdb.stores_view ( - select * from files.stores - ) - """) - - # -- create model -- - self.run_sql( - """ - CREATE model model1 - from files (select * from stores) - PREDICT format - using engine='dummy_ml' - """ - ) - self.wait_predictor("mindsdb", "model1") - - self.run_sql( - """ - CREATE model model2 - from files (select * from stores) - PREDICT format - using engine='dummy_ml' - """ - ) - self.wait_predictor("mindsdb", "model2") - - # -- joins / conditions / unions -- - - sql = """ - select - m1.predicted / 2 a, -- 42/2=21 - s.id + (select id from files.regions where id=1) b -- =3 - from files.stores s - join files.regions r on r.id = s.region_id - join model1 m1 - join model2 m2 - where - m1.model_param = (select 100 + id from files.stores where id=1) - and s.region_id=(select id from files.regions where id=2) -- only region_id=2 - and s.format='a' - and s.id = r.id -- cross table condition - union - select id, id from files.regions where id = 1 -- 2nd row with [1,1] - union - select id, id from files.stores where id = 2 -- 2nd row with [2,2] - """ - - ret = self.run_sql(sql) - assert len(ret) == 3 - - # union doesn't guarantee order - ret.sort_values(by="a", inplace=True) - assert list(ret.iloc[0]) == [1, 1] - assert list(ret.iloc[1]) == [2, 2] - assert list(ret.iloc[2]) == [21, 3] - - # -- aggregating / grouping / cases -- - case = """ - case when s.id=1 then 10 - when s.id=2 then 20 - when s.id=3 then 30 - else 100 - end - """ - - sql = f""" - SELECT - -- values for region_id=2: [20, 30, 100, 100] - MAX({case}) c_max, -- =100 - MIN({case}) c_min, -- =20 - SUM({case}) c_sum, -- =250 - 
COUNT({case}) c_count, -- =4 - AVG({case}) c_avg -- 250/4=62.5 - from stores_view s -- view is used - join files.regions r on r.id = s.region_id - join model1 m1 - group by r.id -- 2 records - having max(r.id) = 2 -- 1 record - """ - - ret = self.run_sql(sql) - - assert len(ret) == 1 - - assert ret.c_max[0] == 100 - assert ret.c_min[0] == 20 - assert ret.c_sum[0] == 250 - assert ret.c_count[0] == 4 - assert ret.c_avg[0] == 62.5 - - sql = """ - SELECT - s.*, - ROW_NUMBER() OVER(PARTITION BY r.id ORDER BY s.id) ROW_NUMBER, - RANK() OVER(PARTITION BY r.id ORDER BY s.format) RANK, - DENSE_RANK() OVER(PARTITION BY r.id ORDER BY s.format) DENSE_RANK, - PERCENT_RANK() OVER(PARTITION BY r.id ORDER BY s.id) PERCENT_RANK, - CUME_DIST() OVER(PARTITION BY r.id ORDER BY s.id) CUME_DIST, - NTILE(2) OVER(PARTITION BY r.id ORDER BY s.id) NTILE, - LAG(s.id, 1) OVER(PARTITION BY r.id ORDER BY s.id) LAG, - LEAD(s.id, 1) OVER(PARTITION BY r.id ORDER BY s.id) LEAD, - FIRST_VALUE(s.format) OVER(PARTITION BY r.id ORDER BY s.id) FIRST_VALUE, - LAST_VALUE(s.format) OVER(PARTITION BY r.id ORDER BY s.id) LAST_VALUE, - NTH_VALUE(s.id, 1) OVER(PARTITION BY r.id ORDER BY s.id) NTH_VALUE - from files.stores s - join files.regions r on r.id = s.region_id - join model1 m1 - order by r.id, s.id - """ - ret = self.run_sql(sql) - - assert list(ret.ROW_NUMBER) == [1, 2, 1, 2, 3, 4] - assert list(ret.RANK) == [2, 1, 1, 1, 3, 3] - assert list(ret.DENSE_RANK) == [2, 1, 1, 1, 2, 2] - - assert list(ret.FIRST_VALUE) == ["c", "c", "a", "a", "a", "a"] - assert list(ret.LAST_VALUE) == ["c", "b", "a", "a", "b", "b"] - - # -- unions functions -- - - # TODO Correlated subqueries (not implemented) - - def test_pruning_ambiguous_columns(self): - self.save_file("stores", get_stores_df()) - self.save_file("regions", get_regions_df()) - - ret = self.run_sql( - """ - select format - from files.stores s - join files.regions r on r.id = s.region_id - where s.id = 3 - """ - ) - assert len(ret) == 1 - assert 
ret["format"][0] == "a" - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_replace_suqueries(self, data_handler): - df = pd.DataFrame( - columns=["id", "name"], - data=[ - [1, "asia"], - [2, "europe"], - [3, "africa"], - [3, "australia"], - ], - ) - self.set_handler(data_handler, name="pg", tables={"branch": df}) - - empty = pd.DataFrame( - columns=["name"], - data=[ - [None], - ], - ) - self.save_file("empty", empty) - - sql = """ - select - cast( - (select COUNT(*) from pg.branch where `name` in ('asia', 'africa')) as FLOAT - ) - / - ( select COUNT(*) from pg.branch ) - * 100 as percentage - """ - ret = self.run_sql(sql) - assert ret.iloc[0, 0] == 50 - - sql += " from files.empty " - ret = self.run_sql(sql) - assert ret.iloc[0, 0] == 50 - - def test_last(self): - df = pd.DataFrame( - [ - {"a": 1, "b": "a"}, - {"a": 2, "b": "b"}, - {"a": 3, "b": "c"}, - ] - ) - self.set_data("tasks", df) - - # -- create model -- - self.run_sql( - """ - CREATE model task_model - from dummy_data (select * from tasks) - PREDICT a - using engine='dummy_ml', - join_learn_process=true - """ - ) - - # --- check web editor --- - ret = self.run_sql(""" - select * from dummy_data.tasks where a>last - """) - # first call is empty - assert len(ret) == 0 - - # add rows to dataframe - df.loc[len(df.index)] = [4, "d"] # should be tracked - df.loc[len(df.index)] = [0, "z"] # not tracked - self.set_data("tasks", df) - - ret = self.run_sql(""" - select * from dummy_data.tasks where a>last - """) - - # second call content one new line - assert len(ret) == 1 - assert ret.a[0] == 4 - - # --- TEST view --- - - # view without target - with pytest.raises(Exception) as exc_info: - self.run_sql(""" - create view v1 ( - select b from dummy_data.tasks where a>last - ) - """) - assert "should be in query target" in str(exc_info.value) - - # view with target - self.run_sql(""" - create view v1 ( - select * from dummy_data.tasks where a>last - ) - """) - - ret = self.run_sql(""" - 
select * from v1 - """) - # first call is empty - assert len(ret) == 0 - - # add row to dataframe - df.loc[len(df.index)] = [5, "a"] - self.set_data("tasks", df) - - ret = self.run_sql(""" - select * from v1 - """) - - # second call content one new line - assert len(ret) == 1 - assert ret.a[0] == 5 - - # add row to dataframe - df.loc[len(df.index)] = [6, "a"] - self.set_data("tasks", df) - - # use model - ret = self.run_sql(""" - SELECT m.* - FROM v1 as t - JOIN task_model as m - """) - - # second call content one new line - assert len(ret) == 1 - - # -- view with model - - self.run_sql(""" - create view v2 ( - select t.a+1 as a from dummy_data.tasks t - JOIN task_model as m - where t.a>last - ) - """) - - ret = self.run_sql("select * from v2") - # first call is empty - assert len(ret) == 0 - - # add row to dataframe - df.loc[len(df.index)] = [7, "a"] - self.set_data("tasks", df) - - ret = self.run_sql("select * from v2") - - # second call content one new line - assert len(ret) == 1 - assert ret.a[0] == 8 - - def test_last_coalesce(self): - df = pd.DataFrame( - [ - {"a": 1, "b": "a"}, - {"a": 2, "b": "b"}, - {"a": 3, "b": "c"}, - ] - ) - - self.set_data("tasks", df) - - # -- create model -- - self.run_sql( - """ - CREATE model task_model - PREDICT a - using engine='dummy_ml', - join_learn_process=true - """ - ) - - sqls = [ - """ - select * from dummy_data.tasks - where a > coalesce(last, 1) - """, - """ - select t.* from dummy_data.tasks t - join task_model m - where t.a > coalesce(last, 1) - """, - ] - - # first call two rows - for sql in sqls: - ret = self.run_sql(sql) - assert len(ret) == 2 - - # second call zero rows - for sql in sqls: - ret = self.run_sql(sql) - assert len(ret) == 0 - - # add rows to dataframe - df.loc[len(df.index)] = [4, "d"] # should be tracked - df.loc[len(df.index)] = [0, "z"] # not tracked - self.set_data("tasks", df) - - for sql in sqls: - ret = self.run_sql(sql) - - # have to be one new line - assert len(ret) == 1 - assert ret.a[0] == 
4 - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_interval(self, data_handler): - df = pd.DataFrame( - [ - {"last_date": dt.datetime(2020, 1, 2)}, - ] - ) - self.set_handler(data_handler, name="pg", tables={"branch": df}) - - ret = self.run_sql("select (last_date + INTERVAL '2 days') d from pg.branch") - - assert ret.d[0] == dt.datetime(2020, 1, 4) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_duplicated_cols(self, data_handler): - df1 = pd.DataFrame( - [ - {"id": 1, "a": 1}, - {"id": 2, "a": 2}, - {"id": 3, "a": 3}, - ] - ) - df2 = pd.DataFrame( - [ - {"id": 1, "a": 10}, - {"id": 2, "a": 20}, - ] - ) - self.set_handler(data_handler, name="pg", tables={"tbl1": df1, "tbl2": df2}) - - ret = self.run_sql(""" - select * from pg.tbl1 as a - join pg.tbl2 as b on a.id=b.id - """) - - first_row = ret.to_dict("split")["data"][0] - assert first_row == [1, 1, 1, 10] - - def test_system_vars(self): - ret = self.run_sql("select @@session.auto_increment_increment, @@character_set_client") - - assert ret.iloc[0, 0] == 1 - assert ret.iloc[0, 1] == "utf8" - - def test_mysql_queries(self): - self.run_sql("SHOW KEYS FROM `mindsdb`.`predictors`") - - self.run_sql("show full columns from `predictors`") - - self.run_sql("SHOW FULL TABLES FROM files") - - def test_select_without_table(self): - test_data = (("session_user", None), ("version()", "8.0.17"), ("@@version_comment", "(MindsDB)"), ("1", 1)) - - for target, response in test_data: - ret = self.run_sql(f"select {target}") - assert len(ret) == 1 - assert ret.iloc[0, 0] == response - - with pytest.raises(Exception) as exc_info: - self.run_sql("select $$") - assert "check the manual that corresponds to your server version for the right syntax" in str(exc_info.value) - - def test_alter_database(self): - self.run_sql(""" - create database test_db using engine='dummy_data', parameters={"key": 1}; - """) - res = self.run_sql(""" - select * from information_schema.databases 
where name = 'test_db'; - """) - assert res["NAME"][0] == "test_db" - assert res["CONNECTION_DATA"][0] == '{"key": 1}' - - self.run_sql(""" - alter database test_db parameters={"key": 2}; - """) - - # is not possible to update name of database - with pytest.raises(Exception): - self.run_sql(""" - alter database test_db name=db_test; - """) - - res = self.run_sql(""" - select * from information_schema.databases where name = 'test_db'; - """) - assert res["NAME"][0] == "test_db" - assert res["CONNECTION_DATA"][0] == '{"key": 2}' - - def test_unknown_duckdb_function(self): - with pytest.raises(Exception) as exc_info: - self.run_sql(""" - select unknown_function_asdf(1) - """) - - assert "Unknown function" in str(exc_info.value) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_subselect_1row_aggregate(self, data_handler): - self.set_handler(data_handler, name="pg", tables={}) - - ret = self.run_sql(""" - select count (*) result from ( - SELECT * FROM pg ( - select 'content' - ) - ) - """) - assert len(ret) == 1 - assert ret["result"][0] == 1 - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_cte_join(self, data_handler): - self.set_handler(data_handler, name="pg", tables={"stores": get_stores_df()}) - self.save_file("regions", get_regions_df()) - - ret = self.run_sql(""" - WITH regions AS ( - SELECT DISTINCT id, name FROM files.regions - ), - stores AS ( - SELECT * FROM pg.stores - LIMIT 10 - ) - SELECT format, region_id FROM pg.stores s - JOIN regions r on r.id = s.region_id - WHERE s.format IN (SELECT format FROM stores WHERE format='a') - LIMIT 100; - """) - assert len(ret) > 1 - assert ret["format"][0] == "a" - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_view_duplicated_cols(self, data_handler): - self.set_handler(data_handler, name="pg", tables={"stores": get_stores_df(), "regions": get_regions_df()}) - - with pytest.raises(Exception): - # `id` exists in both tables, 
should raise an exception - self.run_sql(""" - create view v1 ( - select * from pg.stores s - join pg.regions r on r.id = s.region_id - ) - """) - - -class TestSet(BaseExecutorTest): - @pytest.mark.parametrize("var", ["var", "@@var", "@@session.var", "session var"]) - @pytest.mark.parametrize("value", ["1", "0", "true", "false", "on", "off"]) - def test_set(self, var, value): - query = f"set {var} = {value}" - self.run_sql(query) - - def test_multy_set(self): - query = "set @@var = ON, session var = 0" - self.run_sql(query) - - -class TestDML(BaseExecutorDummyML): - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_create_empty_table(self, data_handler): - self.set_handler(data_handler, name="pg", tables={}) - - self.run_sql("create table pg.table1 (a DATE, b INTEGER)") - - calls = data_handler().query.call_args_list - sql = calls[0][0][0].to_string() - assert sql.strip() == "CREATE TABLE table1 (a DATE, b INTEGER)" - - def test_delete_from_table(self): - df1 = pd.DataFrame([{"a": 1}]) - self.set_data("tbl1", df1) - - self.run_sql("delete from tbl1 where a=1", database="dummy_data") diff --git a/tests/unit/executor/test_cache.py b/tests/unit/executor/test_cache.py deleted file mode 100644 index 431e8604de1..00000000000 --- a/tests/unit/executor/test_cache.py +++ /dev/null @@ -1,89 +0,0 @@ -import datetime as dt -import time -import unittest -import traceback -import redis -import warnings -import tempfile -import json -import os - -import pandas as pd - -from mindsdb.utilities.cache import RedisCache, FileCache, dataframe_checksum - - -class TestCashe(unittest.TestCase): - - @classmethod - def setup_class(cls): - # config - config = {} - # TODO run on own database - fdi, cfg_file = tempfile.mkstemp(prefix='mindsdb_conf_') - - with os.fdopen(fdi, 'w') as fd: - json.dump(config, fd) - - os.environ['MINDSDB_CONFIG_PATH'] = cfg_file - - def test_redis(self): - cache = RedisCache('predict', max_size=2) - try: - self.cache_test(cache) - except 
redis.ConnectionError as e: - # Skip test for redis if no redis installed - warnings.warn(f'redis is not available: {e}') - print(traceback.format_exc()) - - def test_file(self): - cache = FileCache('predict', max_size=2) - - self.cache_test(cache) - - def cache_test(self, cache): - - # test save - df = pd.DataFrame([ - [1, 1.2, 'string', dt.datetime.now(), [1, 2, 3], {1: 3}], - [2, 3.2, 'other', dt.datetime(2011, 12, 30), [3], {11: 23, 2: 3}], - ], columns=['a', 'b', 'c', 'd', 'e', 'f']) - - # make bigger - df = pd.concat([df] * 100).reset_index() - - name = dataframe_checksum(df) - - # test save - cache.set(name, df) - - df2 = cache.get(name) - - assert dataframe_checksum(df) == dataframe_checksum(df2) - assert list(df.columns) == list(df2.columns) - - # test save df - name += '1' - cache.set_df(name, df) - - df2 = cache.get_df(name) - - assert dataframe_checksum(df) == dataframe_checksum(df2) - assert list(df.columns) == list(df2.columns) - - # test delete - cache.delete(name) - - df2 = cache.get(name) - assert df2 is None - - # test max_size - # load cache with size 2(max_size) + 5 (buffer) - cache.set('first', df) - for i in range(8): - time.sleep(0.01) - cache.set(str(i), df) - - # get first, must be deleted - df2 = cache.get('first') - assert df2 is None diff --git a/tests/unit/executor/test_executor.py b/tests/unit/executor/test_executor.py deleted file mode 100644 index 89a4acdfda5..00000000000 --- a/tests/unit/executor/test_executor.py +++ /dev/null @@ -1,2037 +0,0 @@ -from unittest.mock import patch -import datetime as dt -import tempfile -import pytest -import json -import os - -import pandas as pd -import pandas.testing as pdt -import numpy as np - -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender - -from mindsdb_sql_parser import parse_sql - -from mindsdb.api.executor.utilities.sql import query_df, query_dfs - -# How to run: -# env PYTHONPATH=./ pytest tests/unit/test_executor.py - -from tests.unit.executor_test_base import 
BaseExecutorMockPredictor - - -class DTYPE: - INT = "Int64" - FLOAT = "Float64" - CATEGORY = "category" - DATETIME = "datetime64[ns]" - - -DTYPE = DTYPE() - - -def to_str(query): - render = SqlalchemyRender("postgres") - s = render.get_string(query) - s = s.strip().replace("\n", " ").replace("\t", "").replace(" ", " ") - return s - - -class Test(BaseExecutorMockPredictor): - def setup_method(self, method): - super().setup_method() - self.set_executor(mock_predict=True, mock_model_controller=True, import_dummy_ml=True) - - @pytest.mark.slow - def test_describe(self): - self.execute("CREATE PROJECT proj;") - - self.execute(""" - CREATE MODEL mindsdb.test_predictor - PREDICT target - USING - engine = 'dummy_ml' - """) - - self.execute(""" - CREATE MODEL proj.test_predictor - PREDICT target - USING - engine = 'dummy_ml' - """) - - self.execute("RETRAIN proj.test_predictor;") - - ret = self.execute("SELECT * FROM proj.models order by version;") - assert len(ret.data) == 2 - - ret = self.execute("DESCRIBE test_predictor") - assert len(ret.data.records) == 1 - assert int(ret.data.records[0]["VERSION"]) == 1 - assert ret.data.records[0]["PROJECT"] == "mindsdb" - - ret = self.execute("DESCRIBE proj.test_predictor") - assert len(ret.data.records) == 1 - assert int(ret.data.records[0]["VERSION"]) == 2 - assert ret.data.records[0]["PROJECT"] == "proj" - - ret = self.execute("DESCRIBE proj.test_predictor.1") - assert int(ret.data.records[0]["VERSION"]) == 1 - assert ret.data.records[0]["PROJECT"] == "proj" - - ret = self.execute("DESCRIBE proj.test_predictor.2") - assert int(ret.data.records[0]["VERSION"]) == 2 - assert ret.data.records[0]["PROJECT"] == "proj" - - ret = self.execute("DESCRIBE proj.test_predictor.`1`.info") - assert "type" in ret.data.records[0] - assert int(ret.data.records[0]["version"]) == 1 - - ret = self.execute("DESCRIBE proj.test_predictor.`2`.info") - assert "type" in ret.data.records[0] - assert int(ret.data.records[0]["version"]) == 2 - - ret = 
self.execute("DESCRIBE proj.test_predictor.`1`.info.`0-1`") - assert "attribute" in ret.data.records[0] - assert int(ret.data.records[0]["version"]) == 1 - assert ret.data.records[0]["attribute"] == "info.0-1" - - ret = self.execute("DESCRIBE proj.test_predictor.`2`.info.`1-2`") - assert "attribute" in ret.data.records[0] - assert int(ret.data.records[0]["version"]) == 2 - assert ret.data.records[0]["attribute"] == "info.1-2" - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_integration_select(self, mock_handler): - data = [[1, "x"], [1, "y"]] - df = pd.DataFrame(data, columns=["a", "b"]) - self.set_handler(mock_handler, name="pg", tables={"tasks": df}) - - ret = self.execute("select * from pg.tasks") - assert ret.data.to_lists() == data - - # check sql in query method - assert mock_handler().query.call_args[0][0].to_string() == "SELECT * FROM tasks" - - def test_predictor_1_row(self): - predicted_value = 3.14 - predictor = { - "name": "task_model", - "predict": "p", - "dtype_dict": {"p": DTYPE.FLOAT, "a": DTYPE.INT, "b": DTYPE.CATEGORY}, - "predicted_value": predicted_value, - } - self.set_predictor(predictor) - - ret = self.execute(""" - select p, a from mindsdb.task_model where a = 2 - """) - ret_df = self.ret_to_df(ret) - assert ret_df["p"][0] == predicted_value - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_dates(self, mock_handler): - df = pd.DataFrame( - [ - {"a": 1, "b": dt.datetime(2020, 1, 1)}, - {"a": 2, "b": dt.datetime(2020, 1, 2)}, - {"a": 1, "b": dt.datetime(2020, 1, 3)}, - ] - ) - self.set_handler(mock_handler, name="pg", tables={"tasks": df}) - - # --- use predictor --- - predictor = { - "name": "task_model", - "predict": "p", - "dtype_dict": {"p": DTYPE.FLOAT, "a": DTYPE.INT, "b": DTYPE.CATEGORY}, - "predicted_value": 3.14, - } - self.set_predictor(predictor) - - ret = self.execute(""" - SELECT a, last(b) - FROM ( - SELECT res.a, res.b - FROM pg.tasks as source - JOIN 
mindsdb.task_model as res - ) - group by 1 - order by a - """) - assert len(ret.data) == 2 - # is last datetime value of a = 1 - assert ret.data.to_lists()[0][1].isoformat() == dt.datetime(2020, 1, 3).isoformat() - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_ts_predictor(self, mock_handler): - # set integration data - - df = pd.DataFrame( - [ - {"a": 1, "t": dt.datetime(2020, 1, 1), "g": "x"}, - {"a": 2, "t": dt.datetime(2020, 1, 2), "g": "x"}, - {"a": 3, "t": dt.datetime(2020, 1, 3), "g": "x"}, - {"a": 11, "t": dt.datetime(2021, 1, 1), "g": "y"}, - {"a": 12, "t": dt.datetime(2021, 1, 2), "g": "y"}, - {"a": 33, "t": dt.datetime(2021, 1, 3), "g": "y"}, - ] - ) - self.set_handler(mock_handler, name="pg", tables={"tasks": df}) - - # --- use TS predictor --- - - predictor = { - "name": "task_model", - "predict": "a", - "problem_definition": { - "timeseries_settings": { - "is_timeseries": True, - "window": 2, - "order_by": "t", - "group_by": "g", - "horizon": 3, - } - }, - "dtype_dict": { - "a": DTYPE.INT, - "t": DTYPE.DATETIME, - "g": DTYPE.CATEGORY, - }, - "predicted_value": "", - } - self.set_predictor(predictor) - - # set predictor output - predict_result = [ - # window - {"a": 2, "t": dt.datetime(2020, 1, 2), "g": "x", "__mindsdb_row_id": 2}, - {"a": 3, "t": dt.datetime(2020, 1, 3), "g": "x", "__mindsdb_row_id": 3}, - # horizon - {"a": 4, "t": dt.datetime(2020, 1, 4), "g": "x", "__mindsdb_row_id": None}, - {"a": 5, "t": dt.datetime(2020, 1, 5), "g": "x", "__mindsdb_row_id": None}, - {"a": 6, "t": dt.datetime(2020, 1, 6), "g": "x", "__mindsdb_row_id": None}, - # window - {"a": 12, "t": dt.datetime(2021, 1, 2), "g": "y", "__mindsdb_row_id": 2}, - {"a": 13, "t": dt.datetime(2021, 1, 3), "g": "y", "__mindsdb_row_id": 3}, - # horizon - {"a": 14, "t": dt.datetime(2021, 1, 4), "g": "y", "__mindsdb_row_id": None}, - {"a": 15, "t": dt.datetime(2021, 1, 5), "g": "y", "__mindsdb_row_id": None}, - {"a": 16, "t": dt.datetime(2021, 1, 6), "g": 
"y", "__mindsdb_row_id": None}, - ] - predict_result = pd.DataFrame(predict_result) - self.mock_predict.side_effect = lambda *a, **b: predict_result - - # = latest ______________________ - ret = self.execute(""" - select t.t as t0, p.* from pg.tasks t - join mindsdb.task_model p - where t.t = latest - """) - - data = self.ret_to_df(ret).to_dict("records") - # one key with max value of a - assert len(data) == 2 - # first row - groups = [ - ["x", 3, dt.datetime(2020, 1, 3)], - ["y", 13, dt.datetime(2021, 1, 3)], - ] - if data[0]["g"] == "y": - # other sort order after duckdb join - groups.reverse() - for i, (group, val, date) in enumerate(groups): - assert data[i]["a"] == val - assert data[i]["t"] == date - assert data[i]["g"] == group - - # > latest ______________________ - ret = self.execute(""" - select t.t as t0, p.* from pg.tasks t - join mindsdb.task_model p - where t.t > latest - """) - - ret_df = self.ret_to_df(ret) - # 1st group - ret_df1 = ret_df[ret_df["g"] == "x"] - assert ret_df1.shape[0] == 3 - assert ret_df1.t.min() == dt.datetime(2020, 1, 4) - # table shouldn't join - assert ret_df1.t0.iloc[0] is None - - # 2nd group - ret_df1 = ret_df[ret_df["g"] == "y"] - assert ret_df1.shape[0] == 3 - assert ret_df1.t.min() == dt.datetime(2021, 1, 4) - # table shouldn't join - assert ret_df1.t0.iloc[0] is None - - # > date ______________________ - ret = self.execute(""" - select t.t as t0, p.* from pg.tasks t - join mindsdb.task_model p - where t.t > '2020-01-02' - """) - - ret_df = self.ret_to_df(ret) - - # 1st group - ret_df1 = ret_df[ret_df["g"] == "x"] - assert ret_df1.shape[0] == 4 - assert ret_df1.t.min() == dt.datetime(2020, 1, 3) - - # 2nd group - ret_df1 = ret_df[ret_df["g"] == "y"] - assert ret_df1.shape[0] == 5 # all records from predictor - assert ret_df1.t.min() == dt.datetime(2021, 1, 2) - - # between ______________________ - # set predictor output - predict_result = [ - # window - {"a": 1, "t": dt.datetime(2020, 1, 1), "g": "x", "__mindsdb_row_id": 
1}, - {"a": 2, "t": dt.datetime(2020, 1, 2), "g": "x", "__mindsdb_row_id": 2}, - {"a": 3, "t": dt.datetime(2020, 1, 3), "g": "x", "__mindsdb_row_id": 3}, - # horizon - {"a": 1, "t": dt.datetime(2020, 1, 4), "g": "x", "__mindsdb_row_id": None}, - {"a": 1, "t": dt.datetime(2020, 1, 5), "g": "x", "__mindsdb_row_id": None}, - {"a": 1, "t": dt.datetime(2020, 1, 6), "g": "x", "__mindsdb_row_id": None}, - ] - predict_result = pd.DataFrame(predict_result) - self.mock_predict.side_effect = lambda *a, **b: predict_result - - ret = self.execute(""" - select p.* from pg.tasks t - join mindsdb.task_model p - where t.t between '2020-01-02' and '2020-01-03' - """) - - ret_df = self.ret_to_df(ret) - assert ret_df.shape[0] == 2 - assert ret_df.t.min() == dt.datetime(2020, 1, 2) - assert ret_df.t.max() == dt.datetime(2020, 1, 3) - - # ------- limit ------- - ret = self.execute(""" - select p.* from pg.tasks t - join mindsdb.task_model p - where t.t between '2020-01-02' and '2020-01-03' - limit 1 - """) - - ret_df = self.ret_to_df(ret) - assert ret_df.shape[0] == 1 - assert ret_df.t.min() == dt.datetime(2020, 1, 2) - - # ----- empty data ------ - - ret = self.execute(""" - select p.* from pg.tasks t - join mindsdb.task_model p - where t.t > LATEST and t.g = 'wrong' - """) - - ret_df = self.ret_to_df(ret) - assert ret_df.shape[0] == 0 - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_ts_predictor_no_group(self, mock_handler): - # set integration data - - df = pd.DataFrame( - [ - {"a": 1, "t": dt.datetime(2020, 1, 1), "g": "x"}, - {"a": 2, "t": dt.datetime(2020, 1, 2), "g": "x"}, - {"a": 3, "t": dt.datetime(2020, 1, 3), "g": "x"}, - ] - ) - self.set_handler(mock_handler, name="pg", tables={"tasks": df}) - - # --- use TS predictor --- - - predictor = { - "name": "task_model", - "predict": "a", - "problem_definition": { - "timeseries_settings": {"is_timeseries": True, "window": 2, "order_by": "t", "horizon": 3} - }, - "dtype_dict": { - "a": DTYPE.INT, - "t": 
DTYPE.DATETIME, - "g": DTYPE.CATEGORY, - }, - "predicted_value": "", - } - self.set_predictor(predictor) - - # set predictor output - predict_result = [ - # window - {"a": 2, "t": dt.datetime(2020, 1, 2), "g": "x", "__mindsdb_row_id": 2}, - {"a": 3, "t": dt.datetime(2020, 1, 3), "g": "x", "__mindsdb_row_id": 3}, - # horizon - {"a": 1, "t": dt.datetime(2020, 1, 4), "g": "x", "__mindsdb_row_id": None}, - {"a": 1, "t": dt.datetime(2020, 1, 5), "g": "x", "__mindsdb_row_id": None}, - {"a": 1, "t": dt.datetime(2020, 1, 6), "g": "x", "__mindsdb_row_id": None}, - ] - predict_result = pd.DataFrame(predict_result) - self.mock_predict.side_effect = lambda *a, **b: predict_result - - # = latest ______________________ - ret = self.execute(""" - select p.* from pg.tasks t - join mindsdb.task_model p - where t.t = latest - """) - - ret_df = self.ret_to_df(ret) - # one key with max value of a - assert ret_df.shape[0] == 1 - assert ret_df.t[0] == dt.datetime(2020, 1, 3) - - # > latest ______________________ - ret = self.execute(""" - select t.t as t0, p.* from pg.tasks t - join mindsdb.task_model p - where t.t > latest - """) - - ret_df = self.ret_to_df(ret) - assert ret_df.shape[0] == 3 - assert ret_df.t.min() == dt.datetime(2020, 1, 4) - # table shouldn't join - assert ret_df.t0[0] is None - - # > date ______________________ - ret = self.execute(""" - select p.* from pg.tasks t - join mindsdb.task_model p - where t.t > '2020-01-02' - """) - - ret_df = self.ret_to_df(ret) - assert ret_df.shape[0] == 4 - assert ret_df.t.min() == dt.datetime(2020, 1, 3) - - # between ______________________ - # set predictor output - predict_result = [ - # window - {"a": 1, "t": dt.datetime(2020, 1, 1), "g": "x", "__mindsdb_row_id": 1}, - {"a": 2, "t": dt.datetime(2020, 1, 2), "g": "x", "__mindsdb_row_id": 2}, - {"a": 3, "t": dt.datetime(2020, 1, 3), "g": "x", "__mindsdb_row_id": 3}, - # horizon - {"a": 1, "t": dt.datetime(2020, 1, 4), "g": "x", "__mindsdb_row_id": None}, - {"a": 1, "t": 
dt.datetime(2020, 1, 5), "g": "x", "__mindsdb_row_id": None}, - {"a": 1, "t": dt.datetime(2020, 1, 6), "g": "x", "__mindsdb_row_id": None}, - ] - predict_result = pd.DataFrame(predict_result) - self.mock_predict.side_effect = lambda *a, **b: predict_result - - ret = self.execute(""" - select p.* from pg.tasks t - join mindsdb.task_model p - where t.t between '2020-01-02' and '2020-01-03' - """) - - ret_df = self.ret_to_df(ret) - assert ret_df.shape[0] == 2 - assert ret_df.t.min() == dt.datetime(2020, 1, 2) - assert ret_df.t.max() == dt.datetime(2020, 1, 3) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_ts_predictor_fix_order_by_modification(self, mock_handler): - # set integration data - - df = pd.DataFrame( - [ - {"a": 1, "t": dt.datetime(2020, 1, 1, 10, 0, 0), "g": "x"}, - {"a": 2, "t": dt.datetime(2020, 1, 1, 10, 1, 0), "g": "x"}, - {"a": 3, "t": dt.datetime(2020, 1, 1, 10, 2, 0), "g": "x"}, - ] - ) - self.set_handler(mock_handler, name="pg", tables={"tasks": df}) - - # --- use TS predictor --- - - predictor = { - "name": "task_model", - "predict": "a", - "problem_definition": { - "timeseries_settings": {"is_timeseries": True, "window": 2, "order_by": "t", "horizon": 3} - }, - "dtype_dict": { - "a": DTYPE.INT, - "t": DTYPE.DATETIME, - "g": DTYPE.CATEGORY, - }, - "predicted_value": "", - } - self.set_predictor(predictor) - - # set predictor output - predict_result = [ - # window - {"a": 2, "t": dt.datetime(2020, 1, 1, 10, 1, 0), "g": "x", "__mindsdb_row_id": 2}, - {"a": 3, "t": dt.datetime(2020, 1, 1, 10, 2, 0), "g": "x", "__mindsdb_row_id": 3}, - # horizon - {"a": 1, "t": dt.datetime(2020, 1, 1, 10, 3, 0), "g": "x", "__mindsdb_row_id": None}, - {"a": 1, "t": dt.datetime(2020, 1, 1, 10, 4, 0), "g": "x", "__mindsdb_row_id": None}, - {"a": 1, "t": dt.datetime(2020, 1, 1, 10, 5, 0), "g": "x", "__mindsdb_row_id": None}, - ] - predict_result = pd.DataFrame(predict_result) - self.mock_predict.side_effect = lambda *a, **b: predict_result 
- - # > latest ______________________ - ret = self.execute(""" - select t.t as t0, p.* from pg.tasks t - join mindsdb.task_model p - where t.t > latest - """) - - ret_df = self.ret_to_df(ret) - assert ret_df.shape[0] == 3 - assert ret_df.t.min() == dt.datetime(2020, 1, 1, 10, 3, 0) - # table shouldn't join - assert ret_df.t0[0] is None - - def test_ts_predictor_file(self): - # set integration data - - # save as file - df = pd.DataFrame( - [ - {"a": 1, "t": "2021", "g": "x"}, - {"a": 2, "t": "2022", "g": "x"}, - {"a": 3, "t": "2023", "g": "x"}, - ] - ) - - fd, file_path = tempfile.mkstemp(prefix="file_") - os.close(fd) - - df.to_csv(file_path) - - self.file_controller.save_file("tasks", file_path, "tasks") - - # --- use TS predictor --- - - predictor = { - "name": "task_model", - "predict": "a", - "problem_definition": { - "timeseries_settings": { - "is_timeseries": True, - "window": 2, - "order_by": "t", - "group_by": "g", - "horizon": 3, - } - }, - "dtype_dict": { - "a": DTYPE.INT, - "t": DTYPE.FLOAT, - "g": DTYPE.CATEGORY, - }, - "predicted_value": "", - } - self.set_predictor(predictor) - - # set predictor output - predict_result = [ - # window - {"a": 2, "t": np.float64(2022.0), "g": "x", "__mindsdb_row_id": 2}, - {"a": 3, "t": np.float64(2023.0), "g": "x", "__mindsdb_row_id": 3}, - # horizon - {"a": 1, "t": np.float64(2024.0), "g": "x", "__mindsdb_row_id": None}, - {"a": 1, "t": np.float64(2024.0), "g": "x", "__mindsdb_row_id": None}, - {"a": 1, "t": np.float64(2025.0), "g": "x", "__mindsdb_row_id": None}, - ] - predict_result = pd.DataFrame(predict_result) - self.mock_predict.side_effect = lambda *a, **b: predict_result - - # > latest ______________________ - ret = self.execute(""" - select p.* from files.tasks t - join mindsdb.task_model p - where t.t > latest - """) - - ret_df = self.ret_to_df(ret) - assert ret_df.shape[0] == 3 - assert ret_df.t.min() == 2024.0 - - # > latest with CTE - ret = self.execute(""" - WITH trainingdata AS ( - select a.t, a.* from 
files.tasks a - ) - select t.t as t0, p.* from trainingdata t - join mindsdb.task_model p - where t.t > latest and t.g = 'x' - """) - - ret_df = self.ret_to_df(ret) - assert ret_df.shape[0] == 3 - assert ret_df.t.min() == 2024.0 - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_drop_database(self, mock_handler): - from mindsdb.utilities.exception import EntityNotExistsError - - # remove existing (check different cases) - self.set_handler(mock_handler, name="pg", tables={}) - self.execute("drop database pg") - self.set_handler(mock_handler, name="PG", tables={}) - self.execute("drop database `PG`") - self.set_handler(mock_handler, name="pg", tables={}) - self.execute("drop database Pg") - - # try one more time - with pytest.raises(EntityNotExistsError): - self.execute("drop database pg") - - # try if exists - self.execute("drop database if exists pg") - - # try files - try: - self.execute("drop database files") - except Exception as e: - assert "is system database" in str(e) - else: - raise Exception("SqlApiException expected") - - def test_wrong_using(self): - with pytest.raises(Exception) as exc_info: - self.execute(""" - CREATE PREDICTOR task_model - FROM mindsdb - (select * from vtasks) - PREDICT a - using a=1 b=2 -- no ',' here - """) - - assert "Syntax error" in str(exc_info.value) - - -class TestComplexQueries(BaseExecutorMockPredictor): - df = pd.DataFrame( - [ - {"a": 1, "b": "aaa", "c": dt.datetime(2020, 1, 1)}, - {"a": 2, "b": "bbb", "c": dt.datetime(2020, 1, 2)}, - {"a": 1, "b": "ccc", "c": dt.datetime(2020, 1, 3)}, - ] - ) - - task_predictor = { - "name": "task_model", - "predict": "p", - "dtype_dict": {"p": DTYPE.FLOAT, "a": DTYPE.INT, "b": DTYPE.CATEGORY, "c": DTYPE.DATETIME}, - "predicted_value": "ccc", - } - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_union(self, mock_handler): - jobs_df = self.df.copy(deep=True) - jobs_df["a"][2] = 3 - self.set_handler(mock_handler, name="pg", 
tables={"tasks": self.df, "jobs": jobs_df}) - - # --- use predictor --- - self.set_predictor(self.task_predictor) - sql = """ - SELECT a as a1, b as target - FROM pg.tasks - UNION {union} - SELECT model.a as a2, model.p as target2 - FROM pg.tasks as t - JOIN mindsdb.task_model as model - WHERE t.a=1 - """ - # union all - ret = self.execute(sql.format(union="ALL")) - - ret_df = self.ret_to_df(ret) - assert list(ret_df.columns) == ["a1", "target"] - assert ret_df.shape[0] == 3 + 2 - - # union - ret = self.execute(sql.format(union="")) - - ret_df = self.ret_to_df(ret) - assert list(ret_df.columns) == ["a1", "target"] - assert ret_df.shape[0] == 3 - - # test union same db - sql = """ - SELECT * - FROM pg.tasks - UNION - SELECT * - FROM pg.jobs - """ - ret = self.execute(sql.format(union="")) - ret_df = self.ret_to_df(ret) - assert list(ret_df.columns) == ["a", "b", "c"] - assert ret_df.shape[0] == 4 - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_intersect(self, mock_handler): - df_a = pd.DataFrame([[1, "x"], [1, "x"], [2, "y"], [3, "z"]], columns=["a", "b"]) - df_b = pd.DataFrame([[1, "x"], [1, "x"], [2, "y"], [3, "w"]], columns=["a", "b"]) - - self.set_handler(mock_handler, name="pg1", tables={"df_a": df_a, "df_b": df_b}) - self.set_handler(mock_handler, name="pg2", tables={"df_a": df_a, "df_b": df_b}) - - # INTERSECT operations with tables from the same database and from different databases - # are processed differently, so tests should cover both scenarios. 
- for db_name in ["pg1", "pg2"]: - sql = f""" - SELECT * FROM pg1.df_a - INTERSECT - SELECT * FROM {db_name}.df_b - """ - ret = self.execute(sql) - ret_df = self.ret_to_df(ret) - assert list(ret_df.columns) == ["a", "b"] - assert ret_df.shape[0] == 2 - assert ((ret_df["a"] == 1) & (ret_df["b"] == "x")).any() - assert ((ret_df["a"] == 2) & (ret_df["b"] == "y")).any() - - sql = f""" - SELECT * FROM pg1.df_a - INTERSECT DISTINCT - SELECT * FROM {db_name}.df_b - """ - ret = self.execute(sql) - ret_df = self.ret_to_df(ret) - assert list(ret_df.columns) == ["a", "b"] - assert ret_df.shape[0] == 2 - assert ((ret_df["a"] == 1) & (ret_df["b"] == "x")).any() - assert ((ret_df["a"] == 2) & (ret_df["b"] == "y")).any() - - sql = f""" - SELECT * FROM pg1.df_a - INTERSECT ALL - SELECT * FROM {db_name}.df_b - """ - ret = self.execute(sql) - ret_df = self.ret_to_df(ret) - assert list(ret_df.columns) == ["a", "b"] - assert ret_df.shape[0] == 3 - assert ((ret_df["a"] == 2) & (ret_df["b"] == "y")).any() - assert ret_df[(ret_df["a"] == 1) & (ret_df["b"] == "x")].shape[0] == 2 - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_update_from_select(self, mock_handler): - self.set_handler(mock_handler, name="pg", tables={"tasks": self.df}) - - # --- use predictor --- - self.set_predictor(self.task_predictor) - sql = """ - update - pg.table2 - set - a1 = df.a, - c1 = df.c - from - ( - SELECT model.a as a, model.b as b, model.p as c - FROM pg.tasks as t - JOIN mindsdb.task_model as model - WHERE t.a=1 - ) - as df - where - table2.a1 = df.a - and table2.b1 = df.b - """ - - self.execute(sql) - - # 1 select and 2 updates - assert mock_handler().query.call_count == 3 - - # second is update - assert ( - mock_handler().query.call_args_list[1][0][0].to_string() - == "update table2 set a1=1, c1='ccc' where a1 = 1 AND b1 = 'ccc'" - ) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_update_in_integration(self, mock_handler): - 
self.set_handler(mock_handler, name="pg", tables={}) - - sql = """ - update - pg.table2 - set - a1 = 1, - c1 = 'ccc' - where - b1 = 'b' - """ - - self.execute(sql) - - # 1 select and 2 updates - assert mock_handler().query.call_count == 1 - - # second is update - assert ( - mock_handler().query.call_args_list[0][0][0].to_string() - == "update table2 set a1=1, c1='ccc' where b1 = 'b'" - ) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_delete_in_integration(self, mock_handler): - self.set_handler(mock_handler, name="pg", tables={}) - - sql = """ - delete from - pg.table2 - where - b1 = 'b' - """ - - self.execute(sql) - - # 1 select and 2 updates - assert mock_handler().query.call_count == 1 - - # second is update - assert mock_handler().query.call_args_list[0][0][0].to_string() == "DELETE FROM table2 WHERE b1 = 'b'" - - @patch("mindsdb.integrations.handlers.mysql_handler.Handler") - def test_insert(self, mock_handler): - self.set_handler(mock_handler, name="pg", tables={"tasks": self.df}, engine="mysql") - del mock_handler().insert - - sql = "insert into pg.tasks (a) values (3);" - resp = self.execute(sql) - calls = mock_handler().query.call_args_list - assert len(calls[0][0][0].columns) == 1 - assert calls[0][0][0].values == [[3]] - assert resp.affected_rows == 1 - - sql = "insert into pg.tasks (a, b) values (3, 4)" - resp = self.execute(sql) - assert len(calls[1][0][0].columns) == 2 - assert calls[1][0][0].values == [[3, 4]] - assert resp.affected_rows == 1 - - sql = "insert into pg.tasks values (1, 2, '2020-01-01 00:00:00')" - resp = self.execute(sql) - assert len(calls[2][0][0].columns) == 3 - assert len(calls[2][0][0].values[0]) == 3 - assert resp.affected_rows == 1 - - @patch("mindsdb.integrations.handlers.mysql_handler.Handler") - def test_create_table(self, mock_handler): - self.set_handler(mock_handler, name="pg", tables={"tasks": self.df}, engine="mysql") - - # prevent hasattr=true - del mock_handler().insert - - 
self.set_predictor(self.task_predictor) - sql = """ - create table pg.table1 - ( - SELECT model.a as a, model.b as b, model.p as c - FROM pg.tasks as t - JOIN mindsdb.task_model as model - WHERE t.a=1 - ) - """ - - self.execute(sql) - - calls = mock_handler().query.call_args_list - - # select for predictor - assert to_str(calls[0][0][0]) == "SELECT * FROM tasks AS t WHERE a = 1" - - # create table - assert to_str(calls[1][0][0]) == "CREATE TABLE table1 ( a INTEGER, b TEXT, c TEXT )" - - # load table - assert to_str(calls[2][0][0]) == "INSERT INTO table1 (a, b, c) VALUES (1, 'aaa', 'ccc'), (1, 'ccc', 'ccc')" - - assert len(calls) == 3 - - @patch("mindsdb.integrations.handlers.mysql_handler.Handler") - def test_create_insert(self, mock_handler): - self.set_handler(mock_handler, name="pg", tables={"tasks": self.df}, engine="mysql") - - # prevent hasattr=true - del mock_handler().insert - - self.set_predictor(self.task_predictor) - sql = """ - insert into pg.table1 - ( - SELECT model.a as a, model.b as b, model.p as c - FROM pg.tasks as t - JOIN mindsdb.task_model as model - WHERE t.a=1 - ) - """ - - self.execute(sql) - - calls = mock_handler().query.call_args_list - - # select for predictor - assert to_str(calls[0][0][0]) == "SELECT * FROM tasks AS t WHERE a = 1" - - # load table - assert to_str(calls[1][0][0]) == "INSERT INTO table1 (a, b, c) VALUES (1, 'aaa', 'ccc'), (1, 'ccc', 'ccc')" - - assert len(calls) == 2 - - @patch("mindsdb.integrations.handlers.mysql_handler.Handler") - def test_affected_rows(self, mock_handler): - """Test that the `affected_rows` are returned correctly for Delete/Insert/Update""" - self.set_handler(mock_handler, name="pg", tables={"tasks": self.df}, engine="mysql") - - del mock_handler().insert - - sql = "delete from pg.tasks where a = 2" - resp = self.execute(sql) - assert resp.affected_rows == 1 - - sql = "insert into pg.tasks (a) values (3), (4)" - resp = self.execute(sql) - assert resp.affected_rows == 2 - - sql = "update pg.tasks set 
a = 0" - resp = self.execute(sql) - assert resp.affected_rows == 3 - - @patch("mindsdb.integrations.handlers.mysql_handler.Handler") - def test_cte(self, mock_handler): - test_df_1 = pd.DataFrame( - [ - [1, "a"], - [1, "b"], - [2, "b"], - [3, "c"], - ], - columns=["a", "b"], - ) - - test_df_2 = pd.DataFrame( - [ - [1, "a"], - [2, "b"], - [2, "b"], - [3, "c"], - ], - columns=["a", "c"], - ) - self.set_handler(mock_handler, name="pg", tables={"test_t1": test_df_1, "test_t2": test_df_2}, engine="mysql") - - # NOTE important to test joins with different count of rows (0, 1, many), - # as this can affect the actual query that is executed. - sql = """ - WITH ta AS ( - SELECT 'a' AS a, 2 AS b - ), tb AS ( - SELECT 'a' AS a, 'b' AS c - ) - SELECT ta.a, ta.b, tb.c - FROM ta - LEFT JOIN tb ON ta.a = tb.a; - """ - resp = self.execute(sql) - pdt.assert_frame_equal( - resp.data.to_df(), pd.DataFrame([["a", 2, "b"]], columns=["a", "b", "c"]), check_dtype=False - ) - - sql = """ - WITH ta AS ( - SELECT 'a' AS a, 2 AS b - UNION ALL - SELECT 'b' AS a, 2 AS b - ), tb AS ( - SELECT 'a' AS a, 'b' AS c - ) - SELECT ta.a, ta.b, tb.c - FROM ta - LEFT JOIN tb ON ta.a = tb.a; - """ - resp = self.execute(sql) - pdt.assert_frame_equal( - resp.data.to_df().sort_values(by=["a", "b", "c"], ignore_index=True), - pd.DataFrame([["a", 2, "b"], ["b", 2, None]], columns=["a", "b", "c"]).sort_values( - by=["a", "b", "c"], ignore_index=True - ), - check_dtype=False, - ) - - sql = """ - WITH ta AS ( - SELECT 'a' AS a, 2 AS b - UNION ALL - SELECT 'b' AS a, 2 AS b - ), tb AS ( - SELECT 'a' AS a, 'b' AS c - UNION ALL - SELECT 'a' AS a, 'c' AS c - ) - SELECT ta.a, ta.b, tb.c - FROM ta - LEFT JOIN tb ON ta.a = tb.a; - """ - resp = self.execute(sql) - pdt.assert_frame_equal( - resp.data.to_df(), - pd.DataFrame([["a", 2, "b"], ["a", 2, "c"], ["b", 2, None]], columns=["a", "b", "c"]), - check_dtype=False, - ) - - sql = """ - WITH ta AS ( - SELECT 1 as a, 'a' AS b - UNION ALL - SELECT 1 as a, 'b' AS b - ), tb AS 
( - select * from pg.test_t1 - ) - SELECT ta.a, ta.b, tb.b as c - FROM ta - LEFT JOIN tb ON ta.a = tb.a; - """ - resp = self.execute(sql) - pdt.assert_frame_equal( - resp.data.to_df().sort_values(by=["a", "b", "c"], ignore_index=True), - pd.DataFrame( - [[1, "a", "a"], [1, "a", "b"], [1, "b", "a"], [1, "b", "b"]], columns=["a", "b", "c"] - ).sort_values(by=["a", "b", "c"], ignore_index=True), - check_dtype=False, - ) - - sql = """ - WITH ta AS ( - SELECT 1 as a, 'a' AS b - UNION ALL - SELECT 1 as a, 'b' AS b - ), tb AS ( - select * from pg.test_t1 - ) - SELECT ta.a as a, ta.b as b, tb.b as c - FROM ta - LEFT JOIN tb ON ta.a = tb.a - order by a, b, c; - """ - resp = self.execute(sql) - pdt.assert_frame_equal( - resp.data.to_df(), - pd.DataFrame([[1, "a", "a"], [1, "a", "b"], [1, "b", "a"], [1, "b", "b"]], columns=["a", "b", "c"]), - check_dtype=False, - ) - - sql = """ - WITH ta AS ( - select * from pg.test_t1 where 1 = 0 - ), tb AS ( - select * from pg.test_t2 where c = 'c' - ) - SELECT ta.a, ta.b, tb.c as c - FROM ta - LEFT JOIN tb ON ta.a = tb.a; - """ - resp = self.execute(sql) - assert len(resp.data) == 0 - - sql = """ - WITH ta AS ( - select * from pg.test_t1 where b = 'b' - ), tb AS ( - select * from pg.test_t2 where 1 = 0 - ) - SELECT ta.a, ta.b, tb.c as c - FROM ta - LEFT JOIN tb ON ta.a = tb.a; - """ - resp = self.execute(sql) - pdt.assert_frame_equal( - resp.data.to_df().sort_values(by=["a", "b", "c"], ignore_index=True), - pd.DataFrame([[1, "b", None], [2, "b", None]], columns=["a", "b", "c"]).sort_values( - by=["a", "b", "c"], ignore_index=True - ), - check_dtype=False, - ) - - sql = """ - WITH ta AS ( - select * from pg.test_t1 - ), tb AS ( - select * from pg.test_t2 where c = 'c' - ) - SELECT ta.a, ta.b, tb.c as c - FROM ta - LEFT JOIN tb ON ta.a = tb.a; - """ - resp = self.execute(sql) - pdt.assert_frame_equal( - resp.data.to_df().sort_values(by=["a", "b", "c"], ignore_index=True), - pd.DataFrame( - [[1, "a", None], [1, "b", None], [2, "b", None], 
[3, "c", "c"]], columns=["a", "b", "c"] - ).sort_values(by=["a", "b", "c"], ignore_index=True), - check_dtype=False, - ) - - # different case - sqls = [ - """ - WITH Ta as ( - select 1 as x - ) - select * from ta - """, - """ - WITH ta as ( - select 1 as x - ) - select * from Ta - """, - ] - for sql in sqls: - resp = self.execute(sql) - pdt.assert_frame_equal( - resp.data.to_df(), - pd.DataFrame([[1]], columns=["x"]), - check_dtype=False, - ) - - sql = """ - WITH `Ta` as ( - select 1 as x - ) - select * from ta - """ - with pytest.raises(Exception): - resp = self.execute(sql) - - # @patch('mindsdb.integrations.handlers.postgres_handler.Handler') - # def test_union_type_mismatch(self, mock_handler): - # self.set_handler(mock_handler, name='pg', tables={'tasks': self.df}) - # - # sql = ''' - # SELECT a, b FROM pg.tasks - # UNION - # SELECT b, a FROM pg.tasks - # ''' - # from mindsdb.api.mysql.mysql_proxy.utilities import ErSqlWrongArguments - # with pytest.raises(ErSqlWrongArguments): - # self.command_executor.execute_command(parse_sql(sql)) - - -class TestTableau(BaseExecutorMockPredictor): - task_table = pd.DataFrame( - [ - {"a": 1, "b": "one"}, - {"a": 2, "b": "two"}, - {"a": 1, "b": "three"}, - ] - ) - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_predictor_nested_select(self, mock_handler): - self.set_handler(mock_handler, name="pg", tables={"tasks": self.task_table}) - - # --- use predictor --- - predictor = { - "name": "task_model", - "predict": "p", - "dtype_dict": {"p": DTYPE.FLOAT, "a": DTYPE.INT, "b": DTYPE.CATEGORY}, - "predicted_value": 3.14, - } - self.set_predictor(predictor) - ret = self.execute(""" - SELECT - `Custom SQL Query`.`a` AS `height`, - last(`Custom SQL Query`.`b`) AS `lengtht` - FROM ( - SELECT res.a, res.b - FROM pg.tasks as source - JOIN mindsdb.task_model as res - ) `Custom SQL Query` - group by 1 - order by `height` - LIMIT 1 - """) - - # second column is having last value of 'b' - assert 
ret.data.to_lists()[0][1] == "three" - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_predictor_tableau_header(self, mock_handler): - self.set_handler(mock_handler, name="pg", tables={"tasks": self.task_table}) - - # --- use predictor --- - predicted_value = 5 - predictor = { - "name": "task_model", - "predict": "p", - "dtype_dict": {"p": DTYPE.FLOAT, "a": DTYPE.INT, "b": DTYPE.CATEGORY}, - "predicted_value": predicted_value, - } - self.set_predictor(predictor) - ret = self.execute(""" - SELECT - SUM(1) AS `cnt__0B4A4E8BD11C48FFB4730D4D2C32191A_ok`, - sum(`Custom SQL Query`.`a`) AS `sum_height_ok`, - max(`Custom SQL Query`.`p`) AS `sum_length1_ok` - FROM ( - SELECT res.a, res.p - FROM pg.tasks as source - JOIN mindsdb.task_model as res - ) `Custom SQL Query` - HAVING (COUNT(1) > 0) - """) - - # second column is having last value of 'b' - # 3: count rows, 4: sum of 'a', 5 max of prediction - assert ret.data.to_lists()[0] == [3, 4, 5] - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_predictor_tableau_header_alias(self, mock_handler): - self.set_handler(mock_handler, name="pg", tables={"tasks": self.task_table}) - - # --- use predictor --- - predicted_value = 5 - predictor = { - "name": "task_model", - "predict": "p", - "dtype_dict": {"p": DTYPE.FLOAT, "a": DTYPE.INT, "b": DTYPE.CATEGORY}, - "predicted_value": predicted_value, - } - self.set_predictor(predictor) - ret = self.execute(""" - SELECT - max(a1) AS a1, - min(a2) AS a2 - FROM ( - SELECT source.a as a1, source.a as a2 - FROM pg.tasks as source - JOIN mindsdb.task_model as res - ) t1 - HAVING (COUNT(1) > 0) - """) - - # second column is having last value of 'b' - # 3: count rows, 4: sum of 'a', 5 max of prediction - assert ret.data.to_lists()[0] == [2, 1] - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_integration_subselect_no_alias(self, mock_handler): - self.set_handler(mock_handler, name="pg", tables={"tasks": 
self.task_table}) - - ret = self.execute(""" - SELECT max(y2) FROM ( - select a as y2 from pg.tasks - ) - """) - - # second column is having last value of 'b' - # 3: count rows, 4: sum of 'a', 5 max of prediction - assert ret.data.to_lists()[0] == [2] - - -class TestWithNativeQuery(BaseExecutorMockPredictor): - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_integration_native_query(self, mock_handler): - data = [[3, "y"], [1, "y"]] - df = pd.DataFrame(data, columns=["a", "b"]) - self.set_handler(mock_handler, name="pg", tables={"tasks": df}) - - ret = self.execute("select max(a) from pg (select * from tasks) group by b") - - # native query was called - assert mock_handler().native_query.call_args[0][0] == "select * from tasks" - assert ret.data.to_lists()[0][0] == 3 - - @pytest.mark.slow - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_view_native_query(self, mock_handler): - data = [[3, "y"], [1, "y"]] - df = pd.DataFrame(data, columns=["a", "b"]) - self.set_handler(mock_handler, name="pg", tables={"tasks": df}) - - # --- create view --- - self.execute("create view mindsdb.vtasks (select * from pg (select * from tasks))") - - # --- select from view --- - ret = self.execute("select * from mindsdb.vtasks") - # view response equals data from integration - assert ret.data.to_lists() == data - - # --- create predictor --- - mock_handler.reset_mock() - self.execute(""" - CREATE PREDICTOR task_model - FROM mindsdb - (select * from vtasks) - PREDICT a - using - engine='dummy_ml', - join_learn_process=true - """) - - # test creating with if not exists - self.execute(""" - CREATE PREDICTOR IF NOT EXISTS task_model - FROM mindsdb - (select * from vtasks) - PREDICT a - using - engine='dummy_ml', - join_learn_process=true - """) - - # learn was called. 
- # TODO check input to ML handler - # assert self.mock_create.call_args[0][0].name.to_string() == 'task_model' # it exec in separate process - # integration was called - # TODO: integration is not called during learn process because learn function is mocked - # (data selected inside learn function) - # assert mock_handler().native_query.call_args[0][0] == 'select * from tasks' - - # test alter view - view_query = "select * from pg (select a, b from tasks)" - self.execute(f"alter view mindsdb.vtasks as ({view_query})") - ret = self.execute("select * from information_schema.views") - assert len(ret.data) == 1 - assert ret.data.records[0]["QUERY"] == view_query - - # --- drop view --- - self.execute("drop view vtasks") - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_use_predictor_with_view(self, mock_handler): - # set integration data - - df = pd.DataFrame( - [ - {"a": 1, "b": "one"}, - {"a": 2, "b": "two"}, - {"a": 1, "b": "three"}, - ] - ) - self.set_handler(mock_handler, name="pg", tables={"tasks": df}) - - view_name = "vtasks" - # --- create view --- - self.execute(f"create view mindsdb.{view_name} (select * from pg (select * from tasks))") - - # --- use predictor --- - predicted_value = 3.14 - predictor = { - "name": "task_model", - "predict": "p", - "dtype_dict": {"p": DTYPE.FLOAT, "a": DTYPE.INT, "b": DTYPE.CATEGORY}, - "predicted_value": predicted_value, - } - self.set_predictor(predictor) - ret = self.execute(f""" - select m.p, v.a - from mindsdb.{view_name} v - join mindsdb.task_model m - where v.a = 2 - """) - - # native query was called - assert mock_handler().native_query.call_args[0][0] == "select * from tasks" - - # check predictor call - # input = one row whit a==2 - data_in = self.mock_predict.call_args[0][1] - assert len(data_in) == 1 - assert data_in._predict_df.iloc[0]["a"] == 2 - - # check prediction - assert ret.data.to_lists()[0][0] == predicted_value - assert len(ret.data) == 1 - - 
@patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_use_ts_predictor_with_view(self, mock_handler): - # set integration data - - df = pd.DataFrame( - [ - {"a": 1, "t": dt.datetime(2020, 1, 1), "g": "x"}, - {"a": 2, "t": dt.datetime(2020, 1, 2), "g": "x"}, - {"a": 3, "t": dt.datetime(2020, 1, 3), "g": "x"}, - {"a": 4, "t": dt.datetime(2020, 1, 1), "g": "y"}, - {"a": 5, "t": dt.datetime(2020, 1, 2), "g": "y"}, - {"a": 6, "t": dt.datetime(2020, 1, 3), "g": "y"}, - {"a": 7, "t": dt.datetime(2020, 1, 1), "g": "z"}, - {"a": 8, "t": dt.datetime(2020, 1, 2), "g": "z"}, - {"a": 9, "t": dt.datetime(2020, 1, 3), "g": "z"}, - ] - ) - self.set_handler(mock_handler, name="pg", tables={"tasks": df}) - view_name = "vtasks" - # --- create view --- - self.execute(f"create view {view_name} (select * from pg (select * from tasks))") - - # --- use TS predictor --- - predicted_value = "right" - predictor = { - "name": "task_model", - "predict": "p", - "problem_definition": { - "timeseries_settings": { - "is_timeseries": True, - "window": 10, - "order_by": "t", - "group_by": "g", - "horizon": 1, - } - }, - "dtype_dict": { - "p": DTYPE.CATEGORY, - "a": DTYPE.INT, - "t": DTYPE.DATETIME, - "g": DTYPE.CATEGORY, - }, - "predicted_value": predicted_value, - } - self.set_predictor(predictor) - ret = self.execute(f""" - select task_model.* - from mindsdb.{view_name} - join mindsdb.task_model - where {view_name}.t = latest - """) - - # native query was called without filters - assert mock_handler().native_query.call_args[0][0] == "select * from tasks" - - # input to predictor all 9 rows - when_data = self.mock_predict.call_args[0][1]._predict_df - assert len(when_data) == 9 - - # all group values in input - group_values = {"x", "y", "z"} - assert set(pd.DataFrame(when_data)["g"].unique()) == group_values - - # check prediction - # output is has g=='y' or None - ret_df = self.ret_to_df(ret) - # all group values in output - assert set(ret_df["g"].unique()) == group_values - 
- # p is predicted value - assert ret_df["p"][0] == predicted_value - - -class TestSteps(BaseExecutorMockPredictor): - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def disabled_test_join_2_tables(self, mock_handler): - # tests for FilterStep and limitoffsetStep - # disabled: current JoinStep is not supporting join with condition - - df = pd.DataFrame( - [ - {"a": 1, "t": dt.datetime(2020, 1, 1), "g": "x"}, - {"a": 2, "t": dt.datetime(2020, 1, 2), "g": "x"}, - {"a": 3, "t": dt.datetime(2020, 1, 3), "g": "x"}, - ] - ) - - self.set_handler(mock_handler, name="pg", tables={"tasks": df, "task2": df}) - self.execute(""" - select t.* from pg.tasks - join pg.tasks2 on tasks.a=tasks2.a - where t.a > 1 - limit 1 - """) - - -class TestExecutionTools: - def test_query_df(self): - d = [{"TRAINING_OPTIONS": {"b": {"x": "A", "y": "B"}}}, {"TRAINING_OPTIONS": {"b": {"x": "A"}}}] - df = pd.DataFrame(d) - query_df(df, "select * from models") - - def test_query_df_with_rollup(self): - """Test GROUP BY WITH ROLLUP functionality""" - # Create test data with hierarchical structure - df = pd.DataFrame( - [ - {"country": "USA", "city": "NY", "amount": 100}, - {"country": "USA", "city": "NY", "amount": 150}, - {"country": "USA", "city": "LA", "amount": 200}, - {"country": "UK", "city": "London", "amount": 250}, - {"country": "UK", "city": "London", "amount": 300}, - ] - ) - - result = query_df( - df, - """ - SELECT country, SUM(amount) as total - FROM df - GROUP BY country WITH ROLLUP - """, - ) - - # Should have 3 rows: USA, UK, and grand total (NULL) - assert len(result) == 3 - # Check that we have a NULL row (grand total) - null_rows = result[result["country"].isna()] - assert len(null_rows) == 1 - # Grand total should be 1000 - assert null_rows["total"].values[0] == 1000 - - # Test multiple column ROLLUP - result = query_df( - df, - """ - SELECT country, city, SUM(amount) as total - FROM df - GROUP BY country, city WITH ROLLUP - """, - ) - - # Should have: - # - 3 
detail rows (USA-NY, USA-LA, UK-London) - # - 2 country subtotals (USA-NULL, UK-NULL) - # - 1 grand total (NULL-NULL) - # Total: 6 rows - assert len(result) == 6 - - # Check country subtotals (city is NULL but country is not) - country_subtotals = result[result["city"].isna() & result["country"].notna()] - assert len(country_subtotals) == 2 - - # Check USA subtotal - usa_subtotal = country_subtotals[country_subtotals["country"] == "USA"] - assert len(usa_subtotal) == 1 - assert usa_subtotal["total"].values[0] == 450 # 100 + 150 + 200 - - # Check UK subtotal - uk_subtotal = country_subtotals[country_subtotals["country"] == "UK"] - assert len(uk_subtotal) == 1 - assert uk_subtotal["total"].values[0] == 550 # 250 + 300 - - # Check grand total (both NULL) - grand_total = result[result["country"].isna() & result["city"].isna()] - assert len(grand_total) == 1 - assert grand_total["total"].values[0] == 1000 - - def test_query_df_functions(self): - cur_time = dt.datetime.now() - tests = [ - {"query": "to_base64('test')", "result": "dGVzdA=="}, - {"query": "char_length('海豚')", "result": 2}, - {"query": "length('海豚')", "result": 6}, - {"query": "char(77, 78, 79)", "result": "MNO"}, - {"query": "locate('no', 'yes')", "result": 0}, - {"query": "locate('no', 'yesnoyes')", "result": 4}, - {"query": "format(1234567.89, 0)", "result": "1,234,568"}, - {"query": "format(1234567.89, 3)", "result": "1,234,567.890"}, - {"query": "format(f_float, 2)", "result": "1.10"}, - {"query": "FORMAT('{:,.2f}', 1234567.89)", "result": "1,234,567.89"}, - { - "query": "sha2('abc')", - "result": "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad", - }, - {"query": "REGEXP_SUBSTR('abc def ghi', '[a-z]+')", "result": "abc"}, - {"query": "REGEXP_SUBSTR('abc def ghi', '[a-z]+', 1, 1)", "result": "abc"}, - {"query": "substring_index('www.mysql.com', '.', 2)", "result": "www.mysql"}, - {"query": "substring_index('www.mysql.com', '.', 1)", "result": "www"}, - { - "query": 
"TIMESTAMPDIFF(MINUTE,'2003-02-01','2003-05-01 12:05:55')", - "result": 128885, - }, - {"query": "TIMESTAMPDIFF(MONTH,'2003-02-01','2003-05-01')", "result": 3}, - { - "query": "EXTRACT(YEAR FROM '2019-07-02')", - "result": 2019, - }, - {"query": "EXTRACT(YEAR_MONTH FROM '2019-07-02')", "result": 201907}, - {"query": "EXTRACT(DAY_MINUTE FROM created)", "result": 302223}, - {"query": "GET_FORMAT(DATE,'ISO')", "result": "%Y-%m-%d"}, - {"query": "GET_FORMAT(DATETIME,'EUR')", "result": "%Y-%m-%d %H.%i.%s"}, - {"query": "DATE_FORMAT('2009-10-30 22:23:11', '%X %V %H:%i:%s')", "result": "2009 43 22:23:11"}, - {"query": "DATE_FORMAT(created, GET_FORMAT(DATE,'EUR'))", "result": "30.10.2009"}, - {"query": "FROM_UNIXTIME(1447430881)", "result": dt.datetime.fromisoformat("2015-11-13 16:08:01")}, - {"query": "FROM_UNIXTIME(f_seconds)", "result": dt.datetime.fromisoformat("2015-11-13 16:08:01")}, - {"query": "FROM_DAYS(730669)", "result": dt.datetime.fromisoformat("2000-07-03")}, - {"query": "FROM_DAYS(f_days)", "result": dt.datetime.fromisoformat("2000-07-03")}, - {"query": "DAYOFYEAR('2009-10-30')", "result": 303}, - {"query": "DAYOFYEAR(created)", "result": 303}, - {"query": "DAYOFWEEK('2009-10-30')", "result": 6}, - {"query": "DAYOFWEEK(created)", "result": 6}, - {"query": "DAYOFMONTH('2009-10-30')", "result": 30}, - {"query": "DAY(created)", "result": 30}, - {"query": "DAYNAME('2009-10-30')", "result": "Friday"}, - {"query": "DAYNAME(created)", "result": "Friday"}, - {"query": "DAYNAME('2009-10-30')", "result": "Friday"}, - {"query": "DAYNAME(created)", "result": "Friday"}, - {"query": "CURDATE()", "result": dt.datetime(cur_time.year, cur_time.month, cur_time.day)}, - {"query": "DATEDIFF('2011-01-15 01:02:03', '2009-10-30 22:23:11')", "result": 442}, - {"query": "DATEDIFF(updated, created)", "result": 442}, - {"query": "DATE_ADD('2011-01-15', INTERVAL 31 DAY)", "result": dt.datetime.fromisoformat("2011-02-15")}, - { - "query": "ADDDATE(updated, INTERVAL '31 DAY')", - 
"result": dt.datetime.fromisoformat("2011-02-15 01:02:03"), - }, - {"query": "DATE_SUB('2011-01-15', INTERVAL 31 DAY)", "result": dt.datetime.fromisoformat("2010-12-15")}, - {"query": "DATE_SUB(updated, INTERVAL 31 DAY)", "result": dt.datetime.fromisoformat("2010-12-15 01:02:03")}, - { - "query": "ADDTIME('2011-01-15 01:02:03', '1 1:1:1.2')", - "result": dt.datetime.fromisoformat("2011-01-16 02:03:04.200"), - }, - {"query": "ADDTIME(updated, '1 1:1:1.2')", "result": dt.datetime.fromisoformat("2011-01-16 02:03:04.200")}, - { - "query": "CONVERT_TZ('2009-10-30 22:23:11','GMT','MET')", - "result": dt.datetime.fromisoformat("2009-10-30 23:23:11"), - }, - {"query": "CONVERT_TZ(created,'GMT','MET')", "result": dt.datetime.fromisoformat("2009-10-30 23:23:11")}, - ] - - df = pd.DataFrame( - [ - { - "f_seconds": 1447430881, - "f_days": 730669, - "f_float": 1.1, - "created": "2009-10-30 22:23:11", - "updated": "2011-01-15 01:02:03", - } - ] - ) - - for test in tests: - query = f"select {test['query']} as result from df" - expected_result = test["result"] - - result = query_df(df, query)["result"][0] - assert result == expected_result - - query = "select CURTIME() as result from df" - result = query_df(df, query)["result"][0] - assert isinstance(result, dt.time) - - def test_not_exists_correlated_subquery(self): - a = pd.DataFrame( - [ - {"tab_num": 1, "shop": 1}, - {"tab_num": 1, "shop": 2}, - {"tab_num": 1, "shop": 3}, - {"tab_num": 2, "shop": 1}, - {"tab_num": 2, "shop": 2}, - {"tab_num": 3, "shop": 1}, - ] - ) - b = pd.DataFrame([{"shop": 1}, {"shop": 2}, {"shop": 3}]) - - result = query_dfs( - {"A": a, "B": b}, - parse_sql( - """ - SELECT DISTINCT a1.tab_num - FROM A a1 - WHERE NOT EXISTS ( - SELECT * FROM B b - WHERE NOT EXISTS ( - SELECT * FROM A a2 - WHERE a2.tab_num = a1.tab_num AND a2.shop = b.shop - ) - ) - """, - dialect="mindsdb", - ), - ) - - # Only tab_num=1 covers all shops {1, 2, 3} - assert list(result["tab_num"]) == [1] - - def 
test_exists_correlated_subquery(self): - # EXISTS version: find tab_num values missing at least one shop. - # tab_num=2 misses shop=3, tab_num=3 misses shops 2 and 3. - a = pd.DataFrame( - [ - {"tab_num": 1, "shop": 1}, - {"tab_num": 1, "shop": 2}, - {"tab_num": 1, "shop": 3}, - {"tab_num": 2, "shop": 1}, - {"tab_num": 2, "shop": 2}, - {"tab_num": 3, "shop": 1}, - ] - ) - b = pd.DataFrame([{"shop": 1}, {"shop": 2}, {"shop": 3}]) - - result = query_dfs( - {"A": a, "B": b}, - parse_sql( - """ - SELECT DISTINCT a1.tab_num - FROM A a1 - WHERE EXISTS ( - SELECT * FROM B b - WHERE NOT EXISTS ( - SELECT * FROM A a2 - WHERE a2.tab_num = a1.tab_num AND a2.shop = b.shop - ) - ) - """, - dialect="mindsdb", - ), - ) - - assert sorted(result["tab_num"].tolist()) == [2, 3] - - -class TestIfExistsIfNotExists(BaseExecutorMockPredictor): - def setup_method(self, method): - super().setup_method() - self.set_executor(mock_predict=True, mock_model_controller=True, import_dummy_ml=True) - - def test_ml_engine(self): - from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError - - sql = """ - CREATE ML_ENGINE test_engine - FROM dummy_ml - """ - - # create an ml engine - self.execute(sql) - - # create the same ml engine without if not exists throws an error - with pytest.raises(EntityExistsError): - self.execute(sql) - - # create the same ml engine with if not exists doesn't throw an error - self.execute(""" - CREATE ML_ENGINE IF NOT EXISTS test_engine - FROM dummy_ml - """) - - # create an engine with if not exists should indeed create a new engine - self.execute(""" - CREATE ML_ENGINE IF NOT EXISTS test_engine2 - FROM dummy_ml - """) - - # check that the engine was indeed created - ret = self.execute(""" - SHOW ML_ENGINES - WHERE name = 'test_engine2' - """) - assert len(ret.data) == 1 - - # drop the engine - self.execute("DROP ML_ENGINE test_engine2") - - # check that the engine was indeed dropped - ret = self.execute(""" - SHOW ML_ENGINES - WHERE name = 
'test_engine2' - """) - assert len(ret.data) == 0 - - # drop again without if exists should throw an error - with pytest.raises(EntityNotExistsError): - self.execute("DROP ML_ENGINE test_engine2") - - # drop again with if exists should not throw an error - self.execute("DROP ML_ENGINE IF EXISTS test_engine2") - - def test_predictor(self): - from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError - - # create a predictor from dummy ml handler - sql = """ - CREATE MODEL test_predictor - PREDICT target - USING - engine = 'dummy_ml' - """ - self.execute(sql) - - # create the same predictor without if not exists throws an error - with pytest.raises(EntityExistsError): - self.execute(sql) - - # create the same predictor with if not exists doesn't throw an error - self.execute(""" - CREATE MODEL IF NOT EXISTS test_predictor - PREDICT target - USING - engine = 'dummy_ml' - """) - - # create a predictor with if not exists should indeed create a new predictor - self.execute(""" - CREATE MODEL IF NOT EXISTS test_predictor2 - PREDICT target - USING - engine = 'dummy_ml' - """) - - # check that the predictor was indeed created - ret = self.execute(""" - SHOW MODELS - WHERE name = 'test_predictor2' - """) - assert len(ret.data) == 1 - - # drop the predictor - self.execute("DROP MODEL test_predictor2") - - # check that the predictor was indeed dropped - self.execute(""" - SHOW MODELS - WHERE name = 'test_predictor2' - """) - - # drop again without if exists should throw an error - with pytest.raises(EntityNotExistsError): - self.execute("DROP MODEL test_predictor2") - - # drop again with if exists should not throw an error - self.execute("DROP MODEL IF EXISTS test_predictor2") - - def test_project(self): - from mindsdb.utilities.exception import EntityExistsError - - sql = "CREATE PROJECT another_test_project" - self.execute(sql) - # create the same project without if not exists throws an error - with pytest.raises(EntityExistsError): - self.execute(sql) - 
- # create the same project with if not exists doesn't throw an error - self.execute("CREATE PROJECT IF NOT EXISTS another_test_project") - - self.execute("DROP PROJECT another_test_project") - self.execute("CREATE PROJECT ANOTHER_test_project") - self.execute("DROP PROJECT another_TEST_project") - - def test_database_integration(self): - from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError - - sql = """ - CREATE DATABASE test_database - WITH engine = 'mindsdb' - """ - self.execute(sql) - - # create the same database without if not exists throws an error - with pytest.raises(EntityExistsError): - self.execute(sql) - - # create the same database with if not exists doesn't throw an error - self.execute(""" - CREATE DATABASE IF NOT EXISTS test_database - WITH engine = 'mindsdb' - """) - - # create a database with if not exists should indeed create a new database - self.execute(""" - CREATE DATABASE IF NOT EXISTS test_database2 - WITH engine = 'mindsdb' - """) - - # check that the database was indeed created - ret = self.execute(""" - SHOW DATABASES - WHERE name = 'test_database2' - """) - assert len(ret.data) == 1 - - # drop the database - self.execute("DROP DATABASE test_database2") - - # check that the database was indeed dropped - ret = self.execute(""" - SHOW DATABASES - WHERE name = 'test_database2' - """) - assert len(ret.data) == 0 - - # drop again without if exists should throw an error - with pytest.raises(EntityNotExistsError): - self.execute("DROP DATABASE test_database2") - - # drop again with if exists should not throw an error - self.execute("DROP DATABASE IF EXISTS test_database2") - - def test_job(self): - from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError - - # create a simple job - sql = """ - CREATE JOB test_job ( - SELECT 1 - ) - """ - self.execute(sql) - - # create the same job without if not exists throws an error - with pytest.raises(EntityExistsError): - self.execute(sql) - - # create the 
same job with if not exists doesn't throw an error - self.execute(""" - CREATE JOB IF NOT EXISTS test_job ( - SELECT 1 - ) - """) - - # create a job with if not exists should indeed create a new job - self.execute(""" - CREATE JOB IF NOT EXISTS test_job2 ( - SELECT 1 - ) - """) - - # drop the job - sql = "DROP JOB test_job2" - self.execute(sql) - - # drop again without if exists should throw an error - with pytest.raises(EntityNotExistsError): - self.execute(sql) - - # drop again with if exists should not throw an error - self.execute("DROP JOB IF EXISTS test_job2") - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_view(self, mock_handler): - from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError - - df = pd.DataFrame( - [ - {"a": 1, "b": "one"}, - {"a": 2, "b": "two"}, - {"a": 1, "b": "three"}, - ] - ) - self.set_handler(mock_handler, name="pg", tables={"tasks": df}) - sql = """ - CREATE VIEW test_view AS ( - SELECT * FROM pg.tasks - ) - """ - self.execute(sql) - - # create the same view without if not exists throws an error - with pytest.raises(EntityExistsError): - self.execute(sql) - - # create the same view with if not exists doesn't throw an error - self.execute(""" - CREATE VIEW IF NOT EXISTS test_view AS ( - SELECT * FROM pg.tasks - ) - """) - - # create a view with if not exists should indeed create a new view - self.execute(""" - CREATE VIEW IF NOT EXISTS test_view2 AS ( - SELECT * FROM pg.tasks - ) - """) - - # check that the view was indeed created - ret = self.execute(""" - SHOW FULL TABLES - WHERE tables_in_mindsdb = 'test_view2' - """) - assert len(ret.data) == 1 - - # drop the view - self.execute("DROP VIEW test_view2") - - # check that the view was indeed dropped - ret = self.execute(""" - SHOW FULL TABLES - WHERE tables_in_mindsdb = 'test_view2' - """) - assert len(ret.data) == 0 - - # drop again without if exists should throw an error - sql = """ - DROP VIEW test_view2 - """ - with 
pytest.raises(EntityNotExistsError): - self.execute("DROP VIEW test_view2") - - # drop again with if exists should not throw an error - self.execute("DROP VIEW IF EXISTS test_view2") - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_hide_secrets(self, mock_handler): - HIDDEN_PASSWORD = "******" - df = pd.DataFrame( - [ - {"a": 1, "b": "one"}, - {"a": 2, "b": "two"}, - {"a": 1, "b": "three"}, - ] - ) - self.set_handler(mock_handler, name="pg", tables={"tasks": df}) - - self.execute(""" - create ml_engine ml_test from dummy_ml using api_key = '123456' - """) - - # region Check that every secret is hidden - ret = self.execute(""" - select * from information_schema.ml_engines where name = 'ml_test' - """) - connection_data = json.loads(ret.data.records[0]["CONNECTION_DATA"]) - assert connection_data["api_key"] == HIDDEN_PASSWORD - - ret = self.execute(""" - select * from information_schema.databases where name = 'pg'; - """) - connection_data = json.loads(ret.data.records[0]["CONNECTION_DATA"]) - assert connection_data["password"] == HIDDEN_PASSWORD - - self.execute(""" - CREATE MODEL mindsdb.test_predictor - PREDICT target - USING - engine = 'dummy_ml', - api_key = '654321' - """) - ret = self.execute(""" - show models where name = 'test_predictor'; - """) - training_options = json.loads(ret.data.records[0]["TRAINING_OPTIONS"]) - assert training_options["using"]["api_key"] == HIDDEN_PASSWORD - # endregion - - # region Set 'show secrets' and make sure that every secret is revealed - self.execute(""" - set show_secrets=True; - """) - ret = self.execute(""" - select * from information_schema.ml_engines where name = 'ml_test' - """) - connection_data = json.loads(ret.data.records[0]["CONNECTION_DATA"]) - assert connection_data["api_key"] != HIDDEN_PASSWORD - - ret = self.execute(""" - select * from information_schema.databases where name = 'pg'; - """) - connection_data = json.loads(ret.data.records[0]["CONNECTION_DATA"]) - assert 
connection_data["password"] != HIDDEN_PASSWORD - - ret = self.execute(""" - show models where name = 'test_predictor'; - """) - training_options = json.loads(ret.data.records[0]["TRAINING_OPTIONS"]) - assert training_options["using"]["api_key"] != HIDDEN_PASSWORD - # endregion diff --git a/tests/unit/executor/test_files.py b/tests/unit/executor/test_files.py deleted file mode 100644 index cdbee61fbdb..00000000000 --- a/tests/unit/executor/test_files.py +++ /dev/null @@ -1,204 +0,0 @@ -import tempfile -import shutil -from pathlib import Path -import os -import sys -import pytest - -import pandas as pd - -from tests.unit.executor_test_base import BaseExecutorDummyML - - -class TestFiles(BaseExecutorDummyML): - def test_create_table(self): - df = pd.DataFrame( - [ - {"a": 6, "c": 1}, - {"a": 7, "c": 2}, - ] - ) - self.set_data("table1", df) - - self.run_sql( - """ - create table files.myfile - select * from dummy_data.table1 - """ - ) - - self.run_sql( - """ - create or replace table files.myfile - select * from dummy_data.table1 - """ - ) - - ret = self.run_sql("select count(*) c from files.myfile") - assert ret["c"][0] == 2 - - self.run_sql( - """ - insert into files.myfile ( - select * from dummy_data.table1 - ) - """ - ) - - ret = self.run_sql("select count(*) c from files.myfile") - assert ret["c"][0] == 4 - - self.run_sql( - """ - insert into files.myfile (a) - values (9) - """ - ) - - ret = self.run_sql("select count(*) c from files.myfile") - assert ret["c"][0] == 5 - - @pytest.mark.skipif( - sys.platform == "win32", - reason="Fixme: Open file handle somewhere makes this fail on Windows.", - ) - def test_multipage(self): - # copy test file because source will be removed after uloading - source_path = Path(__file__).parent / "data" / "test.xlsx" - fd, file_path = tempfile.mkstemp() - os.close(fd) - shutil.copy(source_path, file_path) - - self.file_controller.save_file("test", file_path, source_path.name) - - ret = self.run_sql("select * from files.test") - 
assert len(ret) == 2 - first, second = ret[ret.columns[0]] - - # first page - ret = self.run_sql(f"select * from files.test.{first}") - assert len(ret.columns) == 4 - - # second page - ret = self.run_sql(f"select * from files.test.{second}") - assert len(ret.columns) == 2 - - -class TestMultiTableFiles(BaseExecutorDummyML): - @pytest.fixture(autouse=True) - def create_tables_and_data(self): - self.run_sql( - """ - CREATE TABLE files.A5 (tab_num INT, shop INT) - """ - ) - self.run_sql( - """ - INSERT INTO files.A5 VALUES - (1, 1), (1, 2), (1, 3), - (2, 1), (2, 2), (2, 3), - (3, 1) - """ - ) - - self.run_sql( - """ - CREATE TABLE files.B5 (shop INT) - """ - ) - self.run_sql( - """ - INSERT INTO files.B5 (shop) VALUES (1), (2), (3) - """ - ) - - self.run_sql( - """ - CREATE TABLE files.A2 ( - tab_num INT, - fio VARCHAR(50), - city VARCHAR(50) - ) - """ - ) - self.run_sql( - """ - INSERT INTO files.A2 VALUES - (1, 'Stark', 'Winterfell'), - (2, 'Baratheon', 'King''s Landing'), - (3, 'Targaryen', 'Dragonstone'), - (4, 'Lannister', 'Casterly Rock') - """ - ) - - yield - - def test_multi_table_relational_division(self): - """Test complex multi-table relational division""" - - result = self.run_sql( - """ - SELECT DISTINCT a1.tab_num - FROM files.A5 a1 - WHERE NOT EXISTS ( - SELECT * - FROM files.B5 b - WHERE NOT EXISTS ( - SELECT * - FROM files.A5 a2 - WHERE a2.tab_num = a1.tab_num AND a2.shop = b.shop - ) - ) - """ - ) - - assert len(result) == 2 - assert sorted(result["tab_num"].tolist()) == [1, 2] - - def test_multi_table_join_with_aliases(self): - """Test JOIN with aliases and database prefixes""" - result = self.run_sql( - """ - SELECT DISTINCT a1.tab_num, a2.fio, a2.city - FROM files.A5 AS a1 - JOIN files.A2 AS a2 ON a1.tab_num = a2.tab_num - WHERE a1.shop = 1 - """ - ) - assert len(result) == 3 - assert sorted(result["tab_num"].tolist()) == [1, 2, 3] - assert sorted(result["fio"].tolist()) == [ - "Baratheon", - "Stark", - "Targaryen", - ] - assert 
sorted(result["city"].tolist()) == [ - "Dragonstone", - "King's Landing", - "Winterfell", - ] - - def test_multi_table_join_without_aliases(self): - """Test JOIN without aliases and without database prefixes""" - with pytest.raises(Exception) as excinfo: - self.run_sql( - """ - SELECT DISTINCT A5.tab_num - FROM A5 - JOIN A2 ON A5.tab_num = A2.tab_num - WHERE A5.shop = 1 - """ - ) - msg = str(excinfo.value).lower() - assert "table 'a5' not found in database" in msg - - def test_create_table_with_existing_name(self): - """Test creating a table with an existing name without REPLACE""" - with pytest.raises(Exception) as excinfo: - self.run_sql( - """ - CREATE TABLE files.A5 (tab_num INT, shop INT) - """ - ) - msg = str(excinfo.value).lower() - assert "table 'a5' already exists" in msg diff --git a/tests/unit/executor/test_handler_metrics.py b/tests/unit/executor/test_handler_metrics.py deleted file mode 100644 index 40ae9813a39..00000000000 --- a/tests/unit/executor/test_handler_metrics.py +++ /dev/null @@ -1,58 +0,0 @@ -import datetime -import pandas as pd -from tests.unit.executor_test_base import BaseExecutorDummyML - - -class TestHandlerMetrics(BaseExecutorDummyML): - def test_handler_query_time(self): - self.set_data( - "tasks", - pd.DataFrame( - [ - {"a": 1, "b": datetime.datetime(2020, 1, 1)}, - {"a": 2, "b": datetime.datetime(2020, 1, 2)}, - {"a": 1, "b": datetime.datetime(2020, 1, 3)}, - ] - ), - ) - # Create & predict a simple model. - self.run_sql("create database proj") - self.run_sql( - """ - CREATE model proj.task_model - from dummy_data (select * from tasks) - PREDICT a - using engine='dummy_ml', - tag = 'first', - join_learn_process=true - """ - ) - self.wait_predictor("proj", "task_model") - self.run_sql(""" - SELECT m.* - FROM dummy_data.tasks as t - JOIN proj.task_model as m - """) - # Import here so we don't reuse registry across test functions. 
- from mindsdb.metrics import metrics - - query_time_metric = list(metrics.INTEGRATION_HANDLER_QUERY_TIME.collect())[0] - query_size_metric = list(metrics.INTEGRATION_HANDLER_RESPONSE_SIZE.collect())[0] - assert len(query_time_metric.samples) == 3 - assert len(query_size_metric.samples) == 3 - for sample in query_time_metric.samples: - assert sample.name.startswith("mindsdb_integration_handler_query_seconds") - if sample.name.endswith("count"): - assert sample.value == 1.0 - elif sample.name.endswith("sum"): - assert sample.value > 0.0 - elif sample.name.endswith("created"): - assert sample.value > 0.0 - for sample in query_size_metric.samples: - assert sample.name.startswith("mindsdb_integration_handler_response_size") - if sample.name.endswith("count"): - assert sample.value == 1.0 - elif sample.name.endswith("sum"): - assert sample.value > 0.0 - elif sample.name.endswith("created"): - assert sample.value > 0.0 diff --git a/tests/unit/executor/test_jobs.py b/tests/unit/executor/test_jobs.py deleted file mode 100644 index bc80c9bb13f..00000000000 --- a/tests/unit/executor/test_jobs.py +++ /dev/null @@ -1,247 +0,0 @@ -import datetime as dt -from unittest.mock import patch - -import pytest - -import pandas as pd - -from tests.unit.executor_test_base import BaseExecutorDummyML - - -@pytest.fixture(scope="class") -def scheduler(): - from mindsdb.interfaces.jobs.scheduler import Scheduler - scheduler_ = Scheduler({}) - - yield scheduler_ - - scheduler_.stop_thread() - - -class TestJobs(BaseExecutorDummyML): - - def test_job(self, scheduler): - - df1 = pd.DataFrame([ - {'a': 1, 'c': 1, 'b': dt.datetime(2020, 1, 1)}, - {'a': 2, 'c': 1, 'b': dt.datetime(2020, 1, 2)}, - {'a': 1, 'c': 3, 'b': dt.datetime(2020, 1, 3)}, - {'a': 3, 'c': 2, 'b': dt.datetime(2020, 1, 2)}, - ]) - self.set_data('tbl1', df1) - - self.run_sql('create database proj1') - # create job - self.run_sql('create job j1 (select * from models; select * from models)', database='proj1') - - # check jobs table - 
ret = self.run_sql('select * from jobs', database='proj1') - assert len(ret) == 1, "should be 1 job" - row = ret.iloc[0] - assert row.NAME == 'j1' - assert row.START_AT is not None, "start date didn't calc" - assert row.NEXT_RUN_AT is not None, "next date didn't calc" - assert row.SCHEDULE_STR is None - - # new project - self.run_sql('create database proj2') - - # create job with start time and schedule - self.run_sql(''' - create job proj2.j2 ( - select * from dummy_data.tbl1 where b>'{{PREVIOUS_START_DATETIME}}' - ) - start now - every hour - ''', database='proj1') - - # check jobs table - ret = self.run_sql('select * from proj2.jobs') - assert len(ret) == 1, "should be 1 job" - row = ret.iloc[0] - assert row.NAME == 'j2' - assert row.SCHEDULE_STR == 'every hour' - - # check global jobs table - ret = self.run_sql('select * from information_schema.jobs') - # all jobs in list - assert len(ret) == 2 - assert set(ret.NAME.unique()) == {'j1', 'j2'} - - # drop first job - self.run_sql('drop job proj1.j1') - - # ------------ executing - scheduler.check_timetable() - - # check query to integration - job = self.db.Jobs.query.filter(self.db.Jobs.name == 'j2').first() - - # check jobs table - ret = self.run_sql('select * from jobs', database='proj2') - # next run is about 60 minutes from previous - minutes = (ret.NEXT_RUN_AT - ret.START_AT)[0].seconds / 60 - assert minutes > 58 and minutes < 62 - - # check history table - ret = self.run_sql('select * from log.jobs_history', database='proj2') - # proj2.j2 was run one time - assert len(ret) == 1 - assert ret.project[0] == 'proj2' and ret.name[0] == 'j2' - - # run once again - scheduler.check_timetable() - - # job wasn't executed - ret = self.run_sql('select * from log.jobs_history', database='proj2') - assert len(ret) == 1 - - # shift 'next run' and run once again - job = self.db.Jobs.query.filter(self.db.Jobs.name == 'j2').first() - job.next_run_at = job.start_at - dt.timedelta(seconds=1) # different time because there is 
unique key - self.db.session.commit() - - scheduler.check_timetable() - - ret = self.run_sql('select * from log.jobs_history', database='proj2') - assert len(ret) == 2 # was executed - - # check global history table - # ret = self.run_sql('select * from information_schema.jobs_history', database='proj2') - # assert len(ret) == 2 - # assert sorted([x.upper() for x in list(ret.columns)]) == sorted([x.upper() for x in JobsHistoryTable.columns]) - - # there is no 'jobs_history' table in project - with pytest.raises(Exception): - self.run_sql('select * from jobs_history', database='proj2') - - with pytest.raises(Exception): - self.run_sql('select company_id from log.jobs_history', database='proj2') - - def test_inactive_job(self, scheduler): - # create job - self.run_sql('create job j1 (select * from models)') - - # check jobs table - ret = self.run_sql('select * from jobs') - assert len(ret) == 1, "should be 1 job" - - # deactivate - job = self.db.Jobs.query.filter(self.db.Jobs.name == 'j1').first() - job.active = False - self.db.session.commit() - - # run scheduler - scheduler.check_timetable() - - ret = self.run_sql('select * from log.jobs_history') - # no history - assert len(ret) == 0 - - def test_conditional_job(self, scheduler): - df = pd.DataFrame([ - {'a': 1, 'b': '2'}, - ]) - self.save_file('tasks', df) - - # create job - job_str = ''' - create job j1 ( - CREATE model pred - PREDICT p - using engine='dummy_ml', - join_learn_process=true - ) - if ( - select * from files.tasks where a={var} - ) - ''' - - self.run_sql(job_str.format(var=2)) - - # check jobs table - ret = self.run_sql('select * from jobs') - assert len(ret) == 1, "should be 1 job" - - # run scheduler - scheduler.check_timetable() - - # check no models created - ret = self.run_sql('select * from models where name="pred"') - assert len(ret) == 0 - - # --- attempt2 --- - - self.run_sql(job_str.format(var=1)) - - # check jobs table, still one job - previous was one time job - ret = 
self.run_sql('select * from jobs') - assert len(ret) == 1, "should be 1 job" - - # run scheduler - scheduler.check_timetable() - - # check 1 model - ret = self.run_sql('select * from models where name="pred"') - assert len(ret) == 1 - - @patch('mindsdb.integrations.handlers.postgres_handler.Handler') - def test_last_in_job(self, data_handler, scheduler): - df = pd.DataFrame([ - {'a': 1, 'b': 'a'}, - {'a': 2, 'b': 'b'}, - ]) - self.set_handler(data_handler, name='pg', tables={'tasks': df}) - self.save_file('tasks', df) - - # -- create model -- - self.run_sql( - ''' - CREATE model task_model - from files (select * from tasks) - PREDICT a - using engine='dummy_ml' - ''' - ) - - # create job to update table - self.run_sql(''' - create job j1 ( - create table files.t1 ( - SELECT m.* - FROM pg.tasks as t - JOIN task_model as m - where t.a > last and t.b='b' - ) - ) - start now - every hour - ''') - - scheduler.check_timetable() - - # table size didn't change - calls = data_handler().query.call_args_list - sql = calls[0][0][0].to_string() - # getting current last value - assert 'ORDER BY a DESC LIMIT 1' in sql - - # insert new record to source db - - df.loc[len(df.index)] = [6, 'a'] - - data_handler.reset_mock() - # shift 'next run' and run once again - job = self.db.Jobs.query.filter(self.db.Jobs.name == 'j1').first() - job.next_run_at = job.start_at - dt.timedelta(seconds=1) # different time because there is unique key - self.db.session.commit() - - scheduler.check_timetable() - - calls = data_handler().query.call_args_list - - assert len(calls) == 1 - sql = calls[0][0][0].to_string() - # getting next value, greater than max previous - assert 'a > 2' in sql - assert "b = 'b'" in sql diff --git a/tests/unit/executor/test_knowledge_base.py b/tests/unit/executor/test_knowledge_base.py deleted file mode 100644 index 7646caca9ae..00000000000 --- a/tests/unit/executor/test_knowledge_base.py +++ /dev/null @@ -1,1433 +0,0 @@ -import time -import json -import tempfile -import 
datetime as dt - -from unittest.mock import patch, MagicMock -import threading -from contextlib import contextmanager - -import pandas as pd -import pytest - -from tests.unit.executor_test_base import BaseExecutorDummyML -from mindsdb.integrations.utilities.rag.rerankers.base_reranker import ( - ListwiseLLMReranker, -) - - -@contextmanager -def task_monitor(): - from mindsdb.interfaces.tasks.task_monitor import TaskMonitor - - monitor = TaskMonitor() - - stop_event = threading.Event() - worker = threading.Thread(target=monitor.start, daemon=True, args=(stop_event,)) - worker.start() - - yield worker - - stop_event.set() - worker.join() - - -def dummy_embeddings(string, dimension=None, base=None): - # Imitates embedding generation: create vectors which are similar for similar words in inputs - if dimension is None: - dimension = 25**2 - embeds = [0] * dimension - if base is None: - base = 25 - - string = string.lower().replace(",", " ").replace(".", " ") - for word in string.split(): - # encode letters to numbers - values = [] - for letter in word: - val = ord(letter) - 97 - val = min(max(val, 0), 122) - values.append(val) - - # first two values are position in vector - pos = values[0] * base + values[1] - - # the next 4: are value of the vector - values = values[2:6] - emb = sum([val / base ** (i + 1) for i, val in enumerate(values)]) - - embeds[pos] += emb - - return embeds - - -def set_embedding(mock_embedding, dimension=None, base=None): - def resp_f(input, *args, **kwargs): - return [dummy_embeddings(s, dimension, base) for s in input] - - mock_embedding().embeddings.side_effect = resp_f - - -class BaseTestKB(BaseExecutorDummyML): - def setup_method(self): - super().setup_method() - self.storages = [] - - def teardown_method(self): - for db_name in self.storages: - self._drop_storage_db(db_name) - super().teardown_method() - - def _create_kb( - self, - name, - embedding_model=None, - reranking_model=None, - content_columns=None, - id_column=None, - 
metadata_columns=None, - storage=None, - params=None, - ): - self.run_sql(f"drop knowledge base if exists {name}") - - if embedding_model is None: - embedding_model = { - "provider": "openai", - "model_name": "dummy_model", - "api_key": "dummy_key", - } - - kb_params = { - "embedding_model": embedding_model, - } - if reranking_model is not None: - kb_params["reranking_model"] = reranking_model - if content_columns is not None: - kb_params["content_columns"] = content_columns - if id_column is not None: - kb_params["id_column"] = id_column - if metadata_columns is not None: - kb_params["metadata_columns"] = metadata_columns - if params is not None: - kb_params.update(params) - - if storage is None: - storage = self._get_storage_table(name) - kb_params["storage"] = storage - - param_str = "" - if kb_params: - param_items = [] - for k, v in kb_params.items(): - param_items.append(f"{k}={json.dumps(v)}") - param_str = ",".join(param_items) - - self.run_sql( - f""" - create knowledge base {name} - using - {param_str} - """ - ) - - def _get_storage_table(self, kb_name): - db_name = f"db_{kb_name}" - - self._drop_storage_db(db_name) - - self.run_sql(f""" - create database {db_name} - with - engine='duckdb_faiss' - """) - self.storages.append(db_name) - - return f"{db_name}.default_collection" - - def _drop_storage_db(self, db_name): - try: - self.run_sql(f"drop table {db_name}.default_collection") - except Exception: - ... - - try: - self.run_sql(f"drop database {db_name}") - except Exception: - ... 
- - def _get_ral_table(self): - data = [ - ["1000", "Green beige", "Beige verdastro"], - ["1004", "Golden yellow", "Giallo oro"], - ["9016", "Traffic white", "Bianco traffico"], - ["9023", "Pearl dark grey", "Grigio scuro perlato"], - ] - - return pd.DataFrame(data, columns=["ral", "english", "italian"]) - - -class TestKBNOAutoBatch(BaseTestKB): - def setup_method(self): - super().setup_method() - from mindsdb.utilities.config import config - - config["knowledge_bases"]["disable_autobatch"] = True - - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_kb(self, mock_embedding): - set_embedding(mock_embedding) - - self._create_kb("kb_review") - - self.run_sql("insert into kb_review (content) values ('review')") - - # selectable - ret = self.run_sql("select * from kb_review") - assert len(ret) == 1 - - # show tables in default vectordb - ret = self.run_sql("show knowledge bases") - - db_name = ret.STORAGE[0].split(".")[0] - ret = self.run_sql(f"show tables from {db_name}") - # only one default collection there - assert len(ret) == 1 - - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_kb_metadata(self, mock_embedding): - set_embedding(mock_embedding) - - record = { - "review": "all is good, haven't used yet", - "url": "https://laptops.com/123", - "product": "probook", - "specs": "Core i5; 8Gb; 1920х1080", - "id": 123, - } - df = pd.DataFrame([record]) - self.save_file("reviews", df) - - # --- case 1: kb with default columns settings --- - self._create_kb("kb_review") - - self.run_sql( - """ - insert into kb_review - select review as content, id from files.reviews - """ - ) - - ret = self.run_sql("select * from kb_review where _original_doc_id = 123") - assert len(ret) == 1 - assert ret["chunk_content"][0] == record["review"] - - # delete by metadata - self.run_sql("delete from kb_review where _original_doc_id = 123") - ret = self.run_sql("select * from kb_review where _original_doc_id = 123") - assert len(ret) == 0 - 
- # insert without id - self.run_sql( - """ - insert into kb_review - select review as content, product, url from files.reviews - """ - ) - - # id column wasn't used - ret = self.run_sql("select * from kb_review where _original_doc_id = 123") - assert len(ret) == 0 - - # product/url in metadata - ret = self.run_sql( - "select metadata->>'product' as product, metadata->>'url' as url from kb_review where product = 'probook'" - ) - assert len(ret) == 1 - assert ret["product"][0] == record["product"] - assert ret["url"][0] == record["url"] - - # using json operator in filter - ret = self.run_sql( - "select metadata->>'product' as product, metadata->>'url' as url " - "from kb_review where metadata->>'product' = 'probook'" - ) - assert len(ret) == 1 - assert ret["product"][0] == record["product"] - assert ret["url"][0] == record["url"] - - # --- case 2: kb with defined columns --- - self._create_kb( - "kb_review", - content_columns=["review", "product"], - id_column="url", - metadata_columns=["specs", "id"], - ) - - self.run_sql( - """ - insert into kb_review - select * from files.reviews - """ - ) - - ret = self.run_sql( - "select chunk_content, metadata->>'specs' as specs, metadata->>'id' as id from kb_review" - ) # url in id - - assert len(ret) == 2 # two columns are split in two records - - # review/product in content - content = list(ret["chunk_content"]) - assert record["review"] in content - assert record["product"] in content - - # specs/id in metadata - assert ret["specs"][0] == record["specs"] - assert str(ret["id"][0]) == str(record["id"]) - - # --- case 3: content is defined, id is id, the rest goes to metadata --- - self._create_kb("kb_review", content_columns=["review"]) - - self.run_sql( - """ - insert into kb_review - select * from files.reviews - """ - ) - - # metadata as columns - ret = self.run_sql( - """ - select chunk_content, specs, product, url - from kb_review - where _original_doc_id = 123 -- id is id - """ - ) - assert len(ret) == 1 - # review 
in content - assert ret["chunk_content"][0] == record["review"] - - # specs/url/product in metadata - assert ret["specs"][0] == record["specs"] - assert ret["url"][0] == record["url"] - assert ret["product"][0] == record["product"] - - def test_listwise_reranker_parses_valid_json(self): - reranker = ListwiseLLMReranker(api_key="-", model="gpt-4o") - - # Fake async LLM response - class _Msg: - def __init__(self, content): - self.content = content - - class _Choice: - def __init__(self, content): - self.message = _Msg(content) - - async def _fake_call_llm(messages): - return '{"ranking": [{"doc_index": 2, "score": 0.9}, {"doc_index": 1, "score": 0.6}, {"doc_index": 3, "score": 0.1}]}' - - # Bind the async method to this reranker instance - reranker._call_llm = _fake_call_llm # type: ignore - - docs = ["A", "B", "C"] - scores = reranker.get_scores("q", docs) - - assert len(scores) == 3 - # doc_index 2 (B) highest, then A, then C - assert scores[1] > scores[0] > scores[2] - # scores are clamped to [0,1] - assert all(0.0 <= s <= 1.0 for s in scores) - - def test_listwise_reranker_handles_code_fence_and_missing_docs(self): - reranker = ListwiseLLMReranker(api_key="-", model="gpt-4o") - - class _Msg: - def __init__(self, content): - self.content = content - - class _Choice: - def __init__(self, content): - self.message = _Msg(content) - - async def _fake_call_llm(messages): - # Returns code-fenced JSON, includes only two entries, one without score - return """```json - {"ranking": [1, {"doc_index": 3, "score": 0.8}]} - ```""" - - reranker._call_llm = _fake_call_llm # type: ignore - - docs = ["D0", "D1", "D2", "D3"] - scores = reranker.get_scores("q", docs) - - assert len(scores) == 4 - # All scores within [0,1] - assert all(0.0 <= s <= 1.0 for s in scores) - # At least doc_index 3 (zero-based 2) should have a relatively high score - assert scores[2] >= 0.5 - - def test_listwise_reranker_json_error_fallback(self): - reranker = ListwiseLLMReranker(api_key="-", 
model="gpt-4o") - - class _Msg: - def __init__(self, content): - self.content = content - - class _Choice: - def __init__(self, content): - self.message = _Msg(content) - - async def _fake_call_llm(messages): - # Invalid JSON forces fallback - return "not-json" - - reranker._call_llm = _fake_call_llm # type: ignore - - docs = ["X", "Y", "Z"] - scores = reranker.get_scores("q", docs) - - assert len(scores) == 3 - # Fallback pattern should be descending - assert scores[0] > scores[1] > scores[2] - - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_join_kb_table(self, mock_embedding): - set_embedding(mock_embedding) - - df = self._get_ral_table() - self.save_file("ral", df) - - self._create_kb("kb_ral") - - self.run_sql( - """ - insert into kb_ral - select ral id, english content from files.ral - """ - ) - - ret = self.run_sql( - """ - select t.italian, k.id, t.ral from kb_ral k - join files.ral t on t.ral = k.id - where k.content = 'white' - limit 2 - """ - ) - - assert len(ret) == 2 - # values are matched - diff = ret[ret["ral"] != ret["id"]] - assert len(diff) == 0 - - # ================= operators ================= - ret = self.run_sql( - """ - select * from kb_ral - where id = '1000' - """ - ) - assert len(ret) == 1 - assert ret["id"][0] == "1000" - - ret = self.run_sql( - """ - select * from kb_ral - where id != '1000' - """ - ) - assert len(ret) == 3 - assert "1000" not in ret["id"] - - ret = self.run_sql( - """ - select * from kb_ral - where id in ('1000', '1004') - """ - ) - assert len(ret) == 2 - assert set(ret["id"]) == {"1000", "1004"} - - ret = self.run_sql( - """ - select * from kb_ral - where id not in ('1000', '1004') - """ - ) - assert len(ret) == 2 - assert set(ret["id"]) == {"9016", "9023"} - - @pytest.mark.slow - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_kb_partitions(self, mock_handler, mock_embedding): - 
set_embedding(mock_embedding) - - df = self._get_ral_table() - - df = pd.concat([df] * 30) - # unique ids - df["id"] = list(map(str, range(len(df)))) - - self.set_handler(mock_handler, name="pg", tables={"ral": df}) - - def check_partition(insert_sql): - self._create_kb("kb_part", content_columns=["english"]) - - # load kb - ret = self.run_sql(insert_sql) - # inserts returns query - query_id = ret["ID"][0] - - # wait loaded - for i in range(1000): - time.sleep(0.2) - ret = self.run_sql(f"select * from information_schema.queries where id = {query_id}") - if ret["ERROR"][0] is not None: - raise RuntimeError(ret["ERROR"][0]) - if ret["FINISHED_AT"][0] is not None: - break - - # check content - ret = self.run_sql("select * from kb_part") - assert len(ret) == len(df) - - # check queries table - ret = self.run_sql(f"select * from information_schema.queries where id = {query_id}") - assert len(ret) == 1 - rec = ret.iloc[0] - assert "kb_part" in ret["SQL"][0] - assert ret["ERROR"][0] is None - assert ret["FINISHED_AT"][0] is not None - - # test describe - ret = self.run_sql("describe knowledge base kb_part") - assert len(ret) == 1 - rec_d = ret.iloc[0] - assert rec_d["PROCESSED_ROWS"] == rec["PROCESSED_ROWS"] - assert rec_d["INSERT_STARTED_AT"] == rec["STARTED_AT"] - assert rec_d["INSERT_FINISHED_AT"] == rec["FINISHED_AT"] - assert rec_d["QUERY_ID"] == query_id - - # del query - self.run_sql(f"SELECT query_cancel({rec['ID']})") - ret = self.run_sql("select * from information_schema.queries") - assert len(ret) == 0 - - ret = self.run_sql("describe knowledge base kb_part") - assert len(ret) == 1 - rec_d = ret.iloc[0] - assert rec_d["PROCESSED_ROWS"] is None - assert rec_d["INSERT_STARTED_AT"] is None - assert rec_d["INSERT_FINISHED_AT"] is None - assert rec_d["QUERY_ID"] is None - - with task_monitor(): - - def stream_f(*args, **kwargs): - chunk_size = int(len(df) / 10) + 1 - for i in range(10): - yield df[chunk_size * i : chunk_size * (i + 1) :] - - # --- stream mode --- - 
# Mock native_query to return TableResponse with generator - mock_handler().stream_response = True - - def native_query_with_generator(*args, **kwargs): - from mindsdb.integrations.libs.response import TableResponse - - return TableResponse(data_generator=stream_f()) - - mock_handler().native_query.side_effect = native_query_with_generator - - # test iterate - check_partition( - """ - insert into kb_part SELECT id, english FROM pg.ral - using batch_size=20, track_column=id - """ - ) - - # test iterate (mix case of track_column) - check_partition( - """ - insert into kb_part SELECT id, english FROM pg.ral - using batch_size=20, track_column=Id - """ - ) - - # switched off for faiss - # # test threads - # check_partition( - # """ - # insert into kb_part SELECT id, english FROM pg.ral - # using batch_size=20, track_column=id, threads = 3 - # """ - # ) - - # without track column - check_partition( - """ - insert into kb_part SELECT id, english FROM pg.ral - using batch_size=20 - """ - ) - - # --- general mode --- - # Mock native_query to return TableResponse with full data - mock_handler().stream_response = False - - def native_query_without_generator(*args, **kwargs): - from mindsdb.integrations.libs.response import TableResponse - - return TableResponse(data=df) - - mock_handler().native_query.side_effect = native_query_without_generator - - # test iterate - check_partition( - """ - insert into kb_part SELECT id, english FROM pg.ral - using batch_size=20, track_column=id - """ - ) - - # switched off for faiss - # # test threads - # check_partition( - # """ - # insert into kb_part SELECT id, english FROM pg.ral - # using batch_size=20, track_column=id, threads = 3 - # """ - # ) - - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_kb_algebra(self, mock_embedding): - set_embedding(mock_embedding) - - lines, i = [], 0 - for color in ("white", "red", "green"): - for size in ("big", "middle", "small"): - for shape in ("square", "triangle", 
"circle"): - i += 1 - lines.append([i, i, f"{color} {size} {shape}", color, size, shape, dt.date(2000, 1, i)]) - df = pd.DataFrame(lines, columns=["id", "num", "content", "color", "size", "shape", "valid_date"]) - - self.save_file("items", df) - - self._create_kb("kb_alg", embedding_model={"provider": "bedrock", "model_name": "titan"}) - - self.run_sql( - """ - insert into kb_alg - select * from files.items - """ - ) - - # --- search value excluding others - - ret = self.run_sql( - """ - select * from kb_alg where - content = 'green' - and content not IN ('square', 'triangle') - and content is not null - limit 3 - """ - ) - - # check 3 most relative records - for content in ret["chunk_content"]: - assert "green" in content - assert "square" not in content - assert "triangle" not in content - - # --- search value excluding other and metadata - - ret = self.run_sql( - """ - select * from kb_alg where - content = 'green' - and content != 'square' - and shape != 'triangle' - limit 3 - """ - ) - - for content in ret["chunk_content"]: - assert "green" in content - assert "square" not in content - assert "triangle" not in content - - # -- searching value in list with excluding - - ret = self.run_sql( - """ - select * from kb_alg where - content in ('green', 'white') - and content not like 'green' - limit 3 - """ - ) - for content in ret["chunk_content"]: - assert "white" in content - - # -- using OR - - ret = self.run_sql( - """ - select * from kb_alg where - (content like 'green' and size='big') - or (content like 'white' and size='small') - or (content is null) - limit 3 - """ - ) - for content in ret["chunk_content"]: - if "green" in content: - assert "big" in content - else: - assert "small" in content - - # -- using between and less than - - ret = self.run_sql( - """ - select * from kb_alg where - content like 'white' and num between 3 and 6 and num < 5 - limit 3 - """ - ) - assert len(ret) == 2 - - for _, item in ret.iterrows(): - assert "white" in 
item["chunk_content"] - assert item["metadata"]["num"] in (3, 4) - - # -- chunk_content and '%' - ret = self.run_sql( - """ - select * from kb_alg where - (chunk_content like '%green%' and size='big') - or (chunk_content like '%white%' and size='small') - or (chunk_content is null) - limit 3 - """ - ) - for content in ret["chunk_content"]: - if "green" in content: - assert "big" in content - else: - assert "small" in content - - # -- metadata: like, not like - for query in ("trian%", "%riangl%", "%angle"): - ret = self.run_sql(f"select * from kb_alg where shape like '{query}'") - - # only triangle - assert set(ret["shape"]) == {"triangle"} - - # -- metadata: '>=', '>', '<=', '<' - - ret = self.run_sql("select * from kb_alg where color > 'red'") - # only white - assert set(ret["color"]) == {"white"} - - ret = self.run_sql("select * from kb_alg where color < 'red'") - # only green - assert set(ret["color"]) == {"green"} - - ret = self.run_sql("select * from kb_alg where color <= 'red' and color > 'green'") - # only red - assert set(ret["color"]) == {"red"} - - # filter by int - ret = self.run_sql("select * from kb_alg where num >= 10") - assert ret["num"].min() == 10 - - # filter by date - ret = self.run_sql("select * from kb_alg where valid_date >= '2000-01-15'") - assert ret["valid_date"].min() > "2000-01-14" and ret["valid_date"].min() < "2000-01-16" - - ret = self.run_sql("select * from kb_alg where valid_date < '2000-01-15'") - assert ret["valid_date"].max() > "2000-01-13" and ret["valid_date"].min() < "2000-01-15" - - # -- filter by id and content - ret = self.run_sql("select * from kb_alg where content = 'green' and id < 22") - assert ret["color"][0] == "green" - assert ret["id"].max() < 22 - - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_select_allowed_columns(self, mock_embedding): - set_embedding(mock_embedding) - - # -- no metadata are specified, generated from inserts -- - self._create_kb("kb1") - - self.run_sql("insert into 
kb1 (id, content, col1) values (1, 'cont1', 'val1')") - self.run_sql("insert into kb1 (id, content, col2) values (2, 'cont2', 'val2')") - - # existed value - ret = self.run_sql("select * from kb1 where col1='val1'") - assert len(ret) == 1 and ret["chunk_content"][0] == "cont1" - - # not existed value - ret = self.run_sql("select * from kb1 where col1='not exist'") - assert len(ret) == 0 - - # not existed column - with pytest.raises(ValueError): - self.run_sql("select * from kb1 where col3='val2'") - - # -- metadata are specified -- - self._create_kb( - "kb2", - metadata_columns=["col1", "col2", "col3"], - ) - - self.run_sql("insert into kb2 (id, content, col1) values (1, 'cont1', 'val1')") - self.run_sql("insert into kb2 (id, content, col2) values (2, 'cont2', 'val2')") - - # existed value - ret = self.run_sql("select * from kb2 where col1='val1'") - assert len(ret) == 1 and ret["chunk_content"][0] == "cont1" - - # not existed value - ret = self.run_sql("select * from kb2 where col3='cont1'") - assert len(ret) == 0 - - # not existed column - with pytest.raises(ValueError): - self.run_sql("select * from kb2 where cont10='val2'") - - @patch("mindsdb.interfaces.knowledge_base.llm_client.OpenAI") - @patch("mindsdb.integrations.utilities.rag.rerankers.base_reranker.BaseLLMReranker.get_scores") - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_evaluate(self, mock_embedding, mock_get_scores, mock_openai): - set_embedding(mock_embedding) - - question, answer = "2+2", "4" - agent_response = f""" - {{"query": "{question}", "reference_answer": "{answer}"}} - """ - mock_completion = MagicMock() - mock_completion.choices = [MagicMock()] - mock_completion.choices[0].message.content = agent_response - mock_openai().chat.completions.create.return_value = mock_completion - - # reranking result - mock_get_scores.side_effect = lambda query, docs: [0.8 for _ in docs] - - df = self._get_ral_table() - df = df.rename(columns={"english": "content", "ral": 
"id"}) - self.save_file("ral", df) - - self._create_kb( - "kb1", - reranking_model={ - "provider": "openai", - "model_name": "gpt-3", - "api_key": "-", - }, - ) - self.run_sql("insert into kb1 SELECT id, content FROM files.ral") - - # --- case 1: use table as source, reranker llm, no evaluate - - ret = self.run_sql( - """ - Evaluate knowledge base kb1 - using - test_table = files.eval_test, - generate_data = { - 'from_sql': 'select content, id from files.ral', - 'count': 3 - }, - evaluate=false - """ - ) - - # reranker model is used - assert mock_openai().chat.completions.create.call_args_list[0][1]["model"] == "gpt-3" - - # no response - assert len(ret) == 0 - - # check test data - df_test = self.run_sql("select * from files.eval_test") - assert len(df_test) == 3 - assert df_test["question"][0] == question - assert df_test["answer"][0] == answer - - # --- case 2: use kb as source, custom llm, evaluate - mock_openai.reset_mock() - self.run_sql("drop table files.eval_test") - - ret = self.run_sql( - """ - Evaluate knowledge base kb1 - using - test_table = files.eval_test, - generate_data = true, - llm={'provider': 'openai', 'api_key':'-', 'model_name':'gpt-4'}, - save_to = files.eval_res - """ - ) - - # custom model is used - assert mock_openai().chat.completions.create.call_args_list[0][1]["model"] == "gpt-4" - - # eval resul in response - assert len(ret) == 1 - - # check test data - df_test = self.run_sql("select * from files.eval_test") - assert len(df_test) > 0 - assert df_test["question"][0] == question - assert df_test["answer"][0] == answer - - # check result - df_res = self.run_sql("select * from files.eval_res") - assert len(df_res) == 1 - assert df_res["total"][0] == len(df_test) - # compare with eval response - assert df_res["total"][0] == ret["total"][0] - assert df_res["total_found"][0] == ret["total_found"][0] - - # --- case 3: evaluate without generation and saving - - ret = self.run_sql( - """ - Evaluate knowledge base kb1 - using - test_table = 
files.eval_test - """ - ) - - # eval resul in response - assert len(ret) == 1 - # compare with table - assert df_res["total"][0] == ret["total"][0] - assert df_res["total_found"][0] == ret["total_found"][0] - - # --- test reranking disabled --- - mock_get_scores.reset_mock() - df = self.run_sql("select * from kb1 where content='test'") - mock_get_scores.assert_called_once() - assert len(df) > 0 - - mock_get_scores.reset_mock() - df = self.run_sql("select * from kb1 where content='test' and reranking =false") - mock_get_scores.assert_not_called() - assert len(df) > 0 - - @patch("mindsdb.utilities.config.Config.get") - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - @patch("mindsdb.integrations.utilities.rag.rerankers.base_reranker.BaseLLMReranker.get_scores") - def test_save_default_params(self, mock_get_scores, mock_embedding, mock_config_get): - # reranking result - mock_get_scores.side_effect = lambda query, docs: [0.8 for _ in docs] - - set_embedding(mock_embedding) - - def config_get_side_effect(key, default=None): - if key == "default_embedding_model": - return { - "provider": "bedrock", - "model_name": "dummy_model", - "api_key": "dummy_key", - } - if key == "default_reranking_model": - return { - "provider": "openai", - "model_name": "openai_model", - "api_key": "openai_key", - } - return default - - mock_config_get.side_effect = config_get_side_effect - - self._create_kb("kb1") - - ret = self.run_sql("describe knowledge base kb1") - - # default model was saved - assert "dummy_model" in ret["EMBEDDING_MODEL"][0] - assert "openai_model" in ret["RERANKING_MODEL"][0] - - # disable default reranking - self._create_kb("kb2", params={"reranking_model": False}) - - ret = self.run_sql("describe knowledge base kb2") - - assert "openai_model" not in ret["RERANKING_MODEL"][0] - - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_relevance_filtering_gt_operator(self, mock_embedding): - """Test relevance filtering with 
GREATER_THAN operator""" - set_embedding(mock_embedding) - - test_data = [ - {"id": "1", "content": "This is about machine learning and AI"}, - {"id": "2", "content": "This is about cooking recipes"}, - { - "id": "3", - "content": "This is about artificial intelligence and neural networks", - }, - {"id": "4", "content": "This is about gardening tips"}, - ] - df = pd.DataFrame(test_data) - self.save_file("test_docs", df) - self._create_kb("kb_relevance_test") - self.run_sql( - """ - insert into kb_relevance_test - select id, content from files.test_docs - """ - ) - - ret = self.run_sql( - """ - select * from kb_relevance_test - where content = 'machine learning' - and relevance > 0.5 - """ - ) - assert isinstance(ret, pd.DataFrame) - - @patch("mindsdb.integrations.utilities.rag.rerankers.base_reranker.BaseLLMReranker.get_scores") - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_alter_kb(self, mock_embedding, mock_get_scores): - set_embedding(mock_embedding) - - self._create_kb( - "kb1", - embedding_model={ - "provider": "bedrock", - "model_name": "dummy_model", - "api_key": "embed-key-1", - }, - reranking_model={ - "provider": "openai", - "model_name": "gpt-3", - "api_key": "rerank-key-1", - }, - ) - - # update KB - self.run_sql( - """ - ALTER KNOWLEDGE BASE kb1 - USING - reranking_model={'api_key': 'rerank-key-2'}, - embedding_model={'api_key': 'embed-key-2'}, - id_column='my_id', - content_columns=['my_content'], - metadata_columns=['my_meta'] - """ - ) - - # check updated values in database - kb = self.db.KnowledgeBase.query.filter_by(name="kb1").first() - assert kb.params["id_column"] == "my_id" - assert kb.params["content_columns"] == ["my_content"] - assert kb.params["metadata_columns"] == ["my_meta"] - - assert kb.params["reranking_model"]["model_name"] == "gpt-3" - assert kb.params["reranking_model"]["api_key"] == "rerank-key-2" - - assert kb.params["embedding_model"]["api_key"] == "embed-key-2" - - # update embedding fails - 
with pytest.raises(ValueError): - self.run_sql("ALTER KNOWLEDGE BASE kb1 USING embedding_model={'model_name': 'my_model'}") - - with pytest.raises(ValueError): - self.run_sql("ALTER KNOWLEDGE BASE kb1 USING embedding_model={'provider': 'ollama'}") - - # different provider: params are replaced - self.run_sql("ALTER KNOWLEDGE BASE kb1 USING reranking_model={'provider': 'ollama', 'model_name': 'mistral'}") - kb = self.db.KnowledgeBase.query.filter_by(name="kb1").first() - - assert kb.params["reranking_model"]["provider"] == "ollama" - assert "api_key" not in kb.params["reranking_model"] - - # disable reranking model and ensure config is cleared - self.run_sql("ALTER KNOWLEDGE BASE kb1 USING reranking_model = false") - kb = self.db.KnowledgeBase.query.filter_by(name="kb1").first() - assert kb.params["reranking_model"] == {} - - @patch("mindsdb.integrations.utilities.rag.rerankers.base_reranker.BaseLLMReranker.get_scores") - @patch("mindsdb.interfaces.knowledge_base.llm_client.OpenAI") - def test_ollama(self, mock_openai, mock_get_scores): - mock_emb = MagicMock(data=[MagicMock(embedding=[0.1] * 10)]) - mock_openai().embeddings.create.return_value = mock_emb - - # reranking result - mock_get_scores.side_effect = lambda query, docs: [0.8 for _ in docs] - - self._create_kb( - "kb1", - reranking_model={"provider": "ollama", "model_name": "mistral", "base_url": "http://localhost:11434/v1"}, - embedding_model={"provider": "ollama", "model_name": "nomic", "base_url": "http://localhost:11434/v1"}, - ) - - ret = self.run_sql("describe knowledge base kb1") - - assert "api_key" not in ret["EMBEDDING_MODEL"][0] - assert "api_key" not in ret["RERANKING_MODEL"][0] - - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_kb_uppercase_source_columns(self, mock_embedding): - set_embedding(mock_embedding) - - df = pd.DataFrame( - [ - [1, 'Laptop Pro 15"', "High-performance laptop with 16GB RAM and 512GB SSD", "Electronics", 1299.99], - [2, "Table", "Dining table, 
144 cm", "Home", 100], - ], - columns=["Product_ID", "Product_Name", "Description", "Category", "Price"], - ) - - self.save_file("oracle_products", df) - - # -- mixed case source columns - self._create_kb( - "kb_oracle_mixedcase", - content_columns=["Product_Name", "Description"], - id_column="Product_ID", - metadata_columns=["Category", "Price"], - ) - self.run_sql( - """ - insert into kb_oracle_mixedcase - select * from files.oracle_products - """ - ) - ret = self.run_sql("select `Category` from kb_oracle_mixedcase where `Category` = 'Home'") - assert len(ret) == 2 - assert ret["Category"][0] == "Home" - - # -- uppercase source columns - df.columns = df.columns.str.upper() - self.run_sql("drop table files.oracle_products") - self.save_file("oracle_products", df) - - self._create_kb( - "kb_oracle_uppercase", - content_columns=["PRODUCT_NAME", "DESCRIPTION"], - id_column="PRODUCT_ID", - metadata_columns=["CATEGORY", "PRICE"], - ) - self.run_sql( - """ - insert into kb_oracle_uppercase - select * from files.oracle_products - """ - ) - - ret = self.run_sql("select * from kb_oracle_uppercase where `CATEGORY` = 'Electronics'") - assert len(ret) == 2 - assert ret["CATEGORY"][0] == "Electronics" - - # -- lowercase source columns - df.columns = df.columns.str.lower() - self.run_sql("drop table files.oracle_products") - self.save_file("oracle_products", df) - - self._create_kb( - "kb_oracle_lowercase", - content_columns=["product_name", "description"], - id_column="product_id", - metadata_columns=["category", "price"], - ) - self.run_sql( - """ - insert into kb_oracle_lowercase - select * from files.oracle_products - """ - ) - ret = self.run_sql("select category from kb_oracle_lowercase where category = 'Home'") - assert len(ret) == 2 - assert ret["category"][0] == "Home" - - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_dimension_mismatch(self, mock_embedding): - temp_dir = tempfile.mkdtemp() - - self.run_sql(f""" - create database my_faiss - 
with - engine='duckdb_faiss', - PARAMETERS = {{ - 'persist_directory': '{temp_dir}' - }} - """) - - set_embedding(mock_embedding, dimension=1000) - self._create_kb("kb1", storage="my_faiss.table1") - - self.run_sql("insert into kb1 (content) values ('review')") - - # change dimension - set_embedding(mock_embedding, dimension=1500) - - with pytest.raises(ValueError): - self._create_kb("kb2", storage="my_faiss.table1") - - self.run_sql("drop knowledge base kb1") - self.run_sql("drop table my_faiss.table1") - self.run_sql("drop database my_faiss") - - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_duplicated_ids(self, mock_embedding): - set_embedding(mock_embedding) - - self._create_kb("kb1") - - # insert bug content - self.run_sql(f"insert into kb1 (id, content) values (1, '{'my content' * 1000}')") - - # insert second id - self.run_sql("insert into kb1 (id, content) values (2, 'content2')") - - # first was chunked - ret = self.run_sql("select * from kb1 where id = 1") - assert len(ret) > 1 - - # second wasn't - ret = self.run_sql("select * from kb1 where id = 2") - assert len(ret) == 1 - - # insert short string - self.run_sql("insert into kb1 (id, content) values (1, 'content')") - - # chunks were removed - ret = self.run_sql("select * from kb1 where id = 1") - assert len(ret) == 1 - assert ret["chunk_content"][0] == "content" - - # second id wasn't removed - ret = self.run_sql("select * from kb1 where id = 2") - assert len(ret) == 1 - - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_update(self, mock_embedding): - set_embedding(mock_embedding) - - self._create_kb("kb1") - - self.run_sql("insert into kb1 (id, content) values (1, 'cat')") - - ret = self.run_sql("select * from kb1 where id = 1") - assert len(ret) == 1 - chunk_id = ret["chunk_id"][0] - - # update - self.run_sql(f"update kb1 set content = 'dog' where chunk_id = '{chunk_id}'") - # check - ret = self.run_sql("select * from kb1 where id = 1") - 
assert len(ret) == 1 - assert ret["chunk_content"][0] == "dog" - - @patch("mindsdb.integrations.utilities.rag.rerankers.base_reranker.BaseLLMReranker.get_scores") - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_reranking(self, mock_embedding, mock_get_scores): - set_embedding(mock_embedding) - - self._create_kb( - "kb_ral", - content_columns=["english"], - reranking_model={ - "provider": "openai", - "model_name": "gpt-3", - "api_key": "embed-key-1", - }, - ) - - df = self._get_ral_table() - self.save_file("ral", df) - - self.run_sql( - """ - insert into kb_ral - select * from files.ral - """ - ) - - # rank from greater to lower - mock_get_scores.side_effect = lambda query, docs: [1 - i / 4 for i in range(len(docs))] - ret = self.run_sql("select * from kb_ral where content='white'") - assert "white" in ret["chunk_content"].iloc[0] - - # reverse rank: from lower to greater. the most semantic result have to be moved back - mock_get_scores.side_effect = lambda query, docs: [i / 4 for i in range(len(docs))] - ret = self.run_sql("select * from kb_ral where content='white'") - assert "white" not in ret["chunk_content"].iloc[0] - - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_hybrid_search(self, mock_embedding): - df = self._get_ral_table() - self.save_file("ral", df) - - set_embedding(mock_embedding) - - self._create_kb("kb_hybrid", content_columns=["english"]) - - self.run_sql("insert into kb_hybrid select * from files.ral") - - # changing embedding config, making semantic search irrelevant - set_embedding(mock_embedding, base=20) - - # white is not at the top - ret = self.run_sql("select * from kb_hybrid where content='white'") - assert "white" not in ret["chunk_content"].iloc[0] - - # but it is when hybrid search is used - ret = self.run_sql(""" - select * from kb_hybrid where content='white' - and hybrid_search_alpha = 0 - """) - assert "white" in ret["chunk_content"].iloc[0] - - # checking alpha=0.5 - ret = 
self.run_sql(""" - select * from kb_hybrid where content='white' - and hybrid_search = true - """) - assert "white" in ret["chunk_content"].iloc[0] - - # @pytest.mark.slow - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_create_index(self, mock_embedding): - set_embedding(mock_embedding) - - df = self._get_ral_table() - - df = pd.concat([df] * 30) - # unique ids - df["id"] = list(map(str, range(len(df)))) - self.save_file("ral", df) - - # create kb, fill it - self._create_kb("kb_ral", content_columns=["english"]) - - self.run_sql("insert into kb_ral select * from files.ral") - - # create index default index (ivf_file, for windows it is ivf) - self.run_sql( - """ - CREATE INDEX ON KNOWLEDGE_BASE kb_ral WITH (nlist=1) - """ - ) - - # check kb works after index was created - ret = self.run_sql("select * from kb_ral where content='white'") - assert "white" in ret["chunk_content"].iloc[0] - - # specified index - self.run_sql( - """ - CREATE INDEX ON KNOWLEDGE_BASE kb_ral - WITH (nlist=1, type='ivf', train_count=50) - """ - ) - ret = self.run_sql("select * from kb_ral where content='white'") - assert "white" in ret["chunk_content"].iloc[0] - - def test_providers(self): - with patch("mindsdb.interfaces.knowledge_base.llm_client.BedrockClient.embeddings") as embed: - with patch( - "mindsdb.integrations.utilities.rag.rerankers.base_reranker.AsyncBedrockClient.acompletion" - ) as rerank: - embed.return_value = [[1, 1, 1]] - rerank.return_value = "100" - self._create_kb( - "kb_test", - embedding_model={ - "provider": "bedrock", - "model_name": "amazon.titan", - "aws_access_key_id": "-", - "aws_region_name": "us-east-2", - "aws_secret_access_key": "-", - }, - reranking_model={ - "provider": "bedrock", - "model_name": "llama3", - "aws_access_key_id": "-", - "aws_region_name": "us-east-2", - "aws_secret_access_key": "-", - }, - ) - assert embed.call_args_list[0][0][0] == "amazon.titan" - assert rerank.call_args_list[0][1]["model_name"] == "llama3" - 
- with patch("mindsdb.interfaces.knowledge_base.llm_client.SnowflakeClient.embeddings") as embed: - embed.return_value = [[1, 1, 1]] - self._create_kb( - "kb_test", - embedding_model={"provider": "snowflake", "model_name": "arctic", "account_id": "ABC", "api_key": "-"}, - ) - assert embed.call_args_list[0][0][0] == "arctic" - with patch("mindsdb.interfaces.knowledge_base.llm_client.GeminiClient.embeddings") as embed: - embed.return_value = [[1, 1, 1]] - self._create_kb( - "kb_test", embedding_model={"provider": "gemini", "model_name": "gemini-embedding", "api_key": "-"} - ) - assert embed.call_args_list[0][0][0] == "gemini-embedding" - - -class TestKBAutoBatch(BaseTestKB): - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_no_autobatch(self, mock_embedding): - set_embedding(mock_embedding) - df = self._get_ral_table() - self.save_file("ral", df) - - # -- sync plan -- - # default id column is `id`, but dataset doesn't have it: - # query should be switched to a sync plan - self._create_kb("kb_ral", content_columns=["english"]) - - ret = self.run_sql( - """ - insert into kb_ral - select * from files.ral - """ - ) - # no response from insert - assert ret is None or len(ret) == 0 - ret = self.run_sql("select * from kb_ral limit 1") - assert len(ret) == 1 - - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - def test_autobatch(self, mock_embedding): - set_embedding(mock_embedding) - df = self._get_ral_table() - self.save_file("ral", df) - - # -- async plan -- - with task_monitor(): - self._create_kb("kb_ral_async", id_column="ral", content_columns=["english"]) - - ret = self.run_sql( - """ - insert into kb_ral_async - select * content from files.ral - """ - ) - # result is the record from `queries` - assert len(ret) == 1 - query_id = ret["ID"][0] - for i in range(1000): - time.sleep(0.2) - ret = self.run_sql(f"select * from information_schema.queries where id = {query_id}") - if ret["ERROR"][0] is not None: - raise 
RuntimeError(ret["ERROR"][0]) - if ret["FINISHED_AT"][0] is not None: - break - - ret = self.run_sql("select * from kb_ral_async where id = '1000'") - assert ret["id"][0] == "1000" diff --git a/tests/unit/executor/test_lowercase.py b/tests/unit/executor/test_lowercase.py deleted file mode 100644 index d7e9d2a32b0..00000000000 --- a/tests/unit/executor/test_lowercase.py +++ /dev/null @@ -1,353 +0,0 @@ -from unittest.mock import patch - -import pytest -import pandas as pd - -from tests.unit.executor_test_base import BaseExecutorDummyML -from tests.unit.executor.test_agent import set_embedding - - -class TestLowercase(BaseExecutorDummyML): - def test_view_name_lowercase(self): - # mix-case - self.run_sql("CREATE VIEW `MyView` AS (SELECT 1)") - - res = self.run_sql("SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE table_name = 'MyView'") - assert res["TABLE_TYPE"][0] == "VIEW" - - with pytest.raises(Exception): - self.run_sql("DROP VIEW MyView") - self.run_sql("DROP VIEW `MyView`") - - views_names = ["myview", "MyView", "MYVIEW"] - for view_name in views_names: - another_name = "myVIEW" - self.run_sql(f"CREATE VIEW {view_name} AS (SELECT 1)") - - res = self.run_sql(f"SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE table_name = '{view_name.lower()}'") - assert res["TABLE_TYPE"][0] == "VIEW" - - # alter view: wrong quoted case - with pytest.raises(Exception): - self.run_sql(f"ALTER VIEW `{another_name}` AS (SELECT 2)") - - # alter view: wrong case - self.run_sql(f"ALTER VIEW {another_name} AS (SELECT 2)") - - # select: wrong quoted case - with pytest.raises(Exception): - self.run_sql(f"SELECT * FROM `{another_name}`") - - self.run_sql(f"SELECT * FROM {another_name}") - - # dropL wrong quoted case - with pytest.raises(Exception): - self.run_sql(f"DROP VIEW `{another_name}`") - - self.run_sql(f"DROP VIEW {another_name}") - - def test_project_name_lowercase(self): - # quoted name in mix case - self.run_sql("CREATE DATABASE `MyProject`") - - res = self.run_sql("SELECT * FROM 
INFORMATION_SCHEMA.DATABASES WHERE name = 'MyProject'") - assert len(res) == 1 - - with pytest.raises(Exception): - self.run_sql("DROP DATABASE MyProject") - self.run_sql("DROP DATABASE `MyProject`") - - # processing is slightly different for 'projects' (without engine) and integrations, so we do cycle for both. - for engine in ["", " WITH ENGINE = 'dummy_data'"]: - for project_name in ["myproject", "MyProject", "MYPROJECT"]: - another_name = "myPROJECT" - self.run_sql(f"CREATE DATABASE {project_name} {engine}") - - res = self.run_sql(f"SELECT * FROM INFORMATION_SCHEMA.DATABASES where name = '{another_name}'") - assert len(res) == 0 - - res = self.run_sql(f"SELECT * FROM INFORMATION_SCHEMA.DATABASES where name = '{project_name.lower()}'") - assert res["TYPE"][0] == ("project" if engine == "" else "data") - - # FIXME - # with pytest.raises(Exception): - # self.execute(f""" - # SELECT * FROM `{another_name}`.models - # """) - if engine == "": - # change name for projects - with pytest.raises(Exception): - self.run_sql(f"ALTER DATABASE `{another_name}` NAME = '{another_name.lower()}'") - with pytest.raises(Exception): - self.run_sql(f"ALTER DATABASE {another_name} NAME = '{another_name}'") - self.run_sql(f"ALTER DATABASE {another_name} NAME = '{another_name.lower()}'") - - with pytest.raises(Exception): - self.run_sql(f"DROP DATABASE `{another_name}`") - - self.run_sql(f"DROP DATABASE {another_name}") - - def test_ml_engine_name_lowercase(self): - # mixed case - self.run_sql("CREATE ML_ENGINE `MyMlEngine` FROM dummy_ml") - - res = self.run_sql("SELECT * FROM INFORMATION_SCHEMA.ML_ENGINES WHERE name ='MyMlEngine'") - assert len(res) == 1 - - with pytest.raises(Exception): - self.run_sql("DROP ML_ENGINE MyMlEngine") - self.run_sql("DROP ML_ENGINE `MyMlEngine`") - - for engine_name in ["mymlengine", "MyMlEngine", "MYMLENGINE"]: - another_name = "myMLEngine" - self.run_sql(f"CREATE ML_ENGINE {engine_name} FROM dummy_ml") - - res = self.run_sql(f"SELECT * FROM 
INFORMATION_SCHEMA.ML_ENGINES WHERE name = '{engine_name.lower()}'") - assert res["HANDLER"][0] == "dummy_ml" - - with pytest.raises(Exception): - self.run_sql(f"DROP ML_ENGINE `{another_name}`") - - self.run_sql(f"DROP ML_ENGINE {another_name}") - - def test_model_name_lowercase(self): - self.run_sql("CREATE ML_ENGINE myengine FROM dummy_ml") - df = pd.DataFrame( - [ - {"a": 1, "b": "one"}, - {"a": 2, "b": "two"}, - ] - ) - self.set_data("tasks", df) - - # mixed case - self.run_sql("CREATE MODEL `MyModel` PREDICT a USING engine='myengine', join_learn_process=true") - - res = self.run_sql("SELECT * FROM INFORMATION_SCHEMA.MODELS WHERE name ='MyModel'") - assert len(res) == 1 - - with pytest.raises(Exception): - self.run_sql("DROP MODEL MyModel") - self.run_sql("DROP MODEL `MyModel`") - - # mixed project - self.run_sql("CREATE DATABASE `MyProj`") - self.run_sql("CREATE MODEL `MyProj`.MyModel PREDICT a USING engine='myengine', join_learn_process=true") - - res = self.run_sql("SELECT * FROM INFORMATION_SCHEMA.MODELS WHERE name ='mymodel'") - assert len(res) == 1 - - with pytest.raises(Exception): - self.run_sql("DROP MODEL MyProj.MyModel") - self.run_sql("DROP MODEL `MyProj`.MyModel") - - for model_name in ["mymodel", "MyModel", "MYMODEL"]: - another_name = "myMODEL" - self.run_sql(f"CREATE MODEL {model_name} PREDICT a USING engine='myengine', join_learn_process=true") - - res = self.run_sql(f"SELECT * FROM INFORMATION_SCHEMA.MODELS WHERE name = '{model_name.lower()}'") - assert res["ENGINE"][0] == "dummy_ml" - - with pytest.raises(Exception): - self.run_sql(f"RETRAIN MODEL `{another_name}` using join_learn_process=true") - - self.run_sql(f"RETRAIN MODEL {another_name} using join_learn_process=true") - - with pytest.raises(Exception): - self.run_sql(f""" - FINETUNE MODEL `{another_name}` FROM dummy_data (select * from tasks) using join_learn_process=true - """) - - self.run_sql(f""" - FINETUNE MODEL {another_name} FROM dummy_data (select * from tasks) using 
join_learn_process=true - """) - - with pytest.raises(Exception): - self.run_sql(f"DROP MODEL `{another_name}`") - self.run_sql(f"DROP MODEL {another_name}") - - @patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm") - def test_agent_name_lowercase(self, check_agent_llm): - agent_params = """ - model={ - "model_name": "gpt-3.5-turbo", - "provider": "openai" - }, - prompt_template='Answer the user input in a helpful way using tools', - mode='text' - """ - - # mixed case: agent - self.run_sql(f"create agent `MyAGENT` using {agent_params}") - - res = self.run_sql("select * from information_schema.agents where name = 'MyAGENT'") - assert len(res) == 1 - - with pytest.raises(Exception): - self.run_sql("drop agent MyAGENT") - self.run_sql("drop agent `MyAGENT`") - - for agent_name in "myagent", "MyAgent", "MYAGENT": - another_agent_name = "myAGENT" - - self.run_sql(f""" - create agent {agent_name} using {agent_params} - """) - - # switch to lowercase - self.run_sql(f""" - update agent {agent_name} set {agent_params} - """) - - ret = self.run_sql(f"select * from information_schema.agents where name = '{agent_name.lower()}'") - assert len(ret) == 1 - - with pytest.raises(Exception): - self.run_sql(f"drop agent `{another_agent_name}`") - self.run_sql(f"drop agent {another_agent_name}") - - @patch("mindsdb.interfaces.knowledge_base.controller.LLMClient") - @patch("openai.OpenAI") - def test_knowledgebase_name_lowercase(self, mock_openai, mock_embedding): - set_embedding(mock_embedding) - - self.run_sql(""" - create database my_kb_storage - with engine='duckdb_faiss' - """) - - kb_params = """ - using embedding_model = { - "provider": "bedrock", - "model_name": "dummy_model", - "api_key": "dummy_key" - }, - storage = my_kb_storage.default_collection - """ - - # mixed case - self.run_sql(f"CREATE KNOWLEDGE BASE `MyKB` {kb_params}") - - res = self.run_sql("SELECT * FROM INFORMATION_SCHEMA.KNOWLEDGE_BASES WHERE name = 'MyKB'") - assert len(res) == 1 - - with 
pytest.raises(Exception): - self.run_sql("DROP KNOWLEDGE BASE MyKB") - self.run_sql("DROP KNOWLEDGE BASE `MyKB`") - - for kb_name in ["mykb", "MyKB", "MYKB"]: - another_kb_name = "myKB" - - self.run_sql(f"CREATE KNOWLEDGE BASE {kb_name} {kb_params}") - - res = self.run_sql(f""" - SELECT * FROM INFORMATION_SCHEMA.KNOWLEDGE_BASES WHERE name = '{kb_name.lower()}' - """) - assert res["NAME"][0] == "mykb" - - with pytest.raises(Exception): - self.run_sql(f"DROP KNOWLEDGE BASE `{another_kb_name}`") - self.run_sql(f"DROP KNOWLEDGE BASE {another_kb_name}") - - self.run_sql("drop table my_kb_storage.default_collection") - - self.run_sql("drop database my_kb_storage") - - def test_job_name_lowercase(self): - # mixed case - self.run_sql("CREATE JOB `MyJOB` (select 1)") - - res = self.run_sql("SELECT * FROM INFORMATION_SCHEMA.JOBS WHERE name = 'MyJOB'") - assert len(res) == 1 - - with pytest.raises(Exception): - self.run_sql("DROP JOB MyJOB") - self.run_sql("DROP JOB `MyJOB`") - - for job_name in ["myjob", "Myjob", "MYJOB"]: - another_name = "myjoB" - self.run_sql(f"CREATE JOB {job_name} (select 1)") - - res = self.run_sql(f"SELECT * FROM INFORMATION_SCHEMA.JOBS WHERE name = '{job_name.lower()}'") - assert len(res) == 1 - - with pytest.raises(Exception): - self.run_sql(f"DROP JOB `{another_name}`") - - self.run_sql(f"DROP JOB {another_name}") - - @patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm") - def test_chatbot_lowercase(self, check_agent_llm): - self.run_sql("create agent my_agent using model={'provider': 'openai', 'model_name': 'gpt-3.5'}") - - self.run_sql("create database my_db using engine='dummy_data'") - - # mixed case - self.run_sql("CREATE CHATBOT `MyChatbot` USING database = 'my_db', agent = 'my_agent'") - - res = self.run_sql("SELECT * FROM INFORMATION_SCHEMA.CHATBOTS WHERE name = 'MyChatbot'") - assert len(res) == 1 - - with pytest.raises(Exception): - self.run_sql("DROP CHATBOT MyChatbot") - self.run_sql("DROP CHATBOT `MyChatbot`") - - for 
name in ["mychatbot", "MyChatbot", "MYCHATBOT"]: - another_name = "myChatbot" - self.run_sql(f"CREATE CHATBOT {name} USING database = 'my_db', agent = 'my_agent'") - - res = self.run_sql(f"SELECT * FROM INFORMATION_SCHEMA.CHATBOTS WHERE name = '{name.lower()}'") - assert len(res) == 1 - - self.run_sql(f"UPDATE CHATBOT {name} SET agent = 'my_agent'") - - with pytest.raises(Exception): - self.run_sql(f"DROP CHATBOT `{another_name}`") - - self.run_sql(f"DROP CHATBOT {name}") - - def test_database_lowercase(self): - # mixed case - self.run_sql("CREATE DATABASE `MyDB` using engine='dummy_data'") - - res = self.run_sql("SELECT * FROM INFORMATION_SCHEMA.DATABASES WHERE name = 'MyDB'") - assert len(res) == 1 - - with pytest.raises(Exception): - self.run_sql("DROP DATABASE MyDB") - self.run_sql("DROP DATABASE `MyDB`") - - for name in ["mydb", "MyDB", "MYDB"]: - another_name = "myDb" - self.run_sql(f"create database {name} using engine='dummy_data'") - - res = self.run_sql(f"SELECT * FROM INFORMATION_SCHEMA.DATABASES WHERE name = '{name.lower()}'") - assert len(res) == 1 - - with pytest.raises(Exception): - self.run_sql(f"DROP DATABASE `{another_name}`") - - self.run_sql(f"DROP DATABASE {name}") - - def test_trigger_lowercase(self): - # mixed case - self.run_sql("CREATE TRIGGER `MyTrigger` on dummy_data.table1 (select 1)") - - res = self.run_sql("SELECT * FROM INFORMATION_SCHEMA.TRIGGERS WHERE name = 'MyTrigger'") - assert len(res) == 1 - - with pytest.raises(Exception): - self.run_sql("DROP TRIGGER MyTrigger") - self.run_sql("DROP TRIGGER `MyTrigger`") - - for name in ["mytrigger", "MyTrigger", "MYTRIGGER"]: - another_name = "myTrigger" - self.run_sql(f"create TRIGGER {name} on dummy_data.table1 (select 1)") - - res = self.run_sql(f"SELECT * FROM INFORMATION_SCHEMA.TRIGGERS WHERE name = '{name.lower()}'") - assert len(res) == 1 - - with pytest.raises(Exception): - self.run_sql(f"DROP TRIGGER `{another_name}`") - - self.run_sql(f"DROP TRIGGER {name}") diff --git 
a/tests/unit/executor/test_models.py b/tests/unit/executor/test_models.py deleted file mode 100644 index 50bd130b220..00000000000 --- a/tests/unit/executor/test_models.py +++ /dev/null @@ -1,188 +0,0 @@ -import datetime as dt -import pytest - -import pandas as pd - -from tests.unit.executor_test_base import BaseExecutorDummyML - - -class TestModels(BaseExecutorDummyML): - - def test_empty_df(self): - # -- create model -- - self.run_sql( - ''' - CREATE model mindsdb.task_model - PREDICT a - using engine='dummy_ml', - join_learn_process=true - ''' - ) - self.wait_predictor('mindsdb', 'task_model') - - self.run_sql("SELECT project FROM models WHERE name='task_model'") - - def test_replace_model(self): - # create model - self.run_sql( - ''' - CREATE or REPLACE model task_model - PREDICT a - using engine='dummy_ml', - join_learn_process=true - ''' - ) - self.wait_predictor('mindsdb', 'task_model') - - # recreate - self.run_sql( - ''' - CREATE or REPLACE model task_model - PREDICT a - using engine='dummy_ml', - join_learn_process=true, my_param='a' - ''' - ) - self.wait_predictor('mindsdb', 'task_model') - - # test json operator - resp = self.run_sql("select training_options->'using'->'my_param' param from models where name='task_model' ") - - # FIXME duckdb returns result quoted - assert resp['param'][0] == '"a"' - - def test_create_validation(self): - from mindsdb.integrations.libs.ml_exec_base import MLEngineException - with pytest.raises(MLEngineException): - self.run_sql( - ''' - CREATE model task_model_x - PREDICT a - using - engine='dummy_ml', - error=1 - ''' - ) - - def test_describe(self): - self.run_sql( - ''' - CREATE model mindsdb.pred - PREDICT p - using engine='dummy_ml', - join_learn_process=true - ''' - ) - ret = self.run_sql('describe mindsdb.pred') - assert ret['TABLES'][0] == ['info'] - - ret = self.run_sql('describe pred') - assert ret['TABLES'][0] == ['info'] - - ret = self.run_sql('describe mindsdb.pred.info') - assert ret['type'][0] == 'dummy' - - 
ret = self.run_sql('describe pred.info') - assert ret['type'][0] == 'dummy' - - def test_create_engine(self): - self.run_sql(''' - CREATE ML_ENGINE my_engine - FROM dummy_ml - USING - unquoted_arg = yourkey, - json_arg = { - "type": "service_account", - "project_id": "123456" - } - ''') - - self.run_sql( - ''' - CREATE model pred - PREDICT p - using engine='my_engine', - join_learn_process=true - ''' - ) - - ret = self.run_sql('select * from pred where a=1') - args = ret['engine_args'][0] - - # check unquoted value - assert args['unquoted_arg'] == 'yourkey' - - # check json value - assert args['json_arg']['project_id'] == '123456' - - def test_model_column_maping(self): - df = pd.DataFrame([ - {'a': 10, 'c': 30}, - {'a': 20, 'c': 40}, - ]) - self.set_data('tbl', df) - - self.run_sql( - ''' - CREATE model mindsdb.pred - PREDICT p - using engine='dummy_ml', - join_learn_process=true - ''' - ) - ret = self.run_sql(''' - select * from dummy_data.tbl t - join pred m on m.input = t.a - ''') - assert ret['output'][0] == 10 - - # without aliases - ret = self.run_sql(''' - select * from dummy_data.tbl - join pred on pred.input = tbl.c - ''') - assert ret['output'][0] == 30 - - # get mapped column - ret = self.run_sql(''' - select t.a from dummy_data.tbl t - join pred m on m.input = t.a - ''') - assert ret['a'][0] == 10 - - def test_predict_partition(self): - df = pd.DataFrame([ - {'a': 1, 'b': dt.datetime(2020, 1, 1)}, - {'a': 2, 'b': dt.datetime(2020, 1, 2)}, - {'a': 3, 'b': dt.datetime(2020, 1, 3)}, - {'a': 4, 'b': dt.datetime(2020, 1, 5)}, - {'a': 5, 'b': dt.datetime(2020, 1, 6)}, - {'a': 6, 'b': dt.datetime(2020, 1, 7)}, - ]) - self.save_file('tasks', df) - - # -- create model -- - self.run_sql( - ''' - CREATE model mindsdb.task_model - from files (select * from tasks) - PREDICT a - using engine='dummy_ml', - join_learn_process=true - ''' - ) - - # use model - ret = self.run_sql(''' - SELECT * - FROM files.tasks as t - JOIN mindsdb.task_model as m - using 
partition_size=2 - ''') - - # the same rows in output - assert len(ret) == 6 - assert set(df['a']) == set(ret['a']) - - # all predicted - assert list(ret.predicted.unique()) == [42] diff --git a/tests/unit/executor/test_mongodb_handler.py b/tests/unit/executor/test_mongodb_handler.py deleted file mode 100644 index 9d0a7382084..00000000000 --- a/tests/unit/executor/test_mongodb_handler.py +++ /dev/null @@ -1,122 +0,0 @@ -import unittest - -import pytest -from mindsdb_sql_parser import parse_sql - -try: - from mindsdb.integrations.handlers.mongodb_handler.utils.mongodb_render import MongodbRender - from mindsdb.integrations.handlers.mongodb_handler.utils.mongodb_parser import MongodbParser - - MONGODB_HANDLER_AVAILABLE = True -except ImportError: - MONGODB_HANDLER_AVAILABLE = False - -# How to run: -# env PYTHONPATH=./ pytest tests/unit/test_mongodb_handler.py - - -@pytest.mark.skipif(not MONGODB_HANDLER_AVAILABLE, reason="mongodb_handler not installed (community handler)") -class TestMongoDBConverters(unittest.TestCase): - def test_ast_to_mongo(self): - sql = """ - select * - from tbl1 - where x!=1 and c=2 and d>4 or e is not null - order by d, e desc - """ - - # sql to ast - query = parse_sql(sql) - mql = MongodbRender().to_mongo_query(query) - - expected_mql = """ - db.tbl1.aggregate([ - {"$match": {"$or": [ - {"$and": [{"$and": [ - {"x": {"$ne": 1}}, - {"c": 2}]}, - {"d": {"$gt": 4}}]}, - {"e": {"$ne": null}}]}}, - {"$sort": {"d": -1, "e": -1}} - ]) - """.replace("\n", "") - - # test ast to mongo - assert mql.to_string().replace(" ", "") == expected_mql.replace(" ", "") - - # test parsing: mql to string and then string to mql - mql_str = mql.to_string() - print(mql_str) - assert MongodbParser().from_string(mql_str).to_string() == mql_str - - sql = """ - select distinct a.b, a.c from tbl1 - where x=1 - limit 2 - offset 3 - """ - - query = parse_sql(sql) - mql = MongodbRender().to_mongo_query(query) - - expected_mql = """ - db.tbl1.aggregate([ - {"$match": {"x": 
1}}, - {"$group": { - "_id": {"b": "$b", "c": "$c"}, - "b": {"$first": "$b"}, - "c": {"$first": "$c"} - }}, - {"$project": {"_id": 0, "b": "$b", "c": "$c"}}, - {"$skip": 3}, - {"$limit": 2} - ]) - """.replace("\n", "") - - # test ast to mongo - assert mql.to_string().replace(" ", "") == expected_mql.replace(" ", "") - - sql = """ - select a as name, sum(b) as total, count(c) as cnt - from tbl1 - where x>=5 - group by a - order by cnt desc - """ - - query = parse_sql(sql) - mql = MongodbRender().to_mongo_query(query) - - expected_mql = """ - db.tbl1.aggregate([ - {"$match": {"x": {"$gte": 5}}}, - {"$group": {"_id": {"a": "$a"}, "total": {"$sum": "$b"}, "cnt": {"$sum": {"$cond": [{"$ne": ["$c", null]}, 1, 0]}}}}, - {"$project": {"_id": 0, "name": "$a", "total": "$total", "cnt": "$cnt"}}, - {"$sort": {"cnt": -1}} - ]) - """.replace("\n", "") - - assert mql.to_string().replace(" ", "") == expected_mql.replace(" ", "") - - # TODO use in queries: multiline, objectid, isodate - # covered in tests/unit/handlers/test_mongodb.py - - def test_mongo_parser(self): - mql = """ - db.TransactionFact.find( - {'_id': '0', "a": "1", b: 1} - ) - """ - - expected_mql = 'db.TransactionFact.find({"_id": "0", "a": "1", "b": 1})' - - assert MongodbParser().from_string(mql).to_string() == expected_mql - - -@pytest.mark.skipif(not MONGODB_HANDLER_AVAILABLE, reason="mongodb_handler not installed (community handler)") -class TestMongoDBHandler(unittest.TestCase): - def test_mongo_handler(self): - # TODO how to test mongo handler - # test mysql query - # test mongo query - pass diff --git a/tests/unit/executor/test_predictor_params.py b/tests/unit/executor/test_predictor_params.py deleted file mode 100644 index 911dc8185d8..00000000000 --- a/tests/unit/executor/test_predictor_params.py +++ /dev/null @@ -1,62 +0,0 @@ -from unittest.mock import patch - -import pandas as pd - -from mindsdb_sql_parser import parse_sql - -from tests.unit.executor_test_base import BaseExecutorMockPredictor - - -class 
Test(BaseExecutorMockPredictor): - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_use_predictor_params(self, mock_handler): - # set integration data - - df = pd.DataFrame( - [ - {"a": 1, "b": "one"}, - {"a": 2, "b": "two"}, - {"a": 1, "b": "three"}, - ] - ) - self.set_handler(mock_handler, name="pg", tables={"tasks": df}) - - # --- use predictor --- - predicted_value = 3.14 - predictor = { - "name": "task_model", - "predict": "p", - "dtype_dict": {"p": "Float64", "a": "Int64", "b": "category"}, - "predicted_value": predicted_value, - } - self.set_predictor(predictor) - - # --- join table --- - - ret = self.command_executor.execute_command( - parse_sql(""" - select m.p, v.a - from pg.tasks v - join mindsdb.task_model m - where v.a = 2 - using p1='a', p2={'x':1, 'y':2} - """) - ) - assert ret.error_code is None - - # check predictor input - predict_args = self.mock_predict.call_args[1]["params"] - assert predict_args == {"p1": "a", "p2": {"x": 1, "y": 2}} - - # --- inline prediction --- - self.mock_predict.reset_mock() - - ret = self.command_executor.execute_command( - parse_sql(""" - select * from mindsdb.task_model where a = 2 - using p1=1, p2=[1,2] - """) - ) - - predict_args = self.mock_predict.call_args[1]["params"] - assert predict_args == {"p1": 1, "p2": [1, 2]} diff --git a/tests/unit/executor/test_project_structure.py b/tests/unit/executor/test_project_structure.py deleted file mode 100644 index 741e3cc25de..00000000000 --- a/tests/unit/executor/test_project_structure.py +++ /dev/null @@ -1,263 +0,0 @@ -import datetime as dt -import pytest - -import pandas as pd - -from tests.unit.executor_test_base import BaseExecutorDummyML - - -class TestProjectStructure(BaseExecutorDummyML): - def test_version_managing(self): - from mindsdb.utilities.exception import EntityNotExistsError - - # set up - self.set_data( - "tasks", - pd.DataFrame( - [ - {"a": 1, "b": dt.datetime(2020, 1, 1)}, - {"a": 2, "b": dt.datetime(2020, 1, 2)}, - {"a": 1, 
"b": dt.datetime(2020, 1, 3)}, - ] - ), - ) - - # ================= retrain cycles ===================== - - # create folder - self.run_sql("create database proj") - - # -- create model -- - ret = self.run_sql( - """ - CREATE model proj.task_model - from proj (select * from dummy_data.tasks) - PREDICT a - using engine='dummy_ml', - tag = 'first', - join_learn_process=true - """ - ) - assert ret["NAME"][0] == "task_model" - assert ret["ENGINE"][0] == "dummy_ml" - self.wait_predictor("proj", "task_model") - - # tag works in create model - ret = self.run_sql("select * from proj.models") - assert ret["TAG"][0] == "first" - - # use model - ret = self.run_sql(""" - SELECT m.*, extract(day from t.b) as day - FROM dummy_data.tasks as t - JOIN proj.task_model as m - """) - - assert len(ret) == 3 - assert ret.predicted[0] == 42 - assert ret.day[0] == 1 - - # -- retrain predictor with tag -- - ret = self.run_sql( - """ - retrain proj.task_model - from dummy_data (select * from tasks where a=2) - PREDICT b - using tag = 'second', - join_learn_process=true - """ - ) - assert ret["NAME"][0] == "task_model" - assert ret["TAG"][0] == "second" - self.wait_predictor("proj", "task_model", filters={"tag": "second"}) - - # get current model - ret = self.run_sql("select * from proj.models where active=1") - - # check target - assert ret["PREDICT"][0] == "b" - - # check label - assert ret["TAG"][0] == "second" - - # use model - ret = self.run_sql(""" - SELECT m.* - FROM dummy_data.tasks as t - JOIN proj.task_model as m - """) - assert ret.predicted[0] == 42 - - # used model has tag 'second' - models = self.get_models() - model_id = ret.predictor_id[0] - assert models[model_id].label == "second" - - # -- retrain again with active=0 -- - self.run_sql( - """ - retrain proj.task_model - from dummy_data (select * from tasks where a=2) - PREDICT a - using tag='third', active=0 - """ - ) - self.wait_predictor("proj", "task_model", filters={"tag": "third"}) - - ret = self.run_sql("select * from 
proj.models where active=1") - - # check target is from previous retrain - assert ret["PREDICT"][0] == "b" - - # use model - ret = self.run_sql(""" - SELECT m.* - FROM dummy_data.tasks as t - JOIN proj.task_model as m - """) - - # used model has tag 'second' (previous) - models = self.get_models() - model_id = ret.predictor_id[0] - assert models[model_id].label == "second" - - # ================ working with inactive versions ================= - - # run 3rd version model and check used model version - ret = self.run_sql(""" - SELECT m.* - FROM dummy_data.tasks as t - JOIN proj.task_model.3 as m - """) - - # 3rd version was used - models = self.get_models() - model_id = ret.predictor_id[0] - assert models[model_id].label == "third" - - # one-line query model by version - ret = self.run_sql("SELECT * from proj.task_model.3 where a=1 and b=2") - model_id = ret.predictor_id[0] - assert models[model_id].label == "third" - - # check exception: not existing version - with pytest.raises(EntityNotExistsError) as exc_info: - self.run_sql( - "SELECT * from proj.task_model.4 where a=1 and b=2", - ) - - # ===================== one-line with 'use database'======================= - - # active - ret = self.run_sql("SELECT * from task_model where a=1 and b=2", database="proj") - model_id = ret.predictor_id[0] - assert models[model_id].label == "second" - - # inactive - ret = self.run_sql("SELECT * from task_model.3 where a=1 and b=2", database="proj") - model_id = ret.predictor_id[0] - assert models[model_id].label == "third" - - # ================== managing versions ========================= - - # check 'show models' command in different combination - # Show models where - ret = self.run_sql("Show models") - # mindsdb project - assert len(ret) == 0 - - ret = self.run_sql("Show models from proj") - # it also shows versions - assert len(ret) == 3 and ret["NAME"][0] == "task_model" - - # ret = self.run_sql('Show models in proj') - # assert len(ret) == 3 and ret['NAME'][0] == 
'task_model' - - ret = self.run_sql("Show models from proj where name='task_model'") - assert len(ret) == 3 and ret["NAME"][0] == "task_model" - - # model is not exists - ret = self.run_sql("Show models from proj where name='xxx'") - assert len(ret) == 0 - - # ---------------- - - # See all versions - ret = self.run_sql("select * from proj.models") - # we have all tags in versions - assert set(ret["TAG"]) == {"first", "second", "third"} - - # Set active selected version - self.run_sql("set active proj.task_model.1") - - # get active version - ret = self.run_sql("select * from proj.models where active = 1") - assert ret["TAG"][0] == "first" - - # use active version ? - - # Delete specific version - self.run_sql("drop model proj.task_model.2") - - # deleted version not in list - ret = self.run_sql("select * from proj.models") - assert len(ret) == 2 - assert "second" not in ret["TAG"] - - # try to use deleted version - with pytest.raises(EntityNotExistsError) as exc_info: - self.run_sql( - "SELECT * from proj.task_model.2 where a=1", - ) - - # exception with deleting active version - with pytest.raises(Exception) as exc_info: - self.run_sql("drop model proj.task_model.1") - assert "Can't remove active version" in str(exc_info.value) - - # exception with deleting non-existing version - with pytest.raises(Exception) as exc_info: - self.run_sql("drop model proj.task_model.11") - assert "is not found" in str(exc_info.value) - - # ---------------------------------------------------- - - # retrain without all params - self.run_sql( - """ - retrain proj.task_model - """ - ) - self.wait_predictor("proj", "task_model", filters={"version": "4"}) - - # ---------------------------------------------------- - - # drop predictor and check model is deleted and no versions - self.run_sql("drop model proj.task_model") - ret = self.run_sql("select * from proj.models") - assert len(ret) == 0 - - # versions are also deleted - ret = self.run_sql("select * from proj.models") - assert 
len(ret) == 0 - - def test_project_names_duplicate(self): - # create folder - self.run_sql("create project proj1") - - self.run_sql("create database db1 using engine='dummy_data'") - - with pytest.raises(Exception): - self.run_sql("create project db1") - - with pytest.raises(Exception): - self.run_sql("create database proj1 using engine='dummy_data'") - - def test_object_not_found(self): - with pytest.raises(Exception) as exc_info: - self.run_sql("select * from any_db.any_table") - assert "not found" in str(exc_info.value) - - with pytest.raises(Exception) as exc_info: - self.run_sql("select * from any_db") - assert "not found" in str(exc_info.value) diff --git a/tests/unit/executor/test_schema.py b/tests/unit/executor/test_schema.py deleted file mode 100644 index 540467a9c78..00000000000 --- a/tests/unit/executor/test_schema.py +++ /dev/null @@ -1,237 +0,0 @@ -from unittest.mock import patch -import pandas as pd -import pytest - - -from tests.unit.executor_test_base import BaseExecutorDummyML - - -class TestSchema(BaseExecutorDummyML): - def test_show(self): - for item in ("chatbots", "knowledge_bases", "agents", "jobs"): - self.run_sql(f"show {item}") - - @pytest.mark.slow - @patch("mindsdb.interfaces.agents.agents_controller.check_agent_llm") - def test_schema(self, check_agent): - # --- create objects + describe --- - # todo: create knowledge base (requires chromadb) - - df = pd.DataFrame( - [ - {"a": 6, "c": 1}, - ] - ) - self.set_data("table1", df) - - # project - self.run_sql("create project proj2") - - # ml_engine - self.run_sql(""" - CREATE ML_ENGINE engine1 from dummy_ml - """) - - # job - self.run_sql("create job j1 (select * from models) every hour") - self.run_sql("create job proj2.j2 (select * from models) every hour") - - df = self.run_sql("describe job j1") - assert df.NAME[0] == "j1" and df.QUERY[0] == "select * from models" - - # view - self.run_sql("create view v1 (select * from models)") - self.run_sql("create view proj2.v2 (select * from 
models)") - - df = self.run_sql("describe view v1") - assert df.NAME[0] == "v1" and df.QUERY[0] == "select * from models" - - # columns of view - ret = self.run_sql("show columns from v1") - assert "NAME" in list(ret.Field) - - # columns of table - ret = self.run_sql("show full columns from table1", database="dummy_data") - assert "Collation" in list(ret.columns) - - ret = self.run_sql("show columns from table1", database="dummy_data") - assert "Collation" not in list(ret.columns) - - # model - self.run_sql(""" - CREATE model pred1 - PREDICT p - using engine='dummy_ml', - join_learn_process=true - """) - self.run_sql(""" - CREATE model proj2.pred2 - PREDICT p - using engine='dummy_ml', - join_learn_process=true - """) - # and retrain first model - self.run_sql(""" - RETRAIN pred1 - using engine='dummy_ml' - """) - - # trigger - self.run_sql(""" - create trigger trigger1 - on dummy_data.table1 (show models) - """) - self.run_sql(""" - create trigger proj2.trigger2 - on dummy_data.table1 (show models) - """) - - df = self.run_sql("describe trigger trigger1") - assert df.NAME[0] == "trigger1" and df.QUERY[0] == "show models" - - # agent - self.run_sql(""" - CREATE AGENT agent1 - USING model = {'model_name': "pred1", "provider": "openai"} - """) - self.run_sql(""" - CREATE AGENT proj2.agent2 - USING model = {'model_name': "pred2", "provider": "openai"} -- it looks up in agent's project - """) - - df = self.run_sql("describe agent agent1") - assert df.NAME[0] == "agent1" and "pred1" in df.MODEL[0] - - # chatbot - self.run_sql(""" - CREATE CHATBOT chatbot1 - USING database = "dummy_data", - agent = "agent1" - """) - self.run_sql(""" - CREATE CHATBOT proj2.chatbot2 - USING database = "dummy_data", - agent = "agent2" -- it looks up in chatbot's project - """) - - df = self.run_sql("describe chatbot chatbot1") - assert df.NAME[0] == "chatbot1" and df.DATABASE[0] == "dummy_data" - - # --- SHOW --- - - # handlers - df = self.run_sql("show handlers") - assert "dummy_ml" in 
list(df.NAME) - - # projects - df = self.run_sql("show projects") - objects = list(df.iloc[:, 0]) - assert "mindsdb" in objects - assert "proj2" in objects - - # databases - df = self.run_sql("show databases") - objects = list(df.iloc[:, 0]) - assert "information_schema" in objects - assert "log" in objects - - # ml engines - df = self.run_sql("show ml_engines") - assert "engine1" in list(df.NAME) - - # project objects - def _test_proj_obj(table_name, obj_name): - # check: obj1 is current project, obj2 in proj2 - - df = self.run_sql(f"show {table_name}") - assert len(df) == 1 and f"{obj_name}1" in list(df.NAME) - - df = self.run_sql(f"show {table_name} from proj2") - assert len(df) == 1 and f"{obj_name}2" in list(df.NAME) - - _test_proj_obj("jobs", "j") - _test_proj_obj("views", "v") - _test_proj_obj("triggers", "trigger") - _test_proj_obj("chatbots", "chatbot") - _test_proj_obj("agents", "agent") - - # model - df = self.run_sql("show models") - # two versions of same model - assert len(df[df.NAME != "pred1"]) == 0 and len(df) == 2 - - df = self.run_sql("show models from proj2") - assert "pred2" in list(df.NAME) and len(df) == 1 - - # --- information_schema --- - - df = self.run_sql("select * from information_schema.TABLES") - df = df[df.TABLE_SCHEMA == "information_schema"] - for table in ( - "TABLES", - "COLUMNS", - "MODELS", - "DATABASES", - "ML_ENGINES", - "HANDLERS", - "JOBS", - "CHATBOTS", - "KNOWLEDGE_BASES", - "AGENTS", - "VIEWS", - "TRIGGERS", - "QUERIES", - ): - assert table in list(df.TABLE_NAME) - - # selectable - self.run_sql(f"select * from information_schema.{table}") - - # handlers - df = self.run_sql("select * from information_schema.HANDLERS") - assert "dummy_ml" in list(df.NAME) - - # databases - df = self.run_sql("select * from information_schema.DATABASES") - assert "mindsdb" in list(df.NAME) - assert "proj2" in list(df.NAME) - assert "log" in list(df.NAME) - - # ml engines - df = self.run_sql("select * from information_schema.ML_ENGINES") - 
assert "engine1" in list(df.NAME) - - # project objects - def _test_proj_obj(table_name, obj_name): - # obj1 in mindsdb, obj2 in proj2 - - df = self.run_sql(f"select * from information_schema.{table_name}") - assert len(df) == 2 - - df1 = df[df.PROJECT == "mindsdb"] - assert df1.iloc[0].NAME == f"{obj_name}1" - - df1 = df[df.PROJECT == "proj2"] - assert df1.iloc[0].NAME == f"{obj_name}2" - - _test_proj_obj("JOBS", "j") - _test_proj_obj("VIEWS", "v") - _test_proj_obj("TRIGGERS", "trigger") - _test_proj_obj("CHATBOTS", "chatbot") - _test_proj_obj("AGENTS", "agent") - - # models - df = self.run_sql("select * from information_schema.MODELS") - # two versions of pred1 and one version of pred2 - assert len(df[df.NAME == "pred1"]) == 2 - assert len(df[df.NAME == "pred2"]) == 1 - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_select_columns(self, data_handler): - df = pd.DataFrame([[1, "x"], [2, "y"]], columns=["aa", "bb"]) - - self.set_handler(data_handler, name="pg", tables={"tbl1": df}) - - ret = self.run_sql("SELECT * FROM information_schema.columns WHERE table_schema='pg'") - - assert list(ret["COLUMN_NAME"]) == ["aa", "bb"] diff --git a/tests/unit/executor/test_udf.py b/tests/unit/executor/test_udf.py deleted file mode 100644 index fa320e188b3..00000000000 --- a/tests/unit/executor/test_udf.py +++ /dev/null @@ -1,109 +0,0 @@ -import os -from tempfile import TemporaryDirectory -from textwrap import dedent -from unittest.mock import patch - -import pandas as pd -import pytest -from mindsdb_sql_parser.ast import Identifier -from mindsdb_sql_parser.ast.mindsdb import CreateMLEngine -from tests.unit.executor_test_base import BaseExecutorDummyML - - -@pytest.mark.skip(reason="BYOM feature is currently disabled in MindsDB") -@pytest.mark.parametrize("byom_type", ["inhouse", "venv"]) -class TestBYOM(BaseExecutorDummyML): - def _create_engine(self, name, code, **kwargs): - with TemporaryDirectory(prefix="udf_test_") as temp_dir: - code_path = 
os.path.join(temp_dir, "code.py") - reqs_path = os.path.join(temp_dir, "reqs.py") - - open(code_path, "w").write(code) - open(reqs_path, "w").write("") - - params = { - "code": code_path, - "modules": reqs_path, - } - params.update(kwargs) - - ret = self.command_executor.execute_command( - CreateMLEngine(name=Identifier(name), handler="byom", params=params) - ) - assert ret.error_code is None - - @patch("mindsdb.integrations.handlers.postgres_handler.Handler") - def test_udf(self, data_handler, byom_type): - df = pd.DataFrame( - [ - {"a": 3, "b": 4, "c": "a", "d": "b"}, - ] - ) - self.set_handler(data_handler, name="pg", tables={"sample": df}) - - code = dedent(""" - from os import listdir # imported function - - def fibo(num: int) -> int: - if num < 2: - return num - return fibo(num - 1) + fibo(num - 2) - - # not annotated - def add1(a, b): - return a + b - - # annotated - def add2(a: int, b: int) -> int: - return a + b - """) - - self._create_engine(name="myml", code=code, type=byom_type, mode="custom_function") - # convert to explicit types, because duckdb doesn't convert it and fails - ret = self.run_sql(""" - select myml.fibo(b) x, - myml.add1(a::char,b::char) y, - myml.add2(a,b) z - from pg.sample - """) - assert ret["x"][0] == 3 - assert ret["y"][0] == "34" - assert ret["z"][0] == 7 - - # test without table - ret = self.run_sql(""" - select myml.fibo(4) x - """) - assert ret["x"][0] == 3 - - def test_byom(self, byom_type): - code = dedent(""" - from datetime import datetime - import pandas as pd - - class MyBYOM(): - - def train(self, df, target_col, args=None): - self.target_col = target_col - self.value = '>my_response' - - def predict(self, df): - df[self.target_col] = df['input_col'] + self.value - - return df[[self.target_col]] - """) - - self._create_engine(name="myml", code=code, type=byom_type) - - self.run_sql(""" - create model m1 - predict output_col - using engine='myml', - join_learn_process=true - """) - - ret = self.run_sql(""" - select * from 
m1 - where input_col = 'my_input' - """) - assert ret["output_col"][0] == "my_input>my_response" diff --git a/tests/unit/executor_test_base.py b/tests/unit/executor_test_base.py deleted file mode 100644 index 3cb3e2e8640..00000000000 --- a/tests/unit/executor_test_base.py +++ /dev/null @@ -1,643 +0,0 @@ -import copy -import datetime as dt -import json -import os -import pytest -import sys -import tempfile -import shutil -import time -from unittest import mock -from pathlib import Path - -import duckdb -import numpy as np -import pandas as pd -from prometheus_client import REGISTRY -from mindsdb_sql_parser import parse_sql - -from mindsdb.interfaces.database.integrations import integration_controller -from mindsdb.utilities.config import Config - -from mindsdb.utilities import log -from mindsdb.utilities.constants import DEFAULT_COMPANY_ID, DEFAULT_USER_ID -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender - -from mindsdb.integrations.utilities.community_handler_fetcher import ( - community_handlers_enabled, - fetch_handler, - get_community_handlers_storage_dir, -) - -logger = log.getLogger(__name__) - - -def unload_module(path): - # remove all modules started with path - import sys - - to_remove = [] - for module_name in sys.modules: - if module_name.startswith(path + ".") or module_name == path: - to_remove.append(module_name) - to_remove.sort(reverse=True) - for module_name in to_remove: - sys.modules.pop(module_name) - - -class BaseUnitTest: - """ - mindsdb instance with temporal database and config - """ - - @staticmethod - def setup_class(cls): - # remove imports of mindsdb in previous tests - unload_module("mindsdb") - - # database temp file - - cls.storage_dir = tempfile.mkdtemp(prefix="mindsdb_db_") - - cls.db_file = os.path.join(cls.storage_dir, "mindsdb.db") - - # config - config = {"storage_db": "sqlite:///" + cls.db_file} - # config temp file - cfg_file = os.path.join(cls.storage_dir, "config.json") - - with open(cfg_file, "w") as 
fd: - json.dump(config, fd) - - cls._original_storage_dir_env = os.environ.get("MINDSDB_STORAGE_DIR") - cls._original_config_path_env = os.environ.get("MINDSDB_CONFIG_PATH") - os.environ["MINDSDB_STORAGE_DIR"] = cls.storage_dir - os.environ["MINDSDB_CONFIG_PATH"] = cfg_file - - # initialize config - from mindsdb.utilities.config import Config - - Config() - - from mindsdb.interfaces.storage import db - - db.init() - cls.db = db - - @staticmethod - def teardown_class(cls): - # remove tmp db file - cls.db.session.close() - shutil.rmtree(cls.storage_dir, ignore_errors=True) - - # remove environ for next tests - for env_var_name in ("MINDSDB_DB_CON", "MINDSDB_STORAGE_DIR", "MINDSDB_CONFIG_PATH"): - if env_var_name in os.environ: - del os.environ[env_var_name] - - if cls._original_storage_dir_env is not None: - os.environ["MINDSDB_STORAGE_DIR"] = cls._original_storage_dir_env - if cls._original_config_path_env is not None: - os.environ["MINDSDB_CONFIG_PATH"] = cls._original_config_path_env - - # remove import of mindsdb for next tests - unload_module("mindsdb") - - def setup_method(self): - self._dummy_db_path = os.path.join(tempfile.mkdtemp(), "_mindsdb_duck_db") - self.clear_db(self.db) - self.reset_prom_collectors() - - def clear_db(self, db): - # drop - db.session.rollback() - db.Base.metadata.drop_all(db.engine) - - # create - db.Base.metadata.create_all(db.engine) - - # fill with data - from mindsdb.interfaces.database.integrations import integration_controller - from sqlalchemy.exc import IntegrityError as SQLAlchemyIntegrityError - - integration_controller.create_permanent_integrations() - - # Insert dummy_data if it doesn't exist (idempotent with race condition handling) - dummy_record = db.Integration.query.filter_by( - name="dummy_data", company_id=DEFAULT_COMPANY_ID, user_id=DEFAULT_USER_ID - ).first() - if dummy_record is None: - try: - dummy_record = db.Integration( - name="dummy_data", - data={"db_path": self._dummy_db_path}, - engine="dummy_data", - 
company_id=DEFAULT_COMPANY_ID, - user_id=DEFAULT_USER_ID, - ) - db.session.add(dummy_record) - db.session.flush() - except SQLAlchemyIntegrityError: - db.session.rollback() - dummy_record = db.Integration.query.filter_by( - name="dummy_data", company_id=DEFAULT_COMPANY_ID, user_id=DEFAULT_USER_ID - ).first() - - # default project (idempotent with race condition handling) - project_record = db.Project.query.filter_by( - name="mindsdb", company_id=DEFAULT_COMPANY_ID, user_id=DEFAULT_USER_ID - ).first() - if project_record is None: - try: - project_record = db.Project( - name="mindsdb", - company_id=DEFAULT_COMPANY_ID, - user_id=DEFAULT_USER_ID, - ) - db.session.add(project_record) - db.session.flush() - except SQLAlchemyIntegrityError: - db.session.rollback() - - db.session.commit() - return db - - def set_data(self, table, data): - con = duckdb.connect(self._dummy_db_path) - con.execute("DROP TABLE IF EXISTS {}".format(table)) - con.execute("CREATE TABLE {} AS SELECT * FROM data".format(table)) - - def wait_predictor(self, project, name, timeout=100, filters=None): - """ - Wait for the predictor to be created, - raising an exception if predictor creation fails or exceeds timeout - """ - for attempt in range(timeout): - sql = f"select * from {project}.models where name='{name}'" - if filters is not None: - for k, v in filters.items(): - sql += f" and {k}='{v}'" - ret = self.run_sql(sql) - if not ret.empty: - status = ret["STATUS"][0] - if status == "complete": - return - elif status == "error": - raise RuntimeError("Predictor failed", ret["ERROR"][0]) - time.sleep(0.5) - raise RuntimeError("Predictor wasn't created") - - def run_sql(self, sql): - """Execute SQL and return a DataFrame, raising an AssertionError if an error occurs""" - ret = self.command_executor.execute_command(parse_sql(sql)) - assert ret.error_code is None, f"SQL execution failed with error: {ret.error_code}" - if ret.data is not None: - return ret.data.to_df() - - @staticmethod - def 
ret_to_df(ret): - # converts executor response to dataframe - return ret.data.to_df() - - def reset_prom_collectors(self) -> None: - """Resets collectors in the default Prometheus registry. - - Modifies the `REGISTRY` registry. Supposed to be called at the beginning - of individual test functions. Else registry is reused across test functions - and so we can run into errors like duplicate metrics or unexpected values - for metrics. - """ - # Unregister all collectors. - collectors = list(REGISTRY._collector_to_names.keys()) - for collector in collectors: - REGISTRY.unregister(collector) - - -class BaseExecutorTest(BaseUnitTest): - """ - Set up executor: mock data handler - """ - - def setup_method(self, import_dummy_ml=False): - super().setup_method() - self.set_executor(import_dummy_ml=import_dummy_ml) - - def _import_handler(self, integration_controller, handler_name, handler_dir, is_community=False): - handler_meta = { - "import": { - "success": None, - "error_message": None, - "folder": handler_dir.name, - "dependencies": [], - }, - "path": handler_dir, - "name": handler_name, - "permanent": False, - "community": is_community, - } - integration_controller.handlers_import_status[handler_name] = handler_meta - # For community handlers: import_handler uses spec_from_file_location (path-based). - # For built-in handlers: pass "" as base_import so importlib resolves from sys.path. - if is_community: - integration_controller.import_handler(handler_name) - else: - integration_controller.import_handler(handler_name, "") - - def setup_community_handler(self, handler_name: str): - """ - Register and import a community handler for testing. - - Requires MINDSDB_COMMUNITY_HANDLERS=true — this mirrors the production - gate and also exercises the fetch mechanism when the env var is set. - Skips the test if the env var is not set or the handler cannot be fetched. 
- """ - - if not community_handlers_enabled(): - pytest.skip( - f"Community handler '{handler_name}' test skipped: set MINDSDB_COMMUNITY_HANDLERS=true to enable" - ) - - storage_root = Path(Config()["paths"]["root"]) - storage_dir = get_community_handlers_storage_dir(storage_root) - handler_dir_name = f"{handler_name}_handler" - handler_dir = storage_dir / handler_dir_name - - if not (handler_dir / "__init__.py").exists(): - try: - handler_dir = fetch_handler(handler_dir_name, storage_dir) - except Exception as e: - pytest.skip(f"Could not fetch community handler '{handler_name}': {e}") - - if handler_dir is None or not (handler_dir / "__init__.py").exists(): - pytest.skip(f"Community handler '{handler_name}' not available") - - self._import_handler(integration_controller, handler_name, handler_dir, is_community=True) - - def set_executor( - self, - mock_predict=False, - mock_model_controller=False, - import_dummy_ml=False, - import_dummy_llm=False, - ): - # creates executor instance with mocked model_interface - from mindsdb.api.executor.controllers.session_controller import ( - SessionController, - ) - from mindsdb.api.executor.command_executor import ( - ExecuteCommands, - ) - - # clear cache of previous test case to apply mocks of current test case - from mindsdb.integrations.libs.process_cache import process_cache - - process_cache.cache = {} - from mindsdb.interfaces.database.integrations import integration_controller - from mindsdb.interfaces.file.file_controller import FileController - from mindsdb.interfaces.model.model_controller import ModelController - from mindsdb.utilities.context import context as ctx - from mindsdb.interfaces.storage import db - - self.file_controller = FileController() - - if mock_model_controller: - model_controller = mock.Mock() - self.mock_model_controller = model_controller - else: - model_controller = ModelController() - - # no predictors yet - # self.mock_model_controller.get_models.side_effect = lambda: [] - - if 
import_dummy_ml: - test_handler_path = os.path.dirname(__file__) - sys.path.append(test_handler_path) - - handler_dir = Path(test_handler_path) / "dummy_ml_handler" - self._import_handler(integration_controller, "dummy_ml", handler_dir) - - if not integration_controller.get_handler_meta("dummy_ml")["import"]["success"]: - error = integration_controller.handlers_import_status["dummy_ml"]["import"]["error_message"] - raise Exception(f"Can not import: {str(handler_dir)}: {error}") - - r_dummy_ml = db.Integration.query.filter_by( - name="dummy_ml", company_id=DEFAULT_COMPANY_ID, user_id=DEFAULT_USER_ID - ).first() - if r_dummy_ml is None: - r_dummy_ml = db.Integration( - name="dummy_ml", - data={}, - engine="dummy_ml", - company_id=DEFAULT_COMPANY_ID, - user_id=DEFAULT_USER_ID, - ) - db.session.add(r_dummy_ml) - db.session.commit() - self.dummy_ml_integration_id = r_dummy_ml.id - - test_handler_path = os.path.dirname(__file__) - sys.path.append(test_handler_path) - - handler_dir = Path(test_handler_path) / "dummy_llm_handler" - self._import_handler(integration_controller, "dummy_llm", handler_dir) - - if not integration_controller.handlers_import_status["dummy_llm"]["import"]["success"]: - error = integration_controller.handlers_import_status["dummy_llm"]["import"]["error_message"] - raise Exception(f"Can not import: {str(handler_dir)}: {error}") - - if mock_predict: - predict_patcher = mock.patch("mindsdb.integrations.libs.ml_exec_base.BaseMLEngineExec.predict") - self.mock_predict = predict_patcher.__enter__() - self.mock_create = mock.Mock() - - ctx.set_default() - sql_session = SessionController() - sql_session.database = "mindsdb" - sql_session.integration_controller = integration_controller - - self.command_executor = ExecuteCommands(sql_session) - - # disable cache. 
it is need to check predictor input - config_patch = mock.patch("mindsdb.utilities.cache.FileCache.get") - self.mock_config = config_patch.__enter__() - self.mock_config.side_effect = lambda x: None - - def teardown_method(self): - # Don't want cache to pick up a stale version with the wrong duckdb_path. - self.command_executor.session.integration_controller.delete("dummy_data") - if os.path.exists(self._dummy_db_path): - os.unlink(self._dummy_db_path) - os.rmdir(os.path.dirname(self._dummy_db_path)) - - def save_file(self, name, df): - file_path = tempfile.mktemp(prefix="mindsdb_file_") - df.to_parquet(file_path) - self.file_controller.save_file(name, file_path, name) - - def set_handler(self, mock_handler, name, tables, engine="postgres", schema=None): - # integration - # delete by name - r = self.db.Integration.query.filter_by(name=name).first() - if r is not None: - self.db.session.delete(r) - - # create - r = self.db.Integration( - name=name, - data={"password": "secret"}, - engine=engine, - company_id=DEFAULT_COMPANY_ID, - user_id=DEFAULT_USER_ID, - ) - self.db.session.add(r) - self.db.session.commit() - - from mindsdb.integrations.libs.response import TableResponse - - def handler_response(df, affected_rows: None | int = None): - response = TableResponse(data=df, affected_rows=affected_rows) - return response - - def get_tables_f(): - tables_ar = [] - for table in tables: - tables_ar.append( - { - "table_schema": schema or "public", - "table_name": table, - "table_type": "BASE TABLE", - } - ) - - return handler_response(pd.DataFrame(tables_ar)) - - mock_handler().get_tables.side_effect = get_tables_f - mock_handler().cache_single_instance = False - mock_handler().cache_thread_safe = True - mock_handler().cache_usage_lock = True - - def get_columns_f(table_name): - type = "varchar" - cols = [] - for col, typ in tables[table_name].dtypes.items(): - if pd.api.types.is_integer_dtype(typ): - type = "integer" - elif pd.api.types.is_float_dtype(typ): - type = 
"float" - elif pd.api.types.is_datetime64_dtype(typ): - type = "datetime" - cols.append({"Field": col, "Type": type}) - return handler_response(pd.DataFrame(cols)) - - mock_handler().get_columns.side_effect = get_columns_f - - # use duckdb to execute query for integrations - def native_query_f(query): - con = duckdb.connect(database=":memory:") - - if schema is not None: - con.execute(f"CREATE SCHEMA {schema}") - - for table_name, df in tables.items(): - # it is not possible to insert/delete from a dataframe itself, but possible if create table from it - con.register(f"{table_name}_df", df) - if schema is None: - con.execute(f"CREATE TABLE {table_name} AS SELECT * FROM {table_name}_df;") - else: - con.execute(f"CREATE TABLE {schema}.{table_name} AS SELECT * FROM {table_name}_df;") - - try: - con.execute(query) - columns = [c[0] for c in con.description] - data = con.fetchall() - # region for insert/update/delete duckdb returns rowcount as 'Count' value in result, rather than using the - # cursor.rowcount attr. - match (columns, data): - case ["Count"], [(affected_rows,)]: - result_df = pd.DataFrame() - case _: - affected_rows = None - result_df = pd.DataFrame(data, columns=columns) - result_df = result_df.replace({np.nan: None}) - # endregion - except Exception: - # this might be wrong. 
- result_df = pd.DataFrame() - affected_rows = None - - for table in tables.keys(): - con.unregister(table) - - con.close() - return handler_response(result_df, affected_rows=affected_rows) - - def query_f(query): - renderer = SqlalchemyRender("postgres") - query_str = renderer.get_string(query, with_failback=True) - return native_query_f(query_str) - - mock_handler().native_query.side_effect = native_query_f - - mock_handler().query.side_effect = query_f - - def set_project(self, project): - r = self.db.Project.query.filter_by(name=project["name"]).first() - if r is not None: - self.db.session.delete(r) - - r = self.db.Project( - id=1, - name=project["name"], - company_id=DEFAULT_COMPANY_ID, - user_id=DEFAULT_USER_ID, - ) - self.db.session.add(r) - self.db.session.commit() - - -class BaseExecutorDummyML(BaseExecutorTest): - """ - Set up executor: mock data handler - """ - - def setup_method(self): - super().setup_method(import_dummy_ml=True) - - def run_sql(self, sql, throw_error=True, database="mindsdb"): - self.command_executor.session.database = database - ret = self.command_executor.execute_command(parse_sql(sql)) - if throw_error: - assert ret.error_code is None - if ret.data is not None: - return ret.data.to_df() - - def get_models(self): - models = {} - for p in self.db.Predictor.query.all(): - models[p.id] = p - return models - - -class BaseExecutorDummyLLM(BaseExecutorTest): - """ - Set up executor: mock LLM handler - """ - - def setup_method(self): - super().setup_method() - self.set_executor(import_dummy_llm=True) - - -class BaseExecutorMockPredictor(BaseExecutorTest): - """ - Set up executor: mock data handler and LW handler - """ - - def setup_method(self): - super().setup_method() - self.set_executor(mock_predict=True, mock_model_controller=True, import_dummy_ml=True) - - def set_predictor(self, predictor): - # fill model_interface mock with predictor data for test case - - # clear calls - self.mock_model_controller.reset_mock() - 
self.mock_predict.reset_mock() - self.mock_create.reset_mock() - - # remove previous predictor record - r = self.db.Predictor.query.filter_by(name=predictor["name"]).first() - if r is not None: - self.db.session.delete(r) - - if "problem_definition" not in predictor: - predictor["problem_definition"] = {"timeseries_settings": {"is_timeseries": False}} - - # add predictor to table - r = self.db.Predictor( - name=predictor["name"], - data={}, - dtype_dict=predictor["dtype_dict"], - learn_args=predictor["problem_definition"], - to_predict=predictor["predict"], - integration_id=self.dummy_ml_integration_id, - project_id=1, - status="complete", - company_id=DEFAULT_COMPANY_ID, - user_id=DEFAULT_USER_ID, - ) - self.db.session.add(r) - self.db.session.commit() - - def predict_f(_model_name, df, pred_format="dict", *args, **kargs): - # df is mutable and may change after 'predict' call. - # This dirty hack is to save original df. - df._predict_df = df[:] - - explain_arr = [] - data = df.to_dict("records") - - predicted_value = predictor["predicted_value"] - target = predictor["predict"] - - meta = { - # 'select_data_query': None, 'when_data': None, - "original": None, - "confidence": 0.8, - "anomaly": None, - } - - data = copy.deepcopy(data) - for row in data: - # row = row.copy() - exp_row = { - "predicted_value": predictor["predicted_value"], - "confidence": 0.9999, - "anomaly": None, - "truth": None, - } - explain_arr.append({predictor["predict"]: exp_row}) - - row[target] = predicted_value - # dict_arr.append({predictor['predict']: row}) - - for k, v in meta.items(): - row[f"{target}_{k}"] = v - row[f"{target}_explain"] = str(exp_row) - - if pred_format == "explain": - return explain_arr - return pd.DataFrame(data) - - predictor_record = { - "version": None, - "is_active": None, - "status": "complete", - "current_phase": None, - "accuracy": 0.9992752583404642, - "data_source": None, - "update": "available", - "data_source_name": None, - "mindsdb_version": "22.3.5.0", - 
"error": None, - "train_end_at": None, - "updated_at": dt.datetime(2022, 5, 12, 16, 40, 26), - "created_at": dt.datetime(2022, 4, 4, 14, 48, 39), - } - predictor_record.update(predictor) - - def get_model_data_f(name, *args): - if name != predictor["name"]: - raise Exception(f"Model does not exists: {name}") - return predictor_record - - # inject predictor info to model interface - self.mock_predict.side_effect = predict_f - self.mock_model_controller.get_models.side_effect = lambda: [predictor_record] - self.mock_model_controller.get_model_data.side_effect = get_model_data_f - - def execute(self, sql): - ret = self.command_executor.execute_command(parse_sql(sql)) - if ret.error_code is not None: - raise Exception() - return ret diff --git a/tests/unit/handlers/base_handler_test.py b/tests/unit/handlers/base_handler_test.py deleted file mode 100644 index be54f494402..00000000000 --- a/tests/unit/handlers/base_handler_test.py +++ /dev/null @@ -1,287 +0,0 @@ -from abc import ABC, abstractmethod -from unittest.mock import MagicMock, Mock - -from mindsdb.integrations.libs.response import ( - DataHandlerResponse as Response, - HandlerStatusResponse as StatusResponse, -) - - -class MockCursorContextManager(Mock): - """ - A mock class that simulates a cursor context manager for database clients. - This class is used in the `BaseDatabaseHandlerTest` class to simulate the cursor object returned by the database client. - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.description = kwargs.get("description", [["a"]]) - self.data = kwargs.get("data", [[1]]) - - def __enter__(self): - return self - - def __exit__(self, *args): - pass - - def fetchall(self): - return self.data - - -class BaseHandlerTestSetup(ABC): - """ - The base class that provides setup and teardown methods for tests. - It is recommended to use a 'base' subclass of `BaseHandlerTest` as a base class for testing handlers. 
- This class can be used as a base class to only set up the test environment for testing handlers. - """ - - @property - @abstractmethod - def dummy_connection_data(self): - """ - A dictionary containing dummy connection data. - This attribute should be overridden in subclasses to provide the specific connection data. - """ - pass - - def setUp(self): - """ - Sets up the test environment by creating instances of the patcher and handler. - """ - self.patcher = self.create_patcher() - self.mock_connect = self.patcher.start() - self.handler = self.create_handler() - - def tearDown(self): - """ - Tears down the test environment by stopping the patcher. - """ - self.patcher.stop() - - @abstractmethod - def create_patcher(self): - """ - Create and return a unittest.mock.patch instance for the package used to implement the connection. - This method should be overridden in subclasses to provide the specific patch instance. - """ - pass - - @abstractmethod - def create_handler(self): - """ - Create and return a handler instance. - This method should be overridden in subclasses to provide the specific handler. - """ - pass - - -class BaseHandlerTest(BaseHandlerTestSetup): - """ - The Base class for testing handlers. This class provides methods to test the `connect` and `check_connection` methods of a handler. - It is recommended to use a 'base' subclass of this class as a base class for testing handlers. - This class can be used as a base class for testing only the connection-related methods of a handler. - """ - - @property - @abstractmethod - def err_to_raise_on_connect_failure(self): - """ - An exception to raise when the connection fails. This is the exception that is raised in the `connect` and `check_connection` methods when the connection fails. - This attribute should be overridden in subclasses to provide the specific exception. 
- """ - pass - - def test_connect_success(self): - """ - Tests if the `connect` method handles a successful connection and sets `is_connected` to True. - """ - self.mock_connect.return_value = MagicMock() - connection = self.handler.connect() - - self.assertIsNotNone(connection) - self.assertTrue(self.handler.is_connected) - self.mock_connect.assert_called_once() - - def test_connect_failure(self): - """ - Tests if the `connect` method handles a failed connection and sets `is_connected` to False. - """ - self.mock_connect.side_effect = self.err_to_raise_on_connect_failure - - with self.assertRaises(type(self.err_to_raise_on_connect_failure)): - self.handler.connect() - self.assertFalse(self.handler.is_connected) - - def test_check_connection_success(self): - """ - Tests if the `check_connection` method handles a successful connection check and returns a StatusResponse object that accurately reflects the connection status. - """ - self.mock_connect.return_value = MagicMock() - response = self.handler.check_connection() - - assert isinstance(response, StatusResponse) - self.assertTrue(response.success) - self.assertFalse(response.error_message) - - def test_check_connection_failure(self): - """ - Tests if the `check_connection` method handles a failed connection check and returns a StatusResponse object that accurately reflects the connection status. - """ - self.mock_connect.side_effect = self.err_to_raise_on_connect_failure - response = self.handler.check_connection() - - assert isinstance(response, StatusResponse) - self.assertFalse(response.success) - self.assertTrue(response.error_message) - - -class BaseDatabaseHandlerTest(BaseHandlerTest): - """ - The Base class for testing database handlers. This class provides methods to test the `native_query`, `get_tables` and `get_columns` methods of a handler. 
- This class should be used as a base class for testing database handlers which have an implementation that uses a typical database client (with a cursor) like psycopg2, pymssql, etc. - """ - - @property - def mock_table(self): - """ - A string containing the name of a mock table. This attribute should be used as the table name when constructing SQL queries. - It should be used in the `get_columns_query` attribute. - """ - return "mock_table" - - @property - @abstractmethod - def get_tables_query(self): - """ - A string containing the SQL query to get the tables of a database. This is the query that is executed in the `get_tables` method. - This attribute should be overridden in subclasses to provide the specific query. - """ - pass - - @property - @abstractmethod - def get_columns_query(self): - """ - A string containing the SQL query to get the columns of a table. This is the query that is executed in the `get_columns` method. - This attribute should be overridden in subclasses to provide the specific query. - """ - pass - - def test_get_columns(self): - """ - Tests if the `get_tables` method calls `native_query` with the correct SQL query. - """ - self.handler.native_query = MagicMock() - self.handler.get_columns(self.mock_table) - - self.handler.native_query.assert_called_once() - assert self.handler.native_query.call_args_list[0][0][0] == self.get_columns_query - - def test_get_tables(self): - """ - Tests if the `get_columns` method constructs the correct SQL query and if it calls `native_query` with that query. - """ - self.handler.native_query = MagicMock() - self.handler.get_tables() - - self.handler.native_query.assert_called_once_with(self.get_tables_query) - - -class BaseAPIHandlerTest(BaseHandlerTest): - """ - The Base class for testing API handlers. This class provides methods to test the `get_tables` and `get_columns` methods of a handler. - The 'native_query' should be tested in the individual subclasses because not all API handlers support it. 
- """ - - @property - @abstractmethod - def registered_tables(self): - """ - A list of tables that are registered to the handler. - This attribute should be overridden in subclasses to provide the specific list of tables. - """ - pass - - def test_get_columns(self): - """ - Tests if the `get_columns` method returns the list of columns of a table. - """ - response = self.handler.get_columns(self.registered_tables[0]) - - assert isinstance(response, Response) - assert response.data_frame.columns.tolist() == ["Field", "Type"] - - def test_get_tables(self): - """ - Tests if the `get_tables` method returns the list of registered tables. - """ - response = self.handler.get_tables() - - assert isinstance(response, Response) - assert all(col in response.data_frame.columns.tolist() for col in ["table_name", "table_type"]) - assert response.data_frame["table_type"].unique().tolist() == ["BASE TABLE"] - assert response.data_frame["table_name"].tolist() == self.registered_tables - - -class BaseAPIChatHandlerTest(BaseAPIHandlerTest): - """ - The Base class for testing chat handlers. This class provides methods to test `get_chat_config` and `get_my_user_name` methods of a handler. - If a `subscribe` method is implemented, it should be tested in the individual subclass. - """ - - def test_get_chat_config(self): - """ - Tests if the `get_chat_config` method returns a chat configuration in the form of a dictionary. - The dictionary should contain the keys `polling` and optionally, either `chat_table` or `tables`. - If either `chat_table` or `tables` is present, they should contain the keys `name`, `chat_id_col`, `username_col`, `text_col`, and `time_col`. 
- """ - response = self.handler.get_chat_config() - - assert isinstance(response, dict) - assert ( - "polling" in response - and isinstance(response["polling"], dict) - and "type" in response["polling"] - and response["polling"]["type"] in ["realtime", "message_count", "webhook"] - ) - - required_keys = ["name", "chat_id_col", "username_col", "text_col", "time_col"] - if "chat_table" in response: - assert isinstance(response["chat_table"], dict) - assert all(key in list(response["chat_table"].keys()) for key in required_keys) - - if "tables" in response: - assert isinstance(response["tables"], list) - assert all(isinstance(table, dict) for table in response["tables"]) - assert all( - all(key in list(table["chat_table"].keys()) for key in required_keys) for table in response["tables"] - ) - - @abstractmethod - def test_get_my_user_name(self): - """ - Tests if the `get_my_user_name` method returns the name of the user. - This should be overridden in subclasses to provide the specific test. - """ - pass - - -class BaseAPIResourceTestSetup(BaseHandlerTestSetup): - """ - The base class that provides setup and teardown methods for testing implementations of the `APIResource` class. - """ - - def setUp(self): - """ - Sets up the test environment by creating an instance of the resource along with the patcher and handler from the parent class. - """ - super().setUp() - self.resource = self.create_resource() - - @abstractmethod - def create_resource(self): - """ - Create and return an instance of the resource. - This method should be overridden in subclasses to provide the specific resource. 
- """ - pass diff --git a/tests/unit/handlers/community_handlers/test_access_handler.py b/tests/unit/handlers/community_handlers/test_access_handler.py deleted file mode 100644 index b360d3ef602..00000000000 --- a/tests/unit/handlers/community_handlers/test_access_handler.py +++ /dev/null @@ -1,486 +0,0 @@ -import unittest -from unittest.mock import MagicMock, patch -import pandas as pd -import sys - -import pytest - -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse, RESPONSE_TYPE - -# Mock pyodbc and sqlalchemy_access before importing the handler -# This is necessary because the handler imports these at module level -if "pyodbc" not in sys.modules: - sys.modules["pyodbc"] = MagicMock() -if "sqlalchemy_access" not in sys.modules: - sys.modules["sqlalchemy_access"] = MagicMock() - sys.modules["sqlalchemy_access.base"] = MagicMock() - -try: - from mindsdb.integrations.handlers.access_handler.access_handler import AccessHandler -except ImportError: - pytestmark = pytest.mark.skip("access_handler not installed (community handler)") - - -class BaseAccessHandlerTest(unittest.TestCase): - """Base test class with common setup and helper methods.""" - - # Test constants - TEST_DB_PATH = "C:\\Users\\test\\Documents\\test_db.accdb" - TEST_HANDLER_NAME = "test_access_handler" - - def setUp(self): - """Set up test fixtures.""" - self.connection_data = {"db_file": self.TEST_DB_PATH} - self.handler = AccessHandler(self.TEST_HANDLER_NAME, self.connection_data) - - def tearDown(self): - """Clean up after tests.""" - if hasattr(self.handler, "is_connected") and self.handler.is_connected: - self.handler.disconnect() - - @staticmethod - def create_mock_connection_with_cursor(): - """Create a mock connection with a cursor context manager.""" - mock_connection = MagicMock() - mock_cursor = MagicMock() - mock_connection.cursor.return_value.__enter__.return_value = mock_cursor - return mock_connection, mock_cursor - - def setup_mock_select_query(self, 
mock_cursor, columns, rows): - """Configure mock cursor for SELECT query results.""" - mock_cursor.description = [(col,) for col in columns] - mock_cursor.fetchall.return_value = rows - - def setup_mock_write_query(self, mock_cursor): - """Configure mock cursor for INSERT/UPDATE/DELETE queries.""" - mock_cursor.fetchall.return_value = None - - def setup_mock_tables(self, mock_cursor, table_names): - """Configure mock cursor for get_tables results.""" - mock_tables = [] - for name in table_names: - mock_table = MagicMock() - mock_table.table_name = name - mock_tables.append(mock_table) - mock_cursor.tables.return_value = mock_tables - - def setup_mock_columns(self, mock_cursor, columns_data): - """Configure mock cursor for get_columns results. - - Args: - mock_cursor: The mock cursor object - columns_data: List of tuples [(column_name, type_name), ...] - """ - mock_columns = [] - for col_name, type_name in columns_data: - mock_col = MagicMock() - mock_col.column_name = col_name - mock_col.type_name = type_name - mock_columns.append(mock_col) - - mock_cursor.columns.return_value = mock_columns - - -class TestAccessHandlerConnection(BaseAccessHandlerTest): - """Test suite for Access Handler connection management.""" - - @patch("platform.system") - @patch("mindsdb.integrations.handlers.access_handler.access_handler.pyodbc") - def test_connect_success(self, mock_pyodbc, mock_platform): - """Test successful connection to Access database.""" - mock_platform.return_value = "Windows" - mock_connection, _ = self.create_mock_connection_with_cursor() - mock_pyodbc.connect.return_value = mock_connection - - result = self.handler.connect() - - mock_pyodbc.connect.assert_called_once_with( - r"Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=" + self.TEST_DB_PATH - ) - - self.assertTrue(self.handler.is_connected) - self.assertEqual(result, mock_connection) - - @patch("platform.system") - @patch("mindsdb.integrations.handlers.access_handler.access_handler.pyodbc") - def 
test_connect_when_already_connected(self, mock_pyodbc, mock_platform): - """Test that connect returns existing connection when already connected.""" - mock_platform.return_value = "Windows" - mock_connection, _ = self.create_mock_connection_with_cursor() - self.handler.connection = mock_connection - self.handler.is_connected = True - - result = self.handler.connect() - - mock_pyodbc.connect.assert_not_called() - self.assertEqual(result, mock_connection) - - @patch("platform.system") - @patch("mindsdb.integrations.handlers.access_handler.access_handler.pyodbc") - def test_connect_failure(self, mock_pyodbc, mock_platform): - """Test connection failure handling.""" - mock_platform.return_value = "Windows" - error_msg = "Driver not found" - mock_pyodbc.connect.side_effect = Exception(error_msg) - - with self.assertRaises(Exception) as context: - self.handler.connect() - - self.assertIn(error_msg, str(context.exception)) - self.assertFalse(self.handler.is_connected) - - def test_disconnect_when_connected(self): - """Test disconnect when connection exists.""" - mock_connection, _ = self.create_mock_connection_with_cursor() - self.handler.connection = mock_connection - self.handler.is_connected = True - - result = self.handler.disconnect() - - mock_connection.close.assert_called_once() - self.assertFalse(result) - self.assertFalse(self.handler.is_connected) - - def test_disconnect_when_not_connected(self): - """Test disconnect when not connected.""" - self.handler.is_connected = False - - result = self.handler.disconnect() - - self.assertIsNone(result) - - @patch("platform.system") - @patch("mindsdb.integrations.handlers.access_handler.access_handler.pyodbc") - def test_check_connection_success(self, mock_pyodbc, mock_platform): - """Test successful connection check.""" - mock_platform.return_value = "Windows" - mock_connection, _ = self.create_mock_connection_with_cursor() - mock_pyodbc.connect.return_value = mock_connection - - result = self.handler.check_connection() - 
- self.assertIsInstance(result, StatusResponse) - self.assertTrue(result.success) - self.assertFalse(self.handler.is_connected) # Should disconnect after check - - @patch("platform.system") - @patch("mindsdb.integrations.handlers.access_handler.access_handler.pyodbc") - def test_check_connection_failure(self, mock_pyodbc, mock_platform): - """Test connection check failure.""" - mock_platform.return_value = "Windows" - error_message = "Cannot open database" - mock_pyodbc.connect.side_effect = Exception(error_message) - - result = self.handler.check_connection() - - self.assertIsInstance(result, StatusResponse) - self.assertFalse(result.success) - self.assertIn(error_message, result.error_message) - - -class TestAccessHandlerQueries(BaseAccessHandlerTest): - """Test suite for Access Handler query execution.""" - - @patch("platform.system") - @patch("mindsdb.integrations.handlers.access_handler.access_handler.pyodbc") - def test_native_query_select_success(self, mock_pyodbc, mock_platform): - """Test successful SELECT query execution.""" - mock_platform.return_value = "Windows" - mock_connection, mock_cursor = self.create_mock_connection_with_cursor() - mock_pyodbc.connect.return_value = mock_connection - - # Setup test data - columns = ["id", "name", "email"] - rows = [(1, "John Doe", "john@example.com"), (2, "Jane Smith", "jane@example.com")] - self.setup_mock_select_query(mock_cursor, columns, rows) - - query = "SELECT * FROM customers" - result = self.handler.native_query(query) - - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - self.assertIsInstance(result.data_frame, pd.DataFrame) - self.assertEqual(len(result.data_frame), 2) - self.assertEqual(list(result.data_frame.columns), columns) - mock_cursor.execute.assert_called_once_with(query) - - @patch("platform.system") - @patch("mindsdb.integrations.handlers.access_handler.access_handler.pyodbc") - def test_native_query_insert_success(self, mock_pyodbc, mock_platform): - """Test successful INSERT query 
execution.""" - mock_platform.return_value = "Windows" - mock_connection, mock_cursor = self.create_mock_connection_with_cursor() - mock_pyodbc.connect.return_value = mock_connection - self.setup_mock_write_query(mock_cursor) - - query = "INSERT INTO customers (name, email) VALUES ('Test User', 'test@example.com')" - result = self.handler.native_query(query) - - self.assertEqual(result.type, RESPONSE_TYPE.OK) - mock_cursor.execute.assert_called_once_with(query) - mock_connection.commit.assert_called_once() - - @patch("platform.system") - @patch("mindsdb.integrations.handlers.access_handler.access_handler.pyodbc") - def test_native_query_update_success(self, mock_pyodbc, mock_platform): - """Test successful UPDATE query execution.""" - mock_platform.return_value = "Windows" - mock_connection, mock_cursor = self.create_mock_connection_with_cursor() - mock_pyodbc.connect.return_value = mock_connection - self.setup_mock_write_query(mock_cursor) - - query = "UPDATE customers SET status = 'active' WHERE id = 1" - result = self.handler.native_query(query) - - self.assertEqual(result.type, RESPONSE_TYPE.OK) - mock_cursor.execute.assert_called_once_with(query) - mock_connection.commit.assert_called_once() - - @patch("platform.system") - @patch("mindsdb.integrations.handlers.access_handler.access_handler.pyodbc") - def test_native_query_delete_success(self, mock_pyodbc, mock_platform): - """Test successful DELETE query execution.""" - mock_platform.return_value = "Windows" - mock_connection, mock_cursor = self.create_mock_connection_with_cursor() - mock_pyodbc.connect.return_value = mock_connection - self.setup_mock_write_query(mock_cursor) - - query = "DELETE FROM customers WHERE id = 1" - result = self.handler.native_query(query) - - self.assertEqual(result.type, RESPONSE_TYPE.OK) - mock_cursor.execute.assert_called_once_with(query) - mock_connection.commit.assert_called_once() - - @patch("platform.system") - 
@patch("mindsdb.integrations.handlers.access_handler.access_handler.pyodbc") - def test_native_query_empty_result(self, mock_pyodbc, mock_platform): - """Test query with empty result set.""" - mock_platform.return_value = "Windows" - mock_connection, mock_cursor = self.create_mock_connection_with_cursor() - mock_pyodbc.connect.return_value = mock_connection - mock_cursor.fetchall.return_value = [] - - query = "SELECT * FROM customers WHERE id = 999" - result = self.handler.native_query(query) - - self.assertEqual(result.type, RESPONSE_TYPE.OK) - - @patch("platform.system") - @patch("mindsdb.integrations.handlers.access_handler.access_handler.pyodbc") - def test_native_query_failure(self, mock_pyodbc, mock_platform): - """Test query execution failure.""" - mock_platform.return_value = "Windows" - mock_connection, mock_cursor = self.create_mock_connection_with_cursor() - mock_pyodbc.connect.return_value = mock_connection - - error_message = "Syntax error in query" - mock_cursor.execute.side_effect = Exception(error_message) - - query = "SELECT * FROM invalid_table" - result = self.handler.native_query(query) - - self.assertEqual(result.type, RESPONSE_TYPE.ERROR) - self.assertIn(error_message, result.error_message) - - @patch("platform.system") - @patch("mindsdb.integrations.handlers.access_handler.access_handler.pyodbc") - def test_native_query_disconnects_when_needed(self, mock_pyodbc, mock_platform): - """Test that native_query disconnects when it opened the connection.""" - mock_platform.return_value = "Windows" - mock_connection, mock_cursor = self.create_mock_connection_with_cursor() - mock_pyodbc.connect.return_value = mock_connection - self.setup_mock_write_query(mock_cursor) - - self.handler.is_connected = False - self.handler.native_query("SELECT 1") - - self.assertFalse(self.handler.is_connected) - - @patch("mindsdb.integrations.handlers.access_handler.access_handler.AccessDialect", MagicMock()) - 
@patch("mindsdb.integrations.handlers.access_handler.access_handler.SqlalchemyRender") - @patch("platform.system") - @patch("mindsdb.integrations.handlers.access_handler.access_handler.pyodbc") - def test_query_with_ast(self, mock_pyodbc, mock_platform, mock_render): - """Test query method with AST input.""" - mock_platform.return_value = "Windows" - mock_connection, mock_cursor = self.create_mock_connection_with_cursor() - mock_pyodbc.connect.return_value = mock_connection - self.setup_mock_write_query(mock_cursor) - - mock_renderer = MagicMock() - mock_render.return_value = mock_renderer - mock_renderer.get_string.return_value = "SELECT * FROM test_table" - - from mindsdb_sql_parser import parse_sql - - ast_query = parse_sql("SELECT * FROM test_table") - - result = self.handler.query(ast_query) - - self.assertEqual(result.type, RESPONSE_TYPE.OK) - mock_renderer.get_string.assert_called_once_with(ast_query, with_failback=True) - - @patch("platform.system") - @patch("mindsdb.integrations.handlers.access_handler.access_handler.pyodbc") - def test_native_query_with_special_characters(self, mock_pyodbc, mock_platform): - """Test query with special characters.""" - mock_platform.return_value = "Windows" - mock_connection, mock_cursor = self.create_mock_connection_with_cursor() - mock_pyodbc.connect.return_value = mock_connection - self.setup_mock_select_query(mock_cursor, ["name"], [("O'Brien",)]) - - query = "SELECT * FROM customers WHERE name = 'O''Brien'" - result = self.handler.native_query(query) - - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - mock_cursor.execute.assert_called_once_with(query) - - @patch("platform.system") - @patch("mindsdb.integrations.handlers.access_handler.access_handler.pyodbc") - def test_multiple_sequential_queries(self, mock_pyodbc, mock_platform): - """Test multiple sequential queries.""" - mock_platform.return_value = "Windows" - mock_connection, mock_cursor = self.create_mock_connection_with_cursor() - 
mock_pyodbc.connect.return_value = mock_connection - self.setup_mock_write_query(mock_cursor) - - queries = ["INSERT INTO test VALUES (1)", "INSERT INTO test VALUES (2)", "INSERT INTO test VALUES (3)"] - results = [self.handler.native_query(q) for q in queries] - - for result in results: - self.assertEqual(result.type, RESPONSE_TYPE.OK) - - -class TestAccessHandlerMetadata(BaseAccessHandlerTest): - """Test suite for Access Handler metadata operations.""" - - @patch("platform.system") - @patch("mindsdb.integrations.handlers.access_handler.access_handler.pyodbc") - def test_get_tables_success(self, mock_pyodbc, mock_platform): - """Test successful retrieval of table list.""" - mock_platform.return_value = "Windows" - mock_connection, mock_cursor = self.create_mock_connection_with_cursor() - mock_pyodbc.connect.return_value = mock_connection - - table_names = ["customers", "orders"] - self.setup_mock_tables(mock_cursor, table_names) - - result = self.handler.get_tables() - - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - self.assertIsInstance(result.data_frame, pd.DataFrame) - self.assertEqual(len(result.data_frame), 2) - for name in table_names: - self.assertIn(name, result.data_frame["table_name"].values) - mock_cursor.tables.assert_called_once_with(tableType="Table") - - @patch("platform.system") - @patch("mindsdb.integrations.handlers.access_handler.access_handler.pyodbc") - def test_get_tables_empty(self, mock_pyodbc, mock_platform): - """Test get_tables with no tables.""" - mock_platform.return_value = "Windows" - mock_connection, mock_cursor = self.create_mock_connection_with_cursor() - mock_pyodbc.connect.return_value = mock_connection - mock_cursor.tables.return_value = [] - - result = self.handler.get_tables() - - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - self.assertIsInstance(result.data_frame, pd.DataFrame) - self.assertEqual(len(result.data_frame), 0) - - @patch("platform.system") - 
@patch("mindsdb.integrations.handlers.access_handler.access_handler.pyodbc") - def test_get_columns_success(self, mock_pyodbc, mock_platform): - """Test successful retrieval of column list.""" - mock_platform.return_value = "Windows" - mock_connection, mock_cursor = self.create_mock_connection_with_cursor() - mock_pyodbc.connect.return_value = mock_connection - - columns_data = [("id", "INTEGER"), ("name", "VARCHAR"), ("email", "VARCHAR")] - self.setup_mock_columns(mock_cursor, columns_data) - - result = self.handler.get_columns("customers") - - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - self.assertIsInstance(result.data_frame, pd.DataFrame) - self.assertEqual(len(result.data_frame), 3) - self.assertEqual(list(result.data_frame.columns), ["column_name", "data_type"]) - for col_name, _ in columns_data: - self.assertIn(col_name, result.data_frame["column_name"].values) - mock_cursor.columns.assert_called_once_with(table="customers") - - @patch("platform.system") - @patch("mindsdb.integrations.handlers.access_handler.access_handler.pyodbc") - def test_get_columns_empty(self, mock_pyodbc, mock_platform): - """Test get_columns with no columns.""" - mock_platform.return_value = "Windows" - mock_connection, mock_cursor = self.create_mock_connection_with_cursor() - mock_pyodbc.connect.return_value = mock_connection - mock_cursor.columns.return_value = [] - - result = self.handler.get_columns("empty_table") - - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - self.assertIsInstance(result.data_frame, pd.DataFrame) - self.assertEqual(len(result.data_frame), 0) - - -class TestAccessHandlerConnectionArgs(BaseAccessHandlerTest): - """Test suite for Access Handler connection arguments validation.""" - - def test_connection_args_structure(self): - """Test that connection_args has the correct structure.""" - from mindsdb.integrations.handlers.access_handler.connection_args import connection_args - - required_fields = ["type", "description", "required", "label"] - 
self.assertIn("db_file", connection_args) - for field in required_fields: - self.assertIn(field, connection_args["db_file"]) - self.assertTrue(connection_args["db_file"]["required"]) - - def test_connection_args_example_structure(self): - """Test that connection_args_example has the correct structure.""" - from mindsdb.integrations.handlers.access_handler.connection_args import connection_args_example - - self.assertIn("db_file", connection_args_example) - self.assertIsInstance(connection_args_example["db_file"], str) - - def test_handler_initialization(self): - """Test handler initialization with valid connection data.""" - handler = AccessHandler("test_handler", self.connection_data) - - self.assertEqual(handler.name, "test_handler") - self.assertEqual(handler.connection_data, self.connection_data) - self.assertEqual(handler.dialect, "access") - self.assertFalse(handler.is_connected) - self.assertIsNone(handler.connection) - - -class TestAccessHandlerEdgeCases(BaseAccessHandlerTest): - """Test suite for Access Handler edge cases.""" - - def test_handler_del_when_connected(self): - """Test __del__ method when handler is connected.""" - handler = AccessHandler("test_handler", self.connection_data) - mock_connection, _ = self.create_mock_connection_with_cursor() - handler.connection = mock_connection - handler.is_connected = True - - handler.__del__() - - mock_connection.close.assert_called_once() - - def test_handler_del_when_not_connected(self): - """Test __del__ method when handler is not connected.""" - handler = AccessHandler("test_handler", self.connection_data) - handler.is_connected = False - - try: - handler.__del__() - except Exception as e: - self.fail(f"__del__ raised exception: {e}") - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/community_handlers/test_clickhouse.py b/tests/unit/handlers/community_handlers/test_clickhouse.py deleted file mode 100644 index 68ec1d895fd..00000000000 --- 
a/tests/unit/handlers/community_handlers/test_clickhouse.py +++ /dev/null @@ -1,88 +0,0 @@ -from collections import OrderedDict -import unittest -import pytest -from unittest.mock import patch, MagicMock - -from sqlalchemy.exc import SQLAlchemyError -from mindsdb_sql_parser import parse_sql - -from base_handler_test import BaseDatabaseHandlerTest, MockCursorContextManager -from mindsdb.integrations.libs.response import TableResponse - -try: - from mindsdb.integrations.handlers.clickhouse_handler.clickhouse_handler import ClickHouseHandler -except ImportError: - pytestmark = pytest.mark.skip("Clickhouse handler not installed") - - -class TestClickHouseHandler(BaseDatabaseHandlerTest, unittest.TestCase): - @property - def dummy_connection_data(self): - return OrderedDict( - host="127.0.0.1", - port=8123, - user="example_user", - password="example_pass", - database="example_db", - protocol="native", - ) - - @property - def err_to_raise_on_connect_failure(self): - return SQLAlchemyError("Connection Failed") - - @property - def get_tables_query(self): - return f"SHOW TABLES FROM {self.dummy_connection_data['database']}" - - @property - def get_columns_query(self): - return f"DESCRIBE {self.mock_table}" - - def create_handler(self): - return ClickHouseHandler("clickhouse", connection_data=self.dummy_connection_data) - - def create_patcher(self): - return patch( - "mindsdb.integrations.handlers.clickhouse_handler.clickhouse_handler.create_engine", - return_value=MagicMock(), - ) - - def test_initialization(self): - """Test if the handler initializes with correct values and defaults.""" - self.mock_connect.return_value = MagicMock() - self.assertEqual(self.handler.name, "clickhouse") - self.assertEqual(self.handler.dialect, "clickhouse") - self.assertFalse(self.handler.is_connected) - self.assertEqual(self.handler.protocol, "native") - - def test_renderer(self): - sql = "SELECT * FROM ch.table WHERE created_at = (now() - INTERVAL '5' MINUTE);" - rendered_sql = 
self.handler.renderer.get_string(parse_sql(sql), with_failback=True) - assert rendered_sql == "SELECT * \nFROM ch.\"table\" \nWHERE created_at = now() - INTERVAL '5' MINUTE" - - def test_connect_success(self): - self.mock_connect.return_value = MagicMock() - self.handler.connect() - self.mock_connect.assert_called_once_with( - f"clickhouse+{self.dummy_connection_data['protocol']}://{self.dummy_connection_data['user']}:{self.dummy_connection_data['password']}@{self.dummy_connection_data['host']}:{self.dummy_connection_data['port']}/{self.dummy_connection_data['database']}" - ) - - def test_native_query(self): - """ - Tests the `native_query` method to ensure it executes a SQL query using a mock cursor and returns a Response object. - """ - mock_conn = MagicMock() - mock_cursor = MockCursorContextManager() - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - query_str = f"SELECT * FROM {self.mock_table}" - data = self.handler.native_query(query_str) - - assert isinstance(data, TableResponse) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/community_handlers/test_confluence.py b/tests/unit/handlers/community_handlers/test_confluence.py deleted file mode 100644 index 0a56a9a68a5..00000000000 --- a/tests/unit/handlers/community_handlers/test_confluence.py +++ /dev/null @@ -1,550 +0,0 @@ -from collections import OrderedDict -import unittest -from unittest.mock import MagicMock, call, patch - -import pytest -import pandas as pd - -from base_handler_test import BaseHandlerTestSetup, BaseAPIResourceTestSetup - -try: - from mindsdb.integrations.handlers.confluence_handler.confluence_api_client import ConfluenceAPIClient - from mindsdb.integrations.handlers.confluence_handler.confluence_handler import ConfluenceHandler - from mindsdb.integrations.handlers.confluence_handler.confluence_tables import ( - ConfluenceBlogPostsTable, - ConfluenceDatabasesTable, - 
ConfluencePagesTable, - ConfluenceSpacesTable, - ConfluenceWhiteboardsTable, - ConfluenceTasksTable, - ) - -except ImportError: - pytestmark = pytest.mark.skip("Confluence handler not installed") - -from mindsdb.integrations.libs.response import TableResponse, HandlerStatusResponse as StatusResponse, RESPONSE_TYPE -from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator, SortColumn - - -class TestConfluenceHandler(BaseHandlerTestSetup, unittest.TestCase): - @property - def dummy_connection_data(self): - return OrderedDict( - api_base="https://demo.atlassian.net/", - username="demo@example.com", - password="demo_password", - ) - - def create_handler(self): - return ConfluenceHandler("confluence", connection_data=self.dummy_connection_data) - - def create_patcher(self): - return patch("requests.Session") - - def test_connect(self): - """ - Test if `connect` method successfully establishes a connection and sets `is_connected` flag to True. - The `connect` method for this handler does not check the validity of the connection; it succeeds even with incorrect credentials. - The `check_connection` method handles the connection status. - """ - connection = self.handler.connect() - - self.assertIsNotNone(connection) - self.assertTrue(self.handler.is_connected) - - def test_check_connection_success(self): - """ - Test that the `check_connection` method returns a StatusResponse object and accurately reflects the connection status on a successful connection. 
- """ - mock_request = MagicMock() - mock_request.return_value = MagicMock( - status_code=200, - raise_for_status=lambda: None, - json=lambda: dict(results=[], _links=dict(next=None)), - ) - self.mock_connect.return_value = MagicMock(request=mock_request) - - response = self.handler.check_connection() - - self.assertTrue(response.success) - assert isinstance(response, StatusResponse) - self.assertFalse(response.error_message) - - self.mock_connect.return_value.request.assert_called_with( - "GET", - f"{self.dummy_connection_data['api_base']}/wiki/api/v2/spaces", - params={"description-format": "view", "limit": 1}, - json=None, - ) - - def test_check_connection_failure(self): - """ - Test that the `check_connection` method returns a StatusResponse object and accurately reflects the connection status on a failed connection. - """ - mock_request = MagicMock() - mock_request.return_value = MagicMock( - status_code=401, - raise_for_status=lambda: None, - ) - self.mock_connect.return_value = MagicMock(request=mock_request) - response = self.handler.check_connection() - - self.assertFalse(response.success) - assert isinstance(response, StatusResponse) - self.assertTrue(response.error_message) - - self.mock_connect.return_value.request.assert_called_with( - "GET", - f"{self.dummy_connection_data['api_base']}/wiki/api/v2/spaces", - params={"description-format": "view", "limit": 1}, - json=None, - ) - - def test_get_tables(self): - """ - Test that the `get_tables` method returns a TableResponse with a list of table names. - """ - response = self.handler.get_tables() - - self.assertIsInstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - self.assertEqual(response.data_frame.columns.tolist(), ["table_name", "table_type"]) - - def test_get_columns(self): - """ - Test that the `get_columns` method returns a TableResponse with a list of columns for a table. 
- """ - response = self.handler.get_columns("spaces") - - self.assertIsInstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - self.assertEqual(response.data_frame.columns.tolist(), ["Field", "Type"]) - - -class ConfluenceTablesTestSetup(BaseAPIResourceTestSetup): - @property - def dummy_connection_data(self): - return OrderedDict( - api_base="https://demo.atlassian.net/", - username="demo@example.com", - password="demo_password", - ) - - def create_handler(self): - return ConfluenceHandler("confluence", connection_data=self.dummy_connection_data) - - def create_patcher(self): - return patch("requests.Session") - - def setUp(self): - """ - Set up common test fixtures. - """ - super().setUp() - - mock_request = MagicMock() - mock_request.return_value = MagicMock( - status_code=200, - raise_for_status=lambda: None, - json=lambda: dict( - results=[{column: f"mock_{column}" for column in self.resource.get_columns()}], _links=dict(next=None) - ), - ) - self.mock_connect.return_value = MagicMock(request=mock_request) - - -class TestConfluenceSpacesTable(ConfluenceTablesTestSetup, unittest.TestCase): - def create_resource(self): - return ConfluenceSpacesTable(self.handler) - - def test_list_all_returns_results(self): - """ - Test that the `list` with a query equivalent to `SELECT * FROM spaces` returns a list of spaces. - """ - df = self.resource.list(conditions=[]) - - self.assertIsInstance(df, pd.DataFrame) - self.assertEqual(df.columns.tolist(), self.resource.get_columns()) - self.assertEqual(df.shape, (1, len(self.resource.get_columns()))) - - self.mock_connect.return_value.request.assert_called_with( - "GET", - f"{self.dummy_connection_data['api_base']}/wiki/api/v2/spaces", - params={"description-format": "view"}, - json=None, - ) - - def test_list_with_conditions_returns_results(self): - """ - Test that the `list` method returns a list of spaces with the specified conditions. 
- """ - mock_id = "mock_id" - mock_key = "mock_key" - mock_type = "mock_type" - mock_status = "mock_status" - df = self.resource.list( - conditions=[ - FilterCondition(column="id", op=FilterOperator.EQUAL, value=mock_id), - FilterCondition(column="key", op=FilterOperator.EQUAL, value=mock_key), - FilterCondition(column="type", op=FilterOperator.EQUAL, value=mock_type), - FilterCondition(column="status", op=FilterOperator.EQUAL, value=mock_status), - ] - ) - - self.assertIsInstance(df, pd.DataFrame) - self.assertEqual(df.columns.tolist(), self.resource.get_columns()) - self.assertEqual(df.shape, (1, len(self.resource.get_columns()))) - - self.mock_connect.return_value.request.assert_called_with( - "GET", - f"{self.dummy_connection_data['api_base']}/wiki/api/v2/spaces", - params={ - "description-format": "view", - "ids": [mock_id], - "keys": [mock_key], - "type": mock_type, - "status": mock_status, - }, - json=None, - ) - - def test_list_with_unsupported_operator_raises_error(self): - """ - Test that an unsupported operator on id raises a ValueError. - """ - with self.assertRaises(ValueError): - self.resource.list( - conditions=[ - FilterCondition(column="id", op=FilterOperator.GREATER_THAN, value="1"), - ] - ) - - -class TestConfluencePagesTable(ConfluenceTablesTestSetup, unittest.TestCase): - def create_resource(self): - return ConfluencePagesTable(self.handler) - - def test_list_all_returns_results(self): - """ - Test that the `list` with a query equivalent to `SELECT * FROM pages` returns a list of pages. 
- """ - df = self.resource.list(conditions=[]) - - self.assertIsInstance(df, pd.DataFrame) - self.assertEqual(df.columns.tolist(), self.resource.get_columns()) - self.assertEqual(df.shape, (1, len(self.resource.get_columns()))) - - self.mock_connect.return_value.request.assert_called_with( - "GET", - f"{self.dummy_connection_data['api_base']}/wiki/api/v2/pages", - params={"body-format": "storage"}, - json=None, - ) - - def test_list_with_conditions_returns_results(self): - """ - Test that the `list` method returns a list of pages with the specified conditions. - """ - mock_id = "mock_id" - mock_space_id = "mock_space_id" - mock_status = "mock_status" - mock_title = "mock_title" - df = self.resource.list( - conditions=[ - FilterCondition(column="id", op=FilterOperator.EQUAL, value=mock_id), - FilterCondition(column="spaceId", op=FilterOperator.EQUAL, value=mock_space_id), - FilterCondition(column="status", op=FilterOperator.EQUAL, value=mock_status), - FilterCondition(column="title", op=FilterOperator.EQUAL, value=mock_title), - ] - ) - - self.assertIsInstance(df, pd.DataFrame) - self.assertEqual(df.columns.tolist(), self.resource.get_columns()) - self.assertEqual(df.shape, (1, len(self.resource.get_columns()))) - - self.mock_connect.return_value.request.assert_called_with( - "GET", - f"{self.dummy_connection_data['api_base']}/wiki/api/v2/pages", - params={ - "body-format": "storage", - "id": [mock_id], - "space-id": [mock_space_id], - "status": [mock_status], - "title": mock_title, - }, - json=None, - ) - - def test_list_with_sort_created_at_desc(self): - """ - Test that the `list` method applies descending createdAt sort. 
- """ - sort_column = SortColumn(column="createdAt", ascending=False) - df = self.resource.list(conditions=[], sort=[sort_column]) - - self.assertIsInstance(df, pd.DataFrame) - self.assertTrue(sort_column.applied) - - self.mock_connect.return_value.request.assert_called_with( - "GET", - f"{self.dummy_connection_data['api_base']}/wiki/api/v2/pages", - params={ - "body-format": "storage", - "sort": "-created-date", - }, - json=None, - ) - - -class TestConfluenceBlogPostsTable(ConfluenceTablesTestSetup, unittest.TestCase): - def create_resource(self): - return ConfluenceBlogPostsTable(self.handler) - - def test_list_all_returns_results(self): - """ - Test that the `list` with a query equivalent to `SELECT * FROM blogposts` returns a list of blog posts. - """ - df = self.resource.list(conditions=[]) - - self.assertIsInstance(df, pd.DataFrame) - self.assertEqual(df.columns.tolist(), self.resource.get_columns()) - self.assertEqual(df.shape, (1, len(self.resource.get_columns()))) - - self.mock_connect.return_value.request.assert_called_with( - "GET", - f"{self.dummy_connection_data['api_base']}/wiki/api/v2/blogposts", - params={"body-format": "storage"}, - json=None, - ) - - def test_list_with_conditions_returns_results(self): - """ - Test that the `list` method returns a list of blog posts with the specified conditions. 
- """ - mock_id = "mock_id" - mock_space_id = "mock_space_id" - mock_status = "mock_status" - mock_title = "mock_title" - df = self.resource.list( - conditions=[ - FilterCondition(column="id", op=FilterOperator.EQUAL, value=mock_id), - FilterCondition(column="spaceId", op=FilterOperator.EQUAL, value=mock_space_id), - FilterCondition(column="status", op=FilterOperator.EQUAL, value=mock_status), - FilterCondition(column="title", op=FilterOperator.EQUAL, value=mock_title), - ] - ) - - self.assertIsInstance(df, pd.DataFrame) - self.assertEqual(df.columns.tolist(), self.resource.get_columns()) - self.assertEqual(df.shape, (1, len(self.resource.get_columns()))) - - self.mock_connect.return_value.request.assert_called_with( - "GET", - f"{self.dummy_connection_data['api_base']}/wiki/api/v2/blogposts", - params={ - "body-format": "storage", - "id": [mock_id], - "space-id": [mock_space_id], - "status": [mock_status], - "title": mock_title, - }, - json=None, - ) - - -class TestConfluenceDatabasesTable(ConfluenceTablesTestSetup, unittest.TestCase): - def create_resource(self): - return ConfluenceDatabasesTable(self.handler) - - def test_list_with_database_id_returns_results(self): - """ - Test that the `list` method returns a list of databases with the specified database ID. 
- """ - mock_request = MagicMock() - mock_request.return_value = MagicMock( - status_code=200, - raise_for_status=lambda: None, - json=lambda: {column: f"mock_{column}" for column in self.resource.get_columns()}, - ) - self.mock_connect.return_value = MagicMock(request=mock_request) - - mock_id = "mock_id" - df = self.resource.list( - conditions=[ - FilterCondition(column="id", op=FilterOperator.EQUAL, value=mock_id), - ] - ) - - self.assertIsInstance(df, pd.DataFrame) - self.assertEqual(df.columns.tolist(), self.resource.get_columns()) - self.assertEqual(df.shape, (1, len(self.resource.get_columns()))) - - self.mock_connect.return_value.request.assert_called_with( - "GET", f"{self.dummy_connection_data['api_base']}/wiki/api/v2/databases/{mock_id}", params=None, json=None - ) - - def test_list_without_database_id_raises_error(self): - """ - Test that the `list` method raises an error when no database ID is provided. - """ - with self.assertRaises(ValueError): - self.resource.list(conditions=[]) - - -class TestConfluenceWhiteboardsTable(ConfluenceTablesTestSetup, unittest.TestCase): - def create_resource(self): - return ConfluenceWhiteboardsTable(self.handler) - - def test_list_with_whiteboard_id_returns_results(self): - """ - Test that the `list` method returns a list of whiteboards with the specified whiteboard ID. 
- """ - mock_request = MagicMock() - mock_request.return_value = MagicMock( - status_code=200, - raise_for_status=lambda: None, - json=lambda: {column: f"mock_{column}" for column in self.resource.get_columns()}, - ) - self.mock_connect.return_value = MagicMock(request=mock_request) - - mock_id = "mock_id" - df = self.resource.list( - conditions=[ - FilterCondition(column="id", op=FilterOperator.EQUAL, value=mock_id), - ] - ) - - self.assertIsInstance(df, pd.DataFrame) - self.assertEqual(df.columns.tolist(), self.resource.get_columns()) - self.assertEqual(df.shape, (1, len(self.resource.get_columns()))) - - self.mock_connect.return_value.request.assert_called_with( - "GET", f"{self.dummy_connection_data['api_base']}/wiki/api/v2/whiteboards/{mock_id}", params=None, json=None - ) - - def test_list_without_whiteboard_id_raises_error(self): - """ - Test that the `list` method raises an error when no whiteboard ID is provided. - """ - with self.assertRaises(ValueError): - self.resource.list(conditions=[]) - - -class TestConfluenceTasksTable(ConfluenceTablesTestSetup, unittest.TestCase): - def create_resource(self): - return ConfluenceTasksTable(self.handler) - - def test_list_all_returns_results(self): - """ - Test that the `list` with a query equivalent to `SELECT * FROM tasks` returns a list of tasks. - """ - df = self.resource.list(conditions=[]) - - self.assertIsInstance(df, pd.DataFrame) - self.assertEqual(df.columns.tolist(), self.resource.get_columns()) - self.assertEqual(df.shape, (1, len(self.resource.get_columns()))) - - self.mock_connect.return_value.request.assert_called_with( - "GET", - f"{self.dummy_connection_data['api_base']}/wiki/api/v2/tasks", - params={"body-format": "storage"}, - json=None, - ) - - def test_list_with_conditions_returns_results(self): - """ - Test that the `list` method returns a list of tasks with the specified conditions. 
- """ - mock_task_ids = ["mock_task_id"] - mock_space_ids = ["mock_space_id"] - mock_page_ids = ["mock_page_id"] - mock_created_by_ids = ["mock_created_by_id"] - mock_assigned_to_ids = ["mock_assigned_to_id"] - mock_completed_by_ids = ["mock_completed_by_id"] - mock_status = "mock_status" - - df = self.resource.list( - conditions=[ - FilterCondition(column="id", op=FilterOperator.EQUAL, value=mock_task_ids[0]), - FilterCondition(column="spaceId", op=FilterOperator.EQUAL, value=mock_space_ids[0]), - FilterCondition(column="pageId", op=FilterOperator.EQUAL, value=mock_page_ids[0]), - FilterCondition(column="createdBy", op=FilterOperator.EQUAL, value=mock_created_by_ids[0]), - FilterCondition(column="assignedTo", op=FilterOperator.EQUAL, value=mock_assigned_to_ids[0]), - FilterCondition(column="completedBy", op=FilterOperator.EQUAL, value=mock_completed_by_ids[0]), - FilterCondition(column="status", op=FilterOperator.EQUAL, value=mock_status), - ] - ) - - self.assertIsInstance(df, pd.DataFrame) - self.assertEqual(df.columns.tolist(), self.resource.get_columns()) - self.assertEqual(df.shape, (1, len(self.resource.get_columns()))) - - self.mock_connect.return_value.request.assert_called_with( - "GET", - f"{self.dummy_connection_data['api_base']}/wiki/api/v2/tasks", - params={ - "body-format": "storage", - "id": mock_task_ids, - "space-id": mock_space_ids, - "page-id": mock_page_ids, - "created-by": mock_created_by_ids, - "assigned-to": mock_assigned_to_ids, - "completed-by": mock_completed_by_ids, - "status": mock_status, - }, - json=None, - ) - - -class TestConfluenceAPIClient(unittest.TestCase): - def test_paginate_with_cursor(self): - client = ConfluenceAPIClient("https://example.com", "user", "pass") - client._make_request = MagicMock( - side_effect=[ - {"results": [{"id": 1}], "_links": {"next": "https://example.com/next?cursor=abc&foo=1"}}, - {"results": [{"id": 2}], "_links": {"next": None}}, - ] - ) - - results = 
client._paginate("https://example.com/wiki/api/v2/pages", {"limit": 1}) - - self.assertEqual(results, [{"id": 1}, {"id": 2}]) - self.assertEqual( - client._make_request.call_args_list, - [ - call("GET", "https://example.com/wiki/api/v2/pages", {"limit": 1}), - call("GET", "https://example.com/wiki/api/v2/pages", {"limit": 1, "cursor": "abc"}), - ], - ) - - def test_paginate_with_next_url(self): - client = ConfluenceAPIClient("https://example.com", "user", "pass") - client._make_request = MagicMock( - side_effect=[ - {"results": [{"id": 1}], "_links": {"next": "https://example.com/wiki/api/v2/pages?foo=bar"}}, - {"results": [{"id": 2}], "_links": {"next": None}}, - ] - ) - - results = client._paginate("https://example.com/wiki/api/v2/pages", {"limit": 1}) - - self.assertEqual(results, [{"id": 1}, {"id": 2}]) - self.assertEqual( - client._make_request.call_args_list, - [ - call("GET", "https://example.com/wiki/api/v2/pages", {"limit": 1}), - call("GET", "https://example.com/wiki/api/v2/pages?foo=bar"), - ], - ) - - def test_make_request_raises_for_non_200(self): - client = ConfluenceAPIClient("https://example.com", "user", "pass") - response = MagicMock(status_code=400, text="bad request") - client.session.request = MagicMock(return_value=response) - - with self.assertRaises(Exception): - client._make_request("GET", "https://example.com/wiki/api/v2/pages") - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/community_handlers/test_databricks.py b/tests/unit/handlers/community_handlers/test_databricks.py deleted file mode 100644 index 3b968ada2de..00000000000 --- a/tests/unit/handlers/community_handlers/test_databricks.py +++ /dev/null @@ -1,626 +0,0 @@ -import unittest -from unittest.mock import patch, MagicMock -from collections import OrderedDict -from typing import List - -import pytest -import pandas as pd - -try: - from databricks.sql import RequestError, ServerOperationError - from mindsdb_sql_parser import parse_sql - from 
mindsdb.integrations.handlers.databricks_handler.databricks_handler import ( - DatabricksHandler, - ) - -except ImportError: - pytestmark = pytest.mark.skip("Databricks handler not installed") - -from mindsdb.integrations.libs.response import ( - TableResponse, - ErrorResponse, - OkResponse, - DataHandlerResponse, - RESPONSE_TYPE, - HandlerStatusResponse as StatusResponse, -) - - -class CursorContextManager: - """Mock cursor that supports context manager protocol.""" - - def __init__(self): - self.description = [] - self._results = [] - self.execute = MagicMock() - self.fetchall = MagicMock(return_value=[]) - self.fetchone = MagicMock(return_value=None) - - def __enter__(self): - return self - - def __exit__(self, *args): - pass - - def set_results(self, results: List[tuple], columns: List[str]): - """Set mock query results.""" - self._results = results - self.description = [(col,) for col in columns] - self.fetchall = MagicMock(return_value=results) - self.fetchone = MagicMock(return_value=results[0] if results else None) - - -CONNECT_PATCH_PATH = "mindsdb.integrations.handlers.databricks_handler.databricks_handler.connect" - - -class TestInstallationCheck(unittest.TestCase): - """Test handler installation and information schema.""" - - def test_handler_import(self): - """Verify handler is properly installed and can be imported.""" - from mindsdb.integrations.handlers.databricks_handler import databricks_handler - - self.assertTrue(hasattr(databricks_handler, "DatabricksHandler")) - - def test_handler_name(self): - """Verify handler has correct name attribute.""" - self.assertEqual(DatabricksHandler.name, "databricks") - - def test_connection_args_validation(self): - """Verify required connection args are validated.""" - # Missing required params should raise ValueError - handler = DatabricksHandler("test", connection_data={}) - with self.assertRaises(ValueError) as ctx: - handler.connect() - self.assertIn("server_hostname", str(ctx.exception)) - 
self.assertIn("http_path", str(ctx.exception)) - self.assertIn("access_token", str(ctx.exception)) - - -class TestDatabricksHandler(unittest.TestCase): - dummy_connection_data = OrderedDict( - server_hostname="adb-1234567890123456.7.azuredatabricks.net", - http_path="sql/protocolv1/o/1234567890123456/1234-567890-test123", - access_token="dapi1234567890ab1cde2f3ab456c7d89efa", - ) - - def setUp(self): - # Patch where connect is used, not where it's defined - self.patcher = patch(CONNECT_PATCH_PATH) - self.mock_connect = self.patcher.start() - self.handler = DatabricksHandler("databricks", connection_data=self.dummy_connection_data) - - def tearDown(self): - self.patcher.stop() - - def test_connect_success(self): - """ - Tests if the `connect` method successfully establishes a connection and sets `is_connected` flag to True. - """ - mock_conn = MagicMock() - self.mock_connect.return_value = mock_conn - - connection = self.handler.connect() - - self.assertIsNotNone(connection) - self.assertTrue(self.handler.is_connected) - self.mock_connect.assert_called_once() - - def test_connect_failure(self): - """ - Tests if the `connect` method correctly handles a connection failure by raising a databricks.sql.RequestError and sets is_connected to False. - """ - self.mock_connect.side_effect = RequestError("Connection Failed") - - with self.assertRaises(RequestError): - self.handler.connect() - self.assertFalse(self.handler.is_connected) - - def test_check_connection_success(self): - """ - Tests if the `check_connection` method returns a StatusResponse object and accurately reflects the connection status. 
- """ - mock_conn = MagicMock() - mock_cursor = CursorContextManager() - mock_cursor.set_results([(1,)], ["1"]) - mock_conn.cursor.return_value = mock_cursor - self.mock_connect.return_value = mock_conn - - response = self.handler.check_connection() - - self.assertTrue(response.success) - self.assertIsInstance(response, StatusResponse) - self.assertFalse(response.error_message) - - def test_check_connection_failure(self): - """ - Tests if the `check_connection` method returns a StatusResponse object and accurately reflects the connection status. - """ - self.mock_connect.side_effect = RequestError("Connection Failed") - - response = self.handler.check_connection() - - self.assertFalse(response.success) - self.assertIsInstance(response, StatusResponse) - self.assertTrue(response.error_message) - - -class TestTableOperations(unittest.TestCase): - """Test table operations (DDL & DML).""" - - dummy_connection_data = OrderedDict( - server_hostname="test.azuredatabricks.net", - http_path="sql/test", - access_token="test_token", - schema="default", - ) - - def setUp(self): - self.patcher = patch(CONNECT_PATCH_PATH) - self.mock_connect = self.patcher.start() - self.mock_conn = MagicMock() - self.mock_cursor = CursorContextManager() - self.mock_conn.cursor.return_value = self.mock_cursor - self.mock_connect.return_value = self.mock_conn - self.handler = DatabricksHandler("databricks", connection_data=self.dummy_connection_data) - - def tearDown(self): - self.patcher.stop() - - def test_native_query(self): - """ - Tests the `native_query` method to ensure it executes a SQL query using a mock cursor and returns a TableResponse object. 
- """ - self.mock_cursor.set_results([], []) - - query_str = "SELECT * FROM table" - data = self.handler.native_query(query_str) - - self.mock_cursor.execute.assert_called_once_with(query_str) - self.assertIsInstance(data, DataHandlerResponse) - self.assertNotIsInstance(data, ErrorResponse) - - def test_native_query_empty_select_returns_table(self): - self.mock_cursor.set_results([], ["id", "name"]) - - response = self.handler.native_query("SELECT id, name FROM table WHERE 1 = 0") - - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - self.assertEqual(list(response.data_frame.columns), ["id", "name"]) - self.assertEqual(len(response.data_frame), 0) - - def test_get_tables(self): - """ - Tests if the `get_tables` method to confirm it correctly calls `native_query` with the appropriate SQL commands. - """ - self.handler.native_query = MagicMock() - - self.handler.get_tables() - expected_query = """ - SELECT - table_schema, - table_name, - table_type - FROM - information_schema.tables - WHERE - table_schema != 'information_schema' - and table_schema = current_schema() - """ - self.handler.native_query.assert_called_once_with(expected_query) - - def test_get_tables_returns_non_table_response_without_transform(self): - expected = ErrorResponse(error_message="boom") - self.handler.native_query = MagicMock(return_value=expected) - - result = self.handler.get_tables() - - self.assertIs(result, expected) - - def test_get_columns(self): - """ - Tests if the `get_columns` method correctly constructs the SQL query and if it calls `native_query` with the correct query. 
- """ - self.handler.native_query = MagicMock() - - table_name = "mock_table" - self.handler.get_columns(table_name) - - expected_query = f""" - SELECT - COLUMN_NAME, - DATA_TYPE, - ORDINAL_POSITION, - COLUMN_DEFAULT, - IS_NULLABLE, - CHARACTER_MAXIMUM_LENGTH, - CHARACTER_OCTET_LENGTH, - NUMERIC_PRECISION, - NUMERIC_SCALE, - DATETIME_PRECISION, - null as CHARACTER_SET_NAME, - null as COLLATION_NAME - FROM - information_schema.columns - WHERE - table_name = '{table_name}' - AND - table_schema = current_schema() - """ - - self.handler.native_query.assert_called_once_with(expected_query) - - def test_native_query_server_error(self): - """Test query execution with server error.""" - self.mock_cursor.execute = MagicMock(side_effect=ServerOperationError("Server error")) - - result = self.handler.native_query("SELECT * FROM test_table") - - self.assertIsInstance(result, ErrorResponse) - self.assertIn("Server error", result.error_message) - - def test_get_tables_all_schemas(self): - """Test get_tables with all=True.""" - self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame([{"table_name": "t1"}]))) - - self.handler.get_tables(all=True) - - query = self.handler.native_query.call_args[0][0] - self.assertNotIn("table_schema = current_schema()", query) - - def test_get_columns_with_schema(self): - """Test get_columns with explicit schema.""" - mock_df = pd.DataFrame( - [ - { - "COLUMN_NAME": "id", - "DATA_TYPE": "INT", - "ORDINAL_POSITION": 1, - "COLUMN_DEFAULT": None, - "IS_NULLABLE": "NO", - "CHARACTER_MAXIMUM_LENGTH": None, - "CHARACTER_OCTET_LENGTH": None, - "NUMERIC_PRECISION": 10, - "NUMERIC_SCALE": 0, - "DATETIME_PRECISION": None, - "CHARACTER_SET_NAME": None, - "COLLATION_NAME": None, - } - ] - ) - - self.handler.native_query = MagicMock(return_value=TableResponse(data=mock_df)) - - self.handler.get_columns("test_table", schema_name="my_schema") - - query = self.handler.native_query.call_args[0][0] - self.assertIn("'my_schema'", query) - - 
-class TestAdvancedQueries(unittest.TestCase): - dummy_connection_data = OrderedDict( - server_hostname="test.azuredatabricks.net", - http_path="sql/test", - access_token="test_token", - schema="default", - ) - - def setUp(self): - self.patcher = patch(CONNECT_PATCH_PATH) - self.mock_connect = self.patcher.start() - self.mock_conn = MagicMock() - self.mock_cursor = CursorContextManager() - self.mock_conn.cursor.return_value = self.mock_cursor - self.mock_connect.return_value = self.mock_conn - self.handler = DatabricksHandler("databricks", connection_data=self.dummy_connection_data) - - def tearDown(self): - self.patcher.stop() - - def test_aggregation_count(self): - """Test COUNT(*) aggregation.""" - self.mock_cursor.set_results([(100,)], ["count"]) - - result = self.handler.native_query("SELECT COUNT(*) as count FROM orders") - - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - self.assertEqual(result.data_frame.iloc[0]["count"], 100) - - def test_aggregation_sum_group_by(self): - """Test SUM with GROUP BY.""" - self.mock_cursor.set_results( - [("Electronics", 50000.0), ("Clothing", 25000.0)], - ["category", "total_sales"], - ) - - result = self.handler.native_query("SELECT category, SUM(amount) as total_sales FROM sales GROUP BY category") - - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - self.assertEqual(len(result.data_frame), 2) - - def test_cte_query(self): - """Test SELECT with CTE.""" - self.mock_cursor.set_results([(1, "Product A", 100)], ["id", "name", "quantity"]) - - query = """ - WITH cte AS ( - SELECT * FROM products WHERE quantity > 50 - ) - SELECT * FROM cte WHERE id = 1 - """ - - result = self.handler.native_query(query) - - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - self.mock_cursor.execute.assert_called() - - def test_join_query(self): - """Test JOIN query.""" - self.mock_cursor.set_results([(1, "Order 1", "Customer A")], ["order_id", "order_name", "customer_name"]) - - query = """ - SELECT o.id as order_id, o.name as 
order_name, c.name as customer_name - FROM orders o - JOIN customers c ON o.customer_id = c.id - """ - - result = self.handler.native_query(query) - - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - - -class TestDateTimeFunctions(unittest.TestCase): - """Test date/time functions and INTERVAL transformations.""" - - dummy_connection_data = OrderedDict( - server_hostname="test.azuredatabricks.net", - http_path="sql/test", - access_token="test_token", - schema="default", - ) - - def setUp(self): - self.patcher = patch(CONNECT_PATCH_PATH) - self.mock_connect = self.patcher.start() - self.mock_conn = MagicMock() - self.mock_cursor = CursorContextManager() - self.mock_conn.cursor.return_value = self.mock_cursor - self.mock_connect.return_value = self.mock_conn - self.handler = DatabricksHandler("databricks", connection_data=self.dummy_connection_data) - - def tearDown(self): - self.patcher.stop() - - def test_current_timestamp(self): - """Test CURRENT_TIMESTAMP function.""" - from datetime import datetime - - now = datetime.now() - self.mock_cursor.set_results([(now,)], ["current_timestamp"]) - - result = self.handler.native_query("SELECT CURRENT_TIMESTAMP as current_timestamp") - - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - self.assertEqual(result.data_frame.iloc[0]["current_timestamp"], now) - - def test_date_add_interval_days_native_query(self): - """Test DATE_ADD with INTERVAL days via native_query.""" - from datetime import datetime, timedelta - - base_date = datetime(2023, 1, 1) - expected_date = base_date + timedelta(days=30) - self.mock_cursor.set_results([(expected_date,)], ["due_date"]) - - query = """ - SELECT DATE_ADD(o_orderdate, 30) as due_date - FROM orders - LIMIT 5 - """ - - result = self.handler.native_query(query) - - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - self.assertEqual(result.data_frame.iloc[0]["due_date"], expected_date) - - def test_query_transforms_date_add_day_interval(self): - """Test DATE_ADD with INTERVAL DAY is 
transformed to integer argument.""" - query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '30' DAY) AS due_date FROM orders LIMIT 1") - # breakpoint() - self.handler.native_query = MagicMock(return_value=OkResponse()) - - self.handler.query(query) - - transformed_sql = self.handler.native_query.call_args[0][0].lower() - self.assertIn("date_add(o_orderdate, 30)", transformed_sql) - self.assertNotIn("interval", transformed_sql) - - def test_query_transforms_date_add_days_plural(self): - """Test DATE_ADD with INTERVAL DAYS (plural) is transformed correctly.""" - query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL 7 DAYS) AS due_date FROM orders") - self.handler.native_query = MagicMock(return_value=OkResponse()) - - self.handler.query(query) - - transformed_sql = self.handler.native_query.call_args[0][0].lower() - self.assertIn("date_add(o_orderdate, 7)", transformed_sql) - self.assertNotIn("interval", transformed_sql) - - def test_query_transforms_date_sub_day_interval(self): - """Test DATE_SUB with INTERVAL DAY is transformed to integer argument.""" - query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '5' DAY) AS past_date FROM orders") - self.handler.native_query = MagicMock(return_value=OkResponse()) - - self.handler.query(query) - - transformed_sql = self.handler.native_query.call_args[0][0].lower() - self.assertIn("date_sub(o_orderdate, 5)", transformed_sql) - self.assertNotIn("interval", transformed_sql) - - def test_query_transforms_date_add_week_interval(self): - """Test DATE_ADD with INTERVAL WEEK is converted to days.""" - query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '2' WEEK) AS future_date FROM orders") - self.handler.native_query = MagicMock(return_value=OkResponse()) - - self.handler.query(query) - - transformed_sql = self.handler.native_query.call_args[0][0].lower() - self.assertIn("date_add(o_orderdate, 14)", transformed_sql) - self.assertNotIn("interval", transformed_sql) - - def 
test_query_transforms_date_sub_week_interval(self): - """Test DATE_SUB with INTERVAL WEEK is converted to days.""" - query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '2' WEEK) AS past_date FROM orders") - self.handler.native_query = MagicMock(return_value=OkResponse()) - - self.handler.query(query) - - transformed_sql = self.handler.native_query.call_args[0][0].lower() - self.assertIn("date_sub(o_orderdate, 14)", transformed_sql) - self.assertNotIn("interval", transformed_sql) - - def test_query_transforms_date_add_month_interval(self): - """Test DATE_ADD with INTERVAL MONTH uses ADD_MONTHS function.""" - query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '2' MONTH) AS future_date FROM orders") - self.handler.native_query = MagicMock(return_value=OkResponse()) - - self.handler.query(query) - - transformed_sql = self.handler.native_query.call_args[0][0].lower() - self.assertIn("add_months(o_orderdate, 2)", transformed_sql) - self.assertNotIn("interval", transformed_sql) - - def test_query_transforms_date_sub_month_interval(self): - """Test DATE_SUB with INTERVAL MONTH uses ADD_MONTHS with negative value.""" - query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '3' MONTH) AS past_date FROM orders") - self.handler.native_query = MagicMock(return_value=OkResponse()) - - self.handler.query(query) - - transformed_sql = self.handler.native_query.call_args[0][0].lower() - self.assertIn("add_months(o_orderdate, -3)", transformed_sql) - self.assertNotIn("interval", transformed_sql) - - def test_query_transforms_date_add_year_interval(self): - """Test DATE_ADD with INTERVAL YEAR uses ADD_MONTHS with 12x multiplier.""" - query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '1' YEAR) AS future_date FROM orders") - self.handler.native_query = MagicMock(return_value=OkResponse()) - - self.handler.query(query) - - transformed_sql = self.handler.native_query.call_args[0][0].lower() - self.assertIn("add_months(o_orderdate, 12)", transformed_sql) - 
self.assertNotIn("interval", transformed_sql) - - def test_query_transforms_date_sub_year_interval(self): - """Test DATE_SUB with INTERVAL YEAR uses ADD_MONTHS with negative 12x value.""" - query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '2' YEAR) AS past_date FROM orders") - self.handler.native_query = MagicMock(return_value=OkResponse()) - - self.handler.query(query) - - transformed_sql = self.handler.native_query.call_args[0][0].lower() - self.assertIn("add_months(o_orderdate, -24)", transformed_sql) - self.assertNotIn("interval", transformed_sql) - - def test_query_transforms_date_add_hour_interval(self): - """Test DATE_ADD with INTERVAL HOUR uses TIMESTAMPADD function.""" - query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '6' HOUR) AS future_time FROM orders") - self.handler.native_query = MagicMock(return_value=OkResponse()) - - self.handler.query(query) - - transformed_sql = self.handler.native_query.call_args[0][0].lower() - self.assertIn("timestampadd(hour, 6, o_orderdate)", transformed_sql) - self.assertNotIn("interval", transformed_sql) - - def test_query_transforms_date_sub_hour_interval(self): - """Test DATE_SUB with INTERVAL HOUR uses TIMESTAMPADD with negative value.""" - query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '3' HOUR) AS past_time FROM orders") - self.handler.native_query = MagicMock(return_value=OkResponse()) - - self.handler.query(query) - - transformed_sql = self.handler.native_query.call_args[0][0].lower() - self.assertIn("timestampadd(hour, -3, o_orderdate)", transformed_sql) - self.assertNotIn("interval", transformed_sql) - - def test_query_transforms_date_add_minute_interval(self): - """Test DATE_ADD with INTERVAL MINUTE uses TIMESTAMPADD function.""" - query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '30' MINUTE) AS future_time FROM orders") - self.handler.native_query = MagicMock(return_value=OkResponse()) - - self.handler.query(query) - - transformed_sql = 
self.handler.native_query.call_args[0][0].lower() - self.assertIn("timestampadd(minute, 30, o_orderdate)", transformed_sql) - self.assertNotIn("interval", transformed_sql) - - def test_query_transforms_date_add_second_interval(self): - """Test DATE_ADD with INTERVAL SECOND uses TIMESTAMPADD function.""" - query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '45' SECOND) AS future_time FROM orders") - self.handler.native_query = MagicMock(return_value=OkResponse()) - - self.handler.query(query) - - transformed_sql = self.handler.native_query.call_args[0][0].lower() - self.assertIn("timestampadd(second, 45, o_orderdate)", transformed_sql) - self.assertNotIn("interval", transformed_sql) - - def test_query_without_interval_unchanged(self): - """Test that queries without INTERVAL pass through unchanged.""" - query = parse_sql("SELECT DATE_ADD(o_orderdate, 10) AS future_date FROM orders") - self.handler.native_query = MagicMock(return_value=OkResponse()) - - self.handler.query(query) - - transformed_sql = self.handler.native_query.call_args[0][0].lower() - self.assertIn("date_add(o_orderdate, 10)", transformed_sql) - self.assertNotIn("interval", transformed_sql) - - def test_query_transforms_date_add_quarter_interval(self): - """Test DATE_ADD with INTERVAL QUARTER uses ADD_MONTHS with 3x multiplier.""" - query = parse_sql("SELECT DATE_ADD(o_orderdate, INTERVAL '2' QUARTER) AS future_date FROM orders") - self.handler.native_query = MagicMock(return_value=OkResponse()) - - self.handler.query(query) - - transformed_sql = self.handler.native_query.call_args[0][0].lower() - self.assertIn("add_months(o_orderdate, 6)", transformed_sql) - self.assertNotIn("interval", transformed_sql) - - def test_query_transforms_date_sub_quarter_interval(self): - """Test DATE_SUB with INTERVAL QUARTER uses ADD_MONTHS with negative 3x value.""" - query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '1' QUARTER) AS past_date FROM orders") - self.handler.native_query = 
MagicMock(return_value=OkResponse()) - - self.handler.query(query) - - transformed_sql = self.handler.native_query.call_args[0][0].lower() - self.assertIn("add_months(o_orderdate, -3)", transformed_sql) - self.assertNotIn("interval", transformed_sql) - - def test_query_transforms_date_sub_minute_interval(self): - """Test DATE_SUB with INTERVAL MINUTE uses TIMESTAMPADD with negative value.""" - query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '15' MINUTE) AS past_time FROM orders") - self.handler.native_query = MagicMock(return_value=OkResponse()) - - self.handler.query(query) - - transformed_sql = self.handler.native_query.call_args[0][0].lower() - self.assertIn("timestampadd(minute, -15, o_orderdate)", transformed_sql) - self.assertNotIn("interval", transformed_sql) - - def test_query_transforms_date_sub_second_interval(self): - """Test DATE_SUB with INTERVAL SECOND uses TIMESTAMPADD with negative value.""" - query = parse_sql("SELECT DATE_SUB(o_orderdate, INTERVAL '30' SECOND) AS past_time FROM orders") - self.handler.native_query = MagicMock(return_value=OkResponse()) - - self.handler.query(query) - - transformed_sql = self.handler.native_query.call_args[0][0].lower() - self.assertIn("timestampadd(second, -30, o_orderdate)", transformed_sql) - self.assertNotIn("interval", transformed_sql) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/community_handlers/test_dynamodb.py b/tests/unit/handlers/community_handlers/test_dynamodb.py deleted file mode 100644 index 535d11548ab..00000000000 --- a/tests/unit/handlers/community_handlers/test_dynamodb.py +++ /dev/null @@ -1,188 +0,0 @@ -import unittest -from collections import OrderedDict -from botocore.client import ClientError -from unittest.mock import patch, MagicMock, Mock - -import pytest - -from mindsdb_sql_parser import ast -from mindsdb_sql_parser.ast.select.star import Star -from mindsdb_sql_parser.ast.select.identifier import Identifier - -from base_handler_test import 
BaseHandlerTestSetup -from mindsdb.integrations.libs.response import TableResponse, HandlerStatusResponse as StatusResponse, RESPONSE_TYPE - -try: - from mindsdb.integrations.handlers.dynamodb_handler.dynamodb_handler import DynamoDBHandler - -except ImportError: - pytestmark = pytest.mark.skip("DynamoDB handler not installed") - - -class TestDynamoDBHandler(BaseHandlerTestSetup, unittest.TestCase): - @property - def dummy_connection_data(self): - return OrderedDict( - aws_access_key_id="AQAXEQK89OX07YS34OP", - aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", - region_name="us-east-2", - ) - - def create_handler(self): - return DynamoDBHandler("dynamodb", connection_data=self.dummy_connection_data) - - def create_patcher(self): - return patch("boto3.client") - - def test_connect_failure_with_missing_connection_data(self): - """ - Test if `connect` method raises ValueError when required connection parameters are missing. - """ - self.handler.connection_data = {} - with self.assertRaises(ValueError): - self.handler.connect() - - def test_connect_success(self): - """ - Test if `connect` method successfully establishes a connection and sets `is_connected` flag to True. - Also, verifies that boto3.client is called exactly once. - The `connect` method for this handler does not check the validity of the connection; it succeeds even with incorrect credentials. - The `check_connection` method handles the connection status. - """ - self.mock_connect.return_value = MagicMock() - connection = self.handler.connect() - self.assertIsNotNone(connection) - self.assertTrue(self.handler.is_connected) - self.mock_connect.assert_called_once() - - def test_check_connection_failure_with_incorrect_credentials(self): - """ - Test if the `check_connection` method returns a StatusResponse object and accurately reflects the connection status on failed connection due to incorrect credentials. 
- """ - self.mock_connect.return_value.list_tables.side_effect = ClientError( - error_response={"Error": {"Code": "AccessDeniedException", "Message": "Access Denied"}}, - operation_name="list_tables", - ) - - response = self.handler.check_connection() - - self.assertFalse(response.success) - assert isinstance(response, StatusResponse) - self.assertTrue(response.error_message) - - def test_check_connection_success(self): - """ - Test if the `check_connection` method returns a StatusResponse object and accurately reflects the connection status on a successful connection. - """ - self.mock_connect.return_value.list_tables.return_value = {"TableNames": ["table1", "table2"]} - response = self.handler.check_connection() - - self.assertTrue(response.success) - assert isinstance(response, StatusResponse) - self.assertFalse(response.error_message) - - def test_query_select_success(self): - """ - Test if the `query` method returns a TableResponse object with a data frame containing the query result. - `native_query` cannot be tested directly because it depends on some pre-processing steps handled by the `query` method. 
- """ - mock_boto3_client = Mock() - mock_boto3_client.execute_statement.return_value = { - "Items": [{"id": {"N": "1"}, "name": {"S": "Alice"}}, {"id": {"N": "2"}, "name": {"S": "Bob"}}] - } - - self.handler.connect = MagicMock(return_value=mock_boto3_client) - query = ast.Select( - targets=[ - Star(), - ], - from_table=ast.Identifier("table1"), - ) - response = self.handler.query(query) - - assert isinstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), 2) - self.assertEqual(df.columns.tolist(), ["id", "name"]) - self.assertEqual(df["id"].tolist(), [1, 2]) - self.assertEqual(df["name"].tolist(), ["Alice", "Bob"]) - - def test_query_select_failure_with_unsupported_clause(self): - """ - Test if the `query` method raises ValueError on a SELECT query with an unsupported clause. - """ - query = ast.Select( - targets=[ - Star(), - ], - from_table=ast.Identifier("table1"), - limit=10, - ) - with self.assertRaises(ValueError): - self.handler.query(query) - - def test_query_insert_failure(self): - """ - Test if the `query` method raises ValueError on an INSERT query. INSERT queries are not supported by this handler at the moment. - """ - mock_boto3_client = Mock() - mock_boto3_client.execute_statement.return_value = {} - - self.handler.connect = MagicMock(return_value=mock_boto3_client) - query = ast.Insert(table=Identifier("table1"), columns=["id", "name"], values=[[1, "Alice"]]) - with self.assertRaises(ValueError): - self.handler.query(query) - - def test_get_tables(self): - """ - Test if the `get_tables` method returns a TableResponse object with a list of tables. 
- """ - mock_boto3_client = Mock() - mock_boto3_client.list_tables.return_value = {"TableNames": ["table1", "table2"]} - - self.handler.connection = mock_boto3_client - response = self.handler.get_tables() - - assert isinstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), 2) - self.assertEqual(df.columns.tolist(), ["table_name"]) - self.assertEqual(df["table_name"].tolist(), ["table1", "table2"]) - - def test_get_columns(self): - """ - Test if the `get_columns` method returns a TableResponse object with a list of columns for a given table. - """ - mock_boto3_client = Mock() - mock_boto3_client.describe_table.return_value = { - "Table": { - "KeySchema": [ - {"AttributeName": "id", "KeyType": "HASH"}, - {"AttributeName": "name", "KeyType": "RANGE"}, - ], - "AttributeDefinitions": [ - {"AttributeName": "id", "AttributeType": "N"}, - {"AttributeName": "name", "AttributeType": "S"}, - ], - } - } - - self.handler.connection = mock_boto3_client - response = self.handler.get_columns("table1") - - assert isinstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), 2) - self.assertEqual(df.columns.tolist(), ["column_name", "data_type"]) - self.assertEqual(df["column_name"].tolist(), ["id", "name"]) - self.assertEqual(df["data_type"].tolist(), ["N", "S"]) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/community_handlers/test_jira.py b/tests/unit/handlers/community_handlers/test_jira.py deleted file mode 100644 index dd1e4d8f6a0..00000000000 --- a/tests/unit/handlers/community_handlers/test_jira.py +++ /dev/null @@ -1,397 +0,0 @@ -import pytest -import unittest - -from unittest.mock import patch, MagicMock -from requests.exceptions import HTTPError - -import pandas as pd - - -from base_handler_test import BaseHandlerTestSetup -from mindsdb.integrations.libs.response 
import ( - HandlerResponse as Response, - HandlerStatusResponse as StatusResponse, - RESPONSE_TYPE, -) - -try: - from mindsdb.integrations.handlers.jira_handler.jira_handler import JiraHandler - from mindsdb.integrations.handlers.jira_handler.jira_tables import ( - JiraAttachmentsTable, - JiraCommentsTable, - JiraIssuesTable, - JiraUsersTable, - JiraProjectsTable, - JiraGroupsTable, - SERVER_COLUMNS, - ) -except ImportError: - pytestmark = pytest.mark.skip("Jira handler not installed") - - -class TestJiraHandler(BaseHandlerTestSetup, unittest.TestCase): - @property - def dummy_connection_data(self): - return { - "jira_url": "https://your-domain.atlassian.net", - "jira_username": "username", - "jira_api_token": "your_api_token", - "is_cloud": False, - } - - @property - def err_to_raise_on_connect_failure(self): - return HTTPError("Failed to connect to Jira") - - def create_handler(self): - return JiraHandler("jira", self.dummy_connection_data) - - def create_patcher(self): - return patch("mindsdb.integrations.handlers.jira_handler.jira_handler.Jira") - - def test_connect_cloud_success(self): - """Ensure cloud connections normalize credentials and reuse Jira constructor correctly.""" - mock_client = MagicMock() - self.mock_connect.return_value = mock_client - - connection = self.handler.connect() - - self.assertIs(connection, mock_client) - self.assertTrue(self.handler.is_connected) - self.mock_connect.assert_called_once_with( - username=self.dummy_connection_data["jira_username"], - password=self.dummy_connection_data["jira_api_token"], - url=self.dummy_connection_data["jira_url"], - cloud=True, - ) - - def test_connect_reuse_existing_connection(self): - """If already connected, connect should reuse the existing client.""" - cached_connection = MagicMock() - self.handler.connection = cached_connection - self.handler.is_connected = True - - connection = self.handler.connect() - - self.assertIs(connection, cached_connection) - self.mock_connect.assert_not_called() - - 
def test_connect_runtime_error_on_missing_cached_connection(self): - """Marking the handler as connected without a cached client should raise.""" - self.handler.is_connected = True - self.handler.connection = None - - with self.assertRaises(RuntimeError): - self.handler.connect() - - def test_check_connection_http_error(self): - """check_connection should surface HTTP errors from the Jira client.""" - mock_client = MagicMock() - mock_client.myself.side_effect = HTTPError("Unauthorized") - self.mock_connect.return_value = mock_client - - response = self.handler.check_connection() - - assert isinstance(response, StatusResponse) - self.assertFalse(response.success) - self.assertIn("Unauthorized", response.error_message) - self.assertFalse(self.handler.is_connected) - - def test_native_query_http_error(self): - """native_query should return an error response when Jira raises HTTPError.""" - mock_client = MagicMock() - mock_client.jql.side_effect = HTTPError("Bad JQL") - self.mock_connect.return_value = mock_client - - response = self.handler.native_query("project = TEST") - - assert isinstance(response, Response) - self.assertEqual(response.type, RESPONSE_TYPE.ERROR) - self.assertIn("Bad JQL", response.error_message) - - def test_native_query_returns_empty_dataframe_when_no_issues(self): - """Ensure native_query returns an empty dataframe with expected columns.""" - mock_client = MagicMock() - mock_client.jql.return_value = {} - self.mock_connect.return_value = mock_client - - response = self.handler.native_query("project = TEST") - - assert isinstance(response, Response) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - self.assertTrue(response.data_frame.empty) - issues_columns = JiraIssuesTable(self.handler).get_columns() - self.assertListEqual(list(response.data_frame.columns), issues_columns) - - def test_attachments_table_fetches_missing_fields(self): - """Attachments table should refresh issues to retrieve missing attachment fields.""" - mock_client = 
MagicMock() - self.mock_connect.return_value = mock_client - - issue_without_attachments = {"id": "1", "key": "ISSUE-1", "fields": {}} - mock_client.get_all_projects.return_value = [{"id": "100"}] - mock_client.get_all_project_issues.return_value = [issue_without_attachments] - mock_client.get_issue.return_value = { - "fields": {"attachment": [{"id": "att-1", "filename": "log.txt", "size": 10, "mimeType": "text/plain"}]} - } - - attachments_table = JiraAttachmentsTable(self.handler) - result_df = attachments_table.list(limit=1) - - self.assertEqual(len(result_df), 1) - self.assertEqual(result_df.loc[0, "attachment_id"], "att-1") - self.assertEqual(result_df.loc[0, "issue_key"], "ISSUE-1") - self.assertEqual(result_df.loc[0, "filename"], "log.txt") - - def test_issues_table_missing_assignee(self): - """Test that issues without assignee are handled correctly.""" - mock_client = MagicMock() - self.mock_connect.return_value = mock_client - - mock_issues = [ - { - "id": "1", - "key": "TEST-1", - "fields": { - "project": {"id": "10001", "key": "TEST", "name": "Test Project"}, - "summary": "Issue with assignee", - "priority": {"name": "High"}, - "creator": {"displayName": "John Doe"}, - "assignee": {"displayName": "Jane Smith"}, - "status": {"name": "In Progress"}, - }, - }, - { - "id": "2", - "key": "TEST-2", - "fields": { - "project": {"id": "10001", "key": "TEST", "name": "Test Project"}, - "summary": "Unassigned issue", - "priority": {"name": "Medium"}, - "creator": {"displayName": "John Doe"}, - "status": {"name": "Open"}, - }, - }, - { - "id": "3", - "key": "TEST-3", - "fields": { - "project": {"id": "10001", "key": "TEST", "name": "Test Project"}, - "summary": "Issue without priority", - "creator": {"displayName": "John Doe"}, - "status": {"name": "Done"}, - }, - }, - ] - - mock_client.get_all_projects.return_value = [{"id": "10001"}] - mock_client.get_all_project_issues.return_value = mock_issues - - issues_table = JiraIssuesTable(self.handler) - result_df = 
issues_table.list(conditions=[]) - - self.assertEqual(len(result_df), 3) - self.assertIsNotNone(result_df) - - expected_columns = issues_table.get_columns() - for col in expected_columns: - self.assertIn(col, result_df.columns) - - self.assertEqual(result_df.loc[0, "assignee"], "Jane Smith") - self.assertTrue(pd.isna(result_df.loc[1, "assignee"])) - self.assertTrue(pd.isna(result_df.loc[2, "assignee"])) - - self.assertEqual(result_df.loc[0, "priority"], "High") - self.assertEqual(result_df.loc[1, "priority"], "Medium") - self.assertTrue(pd.isna(result_df.loc[2, "priority"])) - - def test_users_table_missing_timezone(self): - """Test that users without timeZone field are handled correctly.""" - mock_client = MagicMock() - self.mock_connect.return_value = mock_client - - mock_users = [ - { - "accountId": "user1", - "accountType": "atlassian", - "emailAddress": "user1@example.com", - "displayName": "User One", - "active": True, - "timeZone": "America/New_York", - "locale": "en_US", - }, - { - "accountId": "user2", - "accountType": "atlassian", - "emailAddress": "user2@example.com", - "displayName": "User Two", - "active": True, - "locale": "en_US", - }, - { - "accountId": "user3", - "accountType": "atlassian", - "displayName": "User Three", - "active": False, - }, - ] - - mock_client.users_get_all.return_value = mock_users - - users_table = JiraUsersTable(self.handler) - result_df = users_table.list(conditions=[]) - - self.assertEqual(len(result_df), 3) - self.assertIsNotNone(result_df) - - expected_columns = users_table.get_columns() - for col in expected_columns: - self.assertIn(col, result_df.columns) - - self.assertEqual(result_df.loc[0, "timeZone"], "America/New_York") - self.assertTrue(pd.isna(result_df.loc[1, "timeZone"])) - self.assertTrue(pd.isna(result_df.loc[2, "timeZone"])) - - self.assertEqual(result_df.loc[0, "emailAddress"], "user1@example.com") - self.assertEqual(result_df.loc[1, "emailAddress"], "user2@example.com") - 
self.assertTrue(pd.isna(result_df.loc[2, "emailAddress"])) - - def test_projects_table_missing_optional_fields(self): - """Test that projects with missing optional fields are handled correctly.""" - mock_client = MagicMock() - self.mock_connect.return_value = mock_client - - mock_projects = [ - { - "id": "10001", - "key": "PROJ1", - "name": "Project One", - "projectTypeKey": "software", - "simplified": True, - "style": "classic", - "isPrivate": False, - "entityId": "entity1", - "uuid": "uuid1", - }, - { - "id": "10002", - "key": "PROJ2", - "name": "Project Two", - }, - ] - - mock_client.get_all_projects.return_value = mock_projects - - projects_table = JiraProjectsTable(self.handler) - result_df = projects_table.list(conditions=[]) - - self.assertEqual(len(result_df), 2) - self.assertIsNotNone(result_df) - - expected_columns = projects_table.get_columns() - for col in expected_columns: - self.assertIn(col, result_df.columns) - - self.assertEqual(result_df.loc[0, "projectTypeKey"], "software") - self.assertTrue(pd.isna(result_df.loc[1, "projectTypeKey"])) - - def test_groups_table_missing_fields(self): - """Test that groups with missing fields are handled correctly.""" - mock_client = MagicMock() - self.mock_connect.return_value = mock_client - - mock_groups = { - "groups": [ - { - "groupId": "group1", - "name": "Developers", - "html": "Developers", - }, - { - "groupId": "group2", - "name": "Managers", - }, - ] - } - - mock_client.get_groups.return_value = mock_groups - - groups_table = JiraGroupsTable(self.handler) - result_df = groups_table.list(conditions=[]) - - self.assertEqual(len(result_df), 2) - self.assertIsNotNone(result_df) - - expected_columns = groups_table.get_columns() - for col in expected_columns: - self.assertIn(col, result_df.columns) - - self.assertEqual(result_df.loc[0, "html"], "Developers") - self.assertTrue(pd.isna(result_df.loc[1, "html"])) - - def test_comments_table_fetches_missing_fields(self): - """Comments table should refresh issues to 
retrieve missing comment fields.""" - mock_client = MagicMock() - self.mock_connect.return_value = mock_client - - issue_without_comments = {"id": "1", "key": "ISSUE-1", "fields": {}} - mock_client.get_all_projects.return_value = [{"id": "100"}] - mock_client.get_all_project_issues.return_value = [issue_without_comments] - mock_client.get_issue.return_value = { - "fields": { - "comment": { - "comments": [ - { - "id": "c-1", - "body": "First comment", - "created": "2024-01-01", - "updated": "2024-01-02", - "author": { - "displayName": "Commenter", - "accountId": "acc-1", - }, - "visibility": { - "type": "role", - "value": "admin", - }, - } - ] - } - } - } - - comments_table = JiraCommentsTable(self.handler) - result_df = comments_table.list(limit=1) - - self.assertEqual(len(result_df), 1) - self.assertEqual(result_df.loc[0, "comment_id"], "c-1") - self.assertEqual(result_df.loc[0, "issue_key"], "ISSUE-1") - self.assertEqual(result_df.loc[0, "body"], "First comment") - self.assertEqual(result_df.loc[0, "author"], "Commenter") - self.assertEqual(result_df.loc[0, "visibility_type"], "role") - self.assertEqual(result_df.loc[0, "visibility_value"], "admin") - - def test_users_table_server_mode_columns(self): - """Users table should switch to server columns when client.cloud is False.""" - mock_client = MagicMock() - mock_client.cloud = False - self.mock_connect.return_value = mock_client - - mock_client.user.return_value = { - "name": "serveruser", - "displayName": "Server User", - "emailAddress": "server@example.com", - } - - users_table = JiraUsersTable(self.handler) - result_df = users_table.list() - - self.assertEqual(len(result_df), 1) - self.assertListEqual(list(result_df.columns), SERVER_COLUMNS) - self.assertEqual(result_df.loc[0, "name"], "serveruser") - self.assertEqual(result_df.loc[0, "displayName"], "Server User") - self.assertEqual(result_df.loc[0, "emailAddress"], "server@example.com") - - -if __name__ == "__main__": - unittest.main() diff --git 
a/tests/unit/handlers/community_handlers/test_jira_handler.py b/tests/unit/handlers/community_handlers/test_jira_handler.py deleted file mode 100644 index 5014a9a97e9..00000000000 --- a/tests/unit/handlers/community_handlers/test_jira_handler.py +++ /dev/null @@ -1,236 +0,0 @@ -"""Unit tests for Jira handler.""" - -from collections import OrderedDict -import pytest -import unittest -from unittest.mock import patch, MagicMock -import pandas as pd - -try: - from mindsdb.integrations.handlers.jira_handler.jira_handler import JiraHandler - from mindsdb.integrations.handlers.jira_handler.jira_tables import ( - JiraIssuesTable, - JiraUsersTable, - JiraProjectsTable, - JiraGroupsTable, - ) -except ImportError: - pytestmark = pytest.mark.skip("Jira handler not installed") - -from base_handler_test import BaseAPIHandlerTest - - -class TestJiraHandler(BaseAPIHandlerTest, unittest.TestCase): - """Test Jira handler following standard test patterns.""" - - @property - def dummy_connection_data(self): - return OrderedDict( - url="https://test.atlassian.net", - username="test@example.com", - api_token="test_token_12345", - ) - - @property - def registered_tables(self): - return ["projects", "issues", "groups", "users"] - - @property - def err_to_raise_on_connect_failure(self): - return Exception("Authentication failed") - - def create_handler(self): - return JiraHandler("test_jira", connection_data=self.dummy_connection_data) - - def create_patcher(self): - return patch("mindsdb.integrations.handlers.jira_handler.jira_handler.Jira") - - def test_issues_table_missing_assignee(self): - """Test that issues without assignee are handled correctly.""" - mock_client = MagicMock() - self.mock_connect.return_value = mock_client - - mock_issues = [ - { - "id": "1", - "key": "TEST-1", - "fields": { - "project": {"id": "10001", "key": "TEST", "name": "Test Project"}, - "summary": "Issue with assignee", - "priority": {"name": "High"}, - "creator": {"displayName": "John Doe"}, - "assignee": 
{"displayName": "Jane Smith"}, - "status": {"name": "In Progress"}, - }, - }, - { - "id": "2", - "key": "TEST-2", - "fields": { - "project": {"id": "10001", "key": "TEST", "name": "Test Project"}, - "summary": "Unassigned issue", - "priority": {"name": "Medium"}, - "creator": {"displayName": "John Doe"}, - "status": {"name": "Open"}, - }, - }, - { - "id": "3", - "key": "TEST-3", - "fields": { - "project": {"id": "10001", "key": "TEST", "name": "Test Project"}, - "summary": "Issue without priority", - "creator": {"displayName": "John Doe"}, - "status": {"name": "Done"}, - }, - }, - ] - - mock_client.get_all_projects.return_value = [{"id": "10001"}] - mock_client.get_all_project_issues.return_value = mock_issues - - issues_table = JiraIssuesTable(self.handler) - result_df = issues_table.list(conditions=[]) - - self.assertEqual(len(result_df), 3) - self.assertIsNotNone(result_df) - - expected_columns = issues_table.get_columns() - for col in expected_columns: - self.assertIn(col, result_df.columns) - - self.assertEqual(result_df.loc[0, "assignee"], "Jane Smith") - self.assertTrue(pd.isna(result_df.loc[1, "assignee"])) - self.assertTrue(pd.isna(result_df.loc[2, "assignee"])) - - self.assertEqual(result_df.loc[0, "priority"], "High") - self.assertEqual(result_df.loc[1, "priority"], "Medium") - self.assertTrue(pd.isna(result_df.loc[2, "priority"])) - - def test_users_table_missing_timezone(self): - """Test that users without timeZone field are handled correctly.""" - mock_client = MagicMock() - self.mock_connect.return_value = mock_client - - mock_users = [ - { - "accountId": "user1", - "accountType": "atlassian", - "emailAddress": "user1@example.com", - "displayName": "User One", - "active": True, - "timeZone": "America/New_York", - "locale": "en_US", - }, - { - "accountId": "user2", - "accountType": "atlassian", - "emailAddress": "user2@example.com", - "displayName": "User Two", - "active": True, - "locale": "en_US", - }, - { - "accountId": "user3", - "accountType": 
"atlassian", - "displayName": "User Three", - "active": False, - }, - ] - - mock_client.users_get_all.return_value = mock_users - - users_table = JiraUsersTable(self.handler) - result_df = users_table.list(conditions=[]) - - self.assertEqual(len(result_df), 3) - self.assertIsNotNone(result_df) - - expected_columns = users_table.get_columns() - for col in expected_columns: - self.assertIn(col, result_df.columns) - - self.assertEqual(result_df.loc[0, "timeZone"], "America/New_York") - self.assertTrue(pd.isna(result_df.loc[1, "timeZone"])) - self.assertTrue(pd.isna(result_df.loc[2, "timeZone"])) - - self.assertEqual(result_df.loc[0, "emailAddress"], "user1@example.com") - self.assertEqual(result_df.loc[1, "emailAddress"], "user2@example.com") - self.assertTrue(pd.isna(result_df.loc[2, "emailAddress"])) - - def test_projects_table_missing_optional_fields(self): - """Test that projects with missing optional fields are handled correctly.""" - mock_client = MagicMock() - self.mock_connect.return_value = mock_client - - mock_projects = [ - { - "id": "10001", - "key": "PROJ1", - "name": "Project One", - "projectTypeKey": "software", - "simplified": True, - "style": "classic", - "isPrivate": False, - "entityId": "entity1", - "uuid": "uuid1", - }, - { - "id": "10002", - "key": "PROJ2", - "name": "Project Two", - }, - ] - - mock_client.get_all_projects.return_value = mock_projects - - projects_table = JiraProjectsTable(self.handler) - result_df = projects_table.list(conditions=[]) - - self.assertEqual(len(result_df), 2) - self.assertIsNotNone(result_df) - - expected_columns = projects_table.get_columns() - for col in expected_columns: - self.assertIn(col, result_df.columns) - - self.assertEqual(result_df.loc[0, "projectTypeKey"], "software") - self.assertTrue(pd.isna(result_df.loc[1, "projectTypeKey"])) - - def test_groups_table_missing_fields(self): - """Test that groups with missing fields are handled correctly.""" - mock_client = MagicMock() - self.mock_connect.return_value 
= mock_client - - mock_groups = { - "groups": [ - { - "groupId": "group1", - "name": "Developers", - "html": "Developers", - }, - { - "groupId": "group2", - "name": "Managers", - }, - ] - } - - mock_client.get_groups.return_value = mock_groups - - groups_table = JiraGroupsTable(self.handler) - result_df = groups_table.list(conditions=[]) - - self.assertEqual(len(result_df), 2) - self.assertIsNotNone(result_df) - - expected_columns = groups_table.get_columns() - for col in expected_columns: - self.assertIn(col, result_df.columns) - - self.assertEqual(result_df.loc[0, "html"], "Developers") - self.assertTrue(pd.isna(result_df.loc[1, "html"])) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/community_handlers/test_mongodb.py b/tests/unit/handlers/community_handlers/test_mongodb.py deleted file mode 100644 index 81232adea01..00000000000 --- a/tests/unit/handlers/community_handlers/test_mongodb.py +++ /dev/null @@ -1,794 +0,0 @@ -import unittest -from collections import OrderedDict -from unittest.mock import patch, MagicMock - -import pytest - -from bson import ObjectId -from mindsdb_sql_parser import ast -from mindsdb_sql_parser.ast.select.star import Star -import pymongo -from pymongo.errors import InvalidURI, OperationFailure -import pymongo.results - -from base_handler_test import BaseHandlerTestSetup -from mindsdb.integrations.libs.response import ( - TableResponse, - OkResponse, - ErrorResponse, - HandlerStatusResponse as StatusResponse, - RESPONSE_TYPE, -) - -try: - from mindsdb.integrations.handlers.mongodb_handler.mongodb_handler import MongoDBHandler -except ImportError: - pytestmark = pytest.mark.skip("mongodb_handler not installed (community handler)") - - -class TestMongoDBHandler(BaseHandlerTestSetup, unittest.TestCase): - @property - def dummy_connection_data(self): - return OrderedDict(host="mongodb://localhost:27017", database="sample_mflix") - - def create_handler(self): - return MongoDBHandler("mongodb", 
connection_data=self.dummy_connection_data) - - def create_patcher(self): - return patch("mindsdb.integrations.handlers.mongodb_handler.mongodb_handler.MongoClient") - - def test_connect_success(self): - """ - Test if `connect` method successfully establishes a connection and sets `is_connected` flag to True. - Also, verifies that pymongo.MongoClient is called exactly once. - """ - self.mock_connect.return_value = MagicMock() - connection = self.handler.connect() - self.assertIsNotNone(connection) - self.assertTrue(self.handler.is_connected) - self.mock_connect.assert_called_once() - - def test_connect_failure_with_invalid_uri(self): - """ - Test if `connect` method raises InvalidURI exception when an invalid URI is provided. - """ - self.mock_connect.side_effect = InvalidURI - - with self.assertRaises(InvalidURI): - self.handler.connect() - - self.assertFalse(self.handler.is_connected) - - def test_connect_failure_with_incorrect_credentials(self): - """ - Test if `connect` method raises OperationFailure exception when incorrect credentials are provided. - """ - self.mock_connect.side_effect = OperationFailure(error="Authentication failed.") - - with self.assertRaises(OperationFailure): - self.handler.connect() - - self.assertFalse(self.handler.is_connected) - - def test_check_connection_failure_with_non_existent_database(self): - """ - Test if the `check_connection` method returns a StatusResponse object and accurately reflects the connection status on failed connection due to non-existent database. - """ - self.mock_connect.return_value.list_database_names.return_value = ["demo"] - - response = self.handler.check_connection() - - self.assertFalse(response.success) - assert isinstance(response, StatusResponse) - self.assertTrue(response.error_message) - - def test_check_connection_success(self): - """ - Test if the `check_connection` method returns a StatusResponse object and accurately reflects the connection status on a successful connection. 
- """ - self.mock_connect.return_value.list_database_names.return_value = ["sample_mflix"] - - response = self.handler.check_connection() - - self.assertTrue(response.success) - assert isinstance(response, StatusResponse) - self.assertFalse(response.error_message) - - def test_query_failure_with_non_existent_collection(self): - """ - Test if the `query` method returns an ErrorResponse object with an error message on failed query due to non-existent collection. - """ - self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ - "movies" - ] - - query = ast.Select( - targets=[ - Star(), - ], - from_table=ast.Identifier("theaters"), - ) - - response = self.handler.query(query) - - assert isinstance(response, ErrorResponse) - self.assertEqual(response.type, RESPONSE_TYPE.ERROR) - self.assertTrue(response.error_message) - - def test_query_failure_with_unsupported_query_type(self): - """ - Test if the `query` method raises NotImplementedError on unsupported query operation. - This exception will be raised in the `to_mongo_query` method of the `MongodbRender` class. - """ - query = ast.Insert( - table=ast.Identifier("table1"), - columns=["id", "name"], - values=[[1, "Alice"]], - ) - - with self.assertRaises(NotImplementedError): - self.handler.query(query) - - def test_query_failure_with_unsupported_operation(self): - """ - Test if the `query` method raises NotImplementedError on unsupported operation. - This exception will be raised in the `handle_where` method of the `MongodbRender` class. - """ - query = ast.Select( - targets=[ - Star(), - ], - from_table=ast.Identifier("movies"), - where=ast.BinaryOperation(args=[ast.Identifier("name"), ast.Constant("The Dark Knight")], op="in"), - ) - - with self.assertRaises(NotImplementedError): - self.handler.query(query) - - def test_query_select_success(self): - """ - Test if the `query` method returns a TableResponse object with a data frame containing the query result. 
- `native_query` cannot be tested directly because it depends on some pre-processing steps handled by the `query` method. - """ - self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ - "movies" - ] - - self.mock_connect.return_value[self.dummy_connection_data["database"]]["movies"].aggregate.return_value = [ - { - "_id": ObjectId("5f5b3f3b3f3b3f3b3f3b3f3b"), - "name": "The Dark Knight", - "plot": "The Dark Knight is a 2008 superhero film directed, produced, and co-written by Christopher Nolan.", - "runtime": 152, - } - ] - - query = ast.Select( - targets=[ - Star(), - ], - from_table=ast.Identifier("movies"), - ) - - response = self.handler.query(query) - - assert isinstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), 1) - self.assertEqual(df.columns.tolist(), ["_id", "name", "plot", "runtime"]) - self.assertEqual(df["name"].tolist(), ["The Dark Knight"]) - - def test_query_update_success(self): - """ - Test if the `query` method returns an OkResponse object with a 'OK' status. - `native_query` cannot be tested directly because it depends on some pre-processing steps handled by the `query` method. - """ - self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ - "movies" - ] - - self.mock_connect.return_value[self.dummy_connection_data["database"]][ - "movies" - ].update_many.return_value = pymongo.results.UpdateResult( - acknowledged=True, raw_result={"n": 1, "nModified": 1} - ) - - query = ast.Update( - table=ast.Identifier("movies"), - update_columns={ - "name": ast.Constant("The Dark Knight"), - "plot": ast.Constant( - "The Dark Knight is a 2008 superhero film directed, produced, and co-written by Christopher Nolan." 
- ), - "runtime": ast.Constant(152), - }, - where=ast.BinaryOperation(args=[ast.Identifier("name"), ast.Constant("The Dark Knight")], op="="), - ) - - response = self.handler.query(query) - - assert isinstance(response, OkResponse) - self.assertEqual(response.type, RESPONSE_TYPE.OK) - - def test_get_tables(self): - """ - Tests the `get_tables` method returns a TableResponse object with a list of tables (collections) in the database. - """ - self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ - "theaters", - "movies", - "comments", - "sessions", - "users", - "embedded_movies", - ] - - response = self.handler.get_tables() - - assert isinstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), 6) - self.assertEqual(df.columns.tolist(), ["table_name"]) - self.assertEqual( - df["table_name"].tolist(), - ["theaters", "movies", "comments", "sessions", "users", "embedded_movies"], - ) - - def test_get_columns(self): - """ - Tests the `get_columns` method returns a TableResponse object with a list of columns (fields) for a given table (collection). 
- """ - self.mock_connect.return_value[self.dummy_connection_data["database"]]["movies"].find_one.return_value = { - "_id": ObjectId("5f5b3f3b3f3b3f3b3f3b3f3b"), - "name": "The Dark Knight", - "plot": "The Dark Knight is a 2008 superhero film directed, produced, and co-written by Christopher Nolan.", - "runtime": 152, - } - - response = self.handler.get_columns("movies") - - assert isinstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), 4) - self.assertEqual(df.columns.tolist(), ["Field", "Type"]) - self.assertEqual(df["Field"].tolist(), ["_id", "name", "plot", "runtime"]) - self.assertEqual(df["Type"].tolist(), ["str", "str", "str", "int"]) - - # use subquery for select - def test_query_select_with_subquery_success(self): - """ - Test if the `query` method returns a response object with a data frame containing the query result for a select with subquery. - e.g., SELECT * FROM (SELECT * FROM theaters); - """ - self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ - "movies", - "theaters", - ] - - self.mock_connect.return_value[self.dummy_connection_data["database"]]["theaters"].aggregate.return_value = [ - { - "_id": ObjectId("5f5b3f3b3f3b3f3b3f3b3f3b"), - "name": "Cinema City", - "location": "Downtown", - } - ] - - subquery = ast.Select( - targets=[ - Star(), - ], - from_table=ast.Identifier("theaters"), - ) - - main_query = ast.Select( - targets=[ - Star(), - ], - from_table=subquery, - ) - - response = self.handler.query(main_query) - - assert isinstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), 1) - self.assertEqual(df.columns.tolist(), ["_id", "name", "location"]) - self.assertEqual(df["name"].tolist(), ["Cinema City"]) - - def test_query_select_with_complex_subquery_success(self): - """ - Test if the `query` method 
returns a response object with a data frame containing the query result for a select with complex subquery. - e.g. SELECT * FROM (SELECT CAST(customer_id AS VARCHAR) AS cust_id, CAST(first_name AS VARCHAR) AS fname FROM mongo_db.customers) - """ - self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ - "customers" - ] - self.mock_connect.return_value[self.dummy_connection_data["database"]]["customers"].aggregate.return_value = [ - {"cust_id": "C001", "fname": "John"} - ] - - cust_cast = ast.TypeCast( - arg=ast.Identifier(parts=["customer_id"]), - type_name="VARCHAR", - precision=None, - ) - cust_cast.alias = ast.Identifier(parts=["cust_id"]) - - fname_cast = ast.TypeCast( - arg=ast.Identifier(parts=["first_name"]), - type_name="VARCHAR", - precision=None, - ) - fname_cast.alias = ast.Identifier(parts=["fname"]) - - subquery = ast.Select( - targets=[cust_cast, fname_cast], - from_table=ast.Identifier(parts=["mongo_db", "customers"]), - where=None, - group_by=None, - having=None, - order_by=None, - limit=None, - offset=None, - distinct=False, - modifiers=None, - cte=None, - mode=None, - ) - - main_query = ast.Select( - targets=[ast.Star()], - from_table=subquery, - where=None, - group_by=None, - having=None, - order_by=None, - limit=ast.Constant(50), - offset=None, - distinct=False, - modifiers=None, - cte=None, - mode=None, - ) - - response = self.handler.query(main_query) - - self.assertIsInstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), 1) - self.assertEqual(df.columns.tolist(), ["cust_id", "fname"]) - self.assertEqual(df["cust_id"].tolist(), ["C001"]) - self.assertEqual(df["fname"].tolist(), ["John"]) - - def test_query_select_with_where_operators(self): - """ - Test SELECT with various WHERE operators (>, <, >=, <=, !=) - """ - 
self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ - "movies" - ] - - self.mock_connect.return_value[self.dummy_connection_data["database"]]["movies"].aggregate.return_value = [ - { - "_id": ObjectId("5f5b3f3b3f3b3f3b3f3b3f3b"), - "name": "Inception", - "runtime": 148, - } - ] - - query = ast.Select( - targets=[ - Star(), - ], - from_table=ast.Identifier("movies"), - where=ast.BinaryOperation(args=[ast.Identifier("runtime"), ast.Constant(150)], op="<"), - ) - - response = self.handler.query(query) - - self.assertIsInstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), 1) - self.assertEqual(df.columns.tolist(), ["_id", "name", "runtime"]) - self.assertEqual(df["name"].tolist(), ["Inception"]) - - def test_query_select_with_and_or_conditions(self): - """ - Test SELECT with AND/OR conditions in WHERE clause - """ - self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ - "movies" - ] - - self.mock_connect.return_value[self.dummy_connection_data["database"]]["movies"].aggregate.return_value = [ - { - "_id": ObjectId("5f5b3f3b3f3b3f3b3f3b3f3b"), - "name": "The Matrix", - "runtime": 136, - } - ] - - query = ast.Select( - targets=[ - Star(), - ], - from_table=ast.Identifier("movies"), - where=ast.BinaryOperation( - args=[ - ast.BinaryOperation(args=[ast.Identifier("runtime"), ast.Constant(140)], op="<"), - ast.BinaryOperation( - args=[ast.Identifier("name"), ast.Constant("The Matrix")], - op="=", - ), - ], - op="AND", - ), - ) - - response = self.handler.query(query) - - self.assertIsInstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), 1) - self.assertEqual(df.columns.tolist(), ["_id", "name", "runtime"]) - self.assertEqual(df["name"].tolist(), ["The Matrix"]) - - def 
test_unsupported_select_query_(self): - """ - NotImplementedError for unsupported inner subselect: - SELECT * FROM (SELECT COUNT(*) FROM movies); - """ - self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ - "movies" - ] - - inner = ast.Select( - targets=[ - ast.Function(op="COUNT", args=[ast.Star()], distinct=False, from_arg=None), - ], - from_table=ast.Identifier(parts=["movies"]), - ) - - outer = ast.Select( - targets=[ast.Star()], - from_table=inner, - ) - - with self.assertRaises(NotImplementedError) as ctx: - self.handler.query(outer) - - self.assertIn("Unsupported inner target", str(ctx.exception)) - - def test_select_with_match_and_projection(self): - """ - Test SELECT with WHERE clause and specific projections - if match: - arg.append({"$match": match}) - if match is not None and proj != {}: - arg.append({"$project": proj}) - """ - self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ - "movies" - ] - - self.mock_connect.return_value[self.dummy_connection_data["database"]]["movies"].aggregate.return_value = [ - { - "_id": ObjectId("5f5b3f3b3f3b3f3b3f3b3f3b"), - "name": "Interstellar", - } - ] - - query = ast.Select( - targets=[ - ast.Identifier("name"), - ], - from_table=ast.Identifier("movies"), - where=ast.BinaryOperation(args=[ast.Identifier("runtime"), ast.Constant(170)], op=">"), - ) - - response = self.handler.query(query) - - self.assertIsInstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), 1) - self.assertEqual(df.columns.tolist(), ["_id", "name"]) - self.assertEqual(df["name"].tolist(), ["Interstellar"]) - - def test_select_constant_with_alias(self): - """ - Test SELECT with constant value and alias - e.g., SELECT 1 AS one, 'test' AS text FROM movies; - """ - 
self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ - "movies" - ] - - self.mock_connect.return_value[self.dummy_connection_data["database"]]["movies"].aggregate.return_value = [ - {"one": 1, "text": "test"} - ] - - query = ast.Select( - targets=[ - ast.Constant(1, alias=ast.Identifier("one")), - ast.Constant("test", alias=ast.Identifier("text")), - ], - from_table=ast.Identifier("movies"), - ) - - response = self.handler.query(query) - - self.assertIsInstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), 1) - self.assertEqual(df.columns.tolist(), ["one", "text"]) - self.assertEqual(df["one"].tolist(), [1]) - self.assertEqual(df["text"].tolist(), ["test"]) - - def test_select_with_constant_no_alias(self): - """ - Test SELECT with constant value without alias - e.g., SELECT 42, 'hello' FROM movies; - """ - self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ - "movies" - ] - - self.mock_connect.return_value[self.dummy_connection_data["database"]]["movies"].aggregate.return_value = [ - {"42": 42, "hello": "hello"} - ] - - query = ast.Select( - targets=[ - ast.Constant(42), - ast.Constant("hello"), - ], - from_table=ast.Identifier("movies"), - ) - - response = self.handler.query(query) - - self.assertIsInstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), 1) - self.assertEqual(df.columns.tolist(), ["42", "hello"]) - self.assertEqual(df["42"].tolist(), [42]) - self.assertEqual(df["hello"].tolist(), ["hello"]) - - def test_query_select_with_subquery_and_where(self): - """ - Test if the `query` method returns a response object with a data frame - containing the query result for a select with subquery that has WHERE clause. 
- """ - self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ - "movies" - ] - - self.mock_connect.return_value[self.dummy_connection_data["database"]]["movies"].aggregate.return_value = [ - { - "name": "The Dark Knight", - "runtime": 152, - }, - { - "name": "Inception", - "runtime": 148, - }, - ] - - subquery = ast.Select( - targets=[ - ast.Identifier(parts=["name"]), - ast.Identifier(parts=["runtime"]), - ], - from_table=ast.Identifier("movies"), - where=ast.BinaryOperation(op=">", args=[ast.Identifier(parts=["runtime"]), ast.Constant(120)]), - ) - - main_query = ast.Select( - targets=[ - Star(), - ], - from_table=subquery, - ) - - response = self.handler.query(main_query) - - assert isinstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), 2) - self.assertEqual(df.columns.tolist(), ["name", "runtime"]) - self.assertEqual(df["name"].tolist(), ["The Dark Knight", "Inception"]) - self.assertEqual(df["runtime"].tolist(), [152, 148]) - - def test_query_select_nested_field_projection(self): - """ - Test if the `query` method correctly handles nested field projection using dot notation. - MongoDB stores nested documents (JSON data) that can be accessed with dot notation. 
- """ - self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ - "clients" - ] - - self.mock_connect.return_value[self.dummy_connection_data["database"]]["clients"].aggregate.return_value = [ - { - "financials.profit_margin": 0.18, - "financials.account_balance": 150000, - }, - { - "financials.profit_margin": 0.22, - "financials.account_balance": 85000, - }, - ] - - query = ast.Select( - targets=[ - ast.Identifier(parts=["financials", "profit_margin"]), - ast.Identifier(parts=["financials", "account_balance"]), - ], - from_table=ast.Identifier("clients"), - ) - - response = self.handler.query(query) - - assert isinstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), 2) - self.assertEqual( - df.columns.tolist(), - ["financials.profit_margin", "financials.account_balance"], - ) - self.assertEqual(df["financials.profit_margin"].tolist(), [0.18, 0.22]) - self.assertEqual(df["financials.account_balance"].tolist(), [150000, 85000]) - - def test_query_select_nested_field_with_where(self): - """ - Test nested field projection with WHERE clause on nested field. - Tests that nested fields work correctly in both SELECT and WHERE clauses. 
- """ - self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ - "clients" - ] - - self.mock_connect.return_value[self.dummy_connection_data["database"]]["clients"].aggregate.return_value = [ - { - "financials.profit_margin": 0.18, - }, - { - "financials.profit_margin": 0.22, - }, - ] - - query = ast.Select( - targets=[ - ast.Identifier(parts=["financials", "profit_margin"]), - ], - from_table=ast.Identifier("clients"), - where=ast.BinaryOperation( - op=">", - args=[ - ast.Identifier(parts=["financials", "profit_margin"]), - ast.Constant(0.15), - ], - ), - ) - - response = self.handler.query(query) - - assert isinstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), 2) - self.assertEqual(df.columns.tolist(), ["financials.profit_margin"]) - self.assertEqual(df["financials.profit_margin"].tolist(), [0.18, 0.22]) - - def test_query_aggregation_on_nested_field(self): - """ - Test aggregation function (AVG) on nested field. - Tests that nested fields work correctly with aggregation functions. 
- """ - self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ - "clients" - ] - - self.mock_connect.return_value[self.dummy_connection_data["database"]]["clients"].aggregate.return_value = [ - { - "avg_margin": 0.191, - } - ] - - query = ast.Select( - targets=[ - ast.Function( - op="AVG", - args=[ast.Identifier(parts=["financials", "profit_margin"])], - alias=ast.Identifier(parts=["avg_margin"]), - ) - ], - from_table=ast.Identifier("clients"), - ) - - response = self.handler.query(query) - - assert isinstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), 1) - self.assertEqual(df.columns.tolist(), ["avg_margin"]) - self.assertAlmostEqual(df["avg_margin"].tolist()[0], 0.191, places=3) - - def test_query_group_by_with_nested_aggregation(self): - """ - Test GROUP BY with aggregation on nested field. - Tests that nested fields work correctly with GROUP BY and aggregation. 
- """ - self.mock_connect.return_value[self.dummy_connection_data["database"]].list_collection_names.return_value = [ - "clients" - ] - - self.mock_connect.return_value[self.dummy_connection_data["database"]]["clients"].aggregate.return_value = [ - { - "industry": "technology", - "avg_margin": 0.18, - }, - { - "industry": "finance", - "avg_margin": 0.22, - }, - { - "industry": "healthcare", - "avg_margin": 0.15, - }, - ] - - query = ast.Select( - targets=[ - ast.Identifier(parts=["industry"]), - ast.Function( - op="AVG", - args=[ast.Identifier(parts=["financials", "profit_margin"])], - alias=ast.Identifier(parts=["avg_margin"]), - ), - ], - from_table=ast.Identifier("clients"), - group_by=[ast.Identifier(parts=["industry"])], - ) - - response = self.handler.query(query) - - assert isinstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), 3) - self.assertEqual(df.columns.tolist(), ["industry", "avg_margin"]) - self.assertEqual(df["industry"].tolist(), ["technology", "finance", "healthcare"]) - self.assertEqual(df["avg_margin"].tolist(), [0.18, 0.22, 0.15]) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/community_handlers/test_ms_teams.py b/tests/unit/handlers/community_handlers/test_ms_teams.py deleted file mode 100644 index d0a4a914e22..00000000000 --- a/tests/unit/handlers/community_handlers/test_ms_teams.py +++ /dev/null @@ -1,481 +0,0 @@ -from collections import OrderedDict -import unittest -from unittest.mock import patch, MagicMock -import pytest -import pandas as pd - -from base_handler_test import BaseHandlerTestSetup, BaseAPIResourceTestSetup - -try: - from mindsdb.integrations.handlers.ms_teams_handler.ms_teams_handler import MSTeamsHandler - from mindsdb.integrations.handlers.ms_teams_handler.ms_teams_tables import ( - TeamsTable, - ChannelsTable, - ChannelMessagesTable, - ChatsTable, - ChatMessagesTable, - ) -except ImportError: - 
pytestmark = pytest.mark.skip("MSTeams handler not installed") - -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) -from mindsdb.integrations.utilities.handlers.auth_utilities.exceptions import AuthException -from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator - - -class TestMSTeamsHandler(BaseHandlerTestSetup, unittest.TestCase): - @property - def dummy_connection_data(self): - return OrderedDict( - client_id="12345678-90ab-cdef-1234-567890abcdef", - client_secret="a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6", - tenant_id="abcdef12-3456-7890-abcd-ef1234567890", - code="1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef", # Not passed by the user, but by the front-end. - ) - - @property - def dummy_connection_data_without_code(self): - return OrderedDict( - client_id="12345678-90ab-cdef-1234-567890abcdef", - client_secret="a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6", - tenant_id="abcdef12-3456-7890-abcd-ef1234567890", - ) - - def create_handler(self): - mock_handler_storage = MagicMock() - mock_handler_storage.file_get.side_effect = FileNotFoundError - mock_handler_storage.file_set.return_value = None - - return MSTeamsHandler("teams", connection_data=self.dummy_connection_data, handler_storage=mock_handler_storage) - - def create_handler_without_code_without_cache(self): - mock_handler_storage = MagicMock() - mock_handler_storage.file_get.side_effect = FileNotFoundError - - return MSTeamsHandler( - "teams", connection_data=self.dummy_connection_data_without_code, handler_storage=mock_handler_storage - ) - - def create_handler_without_code_with_cache(self): - mock_handler_storage = MagicMock() - mock_handler_storage.file_get.side_effect = b"mock_file_content" - - return MSTeamsHandler( - "teams", connection_data=self.dummy_connection_data_without_code, handler_storage=mock_handler_storage - ) - - def create_patcher(self): - return patch("msal.ConfidentialClientApplication") - - def 
test_connect_without_code_returns_redirect_url(self): - """ - Test if `connect` method successfully returns a redirect URL when the authentication code is not provided. - """ - self.handler = self.create_handler_without_code_without_cache() - - mock_msal = MagicMock() - mock_msal.get_accounts.return_value = [] - mock_auth_url = "https://mock.auth.url" - mock_msal.get_authorization_request_url.return_value = mock_auth_url - - self.mock_connect.return_value = mock_msal - - with self.assertRaises(AuthException, msg=f"Authorisation required. Please follow the url: {mock_auth_url}"): - self.handler.connect() - - self.assertFalse(self.handler.is_connected) - - @patch("msal.SerializableTokenCache") - def test_connect_with_valid_code_returns_access_token(self, mock_token_cache): - """ " - Test if `connect` method successfully returns an access token when the authentication code is provided and is valid. - """ - mock_msal = MagicMock() - mock_msal.get_accounts.return_value = [] - - mock_msal.acquire_token_by_authorization_code.return_value = {"access_token": "mock_access_token"} - - self.mock_connect.return_value = mock_msal - - mock_token_cache.has_state_changed = True - - connection = self.handler.connect() - - self.assertIsNotNone(connection) - self.assertTrue(self.handler.is_connected) - - def test_connect_with_invalid_code_raises_error(self): - """ " - Test if `connect` method raises an AuthException when the authentication code is invalid. 
- """ - mock_msal = MagicMock() - mock_msal.get_accounts.return_value = [] - - mock_msal.acquire_token_by_authorization_code.return_value = { - "error": "invalid_grant", - "error_description": "AADSTS70000: The provided authorization code is invalid or has expired.", - } - - self.mock_connect.return_value = mock_msal - - with self.assertRaises( - AuthException, - msg="Error getting access token: AADSTS70000: The provided authorization code is invalid or has expired.", - ): - self.handler.connect() - - self.assertFalse(self.handler.is_connected) - - @patch("msal.SerializableTokenCache") - def test_connect_with_cache_returns_access_code(self, mock_token_cache): - """ " - Test if `connect` method successfully returns an access token when valid information is found in the cache. - """ - self.handler = self.create_handler_without_code_with_cache() - - mock_msal = MagicMock() - mock_msal.get_accounts.return_value = [{"mock_key": "mock_value"}] - - mock_msal.acquire_token_silent.return_value = {"access_token": "mock_access_token"} - - self.mock_connect.return_value = mock_msal - - mock_token_cache.has_state_changed = True - - connection = self.handler.connect() - - self.assertIsNotNone(connection) - self.assertTrue(self.handler.is_connected) - - @patch("requests.get") - def test_check_connection_with_successful_connection(self, mock_get): - """ " - Test if `check_connection` method successfully returns a StatusResponse object with success=True and no error message when the connection check is successful. 
- """ - mock_msal = MagicMock() - mock_msal.get_accounts.return_value = [] - - mock_msal.acquire_token_by_authorization_code.return_value = {"access_token": "mock_access_token"} - - self.mock_connect.return_value = mock_msal - - mock_response = MagicMock(status_code=200) - mock_get.return_value = mock_response - - response = self.handler.check_connection() - - assert isinstance(response, StatusResponse) - self.assertTrue(response.success) - self.assertFalse(response.error_message) - - @patch("requests.get") - def test_check_connection_with_failed_connection(self, mock_get): - """ " - Test if `check_connection` method successfully returns a StatusResponse object with success=False and an error message when the connection check fails. - """ - mock_msal = MagicMock() - mock_msal.get_accounts.return_value = [] - - mock_msal.acquire_token_by_authorization_code.return_value = {"access_token": "mock_access_token"} - - self.mock_connect.return_value = mock_msal - - mock_response = MagicMock(status_code=400) - mock_get.return_value = mock_response - - response = self.handler.check_connection() - - self.assertFalse(response.success) - assert isinstance(response, StatusResponse) - self.assertTrue(response.error_message) - - -class MSTeamsResourceTestSetup(BaseAPIResourceTestSetup): - @property - def dummy_connection_data(self): - return OrderedDict( - client_id="12345678-90ab-cdef-1234-567890abcdef", - client_secret="a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6", - tenant_id="abcdef12-3456-7890-abcd-ef1234567890", - code="1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef", # Not passed by the user, but by the front-end. 
- ) - - def create_handler(self): - mock_handler_storage = MagicMock() - mock_handler_storage.file_get.side_effect = FileNotFoundError - mock_handler_storage.file_set.return_value = None - - return MSTeamsHandler("teams", connection_data=self.dummy_connection_data, handler_storage=mock_handler_storage) - - def create_patcher(self): - return patch("msal.ConfidentialClientApplication") - - def setUp(self): - super().setUp() - mock_msal = MagicMock() - mock_msal.get_accounts.return_value = [] - - mock_msal.acquire_token_by_authorization_code.return_value = {"access_token": "mock_access_token"} - - self.mock_connect.return_value = mock_msal - - def generate_mock_data(self, columns=None): - if columns is None: - columns = self.resource.get_columns() - return {column: f"mock_{column}" for column in columns} - - -class TestMSTeamsTeamsTable(MSTeamsResourceTestSetup, unittest.TestCase): - def create_resource(self): - return TeamsTable(self.handler) - - @patch("requests.get") - def test_list(self, mock_get): - """ " - Test if `list` method successfully returns a pandas DataFrame with data for all teams. - """ - mock_response = MagicMock(status_code=200) - - mock_team = self.generate_mock_data() - mock_response.json.return_value = {"value": [mock_team]} - mock_get.return_value = mock_response - - response = self.resource.list() - - assert isinstance(response, pd.DataFrame) - pd.testing.assert_frame_equal(response, pd.DataFrame([mock_team])) - - -class TestMSTeamsChannelsTable(MSTeamsResourceTestSetup, unittest.TestCase): - def create_resource(self): - return ChannelsTable(self.handler) - - @patch("requests.get") - def test_list_all(self, mock_get): - """ " - Test if `list` method successfully returns a pandas DataFrame with data for all channels in all teams. 
- """ - mock_response = MagicMock(status_code=200) - - mock_channel = self.generate_mock_data() - mock_response.json.return_value = {"value": [mock_channel]} - mock_get.return_value = mock_response - - response = self.resource.list(conditions=[]) - - assert isinstance(response, pd.DataFrame) - pd.testing.assert_frame_equal(response, pd.DataFrame([mock_channel])) - - @patch("requests.get") - def test_list_with_team_id(self, mock_get): - """ " - Test if `list` method successfully returns a pandas DataFrame with data for all channels in a team. - """ - mock_response = MagicMock(status_code=200) - - mock_channel = self.generate_mock_data() - mock_response.json.return_value = {"value": [mock_channel]} - mock_get.return_value = mock_response - - response = self.resource.list( - conditions=[FilterCondition(column="teamId", op=FilterOperator.EQUAL, value="mock_team_id")] - ) - - assert isinstance(response, pd.DataFrame) - pd.testing.assert_frame_equal(response, pd.DataFrame([mock_channel])) - - @patch("requests.get") - def test_list_with_channel_id(self, mock_get): - """ " - Test if `list` method successfully returns a pandas DataFrame with data for a specific channel in a team. 
- """ - mock_response_1 = MagicMock(status_code=200) - mock_response_2 = MagicMock(status_code=200) - - mock_team = self.generate_mock_data(columns=TeamsTable(self.handler).get_columns()) - mock_channel = self.generate_mock_data() - mock_response_1.json.return_value = {"value": [mock_team]} - mock_response_2.json.return_value = {"value": [mock_channel]} - - mock_get.side_effect = [mock_response_1, mock_response_2] - - response = self.resource.list( - conditions=[FilterCondition(column="id", op=FilterOperator.EQUAL, value="mock_id")] - ) - - assert isinstance(response, pd.DataFrame) - pd.testing.assert_frame_equal(response, pd.DataFrame([mock_channel])) - - @patch("requests.get") - def test_list_with_team_id_and_channel_id(self, mock_get): - """ " - Test if `list` method successfully returns a pandas DataFrame with data for a specific channel in a specific team. - """ - mock_response = MagicMock(status_code=200) - - mock_channel = self.generate_mock_data() - mock_response.json.return_value = mock_channel - mock_get.return_value = mock_response - - response = self.resource.list( - conditions=[ - FilterCondition(column="teamId", op=FilterOperator.EQUAL, value="mock_team_id"), - FilterCondition(column="id", op=FilterOperator.EQUAL, value="mock_id"), - ] - ) - - assert isinstance(response, pd.DataFrame) - pd.testing.assert_frame_equal(response, pd.DataFrame([mock_channel])) - - -class TestMSTeamsChannelMessagesTable(MSTeamsResourceTestSetup, unittest.TestCase): - def create_resource(self): - return ChannelMessagesTable(self.handler) - - def test_list_without_team_id_and_channel_id_raises_error(self): - """ " - Test if `list` method raises a ValueError when teamId and channelId are not provided. - """ - with self.assertRaises( - ValueError, msg="The 'channelIdentity_teamId' and 'channelIdentity_channelId' columns are required." 
- ): - self.resource.list(conditions=[]) - - @patch("requests.get") - def test_list_with_team_id_and_channel_id(self, mock_get): - """ " - Test if `list` method successfully returns a pandas DataFrame with data for all messages in a specific channel in a specific team. - """ - mock_response = MagicMock(status_code=200) - - mock_message = self.generate_mock_data() - mock_response.json.return_value = {"value": [mock_message]} - mock_get.return_value = mock_response - - response = self.resource.list( - conditions=[ - FilterCondition(column="channelIdentity_teamId", op=FilterOperator.EQUAL, value="mock_team_id"), - FilterCondition(column="channelIdentity_channelId", op=FilterOperator.EQUAL, value="mock_channel_id"), - ] - ) - - assert isinstance(response, pd.DataFrame) - pd.testing.assert_frame_equal(response, pd.DataFrame([mock_message])) - - @patch("requests.get") - def test_list_with_team_id_channel_id_and_message_id(self, mock_get): - """ " - Test if `list` method successfully returns a pandas DataFrame with data for a specific message in a specific channel in a specific team. 
- """ - mock_response = MagicMock(status_code=200) - - mock_message = self.generate_mock_data() - mock_response.json.return_value = mock_message - mock_get.return_value = mock_response - - response = self.resource.list( - conditions=[ - FilterCondition(column="channelIdentity_teamId", op=FilterOperator.EQUAL, value="mock_team_id"), - FilterCondition(column="channelIdentity_channelId", op=FilterOperator.EQUAL, value="mock_channel_id"), - FilterCondition(column="id", op=FilterOperator.EQUAL, value="mock_id"), - ] - ) - - assert isinstance(response, pd.DataFrame) - pd.testing.assert_frame_equal(response, pd.DataFrame([mock_message])) - - -class TestMSTeamsChatsTable(MSTeamsResourceTestSetup, unittest.TestCase): - def create_resource(self): - return ChatsTable(self.handler) - - @patch("requests.get") - def test_list_all(self, mock_get): - """ " - Test if `list` method successfully returns a pandas DataFrame with data for all chats. - """ - mock_response = MagicMock(status_code=200) - - mock_channel = self.generate_mock_data() - mock_response.json.return_value = {"value": [mock_channel]} - mock_get.return_value = mock_response - - response = self.resource.list(conditions=[]) - - assert isinstance(response, pd.DataFrame) - pd.testing.assert_frame_equal(response, pd.DataFrame([mock_channel])) - - @patch("requests.get") - def test_list_with_chat_id(self, mock_get): - """ " - Test if `list` method successfully returns a pandas DataFrame with data for a specific chat. 
- """ - mock_response = MagicMock(status_code=200) - - mock_chat = self.generate_mock_data() - mock_response.json.return_value = mock_chat - mock_get.return_value = mock_response - - response = self.resource.list( - conditions=[FilterCondition(column="id", op=FilterOperator.EQUAL, value="mock_id")] - ) - - assert isinstance(response, pd.DataFrame) - pd.testing.assert_frame_equal(response, pd.DataFrame([mock_chat])) - - -class TestMSTeamsChatMessagesTable(MSTeamsResourceTestSetup, unittest.TestCase): - def create_resource(self): - return ChatMessagesTable(self.handler) - - def test_list_without_chat_id_raises_error(self): - """ " - Test if `list` method raises a ValueError when chatId is not provided. - """ - with self.assertRaises(ValueError, msg="The 'chatIdentity_chatId' column is required."): - self.resource.list(conditions=[]) - - @patch("requests.get") - def test_list_with_chat_id(self, mock_get): - """ " - Test if `list` method successfully returns a pandas DataFrame with data for all messages in a specific chat. - """ - mock_response = MagicMock(status_code=200) - - mock_message = self.generate_mock_data() - mock_response.json.return_value = {"value": [mock_message]} - mock_get.return_value = mock_response - - response = self.resource.list( - conditions=[FilterCondition(column="chatId", op=FilterOperator.EQUAL, value="mock_chat_id")] - ) - - assert isinstance(response, pd.DataFrame) - pd.testing.assert_frame_equal(response, pd.DataFrame([mock_message])) - - @patch("requests.get") - def test_list_with_chat_id_and_message_id(self, mock_get): - """ " - Test if `list` method successfully returns a pandas DataFrame with data for a specific message in a specific chat. 
- """ - mock_response = MagicMock(status_code=200) - - mock_message = self.generate_mock_data() - mock_response.json.return_value = mock_message - mock_get.return_value = mock_response - - response = self.resource.list( - conditions=[ - FilterCondition(column="chatId", op=FilterOperator.EQUAL, value="mock_chat_id"), - FilterCondition(column="id", op=FilterOperator.EQUAL, value="mock_id"), - ] - ) - - assert isinstance(response, pd.DataFrame) - pd.testing.assert_frame_equal(response, pd.DataFrame([mock_message])) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/community_handlers/test_openbb_tables.py b/tests/unit/handlers/community_handlers/test_openbb_tables.py deleted file mode 100644 index 50f0d270a5a..00000000000 --- a/tests/unit/handlers/community_handlers/test_openbb_tables.py +++ /dev/null @@ -1,101 +0,0 @@ -from types import SimpleNamespace -from unittest.mock import patch - -import pandas as pd -import pytest - -try: - from mindsdb.integrations.handlers.openbb_handler.openbb_tables import OpenBBtable -except ImportError: - OpenBBtable = None - -pytestmark = pytest.mark.skipif(OpenBBtable is None, reason="openbb_handler not installed") - - -class _DummyOpenBBResponse: - def __init__(self, payload): - self.payload = payload - - def to_df(self): - return pd.DataFrame([self.payload]) - - -class _DummyPrice: - def historical(self, **kwargs): - return _DummyOpenBBResponse(kwargs) - - -class _DummyEquity: - def __init__(self): - self.price = _DummyPrice() - - -class _DummyCoverage: - def __init__(self): - self.commands = {".equity.price.historical": {}} - - -class _DummyObb: - def __init__(self): - self.equity = _DummyEquity() - self.coverage = _DummyCoverage() - - -class _DummyHandler: - def __init__(self): - self.obb = _DummyObb() - - -def test_openbb_command_resolution_returns_callable(): - table = OpenBBtable(_DummyHandler()) - - function = table._resolve_openbb_command("obb.equity.price.historical") - result = 
function(symbol="AAPL").to_df() - - assert result.iloc[0]["symbol"] == "AAPL" - - -def test_openbb_select_treats_params_as_data(): - table = OpenBBtable(_DummyHandler()) - malicious_value = "__import__('os').system('echo hacked')" - query = SimpleNamespace(where=object()) - - with patch( - "mindsdb.integrations.handlers.openbb_handler.openbb_tables.extract_comparison_conditions", - return_value=[["=", "cmd", "obb.equity.price.historical"], ["=", "symbol", malicious_value]], - ): - result = table.select(query) - - assert result.iloc[0]["symbol"] == malicious_value - - -def test_openbb_command_resolution_rejects_private_segments(): - table = OpenBBtable(_DummyHandler()) - - with pytest.raises(ValueError, match="Invalid OpenBB command segment"): - table._resolve_openbb_command("obb.__class__") - - -def test_openbb_select_coerces_literal_string_params(): - table = OpenBBtable(_DummyHandler()) - query = SimpleNamespace(where=object()) - - with patch( - "mindsdb.integrations.handlers.openbb_handler.openbb_tables.extract_comparison_conditions", - return_value=[ - ["=", "cmd", "obb.equity.price.historical"], - ["=", "limit", "123"], - ["=", "adjusted", "true"], - ["=", "symbol", "'AAPL'"], - ["=", "ids", "[1, 2]"], - ["=", "raw_symbol", "AAPL"], - ], - ): - result = table.select(query) - - row = result.iloc[0] - assert row["limit"] == 123 - assert bool(row["adjusted"]) is True - assert row["symbol"] == "AAPL" - assert row["ids"] == [1, 2] - assert row["raw_symbol"] == "AAPL" diff --git a/tests/unit/handlers/community_handlers/test_s3.py b/tests/unit/handlers/community_handlers/test_s3.py deleted file mode 100644 index 443da7fc776..00000000000 --- a/tests/unit/handlers/community_handlers/test_s3.py +++ /dev/null @@ -1,218 +0,0 @@ -from collections import OrderedDict -import unittest -from unittest.mock import patch, MagicMock - -import pytest - -from botocore.client import ClientError -from mindsdb_sql_parser import ast -from mindsdb_sql_parser.ast import Select, 
Identifier, Star, Constant - -import pandas as pd - -from base_handler_test import BaseHandlerTestSetup -from mindsdb.integrations.libs.response import ( - OkResponse, - TableResponse, - DataHandlerResponse as Response, - HandlerStatusResponse as StatusResponse, - RESPONSE_TYPE, -) - -try: - from mindsdb.integrations.handlers.s3_handler.s3_handler import S3Handler -except ImportError: - pytestmark = pytest.mark.skip("s3_handler not installed (community handler)") - - -class TestS3Handler(BaseHandlerTestSetup, unittest.TestCase): - @property - def object_name(self): - return "`my-bucket/my-file.csv`" - - @property - def dummy_connection_data(self): - return OrderedDict( - aws_access_key_id="AQAXEQK89OX07YS34OP", - aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", - bucket="mindsdb-bucket", - region_name="us-east-2", - ) - - def create_handler(self): - return S3Handler("s3", connection_data=self.dummy_connection_data) - - def create_patcher(self): - return patch("boto3.client") - - def test_connect(self): - """ - Test if `connect` method successfully establishes a connection and sets `is_connected` flag to True. - Also, verifies that duckdb.connect is called exactly once. - The `connect` method for this handler does not check the validity of the connection; it succeeds even with incorrect credentials. - The `check_connection` method handles the connection status. - """ - self.mock_connect.return_value = MagicMock() - connection = self.handler.connect() - self.assertIsNotNone(connection) - self.assertTrue(self.handler.is_connected) - self.mock_connect.assert_called_once() - - @patch("boto3.client") - def test_check_connection_success(self, mock_boto3_client): - """ - Test that the `check_connection` method returns a StatusResponse object and accurately reflects the connection status on a successful connection. - """ - # Mock the boto3 client object and its methods. 
- mock_boto3_client_instance = MagicMock() - mock_boto3_client.return_value = mock_boto3_client_instance - - response = self.handler.check_connection() - - self.assertTrue(response.success) - assert isinstance(response, StatusResponse) - self.assertFalse(response.error_message) - - @patch("boto3.client") - def test_check_connection_failure_invalid_bucket_or_no_access(self, mock_boto3_client): - """ - Test that the `check_connection` method returns a StatusResponse object and accurately reflects the connection status on failed connection due to invalid bucket or lack of access permissions. - """ - # Mock the boto3 client object and its methods. - mock_boto3_client_instance = MagicMock() - mock_boto3_client.return_value = mock_boto3_client_instance - mock_boto3_client_instance.head_bucket.side_effect = ClientError( - error_response={ - "Error": { - "Code": "404", - "Message": "Not Found", - } - }, - operation_name="HeadBucket", - ) - - response = self.handler.check_connection() - - self.assertFalse(response.success) - assert isinstance(response, StatusResponse) - self.assertTrue(response.error_message) - - @patch("boto3.client") - def test_query_select(self, mock_boto3_client): - """ - Tests the `query` method to ensure it executes a SELECT SQL query using a mock cursor and returns a Response object. - SELECT works somewhat differently than the other queries, so it is tested separately. - `native_query` cannot be tested directly because it depends on some pre-processing steps handled by the `query` method. - """ - # Mock the boto3 client object and its methods. - mock_boto3_client_instance = MagicMock() - mock_boto3_client.return_value = mock_boto3_client_instance - - duckdb_connect = MagicMock() - self.handler._connect_duckdb = duckdb_connect - duckdb_execute = duckdb_connect().__enter__().execute - duckdb_execute().fetchdf.return_value = pd.DataFrame([], columns=["col_2"]) - - # Craft the SELECT query and execute it. 
- object_name = "my-bucket/my-file.csv" - select = ast.Select(targets=[Star()], from_table=Identifier(parts=[object_name])) - - duckdb_execute.reset_mock() - response = self.handler.query(select) - - duckdb_execute.assert_called_once_with( - f"SELECT * FROM 's3://{self.dummy_connection_data['bucket']}/{object_name.replace('`', '')}'" - ) - - assert isinstance(response, TableResponse) - - @patch("boto3.client") - def test_query_insert(self, mock_boto3_client): - """ - Tests the `query` method to ensure it executes a INSERT SQL query using a mock cursor and returns a Response object. - INSERT works similarly to UPDATE and DELETE. - `native_query` cannot be tested directly because it depends on some pre-processing steps handled by the `query` method. - """ - # Mock the boto3 client object and its methods. - mock_boto3_client_instance = MagicMock() - mock_boto3_client.return_value = mock_boto3_client_instance - mock_boto3_client_instance.head_object.return_value = MagicMock() - - duckdb_connect = MagicMock() - self.handler._connect_duckdb = duckdb_connect - duckdb_execute = duckdb_connect().__enter__().execute - duckdb_execute().fetchdf.return_value = None - - # Craft the INSERT query and execute it. 
- columns = ["col_1", "col_2"] - values = [("val_1", "val_2")] - insert = ast.Insert(table=Identifier(parts=[self.object_name]), columns=columns, values=values) - duckdb_execute.reset_mock() - response = self.handler.query(insert) - - sqls = [i[0][0] for i in duckdb_execute.call_args_list] - assert ( - sqls[0] - == f"CREATE TABLE tmp_table AS SELECT * FROM 's3://{self.dummy_connection_data['bucket']}/{self.object_name}'" - ) - - assert sqls[1] == "INSERT INTO tmp_table BY NAME SELECT * FROM df" - - assert sqls[2] == f"COPY tmp_table TO 's3://{self.dummy_connection_data['bucket']}/{self.object_name}'" - - assert isinstance(response, OkResponse) - - @patch("boto3.client") - def test_get_tables(self, mock_boto3_client): - """ - Test that the `get_tables` method correctly calls the `list_objects_v2` method and returns a Response object with the supported objects (files). - """ - # Mock the boto3 client object and its methods. - mock_boto3_client_instance = MagicMock() - mock_boto3_client.return_value = mock_boto3_client_instance - mock_boto3_client_instance.list_objects_v2.return_value = { - "Contents": [ - {"Key": "file1.csv"}, - {"Key": "file2.tsv"}, - {"Key": "file3.json"}, - {"Key": "file4.parquet"}, - {"Key": "file5.xlsx"}, - ] - } - - response = self.handler.get_tables() - - assert isinstance(response, Response) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), 5) # +1 table is 'files' - self.assertNotIn("file5.xlsx", df["table_name"].values) - - @patch("mindsdb.integrations.handlers.s3_handler.s3_handler.S3Handler.query") - def test_get_columns(self, mock_query): - """ - Test that the `get_columns` method correctly constructs the SQL query and calls `native_query` with the correct query. 
- """ - mock_query.return_value = TableResponse( - data=pd.DataFrame( - data={ - "col_1": ["row_1", "row_2", "row_3"], - "col_2": [1, 2, 3], - }, - ), - ) - - table_name = "mock_table" - response = self.handler.get_columns(table_name) - - expected_query = Select(targets=[Star()], from_table=Identifier(parts=[table_name]), limit=Constant(1)) - self.handler.query.assert_called_once_with(expected_query) - - df = response.data_frame - self.assertEqual(df.columns.tolist(), ["column_name", "data_type"]) - self.assertEqual(df["data_type"].values.tolist(), ["string", "int64"]) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/community_handlers/test_slack.py b/tests/unit/handlers/community_handlers/test_slack.py deleted file mode 100644 index 62a9ada8bc8..00000000000 --- a/tests/unit/handlers/community_handlers/test_slack.py +++ /dev/null @@ -1,1337 +0,0 @@ -from collections import OrderedDict -from copy import deepcopy -import datetime as dt -import os -import threading -from types import SimpleNamespace -import pytest -import unittest -from unittest.mock import MagicMock, patch - -from mindsdb_sql_parser.ast import BinaryOperation, Constant, Delete, Identifier, Insert, Update -import pandas as pd - -from base_handler_test import BaseAPIChatHandlerTest, BaseAPIResourceTestSetup -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse, TableResponse -from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator - -try: - from slack_sdk.errors import SlackApiError - from slack_sdk.web.slack_response import SlackResponse - from mindsdb.integrations.handlers.slack_handler.slack_handler import SlackHandler - from mindsdb.integrations.handlers.slack_handler.slack_tables import ( - SlackConversationsTable, - SlackMessagesTable, - SlackThreadsTable, - SlackUsersTable, - ) - from mindsdb.integrations.handlers.slack_handler.connection_args import connection_args, connection_args_example - - # 
Mock response for the first call to the `conversations.info` method. - MOCK_RESPONSE_CONV_INFO_1 = { - "ok": True, - "channel": { - "id": "C012AB3CD", - "name": "general", - "is_channel": True, - "is_group": False, - "is_im": False, - "is_mpim": False, - "is_private": False, - "created": 1449252889, - "updated": 1678229664302, - }, - } - - # Mock SlackResponse object for the first call to the `conversations.info` method. - MOCK_SLACK_RESPONSE_CONV_INFO_1 = SlackResponse( - client=MagicMock(), - http_verb="GET", - api_url="https://slack.com/api/conversations.info", - req_args=MagicMock(), - headers=MagicMock(), - status_code=200, - data=deepcopy(MOCK_RESPONSE_CONV_INFO_1), - ) - - # Mock response for the second call to the `conversations.info` method. - MOCK_RESPONSE_CONV_INFO_2 = { - "ok": True, - "channel": { - "id": "C012AB3CE", - "name": "random", - "is_channel": True, - "is_group": False, - "is_im": False, - "is_mpim": False, - "is_private": False, - "created": 1449252889, - "updated": 1678229664302, - }, - } - - # Mock SlackResponse object for the second call to the `conversations.info` method. - MOCK_SLACK_RESPONSE_CONV_INFO_2 = SlackResponse( - client=MagicMock(), - http_verb="GET", - api_url="https://slack.com/api/conversations.info", - req_args=MagicMock(), - headers=MagicMock(), - status_code=200, - data=deepcopy(MOCK_RESPONSE_CONV_INFO_2), - ) - - # Mock response for the first call to the `conversations.list` method. - MOCK_RESPONSE_CONV_LIST_1 = { - "ok": True, - "channels": [MOCK_RESPONSE_CONV_INFO_1["channel"]], - "response_metadata": {"next_cursor": "dGVhbTpDMDYxRkE1UEI="}, - } - - # Mock SlackResponse object for the first call to the `conversations.list` method. 
- MOCK_SLACK_RESPONSE_CONV_LIST_1 = SlackResponse( - client=MagicMock(), - http_verb="GET", - api_url="https://slack.com/api/conversations.list", - req_args=MagicMock(), - headers=MagicMock(), - status_code=200, - data=deepcopy(MOCK_RESPONSE_CONV_LIST_1), - ) - - # Mock response for the second call to the `conversations.list` method. - MOCK_RESPONSE_CONV_LIST_2 = { - "ok": True, - "channels": [MOCK_RESPONSE_CONV_INFO_2["channel"]], - "response_metadata": {"next_cursor": ""}, - } - - # Mock SlackResponse object for the second call to the `conversations.list` method. - MOCK_SLACK_RESPONSE_CONV_LIST_2 = SlackResponse( - client=MagicMock(), - http_verb="GET", - api_url="https://slack.com/api/conversations.list", - req_args=MagicMock(), - headers=MagicMock(), - status_code=200, - data=deepcopy(MOCK_RESPONSE_CONV_LIST_2), - ) - - # Mock response for the first call to the `conversations.history` method. - MOCK_RESPONSE_CONV_HISTORY_1 = { - "ok": True, - "messages": [ - { - "type": "message", - "user": "U123ABC456", - "text": "I find you punny and would like to smell your nose letter", - "ts": "1512085950.000216", - }, - { - "type": "message", - "user": "U222BBB222", - "text": "What, you want to smell my shoes better?", - "ts": "1512104434.000490", - }, - ], - "has_more": True, - "pin_count": 0, - "response_metadata": {"next_cursor": "bmV4dF90czoxNTEyMDg1ODYxMDAwNTQz"}, - } - - # Mock SlackResponse object for the first call to the `conversations.history` method. - MOCK_SLACK_RESPONSE_CONV_HISTORY_1 = SlackResponse( - client=MagicMock(), - http_verb="GET", - api_url="https://slack.com/api/conversations.history", - req_args=MagicMock(), - headers=MagicMock(), - status_code=200, - data=deepcopy(MOCK_RESPONSE_CONV_HISTORY_1), - ) - - # Mock response for the second call to the `conversations.history` method. 
- MOCK_RESPONSE_CONV_HISTORY_2 = { - "ok": True, - "messages": [ - { - "type": "message", - "user": "U222BBB222", - "text": "Isn't this whether dreadful?", - "ts": "1512104434.000490", - }, - ], - "has_more": False, - "pin_count": 0, - "response_metadata": {"next_cursor": ""}, - } - - # Mock SlackResponse object for the second call to the `conversations.history` method. - MOCK_SLACK_RESPONSE_CONV_HISTORY_2 = SlackResponse( - client=MagicMock(), - http_verb="GET", - api_url="https://slack.com/api/conversations.history", - req_args=MagicMock(), - headers=MagicMock(), - status_code=200, - data=deepcopy(MOCK_RESPONSE_CONV_HISTORY_2), - ) - - # Mock response for the first call to the `conversations.replies` method. - MOCK_RESPONSE_CONV_REPLIES_1 = { - "messages": [ - { - "type": "message", - "user": "U061F7AUR", - "text": "island", - "thread_ts": "1482960137.003543", - "reply_count": 3, - "subscribed": True, - "last_read": "1484678597.521003", - "unread_count": 0, - "ts": "1482960137.003543", - }, - { - "type": "message", - "user": "U061F7AUR", - "text": "one island", - "thread_ts": "1482960137.003543", - "parent_user_id": "U061F7AUR", - "ts": "1483037603.017503", - }, - ], - "has_more": True, - "ok": True, - "response_metadata": {"next_cursor": "bmV4dF90czoxNDg0Njc4MjkwNTE3MDkx"}, - } - - # Mock SlackResponse object for the first call to the `conversations.replies` method. - MOCK_SLACK_RESPONSE_CONV_REPLIES_1 = SlackResponse( - client=MagicMock(), - http_verb="GET", - api_url="https://slack.com/api/conversations.replies", - req_args=MagicMock(), - headers=MagicMock(), - status_code=200, - data=deepcopy(MOCK_RESPONSE_CONV_REPLIES_1), - ) - - # Mock response for the second call to the `conversations.replies` method. 
- MOCK_RESPONSE_CONV_REPLIES_2 = { - "messages": [ - { - "type": "message", - "user": "U061F7AUR", - "text": "two island", - "thread_ts": "1482960137.003543", - "parent_user_id": "U061F7AUR", - "ts": "1483051909.018632", - }, - { - "type": "message", - "user": "U061F7AUR", - "text": "three for the land", - "thread_ts": "1482960137.003543", - "parent_user_id": "U061F7AUR", - "ts": "1483125339.020269", - }, - ], - "has_more": False, - "ok": True, - "response_metadata": {"next_cursor": ""}, - } - - # Mock SlackResponse object for the second call to the `conversations.replies` method. - MOCK_SLACK_RESPONSE_CONV_REPLIES_2 = SlackResponse( - client=MagicMock(), - http_verb="GET", - api_url="https://slack.com/api/conversations.replies", - req_args=MagicMock(), - headers=MagicMock(), - status_code=200, - data=deepcopy(MOCK_RESPONSE_CONV_REPLIES_2), - ) - - # Mock response for the first call to the `users.info` method. - MOCK_RESPONSE_USERS_LIST_1 = { - "ok": True, - "members": [ - { - "id": "W012A3CDE", - "team_id": "T012AB3C4", - "name": "spengler", - "deleted": False, - "color": "9f69e7", - "real_name": "spengler", - "tz": "America/Los_Angeles", - "tz_label": "Pacific Daylight Time", - "tz_offset": -25200, - "profile": { - "avatar_hash": "ge3b51ca72de", - "status_text": "Print is dead", - "status_emoji": ":books:", - "real_name": "Egon Spengler", - "display_name": "spengler", - "real_name_normalized": "Egon Spengler", - "display_name_normalized": "spengler", - "email": "spengler@ghostbusters.example.com", - "image_24": "https://.../avatar/e3b51ca72dee4ef87916ae2b9240df50.jpg", - "image_32": "https://.../avatar/e3b51ca72dee4ef87916ae2b9240df50.jpg", - "image_48": "https://.../avatar/e3b51ca72dee4ef87916ae2b9240df50.jpg", - "image_72": "https://.../avatar/e3b51ca72dee4ef87916ae2b9240df50.jpg", - "image_192": "https://.../avatar/e3b51ca72dee4ef87916ae2b9240df50.jpg", - "image_512": "https://.../avatar/e3b51ca72dee4ef87916ae2b9240df50.jpg", - "team": "T012AB3C4", - }, - 
"is_admin": True, - "is_owner": False, - "is_primary_owner": False, - "is_restricted": False, - "is_ultra_restricted": False, - "is_bot": False, - "updated": 1502138686, - "is_app_user": False, - "has_2fa": False, - } - ], - "cache_ts": 1498777272, - "response_metadata": {"next_cursor": "dXNlcjpVMEc5V0ZYTlo="}, - } - - # Mock SlackResponse object for the first call to the `users.info` method. - MOCK_SLACK_RESPONSE_USERS_LIST_1 = SlackResponse( - client=MagicMock(), - http_verb="GET", - api_url="https://slack.com/api/users.list", - req_args=MagicMock(), - headers=MagicMock(), - status_code=200, - data=deepcopy(MOCK_RESPONSE_USERS_LIST_1), - ) - - # Mock response for the second call to the `users.info` method. - MOCK_RESPONSE_USERS_LIST_2 = { - "ok": True, - "members": [ - { - "id": "W07QCRPA4", - "team_id": "T0G9PQBBK", - "name": "glinda", - "deleted": False, - "color": "9f69e7", - "real_name": "Glinda Southgood", - "tz": "America/Los_Angeles", - "tz_label": "Pacific Daylight Time", - "tz_offset": -25200, - "profile": { - "avatar_hash": "8fbdd10b41c6", - "image_24": "https://a.slack-edge.com...png", - "image_32": "https://a.slack-edge.com...png", - "image_48": "https://a.slack-edge.com...png", - "image_72": "https://a.slack-edge.com...png", - "image_192": "https://a.slack-edge.com...png", - "image_512": "https://a.slack-edge.com...png", - "image_1024": "https://a.slack-edge.com...png", - "image_original": "https://a.slack-edge.com...png", - "first_name": "Glinda", - "last_name": "Southgood", - "title": "Glinda the Good", - "phone": "", - "skype": "", - "real_name": "Glinda Southgood", - "real_name_normalized": "Glinda Southgood", - "display_name": "Glinda the Fairly Good", - "display_name_normalized": "Glinda the Fairly Good", - "email": "glenda@south.oz.coven", - }, - "is_admin": True, - "is_owner": False, - "is_primary_owner": False, - "is_restricted": False, - "is_ultra_restricted": False, - "is_bot": False, - "updated": 1480527098, - "has_2fa": False, - } - ], - 
"cache_ts": 1498777272, - "response_metadata": {"next_cursor": ""}, - } - - # Mock SlackResponse object for the second call to the `users.info` method. - MOCK_SLACK_RESPONSE_USERS_LIST_2 = SlackResponse( - client=MagicMock(), - http_verb="GET", - api_url="https://slack.com/api/users.list", - req_args=MagicMock(), - headers=MagicMock(), - status_code=200, - data=deepcopy(MOCK_RESPONSE_USERS_LIST_2), - ) - -except ImportError: - pytestmark = pytest.mark.skip("Slack handler not installed") - - -class TestSlackHandler(BaseAPIChatHandlerTest, unittest.TestCase): - @property - def dummy_connection_data(self): - return OrderedDict(token="xoxb-111-222-xyz", app_token="xapp-A111-222-xyz") - - @property - def err_to_raise_on_connect_failure(self): - return SlackApiError("Connection Failed", response=MagicMock()) - - @property - def registered_tables(self): - return ["conversations", "messages", "threads", "users"] - - def create_handler(self): - return SlackHandler("slack", connection_data=self.dummy_connection_data) - - def create_patcher(self): - return patch("mindsdb.integrations.handlers.slack_handler.slack_handler.WebClient") - - @patch("mindsdb.integrations.handlers.slack_handler.slack_handler.SocketModeClient") - def test_check_connection_success(self, mock_socket_mode_client): - """ - Tests if the `check_connection` method handles a successful connection check and returns a StatusResponse object that accurately reflects the connection status. - """ - self.mock_connect.return_value = MagicMock() - response = self.handler.check_connection() - - assert isinstance(response, StatusResponse) - self.assertTrue(response.success) - self.assertFalse(response.error_message) - - def test_get_my_user_name(self): - """ - Tests the `get_my_user_name` method to ensure it correctly returns a username. 
- """ - self.mock_connect.return_value.auth_test.return_value.data = { - "ok": True, - "url": "https://subarachnoid.slack.com/", - "team": "Subarachnoid Workspace", - "user": "bot", - "team_id": "T0G9PQBBK", - "user_id": "W23456789", - "bot_id": "BZYBOTHED", - } - - response = self.handler.get_my_user_name() - assert response == "BZYBOTHED" - - def test_native_query(self): - """ - Tests the `native_query` method to ensure it executes a Slack API method and returns a Response object. - """ - self.mock_connect.return_value.conversations_info.return_value = deepcopy(MOCK_SLACK_RESPONSE_CONV_INFO_1) - - query = "conversations_info(channel='C1234567890')" - response = self.handler.native_query(query) - - self.mock_connect.return_value.conversations_info.assert_called_once_with(channel="C1234567890") - assert isinstance(response, TableResponse) - expected_df = pd.DataFrame([MOCK_RESPONSE_CONV_INFO_1["channel"]]) - pd.testing.assert_frame_equal(response.data_frame, expected_df) - - def test_native_query_with_pagination(self): - """ - Tests the `native_query` method to ensure it executes a Slack API method with pagination and returns a Response object. 
- """ - self.mock_connect.return_value.conversations_list.side_effect = [ - deepcopy(MOCK_SLACK_RESPONSE_CONV_LIST_1), - deepcopy(MOCK_SLACK_RESPONSE_CONV_LIST_2), - ] - - query = "conversations_list()" - response = self.handler.native_query(query) - - self.assertEqual(self.mock_connect.return_value.conversations_list.call_count, 2) - self.mock_connect.return_value.conversations_list.assert_any_call() - self.mock_connect.return_value.conversations_list.assert_any_call(cursor="dGVhbTpDMDYxRkE1UEI=") - - assert isinstance(response, TableResponse) - expected_df = pd.DataFrame(MOCK_RESPONSE_CONV_LIST_1["channels"] + MOCK_RESPONSE_CONV_LIST_2["channels"]) - pd.testing.assert_frame_equal(response.data_frame, expected_df) - - @patch.dict(os.environ, {}, clear=True) - @patch("mindsdb.integrations.handlers.slack_handler.slack_handler.Config") - def test_connect_requires_token(self, mock_config): - mock_config.return_value.get.return_value = {} - handler = SlackHandler("slack", connection_data={}) - with self.assertRaises(ValueError): - handler.connect() - - def test_connect_reuses_existing_connection(self): - cached = MagicMock() - self.handler.web_connection = cached - self.handler.is_connected = True - connection = self.handler.connect() - self.assertIs(connection, cached) - self.mock_connect.assert_not_called() - - def test_check_connection_failure_resets_flag(self): - self.handler.is_connected = True - error = SlackApiError(message="boom", response=MagicMock(data={"error": "boom"})) - self.handler.connect = MagicMock(side_effect=error) - - response = self.handler.check_connection() - self.assertFalse(response.success) - self.assertFalse(self.handler.is_connected) - self.assertIn("boom", response.error_message) - - def test_call_slack_api_raises_slack_error(self): - slack_error = SlackApiError(message="bad", response={"error": "bad"}) - method = MagicMock(side_effect=slack_error) - self.handler.connect = MagicMock(return_value=MagicMock(conversations_list=method)) - - 
with self.assertRaises(SlackApiError): - self.handler._call_slack_api("conversations_list", {}) - - def test_extract_data_single_list(self): - response_data = {"ok": True, "channels": [{"id": "C1"}]} - rows = self.handler._extract_data_from_response(response_data) - self.assertEqual(rows, [{"id": "C1"}]) - - def test_extract_data_single_object(self): - response_data = {"ok": True, "channel": {"id": "C1"}} - rows = self.handler._extract_data_from_response(response_data) - self.assertEqual(rows, [{"id": "C1"}]) - - def test_extract_data_invalid_response(self): - with self.assertRaises(ValueError): - self.handler._extract_data_from_response({"ok": True, "foo": {"id": 1}, "bar": []}) - - def test_subscribe_unsupported_table(self): - stop_event = threading.Event() - with self.assertRaises(RuntimeError): - self.handler.subscribe(stop_event, lambda *_: None, table_name="users") - - def test_subscribe_rejects_columns(self): - stop_event = threading.Event() - with self.assertRaises(RuntimeError): - self.handler.subscribe(stop_event, lambda *_: None, columns=["text"]) - - @patch("mindsdb.integrations.handlers.slack_handler.slack_handler.SocketModeResponse") - @patch("mindsdb.integrations.handlers.slack_handler.slack_handler.SocketModeClient") - def test_subscribe_processes_message(self, mock_socket_cls, mock_response_cls): - class FakeClient: - def __init__(self): - self.socket_mode_request_listeners = [] - - def send_socket_mode_response(self, response): - self.sent = response - - def connect(self): - request = SimpleNamespace( - envelope_id="123", - type="events_api", - retry_attempt=None, - payload={ - "event": { - "type": "message", - "channel": "C1", - "user": "U1", - "text": "Hello", - "ts": "1717000000.0", - } - }, - ) - for listener in list(self.socket_mode_request_listeners): - listener(self, request) - - def close(self): - self.closed = True - - fake_client = FakeClient() - mock_socket_cls.return_value = fake_client - mock_response_cls.side_effect = lambda 
envelope_id: SimpleNamespace(envelope_id=envelope_id) - - rows = [] - - stop_event = threading.Event() - stop_event.set() - self.handler.subscribe(stop_event, lambda row, key: rows.append((row, key))) - - self.assertEqual(len(rows), 1) - row, key = rows[0] - self.assertEqual(key, {"channel_id": "C1"}) - self.assertEqual(row["user"], "U1") - self.assertEqual(row["text"], "Hello") - - -class SlackAPIResourceTestSetup(BaseAPIResourceTestSetup): - @property - def dummy_connection_data(self): - return OrderedDict(token="xoxb-111-222-xyz", app_token="xapp-A111-222-xyz") - - def create_handler(self): - return SlackHandler("slack", connection_data=self.dummy_connection_data) - - def create_patcher(self): - return patch("mindsdb.integrations.handlers.slack_handler.slack_handler.WebClient") - - -class TestSlackConversationsTable(SlackAPIResourceTestSetup, unittest.TestCase): - def create_resource(self): - return SlackConversationsTable(self.handler) - - def _get_expected_df_for_conv_info_1(self): - """ - Returns the expected DataFrame for a single call to the `conversations_info` method. - """ - mock_response_conv_info = deepcopy(MOCK_RESPONSE_CONV_INFO_1) - mock_response_conv_info["channel"]["created_at"] = dt.datetime.fromtimestamp( - mock_response_conv_info["channel"]["created"] - ) - mock_response_conv_info["channel"]["updated_at"] = dt.datetime.fromtimestamp( - mock_response_conv_info["channel"]["updated"] / 1000 - ) - - return pd.DataFrame([mock_response_conv_info["channel"]], columns=self.resource.get_columns()) - - def _get_expected_df_for_conv_info_2(self): - """ - Returns the expected DataFrame for multiple(2) calls to the `conversations_info` method. 
- """ - mock_response_conv_info = deepcopy(MOCK_RESPONSE_CONV_INFO_2) - mock_response_conv_info["channel"]["created_at"] = dt.datetime.fromtimestamp( - mock_response_conv_info["channel"]["created"] - ) - mock_response_conv_info["channel"]["updated_at"] = dt.datetime.fromtimestamp( - mock_response_conv_info["channel"]["updated"] / 1000 - ) - - expected_df_conv_info_1 = self._get_expected_df_for_conv_info_1() - expected_df_conv_info_2 = pd.DataFrame( - [mock_response_conv_info["channel"]], columns=self.resource.get_columns() - ) - - return pd.concat([expected_df_conv_info_1, expected_df_conv_info_2], ignore_index=True) - - def _get_expected_df_for_conv_list_1(self): - """ - Returns the expected DataFrame for a single call to the `conversations_list` method. - """ - mock_response_conv_list = deepcopy(MOCK_RESPONSE_CONV_LIST_1) - mock_response_conv_list["channels"][0]["created_at"] = dt.datetime.fromtimestamp( - mock_response_conv_list["channels"][0]["created"] - ) - mock_response_conv_list["channels"][0]["updated_at"] = dt.datetime.fromtimestamp( - mock_response_conv_list["channels"][0]["updated"] / 1000 - ) - - return pd.DataFrame([mock_response_conv_list["channels"][0]], columns=self.resource.get_columns()) - - def _get_expected_df_for_conv_list_2(self): - """ - Returns the expected DataFrame for multiple(2) calls to the `conversations_list` method. 
- """ - mock_response_conv_list = deepcopy(MOCK_RESPONSE_CONV_LIST_2) - mock_response_conv_list["channels"][0]["created_at"] = dt.datetime.fromtimestamp( - mock_response_conv_list["channels"][0]["created"] - ) - mock_response_conv_list["channels"][0]["updated_at"] = dt.datetime.fromtimestamp( - mock_response_conv_list["channels"][0]["updated"] / 1000 - ) - - expected_df_conv_list_1 = self._get_expected_df_for_conv_list_1() - expected_df_conv_list_2 = pd.DataFrame( - [mock_response_conv_list["channels"][0]], columns=self.resource.get_columns() - ) - - return pd.concat([expected_df_conv_list_1, expected_df_conv_list_2], ignore_index=True) - - def test_list_with_channel_id(self): - """ - Tests the `list` method of the SlackConversationsTable class to ensure it correctly fetches the details of a specific conversation. - """ - self.mock_connect.return_value.conversations_info.return_value = deepcopy(MOCK_SLACK_RESPONSE_CONV_INFO_1) - - response = self.resource.list( - conditions=[ - FilterCondition(column="id", op=FilterOperator.EQUAL, value=MOCK_RESPONSE_CONV_INFO_1["channel"]["id"]) - ] - ) - - self.assertEqual(self.mock_connect.return_value.conversations_info.call_count, 1) - self.mock_connect.return_value.conversations_info.assert_any_call( - channel=MOCK_RESPONSE_CONV_INFO_1["channel"]["id"] - ) - - assert isinstance(response, pd.DataFrame) - expected_df = self._get_expected_df_for_conv_info_1() - pd.testing.assert_frame_equal(response, expected_df) - - def test_list_with_multiple_channel_ids(self): - """ - Tests the `list` method of the SlackConversationsTable class to ensure it correctly fetches the details of multiple conversations. 
- """ - self.mock_connect.return_value.conversations_info.side_effect = [ - deepcopy(MOCK_SLACK_RESPONSE_CONV_INFO_1), - deepcopy(MOCK_SLACK_RESPONSE_CONV_INFO_2), - ] - - response = self.resource.list( - conditions=[ - FilterCondition( - column="id", - op=FilterOperator.IN, - value=[MOCK_RESPONSE_CONV_INFO_1["channel"]["id"], MOCK_RESPONSE_CONV_INFO_2["channel"]["id"]], - ) - ] - ) - - self.assertEqual(self.mock_connect.return_value.conversations_info.call_count, 2) - self.mock_connect.return_value.conversations_info.assert_any_call( - channel=MOCK_RESPONSE_CONV_INFO_1["channel"]["id"] - ) - self.mock_connect.return_value.conversations_info.assert_any_call( - channel=MOCK_RESPONSE_CONV_INFO_2["channel"]["id"] - ) - - assert isinstance(response, pd.DataFrame) - expected_df = self._get_expected_df_for_conv_info_2() - pd.testing.assert_frame_equal(response, expected_df) - - def test_list_with_no_conditions_and_no_limit(self): - """ - Tests the `list` method of the SlackConversationsTable class to ensure it correctly fetches a list of conversations without any conditions or limits. - """ - self.mock_connect.return_value.conversations_list.return_value = deepcopy(MOCK_SLACK_RESPONSE_CONV_LIST_1) - - response = self.resource.list(conditions=[]) - - self.assertEqual(self.mock_connect.return_value.conversations_list.call_count, 1) - self.mock_connect.return_value.conversations_list.assert_any_call(limit=1000) - - assert isinstance(response, pd.DataFrame) - expected_df = self._get_expected_df_for_conv_list_1() - pd.testing.assert_frame_equal(response, expected_df) - - def test_list_with_no_conditions_and_limit_less_than_1000(self): - """ - Tests the `list` method of the SlackConversationsTable class to ensure it correctly fetches a list of conversations without any conditions and with a limit less than 1000. 
- """ - self.mock_connect.return_value.conversations_list.return_value = deepcopy(MOCK_SLACK_RESPONSE_CONV_LIST_1) - - response = self.resource.list(conditions=[], limit=999) - - self.assertEqual(self.mock_connect.return_value.conversations_list.call_count, 1) - self.mock_connect.return_value.conversations_list.assert_any_call(limit=999) - - assert isinstance(response, pd.DataFrame) - expected_df = self._get_expected_df_for_conv_list_1() - pd.testing.assert_frame_equal(response, expected_df) - - def test_list_with_no_conditions_and_limit_more_than_1000(self): - """ - Tests the `list` method of the SlackConversationsTable class to ensure it correctly fetches a list of conversations without any conditions and with a limit more than 1000. - """ - self.mock_connect.return_value.conversations_list.side_effect = [ - deepcopy(MOCK_SLACK_RESPONSE_CONV_LIST_1), - deepcopy(MOCK_SLACK_RESPONSE_CONV_LIST_2), - ] - - response = self.resource.list(conditions=[], limit=1001) - - self.assertEqual(self.mock_connect.return_value.conversations_list.call_count, 2) - self.mock_connect.return_value.conversations_list.assert_any_call() - self.mock_connect.return_value.conversations_list.assert_any_call(cursor="dGVhbTpDMDYxRkE1UEI=") - - assert isinstance(response, pd.DataFrame) - expected_df = self._get_expected_df_for_conv_list_2() - pd.testing.assert_frame_equal(response, expected_df) - - -class TestSlackMessagesTable(SlackAPIResourceTestSetup, unittest.TestCase): - def create_resource(self): - return SlackMessagesTable(self.handler) - - def _get_expected_df_for_conv_history_1(self): - """ - Returns the expected DataFrame for a single call to the `conversations_history` method. 
- """ - mock_response_conv_history = deepcopy(MOCK_RESPONSE_CONV_HISTORY_1) - - expected_df_conv_history = pd.DataFrame( - mock_response_conv_history["messages"], columns=self.resource.get_columns() - ) - expected_df_conv_history["created_at"] = pd.to_datetime( - expected_df_conv_history["ts"].astype(float), unit="s" - ).dt.strftime("%Y-%m-%d %H:%M:%S") - - expected_df_conv_history["channel_name"] = MOCK_RESPONSE_CONV_INFO_1["channel"]["name"] - expected_df_conv_history["channel_id"] = MOCK_RESPONSE_CONV_INFO_1["channel"]["id"] - - return expected_df_conv_history - - def _get_expected_df_for_conv_history_2(self): - """ - Returns the expected DataFrame for multiple(2) calls to the `conversations_history` method. - """ - mock_response_conv_history_2 = deepcopy(MOCK_RESPONSE_CONV_HISTORY_2) - - expected_df_conv_history_1 = self._get_expected_df_for_conv_history_1() - expected_df_conv_history_2 = pd.DataFrame( - mock_response_conv_history_2["messages"], columns=self.resource.get_columns() - ) - expected_df_conv_history_2["created_at"] = pd.to_datetime( - expected_df_conv_history_2["ts"].astype(float), unit="s" - ).dt.strftime("%Y-%m-%d %H:%M:%S") - - expected_df_conv_history_2["channel_name"] = MOCK_RESPONSE_CONV_INFO_1["channel"]["name"] - expected_df_conv_history_2["channel_id"] = MOCK_RESPONSE_CONV_INFO_1["channel"]["id"] - - return pd.concat([expected_df_conv_history_1, expected_df_conv_history_2], ignore_index=True) - - def test_list_with_channel_id_and_no_limit(self): - """ - Tests the `list` method of the SlackMessagesTable class to ensure it correctly fetches the messages of a specific conversation without any limit. 
- """ - self.mock_connect.return_value.conversations_history.return_value = deepcopy(MOCK_SLACK_RESPONSE_CONV_HISTORY_1) - self.mock_connect.return_value.conversations_info.return_value = deepcopy(MOCK_SLACK_RESPONSE_CONV_INFO_1) - - response = self.resource.list( - conditions=[ - FilterCondition( - column="channel_id", op=FilterOperator.EQUAL, value=MOCK_RESPONSE_CONV_INFO_1["channel"]["id"] - ) - ] - ) - - self.assertEqual(self.mock_connect.return_value.conversations_history.call_count, 1) - self.mock_connect.return_value.conversations_history.assert_any_call( - channel=MOCK_RESPONSE_CONV_INFO_1["channel"]["id"], limit=999 - ) - - assert isinstance(response, pd.DataFrame) - expected_df = self._get_expected_df_for_conv_history_1() - pd.testing.assert_frame_equal(response, expected_df) - - def test_list_with_channel_id_and_limit_less_than_999(self): - """ - Tests the `list` method of the SlackMessagesTable class to ensure it correctly fetches the messages of a specific conversation with a limit less than 999. 
- """ - self.mock_connect.return_value.conversations_history.return_value = deepcopy(MOCK_SLACK_RESPONSE_CONV_HISTORY_1) - - self.mock_connect.return_value.conversations_info.return_value = deepcopy(MOCK_SLACK_RESPONSE_CONV_INFO_1) - - response = self.resource.list( - conditions=[ - FilterCondition( - column="channel_id", op=FilterOperator.EQUAL, value=MOCK_RESPONSE_CONV_INFO_1["channel"]["id"] - ) - ], - limit=998, - ) - - self.assertEqual(self.mock_connect.return_value.conversations_history.call_count, 1) - self.mock_connect.return_value.conversations_history.assert_any_call( - channel=MOCK_RESPONSE_CONV_INFO_1["channel"]["id"], limit=998 - ) - - assert isinstance(response, pd.DataFrame) - expected_df = self._get_expected_df_for_conv_history_1() - pd.testing.assert_frame_equal(response, expected_df) - - def test_list_with_channel_id_and_limit_more_than_999(self): - """ - Tests the `list` method of the SlackMessagesTable class to ensure it correctly fetches the messages of a specific conversation with a limit more than 999. 
- """ - self.mock_connect.return_value.conversations_history.side_effect = [ - deepcopy(MOCK_SLACK_RESPONSE_CONV_HISTORY_1), - deepcopy(MOCK_SLACK_RESPONSE_CONV_HISTORY_2), - ] - - self.mock_connect.return_value.conversations_info.return_value = deepcopy(MOCK_SLACK_RESPONSE_CONV_INFO_1) - - response = self.resource.list( - conditions=[ - FilterCondition( - column="channel_id", op=FilterOperator.EQUAL, value=MOCK_RESPONSE_CONV_INFO_1["channel"]["id"] - ) - ], - limit=1000, - ) - - self.assertEqual(self.mock_connect.return_value.conversations_history.call_count, 2) - self.mock_connect.return_value.conversations_history.assert_any_call( - channel=MOCK_RESPONSE_CONV_INFO_1["channel"]["id"], limit=999 - ) - self.mock_connect.return_value.conversations_history.assert_any_call( - cursor=MOCK_RESPONSE_CONV_HISTORY_1["response_metadata"]["next_cursor"], - channel=MOCK_RESPONSE_CONV_INFO_1["channel"]["id"], - limit=999, - ) - - assert isinstance(response, pd.DataFrame) - expected_df = self._get_expected_df_for_conv_history_2() - pd.testing.assert_frame_equal(response, expected_df) - - def test_list_without_channel_id(self): - """ - Tests the `list` method raises a ValueError when the `channel_id` column is not included in the conditions. - """ - with self.assertRaises(ValueError): - self.resource.list(conditions=[]) - - def test_list_with_channel_id_and_unsupported_operator(self): - """ - Tests the `list` method raises a ValueError when an unsupported operator is used with the `channel_id` column. - """ - with self.assertRaises(ValueError): - self.resource.list( - conditions=[ - FilterCondition( - column="channel_id", op=FilterOperator.IN, value=[MOCK_RESPONSE_CONV_INFO_1["channel"]["id"]] - ) - ] - ) - - def test_insert_with_channel_id_and_text(self): - """ - Tests the `insert` method of the SlackMessagesTable class to ensure it correctly sends a message to a specific conversation. 
- """ - self.mock_connect.return_value.chat_postMessage.return_value = MagicMock() - - self.resource.insert( - query=Insert(table="messages", columns=["channel_id", "text"], values=[["C012AB3CD", "Hello, World!"]]) - ) - - self.assertEqual(self.mock_connect.return_value.chat_postMessage.call_count, 1) - self.mock_connect.return_value.chat_postMessage.assert_any_call(channel="C012AB3CD", text="Hello, World!") - - def test_insert_without_channel_id(self): - """ - Tests the `insert` method raises a ValueError when the `channel_id` column is not included in the columns. - """ - with self.assertRaises(ValueError): - self.resource.insert(query=Insert(table="messages", columns=["text"], values=[["Hello, World!"]])) - - def test_insert_without_text(self): - """ - Tests the `insert` method raises a ValueError when the `text` column is not included in the columns. - """ - with self.assertRaises(ValueError): - self.resource.insert(query=Insert(table="messages", columns=["channel_id"], values=[["C012AB3CD"]])) - - def test_update_with_channel_id_text_and_ts(self): - """ - Tests the `update` method of the SlackMessagesTable class to ensure it correctly updates a message in a specific conversation. - """ - self.mock_connect.return_value.chat_update.return_value = MagicMock() - - self.resource.update( - query=Update( - table="messages", - update_columns={"text": "Hello, World!"}, - where=BinaryOperation( - op="and", - args=[ - BinaryOperation(op="=", args=[Identifier("channel_id"), Constant("C012AB3CD")]), - BinaryOperation(op="=", args=[Identifier("ts"), Constant("1512085950.000216")]), - ], - ), - ) - ) - - self.assertEqual(self.mock_connect.return_value.chat_update.call_count, 1) - self.mock_connect.return_value.chat_update.assert_any_call( - channel="C012AB3CD", text="Hello, World!", ts="1512085950.000216" - ) - - def test_update_without_channel_id(self): - """ - Tests the `update` method raises a ValueError when the `channel_id` column is not included in the conditions. 
- """ - with self.assertRaises(ValueError): - self.resource.update( - query=Update( - table="messages", - update_columns={"text": "Hello, World!"}, - where=BinaryOperation(op="=", args=[Identifier("ts"), Constant("1512085950.000216")]), - ) - ) - - def test_update_without_ts(self): - """ - Tests the `update` method raises a ValueError when the `ts` column is not included in the conditions. - """ - with self.assertRaises(ValueError): - self.resource.update( - query=Update( - table="messages", - update_columns={"text": "Hello, World!"}, - where=BinaryOperation(op="=", args=[Identifier("channel_id"), Constant("C012AB3CD")]), - ) - ) - - def test_update_without_text(self): - """ - Tests the `update` method raises a ValueError when the `text` column is not included in the update_columns. - """ - with self.assertRaises(ValueError): - self.resource.update( - query=Update( - table="messages", - update_columns={}, - where=BinaryOperation( - op="and", - args=[ - BinaryOperation(op="=", args=[Identifier("channel_id"), Constant("C012AB3CD")]), - BinaryOperation(op="=", args=[Identifier("ts"), Constant("1512085950.000216")]), - ], - ), - ) - ) - - def test_delete_with_channel_id_and_ts(self): - """ - Tests the `delete` method of the SlackMessagesTable class to ensure it correctly deletes a message in a specific conversation. 
- """ - self.mock_connect.return_value.chat_delete.return_value = MagicMock() - - self.resource.delete( - query=Delete( - table="messages", - where=BinaryOperation( - op="and", - args=[ - BinaryOperation(op="=", args=[Identifier("channel_id"), Constant("C012AB3CD")]), - BinaryOperation(op="=", args=[Identifier("ts"), Constant("1512085950.000216")]), - ], - ), - ) - ) - - self.assertEqual(self.mock_connect.return_value.chat_delete.call_count, 1) - self.mock_connect.return_value.chat_delete.assert_any_call( - channel=MOCK_RESPONSE_CONV_INFO_1["channel"]["id"], ts=float("1512085950.000216") - ) - - def test_delete_without_channel_id(self): - """ - Tests the `delete` method raises a ValueError when the `channel_id` column is not included in the conditions. - """ - with self.assertRaises(ValueError): - self.resource.delete( - query=Delete( - table="messages", - where=BinaryOperation(op="=", args=[Identifier("ts"), Constant("1512085950.000216")]), - ) - ) - - def test_delete_without_ts(self): - """ - Tests the `delete` method raises a ValueError when the `ts` column is not included in the conditions. - """ - with self.assertRaises(ValueError): - self.resource.delete( - query=Delete( - table="messages", - where=BinaryOperation(op="=", args=[Identifier("channel_id"), Constant("C012AB3CD")]), - ) - ) - - -class TestSlackThreadsTable(SlackAPIResourceTestSetup, unittest.TestCase): - def create_resource(self): - return SlackThreadsTable(self.handler) - - def _get_expected_df_for_conv_replies_1(self): - """ - Returns the expected DataFrame for a single call to the `conversations_replies` method. 
- """ - mock_response_conv_replies = deepcopy(MOCK_RESPONSE_CONV_REPLIES_1) - - expected_df_conv_replies = pd.DataFrame( - mock_response_conv_replies["messages"], columns=self.resource.get_columns() - ) - - expected_df_conv_replies["channel_name"] = MOCK_RESPONSE_CONV_INFO_1["channel"]["name"] - expected_df_conv_replies["channel_id"] = MOCK_RESPONSE_CONV_INFO_1["channel"]["id"] - - return expected_df_conv_replies - - def _get_expected_df_for_conv_replies_2(self): - """ - Returns the expected DataFrame for multiple(2) calls to the `conversations_replies` method. - """ - mock_response_conv_replies_2 = deepcopy(MOCK_RESPONSE_CONV_REPLIES_2) - - expected_df_conv_replies_1 = self._get_expected_df_for_conv_replies_1() - expected_df_conv_replies_2 = pd.DataFrame( - mock_response_conv_replies_2["messages"], columns=self.resource.get_columns() - ) - - expected_df_conv_replies_2["channel_name"] = MOCK_RESPONSE_CONV_INFO_1["channel"]["name"] - expected_df_conv_replies_2["channel_id"] = MOCK_RESPONSE_CONV_INFO_1["channel"]["id"] - - return pd.concat([expected_df_conv_replies_1, expected_df_conv_replies_2], ignore_index=True) - - def test_list_with_channel_id_thread_ts_and_no_limit(self): - """ - Tests the `list` method of the SlackThreadsTable class to ensure it correctly fetches the replies of a specific thread without any limit. 
- """ - self.mock_connect.return_value.conversations_replies.return_value = deepcopy(MOCK_SLACK_RESPONSE_CONV_REPLIES_1) - self.mock_connect.return_value.conversations_info.return_value = deepcopy(MOCK_SLACK_RESPONSE_CONV_INFO_1) - - response = self.resource.list( - conditions=[ - FilterCondition( - column="channel_id", op=FilterOperator.EQUAL, value=MOCK_RESPONSE_CONV_INFO_1["channel"]["id"] - ), - FilterCondition(column="thread_ts", op=FilterOperator.EQUAL, value="1482960137.003543"), - ] - ) - - self.assertEqual(self.mock_connect.return_value.conversations_replies.call_count, 1) - self.mock_connect.return_value.conversations_replies.assert_any_call( - channel=MOCK_RESPONSE_CONV_INFO_1["channel"]["id"], ts="1482960137.003543", limit=1000 - ) - - assert isinstance(response, pd.DataFrame) - expected_df = self._get_expected_df_for_conv_replies_1() - pd.testing.assert_frame_equal(response, expected_df) - - def test_list_with_channel_id_thread_ts_and_limit_less_than_1000(self): - """ - Tests the `list` method of the SlackThreadsTable class to ensure it correctly fetches the replies of a specific thread with a limit less than 1000. 
- """ - self.mock_connect.return_value.conversations_replies.return_value = deepcopy(MOCK_SLACK_RESPONSE_CONV_REPLIES_1) - self.mock_connect.return_value.conversations_info.return_value = deepcopy(MOCK_SLACK_RESPONSE_CONV_INFO_1) - - response = self.resource.list( - conditions=[ - FilterCondition( - column="channel_id", op=FilterOperator.EQUAL, value=MOCK_RESPONSE_CONV_INFO_1["channel"]["id"] - ), - FilterCondition(column="thread_ts", op=FilterOperator.EQUAL, value="1482960137.003543"), - ], - limit=999, - ) - - self.assertEqual(self.mock_connect.return_value.conversations_replies.call_count, 1) - self.mock_connect.return_value.conversations_replies.assert_any_call( - channel=MOCK_RESPONSE_CONV_INFO_1["channel"]["id"], ts="1482960137.003543", limit=999 - ) - - assert isinstance(response, pd.DataFrame) - expected_df = self._get_expected_df_for_conv_replies_1() - pd.testing.assert_frame_equal(response, expected_df) - - def test_list_with_channel_id_thread_ts_and_limit_more_than_1000(self): - """ - Tests the `list` method of the SlackThreadsTable class to ensure it correctly fetches the replies of a specific thread with a limit more than 1000. 
- """ - self.mock_connect.return_value.conversations_replies.side_effect = [ - deepcopy(MOCK_SLACK_RESPONSE_CONV_REPLIES_1), - deepcopy(MOCK_SLACK_RESPONSE_CONV_REPLIES_2), - ] - - self.mock_connect.return_value.conversations_info.return_value = deepcopy(MOCK_SLACK_RESPONSE_CONV_INFO_1) - - response = self.resource.list( - conditions=[ - FilterCondition( - column="channel_id", op=FilterOperator.EQUAL, value=MOCK_RESPONSE_CONV_INFO_1["channel"]["id"] - ), - FilterCondition(column="thread_ts", op=FilterOperator.EQUAL, value="1482960137.003543"), - ], - limit=1001, - ) - - self.assertEqual(self.mock_connect.return_value.conversations_replies.call_count, 2) - self.mock_connect.return_value.conversations_replies.assert_any_call( - channel=MOCK_RESPONSE_CONV_INFO_1["channel"]["id"], ts="1482960137.003543" - ) - self.mock_connect.return_value.conversations_replies.assert_any_call( - cursor=MOCK_RESPONSE_CONV_REPLIES_1["response_metadata"]["next_cursor"] - ) - - assert isinstance(response, pd.DataFrame) - expected_df = self._get_expected_df_for_conv_replies_2() - pd.testing.assert_frame_equal(response, expected_df) - - def test_list_without_channel_id(self): - """ - Tests the `list` method raises a ValueError when the `channel_id` column is not included in the conditions. - """ - with self.assertRaises(ValueError): - self.resource.list( - conditions=[FilterCondition(column="thread_ts", op=FilterOperator.EQUAL, value="1482960137.003543")] - ) - - def test_list_with_channel_id_and_unsupported_operator(self): - """ - Tests the `list` method raises a ValueError when an unsupported operator is used with the `channel_id` column. 
- """ - with self.assertRaises(ValueError): - self.resource.list( - conditions=[ - FilterCondition( - column="channel_id", op=FilterOperator.IN, value=[MOCK_RESPONSE_CONV_INFO_1["channel"]["id"]] - ) - ] - ) - - def test_list_without_thread_ts(self): - """ - Tests the `list` method raises a ValueError when the `thread_ts` column is not included in the conditions. - """ - with self.assertRaises(ValueError): - self.resource.list( - conditions=[ - FilterCondition( - column="channel_id", op=FilterOperator.EQUAL, value=MOCK_RESPONSE_CONV_INFO_1["channel"]["id"] - ) - ] - ) - - def test_insert_with_channel_id_thread_ts_and_text(self): - """ - Tests the `insert` method of the SlackThreadsTable class to ensure it correctly sends a reply to a specific thread. - """ - self.mock_connect.return_value.chat_postMessage.return_value = MagicMock() - - self.resource.insert( - query=Insert( - table="threads", - columns=["channel_id", "thread_ts", "text"], - values=[["C012AB3CD", "1482960137.003543", "Hello, World!"]], - ) - ) - - self.assertEqual(self.mock_connect.return_value.chat_postMessage.call_count, 1) - self.mock_connect.return_value.chat_postMessage.assert_any_call( - channel="C012AB3CD", thread_ts="1482960137.003543", text="Hello, World!" - ) - - def test_insert_without_channel_id(self): - """ - Tests the `insert` method raises a ValueError when the `channel_id` column is not included in the columns. - """ - with self.assertRaises(ValueError): - self.resource.insert( - query=Insert( - table="threads", columns=["thread_ts", "text"], values=[["1482960137.003543", "Hello, World!"]] - ) - ) - - def test_insert_without_thread_ts(self): - """ - Tests the `insert` method raises a ValueError when the `thread_ts` column is not included in the columns. 
- """ - with self.assertRaises(ValueError): - self.resource.insert( - query=Insert(table="threads", columns=["channel_id", "text"], values=[["C012AB3CD", "Hello, World!"]]) - ) - - def test_insert_without_text(self): - """ - Tests the `insert` method raises a ValueError when the `text` column is not included in the columns. - """ - with self.assertRaises(ValueError): - self.resource.insert( - query=Insert( - table="threads", columns=["channel_id", "thread_ts"], values=[["C012AB3CD", "1482960137.003543"]] - ) - ) - - -class TestSlackUsersTable(SlackAPIResourceTestSetup, unittest.TestCase): - def create_resource(self): - return SlackUsersTable(self.handler) - - def _get_expected_df_for_users_list_1(self): - """ - Returns the expected DataFrame for a single call to the `users_list` method. - """ - mock_response_users_list = deepcopy(MOCK_RESPONSE_USERS_LIST_1) - - expected_df_users_list = pd.DataFrame(mock_response_users_list["members"], columns=self.resource.get_columns()) - - return expected_df_users_list - - def _get_expected_df_for_users_list_2(self): - """ - Returns the expected DataFrame for multiple(2) calls to the `users_list` method. - """ - mock_response_users_list_2 = deepcopy(MOCK_RESPONSE_USERS_LIST_2) - - expected_df_users_list_1 = self._get_expected_df_for_users_list_1() - expected_df_users_list_2 = pd.DataFrame( - mock_response_users_list_2["members"], columns=self.resource.get_columns() - ) - - return pd.concat([expected_df_users_list_1, expected_df_users_list_2], ignore_index=True) - - def test_list_with_no_limit(self): - """ - Tests the `list` method of the SlackUsersTable class to ensure it correctly fetches the details of all users without any limit. 
- """ - self.mock_connect.return_value.users_list.return_value = deepcopy(MOCK_SLACK_RESPONSE_USERS_LIST_1) - - response = self.resource.list() - - self.assertEqual(self.mock_connect.return_value.users_list.call_count, 1) - self.mock_connect.return_value.users_list.assert_any_call(limit=1000) - - assert isinstance(response, pd.DataFrame) - expected_df = self._get_expected_df_for_users_list_1() - pd.testing.assert_frame_equal(response, expected_df) - - def test_list_with_limit_less_than_1000(self): - """ - Tests the `list` method of the SlackUsersTable class to ensure it correctly fetches the details of all users with a limit less than 1000. - """ - self.mock_connect.return_value.users_list.return_value = deepcopy(MOCK_SLACK_RESPONSE_USERS_LIST_1) - - response = self.resource.list(limit=999) - - self.assertEqual(self.mock_connect.return_value.users_list.call_count, 1) - self.mock_connect.return_value.users_list.assert_any_call(limit=999) - - assert isinstance(response, pd.DataFrame) - expected_df = self._get_expected_df_for_users_list_1() - pd.testing.assert_frame_equal(response, expected_df) - - def test_list_with_limit_more_than_1000(self): - """ - Tests the `list` method of the SlackUsersTable class to ensure it correctly fetches the details of all users with a limit more than 1000. 
- """ - self.mock_connect.return_value.users_list.side_effect = [ - deepcopy(MOCK_SLACK_RESPONSE_USERS_LIST_1), - deepcopy(MOCK_SLACK_RESPONSE_USERS_LIST_2), - ] - - response = self.resource.list(limit=1001) - - self.assertEqual(self.mock_connect.return_value.users_list.call_count, 2) - self.mock_connect.return_value.users_list.assert_any_call() - self.mock_connect.return_value.users_list.assert_any_call( - cursor=MOCK_RESPONSE_USERS_LIST_1["response_metadata"]["next_cursor"] - ) - - assert isinstance(response, pd.DataFrame) - expected_df = self._get_expected_df_for_users_list_2() - pd.testing.assert_frame_equal(response, expected_df) - - -class TestSlackConnectionArgs(unittest.TestCase): - def test_connection_args_metadata(self): - self.assertIn("token", connection_args) - self.assertTrue(connection_args["token"]["required"]) - self.assertEqual(connection_args["token"]["label"], "Token") - self.assertFalse(connection_args["app_token"]["required"]) - self.assertTrue(connection_args["app_token"]["secret"]) - - def test_connection_args_example(self): - self.assertIn("token", connection_args_example) - self.assertTrue(connection_args_example["token"].startswith("xapp-")) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/data/test.csv b/tests/unit/handlers/data/test.csv deleted file mode 100644 index 832bab243fd..00000000000 --- a/tests/unit/handlers/data/test.csv +++ /dev/null @@ -1,4 +0,0 @@ -col_one,col_two,col_three,col_four -1,-1,0.1,A -2,-2,0.2,B -3,-3,0.3,C diff --git a/tests/unit/handlers/data/test.json b/tests/unit/handlers/data/test.json deleted file mode 100644 index c6ab737bf17..00000000000 --- a/tests/unit/handlers/data/test.json +++ /dev/null @@ -1 +0,0 @@ -[{"col_one":1,"col_two":-1,"col_three":0.1,"col_four":"A"},{"col_one":2,"col_two":-2,"col_three":0.2,"col_four":"B"},{"col_one":3,"col_two":-3,"col_three":0.3,"col_four":"C"}] diff --git a/tests/unit/handlers/data/test.parquet b/tests/unit/handlers/data/test.parquet 
deleted file mode 100644 index 59fefdff87d..00000000000 Binary files a/tests/unit/handlers/data/test.parquet and /dev/null differ diff --git a/tests/unit/handlers/data/test.pdf b/tests/unit/handlers/data/test.pdf deleted file mode 100644 index a9c693722ce..00000000000 Binary files a/tests/unit/handlers/data/test.pdf and /dev/null differ diff --git a/tests/unit/handlers/data/test.txt b/tests/unit/handlers/data/test.txt deleted file mode 100644 index fea1ada39cb..00000000000 --- a/tests/unit/handlers/data/test.txt +++ /dev/null @@ -1,10 +0,0 @@ -Overview -Introduction to MindsDB -MindsDB is an AI Automation platform for building AI/ML powered features and applications. It works by connecting any source of data with any AI/ML model or framework and automating how real-time data flows between them. - -MindsDB allows you to easily: - -Connect to any store of data or end-user application. -Pass data to an AI model from any store of data or end-user application. -Plug the output of an AI model into any store of data or end-user application. -Fully automate these workflows to build AI-powered features and applications. 
diff --git a/tests/unit/handlers/data/test.xlsx b/tests/unit/handlers/data/test.xlsx deleted file mode 100644 index 811ddd0d18e..00000000000 Binary files a/tests/unit/handlers/data/test.xlsx and /dev/null differ diff --git a/tests/unit/handlers/test_bigquery.py b/tests/unit/handlers/test_bigquery.py deleted file mode 100644 index 448af57d609..00000000000 --- a/tests/unit/handlers/test_bigquery.py +++ /dev/null @@ -1,241 +0,0 @@ -import unittest -import pytest -import pandas as pd -from collections import OrderedDict -from unittest.mock import patch, MagicMock -from google.api_core.exceptions import BadRequest - -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - RESPONSE_TYPE, - TableResponse, - ErrorResponse, -) - -try: - from mindsdb.integrations.handlers.bigquery_handler.bigquery_handler import BigQueryHandler -except ImportError: - pytestmark = pytest.mark.skip("Bigquery handler not installed") - - -class TestBigQueryHandler(unittest.TestCase): - dummy_connection_data = OrderedDict( - project_id="tough-future-332513", - dataset="example_ds", - service_account_keys="example_keys", - ) - - def setUp(self): - self.patcher_get_oauth2_credentials = patch( - "mindsdb.integrations.utilities.handlers.auth_utilities.google.GoogleServiceAccountOAuth2Manager.get_oauth2_credentials" - ) - self.patcher_client = patch("mindsdb.integrations.handlers.bigquery_handler.bigquery_handler.Client") - self.mock_get_oauth2_credentials = self.patcher_get_oauth2_credentials.start() - self.mock_connect = self.patcher_client.start() - self.handler = BigQueryHandler("bigquery", connection_data=self.dummy_connection_data) - - def tearDown(self): - self.patcher_get_oauth2_credentials.stop() - self.patcher_client.stop() - - def test_connect_success(self): - """ - Test if `connect` method successfully establishes a connection and sets `is_connected` flag to True. - Also, verifies that google.cloud.bigquery.Client is called exactly once. 
- """ - self.mock_connect.return_value = MagicMock() - connection = self.handler.connect() - self.assertIsNotNone(connection) - self.assertTrue(self.handler.is_connected) - self.mock_connect.assert_called_once() - - def test_connect_failure(self): - """ - Ensures that the connect method correctly handles a connection failure by raising a google.api_core.exceptions.BadRequest and sets is_connected to False. - """ - self.mock_connect.side_effect = BadRequest("Connection Failed") - - with self.assertRaises(BadRequest): - self.handler.connect() - self.assertFalse(self.handler.is_connected) - - def test_check_connection(self): - """ - Verifies that the `check_connection` method returns a StatusResponse object and accurately reflects the connection status. - """ - self.mock_connect.return_value = MagicMock() - connected = self.handler.check_connection() - self.assertTrue(connected) - assert isinstance(connected, StatusResponse) - self.assertFalse(connected.error_message) - - def test_native_query(self): - """ - Tests the `native_query` method to ensure it executes a SQL query using the mock query object and returns a Response object. 
- """ - mock_conn = MagicMock() - self.handler.connect = MagicMock(return_value=mock_conn) - - mock_query = MagicMock() - mock_query.to_dataframe.return_value = pd.DataFrame({"col": [1, 2, 3]}) - mock_conn.query.return_value = mock_query - - query_str = "SELECT * FROM table" - - with patch( - "mindsdb.integrations.handlers.bigquery_handler.bigquery_handler.QueryJobConfig" - ) as mock_query_job_config: - mock_query_job_config_instance = mock_query_job_config.return_value - data = self.handler.native_query(query_str) - mock_conn.query.assert_called_once_with(query_str, job_config=mock_query_job_config_instance) - assert isinstance(data, TableResponse) - - def test_native_query_empty_select_returns_table(self): - mock_conn = MagicMock() - self.handler.connect = MagicMock(return_value=mock_conn) - - mock_query = MagicMock() - mock_query.to_dataframe.return_value = pd.DataFrame(columns=["id"]) - mock_conn.query.return_value = mock_query - - with patch("mindsdb.integrations.handlers.bigquery_handler.bigquery_handler.QueryJobConfig"): - response = self.handler.native_query("SELECT id FROM table WHERE 1 = 0") - - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - self.assertEqual(list(response.data_frame.columns), ["id"]) - self.assertTrue(response.data_frame.empty) - - def test_native_query_empty_dataframe_without_columns_returns_ok(self): - mock_conn = MagicMock() - self.handler.connect = MagicMock(return_value=mock_conn) - - mock_query = MagicMock() - mock_query.to_dataframe.return_value = pd.DataFrame() - mock_conn.query.return_value = mock_query - - with patch("mindsdb.integrations.handlers.bigquery_handler.bigquery_handler.QueryJobConfig"): - response = self.handler.native_query("UPDATE table SET col = 1") - - self.assertEqual(response.type, RESPONSE_TYPE.OK) - - def test_get_tables(self): - """ - Checks if the `get_tables` method correctly constructs the SQL query and if it calls `native_query` with the correct query. 
- """ - self.handler.native_query = MagicMock() - - self.handler.get_tables() - - expected_query = f""" - SELECT table_name, table_schema, table_type - FROM `{self.dummy_connection_data["project_id"]}.{self.dummy_connection_data["dataset"]}.INFORMATION_SCHEMA.TABLES` - WHERE table_type IN ('BASE TABLE', 'VIEW') - """ - - self.handler.native_query.assert_called_once_with(expected_query) - - def test_get_columns(self): - """ - Checks if the `get_columns` method correctly constructs the SQL query and if it calls `native_query` with the correct query. - """ - self.handler.native_query = MagicMock() - - table_name = "mock_table" - self.handler.get_columns(table_name) - - expected_query = f""" - SELECT column_name AS Field, data_type as Type - FROM `{self.dummy_connection_data["project_id"]}.{self.dummy_connection_data["dataset"]}.INFORMATION_SCHEMA.COLUMNS` - WHERE table_name = '{table_name}' - """ - - self.handler.native_query.assert_called_once_with(expected_query) - - def test_meta_get_tables_filters(self): - self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame())) - - self.handler.meta_get_tables(table_names=["orders"]) - - query = self.handler.native_query.call_args[0][0] - self.assertIn("AND t.table_name IN ('orders')", query) - - def test_meta_get_columns_filters(self): - self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame())) - - self.handler.meta_get_columns(table_names=["orders"]) - - query = self.handler.native_query.call_args[0][0] - self.assertIn("WHERE table_name IN ('orders')", query) - - def test_meta_get_column_statistics_batches_results(self): - columns = [f"col_{i}" for i in range(22)] - - # First response: column types query (required by meta_get_column_statistics_for_table) - column_types_result = pd.DataFrame( - { - "column_name": columns, - "data_type": ["INT64"] * 22, # All columns are INT64 type - } - ) - - # Second response: first batch statistics (20 columns) - first_batch_result = 
pd.DataFrame( - { - "table_name": ["table"] * 20, - "column_name": [f"col_{i}" for i in range(20)], - "null_percentage": [0.0] * 20, - "minimum_value": ["1"] * 20, - "maximum_value": ["10"] * 20, - "distinct_values_count": [10] * 20, - } - ) - - # Third response: second batch statistics (2 columns) - second_batch_result = pd.DataFrame( - { - "table_name": ["table"] * 2, - "column_name": ["col_20", "col_21"], - "null_percentage": [0.0, 50.0], - "minimum_value": ["1", "a"], - "maximum_value": ["10", "z"], - "distinct_values_count": [10, 20], - } - ) - - self.handler.native_query = MagicMock( - side_effect=[ - TableResponse(data=column_types_result), - TableResponse(data=first_batch_result), - TableResponse(data=second_batch_result), - ] - ) - - response = self.handler.meta_get_column_statistics_for_table("table", columns) - - self.assertEqual(response.resp_type, RESPONSE_TYPE.TABLE) - self.assertEqual(len(response.data_frame), 22) # Total of 20 + 2 = 22 columns - self.assertEqual(self.handler.native_query.call_count, 3) # 1 for column types + 2 for batches - - def test_meta_get_column_statistics_returns_error_when_empty(self): - self.handler.native_query = MagicMock(return_value=ErrorResponse(error_message="boom")) - - response = self.handler.meta_get_column_statistics_for_table("table", ["col"]) - self.assertEqual(response.resp_type, RESPONSE_TYPE.ERROR) - - def test_meta_get_primary_keys_filters(self): - self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame())) - self.handler.meta_get_primary_keys(table_names=["orders"]) - - query = self.handler.native_query.call_args[0][0] - self.assertIn("AND tc.table_name IN ('orders')", query) - self.assertNotIn("tc.constraint_name,", query) - - def test_meta_get_foreign_keys_filters(self): - self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame())) - self.handler.meta_get_foreign_keys(table_names=["orders"]) - query = self.handler.native_query.call_args[0][0] - 
self.assertIn("AND tc.table_name IN ('orders')", query) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/test_file.py b/tests/unit/handlers/test_file.py deleted file mode 100644 index 7c54c8cbbc7..00000000000 --- a/tests/unit/handlers/test_file.py +++ /dev/null @@ -1,430 +0,0 @@ -import os -import shutil -import tempfile -from io import BytesIO, StringIO -from pathlib import Path - -import pandas -import pytest -from mindsdb_sql_parser.exceptions import ParsingException -from mindsdb_sql_parser.ast import ( - CreateTable, - DropTables, - Identifier, - Insert, - TableColumn, - Update, -) - -from mindsdb.integrations.handlers.file_handler.file_handler import FileHandler -from mindsdb.integrations.libs.response import RESPONSE_TYPE, INF_SCHEMA_COLUMNS_NAMES_SET, INF_SCHEMA_COLUMNS_NAMES -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE -from mindsdb.integrations.utilities.files.file_reader import ( - FileReader, - FileProcessingError, -) - - -# Define a table to use as content for all of the file types -# This data needs to match that saved in the files in the ./data/ dir (except pdf and txt files) -test_file_content = [ - ["col_one", "col_two", "col_three", "col_four"], - [1, -1, 0.1, "A"], - [2, -2, 0.2, "B"], - [3, -3, 0.3, "C"], -] - -test_excel_sheet_content = [ - ["Sheet_Name"], - ["Sheet1"], -] - -file_records = [("one", 1, test_file_content[0]), ("two", 2, test_file_content[0])] - - -class MockFileController: - """ - Pretends to be a file controller. Gives details of 'file_records' above, and mocks file deletion. - We're not testing the file controller here, so we don't need to rely on it. 
- """ - - def get_files(self): - return [ - { - "name": record[0], - "row_count": record[1], - "columns": record[2], - } - for record in file_records - ] - - def get_files_names(self): - return [file["name"] for file in self.get_files()] - - def get_file_path(self, name): - return True - - def get_file_meta(self, *args, **kwargs): - return self.get_files()[0] - - def delete_file(self, name): - return True - - def save_file(self, name, file_path, file_name=None): - return True - - def get_file_data(self, name, page_name=None): - return pandas.DataFrame(test_file_content[1:], columns=test_file_content[0]) - - def set_file_data(self, name, df, page_name=None): - return True - - -def curr_dir(): - return os.path.dirname(os.path.realpath(__file__)) - - -def csv_file() -> str: - return os.path.join(curr_dir(), "data", "test.csv") - - -def xlsx_file() -> str: - return os.path.join(curr_dir(), "data", "test.xlsx") - - -def json_file() -> str: - return os.path.join(curr_dir(), "data", "test.json") - - -def parquet_file() -> str: - return os.path.join(curr_dir(), "data", "test.parquet") - - -def pdf_file() -> str: - return os.path.join(curr_dir(), "data", "test.pdf") - - -def txt_file() -> str: - return os.path.join(curr_dir(), "data", "test.txt") - - -class TestIsItX: - """Tests all of the 'is_it_x()' functions to determine a file's type""" - - def test_is_it_csv(self): - # We can't test xlsx or parquet here because they're binary files - for file_path, result in ((csv_file(), True), (json_file(), False)): - with open(file_path, "r") as fh: - assert FileReader.is_csv(StringIO(fh.read())) is result - - def test_format(self): - for file_path, result in ( - (csv_file(), "csv"), - (xlsx_file(), "xlsx"), - (json_file(), "json"), - (parquet_file(), "parquet"), - (txt_file(), "txt"), - (pdf_file(), "pdf"), - ): - assert FileReader(path=file_path).get_format() == result - - def test_is_it_json(self): - # We can't test xlsx or parquet here because they're binary files - for 
file_path, result in ( - (csv_file(), False), - (json_file(), True), - (txt_file(), False), - ): - with open(file_path, "r") as fh: - assert FileReader.is_json(StringIO(fh.read())) is result - - def test_is_it_parquet(self): - for file_path, result in ( - (csv_file(), False), - (xlsx_file(), False), - (json_file(), False), - (parquet_file(), True), - (txt_file(), False), - (pdf_file(), False), - ): - with open(file_path, "rb") as fh: - assert FileReader.is_parquet(BytesIO(fh.read())) is result - - -class TestQuery: - """Tests all of the scenarios relating to the query() function""" - - def test_query_drop(self, monkeypatch): - """Test a valid drop table query""" - - def mock_delete(self, name): - return True - - monkeypatch.setattr(MockFileController, "delete_file", mock_delete) - file_handler = FileHandler(file_controller=MockFileController()) - response = file_handler.query(DropTables([Identifier(parts=["one"])])) - - assert response.type == RESPONSE_TYPE.OK - - def test_query_drop_bad_delete(self, monkeypatch): - """Test an invalid drop table query""" - - def mock_delete(self, name): - raise Exception("File delete error") - - monkeypatch.setattr(MockFileController, "delete_file", mock_delete) - file_handler = FileHandler(file_controller=MockFileController()) - response = file_handler.query(DropTables([Identifier(parts=["one"])])) - - assert response.type == RESPONSE_TYPE.ERROR - - def test_query_insert(self, monkeypatch): - """Test an invalid insert query""" - # Create a temporary file to save the csv file to. 
- csv_file_path = csv_file() - csv_tmp = os.path.join(tempfile.gettempdir(), "test.csv") - if os.path.exists(csv_tmp): - os.remove(csv_tmp) - shutil.copy(csv_file_path, csv_tmp) - - def mock_get_file_path(self, name): - return csv_tmp - - monkeypatch.setattr(MockFileController, "get_file_path", mock_get_file_path) - - file_handler = FileHandler(file_controller=MockFileController()) - response = file_handler.query( - Insert( - table=Identifier(parts=["someTable"]), - columns=[ - "col_one", - "col_two", - "col_three", - "col_four", - ], - values=[ - [1, -1, 0.1, "A"], - [2, -2, 0.2, "B"], - [3, -3, 0.3, "C"], - ], - ) - ) - - assert response.type == RESPONSE_TYPE.OK - - def test_query_create(self): - """Test a valid create table query""" - file_handler = FileHandler(file_controller=MockFileController()) - response = file_handler.query( - CreateTable( - name=Identifier(parts=["someTable"]), - columns=[TableColumn(name="col1"), TableColumn(name="col2")], - ) - ) - - assert response.type == RESPONSE_TYPE.OK - - def test_query_create_or_replace(self): - """Test a valid create or replace table query""" - file_handler = FileHandler(file_controller=MockFileController()) - response = file_handler.query( - CreateTable( - name=Identifier(parts=["someTable"]), - columns=[TableColumn(name="col1"), TableColumn(name="col2")], - is_replace=True, - ) - ) - - assert response.type == RESPONSE_TYPE.OK - - def test_query_bad_type(self): - """Test an invalid query type for files""" - file_handler = FileHandler(file_controller=MockFileController()) - response = file_handler.query(Update([Identifier(parts=["someTable"])])) - - assert response.type == RESPONSE_TYPE.ERROR - - def test_native_query(self, monkeypatch): - """Test a valid native table query""" - - def mock_delete(self, name): - return True - - monkeypatch.setattr(MockFileController, "delete_file", mock_delete) - file_handler = FileHandler(file_controller=MockFileController()) - response = file_handler.native_query("DROP TABLE 
one") - - assert response.type == RESPONSE_TYPE.OK - - def test_invalid_native_query(self): - file_handler = FileHandler(file_controller=MockFileController()) - with pytest.raises(ParsingException): - file_handler.native_query("INVALID QUERY") - - def test_query_drop_other_database(self): - file_handler = FileHandler(file_controller=MockFileController()) - response = file_handler.query(DropTables([Identifier(parts=["otherdb", "table1"])])) - assert response.type == RESPONSE_TYPE.ERROR - assert "database" in response.error_message.lower() - - def test_query_create_existing_without_replace(self): - file_handler = FileHandler(file_controller=MockFileController()) - response = file_handler.query( - CreateTable( - name=Identifier(parts=["one"]), - columns=[TableColumn(name="col1")], - is_replace=False, - ) - ) - assert response.type == RESPONSE_TYPE.ERROR - assert "already exists" in response.error_message.lower() - - def test_query_create_invalid_namespace(self): - file_handler = FileHandler(file_controller=MockFileController()) - response = file_handler.query( - CreateTable( - name=Identifier(parts=["files", "nested", "tbl"]), - columns=[TableColumn(name="col1")], - ) - ) - assert response.type == RESPONSE_TYPE.ERROR - assert "namespace" in response.error_message.lower() - - def test_query_select_missing_table(self, monkeypatch): - def missing_file(self, name, page_name=None): - raise FileNotFoundError(f"{name} not found") - - monkeypatch.setattr(MockFileController, "get_file_data", missing_file) - file_handler = FileHandler(file_controller=MockFileController()) - with pytest.raises(RuntimeError): - file_handler.query(file_handler.parser("select * from files.missing_table")) - - -def test_handle_source(): - def get_reader(file_path): - # using path - reader = FileReader(path=file_path) - yield reader - - # using file descriptor - with open(file_path, "rb") as fd: - reader = FileReader(file=fd) - yield reader - fd.seek(0) - content = fd.read() - - # using bytesio - fd 
= BytesIO(content) - reader = FileReader(file=fd, name=Path(file_path).name) - yield reader - - for file_path, expected_columns in ( - (csv_file(), test_file_content[0]), - (xlsx_file(), test_file_content[0]), - (json_file(), test_file_content[0]), - (parquet_file(), test_file_content[0]), - (pdf_file(), ["content", "metadata"]), - (txt_file(), ["content", "metadata"]), - ): - # using different methods to create reader - for reader in get_reader(file_path): - df = reader.get_page_content() - assert isinstance(df, pandas.DataFrame) - - assert df.columns.tolist() == expected_columns - - # The pdf and txt files have some different content - if reader.get_format() not in ("pdf", "txt"): - assert len(df) == len(test_file_content) - 1 - assert df.values.tolist() == test_file_content[1:] - - -@pytest.mark.parametrize( - "csv_string,delimiter", - [ - (StringIO("example,csv,file"), ","), - (StringIO("example;csv;file"), ";"), - (StringIO("example\tcsv\tfile"), "\t"), - ], -) -def test_check_valid_dialects(csv_string, delimiter): - dialect = FileReader._get_csv_dialect(csv_string) - assert dialect.delimiter == delimiter - - -def test_tsv(): - file = BytesIO(b"example;csv;file\tname") - - reader = FileReader(file=file, name="test.tsv") - assert reader.get_format() == "csv" - assert reader.parameters["delimiter"] == "\t" - - df = reader.get_page_content() - assert len(df.columns) == 2 - - -def test_bad_csv_header(): - file = BytesIO(b" a,b ,c\n1,2,3\n") - reader = FileReader(file=file, name="test.tsv") - df = reader.get_page_content() - assert set(df.columns) == set(["a", "b", "c"]) - - wrong_data = [ - b"a, ,c\n1,2,3\n", - b"a, \t,c\n1,2,3\n", - b" ,b,c\n1,2,3\n", - ] - for data in wrong_data: - reader = FileReader(file=BytesIO(data), name="test.tsv") - with pytest.raises(FileProcessingError): - df = reader.get_page_content() - - -def test_check_invalid_dialects(): - with pytest.raises(Exception): - FileHandler._get_csv_dialect("example csv file") - with 
pytest.raises(Exception): - FileHandler._get_csv_dialect("example\ncsv\nfile") - with pytest.raises(Exception): - FileHandler._get_csv_dialect("example|csv|file") - - -def test_get_tables(): - file_handler = FileHandler(file_controller=MockFileController()) - response = file_handler.get_tables() - - assert response.type == RESPONSE_TYPE.TABLE - - expected_df = pandas.DataFrame( - [{"TABLE_NAME": x[0], "TABLE_ROWS": x[1], "TABLE_TYPE": "BASE TABLE"} for x in file_records] - ) - - assert response.data_frame.equals(expected_df) - - -def test_get_columns(): - file_handler = FileHandler(file_controller=MockFileController()) - response = file_handler.get_columns("mock") - - assert response.type == RESPONSE_TYPE.COLUMNS_TABLE - - data = [] - for name in file_records[0][2]: - row = {} - for key_name in INF_SCHEMA_COLUMNS_NAMES_SET: - if key_name == INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME: - row[key_name] = name - elif key_name == INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE: - row[key_name] = "str" - elif key_name == INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE: - row[key_name] = MYSQL_DATA_TYPE.TEXT - else: - row[key_name] = None - data.append(row) - - expected_df = pandas.DataFrame(data) - assert set(response.data_frame.columns) == set(expected_df.columns) - expected_df = expected_df[response.data_frame.columns] - - # Use 'compare' to ignore dtypes (object != string) - assert response.data_frame.compare(expected_df).empty diff --git a/tests/unit/handlers/test_hubspot.py b/tests/unit/handlers/test_hubspot.py deleted file mode 100644 index edbb3732ecc..00000000000 --- a/tests/unit/handlers/test_hubspot.py +++ /dev/null @@ -1,1253 +0,0 @@ -from collections import OrderedDict -import pytest -import unittest -from unittest.mock import patch, MagicMock -import pandas as pd - -try: - from hubspot.crm.objects import SimplePublicObject - from mindsdb.integrations.handlers.hubspot_handler.hubspot_handler import ( - HubspotHandler, - ) - from 
mindsdb.integrations.handlers.hubspot_handler.hubspot_tables import ( - CompaniesTable, - ContactsTable, - DealsTable, - canonical_op, - to_hubspot_property, - to_internal_property, - _build_hubspot_search_filters, - _normalize_filter_conditions, - ) - from mindsdb_sql_parser.ast import Select, Identifier, Function - from mindsdb_sql_parser import parse_sql - from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator -except ImportError: - pytestmark = pytest.mark.skip("HubSpot handler not installed") - -from base_handler_test import BaseHandlerTestSetup - -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, - DataHandlerResponse, - RESPONSE_TYPE, -) - - -class TestHubspotHandler(BaseHandlerTestSetup, unittest.TestCase): - """Test class for HubspotHandler.""" - - EXPECTED_TABLES = [ - "companies", - "contacts", - "deals", - "tickets", - "tasks", - "calls", - "emails", - "meetings", - "notes", - ] - - @property - def dummy_connection_data(self): - return OrderedDict(access_token="test_token_12345_dummy_not_real") - - @property - def registered_tables(self): - return ["companies", "contacts", "deals"] - - @property - def err_to_raise_on_connect_failure(self): - return Exception("Authentication failed") - - def create_handler(self): - """Create HubspotHandler instance for testing.""" - return HubspotHandler("hubspot", connection_data=self.dummy_connection_data) - - def create_patcher(self): - """Create patch for HubSpot client connection.""" - return patch("mindsdb.integrations.handlers.hubspot_handler.hubspot_handler.HubSpot") - - def test_initialization(self): - """Test if the handler initializes correctly with proper values.""" - self.assertEqual(self.handler.name, "hubspot") - self.assertFalse(self.handler.is_connected) - self.assertEqual(self.handler.connection_data, self.dummy_connection_data) - - # Test that tables are registered - self.assertIn("companies", self.handler._tables.keys()) - 
self.assertIn("contacts", self.handler._tables.keys()) - self.assertIn("deals", self.handler._tables.keys()) - - def test_connect_success(self): - """Test if `connect` method successfully establishes connection.""" - mock_hubspot_client = MagicMock() - self.mock_connect.return_value = mock_hubspot_client - - connection = self.handler.connect() - - self.assertIsNotNone(connection) - self.assertTrue(self.handler.is_connected) - self.assertEqual(connection, mock_hubspot_client) - self.mock_connect.assert_called_once_with(access_token=self.dummy_connection_data["access_token"]) - - def test_connect_reuse_existing_connection(self): - """Test that connect reuses existing connection when already connected.""" - mock_hubspot_client = MagicMock() - self.mock_connect.return_value = mock_hubspot_client - - # First connection - connection1 = self.handler.connect() - - # Second connection should reuse existing - connection2 = self.handler.connect() - - self.assertEqual(connection1, connection2) - self.mock_connect.assert_called_once() - - def test_connect_failure(self): - """Test connect method handles connection failures properly.""" - self.mock_connect.side_effect = self.err_to_raise_on_connect_failure - - with self.assertRaises(type(self.err_to_raise_on_connect_failure)): - self.handler.connect() - self.assertFalse(self.handler.is_connected) - - def test_check_connection_success(self): - """Test check_connection method with successful connection.""" - mock_hubspot_client = MagicMock() - self.mock_connect.return_value = mock_hubspot_client - - response = self.handler.check_connection() - - assert isinstance(response, StatusResponse) - self.assertTrue(response.success) - self.assertIsNone(response.error_message) - self.assertTrue(self.handler.is_connected) - - def test_check_connection_failure(self): - """Test check_connection method with failed connection.""" - self.mock_connect.side_effect = self.err_to_raise_on_connect_failure - - response = self.handler.check_connection() 
- - assert isinstance(response, StatusResponse) - self.assertFalse(response.success) - self.assertIsNotNone(response.error_message) - self.assertFalse(self.handler.is_connected) - - def test_native_query(self): - """Test native_query method executes SQL queries.""" - mock_hubspot_client = MagicMock() - mock_companies_data = [ - SimplePublicObject( - id="123", - properties={ - "name": "Test Company", - "city": "New York", - "createdate": "2023-01-01T00:00:00Z", - "hs_lastmodifieddate": "2023-01-01T00:00:00Z", - }, - ) - ] - - self.mock_connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.companies.get_all.return_value = mock_companies_data - - query = "SELECT * FROM companies LIMIT 1" - response = self.handler.native_query(query) - - assert isinstance(response, DataHandlerResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - self.assertIsNotNone(response.data_frame) - - def test_get_columns_companies(self): - """Test get_columns method for companies table.""" - mock_hubspot_client = MagicMock() - mock_company_data = [ - SimplePublicObject( - id="123", - properties={ - "name": "Test Company", - "city": "New York", - "phone": "+1-555-123-4567", - "state": "NY", - "domain": "testcompany.com", - "industry": "Technology", - "createdate": "2023-01-01T00:00:00Z", - "hs_lastmodifieddate": "2023-01-01T00:00:00Z", - }, - ) - ] - - self.mock_connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.companies.get_all.return_value = mock_company_data - - response = self.handler.get_columns("companies") - - assert isinstance(response, DataHandlerResponse) - self.assertEqual(response.type, RESPONSE_TYPE.COLUMNS_TABLE) - - df = response.data_frame - # Check for comprehensive column metadata - self.assertIn("COLUMN_NAME", df.columns) - self.assertIn("DATA_TYPE", df.columns) - - # Check that expected columns are present - column_names = df["COLUMN_NAME"].tolist() - expected_columns = [ - "id", - "name", - "city", - "phone", - "state", - 
"domain", - "industry", - "createdate", - "lastmodifieddate", - ] - for col in expected_columns: - self.assertIn(col, column_names) - - def test_get_columns_contacts(self): - """Test get_columns method for contacts table.""" - mock_hubspot_client = MagicMock() - mock_contact_data = [ - SimplePublicObject( - id="456", - properties={ - "email": "test@example.com", - "firstname": "John", - "lastname": "Doe", - "phone": "+1-555-123-4567", - "company": "Test Company", - "website": "example.com", - "createdate": "2023-01-01T00:00:00Z", - "lastmodifieddate": "2023-01-01T00:00:00Z", - }, - ) - ] - - self.mock_connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.contacts.get_all.return_value = mock_contact_data - - response = self.handler.get_columns("contacts") - - assert isinstance(response, DataHandlerResponse) - self.assertEqual(response.type, RESPONSE_TYPE.COLUMNS_TABLE) - - df = response.data_frame - - self.assertIn("COLUMN_NAME", df.columns) - self.assertIn("DATA_TYPE", df.columns) - - column_names = df["COLUMN_NAME"].tolist() - expected_columns = [ - "id", - "email", - "firstname", - "lastname", - "phone", - "company", - "website", - "createdate", - "lastmodifieddate", - ] - for col in expected_columns: - self.assertIn(col, column_names) - - def test_get_columns_deals(self): - """Test get_columns method for deals table.""" - mock_hubspot_client = MagicMock() - mock_deal_data = [ - SimplePublicObject( - id="789", - properties={ - "dealname": "Test Deal", - "amount": "10000", - "pipeline": "default", - "closedate": "2023-12-31", - "dealstage": "closedwon", - "hubspot_owner_id": "12345", - "createdate": "2023-01-01T00:00:00Z", - "hs_lastmodifieddate": "2023-01-01T00:00:00Z", - }, - ) - ] - - self.mock_connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.deals.get_all.return_value = mock_deal_data - - response = self.handler.get_columns("deals") - - assert isinstance(response, DataHandlerResponse) - self.assertEqual(response.type, 
RESPONSE_TYPE.COLUMNS_TABLE) - - df = response.data_frame - self.assertIn("COLUMN_NAME", df.columns) - self.assertIn("DATA_TYPE", df.columns) - - column_names = df["COLUMN_NAME"].tolist() - expected_columns = [ - "id", - "dealname", - "amount", - "pipeline", - "closedate", - "dealstage", - "hubspot_owner_id", - "createdate", - "lastmodifieddate", - ] - for col in expected_columns: - self.assertIn(col, column_names) - - def test_deals_targets_include_function_identifiers(self): - """Ensure aggregate targets include referenced columns for fetch.""" - deals_table = DealsTable(MagicMock()) - query = Select( - targets=[ - Identifier("pipeline"), - Function("sum", args=[Identifier("amount")], alias=Identifier("sum_amount")), - ], - from_table=Identifier("deals"), - ) - targets = deals_table._get_targets(query) - self.assertIn("pipeline", targets) - self.assertIn("amount", targets) - - def test_get_columns_invalid_table(self): - """Test get_columns method with invalid table name.""" - response = self.handler.get_columns("nonexistent_table") - - assert isinstance(response, DataHandlerResponse) - self.assertEqual(response.type, RESPONSE_TYPE.ERROR) - self.assertIsNotNone(response.error_message) - - def test_native_query_with_insert(self): - """Test native_query with INSERT statement.""" - mock_hubspot_client = MagicMock() - mock_created_companies = MagicMock() - mock_created_companies.results = [SimplePublicObject(id="new123", properties={"name": "New Company"})] - - self.mock_connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.companies.batch_api.create.return_value = mock_created_companies - - insert_query = "INSERT INTO companies (name, city) VALUES ('New Company', 'Boston')" - response = self.handler.native_query(insert_query) - - assert isinstance(response, DataHandlerResponse) - - self.assertNotEqual(response.type, RESPONSE_TYPE.ERROR) - - def test_native_query_with_update(self): - """Test native_query with UPDATE statement.""" - mock_hubspot_client = 
MagicMock() - - mock_search_result = MagicMock() - mock_search_result.results = [ - SimplePublicObject( - id="123", - properties={ - "name": "Test Company", - "city": "New York", - "createdate": "2023-01-01T00:00:00Z", - "hs_lastmodifieddate": "2023-01-01T00:00:00Z", - }, - ) - ] - mock_search_result.paging = None - - mock_updated_companies = MagicMock() - mock_updated_companies.results = [SimplePublicObject(id="123", properties={"name": "Updated Company"})] - - self.mock_connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.companies.search_api.do_search.return_value = mock_search_result - mock_hubspot_client.crm.companies.batch_api.update.return_value = mock_updated_companies - - update_query = "UPDATE companies SET city='Boston' WHERE name='Test Company'" - response = self.handler.native_query(update_query) - - assert isinstance(response, DataHandlerResponse) - self.assertNotEqual(response.type, RESPONSE_TYPE.ERROR) - - def test_native_query_with_delete(self): - """Test native_query with DELETE statement.""" - mock_hubspot_client = MagicMock() - - mock_search_result = MagicMock() - mock_search_result.results = [ - SimplePublicObject( - id="123", - properties={ - "name": "Test Company", - "city": "New York", - "createdate": "2023-01-01T00:00:00Z", - "hs_lastmodifieddate": "2023-01-01T00:00:00Z", - }, - ) - ] - mock_search_result.paging = None - - self.mock_connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.companies.search_api.do_search.return_value = mock_search_result - mock_hubspot_client.crm.companies.batch_api.archive.return_value = None - - delete_query = "DELETE FROM companies WHERE name='Test Company'" - response = self.handler.native_query(delete_query) - - assert isinstance(response, DataHandlerResponse) - self.assertNotEqual(response.type, RESPONSE_TYPE.ERROR) - - def test_handler_name(self): - """Test handler name is set correctly.""" - self.assertEqual(self.handler.name, "hubspot") - - def 
test_connection_data_storage(self): - """Test connection data is stored correctly.""" - self.assertEqual( - self.handler.connection_data["access_token"], - self.dummy_connection_data["access_token"], - ) - - def test_connect_invalid_credentials(self): - """Test connect method with invalid credentials.""" - handler = HubspotHandler("hubspot", connection_data={"access_token": ""}) - with self.assertRaises(ValueError) as context: - handler.connect() - self.assertIn("Invalid access_token provided", str(context.exception)) - - handler = HubspotHandler("hubspot", connection_data={"client_id": "", "client_secret": "secret"}) - with self.assertRaises(ValueError) as context: - handler.connect() - self.assertIn("Invalid OAuth credentials provided", str(context.exception)) - - def test_disconnect(self): - """Test disconnect method.""" - mock_hubspot_client = MagicMock() - self.mock_connect.return_value = mock_hubspot_client - - self.handler.connect() - self.assertTrue(self.handler.is_connected) - - self.handler.disconnect() - self.assertFalse(self.handler.is_connected) - self.assertIsNone(self.handler.connection) - - def test_native_query_empty_query(self): - """Test native_query with empty or None query.""" - response = self.handler.native_query(None) - self.assertEqual(response.type, RESPONSE_TYPE.ERROR) - self.assertIn("Query cannot be None or empty", response.error_message) - - response = self.handler.native_query("") - self.assertEqual(response.type, RESPONSE_TYPE.ERROR) - self.assertIn("Query cannot be None or empty", response.error_message) - - def test_native_query_invalid_sql(self): - """Test native_query with invalid SQL.""" - response = self.handler.native_query("INVALID SQL QUERY") - self.assertEqual(response.type, RESPONSE_TYPE.ERROR) - self.assertIn("Query execution failed", response.error_message) - - def test_get_tables_connection_failure(self): - """Test get_tables method with connection failure.""" - self.mock_connect.side_effect = Exception("Connection 
failed") - - response = self.handler.get_tables() - - self.assertEqual(response.type, RESPONSE_TYPE.ERROR) - self.assertIn("Failed to retrieve table list", response.error_message) - - def test_get_columns_companies_detailed(self): - """Test get_columns method for companies table with detailed analysis.""" - mock_hubspot_client = MagicMock() - mock_company_data = [ - SimplePublicObject( - id="123", - properties={ - "name": "Test Company", - "city": "New York", - "phone": "+1-555-123-4567", - "state": "NY", - "domain": "testcompany.com", - "industry": "Technology", - "createdate": "2023-01-01T00:00:00Z", - "hs_lastmodifieddate": "2023-01-01T00:00:00Z", - }, - ) - ] - - self.mock_connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.companies.get_all.return_value = mock_company_data - - response = self.handler.get_columns("companies") - - # After calling to_columns_table_response, type should be COLUMNS_TABLE - self.assertEqual(response.type, RESPONSE_TYPE.COLUMNS_TABLE) - df = response.data_frame - - expected_columns = [ - "COLUMN_NAME", - "DATA_TYPE", - "ORDINAL_POSITION", - "COLUMN_DEFAULT", - "IS_NULLABLE", - "CHARACTER_MAXIMUM_LENGTH", - "CHARACTER_OCTET_LENGTH", - "NUMERIC_PRECISION", - "NUMERIC_SCALE", - "DATETIME_PRECISION", - "CHARACTER_SET_NAME", - "COLLATION_NAME", - "MYSQL_DATA_TYPE", - ] - for col in expected_columns: - self.assertIn(col, df.columns, f"Missing standard column: {col}") - - column_names = df["COLUMN_NAME"].tolist() - - self.assertIn("id", column_names) - self.assertIn("name", column_names) - self.assertIn("city", column_names) - - def test_get_columns_connection_failure(self): - """Test get_columns method with connection failure.""" - self.mock_connect.side_effect = Exception("Connection failed") - - response = self.handler.get_columns("companies") - - self.assertEqual(response.type, RESPONSE_TYPE.ERROR) - self.assertIn("Failed to retrieve columns", response.error_message) - - def test_data_type_inference(self): - """Test 
_infer_data_type method.""" - self.assertEqual(self.handler._infer_data_type(None), "VARCHAR") - self.assertEqual(self.handler._infer_data_type(True), "BOOLEAN") - self.assertEqual(self.handler._infer_data_type(42), "INTEGER") - self.assertEqual(self.handler._infer_data_type(3.14), "DECIMAL") - self.assertEqual(self.handler._infer_data_type("text"), "VARCHAR") - self.assertEqual(self.handler._infer_data_type("2023-01-01T00:00:00Z"), "TIMESTAMP") - - def test_table_descriptions(self): - """Test _get_table_description method.""" - self.assertIn("companies data", self.handler._get_table_description("companies")) - self.assertIn("contacts data", self.handler._get_table_description("contacts")) - self.assertIn("deals data", self.handler._get_table_description("deals")) - - def test_check_connection_with_api_test(self): - """Test check_connection method performs actual API test.""" - mock_hubspot_client = MagicMock() - mock_contacts_data = [ - SimplePublicObject( - id="123", - properties={"email": "test@example.com"}, - ) - ] - - self.mock_connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.contacts.get_all.return_value = mock_contacts_data - - response = self.handler.check_connection() - - self.assertTrue(response.success) - self.assertIsNone(response.error_message) - - # Now check_connection tries contacts first - mock_hubspot_client.crm.contacts.get_all.assert_called_with(limit=1) - - def test_oauth_connection(self): - """Test OAuth connection flow.""" - oauth_data = OrderedDict(client_id="test_client_id", client_secret="test_client_secret") - handler = HubspotHandler("hubspot", connection_data=oauth_data) - - mock_hubspot_client = MagicMock() - mock_access_token = "oauth_access_token_123" - - with ( - patch("mindsdb.integrations.handlers.hubspot_handler.hubspot_handler.HubSpot") as mock_hubspot, - patch( - "mindsdb.integrations.handlers.hubspot_handler.hubspot_handler.HubSpotOAuth2Manager" - ) as mock_oauth_manager_cls, - ): - 
mock_hubspot.return_value = mock_hubspot_client - mock_oauth_manager_cls.return_value.get_access_token.return_value = mock_access_token - - connection = handler.connect() - - self.assertIsNotNone(connection) - self.assertTrue(handler.is_connected) - mock_hubspot.assert_called_with(access_token=mock_access_token) - - def test_comprehensive_error_handling(self): - """Test comprehensive error handling in various scenarios.""" - with patch("mindsdb.integrations.handlers.hubspot_handler.hubspot_handler.HubSpot") as mock_hubspot: - mock_hubspot.side_effect = Exception("API Error") - - with self.assertRaises(ValueError) as context: - self.handler.connect() - self.assertIn("Connection to HubSpot failed", str(context.exception)) - - def test_secure_logging(self): - """Test that sensitive credentials are not logged.""" - sensitive_data = {"access_token": "secret_token_12345"} - handler = HubspotHandler("hubspot", connection_data=sensitive_data) - - self.assertIn("access_token", handler.connection_data) - - def test_column_statistics_calculation(self): - """Test comprehensive column statistics calculation.""" - numeric_values = [100, 200, 300, None, 150, 250] - stats = self.handler._calculate_column_statistics("amount", numeric_values) - - self.assertEqual(stats["null_count"], 1) - self.assertEqual(stats["distinct_count"], 5) # 5 unique non-null values - self.assertIsNotNone(stats["average_value"]) - self.assertEqual(stats["average_value"], 200.0) # (100+200+300+150+250)/5 - - # Test string data - string_values = ["apple", "banana", "apple", None, "cherry"] - stats = self.handler._calculate_column_statistics("fruit", string_values) - - self.assertEqual(stats["null_count"], 1) - self.assertEqual(stats["distinct_count"], 3) - # min_value and max_value are now None to avoid misleading string comparisons - self.assertIsNone(stats["min_value"]) - self.assertIsNone(stats["max_value"]) - - def test_data_type_inference_from_samples(self): - """Test improved data type inference from 
multiple samples.""" - # Mixed numeric and string - should pick most common - mixed_values = [100, 200, "300", 400, 500] # mostly numeric - data_type = self.handler._infer_data_type_from_samples(mixed_values) - self.assertEqual(data_type, "INTEGER") - - # Timestamp strings - timestamp_values = ["2023-01-01T10:00:00Z", "2023-01-02T11:00:00Z", None] - data_type = self.handler._infer_data_type_from_samples(timestamp_values) - self.assertEqual(data_type, "TIMESTAMP") - - # All null values - null_values = [None, None, None] - data_type = self.handler._infer_data_type_from_samples(null_values) - self.assertEqual(data_type, "VARCHAR") - - def test_get_columns_with_standard_schema(self): - """Test get_columns method returns standard information_schema.columns format.""" - mock_hubspot_client = MagicMock() - - # Mock larger dataset - mock_company_data = [] - for i in range(50): # Create 50 sample companies - mock_company_data.append( - SimplePublicObject( - id=f"company_{i}", - properties={ - "name": f"Company {i}", - "city": "New York" if i % 2 == 0 else "San Francisco", - "industry": "Technology", - "hubspot_owner_id": f"owner_{i % 5}", - "annual_revenue": str(100000 + i * 1000), - "createdate": f"2023-01-{(i % 28) + 1:02d}T10:00:00Z", - }, - ) - ) - - self.mock_connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.companies.get_all.return_value = mock_company_data - - response = self.handler.get_columns("companies") - - self.assertEqual(response.type, RESPONSE_TYPE.COLUMNS_TABLE) - df = response.data_frame - - expected_columns = [ - "COLUMN_NAME", - "DATA_TYPE", - "ORDINAL_POSITION", - "COLUMN_DEFAULT", - "IS_NULLABLE", - "CHARACTER_MAXIMUM_LENGTH", - "CHARACTER_OCTET_LENGTH", - "NUMERIC_PRECISION", - "NUMERIC_SCALE", - "DATETIME_PRECISION", - "CHARACTER_SET_NAME", - "COLLATION_NAME", - "MYSQL_DATA_TYPE", - ] - - for col in expected_columns: - self.assertIn(col, df.columns, f"Missing standard column: {col}") - - non_standard_columns = [ - "IS_PRIMARY_KEY", 
- "IS_FOREIGN_KEY", - "NULL_COUNT", - "DISTINCT_COUNT", - "MIN_VALUE", - "MAX_VALUE", - "AVERAGE_VALUE", - "COLUMN_DESCRIPTION", - ] - for col in non_standard_columns: - self.assertNotIn(col, df.columns, f"Non-standard column should not be present: {col}") - - id_row = df[df["COLUMN_NAME"] == "id"] - self.assertEqual(len(id_row), 1) - self.assertEqual(id_row.iloc[0]["ORDINAL_POSITION"], 1) - self.assertEqual(id_row.iloc[0]["IS_NULLABLE"], "NO") - - def test_estimate_table_rows_with_search_api(self): - """Test that _estimate_table_rows uses search API for accurate counts.""" - mock_hubspot_client = MagicMock() - - mock_search_result = MagicMock() - mock_search_result.total = 5432 - - self.mock_connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.companies.search_api.do_search.return_value = mock_search_result - - self.handler.connect() - row_count = self.handler._estimate_table_rows("companies") - - mock_hubspot_client.crm.companies.search_api.do_search.assert_called_once_with( - public_object_search_request={"limit": 1} - ) - - self.assertEqual(row_count, 5432) - - def test_estimate_table_rows_fallback(self): - """Test that _estimate_table_rows handles search API failures gracefully.""" - mock_hubspot_client = MagicMock() - - mock_hubspot_client.crm.contacts.search_api.do_search.side_effect = Exception("API error") - mock_hubspot_client.crm.contacts.get_all.return_value = [SimplePublicObject(id="1", properties={})] - - self.mock_connect.return_value = mock_hubspot_client - self.handler.connect() - - row_count = self.handler._estimate_table_rows("contacts") - - self.assertIsNone(row_count) - - def test_meta_get_columns(self): - """Test meta_get_columns returns data catalog column metadata.""" - mock_hubspot_client = MagicMock() - mock_company_data = [ - SimplePublicObject( - id="123", - properties={ - "name": "Test Company", - "city": "New York", - "industry": "Technology", - "createdate": "2023-01-01T00:00:00Z", - }, - ) - ] - - 
self.mock_connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.companies.get_all.return_value = mock_company_data - mock_hubspot_client.crm.contacts.get_all.return_value = [] - mock_hubspot_client.crm.deals.get_all.return_value = [] - - response = self.handler.meta_get_columns(table_names=["companies"]) - - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - df = response.data_frame - - expected_columns = [ - "TABLE_NAME", - "COLUMN_NAME", - "DATA_TYPE", - "COLUMN_DESCRIPTION", - "IS_NULLABLE", - "COLUMN_DEFAULT", - ] - for col in expected_columns: - self.assertIn(col, df.columns, f"Missing data catalog column: {col}") - - self.assertIn("companies", df["TABLE_NAME"].tolist()) - - column_names = df[df["TABLE_NAME"] == "companies"]["COLUMN_NAME"].tolist() - self.assertIn("id", column_names) - self.assertIn("name", column_names) - self.assertIn("city", column_names) - - def test_meta_get_column_statistics(self): - """Test meta_get_column_statistics returns statistical information.""" - mock_hubspot_client = MagicMock() - - # Create larger sample dataset for statistics - mock_contact_data = [] - for i in range(50): - mock_contact_data.append( - SimplePublicObject( - id=f"contact_{i}", - properties={ - "email": f"user{i}@example.com", - "firstname": "John" if i % 2 == 0 else "Jane", - "lastname": "Doe", - "city": "New York" if i % 3 == 0 else "San Francisco", - }, - ) - ) - - self.mock_connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.contacts.get_all.return_value = mock_contact_data - mock_hubspot_client.crm.companies.get_all.return_value = [] - mock_hubspot_client.crm.deals.get_all.return_value = [] - - response = self.handler.meta_get_column_statistics(table_names=["contacts"]) - - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - df = response.data_frame - - expected_columns = [ - "TABLE_NAME", - "COLUMN_NAME", - "NULL_PERCENTAGE", - "DISTINCT_VALUES_COUNT", - "MINIMUM_VALUE", - "MAXIMUM_VALUE", - "MOST_COMMON_VALUES", - 
"MOST_COMMON_FREQUENCIES", - ] - for col in expected_columns: - self.assertIn(col, df.columns, f"Missing statistics column: {col}") - - firstname_stats = df[(df["TABLE_NAME"] == "contacts") & (df["COLUMN_NAME"] == "firstname")] - self.assertEqual(len(firstname_stats), 1) - - self.assertEqual(firstname_stats.iloc[0]["DISTINCT_VALUES_COUNT"], 2) - - self.assertEqual(firstname_stats.iloc[0]["NULL_PERCENTAGE"], 0.0) - - def test_meta_get_columns_all_tables(self): - """Test meta_get_columns with no table filter returns all tables.""" - mock_hubspot_client = MagicMock() - - self.mock_connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.companies.get_all.return_value = [ - SimplePublicObject(id="1", properties={"name": "Company"}) - ] - mock_hubspot_client.crm.contacts.get_all.return_value = [ - SimplePublicObject(id="2", properties={"email": "test@example.com"}) - ] - mock_hubspot_client.crm.deals.get_all.return_value = [ - SimplePublicObject(id="3", properties={"dealname": "Deal"}) - ] - - response = self.handler.meta_get_columns() # No table_names specified - - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - df = response.data_frame - - tables_present = df["TABLE_NAME"].unique().tolist() - - self.assertIn("companies", tables_present) - self.assertIn("contacts", tables_present) - self.assertIn("deals", tables_present) - - def test_select_companies_with_in_clause_uses_search_api(self): - """ - MindsDB calls table.select(query_ast) directly — not native_query. - Verify that a WHERE city IN (...) query routes to the HubSpot Search API. 
- """ - - mock_hubspot_client = MagicMock() - mock_search_result = MagicMock() - mock_search_result.results = [ - SimplePublicObject( - id="1", - properties={ - "name": "NYC Company", - "city": "New York", - "createdate": "2023-01-01T00:00:00Z", - "hs_lastmodifieddate": "2023-01-01T00:00:00Z", - }, - ), - ] - mock_search_result.paging = None - - handler = MagicMock() - handler.connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.companies.search_api.do_search.return_value = mock_search_result - - table = CompaniesTable(handler) - query = parse_sql("SELECT * FROM companies WHERE city IN ('New York', 'Austin')", dialect="mindsdb") - result = table.select(query) - - mock_hubspot_client.crm.companies.search_api.do_search.assert_called() - mock_hubspot_client.crm.companies.get_all.assert_not_called() - self.assertIsNotNone(result) - - def test_select_contacts_with_in_clause(self): - """ - MindsDB calls ContactsTable.select(query_ast) directly. - Verify city IN (...) routes to HubSpot Search API. - """ - mock_hubspot_client = MagicMock() - mock_search_result = MagicMock() - mock_search_result.results = [ - SimplePublicObject( - id="101", - properties={ - "email": "john@example.com", - "firstname": "John", - "lastname": "Doe", - "city": "Boston", - }, - ), - ] - mock_search_result.paging = None - - handler = MagicMock() - handler.connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.contacts.search_api.do_search.return_value = mock_search_result - - table = ContactsTable(handler) - query = parse_sql("SELECT * FROM contacts WHERE city IN ('Boston', 'Chicago')", dialect="mindsdb") - result = table.select(query) - - mock_hubspot_client.crm.contacts.search_api.do_search.assert_called() - self.assertIsNotNone(result) - - def test_select_deals_with_in_clause(self): - """ - MindsDB calls DealsTable.select(query_ast) directly. - Verify dealstage IN (...) routes to HubSpot Search API. 
- """ - mock_hubspot_client = MagicMock() - mock_search_result = MagicMock() - mock_search_result.results = [ - SimplePublicObject( - id="201", - properties={ - "dealname": "Big Deal", - "amount": "50000", - "dealstage": "closedwon", - }, - ), - ] - mock_search_result.paging = None - - handler = MagicMock() - handler.connect.return_value = mock_hubspot_client - handler._hubspot_deal_stage_map_cache = ({}, {}) - handler._hubspot_deal_stage_rows_cache = [] - handler._hubspot_owner_rows_cache = [] - handler._hubspot_owner_map_cache = {} - mock_hubspot_client.crm.deals.search_api.do_search.return_value = mock_search_result - mock_hubspot_client.crm.pipelines.pipelines_api.get_all.return_value = MagicMock(results=[]) - - table = DealsTable(handler) - query = parse_sql("SELECT * FROM deals WHERE dealstage IN ('closedwon', 'closedlost')", dialect="mindsdb") - result = table.select(query) - - mock_hubspot_client.crm.deals.search_api.do_search.assert_called() - self.assertIsNotNone(result) - - def test_select_with_in_clause_verifies_filter_structure(self): - """ - Verify that city IN (...) generates the correct HubSpot Search API filter payload. - Tests via CompaniesTable.select() — the actual call path MindsDB uses. 
- """ - mock_hubspot_client = MagicMock() - mock_search_result = MagicMock() - mock_search_result.results = [] - mock_search_result.paging = None - - handler = MagicMock() - handler.connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.companies.search_api.do_search.return_value = mock_search_result - - table = CompaniesTable(handler) - query = parse_sql("SELECT * FROM companies WHERE city IN ('NYC', 'LA', 'Chicago')", dialect="mindsdb") - table.select(query) - - call_args = mock_hubspot_client.crm.companies.search_api.do_search.call_args - search_request = call_args.kwargs.get("public_object_search_request", {}) - - self.assertIn("filterGroups", search_request) - filter_groups = search_request["filterGroups"] - self.assertEqual(len(filter_groups), 1) - - filters = filter_groups[0]["filters"] - self.assertEqual(len(filters), 1) - - in_filter = filters[0] - self.assertEqual(in_filter["propertyName"], "city") - self.assertEqual(in_filter["operator"], "IN") - self.assertIn("values", in_filter) - self.assertEqual(set(in_filter["values"]), {"NYC", "LA", "Chicago"}) - - def test_select_with_not_in_clause(self): - """Verify industry NOT IN (...) 
generates NOT_IN operator in HubSpot filter.""" - mock_hubspot_client = MagicMock() - mock_search_result = MagicMock() - mock_search_result.results = [] - mock_search_result.paging = None - - handler = MagicMock() - handler.connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.companies.search_api.do_search.return_value = mock_search_result - - table = CompaniesTable(handler) - query = parse_sql("SELECT * FROM companies WHERE industry NOT IN ('Retail', 'Healthcare')", dialect="mindsdb") - table.select(query) - - call_args = mock_hubspot_client.crm.companies.search_api.do_search.call_args - search_request = call_args.kwargs.get("public_object_search_request", {}) - - filters = search_request["filterGroups"][0]["filters"] - self.assertEqual(filters[0]["operator"], "NOT_IN") - - def test_select_with_in_and_equality_combined(self): - """Verify city IN (...) AND industry = '...' both push down to the Search API.""" - mock_hubspot_client = MagicMock() - mock_search_result = MagicMock() - mock_search_result.results = [] - mock_search_result.paging = None - - handler = MagicMock() - handler.connect.return_value = mock_hubspot_client - mock_hubspot_client.crm.companies.search_api.do_search.return_value = mock_search_result - - table = CompaniesTable(handler) - query = parse_sql( - "SELECT * FROM companies WHERE city IN ('NYC', 'LA') AND industry = 'Technology'", - dialect="mindsdb", - ) - table.select(query) - - mock_hubspot_client.crm.companies.search_api.do_search.assert_called() - - def test_canonical_op_normalization(self): - """Test canonical operator normalization.""" - self.assertEqual(canonical_op("="), "eq") - self.assertEqual(canonical_op("EQ"), "eq") - self.assertEqual(canonical_op(">="), "gte") - self.assertEqual(canonical_op(FilterOperator.GREATER_THAN_OR_EQUAL), "gte") - self.assertEqual(canonical_op("NOT IN"), "not_in") - - def test_property_mapping(self): - """Test internal/HubSpot property mapping.""" - 
self.assertEqual(to_hubspot_property("lastmodifieddate"), "hs_lastmodifieddate") - self.assertEqual(to_internal_property("hs_lastmodifieddate"), "lastmodifieddate") - self.assertEqual(to_hubspot_property("id"), "hs_object_id") - self.assertEqual(to_internal_property("hs_object_id"), "id") - - def test_build_hubspot_search_filters_in_and_mapping(self): - """Test search filter mapping for IN and special properties.""" - conditions = _normalize_filter_conditions( - [ - FilterCondition("city", FilterOperator.IN, ["NYC", "LA"]), - FilterCondition("id", FilterOperator.EQUAL, "123"), - FilterCondition("lastmodifieddate", FilterOperator.GREATER_THAN_OR_EQUAL, "2024-01-01T00:00:00Z"), - ] - ) - filters = _build_hubspot_search_filters(conditions, {"city", "id", "lastmodifieddate"}) - - self.assertIsNotNone(filters) - filter_by_property = {f["propertyName"]: f for f in filters} - - self.assertIn("city", filter_by_property) - self.assertEqual(filter_by_property["city"]["operator"], "IN") - self.assertEqual(set(filter_by_property["city"]["values"]), {"NYC", "LA"}) - - self.assertIn("hs_object_id", filter_by_property) - self.assertEqual(filter_by_property["hs_object_id"]["operator"], "EQ") - self.assertEqual(filter_by_property["hs_object_id"]["value"], "123") - - self.assertIn("hs_lastmodifieddate", filter_by_property) - self.assertEqual(filter_by_property["hs_lastmodifieddate"]["operator"], "GTE") - - def test_meta_get_column_statistics_multiple_tables(self): - """Test meta_get_column_statistics retains stats across tables.""" - mock_hubspot_client = MagicMock() - self.mock_connect.return_value = mock_hubspot_client - - mock_hubspot_client.crm.companies.get_all.return_value = [ - SimplePublicObject( - id="company_1", - properties={"name": "Company A", "hs_lastmodifieddate": "2024-01-01T00:00:00Z"}, - ) - ] - mock_hubspot_client.crm.contacts.get_all.return_value = [ - SimplePublicObject( - id="contact_1", - properties={"email": "test@example.com", "hs_lastmodifieddate": 
"2024-01-02T00:00:00Z"}, - ) - ] - - response = self.handler.meta_get_column_statistics(table_names=["companies", "contacts"]) - - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - df = response.data_frame - table_names = df["TABLE_NAME"].unique().tolist() - self.assertIn("companies", table_names) - self.assertIn("contacts", table_names) - - def test_search_pushdown_builds_sorts_and_properties(self): - """ - Verify that ORDER BY and SELECT columns are pushed down to the HubSpot Search API. - Uses DealsTable.select() — the actual call path MindsDB uses. - """ - mock_hubspot_client = MagicMock() - mock_search_result = MagicMock() - mock_search_result.results = [] - mock_search_result.paging = None - - handler = MagicMock() - handler.connect.return_value = mock_hubspot_client - handler._hubspot_deal_stage_map_cache = ({}, {}) - handler._hubspot_deal_stage_rows_cache = [] - handler._hubspot_owner_rows_cache = [] - handler._hubspot_owner_map_cache = {} - mock_hubspot_client.crm.deals.search_api.do_search.return_value = mock_search_result - mock_hubspot_client.crm.pipelines.pipelines_api.get_all.return_value = MagicMock(results=[]) - - table = DealsTable(handler) - query = parse_sql( - "SELECT dealname FROM deals WHERE pipeline='default' ORDER BY closedate DESC LIMIT 5", - dialect="mindsdb", - ) - table.select(query) - - call_args = mock_hubspot_client.crm.deals.search_api.do_search.call_args - search_request = call_args.kwargs.get("public_object_search_request", {}) - - self.assertIn("sorts", search_request) - self.assertEqual(search_request["sorts"][0]["propertyName"], "closedate") - self.assertEqual(search_request["sorts"][0]["direction"], "DESCENDING") - self.assertEqual(search_request["properties"], ["dealname"]) - - def test_three_table_join_on_clause_orientations(self): - """ - Verify CORE JOIN ASSOC JOIN CORE resolves left_assoc_col / right_assoc_col - correctly regardless of which side of the ON each table appears on. 
- """ - - company_df = pd.DataFrame({"id": ["1"], "name": ["Acme"]}) - assoc_df = pd.DataFrame({"company_id": ["1"], "contact_id": ["42"]}) - contact_df = pd.DataFrame({"id": ["42"], "firstname": ["Alice"]}) - - orientations = [ - ("c.id = cc.company_id", "cc.contact_id = ct.id"), # A - ("cc.company_id = c.id", "cc.contact_id = ct.id"), # B - ("c.id = cc.company_id", "ct.id = cc.contact_id"), # C - ("cc.company_id = c.id", "ct.id = cc.contact_id"), # D - ] - - handler: HubspotHandler = self.create_handler() - - for left_on, right_on in orientations: - with self.subTest(left_on=left_on, right_on=right_on): - companies_mock = MagicMock() - companies_mock.select.return_value = company_df.copy() - assoc_mock = MagicMock() - assoc_mock.list.return_value = assoc_df.copy() - contacts_mock = MagicMock() - contacts_mock.list.return_value = contact_df.copy() - - handler._tables["companies"] = companies_mock - handler._tables["company_contacts"] = assoc_mock - handler._tables["contacts"] = contacts_mock - - query = f""" - SELECT * - FROM companies c - JOIN company_contacts cc ON {left_on} - JOIN contacts ct ON {right_on} - """ - response = handler.native_query(query) - - self.assertEqual( - response.type, - RESPONSE_TYPE.TABLE, - msg=f"orientation ({left_on!r}, {right_on!r}) returned ERROR: " - f"{getattr(response, 'error_message', '')}", - ) - self.assertFalse(response.data_frame.empty) - - # The assoc table must be queried by company_id (left_assoc_col), not - # by some other column — this is the column the bug inverted. 
- assoc_conditions = assoc_mock.list.call_args.kwargs.get("conditions", []) - assoc_filter_cols = [fc.column for fc in assoc_conditions] - self.assertIn( - "company_id", - assoc_filter_cols, - msg=f"assoc.list not filtered on company_id for orientation " - f"({left_on!r}, {right_on!r}); got {assoc_filter_cols}", - ) - - def test_multijoin_query_handling(self): - """Test that multijoin queries return appropriate error since not supported.""" - query = """ - SELECT c.name, o.dealname - FROM companies c - JOIN deals o ON c.id = o.company_id - """ - response = self.handler.native_query(query) - - self.assertEqual(response.type, RESPONSE_TYPE.ERROR) - self.assertIn("not supported", response.error_message) - - -class TestHubspotPassthrough(unittest.TestCase): - """Exercise the PassthroughMixin retrofit (PAT path).""" - - def _mock_response(self, status_code=200): - resp = MagicMock() - resp.status_code = status_code - resp.headers = {"Content-Type": "application/json"} - resp.iter_content = MagicMock(return_value=iter([b'{"results":[]}'])) - resp.close = MagicMock() - return resp - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_passthrough_uses_bearer_and_hubspot_base_url(self, mock_request): - mock_request.return_value = self._mock_response() - handler = HubspotHandler( - "hubspot", - connection_data={"access_token": "pat-abc123xyz"}, - ) - from mindsdb.integrations.libs.passthrough_types import PassthroughRequest - - resp = handler.api_passthrough(PassthroughRequest("GET", "/crm/v3/owners")) - - self.assertEqual(resp.status_code, 200) - args, kwargs = mock_request.call_args - self.assertEqual(args[0], "GET") - self.assertEqual(args[1], "https://api.hubapi.com/crm/v3/owners") - self.assertEqual(kwargs["headers"]["Authorization"], "Bearer pat-abc123xyz") - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_test_passthrough_returns_ok_on_200(self, mock_request): - mock_request.return_value = 
self._mock_response(status_code=200) - handler = HubspotHandler("hubspot", connection_data={"access_token": "pat"}) - - result = handler.test_passthrough() - - self.assertTrue(result["ok"]) - self.assertEqual(result["status_code"], 200) - self.assertEqual(result["host"], "api.hubapi.com") - self.assertIsInstance(result["latency_ms"], int) - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_test_passthrough_returns_auth_failed_on_401(self, mock_request): - mock_request.return_value = self._mock_response(status_code=401) - handler = HubspotHandler("hubspot", connection_data={"access_token": "pat"}) - - result = handler.test_passthrough() - - self.assertFalse(result["ok"]) - self.assertEqual(result["error_code"], "auth_failed") - self.assertEqual(result["status_code"], 401) - self.assertEqual(result["host"], "api.hubapi.com") - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/test_mariadb.py b/tests/unit/handlers/test_mariadb.py deleted file mode 100644 index 9d75a8dce72..00000000000 --- a/tests/unit/handlers/test_mariadb.py +++ /dev/null @@ -1,89 +0,0 @@ -import unittest -from unittest.mock import patch, MagicMock -from collections import OrderedDict - -from mysql.connector import Error as MySQLError - -from base_handler_test import BaseDatabaseHandlerTest, MockCursorContextManager -from mindsdb.integrations.handlers.mariadb_handler.mariadb_handler import MariaDBHandler -from mindsdb.integrations.libs.response import TableResponse - - -class TestMariaDBHandler(BaseDatabaseHandlerTest, unittest.TestCase): - @property - def dummy_connection_data(self): - return OrderedDict( - host="127.0.0.1", - port=3307, - user="example_user", - password="example_pass", - database="example_db", - ) - - @property - def err_to_raise_on_connect_failure(self): - return MySQLError("Connection Failed") - - @property - def get_tables_query(self): - return """ - SELECT - TABLE_SCHEMA AS table_schema, - TABLE_NAME AS table_name, - 
TABLE_TYPE AS table_type - FROM - information_schema.TABLES - WHERE - TABLE_TYPE IN ('BASE TABLE', 'VIEW') - AND TABLE_SCHEMA = DATABASE() - ORDER BY 2 - ; - """ - - @property - def get_columns_query(self): - return f""" - select - COLUMN_NAME, - DATA_TYPE, - ORDINAL_POSITION, - COLUMN_DEFAULT, - IS_NULLABLE, - CHARACTER_MAXIMUM_LENGTH, - CHARACTER_OCTET_LENGTH, - NUMERIC_PRECISION, - NUMERIC_SCALE, - DATETIME_PRECISION, - CHARACTER_SET_NAME, - COLLATION_NAME - from - information_schema.columns - where - table_name = '{self.mock_table}' - and table_schema = DATABASE(); - """ - - def create_handler(self): - return MariaDBHandler("mariadb", connection_data=self.dummy_connection_data) - - def create_patcher(self): - return patch("mysql.connector.connect") - - def test_native_query(self): - """Test that native_query returns a TableResponse object with no error""" - mock_conn = MagicMock() - mock_cursor = MockCursorContextManager( - data=[{"id": 1}], description=[("id", 3, None, None, None, None, 1, 0, 45)] - ) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - query_str = f"SELECT * FROM {self.mock_table}" - data = self.handler.native_query(query_str) - - self.assertIsInstance(data, TableResponse) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/test_mssql.py b/tests/unit/handlers/test_mssql.py deleted file mode 100644 index d7024d51359..00000000000 --- a/tests/unit/handlers/test_mssql.py +++ /dev/null @@ -1,1190 +0,0 @@ -from collections import OrderedDict -import unittest -import pytest -from decimal import Decimal -from unittest.mock import patch, MagicMock -from uuid import UUID -import datetime -import sys -import builtins - -try: - from pymssql import OperationalError - from mindsdb.integrations.handlers.mssql_handler.mssql_handler import SqlServerHandler -except ImportError: - pytestmark = pytest.mark.skip("MSSQL handler not installed") - -from pandas import 
DataFrame - -from base_handler_test import BaseDatabaseHandlerTest -from mindsdb.integrations.libs.response import ( - OkResponse, - TableResponse, - ErrorResponse, - INF_SCHEMA_COLUMNS_NAMES_SET, - RESPONSE_TYPE, -) -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE - - -class TestMSSQLHandler(BaseDatabaseHandlerTest, unittest.TestCase): - @property - def dummy_connection_data(self): - return OrderedDict( - host="127.0.0.1", - port=1433, - user="example_user", - password="example_pass", - database="example_db", - ) - - @property - def err_to_raise_on_connect_failure(self): - return OperationalError("Connection Failed") - - @property - def get_tables_query(self): - return f""" - SELECT - table_schema, - table_name, - table_type - FROM {self.dummy_connection_data["database"]}.INFORMATION_SCHEMA.TABLES - WHERE TABLE_TYPE in ('BASE TABLE', 'VIEW'); - """ - - @property - def get_columns_query(self): - return f""" - SELECT - column_name as "Field", - data_type as "Type" - FROM - information_schema.columns - WHERE - table_name = '{self.mock_table}' - """ - - def create_handler(self): - return SqlServerHandler("mssql", connection_data=self.dummy_connection_data) - - def create_patcher(self): - return patch("pymssql.connect") - - def test_native_query_with_results(self): - """ - Tests the `native_query` method to ensure it executes a SQL query and handles the case - where the query returns a result set - """ - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - mock_cursor.fetchall.return_value = [{"id": 1, "name": "test1"}, {"id": 2, "name": "test2"}] - - mock_cursor.description = [ - ("id", None, None, None, None, None, None), - ("name", None, None, None, None, None, None), - ] - - query_str = "SELECT * FROM 
test_table" - data = self.handler.native_query(query_str) - - mock_conn.cursor.assert_called_once_with(as_dict=True) - mock_cursor.execute.assert_called_once_with(query_str) - - assert isinstance(data, TableResponse) - self.assertEqual(data.type, RESPONSE_TYPE.TABLE) - self.assertIsInstance(data.data_frame, DataFrame) - expected_columns = ["id", "name"] - self.assertEqual(list(data.data_frame.columns), expected_columns) - - mock_conn.commit.assert_called_once() - - def test_native_query_no_results(self): - """ - Tests the `native_query` method to ensure it executes a SQL query and handles the case - where the query doesn't return any results (e.g., INSERT, UPDATE, DELETE) - """ - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - mock_cursor.description = None - - query_str = "INSERT INTO test_table VALUES (1, 'test')" - data = self.handler.native_query(query_str) - - mock_conn.cursor.assert_called_once_with(as_dict=True) - mock_cursor.execute.assert_called_once_with(query_str) - - assert isinstance(data, OkResponse) - self.assertEqual(data.type, RESPONSE_TYPE.OK) - - mock_conn.commit.assert_called_once() - - def test_native_query_error(self): - """ - Tests the `native_query` method to ensure it properly handles and returns database errors - """ - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - error_msg = "Syntax error in SQL statement" - error = OperationalError(error_msg) - mock_cursor.execute.side_effect = error - - query_str = "INVALID SQL" - data = self.handler.native_query(query_str) - 
- mock_conn.cursor.assert_called_once_with(as_dict=True) - mock_cursor.execute.assert_called_once_with(query_str) - - assert isinstance(data, ErrorResponse) - self.assertEqual(data.type, RESPONSE_TYPE.ERROR) - self.assertEqual(data.error_message, str(error)) - - mock_conn.rollback.assert_called_once() - - def test_query_method(self): - """ - Tests the query method to ensure it correctly converts ASTNode to SQL and calls native_query - """ - orig_renderer = self.handler.renderer - renderer_mock = MagicMock() - renderer_mock.get_string = MagicMock(return_value="SELECT * FROM test") - - try: - self.handler.renderer = renderer_mock - self.handler.native_query = MagicMock() - self.handler.native_query.return_value = OkResponse() - - mock_ast = MagicMock() - result = self.handler.query(mock_ast) - renderer_mock.get_string.assert_called_once_with(mock_ast, with_failback=False) - self.handler.native_query.assert_called_once_with("SELECT * FROM test") - self.assertEqual(result, self.handler.native_query.return_value) - finally: - self.handler.renderer = orig_renderer - - def test_get_tables(self): - """ - Tests that get_tables calls native_query with the correct SQL - """ - expected_response = OkResponse() - self.handler.native_query = MagicMock(return_value=expected_response) - - response = self.handler.get_tables() - - self.handler.native_query.assert_called_once() - call_args = self.handler.native_query.call_args[0][0] - database = self.handler.connection_args["database"] - - self.assertIn(f"{database}.INFORMATION_SCHEMA.TABLES", call_args) - self.assertIn("table_schema", call_args) - self.assertIn("table_name", call_args) - self.assertIn("table_type", call_args) - self.assertEqual(response, expected_response) - - def test_get_columns(self): - """ - Tests that get_columns calls native_query with the correct SQL - """ - expected_response = TableResponse(data=DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET))) - self.handler.native_query = 
MagicMock(return_value=expected_response) - - table_name = "test_table" - response = self.handler.get_columns(table_name) - assert response.type == RESPONSE_TYPE.COLUMNS_TABLE - self.handler.native_query.assert_called_once() - call_args = self.handler.native_query.call_args[0][0] - - expected_sql = f""" - SELECT - COLUMN_NAME, - DATA_TYPE, - ORDINAL_POSITION, - COLUMN_DEFAULT, - IS_NULLABLE, - CHARACTER_MAXIMUM_LENGTH, - CHARACTER_OCTET_LENGTH, - NUMERIC_PRECISION, - NUMERIC_SCALE, - DATETIME_PRECISION, - CHARACTER_SET_NAME, - COLLATION_NAME - FROM - information_schema.columns - WHERE - table_name = '{table_name}' - """ - self.assertEqual(call_args, expected_sql) - self.assertEqual(response, expected_response) - - def test_meta_get_tables_returns_response(self): - # realistic names - df = DataFrame( - [ - { - "table_name": "customers", - "table_schema": "dbo", - "table_type": "BASE TABLE", - "table_description": None, - "row_count": 100, - }, - { - "table_name": "orders", - "table_schema": "dbo", - "table_type": "BASE TABLE", - "table_description": None, - "row_count": 500, - }, - { - "table_name": "products", - "table_schema": "dbo", - "table_type": "BASE TABLE", - "table_description": None, - "row_count": 42, - }, - ] - ) - expected_response = TableResponse(data=df) - self.handler.native_query = MagicMock(return_value=expected_response) - - # without filter - response = self.handler.meta_get_tables() - self.handler.native_query.assert_called_once() - self.assertIs(response, expected_response) - - # with filter - self.handler.native_query.reset_mock() - tables = ["customers", "orders"] - filtered_df = df[df["table_name"].isin(tables)].reset_index(drop=True) - filtered_response = TableResponse(data=filtered_df) - self.handler.native_query = MagicMock(return_value=filtered_response) - response = self.handler.meta_get_tables(table_names=tables) - self.handler.native_query.assert_called_once() - self.assertIs(response, filtered_response) - 
self.assertEqual(sorted(list(response.data_frame["table_name"])), sorted(tables)) - - def test_meta_get_columns_returns_response(self): - df = DataFrame( - [ - { - "table_name": "customers", - "column_name": "id", - "data_type": "int", - "column_description": None, - "column_default": None, - "is_nullable": 0, - }, - { - "table_name": "customers", - "column_name": "name", - "data_type": "varchar", - "column_description": None, - "column_default": None, - "is_nullable": 1, - }, - { - "table_name": "products", - "column_name": "sku", - "data_type": "varchar", - "column_description": None, - "column_default": None, - "is_nullable": 0, - }, - ] - ) - expected_response = TableResponse(data=df) - self.handler.native_query = MagicMock(return_value=expected_response) - - # without filter - response = self.handler.meta_get_columns() - self.handler.native_query.assert_called_once() - self.assertIs(response, expected_response) - - # with filter - self.handler.native_query.reset_mock() - tables = ["customers"] - filtered_df = df[df["table_name"].isin(tables)].reset_index(drop=True) - filtered_response = TableResponse(data=filtered_df) - self.handler.native_query = MagicMock(return_value=filtered_response) - response = self.handler.meta_get_columns(table_names=tables) - self.handler.native_query.assert_called_once() - self.assertIs(response, filtered_response) - self.assertTrue((response.data_frame["table_name"] == "customers").all()) - - def test_meta_get_column_statistics_returns_response(self): - df = DataFrame( - [ - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "id", - "NULL_PERCENTAGE": 0.0, - "DISTINCT_VALUES_COUNT": 100, - "MOST_COMMON_VALUES": None, - "MOST_COMMON_FREQUENCIES": None, - "MINIMUM_VALUE": "1", - "MAXIMUM_VALUE": "100", - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "sku", - "NULL_PERCENTAGE": 0.0, - "DISTINCT_VALUES_COUNT": 42, - "MOST_COMMON_VALUES": None, - "MOST_COMMON_FREQUENCIES": None, - "MINIMUM_VALUE": None, - "MAXIMUM_VALUE": None, - }, 
- ] - ) - expected_response = TableResponse(data=df) - self.handler.native_query = MagicMock(return_value=expected_response) - - # without filter - response = self.handler.meta_get_column_statistics() - self.handler.native_query.assert_called_once() - self.assertIs(response, expected_response) - - # with filter - self.handler.native_query.reset_mock() - tables = ["customers"] - filtered_df = df[df["TABLE_NAME"].isin(tables)].reset_index(drop=True) - filtered_response = TableResponse(data=filtered_df) - self.handler.native_query = MagicMock(return_value=filtered_response) - response = self.handler.meta_get_column_statistics(table_names=tables) - self.handler.native_query.assert_called_once() - self.assertIs(response, filtered_response) - self.assertTrue((response.data_frame["TABLE_NAME"] == "customers").all()) - - def test_meta_get_primary_keys_returns_response(self): - df = DataFrame( - [ - { - "table_name": "customers", - "column_name": "id", - "ordinal_position": 1, - "constraint_name": "pk_customers", - }, - {"table_name": "orders", "column_name": "id", "ordinal_position": 1, "constraint_name": "pk_orders"}, - ] - ) - expected_response = TableResponse(data=df) - self.handler.native_query = MagicMock(return_value=expected_response) - - # without filter - response = self.handler.meta_get_primary_keys() - self.handler.native_query.assert_called_once() - self.assertIs(response, expected_response) - - # with filter - self.handler.native_query.reset_mock() - tables = ["customers"] - filtered_df = df[df["table_name"].isin(tables)].reset_index(drop=True) - filtered_response = TableResponse(data=filtered_df) - self.handler.native_query = MagicMock(return_value=filtered_response) - response = self.handler.meta_get_primary_keys(table_names=tables) - self.handler.native_query.assert_called_once() - self.assertIs(response, filtered_response) - self.assertEqual(list(response.data_frame["table_name"].unique()), ["customers"]) - - def 
test_meta_get_foreign_keys_returns_response(self): - df = DataFrame( - [ - { - "parent_table_name": "customers", - "parent_column_name": "id", - "child_table_name": "orders", - "child_column_name": "customer_id", - "constraint_name": "fk_orders_customers", - }, - { - "parent_table_name": "products", - "parent_column_name": "sku", - "child_table_name": "orders", - "child_column_name": "product_sku", - "constraint_name": "fk_orders_products", - }, - ] - ) - expected_response = TableResponse(data=df) - self.handler.native_query = MagicMock(return_value=expected_response) - - # without filter - response = self.handler.meta_get_foreign_keys() - self.handler.native_query.assert_called_once() - self.assertIs(response, expected_response) - - # with filter (filter by child table names per handler implementation) - self.handler.native_query.reset_mock() - tables = ["orders"] - filtered_df = df[df["child_table_name"].isin(tables)].reset_index(drop=True) - filtered_response = TableResponse(data=filtered_df) - self.handler.native_query = MagicMock(return_value=filtered_response) - response = self.handler.meta_get_foreign_keys(table_names=tables) - self.handler.native_query.assert_called_once() - self.assertIs(response, filtered_response) - self.assertTrue((response.data_frame["child_table_name"] == "orders").all()) - - def test_meta_methods_result_shape_and_exceptions(self): - """ - Smoke-check expected columns presence and exception propagation - for all meta_* methods with and without table filters. 
- """ - methods = [ - ( - "meta_get_tables", - lambda: DataFrame( - [ - { - "table_name": "t1", - "table_schema": "dbo", - "table_type": "BASE TABLE", - "table_description": None, - "row_count": 1, - } - ] - ), - self.handler.meta_get_tables, - ), - ( - "meta_get_columns", - lambda: DataFrame( - [ - { - "table_name": "t1", - "column_name": "c1", - "data_type": "int", - "column_description": None, - "column_default": None, - "is_nullable": 1, - } - ] - ), - self.handler.meta_get_columns, - ), - ( - "meta_get_column_statistics", - lambda: DataFrame( - [ - { - "TABLE_NAME": "t1", - "COLUMN_NAME": "c1", - "NULL_PERCENTAGE": None, - "DISTINCT_VALUES_COUNT": 0, - "MOST_COMMON_VALUES": None, - "MOST_COMMON_FREQUENCIES": None, - "MINIMUM_VALUE": None, - "MAXIMUM_VALUE": None, - } - ] - ), - self.handler.meta_get_column_statistics, - ), - ( - "meta_get_primary_keys", - lambda: DataFrame( - [{"table_name": "t1", "column_name": "id", "ordinal_position": 1, "constraint_name": "pk_t1"}] - ), - self.handler.meta_get_primary_keys, - ), - ( - "meta_get_foreign_keys", - lambda: DataFrame( - [ - { - "parent_table_name": "p", - "parent_column_name": "id", - "child_table_name": "c", - "child_column_name": "p_id", - "constraint_name": "fk_c_p", - } - ] - ), - self.handler.meta_get_foreign_keys, - ), - ] - - for name, df_factory, method in methods: - with self.subTest(method=name, case="no_filter"): - df = df_factory() - expected_response = TableResponse(data=df) - self.handler.native_query = MagicMock(return_value=expected_response) - res = method() - self.handler.native_query.assert_called_once() - self.assertIs(res, expected_response) - self.assertIsNotNone(res.data_frame) - # Columns presence smoke-check - for col in list(df.columns): - self.assertIn(col, res.data_frame.columns) - - with self.subTest(method=name, case="with_filter"): - df = df_factory() - expected_response = TableResponse(data=df) - self.handler.native_query = MagicMock(return_value=expected_response) - res = ( - 
method(table_names=["A", "B"]) - if name != "meta_get_column_statistics" - else method(table_names=["A", "B"]) - ) # same signature - self.handler.native_query.assert_called_once() - self.assertIs(res, expected_response) - self.assertIsNotNone(res.data_frame) - for col in list(df.columns): - self.assertIn(col, res.data_frame.columns) - - with self.subTest(method=name, case="exception_propagation"): - err = ( - OperationalError(f"{name} failure") - if name != "meta_get_primary_keys" - else OperationalError("pk failure") - ) - self.handler.native_query = MagicMock(side_effect=err) - with self.assertRaises(type(err)): - _ = method() - - # access denied - with self.subTest(method="meta_get_column_statistics", case="permissions_error"): - permission_err = OperationalError("The SELECT permission was denied on object 'dm_db_stats_histogram'") - self.handler.native_query = MagicMock(side_effect=permission_err) - with self.assertRaises(OperationalError): - _ = self.handler.meta_get_column_statistics() - - def test_connect_validation(self): - """ - Tests that connect method raises ValueError when required connection parameters are missing - """ - invalid_connection_args = self.dummy_connection_data.copy() - del invalid_connection_args["host"] - handler = SqlServerHandler("mssql", connection_data=invalid_connection_args) - - with self.assertRaises(ValueError): - handler.connect() - - invalid_connection_args = self.dummy_connection_data.copy() - del invalid_connection_args["user"] - handler = SqlServerHandler("mssql", connection_data=invalid_connection_args) - - with self.assertRaises(ValueError): - handler.connect() - - def test_connect_optional_params(self): - """ - Tests that connect method passes optional parameters to the connection - """ - self.handler.connection_args["server"] = "my_server" - self.handler.connect() - - call_kwargs = self.mock_connect.call_args[1] - self.assertEqual(call_kwargs["server"], "my_server") - self.tearDown() - self.setUp() - 
self.handler.connection_args["port"] = 1433 - self.handler.connect() - - call_kwargs = self.mock_connect.call_args[1] - self.assertEqual(call_kwargs["port"], 1433) - - def test_disconnect(self): - """ - Tests the disconnect method to ensure it correctly closes connections - """ - mock_conn = MagicMock() - - self.handler.connection = mock_conn - self.handler.is_connected = True - self.handler.disconnect() - - mock_conn.close.assert_called_once() - self.assertFalse(self.handler.is_connected) - self.handler.is_connected = False - mock_conn.reset_mock() - self.handler.disconnect() - mock_conn.close.assert_not_called() - - def test_check_connection(self): - """ - Tests the check_connection method to ensure it properly tests connectivity - """ - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - response = self.handler.check_connection() - mock_cursor.execute.assert_called_once_with("select 1;") - - self.assertTrue(response.success) - self.assertIsNone(response.error_message) - self.handler.connect.side_effect = OperationalError("Connection error") - - response = self.handler.check_connection() - - self.assertFalse(response.success) - self.assertEqual(response.error_message, "Connection error") - - self.handler.connect.side_effect = ValueError("Invalid connection args") - - response = self.handler.check_connection() - - self.assertFalse(response.success) - self.assertEqual(response.error_message, "Invalid connection args") - - def test_types_casting(self): - """Test that types are casted correctly""" - query_str = "SELECT * FROM test_table" - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = 
MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - # region test numeric types (and bool, as bit is a synonym for boolean) - """Data obtained using: - CREATE TABLE test_numeric_types ( - n_bit BIT, -- 0|1|NULL - n_tinyint TINYINT, -- 0:255 - n_smallint SMALLINT, -- -32,768:32,767 - n_int INT, -- -2^31:2^31-1 - n_bigint BIGINT, -- -2^63:2^63-1 - n_decimal DECIMAL(18,2), - n_decimal_p DECIMAL(38), - n_numeric NUMERIC(18,4), - n_money MONEY, -- -922,337,203,685,477.5808:922,337,203,685,477.5807 - n_smallmoney SMALLMONEY, -- -214,748.3648:214,748.3647 - n_float FLOAT(53), - n_real REAL -- FLOAT(24) - ); - - INSERT INTO test_numeric_types ( - n_bit, - n_tinyint, - n_smallint, - n_int, - n_bigint, - n_decimal, - n_decimal_p, - n_numeric, - n_money, - n_smallmoney, - n_float, - n_real - ) VALUES ( - 1, -- n_bit - 255, -- n_tinyint - 32767, -- n_smallint - 2147483647, -- n_int - 9223372036854775807, -- n_bigint - 1234.56, -- n_decimal - 12345678901234567890123456789012345678, -- n_decimal_p - 1234.5678, -- n_numeric - $123456.7890, -- n_money - $214748.3647, -- n_smallmoney - 3.14159265358979, -- n_float - 3.141592 -- n_real - ); - """ - input_row = { - "n_bit": True, - "n_tinyint": 255, - "n_smallint": 32767, - "n_int": 2147483647, - "n_bigint": 9223372036854775807, - "n_decimal": Decimal("1234.56"), - "n_decimal_p": Decimal("12345678901234567890123456789012345678"), - "n_numeric": Decimal("1234.5678"), - "n_money": Decimal("123456.7890"), - "n_smallmoney": Decimal("214748.3647"), - "n_float": 3.14159265358979, - "n_real": 3.141592025756836, - } - mock_cursor.fetchall.return_value = [input_row] - - mock_cursor.description = [ - ("n_bit", 3, None, None, None, None, None), - ("n_tinyint", 3, None, None, None, None, None), - ("n_smallint", 3, None, None, None, None, None), - ("n_int", 3, None, None, None, None, None), - ("n_bigint", 3, None, None, None, None, None), - ("n_decimal", 5, None, None, None, None, None), - 
("n_decimal_p", 5, None, None, None, None, None), - ("n_numeric", 5, None, None, None, None, None), - ("n_money", 5, None, None, None, None, None), - ("n_smallmoney", 5, None, None, None, None, None), - ("n_float", 3, None, None, None, None, None), - ("n_real", 3, None, None, None, None, None), - ] - - response: TableResponse = self.handler.native_query(query_str) - excepted_mysql_types = [ - MYSQL_DATA_TYPE.TINYINT, - MYSQL_DATA_TYPE.INT, - MYSQL_DATA_TYPE.INT, - MYSQL_DATA_TYPE.INT, - MYSQL_DATA_TYPE.INT, - MYSQL_DATA_TYPE.DECIMAL, - MYSQL_DATA_TYPE.DECIMAL, - MYSQL_DATA_TYPE.DECIMAL, - MYSQL_DATA_TYPE.DECIMAL, - MYSQL_DATA_TYPE.DECIMAL, - MYSQL_DATA_TYPE.FLOAT, - MYSQL_DATA_TYPE.FLOAT, - ] - self.assertEqual([col.type for col in response.columns], excepted_mysql_types) - for columns_name, input_value in input_row.items(): - result_value = response.data_frame[columns_name][0] - self.assertEqual(result_value, input_value) - # endregion - - # region test string types - """Data obtained using: - CREATE TABLE test_text_blob_types ( - t_char CHAR(10), - t_nchar NCHAR(10), -- Unicode - t_varchar VARCHAR(100), - t_nvarchar NVARCHAR(100), -- Unicode - t_text TEXT, - t_ntext NTEXT, -- Unicode - t_binary BINARY(10), - t_varbinary VARBINARY(100), - t_image IMAGE, - t_xml XML, - t_uniqueidentifier UNIQUEIDENTIFIER - ); - - INSERT INTO test_text_blob_types ( - t_char, - t_nchar, - t_varchar, - t_nvarchar, - t_text, - t_ntext, - t_binary, - t_varbinary, - t_image, - t_xml, - t_uniqueidentifier - ) VALUES ( - 'Test', -- t_char - N'Test', -- t_nchar - 'Test', -- t_varchar - N'Test', -- t_nvarchar - 'Test', -- t_text - N'Test', -- t_ntext - 0x48656C6C6F, -- t_binary ('Hello' hex) - 0x48656C6C6F, -- t_varbinary ('Hello World' hex) - 0x48656C6C6F, -- t_image ('Hello Image' hex) - 'TestXML123', -- t_xml - NEWID() -- t_uniqueidentifier - ); - """ - input_row = { - "t_char": "Test ", - "t_nchar": "Test ", - "t_varchar": "Test", - "t_nvarchar": "Test", - "t_text": "Test", - "t_ntext": 
"Test", - "t_binary": b"Hello\x00\x00\x00\x00\x00", - "t_varbinary": b"Hello", - "t_image": b"Hello", - "t_xml": "TestXML123", - "t_uniqueidentifier": UUID("497b4fec-4659-431d-a146-39e76740c8a9"), - } - mock_cursor.fetchall.return_value = [input_row] - - mock_cursor.description = [ - ("t_char", 1, None, None, None, None, None), - ("t_nchar", 1, None, None, None, None, None), - ("t_varchar", 1, None, None, None, None, None), - ("t_nvarchar", 1, None, None, None, None, None), - ("t_text", 1, None, None, None, None, None), - ("t_ntext", 1, None, None, None, None, None), - ("t_binary", 2, None, None, None, None, None), - ("t_varbinary", 2, None, None, None, None, None), - ("t_image", 2, None, None, None, None, None), - ("t_xml", 1, None, None, None, None, None), - ("t_uniqueidentifier", 2, None, None, None, None, None), - ] - - response: TableResponse = self.handler.native_query(query_str) - excepted_mysql_types = [ - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.BINARY, - MYSQL_DATA_TYPE.BINARY, - MYSQL_DATA_TYPE.BINARY, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.BINARY, - ] - self.assertEqual([col.type for col in response.columns], excepted_mysql_types) - for columns_name, input_value in input_row.items(): - result_value = response.data_frame[columns_name][0] - self.assertEqual(result_value, input_value) - # endregion - - # region test date types - """Data obtained using: - CREATE TABLE test_datetime_types ( - d_date DATE, -- (YYYY-MM-DD) - d_time TIME, - d_time_p TIME(7), - d_smalldatetime SMALLDATETIME, - d_datetime DATETIME, - d_datetime2 DATETIME2, - d_datetime2_p DATETIME2(7), - d_datetimeoffset DATETIMEOFFSET, - d_datetimeoffset_p DATETIMEOFFSET(7) - ); - - INSERT INTO test_datetime_types ( - d_date, - d_time, - d_time_p, - d_smalldatetime, - d_datetime, - d_datetime2, - d_datetime2_p, - d_datetimeoffset, - d_datetimeoffset_p - ) VALUES ( - 
GETDATE(), -- d_date - CAST(GETDATE() AS TIME), -- d_time - CAST(GETDATE() AS TIME(7)), -- d_time_p - GETDATE(), -- d_smalldatetime - GETDATE(), -- d_datetime - SYSDATETIME(), -- d_datetime2 - SYSDATETIME(), -- d_datetime2_p - SYSDATETIMEOFFSET(), -- d_datetimeoffset - SYSDATETIMEOFFSET() -- d_datetimeoffset_p - ); - """ - input_row = { - "d_date": datetime.date(2025, 4, 22), - "d_time": datetime.time(12, 30, 45, 123456), - "d_time_p": datetime.time(12, 30, 45, 123456), - "d_smalldatetime": datetime.datetime(2025, 4, 22, 12, 30), - "d_datetime": datetime.datetime(2025, 4, 22, 12, 30, 45, 123456), - "d_datetime2": datetime.datetime(2025, 4, 22, 12, 30, 45, 123456), - "d_datetime2_p": datetime.datetime(2025, 4, 22, 12, 30, 45, 123456), - "d_datetimeoffset": datetime.datetime(2025, 4, 22, 12, 30, 45, 123456, tzinfo=datetime.timezone.utc), - "d_datetimeoffset_p": datetime.datetime( - 2025, 4, 22, 12, 30, 45, 123456, tzinfo=datetime.timezone(datetime.timedelta(hours=-7)) - ), - } - mock_cursor.fetchall.return_value = [input_row] - - mock_cursor.description = [ - ("d_date", 2, None, None, None, None, None), - ("d_time", 2, None, None, None, None, None), - ("d_time_p", 2, None, None, None, None, None), - ("d_smalldatetime", 4, None, None, None, None, None), - ("d_datetime", 4, None, None, None, None, None), - ("d_datetime2", 2, None, None, None, None, None), - ("d_datetime2_p", 2, None, None, None, None, None), - ("d_datetimeoffset", 2, None, None, None, None, None), - ("d_datetimeoffset_p", 2, None, None, None, None, None), - ] - - response: TableResponse = self.handler.native_query(query_str) - excepted_mysql_types = [ - # DATE and TIME is not possible to infer, so they are BINARY - MYSQL_DATA_TYPE.BINARY, - MYSQL_DATA_TYPE.BINARY, - MYSQL_DATA_TYPE.BINARY, - MYSQL_DATA_TYPE.DATETIME, - MYSQL_DATA_TYPE.DATETIME, - MYSQL_DATA_TYPE.DATETIME, - MYSQL_DATA_TYPE.DATETIME, - MYSQL_DATA_TYPE.DATETIME, - MYSQL_DATA_TYPE.DATETIME, - ] - self.assertEqual([col.type for col in 
response.columns], excepted_mysql_types) - for columns_name, input_value in input_row.items(): - result_value = response.data_frame[columns_name][0] - if columns_name == "d_datetimeoffset_p": - self.assertEqual(result_value.strftime("%Y-%m-%d %H:%M:%S"), "2025-04-22 19:30:45") - continue - self.assertEqual(result_value, input_value) - # endregion - - -class TestMSSQLHandlerODBC(unittest.TestCase): - """Tests for MSSQL handler with ODBC connection""" - - def setUp(self): - self.connection_data = OrderedDict( - host="127.0.0.1", - port=1433, - user="example_user", - password="example_pass", - database="example_db", - driver="ODBC Driver 18 for SQL Server", - use_odbc=True, - ) - - def test_odbc_mode_enabled_with_driver_param(self): - """Test that ODBC mode is enabled when driver parameter is provided""" - handler = SqlServerHandler("mssql_odbc", connection_data=self.connection_data) - self.assertTrue(handler.use_odbc) - - def test_odbc_mode_enabled_with_use_odbc_param(self): - """Test that ODBC mode is enabled when use_odbc parameter is True""" - connection_data = self.connection_data.copy() - del connection_data["driver"] - connection_data["use_odbc"] = True - - handler = SqlServerHandler("mssql_odbc", connection_data=connection_data) - self.assertTrue(handler.use_odbc) - - def test_odbc_mode_disabled_by_default(self): - """Test that ODBC mode is disabled when neither driver nor use_odbc is provided""" - connection_data = OrderedDict( - host="127.0.0.1", - port=1433, - user="example_user", - password="example_pass", - database="example_db", - ) - handler = SqlServerHandler("mssql", connection_data=connection_data) - self.assertFalse(handler.use_odbc) - - def test_odbc_connection_string_construction(self): - """Test that ODBC connection string is constructed correctly""" - mock_pyodbc = MagicMock() - mock_connect = MagicMock() - mock_pyodbc.connect = mock_connect - - # Mock pyodbc in sys.modules so it can be imported - sys.modules["pyodbc"] = mock_pyodbc - - try: - 
handler = SqlServerHandler("mssql_odbc", connection_data=self.connection_data) - handler.connect() - self.assertTrue(mock_connect.called, "mock_connect was not called") - call_args = mock_connect.call_args - conn_str = call_args[0][0] if call_args[0] else "" - - self.assertIn("DRIVER={ODBC Driver 18 for SQL Server}", conn_str) - self.assertIn("SERVER=127.0.0.1,1433", conn_str) - self.assertIn("DATABASE=example_db", conn_str) - self.assertIn("UID=example_user", conn_str) - self.assertIn("PWD=example_pass", conn_str) - finally: - if "pyodbc" in sys.modules: - del sys.modules["pyodbc"] - - def test_odbc_connection_with_encryption_params(self): - """Test that encryption parameters are added to connection string""" - mock_pyodbc = MagicMock() - mock_connect = MagicMock() - mock_pyodbc.connect = mock_connect - - sys.modules["pyodbc"] = mock_pyodbc - - connection_data = self.connection_data.copy() - connection_data["encrypt"] = "yes" - connection_data["trust_server_certificate"] = "yes" - - try: - handler = SqlServerHandler("mssql_odbc", connection_data=connection_data) - handler.connect() - self.assertTrue(mock_connect.called, "mock_connect was not called") - conn_str = mock_connect.call_args[0][0] - self.assertIn("Encrypt=yes", conn_str) - self.assertIn("TrustServerCertificate=yes", conn_str) - finally: - if "pyodbc" in sys.modules: - del sys.modules["pyodbc"] - - def test_odbc_import_error_handling(self): - """Test that ImportError is raised with helpful message when pyodbc is not installed""" - orig_pyodbc = sys.modules.get("pyodbc") - - try: - # Remove pyodbc from sys.modules to simulate it not being installed - if "pyodbc" in sys.modules: - del sys.modules["pyodbc"] - - handler = SqlServerHandler("mssql_odbc", connection_data=self.connection_data) - - original_import = builtins.__import__ - - def mock_import(name, *args, **kwargs): - if name == "pyodbc": - raise ImportError("No module named 'pyodbc'") - return original_import(name, *args, **kwargs) - - with 
patch("builtins.__import__", side_effect=mock_import): - with self.assertRaises(ImportError) as context: - handler._connect_odbc() - - self.assertIn("pyodbc is not installed", str(context.exception)) - self.assertIn("pip install", str(context.exception).lower()) - finally: - if orig_pyodbc is not None: - sys.modules["pyodbc"] = orig_pyodbc - elif "pyodbc" in sys.modules: - del sys.modules["pyodbc"] - - def test_odbc_driver_not_found_error(self): - """Test that ConnectionError is raised with helpful message when ODBC driver is not found""" - mock_pyodbc = MagicMock() - mock_pyodbc.Error = Exception - mock_error = Exception("Can't open lib 'ODBC Driver 18 for SQL Server' : file not found") - mock_pyodbc.connect.side_effect = mock_error - - sys.modules["pyodbc"] = mock_pyodbc - - try: - handler = SqlServerHandler("mssql_odbc", connection_data=self.connection_data) - with self.assertRaises((ConnectionError, Exception)): - handler.connect() - finally: - if "pyodbc" in sys.modules: - del sys.modules["pyodbc"] - - def test_odbc_native_query_with_row_objects(self): - """Test that native_query correctly handles pyodbc Row objects""" - - class MockRow: - def __init__(self, *values): - self.values = values - - def __iter__(self): - return iter(self.values) - - def __getitem__(self, idx): - return self.values[idx] - - mock_pyodbc = MagicMock() - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - mock_cursor.fetchall.return_value = [MockRow(1, "test1"), MockRow(2, "test2")] - mock_cursor.description = [ - ("id", None, None, None, None, None, None), - ("name", None, None, None, None, None, None), - ] - - sys.modules["pyodbc"] = mock_pyodbc - - try: - handler = SqlServerHandler("mssql_odbc", connection_data=self.connection_data) - handler.connect = MagicMock(return_value=mock_conn) - handler.is_connected = True - mock_conn.cursor = 
MagicMock(return_value=mock_cursor) - - query_str = "SELECT * FROM test_table" - response = handler.native_query(query_str) - - # Verify cursor was called without as_dict parameter (ODBC doesn't support it) - mock_conn.cursor.assert_called_once_with() - mock_cursor.execute.assert_called_once_with(query_str) - - self.assertIsInstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - self.assertIsInstance(response.data_frame, DataFrame) - self.assertEqual(list(response.data_frame.columns), ["id", "name"]) - finally: - if "pyodbc" in sys.modules: - del sys.modules["pyodbc"] - - def test_odbc_connection_string_with_additional_args(self): - """Test that additional connection string arguments are appended""" - connection_data = self.connection_data.copy() - connection_data["connection_string_args"] = "ApplicationIntent=ReadOnly;MultiSubnetFailover=Yes" - - mock_pyodbc = MagicMock() - mock_connect = MagicMock() - mock_pyodbc.connect = mock_connect - - sys.modules["pyodbc"] = mock_pyodbc - - try: - handler = SqlServerHandler("mssql_odbc", connection_data=connection_data) - handler.connect() - - self.assertTrue(mock_connect.called, "mock_connect was not called") - conn_str = mock_connect.call_args[0][0] - self.assertIn("ApplicationIntent=ReadOnly", conn_str) - self.assertIn("MultiSubnetFailover=Yes", conn_str) - finally: - if "pyodbc" in sys.modules: - del sys.modules["pyodbc"] - - def test_odbc_vs_pymssql_type_inference(self): - """Test that type inference works correctly for ODBC connections""" - mock_pyodbc = MagicMock() - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - class MockRow: - def __init__(self, *values): - self.values = values - - def __iter__(self): - return iter(self.values) - - def __getitem__(self, idx): - return self.values[idx] - - mock_cursor.fetchall.return_value = [ - MockRow(123, 45.67, "text", 
datetime.datetime(2024, 1, 1)), - ] - mock_cursor.description = [ - ("int_col", None, None, None, None, None, None), - ("float_col", None, None, None, None, None, None), - ("text_col", None, None, None, None, None, None), - ("datetime_col", None, None, None, None, None, None), - ] - - sys.modules["pyodbc"] = mock_pyodbc - - try: - handler = SqlServerHandler("mssql_odbc", connection_data=self.connection_data) - handler.connect = MagicMock(return_value=mock_conn) - handler.is_connected = True - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - response = handler.native_query("SELECT * FROM test") - - self.assertIsInstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - self.assertIsNotNone(response.columns) - self.assertTrue(len(response.columns) > 0) - finally: - if "pyodbc" in sys.modules: - del sys.modules["pyodbc"] - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/test_mysql.py b/tests/unit/handlers/test_mysql.py deleted file mode 100644 index a506e0ba844..00000000000 --- a/tests/unit/handlers/test_mysql.py +++ /dev/null @@ -1,827 +0,0 @@ -import unittest -import datetime -from array import array -from decimal import Decimal -from collections import OrderedDict -from unittest.mock import patch, MagicMock - -import mysql.connector -import pandas as pd -from pandas import DataFrame -from pandas.api import types as pd_types - -from base_handler_test import BaseDatabaseHandlerTest, MockCursorContextManager -from mindsdb.integrations.handlers.mysql_handler.mysql_handler import MySQLHandler -from mindsdb.integrations.libs.response import ( - OkResponse, - TableResponse, - DataHandlerResponse as Response, - INF_SCHEMA_COLUMNS_NAMES_SET, - RESPONSE_TYPE, -) -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE - - -class TestMySQLHandler(BaseDatabaseHandlerTest, unittest.TestCase): - @property - def dummy_connection_data(self): - return OrderedDict( - host="127.0.0.1", - 
port=3306, - user="root", - password="password", - database="test_db", - ssl=False, - ) - - @property - def err_to_raise_on_connect_failure(self): - return mysql.connector.Error("Connection Failed") - - @property - def get_tables_query(self): - return """ - SELECT - TABLE_SCHEMA AS table_schema, - TABLE_NAME AS table_name, - TABLE_TYPE AS table_type - FROM - information_schema.TABLES - WHERE - TABLE_TYPE IN ('BASE TABLE', 'VIEW') - AND TABLE_SCHEMA = DATABASE() - ORDER BY 2 - ; - """ - - @property - def get_columns_query(self): - return f""" - select - COLUMN_NAME, - DATA_TYPE, - ORDINAL_POSITION, - COLUMN_DEFAULT, - IS_NULLABLE, - CHARACTER_MAXIMUM_LENGTH, - CHARACTER_OCTET_LENGTH, - NUMERIC_PRECISION, - NUMERIC_SCALE, - DATETIME_PRECISION, - CHARACTER_SET_NAME, - COLLATION_NAME - from - information_schema.columns - where - table_name = '{self.mock_table}' - and table_schema = DATABASE(); - """ - - def create_handler(self): - return MySQLHandler("mysql", connection_data=self.dummy_connection_data) - - def create_patcher(self): - return patch("mysql.connector.connect") - - def test_native_query(self): - """Test that native_query returns a Response object with no error""" - mock_conn = MagicMock() - mock_cursor = MockCursorContextManager( - data=[{"id": 1}], description=[("id", 3, None, None, None, None, 1, 0, 45)] - ) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - query_str = f"SELECT * FROM {self.mock_table}" - data = self.handler.native_query(query_str) - - self.assertIsInstance(data, TableResponse) - - def test_native_query_with_results(self): - """ - Tests the `native_query` method to ensure it executes a SQL query and handles the case - where the query returns a result set, streaming data via fetchmany - """ - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - 
self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - mock_conn.is_connected = MagicMock(return_value=True) - - # fetchmany returns tuples (non-dictionary cursor), then empty list to signal end - mock_cursor.fetchmany.side_effect = [ - [(1, "test1"), (2, "test2")], - [], - ] - - # MySQL cursor provides column info via description attribute - mock_cursor.description = [ - ("id", None, None, None, None, None, None), - ("name", None, None, None, None, None, None), - ] - - mock_cursor.with_rows = True - - query_str = "SELECT * FROM test_table" - data = self.handler.native_query(query_str) - - mock_conn.cursor.assert_called_once_with(buffered=False) - mock_cursor.execute.assert_called_once_with(query_str) - - assert isinstance(data, TableResponse) - self.assertIsInstance(data.data_frame, DataFrame) - - expected_columns = ["id", "name"] - self.assertEqual(list(data.data_frame.columns), expected_columns) - - def test_native_query_no_results(self): - """ - Tests the `native_query` method to ensure it executes a SQL query and handles the case - where the query doesn't return any results (e.g., INSERT, UPDATE, DELETE) - """ - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - mock_conn.is_connected = MagicMock(return_value=True) - - mock_cursor.with_rows = False - mock_cursor.rowcount = 1 - - query_str = "INSERT INTO test_table VALUES (1, 'test')" - data = self.handler.native_query(query_str) - - mock_conn.cursor.assert_called_once_with(buffered=False) - mock_cursor.execute.assert_called_once_with(query_str) - - assert isinstance(data, OkResponse) - self.assertEqual(data.affected_rows, 1) - - def test_native_query_error(self): - """ - Tests the `native_query` method to ensure it 
properly handles and returns database errors - """ - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - mock_conn.is_connected = MagicMock(return_value=True) - - error_msg = "Syntax error in SQL statement" - error = mysql.connector.Error(error_msg) - mock_cursor.execute.side_effect = error - - query_str = "INVALID SQL" - data = self.handler.native_query(query_str) - - mock_conn.cursor.assert_called_once_with(buffered=False) - mock_cursor.execute.assert_called_once_with(query_str) - - assert isinstance(data, Response) - self.assertEqual(data.type, RESPONSE_TYPE.ERROR) - self.assertEqual(data.error_message, str(error)) - - mock_conn.rollback.assert_called_once() - - def test_is_connected_property(self): - """ - Tests the is_connected property to ensure it correctly reflects the connection state - """ - self.handler.connection = None - self.assertFalse(self.handler.is_connected) - - mock_conn = MagicMock() - mock_conn.is_connected = MagicMock(return_value=False) - self.handler.connection = mock_conn - self.assertFalse(self.handler.is_connected) - - mock_conn.is_connected = MagicMock(return_value=True) - self.handler.connection = mock_conn - self.assertTrue(self.handler.is_connected) - - def test_disconnect(self): - """ - Tests the disconnect method to ensure it correctly closes connections - """ - mock_conn = MagicMock() - mock_conn.is_connected = MagicMock(return_value=True) - self.handler.connection = mock_conn - - self.handler.disconnect() - - mock_conn.close.assert_called_once() - - mock_conn.reset_mock() - mock_conn.is_connected = MagicMock(return_value=False) - self.handler.disconnect() - mock_conn.close.assert_not_called() - - def test_check_connection_success(self): - """ - Tests that check_connection returns success status 
when connection is valid - """ - self.handler.connection_data = self.dummy_connection_data.copy() - - mock_conn = MagicMock() - mock_conn.is_connected = MagicMock(return_value=True) - self.mock_connect.return_value = mock_conn - - response = self.handler.check_connection() - - self.assertTrue(response.success) - self.assertIsNone(response.error_message) - self.mock_connect.assert_called_once() - - def test_check_connection_failure(self): - """ - Tests that check_connection returns failure status and error message when connection fails - """ - self.handler.connection_data = self.dummy_connection_data.copy() - - error_message = "Connection failed: Unknown MySQL server host 'invalid-host'" - self.mock_connect.side_effect = mysql.connector.Error(error_message) - - response = self.handler.check_connection() - - self.assertFalse(response.success) - self.assertIsNotNone(response.error_message) - self.assertIn("Connection failed", response.error_message) - - def test_check_connection_closes_on_success(self): - """ - Tests that check_connection closes the connection after successful check if it wasn't already connected - """ - self.handler.connection_data = self.dummy_connection_data.copy() - self.handler.connection = None # Not connected initially - - mock_conn = MagicMock() - mock_conn.is_connected = MagicMock(return_value=True) - self.mock_connect.return_value = mock_conn - - response = self.handler.check_connection() - - self.assertTrue(response.success) - mock_conn.close.assert_called_once() - - def test_connection_with_url(self): - """ - Tests connecting with a URL connection string instead of individual parameters - """ - url_connection_data = {"url": "mysql://root:password@127.0.0.1:3306/test_db"} - self.handler.connection_data = url_connection_data - - # Mock ConnectionConfig to process the URL - with patch("mindsdb.integrations.handlers.mysql_handler.mysql_handler.ConnectionConfig") as mock_config_class: - mock_model = MagicMock() - 
mock_model.model_dump.return_value = { - "host": "127.0.0.1", - "port": 3306, - "user": "root", - "password": "password", - "database": "test_db", - "connection_timeout": 10, - "collation": "utf8mb4_general_ci", - "use_pure": True, - } - mock_config_class.return_value = mock_model - - self.handler.connect() - - mock_config_class.assert_called_once_with(**url_connection_data) - self.mock_connect.assert_called_once() - - def test_unpack_config(self): - """ - Tests the _unpack_config method to ensure it correctly validates and unpacks connection data - """ - with patch("mindsdb.integrations.handlers.mysql_handler.mysql_handler.ConnectionConfig") as mock_config_class: - mock_model = MagicMock() - mock_model.model_dump.return_value = { - "host": "127.0.0.1", - "port": 3306, - "user": "root", - "password": "password", - "database": "test_db", - } - mock_config_class.return_value = mock_model - - valid_config = self.dummy_connection_data.copy() - self.handler.connection_data = valid_config - - config = self.handler._unpack_config() - mock_config_class.assert_called_once_with(**valid_config) - mock_model.model_dump.assert_called_once_with(exclude_unset=True) - - self.assertEqual(config["host"], "127.0.0.1") - self.assertEqual(config["port"], 3306) - self.assertEqual(config["user"], "root") - self.assertEqual(config["password"], "password") - self.assertEqual(config["database"], "test_db") - - mock_config_class.side_effect = ValueError("Invalid config") - with self.assertRaises(ValueError): - self.handler._unpack_config() - - def test_connect_with_ssl(self): - """ - Tests connecting with SSL configuration to ensure SSL parameters are correctly passed - """ - self.handler.connection_data = self.dummy_connection_data.copy() - self.handler.connection_data["ssl"] = True - self.handler.connection_data["ssl_ca"] = "/path/to/ca.pem" - self.handler.connection_data["ssl_cert"] = "/path/to/cert.pem" - self.handler.connection_data["ssl_key"] = "/path/to/key.pem" - - 
self.handler.connect() - - call_kwargs = self.mock_connect.call_args[1] - self.assertIn("client_flags", call_kwargs) - self.assertIn(mysql.connector.constants.ClientFlag.SSL, call_kwargs["client_flags"]) - self.assertEqual(call_kwargs["ssl_ca"], "/path/to/ca.pem") - self.assertEqual(call_kwargs["ssl_cert"], "/path/to/cert.pem") - self.assertEqual(call_kwargs["ssl_key"], "/path/to/key.pem") - - def test_connect_sets_configuration(self): - """ - Tests that connect method correctly sets default configuration values when not provided - """ - self.handler.connection_data = { - "host": "127.0.0.1", - "port": 3306, - "user": "root", - "password": "password", - "database": "test_db", - } - - self.handler.connect() - - call_kwargs = self.mock_connect.call_args[1] - self.assertEqual(call_kwargs["connection_timeout"], 10) - self.assertEqual(call_kwargs["collation"], "utf8mb4_general_ci") - self.assertEqual(call_kwargs["use_pure"], True) - - # Verify autocommit was set on the connection - self.mock_connect.return_value.autocommit = True - - def test_query_method(self): - """ - Tests the query method to ensure it correctly converts ASTNode to SQL and calls native_query - """ - with patch("mindsdb.integrations.handlers.mysql_handler.mysql_handler.SqlalchemyRender") as mock_renderer_class: - mock_renderer = MagicMock() - mock_renderer.get_string.return_value = "SELECT * FROM test" - mock_renderer_class.return_value = mock_renderer - - self.handler.native_query = MagicMock() - self.handler.native_query.return_value = OkResponse() - - mock_ast = MagicMock() - - result = self.handler.query(mock_ast) - - mock_renderer_class.assert_called_once_with("mysql") - - mock_renderer.get_string.assert_called_once_with(mock_ast, with_failback=True) - - self.handler.native_query.assert_called_once_with("SELECT * FROM test") - self.assertEqual(result, self.handler.native_query.return_value) - - def test_connection_with_conn_attrs(self): - """ - Tests connecting with connection attributes to 
ensure they are correctly passed - """ - self.handler.connection_data = self.dummy_connection_data.copy() - self.handler.connection_data["conn_attrs"] = {"program_name": "mindsdb", "client_version": "1.0"} - - self.handler.connect() - - call_kwargs = self.mock_connect.call_args[1] - self.assertEqual(call_kwargs["conn_attrs"], {"program_name": "mindsdb", "client_version": "1.0"}) - - def test_get_tables(self): - """ - Tests that get_tables calls native_query with the correct SQL - """ - expected_response = OkResponse() - self.handler.native_query = MagicMock(return_value=expected_response) - - response = self.handler.get_tables() - - self.handler.native_query.assert_called_once() - call_args = self.handler.native_query.call_args[0][0] - - self.assertIn("information_schema.TABLES", call_args) - self.assertIn("TABLE_SCHEMA", call_args) - self.assertIn("TABLE_NAME", call_args) - self.assertIn("TABLE_TYPE", call_args) - - self.assertEqual(response, expected_response) - - def test_get_columns(self): - """ - Tests that get_columns calls native_query with the correct SQL - """ - expected_response = TableResponse(data=DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET))) - self.handler.native_query = MagicMock(return_value=expected_response) - - table_name = "test_table" - response = self.handler.get_columns(table_name) - assert response.type == RESPONSE_TYPE.COLUMNS_TABLE - - self.handler.native_query.assert_called_once() - call_args = self.handler.native_query.call_args[0][0] - - expected_sql = f""" - select - COLUMN_NAME, - DATA_TYPE, - ORDINAL_POSITION, - COLUMN_DEFAULT, - IS_NULLABLE, - CHARACTER_MAXIMUM_LENGTH, - CHARACTER_OCTET_LENGTH, - NUMERIC_PRECISION, - NUMERIC_SCALE, - DATETIME_PRECISION, - CHARACTER_SET_NAME, - COLLATION_NAME - from - information_schema.columns - where - table_name = '{table_name}' - and table_schema = DATABASE(); - """ - self.assertEqual(call_args, expected_sql) - self.assertEqual(response, expected_response) - - def 
test_types_casting(self): - """Test that types are casted correctly""" - query_str = "SELECT * FROM test_table" - - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - mock_conn.is_connected = MagicMock(return_value=True) - - # region test TEXT/BLOB types and sub-types - input_row = OrderedDict( - t_varchar="v_varchar", - t_tinytext="v_tinytext", - t_text="v_text", - t_mediumtext="v_mediumtext", - t_longtext="v_longtext", - t_tinyblon="v_tinyblon", - t_blob="v_blob", - t_mediumblob="v_mediumblob", - t_longblob="v_longblob", - t_json='{"key": "value"}', - ) - mock_cursor.fetchall.return_value = [list(input_row.values())] - - mock_cursor.description = [ - ("t_varchar", 253, None, None, None, None, 1, 0, 45), - ("t_tinytext", 252, None, None, None, None, 1, 16, 45), - ("t_text", 252, None, None, None, None, 1, 16, 45), - ("t_mediumtext", 252, None, None, None, None, 1, 16, 45), - ("t_longtext", 252, None, None, None, None, 1, 16, 45), - ("t_tinyblon", 252, None, None, None, None, 1, 144, 63), - ("t_blob", 252, None, None, None, None, 1, 144, 63), - ("t_mediumblob", 252, None, None, None, None, 1, 144, 63), - ("t_longblob", 252, None, None, None, None, 1, 144, 63), - ("t_json", 245, None, None, None, None, 1, 144, 63), - ] - - response: Response = self.handler.native_query(query_str, stream=False) - excepted_mysql_types = [ - MYSQL_DATA_TYPE.VARBINARY, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.BLOB, - MYSQL_DATA_TYPE.BLOB, - MYSQL_DATA_TYPE.BLOB, - MYSQL_DATA_TYPE.BLOB, - MYSQL_DATA_TYPE.JSON, - ] - for column, mysql_type in zip(response.columns, excepted_mysql_types): - self.assertEqual(column.type, mysql_type) - for key, input_value in input_row.items(): - 
result_value = response.data_frame[key][0] - self.assertEqual(type(result_value), type(input_value)) - self.assertEqual(result_value, input_value) - # endregion - - # region test TINYINT/BOOL/BOOLEAN types - input_row = OrderedDict(t_tinyint=1, t_bool=1, t_boolean=1) - mock_cursor.fetchall.return_value = [list(input_row.values())] - - mock_cursor.description = [ - ("t_tinyint", 1, None, None, None, None, 1, 0, 63), - ("t_bool", 1, None, None, None, None, 1, 0, 63), - ("t_boolean", 1, None, None, None, None, 1, 0, 63), - ] - response: Response = self.handler.native_query(query_str, stream=False) - excepted_mysql_types = [MYSQL_DATA_TYPE.TINYINT, MYSQL_DATA_TYPE.TINYINT, MYSQL_DATA_TYPE.TINYINT] - for column, mysql_type in zip(response.columns, excepted_mysql_types): - self.assertEqual(column.type, mysql_type) - for key, input_value in input_row.items(): - result_value = response.data_frame[key][0] - # without None values in result columns types will be one of pandas types - self.assertTrue(pd_types.is_integer_dtype(result_value)) - self.assertEqual(result_value, input_value) - # endregion - - # region test numeric types - input_row = OrderedDict( - t_tinyint=1, - t_bool=0, - t_smallint=2, - t_year=2025, - t_mediumint=3, - t_int=4, - t_bigint=5, - t_float=1.1, - t_double=2.2, - t_decimal=Decimal("3.3"), - ) - mock_cursor.fetchall.return_value = [list(input_row.values())] - mock_cursor.description = [ - ("t_tinyint", 1, None, None, None, None, 1, 0, 63), - ("t_bool", 1, None, None, None, None, 1, 0, 63), - ("t_smallint", 2, None, None, None, None, 1, 0, 63), - ("t_year", 13, None, None, None, None, 1, 96, 63), - ("t_mediumint", 9, None, None, None, None, 1, 0, 63), - ("t_int", 3, None, None, None, None, 1, 0, 63), - ("t_bigint", 8, None, None, None, None, 1, 0, 63), - ("t_float", 4, None, None, None, None, 1, 0, 63), - ("t_double", 5, None, None, None, None, 1, 0, 63), - ("t_decimal", 246, None, None, None, None, 1, 0, 63), - ] - response: Response = 
self.handler.native_query(query_str, stream=False) - excepted_mysql_types = [ - MYSQL_DATA_TYPE.TINYINT, - MYSQL_DATA_TYPE.TINYINT, - MYSQL_DATA_TYPE.SMALLINT, - MYSQL_DATA_TYPE.YEAR, - MYSQL_DATA_TYPE.MEDIUMINT, - MYSQL_DATA_TYPE.INT, - MYSQL_DATA_TYPE.BIGINT, - MYSQL_DATA_TYPE.FLOAT, - MYSQL_DATA_TYPE.DOUBLE, - MYSQL_DATA_TYPE.DECIMAL, - ] - - for column, mysql_type in zip(response.columns, excepted_mysql_types): - self.assertEqual(column.type, mysql_type) - for key, input_value in input_row.items(): - result_value = response.data_frame[key][0] - self.assertEqual(result_value, input_value) - # endregion - - # test date/time types - input_row = OrderedDict( - t_date=datetime.date(2025, 4, 16), - t_time=datetime.timedelta(seconds=45600), - t_year=2025, - t_datetime=datetime.datetime(2025, 4, 16, 12, 30, 15), - t_timestamp=datetime.datetime(2025, 4, 16, 12, 30, 15), - ) - mock_cursor.fetchall.return_value = [list(input_row.values())] - - mock_cursor.description = [ - ("t_date", 10, None, None, None, None, 1, 128, 63), - ("t_time", 11, None, None, None, None, 1, 128, 63), - ("t_year", 13, None, None, None, None, 1, 96, 63), - ("t_datetime", 12, None, None, None, None, 1, 128, 63), - ("t_timestamp", 7, None, None, None, None, 1, 128, 63), - ] - - response: Response = self.handler.native_query(query_str, stream=False) - excepted_mysql_types = [ - MYSQL_DATA_TYPE.DATE, - MYSQL_DATA_TYPE.TIME, - MYSQL_DATA_TYPE.YEAR, - MYSQL_DATA_TYPE.DATETIME, - MYSQL_DATA_TYPE.TIMESTAMP, - ] - for column, mysql_type in zip(response.columns, excepted_mysql_types): - self.assertEqual(column.type, mysql_type) - for key, input_value in input_row.items(): - result_value = response.data_frame[key][0] - self.assertEqual(result_value, input_value) - # endregion - - # region test casting of nullable types - bigint_val = 9223372036854775807 - input_rows = [OrderedDict(t_bigint=bigint_val, t_boolean=1), OrderedDict(t_bigint=None, t_boolean=None)] - mock_cursor.fetchall.return_value = 
[list(row.values()) for row in input_rows] - description = [ - ("t_bigint", 8, None, None, None, None, 1, 0, 63), - ("t_boolean", 1, None, None, None, None, 1, 0, 63), - ] - mock_cursor.description = description - response: Response = self.handler.native_query(query_str, stream=False) - self.assertEqual(response.data_frame.dtypes.iloc[0], "Int64") - self.assertEqual(response.data_frame.dtypes.iloc[1], "Int64") - self.assertEqual(response.data_frame.iloc[0, 0], bigint_val) - self.assertEqual(response.data_frame.iloc[0, 1], 1) - self.assertTrue(response.data_frame.iloc[1, 0] is pd.NA) - self.assertTrue(response.data_frame.iloc[1, 1] is pd.NA) - # endregion - - # region test vector type - input_row = OrderedDict( - t_vector=array("f", [1.1, 2.2, 3.3]), - ) - mock_cursor.fetchall.return_value = [list(input_row.values())] - - mock_cursor.description = [("t_vector", 242, None, None, None, None, 1, 144, 63)] - - response: Response = self.handler.native_query(query_str, stream=False) - excepted_mysql_types = [MYSQL_DATA_TYPE.VECTOR] - for column, mysql_type in zip(response.columns, excepted_mysql_types): - self.assertEqual(column.type, mysql_type) - self.assertEqual(input_row["t_vector"], response.data_frame["t_vector"][0]) - # endregion - - def _test_meta_method_with_filter(self, method, sample_data, filter_column, filter_values): - """ - Helper method to test meta catalog methods with and without filtering. 
- - Args: - method: The method to test (e.g., self.handler.meta_get_tables) - sample_data: List of dicts containing sample data - filter_column: Column name to filter on - filter_values: List of values to filter by - """ - # Test without filter - df = DataFrame(sample_data) - expected_response = TableResponse(data=df) - self.handler.native_query = MagicMock(return_value=expected_response) - - response = method() - self.handler.native_query.assert_called_once() - self.assertIs(response, expected_response) - - # Test with filter - self.handler.native_query.reset_mock() - filtered_df = df[df[filter_column].isin(filter_values)].reset_index(drop=True) - filtered_response = TableResponse(data=filtered_df) - self.handler.native_query = MagicMock(return_value=filtered_response) - - response = method(table_names=filter_values) - self.handler.native_query.assert_called_once() - self.assertIs(response, filtered_response) - - # Verify filtered data - if filter_column in response.data_frame.columns: - self.assertTrue(response.data_frame[filter_column].isin(filter_values).all()) - - return response - - def test_meta_get_tables_returns_response(self): - """Test that meta_get_tables returns correct response with and without filtering""" - sample_data = [ - { - "table_name": "customers", - "table_schema": "test_db", - "table_type": "BASE TABLE", - "table_description": "Customer information", - "row_count": 100, - }, - { - "table_name": "orders", - "table_schema": "test_db", - "table_type": "BASE TABLE", - "table_description": None, - "row_count": 500, - }, - { - "table_name": "products", - "table_schema": "test_db", - "table_type": "BASE TABLE", - "table_description": None, - "row_count": 42, - }, - ] - self._test_meta_method_with_filter( - self.handler.meta_get_tables, sample_data, "table_name", ["customers", "orders"] - ) - - def test_meta_get_columns_returns_response(self): - """Test that meta_get_columns returns correct response with and without filtering""" - sample_data = [ - 
{ - "table_name": "customers", - "column_name": "id", - "data_type": "int", - "column_description": None, - "column_default": None, - "is_nullable": 0, - }, - { - "table_name": "customers", - "column_name": "name", - "data_type": "varchar", - "column_description": None, - "column_default": None, - "is_nullable": 1, - }, - { - "table_name": "products", - "column_name": "sku", - "data_type": "varchar", - "column_description": "Product SKU", - "column_default": None, - "is_nullable": 0, - }, - ] - self._test_meta_method_with_filter(self.handler.meta_get_columns, sample_data, "table_name", ["customers"]) - - def test_meta_get_column_statistics_returns_response(self): - """Test that meta_get_column_statistics returns correct response with and without filtering""" - sample_data = [ - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "id", - "MOST_COMMON_VALUES": None, - "MOST_COMMON_FREQUENCIES": None, - "NULL_PERCENTAGE": 0.0, - "MINIMUM_VALUE": "1", - "MAXIMUM_VALUE": "100", - "DISTINCT_VALUES_COUNT": 100, - }, - { - "TABLE_NAME": "customers", - "COLUMN_NAME": "name", - "MOST_COMMON_VALUES": None, - "MOST_COMMON_FREQUENCIES": None, - "NULL_PERCENTAGE": 5.0, - "MINIMUM_VALUE": "Alice", - "MAXIMUM_VALUE": "Zoe", - "DISTINCT_VALUES_COUNT": 95, - }, - { - "TABLE_NAME": "products", - "COLUMN_NAME": "sku", - "MOST_COMMON_VALUES": None, - "MOST_COMMON_FREQUENCIES": None, - "NULL_PERCENTAGE": 0.0, - "MINIMUM_VALUE": None, - "MAXIMUM_VALUE": None, - "DISTINCT_VALUES_COUNT": 42, - }, - ] - self._test_meta_method_with_filter( - self.handler.meta_get_column_statistics, sample_data, "TABLE_NAME", ["customers"] - ) - - def test_meta_get_primary_keys_returns_response(self): - """Test that meta_get_primary_keys returns correct response with and without filtering""" - sample_data = [ - {"table_name": "customers", "column_name": "id", "ordinal_position": 1, "constraint_name": "PRIMARY"}, - {"table_name": "orders", "column_name": "id", "ordinal_position": 1, "constraint_name": "PRIMARY"}, 
- {"table_name": "products", "column_name": "id", "ordinal_position": 1, "constraint_name": "PRIMARY"}, - ] - self._test_meta_method_with_filter(self.handler.meta_get_primary_keys, sample_data, "table_name", ["customers"]) - - def test_meta_get_foreign_keys_returns_response(self): - """Test that meta_get_foreign_keys returns correct response with and without filtering""" - sample_data = [ - { - "parent_table_name": "customers", - "parent_column_name": "id", - "child_table_name": "orders", - "child_column_name": "customer_id", - "constraint_name": "orders_ibfk_1", - }, - { - "parent_table_name": "products", - "parent_column_name": "id", - "child_table_name": "orders", - "child_column_name": "product_id", - "constraint_name": "orders_ibfk_2", - }, - ] - self._test_meta_method_with_filter( - self.handler.meta_get_foreign_keys, sample_data, "child_table_name", ["orders"] - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/test_netsuite.py b/tests/unit/handlers/test_netsuite.py deleted file mode 100644 index a038769aa67..00000000000 --- a/tests/unit/handlers/test_netsuite.py +++ /dev/null @@ -1,106 +0,0 @@ -from collections import OrderedDict -import unittest -from unittest.mock import MagicMock, patch - -import pytest - -try: - from mindsdb.integrations.handlers.netsuite_handler.netsuite_handler import NetSuiteHandler - from mindsdb.integrations.handlers.netsuite_handler.netsuite_tables import NetSuiteRecordTable -except ImportError: - pytestmark = pytest.mark.skip("NetSuite handler not installed") - -from base_handler_test import BaseHandlerTestSetup -from mindsdb.integrations.libs.response import RESPONSE_TYPE -from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator - - -class TestNetSuiteHandler(BaseHandlerTestSetup, unittest.TestCase): - @property - def dummy_connection_data(self): - return OrderedDict( - account_id="123456_SB1", - consumer_key="ck", - consumer_secret="cs", - token_id="token", - 
token_secret="secret", - record_types="customer", - ) - - def create_handler(self): - return NetSuiteHandler("netsuite", connection_data=self.dummy_connection_data) - - def create_patcher(self): - return patch("mindsdb.integrations.handlers.netsuite_handler.netsuite_handler.requests.Session") - - def test_connect_success(self): - session = MagicMock() - self.mock_connect.return_value = session - - with patch("mindsdb.integrations.handlers.netsuite_handler.netsuite_handler.OAuth1") as oauth_mock: - connection = self.handler.connect() - - self.assertIs(connection, session) - self.assertTrue(self.handler.is_connected) - oauth_mock.assert_called_once() - self.assertEqual(session.auth, oauth_mock.return_value) - - def test_connect_missing_required_params_raises(self): - handler = NetSuiteHandler("netsuite", connection_data={"account_id": "123"}) - with patch("mindsdb.integrations.handlers.netsuite_handler.netsuite_handler.OAuth1"): - with self.assertRaises(ValueError): - handler.connect() - - def test_check_connection_success(self): - with patch("mindsdb.integrations.handlers.netsuite_handler.netsuite_handler.OAuth1"): - self.handler._request = MagicMock() - response = self.handler.check_connection() - - self.assertTrue(response.success) - self.handler._request.assert_called_once_with("POST", "/services/rest/query/v1/suiteql", json={"q": "SELECT 1"}) - - def test_check_connection_failure_sets_error_message(self): - with patch("mindsdb.integrations.handlers.netsuite_handler.netsuite_handler.OAuth1"): - self.handler._request = MagicMock(side_effect=RuntimeError("boom")) - response = self.handler.check_connection() - - self.assertFalse(response.success) - self.assertEqual(response.error_message, "boom") - - def test_native_query_parses_suiteql_payload(self): - payload = { - "columnMetadata": [{"name": "id"}, {"name": "id"}], - "items": [{"values": [1, "a"]}, {"values": [2]}], - } - self.handler._request = MagicMock(return_value=payload) - - response = 
self.handler.native_query("SELECT id FROM transaction") - - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - df = response.data_frame - self.assertEqual(list(df.columns), ["id", "id_2"]) - self.assertEqual(df.iloc[0].tolist(), [1, "a"]) - self.assertEqual(df.iloc[1].tolist(), [2, None]) - - -class TestNetSuiteRecordTable(unittest.TestCase): - def test_list_builds_q_filters(self): - handler = MagicMock() - handler._suiteql_select.return_value = {"items": [], "columnMetadata": []} - - table = NetSuiteRecordTable(handler, "customer") - conditions = [ - FilterCondition("email", FilterOperator.EQUAL, "user@example.com"), - FilterCondition("foo", FilterOperator.EQUAL, None), - ] - - table.list(conditions=conditions, limit=10) - - handler._suiteql_select.assert_called_once_with( - table="customer", - where_sql=" WHERE email = 'user@example.com' AND foo IS NULL", - limit=10, - targets=None, - order_by_sql="", - ) - self.assertTrue(all(condition.applied for condition in conditions)) diff --git a/tests/unit/handlers/test_oracle.py b/tests/unit/handlers/test_oracle.py deleted file mode 100644 index fb18a57fcc6..00000000000 --- a/tests/unit/handlers/test_oracle.py +++ /dev/null @@ -1,1080 +0,0 @@ -import pytest -import unittest -import datetime -from array import array -from decimal import Decimal -from collections import OrderedDict -from unittest.mock import patch, MagicMock - -try: - import oracledb - from oracledb import DatabaseError - from mindsdb.integrations.handlers.oracle_handler.oracle_handler import ( - OracleHandler, - ) -except ImportError: - pytestmark = pytest.mark.skip("Oracle handler not installed") - -import pandas as pd -from pandas import DataFrame - -from base_handler_test import BaseDatabaseHandlerTest, MockCursorContextManager -from mindsdb.integrations.libs.response import ( - TableResponse, - OkResponse, - ErrorResponse, - INF_SCHEMA_COLUMNS_NAMES_SET, - RESPONSE_TYPE, -) -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import 
MYSQL_DATA_TYPE - - -class TestOracleHandler(BaseDatabaseHandlerTest, unittest.TestCase): - @property - def dummy_connection_data(self): - return OrderedDict(user="example_user", password="example_pass", dsn="example_dsn") - - @property - def err_to_raise_on_connect_failure(self): - return DatabaseError("Connection Failed") - - @property - def get_tables_query(self): - return """ - SELECT table_name - FROM user_tables - ORDER BY 1 - """ - - @property - def get_columns_query(self): - return f""" - SELECT - COLUMN_NAME, - DATA_TYPE, - COLUMN_ID AS ORDINAL_POSITION, - DATA_DEFAULT AS COLUMN_DEFAULT, - CASE NULLABLE WHEN 'Y' THEN 'YES' ELSE 'NO' END AS IS_NULLABLE, - CHAR_LENGTH AS CHARACTER_MAXIMUM_LENGTH, - NULL AS CHARACTER_OCTET_LENGTH, - DATA_PRECISION AS NUMERIC_PRECISION, - DATA_SCALE AS NUMERIC_SCALE, - NULL AS DATETIME_PRECISION, - CHARACTER_SET_NAME, - NULL AS COLLATION_NAME - FROM USER_TAB_COLUMNS - WHERE table_name = '{self.mock_table}' - ORDER BY TABLE_NAME, COLUMN_ID; - """ - - def create_handler(self): - return OracleHandler("oracle", connection_data=self.dummy_connection_data) - - def create_patcher(self): - return patch("mindsdb.integrations.handlers.oracle_handler.oracle_handler.connect") - - def test_connect_validation(self): - """ - Tests that connect method raises ValueError when required connection parameters are missing - """ - # Test missing 'user' - invalid_connection_args = self.dummy_connection_data.copy() - del invalid_connection_args["user"] - handler = OracleHandler("oracle", connection_data=invalid_connection_args) - with self.assertRaises(ValueError): - handler.connect() - - # Test missing 'password' - invalid_connection_args = self.dummy_connection_data.copy() - del invalid_connection_args["password"] - handler = OracleHandler("oracle", connection_data=invalid_connection_args) - with self.assertRaises(ValueError): - handler.connect() - - # Test missing 'dsn' AND missing 'host' - invalid_connection_args = 
self.dummy_connection_data.copy() - del invalid_connection_args["dsn"] - invalid_connection_args.pop("host", None) - handler = OracleHandler("oracle", connection_data=invalid_connection_args) - with self.assertRaises(ValueError): - handler.connect() - - # Test missing 'oracle_client_lib_dir' when thick_mode is enabled - invalid_connection_args = self.dummy_connection_data.copy() - invalid_connection_args["thick_mode"] = True - handler = OracleHandler("oracle", connection_data=invalid_connection_args) - with self.assertRaises(ValueError): - handler.connect() - - def test_disconnect(self): - """ - Tests the disconnect method to ensure it correctly closes connections - """ - mock_conn = MagicMock() - self.handler.connection = mock_conn - self.handler.is_connected = True - self.handler.disconnect() - - mock_conn.close.assert_called_once() - self.assertFalse(self.handler.is_connected) - - self.handler.is_connected = False - mock_conn.reset_mock() - self.handler.disconnect() - mock_conn.close.assert_not_called() - - def test_check_connection(self): - """ - Tests the check_connection method to ensure it properly tests connectivity using ping() - """ - mock_conn = MagicMock() - self.handler.connect = MagicMock(return_value=mock_conn) - - response = self.handler.check_connection() - mock_conn.ping.assert_called_once() - self.assertTrue(response.success) - self.assertIsNone(response.error_message) - - self.handler.connect = MagicMock() - connect_error = DatabaseError("Connection error") - self.handler.connect.side_effect = connect_error - response = self.handler.check_connection() - self.assertFalse(response.success) - self.assertEqual(response.error_message, str(connect_error)) - self.handler.connect.assert_called_once() - - mock_conn.reset_mock() - self.handler.connect = MagicMock(return_value=mock_conn) - ping_error = DatabaseError("Ping error") - mock_conn.ping.side_effect = ping_error - response = self.handler.check_connection() - self.assertFalse(response.success) - 
self.assertEqual(response.error_message, str(ping_error)) - mock_conn.ping.assert_called_once() - - def test_thick_mode_connection(self): - """ - Tests that thick mode connection initializes Oracle client with the provided library directory - """ - connection_args = self.dummy_connection_data.copy() - connection_args["thick_mode"] = True - connection_args["oracle_client_lib_dir"] = "/path/to/oracle/client/lib" - - with patch( - "mindsdb.integrations.handlers.oracle_handler.oracle_handler.oracledb.init_oracle_client" - ) as mock_init: - handler = OracleHandler("oracle", connection_data=connection_args) - handler.connect() - mock_init.assert_called_once_with(lib_dir="/path/to/oracle/client/lib") - - def test_native_query_with_results_streaming(self): - """ - Tests the `native_query` method for a SELECT statement returning results at server side execution. - """ - mock_conn = MagicMock() - mock_cursor = MockCursorContextManager() - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - # Server-side execution uses fetchmany, not fetchall - mock_cursor.fetchmany = MagicMock(side_effect=[[(1, "test1"), (2, "test2")], []]) - mock_cursor.description = [ - ("ID", None, None, None, None, None, None), - ("NAME", None, None, None, None, None, None), - ] - - query_str = "SELECT ID, NAME FROM test_table" - data = self.handler.native_query(query_str, stream=True) - - mock_conn.cursor.assert_called_once() - mock_cursor.execute.assert_called_once_with(query_str) - - # Verify the response - self.assertIsInstance(data, TableResponse) - self.assertEqual(data.type, RESPONSE_TYPE.TABLE) - self.assertIsNone(data._data) - data.fetchall() - self.assertIsInstance(data._data, DataFrame) - expected_columns = ["ID", "NAME"] - self.assertListEqual(list(data.data_frame.columns), expected_columns) - self.assertEqual(len(data.data_frame), 2) - - def test_native_query_with_no_streaming(self): - """ - Tests the `native_query` method 
for a SELECT statement returning results at client side execution. - """ - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - mock_cursor.fetchall = MagicMock(return_value=[(1, "test1"), (2, "test2")]) - mock_cursor.description = [ - ("ID", None, None, None, None, None, None), - ("NAME", None, None, None, None, None, None), - ] - - query_str = "SELECT ID, NAME FROM test_table" - data = self.handler.native_query(query_str, stream=False) - - mock_conn.cursor.assert_called_once() - mock_cursor.execute.assert_called_once_with(query_str) - mock_cursor.fetchall.assert_called_once() - mock_conn.commit.assert_called_once() - - self.assertIsInstance(data, TableResponse) - self.assertEqual(data.type, RESPONSE_TYPE.TABLE) - self.assertIsInstance(data.data_frame, DataFrame) - expected_columns = ["ID", "NAME"] - self.assertListEqual(list(data.data_frame.columns), expected_columns) - self.assertEqual(len(data.data_frame), 2) - - def test_native_query_no_results(self): - """ - Tests the `native_query` method for a statement that doesn't return results (e.g., INSERT). 
- """ - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - mock_cursor.description = None - mock_cursor.rowcount = 1 - - query_str = "INSERT INTO test_table VALUES (1, 'test')" - data = self.handler.native_query(query_str) - - mock_conn.cursor.assert_called_once() - mock_cursor.execute.assert_called_once_with(query_str) - mock_cursor.fetchall.assert_not_called() - mock_conn.commit.assert_called_once() - - self.assertIsInstance(data, OkResponse) - self.assertEqual(data.type, RESPONSE_TYPE.OK) - self.assertEqual(data.affected_rows, 1) - - def test_native_query_error(self): - """ - Tests the `native_query` method handles database errors correctly. - """ - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - error_msg = "ORA-00942: table or view does not exist" - error = DatabaseError(error_msg) - mock_cursor.execute.side_effect = error - - query_str = "INVALID SQL" - data = self.handler.native_query(query_str) - - mock_conn.cursor.assert_called_once() - mock_cursor.execute.assert_called_once_with(query_str) - mock_cursor.fetchall.assert_not_called() - mock_conn.rollback.assert_called_once() - mock_conn.commit.assert_not_called() - - self.assertIsInstance(data, ErrorResponse) - self.assertEqual(data.type, RESPONSE_TYPE.ERROR) - self.assertEqual(data.error_message, error_msg) - - def test_query_method(self): - """ - Tests the query method to ensure it correctly converts ASTNode to SQL and calls native_query. 
- """ - orig_renderer_attr = hasattr(self.handler, "renderer") - if orig_renderer_attr: - orig_renderer = self.handler.renderer - - self.handler.native_query = MagicMock() - expected_response = TableResponse() - self.handler.native_query.return_value = expected_response - mock_ast = MagicMock() - - expected_sql = "SELECT * FROM rendered_table" - - with patch("mindsdb.integrations.handlers.oracle_handler.oracle_handler.SqlalchemyRender") as MockRenderer: - mock_renderer_instance = MockRenderer.return_value - mock_renderer_instance.get_string.return_value = expected_sql - - result = self.handler.query(mock_ast) - - MockRenderer.assert_called_once_with("oracle") - mock_renderer_instance.get_string.assert_called_once_with(mock_ast, with_failback=True) - self.handler.native_query.assert_called_once_with(expected_sql) - self.assertEqual(result, expected_response) - - del self.handler.native_query - if orig_renderer_attr: - self.handler.renderer = orig_renderer - - def test_get_tables(self): - """ - Tests that get_tables calls native_query with the correct SQL for Oracle - and returns the expected DataFrame structure. 
- """ - expected_df = DataFrame( - [ - ("SAMPLEUSER", "CUSTOMERS", "BASE TABLE"), - ("SAMPLEUSER", "PRODUCTS", "BASE TABLE"), - ("SAMPLEUSER", "ORDERS_VIEW", "VIEW"), - ], - columns=["TABLE_SCHEMA", "TABLE_NAME", "TABLE_TYPE"], - ) - expected_response = TableResponse(data=expected_df) - - self.handler.native_query = MagicMock(return_value=expected_response) - - response = self.handler.get_tables() - - self.handler.native_query.assert_called_once() - - expected_query = """ - SELECT - owner AS table_schema, - table_name AS table_name, - 'BASE TABLE' AS table_type - FROM all_tables t - JOIN all_users u ON t.owner = u.username - WHERE t.tablespace_name = 'USERS' - - UNION ALL - - SELECT - v.owner AS table_schema, - v.view_name AS table_name, - 'VIEW' AS table_type - FROM all_views v - JOIN all_users u ON v.owner = u.username - WHERE v.owner IN ( - SELECT DISTINCT owner - FROM all_tables - WHERE tablespace_name = 'USERS' - ) - """ - self.handler.native_query.assert_called_once_with(expected_query) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - self.assertIsInstance(response.data_frame, DataFrame) - self.assertEqual(len(response.data_frame), 3) - self.assertListEqual( - list(response.data_frame.columns), - ["TABLE_SCHEMA", "TABLE_NAME", "TABLE_TYPE"], - ) - self.assertEqual(response.data_frame.iloc[0]["TABLE_SCHEMA"], "SAMPLEUSER") - self.assertEqual(response.data_frame.iloc[0]["TABLE_NAME"], "CUSTOMERS") - self.assertEqual(response.data_frame.iloc[0]["TABLE_TYPE"], "BASE TABLE") - - view_rows = response.data_frame[response.data_frame["TABLE_TYPE"] == "VIEW"] - self.assertEqual(len(view_rows), 1) - - del self.handler.native_query - - def test_get_tables_multiple_schemas(self): - """ - Tests that get_tables calls native_query with the correct SQL for Oracle - and returns the expected DataFrame structure when multiple schemas are present. 
- """ - expected_df = DataFrame( - [ - ("SAMPLEUSER1", "EMPLOYEES", "BASE TABLE"), - ("SAMPLEUSER1", "DEPARTMENTS", "BASE TABLE"), - ("SAMPLEUSER1", "EMP_VIEW", "VIEW"), - ("SAMPLEUSER2", "CUSTOMERS", "BASE TABLE"), - ("SAMPLEUSER2", "ORDERS", "BASE TABLE"), - ("SAMPLEUSER2", "CUST_VIEW", "VIEW"), - ("SAMPLEUSER3", "PRODUCTS", "BASE TABLE"), - ], - columns=["TABLE_SCHEMA", "TABLE_NAME", "TABLE_TYPE"], - ) - expected_response = TableResponse(data=expected_df) - - self.handler.native_query = MagicMock(return_value=expected_response) - - response = self.handler.get_tables() - - self.handler.native_query.assert_called_once() - - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - self.assertIsInstance(response.data_frame, DataFrame) - self.assertEqual(len(response.data_frame), 7) - self.assertListEqual( - list(response.data_frame.columns), - ["TABLE_SCHEMA", "TABLE_NAME", "TABLE_TYPE"], - ) - schemas = response.data_frame["TABLE_SCHEMA"].unique() - self.assertEqual(len(schemas), 3) - self.assertIn("SAMPLEUSER1", schemas) - self.assertIn("SAMPLEUSER2", schemas) - self.assertIn("SAMPLEUSER3", schemas) - - table_types = response.data_frame["TABLE_TYPE"].unique() - self.assertIn("BASE TABLE", table_types) - self.assertIn("VIEW", table_types) - - tables = response.data_frame[response.data_frame["TABLE_TYPE"] == "BASE TABLE"] - views = response.data_frame[response.data_frame["TABLE_TYPE"] == "VIEW"] - self.assertEqual(len(tables), 5) - self.assertEqual(len(views), 2) - - del self.handler.native_query - - def test_get_columns(self): - """ - Tests that get_columns calls native_query with the correct SQL for Oracle - and returns the expected DataFrame structure. 
- """ - query_columns = [ - "COLUMN_NAME", - "DATA_TYPE", - "ORDINAL_POSITION", - "COLUMN_DEFAULT", - "IS_NULLABLE", - "CHARACTER_MAXIMUM_LENGTH", - "CHARACTER_OCTET_LENGTH", - "NUMERIC_PRECISION", - "NUMERIC_SCALE", - "DATETIME_PRECISION", - "CHARACTER_SET_NAME", - "COLLATION_NAME", - ] - - expected_df_data = [ - { - "COLUMN_NAME": "COL1", - "DATA_TYPE": "VARCHAR2", - "ORDINAL_POSITION": 1, - "COLUMN_DEFAULT": None, - "IS_NULLABLE": "YES", - "CHARACTER_MAXIMUM_LENGTH": 255, - "CHARACTER_OCTET_LENGTH": None, - "NUMERIC_PRECISION": None, - "NUMERIC_SCALE": None, - "DATETIME_PRECISION": None, - "CHARACTER_SET_NAME": "AL32UTF8", - "COLLATION_NAME": None, - }, - { - "COLUMN_NAME": "COL2", - "DATA_TYPE": "NUMBER", - "ORDINAL_POSITION": 2, - "COLUMN_DEFAULT": "0", - "IS_NULLABLE": "NO", - "CHARACTER_MAXIMUM_LENGTH": None, - "CHARACTER_OCTET_LENGTH": None, - "NUMERIC_PRECISION": 38, - "NUMERIC_SCALE": 0, - "DATETIME_PRECISION": None, - "CHARACTER_SET_NAME": None, - "COLLATION_NAME": None, - }, - ] - expected_df = DataFrame(expected_df_data, columns=query_columns) - - expected_response = TableResponse(data=expected_df) - self.handler.native_query = MagicMock(return_value=expected_response) - - table_name = "test_table" - response = self.handler.get_columns(table_name) - - self.handler.native_query.assert_called_once() - call_args = self.handler.native_query.call_args[0][0] - self.assertIn("FROM USER_TAB_COLUMNS", call_args) - self.assertIn(f"WHERE table_name = '{table_name}'", call_args) - self.assertIn("COLUMN_NAME", call_args) - self.assertIn("DATA_TYPE", call_args) - self.assertIn("COLUMN_ID AS ORDINAL_POSITION", call_args) - self.assertNotIn("MYSQL_DATA_TYPE", call_args) - - self.assertEqual(response.type, RESPONSE_TYPE.COLUMNS_TABLE) - self.assertIsInstance(response.data_frame, DataFrame) - - expected_final_columns = INF_SCHEMA_COLUMNS_NAMES_SET - self.assertSetEqual(set(response.data_frame.columns), expected_final_columns) - - 
self.assertEqual(response.data_frame.iloc[0]["COLUMN_NAME"], "COL1") - self.assertEqual(response.data_frame.iloc[0]["DATA_TYPE"], "VARCHAR2") - self.assertIn("MYSQL_DATA_TYPE", response.data_frame.columns) - self.assertIsNotNone(response.data_frame.iloc[0]["MYSQL_DATA_TYPE"]) - - del self.handler.native_query - - def test_types_casting(self): - """Test that types are casted correctly""" - query_str = "SELECT * FROM test_table" - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - # region test numeric types - """Data obtained using: - CREATE TABLE test_numeric_types ( - n_number NUMBER, - n_number_p NUMBER(38), - n_number_ps NUMBER(10,2), - n_integer INTEGER, - n_smallint SMALLINT, - n_decimal DECIMAL(10,2), - n_decimal_p DECIMAL(15), - n_numeric NUMERIC(10,2), - n_float FLOAT, - n_float_p FLOAT(126), - n_real REAL, -- is FLOAT(63) - n_double_precision DOUBLE PRECISION, -- is FLOAT(126) - n_binary_float BINARY_FLOAT, -- 32-bit - n_binary_double BINARY_DOUBLE -- 64-bit - ); - - INSERT INTO test_numeric_types ( - n_number, - n_number_p, - n_number_ps, - n_integer, - n_smallint, - n_decimal, - n_decimal_p, - n_numeric, - n_float, - n_float_p, - n_real, - n_double_precision, - n_binary_float, - n_binary_double - ) VALUES ( - 123456.789, -- n_number - 12345678901234567890123456789012345678, -- n_number_p (38 digits) - 1234.56, -- n_number_ps - 2147483647, -- n_int - 32767, -- n_smallint - 9876.54, -- n_decimal - 123456789012345, -- n_decimal_p - 1234.56, -- n_numeric - 3.14159265358979, -- n_float - 2.718281828459045235360287471352, -- n_float_p - 3.14159, -- n_real - 2.7182818284590452, -- n_double_precision - 3.14159265E0, -- n_binary_float - 2.718281828459045235360287471352E0 -- n_binary_double - ); - """ - input_row = ( - 
123456.789, - 12345678901234567890123456789012345678, - 1234.56, - 2147483647, - 32767, - 9876.54, - 123456789012345, - 1234.56, - 3.14159265358979, - 2.718281828459045, - 3.14159, - 2.718281828459045, - 3.1415927410125732, - 2.718281828459045, - ) - mock_cursor.fetchall.return_value = [input_row] - - mock_cursor.description = [ - ("N_NUMBER", oracledb.DB_TYPE_NUMBER, 127, None, 0, -127, True), - ("N_NUMBER_P", oracledb.DB_TYPE_NUMBER, 39, None, 38, 0, True), - ("N_NUMBER_PS", oracledb.DB_TYPE_NUMBER, 14, None, 10, 2, True), - ("N_INTEGER", oracledb.DB_TYPE_NUMBER, 39, None, 38, 0, True), - ("N_SMALLINT", oracledb.DB_TYPE_NUMBER, 39, None, 38, 0, True), - ("N_DECIMAL", oracledb.DB_TYPE_NUMBER, 14, None, 10, 2, True), - ("N_DECIMAL_P", oracledb.DB_TYPE_NUMBER, 16, None, 15, 0, True), - ("N_NUMERIC", oracledb.DB_TYPE_NUMBER, 14, None, 10, 2, True), - ("N_FLOAT", oracledb.DB_TYPE_NUMBER, 127, None, 126, -127, True), - ("N_FLOAT_P", oracledb.DB_TYPE_NUMBER, 127, None, 126, -127, True), - ("N_REAL", oracledb.DB_TYPE_NUMBER, 64, None, 63, -127, True), - ("N_DOUBLE_PRECISION", oracledb.DB_TYPE_NUMBER, 127, None, 126, -127, True), - ("N_BINARY_FLOAT", oracledb.DB_TYPE_NUMBER, 127, None, None, None, True), - ("N_BINARY_DOUBLE", oracledb.DB_TYPE_NUMBER, 127, None, None, None, True), - ] - - response: TableResponse = self.handler.native_query(query_str, stream=False) - excepted_mysql_types = [ - MYSQL_DATA_TYPE.FLOAT, - MYSQL_DATA_TYPE.DECIMAL, - MYSQL_DATA_TYPE.FLOAT, - MYSQL_DATA_TYPE.DECIMAL, - MYSQL_DATA_TYPE.DECIMAL, - MYSQL_DATA_TYPE.FLOAT, - MYSQL_DATA_TYPE.INT, - MYSQL_DATA_TYPE.FLOAT, - MYSQL_DATA_TYPE.FLOAT, - MYSQL_DATA_TYPE.FLOAT, - MYSQL_DATA_TYPE.FLOAT, - MYSQL_DATA_TYPE.FLOAT, - MYSQL_DATA_TYPE.FLOAT, - MYSQL_DATA_TYPE.FLOAT, - ] - self.assertEqual([col.type for col in response.columns], excepted_mysql_types) - for i, input_value in enumerate(input_row): - result_value = response.data_frame[response.data_frame.columns[i]][0] - self.assertEqual(result_value, 
input_value) - # endregion - - # region rest boolean types - """Data obtained using: - CREATE TABLE test_boolean_test ( - t_boolean boolean, - t_bool bool - ); - - INSERT INTO test_boolean_test (t_boolean, t_bool) VALUES (TRUE, false); - """ - - input_row = (True, False) - mock_cursor.fetchall.return_value = [input_row] - mock_cursor.description = [ - ("T_BOOLEAN", oracledb.DB_TYPE_BOOLEAN, None, None, None, None, True), - ("T_BOOL", oracledb.DB_TYPE_BOOLEAN, None, None, None, None, True), - ] - response: TableResponse = self.handler.native_query(query_str, stream=False) - excepted_mysql_types = [MYSQL_DATA_TYPE.BOOLEAN, MYSQL_DATA_TYPE.BOOLEAN] - self.assertEqual([col.type for col in response.columns], excepted_mysql_types) - for i, input_value in enumerate(input_row): - result_value = response.data_frame[response.data_frame.columns[i]][0] - self.assertEqual(result_value, input_value) - # endregion - - # region test text types - """Data obtained using: - CREATE TABLE test_text_types ( - t_char CHAR(10), - t_nchar NCHAR(10), -- unicode - t_varchar2 VARCHAR2(100), - t_nvarchar2 NVARCHAR2(100), -- unicode - t_long LONG, - t_clob CLOB, - t_nclob NCLOB, -- unicode - t_raw RAW(100), - t_blob BLOB - ); - - INSERT INTO test_text_types ( - t_char, - t_nchar, - t_varchar2, - t_nvarchar2, - t_long, - t_clob, - t_nclob, - t_raw, - t_blob - ) VALUES ( - 'Test', -- t_char - N'Unicode', -- t_nchar - 'Test', -- t_varchar2 - N'Unicode', -- t_nvarchar2 - 'Test', -- t_long - TO_CLOB('Test'), -- t_clob - TO_NCLOB('Test'), -- t_nclob - HEXTORAW('54657374'), -- t_raw - HEXTORAW('54657374') -- t_blob - ); - """ - input_row = ( - "Test ", - "Unicode ", - "Test", - "Unicode", - "Test", - "Test", - "Test", - b"Test", - b"Test", - ) - mock_cursor.fetchall.return_value = [input_row] - - mock_cursor.description = [ - ("T_CHAR", oracledb.DB_TYPE_CHAR, 10, 10, None, None, True), - ("T_NCHAR", oracledb.DB_TYPE_NCHAR, 10, 20, None, None, True), - ("T_VARCHAR2", oracledb.DB_TYPE_VARCHAR, 100, 100, 
None, None, True), - ("T_NVARCHAR2", oracledb.DB_TYPE_NVARCHAR, 100, 200, None, None, True), - ("T_LONG", oracledb.DB_TYPE_LONG, None, None, None, None, True), - ("T_CLOB", oracledb.DB_TYPE_LONG, None, None, None, None, True), - ("T_NCLOB", oracledb.DB_TYPE_LONG_NVARCHAR, None, None, None, None, True), - ("T_RAW", oracledb.DB_TYPE_RAW, 100, 100, None, None, True), - ("T_BLOB", oracledb.DB_TYPE_LONG_RAW, None, None, None, None, True), - ] - response: TableResponse = self.handler.native_query(query_str, stream=False) - excepted_mysql_types = [ - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.BINARY, - MYSQL_DATA_TYPE.BINARY, - ] - self.assertEqual([col.type for col in response.columns], excepted_mysql_types) - for i, input_value in enumerate(input_row): - result_value = response.data_frame[response.data_frame.columns[i]][0] - self.assertEqual(result_value, input_value) - # endregion - - # region test date types - """Data obtained using: - CREATE TABLE test_datetime_types ( - d_date DATE, - d_timestamp TIMESTAMP, - d_timestamp_p TIMESTAMP(9) - -- timezone is not supported in thin mode - -- d_timestamp_tz TIMESTAMP WITH TIME ZONE, - -- d_timestamp_tz_p TIMESTAMP(6) WITH TIME ZONE, - -- d_timestamp_ltz TIMESTAMP WITH LOCAL TIME ZONE - ); - - INSERT INTO test_datetime_types ( - d_date, - d_timestamp, - d_timestamp_p - -- timezone is not supported in thin mode - -- d_timestamp_tz, - -- d_timestamp_tz_p, - -- d_timestamp_ltz - ) VALUES ( - DATE '2023-10-15', -- d_date - TIMESTAMP '2023-10-15 10:30:45.123456789', -- d_timestamp - TIMESTAMP '2023-10-15 10:30:45.123456789' -- d_timestamp_p - -- timezone is not supported in thin mode - -- TIMESTAMP '2023-10-15 10:30:45.123456' AT TIME ZONE 'America/Los_Angeles', -- d_timestamp_tz - -- TIMESTAMP '2023-10-15 10:30:45.123456' AT TIME ZONE '-07:00', -- d_timestamp_tz_p - -- TIMESTAMP '2023-10-15 
10:30:45.123456' -- d_timestamp_ltz - ); - """ - input_row = ( - datetime.datetime(2023, 10, 15, 0, 0), - datetime.datetime(2023, 10, 15, 10, 30, 45, 123457), - datetime.datetime(2023, 10, 15, 10, 30, 45, 123456), - ) - mock_cursor.fetchall.return_value = [input_row] - mock_cursor.description = [ - ("D_DATE", oracledb.DB_TYPE_DATE, 23, None, None, None, True), - ("D_TIMESTAMP", oracledb.DB_TYPE_TIMESTAMP, 23, None, 0, 6, True), - ("D_TIMESTAMP_P", oracledb.DB_TYPE_TIMESTAMP, 23, None, 0, 9, True), - ] - response: TableResponse = self.handler.native_query(query_str, stream=False) - excepted_mysql_types = [ - MYSQL_DATA_TYPE.DATE, - MYSQL_DATA_TYPE.TIMESTAMP, - MYSQL_DATA_TYPE.TIMESTAMP, - ] - self.assertEqual([col.type for col in response.columns], excepted_mysql_types) - for i, input_value in enumerate(input_row): - result_value = response.data_frame[response.data_frame.columns[i]][0] - self.assertEqual(result_value, input_value) - # endregion - - # region test nullable types - bigint_val = 9223372036854775807 - input_rows = [(bigint_val, True), (None, None)] - mock_cursor.fetchall.return_value = input_rows - mock_cursor.description = [ - ( - "N_BIGINT", - oracledb.DB_TYPE_NUMBER, - 39, - None, - 17, - 0, - True, - ), # set 17 just to force cast to Int64 - ("T_BOOLEAN", oracledb.DB_TYPE_BOOLEAN, None, None, None, None, True), - ] - response: TableResponse = self.handler.native_query(query_str, stream=False) - self.assertEqual(response.data_frame.dtypes[0], "Int64") - self.assertEqual(response.data_frame.dtypes[1], "boolean") - self.assertEqual(response.data_frame.iloc[0, 0], bigint_val) - self.assertEqual(response.data_frame.iloc[0, 1], True) - self.assertTrue(response.data_frame.iloc[1, 0] is pd.NA) - self.assertTrue(response.data_frame.iloc[1, 1] is pd.NA) - # endregion - - # region test vector and json type - """Data obtained using: - CREATE TABLE test_vector_type ( - t_embedding VECTOR(3, FLOAT32), - t_json json - ) TABLESPACE USERS; - - INSERT INTO 
test_vector_type VALUES ( - TO_VECTOR('[1.1, 2.2, 3.3]', 3, FLOAT32), - JSON_OBJECT( - 'category' VALUE 'electronics', - 'price' VALUE 299.99 - ) - ); - """ - input_row = ( - array("f", [1.1, 2.2, 3.3]), - {"category": "electronics", "price": Decimal("299.99")}, - ) - mock_cursor.fetchall.return_value = [input_row] - mock_cursor.description = [ - ("T_EMBEDDING", oracledb.DB_TYPE_VECTOR, None, None, None, None, True), - ("T_JSON", oracledb.DB_TYPE_JSON, None, None, None, None, True), - ] - response: TableResponse = self.handler.native_query(query_str, stream=False) - excepted_mysql_types = [MYSQL_DATA_TYPE.VECTOR, MYSQL_DATA_TYPE.JSON] - self.assertEqual([col.type for col in response.columns], excepted_mysql_types) - for i, input_value in enumerate(input_row): - result_value = response.data_frame[response.data_frame.columns[i]][0] - self.assertEqual(result_value, input_value) - # endregion - - def test_insert(self): - """ - Tests the insert method to ensure it correctly constructs and executes an INSERT statement - using insertmany for batch inserts. 
- """ - mock_conn = MagicMock() - mock_cursor = MockCursorContextManager() - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - mock_cursor.rowcount = 3 - - df = pd.DataFrame({"id": [1, 2, 3], "name": ["a", "b", "c"]}) - - response = self.handler.insert("test_table", df) - expected_sql = "INSERT INTO test_table (id, name) VALUES (:1, :2)" - expected_values = df.values.tolist() - mock_cursor.executemany.assert_called_once_with(expected_sql, expected_values) - mock_conn.commit.assert_called_once() - - self.assertEqual(response.affected_rows, 3) - self.assertEqual(response.type, RESPONSE_TYPE.OK) - - def test_insert_error(self): - """ - Tests the insert method to ensure it correctly handles errors - """ - mock_conn = MagicMock() - mock_cursor = MockCursorContextManager() - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - error_msg = "ORA-00942: table or view does not exist" - mock_cursor.executemany.side_effect = DatabaseError(error_msg) - - df = pd.DataFrame({"id": [1, 2, 3], "name": ["a", "b", "c"]}) - - with self.assertRaises(DatabaseError): - self.handler.insert("nonexistent_table", df) - - mock_conn.rollback.assert_called_once() - - # Metadata Handler Tests - def test_meta_get_tables(self, table_names=None): - expected_df = DataFrame( - [ - ("TABLE1", "SAMPLEUSER", "TABLE", "desc1", 5), - ("TABLE2", "SAMPLEUSER", "VIEW", "desc2", 0), - ], - columns=[ - "table_name", - "table_schema", - "table_type", - "table_description", - "row_count", - ], - ) - mock_response = TableResponse(data=expected_df) - self.handler.native_query = MagicMock(return_value=mock_response) - - response = self.handler.meta_get_tables(table_names=table_names) - self.handler.native_query.assert_called_once() - - assert response is mock_response - df = response.data_frame - assert list(df["table_name"]) == ["TABLE1", "TABLE2"] - - del self.handler.native_query 
- - def test_meta_get_columns(self, table_names=None): - """ - Test the retrieval of column metadata. - """ - expected_df = DataFrame( - [ - ("TABLE1", "COL1", "VARCHAR2", "desc1", None, 1), - ("TABLE1", "COL2", "NUMBER", "desc2", "0", 0), - ], - columns=[ - "table_name", - "column_name", - "data_type", - "column_description", - "column_default", - "is_nullable", - ], - ) - - mock_response = TableResponse(data=expected_df) - self.handler.native_query = MagicMock(return_value=mock_response) - - table_name = "TABLE1" - response = self.handler.meta_get_columns(table_name) - self.handler.native_query.assert_called_once() - - assert response is mock_response - df = response.data_frame - assert list(df["column_name"]) == ["COL1", "COL2"] - assert list(df["is_nullable"]) == [1, 0] - - del self.handler.native_query - - def test_meta_get_column_statistics(self, table_names=None): - """ - Test the retrieval of column statistics. - """ - expected_df = DataFrame( - [ - ("STATS_TABLE", "ID", 0.0, 1500, None, None, "1,1500"), - ("STATS_TABLE", "CATEGORY", 5.5, 12, None, None, "A,Z"), - ], - columns=[ - "TABLE_NAME", - "COLUMN_NAME", - "NULL_PERCENTAGE", - "DISTINCT_VALUES_COUNT", - "MOST_COMMON_VALUES", - "MOST_COMMON_FREQUENCIES", - "HISTOGRAM_BOUNDS", - ], - ) - - mock_response = TableResponse(data=expected_df) - self.handler.native_query = MagicMock(return_value=mock_response) - table_names = ["STATS_TABLE"] - response = self.handler.meta_get_column_statistics(table_names=table_names) - self.handler.native_query.assert_called_once() - final_df = response.data_frame - - assert list(final_df.columns) == [ - "TABLE_NAME", - "COLUMN_NAME", - "NULL_PERCENTAGE", - "DISTINCT_VALUES_COUNT", - "MOST_COMMON_VALUES", - "MOST_COMMON_FREQUENCIES", - "MINIMUM_VALUE", - "MAXIMUM_VALUE", - ] - - assert list(final_df["COLUMN_NAME"]) == ["ID", "CATEGORY"] - assert list(final_df["MINIMUM_VALUE"]) == ["1", "A"] - assert list(final_df["MAXIMUM_VALUE"]) == ["1500", "Z"] - - del 
self.handler.native_query - - def test_meta_get_primary_keys(self): - """ - Test the retrieval of primary key metadata. - """ - expected_df = DataFrame( - [ - ("USERS", "USER_ID", 1, "PK_USERS"), - ("ORDERS", "ORDER_ID", 3, "PK_ORDERS"), - ], - columns=[ - "table_name", - "column_name", - "ordinal_position", - "constraint_name", - ], - ) - - mock_response = TableResponse(data=expected_df) - self.handler.native_query = MagicMock(return_value=mock_response) - - table_names = ["USERS", "ORDERS"] - response = self.handler.meta_get_primary_keys(table_names=table_names) - self.handler.native_query.assert_called_once() - - assert response is mock_response - df = response.data_frame - assert list(df["table_name"]) == ["USERS", "ORDERS"] - assert list(df["column_name"]) == ["USER_ID", "ORDER_ID"] - assert list(df["ordinal_position"]) == [1, 3] - assert list(df["constraint_name"]) == ["PK_USERS", "PK_ORDERS"] - - del self.handler.native_query - - def test_meta_get_foreign_keys(self, table_names=None): - """ - Test the retrieval of foreign key metadata. 
- """ - expected_df = DataFrame( - [ - ( - "ORDERS", - "USER_ID", - "USERS", - "USER_ID", - 1, - "FK_ORDERS_USERS", - ), - ( - "ORDER_ITEMS", - "ORDER_ID", - "ORDERS", - "ORDER_ID", - 1, - "FK_ORDERITEMS_ORDERS", - ), - ], - columns=[ - "table_name", - "column_name", - "referenced_table_name", - "referenced_column_name", - "ordinal_position", - "constraint_name", - ], - ) - - mock_response = TableResponse(data=expected_df) - self.handler.native_query = MagicMock(return_value=mock_response) - - table_names = ["ORDERS", "ORDER_ITEMS"] - response = self.handler.meta_get_foreign_keys(table_names=table_names) - self.handler.native_query.assert_called_once() - - assert response is mock_response - df = response.data_frame - assert list(df["table_name"]) == ["ORDERS", "ORDER_ITEMS"] - assert list(df["column_name"]) == ["USER_ID", "ORDER_ID"] - assert list(df["referenced_table_name"]) == ["USERS", "ORDERS"] - assert list(df["referenced_column_name"]) == ["USER_ID", "ORDER_ID"] - assert list(df["ordinal_position"]) == [1, 1] - assert list(df["constraint_name"]) == [ - "FK_ORDERS_USERS", - "FK_ORDERITEMS_ORDERS", - ] - - del self.handler.native_query - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/test_postgres.py b/tests/unit/handlers/test_postgres.py deleted file mode 100644 index a0e3adc1335..00000000000 --- a/tests/unit/handlers/test_postgres.py +++ /dev/null @@ -1,1109 +0,0 @@ -import unittest -import json -import datetime -from uuid import UUID -from decimal import Decimal -from zoneinfo import ZoneInfo -from collections import OrderedDict -from unittest.mock import patch, MagicMock - -import psycopg -from psycopg.pq import ExecStatus -from psycopg.postgres import types as pg_types -import numpy as np -import pandas as pd -from pandas import DataFrame -from pandas.api import types as pd_types - -from base_handler_test import BaseDatabaseHandlerTest, MockCursorContextManager -from 
mindsdb.integrations.handlers.postgres_handler.postgres_handler import PostgresHandler, _map_type -from mindsdb.integrations.libs.response import ( - RESPONSE_TYPE, - TableResponse, - OkResponse, - ErrorResponse, -) -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE - - -class ColumnDescription: - def __init__(self, **kwargs): - self.name = kwargs.get("name") - self.type_code = kwargs.get("type_code") - self.type_display = kwargs.get("type_display") - - -# map between regtype name and type id -regtype_to_oid = {t.regtype: t.oid for t in pg_types} -type_name_to_oid = {t.name: t.oid for t in pg_types} -type_name_to_array_oid = {t.name: t.array_oid for t in pg_types} - - -class TestPostgresHandler(BaseDatabaseHandlerTest, unittest.TestCase): - @property - def dummy_connection_data(self): - return OrderedDict( - host="127.0.0.1", - port=5432, - user="example_user", - schema="public", - password="example_pass", - database="example_db", - sslmode="prefer", - ) - - @property - def err_to_raise_on_connect_failure(self): - return psycopg.Error("Connection Failed") - - @property - def get_tables_query(self): - return """ - SELECT - table_schema, - table_name, - table_type - FROM - information_schema.tables - WHERE - table_schema NOT IN ('information_schema', 'pg_catalog') - and table_type in ('BASE TABLE', 'VIEW') - and table_schema = current_schema() - """ - - @property - def get_columns_query(self): - return f""" - SELECT - COLUMN_NAME, - DATA_TYPE, - ORDINAL_POSITION, - COLUMN_DEFAULT, - IS_NULLABLE, - CHARACTER_MAXIMUM_LENGTH, - CHARACTER_OCTET_LENGTH, - NUMERIC_PRECISION, - NUMERIC_SCALE, - DATETIME_PRECISION, - CHARACTER_SET_NAME, - COLLATION_NAME - FROM - information_schema.columns - WHERE - table_name = '{self.mock_table}' - AND - table_schema = current_schema() - """ - - def create_handler(self): - return PostgresHandler("psql", connection_data=self.dummy_connection_data) - - def create_patcher(self): - return patch("psycopg.connect") - 
- def test_native_query_command_ok_stream(self): - """ - Tests the `native_query` method to ensure it executes a SQL query and handles the case - where the query doesn't return a result set (ExecStatus.COMMAND_OK) - """ - mock_conn = MagicMock() - mock_cursor_server = MockCursorContextManager() - mock_cursor_client = MockCursorContextManager() - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(side_effect=[mock_cursor_server, mock_cursor_client]) - - syntax_error = psycopg.errors.SyntaxError('syntax error at or near "insert"') - mock_cursor_server.execute.side_effect = syntax_error - mock_cursor_client.execute.return_value = None - - # Setup pgresult - mock_pgresult = MagicMock() - mock_pgresult.status = ExecStatus.COMMAND_OK - mock_cursor_client.pgresult = mock_pgresult - mock_cursor_client.rowcount = 1 - - query_str = "INSERT INTO table VALUES (1, 2, 3)" - data = self.handler.native_query(query_str, stream=True) - mock_cursor_server.execute.assert_called_once_with(query_str) - mock_cursor_client.execute.assert_called_once_with(query_str) - assert isinstance(data, OkResponse) - self.assertEqual(data.affected_rows, 1) - - def test_native_query_command_ok_no_stream(self): - """ - Tests the `native_query` at client side execution - """ - mock_conn = MagicMock() - # mock_cursor_server = MockCursorContextManager() - mock_cursor_client = MockCursorContextManager() - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(side_effect=[mock_cursor_client]) - - # syntax_error = psycopg.errors.SyntaxError('syntax error at or near "insert"') - # mock_cursor_server.execute.side_effect = syntax_error - mock_cursor_client.execute.return_value = None - - # Setup pgresult - mock_pgresult = MagicMock() - mock_pgresult.status = ExecStatus.COMMAND_OK - mock_cursor_client.pgresult = mock_pgresult - mock_cursor_client.rowcount = 1 - - query_str = "INSERT INTO table VALUES (1, 2, 3)" - data = 
self.handler.native_query(query_str, stream=False) - # mock_cursor_server.execute.assert_called_once_with(query_str) - mock_cursor_client.execute.assert_called_once_with(query_str) - assert isinstance(data, OkResponse) - self.assertEqual(data.affected_rows, 1) - - def test_native_query_with_results_client_side(self): - """ - Tests the `native_query` method to ensure it executes a SQL query and handles the case - where the query returns a result set - """ - mock_conn = MagicMock() - mock_cursor = MockCursorContextManager() - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - mock_cursor.fetchall = MagicMock(side_effect=[[[1, "name1"], [2, "name2"]], []]) - - # Create proper description objects with necessary type_code for _cast_dtypes - mock_cursor.description = [ - ColumnDescription(name="id", type_code=regtype_to_oid["integer"]), # int4 type code - ColumnDescription(name="name", type_code=regtype_to_oid["text"]), # text type code - ] - - # Make sure pgresult doesn't have COMMAND_OK status - mock_pgresult = MagicMock() - mock_pgresult.status = ExecStatus.TUPLES_OK - mock_cursor.pgresult = mock_pgresult - - query_str = "SELECT * FROM table" - data = self.handler.native_query(query_str, stream=False) - mock_cursor.execute.assert_called_once_with(query_str) - assert isinstance(data, TableResponse) - assert getattr(data, "error_code", None) is None - self.assertEqual(data.type, RESPONSE_TYPE.TABLE) - self.assertIsInstance(data.data_frame, DataFrame) - self.assertEqual(list(data.data_frame.columns), ["id", "name"]) - - def test_native_query_with_results_stream(self): - """ - Tests the `native_query` method to ensure it executes a SQL query and handles the case - where the query returns a result set at server side execution - """ - mock_conn = MagicMock() - mock_cursor = MockCursorContextManager() - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = 
MagicMock(return_value=mock_cursor) - - # Server-side execution uses fetchmany, not fetchall - mock_cursor.fetchmany = MagicMock(side_effect=[[[1, "name1"], [2, "name2"]], []]) - - mock_cursor.description = [ - ColumnDescription(name="id", type_code=regtype_to_oid["integer"]), # int4 type code - ColumnDescription(name="name", type_code=regtype_to_oid["text"]), # text type code - ] - - query_str = "SELECT * FROM table" - data = self.handler.native_query(query_str, stream=True) - mock_cursor.execute.assert_called_once_with(query_str) - - # Verify the response - assert isinstance(data, TableResponse) - assert getattr(data, "error_code", None) is None - self.assertEqual(data.type, RESPONSE_TYPE.TABLE) - self.assertIsNone(data._data) - data.fetchall() - self.assertIsInstance(data._data, DataFrame) - self.assertEqual(list(data.data_frame.columns), ["id", "name"]) - - # Verify DataFrame contains all expected rows - self.assertEqual(len(data.data_frame), 2) - self.assertEqual(data.data_frame["id"].tolist(), [1, 2]) - self.assertEqual(data.data_frame["name"].tolist(), ["name1", "name2"]) - - def test_native_query_with_params(self): - """ - Tests the `native_query` method with parameters to ensure executemany is called correctly - """ - mock_conn = MagicMock() - mock_cursor = MockCursorContextManager() - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - mock_pgresult = MagicMock() - mock_pgresult.status = ExecStatus.COMMAND_OK - mock_cursor.pgresult = mock_pgresult - - query_str = "INSERT INTO table VALUES (%s, %s)" - params = [(1, "a"), (2, "b")] - data = self.handler.native_query(query_str, params=params) - mock_cursor.executemany.assert_called_once_with(query_str, params) - assert isinstance(data, OkResponse) - - def test_native_query_error(self): - """ - Tests the `native_query` method to ensure it properly handles and returns database errors - """ - mock_conn = MagicMock() - mock_cursor = 
MockCursorContextManager() - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - error_msg = "Syntax error in SQL statement" - error = psycopg.Error(error_msg) - # Using side_effect to simulate an exception when execute is called - mock_cursor.execute.side_effect = error - - query_str = "INVALID SQL" - data = self.handler.native_query(query_str) - - mock_cursor.execute.assert_called_once_with(query_str) - - assert isinstance(data, ErrorResponse) - - # The handler implementation sets error_code to 0, check error_message instead - self.assertEqual(data.error_code, 0) - self.assertEqual(data.error_message, str(error)) - - # Ensure rollback was called - mock_conn.rollback.assert_called_once() - - def test_connect_with_schema_sets_search_path_after_connection(self): - """ - Tests that when schema is provided, search_path is set via SET command - after connection (pooler-compatible) rather than via startup options. - """ - self.tearDown() - self.setUp() - self.handler.connection_args["schema"] = "my_schema" - - mock_cursor = MockCursorContextManager() - self.mock_connect.return_value.cursor.return_value = mock_cursor - - connection = self.handler.connect() - - self.assertTrue(self.handler.is_connected) - self.assertIsNotNone(connection) - - mock_cursor.execute.assert_called_once_with('SET search_path TO "my_schema", public;') - self.mock_connect.return_value.commit.assert_called_once() - - def test_make_connection_args_applies_overrides(self): - handler = self.handler - handler.connection_args = OrderedDict( - host="db", - port=6543, - user="u", - password="p", - database="d", - connection_parameters={"application_name": "mdb"}, - autocommit=True, - schema="custom", - ) - config = handler._make_connection_args() - self.assertEqual(config["application_name"], "mdb") - self.assertEqual(config["connect_timeout"], 10) - self.assertTrue(config["autocommit"]) - - def 
test_map_type_handles_known_and_unknown(self): - self.assertEqual(_map_type("INTEGER"), MYSQL_DATA_TYPE.INT) - self.assertEqual(_map_type("json"), MYSQL_DATA_TYPE.JSON) - self.assertEqual(_map_type(None), MYSQL_DATA_TYPE.VARCHAR) - self.assertEqual(_map_type("not_real_type"), MYSQL_DATA_TYPE.VARCHAR) - - def test_query_method_uses_renderer_params(self): - self.handler.renderer.get_exec_params = MagicMock(return_value=("SELECT 1", ["foo"])) - self.handler.native_query = MagicMock(return_value="ok") - query_node = MagicMock() - - result = self.handler.query(query_node) - - self.assertEqual(result, "ok") - self.handler.renderer.get_exec_params.assert_called_once_with(query_node, with_failback=True) - self.handler.native_query.assert_called_once_with("SELECT 1", ["foo"], stream=False) - - def test_insert_respects_existing_column_case(self): - if getattr(self.handler, "name", None) != "postgres": - self.skipTest("Only applicable to Postgres COPY-based insert.") - mock_conn = MagicMock() - mock_cursor = MockCursorContextManager() - copy_cm = MagicMock() - copy_cm.__enter__.return_value = MagicMock() - mock_cursor.copy.return_value = copy_cm - mock_cursor.rowcount = 2 - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - self.handler.disconnect = MagicMock() - self.handler.get_columns = MagicMock( - return_value=TableResponse( - data=pd.DataFrame({"COLUMN_NAME": ["Id", "Amount"]}), - ) - ) - - df = pd.DataFrame({"id": [1], "amount": [10]}) - with patch.object(pd.DataFrame, "to_csv", autospec=True) as mock_to_csv: - resp = self.handler.insert("sales", df) - - self.assertEqual(resp.affected_rows, mock_cursor.rowcount) - mock_to_csv.assert_called_once() - mock_conn.commit.assert_called_once() - mock_conn.rollback.assert_not_called() - self.handler.disconnect.assert_called_once() - executed_copy = mock_cursor.copy.call_args[0][0] - self.assertIn('"Id"', executed_copy) - self.assertIn('"Amount"', executed_copy) 
- - def test_meta_get_column_statistics_returns_non_table_response(self): - error_response = ErrorResponse(error_message="boom") - self.handler.native_query = MagicMock(return_value=error_response) - - result = self.handler.meta_get_column_statistics() - - self.assertIs(result, error_response) - self.handler.native_query.assert_called_once() - - def test_cast_dtypes(self): - """ - Tests the _cast_dtypes method to ensure it correctly converts PostgreSQL types to pandas types - """ - df = pd.DataFrame( - { - "int2_col": ["1", "2"], - "int4_col": ["10", "20"], - "int8_col": ["100", "200"], - "numeric_col": ["1.5", "2.5"], - "float4_col": ["1.1", "2.2"], - "float8_col": ["10.1", "20.2"], - "text_col": ["a", "b"], - } - ) - - original_get = psycopg.postgres.types.get - - try: - type_mocks = {} - for pg_type, oid in type_name_to_oid.items(): - type_mock = MagicMock() - type_mock.name = pg_type - type_mocks[oid] = type_mock - - # Mock the types.get function - # Make it return a default mock for any OID to avoid KeyError - def mock_get(oid): - if oid in type_mocks: - return type_mocks[oid] - else: - # Return a default mock with unknown type name - default_mock = MagicMock() - default_mock.name = "unknown" - return default_mock - - psycopg.postgres.types.get = mock_get - - description = [ - ColumnDescription(name="int2_col", type_code=type_name_to_oid["int2"]), - ColumnDescription(name="int4_col", type_code=type_name_to_oid["int4"]), - ColumnDescription(name="int8_col", type_code=type_name_to_oid["int8"]), - ColumnDescription(name="numeric_col", type_code=type_name_to_oid["numeric"]), - ColumnDescription(name="float4_col", type_code=type_name_to_oid["float4"]), - ColumnDescription(name="float8_col", type_code=type_name_to_oid["float8"]), - ColumnDescription(name="text_col", type_code=type_name_to_oid["text"]), - ] - - self.handler._cast_dtypes(df, description) - # Verify the types were correctly cast - self.assertEqual(df["int2_col"].dtype, "int16") - 
self.assertEqual(df["int4_col"].dtype, "int32") - self.assertEqual(df["int8_col"].dtype, "int64") - self.assertEqual(df["numeric_col"].dtype, "float64") - self.assertEqual(df["float4_col"].dtype, "float32") - self.assertEqual(df["float8_col"].dtype, "float64") - self.assertEqual(df["text_col"].dtype, "object") - - finally: - # Restore original function - psycopg.postgres.types.get = original_get - - def test_cast_dtypes_with_nulls(self): - """ - Tests the _cast_dtypes method with NULL values to ensure correct handling - """ - df = pd.DataFrame({"int2_col": ["1", None], "float4_col": ["1.1", None]}) - - # Create type code mapping - type_codes = { - "int2": 21, # Typical OID for int2 - "float4": 700, # Typical OID for float4 - } - - # Create mock psycopg.postgres.types.get function - original_get = psycopg.postgres.types.get - - try: - type_mocks = {} - for pg_type, oid in type_codes.items(): - type_mock = MagicMock() - type_mock.name = pg_type - type_mocks[oid] = type_mock - - # Make it return a default mock for any OID to avoid KeyError - def mock_get(oid): - if oid in type_mocks: - return type_mocks[oid] - else: - default_mock = MagicMock() - default_mock.name = "unknown" - return default_mock - - psycopg.postgres.types.get = mock_get - - # Set up description with our custom class - description = [ - ColumnDescription(name="int2_col", type_code=type_codes["int2"]), - ColumnDescription(name="float4_col", type_code=type_codes["float4"]), - ] - - self.handler._cast_dtypes(df, description) - - self.assertEqual(df["int2_col"].dtype, "int16") - self.assertEqual(df["float4_col"].dtype, "float32") - self.assertEqual(df["int2_col"].iloc[1], 0) - self.assertEqual(df["float4_col"].iloc[1], 0) - - finally: - psycopg.postgres.types.get = original_get - - def test_insert(self): - """ - Tests the insert method to ensure it correctly uses the COPY command - to insert a DataFrame into a PostgreSQL table - """ - mock_conn = MagicMock() - mock_cursor = MockCursorContextManager() - - 
self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - mock_pgresult = MagicMock() - mock_pgresult.status = ExecStatus.TUPLES_OK - mock_cursor.pgresult = mock_pgresult - mock_cursor.rowcount = 1 - - get_columns_result = [ - ["id", "int", 1, None, "YES", None, None, None, None, None, None, None], - ["name", "text", 2, None, "YES", None, None, None, None, None, None, None], - ] - mock_cursor.fetchmany = MagicMock(side_effect=[get_columns_result, []]) - - information_schema_description = [ - ColumnDescription(name="COLUMN_NAME", type_code=regtype_to_oid["text"]), - ColumnDescription(name="DATA_TYPE", type_code=regtype_to_oid["text"]), - ColumnDescription(name="ORDINAL_POSITION", type_code=regtype_to_oid["integer"]), - ColumnDescription(name="COLUMN_DEFAULT", type_code=regtype_to_oid["text"]), - ColumnDescription(name="IS_NULLABLE", type_code=regtype_to_oid["text"]), - ColumnDescription(name="CHARACTER_MAXIMUM_LENGTH", type_code=regtype_to_oid["integer"]), - ColumnDescription(name="CHARACTER_OCTET_LENGTH", type_code=regtype_to_oid["integer"]), - ColumnDescription(name="NUMERIC_PRECISION", type_code=regtype_to_oid["integer"]), - ColumnDescription(name="NUMERIC_SCALE", type_code=regtype_to_oid["integer"]), - ColumnDescription(name="DATETIME_PRECISION", type_code=regtype_to_oid["integer"]), - ColumnDescription(name="CHARACTER_SET_NAME", type_code=regtype_to_oid["text"]), - ColumnDescription(name="COLLATION_NAME", type_code=regtype_to_oid["text"]), - ] - mock_cursor.description = information_schema_description - - # Create mock for copy operation - copy_obj = MagicMock() - mock_cursor.copy = MagicMock(return_value=copy_obj) - # Ensure copy.__enter__ returns the copy object to mimic context manager - copy_obj.__enter__ = MagicMock(return_value=copy_obj) - copy_obj.__exit__ = MagicMock(return_value=None) - - df = pd.DataFrame({"id": [1, 2, 3], "name": ["a", "b", "c"]}) - - self.handler.insert("test_table", df) - 
- # Verify copy was called with correct SQL - copy_sql = 'copy "test_table" ("id","name") from STDIN WITH CSV' - mock_cursor.copy.assert_called_once_with(copy_sql) - # commit for get_columns and insert - self.assertEqual(mock_conn.commit.call_count, 2) - - def test_insert_error(self): - """ - Tests the insert method to ensure it correctly handles errors - """ - mock_conn = MagicMock() - mock_cursor = MockCursorContextManager() - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - error_msg = "Table doesn't exist" - error = psycopg.Error(error_msg) - # Before calling copy, get_columns is called - mock_cursor.execute = MagicMock(side_effect=error) - mock_cursor.copy = MagicMock(side_effect=error) - - df = pd.DataFrame({"id": [1, 2, 3], "name": ["a", "b", "c"]}) - - # Call the insert method and expect an exception - with self.assertRaisesRegex(ValueError, "Table doesn't exist"): - self.handler.insert("nonexistent_table", df) - - mock_conn.rollback.assert_called() - - def test_disconnect(self): - """ - Tests the disconnect method to ensure it correctly closes connections - """ - mock_conn = MagicMock() - - self.handler.connection = mock_conn - self.handler.is_connected = True - - self.handler.disconnect() - - mock_conn.close.assert_called_once() - self.assertFalse(self.handler.is_connected) - mock_conn.reset_mock() - self.handler.disconnect() - mock_conn.close.assert_not_called() - - def test_connection_parameters(self): - """ - Tests that connection parameters are correctly passed to psycopg.connect - """ - self.tearDown() - self.setUp() - self.handler.connection_args["connection_parameters"] = {"application_name": "mindsdb_test", "keepalives": 1} - - self.handler.connect() - - call_kwargs = self.mock_connect.call_args[1] - - self.assertEqual(call_kwargs["application_name"], "mindsdb_test") - self.assertEqual(call_kwargs["keepalives"], 1) - self.assertEqual(call_kwargs["connect_timeout"], 10) - 
self.assertEqual(call_kwargs["sslmode"], "prefer") - - # Test with a different schema - # Create a fresh handler with different schema - self.tearDown() - self.setUp() - self.handler.connection_args["schema"] = "custom_schema" - self.handler.connection_args["connection_parameters"] = {"application_name": "mindsdb_test"} - - self.handler.connect() - call_kwargs = self.mock_connect.call_args[1] - - def test_types_casting(self): - """Test that types are casted correctly""" - query_str = "SELECT * FROM test_table" - - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - mock_pgresult = MagicMock() - mock_pgresult.status = ExecStatus.TUPLES_OK - mock_cursor.pgresult = mock_pgresult - # mock_conn.is_connected = MagicMock(return_value=True) - - # region test TEXT/BLOB types and sub-types - """Test data obtained from: - - CREATE TABLE test_text_blob_types ( - id SERIAL PRIMARY KEY, - t_char CHAR(10), - t_varchar VARCHAR(100), - t_text TEXT, - t_bytea BYTEA, - t_json JSON, - t_jsonb JSONB, - t_xml XML, - t_uuid UUID - ); - - INSERT INTO test_text_blob_types ( - t_char, t_varchar, t_text, t_bytea, t_json, t_jsonb, t_xml, t_uuid - ) VALUES ( - 'Test', - 'Test', - 'Test', - E'\\x44656D6F2062696E61727920646174612E', - '{"name": "test"}', - '{"name": "test"}', - 'test123', - 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11' - ); - """ - input_row = ( - "Test ", - "Test", - "Test", - b"Demo binary data.", - {"name": "test"}, - {"name": "test"}, - "test123", - UUID("a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11"), - ) - mock_cursor.fetchall.return_value = [input_row] - - description = [ - ColumnDescription(name="t_char", type_code=type_name_to_oid["bpchar"]), - ColumnDescription(name="t_varchar", type_code=type_name_to_oid["varchar"]), - 
ColumnDescription(name="t_text", type_code=type_name_to_oid["text"]), - ColumnDescription(name="t_bytea", type_code=type_name_to_oid["bytea"]), - ColumnDescription(name="t_json", type_code=type_name_to_oid["json"]), - ColumnDescription(name="t_jsonb", type_code=type_name_to_oid["jsonb"]), - ColumnDescription(name="t_xml", type_code=type_name_to_oid["xml"]), - ColumnDescription(name="t_uuid", type_code=type_name_to_oid["uuid"]), - ] - mock_cursor.description = description - excepted_mysql_types = [ - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.VARCHAR, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.BINARY, - MYSQL_DATA_TYPE.JSON, - MYSQL_DATA_TYPE.JSON, - MYSQL_DATA_TYPE.VARCHAR, - MYSQL_DATA_TYPE.VARCHAR, - ] - response: TableResponse = self.handler.native_query(query_str, stream=False) - - for column, mysql_type in zip(response.columns, excepted_mysql_types): - self.assertEqual(column.type, mysql_type) - - for i, input_value in enumerate(input_row): - result_value = response.data_frame[description[i].name][0] - self.assertEqual(type(result_value), type(input_value), f"type mismatch: {result_value} != {input_value}") - self.assertEqual(result_value, input_value, f"value mismatch: {result_value} != {input_value}") - # endregion - - # region test BOOLEAN type - input_rows = [(True,), (False,)] - mock_cursor.fetchall.return_value = input_rows - mock_cursor.description = [ColumnDescription(name="t_boolean", type_code=16)] - excepted_mysql_types = [MYSQL_DATA_TYPE.BOOL] - response: TableResponse = self.handler.native_query(query_str, stream=False) - for column, mysql_type in zip(response.columns, excepted_mysql_types): - self.assertEqual(column.type, mysql_type) - self.assertTrue(pd_types.is_bool_dtype(response.data_frame["t_boolean"][0])) - self.assertTrue(bool(response.data_frame["t_boolean"][0]) is True) - self.assertTrue(bool(response.data_frame["t_boolean"][1]) is False) - # endregion - - # region test numeric types - """Test data obtained from: - - CREATE TABLE 
test_numeric_types ( - n_smallint SMALLINT, - n_integer INTEGER, - n_bigint BIGINT, - n_decimal DECIMAL(10,2), - n_numeric NUMERIC(10,4), - n_real REAL, - n_double_precision DOUBLE PRECISION, - n_smallserial SMALLSERIAL, - n_serial SERIAL, - n_bigserial BIGSERIAL, - n_money MONEY, - n_int2 INT2, -- alt for SMALLINT - n_int4 INT4, -- alt for INTEGER - n_int8 INT8, -- alt for BIGINT - n_float4 FLOAT4, -- alt for REAL - n_float8 FLOAT8 -- alt for DOUBLE PRECISION - ); - - INSERT INTO test_numeric_types ( - n_smallint, - n_integer, - n_bigint, - n_decimal, - n_numeric, - n_real, - n_double_precision, - n_money, - n_int2, - n_int4, - n_int8, - n_float4, - n_float8 - ) VALUES ( - 32767, -- n_smallint (max value) - 2147483647, -- n_integer (max value) - 9223372036854775807, -- n_bigint (max value) - 1234.56, -- n_decimal - 12345.6789, -- n_numeric - 3.14159, -- n_real - 2.7182818284590452, -- n_double_precision - '$10,500.25', -- n_money - -32768, -- n_int2 (min value) - 42, -- n_int4 - 123456789, -- n_int8 - 0.00123, -- n_float4 - 9.8765432109876 -- n_float8 - ); - """ - input_row = ( - 32767, # n_smallint (max value) - 2147483647, # n_integer (max value) - 9223372036854775807, # n_bigint (max value) - Decimal("1234.56"), # n_decimal - Decimal("12345.6789"), # n_numeric - 3.14159, # n_real - 2.718281828459045, # n_double_precision - 1, # n_smallserial - 1, # n_serial - 1, # n_bigserial - "$10,500.25", # n_money - -32768, # n_int2 - 42, # n_int4 - 123456789, # n_int8 - 0.00123, # n_float4 - 9.8765432109876, # n_float8 - ) - mock_cursor.fetchall.return_value = [input_row] - - description = [ - ColumnDescription(name="n_smallint", type_code=21), - ColumnDescription(name="n_integer", type_code=23), - ColumnDescription(name="n_bigint", type_code=20), - ColumnDescription(name="n_decimal", type_code=1700), - ColumnDescription(name="n_numeric", type_code=1700), - ColumnDescription(name="n_real", type_code=700), - ColumnDescription(name="n_double_precision", type_code=701), - 
ColumnDescription(name="n_smallserial", type_code=21), - ColumnDescription(name="n_serial", type_code=23), - ColumnDescription(name="n_bigserial", type_code=20), - ColumnDescription(name="n_money", type_code=790), - ColumnDescription(name="n_int2", type_code=21), - ColumnDescription(name="n_int4", type_code=23), - ColumnDescription(name="n_int8", type_code=20), - ColumnDescription(name="n_float4", type_code=700), - ColumnDescription(name="n_float8", type_code=701), - ] - mock_cursor.description = description - - excepted_mysql_types = [ - MYSQL_DATA_TYPE.SMALLINT, # n_smallint - MYSQL_DATA_TYPE.INT, # n_integer - MYSQL_DATA_TYPE.BIGINT, # n_bigint - MYSQL_DATA_TYPE.DECIMAL, # n_decimal - MYSQL_DATA_TYPE.DECIMAL, # n_numeric - MYSQL_DATA_TYPE.FLOAT, # n_real - MYSQL_DATA_TYPE.DOUBLE, # n_double_precision - MYSQL_DATA_TYPE.SMALLINT, # n_smallserial - MYSQL_DATA_TYPE.INT, # n_serial - MYSQL_DATA_TYPE.BIGINT, # n_bigserial - MYSQL_DATA_TYPE.TEXT, # n_money - MYSQL_DATA_TYPE.SMALLINT, # n_int2 - MYSQL_DATA_TYPE.INT, # n_int4 - MYSQL_DATA_TYPE.BIGINT, # n_int8 - MYSQL_DATA_TYPE.FLOAT, # n_float4 - MYSQL_DATA_TYPE.DOUBLE, # n_float8 - ] - response: TableResponse = self.handler.native_query(query_str, stream=False) - for column, mysql_type in zip(response.columns, excepted_mysql_types): - self.assertEqual(column.type, mysql_type) - for i, input_value in enumerate(input_row): - result_value = response.data_frame[description[i].name][0] - self.assertEqual(result_value, input_value, f"value mismatch: {result_value} != {input_value}") - # endregion - - # region test datetime types - """Test data obtained from: - - CREATE TABLE test_time_types ( - t_date DATE, - t_time TIME, - t_time_tz TIME WITH TIME ZONE, - t_timestamp TIMESTAMP, - t_timestamp_tz TIMESTAMP WITH TIME ZONE, - t_interval INTERVAL, - t_timestamptz TIMESTAMPTZ, - t_timetz TIMETZ - ); - - INSERT INTO test_time_types ( - t_date, - t_time, - t_time_tz, - t_timestamp, - t_timestamp_tz, - t_interval, - t_timestamptz, - 
t_timetz - ) VALUES ( - '2023-10-15', -- t_date - '14:30:45', -- t_time - '14:30:45+03:00', -- t_time_tz - '2023-10-15 14:30:45', -- t_timestamp - '2023-10-15 14:30:45+03:00', -- t_timestamp_tz - '2 years 3 months 15 days 12 hours 30 minutes 15 seconds', -- t_interval - '2023-10-15 14:30:45+03:00', -- t_timestamptz - '14:30:45+03:00' -- t_timetz - ); - """ - input_row = ( - datetime.date(2023, 10, 15), - datetime.time(14, 30, 45), - datetime.time(14, 30, 45, tzinfo=datetime.timezone(datetime.timedelta(seconds=10800))), - datetime.datetime(2023, 10, 15, 14, 30, 45), - datetime.datetime(2023, 10, 15, 11, 30, 45, tzinfo=ZoneInfo(key="Etc/UTC")), - datetime.timedelta(days=835, seconds=45015), - datetime.datetime(2023, 10, 15, 11, 30, 45, tzinfo=ZoneInfo(key="Etc/UTC")), - datetime.time(14, 30, 45, tzinfo=datetime.timezone(datetime.timedelta(seconds=10800))), - ) - mock_cursor.fetchall.return_value = [input_row] - - description = [ - ColumnDescription(name="t_date", type_code=1082), - ColumnDescription(name="t_time", type_code=1083), - ColumnDescription(name="t_time_tz", type_code=1266), - ColumnDescription(name="t_timestamp", type_code=1114), - ColumnDescription(name="t_timestamp_tz", type_code=1184), - ColumnDescription(name="t_interval", type_code=1186), - ColumnDescription(name="t_timestamptz", type_code=1184), - ColumnDescription(name="t_timetz", type_code=1266), - ] - mock_cursor.description = description - - excepted_mysql_types = [ - MYSQL_DATA_TYPE.DATE, # DATE - MYSQL_DATA_TYPE.TIME, # TIME - MYSQL_DATA_TYPE.TIME, # TIME WITH TIME ZONE - MYSQL_DATA_TYPE.DATETIME, # TIMESTAMP - MYSQL_DATA_TYPE.DATETIME, # TIMESTAMP WITH TIME ZONE - MYSQL_DATA_TYPE.VARCHAR, # INTERVAL - MYSQL_DATA_TYPE.DATETIME, # TIMESTAMPTZ - MYSQL_DATA_TYPE.TIME, # TIMETZ - ] - - response: TableResponse = self.handler.native_query(query_str, stream=False) - for column, mysql_type in zip(response.columns, excepted_mysql_types): - self.assertEqual(column.type, mysql_type) - for i, input_value 
in enumerate(input_row): - result_value = response.data_frame[description[i].name][0] - self.assertEqual(result_value, input_value, f"value mismatch: {result_value} != {input_value}") - # endregion - - # region test casting of nullable types - bigint_val = 9223372036854775807 - input_rows = [(bigint_val, True), (None, None)] - mock_cursor.fetchall.return_value = input_rows - description = [ - ColumnDescription(name="n_bigint", type_code=20), - ColumnDescription(name="t_boolean", type_code=16), - ] - mock_cursor.description = description - response: TableResponse = self.handler.native_query(query_str, stream=False) - self.assertEqual(response.data_frame.dtypes[0], "Int64") - self.assertEqual(response.data_frame.dtypes[1], "boolean") - self.assertEqual(response.data_frame.iloc[0, 0], bigint_val) - self.assertEqual(response.data_frame.iloc[0, 1], True) - self.assertTrue(response.data_frame.iloc[1, 0] is pd.NA) - self.assertTrue(response.data_frame.iloc[1, 1] is pd.NA) - # endregion - - # region test arrays and vector - """Note: for vector type need to install pgvector extension - Test data obtained from: - - CREATE TABLE test_array_types ( - int_arr1 integer[], - int_arr2 integer[][], - text_arr1 text[], - embedding vector(3) - ); - - INSERT INTO test_array_types ( - int_arr1, - int_arr2, - text_arr1, - embedding - ) VALUES ( - '{1,null,3}', - '{{1,2,3},{4,null,6}}', - '{"test1", null, "test3"}', - '[1.1, 2.2, 3.3]' - ); - """ - input_row = ( - [1, None, 3], # int_arr1 - [[1, 2, 3], [4, None, 6]], # int_arr2 - ["test1", None, "test3"], # text_arr1 - np.array([1.1, 2.2, 3.3], dtype="float32"), - ) - mock_cursor.fetchall.return_value = [input_row] - - description = [ - ColumnDescription(name="int_arr1", type_code=type_name_to_array_oid["int4"]), - ColumnDescription(name="int_arr2", type_code=type_name_to_array_oid["int4"]), - ColumnDescription(name="text_arr1", type_code=type_name_to_array_oid["varchar"]), - ColumnDescription(name="embedding", type_code=16390, 
type_display="vector"), - ] - mock_cursor.description = description - - excepted_mysql_types = [ - MYSQL_DATA_TYPE.JSON, - MYSQL_DATA_TYPE.JSON, - MYSQL_DATA_TYPE.JSON, - MYSQL_DATA_TYPE.VECTOR, - ] - - response: TableResponse = self.handler.native_query(query_str, stream=False) - for column, mysql_type in zip(response.columns, excepted_mysql_types): - self.assertEqual(column.type, mysql_type) - for i, input_value in enumerate(input_row): - result_value = response.data_frame[description[i].name][0] - self.assertEqual(type(result_value), type(input_value), f"type mismatch: {result_value} != {input_value}") - if isinstance(result_value, list): - self.assertEqual(result_value, input_value, f"value mismatch: {result_value} != {input_value}") - else: - self.assertTrue(np.all(result_value == input_value)) - # endregion - - def test_get_tables_all_flag(self): - self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame())) - self.handler.get_tables(all=True) - query = self.handler.native_query.call_args[0][0] - self.assertNotIn("current_schema()", query.split("table_schema")[-1]) - - def test_get_columns_with_schema_name(self): - df = pd.DataFrame( - { - "COLUMN_NAME": ["id"], - "DATA_TYPE": ["integer"], - "ORDINAL_POSITION": [1], - "COLUMN_DEFAULT": [None], - "IS_NULLABLE": ["YES"], - "CHARACTER_MAXIMUM_LENGTH": [None], - "CHARACTER_OCTET_LENGTH": [None], - "NUMERIC_PRECISION": [None], - "NUMERIC_SCALE": [None], - "DATETIME_PRECISION": [None], - "CHARACTER_SET_NAME": [None], - "COLLATION_NAME": [None], - } - ) - self.handler.native_query = MagicMock(return_value=TableResponse(data=df)) - self.handler.get_columns("customers", schema_name="analytics") - query = self.handler.native_query.call_args[0][0] - self.assertIn("table_schema = 'analytics'", query) - - def test_meta_get_tables_filters_by_list(self): - self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame())) - self.handler.meta_get_tables(table_names=["orders"]) - 
query = self.handler.native_query.call_args[0][0] - self.assertIn("IN ('orders')", query) - - def test_meta_get_columns_filters_by_list(self): - self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame())) - self.handler.meta_get_columns(table_names=["orders"]) - query = self.handler.native_query.call_args[0][0] - self.assertIn("IN ('orders')", query) - - def test_meta_get_column_statistics_transforms_histogram(self): - df = pd.DataFrame( - { - "tablename": ["orders"], - "attname": ["amount"], - "null_frac": [0.1], - "n_distinct": [5], - "most_common_values": ["{A,B}"], - "most_common_frequencies": ["{0.5,0.5}"], - "histogram_bounds": ["{1,5,10}"], - } - ) - response = TableResponse(data=df) - self.handler.native_query = MagicMock(return_value=response) - - result = self.handler.meta_get_column_statistics(table_names=["orders"]) - - self.assertIn("MINIMUM_VALUE", result.data_frame.columns) - self.assertEqual(result.data_frame.loc[0, "MINIMUM_VALUE"], "1") - self.assertEqual(result.data_frame.loc[0, "MAXIMUM_VALUE"], "10") - self.assertEqual(result.data_frame.loc[0, "MOST_COMMON_VALUES"], ["A", "B"]) - - def test_meta_get_primary_keys_with_filter(self): - self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame())) - self.handler.meta_get_primary_keys(table_names=["orders"]) - query = self.handler.native_query.call_args[0][0] - self.assertIn("AND tc.table_name IN ('orders')", query) - - def test_meta_get_foreign_keys_with_filter(self): - self.handler.native_query = MagicMock(return_value=TableResponse(data=pd.DataFrame())) - self.handler.meta_get_foreign_keys(table_names=["orders"]) - query = self.handler.native_query.call_args[0][0] - self.assertIn("AND tc.table_name IN ('orders')", query) - - def test_subscribe_creates_triggers_and_processes_events(self): - class FakeConn: - def __init__(self): - self.executed = [] - self.commits = 0 - self.closed = False - - def execute(self, sql): - 
self.executed.append(sql.strip()) - return self - - def fetchone(self): - return (1,) - - def add_notify_handler(self, handler): - event = MagicMock() - event.payload = json.dumps({"amount": 10}) - handler(event) - - def commit(self): - self.commits += 1 - - def close(self): - self.closed = True - - fake_conn = FakeConn() - self.mock_connect.return_value = fake_conn - - class ToggleEvent: - def __init__(self): - self.calls = 0 - - def is_set(self): - self.calls += 1 - return self.calls > 1 - - callback_rows = [] - - with patch("time.sleep", return_value=None): - self.handler.subscribe( - stop_event=ToggleEvent(), - callback=lambda row: callback_rows.append(row), - table_name="orders", - columns=["amount"], - ) - - self.assertTrue(callback_rows) - self.assertTrue(any("CREATE OR REPLACE TRIGGER" in sql for sql in fake_conn.executed)) - self.assertTrue(any("drop trigger" in sql.lower() for sql in fake_conn.executed)) - self.assertTrue(fake_conn.closed) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/test_redshift.py b/tests/unit/handlers/test_redshift.py deleted file mode 100644 index 1d40b93fb4d..00000000000 --- a/tests/unit/handlers/test_redshift.py +++ /dev/null @@ -1,172 +0,0 @@ -import unittest -from unittest.mock import MagicMock - -import numpy as np -import pandas as pd -import psycopg - -from mindsdb.integrations.libs.response import OkResponse, ErrorResponse, RESPONSE_TYPE -from mindsdb.integrations.handlers.redshift_handler.redshift_handler import RedshiftHandler -from test_postgres import TestPostgresHandler - - -class TestRedshiftHandler(TestPostgresHandler): - def create_handler(self): - return RedshiftHandler("redshift", connection_data=self.dummy_connection_data) - - def test_native_query(self): - """ - This test is overridden to avoid issues with the generic MockCursorContextManager not being compatible with Postgres/Redshift cursor behavior. 
- More specific tests (test_native_query_with_results, test_native_query_command_ok, test_native_query_error) cover this functionality. - """ - pass - - def test_insert(self): - """ - Tests the `insert` method to ensure it correctly inserts a DataFrame into a table and returns the appropriate response. - """ - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - mock_cursor.executemany.return_value = None - - df = pd.DataFrame({"column1": [1, 2, 3, np.nan], "column2": ["a", "b", "c", None]}) - - table_name = "mock_table" - response = self.handler.insert(table_name, df) - - columns = ", ".join([f'"{col}"' if " " in col else col for col in df.columns]) - values = ", ".join(["%s" for _ in range(len(df.columns))]) - expected_query = f"INSERT INTO {table_name} ({columns}) VALUES ({values})" - - mock_cursor.executemany.assert_called_once_with(expected_query, df.replace({np.nan: None}).values.tolist()) - assert isinstance(response, OkResponse) - self.assertEqual(response.type, RESPONSE_TYPE.OK) - mock_conn.commit.assert_called_once() - - def test_insert_error(self): - """ - Tests the `insert` method to ensure it correctly handles an exception and returns the appropriate response. 
- """ - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - error_msg = "Table doesn't exist" - error = psycopg.Error(error_msg) - mock_cursor.executemany.side_effect = error - - df = pd.DataFrame({"column1": [1, 2, 3, np.nan], "column2": ["a", "b", "c", None]}) - - response = self.handler.insert("nonexistent_table", df) - - mock_cursor.executemany.assert_called_once() - mock_conn.rollback.assert_called_once() - - assert isinstance(response, ErrorResponse) - self.assertEqual(response.type, RESPONSE_TYPE.ERROR) - self.assertEqual(response.error_message, error_msg) - - def test_insert_with_empty_dataframe(self): - """ - Tests the `insert` method with an empty DataFrame to ensure it handles this edge case correctly. - """ - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - df = pd.DataFrame(columns=["column1", "column2"]) - - table_name = "mock_table" - response = self.handler.insert(table_name, df) - - columns = ", ".join([f'"{col}"' if " " in col else col for col in df.columns]) - values = ", ".join(["%s" for _ in range(len(df.columns))]) - expected_query = f"INSERT INTO {table_name} ({columns}) VALUES ({values})" - - mock_cursor.executemany.assert_called_once() - call_args, call_kwargs = mock_cursor.executemany.call_args - self.assertEqual(call_args[0], expected_query) - self.assertEqual(len(call_args[1]), 0) - - assert isinstance(response, OkResponse) - self.assertEqual(response.type, RESPONSE_TYPE.OK) - - mock_conn.commit.assert_called_once() - - def test_insert_with_special_column_names(self): 
- """ - Tests the `insert` method with column names that contain spaces and special characters - to verify proper quoting of column names. - """ - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - df = pd.DataFrame( - { - "normal_column": [1, 2], - "column with spaces": ["a", "b"], - "column-with-hyphens": [True, False], - "mixed@column#123": [3.14, 2.71], - } - ) - - table_name = "mock_table" - response = self.handler.insert(table_name, df) - - call_args = mock_cursor.executemany.call_args[0][0] - - for col in df.columns: - if " " in col: - self.assertIn(f'"{col}"', call_args) - else: - self.assertTrue(col in call_args or f'"{col}"' in call_args) - - assert isinstance(response, OkResponse) - self.assertEqual(response.type, RESPONSE_TYPE.OK) - - def test_insert_disconnect_when_needed(self): - """ - Tests that the `insert` method disconnects when it created the connection - but keeps the connection open if it was already connected. 
- """ - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.is_connected = False - self.handler.connect = MagicMock(return_value=mock_conn) - self.handler.disconnect = MagicMock() - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - df = pd.DataFrame({"column1": [1, 2, 3]}) - self.handler.insert("mock_table", df) - self.handler.disconnect.assert_called_once() - self.handler.connect.reset_mock() - self.handler.disconnect.reset_mock() - self.handler.is_connected = True - self.handler.insert("mock_table", df) - self.handler.disconnect.assert_not_called() - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/test_rest_api.py b/tests/unit/handlers/test_rest_api.py deleted file mode 100644 index ff10b02d67f..00000000000 --- a/tests/unit/handlers/test_rest_api.py +++ /dev/null @@ -1,165 +0,0 @@ -"""Unit tests for the generic REST API passthrough handler.""" - -from unittest.mock import patch, MagicMock - -from mindsdb.integrations.handlers.rest_api_handler.rest_api_handler import RestApiHandler -from mindsdb.integrations.libs.passthrough import PassthroughProtocol -from mindsdb.integrations.libs.passthrough_types import PassthroughRequest, PassthroughResponse -from mindsdb.integrations.libs.response import ( - HandlerStatusResponse as StatusResponse, -) - - -VALID_DATA = { - "base_url": "https://api.example.com", - "bearer_token": "test-token-123", -} - - -def _make_handler(connection_data=None): - if connection_data is None: - connection_data = dict(VALID_DATA) - return RestApiHandler("test_rest", connection_data=connection_data) - - -class TestRestApiHandlerInit: - def test_satisfies_passthrough_protocol(self): - assert issubclass(RestApiHandler, PassthroughProtocol) - - def test_stores_connection_data(self): - data = {"base_url": "https://x.com", "bearer_token": "tok"} - handler = _make_handler(data) - 
assert handler.connection_data == data - - def test_default_test_request_path(self): - handler = _make_handler() - assert handler._test_request.method == "GET" - assert handler._test_request.path == "/" - - def test_custom_test_path(self): - handler = _make_handler( - { - "base_url": "https://api.example.com", - "bearer_token": "tok", - "test_path": "/health", - } - ) - assert handler._test_request.path == "/health" - - def test_custom_test_path_without_slash(self): - handler = _make_handler( - { - "base_url": "https://api.example.com", - "bearer_token": "tok", - "test_path": "status", - } - ) - assert handler._test_request.path == "/status" - - -class TestCheckConnection: - def test_success(self): - handler = _make_handler() - response = handler.check_connection() - assert isinstance(response, StatusResponse) - assert response.success is True - assert not response.error_message - - def test_missing_base_url(self): - handler = _make_handler({"bearer_token": "tok"}) - response = handler.check_connection() - assert response.success is False - assert "base_url" in response.error_message - - def test_missing_bearer_token(self): - handler = _make_handler({"base_url": "https://api.example.com"}) - response = handler.check_connection() - assert response.success is False - assert "bearer_token" in response.error_message - - def test_empty_connection_data(self): - handler = _make_handler({}) - response = handler.check_connection() - assert response.success is False - - -class TestPassthroughIntegration: - """Test that the mixin methods work correctly on RestApiHandler.""" - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_api_passthrough_injects_bearer(self, mock_request): - mock_resp = MagicMock() - mock_resp.status_code = 200 - mock_resp.headers = {"Content-Type": "application/json"} - mock_resp.iter_content.return_value = [b'{"ok": true}'] - mock_resp.close = MagicMock() - mock_request.return_value = mock_resp - - handler = _make_handler() - 
result = handler.api_passthrough(PassthroughRequest(method="GET", path="/v1/users")) - - assert isinstance(result, PassthroughResponse) - assert result.status_code == 200 - headers = mock_request.call_args.kwargs["headers"] - assert headers["Authorization"] == "Bearer test-token-123" - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_api_passthrough_uses_base_url(self, mock_request): - mock_resp = MagicMock() - mock_resp.status_code = 200 - mock_resp.headers = {} - mock_resp.iter_content.return_value = [b""] - mock_resp.close = MagicMock() - mock_request.return_value = mock_resp - - handler = _make_handler() - handler.api_passthrough(PassthroughRequest(method="GET", path="/foo")) - - called_url = mock_request.call_args.args[1] - assert called_url == "https://api.example.com/foo" - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_api_passthrough_includes_default_headers(self, mock_request): - mock_resp = MagicMock() - mock_resp.status_code = 200 - mock_resp.headers = {} - mock_resp.iter_content.return_value = [b""] - mock_resp.close = MagicMock() - mock_request.return_value = mock_resp - - handler = _make_handler( - { - "base_url": "https://api.example.com", - "bearer_token": "tok", - "default_headers": {"Accept": "application/json", "X-Team": "data"}, - } - ) - handler.api_passthrough(PassthroughRequest(method="GET", path="/")) - - headers = mock_request.call_args.kwargs["headers"] - assert headers["Accept"] == "application/json" - assert headers["X-Team"] == "data" - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_test_passthrough_success(self, mock_request): - mock_resp = MagicMock() - mock_resp.status_code = 200 - mock_resp.headers = {"Content-Type": "application/json"} - mock_resp.iter_content.return_value = [b'{"ok": true}'] - mock_resp.close = MagicMock() - mock_request.return_value = mock_resp - - handler = _make_handler() - result = handler.test_passthrough() - - assert 
isinstance(result, dict) - assert result["ok"] is True - assert result["status_code"] == 200 - - def test_test_passthrough_with_no_network(self): - """test_passthrough catches connection errors gracefully.""" - handler = _make_handler() - result = handler.test_passthrough() - assert isinstance(result, dict) - assert result["ok"] is False - assert result["error_code"] in ("network", "unknown") diff --git a/tests/unit/handlers/test_salesforce.py b/tests/unit/handlers/test_salesforce.py deleted file mode 100644 index 6df5580ba27..00000000000 --- a/tests/unit/handlers/test_salesforce.py +++ /dev/null @@ -1,743 +0,0 @@ -from collections import OrderedDict -import pytest -import unittest -from unittest.mock import patch, MagicMock - -try: - from salesforce_api.exceptions import AuthenticationError, RestRequestCouldNotBeUnderstoodError - from mindsdb.integrations.handlers.salesforce_handler.salesforce_handler import SalesforceHandler - from mindsdb.integrations.handlers.salesforce_handler.salesforce_tables import create_table_class - from mindsdb.integrations.handlers.salesforce_handler.constants import get_soql_instructions -except ImportError: - pytestmark = pytest.mark.skip("Salesforce handler not installed") - -from mindsdb_sql_parser.ast import BinaryOperation, Constant, Identifier, Select, Star -from base_handler_test import BaseHandlerTestSetup, BaseAPIResourceTestSetup -from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator - -from mindsdb.integrations.libs.response import ( - TableResponse, - HandlerStatusResponse as StatusResponse, - RESPONSE_TYPE, -) - - -class TestSalesforceHandler(BaseHandlerTestSetup, unittest.TestCase): - @property - def dummy_connection_data(self): - return OrderedDict( - username="demo@example.com", - password="demo_password", - client_id="3MVG9lKcPoNINVBIPJjdw1J9LLM82HnZz9Yh7ZJnY", - client_secret="5A52C1A1E21DF9012IODC9ISNXXAADDA9", - ) - - def create_handler(self): - return SalesforceHandler("salesforce", 
connection_data=self.dummy_connection_data) - - def create_patcher(self): - return patch("salesforce_api.Salesforce") - - def test_salesforce_init_success(self): - import mindsdb.integrations.handlers.salesforce_handler as m - - assert m.import_error is None - assert m.Handler is not None - # Check metadata - assert m.name == "salesforce" - assert m.title == "Salesforce" - assert m.type is not None - - def test_connect(self): - """ - Test if `connect` method successfully establishes a connection and sets `is_connected` flag to True. - Also, verifies that salesforce_api.Salesforce is instantiated exactly once. - """ - self.mock_connect.return_value = MagicMock() - connection = self.handler.connect() - - self.assertIsNotNone(connection) - self.assertTrue(self.handler.is_connected) - self.mock_connect.assert_called_once() - - def test_connect_missing_required_params_raises(self): - handler = SalesforceHandler("salesforce", connection_data={"username": "demo"}) - with self.assertRaises(ValueError): - handler.connect() - - def test_connect_reuses_existing_connection(self): - existing = MagicMock() - self.handler.connection = existing - self.handler.is_connected = True - - connection = self.handler.connect() - - self.assertIs(connection, existing) - self.mock_connect.assert_not_called() - - def test_connect_authentication_error_is_raised(self): - self.mock_connect.side_effect = AuthenticationError("invalid") - with self.assertRaises(AuthenticationError): - self.handler.connect() - - def test_connect_unknown_error_is_raised(self): - self.mock_connect.side_effect = RuntimeError("boom") - with self.assertRaises(RuntimeError): - self.handler.connect() - - def test_native_query_flattens_nested_resources(self): - connection = MagicMock() - connection.sobjects.query.return_value = [ - { - "attributes": {"type": "Opportunity"}, - "Id": "1", - "Name": "Opp", - "Account": { - "attributes": {"type": "Account"}, - "Name": "Acme", - "Industry": "Tech", - }, - } - ] - 
self.handler.connection = connection - self.handler.is_connected = True - self.handler.resource_names = ["Account"] - - response = self.handler.native_query("SELECT Id FROM Opportunity") - - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - df = response.data_frame - self.assertIn("Account_Name", df.columns) - self.assertIn("Account_Industry", df.columns) - self.assertEqual(df.iloc[0]["Account_Name"], "Acme") - - def test_native_query_handles_rest_error(self): - connection = MagicMock() - rest_error = RestRequestCouldNotBeUnderstoodError("bad query") - connection.sobjects.query.side_effect = rest_error - self.handler.connection = connection - self.handler.is_connected = True - - response = self.handler.native_query("SELECT Id FROM Bad") - - self.assertEqual(response.type, RESPONSE_TYPE.ERROR) - self.assertEqual(response.error_message, str(rest_error)) - - def test_native_query_handles_generic_error(self): - connection = MagicMock() - connection.sobjects.query.side_effect = RuntimeError("boom") - self.handler.connection = connection - self.handler.is_connected = True - - response = self.handler.native_query("SELECT Id FROM Bad") - - self.assertEqual(response.type, RESPONSE_TYPE.ERROR) - self.assertIn("boom", response.error_message) - - def test_check_connection_success(self): - """ - Test that the `check_connection` method returns a StatusResponse object and accurately reflects the connection status on a successful connection. - """ - response = self.handler.check_connection() - - self.assertTrue(response.success) - assert isinstance(response, StatusResponse) - self.assertFalse(response.error_message) - - def test_check_connection_failure(self): - """ - Test that the `check_connection` method returns a StatusResponse object and accurately reflects the connection status on a failed connection. 
- """ - self.mock_connect.side_effect = AuthenticationError("Invalid credentials") - response = self.handler.check_connection() - - self.assertFalse(response.success) - assert isinstance(response, StatusResponse) - self.assertTrue(response.error_message) - - def test_get_tables(self): - """ - Test that the `get_tables` method returns a TableResponse with a list of tables mapped from the Salesforce API. - """ - mock_tables = ["Account", "Contact"] - self.mock_connect.return_value = MagicMock( - sobjects=MagicMock( - describe=lambda: {"sobjects": [{"name": table, "queryable": True} for table in mock_tables]} - ) - ) - self.handler.connect() - response = self.handler.get_tables() - - assert isinstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), len(mock_tables)) - self.assertEqual(list(df["table_name"]), [name.lower() for name in mock_tables]) - - def test_get_columns(self): - """ - Test that the `get_columns` method returns a TableResponse with a list of columns for a given table. 
- """ - mock_columns = ["Id", "Name", "Email"] - mock_table = "Contact" - - # Create a mock for the Contact object that will be accessed via getattr - contact_mock = MagicMock() - contact_mock.describe.return_value = {"fields": [{"name": column} for column in mock_columns]} - - # Create the main sobjects mock - sobjects_mock = MagicMock() - sobjects_mock.describe.return_value = {"sobjects": [{"name": mock_table, "queryable": True}]} - - # Set the contact attribute (lowercase) on the sobjects mock since the handler uses resource_name.lower() - setattr(sobjects_mock, mock_table.lower(), contact_mock) - - # Create the client mock - client_mock = MagicMock() - client_mock.sobjects = sobjects_mock - - # Make sure the mock_connect always returns the same client mock - self.mock_connect.return_value = client_mock - - self.handler.connect() - response = self.handler.get_columns(mock_table) - - assert isinstance(response, TableResponse) - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - - df = response.data_frame - self.assertEqual(len(df), len(mock_columns)) - self.assertEqual(list(df["Field"]), mock_columns) - - def test_pre_filtering_with_include_tables(self): - """ - Test that pre-filtering works correctly when include_tables is specified. 
- """ - # Setup handler with include_tables - connection_data = OrderedDict( - username="demo@example.com", - password="demo_password", - client_id="3MVG9lKcPoNINVBIPJjdw1J9LLM82HnZz9Yh7ZJnY", - client_secret="5A52C1A1E21DF9012IODC9ISNXXAADDA9", - include_tables=["Account", "Contact"], - ) - handler = SalesforceHandler("salesforce", connection_data=connection_data) - - # Mock connection and individual table describe calls - mock_connection = MagicMock() - mock_connection.sobjects.Account = MagicMock() - mock_connection.sobjects.Account.describe.return_value = {"queryable": True} - mock_connection.sobjects.Contact = MagicMock() - mock_connection.sobjects.Contact.describe.return_value = {"queryable": True} - - # Set the connection attribute directly and mark as connected - handler.connection = mock_connection - handler.is_connected = True - - resource_names = handler._get_resource_names() - - # Should only return the specified tables - self.assertEqual(set(resource_names), {"Account", "Contact"}) - - # Should NOT call global describe() method - mock_connection.sobjects.describe.assert_not_called() - - # Should call individual table describe() methods - mock_connection.sobjects.Account.describe.assert_called_once() - mock_connection.sobjects.Contact.describe.assert_called_once() - - def test_pre_filtering_with_tables_parameter(self): - """ - Test that pre-filtering works correctly when 'tables' parameter is specified (alternative to include_tables). 
- """ - # Setup handler with 'tables' parameter - connection_data = OrderedDict( - username="demo@example.com", - password="demo_password", - client_id="3MVG9lKcPoNINVBIPJjdw1J9LLM82HnZz9Yh7ZJnY", - client_secret="5A52C1A1E21DF9012IODC9ISNXXAADDA9", - tables=["Lead", "Opportunity"], - ) - handler = SalesforceHandler("salesforce", connection_data=connection_data) - - # Mock connection and individual table describe calls - mock_connection = MagicMock() - mock_connection.sobjects.Lead = MagicMock() - mock_connection.sobjects.Lead.describe.return_value = {"queryable": True} - mock_connection.sobjects.Opportunity = MagicMock() - mock_connection.sobjects.Opportunity.describe.return_value = {"queryable": True} - - # Set the connection attribute directly and mark as connected - handler.connection = mock_connection - handler.is_connected = True - - resource_names = handler._get_resource_names() - - # Should only return the specified tables - self.assertEqual(set(resource_names), {"Lead", "Opportunity"}) - - # Should NOT call global describe() method - mock_connection.sobjects.describe.assert_not_called() - - # Should call individual table describe() methods - mock_connection.sobjects.Lead.describe.assert_called_once() - mock_connection.sobjects.Opportunity.describe.assert_called_once() - - def test_pre_filtering_with_exclude_tables(self): - """ - Test that exclude_tables parameter properly filters out specified tables. 
- """ - # Setup handler with include_tables and exclude_tables - connection_data = OrderedDict( - username="demo@example.com", - password="demo_password", - client_id="3MVG9lKcPoNINVBIPJjdw1J9LLM82HnZz9Yh7ZJnY", - client_secret="5A52C1A1E21DF9012IODC9ISNXXAADDA9", - include_tables=["Account", "Contact", "Lead"], - exclude_tables=["Lead"], - ) - handler = SalesforceHandler("salesforce", connection_data=connection_data) - - # Mock connection and individual table describe calls - mock_connection = MagicMock() - mock_connection.sobjects.Account = MagicMock() - mock_connection.sobjects.Account.describe.return_value = {"queryable": True} - mock_connection.sobjects.Contact = MagicMock() - mock_connection.sobjects.Contact.describe.return_value = {"queryable": True} - mock_connection.sobjects.Lead = MagicMock() - mock_connection.sobjects.Lead.describe.return_value = {"queryable": True} - - # Set the connection attribute directly and mark as connected - handler.connection = mock_connection - handler.is_connected = True - - resource_names = handler._get_resource_names() - - # Should only return Account and Contact (Lead should be excluded) - self.assertEqual(set(resource_names), {"Account", "Contact"}) - - # Should NOT call global describe() method - mock_connection.sobjects.describe.assert_not_called() - - # Should call describe() only for non-excluded tables - mock_connection.sobjects.Account.describe.assert_called_once() - mock_connection.sobjects.Contact.describe.assert_called_once() - mock_connection.sobjects.Lead.describe.assert_not_called() - - def test_fallback_to_full_discovery_when_no_include_tables(self): - """ - Test that fallback to full discovery works when no include_tables/tables specified. 
- """ - # Setup handler without include_tables or tables - connection_data = OrderedDict( - username="demo@example.com", - password="demo_password", - client_id="3MVG9lKcPoNINVBIPJjdw1J9LLM82HnZz9Yh7ZJnY", - client_secret="5A52C1A1E21DF9012IODC9ISNXXAADDA9", - ) - handler = SalesforceHandler("salesforce", connection_data=connection_data) - - # Mock connection with global describe response - mock_connection = MagicMock() - mock_connection.sobjects.describe.return_value = { - "sobjects": [ - {"name": "Account", "queryable": True}, - {"name": "Contact", "queryable": True}, - {"name": "AccountHistory", "queryable": True}, # Should be filtered out by hard-coded rules - {"name": "CustomObject__c", "queryable": True}, - ] - } - - # Set the connection attribute directly and mark as connected - handler.connection = mock_connection - handler.is_connected = True - - resource_names = handler._get_resource_names() - - # Should return filtered tables (excluding History tables per hard-coded rules) - self.assertIn("Account", resource_names) - self.assertIn("Contact", resource_names) - self.assertIn("CustomObject__c", resource_names) - self.assertNotIn("AccountHistory", resource_names) # Should be filtered out - - # Should call global describe() method for full discovery - mock_connection.sobjects.describe.assert_called_once() - - def test_error_handling_for_non_existent_tables(self): - """ - Test that non-existent tables are handled gracefully with warnings. 
- """ - # Setup handler with include_tables containing non-existent table - connection_data = OrderedDict( - username="demo@example.com", - password="demo_password", - client_id="3MVG9lKcPoNINVBIPJjdw1J9LLM82HnZz9Yh7ZJnY", - client_secret="5A52C1A1E21DF9012IODC9ISNXXAADDA9", - include_tables=["Account", "NonExistentTable", "Contact"], - ) - handler = SalesforceHandler("salesforce", connection_data=connection_data) - - # Mock connection where NonExistentTable raises an exception - mock_connection = MagicMock() - mock_connection.sobjects.Account = MagicMock() - mock_connection.sobjects.Account.describe.return_value = {"queryable": True} - mock_connection.sobjects.Contact = MagicMock() - mock_connection.sobjects.Contact.describe.return_value = {"queryable": True} - - # Mock NonExistentTable to raise an exception - mock_non_existent = MagicMock() - mock_non_existent.describe.side_effect = Exception("Table not found") - mock_connection.sobjects.NonExistentTable = mock_non_existent - - # Set the connection attribute directly and mark as connected - handler.connection = mock_connection - handler.is_connected = True - - resource_names = handler._get_resource_names() - - # Should only return existing tables (Account, Contact) and skip non-existent one - self.assertEqual(set(resource_names), {"Account", "Contact"}) - - # Should NOT call global describe() method - mock_connection.sobjects.describe.assert_not_called() - - # Should call describe() for all specified tables (including non-existent) - mock_connection.sobjects.Account.describe.assert_called_once() - mock_connection.sobjects.Contact.describe.assert_called_once() - mock_connection.sobjects.NonExistentTable.describe.assert_called_once() - - def test_validate_specified_tables_skips_non_queryable(self): - handler = self.create_handler() - handler.connection = MagicMock() - handler.is_connected = True - handler.connection.sobjects.Account = MagicMock() - handler.connection.sobjects.Account.describe.return_value = 
{"queryable": False} - - validated = handler._validate_specified_tables(["Account"], []) - - self.assertEqual(validated, []) - handler.connection.sobjects.Account.describe.assert_called_once() - - def test_meta_get_handler_info_returns_prompt(self): - info = self.handler.meta_get_handler_info() - self.assertIn("SOQL", info) - - def test_meta_get_tables_filters_requested_tables(self): - mock_connection = MagicMock() - mock_connection.sobjects.describe.return_value = { - "sobjects": [ - {"name": "Account", "queryable": True}, - {"name": "Contact", "queryable": True}, - ] - } - self.mock_connect.return_value = mock_connection - - with patch( - "mindsdb.integrations.handlers.salesforce_handler.salesforce_handler.MetaAPIHandler.meta_get_tables", - return_value=TableResponse(), - ) as mock_meta: - response = self.handler.meta_get_tables(table_names=["contact"]) - - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - mock_meta.assert_called_once() - kwargs = mock_meta.call_args.kwargs - self.assertEqual(len(kwargs["main_metadata"]), 1) - self.assertEqual(kwargs["main_metadata"][0]["name"], "Contact") - - -class TestSalesforceAnyTable(BaseAPIResourceTestSetup, unittest.TestCase): - @property - def dummy_connection_data(self): - return OrderedDict( - username="demo@example.com", - password="demo_password", - client_id="3MVG9lKcPoNINVBIPJjdw1J9LLM82HnZz9Yh7ZJnY", - client_secret="5A52C1A1E21DF9012IODC9ISNXXAADDA9", - ) - - def create_handler(self): - return SalesforceHandler("salesforce", connection_data=self.dummy_connection_data) - - def create_patcher(self): - return patch("salesforce_api.Salesforce") - - def create_resource(self): - return create_table_class(self.table_name)(self.handler) - - def setUp(self): - """ - Set up common test fixtures. 
- """ - self.table_name = "contact" - self.mock_columns = ["Id", "Name", "Email"] - self.mock_record = {column: f"{column}_value" for column in self.mock_columns} - - super().setUp() - - table_api = MagicMock() - table_api.describe.return_value = { - "fields": [ - { - "name": column, - "type": "string", - "nillable": True, - "defaultValue": "", - "inlineHelpText": "", - } - for column in self.mock_columns - ] - } - table_api.insert = MagicMock() - table_api.update = MagicMock() - table_api.delete = MagicMock() - - self._row_count = 5 - - def fake_query(query_str): - if query_str.startswith("SELECT COUNT"): - return [{"expr0": self._row_count}] - return [{"attributes": {"type": self.table_name}, **self.mock_record}] - - sobjects = MagicMock() - sobjects.query.side_effect = fake_query - setattr(sobjects, self.table_name, table_api) - - self.mock_client = MagicMock(sobjects=sobjects) - self.mock_connect.return_value = self.mock_client - - def test_select_all(self): - """ - Test that the `select` method returns the data from the Salesforce resource for a simple SELECT * query. - """ - select_query = Select(targets=[Star()], from_table=Identifier(parts=[self.table_name])) - df = self.resource.select(select_query) - - self.assertEqual(len(df), 1) - self.assertEqual(list(df.columns), self.mock_columns) - self.assertEqual(list(df.iloc[0]), list(self.mock_record.values())) - - def test_select_columns(self): - """ - Test that the `select` method returns the data from the Salesforce resource for a SELECT query with specific columns. 
- """ - select_query = Select( - targets=[Identifier(parts=[column]) for column in self.mock_columns], - from_table=Identifier(parts=[self.table_name]), - ) - df = self.resource.select(select_query) - - self.assertEqual(len(df), 1) - self.assertEqual(list(df.columns), self.mock_columns) - self.assertEqual(list(df.iloc[0]), list(self.mock_record.values())) - - def test_select_columns_with_alias(self): - """ - Test that the `select` method returns the data from the Salesforce resource for a SELECT query with specific columns and aliases. - """ - select_query = Select( - targets=[ - Identifier(parts=[column], alias=Identifier(parts=[f"{column}_alias"])) for column in self.mock_columns - ], - from_table=Identifier(parts=[self.table_name]), - ) - df = self.resource.select(select_query) - - self.assertEqual(len(df), 1) - self.assertEqual(list(df.columns), [f"{column}_alias" for column in self.mock_columns]) - self.assertEqual(list(df.iloc[0]), list(self.mock_record.values())) - - def test_select_columns_with_condition(self): - """ - Test that the `select` method returns the data from the Salesforce resource for a SELECT query with specific columns and a WHERE condition. - """ - select_query = Select( - targets=[Identifier(parts=[column]) for column in self.mock_columns], - from_table=Identifier(parts=[self.table_name]), - where=BinaryOperation(op="=", args=[Identifier("Id"), Constant("Id_value")]), - ) - df = self.resource.select(select_query) - - self.assertEqual(len(df), 1) - self.assertEqual(list(df.columns), self.mock_columns) - self.assertEqual(list(df.iloc[0]), list(self.mock_record.values())) - - def test_select_columns_with_condition_and_limit(self): - """ - Test that the `select` method returns the data from the Salesforce resource for a SELECT query with specific columns, a WHERE condition, and a LIMIT clause. 
- """ - select_query = Select( - targets=[Identifier(parts=[column]) for column in self.mock_columns], - from_table=Identifier(parts=[self.table_name]), - where=BinaryOperation(op="=", args=[Identifier("Id"), Constant("Id_value")]), - limit=Constant(1), - ) - df = self.resource.select(select_query) - - self.assertEqual(len(df), 1) - self.assertEqual(list(df.columns), self.mock_columns) - self.assertEqual(list(df.iloc[0]), list(self.mock_record.values())) - - def test_select_columns_with_conditions(self): - """ - Test that the `select` method returns the data from the Salesforce resource for a SELECT query with specific columns and multiple WHERE conditions. - """ - select_query = Select( - targets=[Identifier(parts=[column]) for column in self.mock_columns], - from_table=Identifier(parts=[self.table_name]), - where=BinaryOperation( - op="AND", - args=[ - BinaryOperation(op="=", args=[Identifier("Id"), Constant("Id_value")]), - BinaryOperation(op="=", args=[Identifier("Name"), Constant("Name_value")]), - ], - ), - ) - df = self.resource.select(select_query) - - self.assertEqual(len(df), 1) - self.assertEqual(list(df.columns), self.mock_columns) - self.assertEqual(list(df.iloc[0]), list(self.mock_record.values())) - - def test_constants_in_soql_instructions(self): - """ - Test that the SOQL instructions contain the expected constants. 
- """ - instructions = get_soql_instructions("DummyIntegration") - # Prompt may chage but these keywords must be present related to SOQL syntax - required_syntax = [ - "SOQL", - "SELECT", - "FROM", - "WHERE", - "LIMIT", - "INCLUDES", - "EXCLUDES", - "OPERATORS", - "FROM DummyIntegration(", # native query examples - ] - for term in required_syntax: - self.assertIn(term, instructions, f"Missing syntax in instructions: {term}") - - def test_add_uses_salesforce_insert(self): - payload = {"Name": "Abc"} - table_api = getattr(self.mock_client.sobjects, self.table_name) - self.resource.add(payload) - table_api.insert.assert_called_once_with(payload) - - def test_modify_updates_ids_from_conditions(self): - table_api = getattr(self.mock_client.sobjects, self.table_name) - self.resource.modify( - [FilterCondition(column="Id", op=FilterOperator.EQUAL, value="123")], - {"Name": "Updated"}, - ) - table_api.update.assert_called_once_with("123", {"Name": "Updated"}) - - def test_remove_deletes_ids_from_conditions(self): - table_api = getattr(self.mock_client.sobjects, self.table_name) - self.resource.remove([FilterCondition(column="Id", op=FilterOperator.IN, value=["1", "2"])]) - table_api.delete.assert_any_call("1") - table_api.delete.assert_any_call("2") - self.assertEqual(table_api.delete.call_count, 2) - - def test_validate_conditions_only_allows_id(self): - with self.assertRaises(ValueError): - self.resource._validate_conditions([FilterCondition(column="Name", op=FilterOperator.EQUAL, value="A")]) - - def test_validate_conditions_requires_supported_operator(self): - with self.assertRaises(ValueError): - self.resource._validate_conditions([FilterCondition(column="Id", op=FilterOperator.NOT_EQUAL, value="1")]) - - def test_meta_get_tables_returns_metadata_with_rowcount(self): - result = self.resource.meta_get_tables( - self.table_name, - [{"name": self.table_name, "fields": [{"name": "Id"}], "label": "Contacts"}], - ) - self.assertEqual(result["table_name"], self.table_name) - 
self.assertEqual(result["table_type"], "BASE TABLE") - self.assertEqual(result["row_count"], self._row_count) - - def test_meta_get_tables_handles_missing_resource(self): - result = self.resource.meta_get_tables(self.table_name, []) - self.assertIsNone(result["row_count"]) - self.assertEqual(result["table_name"], self.table_name) - - def test_meta_get_columns_builds_schema(self): - metadata = { - "fields": [ - { - "name": "Id", - "type": "string", - "nillable": False, - "defaultValue": "foo", - "inlineHelpText": "Identifier", - } - ] - } - self.resource._get_resource_metadata = MagicMock(return_value=metadata) - columns = self.resource.meta_get_columns(self.table_name) - self.assertEqual(columns[0]["column_name"], "Id") - self.assertEqual(columns[0]["data_type"], "string") - - -class TestSalesforcePassthrough(unittest.TestCase): - """Exercise the PassthroughMixin retrofit (per-instance base URL).""" - - CONNECTION_DATA = { - "username": "u", - "password": "p", - "client_id": "cid", - "client_secret": "csec", - "access_token": "sf_access_tok", - "instance_url": "https://my-org.my.salesforce.com", - } - - def _mock_response(self, status_code=200): - resp = MagicMock() - resp.status_code = status_code - resp.headers = {"Content-Type": "application/json"} - resp.iter_content = MagicMock(return_value=iter([b'{"sobjects":[]}'])) - resp.close = MagicMock() - return resp - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_passthrough_uses_bearer_and_instance_url(self, mock_request): - mock_request.return_value = self._mock_response() - handler = SalesforceHandler("salesforce", connection_data=self.CONNECTION_DATA) - from mindsdb.integrations.libs.passthrough_types import PassthroughRequest - - resp = handler.api_passthrough(PassthroughRequest("GET", "/services/data/v60.0/")) - - self.assertEqual(resp.status_code, 200) - args, kwargs = mock_request.call_args - self.assertEqual(args[0], "GET") - self.assertEqual(args[1], 
"https://my-org.my.salesforce.com/services/data/v60.0/") - self.assertEqual(kwargs["headers"]["Authorization"], "Bearer sf_access_tok") - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_test_passthrough_returns_ok_on_200(self, mock_request): - mock_request.return_value = self._mock_response(status_code=200) - handler = SalesforceHandler("salesforce", connection_data=self.CONNECTION_DATA) - - result = handler.test_passthrough() - - self.assertTrue(result["ok"]) - self.assertEqual(result["status_code"], 200) - self.assertEqual(result["host"], "my-org.my.salesforce.com") - self.assertIsInstance(result["latency_ms"], int) - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_test_passthrough_returns_auth_failed_on_401(self, mock_request): - mock_request.return_value = self._mock_response(status_code=401) - handler = SalesforceHandler("salesforce", connection_data=self.CONNECTION_DATA) - - result = handler.test_passthrough() - - self.assertFalse(result["ok"]) - self.assertEqual(result["error_code"], "auth_failed") - self.assertEqual(result["status_code"], 401) - self.assertEqual(result["host"], "my-org.my.salesforce.com") - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/test_shopify_handler.py b/tests/unit/handlers/test_shopify_handler.py deleted file mode 100644 index b1ecc6e8811..00000000000 --- a/tests/unit/handlers/test_shopify_handler.py +++ /dev/null @@ -1,883 +0,0 @@ -import unittest -from unittest.mock import MagicMock, patch -import sys -import json - -from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse -from mindsdb.integrations.libs.api_handler_exceptions import ( - MissingConnectionParams, - ConnectionFailed, -) - -# Mock shopify and requests modules before importing the handler -if "shopify" not in sys.modules: - sys.modules["shopify"] = MagicMock() - sys.modules["shopify.ShopifyResource"] = MagicMock() - 
sys.modules["shopify.Session"] = MagicMock() - sys.modules["shopify.Shop"] = MagicMock() - -if "requests" not in sys.modules: - sys.modules["requests"] = MagicMock() - -from mindsdb.integrations.handlers.shopify_handler.shopify_handler import ShopifyHandler -from mindsdb.integrations.handlers.shopify_handler.utils import query_graphql_nodes, MAX_PAGE_LIMIT -from mindsdb.integrations.handlers.shopify_handler.models.products import Products - - -class BaseShopifyHandlerTest(unittest.TestCase): - """Base test class with common setup and helper methods.""" - - # Test constants - TEST_SHOP_URL = "test-shop.myshopify.com" - TEST_CLIENT_ID = "test_client_id" - TEST_CLIENT_SECRET = "test_client_secret" - TEST_HANDLER_NAME = "test_shopify_handler" - - def setUp(self): - """Set up test fixtures.""" - self.connection_data = { - "shop_url": self.TEST_SHOP_URL, - "client_id": self.TEST_CLIENT_ID, - "client_secret": self.TEST_CLIENT_SECRET, - } - - def tearDown(self): - """Clean up after tests.""" - pass - - -class TestShopifyHandlerInitialization(BaseShopifyHandlerTest): - """Test suite for Shopify Handler initialization.""" - - def test_handler_initialization_success(self): - """Test successful handler initialization with all required parameters.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - - self.assertEqual(handler.name, self.TEST_HANDLER_NAME) - self.assertEqual(handler.connection_data, self.connection_data) - self.assertFalse(handler.is_connected) - self.assertIsNone(handler.connection) - - def test_handler_initialization_without_connection_data(self): - """Test handler initialization fails when connection_data is missing.""" - with self.assertRaises(MissingConnectionParams) as context: - ShopifyHandler(self.TEST_HANDLER_NAME) - - self.assertIn("Incomplete parameters", str(context.exception)) - - def test_handler_initialization_missing_shop_url(self): - """Test handler initialization fails when shop_url is missing.""" - 
incomplete_data = { - "client_id": self.TEST_CLIENT_ID, - "client_secret": self.TEST_CLIENT_SECRET, - } - - with self.assertRaises(MissingConnectionParams) as context: - ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=incomplete_data) - - self.assertIn("shop_url", str(context.exception)) - - def test_handler_initialization_missing_client_id(self): - """Test handler initialization fails when client_id is missing.""" - incomplete_data = { - "shop_url": self.TEST_SHOP_URL, - "client_secret": self.TEST_CLIENT_SECRET, - } - - with self.assertRaises(MissingConnectionParams) as context: - ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=incomplete_data) - - self.assertIn("client_id", str(context.exception)) - - def test_handler_initialization_missing_client_secret(self): - """Test handler initialization fails when client_secret is missing.""" - incomplete_data = { - "shop_url": self.TEST_SHOP_URL, - "client_id": self.TEST_CLIENT_ID, - } - - with self.assertRaises(MissingConnectionParams) as context: - ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=incomplete_data) - - self.assertIn("client_secret", str(context.exception)) - - def test_handler_tables_registered(self): - """Test that all required tables are registered during initialization.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - - expected_tables = [ - "products", - "customers", - "orders", - "product_variants", - "marketing_events", - "inventory_items", - "staff_members", - "gift_cards", - ] - - for table_name in expected_tables: - self.assertIn(table_name, handler._tables) - - -class TestShopifyHandlerConnection(BaseShopifyHandlerTest): - """Test suite for Shopify Handler connection management.""" - - @patch("mindsdb.integrations.handlers.shopify_handler.shopify_handler.requests") - @patch("mindsdb.integrations.handlers.shopify_handler.shopify_handler.shopify") - def test_connect_success(self, mock_shopify, mock_requests): - """Test successful 
connection to Shopify API.""" - # Mock the OAuth response - mock_response = MagicMock() - mock_response.json.return_value = {"access_token": "test_access_token"} - mock_requests.post.return_value = mock_response - - # Mock the Session - mock_session = MagicMock() - mock_shopify.Session.return_value = mock_session - - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - result = handler.connect() - - # Verify OAuth request was made - mock_requests.post.assert_called_once() - call_args = mock_requests.post.call_args - self.assertIn(self.TEST_SHOP_URL, call_args[0][0]) - self.assertEqual(call_args[1]["data"]["client_id"], self.TEST_CLIENT_ID) - self.assertEqual(call_args[1]["data"]["client_secret"], self.TEST_CLIENT_SECRET) - - # Verify session was created - mock_shopify.Session.assert_called_once_with(self.TEST_SHOP_URL, "2025-10", "test_access_token") - - self.assertTrue(handler.is_connected) - self.assertEqual(result, mock_session) - - @patch("mindsdb.integrations.handlers.shopify_handler.shopify_handler.requests") - @patch("mindsdb.integrations.handlers.shopify_handler.shopify_handler.shopify") - def test_connect_when_already_connected(self, mock_shopify, mock_requests): - """Test that connect returns existing connection when already connected.""" - mock_session = MagicMock() - - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - handler.connection = mock_session - handler.is_connected = True - - result = handler.connect() - - # Should not make new OAuth request - mock_requests.post.assert_not_called() - mock_shopify.Session.assert_not_called() - - self.assertEqual(result, mock_session) - - @patch("mindsdb.integrations.handlers.shopify_handler.shopify_handler.requests") - def test_connect_oauth_failure(self, mock_requests): - """Test connection failure when OAuth request fails.""" - error_msg = "Invalid credentials" - mock_requests.post.side_effect = Exception(error_msg) - - handler = 
ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - - with self.assertRaises(Exception) as context: - handler.connect() - - self.assertIn(error_msg, str(context.exception)) - self.assertFalse(handler.is_connected) - - @patch("mindsdb.integrations.handlers.shopify_handler.shopify_handler.requests") - def test_connect_missing_access_token(self, mock_requests): - """Test connection failure when access token is missing from response.""" - mock_response = MagicMock() - mock_response.json.return_value = {} # No access_token - mock_requests.post.return_value = mock_response - - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - - with self.assertRaises(ConnectionFailed) as context: - handler.connect() - - self.assertIn("Unable to get an access token", str(context.exception)) - self.assertFalse(handler.is_connected) - - @patch("mindsdb.integrations.handlers.shopify_handler.shopify_handler.requests") - @patch("mindsdb.integrations.handlers.shopify_handler.shopify_handler.shopify") - def test_check_connection_success(self, mock_shopify, mock_requests): - """Test successful connection check.""" - # Mock OAuth response - mock_response = MagicMock() - mock_response.json.return_value = {"access_token": "test_access_token"} - mock_requests.post.return_value = mock_response - - # Mock session and shop - mock_session = MagicMock() - mock_shopify.Session.return_value = mock_session - mock_shopify.ShopifyResource.activate_session = MagicMock() - mock_shopify.Shop.current.return_value = MagicMock() - - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - result = handler.check_connection() - - self.assertIsInstance(result, StatusResponse) - self.assertTrue(result.success) - self.assertTrue(handler.is_connected) - - # Verify session was activated and Shop.current was called - mock_shopify.ShopifyResource.activate_session.assert_called_once_with(mock_session) - 
mock_shopify.Shop.current.assert_called_once() - - @patch("mindsdb.integrations.handlers.shopify_handler.shopify_handler.requests") - @patch("mindsdb.integrations.handlers.shopify_handler.shopify_handler.shopify") - def test_check_connection_failure(self, mock_shopify, mock_requests): - """Test connection check failure.""" - # Mock OAuth response - mock_response = MagicMock() - mock_response.json.return_value = {"access_token": "test_access_token"} - mock_requests.post.return_value = mock_response - - # Mock session - mock_session = MagicMock() - mock_shopify.Session.return_value = mock_session - mock_shopify.ShopifyResource.activate_session = MagicMock() - - # Make Shop.current fail - error_message = "Invalid shop" - mock_shopify.Shop.current.side_effect = Exception(error_message) - - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - - response = handler.check_connection() - self.assertFalse(response.success) - - @patch("mindsdb.integrations.handlers.shopify_handler.shopify_handler.requests") - def test_check_connection_oauth_failure(self, mock_requests): - """Test connection check failure during OAuth.""" - mock_response = MagicMock() - mock_response.json.return_value = {} - mock_requests.post.return_value = mock_response - - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - - with self.assertRaises(ConnectionFailed): - handler.connect() - - -class TestShopifyHandlerConnectionArgs(BaseShopifyHandlerTest): - """Test suite for Shopify Handler connection arguments validation.""" - - def test_connection_args_structure(self): - """Test that connection_args has the correct structure.""" - from mindsdb.integrations.handlers.shopify_handler.connection_args import connection_args - - required_fields = ["type", "description", "required", "label"] - - # Check shop_url - self.assertIn("shop_url", connection_args) - for field in required_fields: - self.assertIn(field, connection_args["shop_url"]) - 
self.assertTrue(connection_args["shop_url"]["required"]) - - # Check client_id - self.assertIn("client_id", connection_args) - for field in required_fields: - self.assertIn(field, connection_args["client_id"]) - self.assertTrue(connection_args["client_id"]["required"]) - - # Check client_secret - self.assertIn("client_secret", connection_args) - for field in required_fields: - self.assertIn(field, connection_args["client_secret"]) - self.assertTrue(connection_args["client_secret"]["required"]) - self.assertTrue(connection_args["client_secret"].get("secret", False)) - - def test_connection_args_example_structure(self): - """Test that connection_args_example has the correct structure.""" - from mindsdb.integrations.handlers.shopify_handler.connection_args import ( - connection_args_example, - ) - - self.assertIn("shop_url", connection_args_example) - self.assertIn("client_id", connection_args_example) - self.assertIn("client_secret", connection_args_example) - - self.assertIsInstance(connection_args_example["shop_url"], str) - self.assertIsInstance(connection_args_example["client_id"], str) - self.assertIsInstance(connection_args_example["client_secret"], str) - - -class TestShopifyHandlerEdgeCases(BaseShopifyHandlerTest): - """Test suite for Shopify Handler edge cases.""" - - def test_handler_name_attribute(self): - """Test that handler has correct name attribute.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - self.assertEqual(handler.name, self.TEST_HANDLER_NAME) - - @patch("mindsdb.integrations.handlers.shopify_handler.shopify_handler.requests") - @patch("mindsdb.integrations.handlers.shopify_handler.shopify_handler.shopify") - def test_multiple_connections(self, mock_shopify, mock_requests): - """Test multiple connection attempts.""" - # Mock OAuth response - mock_response = MagicMock() - mock_response.json.return_value = {"access_token": "test_access_token"} - mock_requests.post.return_value = mock_response - - # 
Mock session - mock_session = MagicMock() - mock_shopify.Session.return_value = mock_session - - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - - # First connection - result1 = handler.connect() - self.assertTrue(handler.is_connected) - - # Second connection (should return existing) - result2 = handler.connect() - self.assertEqual(result1, result2) - - # Should only call OAuth once - self.assertEqual(mock_requests.post.call_count, 1) - - def test_connection_data_preserved(self): - """Test that connection data is preserved after initialization.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - - self.assertEqual(handler.connection_data["shop_url"], self.TEST_SHOP_URL) - self.assertEqual(handler.connection_data["client_id"], self.TEST_CLIENT_ID) - self.assertEqual(handler.connection_data["client_secret"], self.TEST_CLIENT_SECRET) - - def test_table_list_raises_for_unknown_target(self): - """Test that table list() fails fast when a requested target field is unknown.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - handler.connect = MagicMock(return_value=MagicMock()) - - products_table = handler._tables["products"] - - with self.assertRaises(ValueError) as context: - products_table.list(targets=["__not_a_real_column__"]) - - self.assertEqual( - str(context.exception), - "The specified fields were not found in the table schema: __not_a_real_column__", - ) - - def test_empty_connection_data(self): - """Test handler initialization with empty connection_data.""" - with self.assertRaises(MissingConnectionParams): - ShopifyHandler(self.TEST_HANDLER_NAME, connection_data={}) - - def test_partial_connection_data(self): - """Test handler initialization with partial connection_data.""" - partial_data = {"shop_url": self.TEST_SHOP_URL} - - with self.assertRaises(MissingConnectionParams) as context: - ShopifyHandler(self.TEST_HANDLER_NAME, 
connection_data=partial_data) - - # Should mention the missing parameters - error_message = str(context.exception) - self.assertTrue("client_id" in error_message or "client_secret" in error_message) - - -class TestShopifyHandlerIntegration(BaseShopifyHandlerTest): - """Test suite for Shopify Handler integration scenarios.""" - - @patch("mindsdb.integrations.handlers.shopify_handler.shopify_handler.requests") - @patch("mindsdb.integrations.handlers.shopify_handler.shopify_handler.shopify") - def test_connection_and_check(self, mock_shopify, mock_requests): - """Test connection followed by check_connection.""" - # Mock OAuth response - mock_response = MagicMock() - mock_response.json.return_value = {"access_token": "test_access_token"} - mock_requests.post.return_value = mock_response - - # Mock session - mock_session = MagicMock() - mock_shopify.Session.return_value = mock_session - mock_shopify.ShopifyResource.activate_session = MagicMock() - mock_shopify.Shop.current.return_value = MagicMock() - - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - - # First connect - handler.connect() - self.assertTrue(handler.is_connected) - - # Then check connection - result = handler.check_connection() - self.assertTrue(result.success) - - def test_handler_with_extra_kwargs(self): - """Test handler initialization with extra keyword arguments.""" - extra_kwargs = { - "connection_data": self.connection_data, - "extra_param": "extra_value", - } - - handler = ShopifyHandler(self.TEST_HANDLER_NAME, **extra_kwargs) - self.assertEqual(handler.name, self.TEST_HANDLER_NAME) - self.assertTrue(handler.is_connected is False) - - @patch("mindsdb.integrations.handlers.shopify_handler.shopify_handler.requests") - @patch("mindsdb.integrations.handlers.shopify_handler.shopify_handler.shopify") - def test_native_query_products(self, mock_shopify, mock_requests): - """Test native query for fetching products.""" - # Mock OAuth response - mock_response = 
MagicMock() - mock_response.json.return_value = {"access_token": "test_access_token"} - mock_requests.post.return_value = mock_response - - # Mock session - mock_session = MagicMock() - mock_shopify.Session.return_value = mock_session - mock_shopify.ShopifyResource.activate_session = MagicMock() - - # Mock GraphQL response - graphql_result = { - "data": { - "products": { - "edges": [ - {"node": {"id": "gid://shopify/Product/1", "title": "Product 1"}}, - {"node": {"id": "gid://shopify/Product/2", "title": "Product 2"}}, - {"node": {"id": "gid://shopify/Product/3", "title": "Product 3"}}, - {"node": {"id": "gid://shopify/Product/4", "title": "Product 4"}}, - {"node": {"id": "gid://shopify/Product/5", "title": "Product 5"}}, - ] - } - } - } - - mock_graphql = MagicMock() - mock_graphql.execute.return_value = json.dumps(graphql_result) - mock_shopify.GraphQL.return_value = mock_graphql - - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - result = handler.native_query("{ products(first: 5) { edges { node { id title } } } }") - - # Verify result - from mindsdb.integrations.libs.response import RESPONSE_TYPE - - self.assertEqual(result.type, RESPONSE_TYPE.TABLE) - self.assertIsNotNone(result.data_frame) - - -class TestShopifyHandlerTableMetadata(BaseShopifyHandlerTest): - """Test suite for Shopify Handler table metadata methods.""" - - def test_products_table_meta_get_tables(self): - """Test meta_get_tables method for products table.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - products_table = handler._tables["products"] - - # Mock the query_graphql method on the instance - products_table.query_graphql = MagicMock(return_value={"data": {"productsCount": {"count": 100}}}) - - result = products_table.meta_get_tables() - - self.assertIsInstance(result, dict) - self.assertEqual(result["table_name"], "products") - self.assertEqual(result["table_type"], "BASE TABLE") - 
self.assertIn("table_description", result) - self.assertEqual(result["row_count"], 100) - - def test_products_table_meta_get_primary_keys(self): - """Test meta_get_primary_keys method for products table.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - products_table = handler._tables["products"] - - result = products_table.meta_get_primary_keys("products") - - self.assertIsInstance(result, list) - self.assertEqual(len(result), 1) - self.assertEqual(result[0]["table_name"], "products") - self.assertEqual(result[0]["column_name"], "id") - - def test_products_table_meta_get_foreign_keys(self): - """Test meta_get_foreign_keys method for products table.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - products_table = handler._tables["products"] - - result = products_table.meta_get_foreign_keys("products", ["products", "orders"]) - - self.assertIsInstance(result, list) - # Products table should have no foreign keys - self.assertEqual(len(result), 0) - - def test_products_table_get_columns(self): - """Test get_columns method for products table.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - products_table = handler._tables["products"] - - result = products_table.get_columns() - - self.assertIsInstance(result, list) - self.assertGreater(len(result), 0) - # Check that some expected columns are present - self.assertIn("id", result) - self.assertIn("title", result) - # All items should be strings - for column_name in result: - self.assertIsInstance(column_name, str) - - def test_products_table_meta_get_columns(self): - """Test meta_get_columns method for products table.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - products_table = handler._tables["products"] - - result = products_table.meta_get_columns() - - self.assertIsInstance(result, list) - self.assertGreater(len(result), 0) - # Each 
column should be a dictionary with metadata - for column in result: - self.assertIsInstance(column, dict) - self.assertIn("COLUMN_NAME", column) - - def test_product_variants_table_meta_get_tables(self): - """Test meta_get_tables method for product_variants table.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - variants_table = handler._tables["product_variants"] - - # Mock the query_graphql method on the instance - variants_table.query_graphql = MagicMock(return_value={"data": {"productVariantsCount": {"count": 250}}}) - - result = variants_table.meta_get_tables() - - self.assertIsInstance(result, dict) - self.assertEqual(result["table_name"], "product_variants") - self.assertEqual(result["table_type"], "BASE TABLE") - self.assertIn("table_description", result) - self.assertEqual(result["row_count"], 250) - - def test_product_variants_table_meta_get_primary_keys(self): - """Test meta_get_primary_keys method for product_variants table.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - variants_table = handler._tables["product_variants"] - - result = variants_table.meta_get_primary_keys("product_variants") - - self.assertIsInstance(result, list) - self.assertEqual(len(result), 1) - self.assertEqual(result[0]["table_name"], "product_variants") - self.assertEqual(result[0]["column_name"], "id") - - def test_product_variants_table_meta_get_foreign_keys(self): - """Test meta_get_foreign_keys method for product_variants table.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - variants_table = handler._tables["product_variants"] - - result = variants_table.meta_get_foreign_keys("product_variants", ["products", "product_variants"]) - - self.assertIsInstance(result, list) - # Product variants should have a foreign key to products - self.assertGreater(len(result), 0) - self.assertEqual(result[0]["PARENT_TABLE_NAME"], "product_variants") - 
self.assertEqual(result[0]["PARENT_COLUMN_NAME"], "productId") - self.assertEqual(result[0]["CHILD_TABLE_NAME"], "products") - self.assertEqual(result[0]["CHILD_COLUMN_NAME"], "id") - - def test_product_variants_table_get_columns(self): - """Test get_columns method for product_variants table.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - variants_table = handler._tables["product_variants"] - - result = variants_table.get_columns() - - self.assertIsInstance(result, list) - self.assertGreater(len(result), 0) - # Check that some expected columns are present - self.assertIn("id", result) - self.assertIn("productId", result) - - def test_product_variants_table_meta_get_columns(self): - """Test meta_get_columns method for product_variants table.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - variants_table = handler._tables["product_variants"] - - result = variants_table.meta_get_columns() - - self.assertIsInstance(result, list) - self.assertGreater(len(result), 0) - for column in result: - self.assertIsInstance(column, dict) - self.assertIn("COLUMN_NAME", column) - - def test_customers_table_meta_get_tables(self): - """Test meta_get_tables method for customers table.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - customers_table = handler._tables["customers"] - - # Mock the query_graphql method on the instance - customers_table.query_graphql = MagicMock(return_value={"data": {"customersCount": {"count": 500}}}) - - result = customers_table.meta_get_tables() - - self.assertIsInstance(result, dict) - self.assertEqual(result["table_name"], "customers") - self.assertEqual(result["table_type"], "BASE TABLE") - self.assertIn("table_description", result) - self.assertEqual(result["row_count"], 500) - - def test_customers_table_meta_get_primary_keys(self): - """Test meta_get_primary_keys method for customers table.""" - handler = 
ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - customers_table = handler._tables["customers"] - - result = customers_table.meta_get_primary_keys("customers") - - self.assertIsInstance(result, list) - self.assertEqual(len(result), 1) - self.assertEqual(result[0]["table_name"], "customers") - self.assertEqual(result[0]["column_name"], "id") - - def test_customers_table_meta_get_foreign_keys(self): - """Test meta_get_foreign_keys method for customers table.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - customers_table = handler._tables["customers"] - - result = customers_table.meta_get_foreign_keys("customers", ["customers", "orders"]) - - self.assertIsInstance(result, list) - # Customers table should have no foreign keys - self.assertEqual(len(result), 0) - - def test_customers_table_get_columns(self): - """Test get_columns method for customers table.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - customers_table = handler._tables["customers"] - - result = customers_table.get_columns() - - self.assertIsInstance(result, list) - self.assertGreater(len(result), 0) - # Check that some expected columns are present - self.assertIn("id", result) - self.assertIn("emailAddress", result) - - def test_customers_table_meta_get_columns(self): - """Test meta_get_columns method for customers table.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - customers_table = handler._tables["customers"] - - result = customers_table.meta_get_columns() - - self.assertIsInstance(result, list) - self.assertGreater(len(result), 0) - for column in result: - self.assertIsInstance(column, dict) - self.assertIn("COLUMN_NAME", column) - - def test_orders_table_meta_get_tables(self): - """Test meta_get_tables method for orders table.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - 
orders_table = handler._tables["orders"] - - # Mock the query_graphql method on the instance - orders_table.query_graphql = MagicMock(return_value={"data": {"ordersCount": {"count": 1000}}}) - - result = orders_table.meta_get_tables() - - self.assertIsInstance(result, dict) - self.assertEqual(result["table_name"], "orders") - self.assertEqual(result["table_type"], "BASE TABLE") - self.assertIn("table_description", result) - self.assertEqual(result["row_count"], 1000) - - def test_orders_table_meta_get_primary_keys(self): - """Test meta_get_primary_keys method for orders table.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - orders_table = handler._tables["orders"] - - result = orders_table.meta_get_primary_keys("orders") - - self.assertIsInstance(result, list) - self.assertEqual(len(result), 1) - self.assertEqual(result[0]["table_name"], "orders") - self.assertEqual(result[0]["column_name"], "id") - - def test_orders_table_meta_get_foreign_keys(self): - """Test meta_get_foreign_keys method for orders table.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - orders_table = handler._tables["orders"] - - # Test with customers in the table list - result = orders_table.meta_get_foreign_keys("orders", ["customers", "orders"]) - - self.assertIsInstance(result, list) - # Orders table should have a foreign key to customers - self.assertGreater(len(result), 0) - self.assertEqual(result[0]["PARENT_TABLE_NAME"], "orders") - self.assertEqual(result[0]["PARENT_COLUMN_NAME"], "customerId") - self.assertEqual(result[0]["CHILD_TABLE_NAME"], "customers") - self.assertEqual(result[0]["CHILD_COLUMN_NAME"], "id") - - def test_orders_table_get_columns(self): - """Test get_columns method for orders table.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - orders_table = handler._tables["orders"] - - result = orders_table.get_columns() - - 
self.assertIsInstance(result, list) - self.assertGreater(len(result), 0) - # Check that some expected columns are present - self.assertIn("id", result) - self.assertIn("customerId", result) - - def test_orders_table_meta_get_columns(self): - """Test meta_get_columns method for orders table.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - orders_table = handler._tables["orders"] - - result = orders_table.meta_get_columns() - - self.assertIsInstance(result, list) - self.assertGreater(len(result), 0) - for column in result: - self.assertIsInstance(column, dict) - self.assertIn("COLUMN_NAME", column) - - def test_all_tables_have_metadata_methods(self): - """Test that all registered tables have required metadata methods.""" - handler = ShopifyHandler(self.TEST_HANDLER_NAME, connection_data=self.connection_data) - - expected_tables = [ - "products", - "customers", - "orders", - "product_variants", - "marketing_events", - "inventory_items", - "staff_members", - "gift_cards", - ] - - for table_name in expected_tables: - with self.subTest(table=table_name): - table = handler._tables[table_name] - - # Check that all required methods exist - self.assertTrue(hasattr(table, "meta_get_tables")) - self.assertTrue(hasattr(table, "meta_get_primary_keys")) - self.assertTrue(hasattr(table, "meta_get_foreign_keys")) - self.assertTrue(hasattr(table, "get_columns")) - self.assertTrue(hasattr(table, "meta_get_columns")) - - # Check that methods are callable - self.assertTrue(callable(table.meta_get_tables)) - self.assertTrue(callable(table.meta_get_primary_keys)) - self.assertTrue(callable(table.meta_get_foreign_keys)) - self.assertTrue(callable(table.get_columns)) - self.assertTrue(callable(table.meta_get_columns)) - - @patch("mindsdb.integrations.handlers.shopify_handler.utils.ShopifyQuery") - def test_limit_large_than_max_page_limit(self, mock_shopify_query): - """Test pagination when limit exceeds MAX_PAGE_LIMIT.""" - # First request 
returns MAX_PAGE_LIMIT items - first_result = { - "data": { - "products": { - "nodes": [{"id": str(i), "title": f"Product {i}"} for i in range(MAX_PAGE_LIMIT)], - "pageInfo": {"hasNextPage": True, "endCursor": "cursor"}, - } - } - } - # Second request returns remaining items - second_result = { - "data": { - "products": { - "nodes": [{"id": str(i), "title": f"Product {i}"} for i in range(MAX_PAGE_LIMIT, 300)], - "pageInfo": {"hasNextPage": False, "endCursor": None}, - } - } - } - - mock_query_instance = MagicMock() - mock_query_instance.execute.side_effect = [first_result, second_result] - mock_shopify_query.return_value = mock_query_instance - - result = query_graphql_nodes( - root_name="products", - root_class=Products, - columns="id title", - limit=300, - ) - - # Verify that two requests were made - self.assertEqual(mock_shopify_query.call_count, 2) - - # First call should request min(300, 250) = 250 - first_call_kwargs = mock_shopify_query.call_args_list[0][1] - self.assertEqual(first_call_kwargs["limit"], MAX_PAGE_LIMIT) - - # Second call should request min(max(300-250, 1), 250) = 50 - second_call_kwargs = mock_shopify_query.call_args_list[1][1] - self.assertEqual(second_call_kwargs["limit"], 50) - - # Result should be exactly 300 items - self.assertEqual(len(result), 300) - - -class TestShopifyPassthrough(unittest.TestCase): - """Exercise the PassthroughMixin retrofit (X-Shopify-Access-Token auth).""" - - CONNECTION_DATA = { - "shop_url": "test-shop.myshopify.com", - "client_id": "cid", - "client_secret": "csec", - "access_token": "shpat_tokenvalue", - } - - def _mock_response(self, status_code=200): - resp = MagicMock() - resp.status_code = status_code - resp.headers = {"Content-Type": "application/json"} - resp.iter_content = MagicMock(return_value=iter([b'{"shop":{"id":1}}'])) - resp.close = MagicMock() - return resp - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_passthrough_uses_shopify_header_and_per_shop_base_url(self, 
mock_request): - mock_request.return_value = self._mock_response() - handler = ShopifyHandler("shopify", connection_data=self.CONNECTION_DATA) - from mindsdb.integrations.libs.passthrough_types import PassthroughRequest - - resp = handler.api_passthrough(PassthroughRequest("GET", "/admin/api/2024-01/shop.json")) - - self.assertEqual(resp.status_code, 200) - args, kwargs = mock_request.call_args - self.assertEqual(args[0], "GET") - self.assertEqual(args[1], "https://test-shop.myshopify.com/admin/api/2024-01/shop.json") - # Custom Shopify auth header; no bearer Authorization. - self.assertEqual(kwargs["headers"]["X-Shopify-Access-Token"], "shpat_tokenvalue") - self.assertNotIn("Authorization", kwargs["headers"]) - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_test_passthrough_returns_ok_on_200(self, mock_request): - mock_request.return_value = self._mock_response(status_code=200) - handler = ShopifyHandler("shopify", connection_data=self.CONNECTION_DATA) - - result = handler.test_passthrough() - - self.assertTrue(result["ok"]) - self.assertEqual(result["status_code"], 200) - self.assertEqual(result["host"], "test-shop.myshopify.com") - self.assertIsInstance(result["latency_ms"], int) - # The probe should hit the version-less endpoint so it survives - # Shopify's quarterly Admin API version retirements. 
- self.assertEqual(mock_request.call_args[0][1], "https://test-shop.myshopify.com/admin/shop.json") - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_test_passthrough_returns_auth_failed_on_401(self, mock_request): - mock_request.return_value = self._mock_response(status_code=401) - handler = ShopifyHandler("shopify", connection_data=self.CONNECTION_DATA) - - result = handler.test_passthrough() - - self.assertFalse(result["ok"]) - self.assertEqual(result["error_code"], "auth_failed") - self.assertEqual(result["status_code"], 401) - self.assertEqual(result["host"], "test-shop.myshopify.com") - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/test_snowflake.py b/tests/unit/handlers/test_snowflake.py deleted file mode 100644 index 07c0c87b040..00000000000 --- a/tests/unit/handlers/test_snowflake.py +++ /dev/null @@ -1,1674 +0,0 @@ -import pytest -import tempfile - -try: - import snowflake - import snowflake.connector - from mindsdb.integrations.handlers.snowflake_handler.snowflake_handler import SnowflakeHandler, _map_type -except ImportError: - pytestmark = pytest.mark.skip("Snowflake handler not installed") - -import unittest -from unittest.mock import patch, MagicMock -from collections import OrderedDict -from decimal import Decimal -import datetime -import numpy as np -import pandas as pd -from pandas import DataFrame - - -from base_handler_test import BaseDatabaseHandlerTest -from mindsdb.integrations.libs.response import ( - OkResponse, - TableResponse, - ErrorResponse, - INF_SCHEMA_COLUMNS_NAMES_SET, - RESPONSE_TYPE, -) -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE - - -class ColumnDescription: - def __init__(self, **kwargs): - for key in kwargs: - setattr(self, key, kwargs[key]) - - -class TestSnowflakeHandler(BaseDatabaseHandlerTest, unittest.TestCase): - @property - def dummy_connection_data(self): - return OrderedDict( - account="tvuibdy-vm85921", - 
user="example_user", - password="example_pass", - database="example_db", - schema="example_schema", - auth_type="password", - ) - - @property - def err_to_raise_on_connect_failure(self): - return snowflake.connector.errors.Error("Connection Failed") - - @property - def get_tables_query(self): - return """ - SELECT TABLE_NAME, TABLE_SCHEMA, TABLE_TYPE - FROM INFORMATION_SCHEMA.TABLES - WHERE TABLE_TYPE IN ('BASE TABLE', 'VIEW') - AND TABLE_SCHEMA = current_schema() - """ - - @property - def get_columns_query(self): - return f""" - SELECT - COLUMN_NAME, - DATA_TYPE, - ORDINAL_POSITION, - COLUMN_DEFAULT, - IS_NULLABLE, - CHARACTER_MAXIMUM_LENGTH, - CHARACTER_OCTET_LENGTH, - NUMERIC_PRECISION, - NUMERIC_SCALE, - DATETIME_PRECISION, - CHARACTER_SET_NAME, - COLLATION_NAME - FROM INFORMATION_SCHEMA.COLUMNS - WHERE TABLE_NAME = '{self.mock_table}' - AND TABLE_SCHEMA = current_schema() - """ - - def create_handler(self): - return SnowflakeHandler("snowflake", connection_data=self.dummy_connection_data) - - def create_patcher(self): - return patch("snowflake.connector.connect") - - def create_temp_key_file(self, content): - """ - Helper to create a temporary key file and ensure it gets cleaned up. - """ - import tempfile - import os - - # Create a temporary file - temp_key_file = tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".pem") - temp_key_file.write(content) - temp_key_file.close() - temp_key_path = temp_key_file.name - - # Register cleanup - self.addCleanup(lambda: os.unlink(temp_key_path) if os.path.exists(temp_key_path) else None) - - return temp_key_path - - def get_key_pair_connection_data(self, private_key_path, passphrase=None): - """ - Helper to create connection data for key pair authentication. 
- """ - data = OrderedDict( - account="tvuibdy-vm85921", - user="example_user", - database="example_db", - schema="example_schema", - private_key_path=private_key_path, - auth_type="key_pair", - ) - if passphrase: - data["private_key_passphrase"] = passphrase - return data - - def test_connect_validation(self): - """ - Tests that connect method raises ValueError when required connection parameters are missing - """ - # Test missing 'account' - invalid_connection_args = self.dummy_connection_data.copy() - del invalid_connection_args["account"] - handler = SnowflakeHandler("snowflake", connection_data=invalid_connection_args) - with self.assertRaises(ValueError): - handler.connect() - - # Test missing 'user' - invalid_connection_args = self.dummy_connection_data.copy() - del invalid_connection_args["user"] - handler = SnowflakeHandler("snowflake", connection_data=invalid_connection_args) - with self.assertRaises(ValueError): - handler.connect() - - # Test missing both 'password' and 'private_key_path' - invalid_connection_args = self.dummy_connection_data.copy() - del invalid_connection_args["password"] - handler = SnowflakeHandler("snowflake", connection_data=invalid_connection_args) - with self.assertRaises(ValueError) as context: - handler.connect() - self.assertIn("Password must be provided", str(context.exception)) - - # Test missing 'database' - invalid_connection_args = self.dummy_connection_data.copy() - del invalid_connection_args["database"] - handler = SnowflakeHandler("snowflake", connection_data=invalid_connection_args) - with self.assertRaises(ValueError): - handler.connect() - - def test_map_type_handles_unknown_types(self): - self.assertEqual(_map_type("BOOLEAN"), MYSQL_DATA_TYPE.BOOL) - self.assertEqual(_map_type("VARIANT"), MYSQL_DATA_TYPE.VARCHAR) - self.assertEqual(_map_type("custom_type"), MYSQL_DATA_TYPE.VARCHAR) - - def test_check_connection_failure_resets_flag(self): - self.handler.is_connected = True - error = 
snowflake.connector.errors.Error("boom") - self.handler.connect = MagicMock(side_effect=error) - - response = self.handler.check_connection() - - self.assertFalse(response.success) - self.assertFalse(self.handler.is_connected) - self.assertEqual(response.error_message, str(error)) - - def test_disconnect(self): - """ - Tests the disconnect method to ensure it correctly closes connections - """ - mock_conn = MagicMock() - self.handler.connection = mock_conn - self.handler.is_connected = True - self.handler.disconnect() - - mock_conn.close.assert_called_once() - self.assertFalse(self.handler.is_connected) - self.handler.is_connected = False - mock_conn.reset_mock() - self.handler.disconnect() - mock_conn.close.assert_not_called() - - def test_check_connection(self): - """ - Tests the check_connection method to ensure it properly tests connectivity - """ - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.fetchone.return_value = [1] - mock_conn.cursor.return_value = mock_cursor - - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = MagicMock(return_value=mock_conn) - - response = self.handler.check_connection() - mock_conn.cursor.assert_called_once() - mock_cursor.execute.assert_called_once_with("select 1;") - self.assertTrue(response.success) - self.assertIsNone(response.error_message) - - self.handler.connect = MagicMock() - connect_error = snowflake.connector.errors.Error("Connection error") - self.handler.connect.side_effect = connect_error - - response = self.handler.check_connection() - - self.assertFalse(response.success) - self.assertEqual(response.error_message, str(connect_error)) - - def test_native_query_with_results(self): - """ - Tests the `native_query` method to ensure it executes a SQL query and handles the case - where the query returns a result set (e.g., SELECT). - It uses fetch_pandas_batches() for Snowflake. 
- """ - mock_conn = MagicMock() - mock_cursor = MagicMock(spec=snowflake.connector.cursor.DictCursor) - - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor.return_value = mock_cursor - - expected_columns = ["ID", "NAME"] - batch1_data = [(1, "test1")] - batch2_data = [(2, "test2")] - mock_df_batch1 = DataFrame(batch1_data, columns=expected_columns) - mock_df_batch2 = DataFrame(batch2_data, columns=expected_columns) - mock_cursor.fetch_pandas_batches.return_value = iter([mock_df_batch1, mock_df_batch2]) - - mock_cursor.description = [ - ColumnDescription(name="ID", type_code=snowflake.connector.constants.FIELD_NAME_TO_ID["FIXED"]), - ColumnDescription(name="NAME", type_code=snowflake.connector.constants.FIELD_NAME_TO_ID["TEXT"]), - ] - mock_cursor.rowcount = 2 - - query_str = "SELECT ID, NAME FROM test_table" - data = self.handler.native_query(query_str) - - mock_conn.cursor.assert_called_once_with(snowflake.connector.DictCursor) - mock_cursor.execute.assert_called_once_with(query_str) - mock_cursor.fetch_pandas_batches.assert_called_once() - mock_cursor.fetchall.assert_not_called() - - self.assertIsInstance(data, TableResponse) - self.assertEqual(data.type, RESPONSE_TYPE.TABLE) - self.assertIsInstance(data.data_frame, DataFrame) - self.assertListEqual(list(data.data_frame.columns), expected_columns) - self.assertEqual(len(data.data_frame), 2) - self.assertEqual(data.data_frame.iloc[0]["ID"], 1) - self.assertEqual(data.data_frame.iloc[0]["NAME"], "test1") - self.assertEqual(data.data_frame.iloc[1]["ID"], 2) - self.assertEqual(data.data_frame.iloc[1]["NAME"], "test2") - - mock_conn.commit.assert_not_called() - mock_conn.rollback.assert_not_called() - - def test_native_query_no_results(self): - """ - Tests the `native_query` method to ensure it executes a non-SELECT SQL query (e.g., INSERT) - and correctly returns 
RESPONSE_TYPE.OK by simulating the NotSupportedError fallback. - """ - mock_conn = MagicMock() - mock_cursor = MagicMock(spec=snowflake.connector.cursor.DictCursor) - - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor.return_value = mock_cursor - mock_cursor.description = None - mock_cursor.fetch_pandas_batches.side_effect = snowflake.connector.errors.NotSupportedError() - mock_cursor.fetchall.return_value = [{"number of rows inserted": 1}] - mock_cursor.rowcount = 1 - - query_str = "INSERT INTO test_table VALUES (1, 'test')" - data = self.handler.native_query(query_str) - - mock_conn.cursor.assert_called_once_with(snowflake.connector.DictCursor) - mock_cursor.execute.assert_called_once_with(query_str) - mock_cursor.fetch_pandas_batches.assert_called_once() - - self.assertIsInstance(data, OkResponse) - self.assertEqual(data.type, RESPONSE_TYPE.OK) - self.assertEqual(data.affected_rows, 1) - - mock_conn.commit.assert_not_called() - mock_conn.rollback.assert_not_called() - - def test_native_query_fallback_returns_table(self): - mock_conn = MagicMock() - mock_cursor = MagicMock(spec=snowflake.connector.cursor.DictCursor) - mock_cursor.__enter__.return_value = mock_cursor - mock_cursor.__exit__.return_value = None - mock_cursor.fetch_pandas_batches.side_effect = snowflake.connector.errors.NotSupportedError() - mock_cursor.fetchall.return_value = [{"COL": 1}, {"COL": 2}] - mock_cursor.description = [("COL",)] - mock_cursor.rowcount = 2 - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor.return_value = mock_cursor - - response = self.handler.native_query("CALL test_proc()") - - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - self.assertEqual(len(response.data_frame), 2) - - def test_native_query_fallback_without_data_returns_ok(self): - mock_conn = MagicMock() - mock_cursor = 
MagicMock(spec=snowflake.connector.cursor.DictCursor) - mock_cursor.__enter__.return_value = mock_cursor - mock_cursor.__exit__.return_value = None - mock_cursor.fetch_pandas_batches.side_effect = snowflake.connector.errors.NotSupportedError() - mock_cursor.fetchall.return_value = None - mock_cursor.description = [] - mock_cursor.rowcount = 0 - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor.return_value = mock_cursor - - response = self.handler.native_query("UNKNOWN") - self.assertEqual(response.type, RESPONSE_TYPE.OK) - - def test_native_query_error(self): - """ - Tests the `native_query` method to ensure it properly handles and returns database errors - """ - mock_conn = MagicMock() - mock_cursor = MagicMock() - - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - error_msg = "Syntax error in SQL statement" - error = snowflake.connector.errors.ProgrammingError(msg=error_msg) - mock_cursor.execute.side_effect = error - - query_str = "INVALID SQL" - data = self.handler.native_query(query_str) - - mock_conn.cursor.assert_called_once() - mock_cursor.execute.assert_called_once_with(query_str) - - self.assertIsInstance(data, ErrorResponse) - self.assertEqual(data.type, RESPONSE_TYPE.ERROR) - self.assertIn(error_msg, data.error_message) - - mock_conn.rollback.assert_not_called() - mock_conn.commit.assert_not_called() - - def test_native_query_releases_memory_pool_when_jemalloc(self): - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__.return_value = mock_cursor - mock_cursor.__exit__.return_value = None - mock_cursor.fetch_pandas_batches.return_value = iter([DataFrame([[1, "foo"]], columns=["ID", "NAME"])]) - mock_cursor.description = [ - ColumnDescription(name="ID", type_code=0, scale=0), - ColumnDescription(name="NAME", 
type_code=2), - ] - mock_cursor.rowcount = 1 - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor.return_value = mock_cursor - - with patch("mindsdb.integrations.handlers.snowflake_handler.snowflake_handler.memory_pool") as mock_pool: - mock_pool.backend_name = "jemalloc" - mock_pool.release_unused = MagicMock() - - response = self.handler.native_query("SELECT 1", stream=False) - - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - mock_pool.release_unused.assert_called_once() - - def test_key_pair_authentication_success(self): - """ - Tests successful connection using key pair authentication - """ - temp_key_path = self.create_temp_key_file("DUMMY PRIVATE KEY CONTENT") - - try: - key_pair_connection_data = self.get_key_pair_connection_data(temp_key_path) - - handler = SnowflakeHandler("snowflake", connection_data=key_pair_connection_data) - - with patch("snowflake.connector.connect") as mock_connect: - mock_conn = MagicMock() - mock_connect.return_value = mock_conn - - connection = handler.connect() - - mock_connect.assert_called_once() - call_kwargs = mock_connect.call_args[1] - - self.assertIn("private_key_file", call_kwargs) - self.assertEqual(call_kwargs["private_key_file"], temp_key_path) - self.assertEqual(call_kwargs["authenticator"], "SNOWFLAKE_JWT") - self.assertNotIn("password", call_kwargs) - self.assertEqual(call_kwargs["account"], "tvuibdy-vm85921") - self.assertEqual(call_kwargs["user"], "example_user") - self.assertEqual(call_kwargs["database"], "example_db") - - self.assertTrue(handler.is_connected) - self.assertEqual(connection, mock_conn) - finally: - import os - - if os.path.exists(temp_key_path): - os.unlink(temp_key_path) - - def test_key_pair_authentication_with_passphrase(self): - """ - Tests successful connection using key pair authentication with passphrase - """ - temp_key_path = self.create_temp_key_file("DUMMY ENCRYPTED PRIVATE KEY CONTENT") - - try: - key_pair_connection_data = 
self.get_key_pair_connection_data(temp_key_path, "test_passphrase") - - handler = SnowflakeHandler("snowflake", connection_data=key_pair_connection_data) - - with patch("snowflake.connector.connect") as mock_connect: - mock_conn = MagicMock() - mock_connect.return_value = mock_conn - - connection = handler.connect() - - mock_connect.assert_called_once() - call_kwargs = mock_connect.call_args[1] - - self.assertIn("private_key_file", call_kwargs) - self.assertEqual(call_kwargs["private_key_file"], temp_key_path) - self.assertEqual(call_kwargs["authenticator"], "SNOWFLAKE_JWT") - self.assertIn("private_key_file_pwd", call_kwargs) - self.assertEqual(call_kwargs["private_key_file_pwd"], "test_passphrase") - self.assertNotIn("password", call_kwargs) - - self.assertTrue(handler.is_connected) - self.assertEqual(connection, mock_conn) - finally: - import os - - if os.path.exists(temp_key_path): - os.unlink(temp_key_path) - - def test_key_pair_authentication_with_inline_private_key(self): - """ - Tests successful connection using in-memory private key content - """ - connection_data = OrderedDict( - account="tvuibdy-vm85921", - user="example_user", - database="example_db", - schema="example_schema", - private_key="-----BEGIN PRIVATE KEY-----\\nINLINE KEY\\n-----END PRIVATE KEY-----", - auth_type="key_pair", - ) - - handler = SnowflakeHandler("snowflake", connection_data=connection_data) - - with ( - patch( - "mindsdb.integrations.handlers.snowflake_handler.auth_types.KeyPairAuthType._load_private_key", - return_value="parsed_key", - ) as mock_loader, - patch("snowflake.connector.connect") as mock_connect, - ): - mock_conn = MagicMock() - mock_connect.return_value = mock_conn - - connection = handler.connect() - - mock_loader.assert_called_once_with( - "-----BEGIN PRIVATE KEY-----\\nINLINE KEY\\n-----END PRIVATE KEY-----", None - ) - mock_connect.assert_called_once() - call_kwargs = mock_connect.call_args[1] - - self.assertIn("private_key", call_kwargs) - 
self.assertEqual(call_kwargs["private_key"], "parsed_key") - self.assertNotIn("private_key_file", call_kwargs) - self.assertTrue(handler.is_connected) - self.assertEqual(connection, mock_conn) - - def test_key_pair_authentication_with_inline_private_key_and_passphrase(self): - """ - Tests inline private key content when a passphrase is supplied - """ - connection_data = OrderedDict( - account="tvuibdy-vm85921", - user="example_user", - database="example_db", - schema="example_schema", - private_key="-----BEGIN PRIVATE KEY-----\\nINLINE KEY\\n-----END PRIVATE KEY-----", - private_key_passphrase="inline-pass", - auth_type="key_pair", - ) - - handler = SnowflakeHandler("snowflake", connection_data=connection_data) - - with ( - patch( - "mindsdb.integrations.handlers.snowflake_handler.auth_types.KeyPairAuthType._load_private_key", - return_value="parsed_key", - ) as mock_loader, - patch("snowflake.connector.connect") as mock_connect, - ): - mock_conn = MagicMock() - mock_connect.return_value = mock_conn - - handler.connect() - - mock_loader.assert_called_once_with( - "-----BEGIN PRIVATE KEY-----\\nINLINE KEY\\n-----END PRIVATE KEY-----", "inline-pass" - ) - call_kwargs = mock_connect.call_args[1] - self.assertIn("private_key", call_kwargs) - self.assertEqual(call_kwargs["private_key"], "parsed_key") - self.assertNotIn("private_key_file", call_kwargs) - - def test_key_pair_authentication_file_not_found(self): - """ - Tests that ValueError is raised when private key file doesn't exist - """ - key_pair_connection_data = OrderedDict( - account="tvuibdy-vm85921", - user="example_user", - database="example_db", - schema="example_schema", - private_key_path="/nonexistent/path/to/key.pem", - auth_type="key_pair", - ) - - handler = SnowflakeHandler("snowflake", connection_data=key_pair_connection_data) - - with self.assertRaises(ValueError) as context: - handler.connect() - self.assertIn("Private key file not found", str(context.exception)) - - def 
test_key_pair_authentication_invalid_key(self): - """ - Tests that Snowflake connector raises an error when private key is invalid - """ - with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".pem") as temp_key_file: - temp_key_file.write("INVALID KEY CONTENT") - temp_key_path = temp_key_file.name - - try: - key_pair_connection_data = OrderedDict( - account="tvuibdy-vm85921", - user="example_user", - database="example_db", - schema="example_schema", - private_key_path=temp_key_path, - auth_type="key_pair", - ) - - handler = SnowflakeHandler("snowflake", connection_data=key_pair_connection_data) - - with patch("snowflake.connector.connect") as mock_connect: - mock_connect.side_effect = snowflake.connector.errors.ProgrammingError( - msg="Failed to parse private key" - ) - - with self.assertRaises(snowflake.connector.errors.ProgrammingError): - handler.connect() - finally: - import os - - if os.path.exists(temp_key_path): - os.unlink(temp_key_path) - - def test_password_authentication_works(self): - """ - Tests that password authentication still works (backward compatibility) - """ - handler = SnowflakeHandler("snowflake", connection_data=self.dummy_connection_data) - - with patch("snowflake.connector.connect") as mock_connect: - mock_conn = MagicMock() - mock_connect.return_value = mock_conn - - connection = handler.connect() - - mock_connect.assert_called_once() - call_kwargs = mock_connect.call_args[1] - - self.assertIn("password", call_kwargs) - self.assertNotIn("private_key", call_kwargs) - self.assertEqual(call_kwargs["password"], "example_pass") - - self.assertTrue(handler.is_connected) - self.assertEqual(connection, mock_conn) - - def test_invalid_auth_type_fails(self): - """ - Tests that providing an unknown auth_type raises a ValueError. 
- """ - connection_data = self.dummy_connection_data.copy() - connection_data["auth_type"] = "invalid" - - handler = SnowflakeHandler("snowflake", connection_data=connection_data) - - with self.assertRaises(ValueError) as context: - handler.connect() - self.assertIn("Invalid auth_type", str(context.exception)) - - def test_query_method(self): - """ - Tests the query method to ensure it correctly converts ASTNode to SQL and calls native_query - """ - orig_renderer = getattr(self.handler, "renderer", None) - renderer_mock = MagicMock() - renderer_mock.get_string.return_value = "SELECT * FROM test_table_rendered" - - self.handler.native_query = MagicMock() - expected_response = TableResponse(data=DataFrame()) - self.handler.native_query.return_value = expected_response - - try: - if orig_renderer: - self.handler.renderer = renderer_mock - - mock_ast = MagicMock() - result = self.handler.query(mock_ast) - - if orig_renderer: - renderer_mock.get_string.assert_called_once_with(mock_ast, with_failback=True) - expected_query = "SELECT * FROM test_table_rendered" - else: - expected_query = str(mock_ast) - - self.handler.native_query.assert_called_once_with(expected_query) - self.assertEqual(result, expected_response) - - finally: - if orig_renderer: - self.handler.renderer = orig_renderer - del self.handler.native_query - - def test_get_tables(self): - """ - Tests that get_tables calls native_query with the correct SQL for Snowflake - """ - expected_response = TableResponse( - data=DataFrame([("table1", "SCHEMA1", "BASE TABLE")], columns=["TABLE_NAME", "TABLE_SCHEMA", "TABLE_TYPE"]) - ) - self.handler.native_query = MagicMock(return_value=expected_response) - - response = self.handler.get_tables() - - self.handler.native_query.assert_called_once() - call_args = self.handler.native_query.call_args[0][0] - - self.assertIn("FROM INFORMATION_SCHEMA.TABLES", call_args) - self.assertIn("TABLE_NAME", call_args) - self.assertIn("TABLE_SCHEMA", call_args) - 
self.assertIn("TABLE_TYPE", call_args) - self.assertIn("current_schema()", call_args) - self.assertIn("('BASE TABLE', 'VIEW')", call_args) - - self.assertEqual(response.type, RESPONSE_TYPE.TABLE) - self.assertIsInstance(response.data_frame, DataFrame) - self.assertListEqual(list(response.data_frame.columns), ["TABLE_NAME", "TABLE_SCHEMA", "TABLE_TYPE"]) - - del self.handler.native_query - - def test_get_columns(self): - """ - Tests that get_columns calls native_query with the correct SQL for Snowflake - and returns the expected DataFrame structure. - """ - query_columns = [ - "COLUMN_NAME", - "DATA_TYPE", - "ORDINAL_POSITION", - "COLUMN_DEFAULT", - "IS_NULLABLE", - "CHARACTER_MAXIMUM_LENGTH", - "CHARACTER_OCTET_LENGTH", - "NUMERIC_PRECISION", - "NUMERIC_SCALE", - "DATETIME_PRECISION", - "CHARACTER_SET_NAME", - "COLLATION_NAME", - ] - - expected_df_data = [ - { - "COLUMN_NAME": "COL1", - "DATA_TYPE": "VARCHAR", - "ORDINAL_POSITION": 1, - "COLUMN_DEFAULT": None, - "IS_NULLABLE": "YES", - "CHARACTER_MAXIMUM_LENGTH": 255, - "CHARACTER_OCTET_LENGTH": None, - "NUMERIC_PRECISION": None, - "NUMERIC_SCALE": None, - "DATETIME_PRECISION": None, - "CHARACTER_SET_NAME": None, - "COLLATION_NAME": None, - }, - { - "COLUMN_NAME": "COL2", - "DATA_TYPE": "NUMBER", - "ORDINAL_POSITION": 2, - "COLUMN_DEFAULT": "0", - "IS_NULLABLE": "NO", - "CHARACTER_MAXIMUM_LENGTH": None, - "CHARACTER_OCTET_LENGTH": None, - "NUMERIC_PRECISION": 38, - "NUMERIC_SCALE": 0, - "DATETIME_PRECISION": None, - "CHARACTER_SET_NAME": None, - "COLLATION_NAME": None, - }, - ] - expected_df = DataFrame(expected_df_data, columns=query_columns) - - expected_response = TableResponse(data=expected_df) - self.handler.native_query = MagicMock(return_value=expected_response) - - table_name = "test_table" - response = self.handler.get_columns(table_name) - - self.handler.native_query.assert_called_once() - call_args = self.handler.native_query.call_args[0][0] - self.assertIn("FROM INFORMATION_SCHEMA.COLUMNS", call_args) - 
self.assertIn(f"TABLE_NAME = '{table_name}'", call_args) - self.assertIn("COLUMN_NAME", call_args) - self.assertIn("DATA_TYPE", call_args) - self.assertIn("IS_NULLABLE", call_args) - self.assertNotIn("MYSQL_DATA_TYPE", call_args) - - self.assertEqual(response.type, RESPONSE_TYPE.COLUMNS_TABLE) - self.assertIsInstance(response.data_frame, DataFrame) - - # Verify (including MYSQL_DATA_TYPE added by to_columns_table_response) - self.assertListEqual(sorted(list(response.data_frame.columns)), sorted(list(INF_SCHEMA_COLUMNS_NAMES_SET))) - - self.assertEqual(response.data_frame.iloc[0]["COLUMN_NAME"], "COL1") - self.assertEqual(response.data_frame.iloc[0]["DATA_TYPE"], "VARCHAR") - self.assertIn("MYSQL_DATA_TYPE", response.data_frame.columns) - self.assertIsNotNone(response.data_frame.iloc[0]["MYSQL_DATA_TYPE"]) - - del self.handler.native_query - - def test_meta_get_tables_casts_rowcount(self): - df = DataFrame( - [ - { - "TABLE_CATALOG": "CAT", - "TABLE_SCHEMA": "PUBLIC", - "TABLE_NAME": "ORDERS", - "TABLE_TYPE": "BASE TABLE", - "TABLE_DESCRIPTION": None, - "ROW_COUNT": "5", - "CREATED": "2024-01-01", - "LAST_ALTERED": "2024-01-02", - } - ] - ) - self.handler.native_query = MagicMock(return_value=TableResponse(data=df)) - - result = self.handler.meta_get_tables(table_names=["orders"]) - - query = self.handler.native_query.call_args[0][0] - self.assertIn("AND TABLE_NAME IN ('ORDERS')", query) - self.assertTrue(pd.api.types.is_integer_dtype(result.data_frame["ROW_COUNT"])) - - def test_meta_get_columns_filters(self): - df = DataFrame( - [ - { - "TABLE_NAME": "ORDERS", - "COLUMN_NAME": "ID", - "DATA_TYPE": "NUMBER", - "COLUMN_DESCRIPTION": None, - "COLUMN_DEFAULT": None, - "IS_NULLABLE": True, - } - ] - ) - self.handler.native_query = MagicMock(return_value=TableResponse(data=df)) - - result = self.handler.meta_get_columns(table_names=["orders"]) - - query = self.handler.native_query.call_args[0][0] - self.assertIn("AND TABLE_NAME IN ('ORDERS')", query) - 
self.assertEqual(result.data_frame.iloc[0]["TABLE_NAME"], "ORDERS") - - def test_meta_get_column_statistics_success(self): - columns_df = DataFrame( - { - "TABLE_SCHEMA": ["PUBLIC", "PUBLIC"], - "TABLE_NAME": ["ORDERS", "ORDERS"], - "COLUMN_NAME": ["ID", "AMOUNT"], - "DATA_TYPE": ["NUMBER", "NUMBER"], - } - ) - stats_df = DataFrame( - [ - { - "total_rows": 10, - "nulls_ID": 2, - "distincts_ID": 5, - "min_ID": 1, - "max_ID": 10, - "nulls_AMOUNT": 0, - "distincts_AMOUNT": 3, - "min_AMOUNT": 5, - "max_AMOUNT": 20, - } - ] - ) - self.handler.native_query = MagicMock( - side_effect=[ - TableResponse(data=columns_df), - TableResponse(data=stats_df), - ] - ) - - result = self.handler.meta_get_column_statistics(table_names=["orders"]) - - self.assertEqual(len(result.data_frame), 2) - id_stats = result.data_frame[result.data_frame["column_name"] == "ID"].iloc[0] - self.assertEqual(id_stats["null_percentage"], 20.0) - self.assertEqual(id_stats["distinct_values_count"], 5) - self.assertEqual(id_stats["minimum_value"], 1) - self.assertEqual(id_stats["maximum_value"], 10) - - def test_meta_get_column_statistics_handles_error_response(self): - self.handler.native_query = MagicMock(return_value=ErrorResponse(error_message="boom")) - result = self.handler.meta_get_column_statistics(table_names=["orders"]) - self.assertEqual(result.type, RESPONSE_TYPE.ERROR) - - def test_meta_get_primary_keys_filters(self): - df = DataFrame( - [ - {"table_name": "ORDERS", "column_name": "ID", "key_sequence": 1, "constraint_name": "PK_ORDERS"}, - {"table_name": "CUSTOMERS", "column_name": "ID", "key_sequence": 1, "constraint_name": "PK_CUSTOMERS"}, - ] - ) - self.handler.native_query = MagicMock(return_value=TableResponse(data=df)) - - result = self.handler.meta_get_primary_keys(table_names=["ORDERS"]) - - query = self.handler.native_query.call_args[0][0] - self.assertIn("SHOW PRIMARY KEYS", query) - self.assertEqual(len(result.data_frame), 1) - 
self.assertEqual(result.data_frame.iloc[0]["table_name"], "ORDERS") - self.assertIn("ordinal_position", result.data_frame.columns) - - def test_meta_get_primary_keys_handles_exception(self): - self.handler.native_query = MagicMock(side_effect=Exception("boom")) - result = self.handler.meta_get_primary_keys() - self.assertEqual(result.type, RESPONSE_TYPE.ERROR) - - def test_meta_get_foreign_keys_filters(self): - df = DataFrame( - [ - { - "pk_table_name": "ORDERS", - "pk_column_name": "CUSTOMER_ID", - "fk_table_name": "CUSTOMERS", - "fk_column_name": "ID", - }, - { - "pk_table_name": "INVENTORY", - "pk_column_name": "PRODUCT_ID", - "fk_table_name": "PRODUCTS", - "fk_column_name": "ID", - }, - ] - ) - self.handler.native_query = MagicMock(return_value=TableResponse(data=df)) - - result = self.handler.meta_get_foreign_keys(table_names=["ORDERS", "CUSTOMERS"]) - - self.assertEqual(len(result.data_frame), 1) - self.assertIn("child_table_name", result.data_frame.columns) - row = result.data_frame.iloc[0] - self.assertEqual(row["parent_table_name"], "ORDERS") - self.assertEqual(row["parent_column_name"], "CUSTOMER_ID") - self.assertEqual(row["child_table_name"], "CUSTOMERS") - self.assertEqual(row["child_column_name"], "ID") - - def test_meta_get_foreign_keys_handles_exception(self): - self.handler.native_query = MagicMock(side_effect=Exception("boom")) - result = self.handler.meta_get_foreign_keys() - self.assertEqual(result.type, RESPONSE_TYPE.ERROR) - - def test_meta_get_handler_info_returns_guidance(self): - info = self.handler.meta_get_handler_info() - self.assertIn("ticks", info) - self.assertIn("double quotes", info) - - def test_types_casting(self): - """Test that types are casted correctly""" - query_str = "SELECT * FROM test_table" - mock_conn = MagicMock() - mock_cursor = MagicMock() - mock_cursor.__enter__ = MagicMock(return_value=mock_cursor) - mock_cursor.__exit__ = MagicMock(return_value=None) - mock_conn.cursor.return_value = mock_cursor - 
self.handler.connect = MagicMock(return_value=mock_conn) - - # region test numeric types - """Test data obtained using: - CREATE TABLE test_numeric_types ( - n_number NUMBER, - n_number_p NUMBER(38), - n_number_ps NUMBER(10,2), - n_int INTEGER, - n_integer INTEGER, - n_bigint BIGINT, - n_smallint SMALLINT, - n_tinyint TINYINT, - n_byteint BYTEINT, - n_float FLOAT, - n_float4 FLOAT4, - n_float8 FLOAT8, - n_double DOUBLE, - n_double_precision DOUBLE PRECISION, - n_real REAL, - n_decimal DECIMAL(10,2), - n_numeric NUMERIC(10,2) - ); - - INSERT INTO test_numeric_types ( - n_number, - n_number_p, - n_number_ps, - n_int, - n_integer, - n_bigint, - n_smallint, - n_tinyint, - n_byteint, - n_float, - n_float4, - n_float8, - n_double, - n_double_precision, - n_real, - n_decimal, - n_numeric - ) VALUES ( - 123456.789, -- n_number - 12345678901234567890123456789012345678, -- n_number_p (38 numbers) - 1234.56, -- n_number_ps - 2147483647, -- n_int - -2147483648, -- n_integer - 9223372036854775807, -- n_bigint - 32767, -- n_smallint - 255, -- n_tinyint - 127, -- n_byteint - 3.14159265358979, -- n_float - 3.14159, -- n_float4 - 3.141592653589793238, -- n_float8 - 2.7182818284590452, -- n_double - 1.6180339887498948, -- n_double_precision - 0.5772156649015329, -- n_real - 9876.54, -- n_decimal - 1234.56 -- n_numeric - ); - """ - input_data = pd.DataFrame( - { - "N_NUMBER": pd.Series([123457], dtype="int32"), - "N_NUMBER_P": pd.Series([Decimal("12345678901234567890123456789012345678")], dtype="object"), - "N_NUMBER_PS": pd.Series([1234.56], dtype="float64"), - "N_INT": pd.Series([2147483647], dtype="int32"), - "N_INTEGER": pd.Series([-2147483648], dtype="int32"), - "N_BIGINT": pd.Series([9223372036854775807], dtype="int64"), - "N_SMALLINT": pd.Series([32767], dtype="int16"), - "N_TINYINT": pd.Series([255], dtype="int16"), - "N_BYTEINT": pd.Series([127], dtype="int8"), - "N_FLOAT": pd.Series([3.14159265358979], dtype="float64"), - "N_FLOAT4": pd.Series([3.14159], dtype="float64"), - 
"N_FLOAT8": pd.Series([3.141592653589793], dtype="float64"), - "N_DOUBLE": pd.Series([2.718281828459045], dtype="float64"), - "N_DOUBLE_PRECISION": pd.Series([1.618033988749895], dtype="float64"), - "N_REAL": pd.Series([0.5772156649015329], dtype="float64"), - "N_DECIMAL": pd.Series([9876.54], dtype="float64"), - "N_NUMERIC": pd.Series([1234.56], dtype="float64"), - } - ) - mock_cursor.fetch_pandas_batches.return_value = iter([input_data]) - mock_cursor.description = [ - ColumnDescription( - name="N_NUMBER", - type_code=0, - display_size=None, - internal_size=None, - precision=38, - scale=0, - is_nullable=True, - ), - ColumnDescription( - name="N_NUMBER_P", - type_code=0, - display_size=None, - internal_size=None, - precision=38, - scale=0, - is_nullable=True, - ), - ColumnDescription( - name="N_NUMBER_PS", - type_code=0, - display_size=None, - internal_size=None, - precision=10, - scale=2, - is_nullable=True, - ), - ColumnDescription( - name="N_INT", - type_code=0, - display_size=None, - internal_size=None, - precision=38, - scale=0, - is_nullable=True, - ), - ColumnDescription( - name="N_INTEGER", - type_code=0, - display_size=None, - internal_size=None, - precision=38, - scale=0, - is_nullable=True, - ), - ColumnDescription( - name="N_BIGINT", - type_code=0, - display_size=None, - internal_size=None, - precision=38, - scale=0, - is_nullable=True, - ), - ColumnDescription( - name="N_SMALLINT", - type_code=0, - display_size=None, - internal_size=None, - precision=38, - scale=0, - is_nullable=True, - ), - ColumnDescription( - name="N_TINYINT", - type_code=0, - display_size=None, - internal_size=None, - precision=38, - scale=0, - is_nullable=True, - ), - ColumnDescription( - name="N_BYTEINT", - type_code=0, - display_size=None, - internal_size=None, - precision=38, - scale=0, - is_nullable=True, - ), - ColumnDescription( - name="N_FLOAT", - type_code=1, - display_size=None, - internal_size=None, - precision=None, - scale=None, - is_nullable=True, - ), - 
ColumnDescription( - name="N_FLOAT4", - type_code=1, - display_size=None, - internal_size=None, - precision=None, - scale=None, - is_nullable=True, - ), - ColumnDescription( - name="N_FLOAT8", - type_code=1, - display_size=None, - internal_size=None, - precision=None, - scale=None, - is_nullable=True, - ), - ColumnDescription( - name="N_DOUBLE", - type_code=1, - display_size=None, - internal_size=None, - precision=None, - scale=None, - is_nullable=True, - ), - ColumnDescription( - name="N_DOUBLE_PRECISION", - type_code=1, - display_size=None, - internal_size=None, - precision=None, - scale=None, - is_nullable=True, - ), - ColumnDescription( - name="N_REAL", - type_code=1, - display_size=None, - internal_size=None, - precision=None, - scale=None, - is_nullable=True, - ), - ColumnDescription( - name="N_DECIMAL", - type_code=0, - display_size=None, - internal_size=None, - precision=10, - scale=2, - is_nullable=True, - ), - ColumnDescription( - name="N_NUMERIC", - type_code=0, - display_size=None, - internal_size=None, - precision=10, - scale=2, - is_nullable=True, - ), - ] - - excepted_mysql_types = [ - MYSQL_DATA_TYPE.MEDIUMINT, - MYSQL_DATA_TYPE.INT, - MYSQL_DATA_TYPE.DOUBLE, - MYSQL_DATA_TYPE.MEDIUMINT, - MYSQL_DATA_TYPE.MEDIUMINT, - MYSQL_DATA_TYPE.BIGINT, - MYSQL_DATA_TYPE.SMALLINT, - MYSQL_DATA_TYPE.SMALLINT, - MYSQL_DATA_TYPE.TINYINT, - MYSQL_DATA_TYPE.DOUBLE, - MYSQL_DATA_TYPE.DOUBLE, - MYSQL_DATA_TYPE.DOUBLE, - MYSQL_DATA_TYPE.DOUBLE, - MYSQL_DATA_TYPE.DOUBLE, - MYSQL_DATA_TYPE.DOUBLE, - MYSQL_DATA_TYPE.DOUBLE, - MYSQL_DATA_TYPE.DOUBLE, - ] - - response = self.handler.native_query(query_str) - actual_mysql_types = [col.type for col in response.columns] - self.assertEqual(actual_mysql_types, excepted_mysql_types) - for column_name in input_data.columns: - result_value = response.data_frame[column_name][0] - self.assertEqual(result_value, input_data[column_name][0]) - # endregion - - # region test string/blob types - """Data obtained using: - CREATE TABLE 
test_text_blob_types ( - t_string STRING, -- up to 16 МБ - t_string_size STRING(100), -- STRING with max len - t_char CHAR(10), -- fix len - t_varchar VARCHAR(100), -- STRING alias - t_text TEXT, -- STRING alias - t_binary BINARY, -- bin data up to 8 МБ - t_binary_size BINARY(100), -- bin with max len - t_varbinary VARBINARY(100) -- BINARY alias - ); - - INSERT INTO test_text_blob_types ( - t_string, - t_string_size, - t_char, - t_varchar, - t_text, - t_binary, - t_binary_size, - t_varbinary - ) VALUES ( - 't_string', -- t_string - 't_string_size', -- t_string_size - 't_char', -- t_char - 't_varchar', -- t_varchar - 't_text', -- t_text - TO_BINARY('t_binary', 'UTF-8'), -- t_binary - TO_BINARY('t_binary_size', 'UTF-8'), -- t_binary_size - TO_BINARY('t_variant', 'UTF-8') -- t_varbinary - ); - """ - input_data = pd.DataFrame( - [ - [ - "t_string", - "t_string_size", - "t_char", - "t_varchar", - "t_text", - b"t_binary", - b"t_binary_size", - b"t_variant", - ] - ], - columns=[ - "T_STRING", - "T_STRING_SIZE", - "T_CHAR", - "T_VARCHAR", - "T_TEXT", - "T_BINARY", - "T_BINARY_SIZE", - "T_VARBINARY", - ], - dtype="object", - ) - mock_cursor.fetch_pandas_batches.return_value = iter([input_data]) - mock_cursor.description = [ - ColumnDescription( - name="T_STRING", - type_code=2, - display_size=None, - internal_size=16777216, - precision=None, - scale=None, - is_nullable=True, - ), - ColumnDescription( - name="T_STRING_SIZE", - type_code=2, - display_size=None, - internal_size=100, - precision=None, - scale=None, - is_nullable=True, - ), - ColumnDescription( - name="T_CHAR", - type_code=2, - display_size=None, - internal_size=10, - precision=None, - scale=None, - is_nullable=True, - ), - ColumnDescription( - name="T_VARCHAR", - type_code=2, - display_size=None, - internal_size=100, - precision=None, - scale=None, - is_nullable=True, - ), - ColumnDescription( - name="T_TEXT", - type_code=2, - display_size=None, - internal_size=16777216, - precision=None, - scale=None, - 
is_nullable=True, - ), - ColumnDescription( - name="T_BINARY", - type_code=11, - display_size=None, - internal_size=8388608, - precision=None, - scale=None, - is_nullable=True, - ), - ColumnDescription( - name="T_BINARY_SIZE", - type_code=11, - display_size=None, - internal_size=100, - precision=None, - scale=None, - is_nullable=True, - ), - ColumnDescription( - name="T_VARBINARY", - type_code=11, - display_size=None, - internal_size=100, - precision=None, - scale=None, - is_nullable=True, - ), - ] - excepted_mysql_types = [ - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.BINARY, - MYSQL_DATA_TYPE.BINARY, - MYSQL_DATA_TYPE.BINARY, - ] - - response = self.handler.native_query(query_str) - actual_mysql_types = [col.type for col in response.columns] - self.assertEqual(actual_mysql_types, excepted_mysql_types) - for column_name in input_data.columns: - result_value = response.data_frame[column_name][0] - self.assertEqual(result_value, input_data[column_name][0]) - # endregion - - # region test bool types - """Data obtained using: - CREATE TABLE test_boolean_types ( - b_boolean BOOLEAN - ); - - INSERT INTO test_boolean_types ( - b_boolean - ) VALUES ( - TRUE -- b_boolean - ); - """ - input_data = pd.DataFrame([[True]], columns=["B_BOOLEAN"], dtype="bool") - mock_cursor.fetch_pandas_batches.return_value = iter([input_data]) - mock_cursor.description = [ - ColumnDescription( - name="B_BOOLEAN", - type_code=13, - display_size=None, - internal_size=None, - precision=None, - scale=None, - is_nullable=True, - ) - ] - excepted_mysql_types = [MYSQL_DATA_TYPE.BOOLEAN] - - response = self.handler.native_query(query_str) - actual_mysql_types = [col.type for col in response.columns] - self.assertEqual(actual_mysql_types, excepted_mysql_types) - for column_name in input_data.columns: - result_value = response.data_frame[column_name][0] - self.assertEqual(result_value, 
input_data[column_name][0]) - # endregion - - # region test date/time types - """Data obtained using: - CREATE TABLE test_datetime_types ( - d_date DATE, - d_datetime DATETIME, - d_datetime_p DATETIME(3), - d_time TIME, - d_time_p TIME(6), - d_timestamp TIMESTAMP, - d_timestamp_p TIMESTAMP(9), - d_timestamp_ltz TIMESTAMP_LTZ, -- timestamp with local tz - d_timestamp_ltz_p TIMESTAMP_LTZ(3), - d_timestamp_ntz TIMESTAMP_NTZ, -- timestamp no tz - d_timestamp_ntz_p TIMESTAMP_NTZ(6), - d_timestamp_tz TIMESTAMP_TZ, -- timestamp with tz - d_timestamp_tz_p TIMESTAMP_TZ(9) - ); - - INSERT INTO test_datetime_types ( - d_date, - d_datetime, - d_datetime_p, - d_time, - d_time_p, - d_timestamp, - d_timestamp_p, - d_timestamp_ltz, - d_timestamp_ltz_p, - d_timestamp_ntz, - d_timestamp_ntz_p, - d_timestamp_tz, - d_timestamp_tz_p - ) VALUES ( - '2023-10-15', -- d_date - '2023-10-15 14:30:45.123456789', -- d_datetime - '2023-10-15 14:30:45.123', -- d_datetime_p - '14:30:45', -- d_time - '14:30:45.123456', -- d_time_p - '2023-10-15 14:30:45.123456789 +03:00', -- d_timestamp - '2023-10-15 14:30:45.123456789 +03:00', -- d_timestamp_p - '2023-10-15 14:30:45.123 +03:00', -- d_timestamp_ltz - '2023-10-15 14:30:45.123 +03:00', -- d_timestamp_ltz_p - '2023-10-15 14:30:45.123456', -- d_timestamp_ntz - '2023-10-15 14:30:45.123456', -- d_timestamp_ntz_p - '2023-10-15 14:30:45.123456789 +03:00', -- d_timestamp_tz - '2023-10-15 14:30:45.123456789 +03:00' -- d_timestamp_tz_p - ); - """ - input_data = pd.DataFrame( - { - "D_DATE": pd.Series([datetime.date(2023, 10, 15)], dtype="object"), - "D_DATETIME": pd.Series([pd.Timestamp("2023-10-15 14:30:45.123456789")], dtype="datetime64[ns]"), - "D_DATETIME_P": pd.Series([pd.Timestamp("2023-10-15 14:30:45.123000")], dtype="datetime64[ms]"), - "D_TIME": pd.Series([datetime.time(14, 30, 45)], dtype="object"), - "D_TIME_P": pd.Series([datetime.time(14, 30, 45, 123456)], dtype="object"), - "D_TIMESTAMP": pd.Series([pd.Timestamp("2023-10-15 
14:30:45.123456789")], dtype="datetime64[ns]"), - "D_TIMESTAMP_P": pd.Series([pd.Timestamp("2023-10-15 14:30:45.123456789")], dtype="datetime64[ns]"), - "D_TIMESTAMP_LTZ": pd.Series( - [pd.Timestamp("2023-10-15 04:30:45.123000-0700", tz="America/Los_Angeles")], - dtype="datetime64[ns, America/Los_Angeles]", - ), - "D_TIMESTAMP_LTZ_P": pd.Series( - [pd.Timestamp("2023-10-15 04:30:45.123000-0700", tz="America/Los_Angeles")], - dtype="datetime64[ns, America/Los_Angeles]", - ), - "D_TIMESTAMP_NTZ": pd.Series([pd.Timestamp("2023-10-15 14:30:45.123456")], dtype="datetime64[ns]"), - "D_TIMESTAMP_NTZ_P": pd.Series([pd.Timestamp("2023-10-15 14:30:45.123456")], dtype="datetime64[ns]"), - "D_TIMESTAMP_TZ": pd.Series( - [pd.Timestamp("2023-10-15 04:30:45.123456789-0700", tz="America/Los_Angeles")], - dtype="datetime64[ns, America/Los_Angeles]", - ), - "D_TIMESTAMP_TZ_P": pd.Series( - [pd.Timestamp("2023-10-15 04:30:45.123456789-0700", tz="America/Los_Angeles")], - dtype="datetime64[ns, America/Los_Angeles]", - ), - } - ) - mock_cursor.fetch_pandas_batches.return_value = iter([input_data]) - mock_cursor.description = [ - ColumnDescription( - name="D_DATE", - type_code=3, - display_size=None, - internal_size=None, - precision=None, - scale=None, - is_nullable=True, - ), - ColumnDescription( - name="D_DATETIME", - type_code=8, - display_size=None, - internal_size=None, - precision=0, - scale=9, - is_nullable=True, - ), - ColumnDescription( - name="D_DATETIME_P", - type_code=8, - display_size=None, - internal_size=None, - precision=0, - scale=3, - is_nullable=True, - ), - ColumnDescription( - name="D_TIME", - type_code=12, - display_size=None, - internal_size=None, - precision=0, - scale=9, - is_nullable=True, - ), - ColumnDescription( - name="D_TIME_P", - type_code=12, - display_size=None, - internal_size=None, - precision=0, - scale=6, - is_nullable=True, - ), - ColumnDescription( - name="D_TIMESTAMP", - type_code=8, - display_size=None, - internal_size=None, - precision=0, - 
scale=9, - is_nullable=True, - ), - ColumnDescription( - name="D_TIMESTAMP_P", - type_code=8, - display_size=None, - internal_size=None, - precision=0, - scale=9, - is_nullable=True, - ), - ColumnDescription( - name="D_TIMESTAMP_LTZ", - type_code=6, - display_size=None, - internal_size=None, - precision=0, - scale=9, - is_nullable=True, - ), - ColumnDescription( - name="D_TIMESTAMP_LTZ_P", - type_code=6, - display_size=None, - internal_size=None, - precision=0, - scale=3, - is_nullable=True, - ), - ColumnDescription( - name="D_TIMESTAMP_NTZ", - type_code=8, - display_size=None, - internal_size=None, - precision=0, - scale=9, - is_nullable=True, - ), - ColumnDescription( - name="D_TIMESTAMP_NTZ_P", - type_code=8, - display_size=None, - internal_size=None, - precision=0, - scale=6, - is_nullable=True, - ), - ColumnDescription( - name="D_TIMESTAMP_TZ", - type_code=7, - display_size=None, - internal_size=None, - precision=0, - scale=9, - is_nullable=True, - ), - ColumnDescription( - name="D_TIMESTAMP_TZ_P", - type_code=7, - display_size=None, - internal_size=None, - precision=0, - scale=9, - is_nullable=True, - ), - ] - excepted_mysql_types = [ - MYSQL_DATA_TYPE.DATE, - MYSQL_DATA_TYPE.DATETIME, - MYSQL_DATA_TYPE.DATETIME, - MYSQL_DATA_TYPE.TIME, - MYSQL_DATA_TYPE.TIME, - MYSQL_DATA_TYPE.DATETIME, - MYSQL_DATA_TYPE.DATETIME, - MYSQL_DATA_TYPE.DATETIME, - MYSQL_DATA_TYPE.DATETIME, - MYSQL_DATA_TYPE.DATETIME, - MYSQL_DATA_TYPE.DATETIME, - MYSQL_DATA_TYPE.DATETIME, - MYSQL_DATA_TYPE.DATETIME, - ] - expected_result_df = pd.DataFrame( - { - "D_DATE": pd.Series([datetime.date(2023, 10, 15)], dtype="object"), - "D_DATETIME": pd.Series([pd.Timestamp("2023-10-15 14:30:45.123456789")], dtype="datetime64[ns]"), - "D_DATETIME_P": pd.Series([pd.Timestamp("2023-10-15 14:30:45.123000")], dtype="datetime64[ms]"), - "D_TIME": pd.Series([datetime.time(14, 30, 45)], dtype="object"), - "D_TIME_P": pd.Series([datetime.time(14, 30, 45, 123456)], dtype="object"), - "D_TIMESTAMP": 
pd.Series([pd.Timestamp("2023-10-15 14:30:45.123456789")], dtype="datetime64[ns]"), - "D_TIMESTAMP_P": pd.Series([pd.Timestamp("2023-10-15 14:30:45.123456789")], dtype="datetime64[ns]"), - "D_TIMESTAMP_LTZ": pd.Series([pd.Timestamp("2023-10-15 11:30:45.123000")], dtype="datetime64[ns]"), - "D_TIMESTAMP_LTZ_P": pd.Series([pd.Timestamp("2023-10-15 11:30:45.123000")], dtype="datetime64[ns]"), - "D_TIMESTAMP_NTZ": pd.Series([pd.Timestamp("2023-10-15 14:30:45.123456")], dtype="datetime64[ns]"), - "D_TIMESTAMP_NTZ_P": pd.Series([pd.Timestamp("2023-10-15 14:30:45.123456")], dtype="datetime64[ns]"), - "D_TIMESTAMP_TZ": pd.Series([pd.Timestamp("2023-10-15 11:30:45.123456789")], dtype="datetime64[ns]"), - "D_TIMESTAMP_TZ_P": pd.Series([pd.Timestamp("2023-10-15 11:30:45.123456789")], dtype="datetime64[ns]"), - } - ) - response = self.handler.native_query(query_str) - actual_mysql_types = [col.type for col in response.columns] - self.assertEqual(actual_mysql_types, excepted_mysql_types) - self.assertTrue(response.data_frame.equals(expected_result_df)) - # endregion - - # region json/array types - """Test request: - - select * from demo_snowflake ( - select - OBJECT_CONSTRUCT('name', 'Jones', 'age', 42) as t_json, - ARRAY_CONSTRUCT(12, 'twelve', NULL) as t_array, - [1.1,2.2,3.3]::VECTOR(FLOAT,3) as t_vector - ); - """ - input_data = pd.DataFrame( - { - "T_JSON": pd.Series([{"name": "Jones", "age": 42}], dtype="object"), - # snowflake returns arrays as text - "T_ARRAY": pd.Series(['[\n 12,\n "twelve",\n undefined\n]'], dtype="object"), - "T_VECTOR": pd.Series([np.array([1.1, 2.2, 3.3], dtype="float32")], dtype="object"), - } - ) - mock_cursor.fetch_pandas_batches.return_value = iter([input_data]) - mock_cursor.description = [ - ColumnDescription( - name="T_JSON", - type_code=9, - display_size=None, - internal_size=None, - precision=None, - scale=None, - is_nullable=True, - ), - ColumnDescription( - name="T_ARRAY", - type_code=10, - display_size=None, - internal_size=None, - 
precision=None, - scale=None, - is_nullable=True, - ), - ColumnDescription( - name="T_VECTOR", - type_code=16, - display_size=None, - internal_size=None, - precision=None, - scale=None, - is_nullable=True, - ), - ] - - excepted_mysql_types = [MYSQL_DATA_TYPE.JSON, MYSQL_DATA_TYPE.JSON, MYSQL_DATA_TYPE.VECTOR] - - expected_result_df = pd.DataFrame( - { - "T_JSON": pd.Series([{"name": "Jones", "age": 42}], dtype="object"), - "T_ARRAY": pd.Series(['[\n 12,\n "twelve",\n undefined\n]'], dtype="object"), - "T_VECTOR": pd.Series([np.array([1.1, 2.2, 3.3], dtype="float32")], dtype="object"), - } - ) - response = self.handler.native_query(query_str) - actual_mysql_types = [col.type for col in response.columns] - self.assertEqual(actual_mysql_types, excepted_mysql_types) - self.assertTrue(response.data_frame.equals(expected_result_df)) - # endregion - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/test_timescaledb.py b/tests/unit/handlers/test_timescaledb.py deleted file mode 100644 index 52cbd771908..00000000000 --- a/tests/unit/handlers/test_timescaledb.py +++ /dev/null @@ -1,100 +0,0 @@ -from collections import OrderedDict -import unittest -from unittest.mock import patch, MagicMock - -import psycopg -from psycopg.pq import ExecStatus - -from base_handler_test import BaseDatabaseHandlerTest, MockCursorContextManager -from mindsdb.integrations.handlers.timescaledb_handler.timescaledb_handler import TimeScaleDBHandler -from mindsdb.integrations.libs.response import DataHandlerResponse as Response - - -class TestTimescaleHandler(BaseDatabaseHandlerTest, unittest.TestCase): - @property - def dummy_connection_data(self): - return OrderedDict( - host="127.0.0.1", - port=5432, - user="example_user", - schema="public", - password="example_pass", - database="example_db", - ) - - @property - def err_to_raise_on_connect_failure(self): - return psycopg.Error("Connection Failed") - - @property - def get_tables_query(self): - return """ - SELECT - 
table_schema, - table_name, - table_type - FROM - information_schema.tables - WHERE - table_schema NOT IN ('information_schema', 'pg_catalog') - and table_type in ('BASE TABLE', 'VIEW') - and table_schema = current_schema() - """ - - @property - def get_columns_query(self): - return f""" - SELECT - COLUMN_NAME, - DATA_TYPE, - ORDINAL_POSITION, - COLUMN_DEFAULT, - IS_NULLABLE, - CHARACTER_MAXIMUM_LENGTH, - CHARACTER_OCTET_LENGTH, - NUMERIC_PRECISION, - NUMERIC_SCALE, - DATETIME_PRECISION, - CHARACTER_SET_NAME, - COLLATION_NAME - FROM - information_schema.columns - WHERE - table_name = '{self.mock_table}' - AND - table_schema = current_schema() - """ - - def create_handler(self): - return TimeScaleDBHandler("timescaledb", connection_data=self.dummy_connection_data) - - def create_patcher(self): - return patch("psycopg.connect") - - def test_native_query(self): - """ - Tests the `native_query` method to ensure it executes a SQL query using a mock cursor, - returns a Response object, and correctly handles the ExecStatus scenario - """ - # TODO: Can this be handled via the base class? The use of ExecStatus is specific to Postgres. 
- mock_conn = MagicMock() - mock_cursor = MockCursorContextManager() - - self.handler.connect = MagicMock(return_value=mock_conn) - mock_conn.cursor = MagicMock(return_value=mock_cursor) - - mock_cursor.execute.return_value = None - - mock_pgresult = MagicMock() - mock_pgresult.status = ExecStatus.COMMAND_OK - mock_cursor.pgresult = mock_pgresult - - query_str = "SELECT * FROM table" - data = self.handler.native_query(query_str) - mock_cursor.execute.assert_called_once_with(query_str) - assert isinstance(data, Response) - self.assertFalse(data.error_code) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/handlers/test_web.py b/tests/unit/handlers/test_web.py deleted file mode 100644 index cb34256e15d..00000000000 --- a/tests/unit/handlers/test_web.py +++ /dev/null @@ -1,233 +0,0 @@ -from urllib.parse import urljoin -import concurrent.futures - -import pytest -import unittest -from unittest.mock import patch, MagicMock - -from requests import Response, Request -from bs4 import BeautifulSoup - -from mindsdb.integrations.libs.api_handler_exceptions import TableAlreadyExists - -try: - from mindsdb.integrations.handlers.web_handler.web_handler import WebHandler - from mindsdb.integrations.handlers.web_handler.web_handler import CrawlerTable - from mindsdb.integrations.handlers.web_handler import urlcrawl_helpers as helpers - - WEB_HANDLER_AVAILABLE = True -except ImportError: - WEB_HANDLER_AVAILABLE = False - -from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator - - -@pytest.mark.skipif(not WEB_HANDLER_AVAILABLE, reason="web_handler not installed (community handler)") -class TestWebsHandler(unittest.TestCase): - def setUp(self) -> None: - self.handler = WebHandler(name="test_web_handler") - - def test_crawler_already_registered(self): - with self.assertRaises(TableAlreadyExists): - self.handler._register_table("crawler", CrawlerTable) - - -PDF_CONTENT = ( - b"%PDF-1.7\n\n1 0 obj\n<< /Type /Catalog /Pages 2 0 R 
>>\nendobj\n\n2 0 obj\n<< /Type /Pages " - b"/Kids [3 0 R] /Count 1 >>\nendobj\n\n3 0 obj\n<< /Type /Page /Parent 2 0 R /Contents 4 0 R " - b">>\nendobj\n\n4 0 obj\n<< /Length 22 >>\nstream\nBT\n/Helvetica 12 Tf\n1 0 0 1 50 700 Tm\n(" - b"Hello, this is a test!) Tj\nET\nendstream\nendobj\n\nxref\n0 5\n0000000000 65535 " - b"f\n0000000010 00000 n\n0000000077 00000 n\n0000000122 00000 n\n0000000203 00000 n\n0000000277 " - b"00000 n\ntrailer\n<< /Size 5 /Root 1 0 R >>\nstartxref\n343\n%%EOF\n " -) - -BROKEN_PDF_CONTENT = b"%PDF-1.4\n\nThis is not a valid PDF file content\n" - -HTML_SAMPLE_1 = "

Heading One

Heading Two

" - -MARKDOWN_SAMPLE_1 = "# Heading One \n\n ## Heading Two" - - -@pytest.mark.skipif(not WEB_HANDLER_AVAILABLE, reason="web_handler not installed (community handler)") -class TestWebHelpers(unittest.TestCase): - @patch("requests.Response") - def test_pdf_to_markdown(self, mock_response) -> None: - response = mock_response.return_value - response.content = PDF_CONTENT - result = helpers.pdf_to_markdown(response) - assert "Hello, this is a test!" in result - - @patch("requests.Response") - def test_broken_pdf_to_markdown(self, mock_response) -> None: - response = mock_response.return_value - response.content = BROKEN_PDF_CONTENT - - with pytest.raises(Exception, match="Failed to process PDF data"): - helpers.pdf_to_markdown(response) - - def test_url_validation(self): - assert helpers.is_valid("https://google.com") is True - assert helpers.is_valid("google.com") is False - - def test_get_readable_text_from_soup(self) -> None: - soup = BeautifulSoup(HTML_SAMPLE_1, "html.parser") - import re - - expected = re.sub(r"\s+", " ", MARKDOWN_SAMPLE_1).strip() - actual = re.sub(r"\s+", " ", helpers.get_readable_text_from_soup(soup)).strip() - - assert expected == actual - - @patch("mindsdb.integrations.handlers.web_handler.urlcrawl_helpers.get_all_website_links") - @patch("concurrent.futures.ProcessPoolExecutor") - def test_parallel_get_all_website_links(self, mock_executor, mock_get_links): - # Setup: Mock the get_all_website_links function to return a list of links - mock_get_links.return_value = ["link1", "link2", "link3"] - - # Setup: Mock the ProcessPoolExecutor class to return a mock executor - mock_executor_instance = MagicMock() - mock_executor.return_value.__enter__.return_value = mock_executor_instance - - # Setup: Mock the executor to return a future that immediately completes with a result - mock_future = concurrent.futures.Future() - mock_future.set_result(["link1", "link2", "link3"]) - mock_executor_instance.submit.return_value = mock_future - - # Call the 
function with a list of URLs - urls = ["url1", "url2", "url3"] - result = helpers.parallel_get_all_website_links(urls) - - # Assert: Check if the function returns the expected result - expected = { - "url1": ["link1", "link2", "link3"], - "url2": ["link1", "link2", "link3"], - "url3": ["link1", "link2", "link3"], - } - assert result == expected - - # Assert: Check if the mocks were called as expected - mock_get_links.assert_called() - - -def html_get(url, **kwargs): - # generate html page with 10 sub-links in the same domain - if not url.endswith("/"): - url = url + "/" - links = [f"link {i}\n" for i in range(10)] - - html = f""" - - - Content for {url} - {"".join(links)} - different domain - - - """ - resp = Response() - resp._content = html.encode() - resp.request = Request() - resp.status_code = 200 - - return resp - - -@pytest.mark.skipif(not WEB_HANDLER_AVAILABLE, reason="web_handler not installed (community handler)") -class TestWebHandler(unittest.TestCase): - @patch("requests.Session.get") - def test_web_cases(self, mock_get): - mock_get.side_effect = html_get - - crawler_table = CrawlerTable(handler=MagicMock()) - - # filters - single_url = FilterCondition("url", FilterOperator.EQUAL, "https://docs.mindsdb.com/") - two_urls = FilterCondition("url", FilterOperator.IN, ("https://docs.mindsdb.com/", "https://docs.python.org/")) - - depth_0 = FilterCondition("crawl_depth", FilterOperator.EQUAL, 0) - depth_1 = FilterCondition("crawl_depth", FilterOperator.EQUAL, 1) - depth_2 = FilterCondition("crawl_depth", FilterOperator.EQUAL, 2) - - per_url_2 = FilterCondition("per_url_limit", FilterOperator.EQUAL, 2) - - # ---- single url ----- - - # default limit 1 - df = crawler_table.list(conditions=[single_url]) - assert len(df) == 1 - - # requested count of results - df = crawler_table.list(conditions=[single_url], limit=100) - assert len(df) == 100 - - # ---- depth ----- - - # only main url - df = crawler_table.list(conditions=[single_url, depth_0]) - assert len(df) 
== 1 - - # main url and all links from it - df = crawler_table.list(conditions=[single_url, depth_1]) - assert len(df) == 11 - - # main url, +10 from it, +10*10 from every nested - df = crawler_table.list(conditions=[single_url, depth_2]) - assert len(df) == 111 - - # depth + limit - df = crawler_table.list(conditions=[single_url, depth_2], limit=5) - assert len(df) == 5 - - # ---- multiple url ----- - - # without limit: every url - df = crawler_table.list(conditions=[two_urls]) - assert len(df) == 2 - - # with limit: as requested - df = crawler_table.list(conditions=[two_urls], limit=100) - assert len(df) == 100 - - # every url twice - df = crawler_table.list(conditions=[two_urls, per_url_2]) - assert len(df) == 4 - - # every url twice, limited - df = crawler_table.list(conditions=[two_urls, per_url_2], limit=3) - assert len(df) == 3 - - # ---- multiple + depth ----- - - # one result per url - df = crawler_table.list(conditions=[two_urls, depth_0]) - assert len(df) == 2 - - # crawl 2 levels both urls - df = crawler_table.list(conditions=[two_urls, depth_2]) - assert len(df) == 2 * 111 - - # ---- multiple + depth + limit ----- - - # 2 levels, limited - df = crawler_table.list(conditions=[two_urls, depth_2], limit=100) - assert len(df) == 100 - - # ---- multiple + depth + per_url ----- - - # one result per url - df = crawler_table.list(conditions=[two_urls, depth_0, per_url_2]) - assert len(df) == 2 - - # two pages per url - df = crawler_table.list(conditions=[two_urls, depth_2, per_url_2]) - assert len(df) == 4 - - # ---- multiple + depth + per_url + limit - - # one result per url - df = crawler_table.list(conditions=[two_urls, depth_0, per_url_2], limit=3) - assert len(df) == 2 - - # 4 results but limited - df = crawler_table.list(conditions=[two_urls, depth_2, per_url_2], limit=3) - assert len(df) == 3 diff --git a/tests/unit/integrations/__init__.py b/tests/unit/integrations/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git 
a/tests/unit/integrations/libs/__init__.py b/tests/unit/integrations/libs/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/unit/integrations/libs/test_response.py b/tests/unit/integrations/libs/test_response.py deleted file mode 100644 index 18aa870d939..00000000000 --- a/tests/unit/integrations/libs/test_response.py +++ /dev/null @@ -1,671 +0,0 @@ -"""Unit tests for response classes in mindsdb.integrations.libs.response module. - -This module tests all response types used by handlers: -- TableResponse: for queries that return data (SELECT, SHOW, etc.) -- OkResponse: for successful operations without data (CREATE, DROP, etc.) -- ErrorResponse: for error cases -- HandlerStatusResponse: for connection status checks -- normalize_response: for converting legacy HandlerResponse to new types -- _safe_pandas_concat: memory-safe DataFrame concatenation -""" - -from unittest.mock import patch, MagicMock - -import pandas as pd -import pytest - -from mindsdb.integrations.libs.response import ( - TableResponse, - OkResponse, - ErrorResponse, - HandlerStatusResponse, - HandlerResponse, - normalize_response, - _safe_pandas_concat, - RESPONSE_TYPE, - DataHandlerResponse, -) -from mindsdb.utilities.types.column import Column -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE - - -def _mock_virtual_memory(available_kb: int): - """Create a mock for psutil.virtual_memory() with given available memory in KB.""" - mock_mem = MagicMock() - mock_mem.available = available_kb << 10 # convert KB back to bytes - return mock_mem - - -class TestHandlerStatusResponse: - """Tests for HandlerStatusResponse class.""" - - def test_init_success(self): - """Test initialization with success status.""" - redirect_url = "https://example.com/auth" - copy_storage = "s3://bucket/path" - response = HandlerStatusResponse(success=True, redirect_url=redirect_url, copy_storage=copy_storage) - - assert response.success is True - assert 
response.error_message is None - assert response.redirect_url == redirect_url - assert response.copy_storage == copy_storage - - json_data = response.to_json() - assert json_data["success"] is True - assert json_data["error"] is None - assert json_data["redirect_url"] == redirect_url - assert json_data["copy_storage"] == copy_storage - - def test_init_failure(self): - """Test initialization with failure status.""" - error_msg = "Connection failed" - response = HandlerStatusResponse(success=False, error_message=error_msg) - - assert response.success is False - assert response.error_message == error_msg - assert response.redirect_url is None - assert response.copy_storage is None - - json_data = response.to_json() - assert json_data["success"] is False - assert json_data["error"] == error_msg - assert "redirect_url" not in json_data - assert "copy_storage" not in json_data - - -class TestErrorResponse: - """Unit tests for ErrorResponse class.""" - - def test_init_basic(self): - """Test basic initialization.""" - response = ErrorResponse(error_code=1, error_message="Test error", is_expected_error=True) - - assert response.type == RESPONSE_TYPE.ERROR - assert response.resp_type == RESPONSE_TYPE.ERROR - assert response.error_code == 1 - assert response.error_message == "Test error" - assert response.is_expected_error is True - assert response.exception is None - assert isinstance(response, DataHandlerResponse) - - def test_exception_capture(self): - """Test that exception is captured from current context.""" - try: - raise ValueError("Test exception") - except ValueError: - response = ErrorResponse(error_message="Caught exception") - assert response.exception is not None - assert isinstance(response.exception, ValueError) - - -class TestOkResponse: - """Unit tests for OkResponse class.""" - - def test_init(self): - """Test initialization with affected rows count.""" - response = OkResponse(affected_rows=5) - - assert response.type == RESPONSE_TYPE.OK - assert 
response.resp_type == RESPONSE_TYPE.OK - assert response.affected_rows == 5 - assert isinstance(response, DataHandlerResponse) - - def test_init_without_affected_rows(self): - """Test initialization without affected rows.""" - response = OkResponse() - - assert response.affected_rows is None - - -class TestTableResponse: - """Unit tests for TableResponse class.""" - - def test_init_with_data(self): - """Test initialization with DataFrame.""" - df = pd.DataFrame({"id": [1, 2, 3], "name": ["a", "b", "c"]}) - response = TableResponse(data=df) - - assert response.type == RESPONSE_TYPE.TABLE - assert response.resp_type == RESPONSE_TYPE.TABLE - assert response._fetched is True - pd.testing.assert_frame_equal(response._data, df) - # 'columns' was not provided as attr, so should be as in df - assert [c.name for c in response.columns] == ["id", "name"] - - def test_complex_init_with_generator(self): - """Test initialization with data generator.""" - column1 = Column(name="id", type=MYSQL_DATA_TYPE.INT) - column2 = Column(name="name", type=MYSQL_DATA_TYPE.VARCHAR) - columns = [column1, column2] - df = pd.DataFrame({"id": [0, 1], "name": ["a", "b"]}) - df1 = pd.DataFrame({"id": [2, 3], "name": ["d", "e"]}) - df2 = pd.DataFrame({"id": [4, 5], "name": ["f", "g"]}) - - def data_gen(): - yield df1 - yield df2 - - response = TableResponse(data=df, data_generator=data_gen(), columns=columns) - - assert response.columns[0] is column1 - assert response.columns[1] is column2 - assert response.data_generator is not None - pd.testing.assert_frame_equal(response._data, df) - assert response._fetched is False - pieces = [] - while isinstance(el := response.fetchmany(), pd.DataFrame): - pieces.append(el) - pd.testing.assert_frame_equal(pieces[0], df1) - pd.testing.assert_frame_equal(pieces[1], df2) - pd.testing.assert_frame_equal(response._data, pd.concat([df, df1, df2])) - assert response._fetched is True - assert response.data_generator is None - - def test_data_frame_property(self): - 
"""Test initialization with explicit columns.""" - columns = [Column(name="id", type=MYSQL_DATA_TYPE.INT), Column(name="name", type=MYSQL_DATA_TYPE.VARCHAR)] - df = pd.DataFrame({"id": [0, 1], "name": ["a", "b"]}) - df1 = pd.DataFrame({"id": [2, 3], "name": ["d", "e"]}) - df2 = pd.DataFrame({"id": [4, 5], "name": ["f", "g"]}) - - def data_gen(): - yield df1 - yield df2 - - response = TableResponse(data=df, data_generator=data_gen(), columns=columns) - assert response._fetched is False - pd.testing.assert_frame_equal(response._data, df) - pd.testing.assert_frame_equal(response.data_frame, pd.concat([df, df1, df2])) - assert response._fetched is True - - # should not change result - response.fetchall() - pd.testing.assert_frame_equal(response.data_frame, pd.concat([df, df1, df2])) - - def test_init_with_affected_rows(self): - """Test initialization with affected_rows.""" - df = pd.DataFrame({"id": [1, 2, 3]}) - response = TableResponse(data=df, affected_rows=100) - - assert response.affected_rows == 100 - - def test_iterate_no_save_no_generator(self): - """Test iterate_no_save yields existing data.""" - df = pd.DataFrame({"id": [1, 2, 3]}) - # Need to provide a generator (even empty) to avoid TypeError - response = TableResponse(data=df, data_generator=iter([])) - - chunks = list(response.iterate_no_save()) - - assert len(chunks) == 1 - pd.testing.assert_frame_equal(chunks[0], df) - - # after `iterate_no_save` result should be invalid - with pytest.raises(ValueError): - pd.testing.assert_frame_equal(response.data_frame, df) - - def test_iterate_no_save_with_generator(self): - """Test iterate_no_save yields all chunks without saving.""" - df1 = pd.DataFrame({"id": [4, 5]}) - df2 = pd.DataFrame({"id": [6, 7]}) - - def data_gen(): - yield df1 - yield df2 - - df = pd.DataFrame({"id": [1, 2, 3]}) - response = TableResponse(data=df, data_generator=data_gen()) - chunks = list(response.iterate_no_save()) - - assert len(chunks) == 3 - pd.testing.assert_frame_equal(chunks[0], 
df) - pd.testing.assert_frame_equal(chunks[1], df1) - pd.testing.assert_frame_equal(chunks[2], df2) - - # after `iterate_no_save` result should be invalid - with pytest.raises(ValueError): - pd.testing.assert_frame_equal(response.data_frame, df) - - -class TestNormalizeResponse: - """Unit tests for normalize_response function.""" - - def test_normalize_table_response(self): - """Test that TableResponse is returned as-is.""" - original = TableResponse(data=pd.DataFrame({"id": [1, 2]})) - result = normalize_response(original) - - assert result is original - - def test_normalize_ok_response(self): - """Test that OkResponse is returned as-is.""" - original = OkResponse(affected_rows=5) - result = normalize_response(original) - - assert result is original - - def test_normalize_error_response(self): - """Test that ErrorResponse is returned as-is.""" - original = ErrorResponse(error_message="Test error") - result = normalize_response(original) - - assert result is original - - def test_normalize_legacy_error_response(self): - """Test conversion of legacy HandlerResponse with ERROR type.""" - legacy = HandlerResponse(resp_type=RESPONSE_TYPE.ERROR, error_code=1, error_message="Legacy error") - result = normalize_response(legacy) - - assert isinstance(result, ErrorResponse) - assert result.error_code == 1 - assert result.error_message == "Legacy error" - - def test_normalize_legacy_ok_response(self): - """Test conversion of legacy HandlerResponse with OK type.""" - legacy = HandlerResponse(resp_type=RESPONSE_TYPE.OK, affected_rows=10) - result = normalize_response(legacy) - - assert isinstance(result, OkResponse) - assert result.affected_rows == 10 - - def test_normalize_legacy_table_response(self): - """Test conversion of legacy HandlerResponse with TABLE type.""" - df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]}) - legacy = HandlerResponse(resp_type=RESPONSE_TYPE.TABLE, data_frame=df) - result = normalize_response(legacy) - - assert isinstance(result, TableResponse) 
- pd.testing.assert_frame_equal(result.data_frame, df) - - def test_normalize_legacy_table_response_with_mysql_types(self): - """Test conversion preserves mysql_types as column types.""" - df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]}) - mysql_types = [MYSQL_DATA_TYPE.INT, MYSQL_DATA_TYPE.VARCHAR] - legacy = HandlerResponse(resp_type=RESPONSE_TYPE.TABLE, data_frame=df, mysql_types=mysql_types) - result = normalize_response(legacy) - - assert isinstance(result, TableResponse) - assert len(result.columns) == 2 - assert result.columns[0].type == MYSQL_DATA_TYPE.INT - assert result.columns[1].type == MYSQL_DATA_TYPE.VARCHAR - - def test_normalize_legacy_table_response_empty_dataframe(self): - """Test conversion with empty DataFrame.""" - df = pd.DataFrame() - legacy = HandlerResponse(resp_type=RESPONSE_TYPE.TABLE, data_frame=df) - result = normalize_response(legacy) - - assert isinstance(result, TableResponse) - assert len(result.columns) == 0 - - -class TestSafePandasConcat: - """Unit tests for _safe_pandas_concat function.""" - - @patch("mindsdb.integrations.libs.response.psutil") - def test_concat_with_enough_memory(self, mock_psutil): - """Test successful concatenation when sufficient memory is available.""" - mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) - - df1 = pd.DataFrame({"id": [1, 2]}) - df2 = pd.DataFrame({"id": [3, 4]}) - result = _safe_pandas_concat([df1, df2]) - - pd.testing.assert_frame_equal(result, pd.concat([df1, df2])) - - @patch("mindsdb.integrations.libs.response.psutil") - def test_concat_raises_memory_error_when_not_enough_memory(self, mock_psutil): - """Test MemoryError is raised when available memory is too low.""" - # Set available memory to essentially 0 - mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=10) - - df1 = pd.DataFrame({"x": list(range(1000))}) - df2 = pd.DataFrame({"x": list(range(1000))}) - - with pytest.raises(MemoryError): - _safe_pandas_concat([df1, 
df2]) - - @patch("mindsdb.integrations.libs.response.psutil") - def test_concat_single_piece(self, mock_psutil): - """Test concatenation with a single DataFrame.""" - mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) - - df = pd.DataFrame({"id": [1, 2, 3]}) - result = _safe_pandas_concat([df]) - - pd.testing.assert_frame_equal(result, df) - - -class TestRaiseIfLowMemory: - """Unit tests for TableResponse._raise_if_low_memory method.""" - - @patch("mindsdb.integrations.libs.response.psutil") - def test_with_known_affected_rows_enough_memory(self, mock_psutil): - """Test no error when affected_rows is known and memory is sufficient.""" - mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) - - response = TableResponse(data=pd.DataFrame({"id": [1, 2]}), affected_rows=100) - response._last_data_piece = pd.DataFrame({"id": list(range(10))}) - response.rows_fetched = 10 - - # Should not raise - response._raise_if_low_memory() - - @patch("mindsdb.integrations.libs.response.psutil") - def test_with_known_affected_rows_not_enough_memory(self, mock_psutil): - """Test MemoryError when affected_rows is known and memory is insufficient.""" - mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1) - - # Use strings to ensure DataFrame memory > 1KB after >> 10 - large_piece = pd.DataFrame({"text": ["x" * 200 for _ in range(100)]}) - response = TableResponse(data=pd.DataFrame({"text": ["a"]}), affected_rows=1000) - response._last_data_piece = large_piece - response.rows_fetched = 100 - - with pytest.raises(MemoryError, match="Not enough memory"): - response._raise_if_low_memory() - - @patch("mindsdb.integrations.libs.response.psutil") - def test_with_unknown_affected_rows_enough_memory(self, mock_psutil): - """Test no error when affected_rows is None and memory is sufficient.""" - mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) - - response = 
TableResponse(data=pd.DataFrame({"id": [1, 2]})) - response._last_data_piece = pd.DataFrame({"id": list(range(10))}) - - # Should not raise - response._raise_if_low_memory() - - @patch("mindsdb.integrations.libs.response.psutil") - def test_with_unknown_affected_rows_not_enough_memory(self, mock_psutil): - """Test MemoryError when affected_rows is None and memory is insufficient.""" - mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1) - - # Use strings to ensure DataFrame memory > 1KB after >> 10 - large_piece = pd.DataFrame({"text": ["x" * 200 for _ in range(100)]}) - response = TableResponse(data=pd.DataFrame({"text": ["a"]})) - response._last_data_piece = large_piece - - with pytest.raises(MemoryError, match="Not enough memory"): - response._raise_if_low_memory() - - @patch("mindsdb.integrations.libs.response.psutil") - def test_all_rows_already_fetched(self, mock_psutil): - """Test no error when all rows have been fetched (rows_expected = 0).""" - mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=0) - - response = TableResponse(data=pd.DataFrame({"id": [1, 2]}), affected_rows=10) - response._last_data_piece = pd.DataFrame({"id": list(range(10))}) - response.rows_fetched = 10 # all rows fetched - - # rows_expected = min(10 - 10, 10) = 0, should not raise - response._raise_if_low_memory() - - -class TestIterateWithMemoryCheck: - """Unit tests for TableResponse._iterate_with_memory_check method.""" - - def test_none_generator_yields_nothing(self): - """Test that no chunks are yielded when data_generator is None.""" - response = TableResponse(data=pd.DataFrame({"id": [1]})) - assert response._data_generator is None - - chunks = list(response._iterate_with_memory_check()) - assert chunks == [] - - @patch("mindsdb.integrations.libs.response.psutil") - def test_normal_iteration(self, mock_psutil): - """Test that all chunks are yielded during normal iteration.""" - mock_psutil.virtual_memory.return_value = 
_mock_virtual_memory(available_kb=1_000_000) - - df1 = pd.DataFrame({"id": [1, 2]}) - df2 = pd.DataFrame({"id": [3, 4]}) - - def data_gen(): - yield df1 - yield df2 - - columns = [Column(name="id")] - response = TableResponse(data_generator=data_gen(), columns=columns) - - chunks = list(response._iterate_with_memory_check()) - - assert len(chunks) == 2 - pd.testing.assert_frame_equal(chunks[0], df1) - pd.testing.assert_frame_equal(chunks[1], df2) - - @patch("mindsdb.integrations.libs.response.psutil") - def test_memory_error_stops_iteration_after_first_chunk(self, mock_psutil): - """Test that MemoryError is raised after the first chunk when memory runs out. - - The pre-loop _raise_if_low_memory() is a no-op (since _last_data_piece is None), - so the first real psutil.virtual_memory() call happens at the post-yield check. - """ - # Use strings to ensure DataFrame memory > 1KB after >> 10 - df1 = pd.DataFrame({"text": ["x" * 200 for _ in range(100)]}) - df2 = pd.DataFrame({"text": ["y" * 200 for _ in range(100)]}) - - def data_gen(): - yield df1 - yield df2 - - columns = [Column(name="text")] - response = TableResponse(data_generator=data_gen(), columns=columns) - - gen = response._iterate_with_memory_check() - - # First chunk succeeds — post-yield check will be the first real psutil call - mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1) - first = next(gen) - pd.testing.assert_frame_equal(first, df1) - - # Resuming the generator triggers _raise_if_low_memory with 0 available memory - with pytest.raises(MemoryError): - next(gen) - - @patch("mindsdb.integrations.libs.response.psutil") - def test_updates_last_data_piece_and_rows_fetched(self, mock_psutil): - """Test that _last_data_piece and rows_fetched are updated during iteration.""" - mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) - - df1 = pd.DataFrame({"id": [1, 2, 3]}) - df2 = pd.DataFrame({"id": [4, 5]}) - - def data_gen(): - yield df1 - 
yield df2 - - columns = [Column(name="id")] - response = TableResponse(data_generator=data_gen(), columns=columns) - assert response.rows_fetched == 0 - - list(response._iterate_with_memory_check()) - - pd.testing.assert_frame_equal(response._last_data_piece, df2) - assert response.rows_fetched == 5 - - -class TestTableResponseFetchallEdgeCases: - """Additional edge-case tests for TableResponse.fetchall.""" - - def test_fetchall_no_generator_returns_existing_data(self): - """Test fetchall returns existing data when no generator is set.""" - df = pd.DataFrame({"id": [1, 2, 3]}) - response = TableResponse(data=df) - - result = response.fetchall() - pd.testing.assert_frame_equal(result, df) - - @patch("mindsdb.integrations.libs.response.psutil") - def test_fetchall_generator_only_no_initial_data(self, mock_psutil): - """Test fetchall with generator but no initial data.""" - mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) - - df1 = pd.DataFrame({"id": [1, 2]}) - df2 = pd.DataFrame({"id": [3, 4]}) - - def data_gen(): - yield df1 - yield df2 - - columns = [Column(name="id")] - response = TableResponse(data_generator=data_gen(), columns=columns) - - result = response.fetchall() - pd.testing.assert_frame_equal(result, pd.concat([df1, df2])) - assert response._fetched is True - assert response._data_generator is None - - @patch("mindsdb.integrations.libs.response.psutil") - def test_fetchall_empty_generator_creates_empty_df(self, mock_psutil): - """Test fetchall with empty generator creates DataFrame with column names.""" - mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) - - columns = [Column(name="id"), Column(name="name")] - response = TableResponse(data_generator=iter([]), columns=columns) - - result = response.fetchall() - assert list(result.columns) == ["id", "name"] - assert len(result) == 0 - - def test_fetchall_raises_if_invalid(self): - """Test fetchall raises ValueError if data was 
already consumed by iterate_no_save.""" - df = pd.DataFrame({"id": [1]}) - response = TableResponse(data=df, data_generator=iter([])) - list(response.iterate_no_save()) - - with pytest.raises(ValueError, match="Data has already been fetched"): - response.fetchall() - - -class TestTableResponseFetchmanyEdgeCases: - """Additional edge-case tests for TableResponse.fetchmany.""" - - @patch("mindsdb.integrations.libs.response.psutil") - def test_fetchmany_first_piece_with_no_initial_data(self, mock_psutil): - """Test fetchmany sets _data directly when no initial data exists.""" - mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) - - df1 = pd.DataFrame({"id": [1, 2]}) - columns = [Column(name="id")] - response = TableResponse(data_generator=iter([df1]), columns=columns) - - piece = response.fetchmany() - pd.testing.assert_frame_equal(piece, df1) - pd.testing.assert_frame_equal(response._data, df1) - - @patch("mindsdb.integrations.libs.response.psutil") - def test_fetchmany_accumulates_data(self, mock_psutil): - """Test fetchmany accumulates pieces in _data.""" - mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) - - df = pd.DataFrame({"id": [0]}) - df1 = pd.DataFrame({"id": [1]}) - df2 = pd.DataFrame({"id": [2]}) - - def data_gen(): - yield df1 - yield df2 - - columns = [Column(name="id")] - response = TableResponse(data=df, data_generator=data_gen(), columns=columns) - - response.fetchmany() # df1 - response.fetchmany() # df2 - - pd.testing.assert_frame_equal(response._data, pd.concat([df, df1, df2])) - - @patch("mindsdb.integrations.libs.response.psutil") - def test_fetchmany_returns_none_when_exhausted(self, mock_psutil): - """Test fetchmany returns None and marks response as fetched when generator is empty.""" - mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) - - df1 = pd.DataFrame({"id": [1]}) - columns = [Column(name="id")] - response = 
TableResponse(data_generator=iter([df1]), columns=columns) - - piece1 = response.fetchmany() - assert isinstance(piece1, pd.DataFrame) - - piece2 = response.fetchmany() - assert piece2 is None - assert response._fetched is True - assert response._data_generator is None - - def test_fetchmany_raises_if_invalid(self): - """Test fetchmany raises ValueError after iterate_no_save.""" - df = pd.DataFrame({"id": [1]}) - response = TableResponse(data=df, data_generator=iter([])) - list(response.iterate_no_save()) - - with pytest.raises(ValueError, match="Data has already been fetched"): - response.fetchmany() - - -class TestMemoryErrorPropagation: - """Tests for MemoryError propagation through fetchall, fetchmany, and iterate_no_save.""" - - @patch("mindsdb.integrations.libs.response.psutil") - def test_fetchall_raises_memory_error(self, mock_psutil): - """Test MemoryError propagates through fetchall.""" - # Enough memory for first chunk, then out of memory - mock_psutil.virtual_memory.side_effect = [ - _mock_virtual_memory(available_kb=1_000_000), # pre-loop check - _mock_virtual_memory(available_kb=0), # post-yield check - ] - - df1 = pd.DataFrame({"x": list(range(1000))}) - df2 = pd.DataFrame({"x": list(range(1000))}) - - def data_gen(): - yield df1 - yield df2 - - columns = [Column(name="x")] - response = TableResponse(data_generator=data_gen(), columns=columns) - - with pytest.raises(MemoryError): - response.fetchall() - - @patch("mindsdb.integrations.libs.response.psutil") - def test_fetchmany_raises_memory_error(self, mock_psutil): - """Test MemoryError propagates through fetchmany on second call.""" - df1 = pd.DataFrame({"x": list(range(1000))}) - df2 = pd.DataFrame({"x": list(range(1000))}) - - def data_gen(): - yield df1 - yield df2 - - columns = [Column(name="x")] - response = TableResponse(data_generator=data_gen(), columns=columns) - - # First fetchmany: enough memory (pre-loop check is no-op since _last_data_piece is None) - 
mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=1_000_000) - response.fetchmany() - - # Second fetchmany: pre-loop check fails because we now have _last_data_piece set - mock_psutil.virtual_memory.return_value = _mock_virtual_memory(available_kb=0) - with pytest.raises(MemoryError): - response.fetchmany() - - @patch("mindsdb.integrations.libs.response.psutil") - def test_iterate_no_save_raises_memory_error(self, mock_psutil): - """Test MemoryError propagates through iterate_no_save.""" - mock_psutil.virtual_memory.side_effect = [ - _mock_virtual_memory(available_kb=1_000_000), # pre-loop check - _mock_virtual_memory(available_kb=0), # post-yield check after first chunk - ] - - df1 = pd.DataFrame({"x": list(range(1000))}) - df2 = pd.DataFrame({"x": list(range(1000))}) - - def data_gen(): - yield df1 - yield df2 - - columns = [Column(name="x")] - response = TableResponse(data_generator=data_gen(), columns=columns) - - with pytest.raises(MemoryError): - list(response.iterate_no_save()) diff --git a/tests/unit/interfaces/agents/test_generic_api_key.py b/tests/unit/interfaces/agents/test_generic_api_key.py deleted file mode 100644 index 3473aa05c70..00000000000 --- a/tests/unit/interfaces/agents/test_generic_api_key.py +++ /dev/null @@ -1,75 +0,0 @@ -import os -import unittest -from unittest.mock import patch - -from mindsdb.integrations.utilities.handler_utils import get_api_key - - -class TestGenericApiKeyHandling(unittest.TestCase): - """Test generic API key handling in agent creation and usage.""" - - def setUp(self): - """Set up test environment.""" - # Mock environment variables - self.env_patcher = patch.dict( - os.environ, {"OPENAI_API_KEY": "test-env-api-key", "ANTHROPIC_API_KEY": "test-env-anthropic-key"} - ) - self.env_patcher.start() - - def tearDown(self): - """Clean up after tests.""" - self.env_patcher.stop() - - def test_get_generic_api_key_from_args(self): - """Test retrieving generic API key from create_args.""" - # Test 
getting generic API key from create_args - api_key = get_api_key("openai", {"api_key": "test-generic-api-key"}) - self.assertEqual(api_key, "test-generic-api-key") - - def test_get_generic_api_key_from_params(self): - """Test retrieving generic API key from params dictionary.""" - # Test getting generic API key from params dictionary - api_key = get_api_key("openai", {"params": {"api_key": "test-generic-params-api-key"}}) - self.assertEqual(api_key, "test-generic-params-api-key") - - def test_get_generic_api_key_from_using(self): - """Test retrieving generic API key from using dictionary.""" - # Test getting generic API key from using dictionary - api_key = get_api_key("openai", {"using": {"api_key": "test-generic-using-api-key"}}) - self.assertEqual(api_key, "test-generic-using-api-key") - - def test_provider_specific_key_priority_over_generic(self): - """Test that provider-specific keys take priority over generic keys.""" - # Test that provider-specific key takes priority over generic key in args - api_key = get_api_key("openai", {"openai_api_key": "test-specific-api-key", "api_key": "test-generic-api-key"}) - self.assertEqual(api_key, "test-specific-api-key") - - # Test that provider-specific key takes priority over generic key in params - api_key = get_api_key( - "openai", - {"params": {"openai_api_key": "test-specific-params-api-key", "api_key": "test-generic-params-api-key"}}, - ) - self.assertEqual(api_key, "test-specific-params-api-key") - - # Test that provider-specific key takes priority over generic key in using - api_key = get_api_key( - "openai", - {"using": {"openai_api_key": "test-specific-using-api-key", "api_key": "test-generic-using-api-key"}}, - ) - self.assertEqual(api_key, "test-specific-using-api-key") - - def test_get_generic_api_key_for_google_provider(self): - """Test retrieving generic API key for Google/Gemini provider.""" - # Test getting generic API key for Google provider - api_key = get_api_key("google", {"api_key": 
"test-generic-google-api-key"}) - self.assertEqual(api_key, "test-generic-google-api-key") - - # Test that provider-specific key takes priority for Google provider - api_key = get_api_key( - "google", {"google_api_key": "test-specific-google-api-key", "api_key": "test-generic-google-api-key"} - ) - self.assertEqual(api_key, "test-specific-google-api-key") - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/interfaces/agents/test_namespace_warning.py b/tests/unit/interfaces/agents/test_namespace_warning.py deleted file mode 100644 index 3285932dce4..00000000000 --- a/tests/unit/interfaces/agents/test_namespace_warning.py +++ /dev/null @@ -1,10 +0,0 @@ -import warnings -import importlib - - -def test_no_model_namespace_warning_llmconfig_import_and_init(): - with warnings.catch_warnings(): - warnings.filterwarnings("error", message=r'.*protected namespace "model_"') - mod = importlib.import_module("mindsdb.integrations.utilities.rag.settings") - LLMConfig = getattr(mod, "LLMConfig") - _ = LLMConfig() diff --git a/tests/unit/interfaces/agents/test_pydantic_ai_agent.py b/tests/unit/interfaces/agents/test_pydantic_ai_agent.py deleted file mode 100644 index 47520d0a592..00000000000 --- a/tests/unit/interfaces/agents/test_pydantic_ai_agent.py +++ /dev/null @@ -1,135 +0,0 @@ -from contextlib import contextmanager -from unittest.mock import Mock, patch -import pandas as pd -from pydantic_ai.messages import ModelRequest, ModelResponse - -from mindsdb.interfaces.agents.pydantic_ai_agent import PydanticAIAgent - - -class TestPydanticAIAgent: - @staticmethod - def _new_agent(): - return PydanticAIAgent.__new__(PydanticAIAgent) - - @staticmethod - def _history_to_text(history): - if not history: - return "" - content = [] - for msg in history: - if not hasattr(msg, "parts"): - continue - for part in msg.parts: - if hasattr(part, "content"): - content.append(str(part.content)) - return "".join(content) - - @staticmethod - def _make_planning_agent(plan="Plan", 
estimated_steps=1): - planning_agent = Mock() - plan_output = Mock() - plan_output.plan = plan - plan_output.estimated_steps = estimated_steps - planning_agent.run_sync.return_value = Mock(output=plan_output) - return planning_agent - - @staticmethod - def _make_sql_agent(trace_id="trace-1", system_prompt="Test"): - agent = PydanticAIAgent.__new__(PydanticAIAgent) - agent.llm_params = {"model_name": "test-model"} - langfuse_client = Mock() - langfuse_client.get_trace_id.return_value = trace_id - langfuse_client.setup_trace = Mock() - langfuse_client.start_span = Mock(return_value=Mock()) - langfuse_client.end_span = Mock() - agent.langfuse_client_wrapper = langfuse_client - agent.model_instance = Mock() - agent.system_prompt = system_prompt - agent.agent_mode = "sql" - agent.select_targets = None - agent.sql_toolkit = Mock() - agent.sql_toolkit.knowledge_bases = [] - return agent - - @contextmanager - def _patched_agents(self, main_agent, catalog="Catalog"): - with ( - patch("mindsdb.interfaces.agents.pydantic_ai_agent.Agent") as MockAgent, - patch("mindsdb.interfaces.agents.pydantic_ai_agent.DataCatalogBuilder") as MockDataCatalog, - ): - MockAgent.side_effect = [self._make_planning_agent(), main_agent] - mock_catalog = Mock() - mock_catalog.build_data_catalog.return_value = catalog - MockDataCatalog.return_value = mock_catalog - yield - - def test_extracts_prompt_and_history_from_role_content_dataframe(self): - agent = self._new_agent() - df = pd.DataFrame( - [ - {"role": "user", "content": "Hello"}, - {"role": "assistant", "content": "Hi there"}, - {"role": "user", "content": "What's the status?"}, - ] - ) - - current_prompt, history = agent._extract_current_prompt_and_history(df, {}) - - assert current_prompt == "What's the status?" 
- assert len(history) == 2 - - def test_extracts_prompt_when_last_message_is_assistant(self): - agent = self._new_agent() - df = pd.DataFrame( - [ - {"role": "user", "content": "Ping"}, - {"role": "assistant", "content": "Pong"}, - ] - ) - - current_prompt, history = agent._extract_current_prompt_and_history(df, {}) - - assert current_prompt == "Ping" - assert len(history) == 1 - - def test_extracts_prompt_and_history_from_role_content_list(self): - agent = self._new_agent() - messages = [ - {"role": "user", "content": "Hello"}, - {"role": "assistant", "content": "Hi"}, - {"role": "user", "content": "Next step?"}, - ] - - current_prompt, history = agent._extract_current_prompt_and_history(messages, {}) - - assert current_prompt == "Next step?" - assert len(history) == 2 - assert isinstance(history[0], ModelRequest) - assert isinstance(history[1], ModelResponse) - - def test_extracts_prompt_from_role_content_list_when_last_is_assistant(self): - agent = self._new_agent() - messages = [ - {"role": "user", "content": "Ping"}, - {"role": "assistant", "content": "Pong"}, - ] - - current_prompt, history = agent._extract_current_prompt_and_history(messages, {}) - - assert current_prompt == "Ping" - assert len(history) == 1 - assert isinstance(history[0], ModelRequest) - - def test_extracts_prompt_and_history_from_qa_list(self): - agent = self._new_agent() - messages = [ - {"question": "What time?", "answer": "Noon"}, - {"question": "Next?", "answer": ""}, - ] - - current_prompt, history = agent._extract_current_prompt_and_history(messages, {}) - - assert current_prompt == "Next?" 
- assert len(history) == 2 - assert isinstance(history[0], ModelRequest) - assert isinstance(history[1], ModelResponse) diff --git a/tests/unit/interfaces/knowledge_base/test_default_storage_resolution.py b/tests/unit/interfaces/knowledge_base/test_default_storage_resolution.py deleted file mode 100644 index 6543ef28f4a..00000000000 --- a/tests/unit/interfaces/knowledge_base/test_default_storage_resolution.py +++ /dev/null @@ -1,79 +0,0 @@ -import os -from types import SimpleNamespace -from unittest.mock import MagicMock -from unittest.mock import patch - -from mindsdb.interfaces.knowledge_base.controller import KnowledgeBaseController -from mindsdb.interfaces.knowledge_base.default_storage_resolver import resolve_default_storage_engines -from mindsdb.utilities.config import config - - -def _make_controller(handler_meta_by_name): - integration_controller = MagicMock() - integration_controller.get_handler_meta.side_effect = lambda name: handler_meta_by_name.get(name) - integration_controller.get.return_value = None - - session = SimpleNamespace(integration_controller=integration_controller) - return KnowledgeBaseController(session), integration_controller - - -def test_resolve_default_vector_storage_uses_pgvector_from_config(): - previous_storage = config["knowledge_bases"].get("storage", None) - controller, _ = _make_controller({"pgvector": {"import": {"success": True}}}) - - try: - config.update({"knowledge_bases": {"storage": "pgvector"}}) - vector_db_name = "kb_pgvector_store" - controller._create_persistent_pgvector = MagicMock(return_value=vector_db_name) - - vector_db, vector_table = controller._resolve_default_vector_storage("kb_docs") - - assert vector_db == vector_db_name - assert vector_table == "kb_docs" - controller._create_persistent_pgvector.assert_called_once_with({}) - finally: - config.update({"knowledge_bases": {"storage": previous_storage}}) - - -def test_resolve_default_vector_storage_uses_faiss_from_config(): - previous_storage = 
config["knowledge_bases"].get("storage", None) - controller, _ = _make_controller({"duckdb_faiss": {"import": {"success": True}}}) - - try: - config.update({"knowledge_bases": {"storage": "faiss"}}) - - vector_db_name = "store_kb_docs" - controller._create_persistent_faiss = MagicMock(return_value=vector_db_name) - - vector_db, vector_table = controller._resolve_default_vector_storage("kb_docs") - - assert vector_db == vector_db_name - assert vector_table == "kb_docs" - controller._create_persistent_faiss.assert_called_once_with("kb_docs") - finally: - config.update({"knowledge_bases": {"storage": previous_storage}}) - - -def test_create_persistent_pgvector_reuses_existing_store(): - controller, integration_controller = _make_controller({}) - integration_controller.get.return_value = {"name": "kb_pgvector_store"} - - vector_store_name = controller._create_persistent_pgvector({"is_sparse": True, "vector_size": 30522}) - - assert vector_store_name == "kb_pgvector_store" - integration_controller.add.assert_not_called() - - -def test_resolver_uses_pgvector_url_fallback_when_storage_is_empty(): - previous_storage = config["knowledge_bases"].get("storage", None) - controller, _ = _make_controller({}) - - try: - config.update({"knowledge_bases": {"storage": None}}) - with patch.dict(os.environ, {"KB_PGVECTOR_URL": "postgresql://user:pass@host/db"}, clear=False): - resolved = resolve_default_storage_engines(config) - assert resolved["default_storage"] == "pgvector" - assert resolved["available_vector_engines"] == ["faiss", "pgvector"] - assert resolved["pgvector_enabled"] is True - finally: - config.update({"knowledge_bases": {"storage": previous_storage}}) diff --git a/tests/unit/interfaces/test_get_handler_meta.py b/tests/unit/interfaces/test_get_handler_meta.py deleted file mode 100644 index 70f01fd6712..00000000000 --- a/tests/unit/interfaces/test_get_handler_meta.py +++ /dev/null @@ -1,235 +0,0 @@ -""" -Unit tests for IntegrationController.get_handler_meta() focusing 
on the -handler_folder=None crash fix for community handler stubs. - -Covered scenarios: - 1. Community stub (path=None), no handler_folder passed → folder derived from stub metadata. - 2. Community stub (path=None), explicit handler_folder passed → explicit folder used as-is. - 3. Non-community (built-in) handler with path set → fetch path never triggered. - 4. Community stub whose "import.folder" is also None (malformed entry) → graceful None return. -""" - -import threading -import unittest -from pathlib import Path -from unittest.mock import MagicMock, patch - - -def _make_controller(): - """ - Return an IntegrationController instance with _load_handler_modules skipped - so no real filesystem / network access happens during construction. - """ - from mindsdb.interfaces.database.integrations import IntegrationController - - with patch.object(IntegrationController, "_load_handler_modules"): - ctrl = IntegrationController() - - # Minimal attributes that other methods rely on. - ctrl.handler_modules = {} - ctrl.handlers_import_status = {} - ctrl.handlers_cache = MagicMock() - ctrl._import_lock = threading.Lock() - ctrl._community_handlers_dir = None - return ctrl - - -def _community_stub(handler_name: str, folder: str | None = None): - """Build a community handler stub as created by _load_handler_modules.""" - from mindsdb.integrations.libs.const import HANDLER_SUPPORT_LEVEL - - return { - "path": None, - "import": { - "success": None, - "error_message": None, - "folder": folder if folder is not None else f"{handler_name}_handler", - "dependencies": [], - }, - "name": handler_name, - "title": handler_name.capitalize(), - "description": "", - "permanent": False, - "connection_args": None, - "class_type": None, - "type": None, - "support_level": HANDLER_SUPPORT_LEVEL.COMMUNITY, - } - - -def _builtin_stub(handler_name: str, handler_path: Path): - """Build a built-in handler stub as created by _register_handler_dir.""" - return { - "path": handler_path, - "import": { - 
"success": True, - "error_message": None, - "folder": handler_path.name, - "dependencies": [], - }, - "name": handler_name, - "permanent": False, - "connection_args": None, - "class_type": None, - "type": None, - "support_level": None, - "community": False, - } - - -class TestGetHandlerMetaCommunityFolderFallback(unittest.TestCase): - """get_handler_meta() derives handler_folder from stub metadata when None.""" - - def setUp(self): - self.ctrl = _make_controller() - - def test_community_stub_folder_derived_from_metadata(self): - """ - When handler_folder is not supplied, get_handler_meta() must read - "import.folder" from the stub and pass it to _fetch_community_handler. - """ - stub = _community_stub("github", folder="github_handler") - self.ctrl.handlers_import_status["github"] = stub - - fetched_meta = {**stub, "path": Path("/tmp/github_handler")} - fetched_meta["import"] = {**stub["import"], "success": True} - - with patch.object(self.ctrl, "_fetch_community_handler", return_value=fetched_meta) as mock_fetch: - result = self.ctrl.get_handler_meta("github") # no handler_folder - - mock_fetch.assert_called_once_with("github", "github_handler") - self.assertIsNotNone(result) - - def test_community_stub_explicit_folder_not_overridden(self): - """ - When handler_folder is explicitly provided, it must be forwarded as-is - and the stub metadata must not override it. 
- """ - stub = _community_stub("github", folder="github_handler") - self.ctrl.handlers_import_status["github"] = stub - - fetched_meta = {**stub, "path": Path("/tmp/custom_dir")} - fetched_meta["import"] = {**stub["import"], "success": True} - - with patch.object(self.ctrl, "_fetch_community_handler", return_value=fetched_meta) as mock_fetch: - result = self.ctrl.get_handler_meta("github", handler_folder="custom_dir") - - mock_fetch.assert_called_once_with("github", "custom_dir") - self.assertIsNotNone(result) - - def test_builtin_handler_fetch_path_not_triggered(self): - """ - A built-in handler with a real path must not trigger the community fetch - path regardless of the handler_folder argument. - """ - stub = _builtin_stub("mysql", Path("/opt/mindsdb/handlers/mysql_handler")) - self.ctrl.handlers_import_status["mysql"] = stub - - with ( - patch.object(self.ctrl, "_fetch_community_handler") as mock_fetch, - patch.object(self.ctrl, "import_handler", return_value=stub), - ): - result = self.ctrl.get_handler_meta("mysql") - - mock_fetch.assert_not_called() - self.assertIsNotNone(result) - - def test_community_stub_missing_folder_returns_none_gracefully(self): - """ - If the stub's "import.folder" is also None (malformed index entry), - the guard in get_handler_meta() must return None immediately — before - _fetch_community_handler is ever called — to avoid a TypeError from - fetch_handler(None, storage_dir). 
- """ - stub = _community_stub("broken") - stub["import"]["folder"] = None # simulate malformed entry - self.ctrl.handlers_import_status["broken"] = stub - - with patch.object(self.ctrl, "_fetch_community_handler") as mock_fetch: - result = self.ctrl.get_handler_meta("broken") # no handler_folder - - mock_fetch.assert_not_called() # guard exits before reaching _fetch_community_handler - self.assertIsNone(result) - - def test_unknown_handler_returns_none(self): - """get_handler_meta() for a completely unknown handler name returns None.""" - result = self.ctrl.get_handler_meta("does_not_exist") - self.assertIsNone(result) - - -class TestGetHandlersImportStatus(unittest.TestCase): - """get_handlers_import_status() must not fetch/import community stubs.""" - - def setUp(self): - self.ctrl = _make_controller() - - def test_community_stub_not_fetched_during_listing(self): - """ - Community stubs (support_level="community", path=None) must not trigger - _fetch_community_handler() or import_handler() during listing. - """ - stub = _community_stub("github", folder="github_handler") - self.ctrl.handlers_import_status["github"] = stub - - with ( - patch.object(self.ctrl, "_fetch_community_handler") as mock_fetch, - patch.object(self.ctrl, "import_handler") as mock_import, - ): - self.ctrl.get_handlers_import_status() - - mock_fetch.assert_not_called() - mock_import.assert_not_called() - - def test_community_stub_metadata_returned_in_listing(self): - """ - Stub metadata must be present in the result so the UI can render the - handler entry without a fetch having occurred. 
- """ - stub = _community_stub("github", folder="github_handler") - self.ctrl.handlers_import_status["github"] = stub - - with ( - patch.object(self.ctrl, "_fetch_community_handler"), - patch.object(self.ctrl, "import_handler"), - ): - result = self.ctrl.get_handlers_import_status() - - self.assertIn("github", result) - meta = result["github"] - self.assertEqual(meta["name"], "github") - self.assertEqual(meta["support_level"], "community") - self.assertIsNone(meta["path"]) - self.assertIsNotNone(meta["import"]) - - def test_non_community_handler_uses_get_handler_meta(self): - """ - Built-in handlers (path != None) must still go through get_handler_meta() - so that lazy import is triggered if needed. - """ - stub = _builtin_stub("mysql", Path("/opt/mindsdb/handlers/mysql_handler")) - self.ctrl.handlers_import_status["mysql"] = stub - - with patch.object(self.ctrl, "get_handler_meta", return_value=stub) as mock_meta: - self.ctrl.get_handlers_import_status() - - mock_meta.assert_called_once_with("mysql", stub["import"]["folder"]) - - def test_fetched_community_handler_uses_get_handler_meta(self): - """ - A community handler that has already been fetched (path != None) must - also go through get_handler_meta() — the early-return guard only applies - when path is None. 
- """ - stub = _community_stub("github", folder="github_handler") - stub["path"] = Path("/tmp/community_handlers/github_handler") - stub["import"]["success"] = True - self.ctrl.handlers_import_status["github"] = stub - - with patch.object(self.ctrl, "get_handler_meta", return_value=stub) as mock_meta: - self.ctrl.get_handlers_import_status() - - mock_meta.assert_called_once_with("github", "github_handler") - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/ml_handlers/__init__.py b/tests/unit/ml_handlers/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/unit/ml_handlers/base_ml_test.py b/tests/unit/ml_handlers/base_ml_test.py deleted file mode 100644 index ed18e80f268..00000000000 --- a/tests/unit/ml_handlers/base_ml_test.py +++ /dev/null @@ -1,13 +0,0 @@ -import os - -from ..executor_test_base import BaseExecutorTest - - -class BaseMLAPITest(BaseExecutorTest): - """ - Base test class for API-based ML engines - """ - @staticmethod - def get_api_key(env_var: str): - """Retrieve API key from environment variables""" - return os.environ.get(env_var) diff --git a/tests/unit/ml_handlers/test_openai.py b/tests/unit/ml_handlers/test_openai.py deleted file mode 100644 index eeadf0a5b1c..00000000000 --- a/tests/unit/ml_handlers/test_openai.py +++ /dev/null @@ -1,634 +0,0 @@ -import pandas -import unittest -from collections import OrderedDict -from unittest.mock import patch, MagicMock - -from mindsdb.integrations.handlers.openai_handler.openai_handler import OpenAIHandler - - -class TestOpenAI(unittest.TestCase): - """ - Unit tests for the OpenAI handler. 
- """ - - dummy_connection_data = OrderedDict( - openai_api_key="dummy_api_key", - ) - - def setUp(self): - # Mock model storage and engine storage - mock_engine_storage = MagicMock() - mock_model_storage = MagicMock() - - # Define a return value for the `get_connection_args` method of the mock engine storage - mock_engine_storage.get_connection_args.return_value = self.dummy_connection_data - - # Assign mock engine storage to instance variable for create validation tests - self.mock_engine_storage = mock_engine_storage - - self.handler = OpenAIHandler( - mock_model_storage, mock_engine_storage, connection_data={"connection_data": self.dummy_connection_data} - ) - - def test_create_validation_without_using_clause_raises_exception(self): - """ - Test if model creation raises an exception without a USING clause. - """ - - with self.assertRaisesRegex( - Exception, "OpenAI engine requires a USING clause! Refer to its documentation for more details." - ): - self.handler.create_validation("target", args={}, handler_storage=None) - - def test_create_validation_without_required_parameters_raises_exception(self): - """ - Test if model creation raises an exception without required parameters. - """ - - with self.assertRaisesRegex( - Exception, "One of `question_column`, `prompt_template` or `prompt` is required for this engine." - ): - self.handler.create_validation("target", args={"using": {}}, handler_storage=self.mock_engine_storage) - - def test_create_validation_with_invalid_parameter_combinations_raises_exception(self): - """ - Test if model creation raises an exception with invalid parameter combinations. 
- """ - - with self.assertRaisesRegex(Exception, "^Please provide one of"): - self.handler.create_validation( - "target", - args={"using": {"prompt_template": "dummy_prompt_template", "question_column": "question"}}, - handler_storage=self.mock_engine_storage, - ) - - def test_create_validation_with_unknown_arguments_raises_exception(self): - """ - Test if model creation raises an exception with unknown arguments. - """ - - with self.assertRaisesRegex(Exception, "^Unknown arguments:"): - self.handler.create_validation( - "target", - args={"using": {"prompt_template": "dummy_prompt_template", "unknown_arg": "unknown_arg"}}, - handler_storage=self.mock_engine_storage, - ) - - def test_create_validation_with_invalid_api_key_raises_exception(self): - """ - Test if model creation raises an exception with an invalid API key. - """ - - with self.assertRaisesRegex(Exception, "Invalid api key"): - self.handler.create_validation( - "target", - args={"using": {"prompt_template": "dummy_prompt_template"}}, - handler_storage=self.mock_engine_storage, - ) - - @patch("mindsdb.integrations.handlers.openai_handler.openai_handler.OpenAI") - def test_create_validation_with_valid_arguments_runs_no_errors(self, mock_openai): - """ - Test if model creation is validated correctly with valid arguments. - """ - - # Mock the models.retrieve method of the OpenAI client - mock_openai_client = MagicMock() - mock_openai_client.models.retrieve.return_value = MagicMock() - - mock_openai.return_value = mock_openai_client - - self.handler.create_validation( - "target", - args={"using": {"prompt_template": "dummy_prompt_template"}}, - handler_storage=self.mock_engine_storage, - ) - - @patch("mindsdb.integrations.handlers.openai_handler.openai_handler.OpenAI") - def test_create_with_invalid_mode_raises_exception(self, mock_openai_handler_openai_client): - """ - Test if model creation raises an exception with an invalid mode. 
- """ - - # Mock the models.list method of the OpenAI client - mock_models_list = MagicMock() - mock_models_list.data = [MagicMock(id="dummy_model_name")] - - mock_openai_handler_openai_client.return_value.models.list.return_value = mock_models_list - - with self.assertRaisesRegex(Exception, "^Invalid operation mode."): - self.handler.create( - "dummy_target", args={"using": {"prompt_template": "dummy_prompt_template", "mode": "dummy_mode"}} - ) - - @patch("mindsdb.integrations.handlers.openai_handler.openai_handler.OpenAI") - def test_create_with_unsupported_model_raises_exception(self, mock_openai_handler_openai_client): - """ - Test if model creation raises an exception with an invalid model name. - """ - - # Mock the models.list method of the OpenAI client - mock_models_list = MagicMock() - mock_models_list.data = [MagicMock(id="dummy_model_name")] - - mock_openai_handler_openai_client.return_value.models.list.return_value = mock_models_list - - with self.assertRaisesRegex(Exception, "^Invalid model name."): - self.handler.create( - "dummy_target", - args={ - "using": {"model_name": "dummy_unsupported_model_name", "prompt_template": "dummy_prompt_template"} - }, - ) - - @patch("mindsdb.integrations.handlers.openai_handler.openai_handler.OpenAI") - def test_create_with_valid_arguments_runs_no_errors(self, mock_openai_handler_openai_client): - """ - Test if model creation runs without errors with valid arguments. - """ - - # Mock the models.list method of the OpenAI client - mock_models_list = MagicMock() - mock_models_list.data = [MagicMock(id="dummy_model_name")] - - mock_openai_handler_openai_client.return_value.models.list.return_value = mock_models_list - - self.handler.create("dummy_target", args={"using": {"prompt_template": "dummy_prompt_template"}}) - - def test_predict_with_invalid_mode_raises_exception(self): - """ - Test if model prediction raises an exception with an invalid mode. 
- """ - - # Create a dummy DataFrame - df = pandas.DataFrame() - - with self.assertRaisesRegex(Exception, "^Invalid operation mode."): - self.handler.predict(df=df, args={"predict_params": {"mode": "dummy_mode"}}) - - def test_predict_in_embedding_mode_without_question_column_raises_exception(self): - """ - Test if model prediction raises an exception in embedding mode without a question column. - """ - - # Mock the json_get method of the model storage - self.handler.model_storage.json_get.return_value = { - "mode": "embedding", - } - - # Create a dummy DataFrame - df = pandas.DataFrame() - - with self.assertRaisesRegex(Exception, "Embedding mode needs a question_column"): - self.handler.predict(df=df, args={"predict_params": {"mode": "embedding"}}) - - def test_predict_in_image_mode_without_question_column_or_prompt_template_raises_exception(self): - """ - Test if model prediction raises an exception in image mode without a question column or prompt template. - """ - - # Mock the json_get method of the model storage - self.handler.model_storage.json_get.return_value = { - "mode": "image", - } - - # Create a dummy DataFrame - df = pandas.DataFrame() - - with self.assertRaisesRegex(Exception, "Image mode needs either `prompt_template` or `question_column`."): - self.handler.predict(df=df, args={"predict_params": {"mode": "image"}}) - - def test_predict_in_default_mode_without_question_column_in_data_raises_exception(self): - """ - Test if model prediction raises an exception in default mode without a question column in the DataFrame. 
- """ - - # Mock the json_get method of the model storage - self.handler.model_storage.json_get.return_value = {"mode": "default", "question_column": "question"} - - # Create a dummy DataFrame - df = pandas.DataFrame() - - with self.assertRaisesRegex(Exception, "This model expects a question to answer in the 'question' column."): - self.handler.predict(df=df, args={"predict_params": {"mode": "default"}}) - - def test_predict_in_default_mode_without_context_column_in_data_raises_exception(self): - """ - Test if model prediction raises an exception in default mode without a context column in the DataFrame. - """ - - # Mock the json_get method of the model storage - self.handler.model_storage.json_get.return_value = { - "mode": "default", - "question_column": "question", - "context_column": "context", - } - - # Create a dummy DataFrame - df = pandas.DataFrame(columns=["question"]) - - with self.assertRaisesRegex(Exception, "This model expects context in the 'context' column."): - self.handler.predict(df=df, args={"predict_params": {"mode": "default"}}) - - @patch("mindsdb.integrations.handlers.openai_handler.openai_handler.OpenAI") - def test_predict_in_default_mode_with_question_column_using_valid_arguments_and_data_runs_no_errors( - self, mock_openai_handler_openai_client - ): - """ - Test if model prediction returns the expected result in default mode using a question column. 
- """ - - # Mock the json_get method of the model storage - self.handler.model_storage.json_get.return_value = { - "target": "answer", - "mode": "default", - "model_name": "gpt-3.5-turbo", - "question_column": "question", - } - - # Mock the chat.completions.create method of the OpenAI client - mock_openai_client = MagicMock() - mock_openai_client.chat.completions.create.return_value = MagicMock( - choices=[MagicMock(message=MagicMock(content="Sweden"))] - ) - - mock_openai_handler_openai_client.return_value = mock_openai_client - - df = pandas.DataFrame({"question": ["Where is Stockholm located?"]}) - result = self.handler.predict(df, args={}) - - self.assertIsInstance(result, pandas.DataFrame) - self.assertTrue("answer" in result.columns) - - pandas.testing.assert_frame_equal(result, pandas.DataFrame({"answer": ["Sweden"]})) - - @patch("mindsdb.integrations.handlers.openai_handler.openai_handler.OpenAI") - def test_predict_in_default_mode_with_prompt_template_using_valid_arguments_and_data_runs_no_errors( - self, mock_openai_handler_openai_client - ): - """ - Test if model prediction returns the expected result in default mode using a prompt template. 
- """ - - # Mock the json_get method of the model storage - self.handler.model_storage.json_get.return_value = { - "target": "answer", - "mode": "default", - "model_name": "gpt-3.5-turbo", - "prompt_template": "Answer the question: {{question}}", - } - - # Mock the chat.completions.create method of the OpenAI client - mock_openai_client = MagicMock() - mock_openai_client.chat.completions.create.return_value = MagicMock( - choices=[MagicMock(message=MagicMock(content="Sweden"))] - ) - - mock_openai_handler_openai_client.return_value = mock_openai_client - - df = pandas.DataFrame({"question": ["Where is Stockholm located?"]}) - result = self.handler.predict(df, args={}) - - self.assertIsInstance(result, pandas.DataFrame) - self.assertTrue("answer" in result.columns) - - pandas.testing.assert_frame_equal(result, pandas.DataFrame({"answer": ["Sweden"]})) - - @patch("mindsdb.integrations.handlers.openai_handler.openai_handler.OpenAI") - def test_predict_in_default_mode_with_question_column_and_completion_model_using_valid_arguments_and_data_runs_no_errors( - self, mock_openai_handler_openai_client - ): - """ - Test if model prediction returns the expected result in default mode using a question column and a completion model. 
- """ - - # Mock the json_get method of the model storage - self.handler.model_storage.json_get.return_value = { - "target": "answer", - "mode": "legacy", - "model_name": "babbage-002", - "question_column": "question", - } - - # Mock the completions.create method of the OpenAI client - mock_openai_client = MagicMock() - mock_openai_client.completions.create.return_value = MagicMock(choices=[MagicMock(text="Sweden")]) - - mock_openai_handler_openai_client.return_value = mock_openai_client - - df = pandas.DataFrame({"question": ["Where is Stockholm located?"]}) - result = self.handler.predict(df, args={}) - - self.assertIsInstance(result, pandas.DataFrame) - self.assertTrue("answer" in result.columns) - - pandas.testing.assert_frame_equal(result, pandas.DataFrame({"answer": ["Sweden"]})) - - @patch("mindsdb.integrations.handlers.openai_handler.openai_handler.OpenAI") - def test_predict_in_default_mode_with_prompt_template_and_completion_model_using_valid_arguments_and_data_runs_no_errors( - self, mock_openai_handler_openai_client - ): - """ - Test if model prediction returns the expected result in default mode using a prompt template and a completion model. 
- """ - - # Mock the json_get method of the model storage - self.handler.model_storage.json_get.return_value = { - "target": "answer", - "mode": "default", - "model_name": "babbage-002", - "prompt_template": "Answer the question: {{question}}", - } - - # Mock the completions.create method of the OpenAI client - mock_openai_client = MagicMock() - mock_openai_client.chat.completions.create.return_value = MagicMock( - choices=[MagicMock(message=MagicMock(content="Sweden"))] - ) - - mock_openai_handler_openai_client.return_value = mock_openai_client - - df = pandas.DataFrame({"question": ["Where is Stockholm located?"]}) - result = self.handler.predict(df, args={}) - - self.assertIsInstance(result, pandas.DataFrame) - self.assertTrue("answer" in result.columns) - - pandas.testing.assert_frame_equal(result, pandas.DataFrame({"answer": ["Sweden"]})) - - @patch("mindsdb.integrations.handlers.openai_handler.openai_handler.OpenAI") - def test_predict_in_embedding_mode_using_valid_arguments_and_data_runs_no_errors( - self, mock_openai_handler_openai_client - ): - """ - Test if model prediction returns the expected result for an embeddings task. 
- """ - - # Mock the json_get method of the model storage - self.handler.model_storage.json_get.return_value = { - "model_name": "dummy_model_name", - "question_column": "text", - "target": "embeddings", - "mode": "embedding", - } - - # Mock the embeddings.completions.create method of the OpenAI client - mock_openai_client = MagicMock() - mock_openai_client.embeddings.create.return_value = MagicMock(data=[MagicMock(embedding=[0, 1])]) - - mock_openai_handler_openai_client.return_value = mock_openai_client - - df = pandas.DataFrame({"text": ["MindsDB"]}) - result = self.handler.predict(df, args={}) - - self.assertIsInstance(result, pandas.DataFrame) - self.assertTrue("embeddings" in result.columns) - - pandas.testing.assert_frame_equal(result, pandas.DataFrame({"embeddings": [[0, 1]]})) - - @patch("mindsdb.integrations.handlers.openai_handler.openai_handler.OpenAI") - def test_predict_in_image_mode_with_question_column_using_valid_arguments_and_data_runs_no_errors( - self, mock_openai_handler_openai_client - ): - """ - Test if model prediction returns the expected result for an image task using a question column. 
- """ - - # Mock the json_get method of the model storage - self.handler.model_storage.json_get.return_value = { - "question_column": "text", - "target": "image", - "mode": "image", - "model_name": "dall-e-2", - } - - # Mock the images.generate method of the OpenAI client - mock_openai_client = MagicMock() - mock_openai_client.images.generate.return_value = MagicMock(data=[MagicMock(url="dummy_image_url")]) - - mock_openai_handler_openai_client.return_value = mock_openai_client - - df = pandas.DataFrame({"text": ["Show me an image of two leapord cubs playing?"]}) - result = self.handler.predict(df, args={}) - - self.assertIsInstance(result, pandas.DataFrame) - self.assertTrue("image" in result.columns) - - pandas.testing.assert_frame_equal(result, pandas.DataFrame({"image": ["dummy_image_url"]})) - - @patch("mindsdb.integrations.handlers.openai_handler.openai_handler.OpenAI") - def test_predict_in_image_mode_with_prompt_template_using_valid_arguments_and_data_runs_no_errors( - self, mock_openai_handler_openai_client - ): - """ - Test if model prediction returns the expected result for an image task using a prompt template. 
- """ - - # Mock the json_get method of the model storage - self.handler.model_storage.json_get.return_value = { - "prompt_template": "Generate an image of {{text}}", - "target": "image", - "mode": "image", - "model_name": "dall-e-2", - } - - # Mock the images.generate method of the OpenAI client - mock_openai_client = MagicMock() - mock_openai_client.images.generate.return_value = MagicMock(data=[MagicMock(url="dummy_image_url")]) - - mock_openai_handler_openai_client.return_value = mock_openai_client - - df = pandas.DataFrame({"text": ["Leopard cubs playing"]}) - result = self.handler.predict(df, args={}) - - self.assertIsInstance(result, pandas.DataFrame) - self.assertTrue("image" in result.columns) - - pandas.testing.assert_frame_equal(result, pandas.DataFrame({"image": ["dummy_image_url"]})) - - @patch("mindsdb.integrations.handlers.openai_handler.openai_handler.OpenAI") - def test_predict_in_conversational_mode_with_using_arguments_and_data_runs_no_errors( - self, mock_openai_handler_openai_client - ): - """ - Test if model prediction returns the expected result for a conversational task. 
- """ - - # Mock the json_get method of the model storage - self.handler.model_storage.json_get.return_value = { - "user_column": "question", - "prompt": "you are a helpful assistant", - "assistant_column": "answer", - "target": "answer", - "mode": "conversational", - } - - # Mock the chat.completions.create method of the OpenAI client - mock_openai_client = MagicMock() - mock_openai_client.chat.completions.create.return_value = MagicMock( - choices=[MagicMock(message=MagicMock(content="Gamla Stan"))] - ) - - mock_openai_handler_openai_client.return_value = mock_openai_client - - df = pandas.DataFrame( - {"question": ["What is the capital of Sweden?", "What are some cool places to visit there?"]} - ) - result = self.handler.predict(df, args={}) - - self.assertIsInstance(result, pandas.DataFrame) - self.assertTrue("answer" in result.columns) - - pandas.testing.assert_frame_equal(result, pandas.DataFrame({"answer": ["", "Gamla Stan"]})) - - @patch("mindsdb.integrations.handlers.openai_handler.openai_handler.OpenAI") - def test_predict_in_conversational_full_mode_using_valid_arguments_and_data_runs_no_errors( - self, mock_openai_handler_openai_client - ): - """ - Test if model prediction returns the expected result for a conversational-full task. 
- """ - - # Mock the json_get method of the model storage - self.handler.model_storage.json_get.return_value = { - "user_column": "text", - "prompt": "you are a helpful assistant", - "assistant_column": "answer", - "target": "answer", - "mode": "conversational-full", - } - - # Mock the chat.completions.create method of the OpenAI client - mock_openai_client = MagicMock() - mock_openai_client.chat.completions.create.side_effect = [ - MagicMock(choices=[MagicMock(message=MagicMock(content="Stockholm"))]), - MagicMock(choices=[MagicMock(message=MagicMock(content="Gamla Stan"))]), - ] - - mock_openai_handler_openai_client.return_value = mock_openai_client - - df = pandas.DataFrame({"text": ["What is the capital of Sweden?", "What are some cool places to visit there?"]}) - result = self.handler.predict(df, args={}) - - self.assertIsInstance(result, pandas.DataFrame) - - pandas.testing.assert_frame_equal(result, pandas.DataFrame({"answer": ["Stockholm", "Gamla Stan"]})) - - def test_describe_runs_no_errors(self): - """ - Test if model describe returns the expected result. - """ - - # Mock the json_get method of the model storage - self.handler.model_storage.json_get.return_value = { - "user_column": "text", - "prompt": "you are a helpful assistant", - "assistant_column": "answer", - "target": "answer", - "mode": "conversational", - } - - result = self.handler.describe() - - self.assertIsInstance(result, pandas.DataFrame) - self.assertTrue("tables" in result.columns) - - pandas.testing.assert_frame_equal(result, pandas.DataFrame({"tables": ["args", "metadata"]})) - - def test_describe_args_runs_no_errors(self): - """ - Test if model describe returns the expected result. 
- """ - - # Mock the json_get method of the model storage - self.handler.model_storage.json_get.return_value = { - "user_column": "text", - "prompt": "you are a helpful assistant", - "assistant_column": "answer", - "target": "answer", - "mode": "conversational", - } - - result = self.handler.describe("args") - - self.assertIsInstance(result, pandas.DataFrame) - self.assertTrue("key" in result.columns) - self.assertTrue("value" in result.columns) - - pandas.testing.assert_frame_equal( - result, - pandas.DataFrame( - { - "key": ["user_column", "prompt", "assistant_column", "target", "mode"], - "value": ["text", "you are a helpful assistant", "answer", "answer", "conversational"], - } - ), - ) - - @patch("mindsdb.integrations.handlers.openai_handler.openai_handler.OpenAI") - def test_describe_metadata_runs_no_errors(self, mock_openai_handler_openai_client): - """ - Test if model describe returns the expected result. - """ - - # Mock the json_get method of the model storage - self.handler.model_storage.json_get.return_value = { - "user_column": "text", - "prompt": "you are a helpful assistant", - "assistant_column": "answer", - "target": "answer", - "mode": "conversational", - } - - # Mock the models.retrieve method of the OpenAI client: return a dict directly because the result is converted to a dict later - mock_openai_client = MagicMock() - mock_openai_client.models.retrieve.return_value = { - "model": "dummy_model_name", - "id": "dummy_model_id", - "created_at": "dummy_created_at", - "owner": "dummy_owner", - } - - mock_openai_handler_openai_client.return_value = mock_openai_client - - result = self.handler.describe("metadata") - - self.assertIsInstance(result, pandas.DataFrame) - self.assertTrue("key" in result.columns) - self.assertTrue("value" in result.columns) - - pandas.testing.assert_frame_equal( - result, - pandas.DataFrame( - { - "key": ["model", "id", "created_at", "owner"], - "value": ["dummy_model_name", "dummy_model_id", "dummy_created_at", 
"dummy_owner"], - } - ), - ) - - def test_finetune_with_unsupported_model_raises_exception(self): - """ - Test if model fine-tuning raises an exception with an unsupported model. - """ - - # Create a mock base model storage and assign it to the handler - mock_base_model_storage = MagicMock() - self.handler.base_model_storage = mock_base_model_storage - - # Mock the json_get method of the base model storage - self.handler.base_model_storage.json_get.return_value = {"model_name": "dummy_model_name"} - - with self.assertRaisesRegex(Exception, "^This model cannot be finetuned."): - self.handler.finetune( - "dummy_target", - args={ - "using": {"model_name": "dummy_unsupported_model_name", "prompt_template": "dummy_prompt_template"} - }, - ) - - # TODO: Add more unit tests for the finetune method - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/planner/test_column_pruning.py b/tests/unit/planner/test_column_pruning.py deleted file mode 100644 index e9f51391404..00000000000 --- a/tests/unit/planner/test_column_pruning.py +++ /dev/null @@ -1,394 +0,0 @@ -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast import Identifier, Select - -from mindsdb.api.executor.planner import plan_query -from mindsdb.api.executor.planner.steps import SubSelectStep - - -class TestColumnPruning: - """Test column pruning optimization.""" - - def test_basic_column_pruning(self): - """Test that only needed columns are fetched from tables.""" - query = parse_sql(""" - SELECT t1.id, t2.name - FROM int1.table1 t1 - JOIN int2.table2 t2 ON t1.id = t2.id - """) - - plan = plan_query(query, integrations=["int1", "int2"]) - - # First table should only fetch 'id' column - step0_query = str(plan.steps[0].query) - assert "`id`" in step0_query or "id" in step0_query - assert "SELECT *" not in step0_query - - # Second table should fetch 'id' (for join) and 'name' (for SELECT) - step1_query = str(plan.steps[1].query) - assert "id" in step1_query - assert "name" in 
step1_query - assert "SELECT *" not in step1_query - - def test_qualified_star_disables_pruning_for_that_table(self): - """Test that t1.* fetches all columns from t1 but t2 is still pruned.""" - query = parse_sql(""" - SELECT t1.*, t2.id - FROM int1.table1 t1 - JOIN int2.table2 t2 ON t1.id = t2.id - """) - - plan = plan_query(query, integrations=["int1", "int2"]) - - # First table should fetch all (qualified star) - step0_query = str(plan.steps[0].query) - assert "SELECT *" in step0_query - - # Second table should only fetch 'id' - step1_query = str(plan.steps[1].query) - assert "id" in step1_query - assert "SELECT *" not in step1_query - - def test_bare_star_disables_all_pruning(self): - """Test that SELECT * fetches all columns from all tables.""" - query = parse_sql(""" - SELECT * - FROM int1.table1 t1 - JOIN int2.table2 t2 ON t1.id = t2.id - """) - - plan = plan_query(query, integrations=["int1", "int2"]) - - # Both tables should fetch all columns - assert "SELECT * FROM table1" in str(plan.steps[0].query) - assert "SELECT * FROM table2" in str(plan.steps[1].query) - - def test_columns_from_where_clause_included(self): - """Test that columns used in WHERE clause are included.""" - query = parse_sql(""" - SELECT t1.id - FROM int1.table1 t1 - JOIN int2.table2 t2 ON t1.id = t2.id - WHERE t1.status = 'active' AND t2.type = 'premium' - """) - - plan = plan_query(query, integrations=["int1", "int2"]) - - # First table needs: id (SELECT + JOIN) + status (WHERE) - step0_query = str(plan.steps[0].query) - assert "id" in step0_query - assert "status" in step0_query - - # Second table needs: id (JOIN) + type (WHERE) - step1_query = str(plan.steps[1].query) - assert "id" in step1_query - assert "type" in step1_query - - def test_columns_from_join_conditions_included(self): - """Test that columns used in JOIN ON conditions are included.""" - query = parse_sql(""" - SELECT t1.name - FROM int1.table1 t1 - JOIN int2.table2 t2 ON t1.customer_id = t2.id AND t1.region = t2.region 
- """) - - plan = plan_query(query, integrations=["int1", "int2"]) - - # First table needs: name (SELECT) + customer_id, region (JOIN) - step0_query = str(plan.steps[0].query) - assert "name" in step0_query - assert "customer_id" in step0_query - assert "region" in step0_query - - # Second table needs: id, region (JOIN) - step1_query = str(plan.steps[1].query) - assert "id" in step1_query - assert "region" in step1_query - - def test_order_by_ordinal_resolution(self): - """Test that ORDER BY 1, 2 resolves to actual columns.""" - query = parse_sql(""" - SELECT t1.name, t1.created_at - FROM int1.table1 t1 - ORDER BY 1, 2 - """) - - plan = plan_query(query, integrations=["int1"]) - - # Should fetch both columns - query_str = str(plan.steps[0].query) - assert "name" in query_str - assert "created_at" in query_str - - def test_order_by_alias_resolution(self): - """Test that ORDER BY alias_name resolves to actual column.""" - query = parse_sql(""" - SELECT t1.customer_name AS cname, t1.id - FROM int1.table1 t1 - ORDER BY cname - """) - - plan = plan_query(query, integrations=["int1"]) - - # Should fetch customer_name (aliased as cname) - query_str = str(plan.steps[0].query) - assert "customer_name" in query_str - assert "id" in query_str - - def test_group_by_columns_included(self): - """Test that columns in GROUP BY are included.""" - query = parse_sql(""" - SELECT t1.category, COUNT(*) as cnt - FROM int1.table1 t1 - GROUP BY t1.category - """) - - plan = plan_query(query, integrations=["int1"]) - - # Should fetch category for grouping - query_str = str(plan.steps[0].query) - assert "category" in query_str - - def test_having_columns_included(self): - """Test that columns in HAVING clause are included.""" - query = parse_sql(""" - SELECT t1.category, COUNT(*) as cnt - FROM int1.table1 t1 - GROUP BY t1.category - HAVING t1.total > 100 - """) - - plan = plan_query(query, integrations=["int1"]) - - # Should fetch category (GROUP BY) + total (HAVING) - query_str = 
str(plan.steps[0].query) - assert "category" in query_str - assert "total" in query_str - - def test_case_sensitive_columns_preserved(self): - """Test that quoted identifiers (case-sensitive) preserve quoting.""" - # Build query with quoted identifier manually - query = Select( - targets=[Identifier(parts=["MyColumn"]), Identifier(parts=["regular_col"])], - from_table=Identifier("int.table1"), - ) - # Set is_quoted after creation - query.targets[0].is_quoted = [True] - - plan = plan_query(query, integrations=["int"]) - - # Should have both columns (quoting preservation is verified by is_quoted attribute) - query_str = str(plan.steps[0].query) - # At minimum, both columns should be present - assert "MyColumn" in query_str - assert "regular_col" in query_str - - def test_window_function_columns_included(self): - """Test that columns in window functions (PARTITION BY, ORDER BY) are included.""" - query = parse_sql(""" - SELECT - t1.id, - ROW_NUMBER() OVER (PARTITION BY t1.category ORDER BY t1.created_at) as row_num - FROM int1.table1 t1 - """) - - plan = plan_query(query, integrations=["int1"]) - - # Should fetch: id (SELECT) + category (PARTITION BY) + created_at (ORDER BY in OVER) - query_str = str(plan.steps[0].query) - assert "id" in query_str - assert "category" in query_str - assert "created_at" in query_str - - def test_subselect_pruning(self): - """Test that subselects with pure SELECT * get column pruning applied.""" - query = parse_sql(""" - SELECT sub.id - FROM (SELECT * FROM int1.table1) AS sub - JOIN int2.table2 t2 ON sub.id = t2.id - """) - - plan = plan_query(query, integrations=["int1", "int2"]) - - # Subselects with pure SELECT * should be pruned to only needed columns - found_pruned_subselect = False - for step in plan.steps: - # Look for SubSelectStep with id column (not SELECT *) - if isinstance(step, SubSelectStep): - query_str = str(step.query) - if "id" in query_str and "SELECT *" not in query_str: - found_pruned_subselect = True - break - - 
assert found_pruned_subselect, ( - f"Subselect with pure SELECT * should be pruned. Steps: {[str(s) for s in plan.steps]}" - ) - - def test_three_table_join_pruning(self): - """Test column pruning with 3-table join.""" - query = parse_sql(""" - SELECT t1.id, t2.name, t3.amount - FROM int1.table1 t1 - JOIN int2.table2 t2 ON t1.id = t2.customer_id - JOIN int3.table3 t3 ON t2.id = t3.order_id - """) - - plan = plan_query(query, integrations=["int1", "int2", "int3"]) - - # Find FetchDataframeSteps (not JoinSteps) - fetch_steps = [s for s in plan.steps if "FetchDataframe" in str(type(s))] - assert len(fetch_steps) == 3, f"Expected 3 fetch steps, got {len(fetch_steps)}" - - # Table1: id (SELECT + JOIN) - should NOT have SELECT * - step0_query = str(fetch_steps[0].query) - assert "id" in step0_query - assert "SELECT *" not in step0_query - - # Table2: customer_id (JOIN to t1), id (JOIN to t3), name (SELECT) - step1_query = str(fetch_steps[1].query) - assert "customer_id" in step1_query - assert "id" in step1_query - assert "name" in step1_query - assert "SELECT *" not in step1_query - - # Table3: order_id (JOIN), amount (SELECT) - step2_query = str(fetch_steps[2].query) - assert "order_id" in step2_query - assert "amount" in step2_query - assert "SELECT *" not in step2_query - - def test_no_pruning_when_no_columns_detected(self): - """Test fallback to SELECT * when column detection fails.""" - # Note: COUNT(*) is actually pushed down to the source in this implementation - # which is different behavior than falling back to SELECT * - # This is actually CORRECT behavior - the source database can compute COUNT(*) - query = parse_sql(""" - SELECT COUNT(*) - FROM int1.table1 t1 - """) - - plan = plan_query(query, integrations=["int1"]) - - # The query is sent to the source as-is (single table, no joins) - query_str = str(plan.steps[0].query) - # This query goes directly to integration (not pruned, sent as is) - assert "count(*)" in query_str.lower() or "COUNT(*)" in query_str 
- - def test_complex_expression_columns_included(self): - """Test that columns in complex expressions are included.""" - query = parse_sql(""" - SELECT t1.price * t1.quantity AS total - FROM int1.table1 t1 - WHERE t1.discount > 0 - """) - - plan = plan_query(query, integrations=["int1"]) - - # Should fetch: price, quantity (expression), discount (WHERE) - query_str = str(plan.steps[0].query) - assert "price" in query_str - assert "quantity" in query_str - assert "discount" in query_str - - -class TestColumnPruningEdgeCases: - """Test edge cases and error conditions.""" - - def test_duplicate_column_references(self): - """Test that duplicate column references don't break pruning.""" - query = parse_sql(""" - SELECT t1.id, t1.id, t1.id - FROM int1.table1 t1 - """) - - plan = plan_query(query, integrations=["int1"]) - - # Single table query gets sent as-is (no join optimization) - # The query goes directly to the integration - query_str = str(plan.steps[0].query) - assert "id" in query_str - - def test_mixed_qualified_and_unqualified_columns(self): - """Test mixing qualified (t1.col) and unqualified (col) column references.""" - query = parse_sql(""" - SELECT t1.id, name - FROM int1.table1 t1 - """) - - plan = plan_query(query, integrations=["int1"]) - - # Should fetch both columns - query_str = str(plan.steps[0].query) - assert "id" in query_str - assert "name" in query_str - - def test_self_join_pruning(self): - """Test column pruning in self-joins.""" - query = parse_sql(""" - SELECT t1.id, t2.name - FROM int1.table1 t1 - JOIN int1.table1 t2 ON t1.parent_id = t2.id - """) - - plan = plan_query(query, integrations=["int1"]) - - # Self-join from same integration is optimized - sent as single query to source! 
- fetch_steps = [s for s in plan.steps if "FetchDataframe" in str(type(s))] - - if len(fetch_steps) == 1: - # Optimized: entire join sent to source database - query_str = str(fetch_steps[0].query) - assert "JOIN" in query_str - assert "id" in query_str - assert "name" in query_str - assert "parent_id" in query_str - else: - # Not optimized: separate fetches with column pruning - assert len(fetch_steps) >= 2 - step0_query = str(fetch_steps[0].query) - assert "id" in step0_query - assert "parent_id" in step0_query - assert "SELECT *" not in step0_query - - step1_query = str(fetch_steps[1].query) - assert "id" in step1_query - assert "name" in step1_query - assert "SELECT *" not in step1_query - - def test_column_in_multiple_clauses(self): - """Test that column used in multiple clauses is included once.""" - query = parse_sql(""" - SELECT t1.status - FROM int1.table1 t1 - WHERE t1.status = 'active' - ORDER BY t1.status - """) - - plan = plan_query(query, integrations=["int1"]) - - # Should fetch 'status' (used in SELECT, WHERE, ORDER BY) - query_str = str(plan.steps[0].query) - assert query_str.count("status") >= 1 - - def test_subselect_with_mixed_star(self): - """Test that SubSelectStep passes through all columns (no pruning at SubSelect level).""" - query = parse_sql(""" - SELECT sub.id - FROM (SELECT *, 'dummy' as extra FROM int1.table1) AS sub - JOIN int2.table2 t2 ON sub.id = t2.id - """) - - plan = plan_query(query, integrations=["int1", "int2"]) - - # SubSelectStep should use SELECT * to pass through all columns - # Column pruning happens at the table fetch level, not at SubSelectStep - found_subselect_with_star = False - for step in plan.steps: - if isinstance(step, SubSelectStep): - query_str = str(step.query) - if "SELECT *" in query_str: - found_subselect_with_star = True - break - - assert found_subselect_with_star, ( - f"Expected SubSelectStep to use SELECT * (no pruning). 
Steps: {[str(s) for s in plan.steps]}" - ) diff --git a/tests/unit/planner/test_injected_data.py b/tests/unit/planner/test_injected_data.py deleted file mode 100644 index 9b3c2fd8377..00000000000 --- a/tests/unit/planner/test_injected_data.py +++ /dev/null @@ -1,90 +0,0 @@ -import copy - -from mindsdb_sql_parser.ast import Identifier, Select, Join, Constant, Data, BinaryOperation, Star -from mindsdb_sql_parser.utils import JoinType - -from mindsdb.api.executor.planner import plan_query -from mindsdb.api.executor.planner.query_plan import QueryPlan -from mindsdb.api.executor.planner.step_result import Result -from mindsdb.api.executor.planner.steps import DataStep, JoinStep, ApplyPredictorStep, SubSelectStep, QueryStep - - -class TestInjectedData: - def test_select_from_table(self): - content = [ - {"a": 1}, - {"a": 2}, - ] - - query = Select( - targets=[Identifier("int1.t")], - from_table=Data(content), - where=BinaryOperation(op="=", args=[Identifier("a"), Constant(1)]), - ) - - plan = plan_query(query, integrations=["int1"], default_namespace="mindsdb", predictor_metadata=[]) - - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - DataStep(data=content), - SubSelectStep( - query=Select( - targets=[Identifier("int1.t")], - where=BinaryOperation(op="=", args=[Identifier("a"), Constant(1)]), - ), - dataframe=Result(0), - table_name=None, - add_absent_cols=True, - ), - ], - ) - - assert plan.steps == expected_plan.steps - - def test_join(self): - content = [ - {"a": 1}, - {"a": 2}, - ] - - query = Select( - targets=[Identifier("t.x")], - from_table=Join(left=Data(content, alias=Identifier("t")), right=Identifier("pred"), join_type="JOIN"), - where=BinaryOperation(op="=", args=[Identifier("t.a"), Constant(1)]), - ) - - subquery = copy.deepcopy(query) - subquery.from_table = None - - plan = plan_query( - query, - integrations=["int1"], - default_namespace="mindsdb", - predictor_metadata=[{"name": "pred", "integration_name": "mindsdb"}], - ) - - 
expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - DataStep(data=content), - SubSelectStep( - query=Select( - targets=[Star()], # No column pruning with predictor joins - where=BinaryOperation(op="=", args=[Identifier("a"), Constant(1)]), - ), - dataframe=Result(0), - table_name="t", - add_absent_cols=True, - ), - ApplyPredictorStep(namespace="mindsdb", dataframe=Result(1), predictor=Identifier("pred")), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.JOIN), - ), - QueryStep(subquery, from_table=Result(3), strict_where=False), - ], - ) - - assert plan.steps == expected_plan.steps diff --git a/tests/unit/planner/test_insert_from_select.py b/tests/unit/planner/test_insert_from_select.py deleted file mode 100644 index 9648667fe87..00000000000 --- a/tests/unit/planner/test_insert_from_select.py +++ /dev/null @@ -1,272 +0,0 @@ -from mindsdb_sql_parser import parse_sql, Join -from mindsdb_sql_parser.ast import ( - Identifier, - Insert, - Select, - Constant, - Star, - BinaryOperation, - Function, -) -import pandas as pd - -from mindsdb.api.executor.planner import plan_query -from mindsdb.api.executor.planner.query_plan import QueryPlan -from mindsdb.api.executor.planner.step_result import Result -from mindsdb.api.executor.planner.steps import ( - FetchDataframeStep, - InsertToTable, - QueryStep, - FetchDataframeStepPartition, - JoinStep, - ApplyPredictorStep, -) -from mindsdb_sql_parser.utils import JoinType - - -class TestPlanInsertFromSelect: - def test_insert_from_select_with_table_plan(self): - query = Insert( - table=Identifier("INT_1.table_1"), - columns=None, - from_select=Select( - targets=[Star()], - from_table=Identifier("INT_2.table_2"), - where=None, - ), - ) - plan = plan_query(query, integrations=["INT_1", "INT_2"]) - - step_1 = FetchDataframeStep( - integration="INT_2", - query=Select( - targets=[Star()], - from_table=Identifier("table_2"), - where=None, 
- ), - step_num=0, - ) - expected_plan = QueryPlan( - steps=[step_1, InsertToTable(table=Identifier("INT_1.table_1"), step_num=1, dataframe=Result(0))] - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_insert_from_select_with_table_and_columns_plan(self): - query = Insert( - table=Identifier("INT_1.table_1"), - from_select=Select( - targets=[Identifier("column_1"), Identifier("column_2")], - from_table=Identifier("INT_2.table_2"), - where=None, - ), - ) - plan = plan_query(query, integrations=["INT_1", "INT_2"]) - - step_1 = FetchDataframeStep( - integration="INT_2", - query=Select( - targets=[ - Identifier("column_1", alias=Identifier("column_1")), - Identifier("column_2", alias=Identifier("column_2")), - ], - from_table=Identifier("table_2"), - where=None, - ), - step_num=0, - ) - expected_plan = QueryPlan( - steps=[ - step_1, - InsertToTable( - table=Identifier("INT_1.table_1"), - step_num=1, - dataframe=Result(0), - ), - ] - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_insert_from_select_with_table_and_columns_and_where_plan(self): - query = Insert( - table=Identifier("INT_1.table_1"), - from_select=Select( - targets=[Identifier("column_1"), Identifier("column_2")], - from_table=Identifier("INT_2.table_2"), - where=BinaryOperation( - op=">", - args=[ - Identifier("column_3", alias=Identifier("column_3")), - Constant(10), - ], - ), - ), - ) - plan = plan_query(query, integrations=["int_1", "int_2"]) - - step_1 = FetchDataframeStep( - integration="int_2", - query=Select( - targets=[ - Identifier("column_1", alias=Identifier("column_1")), - Identifier("column_2", alias=Identifier("column_2")), - ], - from_table=Identifier("table_2"), - where=BinaryOperation( - op=">", - args=[ - Identifier("column_3", alias=Identifier("column_3")), - Constant(10), - ], - ), - ), - step_num=0, - ) - expected_plan = QueryPlan( - steps=[ - step_1, - InsertToTable( - 
table=Identifier("INT_1.table_1"), - step_num=1, - dataframe=Result(0), - ), - ] - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_insert_from_select_without_table_plan(self): - select_query = Select( - targets=[Function("function", args=[])], - from_table=None, - where=None, - ) - query = Insert(table=Identifier("INT_1.table_1"), from_select=select_query) - - plan = plan_query(query, integrations=["INT_1"]) - - step_1 = QueryStep( - query=select_query, - step_num=0, - from_table=pd.DataFrame([None]), - ) - expected_plan = QueryPlan( - steps=[step_1, InsertToTable(table=Identifier("INT_1.table_1"), step_num=1, dataframe=Result(0))] - ) - for i in range(len(plan.steps)): - step = plan.steps[i] - expected_step = expected_plan.steps[i] - - if ( - hasattr(step, "from_table") - and isinstance(step.from_table, pd.DataFrame) - and isinstance(expected_step.from_table, pd.DataFrame) - ): - assert step.from_table.equals(expected_step.from_table) - else: - assert step == expected_step - - -class TestPartitions: - def test_insert_from_select(self): - query = parse_sql(""" - insert into int2.table2 - select id from int1.table1 - using track_column = id, batch_size=100 - """) - - plan = plan_query(query, integrations=["int1", "int2"]) - - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStepPartition( - step_num=0, - integration="int1", - query=parse_sql("select id as id from table1"), - params={"batch_size": 100, "track_column": "id"}, - steps=[InsertToTable(table=Identifier("int2.table2"), step_num=1, dataframe=Result(0))], - ) - ], - ) - - assert plan.steps == expected_plan.steps - - def test_insert_from_join(self): - query = parse_sql(""" - insert into int2.table2 - ( select a, b from int1.table1 - join int1.table3 ) - using track_column = id, batch_size=100 - """) - - plan = plan_query(query, integrations=["int1", "int2"]) - - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - 
FetchDataframeStepPartition( - step_num=0, - integration="int1", - query=Select( - targets=[Identifier("a"), Identifier("b")], - from_table=Join(left=Identifier("table1"), right=Identifier("table3"), join_type="join"), - using={}, - ), - params={"track_column": "id", "batch_size": 100}, - steps=[InsertToTable(table=Identifier("int2.table2"), step_num=1, dataframe=Result(0))], - ) - ], - ) - - assert plan.steps == expected_plan.steps - - def test_select_join_model(self): - query = parse_sql(""" - select id from int1.table1 - join pred - using track_column = id, batch_size=100 - """) - - plan = plan_query( - query, - integrations=["int1", "int2"], - default_namespace="mindsdb", - predictor_metadata=[ - {"name": "pred", "integration_name": "mindsdb", "to_predict": ["ttt"]}, - ], - ) - - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStepPartition( - step_num=0, - integration="int1", - query=parse_sql("select * from table1"), - params={"batch_size": 100, "track_column": "id"}, - steps=[ - ApplyPredictorStep( - step_num=1, - namespace="mindsdb", - dataframe=Result(0), - params={}, - predictor=Identifier("pred"), - ), - JoinStep( - step_num=2, - left=Result(0), - right=Result(1), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.JOIN), - ), - QueryStep(step_num=3, query=parse_sql("select id"), from_table=Result(2), strict_where=False), - ], - ) - ], - ) - - assert plan.steps == expected_plan.steps diff --git a/tests/unit/planner/test_integration_select.py b/tests/unit/planner/test_integration_select.py deleted file mode 100644 index e8851b4b57f..00000000000 --- a/tests/unit/planner/test_integration_select.py +++ /dev/null @@ -1,899 +0,0 @@ -import pytest - -from mindsdb_sql_parser.ast import ( - Identifier, - Select, - NullConstant, - Constant, - Star, - Parameter, - BinaryOperation, - Function, - TableColumn, - OrderBy, -) -from mindsdb_sql_parser import parse_sql - -from 
mindsdb.api.executor.planner.exceptions import PlanningException -from mindsdb.api.executor.planner import plan_query -from mindsdb.api.executor.planner.query_plan import QueryPlan -from mindsdb.api.executor.planner.query_planner import MINDSDB_SQL_FUNCTIONS -from mindsdb.api.executor.planner.step_result import Result -from mindsdb.api.executor.planner.steps import ( - FetchDataframeStep, - CreateTableStep, - SubSelectStep, - UpdateToTable, - DeleteStep, -) - - -class TestPlanIntegrationSelect: - def test_integration_select_plan(self): - query = Select( - targets=[Identifier("column1"), Constant(1), NullConstant(), Function("database", args=[])], - from_table=Identifier("INT.tab"), - where=BinaryOperation( - "and", - args=[ - BinaryOperation("=", args=[Identifier("column1"), Identifier("column2")]), - BinaryOperation(">", args=[Identifier("column3"), Constant(0)]), - ], - ), - ) - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStep( - integration="int", - query=Select( - targets=[ - Identifier("column1", alias=Identifier("column1")), - Constant(1), - NullConstant(), - Function("database", args=[]), - ], - from_table=Identifier("tab"), - where=BinaryOperation( - "and", - args=[ - BinaryOperation("=", args=[Identifier("column1"), Identifier("column2")]), - BinaryOperation(">", args=[Identifier("column3"), Constant(0)]), - ], - ), - ), - step_num=0, - ), - ], - ) - - plan = plan_query(query, integrations=["int"]) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_integration_name_is_case_insensitive(self): - query = Select( - targets=[Identifier("tab.column1")], - from_table=Identifier("INT.tab"), - where=BinaryOperation( - "and", - args=[ - BinaryOperation("=", args=[Identifier("column1"), Identifier("column2")]), - BinaryOperation(">", args=[Identifier("column3"), Constant(0)]), - ], - ), - ) - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStep( - 
integration="INT", - query=Select( - targets=[Identifier("tab.column1", alias=Identifier("column1"))], - from_table=Identifier("tab"), - where=BinaryOperation( - "and", - args=[ - BinaryOperation("=", args=[Identifier("column1"), Identifier("column2")]), - BinaryOperation(">", args=[Identifier("column3"), Constant(0)]), - ], - ), - ), - ), - ], - ) - - plan = plan_query(query, integrations=["INT"]) - - assert plan.steps == expected_plan.steps - - def test_integration_select_limit_offset(self): - query = Select( - targets=[Identifier("column1")], - from_table=Identifier("int.tab"), - where=BinaryOperation("=", args=[Identifier("column1"), Identifier("column2")]), - limit=Constant(10), - offset=Constant(15), - ) - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStep( - integration="int", - query=Select( - targets=[Identifier("column1", alias=Identifier("column1"))], - from_table=Identifier("tab"), - where=BinaryOperation("=", args=[Identifier("column1"), Identifier("column2")]), - limit=Constant(10), - offset=Constant(15), - ), - ), - ], - ) - - plan = plan_query(query, integrations=["int"]) - - assert plan.steps == expected_plan.steps - - def test_integration_select_order_by(self): - query = Select( - targets=[Identifier("column1")], - from_table=Identifier("int.tab"), - where=BinaryOperation("=", args=[Identifier("column1"), Identifier("column2")]), - limit=Constant(10), - offset=Constant(15), - order_by=[OrderBy(field=Identifier("tab.column1"))], - ) - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStep( - integration="int", - query=Select( - targets=[Identifier("column1", alias=Identifier("column1"))], - from_table=Identifier("tab"), - where=BinaryOperation("=", args=[Identifier("column1"), Identifier("column2")]), - limit=Constant(10), - offset=Constant(15), - order_by=[OrderBy(field=Identifier("tab.column1"))], - ), - ), - ], - ) - - plan = plan_query(query, integrations=["int"]) - - assert plan.steps 
== expected_plan.steps - - def test_integration_select_plan_star(self): - query = Select(targets=[Star()], from_table=Identifier("int.tab")) - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStep(integration="int", query=Select(targets=[Star()], from_table=Identifier("tab"))), - ], - ) - - plan = plan_query(query, integrations=["int"]) - - assert plan.steps == expected_plan.steps - - def test_integration_select_plan_complex_path(self): - query = Select( - targets=[Identifier(parts=["int", "tab", "a column with spaces"])], from_table=Identifier("int.tab") - ) - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStep( - integration="int", - query=Select( - targets=[Identifier("tab.`a column with spaces`", alias=Identifier("a column with spaces"))], - from_table=Identifier("tab"), - ), - ), - ], - ) - - plan = plan_query(query, integrations=["int"]) - - assert plan.steps == expected_plan.steps - - def test_integration_select_table_alias(self): - query = Select(targets=[Identifier("alias.col1")], from_table=Identifier("int.tab", alias=Identifier("alias"))) - - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStep( - integration="int", - query=Select( - targets=[Identifier(parts=["alias", "col1"], alias=Identifier("col1"))], - from_table=Identifier(parts=["tab"], alias=Identifier("alias")), - ), - ), - ], - ) - - plan = plan_query(query, integrations=["int"]) - - assert plan.steps == expected_plan.steps - - def test_integration_select_column_alias(self): - query = Select(targets=[Identifier("col1", alias=Identifier("column_alias"))], from_table=Identifier("int.tab")) - - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStep( - integration="int", - query=Select( - targets=[Identifier(parts=["col1"], alias=Identifier("column_alias"))], - from_table=Identifier(parts=["tab"]), - ), - ), - ], - ) - - plan = plan_query(query, integrations=["int"]) - - assert 
plan.steps == expected_plan.steps - - def test_integration_select_table_alias_full_query(self): - sql = "select ta.sqft from int.test_data.home_rentals as ta" - - query = parse_sql(sql) - - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStep( - integration="int", - query=Select( - targets=[Identifier(parts=["ta", "sqft"], alias=Identifier("sqft"))], - from_table=Identifier(parts=["test_data", "home_rentals"], alias=Identifier("ta")), - ), - ), - ], - ) - - plan = plan_query(query, integrations=["int"]) - - assert plan.steps == expected_plan.steps - - def test_integration_select_plan_group_by(self): - query = Select( - targets=[ - Identifier("column1"), - Identifier("column2"), - Function(op="sum", args=[Identifier(parts=["column3"])], alias=Identifier("total")), - ], - from_table=Identifier("int.tab"), - group_by=[Identifier("column1"), Identifier("column2")], - having=BinaryOperation("=", args=[Identifier("column1"), Constant(0)]), - ) - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStep( - integration="int", - query=Select( - targets=[ - Identifier("column1", alias=Identifier("column1")), - Identifier("column2", alias=Identifier("column2")), - Function(op="sum", args=[Identifier(parts=["column3"])], alias=Identifier("total")), - ], - from_table=Identifier("tab"), - group_by=[Identifier("column1"), Identifier("column2")], - having=BinaryOperation("=", args=[Identifier("column1"), Constant(0)]), - ), - ), - ], - ) - - plan = plan_query(query, integrations=["int"]) - - assert plan.steps == expected_plan.steps - - def test_no_integration_error(self): - query = Select( - targets=[Identifier("tab1.column1"), Identifier("pred.predicted")], from_table=Identifier("int.tab") - ) - with pytest.raises(PlanningException): - plan_query(query, integrations=[], predictor_namespace="mindsdb") - - def test_integration_select_subquery_in_target(self): - query = Select( - targets=[ - Identifier("column1"), - Select( - 
targets=[Identifier("column2")], - from_table=Identifier("int.tab"), - limit=Constant(1), - alias=Identifier("subquery"), - ), - ], - from_table=Identifier("int.tab"), - ) - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStep( - integration="int", - query=Select( - targets=[ - Identifier("column1", alias=Identifier("column1")), - Select( - targets=[Identifier("column2", alias=Identifier("column2"))], - from_table=Identifier("tab"), - limit=Constant(1), - alias=Identifier("subquery"), - ), - ], - from_table=Identifier("tab"), - ), - ), - ], - ) - - plan = plan_query(query, integrations=["int"]) - - assert plan.steps == expected_plan.steps - - def test_integration_select_subquery_in_from(self): - query = Select( - targets=[Identifier("column1")], - from_table=Select( - targets=[Identifier("column1")], from_table=Identifier("int.tab"), alias=Identifier("subquery") - ), - ) - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStep( - integration="int", - query=Select( - targets=[Identifier("column1", alias=Identifier("column1"))], - from_table=Select( - targets=[Identifier("column1", alias=Identifier("column1"))], - from_table=Identifier("tab"), - alias=Identifier("subquery"), - ), - ), - ), - ], - ) - - plan = plan_query(query, integrations=["int"]) - - assert plan.steps == expected_plan.steps - - def test_integration_select_subquery_in_where(self): - query = Select( - targets=[Star()], - from_table=Identifier("int.tab1"), - where=BinaryOperation( - op="in", - args=( - Identifier(parts=["column1"]), - Select(targets=[Identifier("column2")], from_table=Identifier("int.tab2"), parentheses=True), - ), - ), - ) - - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStep( - integration="int", - query=Select( - targets=[Star()], - from_table=Identifier("tab1"), - where=BinaryOperation( - op="in", - args=[ - Identifier("column1"), - Select( - targets=[Identifier("column2", 
alias=Identifier("column2"))], - from_table=Identifier("tab2"), - parentheses=True, - ), - ], - ), - ), - ), - ], - ) - - plan = plan_query(query, integrations=["int"]) - - assert plan.steps == expected_plan.steps - - def test_integration_select_default_namespace(self): - query = Select( - targets=[Identifier("column1"), Constant(1), Function("database", args=[])], - from_table=Identifier("tab"), - where=BinaryOperation( - "and", - args=[ - BinaryOperation("=", args=[Identifier("column1"), Identifier("column2")]), - BinaryOperation(">", args=[Identifier("column3"), Constant(0)]), - ], - ), - ) - - expected_plan = QueryPlan( - integrations=["int"], - default_namespace="int", - steps=[ - FetchDataframeStep( - integration="int", - query=Select( - targets=[ - Identifier("column1", alias=Identifier("column1")), - Constant(1), - Function("database", args=[]), - ], - from_table=Identifier("tab"), - where=BinaryOperation( - "and", - args=[ - BinaryOperation("=", args=[Identifier("column1"), Identifier("column2")]), - BinaryOperation(">", args=[Identifier("column3"), Constant(0)]), - ], - ), - ), - step_num=0, - ), - ], - ) - - plan = plan_query(query, integrations=["int"], default_namespace="int") - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_integration_select_default_namespace_subquery_in_from(self): - query = Select( - targets=[Identifier("column1")], - from_table=Select( - targets=[Identifier("column1")], from_table=Identifier("tab"), alias=Identifier("subquery") - ), - ) - expected_plan = QueryPlan( - integrations=["int"], - default_namespace="int", - steps=[ - FetchDataframeStep( - integration="int", - query=Select( - targets=[ - Identifier("column1", alias=Identifier("column1")), - ], - from_table=Select( - targets=[Identifier("column1", alias=Identifier("column1"))], - from_table=Identifier("tab"), - alias=Identifier("subquery"), - ), - ), - ), - ], - ) - - plan = plan_query(query, integrations=["int"], 
default_namespace="int") - - assert plan.steps == expected_plan.steps - - def test_integration_select_3_level(self): - sql = "select * from xxx.yyy.zzz where x > 1" - query = parse_sql(sql) - - expected_plan = QueryPlan( - integrations=["int"], - default_namespace="xxx", - steps=[ - FetchDataframeStep( - integration="xxx", - query=Select( - targets=[Star()], - from_table=Identifier("yyy.zzz"), - where=BinaryOperation(op=">", args=[Identifier("x"), Constant(1)]), - ), - ) - ], - ) - - plan = plan_query(query, integrations=["xxx"]) - - assert plan.steps == expected_plan.steps - - def test_native_query_no_sub_select(self): - # Just select to integration - sql = "select * from integration1 (select * from task_items)" - query = parse_sql(sql) - - plan = plan_query(query, integrations=["integration1"]) - - expected_plan = QueryPlan( - default_namespace="integration1", - steps=[ - FetchDataframeStep(integration="integration1", raw_query="select * from task_items"), - ], - ) - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_native_query(self): - # select on results after select to integration - - sql = "select date_trunc('m', last_date) from integration1 (select * from task_items ) a limit 1" - query = parse_sql(sql) - - plan = plan_query(query, integrations=["integration1"]) - - expected_plan = QueryPlan( - default_namespace="integration1", - steps=[ - FetchDataframeStep(integration="integration1", raw_query="select * from task_items"), - SubSelectStep( - dataframe=Result(0), query=parse_sql("select date_trunc('m', last_date) limit 1"), table_name="a" - ), - ], - ) - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_update_table(self): - # select on results after select to integration - - sql = "update integration1.direct_messages set a=1 where b=2" - query = parse_sql(sql) - - plan = plan_query(query, integrations=["integration1"]) - - expected_plan = QueryPlan( - 
default_namespace="integration1", - steps=[ - UpdateToTable(dataframe=None, table=Identifier("integration1.direct_messages"), update_command=query), - ], - ) - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_select_from_table_subselect(self): - query = parse_sql( - """ - select * from int2.tab1 - where x1 in (select id from int1.tab1) - """ - ) - - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - FetchDataframeStep( - integration="int1", - query=parse_sql("select id as id from tab1"), - ), - FetchDataframeStep( - integration="int2", - query=Select( - targets=[Star()], - from_table=Identifier("tab1"), - where=BinaryOperation(op="in", args=[Identifier(parts=["x1"]), Parameter(Result(0))]), - ), - ), - ], - ) - - plan = plan_query( - query, integrations=["int1", "int2"], predictor_metadata=[{"name": "pred", "integration_name": "mindsdb"}] - ) - - assert plan.steps == expected_plan.steps - - def test_select_from_table_subselect_api_integration(self): - query = parse_sql( - """ - select x from int1.tab2 - where x1 in (select id from int1.tab1) - limit 1 - """ - ) - - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - FetchDataframeStep( - integration="int1", - query=parse_sql("select id AS id from tab1"), - ), - SubSelectStep( - dataframe=Result(0), query=parse_sql("select id"), table_name="tab1", skip_for_aggregation=True - ), - FetchDataframeStep( - integration="int1", - query=Select( - targets=[Identifier("x", alias=Identifier("x"))], - from_table=Identifier("tab2"), - where=BinaryOperation(op="in", args=[Identifier(parts=["x1"]), Parameter(Result(1))]), - limit=Constant(1), - ), - ), - SubSelectStep( - dataframe=Result(2), - query=parse_sql("select x limit 1"), - table_name="tab2", - skip_for_aggregation=True, - ), - ], - ) - - plan = plan_query( - query, - integrations=[{"name": "int1", "class_type": "api", "type": "data"}], - predictor_metadata=[{"name": "pred", 
"integration_name": "mindsdb"}], - ) - - assert plan.steps == expected_plan.steps - - def test_select_from_table_subselect_sql_integration(self): - query = parse_sql( - """ - select * from int1.tab1 - where x1 in (select id from int1.tab1) - """ - ) - - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - FetchDataframeStep( - integration="int1", - query=parse_sql("select * from tab1 where x1 in (select id as id from tab1)"), - ), - ], - ) - - plan = plan_query( - query, - integrations=[{"name": "int1", "class_type": "sql", "type": "data"}], - predictor_metadata=[{"name": "pred", "integration_name": "mindsdb"}], - ) - - assert plan.steps == expected_plan.steps - - def test_select_from_single_integration(self): - sql_parsed = """ - with tab2 as ( - select * from int1.tabl2 - ) - select a from ( - select x from tab2 - union - select y from int1.tab1 - where x1 in (select id from int1.tab1) - limit 1 - ) - """ - - sql_integration = """ - with tab2 as ( - select * from tabl2 - ) - select a as a from ( - select x as x from tab2 - union - select y as y from tab1 - where x1 in (select id as id from tab1) - limit 1 - ) - """ - query = parse_sql(sql_parsed) - - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - FetchDataframeStep( - integration="int1", - query=parse_sql(sql_integration), - ), - ], - ) - - plan = plan_query( - query, - integrations=[{"name": "int1", "class_type": "sql", "type": "data"}], - predictor_metadata=[{"name": "pred", "integration_name": "mindsdb"}], - default_namespace="mindsdb", - ) - - assert plan.steps == expected_plan.steps - - def test_delete_from_table_subselect_api_integration(self): - query = parse_sql( - """ - delete from int1.tab1 - where x1 in (select id from int1.tab1) - """ - ) - - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - FetchDataframeStep( - integration="int1", - query=parse_sql("select id AS id from tab1"), - ), - SubSelectStep( - dataframe=Result(0), 
query=parse_sql("select id"), table_name="tab1", skip_for_aggregation=True - ), - DeleteStep( - table=Identifier("int1.tab1"), - where=BinaryOperation(op="in", args=[Identifier(parts=["x1"]), Parameter(Result(1))]), - ), - ], - ) - - plan = plan_query( - query, - integrations=[{"name": "int1", "class_type": "api", "type": "data"}], - predictor_metadata=[{"name": "pred", "integration_name": "mindsdb"}], - ) - - assert plan.steps == expected_plan.steps - - def test_delete_from_table_subselect_sql_integration(self): - query = parse_sql( - """ - delete from int1.tab1 - where x1 in (select id from int1.tab1) - """ - ) - - subselect = parse_sql("select id as id from tab1") - subselect.parentheses = True - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - DeleteStep( - table=Identifier("int1.tab1"), - where=BinaryOperation(op="in", args=[Identifier(parts=["x1"]), subselect]), - ), - ], - ) - - plan = plan_query( - query, - integrations=[{"name": "int1", "class_type": "sql", "type": "data"}], - predictor_metadata=[{"name": "pred", "integration_name": "mindsdb"}], - ) - - assert plan.steps == expected_plan.steps - - def test_delete_from_table_subselect_sql_different_integration(self): - query = parse_sql( - """ - delete from int1.tab1 - where x1 in (select id from int2.tab1) - """ - ) - - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - FetchDataframeStep( - integration="int2", - query=parse_sql("select id as id from tab1"), - ), - DeleteStep( - table=Identifier("int1.tab1"), - where=BinaryOperation(op="in", args=[Identifier(parts=["x1"]), Parameter(Result(0))]), - ), - ], - ) - - plan = plan_query( - query, - integrations=[{"name": "int1", "class_type": "api", "type": "data"}, "int2"], - predictor_metadata=[{"name": "pred", "integration_name": "mindsdb"}], - ) - - assert plan.steps == expected_plan.steps - - def test_create_table(self): - query = parse_sql( - """ - CREATE or replace table int2.tab1 ( - id int8, - data varchar - 
) - """ - ) - - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - CreateTableStep( - table=Identifier("int2.tab1"), - columns=[ - TableColumn(name="id", type="int8"), - TableColumn(name="data", type="varchar"), - ], - is_replace=True, - ), - ], - ) - - plan = plan_query( - query, - integrations=[{"name": "int1", "class_type": "api", "type": "data"}, "int2"], - predictor_metadata=[{"name": "pred", "integration_name": "mindsdb"}], - ) - - assert plan.steps == expected_plan.steps - - def test_select_with_user_functions(self): - query = parse_sql( - """ - select my.fnc(a, 1) from int1.tab1 - where x1 > my.fnc2(b) - order by x - limit 2 - """ - ) - - sub_query = parse_sql("select my.fnc(a, 1) from tab1 where x1 > my.fnc2(b)") - sub_query.from_table = None - - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - FetchDataframeStep( - integration="int1", - query=parse_sql("select * from tab1 where 0=0 order by x limit 2"), - ), - SubSelectStep(dataframe=Result(0), query=sub_query, table_name="tab1"), - ], - ) - - plan = plan_query( - query, - integrations=[{"name": "int1", "class_type": "sql", "type": "data"}], - predictor_metadata=[{"name": "pred", "integration_name": "mindsdb"}], - ) - - assert plan.steps == expected_plan.steps - - def test_select_with_mindsdb_functions(self): - for function in MINDSDB_SQL_FUNCTIONS: - query = parse_sql( - f""" - select {function}(a) from int1.tab1 - order by x - limit 2 - """ - ) - - sub_query = parse_sql(f"select {function}(a) from tab1") - sub_query.from_table = None - - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - FetchDataframeStep( - integration="int1", - query=parse_sql("select * from tab1 order by x limit 2"), - ), - SubSelectStep(dataframe=Result(0), query=sub_query, table_name="tab1"), - ], - ) - - plan = plan_query( - query, - integrations=[{"name": "int1", "class_type": "sql", "type": "data"}], - predictor_metadata=[{"name": "pred", "integration_name": 
"mindsdb"}], - ) - - assert plan.steps == expected_plan.steps diff --git a/tests/unit/planner/test_join_predictor.py b/tests/unit/planner/test_join_predictor.py deleted file mode 100644 index 8d7fad4d677..00000000000 --- a/tests/unit/planner/test_join_predictor.py +++ /dev/null @@ -1,970 +0,0 @@ -import copy - -import pytest - -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast import Identifier, Select, Join, Constant, Star, Parameter, BinaryOperation -from mindsdb_sql_parser.utils import JoinType - -from mindsdb.api.executor.planner.exceptions import PlanningException -from mindsdb.api.executor.planner import plan_query -from mindsdb.api.executor.planner.query_plan import QueryPlan -from mindsdb.api.executor.planner.step_result import Result -from mindsdb.api.executor.planner.steps import ( - FetchDataframeStep, - ProjectStep, - JoinStep, - ApplyPredictorStep, - QueryStep, - SubSelectStep, - ApplyPredictorRowStep, - MapReduceStep, -) - - -class TestPlanJoinPredictor: - def test_join_predictor_plan(self): - sql = """ - select tab1.column1, pred.predicted - from int.tab1, mindsdb.pred - """ - query = parse_sql(sql) - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep( - integration="int", - query=Select( - targets=[Star()], from_table=Identifier("tab1") - ), # No column pruning with predictor joins (predictors may need all columns) - ), - ApplyPredictorStep(namespace="mindsdb", dataframe=Result(0), predictor=Identifier("pred")), - JoinStep( - left=Result(0), - right=Result(1), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.INNER_JOIN), - ), - QueryStep(parse_sql("select tab1.column1, pred.predicted"), from_table=Result(2), strict_where=False), - ], - ) - plan = plan_query(query, integrations=["int"], predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - # test_predictor_namespace_is_case_insensitive - 
plan = plan_query(query, integrations=["int"], predictor_namespace="MINDSDB", predictor_metadata={"pred": {}}) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_join_predictor_plan_aliases(self): - sql = """ - select ta.column1, tb.predicted - from int.tab1 ta, mindsdb.pred tb - """ - query = parse_sql(sql) - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep( - integration="int", - query=Select( - targets=[Star()], - from_table=Identifier("tab1", alias=Identifier("ta")), - ), # No column pruning with predictor joins - ), - ApplyPredictorStep( - namespace="mindsdb", dataframe=Result(0), predictor=Identifier("pred", alias=Identifier("tb")) - ), - JoinStep( - left=Result(0), - right=Result(1), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.INNER_JOIN), - ), - QueryStep(parse_sql("select ta.column1, tb.predicted"), from_table=Result(2), strict_where=False), - ], - ) - plan = plan_query(query, integrations=["int"], predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - assert plan.steps == expected_plan.steps - - def test_join_predictor_plan_limit(self): - sql = """ - select tab.column1, pred.predicted - from int.tab, mindsdb.pred - where tab.product_id = 'x' and tab.time between '2021-01-01' and '2021-01-31' - order by tab.column2 - limit 10 - offset 1 - """ - query = parse_sql(sql) - - subquery = copy.deepcopy(query) - subquery.from_table = None - subquery.offset = None - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep( - integration="int", - query=parse_sql( - """ - select * from tab - where product_id = 'x' and time between '2021-01-01' and '2021-01-31' - order by column2 - limit 10 - offset 1 - """ - ), # No column pruning with predictor joins - ), - ApplyPredictorStep(namespace="mindsdb", dataframe=Result(0), predictor=Identifier("pred")), - JoinStep( - left=Result(0), - right=Result(1), - query=Join(left=Identifier("tab1"), 
right=Identifier("tab2"), join_type=JoinType.INNER_JOIN), - ), - QueryStep(subquery, from_table=Result(2), strict_where=False), - ], - ) - plan = plan_query(query, integrations=["int"], predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - assert plan.steps == expected_plan.steps - - # def test_join_predictor_error_when_filtering_on_predictions(self): - # """ - # Query: - # SELECT rental_price_confidence - # FROM postgres_90.test_data.home_rentals AS ta - # JOIN mindsdb.hrp3 AS tb - # WHERE ta.sqft > 1000 AND tb.rental_price_confidence > 0.5 - # LIMIT 5; - # """ - # - # query = Select(targets=[Identifier('rental_price_confidence')], - # from_table=Join(left=Identifier('postgres_90.test_data.home_rentals', alias=Identifier('ta')), - # right=Identifier('mindsdb.hrp3', alias=Identifier('tb')), - # join_type=JoinType.INNER_JOIN, - # implicit=True), - # where=BinaryOperation('and', args=[ - # BinaryOperation('>', args=[Identifier('ta.sqft'), Constant(1000)]), - # BinaryOperation('>', args=[Identifier('tb.rental_price_confidence'), Constant(0.5)]), - # ]), - # limit=5 - # ) - # - # with pytest.raises(PlanningException): - # plan_query(query, integrations=['postgres_90'], predictor_namespace='mindsdb', predictor_metadata={'hrp3': {}}) - - def test_join_predictor_plan_complex_query(self): - sql = """ - select t.asset, t.time, m.predicted - from int.tab t, mindsdb.pred m - where t.col1 = 'x' - group by t.asset - having t.asset = 'bitcoin' - order by t.asset - limit 1 - offset 2 - """ - query = parse_sql(sql) - - subquery = copy.deepcopy(query) - subquery.from_table = None - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep( - integration="int", - query=parse_sql("select * from tab as t where col1 = 'x'"), - ), # No column pruning with predictor joins - ApplyPredictorStep( - namespace="mindsdb", dataframe=Result(0), predictor=Identifier("pred", alias=Identifier("m")) - ), - JoinStep( - left=Result(0), - right=Result(1), - 
query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.INNER_JOIN), - ), - QueryStep(subquery, from_table=Result(2), strict_where=False), - ], - ) - plan = plan_query(query, integrations=["int"], predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - assert plan.steps == expected_plan.steps - - def test_no_predictor_error(self): - query = Select( - targets=[Identifier("tab1.column1"), Identifier("pred.predicted")], - from_table=Join(left=Identifier("int.tab1"), right=Identifier("pred"), join_type=None, implicit=True), - ) - - with pytest.raises(PlanningException): - plan_query(query, integrations=["int"], predictor_metadata={"pred": {}}) - - def test_join_predictor_plan_default_namespace_integration(self): - sql = """ - select tab1.column1, pred.predicted - from tab1, mindsdb.pred - """ - query = parse_sql(sql) - expected_plan = QueryPlan( - default_namespace="int", - steps=[ - FetchDataframeStep( - integration="int", - query=Select( - targets=[Star()], from_table=Identifier("tab1") - ), # No column pruning with predictor joins - ), - ApplyPredictorStep(namespace="mindsdb", dataframe=Result(0), predictor=Identifier("pred")), - JoinStep( - left=Result(0), - right=Result(1), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.INNER_JOIN), - ), - QueryStep(parse_sql("select tab1.column1, pred.predicted"), from_table=Result(2), strict_where=False), - ], - ) - plan = plan_query( - query, - integrations=["int"], - predictor_namespace="mindsdb", - default_namespace="int", - predictor_metadata={"pred": {}}, - ) - - assert plan.steps == expected_plan.steps - - def test_join_predictor_plan_default_namespace_predictor(self): - sql = """ - select tab1.column1, pred.predicted - from int.tab1, pred - """ - query = parse_sql(sql) - - expected_plan = QueryPlan( - default_namespace="mindsdb", - steps=[ - FetchDataframeStep( - integration="int", - query=Select( - targets=[Star()], from_table=Identifier("tab1") - 
), # No column pruning with predictor joins - ), - ApplyPredictorStep(namespace="mindsdb", dataframe=Result(0), predictor=Identifier("pred")), - JoinStep( - left=Result(0), - right=Result(1), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.INNER_JOIN), - ), - QueryStep(parse_sql("select tab1.column1, pred.predicted"), from_table=Result(2), strict_where=False), - ], - ) - plan = plan_query( - query, - integrations=["int"], - predictor_namespace="mindsdb", - default_namespace="mindsdb", - predictor_metadata={"pred": {}}, - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_nested_select(self): - # for tableau - - sql = """ - SELECT time - FROM ( - select * from int.covid - join mindsdb.pred - limit 10 - ) `Custom SQL Query` - limit 1 - """ - - query = parse_sql(sql) - - expected_plan = QueryPlan( - default_namespace="mindsdb", - steps=[ - FetchDataframeStep(integration="int", query=parse_sql("select * from covid limit 10")), - ApplyPredictorStep(namespace="mindsdb", dataframe=Result(0), predictor=Identifier("pred")), - JoinStep( - left=Result(0), - right=Result(1), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.JOIN), - ), - QueryStep(Select(targets=[Star()], limit=Constant(10)), from_table=Result(2), strict_where=False), - SubSelectStep( - dataframe=Result(3), query=parse_sql("SELECT time limit 1"), table_name="Custom SQL Query" - ), - ], - ) - - plan = plan_query( - query, - integrations=["int"], - predictor_namespace="mindsdb", - default_namespace="mindsdb", - predictor_metadata={"pred": {}}, - ) - - assert plan.steps == expected_plan.steps - - sql = """ - SELECT `time` - FROM ( - select * from int.covid - join mindsdb.pred - ) `Custom SQL Query` - GROUP BY 1 - """ - - query = parse_sql(sql) - - expected_plan = QueryPlan( - default_namespace="mindsdb", - steps=[ - FetchDataframeStep(integration="int", query=parse_sql("select * from covid")), - 
ApplyPredictorStep(namespace="mindsdb", dataframe=Result(0), predictor=Identifier("pred")), - JoinStep( - left=Result(0), - right=Result(1), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.JOIN), - ), - SubSelectStep( - dataframe=Result(2), - query=Select(targets=[Identifier("`time`")], group_by=[Constant(1)]), - table_name="Custom SQL Query", - ), - ], - ) - - plan = plan_query( - query, - integrations=["int"], - predictor_namespace="mindsdb", - default_namespace="mindsdb", - predictor_metadata={"pred": {}}, - ) - - assert plan.steps == expected_plan.steps - - def test_subselect(self): - # nested limit is greater - sql = """ - SELECT * - FROM ( - select col from int.covid - limit 10 - ) as t - join mindsdb.pred - limit 5 - """ - - query = parse_sql(sql) - - expected_plan = QueryPlan( - default_namespace="mindsdb", - steps=[ - FetchDataframeStep(integration="int", query=parse_sql("select col as col from covid limit 10")), - SubSelectStep(query=Select(targets=[Star()]), dataframe=Result(0), table_name="t"), - ApplyPredictorStep(namespace="mindsdb", dataframe=Result(1), predictor=Identifier("pred")), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.JOIN), - ), - QueryStep(Select(targets=[Star()], limit=Constant(5)), from_table=Result(3), strict_where=False), - ], - ) - - plan = plan_query( - query, - integrations=["int"], - predictor_namespace="mindsdb", - default_namespace="mindsdb", - predictor_metadata={"pred": {}}, - ) - assert plan.steps == expected_plan.steps - - # only nested select with limit - sql = """ - SELECT * - FROM ( - select * from int.covid - join int.info - limit 5 - ) as t - join mindsdb.pred - """ - - query = parse_sql(sql) - - expected_plan = QueryPlan( - default_namespace="mindsdb", - steps=[ - FetchDataframeStep(integration="int", query=parse_sql("select * from covid join info limit 5")), - 
SubSelectStep(query=Select(targets=[Star()]), dataframe=Result(0), table_name="t"), - ApplyPredictorStep(namespace="mindsdb", dataframe=Result(1), predictor=Identifier("pred")), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.JOIN), - ), - ], - ) - - plan = plan_query( - query, - integrations=["int"], - predictor_namespace="mindsdb", - default_namespace="mindsdb", - predictor_metadata={"pred": {}}, - ) - assert plan.steps == expected_plan.steps - - -class TestPredictorWithUsing: - def test_using_join(self): - sql = """ - select * from int.tab1 - join mindsdb.pred - using a=1 - """ - - query = parse_sql(sql) - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep(integration="int", query=parse_sql("select * from tab1")), - ApplyPredictorStep( - namespace="mindsdb", dataframe=Result(0), predictor=Identifier("pred"), params={"a": 1} - ), - JoinStep( - left=Result(0), - right=Result(1), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.JOIN), - ), - ProjectStep(dataframe=Result(2), columns=[Star()]), - ], - ) - plan = plan_query(query, integrations=["int"], predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - # with native query - - sql = """ - select * from int (select * from tab1) t - join mindsdb.pred - using a=1 - """ - - query = parse_sql(sql) - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep(integration="int", raw_query="select * from tab1"), - SubSelectStep(step_num=1, query=Select(targets=[Star()]), dataframe=Result(0), table_name="t"), - ApplyPredictorStep( - namespace="mindsdb", dataframe=Result(1), predictor=Identifier("pred"), params={"a": 1} - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.JOIN), - ), - ProjectStep(dataframe=Result(3), 
columns=[Star()]), - ], - ) - plan = plan_query(query, integrations=["int"], predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_using_one_line(self): - sql = """ - select * from mindsdb.pred where x=2 using a=1 - """ - - query = parse_sql(sql) - expected_plan = QueryPlan( - steps=[ - ApplyPredictorRowStep( - namespace="mindsdb", predictor=Identifier("pred"), row_dict={"x": 2}, params={"a": 1} - ), - ProjectStep(dataframe=Result(0), columns=[Star()]), - ], - ) - plan = plan_query(query, integrations=["int"], predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - -class TestPredictorVersion: - def test_using_join(self): - sql = """ - select * from int.tab1 - join proj.pred.1 - using a=1 - """ - - query = parse_sql(sql) - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep(integration="int", query=parse_sql("select * from tab1")), - ApplyPredictorStep( - namespace="proj", dataframe=Result(0), predictor=Identifier("pred.1"), params={"a": 1} - ), - JoinStep( - left=Result(0), - right=Result(1), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.JOIN), - ), - ProjectStep(dataframe=Result(2), columns=[Star()]), - ], - ) - - plan = plan_query( - query, - integrations=["int"], - predictor_namespace="mindsdb", - predictor_metadata=[{"name": "pred", "integration_name": "proj"}], - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - # default namespace - - sql = """ - select * from int.tab1 - join pred.1 - using a=1 - """ - query = parse_sql(sql) - - plan = plan_query( - query, - integrations=["int"], - predictor_namespace="mindsdb", - default_namespace="proj", - predictor_metadata=[{"name": "pred", "integration_name": "proj"}], - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] 
== expected_plan.steps[i] - - def test_where_using(self): - sql = """ - select * from int.tab1 a - join proj.pred.1 p - where a.x=1 and p.x=1 and p.ttt=2 and a.y=3 and p.y='' - """ - - subquery = parse_sql( - """ - select * from x - where a.x=1 and 0=0 and p.ttt=2 and a.y=3 and 0=0 - """ - ) - subquery.from_table = None - - query = parse_sql(sql) - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep(integration="int", query=parse_sql("select * from tab1 as a where x=1 and y=3")), - ApplyPredictorStep( - namespace="proj", - dataframe=Result(0), - predictor=Identifier("pred.1", alias=Identifier("p")), - row_dict={"x": 1, "y": ""}, - ), - JoinStep( - left=Result(0), - right=Result(1), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.JOIN), - ), - QueryStep(subquery, from_table=Result(2), strict_where=False), - ], - ) - - plan = plan_query( - query, - integrations=["int"], - predictor_namespace="mindsdb", - predictor_metadata=[{"name": "pred", "integration_name": "proj", "to_predict": ["ttt"]}], - ) - - assert plan.steps == expected_plan.steps - - def test_using_one_line(self): - sql = """ - select * from proj.pred.1 where x=2 using a=1 - """ - - query = parse_sql(sql) - expected_plan = QueryPlan( - steps=[ - ApplyPredictorRowStep( - namespace="proj", predictor=Identifier("pred.1"), row_dict={"x": 2}, params={"a": 1} - ), - ProjectStep(dataframe=Result(0), columns=[Star()]), - ], - ) - plan = plan_query( - query, - integrations=["int"], - predictor_namespace="mindsdb", - predictor_metadata=[{"name": "pred", "integration_name": "proj"}], - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - # default namespace - - sql = """ - select * from pred.1 where x=2 using a=1 - """ - query = parse_sql(sql) - - plan = plan_query( - query, - integrations=["int"], - predictor_namespace="mindsdb", - default_namespace="proj", - predictor_metadata=[{"name": "pred", "integration_name": "proj"}], - ) - - for 
i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - -class TestPredictorParams: - def test_model_param(self): - sql = """ - select * from int.tab1 t - join mindsdb.pred m - where m.a=1 and t.b=2 - """ - - query = parse_sql(sql) - - subquery = parse_sql( - """ - select * from x - where 0=0 and t.b=2 - """ - ) - subquery.from_table = None - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep(integration="int", query=parse_sql("select * from tab1 as t where b=2")), - ApplyPredictorStep( - namespace="mindsdb", - dataframe=Result(0), - predictor=Identifier("pred", alias=Identifier("m")), - row_dict={"a": 1}, - ), - JoinStep( - left=Result(0), - right=Result(1), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.JOIN), - ), - QueryStep(subquery, from_table=Result(2), strict_where=False), - ], - ) - plan = plan_query(query, integrations=["int"], predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - assert plan.steps == expected_plan.steps - - # 3 table - sql = """ - select * from int.tab1 t - join int.tab2 t2 - join mindsdb.pred m - where m.a=1 - """ - - subquery = parse_sql( - """ - select * from x - where 0=0 - """ - ) - subquery.from_table = None - - query = parse_sql(sql) - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep(integration="int", query=parse_sql("select * from tab1 as t")), - FetchDataframeStep(integration="int", query=parse_sql("select * from tab2 as t2")), - JoinStep( - left=Result(0), - right=Result(1), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.JOIN), - ), - ApplyPredictorStep( - namespace="mindsdb", - dataframe=Result(2), - predictor=Identifier("pred", alias=Identifier("m")), - row_dict={"a": 1}, - ), - JoinStep( - left=Result(2), - right=Result(3), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.JOIN), - ), - QueryStep(subquery, from_table=Result(4), strict_where=False), - ], - ) - plan 
= plan_query(query, integrations=["int"], predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - assert plan.steps == expected_plan.steps - - def test_complex_subselect(self): - sql = """ - select t2.x, m.id, (select a from int.tab0 where x=0) from int.tab1 t1 - left join int.tab2 t2 on t1.x = t2.a - join mindsdb.pred m - where m.a=(select a from int.tab3 where x=3) - and t2.x=(select a from int.tab4 where x=4) - and t1.b=1 and t2.b=2 and t1.a = t2.a - limit 3 - """ - - q_table2 = parse_sql("select * from tab2 as t2 where x=0 and b=2 AND a IN 1") - q_table2.where.args[0].args[0].args[1] = Parameter(Result(2)) - q_table2.where.args[1].args[1] = Parameter(Result(4)) - - subquery = parse_sql( - """ - select t2.x, m.id, x - from x - where 0=0 - and t2.x=x - and t1.b=1 and t2.b=2 and t1.a = t2.a - limit 3 - """ - ) - subquery.from_table = None - subquery.targets[2] = Parameter(Result(0)) - subquery.where.args[0].args[0].args[0].args[1].args[1] = Parameter(Result(2)) - - query = parse_sql(sql) - expected_plan = QueryPlan( - steps=[ - # nested queries - FetchDataframeStep(integration="int", query=parse_sql("select a as a from tab0 where x=0")), - FetchDataframeStep(integration="int", query=parse_sql("select a as a from tab3 where x=3")), - FetchDataframeStep(integration="int", query=parse_sql("select a as a from tab4 where x=4")), - # tables - FetchDataframeStep(integration="int", query=parse_sql("select * from tab1 as t1 where b=1 limit 3")), - SubSelectStep(dataframe=Result(3), query=Select(targets=[Identifier("x")], distinct=True)), - FetchDataframeStep(integration="int", query=q_table2), - JoinStep( - left=Result(3), - right=Result(5), - query=Join( - left=Identifier("tab1"), - right=Identifier("tab2"), - join_type=JoinType.LEFT_JOIN, - condition=BinaryOperation(op="=", args=[Identifier("t1.x"), Identifier("t2.a")]), - ), - ), - # model - ApplyPredictorStep( - namespace="mindsdb", - dataframe=Result(6), - predictor=Identifier("pred", 
alias=Identifier("m")), - row_dict={"a": Result(1)}, - ), - JoinStep( - left=Result(6), - right=Result(7), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.JOIN), - ), - QueryStep(subquery, from_table=Result(8), strict_where=False), - ], - ) - plan = plan_query(query, integrations=["int"], predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - assert plan.steps == expected_plan.steps - - def test_model_join_model(self): - sql = """ - select * from int.tab1 t - join mindsdb.pred m - join mindsdb.pred m2 - where m.a = 2 - using m.param1 = 'a', - m2.param2 = 'b', - param3 = 'c' - """ - - subquery = parse_sql( - """ - select * from x - where 0=0 - """ - ) - subquery.from_table = None - - query = parse_sql(sql) - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep(integration="int", query=parse_sql("select * from tab1 as t")), - ApplyPredictorStep( - namespace="mindsdb", - dataframe=Result(0), - predictor=Identifier("pred", alias=Identifier("m")), - row_dict={"a": 2}, - params={"param1": "a", "param3": "c"}, - ), - JoinStep( - left=Result(0), - right=Result(1), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.JOIN), - ), - ApplyPredictorStep( - namespace="mindsdb", - dataframe=Result(2), - predictor=Identifier("pred", alias=Identifier("m2")), - params={"param2": "b", "param3": "c"}, - ), - JoinStep( - left=Result(2), - right=Result(3), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.JOIN), - ), - QueryStep(subquery, from_table=Result(4), strict_where=False), - ], - ) - plan = plan_query(query, integrations=["int"], predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - assert plan.steps == expected_plan.steps - - def test_model_column_map(self): - sql = """ - select * from int.tab1 a - join proj.pred.1 p on a.data1 = p.data2 and p.x = a.y - """ - - # subquery = parse_sql(""" - # select * from x - # where a.x=1 and 0=0 and p.ttt=2 and 
a.y=3 and 0=0 - # """) - # subquery.from_table = None - - query = parse_sql(sql) - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep(integration="int", query=parse_sql("select * from tab1 as a")), - ApplyPredictorStep( - namespace="proj", - dataframe=Result(0), - predictor=Identifier("pred.1", alias=Identifier("p")), - columns_map={"data2": Identifier("a.data1"), "x": Identifier("a.y")}, - ), - JoinStep( - left=Result(0), - right=Result(1), - query=Join( - left=Identifier("tab1"), - right=Identifier("tab2"), - join_type=JoinType.JOIN, - condition=BinaryOperation( - "and", - args=[ - BinaryOperation("=", args=[Constant(0), Constant(0)]), - BinaryOperation("=", args=[Constant(0), Constant(0)]), - ], - ), - ), - ), - ], - ) - - plan = plan_query( - query, - integrations=["int"], - predictor_namespace="mindsdb", - predictor_metadata=[{"name": "pred", "integration_name": "proj", "to_predict": ["ttt"]}], - ) - - assert plan.steps == expected_plan.steps - - def test_partition(self): - sql = """ - select p1.* from int.tab1 a - join proj.pred1 p1 - join proj.pred2 p2 - using partition_size=1000 - """ - - query = parse_sql(sql) - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep(integration="int", query=parse_sql("select * from tab1 as a")), - MapReduceStep( - values=Result(0), - step=[ - ApplyPredictorStep( - step_num="1_0", - namespace="proj", - dataframe=Result(0), - params={}, - predictor=Identifier("pred1", alias=Identifier("p1")), - ), - JoinStep( - step_num="1_1", - left=Result(0), - right=Result("1_0"), - query=Join( - left=Identifier("tab1"), - right=Identifier("tab2"), - join_type=JoinType.JOIN, - ), - ), - ApplyPredictorStep( - step_num="1_2", - namespace="proj", - dataframe=Result("1_1"), - params={}, - predictor=Identifier("pred2", alias=Identifier("p2")), - ), - JoinStep( - step_num="1_3", - left=Result("1_1"), - right=Result("1_2"), - query=Join( - left=Identifier("tab1"), - right=Identifier("tab2"), - join_type=JoinType.JOIN, - ), - ), - ], 
- partition=1000, - ), - QueryStep(parse_sql("select p1.*"), from_table=Result(1), strict_where=False), - ], - ) - - plan = plan_query( - query, - integrations=["int"], - predictor_namespace="mindsdb", - predictor_metadata=[ - {"name": "pred1", "integration_name": "proj", "to_predict": ["ttt"]}, - {"name": "pred2", "integration_name": "proj", "to_predict": ["ttt"]}, - ], - ) - - assert plan.steps == expected_plan.steps diff --git a/tests/unit/planner/test_join_tables.py b/tests/unit/planner/test_join_tables.py deleted file mode 100644 index 7bd8a463d7a..00000000000 --- a/tests/unit/planner/test_join_tables.py +++ /dev/null @@ -1,820 +0,0 @@ -import copy - -import pytest - -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast import ( - Identifier, - Select, - Join, - Constant, - Star, - BinaryOperation, - Function, - Parameter, -) -from mindsdb_sql_parser.utils import JoinType - -from mindsdb.api.executor.planner.exceptions import PlanningException -from mindsdb.api.executor.planner import plan_query -from mindsdb.api.executor.planner.query_plan import QueryPlan -from mindsdb.api.executor.planner.step_result import Result -from mindsdb.api.executor.planner.steps import ( - FetchDataframeStep, - FetchDataframeStepPartition, - ProjectStep, - JoinStep, - ApplyPredictorStep, - SubSelectStep, - QueryStep, -) - - -class TestPlanJoinTables: - def test_join_tables_plan(self): - query = Select( - targets=[Identifier("tab1.column1"), Identifier("tab2.column1"), Identifier("tab2.column2")], - from_table=Join( - left=Identifier("int.tab1"), - right=Identifier("int2.tab2"), - condition=BinaryOperation(op=">", args=[Identifier("tab1.column1"), Identifier("tab2.column1")]), - join_type=JoinType.INNER_JOIN, - ), - ) - plan = plan_query(query, integrations=["int", "int2"]) - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStep( - integration="int", - query=Select( - targets=[Identifier("column1", alias=Identifier("column1"))], # Column 
pruning - from_table=Identifier("tab1"), - ), - ), - FetchDataframeStep( - integration="int2", - query=Select( - targets=[ - Identifier("column1", alias=Identifier("column1")), - Identifier("column2", alias=Identifier("column2")), - ], # Column pruning - from_table=Identifier("tab2"), - ), - ), - JoinStep( - left=Result(0), - right=Result(1), - query=Join( - left=Identifier("tab1"), - right=Identifier("tab2"), - condition=BinaryOperation( - op=">", args=[Identifier("tab1.column1"), Identifier("tab2.column1")] - ), - join_type=JoinType.INNER_JOIN, - ), - ), - QueryStep( - parse_sql("select tab1.column1, tab2.column1, tab2.column2"), - from_table=Result(2), - strict_where=False, - ), - ], - ) - - assert plan.steps == expected_plan.steps - - def test_join_tables_where_plan(self): - # `WHERE column1 = 1` without table, therefore should be processed in mindsdb - query = parse_sql(""" - SELECT * FROM int1.table1 ta LEFT JOIN int2.table2 tb ON ta.id = tb.id - WHERE column1 = 1 - """) - plan = plan_query(query, integrations=["int1", "int2"]) - subquery = copy.deepcopy(query) - subquery.from_table = None - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep( - integration="int1", - query=parse_sql("SELECT * FROM table1 AS ta"), - ), - FetchDataframeStep( - integration="int2", - query=parse_sql("SELECT * FROM table2 AS tb"), - ), - JoinStep( - left=Result(0), - right=Result(1), - query=Join( - left=Identifier("tab1"), - right=Identifier("tab2"), - condition=BinaryOperation(op="=", args=[Identifier("ta.id"), Identifier("tb.id")]), - join_type=JoinType.LEFT_JOIN, - ), - ), - QueryStep(subquery, from_table=Result(2), strict_where=False), - ] - ) - assert plan.steps == expected_plan.steps - - # `WHERE ta.column1 = 1` is with table, therefore should be processed in the integration - query = parse_sql(""" - SELECT * FROM int1.table1 ta LEFT JOIN int2.table2 tb ON ta.id = tb.id - WHERE ta.column1 = 1 - """) - plan = plan_query(query, integrations=["int1", "int2"]) - subquery 
= copy.deepcopy(query) - subquery.from_table = None - expected_plan.steps[0].query = parse_sql("SELECT * FROM table1 AS ta WHERE column1 = 1") - expected_plan.steps[3].query = subquery - assert plan.steps == expected_plan.steps - - # WHERE with IN (constants) - query = parse_sql(""" - SELECT * FROM int1.table1 ta LEFT JOIN int2.table2 tb ON ta.id = tb.id - WHERE ta.column1 in (1,2,3) - """) - plan = plan_query(query, integrations=["int1", "int2"]) - subquery = copy.deepcopy(query) - subquery.from_table = None - expected_plan.steps[0].query = parse_sql("SELECT * FROM table1 AS ta WHERE column1 in (1,2,3)") - expected_plan.steps[3].query = subquery - assert plan.steps == expected_plan.steps - - # WHERE with IN (columns) - should be processed in mindsdb - query = parse_sql(""" - SELECT * FROM int1.table1 ta LEFT JOIN int2.table2 tb ON ta.id = tb.id - WHERE ta.column1 in (column2, column3) - """) - plan = plan_query(query, integrations=["int1", "int2"]) - subquery = copy.deepcopy(query) - subquery.from_table = None - expected_plan.steps[0].query = parse_sql("SELECT * FROM table1 AS ta") - expected_plan.steps[3].query = subquery - assert plan.steps == expected_plan.steps - - query = parse_sql( - """ - SELECT tab1.column1, tab2.column1, tab2.column2 - FROM int.tab1 - INNER JOIN int2.tab2 ON tab1.column1 > tab2.column1 - WHERE ((tab1.column1 = 1) - AND (tab2.column1 = 0)) - AND (tab1.column3 = tab2.column3) - """ - ) - - subquery = copy.deepcopy(query) - subquery.from_table = None - subquery.offset = None - - plan = plan_query(query, integrations=["int", "int2"]) - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStep( - integration="int", - query=parse_sql("SELECT column1 AS column1, column3 AS column3 FROM tab1 WHERE (column1 = 1)"), - ), - FetchDataframeStep( - integration="int2", - query=parse_sql( - "SELECT column1 AS column1, column2 AS column2, column3 AS column3 FROM tab2 WHERE (column1 = 0)" - ), - ), - JoinStep( - left=Result(0), - 
right=Result(1), - query=Join( - left=Identifier("tab1"), - right=Identifier("tab2"), - condition=BinaryOperation( - op=">", args=[Identifier("tab1.column1"), Identifier("tab2.column1")] - ), - join_type=JoinType.INNER_JOIN, - ), - ), - QueryStep(subquery, from_table=Result(2), strict_where=False), - ], - ) - - assert plan.steps == expected_plan.steps - - def test_join_tables_plan_groupby(self): - query = Select( - targets=[ - Identifier("tab1.column1"), - Identifier("tab2.column1"), - Function("sum", args=[Identifier("tab2.column2")], alias=Identifier("total")), - ], - from_table=Join( - left=Identifier("int.tab1"), - right=Identifier("int2.tab2"), - condition=BinaryOperation(op=">", args=[Identifier("tab1.column1"), Identifier("tab2.column1")]), - join_type=JoinType.INNER_JOIN, - ), - group_by=[Identifier("tab1.column1"), Identifier("tab2.column1")], - having=BinaryOperation(op="=", args=[Identifier("tab1.column1"), Constant(0)]), - ) - - subquery = copy.deepcopy(query) - subquery.from_table = None - subquery.offset = None - - plan = plan_query(query, integrations=["int", "int2"]) - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStep( - integration="int", - query=Select( - targets=[Identifier("column1", alias=Identifier("column1"))], # Column pruning - from_table=Identifier("tab1"), - ), - ), - FetchDataframeStep( - integration="int2", - query=Select( - targets=[ - Identifier("column1", alias=Identifier("column1")), - Identifier("column2", alias=Identifier("column2")), - ], # Column pruning - from_table=Identifier("tab2"), - ), - ), - JoinStep( - left=Result(0), - right=Result(1), - query=Join( - left=Identifier("tab1"), - right=Identifier("tab2"), - condition=BinaryOperation( - op=">", args=[Identifier("tab1.column1"), Identifier("tab2.column1")] - ), - join_type=JoinType.INNER_JOIN, - ), - ), - QueryStep(subquery, from_table=Result(2), strict_where=False), - ], - ) - assert plan.steps == expected_plan.steps - - def 
test_join_tables_plan_limit_offset(self): - query = Select( - targets=[Identifier("tab1.column1"), Identifier("tab2.column1"), Identifier("tab2.column2")], - from_table=Join( - left=Identifier("int.tab1"), - right=Identifier("int2.tab2"), - condition=BinaryOperation(op=">", args=[Identifier("tab1.column1"), Identifier("tab2.column1")]), - join_type=JoinType.LEFT_JOIN, - ), - limit=Constant(10), - offset=Constant(15), - ) - - subquery = copy.deepcopy(query) - subquery.from_table = None - subquery.offset = None - - plan = plan_query(query, integrations=["int", "int2"]) - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStep( - integration="int", - query=Select( - targets=[Identifier("column1", alias=Identifier("column1"))], # Column pruning - from_table=Identifier("tab1"), - limit=Constant(10), - offset=Constant(15), - ), - ), - FetchDataframeStep( - integration="int2", - query=Select( - targets=[ - Identifier("column1", alias=Identifier("column1")), - Identifier("column2", alias=Identifier("column2")), - ], # Column pruning - from_table=Identifier("tab2"), - ), - ), - JoinStep( - left=Result(0), - right=Result(1), - query=Join( - left=Identifier("tab1"), - right=Identifier("tab2"), - condition=BinaryOperation( - op=">", args=[Identifier("tab1.column1"), Identifier("tab2.column1")] - ), - join_type=JoinType.LEFT_JOIN, - ), - ), - QueryStep(subquery, from_table=Result(2), strict_where=False), - ], - ) - - assert plan.steps == expected_plan.steps - - def test_join_tables_plan_order_by(self): - query = parse_sql(""" - WITH tab2 AS ( - SELECT * FROM int2.tab2 limit 100 - ), - categories as ( - SELECT * FROM int3.cats - ) - SELECT - tab1.column1, tab2.column1, tab2.column2 - FROM int.tab1 tab1 - INNER JOIN tab2 ON tab1.column1 > tab2.column1 - WHERE tab2.category_id = (SELECT id FROM categories WHERE name='book') - ORDER BY tab1.column1 - LIMIT 10 - """) - - subquery = copy.deepcopy(query) - subquery.cte = None - subquery.from_table = None - 
subquery.offset = None - subquery.where.args[1] = Parameter(Result(2)) - - plan = plan_query(query, integrations=["int", "int2", "int3"], default_namespace="mindsdb") - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStep( - step_num=0, - integration="int2", - query=parse_sql("select * from tab2 limit 100"), - ), - FetchDataframeStep( - step_num=1, - integration="int3", - query=parse_sql("select * from cats"), - ), - SubSelectStep( - step_num=2, - query=Select( - targets=[Identifier("id")], - where=BinaryOperation(op="=", args=[Identifier("name"), Constant("book")]), - ), - dataframe=Result(1), - table_name="categories", - ), - FetchDataframeStepPartition( - step_num=3, - integration="int", - query=parse_sql("select column1 AS column1 from tab1 AS tab1 order by column1"), - condition={"limit": 10}, - steps=[ - SubSelectStep( - step_num=4, - dataframe=Result(0), - query=Select( - targets=[ - Star(), - ], # Column pruning - where=BinaryOperation(op="=", args=[Identifier("category_id"), Parameter(Result(2))]), - ), - table_name="tab2", - ), - JoinStep( - step_num=5, - left=Result(3), - right=Result(4), - query=Join( - left=Identifier("tab1"), - right=Identifier("tab2"), - condition=BinaryOperation( - op=">", args=[Identifier("tab1.column1"), Identifier("tab2.column1")] - ), - join_type=JoinType.INNER_JOIN, - ), - ), - ], - ), - QueryStep(subquery, from_table=Result(3), strict_where=False), - ], - ) - - assert plan.steps == expected_plan.steps - - def test_join_tables_plan_order_by_offset(self): - # no optimisation with offset - query = parse_sql(""" - SELECT - tab1.column1, tab2.column1, tab2.column2 - FROM int.tab1 INNER - JOIN int2.tab2 ON tab1.column1 > tab2.column1 - ORDER BY tab1.column1 - LIMIT 10 - OFFSET 15 - """) - - subquery = copy.deepcopy(query) - subquery.from_table = None - - plan = plan_query(query, integrations=["int", "int2"]) - expected_plan = QueryPlan( - integrations=["int"], - steps=[ - 
FetchDataframeStep(integration="int", query=parse_sql("select column1 AS column1 from tab1")), - FetchDataframeStep( - integration="int2", - query=Select( - targets=[ - Identifier("column1", alias=Identifier("column1")), - Identifier("column2", alias=Identifier("column2")), - ], # Column pruning - from_table=Identifier("tab2"), - ), - ), - JoinStep( - left=Result(0), - right=Result(1), - query=Join( - left=Identifier("tab1"), - right=Identifier("tab2"), - condition=BinaryOperation( - op=">", args=[Identifier("tab1.column1"), Identifier("tab2.column1")] - ), - join_type=JoinType.INNER_JOIN, - ), - ), - QueryStep(subquery, from_table=Result(2), strict_where=False), - ], - ) - - assert plan.steps == expected_plan.steps - - # This quiery should be sent to integration without raising exception - # def test_join_tables_where_ambigous_column_error(self): - # query = Select(targets=[Identifier('tab1.column1'), Identifier('tab2.column1'), Identifier('tab2.column2')], - # from_table=Join(left=Identifier('int.tab1'), - # right=Identifier('int.tab2'), - # condition=BinaryOperation(op='=', args=[Identifier('tab1.column1'), - # Identifier('tab2.column1')]), - # join_type=JoinType.INNER_JOIN - # ), - # where=BinaryOperation('and', - # args=[ - # BinaryOperation('and', - # args=[ - # BinaryOperation('=', - # args=[Identifier('tab1.column1'), - # Constant(1)]), - # BinaryOperation('=', - # args=[Identifier('tab2.column1'), - # Constant(0)]), - # - # ] - # ), - # BinaryOperation('=', - # args=[Identifier('column3'), - # Constant(0)]), - # # Ambigous column: no idea what table column3 comes from - # ] - # ) - # ) - # - # with pytest.raises(PlanningException) as e: - # plan_query(query, integrations=['int']) - - def test_join_tables_disambiguate_identifiers_in_condition(self): - query = parse_sql( - """ - SELECT tab1.column1, tab2.column1, tab2.column2 - FROM int.tab1 - INNER JOIN int.tab2 ON int.tab1.column1 = tab2.column1 - """ - ) - plan = plan_query(query, integrations=["int"]) - 
expected_plan = QueryPlan( - integrations=["int"], - steps=[ - FetchDataframeStep(integration="int", query=query), - FetchDataframeStep( - integration="int", - query=Select(targets=[Star()], from_table=Identifier("tab2")), - ), - JoinStep( - left=Result(0), - right=Result(1), - query=Join( - left=Identifier("tab1"), - right=Identifier("tab2"), - condition=BinaryOperation( - op="=", - args=[ - Identifier("tab1.column1"), # integration name gets stripped out - Identifier("tab2.column1"), - ], - ), - join_type=JoinType.INNER_JOIN, - ), - ), - ProjectStep( - dataframe=Result(2), - columns=[Identifier("tab1.column1"), Identifier("tab2.column1"), Identifier("tab2.column2")], - ), - ], - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def _disabled_test_join_tables_error_on_unspecified_table_in_condition(self): - # disabled: identifier can be environment of system variable - query = Select( - targets=[Identifier("tab1.column1"), Identifier("tab2.column1"), Identifier("tab2.column2")], - from_table=Join( - left=Identifier("int.tab1"), - right=Identifier("int.tab2"), - condition=BinaryOperation(op="=", args=[Identifier("tab1.column1"), Identifier("column1")]), - # Table name omitted - join_type=JoinType.INNER_JOIN, - ), - ) - with pytest.raises(PlanningException): - plan_query(query, integrations=["int"]) - - def test_join_tables_error_on_wrong_table_in_condition(self): - query = Select( - targets=[Identifier("tab1.column1"), Identifier("tab2.column1"), Identifier("tab2.column2")], - from_table=Join( - left=Identifier("int.tab1"), - right=Identifier("int2.tab2"), - condition=BinaryOperation(op="=", args=[Identifier("tab1.column1"), Identifier("tab3.column1")]), - # Wrong table name - join_type=JoinType.INNER_JOIN, - ), - ) - with pytest.raises(PlanningException): - plan_query(query, integrations=["int", "int2"]) - - def test_join_tables_plan_default_namespace(self): - query = parse_sql( - """ - SELECT tab1.column1, tab2.column1, 
tab2.column2 - FROM tab1 - INNER JOIN tab2 ON tab1.column1 = tab2.column1 - """ - ) - - expected_plan = QueryPlan( - integrations=["int"], - default_namespace="int", - steps=[ - FetchDataframeStep( - integration="int", - query=parse_sql( - """ - SELECT tab1.column1, tab2.column1, tab2.column2 - FROM tab1 - INNER JOIN tab2 ON tab1.column1 = tab2.column1 - """ - ), - ), - ], - ) - plan = plan_query(query, integrations=["int"], default_namespace="int") - - assert plan.steps == expected_plan.steps - - def test_complex_join_tables(self): - query = parse_sql( - """ - select * from int1.tbl1 t1 - right join int2.tbl2 t2 on t1.id>t2.id - join pred m - left join tbl3 on tbl3.id=t1.id - where t1.a=1 and t2.b=2 and 1=1 - """ - ) - - subquery = copy.deepcopy(query) - subquery.from_table = None - - plan = plan_query( - query, - integrations=["int1", "int2", "proj"], - default_namespace="proj", - predictor_metadata=[{"name": "pred", "integration_name": "proj"}], - ) - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep(integration="int1", query=parse_sql("select * from tbl1 as t1 where a=1")), - FetchDataframeStep(integration="int2", query=parse_sql("select * from tbl2 as t2 where b=2")), - JoinStep( - left=Result(0), - right=Result(1), - query=Join( - left=Identifier("tab1"), - right=Identifier("tab2"), - condition=BinaryOperation(op=">", args=[Identifier("t1.id"), Identifier("t2.id")]), - join_type=JoinType.RIGHT_JOIN, - ), - ), - ApplyPredictorStep( - namespace="proj", dataframe=Result(2), predictor=Identifier("pred", alias=Identifier("m")) - ), - JoinStep( - left=Result(2), - right=Result(3), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.JOIN), - ), - # IN clause filter optimization is disabled - fetch full table - FetchDataframeStep(integration="proj", query=parse_sql("select * from tbl3")), - JoinStep( - left=Result(4), - right=Result(5), - query=Join( - left=Identifier("tab1"), - right=Identifier("tab2"), - 
condition=BinaryOperation(op="=", args=[Identifier("tbl3.id"), Identifier("t1.id")]), - join_type=JoinType.LEFT_JOIN, - ), - ), - QueryStep(subquery, from_table=Result(6), strict_where=False), - ] - ) - - assert plan.steps == expected_plan.steps - - def test_complex_join_tables_subselect(self): - query = parse_sql( - """ - select * from int1.tbl1 t1 - join ( - select * from int2.tbl3 - join pred m - ) t2 on t1.id = t2.id - """ - ) - - plan = plan_query( - query, - integrations=["int1", "int2", "proj"], - default_namespace="proj", - predictor_metadata=[{"name": "pred", "integration_name": "proj"}], - ) - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep(integration="int1", query=parse_sql("select * from tbl1 as t1")), - FetchDataframeStep(integration="int2", query=parse_sql("select * from tbl3")), - ApplyPredictorStep( - namespace="proj", dataframe=Result(1), predictor=Identifier("pred", alias=Identifier("m")) - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.JOIN), - ), - SubSelectStep(dataframe=Result(3), query=Select(targets=[Star()]), table_name="t2"), - JoinStep( - left=Result(0), - right=Result(4), - query=Join( - left=Identifier("tab1"), - right=Identifier("tab2"), - join_type=JoinType.JOIN, - condition=BinaryOperation(op="=", args=[Identifier("t1.id"), Identifier("t2.id")]), - ), - ), - ] - ) - - assert plan.steps == expected_plan.steps - - def test_join_with_select_from_native_query(self): - query = parse_sql( - """ - select * from ( - select * from int1 ( - select raw query - ) - ) t1 - join pred m - """ - ) - - plan = plan_query( - query, - integrations=["int1", "int2", "proj"], - default_namespace="proj", - predictor_metadata=[{"name": "pred", "integration_name": "proj"}], - ) - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep(integration="int1", raw_query="select raw query"), - SubSelectStep(step_num=1, query=Select(targets=[Star()]), 
dataframe=Result(0), table_name="t1"), - ApplyPredictorStep( - namespace="proj", dataframe=Result(1), predictor=Identifier("pred", alias=Identifier("m")) - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.JOIN), - ), - ProjectStep(dataframe=Result(3), columns=[Star()]), - ] - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - # select from native query - # has the same plan - - query = parse_sql( - """ - select * from int1 ( - select raw query - ) t1 - join pred m - """ - ) - - plan = plan_query( - query, - integrations=["int1", "int2", "proj"], - default_namespace="proj", - predictor_metadata=[{"name": "pred", "integration_name": "proj"}], - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_join_one_integration(self): - query = parse_sql( - """ - SELECT tab1.column1 - FROM int.tab1 - JOIN tab2 ON tab1.column1 = tab2.column1 - """ - ) - - expected_plan = QueryPlan( - integrations=["int"], - default_namespace="int", - steps=[ - FetchDataframeStep( - integration="int", - query=parse_sql( - """ - SELECT tab1.column1 - FROM tab1 - JOIN tab2 ON tab1.column1 = tab2.column1 - """ - ), - ), - ], - ) - plan = plan_query(query, integrations=["int"], default_namespace="int") - - assert plan.steps == expected_plan.steps - - def test_cte(self): - query = parse_sql( - """ - with t1 as ( - select * from int1.tbl1 - ) - select t1.id, t2.* from t1 - join int2.tbl2 t2 on t1.id>t2.id - """ - ) - - subquery = copy.deepcopy(query) - subquery.from_table = None - - plan = plan_query(query, integrations=["int1", "int2"], default_namespace="mindsdb") - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep(integration="int1", query=parse_sql("select * from tbl1")), - SubSelectStep( - dataframe=Result(0), query=parse_sql("SELECT *"), table_name="t1" - ), # TODO: CTE column pruning optimization - 
FetchDataframeStep(integration="int2", query=parse_sql("select * from tbl2 as t2")), - JoinStep( - left=Result(1), - right=Result(2), - query=Join( - left=Identifier("tab1"), - right=Identifier("tab2"), - condition=BinaryOperation(op=">", args=[Identifier("t1.id"), Identifier("t2.id")]), - join_type=JoinType.JOIN, - ), - ), - QueryStep(parse_sql("SELECT t1.id, t2.*"), from_table=Result(3), strict_where=False), - ] - ) - - assert plan.steps == expected_plan.steps diff --git a/tests/unit/planner/test_mindsdb_predictors_select.py b/tests/unit/planner/test_mindsdb_predictors_select.py deleted file mode 100644 index e61d9e909db..00000000000 --- a/tests/unit/planner/test_mindsdb_predictors_select.py +++ /dev/null @@ -1,55 +0,0 @@ -from mindsdb_sql_parser.ast import (Identifier, Select, NullConstant, Constant, Function, BinaryOperation) - -from mindsdb.api.executor.planner import plan_query -from mindsdb.api.executor.planner.query_plan import QueryPlan -from mindsdb.api.executor.planner.steps import FetchDataframeStep - - -class TestPlanPredictorsSelect: - def test_predictors_select_plan(self): - query = Select( - targets=[Identifier('column1'), Constant(1), NullConstant(), Function('database', args=[])], - from_table=Identifier('mindsdb.predictors'), - where=BinaryOperation( - 'and', args=[ - BinaryOperation('=', args=[Identifier('column1'), Identifier('column2')]), - BinaryOperation('>', args=[Identifier('column3'), Constant(0)]), - ] - ) - ) - expected_plan = QueryPlan( - integrations=['mindsdb'], - steps=[ - FetchDataframeStep( - integration='mindsdb', - query=Select( - targets=[Identifier('column1', alias=Identifier('column1')), - Constant(1), - NullConstant(), - Function('database', args=[]), - ], - from_table=Identifier('predictors'), - where=BinaryOperation( - 'and', args=[ - BinaryOperation( - '=', - args=[Identifier('column1'), - Identifier('column2')] - ), - BinaryOperation( - '>', - args=[Identifier('column3'), - Constant(0)] - ), - ] - ) - ), - step_num=0, - 
), - ] - ) - - plan = plan_query(query, integrations=['mindsdb']) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] diff --git a/tests/unit/planner/test_plan_union.py b/tests/unit/planner/test_plan_union.py deleted file mode 100644 index 4bc3555f849..00000000000 --- a/tests/unit/planner/test_plan_union.py +++ /dev/null @@ -1,79 +0,0 @@ -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast import Identifier, Select, Join, Constant, Union, BinaryOperation, Star -from mindsdb_sql_parser.utils import JoinType - -from mindsdb.api.executor.planner import plan_query -from mindsdb.api.executor.planner.query_plan import QueryPlan -from mindsdb.api.executor.planner.step_result import Result -from mindsdb.api.executor.planner.steps import FetchDataframeStep, JoinStep, ApplyPredictorStep, UnionStep, QueryStep - - -class TestPlanUnion: - def test_plan_union_queries(self): - query1 = Select( - targets=[Identifier("column1"), Constant(None, alias=Identifier("predicted"))], - from_table=Identifier("int.tab"), - where=BinaryOperation( - "and", - args=[ - BinaryOperation("=", args=[Identifier("column1"), Identifier("column2")]), - BinaryOperation(">", args=[Identifier("column3"), Constant(0)]), - ], - ), - ) - - query2 = Select( - targets=[Identifier("tab1.column1"), Identifier("pred.predicted", alias=Identifier("predicted"))], - from_table=Join( - left=Identifier("int.tab1"), - right=Identifier("mindsdb.pred"), - join_type=JoinType.INNER_JOIN, - implicit=True, - ), - ) - - query = Union(left=query1, right=query2, unique=False) - expected_plan = QueryPlan( - steps=[ - # Query 1 - FetchDataframeStep( - integration="int", - query=Select( - targets=[ - Identifier("column1", alias=Identifier("column1")), - Constant(None, alias=Identifier("predicted")), - ], - from_table=Identifier("tab"), - where=BinaryOperation( - "and", - args=[ - BinaryOperation("=", args=[Identifier("column1"), Identifier("column2")]), - BinaryOperation(">", 
args=[Identifier("column3"), Constant(0)]), - ], - ), - ), - ), - # Query 2 (no column pruning with predictor joins) - FetchDataframeStep( - integration="int", - query=Select(targets=[Star()], from_table=Identifier("tab1")), - ), - ApplyPredictorStep(namespace="mindsdb", dataframe=Result(1), predictor=Identifier("pred")), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(left=Identifier("tab1"), right=Identifier("tab2"), join_type=JoinType.INNER_JOIN), - ), - QueryStep( - parse_sql("select tab1.column1, pred.predicted as predicted"), - from_table=Result(3), - strict_where=False, - ), - # Union - UnionStep(left=Result(0), right=Result(4), unique=False), - ], - ) - - plan = plan_query(query, integrations=["int"], predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - assert plan.steps == expected_plan.steps diff --git a/tests/unit/planner/test_prepared_statement.py b/tests/unit/planner/test_prepared_statement.py deleted file mode 100644 index 73882f0a91f..00000000000 --- a/tests/unit/planner/test_prepared_statement.py +++ /dev/null @@ -1,162 +0,0 @@ -import inspect - -from mindsdb.api.executor.planner import query_planner -from mindsdb.api.executor.planner import steps -from mindsdb.api.executor.sql_query.result_set import ResultSet, Column - -from tests.unit.planner import test_integration_select -from tests.unit.planner import test_join_predictor -from tests.unit.planner import test_join_tables -from tests.unit.planner import test_plan_union -from tests.unit.planner import test_select_from_predictor -from tests.unit.planner import test_ts_predictor - - -class FakeExecutor: - def list_cols_return(self, table_name, columns): - table_alias = ("int", table_name, table_name) - data = ResultSet() - for column in columns: - data.add_column( - Column(name=column["name"], type=column.get("type"), table_name=table_name, table_alias=table_alias) - ) - return data - - def execute(self, step): - if ( - isinstance(step, steps.ProjectStep) - or 
isinstance(step, steps.FetchDataframeStep) - or isinstance(step, steps.UnionStep) - ): - return [{"id": 1, "name": "asdf"}, {"id": 2, "name": "jkl;"}] - if isinstance(step, steps.GetTableColumns): - if step.table in ( - "table1", - "table2", - "tab", - "tab1", - "tab2", - "tab3", - "data.ny_output", - "data", - "yyy.zzz", - "sweat", - "schem.sweat", - "predictors", - "v1", - ): - cols = [ - {"name": "id", "type": "int"}, - {"name": "name", "type": "str"}, - {"name": "a column with spaces", "type": "str"}, - {"name": "column1", "type": "str"}, - {"name": "column2", "type": "str"}, - {"name": "column3", "type": "str"}, - {"name": "col1", "type": "str"}, - {"name": "asset", "type": "float"}, - {"name": "time", "type": "datetime"}, - {"name": "predicted", "type": "float"}, - {"name": "target", "type": "float"}, - {"name": "sqft", "type": "float"}, - {"name": "x", "type": "int"}, - ] - return self.list_cols_return(step.table, cols) - return None - if isinstance(step, steps.GetPredictorColumns): - name = step.predictor.parts[-1] - if name.isdigit(): - name = step.predictor.parts[-2] - - if name in ("pred", "tp3", "pr", "embedding_model"): - cols = [ - {"name": "id", "type": "int"}, - {"name": "value", "type": "str"}, - {"name": "predicted", "type": "int"}, - {"name": "x1", "type": "int"}, - {"name": "x2", "type": "int"}, - {"name": "y", "type": "int"}, - {"name": "time", "type": "datetime"}, - {"name": "price", "type": "float"}, - {"name": "target", "type": "float"}, - ] - name = step.predictor.parts[-1] - return self.list_cols_return(name, cols) - else: - return None - - -executor = FakeExecutor() - - -def plan_query_patch(query, **kwargs): - plan = query_planner.QueryPlanner(**kwargs) - - steps = [] - # get prepared statement - for step in plan.prepare_steps(query): - result = executor.execute(step) - step.set_result(result) - if hasattr(step, "result_data"): - step.result_data = None - - # not include prepared steps yet - # steps.append(step) - - # 
print(plan.get_statement_info()) # raises if prepare_steps doesn't execute - - params = [] - for step in plan.execute_steps(params): - result = executor.execute(step) - step.set_result(result) - if hasattr(step, "result_data"): - step.result_data = None - steps.append(step) - - # plan.fetch(10) - plan.steps = steps - return plan - - -test_integration_select.plan_query = plan_query_patch -test_join_predictor.plan_query = plan_query_patch -test_join_tables.plan_query = plan_query_patch -test_plan_union.plan_query = plan_query_patch -test_select_from_predictor.plan_query = plan_query_patch -test_ts_predictor.plan_query = plan_query_patch - - -class TestPreparedStatement: - def test_from_planner_tests(self): - for module in ( - test_integration_select, - test_join_predictor, - test_join_tables, - test_plan_union, - test_select_from_predictor, - test_ts_predictor, - ): - for class_name, klass in inspect.getmembers(module, predicate=inspect.isclass): - if not class_name.startswith("Test"): - continue - - tests = klass() - for test_name, test_method in inspect.getmembers(tests, predicate=inspect.ismethod): - if not test_name.startswith("test_") or test_name.endswith("_error"): - continue - if test_name in ("test_native_query_no_sub_select",): - # skipped tests - continue - try: - test_method() - except query_planner.PlanningException as e: - if str(e) == "Predictor must be last table in query": - # TODO replace tables in tests: to predictor as left table - pass - elif str(e) == "Predictor is not at first level": - # TODO make prepared statement planner more sophisticated - pass - elif str(e).startswith("Table is not found "): - # Prepared statement planner doesn't resolve columns for nested tables. 
- pass - else: - raise e diff --git a/tests/unit/planner/test_select_from_predictor.py b/tests/unit/planner/test_select_from_predictor.py deleted file mode 100644 index 85ccf4af365..00000000000 --- a/tests/unit/planner/test_select_from_predictor.py +++ /dev/null @@ -1,487 +0,0 @@ -import pytest - -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast import Identifier, Select, Constant, Star, Parameter, BinaryOperation - -from mindsdb.api.executor.planner.exceptions import PlanningException -from mindsdb.api.executor.planner import plan_query -from mindsdb.api.executor.planner.query_plan import QueryPlan -from mindsdb.api.executor.planner.step_result import Result -from mindsdb.api.executor.planner.steps import ( - ProjectStep, - ApplyPredictorRowStep, - GetPredictorColumns, - FetchDataframeStep, -) - - -class TestPlanSelectFromPredictor: - def test_select_from_predictor_plan(self): - query = Select( - targets=[Star()], - from_table=Identifier("mindsdb.pred"), - where=BinaryOperation( - op="and", - args=[ - BinaryOperation(op="=", args=[Identifier("x1"), Constant(1)]), - BinaryOperation(op="=", args=[Identifier("x2"), Constant("2")]), - ], - ), - ) - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - ApplyPredictorRowStep(namespace="mindsdb", predictor=Identifier("pred"), row_dict={"x1": 1, "x2": "2"}), - ], - ) - - plan = plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - assert plan.steps == expected_plan.steps - - def test_select_from_predictor_negative_constant(self): - query = parse_sql( - """ - select * from mindsdb.pred - where x1 = -1 - """ - ) - - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - ApplyPredictorRowStep( - namespace="mindsdb", - predictor=Identifier("pred"), - row_dict={ - "x1": -1, - }, - ), - ], - ) - - plan = plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - assert plan.steps == expected_plan.steps - - def 
test_select_from_predictor_plan_other_ml(self): - query = parse_sql( - """ - select * from mlflow.pred - where x1 = 1 and x2 = '2' - """ - ) - - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - ApplyPredictorRowStep(namespace="mlflow", predictor=Identifier("pred"), row_dict={"x1": 1, "x2": "2"}), - ], - ) - - plan = plan_query(query, predictor_metadata=[{"name": "pred", "integration_name": "mlflow"}]) - - assert plan.steps == expected_plan.steps - - def test_select_from_predictor_aliases_in_project(self): - query = Select( - targets=[ - Identifier("tb.x1", alias=Identifier("col1")), - Identifier("tb.x2", alias=Identifier("col2")), - Identifier("tb.y", alias=Identifier("predicted")), - ], - from_table=Identifier("mindsdb.pred", alias=Identifier("tb")), - where=BinaryOperation( - op="and", - args=[ - BinaryOperation(op="=", args=[Identifier("tb.x1"), Constant(1)]), - BinaryOperation(op="=", args=[Identifier("tb.x2"), Constant("2")]), - ], - ), - ) - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - ApplyPredictorRowStep( - namespace="mindsdb", - predictor=Identifier("pred", alias=Identifier("tb")), - row_dict={"x1": 1, "x2": "2"}, - ), - ProjectStep( - dataframe=Result(0), - columns=[ - Identifier("tb.x1", alias=Identifier("col1")), - Identifier("tb.x2", alias=Identifier("col2")), - Identifier("tb.y", alias=Identifier("predicted")), - ], - ), - ], - ) - - plan = plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - assert plan.steps == expected_plan.steps - - def test_select_from_predictor_plan_predictor_alias(self): - query = Select( - targets=[Star()], - from_table=Identifier("mindsdb.pred", alias=Identifier("pred_alias")), - where=BinaryOperation( - op="and", - args=[ - BinaryOperation(op="=", args=[Identifier("pred_alias.x1"), Constant(1)]), - BinaryOperation(op="=", args=[Identifier("pred_alias.x2"), Constant("2")]), - ], - ), - ) - expected_plan = QueryPlan( - 
predictor_namespace="mindsdb", - steps=[ - ApplyPredictorRowStep( - namespace="mindsdb", - predictor=Identifier("pred", alias=Identifier("pred_alias")), - row_dict={"x1": 1, "x2": "2"}, - ), - ], - ) - - plan = plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - assert plan.steps == expected_plan.steps - - def test_select_from_predictor_plan_verbose_col_names(self): - query = Select( - targets=[Star()], - from_table=Identifier("mindsdb.pred"), - where=BinaryOperation( - op="and", - args=[ - BinaryOperation(op="=", args=[Identifier("pred.x1"), Constant(1)]), - BinaryOperation(op="=", args=[Identifier("pred.x2"), Constant("2")]), - ], - ), - ) - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - ApplyPredictorRowStep(namespace="mindsdb", predictor=Identifier("pred"), row_dict={"x1": 1, "x2": "2"}), - ProjectStep(dataframe=Result(0), columns=[Star()]), - ], - ) - - plan = plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_select_from_predictor_plan_group_by_error(self): - query = Select( - targets=[Identifier("x1"), Identifier("x2"), Identifier("pred.y")], - from_table=Identifier("mindsdb.pred"), - group_by=[Identifier("x1")], - ) - with pytest.raises(PlanningException): - plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - def test_select_from_predictor_wrong_where_op_error(self): - query = Select( - targets=[Star()], - from_table=Identifier("mindsdb.pred"), - where=BinaryOperation( - op="and", - args=[ - BinaryOperation(op=">", args=[Identifier("x1"), Constant(1)]), - BinaryOperation(op="=", args=[Identifier("x2"), Constant("2")]), - ], - ), - ) - - with pytest.raises(PlanningException): - plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - def test_select_from_predictor_multiple_values_error(self): - query = Select( 
- targets=[Star()], - from_table=Identifier("mindsdb.pred"), - where=BinaryOperation( - op="and", - args=[ - BinaryOperation(op="=", args=[Identifier("x1"), Constant(1)]), - BinaryOperation(op="=", args=[Identifier("x1"), Constant("2")]), - ], - ), - ) - - with pytest.raises(PlanningException): - plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - def test_select_from_predictor_no_where_error(self): - query = Select(targets=[Star()], from_table=Identifier("mindsdb.pred")) - - with pytest.raises(PlanningException): - plan_query(query, predictor_namespace="mindsdb", predictor_metadata={"pred": {}}) - - def test_select_from_predictor_default_namespace(self): - query = Select( - targets=[Star()], - from_table=Identifier("pred"), - where=BinaryOperation( - op="and", - args=[ - BinaryOperation(op="=", args=[Identifier("x1"), Constant(1)]), - BinaryOperation(op="=", args=[Identifier("x2"), Constant("2")]), - ], - ), - ) - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - default_namespace="mindsdb", - steps=[ - ApplyPredictorRowStep(namespace="mindsdb", predictor=Identifier("pred"), row_dict={"x1": 1, "x2": "2"}), - ], - ) - - plan = plan_query( - query, predictor_namespace="mindsdb", default_namespace="mindsdb", predictor_metadata={"pred": {}} - ) - - assert plan.steps == expected_plan.steps - - def test_select_from_predictor_get_columns(self): - sql = "SELECT GDP_per_capita_USD FROM hdi_predictor_external WHERE 1 = 0" - query = parse_sql(sql) - - expected_query = Select( - targets=[Identifier("GDP_per_capita_USD")], - from_table=Identifier("hdi_predictor_external"), - where=BinaryOperation(op="=", args=[Constant(1), Constant(0)]), - ) - assert query.to_tree() == expected_query.to_tree() - - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - default_namespace="mindsdb", - steps=[ - GetPredictorColumns(namespace="mindsdb", predictor=Identifier("hdi_predictor_external")), - ProjectStep(dataframe=Result(0), 
columns=[Identifier("GDP_per_capita_USD")]), - ], - ) - - plan = plan_query( - query, - predictor_namespace="mindsdb", - default_namespace="mindsdb", - predictor_metadata={"hdi_predictor_external": {}}, - ) - - assert plan.steps == expected_plan.steps - - def test_using_predictor_version(self): - query = parse_sql( - """ - select * from mindsdb.pred.21 - where x1 = 1 - """ - ) - - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - ApplyPredictorRowStep( - namespace="mindsdb", predictor=Identifier(parts=["pred", "21"]), row_dict={"x1": 1} - ) - ], - ) - - plan = plan_query(query, predictor_metadata=[{"name": "pred", "integration_name": "mindsdb"}]) - - assert plan.steps == expected_plan.steps - - def test_select_from_predictor_subselect(self): - query = parse_sql( - """ - select * from mindsdb.pred.21 - where x1 = (select id from int1.t1) - """ - ) - - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - FetchDataframeStep( - integration="int1", - query=parse_sql("select id as id from t1"), - ), - ApplyPredictorRowStep( - namespace="mindsdb", - predictor=Identifier(parts=["pred", "21"]), - row_dict={"x1": Parameter(Result(0))}, - ), - ], - ) - - plan = plan_query( - query, integrations=["int1"], predictor_metadata=[{"name": "pred", "integration_name": "mindsdb"}] - ) - - assert plan.steps == expected_plan.steps - - def test_select_from_view_subselect(self): - query = parse_sql( - """ - select * from v1 - where x1 in (select id from int1.tab1) - """ - ) - - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - FetchDataframeStep( - integration="int1", - query=parse_sql("select id as id from tab1"), - ), - FetchDataframeStep( - integration="mindsdb", - query=Select( - targets=[Star()], - from_table=Identifier("v1"), - where=BinaryOperation(op="in", args=[Identifier(parts=["x1"]), Parameter(Result(0))]), - ), - ), - ], - ) - - plan = plan_query( - query, - integrations=["int1"], - default_namespace="mindsdb", - 
predictor_metadata=[{"name": "pred", "integration_name": "mindsdb"}], - ) - - assert plan.steps == expected_plan.steps - - def test_select_from_view_subselect_view(self): - query = parse_sql( - """ - select * from v1 - where x1 in (select v2.id from v2) - """ - ) - - expected_plan = QueryPlan( - predictor_namespace="mindsdb", - steps=[ - FetchDataframeStep( - integration="mindsdb", - query=parse_sql("select v2.id as id from v2"), - ), - FetchDataframeStep( - integration="mindsdb", - query=Select( - targets=[Star()], - from_table=Identifier("v1"), - where=BinaryOperation(op="in", args=[Identifier(parts=["x1"]), Parameter(Result(0))]), - ), - ), - ], - ) - - plan = plan_query(query, integrations=[], default_namespace="mindsdb", predictor_metadata=[]) - - assert plan.steps == expected_plan.steps - - -class TestMLSelect: - def test_select_from_predictor_plan_other_ml(self): - # sends to integrations - query = parse_sql(""" select * from mlflow.predictors """) - - expected_plan = QueryPlan( - steps=[FetchDataframeStep(step_num=0, integration="mlflow", query=parse_sql("SELECT * FROM predictors"))], - ) - - plan = plan_query(query, predictor_metadata=[], integrations=["mlflow"]) - - assert plan.steps == expected_plan.steps - - -class TestNestedSelect: - def test_using_predictor_in_subselect(self): - """ - Use predictor in subselect when selecting from integration - """ - sql = """ - SELECT * - FROM vectordb.test_tabl - WHERE - search_vector = ( - SELECT emebddings - FROM mindsdb.embedding_model - WHERE - content = 'some text' - ) - """ - ast_tree = parse_sql(sql) - plan = plan_query( - ast_tree, - integrations=["vectordb"], - predictor_metadata=[{"name": "embedding_model", "integration_name": "mindsdb"}], - ) - - expected_plan = [ - ApplyPredictorRowStep( - step_num=0, - namespace="mindsdb", - predictor=Identifier(parts=["embedding_model"]), - row_dict={"content": "some text"}, - ), - ProjectStep(step_num=1, dataframe=Result(0), 
columns=[Identifier(parts=["emebddings"])]), - FetchDataframeStep( - step_num=2, - integration="vectordb", - query=Select( - targets=[Star()], - from_table=Identifier(parts=["test_tabl"]), - where=BinaryOperation(op="=", args=[Identifier(parts=["search_vector"]), Parameter(Result(1))]), - ), - ), - ] - - assert plan.steps == expected_plan - - def test_using_integration_in_subselect(self): - """ - Use integration in subselect when selecting from predictor - """ - sql = """ - - SELECT * - FROM mindsdb.embedding_model - WHERE - content = ( - SELECT content - FROM vectordb.test_tabl - LIMIT 1 - ) - """ - ast_tree = parse_sql(sql) - plan = plan_query( - ast_tree, - integrations=["vectordb"], - predictor_metadata=[{"name": "embedding_model", "integration_name": "mindsdb"}], - ) - - expected_plan = [ - FetchDataframeStep( - step_num=0, integration="vectordb", query=parse_sql("SELECT content AS content FROM test_tabl LIMIT 1") - ), - ApplyPredictorRowStep( - step_num=1, - namespace="mindsdb", - predictor=Identifier(parts=["embedding_model"]), - row_dict={"content": Parameter(Result(0))}, - ), - ] - - assert plan.steps == expected_plan diff --git a/tests/unit/planner/test_ts_predictor.py b/tests/unit/planner/test_ts_predictor.py deleted file mode 100644 index 75ecd6af341..00000000000 --- a/tests/unit/planner/test_ts_predictor.py +++ /dev/null @@ -1,1801 +0,0 @@ -import copy - -import pytest - -from mindsdb_sql_parser import parse_sql, NativeQuery, OrderBy, NullConstant -from mindsdb_sql_parser.ast import Select, Star, Identifier, Join, Constant, BinaryOperation, Update, BetweenOperation -from mindsdb_sql_parser.ast.mindsdb import Latest -from mindsdb_sql_parser.utils import JoinType - -from mindsdb.api.executor.planner.exceptions import PlanningException -from mindsdb.api.executor.planner import plan_query -from mindsdb.api.executor.planner.query_plan import QueryPlan -from mindsdb.api.executor.planner.step_result import Result -from mindsdb.api.executor.planner.steps 
import ( - JoinStep, - SaveToTable, - ProjectStep, - InsertToTable, - MapReduceStep, - MultipleSteps, - UpdateToTable, - LimitOffsetStep, - FetchDataframeStep, - ApplyTimeseriesPredictorStep, - SubSelectStep, -) - - -class TestJoinTimeseriesPredictor: - def test_join_predictor_timeseries(self): - predictor_window = 10 - group_by_column = "vendor_id" - query = Select( - targets=[Star()], - from_table=Join( - left=Identifier("mysql.data.ny_output", alias=Identifier("ta")), - right=Identifier("mindsdb.tp3", alias=Identifier("tb")), - join_type="left join", - ), - ) - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep( - integration="mysql", - query=Select( - targets=[Identifier(parts=[group_by_column], alias=Identifier(group_by_column))], - from_table=Identifier("data.ny_output", alias=Identifier("ta")), - distinct=True, - ), - ), - MapReduceStep( - values=Result(0), - reduce="union", - step=FetchDataframeStep( - integration="mysql", - query=parse_sql( - "SELECT * FROM data.ny_output AS ta\ - WHERE pickup_hour is not null and vendor_id = '$var[vendor_id]' ORDER BY pickup_hour DESC" - ), - ), - ), - ApplyTimeseriesPredictorStep( - namespace="mindsdb", predictor=Identifier("tp3", alias=Identifier("tb")), dataframe=Result(1) - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(right=Identifier("result_2"), left=Identifier("result_1"), join_type=JoinType.LEFT_JOIN), - ), - ProjectStep(dataframe=Result(3), columns=[Star()]), - ], - ) - - plan = plan_query( - query, - integrations=["mysql"], - predictor_namespace="mindsdb", - predictor_metadata={ - "tp3": { - "timeseries": True, - "order_by_column": "pickup_hour", - "group_by_columns": [group_by_column], - "window": predictor_window, - } - }, - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_join_predictor_timeseries_other_ml(self): - predictor_window = 10 - group_by_column = "vendor_id" - query = parse_sql("select * from mysql.data.ny_output ta 
left join mlflow.tp3 tb") - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep( - integration="mysql", - query=Select( - targets=[Identifier(parts=[group_by_column], alias=Identifier(group_by_column))], - from_table=Identifier("data.ny_output", alias=Identifier("ta")), - distinct=True, - ), - ), - MapReduceStep( - values=Result(0), - reduce="union", - step=FetchDataframeStep( - integration="mysql", - query=parse_sql( - "SELECT * FROM data.ny_output AS ta\ - WHERE pickup_hour is not null and vendor_id = '$var[vendor_id]' ORDER BY pickup_hour DESC" - ), - ), - ), - ApplyTimeseriesPredictorStep( - namespace="mlflow", predictor=Identifier("tp3", alias=Identifier("tb")), dataframe=Result(1) - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(right=Identifier("result_2"), left=Identifier("result_1"), join_type=JoinType.LEFT_JOIN), - ), - ProjectStep(dataframe=Result(3), columns=[Star()]), - ], - ) - - plan = plan_query( - query, - integrations=["mysql", "mlflow"], - predictor_metadata=[ - { - "timeseries": True, - "name": "tp3", - "integration_name": "mlflow", - "order_by_column": "pickup_hour", - "group_by_columns": [group_by_column], - "window": predictor_window, - } - ], - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_join_predictor_timeseries_select_table_columns(self): - predictor_window = 10 - group_by_column = "vendor_id" - query = Select( - targets=[ - Identifier("ta.target", alias=Identifier("y_true")), - Identifier("tb.target", alias=Identifier("y_pred")), - ], - from_table=Join( - left=Identifier("mysql.data.ny_output", alias=Identifier("ta")), - right=Identifier("mindsdb.tp3", alias=Identifier("tb")), - join_type="left join", - ), - ) - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep( - integration="mysql", - query=Select( - targets=[Identifier(parts=[group_by_column], alias=Identifier(group_by_column))], - from_table=Identifier("data.ny_output", 
alias=Identifier("ta")), - distinct=True, - ), - ), - MapReduceStep( - values=Result(0), - reduce="union", - step=FetchDataframeStep( - integration="mysql", - query=parse_sql( - "SELECT * FROM data.ny_output AS ta\ - WHERE pickup_hour is not null and vendor_id = '$var[vendor_id]' ORDER BY pickup_hour DESC" - ), - ), - ), - ApplyTimeseriesPredictorStep( - namespace="mindsdb", predictor=Identifier("tp3", alias=Identifier("tb")), dataframe=Result(1) - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(right=Identifier("result_2"), left=Identifier("result_1"), join_type=JoinType.LEFT_JOIN), - ), - ProjectStep( - dataframe=Result(3), - columns=[ - Identifier("ta.target", alias=Identifier("y_true")), - Identifier("tb.target", alias=Identifier("y_pred")), - ], - ), - ], - ) - - plan = plan_query( - query, - integrations=["mysql"], - predictor_namespace="mindsdb", - predictor_metadata={ - "tp3": { - "timeseries": True, - "order_by_column": "pickup_hour", - "group_by_columns": [group_by_column], - "window": predictor_window, - } - }, - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_join_predictor_timeseries_query_with_limit(self): - predictor_window = 10 - group_by_column = "vendor_id" - query = Select( - targets=[Star()], - from_table=Join( - left=Identifier("mysql.data.ny_output", alias=Identifier("ta")), - right=Identifier("mindsdb.tp3", alias=Identifier("tb")), - join_type="left join", - ), - limit=Constant(1000), - ) - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep( - integration="mysql", - query=Select( - targets=[Identifier(parts=[group_by_column], alias=Identifier(group_by_column))], - from_table=Identifier("data.ny_output", alias=Identifier("ta")), - distinct=True, - ), - ), - MapReduceStep( - values=Result(0), - reduce="union", - step=FetchDataframeStep( - integration="mysql", - query=parse_sql( - "SELECT * FROM data.ny_output AS ta\ - WHERE pickup_hour is not null and vendor_id = 
'$var[vendor_id]' ORDER BY pickup_hour DESC" - ), - ), - ), - ApplyTimeseriesPredictorStep( - namespace="mindsdb", predictor=Identifier("tp3", alias=Identifier("tb")), dataframe=Result(1) - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(right=Identifier("result_2"), left=Identifier("result_1"), join_type=JoinType.LEFT_JOIN), - ), - LimitOffsetStep(dataframe=Result(3), limit=query.limit.value), - ProjectStep(dataframe=Result(4), columns=[Star()]), - ], - ) - - plan = plan_query( - query, - integrations=["mysql"], - predictor_namespace="mindsdb", - predictor_metadata={ - "tp3": { - "timeseries": True, - "order_by_column": "pickup_hour", - "group_by_columns": [group_by_column], - "window": predictor_window, - } - }, - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_join_predictor_timeseries_filter_by_group_by_column(self): - predictor_window = 10 - group_by_column = "vendor_id" - query = Select( - targets=[Star()], - from_table=Join( - left=Identifier("mysql.data.ny_output", alias=Identifier("ta")), - right=Identifier("mindsdb.tp3", alias=Identifier("tb")), - join_type="left join", - ), - where=BinaryOperation("=", args=[Identifier("ta.vendor_id"), Constant(1)]), - ) - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep( - integration="mysql", - query=Select( - targets=[Identifier(parts=[group_by_column], alias=Identifier(group_by_column))], - from_table=Identifier("data.ny_output", alias=Identifier("ta")), - where=BinaryOperation("=", args=[Identifier("vendor_id"), Constant(1)]), - distinct=True, - ), - ), - MapReduceStep( - values=Result(0), - reduce="union", - step=FetchDataframeStep( - integration="mysql", - query=parse_sql( - "SELECT * FROM data.ny_output AS ta\ - WHERE vendor_id = 1 AND pickup_hour is not null and vendor_id = '$var[vendor_id]' \ - ORDER BY pickup_hour DESC" - ), - ), - ), - ApplyTimeseriesPredictorStep( - namespace="mindsdb", predictor=Identifier("tp3", 
alias=Identifier("tb")), dataframe=Result(1) - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(right=Identifier("result_2"), left=Identifier("result_1"), join_type=JoinType.LEFT_JOIN), - ), - ProjectStep(dataframe=Result(3), columns=[Star()]), - ], - ) - - plan = plan_query( - query, - integrations=["mysql"], - predictor_namespace="mindsdb", - predictor_metadata={ - "tp3": { - "timeseries": True, - "order_by_column": "pickup_hour", - "group_by_columns": [group_by_column], - "window": predictor_window, - } - }, - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_join_predictor_timeseries_latest(self): - predictor_window = 5 - group_by_column = "vendor_id" - query = Select( - targets=[Star()], - from_table=Join( - left=Identifier("mysql.data.ny_output", alias=Identifier("ta")), - right=Identifier("mindsdb.tp3", alias=Identifier("tb")), - join_type=JoinType.LEFT_JOIN, - implicit=True, - ), - where=BinaryOperation( - "and", - args=[ - BinaryOperation(">", args=[Identifier("ta.pickup_hour"), Latest()]), - BinaryOperation("=", args=[Identifier("ta.vendor_id"), Constant(1)]), - ], - ), - ) - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep( - integration="mysql", - query=Select( - targets=[Identifier(parts=[group_by_column], alias=Identifier(group_by_column))], - from_table=Identifier("data.ny_output", alias=Identifier("ta")), - where=BinaryOperation("=", args=[Identifier("vendor_id"), Constant(1)]), - distinct=True, - ), - ), - MapReduceStep( - values=Result(0), - reduce="union", - step=FetchDataframeStep( - integration="mysql", - query=parse_sql( - f"SELECT * FROM data.ny_output AS ta\ - WHERE vendor_id = 1 AND pickup_hour is not null and vendor_id = '$var[vendor_id]'\ - ORDER BY pickup_hour DESC LIMIT {predictor_window}" - ), - ), - ), - ApplyTimeseriesPredictorStep( - output_time_filter=BinaryOperation(">", args=[Identifier("pickup_hour"), Latest()]), - namespace="mindsdb", - 
predictor=Identifier("tp3", alias=Identifier("tb")), - dataframe=Result(1), - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(right=Identifier("result_2"), left=Identifier("result_1"), join_type=JoinType.LEFT_JOIN), - ), - ProjectStep(dataframe=Result(3), columns=[Star()]), - ], - ) - - plan = plan_query( - query, - integrations=["mysql"], - predictor_namespace="mindsdb", - predictor_metadata={ - "tp3": { - "timeseries": True, - "order_by_column": "pickup_hour", - "group_by_columns": [group_by_column], - "window": predictor_window, - } - }, - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_join_predictor_timeseries_between(self): - predictor_window = 5 - group_by_column = "vendor_id" - query = parse_sql( - "SELECT * FROM mysql.data.ny_output AS ta\ - left join mindsdb.tp3 AS tb\ - WHERE ta.pickup_hour BETWEEN 1 AND 10 AND ta.vendor_id = 1" - ) - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep( - integration="mysql", - query=parse_sql( - "SELECT DISTINCT vendor_id AS vendor_id FROM data.ny_output AS ta\ - WHERE vendor_id = 1" - ), - ), - MapReduceStep( - values=Result(0), - reduce="union", - step=MultipleSteps( - reduce="union", - steps=[ - FetchDataframeStep( - integration="mysql", - query=parse_sql( - f"SELECT * FROM data.ny_output AS ta \ - WHERE pickup_hour < 1 AND vendor_id = 1 and pickup_hour is not null \ - AND vendor_id = '$var[vendor_id]' \ - ORDER BY pickup_hour DESC LIMIT {predictor_window}" - ), - ), - FetchDataframeStep( - integration="mysql", - query=parse_sql( - "SELECT * FROM data.ny_output AS ta\ - WHERE pickup_hour BETWEEN 1 AND 10 AND vendor_id = 1 and pickup_hour is not null \ - AND vendor_id = '$var[vendor_id]' ORDER BY pickup_hour DESC" - ), - ), - ], - ), - ), - ApplyTimeseriesPredictorStep( - output_time_filter=BetweenOperation( - args=[Identifier("pickup_hour"), Constant(1), Constant(10)], - ), - namespace="mindsdb", - predictor=Identifier("tp3", 
alias=Identifier("tb")), - dataframe=Result(1), - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(right=Identifier("result_2"), left=Identifier("result_1"), join_type=JoinType.LEFT_JOIN), - ), - ProjectStep(dataframe=Result(3), columns=[Star()]), - ], - ) - - plan = plan_query( - query, - integrations=["mysql"], - predictor_namespace="mindsdb", - predictor_metadata={ - "tp3": { - "timeseries": True, - "order_by_column": "pickup_hour", - "group_by_columns": [group_by_column], - "window": predictor_window, - } - }, - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_join_predictor_timeseries_concrete_date_greater(self): - predictor_window = 10 - group_by_column = "vendor_id" - - sql = "select * from mysql.data.ny_output as ta left join mindsdb.tp3 as tb where ta.pickup_hour > 10 and ta.vendor_id = 1" - - query = parse_sql(sql) - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep( - integration="mysql", - query=Select( - targets=[Identifier(parts=[group_by_column], alias=Identifier(group_by_column))], - from_table=Identifier("data.ny_output", alias=Identifier("ta")), - where=BinaryOperation("=", args=[Identifier("vendor_id"), Constant(1)]), - distinct=True, - ), - ), - MapReduceStep( - values=Result(0), - reduce="union", - step=MultipleSteps( - reduce="union", - steps=[ - FetchDataframeStep( - integration="mysql", - query=parse_sql( - f"SELECT * FROM data.ny_output AS ta \ - WHERE pickup_hour <= 10 AND vendor_id = 1 and pickup_hour is not null \ - AND vendor_id = '$var[vendor_id]' ORDER BY pickup_hour DESC LIMIT {predictor_window}" - ), - ), - FetchDataframeStep( - integration="mysql", - query=parse_sql( - "SELECT * FROM data.ny_output AS ta \ - WHERE pickup_hour > 10 AND vendor_id = 1 and pickup_hour is not null \ - AND vendor_id = '$var[vendor_id]' ORDER BY pickup_hour DESC" - ), - ), - ], - ), - ), - ApplyTimeseriesPredictorStep( - output_time_filter=BinaryOperation(">", 
args=[Identifier("pickup_hour"), Constant(10)]), - namespace="mindsdb", - predictor=Identifier("tp3", alias=Identifier("tb")), - dataframe=Result(1), - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(right=Identifier("result_2"), left=Identifier("result_1"), join_type=JoinType.LEFT_JOIN), - ), - ProjectStep(dataframe=Result(3), columns=[Star()]), - ], - ) - - plan = plan_query( - query, - integrations=["mysql"], - predictor_namespace="mindsdb", - predictor_metadata={ - "tp3": { - "timeseries": True, - "order_by_column": "pickup_hour", - "group_by_columns": [group_by_column], - "window": predictor_window, - } - }, - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_join_predictor_timeseries_concrete_date_greater_2_group_fields(self): - predictor_window = 10 - - sql = "select * from mysql.data.ny_output as ta left join mindsdb.tp3 as tb\ - where ta.pickup_hour > 10 and ta.vendor_id = 1 and ta.type = 2" - - query = parse_sql(sql) - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep( - integration="mysql", - query=parse_sql( - """ - select distinct vendor_id as vendor_id, type as type - from data.ny_output as ta - where vendor_id = 1 and type = 2 - """ - ), - ), - MapReduceStep( - values=Result(0), - reduce="union", - step=MultipleSteps( - reduce="union", - steps=[ - FetchDataframeStep( - integration="mysql", - query=parse_sql( - f"SELECT * FROM data.ny_output AS ta \ - WHERE pickup_hour <= 10 AND vendor_id = 1 and type = 2 and pickup_hour is not null \ - AND vendor_id = '$var[vendor_id]' AND type = '$var[type]'\ - ORDER BY pickup_hour DESC LIMIT {predictor_window}" - ), - ), - FetchDataframeStep( - integration="mysql", - query=parse_sql( - "SELECT * FROM data.ny_output AS ta \ - WHERE pickup_hour > 10 AND vendor_id = 1 and type = 2 and pickup_hour is not null \ - AND vendor_id = '$var[vendor_id]' AND type = '$var[type]'\ - ORDER BY pickup_hour DESC" - ), - ), - ], - ), - ), - 
ApplyTimeseriesPredictorStep( - output_time_filter=BinaryOperation(">", args=[Identifier("pickup_hour"), Constant(10)]), - namespace="mindsdb", - predictor=Identifier("tp3", alias=Identifier("tb")), - dataframe=Result(1), - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(right=Identifier("result_2"), left=Identifier("result_1"), join_type=JoinType.LEFT_JOIN), - ), - ProjectStep(dataframe=Result(3), columns=[Star()]), - ], - ) - - plan = plan_query( - query, - integrations=["mysql"], - predictor_namespace="mindsdb", - predictor_metadata={ - "tp3": { - "timeseries": True, - "order_by_column": "pickup_hour", - "group_by_columns": ["vendor_id", "type"], - "window": predictor_window, - } - }, - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_join_predictor_timeseries_concrete_date_greater_or_equal(self): - predictor_window = 10 - group_by_column = "vendor_id" - - sql = "select * from mysql.data.ny_output as ta left join mindsdb.tp3 as tb where ta.pickup_hour >= 10 and ta.vendor_id = 1" - - query = parse_sql(sql) - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep( - integration="mysql", - query=Select( - targets=[Identifier(parts=[group_by_column], alias=Identifier(group_by_column))], - from_table=Identifier("data.ny_output", alias=Identifier("ta")), - where=BinaryOperation("=", args=[Identifier("vendor_id"), Constant(1)]), - distinct=True, - ), - ), - MapReduceStep( - values=Result(0), - reduce="union", - step=MultipleSteps( - reduce="union", - steps=[ - FetchDataframeStep( - integration="mysql", - query=parse_sql( - f"SELECT * FROM data.ny_output AS ta\ - WHERE pickup_hour < 10 AND vendor_id = 1 AND pickup_hour is not null and\ - vendor_id = '$var[vendor_id]' ORDER BY pickup_hour DESC LIMIT {predictor_window}" - ), - ), - FetchDataframeStep( - integration="mysql", - query=parse_sql( - "SELECT * FROM data.ny_output AS ta\ - WHERE pickup_hour >= 10 AND vendor_id = 1 AND pickup_hour is not 
null and\ - vendor_id = '$var[vendor_id]' ORDER BY pickup_hour DESC" - ), - ), - ], - ), - ), - ApplyTimeseriesPredictorStep( - output_time_filter=BinaryOperation(">=", args=[Identifier("pickup_hour"), Constant(10)]), - namespace="mindsdb", - predictor=Identifier("tp3", alias=Identifier("tb")), - dataframe=Result(1), - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(right=Identifier("result_2"), left=Identifier("result_1"), join_type=JoinType.LEFT_JOIN), - ), - ProjectStep(dataframe=Result(3), columns=[Star()]), - ], - ) - - plan = plan_query( - query, - integrations=["mysql"], - predictor_namespace="mindsdb", - predictor_metadata={ - "tp3": { - "timeseries": True, - "order_by_column": "pickup_hour", - "group_by_columns": [group_by_column], - "window": predictor_window, - } - }, - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_join_predictor_timeseries_concrete_date_less(self): - predictor_window = 10 - group_by_column = "vendor_id" - - sql = "select * from mysql.data.ny_output as ta join mindsdb.tp3 as tb where ta.pickup_hour < 10 and ta.vendor_id = 1" - - query = parse_sql(sql) - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep( - integration="mysql", - query=Select( - targets=[Identifier(parts=[group_by_column], alias=Identifier(group_by_column))], - from_table=Identifier("data.ny_output", alias=Identifier("ta")), - where=BinaryOperation("=", args=[Identifier("vendor_id"), Constant(1)]), - distinct=True, - ), - ), - MapReduceStep( - values=Result(0), - reduce="union", - step=FetchDataframeStep( - integration="mysql", - query=parse_sql( - "SELECT * FROM data.ny_output AS ta \ - WHERE pickup_hour < 10 AND vendor_id = 1 AND pickup_hour is not null and\ - vendor_id = '$var[vendor_id]' \ - ORDER BY pickup_hour DESC" - ), - ), - ), - ApplyTimeseriesPredictorStep( - output_time_filter=BinaryOperation("<", args=[Identifier("pickup_hour"), Constant(10)]), - namespace="mindsdb", - 
predictor=Identifier("tp3", alias=Identifier("tb")), - dataframe=Result(1), - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(right=Identifier("result_2"), left=Identifier("result_1"), join_type=JoinType.JOIN), - ), - ProjectStep(dataframe=Result(3), columns=[Star()]), - ], - ) - - plan = plan_query( - query, - integrations=["mysql"], - predictor_namespace="mindsdb", - predictor_metadata={ - "tp3": { - "timeseries": True, - "order_by_column": "pickup_hour", - "group_by_columns": [group_by_column], - "window": predictor_window, - } - }, - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_join_predictor_timeseries_concrete_date_less_or_equal(self): - predictor_window = 10 - group_by_column = "vendor_id" - - sql = "select * from mysql.data.ny_output as ta left join mindsdb.tp3 as tb where ta.pickup_hour <= 10 and ta.vendor_id = 1" - - query = parse_sql(sql) - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep( - integration="mysql", - query=Select( - targets=[Identifier(parts=[group_by_column], alias=Identifier(group_by_column))], - from_table=Identifier("data.ny_output", alias=Identifier("ta")), - where=BinaryOperation("=", args=[Identifier("vendor_id"), Constant(1)]), - distinct=True, - ), - ), - MapReduceStep( - values=Result(0), - reduce="union", - step=FetchDataframeStep( - integration="mysql", - query=parse_sql( - "SELECT * FROM data.ny_output AS ta\ - WHERE pickup_hour <= 10 AND vendor_id = 1 AND pickup_hour is not null and\ - vendor_id = '$var[vendor_id]'\ - ORDER BY pickup_hour DESC" - ), - ), - ), - ApplyTimeseriesPredictorStep( - output_time_filter=BinaryOperation("<=", args=[Identifier("pickup_hour"), Constant(10)]), - namespace="mindsdb", - predictor=Identifier("tp3", alias=Identifier("tb")), - dataframe=Result(1), - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(left=Identifier("result_1"), right=Identifier("result_2"), join_type=JoinType.LEFT_JOIN), - ), - 
ProjectStep(dataframe=Result(3), columns=[Star()]), - ], - ) - - plan = plan_query( - query, - integrations=["mysql"], - predictor_namespace="mindsdb", - predictor_metadata={ - "tp3": { - "timeseries": True, - "order_by_column": "pickup_hour", - "group_by_columns": [group_by_column], - "window": predictor_window, - } - }, - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_join_predictor_timeseries_concrete_date_equal(self): - predictor_window = 10 - group_by_column = "vendor_id" - - sql = """ - select * from - mysql.data.ny_output as ta - join mindsdb.tp3 as tb - where - ta.pickup_hour = 10 - and ta.vendor_id = 1 - """ - - query = parse_sql(sql) - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep( - integration="mysql", - query=Select( - targets=[Identifier(parts=[group_by_column], alias=Identifier(group_by_column))], - from_table=Identifier("data.ny_output", alias=Identifier("ta")), - where=BinaryOperation("=", args=[Identifier("vendor_id"), Constant(1)]), - distinct=True, - ), - ), - MapReduceStep( - values=Result(0), - reduce="union", - step=FetchDataframeStep( - integration="mysql", - query=parse_sql( - """ - SELECT * FROM data.ny_output AS ta - WHERE pickup_hour <= 10 AND vendor_id = 1 AND pickup_hour is not null and - vendor_id = '$var[vendor_id]' - ORDER BY pickup_hour DESC LIMIT 10 - """ - ), - ), - ), - ApplyTimeseriesPredictorStep( - output_time_filter=BinaryOperation(">", args=[Identifier("pickup_hour"), Constant(10)]), - namespace="mindsdb", - predictor=Identifier("tp3", alias=Identifier("tb")), - dataframe=Result(1), - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(right=Identifier("result_2"), left=Identifier("result_1"), join_type=JoinType.JOIN), - ), - ProjectStep(dataframe=Result(3), columns=[Star()]), - ], - ) - - plan = plan_query( - query, - integrations=["mysql"], - predictor_namespace="mindsdb", - predictor_metadata={ - "tp3": { - "timeseries": True, - 
"order_by_column": "pickup_hour", - "group_by_columns": [group_by_column], - "window": predictor_window, - } - }, - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_join_predictor_timeseries_error_on_nested_where(self): - query = Select( - targets=[Identifier("pred.time"), Identifier("pred.price")], - from_table=Join( - left=Identifier("int.tab1"), right=Identifier("mindsdb.pred"), join_type=None, implicit=True - ), - where=BinaryOperation( - "and", - args=[ - BinaryOperation( - "and", - args=[ - BinaryOperation(">", args=[Identifier("tab1.time"), Latest()]), - BinaryOperation(">", args=[Identifier("tab1.time"), Latest()]), - ], - ), - BinaryOperation("=", args=[Identifier("tab1.asset"), Constant("bitcoin")]), - ], - ), - ) - - with pytest.raises(PlanningException): - plan_query( - query, - integrations=["int"], - predictor_namespace="mindsdb", - predictor_metadata={ - "pred": {"timeseries": True, "order_by_column": "time", "group_by_columns": ["asset"], "window": 5} - }, - ) - - def test_join_predictor_timeseries_error_on_invalid_column_in_where(self): - query = Select( - targets=[Identifier("pred.time"), Identifier("pred.price")], - from_table=Join( - left=Identifier("int.tab1"), right=Identifier("mindsdb.pred"), join_type=None, implicit=True - ), - where=BinaryOperation( - "and", - args=[ - BinaryOperation(">", args=[Identifier("tab1.time"), Latest()]), - BinaryOperation("=", args=[Identifier("tab1.whatver"), Constant(0)]), - ], - ), - ) - - with pytest.raises(PlanningException): - plan_query( - query, - integrations=["int"], - predictor_namespace="mindsdb", - predictor_metadata={ - "pred": {"timeseries": True, "order_by_column": "time", "group_by_columns": ["asset"], "window": 5} - }, - ) - - def test_join_predictor_timeseries_default_namespace_predictor(self): - predictor_window = 10 - group_by_column = "vendor_id" - query = Select( - targets=[Star()], - from_table=Join( - left=Identifier("tp3", 
alias=Identifier("tb")), - right=Identifier("mysql.data.ny_output", alias=Identifier("ta")), - join_type=JoinType.LEFT_JOIN, - ), - ) - - expected_plan = QueryPlan( - default_namespace="mindsdb", - steps=[ - FetchDataframeStep( - integration="mysql", - query=Select( - targets=[Identifier(parts=[group_by_column], alias=Identifier(group_by_column))], - from_table=Identifier("data.ny_output", alias=Identifier("ta")), - distinct=True, - ), - ), - MapReduceStep( - values=Result(0), - reduce="union", - step=FetchDataframeStep( - integration="mysql", - query=parse_sql( - "SELECT * FROM data.ny_output AS ta\ - WHERE pickup_hour is not null and vendor_id = '$var[vendor_id]' ORDER BY pickup_hour DESC" - ), - ), - ), - ApplyTimeseriesPredictorStep( - namespace="mindsdb", - predictor=Identifier("tp3", alias=Identifier("tb")), - dataframe=Result(1), - ), - JoinStep( - left=Result(2), - right=Result(1), - query=Join(left=Identifier("result_2"), right=Identifier("result_1"), join_type=JoinType.LEFT_JOIN), - ), - ProjectStep(dataframe=Result(3), columns=[Star()]), - ], - ) - - plan = plan_query( - query, - integrations=["mysql"], - predictor_namespace="mindsdb", - default_namespace="mindsdb", - predictor_metadata={ - "tp3": { - "timeseries": True, - "order_by_column": "pickup_hour", - "group_by_columns": [group_by_column], - "window": predictor_window, - } - }, - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_join_predictor_timeseries_default_namespace_integration(self): - predictor_window = 10 - group_by_column = "vendor_id" - query = Select( - targets=[Star()], - from_table=Join( - left=Identifier("data.ny_output", alias=Identifier("ta")), - right=Identifier("mindsdb.tp3", alias=Identifier("tb")), - join_type=JoinType.JOIN, - ), - ) - - expected_plan = QueryPlan( - default_namespace="mysql", - steps=[ - FetchDataframeStep( - integration="mysql", - query=Select( - targets=[Identifier(parts=[group_by_column], 
alias=Identifier(group_by_column))], - from_table=Identifier("data.ny_output", alias=Identifier("ta")), - distinct=True, - ), - ), - MapReduceStep( - values=Result(0), - reduce="union", - step=FetchDataframeStep( - integration="mysql", - query=parse_sql( - "SELECT * FROM data.ny_output AS ta\ - WHERE pickup_hour is not null and vendor_id = '$var[vendor_id]' ORDER BY pickup_hour DESC" - ), - ), - ), - ApplyTimeseriesPredictorStep( - namespace="mindsdb", - predictor=Identifier("tp3", alias=Identifier("tb")), - dataframe=Result(1), - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(left=Identifier("result_1"), right=Identifier("result_2"), join_type=JoinType.JOIN), - ), - ProjectStep(dataframe=Result(3), columns=[Star()]), - ], - ) - - plan = plan_query( - query, - integrations=["mysql"], - predictor_namespace="mindsdb", - default_namespace="mysql", - predictor_metadata={ - "tp3": { - "timeseries": True, - "order_by_column": "pickup_hour", - "group_by_columns": [group_by_column], - "window": predictor_window, - } - }, - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_timeseries_planner_not_changes_query(self): - sql = "select * from ds.data as ta left join mindsdb.pr as tb where ta.f2 in ('a') and ta.f1 > LATEST" - query = parse_sql(sql) - - query_tree = query.to_tree() - - plan_query( - query, - integrations=["ds", "int"], - predictor_namespace="mindsdb", - predictor_metadata={ - "pr": {"timeseries": True, "window": 3, "order_by_column": "f1", "group_by_columns": ["f2"]} - }, - default_namespace="mindsdb", - ) - - assert query.to_tree() == query_tree - - def test_timeseries_without_group(self): - sql = "select * from ds.data.ny_output as ta join mindsdb.pr as tb where ta.f1 > LATEST" - query = parse_sql(sql) - - predictor_window = 3 - expected_plan = QueryPlan( - default_namespace="ds", - steps=[ - FetchDataframeStep( - integration="ds", - query=parse_sql( - f"SELECT * FROM data.ny_output AS ta\ - 
WHERE f1 is not null\ - ORDER BY f1 DESC LIMIT {predictor_window}" - ), - ), - ApplyTimeseriesPredictorStep( - namespace="mindsdb", - predictor=Identifier("pr", alias=Identifier("tb")), - dataframe=Result(0), - output_time_filter=BinaryOperation(">", args=[Identifier("f1"), Latest()]), - ), - JoinStep( - left=Result(0), - right=Result(1), - query=Join(left=Identifier("result_0"), right=Identifier("result_1"), join_type=JoinType.JOIN), - ), - ProjectStep(dataframe=Result(2), columns=[Star()]), - ], - ) - - plan = plan_query( - query, - integrations=["ds", "int"], - predictor_namespace="mindsdb", - predictor_metadata={ - "pr": {"timeseries": True, "window": predictor_window, "order_by_column": "f1", "group_by_columns": []} - }, - default_namespace="mindsdb", - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_timeseries_with_between_operator(self): - sql = "select * from ds.data.ny_output as ta \ - left join mindsdb.pr as tb \ - where ta.f2 between '2020-11-01' and '2020-12-01' and ta.f1 > LATEST" - - self._test_timeseries_with_between_operator(sql) - - sql = """select * from ( - select * from ds.data.ny_output as ta - where ta.f2 between '2020-11-01' and '2020-12-01' and ta.f1 > LATEST - ) - left join mindsdb.pr as tb - """ - - self._test_timeseries_with_between_operator(sql) - - def _test_timeseries_with_between_operator(self, sql): - query = parse_sql(sql) - - predictor_window = 3 - expected_plan = QueryPlan( - default_namespace="ds", - steps=[ - FetchDataframeStep( - integration="ds", - query=parse_sql( - "SELECT DISTINCT f2 AS f2 FROM data.ny_output as ta\ - WHERE f2 BETWEEN '2020-11-01' AND '2020-12-01'" - ), - ), - MapReduceStep( - values=Result(0), - reduce="union", - step=FetchDataframeStep( - integration="ds", - query=parse_sql( - f"SELECT * FROM data.ny_output as ta \ - WHERE f2 BETWEEN '2020-11-01' AND '2020-12-01' \ - AND f1 IS NOT NULL \ - AND f2 = '$var[f2]' \ - ORDER BY f1 DESC LIMIT 
{predictor_window}" - ), - ), - ), - ApplyTimeseriesPredictorStep( - output_time_filter=BinaryOperation(">", args=[Identifier("f1"), Latest()]), - namespace="mindsdb", - predictor=Identifier("pr", alias=Identifier("tb")), - dataframe=Result(1), - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(right=Identifier("result_2"), left=Identifier("result_1"), join_type=JoinType.LEFT_JOIN), - ), - ProjectStep(dataframe=Result(3), columns=[Star()]), - ], - ) - - plan = plan_query( - query, - integrations=["ds", "int"], - predictor_namespace="mindsdb", - predictor_metadata={ - "pr": { - "timeseries": True, - "window": predictor_window, - "order_by_column": "f1", - "group_by_columns": ["f2"], - } - }, - default_namespace="mindsdb", - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_timeseries_with_multigroup_and_different_case(self): - sql = "select * from ds.data.ny_output as ta \ - left join mindsdb.pr as tb \ - where ta.f2 > '2020-11-01' and ta.f1 > LATEST" - query = parse_sql(sql) - - predictor_window = 3 - expected_plan = QueryPlan( - default_namespace="ds", - steps=[ - FetchDataframeStep( - integration="ds", - query=parse_sql( - "SELECT DISTINCT F2 AS F2, f3 AS f3 FROM data.ny_output as ta\ - WHERE f2 > '2020-11-01'" - ), - ), - MapReduceStep( - values=Result(0), - reduce="union", - step=FetchDataframeStep( - integration="ds", - query=parse_sql( - "SELECT * FROM data.ny_output AS ta\ - WHERE f2 > '2020-11-01' AND F1 IS NOT NULL AND F2 = '$var[F2]' AND f3 = '$var[f3]'\ - ORDER BY F1 DESC LIMIT 3" - ), - ), - ), - ApplyTimeseriesPredictorStep( - output_time_filter=BinaryOperation(">", args=[Identifier("f1"), Latest()]), - namespace="mindsdb", - predictor=Identifier("pr", alias=Identifier("tb")), - dataframe=Result(1), - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(right=Identifier("result_2"), left=Identifier("result_1"), join_type=JoinType.LEFT_JOIN), - ), - 
ProjectStep(dataframe=Result(3), columns=[Star()]), - ], - ) - - plan = plan_query( - query, - integrations=["ds", "int"], - predictor_namespace="mindsdb", - predictor_metadata={ - "pr": { - "timeseries": True, - "window": predictor_window, - "order_by_column": "F1", - "group_by_columns": ["F2", "f3"], - } - }, - default_namespace="mindsdb", - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_timeseries_no_group(self): - predictor_window = 3 - expected_plan = QueryPlan( - default_namespace="ds", - steps=[ - MultipleSteps( - reduce="union", - steps=[ - FetchDataframeStep( - integration="files", - query=parse_sql( - f"select * from schem.sweat as ta \ - WHERE date <= '2015-12-31' AND date IS NOT NULL \ - ORDER BY date DESC LIMIT {predictor_window}" - ), - ), - FetchDataframeStep( - integration="files", - query=parse_sql( - "select * from schem.sweat as ta \ - WHERE date > '2015-12-31' AND date IS NOT NULL \ - ORDER BY date DESC" - ), - ), - ], - ), - ApplyTimeseriesPredictorStep( - output_time_filter=BinaryOperation(">", args=[Identifier("date"), Constant("2015-12-31")]), - namespace="mindsdb", - predictor=Identifier("tp3", alias=Identifier("tb")), - dataframe=Result(0), - ), - JoinStep( - left=Result(0), - right=Result(1), - query=Join(right=Identifier("result_1"), left=Identifier("result_0"), join_type=JoinType.JOIN), - ), - ], - ) - - # different way to join predictor - - sql = """select * from files.schem.sweat as ta - join mindsdb.tp3 as tb - where ta.date > '2015-12-31' - """ - self._test_timeseries_no_group(sql, expected_plan) - - sql = """select * from ( - select * from files.schem.sweat as ta - where ta.date > '2015-12-31' - ) - join mindsdb.tp3 as tb - """ - self._test_timeseries_no_group(sql, expected_plan) - - # create table no integration - - sql = """ - create or replace table files.model_name ( - select * from ( - select * from schem.sweat as ta - where ta.date > '2015-12-31' - ) - join mindsdb.tp3 as 
tb - ) - """ - expected_plan2 = copy.deepcopy(expected_plan) - expected_plan2.add_step( - SaveToTable( - table=Identifier("files.model_name"), - dataframe=Result(expected_plan2.steps[-1].step_num), - is_replace=True, - ) - ) - self._test_timeseries_no_group(sql, expected_plan2) - - # create table with integration - - sql = """ - create or replace table int1.model_name ( - select * from ( - select * from files.schem.sweat as ta - where ta.date > '2015-12-31' - ) - join mindsdb.tp3 as tb - ) - """ - expected_plan2 = copy.deepcopy(expected_plan) - expected_plan2.add_step( - SaveToTable( - table=Identifier("int1.model_name"), - dataframe=Result(expected_plan2.steps[-1].step_num), - is_replace=True, - ) - ) - self._test_timeseries_no_group(sql, expected_plan2) - - # insert into table - expected_plan2 = copy.deepcopy(expected_plan) - expected_plan2.add_step( - InsertToTable( - table=Identifier("int1.model_name"), - dataframe=Result(expected_plan2.steps[-1].step_num), - ) - ) - - sql = """ - insert into int1.model_name ( - select * from ( - select * from files.schem.sweat as ta - where ta.date > '2015-12-31' - ) - join mindsdb.tp3 as tb - ) - """ - self._test_timeseries_no_group(sql, expected_plan2) - - sql = """ - insert into int1.model_name - select * from ( - select * from files.schem.sweat as ta - where ta.date > '2015-12-31' - ) - join mindsdb.tp3 as tb - """ - - self._test_timeseries_no_group(sql, expected_plan2) - - # update table from select - - expected_plan2 = copy.deepcopy(expected_plan) - expected_plan2.add_step( - UpdateToTable( - table=Identifier("int1.tbl1"), - dataframe=expected_plan2.steps[-1], - update_command=Update( - table=Identifier("int1.tbl1"), - update_columns={ - "a": Identifier("df.a"), - "b": Identifier("df.b"), - }, - where=BinaryOperation(op="=", args=[Identifier("c"), Identifier("df.c")]), - ), - ) - ) - - sql = """ - update - int1.tbl1 - set - a = df.a, - b = df.b - from - ( - select * from ( - select * from files.schem.sweat as ta - where 
ta.date > '2015-12-31' - ) - join mindsdb.tp3 as tb - ) - as df - where - c = df.c - """ - self._test_timeseries_no_group(sql, expected_plan2) - - def _test_timeseries_no_group(self, sql, expected_plan): - predictor_window = 3 - query = parse_sql(sql) - - plan = plan_query( - query, - integrations=["files", "int1"], - predictor_namespace="mindsdb", - predictor_metadata={ - "tp3": { - "timeseries": True, - "window": predictor_window, - "order_by_column": "date", - "group_by_columns": [], - } - }, - default_namespace="mindsdb", - ) - - assert plan.steps == expected_plan.steps - - def test_several_groups(self): - sql = """ - SELECT tb.saledate as date, tb.MA as forecast - FROM mindsdb.pr as tb - JOIN ds.HR_MA as t - WHERE t.saledate > LATEST AND t.type = 'house' AND t.bedrooms = 2 - LIMIT 4 - USING param1 = 1, param2 = 'a'; - """ - predictor_window = 3 - query = parse_sql(sql) - - plan = plan_query( - query, - integrations=["ds", "int"], - predictor_namespace="mindsdb", - predictor_metadata={ - "pr": { - "timeseries": True, - "window": predictor_window, - "order_by_column": "saledate", - "group_by_columns": ["type", "bedrooms"], - } - }, - default_namespace="mindsdb", - ) - - expected_plan = QueryPlan( - default_namespace="ds", - steps=[ - FetchDataframeStep( - integration="ds", - query=parse_sql( - "SELECT DISTINCT type AS type, bedrooms AS bedrooms FROM HR_MA as t\ - WHERE type = 'house' AND bedrooms = 2" - ), - ), - MapReduceStep( - values=Result(0), - reduce="union", - step=FetchDataframeStep( - integration="ds", - query=parse_sql( - f"SELECT * FROM HR_MA as t \ - WHERE type = 'house' AND bedrooms = 2 \ - AND saledate IS NOT NULL \ - AND type = '$var[type]' \ - AND bedrooms = '$var[bedrooms]' \ - ORDER BY saledate DESC LIMIT {predictor_window}" - ), - ), - ), - ApplyTimeseriesPredictorStep( - output_time_filter=BinaryOperation(">", args=[Identifier("saledate"), Latest()]), - namespace="mindsdb", - predictor=Identifier("pr", alias=Identifier("tb")), - 
dataframe=Result(1), - params={"param1": 1, "param2": "a"}, - ), - JoinStep( - left=Result(2), - right=Result(1), - query=Join(right=Identifier("result_1"), left=Identifier("result_2"), join_type=JoinType.JOIN), - ), - LimitOffsetStep(step_num=4, dataframe=Result(3), limit=4), - ProjectStep( - dataframe=Result(4), - columns=[ - Identifier(parts=["tb", "saledate"], alias=Identifier("date")), - Identifier(parts=["tb", "MA"], alias=Identifier("forecast")), - ], - ), - ], - ) - - for i in range(len(plan.steps)): - assert plan.steps[i] == expected_plan.steps[i] - - def test_dbt_latest(self): - sql = """ - select * from ( - SELECT - * - from ds.HR_MA as t - WHERE t.type = 'house' - ) as t1 - JOIN mindsdb.pr as tb - WHERE t1.saledate > LATEST - """ - predictor_window = 3 - query = parse_sql(sql) - - plan = plan_query( - query, - integrations=["ds", "int"], - predictor_namespace="mindsdb", - predictor_metadata={ - "pr": { - "timeseries": True, - "window": predictor_window, - "order_by_column": "saledate", - "group_by_columns": ["type"], - } - }, - default_namespace="mindsdb", - ) - - expected_plan = QueryPlan( - default_namespace="ds", - steps=[ - FetchDataframeStep( - integration="ds", - query=parse_sql( - "SELECT DISTINCT type AS type FROM HR_MA as t\ - WHERE type = 'house'" - ), - ), - MapReduceStep( - values=Result(0), - reduce="union", - step=FetchDataframeStep( - integration="ds", - query=parse_sql( - f"SELECT * FROM HR_MA as t \ - WHERE type = 'house' \ - AND saledate IS NOT NULL \ - AND type = '$var[type]' \ - ORDER BY saledate DESC LIMIT {predictor_window}" - ), - ), - ), - ApplyTimeseriesPredictorStep( - output_time_filter=BinaryOperation(">", args=[Identifier("saledate"), Latest()]), - namespace="mindsdb", - predictor=Identifier("pr", alias=Identifier("tb")), - dataframe=Result(1), - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(right=Identifier("result_2"), left=Identifier("result_1"), join_type=JoinType.JOIN), - ), - 
ProjectStep(dataframe=Result(3), columns=[Star()]), - ], - ) - - for i in range(len(plan.steps)): - # print(plan.steps[i]) - # print(expected_plan.steps[i]) - assert plan.steps[i] == expected_plan.steps[i] - - def test_join_native_query(self): - query = parse_sql( - """ - SELECT * - FROM int1 (select * from tab) as t - JOIN pred as m - WHERE t.date > LATEST - """ - ) - - group_by_column = "type" - - plan = plan_query( - query, - integrations=["int1"], - default_namespace="proj", - predictor_metadata=[ - { - "name": "pred", - "integration_name": "proj", - "timeseries": True, - "window": 10, - "horizon": 10, - "order_by_column": "date", - "group_by_columns": [group_by_column], - } - ], - ) - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep( - integration="int1", - query=Select( - targets=[Identifier("type", alias=Identifier("type"))], - from_table=NativeQuery( - query="select * from tab", integration=Identifier("int1"), alias=Identifier("t") - ), - distinct=True, - ), - ), - MapReduceStep( - values=Result(0), - reduce="union", - step=FetchDataframeStep( - integration="int1", - query=Select( - targets=[Star()], - from_table=NativeQuery( - query="select * from tab", integration=Identifier("int1"), alias=Identifier("t") - ), - distinct=False, - limit=Constant(10), - order_by=[OrderBy(field=Identifier("date"), direction="DESC")], - where=BinaryOperation( - "and", - args=[ - BinaryOperation("is not", args=[Identifier("date"), NullConstant()]), - BinaryOperation("=", args=[Identifier("type"), Constant("$var[type]")]), - ], - ), - ), - ), - ), - ApplyTimeseriesPredictorStep( - namespace="proj", - predictor=Identifier("pred", alias=Identifier("m")), - dataframe=Result(1), - output_time_filter=BinaryOperation(">", args=[Identifier("date"), Latest()]), - ), - JoinStep( - left=Result(1), - right=Result(2), - query=Join(left=Identifier("result_1"), right=Identifier("result_2"), join_type=JoinType.JOIN), - ), - ] - ) - - assert len(plan.steps) == 
len(expected_plan.steps) - assert plan.steps == expected_plan.steps - - def test_ts_with_cte(self): - query = parse_sql( - """ - WITH tab AS ( - select * from int1.tbl1 a - ) - SELECT * - FROM tab as t - JOIN pred as m - WHERE t.date > LATEST - """ - ) - - plan = plan_query( - query, - integrations=["int1"], - default_namespace="proj", - predictor_metadata=[ - { - "name": "pred", - "integration_name": "proj", - "timeseries": True, - "window": 20, - "horizon": 10, - "order_by_column": "date", - "group_by_columns": ["vendor_id"], - } - ], - ) - - sub_select_query = parse_sql( - "SELECT * from t WHERE date IS NOT NULL AND vendor_id = '$var[vendor_id]' ORDER BY date DESC LIMIT 20" - ) - sub_select_query.from_table = None - - expected_plan = QueryPlan( - steps=[ - FetchDataframeStep(integration="int1", query=parse_sql("select * from tbl1 a")), - SubSelectStep(dataframe=Result(0), query=parse_sql("select distinct vendor_id"), table_name="t"), - MapReduceStep( - values=Result(1), - reduce="union", - step=SubSelectStep(dataframe=Result(0), query=sub_select_query, table_name="t"), - ), - ApplyTimeseriesPredictorStep( - namespace="proj", - predictor=Identifier("pred", alias=Identifier("m")), - dataframe=Result(2), - output_time_filter=BinaryOperation(">", args=[Identifier("date"), Latest()]), - ), - JoinStep( - left=Result(2), - right=Result(3), - query=Join(left=Identifier("result_2"), right=Identifier("result_3"), join_type=JoinType.JOIN), - ), - ] - ) - - assert len(plan.steps) == len(expected_plan.steps) - assert plan.steps == expected_plan.steps diff --git a/tests/unit/render/test_from_parser.py b/tests/unit/render/test_from_parser.py deleted file mode 100644 index 444afe2e37c..00000000000 --- a/tests/unit/render/test_from_parser.py +++ /dev/null @@ -1,180 +0,0 @@ -import copy -import inspect -import pytest - -from mindsdb_sql_parser import parse_sql -from mindsdb_sql_parser.ast import Select, Constant, WindowFunction, Function - -from 
mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender -from mindsdb.integrations.utilities.query_traversal import query_traversal - - -def parse_sql2(sql, dialect="mindsdb"): - # convert to ast - query = parse_sql(sql, dialect) - - # skip - - # step1: use mysql dialect and parse again - dialect = "mysql" - if "distinct on" in sql.lower(): - dialect = "postgres" - try: - sql2 = SqlalchemyRender(dialect).get_string(query, with_failback=False) - except NotImplementedError: - # skip not implemented, immediately exit - return query - - # remove generated join condition - sql2 = sql2.replace("ON 1=1", "") - - # workarounds for joins - if "INNER JOIN" not in sql: - sql2 = sql2.replace("INNER JOIN", "JOIN") - - if "LEFT OUTER JOIN" not in sql: - sql2 = sql2.replace("LEFT OUTER JOIN", "LEFT JOIN") - - if "FULL OUTER JOIN" not in sql: - sql2 = sql2.replace("FULL OUTER JOIN", "FULL JOIN") - - for clause in ["union", "intersect", "except"]: - if f"{clause} distinct" in sql.lower() and f"{clause} distinct" not in sql2.lower(): - sql2 = sql2.lower().replace(clause, f"{clause} distinct") - - if "RIGHT JOIN" in sql: - # TODO skip now, but fix later - return query - - # cast - # TODO fix parse error 'SELECT CAST(4 AS SIGNED INTEGER)' - if " CAST(4 AS SIGNED INTEGER)" in sql2: - return query - sql2 = sql2.replace(" FLOAT", " float") - - query2 = parse_sql(sql2, "mindsdb") - - # exclude cases when sqlalchemy replaces some parts of sql - if not ( - "not a=" in sql # replaced to a!= - or "NOT col1 =" in sql # replaced to col1!= - or " || " in sql # replaced to concat( - or "current_user()" in sql # replaced to CURRENT_USER - or "user()" in sql # replaced to USER - or "not exists" in sql # replaced to not(exits( - or "WHEN R.DELETE_RULE = 'CASCADE'" in sql # wrapped in parens by sqlalchemy - ): - # sqlalchemy could add own aliases for constant - def clear_target_aliases(node, **args): - # clear target aliases - if isinstance(node, Select): - if node.targets is not None: - 
for target in node.targets: - if ( - isinstance(target, Constant) - or isinstance(target, Select) - or isinstance(target, WindowFunction) - or isinstance(target, Function) - ): - target.alias = None - - # clear subselect alias - if isinstance(node.from_table, Select): - node.from_table.alias = None - - query_ = copy.deepcopy(query) - query_traversal(query_, clear_target_aliases) - query_traversal(query2, clear_target_aliases) - - # and compare with ast before render - repr1, repr2 = query2.to_tree(), query_.to_tree() - if "unbounded preceding" in repr2: - # sqlalchemy changes case - assert repr1.lower() == repr2.lower() - else: - assert repr1 == repr2 - - # step 2: render to different dialects - dialects = ("postgresql", "sqlite", "mssql", "oracle") - - for dialect2 in dialects: - try: - SqlalchemyRender(dialect2).get_string(query, with_failback=False) - except Exception as e: - # skips for dialects - if dialect2 == "oracle" and "does not support in-place multirow inserts" in str(e): - pass - elif ( - dialect2 == "mssql" - and "requires an order_by when using an OFFSET or a non-simple LIMIT clause" in str(e) - ): - pass - elif dialect2 == "sqlite" and "extract(MONTH" in sql: - pass - else: - print(dialect2, query.to_string()) - raise - - # keep original behavior - return query - - -class TestFromParser: - def test_from_parser(self, pytestconfig): - try: - from parser_tests.tests.test_base_sql import ( - test_select_operations, - test_delete, - test_insert, - test_select_common_table_expression, - test_select_structure, - test_union, - test_misc_sql_queries, - ) - - except ImportError as e: - print( - "Unable to import render's tests. Make sure they are in the mindsdb folder. 
It can be done by:" - "- git clone https://github.com/mindsdb/mindsdb_sql_parser.git parser_tests" - f"\nError: {e}" - ) - if pytestconfig.getoption("runslow") is True: - pytest.fail("Failing on above error because --runslow option is set.") - pytest.skip("Parser tests not found") - - modules = ( - test_select_operations, - test_delete, - test_insert, - test_select_common_table_expression, - test_select_structure, - test_union, - test_misc_sql_queries, - ) - for module in modules: - # inject function - module.parse_sql = parse_sql2 - - for class_name, klass in inspect.getmembers(module, predicate=inspect.isclass): - if not class_name.startswith("Test"): - continue - - tests = klass() - for test_name, test_method in inspect.getmembers(tests, predicate=inspect.ismethod): - if ( - not test_name.startswith("test_") - or test_name.endswith("_error") - or test_name.endswith("_render_skip") - ): - continue - if test_name == "test_mixed_join": - # FIXME alchemy can't render it - continue - - sig = inspect.signature(test_method) - args = [] - # add dialect - if "dialect" in sig.parameters: - args.append("mysql") - - test_method(*args) diff --git a/tests/unit/render/test_sqlalchemyrender.py b/tests/unit/render/test_sqlalchemyrender.py deleted file mode 100644 index fd01bd4b2f1..00000000000 --- a/tests/unit/render/test_sqlalchemyrender.py +++ /dev/null @@ -1,237 +0,0 @@ -import datetime as dt -from textwrap import dedent - -from mindsdb_sql_parser.ast import ( - Identifier, - Select, - Star, - Constant, - Tuple, - BinaryOperation, - CreateTable, - TableColumn, - Insert, -) -from mindsdb_sql_parser import parse_sql -from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender - - -class TestMysqlRender: - def test_create_table(self): - query = CreateTable( - name="tbl1", - columns=[ - TableColumn(name="a", type="DATE"), - TableColumn(name="b", type="INTEGER"), - ], - ) - - sql = SqlalchemyRender("mysql").get_string(query, with_failback=False) - - sql2 = """CREATE 
TABLE tbl1 (a DATE, b INTEGER)""" - - assert sql.replace("\n", "").replace("\t", "").replace(" ", " ") == sql2 - - def test_datetype(self): - query = Select(targets=[Constant(value=dt.datetime(2011, 1, 1))]) - - sql = SqlalchemyRender("mysql").get_string(query, with_failback=False) - - sql2 = """SELECT '2011-01-01 00:00:00' AS `2011-01-01 00:00:00`""" - assert sql == sql2 - - query = Select( - targets=[Star()], - from_table=Identifier("tb1"), - where=BinaryOperation( - op="in", - args=[ - Identifier("x"), - Tuple(items=[Constant(value=dt.datetime(2011, 1, 1)), Constant(value=dt.datetime(2011, 1, 2))]), - ], - ), - ) - sql = SqlalchemyRender("mysql").get_string(query, with_failback=False) - - sql2 = """SELECT * FROM tb1 WHERE x IN ('2011-01-01 00:00:00', '2011-01-02 00:00:00')""" - assert sql.replace("\n", "").replace("\t", "").replace(" ", " ") == sql2 - - def test_exec_params(self): - values = [ - [1, "2"], - [3, "b"], - ] - - query = Insert( - table=Identifier("tbl1"), - columns=[ - Identifier("a"), - Identifier("b"), - ], - values=values, - is_plain=True, - ) - - sql, params = SqlalchemyRender("mysql").get_exec_params(query, with_failback=False) - - assert sql == """INSERT INTO tbl1 (a, b) VALUES (%s, %s)""" - assert params == values - - -class TestPostgresRender: - def test_alias_in_case(self): - sql = """ - select case mean when 0 then null else stdev/mean end cov from table1 - """ - - query = parse_sql(sql) - rendered = SqlalchemyRender("postgres").get_string(query, with_failback=False) - - # check queries are the same after render - assert str(query) == str(parse_sql(rendered)) - - def test_extra_cast_in_division(self): - sql = """ - select a / b as col1 from table1 - """ - - query = parse_sql(sql) - rendered = SqlalchemyRender("postgres").get_string(query, with_failback=False) - - # check queries are the same after render - assert str(query) == str(parse_sql(rendered)) - - def test_quoted_mixed_case(self): - query = Select(targets=[Identifier("Test", 
alias=Identifier("Test2"))]) - rendered = SqlalchemyRender("postgres").get_string(query, with_failback=False) - assert rendered == "SELECT Test AS Test2" - - query = Select(targets=[Identifier("table")]) - rendered = SqlalchemyRender("postgres").get_string(query, with_failback=False) - assert rendered == 'SELECT "table"' - - def test_star_in_path(self): - sql = "select t.* from table t" - - query = parse_sql(sql) - rendered = SqlalchemyRender("postgres").get_string(query, with_failback=False) - - # check queries are the same after render - assert str(query) == str(parse_sql(rendered)) - - def test_div(self): - sql0 = "select 1 / 2 - (9 / 4 - 1) * 3 as x" - query = parse_sql(sql0) - - sql = SqlalchemyRender("postgres").get_string(query, with_failback=False) - - assert sql.lower() == sql0 - - def test_quoted_identifier(self): - sql = "SELECT `A`.*, A.`B` AS `Bb`, `c` as Cc FROM Tbl.`Tab` AS `Tt`" - - query = parse_sql(sql) - rendered = SqlalchemyRender("postgres").get_string(query, with_failback=False) - - # check queries are the same after render - assert rendered.replace("\n", "") == 'SELECT "A".*, A."B" AS "Bb", "c" AS Cc FROM Tbl."Tab" AS "Tt"' - - def test_intersect_except(self): - for op in ("EXCEPT", "INTERSECT"): - sql = dedent(f""" - SELECT * FROM tbl1 - {op} SELECT * FROM tbl2 - """).strip() - - query = parse_sql(sql) - rendered = SqlalchemyRender("postgres").get_string(query, with_failback=False) - - assert rendered.replace("\n", "") == sql.replace("\n", " ") - - def test_in_with_single_value(self): - sql = "SELECT * FROM tbl1 WHERE x IN (1)" - query = parse_sql(sql) - rendered = SqlalchemyRender("postgres").get_string(query, with_failback=False) - - assert rendered.replace("\n", "") == sql - - def test_join(self): - sql = """ - SELECT * FROM tbl1 - {JOIN} tbl2 ON tbl1.x = tbl2.x - """ - for input_join_type, output_join_type in [ - ("JOIN", "JOIN"), - ("INNER JOIN", "JOIN"), - ("LEFT JOIN", "LEFT OUTER JOIN"), - ("LEFT OUTER JOIN", "LEFT OUTER JOIN"), - # 
('RIGHT JOIN', 'RIGHT OUTER JOIN'), - # ('RIGHT OUTER JOIN', 'RIGHT OUTER JOIN'), - ]: - original_query = sql.format(JOIN=input_join_type) - query = parse_sql(original_query) - rendered = SqlalchemyRender("postgres").get_string(query, with_failback=False) - assert " ".join(rendered.split()) == " ".join(sql.format(JOIN=output_join_type).split()) - - def test_mixed_join(self): - sql = """ - SELECT * FROM tbl1 - join tbl2 on tbl1.x = tbl2.x, - tbl3 - """ - query = parse_sql(sql) - rendered = SqlalchemyRender("postgres").get_string(query, with_failback=False) - - expected = dedent(""" - SELECT * FROM tbl1 - JOIN tbl2 ON tbl1.x = tbl2.x - JOIN tbl3 ON 1=1 - """).strip() - - assert rendered.replace("\n", "") == expected.replace("\n", " ") - - def test_group_by_rollup(self): - # test statements wth GROUP BY ROLLUP - sql = "SELECT * FROM tbl1 GROUP BY a, b WITH ROLLUP" - ast = parse_sql(sql) - - assert ast.group_by[-1].with_rollup is True - - rendered = SqlalchemyRender("postgres").get_string(ast, with_failback=False) - expected = "SELECT * FROM tbl1 GROUP BY ROLLUP(a, b)" - assert rendered.replace("\n", "").replace(" ", " ").upper() == expected.upper() - - rendered = SqlalchemyRender("mysql").get_string(ast, with_failback=False) - expected = "SELECT * FROM tbl1 GROUP BY a, b WITH ROLLUP" - assert rendered.replace("\n", "").replace(" ", " ").upper() == expected.upper() - - # renderer for 'oracle' is not explicetly specified - should be rollup() in result - rendered = SqlalchemyRender("oracle").get_string(ast, with_failback=False) - expected = "SELECT * FROM tbl1 GROUP BY ROLLUP(a, b)" - assert rendered.replace("\n", "").replace(" ", " ").upper() == expected.upper() - - # try query with differ ending - sql = "SELECT * FROM tbl1 GROUP BY a, b WITH ROLLUP LIMIT 100" - ast = parse_sql(sql) - - assert ast.group_by[-1].with_rollup is True - - rendered = SqlalchemyRender("postgres").get_string(ast, with_failback=False) - expected = "SELECT * FROM tbl1 GROUP BY ROLLUP(a, b) LIMIT 
100" - assert rendered.replace("\n", "").replace(" ", " ").upper() == expected.upper() - - -class TestMSSQLRender: - def test_mixed_join(self): - sql = """ - select * from car_info order by year limit 10 offset 1 - """ - query = parse_sql(sql) - rendered = SqlalchemyRender("mssql").get_string(query, with_failback=False) - - expected = dedent(""" - SELECT * FROM car_info ORDER BY year - OFFSET 1 ROWS FETCH FIRST 10 ROWS ONLY - """).strip() - - assert rendered.replace("\n", "") == expected.replace("\n", " ") diff --git a/tests/unit/test_passthrough.py b/tests/unit/test_passthrough.py deleted file mode 100644 index 9b86bc9f7ee..00000000000 --- a/tests/unit/test_passthrough.py +++ /dev/null @@ -1,400 +0,0 @@ -"""Unit tests for PassthroughMixin.""" - -import unittest -from unittest.mock import MagicMock, patch - -from mindsdb.integrations.libs.passthrough import ( - PassthroughMixin, - REDACTED_SENTINEL, -) -from mindsdb.integrations.libs.passthrough_types import ( - HostNotAllowedError, - PassthroughConfigError, - PassthroughRequest, - PassthroughValidationError, -) - - -class _FakeHandler(PassthroughMixin): - """Minimal handler stub for exercising the mixin.""" - - _bearer_token_arg = "api_key" - _base_url_default = "https://api.example.com" - _test_request = PassthroughRequest(method="GET", path="/me") - - def __init__(self, connection_data: dict): - self.name = "fake_ds" - self.connection_data = connection_data - - -def _mock_response(status_code=200, body=b'{"ok":true}', headers=None, content_type="application/json"): - """Return a mock requests.Response exposing the bits the mixin uses.""" - resp = MagicMock() - resp.status_code = status_code - resp.headers = {"Content-Type": content_type, **(headers or {})} - resp.iter_content = MagicMock(return_value=iter([body])) - resp.close = MagicMock() - return resp - - -class PassthroughHappyPathTests(unittest.TestCase): - def setUp(self): - self.handler = _FakeHandler({"api_key": "secret-token-abcdef1234567890"}) - - 
@patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_injects_bearer_and_uses_default_base_url(self, mock_request): - mock_request.return_value = _mock_response() - resp = self.handler.api_passthrough(PassthroughRequest("GET", "/me")) - - self.assertEqual(resp.status_code, 200) - self.assertEqual(resp.body, {"ok": True}) - - args, kwargs = mock_request.call_args - self.assertEqual(args[0], "GET") - self.assertEqual(args[1], "https://api.example.com/me") - self.assertEqual(kwargs["headers"]["Authorization"], "Bearer secret-token-abcdef1234567890") - self.assertEqual(kwargs["headers"]["X-Minds-Passthrough"], "1") - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_user_base_url_overrides_default(self, mock_request): - self.handler.connection_data["base_url"] = "https://api.eu.example.com" - mock_request.return_value = _mock_response() - self.handler.api_passthrough(PassthroughRequest("GET", "/me")) - self.assertEqual(mock_request.call_args[0][1], "https://api.eu.example.com/me") - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_query_params_forwarded(self, mock_request): - mock_request.return_value = _mock_response() - self.handler.api_passthrough(PassthroughRequest("GET", "/x", query={"a": "1"})) - self.assertEqual(mock_request.call_args.kwargs["params"], {"a": "1"}) - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_json_body_forwarded(self, mock_request): - mock_request.return_value = _mock_response() - self.handler.api_passthrough(PassthroughRequest("POST", "/x", body={"name": "foo"})) - self.assertEqual(mock_request.call_args.kwargs["json"], {"name": "foo"}) - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_default_headers_merged(self, mock_request): - self.handler.connection_data["default_headers"] = {"Accept": "application/json"} - mock_request.return_value = _mock_response() - 
self.handler.api_passthrough(PassthroughRequest("GET", "/x")) - self.assertEqual(mock_request.call_args.kwargs["headers"]["Accept"], "application/json") - - -class PassthroughHeaderFilteringTests(unittest.TestCase): - def setUp(self): - self.handler = _FakeHandler({"api_key": "secret-token-abcdef1234567890"}) - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_caller_cannot_override_authorization(self, mock_request): - mock_request.return_value = _mock_response() - self.handler.api_passthrough( - PassthroughRequest("GET", "/x", headers={"Authorization": "Bearer hijack", "Cookie": "s=1"}) - ) - outgoing = mock_request.call_args.kwargs["headers"] - self.assertEqual(outgoing["Authorization"], "Bearer secret-token-abcdef1234567890") - self.assertNotIn("Cookie", outgoing) - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_proxy_headers_stripped(self, mock_request): - mock_request.return_value = _mock_response() - self.handler.api_passthrough(PassthroughRequest("GET", "/x", headers={"Proxy-Authorization": "hijack"})) - outgoing = mock_request.call_args.kwargs["headers"] - self.assertNotIn("Proxy-Authorization", outgoing) - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_hop_by_hop_response_headers_stripped(self, mock_request): - mock_request.return_value = _mock_response( - headers={"Connection": "close", "X-Safe": "1", "Transfer-Encoding": "chunked"} - ) - resp = self.handler.api_passthrough(PassthroughRequest("GET", "/x")) - self.assertNotIn("Connection", resp.headers) - self.assertNotIn("Transfer-Encoding", resp.headers) - self.assertEqual(resp.headers.get("X-Safe"), "1") - - -class PassthroughHostAllowlistTests(unittest.TestCase): - def test_rejects_host_outside_allowlist(self): - handler = _FakeHandler( - { - "api_key": "t", - "base_url": "https://api.example.com", - "allowed_hosts": ["api.example.com"], - } - ) - # Direct host check using a bad URL - with 
self.assertRaises(HostNotAllowedError): - handler._check_host_allowed("evil.com") - - def test_wildcard_allows_any_host(self): - handler = _FakeHandler( - { - "api_key": "t", - "base_url": "https://api.example.com", - "allowed_hosts": ["*"], - } - ) - handler._check_host_allowed("evil.com") # must not raise - - def test_private_ip_rejected_by_default(self): - handler = _FakeHandler({"api_key": "t", "base_url": "http://10.0.0.1"}) - with self.assertRaises(HostNotAllowedError): - handler._check_host_allowed("10.0.0.1") - - def test_private_ip_allowed_when_explicitly_listed(self): - handler = _FakeHandler( - { - "api_key": "t", - "base_url": "http://10.0.0.1", - "allowed_hosts": ["10.0.0.1"], - } - ) - # Explicitly allowlisted private IP should still be rejected — the - # mixin treats explicit private-IP allowlisting as a foot-gun that - # requires the "*" escape hatch. Document this behavior. - with self.assertRaises(HostNotAllowedError): - handler._check_host_allowed("10.0.0.1") - - def test_loopback_rejected_with_wildcard_when_asterisk_not_used(self): - handler = _FakeHandler( - { - "api_key": "t", - "base_url": "http://127.0.0.1", - "allowed_hosts": ["127.0.0.1"], - } - ) - with self.assertRaises(HostNotAllowedError): - handler._check_host_allowed("127.0.0.1") - - -class PassthroughValidationTests(unittest.TestCase): - def test_missing_bearer_raises(self): - handler = _FakeHandler({}) # no api_key - with self.assertRaises(PassthroughConfigError): - handler.api_passthrough(PassthroughRequest("GET", "/me")) - - def test_missing_base_url_raises(self): - class NoDefault(_FakeHandler): - _base_url_default = None - - handler = NoDefault({"api_key": "t"}) - with self.assertRaises(PassthroughConfigError): - handler.api_passthrough(PassthroughRequest("GET", "/me")) - - def test_path_must_start_with_slash(self): - handler = _FakeHandler({"api_key": "t"}) - with self.assertRaises(PassthroughValidationError): - handler.api_passthrough(PassthroughRequest("GET", "me")) - - def 
test_method_allowlist(self): - handler = _FakeHandler({"api_key": "t"}) - with self.assertRaises(PassthroughValidationError): - handler.api_passthrough(PassthroughRequest("TRACE", "/me")) - - -class PassthroughSecretScrubTests(unittest.TestCase): - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_token_scrubbed_from_json_body(self, mock_request): - token = "secret-token-abcdef1234567890" - # Non-UTF-8 byte (0xFF) positioned adjacent to the token. Spec §7.6 - # mandates byte-level scrubbing: if the scrub ran after a - # errors="replace" decode, U+FFFD insertions would risk fragmenting - # a token mid-match. Byte-level scrub avoids that entirely. - body = b'{"error":"Invalid token ' + token.encode("utf-8") + b' \xff trailing"}' - handler = _FakeHandler({"api_key": token}) - # Use plain-text content-type so the non-UTF-8 body survives without - # a json.loads detour; the scrub is still invoked. - mock_request.return_value = _mock_response(body=body, content_type="text/plain") - - resp = handler.api_passthrough(PassthroughRequest("GET", "/x")) - # Token must not survive anywhere in the body. 
- self.assertNotIn(token, str(resp.body)) - self.assertIn(REDACTED_SENTINEL, str(resp.body)) - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_token_scrubbed_from_headers(self, mock_request): - token = "secret-token-abcdef1234567890" - handler = _FakeHandler({"api_key": token}) - mock_request.return_value = _mock_response( - headers={"X-Debug-Auth": f"Bearer {token}"}, - ) - resp = handler.api_passthrough(PassthroughRequest("GET", "/x")) - self.assertIn(REDACTED_SENTINEL, resp.headers["X-Debug-Auth"]) - self.assertNotIn(token, resp.headers["X-Debug-Auth"]) - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_long_default_header_values_scrubbed(self, mock_request): - token = "secret-token-abcdef1234567890" - long_secret = "x" * 32 - handler = _FakeHandler( - { - "api_key": token, - "default_headers": {"X-Api-Secondary": long_secret}, - } - ) - mock_request.return_value = _mock_response(body=('{"echoed":"' + long_secret + '"}').encode("utf-8")) - resp = handler.api_passthrough(PassthroughRequest("GET", "/x")) - self.assertEqual(resp.body["echoed"], REDACTED_SENTINEL) - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_token_scrubbed_in_nested_json_without_corrupting_structure(self, mock_request): - token = "secret-token-abcdef1234567890" - handler = _FakeHandler({"api_key": token}) - body = ('{"data": {"nested": {"token": "' + token + '"}}}').encode("utf-8") - mock_request.return_value = _mock_response(body=body) - - resp = handler.api_passthrough(PassthroughRequest("GET", "/x")) - - # Structure preserved: dict-of-dict-of-dict with the expected keys. 
- self.assertIsInstance(resp.body, dict) - self.assertIsInstance(resp.body["data"], dict) - self.assertIsInstance(resp.body["data"]["nested"], dict) - self.assertEqual(set(resp.body.keys()), {"data"}) - self.assertEqual(set(resp.body["data"].keys()), {"nested"}) - self.assertEqual(set(resp.body["data"]["nested"].keys()), {"token"}) - # Value redacted at the leaf; token does not survive anywhere. - self.assertEqual(resp.body["data"]["nested"]["token"], REDACTED_SENTINEL) - self.assertNotIn(token, str(resp.body)) - - -class PassthroughTestEndpointTests(unittest.TestCase): - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_returns_ok_on_200(self, mock_request): - handler = _FakeHandler({"api_key": "t"}) - mock_request.return_value = _mock_response(status_code=200) - result = handler.test_passthrough() - self.assertTrue(result["ok"]) - self.assertEqual(result["status_code"], 200) - self.assertEqual(result["host"], "api.example.com") - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_returns_auth_failed_on_401(self, mock_request): - handler = _FakeHandler({"api_key": "t"}) - mock_request.return_value = _mock_response(status_code=401) - result = handler.test_passthrough() - self.assertFalse(result["ok"]) - self.assertEqual(result["error_code"], "auth_failed") - - def test_returns_not_implemented_when_no_test_request(self): - class NoTest(_FakeHandler): - _test_request = None - - handler = NoTest({"api_key": "t"}) - result = handler.test_passthrough() - self.assertFalse(result["ok"]) - self.assertEqual(result["error_code"], "not_implemented") - - -class PassthroughAllowedMethodsTests(unittest.TestCase): - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_rejects_method_not_in_allowed_methods(self, mock_request): - handler = _FakeHandler( - { - "api_key": "t", - "allowed_methods": ["GET"], - } - ) - mock_request.return_value = _mock_response() - - with 
self.assertRaises(PassthroughValidationError) as cm: - handler.api_passthrough(PassthroughRequest("POST", "/x")) - - self.assertEqual(cm.exception.error_code, "method_not_allowed") - self.assertEqual(cm.exception.http_status, 405) - mock_request.assert_not_called() - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_all_methods_allowed_when_config_absent(self, mock_request): - handler = _FakeHandler({"api_key": "t"}) - mock_request.return_value = _mock_response() - - for method in ("GET", "POST", "PUT", "PATCH", "DELETE"): - mock_request.reset_mock() - mock_request.return_value = _mock_response() - handler.api_passthrough(PassthroughRequest(method, "/x")) - self.assertEqual(mock_request.call_args[0][0], method) - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_string_allowed_methods_raises_config_error(self, mock_request): - handler = _FakeHandler({"api_key": "t", "allowed_methods": "GET"}) - - with self.assertRaises(PassthroughConfigError): - handler.api_passthrough(PassthroughRequest("GET", "/x")) - - mock_request.assert_not_called() - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_lowercase_allowed_methods_normalized(self, mock_request): - handler = _FakeHandler({"api_key": "t", "allowed_methods": ["get"]}) - mock_request.return_value = _mock_response() - - # GET passes after uppercase normalization. - handler.api_passthrough(PassthroughRequest("GET", "/x")) - self.assertEqual(mock_request.call_args[0][0], "GET") - - mock_request.reset_mock() - # POST is rejected with method_not_allowed. 
- with self.assertRaises(PassthroughValidationError) as cm: - handler.api_passthrough(PassthroughRequest("POST", "/x")) - self.assertEqual(cm.exception.error_code, "method_not_allowed") - self.assertEqual(cm.exception.http_status, 405) - mock_request.assert_not_called() - - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_unknown_verb_in_allowed_methods_raises_config_error(self, mock_request): - handler = _FakeHandler({"api_key": "t", "allowed_methods": ["GET", "TRACE"]}) - - with self.assertRaises(PassthroughConfigError) as cm: - handler.api_passthrough(PassthroughRequest("GET", "/x")) - self.assertIn("TRACE", str(cm.exception)) - mock_request.assert_not_called() - - -class PassthroughAuthHeaderOverrideTests(unittest.TestCase): - @patch("mindsdb.integrations.libs.passthrough.requests.request") - def test_custom_auth_header_name_and_format(self, mock_request): - class ShopifyLikeHandler(_FakeHandler): - _auth_header_name = "X-Shopify-Access-Token" - _auth_header_format = "{token}" - - handler = ShopifyLikeHandler({"api_key": "shpat_abc123"}) - mock_request.return_value = _mock_response() - - handler.api_passthrough(PassthroughRequest("GET", "/x")) - - outgoing = mock_request.call_args.kwargs["headers"] - # Custom header present, with raw token (no "Bearer " prefix). - self.assertEqual(outgoing["X-Shopify-Access-Token"], "shpat_abc123") - # Default Authorization header must NOT be added when the handler - # overrides the auth header name. 
- self.assertNotIn("Authorization", outgoing) - - -class PassthroughProtocolTests(unittest.TestCase): - def test_non_mixin_class_satisfies_protocol(self): - from mindsdb.integrations.libs.passthrough import PassthroughProtocol - from mindsdb.integrations.libs.passthrough_types import PassthroughResponse - - class ManualHandler: - def api_passthrough(self, req: PassthroughRequest) -> PassthroughResponse: - return PassthroughResponse(status_code=200, headers={}, body=None, content_type=None) - - def test_passthrough(self) -> dict: - return {"ok": True} - - self.assertIsInstance(ManualHandler(), PassthroughProtocol) - - def test_class_missing_methods_fails_protocol(self): - from mindsdb.integrations.libs.passthrough import PassthroughProtocol - - class Incomplete: - def api_passthrough(self, req): ... - - # missing test_passthrough - - self.assertNotIsInstance(Incomplete(), PassthroughProtocol) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/utilities/ml_task_queue/test_ml_task_queue.py b/tests/unit/utilities/ml_task_queue/test_ml_task_queue.py deleted file mode 100644 index 1880c90fc6d..00000000000 --- a/tests/unit/utilities/ml_task_queue/test_ml_task_queue.py +++ /dev/null @@ -1,139 +0,0 @@ -import json -import os - -import pytest -from walrus import Database - -from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE -from mindsdb.utilities.ml_task_queue.const import TASKS_STREAM_NAME - -from tests.integration.utils.http_test_helpers import HTTPHelperMixin -from tests.integration.conftest import HTTP_API_ROOT - -REDIS_HOST = os.environ.get("INTERNAL_URL", "").replace("mindsdb", "redis-master") - - -@pytest.mark.skipif("localhost" in HTTP_API_ROOT or "127.0.0.1" in HTTP_API_ROOT, reason="Requires redis") -class TestMLTaskQueue(HTTPHelperMixin): - def test_redis_connection(self): - db = Database(protocol=3, host=REDIS_HOST) - db.ping() - - def test_create_model(self, train_finetune_lock): - """1. create db connection - 2. 
create test dataset - 3. start to train model in 'async' mode: check status - 4. start to train model in 'sync' mode: check status - 5. await model 2 is finished - 6. 2 messages in redis stream - """ - - db_details = { - "type": "postgres", - "connection_data": { - "type": "postgres", - "host": "samples.mindsdb.com", - "port": "5432", - "user": "demo_user", - "password": "demo_password", - "database": "demo", - }, - } - - self.sql_via_http("DROP MODEL IF EXISTS p_test_queue_async;", RESPONSE_TYPE.OK) - self.sql_via_http("DROP MODEL IF EXISTS p_test_queue_sync;", RESPONSE_TYPE.OK) - - query = f""" - CREATE DATABASE IF NOT EXISTS test_demo_queue - WITH ENGINE = 'postgres', - PARAMETERS = {json.dumps(db_details["connection_data"])}; - """ - self.sql_via_http(query, RESPONSE_TYPE.OK) - - with train_finetune_lock.acquire(timeout=600): - query = """ - create predictor p_test_queue_async - from test_demo_queue (select sqft, location, rental_price from demo_data.home_rentals limit 30) - predict rental_price; - """ - response = self.sql_via_http(query, RESPONSE_TYPE.TABLE) - status = response["data"][0][response["column_names"].index("STATUS")] - assert status in ("generating", "training") - - query = """ - create predictor p_test_queue_sync - from test_demo_queue (select sqft, location, rental_price from demo_data.home_rentals limit 30) - predict rental_price - USING join_learn_process=true; - """ - response = self.sql_via_http(query, RESPONSE_TYPE.TABLE) - status = response["data"][0][response["column_names"].index("STATUS")] - assert status == "complete" - - status = self.await_model("p_test_queue_async") - assert status == "complete" - - db = Database(protocol=3, host=REDIS_HOST) - assert TASKS_STREAM_NAME in db.keys() - - assert db.type(TASKS_STREAM_NAME) == b"stream" - xlen = db.xlen(TASKS_STREAM_NAME) - if xlen != 0: - lol = db.xrange(TASKS_STREAM_NAME) - assert False, "Caught non-zero length ml queue: " + str(lol) - assert db.xlen(TASKS_STREAM_NAME) == 0 - - def 
test_predict(self): - """make predict queries to both trained models""" - - query = """ - SELECT rental_price, - rental_price_explain - FROM mindsdb.p_test_queue_async - WHERE b = 10; - """ - response = self.sql_via_http(query, RESPONSE_TYPE.TABLE) - assert len(response["data"]) == 1 - assert len(response["data"][0]) == 2 - - query = """ - SELECT rental_price, - rental_price_explain - FROM test_demo_queue.demo_data.home_rentals - JOIN mindsdb.p_test_queue_async - LIMIT 3; - """ - response = self.sql_via_http(query, RESPONSE_TYPE.TABLE) - assert len(response["data"]) == 3 - assert len(response["data"][0]) == 2 - - db = Database(protocol=3, host=REDIS_HOST) - assert db.xlen(TASKS_STREAM_NAME) == 0 - - def test_finetune(self, train_finetune_lock): - """check that finetune is working""" - - with train_finetune_lock.acquire(timeout=600): - query = """ - FINETUNE p_test_queue_sync - FROM test_demo_queue (SELECT * FROM demo_data.home_rentals LIMIT 10) - USING join_learn_process=true; - """ - response = self.sql_via_http(query, RESPONSE_TYPE.TABLE) - status = response["data"][0][response["column_names"].index("STATUS")] - assert status == "complete" - - query = """ - FINETUNE p_test_queue_async - FROM test_demo_queue (SELECT * FROM demo_data.home_rentals LIMIT 10); - """ - response = self.sql_via_http(query, RESPONSE_TYPE.TABLE) - status = response["data"][0][response["column_names"].index("STATUS")] - # FINETUNE in this case may be very fast, so add 'complete' to check - assert status in ("generating", "training", "complete") - - status = self.await_model("p_test_queue_async", version_number=2) - assert status == "complete" - - db = Database(protocol=3, host=REDIS_HOST) - assert db.xlen(TASKS_STREAM_NAME) == 0 diff --git a/tests/unit/utilities/test_community_handler_fetcher.py b/tests/unit/utilities/test_community_handler_fetcher.py deleted file mode 100644 index 3fd1ad3b942..00000000000 --- a/tests/unit/utilities/test_community_handler_fetcher.py +++ /dev/null @@ -1,302 
+0,0 @@ -import shutil -import tempfile -import unittest -from pathlib import Path -from unittest.mock import MagicMock, patch - -import requests - -from mindsdb.integrations.utilities.community_handler_fetcher import ( - _fetch_tree_recursive, - _resolve_tree_sha, - fetch_handler, -) - -REPO = "mindsdb/mindsdb-community-handlers" -BRANCH = "main" -PATH_PREFIX = "community_handlers" -HANDLER = "elasticsearch_handler" -REMOTE_PREFIX = f"{PATH_PREFIX}/{HANDLER}" -TREE_SHA = "abc123deadbeef" - - -def _make_response(status_code=200, json_data=None, raise_for_status=None): - """Helper: build a MagicMock that looks like a requests.Response.""" - resp = MagicMock() - resp.status_code = status_code - resp.json.return_value = json_data if json_data is not None else {} - resp.text = "" - if raise_for_status is not None: - resp.raise_for_status.side_effect = raise_for_status - else: - resp.raise_for_status.return_value = None - return resp - - -def _make_get_side_effect(contents_resp, trees_resp, raw_resp=None): - """Return a side_effect callable that dispatches mocked responses by URL.""" - - def _get(url, **kwargs): - if "git/trees" in url: - return trees_resp - if "raw.githubusercontent.com" in url: - return raw_resp if raw_resp is not None else _make_response(200, b"") - # Contents API (resolve SHA or other) - return contents_resp - - return _get - - -class TestResolveTreSha(unittest.TestCase): - """Unit tests for _resolve_tree_sha().""" - - @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") - def test_returns_sha_when_found(self, mock_get): - parent_listing = [ - {"name": "other_handler", "type": "dir", "sha": "000"}, - {"name": HANDLER, "type": "dir", "sha": TREE_SHA}, - ] - mock_get.return_value = _make_response(200, parent_listing) - - result = _resolve_tree_sha(REPO, BRANCH, REMOTE_PREFIX, {}) - - self.assertEqual(result, TREE_SHA) - mock_get.assert_called_once() - - 
@patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") - def test_returns_none_on_404(self, mock_get): - mock_get.return_value = _make_response(404) - - result = _resolve_tree_sha(REPO, BRANCH, REMOTE_PREFIX, {}) - - self.assertIsNone(result) - - @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") - def test_returns_none_when_dir_not_in_listing(self, mock_get): - parent_listing = [{"name": "other_handler", "type": "dir", "sha": "000"}] - mock_get.return_value = _make_response(200, parent_listing) - - result = _resolve_tree_sha(REPO, BRANCH, REMOTE_PREFIX, {}) - - self.assertIsNone(result) - - @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") - def test_raises_on_non_200_non_404(self, mock_get): - mock_get.return_value = _make_response(503) - - with self.assertRaises(RuntimeError): - _resolve_tree_sha(REPO, BRANCH, REMOTE_PREFIX, {}) - - @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") - def test_raises_on_network_error(self, mock_get): - mock_get.side_effect = requests.RequestException("timeout") - - with self.assertRaises(RuntimeError): - _resolve_tree_sha(REPO, BRANCH, REMOTE_PREFIX, {}) - - -class TestFetchTreeRecursive(unittest.TestCase): - """Unit tests for _fetch_tree_recursive().""" - - def setUp(self): - self.tmp = Path(tempfile.mkdtemp()) - - def tearDown(self): - shutil.rmtree(self.tmp, ignore_errors=True) - - def _trees_response(self, entries, truncated=False): - return _make_response(200, {"sha": TREE_SHA, "tree": entries, "truncated": truncated}) - - @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") - def test_downloads_flat_and_nested_files(self, mock_get): - tree_entries = [ - {"path": "__init__.py", "type": "blob", "sha": "s1", "size": 10}, - {"path": "elasticsearch_handler.py", "type": "blob", "sha": "s2", "size": 500}, - {"path": "tests", "type": "tree", "sha": "s3"}, - {"path": "tests/__init__.py", 
"type": "blob", "sha": "s4", "size": 0}, - {"path": "tests/test_elasticsearch_handler.py", "type": "blob", "sha": "s5", "size": 1107}, - ] - trees_resp = self._trees_response(tree_entries) - raw_resp = _make_response(200) - raw_resp.content = b"# file content" - - def _get(url, **kwargs): - if "git/trees" in url: - return trees_resp - return raw_resp - - mock_get.side_effect = _get - - count = _fetch_tree_recursive(REPO, BRANCH, TREE_SHA, REMOTE_PREFIX, self.tmp, {}) - - self.assertEqual(count, 4) - self.assertTrue((self.tmp / "__init__.py").exists()) - self.assertTrue((self.tmp / "elasticsearch_handler.py").exists()) - self.assertTrue((self.tmp / "tests" / "__init__.py").exists()) - self.assertTrue((self.tmp / "tests" / "test_elasticsearch_handler.py").exists()) - - @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") - def test_max_depth_enforcement(self, mock_get): - # depth 4 means path has 4 slashes → 5 components → should be skipped - deep_path = "a/b/c/d/e.py" - self.assertEqual(deep_path.count("/"), 4) # 4 >= max_depth=4 → skipped - - tree_entries = [ - {"path": "__init__.py", "type": "blob", "sha": "s1", "size": 0}, - {"path": deep_path, "type": "blob", "sha": "s2", "size": 99}, - ] - trees_resp = self._trees_response(tree_entries) - raw_resp = _make_response(200) - raw_resp.content = b"" - - def _get(url, **kwargs): - if "git/trees" in url: - return trees_resp - return raw_resp - - mock_get.side_effect = _get - - count = _fetch_tree_recursive(REPO, BRANCH, TREE_SHA, REMOTE_PREFIX, self.tmp, {}, max_depth=4) - - self.assertEqual(count, 1) - self.assertTrue((self.tmp / "__init__.py").exists()) - self.assertFalse((self.tmp / deep_path).exists()) - - @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") - def test_truncated_tree_logs_warning(self, mock_get): - trees_resp = self._trees_response([], truncated=True) - mock_get.return_value = trees_resp - - with 
self.assertLogs("mindsdb.integrations.utilities.community_handler_fetcher", level="WARNING") as cm: - _fetch_tree_recursive(REPO, BRANCH, TREE_SHA, REMOTE_PREFIX, self.tmp, {}) - - self.assertTrue(any("truncated" in line for line in cm.output)) - - @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") - def test_raises_on_file_download_failure(self, mock_get): - tree_entries = [{"path": "__init__.py", "type": "blob", "sha": "s1", "size": 10}] - trees_resp = self._trees_response(tree_entries) - raw_resp = _make_response(500) - raw_resp.raise_for_status.side_effect = requests.HTTPError("500 Server Error") - - def _get(url, **kwargs): - if "git/trees" in url: - return trees_resp - return raw_resp - - mock_get.side_effect = _get - - with self.assertRaises(RuntimeError): - _fetch_tree_recursive(REPO, BRANCH, TREE_SHA, REMOTE_PREFIX, self.tmp, {}) - - -class TestFetchHandler(unittest.TestCase): - """Integration-style unit tests for fetch_handler().""" - - def setUp(self): - self.storage = Path(tempfile.mkdtemp()) - - def tearDown(self): - shutil.rmtree(self.storage, ignore_errors=True) - - def _parent_listing(self): - return [{"name": HANDLER, "type": "dir", "sha": TREE_SHA}] - - def _tree_entries(self): - return [ - {"path": "__init__.py", "type": "blob", "sha": "s1", "size": 10}, - {"path": "tests/__init__.py", "type": "blob", "sha": "s2", "size": 0}, - {"path": "tests/test_elasticsearch_handler.py", "type": "blob", "sha": "s3", "size": 1107}, - ] - - @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") - def test_handler_with_subdirectories(self, mock_get): - contents_resp = _make_response(200, self._parent_listing()) - trees_resp = _make_response(200, {"sha": TREE_SHA, "tree": self._tree_entries(), "truncated": False}) - raw_resp = _make_response(200) - raw_resp.content = b"# content" - - mock_get.side_effect = _make_get_side_effect(contents_resp, trees_resp, raw_resp) - - result = fetch_handler(HANDLER, 
self.storage) - - dest = self.storage / HANDLER - self.assertEqual(result, dest) - self.assertTrue((dest / "__init__.py").exists()) - self.assertTrue((dest / "tests" / "__init__.py").exists()) - self.assertTrue((dest / "tests" / "test_elasticsearch_handler.py").exists()) - - @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") - def test_404_handler_not_found(self, mock_get): - mock_get.return_value = _make_response(404) - - result = fetch_handler(HANDLER, self.storage) - - self.assertIsNone(result) - # tmp dir must not be left behind - self.assertFalse((self.storage / f".tmp_{HANDLER}").exists()) - - @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") - def test_atomic_rename_cleanup_on_failure(self, mock_get): - contents_resp = _make_response(200, self._parent_listing()) - trees_resp = _make_response(200, {"sha": TREE_SHA, "tree": self._tree_entries(), "truncated": False}) - # Simulate a download failure for raw files - raw_resp = _make_response(500) - raw_resp.raise_for_status.side_effect = requests.HTTPError("500") - - mock_get.side_effect = _make_get_side_effect(contents_resp, trees_resp, raw_resp) - - with self.assertRaises(RuntimeError): - fetch_handler(HANDLER, self.storage) - - # tmp dir must be cleaned up after the exception - self.assertFalse((self.storage / f".tmp_{HANDLER}").exists()) - # dest dir must not exist either - self.assertFalse((self.storage / HANDLER).exists()) - - def test_existing_handler_skips_fetch(self): - dest = self.storage / HANDLER - dest.mkdir(parents=True) - (dest / "__init__.py").write_text("# existing") - - with patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") as mock_get: - result = fetch_handler(HANDLER, self.storage) - - self.assertEqual(result, dest) - mock_get.assert_not_called() - - @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") - def test_max_depth_files_not_written(self, mock_get): - deep_path = 
"a/b/c/d/deep.py" - tree_entries = [ - {"path": "__init__.py", "type": "blob", "sha": "s1", "size": 0}, - {"path": deep_path, "type": "blob", "sha": "s2", "size": 99}, - ] - contents_resp = _make_response(200, self._parent_listing()) - trees_resp = _make_response(200, {"sha": TREE_SHA, "tree": tree_entries, "truncated": False}) - raw_resp = _make_response(200) - raw_resp.content = b"" - - mock_get.side_effect = _make_get_side_effect(contents_resp, trees_resp, raw_resp) - - fetch_handler(HANDLER, self.storage) - - dest = self.storage / HANDLER - self.assertTrue((dest / "__init__.py").exists()) - self.assertFalse((dest / deep_path).exists()) - - @patch("mindsdb.integrations.utilities.community_handler_fetcher.requests.get") - def test_truncated_tree_warning_propagates(self, mock_get): - contents_resp = _make_response(200, self._parent_listing()) - trees_resp = _make_response(200, {"sha": TREE_SHA, "tree": [], "truncated": True}) - - mock_get.side_effect = _make_get_side_effect(contents_resp, trees_resp) - - with self.assertLogs("mindsdb.integrations.utilities.community_handler_fetcher", level="WARNING") as cm: - fetch_handler(HANDLER, self.storage) - - self.assertTrue(any("truncated" in line for line in cm.output)) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/utilities/test_config.py b/tests/unit/utilities/test_config.py deleted file mode 100644 index d5bd93d46b7..00000000000 --- a/tests/unit/utilities/test_config.py +++ /dev/null @@ -1,61 +0,0 @@ -import os -import json -import tempfile -from pathlib import Path -from unittest.mock import patch - -import pytest - -from mindsdb.utilities.config import Config - - -class TestConfig: - """Tests for Config class""" - - def test_invalid_mindsdb_db_con_raises_error(self): - """Test that invalid MINDSDB_DB_CON value raises ValueError with helpful message""" - # Reset the singleton instance before test - Config._Config__instance = None - - with tempfile.TemporaryDirectory() as tmpdir: - 
config_file = Path(tmpdir) / "config.json" - config_file.write_text(json.dumps({})) - - invalid_db_con = "invalid_connection_string" - - with patch.dict( - os.environ, - { - "MINDSDB_CONFIG_PATH": str(config_file), - "MINDSDB_STORAGE_DIR": tmpdir, - "MINDSDB_DB_CON": invalid_db_con, - }, - clear=False, - ): - # Should raise ValueError with helpful message - with pytest.raises(ValueError) as exc_info: - Config() - - error_message = str(exc_info.value) - assert "Invalid MINDSDB_DB_CON value" in error_message - assert invalid_db_con in error_message - - def test_knowledge_bases_storage_env_does_not_override_storage_config(self): - Config._Config__instance = None - - with tempfile.TemporaryDirectory() as tmpdir: - config_file = Path(tmpdir) / "config.json" - config_file.write_text(json.dumps({})) - - with patch.dict( - os.environ, - { - "MINDSDB_CONFIG_PATH": str(config_file), - "MINDSDB_STORAGE_DIR": tmpdir, - "KNOWLEDGE_BASES_STORAGE": "faiss, pgvector", - }, - clear=False, - ): - cfg = Config() - - assert cfg["knowledge_bases"]["storage"] is None diff --git a/tests/unit/utilities/test_json_encoder.py b/tests/unit/utilities/test_json_encoder.py deleted file mode 100644 index 661b7141c19..00000000000 --- a/tests/unit/utilities/test_json_encoder.py +++ /dev/null @@ -1,48 +0,0 @@ -from datetime import datetime, date, timedelta -from decimal import Decimal - -import orjson -import pandas as pd -import numpy as np - -from mindsdb.utilities.json_encoder import CustomJSONEncoder - - -DEFAULT = CustomJSONEncoder().default - - -def dumps(payload): - return orjson.dumps( - payload, - default=DEFAULT, - option=orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_NON_STR_KEYS | orjson.OPT_PASSTHROUGH_DATETIME, - ).decode("utf-8") - - -def test_date_and_datetime_and_timedelta(): - s = dumps( - { - "d": date(2024, 7, 9), - "dt": datetime(2024, 7, 9, 1, 2, 3, 0), - "td": timedelta(hours=1, minutes=2, seconds=3), - } - ) - assert '"d":"2024-07-09"' in s - assert '"dt":"2024-07-09 
01:02:03.000000"' in s - assert '"td":"1:02:03"' in s - - -def test_pandas_na_serializes_to_null(): - s = dumps({"x": pd.NA}) - assert '"x":null' in s - - -def test_decimal_serialization_to_number(): - # Our default maps Decimal to float - s = dumps({"price": Decimal("12.34")}) - assert '"price":12.34' in s - - -def test_numpy_array(): - s = dumps({"name": np.array(["x", "y", "z"], dtype="object")}) - assert s == '{"name":["x","y","z"]}' diff --git a/tests/unit/utilities/test_log_sanitizer.py b/tests/unit/utilities/test_log_sanitizer.py deleted file mode 100644 index 26fd51a0663..00000000000 --- a/tests/unit/utilities/test_log_sanitizer.py +++ /dev/null @@ -1,80 +0,0 @@ -import io -import logging -import pytest - -from mindsdb.utilities.log import StreamSanitizingHandler - - -MASK = "********" -SECRET = "Pa$Sw0rd" - - -class TestStreamSanitizingHandler: - """Test StreamSanitizingHandler class""" - - @pytest.fixture - def logger_with_handler(self): - """Create logger with StreamSanitizingHandler and string buffer""" - logger = logging.getLogger("test_logger") - logger.handlers.clear() - logger.setLevel(logging.INFO) - - # Create string buffer to capture output - stream = io.StringIO() - handler = StreamSanitizingHandler(stream) - handler.setFormatter(logging.Formatter("%(message)s")) - logger.addHandler(handler) - - return logger, stream - - def test_handler_sanitizes_string_message(self, logger_with_handler): - """Test that handler sanitizes string messages""" - logger, stream = logger_with_handler - - logger.info(f"Login with password={SECRET}") - logger.info(f'CREATE DATABASE test WITH PARAMETERS={{"ToKeN": "{SECRET}"}}') - logger.info(f"CREATE DATABASE test USING api_KEY = {SECRET}") - output = stream.getvalue() - - assert SECRET not in output - assert MASK in output - - def test_handler_sanitizes_dict_message(self, logger_with_handler): - """Test that handler sanitizes dictionary messages""" - logger, stream = logger_with_handler - - logger.info({"user": 
"john", "password": SECRET, "token": SECRET}) - output = stream.getvalue() - - assert SECRET not in output - assert MASK in output - - def test_handler_sanitizes_formatted_message(self, logger_with_handler): - """Test that handler sanitizes formatted messages with args""" - logger, stream = logger_with_handler - - logger.info("CREATE MODEL test WITH password = %s", SECRET) - output = stream.getvalue() - - assert SECRET not in output - assert MASK in output - - def test_normal_messages(self, logger_with_handler): - """Test that handler preserves non-sensitive messages""" - logger, stream = logger_with_handler - - logger.info('CREATE MODEL test WITH ENGINE = "%s"', "postgres") - output = stream.getvalue() - - assert output.strip("\n") == 'CREATE MODEL test WITH ENGINE = "postgres"' - assert MASK not in output - - def test_handler_sanitizes_multiple_sensitive_keys(self, logger_with_handler): - """Test that handler sanitizes multiple types of sensitive data""" - logger, stream = logger_with_handler - - logger.info(f"Connecting: password={SECRET}, api_key = {SECRET}, token: {SECRET}") - output = stream.getvalue() - - assert SECRET not in output - assert MASK in output diff --git a/tests/unit/utilities/test_pid_file.py b/tests/unit/utilities/test_pid_file.py deleted file mode 100644 index 17acdc93f9e..00000000000 --- a/tests/unit/utilities/test_pid_file.py +++ /dev/null @@ -1,305 +0,0 @@ -import os -import json -import tempfile -from pathlib import Path -from unittest.mock import patch - -import pytest - -from mindsdb.utilities.fs import create_pid_file, delete_pid_file - - -class TestCreatePidFile: - """Tests for create_pid_file function""" - - @pytest.fixture - def temp_dir(self): - """Create a temporary directory for PID files""" - with tempfile.TemporaryDirectory() as tmpdir: - yield Path(tmpdir) - - @pytest.fixture - def mock_tmp_dir(self, temp_dir): - """Mock get_tmp_dir to return our temp directory""" - with patch("mindsdb.utilities.fs.get_tmp_dir", 
return_value=temp_dir): - yield temp_dir - - def test_does_nothing_when_use_pidfile_not_set(self, mock_tmp_dir): - """Test that function does nothing when USE_PIDFILE env var is not '1'""" - with patch.dict(os.environ, {"USE_PIDFILE": "0"}, clear=False): - create_pid_file({}) - - pid_file = mock_tmp_dir / "pid" - assert not pid_file.exists() - - def test_does_nothing_when_use_pidfile_missing(self, mock_tmp_dir): - """Test that function does nothing when USE_PIDFILE env var is missing""" - env_copy = os.environ.copy() - env_copy.pop("USE_PIDFILE", None) - with patch.dict(os.environ, env_copy, clear=True): - create_pid_file({}) - - pid_file = mock_tmp_dir / "pid" - assert not pid_file.exists() - - def test_creates_pid_file_when_not_exists(self, mock_tmp_dir): - """Test that PID file is created when it doesn't exist""" - config = { - "api": {"http": {"host": "127.0.0.1", "port": 47334}}, - "auth": {"username": "mindsdb", "password": "secret"}, - "pid_file_content": { - "http_host": "api.http.host", - "http_port": "api.http.port", - "username": "auth.username", - "password": "auth.password", - }, - } - - with patch.dict(os.environ, {"USE_PIDFILE": "1"}, clear=False): - create_pid_file(config) - - pid_file = mock_tmp_dir / "pid" - assert pid_file.exists() - - data = json.loads(pid_file.read_text()) - assert data["pid"] == os.getpid() - assert data["http_host"] == "127.0.0.1" - assert data["http_port"] == 47334 - assert data["username"] == "mindsdb" - assert data["password"] == "secret" - - def test_creates_pid_file_with_empty_config(self, mock_tmp_dir): - """Test that PID file is created with only PID number when pid_file_content is None""" - config = {"pid_file_content": None} - - with patch.dict(os.environ, {"USE_PIDFILE": "1"}, clear=False): - create_pid_file(config) - - pid_file = mock_tmp_dir / "pid" - assert pid_file.exists() - - # Should be just a number, not JSON - content = pid_file.read_text() - assert content == str(os.getpid()) - - def 
test_removes_invalid_json_pid_file(self, mock_tmp_dir): - """Test that PID file with invalid JSON is removed and recreated""" - pid_file = mock_tmp_dir / "pid" - pid_file.write_text("not valid json") - - config = {"pid_file_content": None} - - with patch.dict(os.environ, {"USE_PIDFILE": "1"}, clear=False): - create_pid_file(config) - - assert pid_file.exists() - content = pid_file.read_text() - assert int(content) == os.getpid() - - def test_removes_pid_file_with_nonexistent_process(self, mock_tmp_dir): - """Test that PID file with non-existent process is removed and recreated""" - pid_file = mock_tmp_dir / "pid" - # Use a very high PID that's unlikely to exist - old_data = {"pid": 999999999, "http_host": "old_host", "http_port": 12345} - pid_file.write_text(json.dumps(old_data)) - - config = { - "api": {"http": {"host": "new_host", "port": 54321}}, - "pid_file_content": {"http_host": "api.http.host", "http_port": "api.http.port"}, - } - - with patch.dict(os.environ, {"USE_PIDFILE": "1"}, clear=False): - create_pid_file(config) - - data = json.loads(pid_file.read_text()) - assert data["pid"] == os.getpid() - assert data["http_host"] == "new_host" - assert data["http_port"] == 54321 - - def test_raises_exception_when_process_exists(self, mock_tmp_dir): - """Test that exception is raised when PID file points to existing process""" - pid_file = mock_tmp_dir / "pid" - # Use current process PID to simulate existing process - old_data = {"pid": os.getpid()} - pid_file.write_text(json.dumps(old_data)) - - config = {"pid_file_content": None} - - with patch.dict(os.environ, {"USE_PIDFILE": "1"}, clear=False): - with pytest.raises(Exception, match="Found PID file with existing process"): - create_pid_file(config) - - def test_creates_pid_file_with_empty_pid_file_content(self, mock_tmp_dir): - """Test that PID file is created with only PID number when pid_file_content is empty dict""" - config = {"pid_file_content": {}} - - with patch.dict(os.environ, {"USE_PIDFILE": "1"}, 
clear=False): - create_pid_file(config) - - pid_file = mock_tmp_dir / "pid" - assert pid_file.exists() - - # Should be just a number, not JSON - content = pid_file.read_text() - assert content == str(os.getpid()) - - def test_creates_pid_file_with_json_content(self, mock_tmp_dir): - """Test that PID file is created with JSON when pid_file_content has fields""" - config = { - "api": {"http": {"host": "0.0.0.0", "port": 47334}}, - "pid_file_content": { - "http_host": "api.http.host", - "http_port": "api.http.port", - }, - } - - with patch.dict(os.environ, {"USE_PIDFILE": "1"}, clear=False): - create_pid_file(config) - - pid_file = mock_tmp_dir / "pid" - assert pid_file.exists() - - # Should be JSON - data = json.loads(pid_file.read_text()) - assert data["pid"] == os.getpid() - assert data["http_host"] == "0.0.0.0" - assert data["http_port"] == 47334 - - def test_creates_pid_file_with_none_values_in_json(self, mock_tmp_dir): - """Test that PID file JSON includes None for missing config values""" - config = { - "pid_file_content": { - "http_host": "api.http.host", - "missing_value": "path.to.missing.value", - } - } - - with patch.dict(os.environ, {"USE_PIDFILE": "1"}, clear=False): - create_pid_file(config) - - pid_file = mock_tmp_dir / "pid" - assert pid_file.exists() - - # Should be JSON with None values for missing paths - data = json.loads(pid_file.read_text()) - assert data["pid"] == os.getpid() - assert data["http_host"] is None - assert data["missing_value"] is None - - def test_raises_exception_when_process_exists_with_simple_pid(self, mock_tmp_dir): - """Test that exception is raised when PID file contains just a number of existing process""" - pid_file = mock_tmp_dir / "pid" - # Use current process PID to simulate existing process (as simple number) - pid_file.write_text(str(os.getpid())) - - config = {"pid_file_content": None} - - with patch.dict(os.environ, {"USE_PIDFILE": "1"}, clear=False): - with pytest.raises(Exception, match="Found PID file with 
existing process"): - create_pid_file(config) - - -class TestDeletePidFile: - """Tests for delete_pid_file function""" - - @pytest.fixture - def temp_dir(self): - """Create a temporary directory for PID files""" - with tempfile.TemporaryDirectory() as tmpdir: - yield Path(tmpdir) - - @pytest.fixture - def mock_tmp_dir(self, temp_dir): - """Mock get_tmp_dir to return our temp directory""" - with patch("mindsdb.utilities.fs.get_tmp_dir", return_value=temp_dir): - yield temp_dir - - def test_does_nothing_when_use_pidfile_not_set(self, mock_tmp_dir): - """Test that function does nothing when USE_PIDFILE env var is not '1'""" - pid_file = mock_tmp_dir / "pid" - pid_file.write_text(json.dumps({"pid": os.getpid()})) - - with patch.dict(os.environ, {"USE_PIDFILE": "0"}, clear=False): - delete_pid_file() - - # File should still exist - assert pid_file.exists() - - def test_does_nothing_when_use_pidfile_missing(self, mock_tmp_dir): - """Test that function does nothing when USE_PIDFILE env var is missing""" - pid_file = mock_tmp_dir / "pid" - pid_file.write_text(json.dumps({"pid": os.getpid()})) - - env_copy = os.environ.copy() - env_copy.pop("USE_PIDFILE", None) - with patch.dict(os.environ, env_copy, clear=True): - delete_pid_file() - - # File should still exist - assert pid_file.exists() - - def test_does_nothing_when_pid_file_not_exists(self, mock_tmp_dir): - """Test that function does nothing when PID file doesn't exist""" - with patch.dict(os.environ, {"USE_PIDFILE": "1"}, clear=False): - # Should not raise any exception - delete_pid_file() - - def test_deletes_pid_file_when_pid_matches(self, mock_tmp_dir): - """Test that PID file is deleted when PID matches current process""" - pid_file = mock_tmp_dir / "pid" - pid_file.write_text(json.dumps({"pid": os.getpid()})) - - with patch.dict(os.environ, {"USE_PIDFILE": "1"}, clear=False): - delete_pid_file() - - assert not pid_file.exists() - - def test_does_not_delete_when_pid_mismatch(self, mock_tmp_dir): - """Test that PID 
file is not deleted when PID doesn't match""" - pid_file = mock_tmp_dir / "pid" - # Use a different PID - other_pid = os.getpid() + 1 - pid_file.write_text(json.dumps({"pid": other_pid})) - - with patch.dict(os.environ, {"USE_PIDFILE": "1"}, clear=False): - delete_pid_file() - - # File should still exist - assert pid_file.exists() - - def test_handles_invalid_json_gracefully(self, mock_tmp_dir): - """Test that invalid JSON in PID file is handled gracefully""" - pid_file = mock_tmp_dir / "pid" - pid_file.write_text("not valid json") - - with patch.dict(os.environ, {"USE_PIDFILE": "1"}, clear=False): - # Should not raise exception, function logs a warning and removes corrupted file - delete_pid_file() - - # Corrupted PID file should be removed - assert not pid_file.exists() - - def test_deletes_pid_file_with_simple_pid_when_matches(self, mock_tmp_dir): - """Test that PID file with simple PID number is deleted when it matches current process""" - pid_file = mock_tmp_dir / "pid" - # Write just the PID as a number - pid_file.write_text(str(os.getpid())) - - with patch.dict(os.environ, {"USE_PIDFILE": "1"}, clear=False): - delete_pid_file() - - # File should be deleted since PID matches - assert not pid_file.exists() - - def test_does_not_delete_simple_pid_when_mismatch(self, mock_tmp_dir): - """Test that PID file with simple PID number is not deleted when PID doesn't match""" - pid_file = mock_tmp_dir / "pid" - # Write a different PID as a number - other_pid = os.getpid() + 1 - pid_file.write_text(str(other_pid)) - - with patch.dict(os.environ, {"USE_PIDFILE": "1"}, clear=False): - delete_pid_file() - - # File should NOT be deleted since PID doesn't match - assert pid_file.exists() diff --git a/tests/unit/various/test_arg_probing.py b/tests/unit/various/test_arg_probing.py deleted file mode 100644 index 3da3d834b12..00000000000 --- a/tests/unit/various/test_arg_probing.py +++ /dev/null @@ -1,122 +0,0 @@ -import pytest - -from 
mindsdb.integrations.handlers.openai_handler.openai_handler import OpenAIHandler -from mindsdb.integrations.libs.base import ArgProbeMixin - -""" -Tests for the arg probing mixin -""" - - -@pytest.fixture -def mock_handler_class(): - class MockHandler(ArgProbeMixin): - def __init__(self, **kwargs): ... - - def create(self, args): - args["test_required"] - args.get("test_optional", "default") - args.get("test_optional2") - # assign a value to a key in args should be ignored - # since it does not require read access to the argument with the key - args["test_required2"] = "default" - # write access to the argument with the key should be ignored - args.setdefault("test_optional3", "default") - - def predict(self, args): - args["test_required_at_some_point"] - args.get("test_optional", "default") - args.get("test_optional2") - args["test_required2"] = "default" # this should be ignored - args.setdefault("test_optional3", "default") # this should be ignored - # this will trigger the tracking - args.get("test_required_at_some_point", "but_not_always") - - # a read access with default value - _ = args.get("this_is_actually_required", "default") - # a read access without default value - _ = args["this_is_actually_required"] - - return MockHandler - - -@pytest.fixture -def mock_openai_handler_class(): - # let the openai handler use the arg probing mixin - class MockOpenAIHandler(OpenAIHandler, ArgProbeMixin): - def __init__(self, **kwargs): ... 
- - return MockOpenAIHandler - - -def test_arg_probing(mock_handler_class): - handler = mock_handler_class - - # Test create - prediction_args = handler.prediction_args() - # sort - prediction_args = sorted(prediction_args, key=lambda x: x["name"]) - - assert prediction_args == [ - { - "name": "test_optional", - "required": False, - }, - { - "name": "test_optional2", - "required": False, - }, - { - "name": "test_required_at_some_point", - "required": False, - }, - { - "name": "this_is_actually_required", - "required": False, - }, - ] - - creation_args = handler.creation_args() - # sort - creation_args = sorted(creation_args, key=lambda x: x["name"]) - assert creation_args == [ - { - "name": "test_optional", - "required": False, - }, - { - "name": "test_optional2", - "required": False, - }, - ] - - -def test_openai_handler_probing(mock_openai_handler_class): - handler = mock_openai_handler_class - - # Test create - known_args = [ - { - "name": "mode", - "required": False, - }, - { - "name": "temperature", - "required": False, - }, - ] - # Check that the known args are in the creation args - for arg in known_args: - assert arg in handler.prediction_args() - - # Check that some unknown args are not in the creation args - assert { - "name": "unknown_arg", - "required": False, - } not in handler.prediction_args() - - # inspiring example in https://github.com/mindsdb/mindsdb/issues/6846 - assert { - "name": "enigne", - "required": False, - } not in handler.prediction_args() diff --git a/tests/unit/various/test_data_handlers_cache.py b/tests/unit/various/test_data_handlers_cache.py deleted file mode 100644 index 16ba619b2cc..00000000000 --- a/tests/unit/various/test_data_handlers_cache.py +++ /dev/null @@ -1,196 +0,0 @@ -import time -import threading -from unittest.mock import patch - -import pytest - -from mindsdb.interfaces.database.data_handlers_cache import HandlersCache, HandlersCacheRecord -from mindsdb.utilities.context import context as ctx - - -class 
MockDatabaseHandler: - """Mock database handler for testing""" - - def __init__( - self, - name: str, - cache_thread_safe: bool = True, - cache_single_instance: bool = False, - cache_usage_lock: bool = True, - ): - self.name = name - self.cache_thread_safe = cache_thread_safe - self.cache_single_instance = cache_single_instance - self.cache_usage_lock = cache_usage_lock - self.is_connected = False - - def connect(self): - self.is_connected = True - - def disconnect(self): - self.is_connected = False - - -class TestHandlersCache: - def test_record(self): - """Test HandlersCacheRecord""" - record = HandlersCacheRecord(handler=MockDatabaseHandler("test_handler"), expired_at=time.time() + 60) - - assert record.expired is False - record.expired_at = time.time() - 1 - assert record.expired is True - - assert record.handler.is_connected is False - record.connect() - assert record.handler.is_connected is True - - assert record.has_references is False - ref = record.handler # noqa - assert record.has_references is True - ref = None # noqa - assert record.has_references is False - - def test_cache(self): - cache = HandlersCache(clean_timeout=0.1) - - def first_key(): - try: - return list(cache.handlers.keys())[0] - except Exception: - return None - - assert len(cache.handlers) == 0 - assert cache.cleaner_thread is None - - cache.set(MockDatabaseHandler("test_handler_a", cache_thread_safe=True)) - - assert len(cache.handlers) == 1 - assert cache.cleaner_thread is not None - - cache.set(MockDatabaseHandler("test_handler_a", cache_thread_safe=True)) - - assert len(cache.handlers) == 1 - assert first_key()[0] == "test_handler_a" - assert first_key()[1] == ctx.company_id - assert first_key()[2] == ctx.user_id - assert first_key()[3] == 0 # Thread id for thread safe handler (index 3 after name, company_id, user_id) - assert len(cache.handlers[first_key()]) == 2 - - handler_1 = cache.get("test_handler_a") - handler_2 = cache.get("test_handler_a") - handler_3 = 
cache.get("test_handler_a") - assert handler_1 is not None - assert handler_2 is not None - assert handler_3 is None - assert id(handler_1) != id(handler_2) - - # release handlers and try to get again - handler_1 = None - handler_2 = None - handler_1 = cache.get("test_handler_a") - assert handler_1 is not None - - # mark both as expired, and check that only one deleted (handler_1 has references, therefore is in use) - for handler_key in cache.handlers: - for record in cache.handlers[handler_key]: - record.expired_at = time.time() - 1 - - time.sleep(0.3) - assert len(cache.handlers[first_key()]) == 1 - handler_1 = None - time.sleep(0.3) - assert len(cache.handlers) == 0 - - cache.set(MockDatabaseHandler("test_handler_a", cache_thread_safe=True)) - cache.set(MockDatabaseHandler("test_handler_b", cache_thread_safe=True)) - cache.set(MockDatabaseHandler("test_handler_c", cache_thread_safe=False)) - - # get non-thread-safe record from different threads - # and set thread-safe handler in another thread - def thread_handler(): - ctx.set_default() - handler = cache.get("test_handler_c") - assert handler is None - cache.set(MockDatabaseHandler("test_handler_c", cache_thread_safe=False)) - cache.set(MockDatabaseHandler("test_handler_d", cache_thread_safe=True)) - handler = cache.get("test_handler_c") - assert handler is not None - - t = threading.Thread(target=thread_handler) - t.start() - t.join() - - # should be 2 keys for thread_c, for different threads - keys_count = sum([1 for key in cache.handlers if key[0] == "test_handler_c"]) - assert keys_count == 2 - - # only one handler_c can be returned - from current thread - handler = cache.get("test_handler_c") - assert handler is not None - handler = cache.get("test_handler_c") - assert handler is None - - # thread-save handler created in another thread - handler = cache.get("test_handler_d") - assert handler is not None - - # check that handler b can be retrieved once, while there are another handlers records - handler_b_1 
= cache.get("test_handler_b") - handler_b_2 = cache.get("test_handler_b") - assert handler_b_1 is not None - assert handler_b_2 is None - - # expire all handlers and check that they are cleared - handler = None - handler_b_1 = None - handler_b_2 = None - assert len(cache.handlers) != 0 - for handler_key in cache.handlers: - for record in cache.handlers[handler_key]: - record.expired_at = time.time() - 1 - time.sleep(0.3) - assert len(cache.handlers) == 0 - - # Test cache_usage_lock - cache.set(MockDatabaseHandler("test_handler_a", cache_thread_safe=True, cache_usage_lock=True)) - handler_a_1 = cache.get("test_handler_a") - handler_a_2 = cache.get("test_handler_a") - assert handler_a_1 is not None - assert handler_a_2 is None - handler_a_1 = None - cache.delete("test_handler_a") - assert len(cache.handlers) == 0 - - cache.set(MockDatabaseHandler("test_handler_a", cache_thread_safe=True, cache_usage_lock=False)) - handler_a_1 = cache.get("test_handler_a") - handler_a_2 = cache.get("test_handler_a") - assert handler_a_1 is not None - assert handler_a_2 is not None - assert handler_a_1 is handler_a_2 - cache.delete("test_handler_a") - assert len(cache.handlers) == 0 - - # Test cache_single_instance - cache.set(MockDatabaseHandler("test_handler_a", cache_thread_safe=True, cache_single_instance=False)) - cache.set(MockDatabaseHandler("test_handler_a", cache_thread_safe=True, cache_single_instance=False)) - assert len(cache.handlers[first_key()]) == 2 - cache.delete("test_handler_a") - assert len(cache.handlers) == 0 - - cache.set( - MockDatabaseHandler( - "test_handler_a", cache_thread_safe=True, cache_single_instance=True, cache_usage_lock=True - ) - ) - with pytest.raises(ValueError): - # can't add second instance with cache_single_instance=True - cache.set(MockDatabaseHandler("test_handler_a", cache_thread_safe=True, cache_single_instance=True)) - handler_a_1 = cache.get("test_handler_a") - assert handler_a_2 is not None - - # Mock wait_no_references (to not wait 
timeout while it still has references) - # and check that it is called when trying to get the handler again - with patch.object(HandlersCacheRecord, "wait_no_references", return_value=handler_a_1) as mock_wait: - handler_a_2 = cache.get("test_handler_a") - assert handler_a_2 is handler_a_1 - mock_wait.assert_called_once() diff --git a/tests/unit/various/test_document_preprocessor.py b/tests/unit/various/test_document_preprocessor.py deleted file mode 100644 index f4de1d3a4f6..00000000000 --- a/tests/unit/various/test_document_preprocessor.py +++ /dev/null @@ -1,697 +0,0 @@ -from collections import defaultdict - -import pandas as pd -import pytest -from unittest.mock import Mock, patch, AsyncMock -from mindsdb.interfaces.knowledge_base.preprocessing.models import ( - Document, - TextChunkingConfig, - ContextualConfig, -) -from mindsdb.interfaces.knowledge_base.preprocessing.text_splitter import TextSplitter - -# Mock all langchain imports to avoid pydantic version conflicts -with patch.dict( - "sys.modules", - { - "mindsdb.interfaces.agents.langchain_agent": Mock(), - "mindsdb.interfaces.agents.constants": Mock(), - "langchain_openai": Mock(), - }, -): - from mindsdb.interfaces.knowledge_base.preprocessing.document_preprocessor import ( - DocumentPreprocessor, - TextChunkingPreprocessor, - ContextualPreprocessor, - ) - -SAMPLE_DOC = """ -Federal Register, Volume 89 Issue 153 (Thursday, August 8, 2024) -[Federal Register Volume 89, Number 153 (Thursday, August 8, 2024)] -[Notices] -[Pages 64964-64965] -From the Federal Register Online via the Government Publishing Office [www.gpo.gov] -[FR Doc No: 2024-17521] -[[Page 64964]] -======================================================================= ------------------------------------------------------------------------ -NUCLEAR REGULATORY COMMISSION -[Docket No. 50-0320; NRC-2024-0099] -TMI-2SOLUTIONS, LLC; Three Mile Island Nuclear Station, Unit No. 
-2; Environmental Assessment and Finding of No Significant Impact -AGENCY: Nuclear Regulatory Commission. -ACTION: Notice; issuance. ------------------------------------------------------------------------ -SUMMARY: The U.S. Nuclear Regulatory Commission (NRC) is issuing a -final environmental assessment (EA) and finding of no significant -impact (FONSI) for a proposed amendment of NRC Possession Only License -(POL) DPR-73 for the Three Mile Island Nuclear Station, Unit No. 2 -(TMI-2), located in Londonderry Township, Dauphin County, Pennsylvania. -The proposed amendment would ensure that TMI-2 Energy Solutions (TMI- -2Solutions, the licensee) can continue decommissioning the facility in -accordance with NRC regulations. TMI-2Solutions will be engaging in -certain major decommissioning activities, including the physical -demolition of buildings previously deemed eligible for the National -Register of Historic Places (NRHP). The EA, ``Environmental Assessment -for Specific Decommissioning Activities at Three Mile Island, Unit 2 in -Dauphin County, Pennsylvania,'' documents the NRC staff's environmental -review of the license amendment application. -DATES: The EA and FONSI referenced in this document are available on -August 8, 2024. -ADDRESSES: Please refer to Docket ID NRC-2024-0099 when contacting the -NRC about the availability of information regarding this document. You -may obtain publicly available information related to this document -using any of the following methods: - Federal Rulemaking Website: Go to https://www.regulations.gov and search for Docket ID NRC-2024-0099. Address -questions about Docket IDs to Stacy Schumann; telephone: 301-415-0624; -email: [email protected]. For technical questions, contact the -individual listed in the FOR FURTHER INFORMATION CONTACT section of -this document. 
- NRC's Agencywide Documents Access and Management System -(ADAMS): You may obtain publicly available documents online in the -ADAMS Public Documents collection at https://www.nrc.gov/reading-rm/adams.html. To begin the search, select ``Begin Web-based ADAMS -Search.'' For problems with ADAMS, please contact the NRC's Public -Document Room (PDR) reference staff at 1-800-397-4209, at 301-415-4737, -or by email to [email protected]. The ADAMS accession number for -each document referenced (if it is available in ADAMS) is provided the -first time that it is mentioned in this document. - NRC's PDR: The PDR, where you may examine and order copies -of publicly available documents, is open by appointment. To make an -appointment to visit the PDR, please send an email to -[email protected] or call 1-800-397-4209 or 301-415-4737, between 8 -a.m. and 4 p.m. eastern time (ET), Monday through Friday, except -Federal holidays. - Project Website: Information related to the TMI-2 project -can be accessed on NRC's TMI-2 public website at https://www.nrc.gov/info-finder/decommissioning/power-reactor/three-mile-island-unit-2.html. -FOR FURTHER INFORMATION CONTACT: Jean Trefethen, Office of Nuclear -Material Safety and Safeguards, U.S. Nuclear Regulatory Commission, -Washington, DC 20555-0001; telephone: 301-415-0867; email: -[email protected]. -SUPPLEMENTARY INFORMATION: -I. Background - The Three Mile Island Nuclear Station (TMINS) is approximately 16 -kilometers (10 miles) southeast of Harrisburg, Pennsylvania. The TMINS -site includes Three Mile Island Nuclear Station, Unit 1 and TMI-2. It -encompasses approximately 178 hectares (440 acres), including the -adjacent islands on the north end, a strip of land on the mainland -along the eastern shore of the river, and an area on the eastern shore -of Shelley Island. The TMINS site has significance in U.S. history -because it is the site of the nation's most serious commercial nuclear -power plant accident, occurring at TMI-2. 
On March 28, 1979, TMI-2 -experienced an accident initiated by interruption of secondary -feedwater flow which led to a core heat up that caused fuel damage. The -partial meltdown of the reactor core led to a very small offsite -release of radioactivity. In response to this accident many changes -occurred at nuclear power plants including emergency response planning, -reactor operator training, human factors engineering, radiation -protection and heightened NRC regulatory oversight. -II. Discussion - By letter dated February 22, 2023 (ADAMS Accession No. -ML23058A064), TMI-2Solutions requested an amendment to POL No. DPR-73. -TMI-2Solutions will be engaging in certain major decommissioning -activities, including the physical demolition of buildings previously -deemed eligible for the NRHP. Because the impacts on the historic -properties from these decommissioning activities have not been -previously evaluated and are not bounded by the impact's discussion in -NUREG-0586, ``Final Generic Environmental Impact Statement on -Decommissioning of Nuclear Facilities,'' TMI-2Solutions requested an -amendment that would require evaluation of the impacts of the -decommissioning activities on the NRHP-eligible properties, in -compliance with paragraph 50.82(a)(6)(ii) of title 10 of the Code of -Federal Regulations (10 CFR). - Pursuant to 36 CFR 800.8, the NRC used its National Environmental -Policy Act process for developing the EA to facilitate consultation -pursuant to section 106 of the National Historic Preservation Act -(NHPA). - Adverse effects to historic properties would result from -decommissioning activities at TMI-2. Therefore, the NRC and consulting -parties proceeded with development of a programmatic agreement (PA) to -resolve adverse effects. The draft PA was issued for public comment -through a Federal Register notice dated March 6, 2024 (89 FR 16037). -One comment was received and considered before finalizing the PA. 
The -PA addresses the potential direct and indirect adverse effects from the -decommissioning activities and ensures that appropriate mitigation -measures are implemented. The NRC's EA references the final PA and, -therefore, conclude NHPA section 106 consultation. - In accordance with NRC's regulations in 10 CFR part 51, -``Environmental Protection Regulations for Domestic Licensing and -Related Regulatory Functions,'' that implement the National Environment -Protection Agency (NEPA), the NRC staff has prepared an EA documenting -its environmental review of the license amendment application. Based on -the environmental review, the NRC has made a determination that the -proposed action will not significantly affect the quality of the human -environment and that a FONSI is therefore appropriate. -III. Summary of Environmental Assessment - The EA is publicly available in ADAMS under Accession No. -ML24197A005. A summary description of the proposed action and expected -environmental impacts is provided as follows. -[[Page 64965]] -Description of the Proposed Action - The proposed action is to amend POL No. DPR-73 so that TMI- -2Solutions can continue with certain major decommissioning activities -planned under Phase 2 of its decommissioning schedule. Phase 2 -decommissioning activities include the removal of any radioactive -components in preparation for demolition of structures, decommissioning -and dismantlement of the TMI-2 site to a level that permits the release -of the site, except for an area potentially to be set aside for storage -of fuel-bearing material (small quantities of spent nuclear fuel, -damaged core material, and high-level waste) on the independent spent -fuel storage installation, backfilling of the site, license termination -plan submittal and implementation, and site restoration activities. 
In -order to comply with 10 CFR 50.82(a)(6)(ii), TMI-2Solutions requested -that NRC evaluate the impacts of certain major decommissioning -activities on historic and cultural resources and NRHP-eligible -properties. The definition of major decommissioning activity is in 10 -CFR 50.2, which states ``major decommissioning activity means, for a -nuclear power reactor facility, any activity that results in permanent -removal of major radioactive components, permanently modifies the -structure of the containment, or results in dismantling components for -shipment containing greater than class C waste in accordance with Sec. -61.55 of this chapter.'' Due to radioactive contamination, the TMI-2 -structures must be demolished and removed during decommissioning. -Environmental Impacts of the Proposed Action - In the EA, the staff assessed the potential environmental impacts -from the proposed license amendment to the following resource areas: -land use; visual and scenic resources; the geologic environment; -surface and groundwater resources; ecological resources; air quality; -noise; historic and cultural resources; socioeconomic conditions; -environmental justice; public and occupational health; transportation; -and waste generation and management. The NRC staff also considered the -cumulative impacts from past, present, and reasonably foreseeable -actions when combined with the proposed action. The TMI-2 Historic -District would be adversely affected by the TMI-2 decommissioning, and -adverse effects cannot be avoided. The mitigation of adverse effects to -the TMI-2 Historic District will be completed in accordance with the -TMI-2 Demolition and Decommissioning Programmatic Agreement (NRC -2024a). 
- As part of the NRC's consultation under section 7 of the Endangered -Species Act, NRC staff determined that the proposed action may affect -but is not likely to adversely affect the Indiana bat (Myotis sodalis), -northern long-eared bat (Myotis septentrionalis), tricolored bat -(Perimyotis subflavus), monarch butterfly (Danaus plexippus), -northeastern bulrush (Scirpus ancistrochaetus), or green floater -(Lasmigona subviridis). The NRC staff transmitted a letter to the U.S. -Fish and Wildlife Service (FWS) for its review and concurrence on May -24, 2024 (ADAMS Accession No. ML24120A324). The FWS concurred with the -NRC's findings on July 15, 2024 (ADAMS Accession No. ML24199A062). - All other potential impacts from the proposed action were -determined to be not significant, as described in the EA. The NRC staff -found that there would be no significant negative cumulative impact to -any resource area from the proposed action when added to other past, -present, and reasonably foreseeable actions. -Environmental Impacts of the Alternative to the Proposed Action - As an alternative to the proposed action, the NRC staff considered -denial of the proposed action (i.e., the ``no-action'' alternative). -Under the no-action alternative, the NRC would deny the licensee's -request to allow for the continuation of major decommissioning -activities under Phase 2. In this case, the NRC staff would not review -the historic and cultural resource impacts of the major decommissioning -activities as defined in 10 CFR 50.2 and would therefore disallow the -removal of NRHP-eligible structures and any impacts to historic and -cultural resources. However, due to the presence of radioactive -contamination, TMI-2 structures, including the NRHP-eligible -structures, must be removed during the decommissioning process. -Therefore, the NRC staff concludes that denying the amendment request -is not a reasonable alternative. -IV. 
Finding of No Significant Impact - In accordance with the NEPA and 10 CFR part 51, the NRC staff has -conducted an environmental review of a request for an amendment to POL -No. DPR-73. The proposed amendment would revise the POL to allow the -licensee to conduct decommissioning at TMI-2 covering activities that -were not previously addressed in the staff's environmental assessments -(site-specific historical and cultural resources). Based on its -environmental review of the proposed action, the NRC staff has made a -finding of no significant impact in the EA. Therefore, the NRC staff -has determined, pursuant to 10 CFR 51.31, that preparation of an -environmental impact statement is not required for the proposed action -and a FONSI is appropriate. - Dated: August 2, 2024. - For the Nuclear Regulatory Commission. -Christopher M. Regan, -Director, Division of Rulemaking, Environmental, and Financial Support, -Office of Nuclear Material Safety, and Safeguards. -[FR Doc. 2024-17521 Filed 8-7-24; 8:45 am] -BILLING CODE 7590-01-P -Federal Register, Volume 89 Issue 153 (Thursday, August 8, 2024) -[Federal Register Volume 89, Number 153 (Thursday, August 8, 2024)] -[Notices] -[Pages 64964-64965] -From the Federal Register Online via the Government Publishing Office [www.gpo.gov] -[FR Doc No: 2024-17521] -[[Page 64964]]""" - - -class TestDocumentPreprocessor: - def test_deterministic_id_generation(self): - """Test that ID generation is deterministic for same content""" - from mindsdb.interfaces.knowledge_base.utils import generate_document_id - - # Same content should generate same ID - content = "test content" - content_column = "test_column" - id1 = generate_document_id(content, content_column) - id2 = generate_document_id(content, content_column) - assert id1 == id2 - assert len(id1) == 16 # Check hash length - # Different content should generate different IDs - different_content = "different content" - id3 = generate_document_id(different_content, content_column) - 
assert id1 != id3 - # Test with provided_id - provided_id = "test_id" - id4 = generate_document_id(content, content_column, provided_id) - assert id4 == provided_id - - def test_chunk_id_generation(self): - """Test human-readable chunk ID generation""" - provided_id = "test_id" - content_column = "test_column" - preprocessor = DocumentPreprocessor() - - # Test with all parameters - chunk_id = preprocessor._generate_chunk_id( - chunk_index=0, - total_chunks=3, - start_char=0, - end_char=100, - provided_id=provided_id, - content_column=content_column, - ) - assert chunk_id == f"test_id:{content_column}:1of3:0to100" - - # Test error when no document ID provided - with pytest.raises(ValueError, match="Document ID must be provided"): - preprocessor._generate_chunk_id( - chunk_index=0, total_chunks=3, start_char=0, end_char=100, content_column=content_column - ) - - # Test error when no content column provided - with pytest.raises(ValueError, match="Content column must be provided"): - preprocessor._generate_chunk_id( - chunk_index=0, total_chunks=3, start_char=0, end_char=100, provided_id=provided_id - ) - - def test_split_document_without_splitter(self): - """Test that splitting without a configured splitter raises error""" - doc = Document(content="test content") - preprocessor = DocumentPreprocessor() - with pytest.raises(ValueError, match="Splitter not configured"): - preprocessor._split_document(doc) - - def test_chunk_overlap(self): - """Test chunk overlap""" - from mindsdb.interfaces.knowledge_base.utils import generate_document_id - - config = TextChunkingConfig(chunk_size=10, chunk_overlap=5) - preprocessor = TextChunkingPreprocessor(config) - long_content = " ".join(["word"] * 50) - doc_id = generate_document_id(long_content, "test_column") - doc = Document(content=long_content, id=doc_id) - chunks = preprocessor.process_documents([doc]) - # Ensure correct number of chunks is created - assert len(chunks) > 1 - # Validate overlap in the produced chunks - for i 
in range(len(chunks) - 1): - overlap_length = config.chunk_overlap - # Compare content without worrying about whitespace positioning - assert ( - chunks[i].content.strip()[-overlap_length:].strip() - == chunks[i + 1].content.strip()[:overlap_length].strip() - ) - - def test_standard_chunking_strategy(self): - """Test standard chunking strategy with different overlap values""" - from mindsdb.interfaces.knowledge_base.utils import generate_document_id - - content = " ".join(["word"] * 30) - doc_id = generate_document_id(content, "test_column") - doc = Document(content=content, id=doc_id) - - # Test with no overlap - config_no_overlap = TextChunkingConfig(chunk_size=10, chunk_overlap=0) - preprocessor_no_overlap = TextChunkingPreprocessor(config_no_overlap) - chunks_no_overlap = preprocessor_no_overlap.process_documents([doc]) - - # Test with medium overlap - config_medium_overlap = TextChunkingConfig(chunk_size=10, chunk_overlap=3) - preprocessor_medium_overlap = TextChunkingPreprocessor(config_medium_overlap) - chunks_medium_overlap = preprocessor_medium_overlap.process_documents([doc]) - # Test with high overlap - config_high_overlap = TextChunkingConfig(chunk_size=10, chunk_overlap=7) - preprocessor_high_overlap = TextChunkingPreprocessor(config_high_overlap) - chunks_high_overlap = preprocessor_high_overlap.process_documents([doc]) - # Verify that all chunking strategies produce multiple chunks - assert len(chunks_no_overlap) > 1 - assert len(chunks_medium_overlap) > 1 - assert len(chunks_high_overlap) > 1 - # Verify that highest overlap results in more chunks - assert len(chunks_high_overlap) >= len(chunks_medium_overlap) - # Verify that chunks with overlap have the correct overlap content - if len(chunks_medium_overlap) > 1: - for i in range(len(chunks_medium_overlap) - 1): - # Get the stripped content - current_chunk = chunks_medium_overlap[i].content.strip() - next_chunk = chunks_medium_overlap[i + 1].content.strip() - # Get the last overlap_length words of 
current chunk - current_end = " ".join(current_chunk.split()[-config_medium_overlap.chunk_overlap :]) - # Get the first overlap_length words of next chunk - next_start = " ".join(next_chunk.split()[: config_medium_overlap.chunk_overlap]) - # Compare the overlap content (allowing for whitespace differences) - assert current_end.strip() == next_start.strip() - - def test_parent_child_relationship(self): - """Test that parent-child relationship is preserved during processing""" - config = TextChunkingConfig(chunk_size=10, chunk_overlap=2) - preprocessor = TextChunkingPreprocessor(config) - parent_content = " ".join(["parent"] * 30) - parent_doc = Document(content=parent_content, id="parent_doc") - - # Test default behavior (delete_existing=False) - chunks = preprocessor.process_documents([parent_doc]) - # Verify that all chunks have reference to the parent document - for chunk in chunks: - assert "_original_doc_id" in chunk.metadata - assert chunk.metadata["_original_doc_id"] == "parent_doc" - # Verify chunk position metadata - assert "_start_char" in chunk.metadata - assert "_end_char" in chunk.metadata - assert chunk.metadata["_end_char"] > chunk.metadata["_start_char"] - - # Test with delete_existing=True - chunks = preprocessor.process_documents([parent_doc]) - - # Verify chunk IDs follow the new format - for i, chunk in enumerate(chunks): - chunk_id_parts = chunk.id.split(":") - assert len(chunk_id_parts) == 4 - assert chunk_id_parts[0] == "parent_doc" - assert chunk_id_parts[1] == "content" # Default content column - - def test_document_update_modes(self): - """Test document update behavior in different modes""" - config = TextChunkingConfig(chunk_size=10, chunk_overlap=2) - preprocessor = TextChunkingPreprocessor(config) - - # Create initial document - doc_id = "test_doc" - initial_content = " ".join(["initial"] * 20) - initial_doc = Document(content=initial_content, id=doc_id) - - # Test default mode (delete_existing=False) - updated_content_1 = " 
".join(["updated1"] * 20) - updated_doc_1 = Document(content=updated_content_1, id=doc_id) - - # Process both versions with default settings - initial_chunks = preprocessor.process_documents([initial_doc]) - updated_chunks_1 = preprocessor.process_documents([updated_doc_1]) - - # Verify initial chunks have delete_existing=False - for chunk in initial_chunks: - assert chunk.metadata["_original_doc_id"] == doc_id - - # Verify updated chunks also have delete_existing=False - for chunk in updated_chunks_1: - assert chunk.metadata["_original_doc_id"] == doc_id - - # Test full document deletion mode (delete_existing=True) - updated_content_2 = " ".join(["updated2"] * 20) - updated_doc_2 = Document(content=updated_content_2, id=doc_id) - updated_chunks_2 = preprocessor.process_documents([updated_doc_2]) - - # Verify chunks are marked for full document deletion - for chunk in updated_chunks_2: - assert chunk.metadata["_original_doc_id"] == doc_id - - # Verify chunk IDs are properly formatted in all cases - for chunks in [initial_chunks, updated_chunks_1, updated_chunks_2]: - for i, chunk in enumerate(chunks): - chunk_id_parts = chunk.id.split(":") - assert len(chunk_id_parts) == 4 - assert chunk_id_parts[0] == doc_id - assert chunk_id_parts[1] == "content" # Default content column - - -def test_document_id_generation(): - """Test the new document ID generation logic""" - from mindsdb.interfaces.knowledge_base.utils import generate_document_id - - preprocessor = TextChunkingPreprocessor() - - # Test consistent base ID across different columns - content = "test content" - content_column = "test_column" - doc_id1 = generate_document_id(content, content_column) - doc_id2 = generate_document_id(content, content_column) - - # Same content should get same doc ID - assert doc_id1 == doc_id2 - # Doc ID should be 16 chars (MD5 hash truncated) + column name - assert len(doc_id1) == 16 - - # Test different content gets different IDs - different_content = "different content" - doc_id3 
= generate_document_id(different_content, content_column) - assert doc_id3 != doc_id1 - - # Test provided ID is preserved - custom_id = "custom_doc_123" - doc_id4 = generate_document_id(content, content_column, custom_id) - assert doc_id4 == custom_id - - # Test chunk ID format - doc = Document(content=content, id=doc_id1) - chunks = preprocessor.process_documents([doc]) - for chunk in chunks: - # Format should be: :of:to - parts = chunk.id.split(":") - assert len(parts) == 4 - assert "of" in parts[2] - assert "to" in parts[3] - - -def test_metadata_preservation(): - """Test that metadata is preserved during processing""" - from mindsdb.interfaces.knowledge_base.utils import generate_document_id - - preprocessor = TextChunkingPreprocessor() - metadata = {"key": "value", "content_column": "test_column"} - content = "Test content" - doc_id = generate_document_id(content, "test_column") - doc = Document(content=content, metadata=metadata, id=doc_id) - chunks = preprocessor.process_documents([doc]) - # Verify metadata is preserved and includes source - assert chunks[0].metadata["_source"] == "TextChunkingPreprocessor" - assert chunks[0].metadata["key"] == "value" - assert chunks[0].metadata["content_column"] == "test_column" - - -def test_content_column_handling(): - """Test handling of content column in metadata""" - from mindsdb.interfaces.knowledge_base.utils import generate_document_id - - preprocessor = TextChunkingPreprocessor() - content = "Test content" - metadata = {"content_column": "test_column"} - doc_id = generate_document_id(content, "test_column") - doc = Document(content=content, metadata=metadata, id=doc_id) - chunks = preprocessor.process_documents([doc]) - # Verify content column is preserved in metadata - assert "content_column" in chunks[0].metadata - assert chunks[0].metadata["content_column"] == "test_column" - - -def test_provided_id_handling(): - """Test handling of provided document IDs""" - preprocessor = TextChunkingPreprocessor() - doc = 
Document(content="Test content", id="test_id") - chunks = preprocessor.process_documents([doc]) - # Verify provided ID is incorporated into chunk ID - assert chunks[0].metadata["_original_doc_id"] == "test_id" - - -def test_empty_content_handling(): - """Test handling of empty content""" - from mindsdb.interfaces.knowledge_base.utils import generate_document_id - - preprocessor = TextChunkingPreprocessor() - content = "" - doc_id = generate_document_id(content, "test_column") - doc = Document(content=content, id=doc_id) - chunks = preprocessor.process_documents([doc]) - assert len(chunks) == 0 - - -def test_whitespace_content_handling(): - """Test handling of whitespace-only content""" - from mindsdb.interfaces.knowledge_base.utils import generate_document_id - - preprocessor = TextChunkingPreprocessor() - content = " \n \t " - doc_id = generate_document_id(content, "test_column") - doc = Document(content=content, id=doc_id) - chunks = preprocessor.process_documents([doc]) - assert len(chunks) == 0 - - -@pytest.mark.parametrize( - "content,metadata,expected_source", - [ - ("test", None, "TextChunkingPreprocessor"), - ("test", {}, "TextChunkingPreprocessor"), - ("test", {"key": "value"}, "TextChunkingPreprocessor"), - ], -) -def test_source_metadata(content, metadata, expected_source): - """Test source metadata is correctly set""" - from mindsdb.interfaces.knowledge_base.utils import generate_document_id - - preprocessor = TextChunkingPreprocessor() - doc_id = generate_document_id(content, "test_column") - doc = Document(content=content, metadata=metadata, id=doc_id) - chunks = preprocessor.process_documents([doc]) - assert chunks[0].metadata["_source"] == expected_source - - -class TestContextualPreprocessor: - def test_source_metadata(self, preprocessor_sync): - """Test that source metadata is correctly set""" - # Create a pre-defined chunk with the correct metadata - chunk = Mock(metadata={"source": "ContextualPreprocessor"}) - # Mock the entire process_documents 
method - with patch.object(ContextualPreprocessor, "process_documents", return_value=[chunk]): - doc = Document(content="Test content") - chunks = preprocessor_sync.process_documents([doc]) - assert chunks[0].metadata["source"] == "ContextualPreprocessor" - - def test_prepare_prompts(self, preprocessor_sync): - chunk_contents = [f"Chunk contents {i}" for i in range(10)] - full_documents = [f"Full document {i}" for i in range(10)] - _ = preprocessor_sync._prepare_prompts(chunk_contents, full_documents) - - def test_process_documents(self, sample_document, preprocessor_sync): - """Test document processing""" - # Create pre-defined chunks - chunks = [Mock(content="Test context") for _ in range(3)] - # Mock the entire process_documents method - with patch.object(ContextualPreprocessor, "process_documents", return_value=chunks): - docs = [Document(content=sample_document, id=f"{i}") for i in range(3)] - result = preprocessor_sync.process_documents(docs) - assert len(result) > 0 - - def test_process_documents_async(self, sample_document, preprocessor_async): - """Test document processing with async LLM""" - # Create pre-defined chunks - chunks = [Mock(content="Test context") for _ in range(3)] - # Mock the entire process_documents method - with patch.object(ContextualPreprocessor, "process_documents", return_value=chunks): - docs = [Document(content=sample_document, id=f"{i}") for i in range(3)] - result = preprocessor_async.process_documents(docs) - assert len(result) > 0 - assert all(chunk.content == "Test context" for chunk in result) - - def test_process_documents_sync(self, sample_document, preprocessor_sync): - """Test document processing with sync LLM""" - # Create pre-defined chunks - chunks = [Mock(content="Test context") for _ in range(3)] - # Mock the entire process_documents method - with patch.object(ContextualPreprocessor, "process_documents", return_value=chunks): - docs = [Document(content=sample_document, id=f"{i}") for i in range(3)] - result = 
preprocessor_sync.process_documents(docs) - assert len(result) > 0 - assert all(chunk.content == "Test context" for chunk in result) - - @pytest.fixture - def preprocessor_async(self): - with patch( - "mindsdb.interfaces.knowledge_base.preprocessing.document_preprocessor.create_chat_model" - ) as mock_create_chat_model: - # Create a mock async LLM - class MockResponse: - def __init__(self, content): - self.content = content - - # Create a mock with async support - mock_llm = Mock() - # Add an async batch method that returns appropriate responses - mock_llm.abatch = AsyncMock(return_value=[MockResponse("Test context") for _ in range(10)]) - mock_create_chat_model.return_value = mock_llm - config = ContextualConfig( - chunk_size=100, - chunk_overlap=20, - llm_config={"model_name": "test_model", "provider": "test"}, - summarize=True, # Set summarize to True to only return context - ) - return ContextualPreprocessor(config) - - @pytest.fixture - def preprocessor_sync(self): - with patch( - "mindsdb.interfaces.knowledge_base.preprocessing.document_preprocessor.create_chat_model" - ) as mock_create_chat_model: - # Create a mock with async support - mock_llm = Mock() - # Define MockResponse class - - class MockResponse: - def __init__(self, content): - self.content = content - - # Regular batch method - mock_llm.batch = Mock(return_value=[MockResponse("Test context") for _ in range(10)]) - # Async batch method - mock_llm.abatch = AsyncMock(return_value=[MockResponse("Test context") for _ in range(10)]) - mock_create_chat_model.return_value = mock_llm - config = ContextualConfig( - chunk_size=100, - chunk_overlap=20, - llm_config={"model_name": "test_model", "provider": "test"}, - summarize=True, # Set summarize to True to only return context - ) - return ContextualPreprocessor(config) - - @pytest.fixture - def sample_document(self): - return SAMPLE_DOC - - -class TextSplitEval: - def evaluate(self, text, chunks, chunk_size): - lengths = [len(chunk) for chunk in chunks] - 
len_df = pd.DataFrame(lengths, columns=["length"]) - - text2 = text3 = text - sep_stat = defaultdict(int) - for chunk in chunks: - pos = text2.find(chunk) - if pos == -1: - pos = text3.find(chunk) - if pos == -1: - raise Exception("chunk is not from text:" + chunk) - print("----------------\noverlap: ", text3[pos : len(text3) - len(text2)]) - text2 = text3 - - else: - sep = text2[:pos] - - sep_stat[sep] += 1 - - text3 = text2 # previous - text2 = text2[pos + len(chunk) :] - - print("______________") - print("avg len:", len_df["length"].mean()) - print("median len:", len_df["length"].median()) - print("count:", len(len_df)) - c_75 = len(len_df[len_df["length"] > chunk_size * 0.75]) - print("count > 75%:", int(c_75 / len(len_df) * 100), "%") - print("separators using:", dict(sep_stat)) - - def eval_text_splitter(self): - chunk_size = 1000 - chunk_overlap = 200 - separators = ["\n\n", ".\n", ". ", " ", ""] - splitter = TextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, separators=separators) - text = SAMPLE_DOC - chunks = splitter.split_text(text) - - self.evaluate(text, chunks, chunk_overlap) diff --git a/tests/unit/various/test_dump_result_set_to_mysql.py b/tests/unit/various/test_dump_result_set_to_mysql.py deleted file mode 100644 index 4bdb46de37c..00000000000 --- a/tests/unit/various/test_dump_result_set_to_mysql.py +++ /dev/null @@ -1,437 +0,0 @@ -from copy import deepcopy -import datetime -from decimal import Decimal - -import pytest -from pandas import DataFrame, NA, Timestamp - -from mindsdb.api.executor.sql_query.result_set import ResultSet -from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE, DATA_C_TYPE_MAP -from mindsdb.api.mysql.mysql_proxy.utilities.dump import dump_result_set_to_mysql - - -# Test cases without specifying column's mysql_type. -# In this case output mysql_type should be determined by the input dataframe.dtypes. 
-dtype_tests = [ - # Datetime types - { - 'input': [ - datetime.date(2023, 10, 15), - datetime.datetime(2023, 10, 16, 12, 30), - Timestamp('2023-10-15 14:30:45.123456789'), - None - ], - 'dtype': 'datetime64[ns]', - 'output': [ - datetime.date(2023, 10, 15).strftime("%Y-%m-%d %H:%M:%S"), - datetime.datetime(2023, 10, 16, 12, 30).strftime("%Y-%m-%d %H:%M:%S"), - Timestamp('2023-10-15 14:30:45.123456789').strftime("%Y-%m-%d %H:%M:%S"), - None - ], - 'mysql_type': MYSQL_DATA_TYPE.DATETIME - }, - { - 'input': [ - datetime.date(2023, 10, 15), - datetime.datetime(2023, 10, 16, 12, 30), - Timestamp('2023-10-15 14:30:45.123456789'), - datetime.time(14, 30, 45, 123456), - None - ], - 'dtype': 'object', - 'output': [ - str(datetime.date(2023, 10, 15)), - str(datetime.datetime(2023, 10, 16, 12, 30)), - str(Timestamp('2023-10-15 14:30:45.123456789')), - str(datetime.time(14, 30, 45, 123456)), - None - ], - 'mysql_type': MYSQL_DATA_TYPE.TEXT - }, - - # BOOL types - { - # None is True in dataframe with dtype=bool, we can't change it - 'input': [1, 2, 0, -1, None], - 'dtype': 'bool', - 'output': ['1', '1', '0', '1', '1'], - 'mysql_type': MYSQL_DATA_TYPE.BOOL - }, - - # FLOAT types - { - 'input': [1, 2, 3], - 'dtype': 'float64', - 'output': ['1.0', '2.0', '3.0'], - 'mysql_type': MYSQL_DATA_TYPE.FLOAT - }, { - 'input': [1.1, 2.2, 3.3, Decimal('4.4')], - 'dtype': 'float64', - 'output': ['1.1', '2.2', '3.3', '4.4'], - 'mysql_type': MYSQL_DATA_TYPE.FLOAT - }, { - 'input': [1.1, NA, None, Decimal('4.4')], - 'dtype': 'Float64', - 'output': ['1.1', None, None, '4.4'], - 'mysql_type': MYSQL_DATA_TYPE.FLOAT - }, - - # INT types - { - 'input': [1, 2, 3], - 'dtype': 'int64', - 'output': ['1', '2', '3'], - 'mysql_type': MYSQL_DATA_TYPE.INT - }, { - 'input': [1, NA, None], - 'dtype': 'Int64', - 'output': ['1', None, None], - 'mysql_type': MYSQL_DATA_TYPE.INT - }, - - # STR types - { - 'input': ['a', 1, NA, None, Decimal('4.4')], - 'dtype': 'object', - 'output': ['a', '1', None, None, '4.4'], - 
'mysql_type': MYSQL_DATA_TYPE.TEXT - }, { - 'input': ['a', 1, NA, None, Decimal('4.4')], - 'dtype': 'string', - 'output': ['a', '1', None, None, '4.4'], - 'mysql_type': MYSQL_DATA_TYPE.TEXT - }, { - 'input': [1, 2, 3], - 'dtype': 'string', - 'output': ['1', '2', '3'], - 'mysql_type': MYSQL_DATA_TYPE.TEXT - }, { - 'input': [1, 2, 3], - 'dtype': 'object', - 'output': ['1', '2', '3'], - 'mysql_type': MYSQL_DATA_TYPE.TEXT - }, { - 'input': [1, NA, None], - 'dtype': 'object', - 'output': ['1', None, None], - 'mysql_type': MYSQL_DATA_TYPE.TEXT - }, { - 'input': ['1', NA, None], - 'dtype': 'object', - 'output': ['1', None, None], - 'mysql_type': MYSQL_DATA_TYPE.TEXT - }, -] - -# Test cases with specifying column's mysql_type. -# In this case output mysql_type should be the same as the input mysql_type. -column_types_tests = [] - - -def _map_test_case(test_cases: list[dict], column_types: list[MYSQL_DATA_TYPE]): - """Make test cases for each column type. Some test cases may be used with group of similar column types - (like INT, BIGINT, SMALLINT etc.), so we clone test cases for each column type. - - Args: - test_cases: List of test cases. - column_types: List of column types. 
- """ - for column_type in column_types: - for test_case in test_cases: - test_case = deepcopy(test_case) - test_case['column_type'] = column_type - test_case['mysql_type'] = column_type - column_types_tests.append(test_case) - - -str_test_cases = [ - { - 'input': ['a', 1, NA, None, Decimal('4.4')], - 'dtype': 'object', - 'output': ['a', '1', None, None, '4.4'] - }, { - 'input': ['a', 1, NA, None, Decimal('4.4')], - 'dtype': 'string', - 'output': ['a', '1', None, None, '4.4'] - }, { - 'input': [1, 2, 3], - 'dtype': 'string', - 'output': ['1', '2', '3'] - }, { - 'input': [1, 2, 3], - 'dtype': 'int64', - 'output': ['1', '2', '3'] - }, { - 'input': [1, 2, 3], - 'dtype': 'object', - 'output': ['1', '2', '3'] - }, { - 'input': [1, NA, None], - 'dtype': 'object', - 'output': ['1', None, None] - }, { - 'input': ['1', NA, None], - 'dtype': 'object', - 'output': ['1', None, None] - }, -] -_map_test_case(str_test_cases, [ - MYSQL_DATA_TYPE.CHAR, - MYSQL_DATA_TYPE.VARCHAR, - MYSQL_DATA_TYPE.TINYTEXT, - MYSQL_DATA_TYPE.TEXT, - MYSQL_DATA_TYPE.MEDIUMTEXT, - MYSQL_DATA_TYPE.LONGTEXT -]) - -bool_test_cases = [ - { - 'input': [1, 2, 0, -1, None], - 'dtype': 'bool', - 'output': ['1', '1', '0', '1', '1'], - }, - { - 'input': [1, 2, 0, -1, None, 'a', NA], - 'dtype': 'object', - 'output': ['1', '1', '0', '1', None, '1', None], - } -] -_map_test_case(bool_test_cases, [ - MYSQL_DATA_TYPE.BOOL, - MYSQL_DATA_TYPE.BOOLEAN -]) - -float_test_cases = [ - { - 'input': [1, 2, 3], - 'dtype': 'float64', - 'output': ['1.0', '2.0', '3.0'] - }, { - 'input': [1.1, 2.2, 3.3, Decimal('4.4')], - 'dtype': 'float64', - 'output': ['1.1', '2.2', '3.3', '4.4'] - }, { - 'input': [1.1, NA, None, Decimal('4.4')], - 'dtype': 'Float64', - 'output': ['1.1', None, None, '4.4'] - }, { - 'input': [1, 2, 3], - 'dtype': 'object', - 'output': ['1', '2', '3'] - }, { - 'input': [1.1, 2.2, 3.3, Decimal('4.4')], - 'dtype': 'object', - 'output': ['1.1', '2.2', '3.3', '4.4'] - }, { - 'input': [1.1, NA, None, Decimal('4.4')], 
- 'dtype': 'object', - 'output': ['1.1', None, None, '4.4'] - } -] -_map_test_case(float_test_cases, [ - MYSQL_DATA_TYPE.FLOAT, - MYSQL_DATA_TYPE.DOUBLE, - MYSQL_DATA_TYPE.DECIMAL -]) - -int_test_cases = [ - { - 'input': [1, 2, -3], - 'dtype': 'int64', - 'output': ['1', '2', '-3'] - }, - { - 'input': [1, '2', -3], - 'dtype': 'object', - 'output': ['1', '2', '-3'] - }, - { - 'input': ['1', NA, None], - 'dtype': 'object', - 'output': ['1', None, None] - }, - { - 'input': ['1', NA, None], - 'dtype': 'Int64', - 'output': ['1', None, None] - } -] -_map_test_case(int_test_cases, [ - MYSQL_DATA_TYPE.TINYINT, - MYSQL_DATA_TYPE.SMALLINT, - MYSQL_DATA_TYPE.MEDIUMINT, - MYSQL_DATA_TYPE.INT, - MYSQL_DATA_TYPE.BIGINT -]) - -dt_test_cases = [ - # with TZ - { - 'input': [ - datetime.datetime(2023, 1, 1, 15, 30, 45, tzinfo=datetime.timezone(datetime.timedelta(hours=3))), - datetime.datetime(2023, 10, 16, 12, 30), - Timestamp('2023-10-15 9:30:45.123456789', tz='Atlantic/Stanley'), # -3 - datetime.time(15, 30, 45, tzinfo=datetime.timezone(datetime.timedelta(hours=3))), - datetime.time(12, 30, 45), - None - ], - 'dtype': 'object', - 'output': [ - '12:30:45', - '12:30:00', - '12:30:45', - '12:30:45', - '12:30:45', - None - ], - 'column_type': MYSQL_DATA_TYPE.TIME, - 'mysql_type': MYSQL_DATA_TYPE.TIME - }, - { - 'input': [ - datetime.datetime(2023, 1, 1, 15, 30, 45, tzinfo=datetime.timezone(datetime.timedelta(hours=3))), - datetime.datetime(2023, 10, 16, 12, 30), - datetime.date(2023, 10, 16), - Timestamp('2023-10-15 9:30:45.123456789', tz='Atlantic/Stanley'), # -3 - None - ], - 'dtype': 'object', - 'output': [ - '2023-01-01 12:30:45', - '2023-10-16 12:30:00', - '2023-10-16 00:00:00', - '2023-10-15 12:30:45', - None - ], - 'column_type': MYSQL_DATA_TYPE.DATETIME, - 'mysql_type': MYSQL_DATA_TYPE.DATETIME - }, - # no TZ - { - 'input': [ - datetime.datetime(2023, 10, 16, 12, 30), - Timestamp('2023-10-15 14:30:45.123456789'), - None - ], - 'dtype': 'object', - 'output': [ - 
datetime.datetime(2023, 10, 16, 12, 30).strftime("%H:%M:%S"), - Timestamp('2023-10-15 14:30:45.123456789').strftime("%H:%M:%S"), - None - ], - 'column_type': MYSQL_DATA_TYPE.TIME, - 'mysql_type': MYSQL_DATA_TYPE.TIME - }, - { - 'input': [ - datetime.datetime(2023, 10, 16, 12, 30), - Timestamp('2023-10-15 14:30:45.123456789'), - None - ], - 'dtype': 'datetime64[ns]', - 'output': [ - datetime.datetime(2023, 10, 16, 12, 30).strftime("%H:%M:%S"), - Timestamp('2023-10-15 14:30:45.123456789').strftime("%H:%M:%S"), - None - ], - 'column_type': MYSQL_DATA_TYPE.TIME, - 'mysql_type': MYSQL_DATA_TYPE.TIME - }, - { - 'input': [ - datetime.date(2023, 10, 15), - datetime.datetime(2023, 10, 16, 12, 30), - Timestamp('2023-10-15 14:30:45.123456789'), - None - ], - 'dtype': 'datetime64[ns]', - 'output': [ - datetime.date(2023, 10, 15).strftime("%Y-%m-%d"), - datetime.datetime(2023, 10, 16, 12, 30).strftime("%Y-%m-%d"), - Timestamp('2023-10-15 14:30:45.123456789').strftime("%Y-%m-%d"), - None - ], - 'column_type': MYSQL_DATA_TYPE.DATE, - 'mysql_type': MYSQL_DATA_TYPE.DATE - }, - { - 'input': [ - datetime.date(2023, 10, 15), - datetime.datetime(2023, 10, 16, 12, 30), - Timestamp('2023-10-15 14:30:45.123456789'), - None - ], - 'dtype': 'object', - 'output': [ - datetime.date(2023, 10, 15).strftime("%Y-%m-%d"), - datetime.datetime(2023, 10, 16, 12, 30).strftime("%Y-%m-%d"), - Timestamp('2023-10-15 14:30:45.123456789').strftime("%Y-%m-%d"), - None - ], - 'column_type': MYSQL_DATA_TYPE.DATE, - 'mysql_type': MYSQL_DATA_TYPE.DATE - }, - { - 'input': [ - datetime.date(2023, 10, 15), - datetime.datetime(2023, 10, 16, 12, 30), - Timestamp('2023-10-15 14:30:45.123456789'), - None - ], - 'dtype': 'datetime64[ns]', - 'output': [ - datetime.date(2023, 10, 15).strftime("%Y-%m-%d %H:%M:%S"), - datetime.datetime(2023, 10, 16, 12, 30).strftime("%Y-%m-%d %H:%M:%S"), - Timestamp('2023-10-15 14:30:45.123456789').strftime("%Y-%m-%d %H:%M:%S"), - None - ], - 'column_type': MYSQL_DATA_TYPE.DATETIME, - 
'mysql_type': MYSQL_DATA_TYPE.DATETIME - }, - { - 'input': [ - datetime.date(2023, 10, 15), - datetime.datetime(2023, 10, 16, 12, 30), - Timestamp('2023-10-15 14:30:45.123456789'), - None - ], - 'dtype': 'object', - 'output': [ - datetime.date(2023, 10, 15).strftime("%Y-%m-%d %H:%M:%S"), - datetime.datetime(2023, 10, 16, 12, 30).strftime("%Y-%m-%d %H:%M:%S"), - Timestamp('2023-10-15 14:30:45.123456789').strftime("%Y-%m-%d %H:%M:%S"), - None - ], - 'column_type': MYSQL_DATA_TYPE.DATETIME, - 'mysql_type': MYSQL_DATA_TYPE.DATETIME - }, -] -column_types_tests += dt_test_cases - - -@pytest.mark.parametrize('test_index, test_case', enumerate(column_types_tests + dtype_tests)) -def test_mysql_dump_int(test_index: int, test_case: dict): - """Test that ResultSet.dump_to_mysql returns correct mysql types and values. - - Args: - test_index: Index of the test case. Used only in error message to identify test case. - test_case: Test case - input data, expected output data and output mysql type. - May also contain column_type to specify mysql type in input dataframe. 
- """ - df = DataFrame(test_case['input'], columns=['a'], dtype=test_case['dtype']) - mysql_types = None - if 'column_type' in test_case: - mysql_types = [test_case['column_type']] - rs = ResultSet.from_df(df, mysql_types=mysql_types) - df, columns = dump_result_set_to_mysql(rs) - type_attrs = DATA_C_TYPE_MAP[test_case['mysql_type']] - for result_attr, expected_attr in [('type', 'code'), ('size', 'size'), ('flags', 'flags')]: - assert columns[0][result_attr] == getattr(type_attrs, expected_attr), ( - f'Test case {test_index}: ' - f'Wrong mysql type attribute "{result_attr}" for {test_case["input"]}:{test_case["dtype"]}: ' - f'{columns[0][result_attr]} (result) != {getattr(type_attrs, expected_attr)} (expected)' - ) - for i in range(len(test_case['input'])): - assert df[0][i] == test_case['output'][i], ( - f'Test case {test_index}: Wrong cast for {test_case["input"][i]} -> {test_case["output"][i]}' - ) diff --git a/tests/unit/various/test_json_chunker.py b/tests/unit/various/test_json_chunker.py deleted file mode 100644 index 701edcf2c47..00000000000 --- a/tests/unit/various/test_json_chunker.py +++ /dev/null @@ -1,535 +0,0 @@ -import pytest -import json - -import pandas as pd - -from mindsdb.interfaces.knowledge_base.preprocessing.models import ( - Document, -) -from mindsdb.interfaces.knowledge_base.preprocessing.json_chunker import JSONChunkingConfig, JSONChunkingPreprocessor - - -class TestJSONChunker: - """Test suite for the JSON chunker""" - - def test_basic_chunking(self): - """Test basic chunking of JSON objects""" - # Create a simple JSON object - json_data = [ - { - "id": 1, - "name": "John Doe", - "skills": ["Python", "SQL", "Machine Learning"], - "contact": {"email": "john@example.com", "phone": "123-456-7890"}, - }, - { - "id": 2, - "name": "Jane Smith", - "skills": ["Java", "C++", "Data Science"], - "contact": {"email": "jane@example.com", "phone": "987-654-3210"}, - }, - ] - - # Create documents - documents = [] - for i, item in enumerate(json_data): 
- doc = Document(id=f"doc_{i}", content=json.dumps(item), metadata={"source": "test"}) - documents.append(doc) - - # Create preprocessor with default config - preprocessor = JSONChunkingPreprocessor() - - # Process documents - chunks = preprocessor.process_documents(documents) - - # Check results - assert len(chunks) == 2 # One chunk per JSON object - assert "John Doe" in chunks[0].content - assert "Jane Smith" in chunks[1].content - - # Check metadata - assert chunks[0].metadata["_original_doc_id"] == "doc_0" - assert chunks[1].metadata["_original_doc_id"] == "doc_1" - - # Check chunk IDs - assert chunks[0].id.startswith("doc_0:") - assert chunks[1].id.startswith("doc_1:") - - def test_flatten_nested(self): - """Test flattening of nested JSON structures""" - # Create a nested JSON object - json_data = { - "id": 1, - "name": "John Doe", - "contact": { - "email": "john@example.com", - "address": {"street": "123 Main St", "city": "Anytown", "zip": "12345"}, - }, - "experience": [ - {"company": "ABC Corp", "position": "Developer", "years": 3}, - {"company": "XYZ Inc", "position": "Senior Developer", "years": 2}, - ], - } - - # Create document - doc = Document(id="doc_1", content=json.dumps(json_data), metadata={"source": "test"}) - - # Create preprocessor with flatten_nested=True - config = JSONChunkingConfig(flatten_nested=True) - preprocessor = JSONChunkingPreprocessor(config) - - # Process document - chunks = preprocessor.process_documents([doc]) - - # Check results - assert len(chunks) == 1 - assert "contact.email: john@example.com" in chunks[0].content - assert "contact.address.street: 123 Main St" in chunks[0].content - - # Create preprocessor with flatten_nested=False - config = JSONChunkingConfig(flatten_nested=False) - preprocessor = JSONChunkingPreprocessor(config) - - # Process document - chunks = preprocessor.process_documents([doc]) - - # Check results - assert len(chunks) == 1 - assert '"contact": {' in chunks[0].content - assert '"address": {' in 
chunks[0].content - - def test_chunk_by_field(self): - """Test chunking by field instead of object""" - # Create a JSON object - json_data = { - "id": 1, - "name": "John Doe", - "skills": ["Python", "SQL", "Machine Learning"], - "contact": {"email": "john@example.com", "phone": "123-456-7890"}, - "summary": "Experienced software developer with 5+ years of experience", - } - - # Create document - doc = Document(id="doc_1", content=json.dumps(json_data), metadata={"source": "test"}) - - # Create preprocessor with chunk_by_object=False - config = JSONChunkingConfig(chunk_by_object=False) - preprocessor = JSONChunkingPreprocessor(config) - - # Process document - chunks = preprocessor.process_documents([doc]) - - # Check results - assert len(chunks) == 5 # One chunk per field - - # Check for chunk content to verify field-based chunking - chunk_contents = [chunk.content for chunk in chunks] - - # Verify each field is in a separate chunk - assert any("id: 1" in content for content in chunk_contents) - assert any("name: John Doe" in content for content in chunk_contents) - assert any("skills:" in content for content in chunk_contents) - assert any("contact.email: john@example.com" in content for content in chunk_contents) - assert any("summary: Experienced software developer" in content for content in chunk_contents) - - # Verify metadata extraction is working - for chunk in chunks: - # Each chunk should have the same metadata extracted from the original JSON - assert chunk.metadata.get("field_name") == "John Doe" - assert chunk.metadata.get("field_id") == 1 - assert "field_summary" in chunk.metadata - - def test_include_exclude_fields(self): - """Test including and excluding specific fields""" - # Create a JSON object - json_data = { - "id": 1, - "name": "John Doe", - "skills": ["Python", "SQL", "Machine Learning"], - "contact": {"email": "john@example.com", "phone": "123-456-7890"}, - "summary": "Experienced software developer with 5+ years of experience", - } - - # 
Create document - doc = Document(id="doc_1", content=json.dumps(json_data), metadata={"source": "test"}) - - # Test exclude_fields - config = JSONChunkingConfig(exclude_fields=["id", "contact"]) - preprocessor = JSONChunkingPreprocessor(config) - chunks = preprocessor.process_documents([doc]) - - # Check results - assert len(chunks) == 1 - assert "id: 1" not in chunks[0].content - assert "contact.email: john@example.com" not in chunks[0].content - assert "name: John Doe" in chunks[0].content - - # Test include_fields - config = JSONChunkingConfig(include_fields=["name", "summary"]) - preprocessor = JSONChunkingPreprocessor(config) - chunks = preprocessor.process_documents([doc]) - - # Check results - assert len(chunks) == 1 - assert "name: John Doe" in chunks[0].content - assert "summary: Experienced" in chunks[0].content - assert "skills" not in chunks[0].content - assert "contact" not in chunks[0].content - - def test_utility_bill_schema(self): - """Test chunking of a utility bill JSON schema""" - # Create a utility bill JSON object based on the provided schema - utility_bill = { - "billingPeriod": "03/17/2025 - 04/14/2025", - "amountDue": 86.77, - "accountNumber": "110 167 082 509", - "dueDate": "05/09/2025", - "previousBalance": 99.51, - "payments": 99.51, - "kwhUsed": 592, - "balancesByCompany": [ - {"previousBalance": 59.39, "payments": 59.39, "currentCharges": 52.49, "amountDue": 52.49}, - {"previousBalance": 40.12, "payments": 40.12, "currentCharges": 34.28, "amountDue": 34.28}, - ], - } - - # Create document - doc = Document( - id="utility_bill_1", - content=json.dumps(utility_bill), - metadata={"source": "test", "document_type": "utility_bill"}, - ) - - # Test 1: Default chunking (chunk by object, flatten nested) - preprocessor = JSONChunkingPreprocessor() - chunks = preprocessor.process_documents([doc]) - - # Check results - assert len(chunks) == 1 # One chunk for the whole bill - assert "billingPeriod: 03/17/2025 - 04/14/2025" in chunks[0].content - 
assert "accountNumber: 110 167 082 509" in chunks[0].content - assert "balancesByCompany[0].previousBalance: 59.39" in chunks[0].content - assert "balancesByCompany[1].amountDue: 34.28" in chunks[0].content - - # Test 2: Chunk by field instead of object - config = JSONChunkingConfig(chunk_by_object=False) - preprocessor = JSONChunkingPreprocessor(config) - chunks = preprocessor.process_documents([doc]) - - # Check results - should have one chunk per top-level field - assert len(chunks) == 8 # One chunk per top-level field - - # Verify field names in metadata - field_names = [chunk.metadata.get("field_name") for chunk in chunks] - assert "billingPeriod" in field_names - assert "amountDue" in field_names - assert "accountNumber" in field_names - assert "balancesByCompany" in field_names - - # Test 3: Non-flattened nested structure - config = JSONChunkingConfig(flatten_nested=False) - preprocessor = JSONChunkingPreprocessor(config) - chunks = preprocessor.process_documents([doc]) - - # Check results - assert len(chunks) == 1 - assert '"balancesByCompany": [' in chunks[0].content - assert '"previousBalance": 59.39' in chunks[0].content - assert '"currentCharges": 34.28' in chunks[0].content - - # Test 4: Include only specific fields - config = JSONChunkingConfig(include_fields=["accountNumber", "dueDate", "amountDue"]) - preprocessor = JSONChunkingPreprocessor(config) - chunks = preprocessor.process_documents([doc]) - - # Check results - assert len(chunks) == 1 - assert "accountNumber: 110 167 082 509" in chunks[0].content - assert "dueDate: 05/09/2025" in chunks[0].content - assert "amountDue: 86.77" in chunks[0].content - assert "billingPeriod" not in chunks[0].content - assert "kwhUsed" not in chunks[0].content - assert "balancesByCompany" not in chunks[0].content - - # Test 5: Include nested fields - config = JSONChunkingConfig(include_fields=["accountNumber", "balancesByCompany"], flatten_nested=False) - preprocessor = JSONChunkingPreprocessor(config) - chunks = 
preprocessor.process_documents([doc]) - - # Check results - assert len(chunks) == 1 - assert "accountNumber" in chunks[0].content - assert "balancesByCompany" in chunks[0].content - assert "previousBalance" in chunks[0].content # From the nested structure - assert "currentCharges" in chunks[0].content # From the nested structure - assert "billingPeriod" not in chunks[0].content - - # Test 6: Extract metadata fields - config = JSONChunkingConfig(metadata_fields=["accountNumber", "dueDate", "amountDue"]) - preprocessor = JSONChunkingPreprocessor(config) - chunks = preprocessor.process_documents([doc]) - - # Check metadata extraction - assert chunks[0].metadata.get("field_accountNumber") == "110 167 082 509" - assert chunks[0].metadata.get("field_dueDate") == "05/09/2025" - assert chunks[0].metadata.get("field_amountDue") == 86.77 - - def test_error_handling(self): - """Test handling of invalid JSON data""" - # Create an invalid JSON document - doc = Document(id="doc_error", content="This is not valid JSON", metadata={"source": "test"}) - - # Create preprocessor - preprocessor = JSONChunkingPreprocessor() - - # Process document - chunks = preprocessor.process_documents([doc]) - - # Check results - assert len(chunks) == 1 - assert "Error processing document" in chunks[0].content - assert chunks[0].id == "doc_error_error" - - def test_metadata_extraction_specific_fields(self): - """Test extracting specific fields to metadata""" - # Create a nested JSON object - json_data = { - "id": 1, - "name": "John Doe", - "contact": {"email": "john@example.com", "phone": "123-456-7890"}, - "skills": ["Python", "SQL", "Machine Learning"], - } - - # Create document - doc = Document(id="doc_1", content=json.dumps(json_data), metadata={"source": "test"}) - - # Create preprocessor with metadata_fields - config = JSONChunkingConfig(metadata_fields=["name", "contact.email", "skills"]) - preprocessor = JSONChunkingPreprocessor(config) - - # Process document - chunks = 
preprocessor.process_documents([doc]) - - # Check results - assert len(chunks) == 1 - assert "field_name" in chunks[0].metadata - assert chunks[0].metadata["field_name"] == "John Doe" - assert "field_contact.email" in chunks[0].metadata - assert chunks[0].metadata["field_contact.email"] == "john@example.com" - assert "field_skills" in chunks[0].metadata - assert chunks[0].metadata["field_skills"] == ["Python", "SQL", "Machine Learning"] - - def test_metadata_extraction_all_primitives(self): - """Test extracting all primitive values to metadata""" - # Create a nested JSON object - json_data = { - "id": 1, - "name": "John Doe", - "age": 30, - "is_active": True, - "contact": {"email": "john@example.com", "phone": "123-456-7890"}, - } - - # Create document - doc = Document(id="doc_1", content=json.dumps(json_data), metadata={"source": "test"}) - - # Create preprocessor with extract_all_primitives=True - config = JSONChunkingConfig(extract_all_primitives=True) - preprocessor = JSONChunkingPreprocessor(config) - - # Process document - chunks = preprocessor.process_documents([doc]) - - # Check results - assert len(chunks) == 1 - assert "field_id" in chunks[0].metadata - assert chunks[0].metadata["field_id"] == 1 - assert "field_name" in chunks[0].metadata - assert chunks[0].metadata["field_name"] == "John Doe" - assert "field_age" in chunks[0].metadata - assert chunks[0].metadata["field_age"] == 30 - assert "field_is_active" in chunks[0].metadata - assert chunks[0].metadata["field_is_active"] is True - assert "field_contact.email" in chunks[0].metadata - assert chunks[0].metadata["field_contact.email"] == "john@example.com" - assert "field_contact.phone" in chunks[0].metadata - assert chunks[0].metadata["field_contact.phone"] == "123-456-7890" - - def test_to_dataframe(self): - """Test conversion to DataFrame""" - # Create a simple JSON object - json_data = {"id": 1, "name": "John Doe"} - - # Create document - doc = Document(id="doc_1", content=json.dumps(json_data), 
metadata={"source": "test"}) - - # Create preprocessor - preprocessor = JSONChunkingPreprocessor() - - # Process document - chunks = preprocessor.process_documents([doc]) - - # Convert to DataFrame - df = preprocessor.to_dataframe(chunks) - - # Check results - assert isinstance(df, pd.DataFrame) - assert len(df) == 1 - assert "id" in df.columns - assert "content" in df.columns - assert "metadata" in df.columns - - def test_metadata_field_extraction(self): - """Test that metadata fields are correctly extracted from JSON content""" - # Sample CV data similar to what you're working with - cv_data = { - "_id": {"$oid": "680f82222b413ec943328950"}, - "skills": { - "soft_skills": None, - "human_languages": ["English", "French"], - "technical_skills": ["Operations Management", "Strategic Planning"], - }, - "summary": "Product Management leader with 13+ years of experience", - "metadata": {"user_id": "2024_67eee2750c5f19.58992827", "timestamp": "2025-04-28T13:26:34.474653Z"}, - "contact_information": {"name": "John Doe", "email": "john@example.com"}, - } - - # Create a document with the CV data - doc = Document(id="test_cv", content=json.dumps(cv_data)) - - # Configure the JSON chunker to extract specific metadata fields - config = JSONChunkingConfig( - # Extract these specific fields into metadata - metadata_fields=[ - "contact_information.name", # Nested field using dot notation - "contact_information.email", - "metadata.user_id", - "summary", - ], - # Other useful configuration options - chunk_by_object=True, # Process the whole object as one chunk - flatten_nested=True, # Flatten nested structures for better text representation - extract_all_primitives=False, # Don't extract all primitive values, just the ones specified - ) - - # Create the JSON chunker with the config - preprocessor = JSONChunkingPreprocessor(config) - - # Process the document - chunks = preprocessor.process_documents([doc]) - - # Verify that the metadata fields were extracted - assert len(chunks) == 
1 # Should have one chunk since chunk_by_object=True - chunk = chunks[0] - - # Check that the specified fields were extracted into metadata with 'field_' prefix - assert chunk.metadata.get("field_contact_information.name") == "John Doe" - assert chunk.metadata.get("field_contact_information.email") == "john@example.com" - assert chunk.metadata.get("field_metadata.user_id") == "2024_67eee2750c5f19.58992827" - assert chunk.metadata.get("field_summary") == "Product Management leader with 13+ years of experience" - - def test_extract_all_primitives(self): - """Test that all primitive values are extracted when extract_all_primitives=True""" - # Simple JSON data - data = {"name": "John Doe", "age": 30, "is_active": True, "nested": {"key1": "value1", "key2": 42}} - - # Create a document - doc = Document(id="test_primitives", content=json.dumps(data)) - - # Configure the JSON chunker to extract all primitive values - config = JSONChunkingConfig( - extract_all_primitives=True, # Extract all primitive values - chunk_by_object=True, # Process the whole object as one chunk - flatten_nested=True, # Flatten nested structures - ) - - # Create the JSON chunker with the config - preprocessor = JSONChunkingPreprocessor(config) - - # Process the document - chunks = preprocessor.process_documents([doc]) - - # Verify that all primitive values were extracted - assert len(chunks) == 1 - chunk = chunks[0] - - # Check that all primitive fields were extracted with 'field_' prefix - assert chunk.metadata.get("field_name") == "John Doe" - assert chunk.metadata.get("field_age") == 30 - assert chunk.metadata.get("field_is_active") is True - assert chunk.metadata.get("field_nested.key1") == "value1" - assert chunk.metadata.get("field_nested.key2") == 42 - - def test_default_metadata_extraction(self): - """Test that top-level primitive fields are extracted by default when metadata_fields is empty""" - # Simple JSON data with top-level primitives - data = {"name": "John Doe", "age": 30, 
"is_active": True, "nested": {"key1": "value1", "key2": 42}} - - # Create a document - doc = Document(id="test_default", content=json.dumps(data)) - - # Configure the JSON chunker with default metadata_fields (empty list) - config = JSONChunkingConfig( - # metadata_fields is empty by default - chunk_by_object=True, # Process the whole object as one chunk - flatten_nested=False, # Don't flatten nested structures - ) - - # Create the JSON chunker with the config - preprocessor = JSONChunkingPreprocessor(config) - - # Process the document - chunks = preprocessor.process_documents([doc]) - - # Verify that top-level primitive fields were extracted - assert len(chunks) == 1 - chunk = chunks[0] - - # Check that top-level primitive fields were extracted with 'field_' prefix - assert chunk.metadata.get("field_name") == "John Doe" - assert chunk.metadata.get("field_age") == 30 - assert chunk.metadata.get("field_is_active") is True - - # Nested fields should not be extracted by default - assert chunk.metadata.get("field_nested.key1") is None - assert chunk.metadata.get("field_nested.key2") is None - - def test_default_metadata_extraction_complex_object(self): - """Test that all primitive fields are extracted when metadata_fields is empty and there are no top-level primitives""" - # Complex JSON data with no top-level primitives - data = { - "contact_information": {"name": "John Doe", "email": "john@example.com", "phone": "123-456-7890"}, - "skills": {"technical": ["Python", "SQL", "Machine Learning"], "languages": ["English", "Spanish"]}, - "metadata": {"user_id": "user123", "timestamp": "2025-05-12T15:50:12+03:00"}, - } - - # Create a document - doc = Document(id="test_complex", content=json.dumps(data)) - - # Configure the JSON chunker with default metadata_fields (empty list) - config = JSONChunkingConfig( - # metadata_fields is empty by default - chunk_by_object=True, # Process the whole object as one chunk - flatten_nested=True, # Flatten nested structures - ) - - # 
Create the JSON chunker with the config - preprocessor = JSONChunkingPreprocessor(config) - - # Process the document - chunks = preprocessor.process_documents([doc]) - - # Verify that primitive fields were extracted - assert len(chunks) == 1 - chunk = chunks[0] - - # Check that primitive fields were extracted with 'field_' prefix - assert chunk.metadata.get("field_contact_information.name") == "John Doe" - assert chunk.metadata.get("field_contact_information.email") == "john@example.com" - assert chunk.metadata.get("field_contact_information.phone") == "123-456-7890" - assert chunk.metadata.get("field_metadata.user_id") == "user123" - assert chunk.metadata.get("field_metadata.timestamp") == "2025-05-12T15:50:12+03:00" - - -if __name__ == "__main__": - pytest.main(["-xvs", __file__]) diff --git a/tests/unit/various/test_json_storage.py b/tests/unit/various/test_json_storage.py deleted file mode 100644 index 42812cbbba7..00000000000 --- a/tests/unit/various/test_json_storage.py +++ /dev/null @@ -1,68 +0,0 @@ -import os -import unittest -import tempfile - -from mindsdb.interfaces.storage import db # noqa -from mindsdb.migrations import migrate # noqa -from mindsdb.interfaces.storage.fs import RESOURCE_GROUP -from mindsdb.interfaces.storage.json import get_json_storage # noqa -from mindsdb.utilities.config import config -from mindsdb.utilities import log - -logger = log.getLogger(__name__) - - -class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls._temp_dir = tempfile.TemporaryDirectory(prefix="test_tmp_") - os.environ["MINDSDB_STORAGE_DIR"] = cls._temp_dir.name - os.environ["MINDSDB_DB_CON"] = ( - "sqlite:///" - + os.path.join(os.environ["MINDSDB_STORAGE_DIR"], "mindsdb.sqlite3.db") - + "?check_same_thread=False&timeout=30" - ) - config.prepare_env_config() - config.merge_configs() - db.init() - migrate.migrate_to_head() - - @classmethod - def tearDownClass(cls): - if cls._temp_dir: - try: - cls._temp_dir.cleanup() - except (NotADirectoryError, 
PermissionError) as e: - logger.warning("Failed to cleanup temporary directory %s: %s", cls._temp_dir.name, str(e)) - except Exception as e: - raise e - - def test_1_insert(self): - storage_1 = get_json_storage(1) - storage_1["x"] = {"y": 1} - assert storage_1["x"]["y"] == 1 - assert storage_1["x"]["y"] == storage_1.get("x")["y"] - - another_storage_1 = get_json_storage(1) - assert another_storage_1["x"]["y"] == storage_1["x"]["y"] - - storage_2 = get_json_storage(2) - assert storage_2["x"] is None - - another_storage_2 = get_json_storage(2) - another_storage_2.set("x", {"y": 2}) - assert storage_2["x"]["y"] == 2 - - def test_2_resource_independent(self): - storage_1 = get_json_storage(1, resource_group=RESOURCE_GROUP.INTEGRATION) - storage_1["x"] = {"y": 1} - assert storage_1["x"]["y"] == 1 - - storage_2 = get_json_storage(1, resource_group=RESOURCE_GROUP.TAB) - assert storage_2["x"] is None - storage_2["x"] = {"y": 2} - assert storage_1["x"]["y"] != storage_2["x"]["y"] - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/various/test_knowledge_base_hybrid_search.py b/tests/unit/various/test_knowledge_base_hybrid_search.py deleted file mode 100644 index 56d69b26224..00000000000 --- a/tests/unit/various/test_knowledge_base_hybrid_search.py +++ /dev/null @@ -1,112 +0,0 @@ -from mindsdb.integrations.utilities.sql_utils import KeywordSearchArgs, FilterCondition -import pytest -from unittest.mock import patch, MagicMock -from mindsdb_sql_parser import parse_sql -import pandas as pd - -# Assume the code you provided is in a file named 'kb_table.py' -from mindsdb.interfaces.knowledge_base.controller import KnowledgeBaseTable -from mindsdb.integrations.libs.keyword_search_base import KeywordSearchBase - - -class MockVectorStoreHandler(KeywordSearchBase): - """A mock VectorStoreHandler that returns predefined data for testing.""" - - def __init__(self, vector_search_data: pd.DataFrame, keyword_search_data: pd.DataFrame = None): - self._vector_data = 
vector_search_data - self._keyword_data = keyword_search_data if keyword_search_data is not None else pd.DataFrame() - - def dispatch_select( - self, - query, - conditions: list[FilterCondition], - keyword_search_args: KeywordSearchArgs = None, - allowed_metadata_columns=None, - ) -> pd.DataFrame: - # Return the predefined keyword search data. - if keyword_search_args: - return self._vector_data.copy() - print("MockVectorStoreHandler.dispatch_keyword_select called!") - return self._keyword_data.copy() - - def extract_conditions(self, where_clause): - if where_clause is None: - return [] - return [ - FilterCondition(column="content", op="=", value="test query"), - FilterCondition(column="hybrid_search", op="=", value=True), - ] - - -@pytest.fixture -def mock_kb_table(): - """Pytest fixture to set up a KnowledgeBaseTable with mock dependencies.""" - - # 1. Define the fake data our mock DB will return - mock_vector_results = pd.DataFrame( - { - "id": ["doc1_chunk1", "doc2_chunk1"], - "content": ["some vector content", "more vector content"], - "metadata": [{"original_doc_id": "doc1"}, {"original_doc_id": "doc2"}], - "distance": [0.1, 0.5], - } - ) - - mock_keyword_results = pd.DataFrame( - { - "id": ["doc3_chunk1"], - "content": ["keyword match content"], - "metadata": [{"original_doc_id": "doc3"}], - "distance": [0.1], - } - ) - - # 2. Create the mock handler with the fake data - mock_db_handler = MockVectorStoreHandler( - vector_search_data=mock_vector_results, keyword_search_data=mock_keyword_results - ) - - # 3. Mock the KnowledgeBase object and session - mock_kb = MagicMock() - mock_kb.params = {} - mock_kb.vector_database_table = "mock_table" - - mock_session = MagicMock() - - # 4. Create the instance of the class we are testing - kb_table = KnowledgeBaseTable(kb=mock_kb, session=mock_session) - - # 5. 
Inject our mock database handler - kb_table._vector_db = mock_db_handler - - return kb_table - - -def test_hybrid_search_merges_results(mock_kb_table): - """ - Tests that select_query correctly merges vector and keyword search results - when hybrid_search is enabled. - """ - # ARRANGE - # Use patch to prevent the test from trying to create a real embedding - with patch.object(mock_kb_table, "_content_to_embeddings", return_value=[0.1, 0.2, 0.3]) as mock_embedding: - # A simple query that will trigger the logic we want to test - query_str = "SELECT * FROM my_kb WHERE content = 'test query' AND hybrid_search_alpha = 0.5" - query = parse_sql(query_str, dialect="mindsdb") - - # ACT - result_df = mock_kb_table.select_query(query) - - # ASSERT - # We expect 3 rows: 2 from vector search + 1 from keyword search - assert len(result_df) == 3 - - # Check that the embedding function was called - mock_embedding.assert_called_once_with("test query") - - # Check that the results are sorted by relevance (descending) - # Relevance is 1 / (1 + distance) - assert result_df["relevance"].is_monotonic_decreasing - - # Check that the keyword result is present - assert "doc3" in result_df["id"].values diff --git a/tests/unit/various/test_llm_utils.py b/tests/unit/various/test_llm_utils.py deleted file mode 100644 index d5df7a77e42..00000000000 --- a/tests/unit/various/test_llm_utils.py +++ /dev/null @@ -1,35 +0,0 @@ -import unittest - -from numpy import int64 -import pandas as pd - -from mindsdb.integrations.libs.llm.utils import get_completed_prompts - - -class TestLLM(unittest.TestCase): - def test_get_completed_prompts(self): - placeholder = "{{text}}" - prefix = "You are a helpful assistant. Here is the user's input:" - user_inputs = ["Hi! I would love some help.", "Hello! 
Are you sentient?", None] - - # send all rows at once - base_template = prefix + placeholder - df = pd.DataFrame({"text": user_inputs}) - prompts, empties = get_completed_prompts(base_template, df) - - # should detect a single missing value in the relevant column (last row) - assert empties.shape == (1,) - assert empties.dtype in [int, int64] - assert empties[0] == 2 - - # check results - for i in range(len(empties)): - # in-fill - assert prompts[0] == prefix + user_inputs[i] - - # edge case - invalid template - placeholder = "" - base_template = prefix + placeholder - df = pd.DataFrame({"text": user_inputs}) - with self.assertRaises(Exception): - get_completed_prompts(base_template, df) diff --git a/tests/unit/various/test_main.py b/tests/unit/various/test_main.py deleted file mode 100644 index 689a9eb93af..00000000000 --- a/tests/unit/various/test_main.py +++ /dev/null @@ -1,183 +0,0 @@ -import pathlib -import shutil -from unittest.mock import patch -import pytest - - -class TestMainCleanup: - @pytest.fixture - def patch_main_config(self, tmp_path, monkeypatch): - import mindsdb.__main__ as main_mod - - monkeypatch.setattr(main_mod, "config", {"paths": {"tmp": tmp_path}}) - return tmp_path, main_mod - - @pytest.fixture - def errors(self, caplog): - """Capture only ERROR logs as concatenated text""" - - class ErrorCapture: - @property - def text(self): - return "\n".join(r.getMessage() for r in caplog.records if r.levelname == "ERROR") - - caplog.clear() - caplog.set_level("ERROR") - return ErrorCapture() - - def test_cleans_files_and_dirs_but_keeps_tmp_path(self, patch_main_config): - tmp_path, main_mod = patch_main_config - (tmp_path / "a.txt").write_text("hello") - sub = tmp_path / "sub" - sub.mkdir() - (sub / "b.txt").write_text("world") - - main_mod.clean_mindsdb_tmp_dir() - - assert tmp_path.exists(), "tmp_path itself should not be deleted" - assert list(tmp_path.iterdir()) == [], "All content should be removed" - - def test_empty_directory(self, 
patch_main_config): - tmp_path, main_mod = patch_main_config - main_mod.clean_mindsdb_tmp_dir() - assert tmp_path.exists() - assert list(tmp_path.iterdir()) == [] - - def test_deeply_nested_directories(self, patch_main_config): - tmp_path, main_mod = patch_main_config - deep = tmp_path / "a" / "b" / "c" / "d" - deep.mkdir(parents=True) - (deep / "file.txt").write_text("deep") - - main_mod.clean_mindsdb_tmp_dir() - - assert tmp_path.exists() - assert not (tmp_path / "a").exists() - - def test_symlinks_are_handled(self, patch_main_config): - tmp_path, main_mod = patch_main_config - - external_file = tmp_path.parent / "external.txt" - external_file.write_text("external") - - (tmp_path / "link_to_external").symlink_to(external_file) - - main_mod.clean_mindsdb_tmp_dir() - - assert tmp_path.exists() - assert list(tmp_path.iterdir()) == [] - assert external_file.exists() - - external_file.unlink() - - def test_unlink_failure_continues_and_logs(self, patch_main_config, errors): - tmp_path, main_mod = patch_main_config - (tmp_path / "ok1.txt").write_text("a") - (tmp_path / "failing_file.txt").write_text("b") - (tmp_path / "ok2.txt").write_text("c") - - original_unlink = pathlib.Path.unlink - - def mock_unlink(self, *args, **kwargs): - if self.name == "failing_file.txt": - raise PermissionError("Cannot delete file") - return original_unlink(self, *args, **kwargs) - - with patch.object(pathlib.Path, "unlink", mock_unlink): - main_mod.clean_mindsdb_tmp_dir() - - txt = errors.text - assert "Failed to clean" in txt - assert "Cannot delete file" in txt - - assert not (tmp_path / "ok1.txt").exists() - assert not (tmp_path / "ok2.txt").exists() - assert (tmp_path / "failing_file.txt").exists() - - def test_rmtree_failure_continues_and_logs(self, patch_main_config, errors): - tmp_path, main_mod = patch_main_config - - (tmp_path / "file.txt").write_text("content") - (tmp_path / "failing_dir").mkdir() - (tmp_path / "another_file.txt").write_text("more content") - (tmp_path / 
"good_dir").mkdir() - - original_rmtree = shutil.rmtree - - def mock_rmtree(path, *args, **kwargs): - if "failing_dir" in str(path): - raise PermissionError("Cannot delete directory") - return original_rmtree(path, *args, **kwargs) - - with patch("shutil.rmtree", mock_rmtree): - main_mod.clean_mindsdb_tmp_dir() - - txt = errors.text - assert "Failed to clean" in txt - assert "Cannot delete directory" in txt - - assert not (tmp_path / "file.txt").exists() - assert not (tmp_path / "another_file.txt").exists() - assert not (tmp_path / "good_dir").exists() - assert (tmp_path / "failing_dir").exists() - - def test_mixed_failures_continue_cleanup(self, patch_main_config, errors): - tmp_path, main_mod = patch_main_config - - (tmp_path / "good_file1.txt").write_text("a") - (tmp_path / "failing_file.txt").write_text("b") - (tmp_path / "good_file2.txt").write_text("c") - (tmp_path / "failing_dir").mkdir() - (tmp_path / "good_dir").mkdir() - - original_unlink = pathlib.Path.unlink - original_rmtree = shutil.rmtree - - def mock_unlink(self, *args, **kwargs): - if self.name == "failing_file.txt": - raise PermissionError("Cannot delete file") - return original_unlink(self, *args, **kwargs) - - def mock_rmtree(path, *args, **kwargs): - if "failing_dir" in str(path): - raise PermissionError("Cannot delete directory") - return original_rmtree(path, *args, **kwargs) - - with patch.object(pathlib.Path, "unlink", mock_unlink), patch("shutil.rmtree", mock_rmtree): - main_mod.clean_mindsdb_tmp_dir() - - txt = errors.text - # We should have at least two "Failed to clean" lines (file + dir) - assert txt.count("Failed to clean") >= 2 - - assert not (tmp_path / "good_file1.txt").exists() - assert not (tmp_path / "good_file2.txt").exists() - assert not (tmp_path / "good_dir").exists() - assert (tmp_path / "failing_file.txt").exists() - assert (tmp_path / "failing_dir").exists() - - def test_nonexistent_tmp_path(self, monkeypatch): - import mindsdb.__main__ as main_mod - from pathlib import 
Path - - nonexistent = Path("/tmp/nonexistent_mindsdb_test_dir_12345") - assert not nonexistent.exists() - - monkeypatch.setattr(main_mod, "config", {"paths": {"tmp": nonexistent}}) - main_mod.clean_mindsdb_tmp_dir() - assert not nonexistent.exists() - - def test_logger_called_with_correct_level(self, patch_main_config): - tmp_path, main_mod = patch_main_config - (tmp_path / "failing_file.txt").write_text("content") - - original_unlink = pathlib.Path.unlink - - def mock_unlink(self, *args, **kwargs): - if self.name == "failing_file.txt": - raise PermissionError("Test error") - return original_unlink(self, *args, **kwargs) - - with patch.object(pathlib.Path, "unlink", mock_unlink), patch("mindsdb.__main__.logger") as mock_logger: - main_mod.clean_mindsdb_tmp_dir() - assert mock_logger.error.called or mock_logger.exception.called diff --git a/tests/unit/various/test_security.py b/tests/unit/various/test_security.py deleted file mode 100644 index f7132ca0c3f..00000000000 --- a/tests/unit/various/test_security.py +++ /dev/null @@ -1,294 +0,0 @@ -# NOTE: generated with llm -import pytest -from mindsdb.utilities.security import validate_urls - - -class TestValidateUrls: - """Test cases for validate_urls function""" - - def test_single_url_allowed(self): - """Test that a single allowed URL returns True""" - urls = "https://example.com/file" - allowed_urls = ["https://example.com"] - assert validate_urls(urls, allowed_urls) is True - - def test_single_url_not_allowed(self): - """Test that a single not allowed URL returns False""" - urls = "https://malicious.com/file" - allowed_urls = ["https://example.com"] - assert validate_urls(urls, allowed_urls) is False - - def test_single_url_disallowed(self): - """Test that a single disallowed URL returns False""" - urls = "https://example.com/file" - allowed_urls = ["https://example.com"] - disallowed_urls = ["https://example.com"] - assert validate_urls(urls, allowed_urls, disallowed_urls) is False - - def 
test_url_allowed_but_disallowed(self): - """Test that a URL is allowed but disallowed returns False""" - urls = "https://example.com/file" - allowed_urls = ["https://example.com"] - disallowed_urls = ["https://example.com"] - assert validate_urls(urls, allowed_urls, disallowed_urls) is False - - def test_url_allowed_and_not_disallowed(self): - """Test that a URL is allowed and not disallowed returns True""" - urls = "https://example.com/file" - allowed_urls = ["https://example.com"] - disallowed_urls = ["https://malicious.com"] - assert validate_urls(urls, allowed_urls, disallowed_urls) is True - - def test_multiple_urls_some_disallowed(self): - """Test that multiple URLs with some disallowed returns False""" - urls = ["https://example.com/file1", "https://malicious.com/file2"] - allowed_urls = ["https://example.com", "https://malicious.com"] - disallowed_urls = ["https://malicious.com"] - assert validate_urls(urls, allowed_urls, disallowed_urls) is False - - def test_multiple_urls_all_disallowed(self): - """Test that multiple URLs all disallowed returns False""" - urls = ["https://example.com/file1", "https://malicious.com/file2"] - allowed_urls = ["https://example.com", "https://malicious.com"] - disallowed_urls = ["https://example.com", "https://malicious.com"] - assert validate_urls(urls, allowed_urls, disallowed_urls) is False - - def test_multiple_urls_none_disallowed(self): - """Test that multiple URLs none disallowed returns True""" - urls = ["https://example.com/file1", "https://trusted.com/file2"] - allowed_urls = ["https://example.com", "https://trusted.com"] - disallowed_urls = ["https://malicious.com"] - assert validate_urls(urls, allowed_urls, disallowed_urls) is True - - def test_empty_disallowed_urls(self): - """Test that empty disallowed_urls list returns True (allows everything)""" - urls = "https://example.com/file" - allowed_urls = ["https://example.com"] - disallowed_urls = [] - assert validate_urls(urls, allowed_urls, disallowed_urls) is True 
- - def test_disallowed_urls_none(self): - """Test that None disallowed_urls returns True (allows everything)""" - urls = "https://example.com/file" - allowed_urls = ["https://example.com"] - assert validate_urls(urls, allowed_urls, None) is True - - def test_empty_allowed_urls_and_disallowed(self): - """Test that empty allowed_urls and disallowed_urls returns True (allows everything)""" - urls = "https://any.com/file" - allowed_urls = [] - disallowed_urls = [] - assert validate_urls(urls, allowed_urls, disallowed_urls) is True - - def test_empty_allowed_urls_but_disallowed(self): - """Test that empty allowed_urls but disallowed_urls returns False""" - urls = "https://bad.com/file" - allowed_urls = [] - disallowed_urls = ["https://bad.com"] - assert validate_urls(urls, allowed_urls, disallowed_urls) is False - - def test_multiple_allowed_urls(self): - """Test with multiple allowed URLs""" - urls = "https://example.com/file" - allowed_urls = ["https://example.com", "https://trusted.com"] - assert validate_urls(urls, allowed_urls) is True - - def test_different_schemes(self): - """Test that different schemes are treated as different URLs""" - urls = "http://example.com/file" - allowed_urls = ["https://example.com"] - assert validate_urls(urls, allowed_urls) is False - - def test_same_scheme_allowed(self): - """Test that same scheme is allowed""" - urls = "https://example.com/file" - allowed_urls = ["https://example.com"] - assert validate_urls(urls, allowed_urls) is True - - def test_url_without_scheme_raises_exception(self): - """Test that URL without scheme raises ValueError""" - urls = "example.com/file" - allowed_urls = ["https://example.com"] - with pytest.raises(ValueError, match="URL must include protocol and host name"): - validate_urls(urls, allowed_urls) - - def test_url_without_netloc_raises_exception(self): - """Test that URL without netloc raises ValueError""" - urls = "https:///file" - allowed_urls = ["https://example.com"] - with 
pytest.raises(ValueError, match="URL must include protocol and host name"): - validate_urls(urls, allowed_urls) - - def test_allowed_url_without_scheme_raises_exception(self): - """Test that allowed URL without scheme raises ValueError""" - urls = "https://example.com/file" - allowed_urls = ["example.com"] - with pytest.raises(ValueError, match="URL must include protocol and host name"): - validate_urls(urls, allowed_urls) - - def test_allowed_url_without_netloc_raises_exception(self): - """Test that allowed URL without netloc raises ValueError""" - urls = "https://example.com/file" - allowed_urls = ["https://"] - with pytest.raises(ValueError, match="URL must include protocol and host name"): - validate_urls(urls, allowed_urls) - - def test_subdomain_not_allowed(self): - """Test that subdomain is not allowed unless explicitly specified""" - urls = "https://sub.example.com/file" - allowed_urls = ["https://example.com"] - assert validate_urls(urls, allowed_urls) is False - - def test_subdomain_allowed(self): - """Test that subdomain is allowed when explicitly specified""" - urls = "https://sub.example.com/file" - allowed_urls = ["https://sub.example.com"] - assert validate_urls(urls, allowed_urls) is True - - def test_port_in_url(self): - """Test URLs with ports""" - urls = "https://example.com:8080/file" - allowed_urls = ["https://example.com:8080"] - assert validate_urls(urls, allowed_urls) is True - - def test_port_mismatch(self): - """Test that different ports are treated as different URLs""" - urls = "https://example.com:8080/file" - allowed_urls = ["https://example.com:3000"] - assert validate_urls(urls, allowed_urls) is False - - def test_path_ignored(self): - """Test that path is ignored in comparison""" - urls = "https://example.com/different/path" - allowed_urls = ["https://example.com"] - assert validate_urls(urls, allowed_urls) is True - - def test_query_params_ignored(self): - """Test that query parameters are ignored in comparison""" - urls = 
"https://example.com/file?param=value" - allowed_urls = ["https://example.com"] - assert validate_urls(urls, allowed_urls) is True - - def test_fragment_ignored(self): - """Test that fragment is ignored in comparison""" - urls = "https://example.com/file#section" - allowed_urls = ["https://example.com"] - assert validate_urls(urls, allowed_urls) is True - - def test_mixed_case_netloc_case_insensitive(self): - """Test that netloc comparison is case-insensitive""" - urls = "https://EXAMPLE.com/file" - allowed_urls = ["https://example.com"] - assert validate_urls(urls, allowed_urls) is True - - def test_mixed_case_scheme_case_insensitive(self): - """Test that scheme comparison is case-insensitive""" - urls = "HTTPS://example.com/file" - allowed_urls = ["https://example.com"] - assert validate_urls(urls, allowed_urls) is True - - def test_mixed_case_both_case_insensitive(self): - """Test that both scheme and netloc comparison are case-insensitive""" - urls = "HTTPS://EXAMPLE.COM/file" - allowed_urls = ["https://example.com"] - assert validate_urls(urls, allowed_urls) is True - - def test_mixed_case_allowed_urls_case_insensitive(self): - """Test that allowed URLs are also case-insensitive""" - urls = "https://example.com/file" - allowed_urls = ["HTTPS://EXAMPLE.COM"] - assert validate_urls(urls, allowed_urls) is True - - def test_empty_string_url(self): - """Test empty string URL raises exception""" - urls = "" - allowed_urls = ["https://example.com"] - with pytest.raises(ValueError, match="URL must include protocol and host name"): - validate_urls(urls, allowed_urls) - - def test_none_url(self): - """Test None URL raises TypeError""" - urls = None - allowed_urls = ["https://example.com"] - with pytest.raises(TypeError): - validate_urls(urls, allowed_urls) - - def test_complex_url_with_all_components(self): - """Test complex URL with path, query, and fragment""" - urls = "https://example.com/path/to/file?param1=value1¶m2=value2#section" - allowed_urls = 
["https://example.com"] - assert validate_urls(urls, allowed_urls) is True - - def test_ip_address_url(self): - """Test URL with IP address""" - urls = "https://192.168.1.1/file" - allowed_urls = ["https://192.168.1.1"] - assert validate_urls(urls, allowed_urls) is True - - def test_ip_address_mismatch(self): - """Test that different IP addresses are treated as different URLs""" - urls = "https://192.168.1.1/file" - allowed_urls = ["https://192.168.1.2"] - assert validate_urls(urls, allowed_urls) is False - - def test_empty_string_in_allowed_urls_raises_exception(self): - """Test that empty string in allowed_urls raises ValueError""" - urls = "https://example.com/file" - allowed_urls = [""] - with pytest.raises(ValueError, match="URL must include protocol and host name"): - validate_urls(urls, allowed_urls) - - def test_url_without_scheme_in_allowed_urls_raises_exception(self): - """Test that URL without scheme in allowed_urls raises ValueError""" - urls = "https://example.com/file" - allowed_urls = ["example.com"] - with pytest.raises(ValueError, match="URL must include protocol and host name"): - validate_urls(urls, allowed_urls) - - def test_both_http_and_https_allowed(self): - """Test that both HTTP and HTTPS can be allowed for the same domain""" - urls = ["http://example.com/file", "https://example.com/file"] - allowed_urls = ["http://example.com", "https://example.com"] - assert validate_urls(urls, allowed_urls) is True - - def test_only_http_allowed(self): - """Test that only HTTP is allowed""" - urls = "https://example.com/file" - allowed_urls = ["http://example.com"] - assert validate_urls(urls, allowed_urls) is False - - def test_only_https_allowed(self): - """Test that only HTTPS is allowed""" - urls = "http://example.com/file" - allowed_urls = ["https://example.com"] - assert validate_urls(urls, allowed_urls) is False - - def test_ftp_scheme(self): - """Test FTP scheme""" - urls = "ftp://example.com/file" - allowed_urls = ["ftp://example.com"] - assert 
validate_urls(urls, allowed_urls) is True - - def test_custom_scheme(self): - """Test custom scheme""" - urls = "custom://example.com/file" - allowed_urls = ["custom://example.com"] - assert validate_urls(urls, allowed_urls) is True - - def test_case_insensitive_with_ports(self): - """Test case-insensitive comparison with ports""" - urls = "HTTPS://EXAMPLE.COM:8080/file" - allowed_urls = ["https://example.com:8080"] - assert validate_urls(urls, allowed_urls) is True - - def test_case_insensitive_with_subdomains(self): - """Test case-insensitive comparison with subdomains""" - urls = "https://SUB.EXAMPLE.COM/file" - allowed_urls = ["https://sub.example.com"] - assert validate_urls(urls, allowed_urls) is True - - def test_case_insensitive_ip_addresses(self): - """Test case-insensitive comparison with IP addresses""" - urls = "HTTPS://192.168.1.1/file" - allowed_urls = ["https://192.168.1.1"] - assert validate_urls(urls, allowed_urls) is True diff --git a/tests/unit/various/test_utils_sql.py b/tests/unit/various/test_utils_sql.py deleted file mode 100644 index b0b906d4778..00000000000 --- a/tests/unit/various/test_utils_sql.py +++ /dev/null @@ -1,32 +0,0 @@ -import pytest - -from mindsdb.utilities.sql import clear_sql - -test_cases = [ - [ - '--comment\nselect a, /*comment*/ from b; # comment\n', - 'select a, from b' - ], [ - '#comment\nselect a, /*comment*/ from b; -- comment\n', - 'select a, from b' - ], [ - 'select "--comment\n", "/*comment*/" from b "#comment"/*comment*/;', - 'select "--comment\n", "/*comment*/" from b "#comment"' - ], [ - 'select `--comment\t`, `/*comment*/` from b `#comment`--comment\n;', - 'select `--comment\t`, `/*comment*/` from b `#comment`' - ], [ - "select '--comment\n', '/*comment*/' from b '#comment'#comment\n;", - "select '--comment\n', '/*comment*/' from b '#comment'" - ], [ - "select `'`--comment\n`'`, '/*comment*/' from b '#comment'#comment\n;", - "select `'`--comment\n`'`, '/*comment*/' from b '#comment'" - ] -] - - 
-@pytest.mark.parametrize('test_index, test_case', enumerate(test_cases)) -def test_utils_sql(test_index, test_case): - input_sql, expected_sql = test_case - result = clear_sql(input_sql) - assert result == expected_sql, f"Test case {test_index} failed: {result} != {expected_sql}" diff --git a/tests/unused/unit/handler_tests/test_pgvector_handler.py b/tests/unused/unit/handler_tests/test_pgvector_handler.py deleted file mode 100644 index 3eaa5316317..00000000000 --- a/tests/unused/unit/handler_tests/test_pgvector_handler.py +++ /dev/null @@ -1,84 +0,0 @@ -import os -import psycopg2 -import pytest - -from mindsdb.integrations.handlers.pgvector_handler.pgvector_handler import PgVectorHandler - - -TEST_DB_NAME = os.environ.get('MDB_TEST_PGVECTOR_DATABASE', 'pgvector_handler_test_db') -# Should match table name in data/pgvector/seed.sql -TEST_TABLE_NAME = 'items' -# Should match column names in data/pgvector/seed.sql -COLUMN_NAMES = ['id', 'content', 'embeddings', 'metadata'] - -HANDLER_KWARGS = { - 'connection_data': { - 'host': os.environ.get('MDB_TEST_PGVECTOR_HOST', '127.0.0.1'), - 'port': os.environ.get('MDB_TEST_PGVECTOR_PORT', '5432'), - 'user': os.environ.get('MDB_TEST_PGVECTOR_USER', 'postgres'), - 'password': os.environ.get('MDB_TEST_PGVECTOR_PASSWORD', 'supersecret'), - 'database': TEST_DB_NAME - } -} - - -def init_db(): - '''Seed the test DB with some data''' - conn_info = HANDLER_KWARGS['connection_data'].copy() - conn_info['database'] = 'postgres' - db = psycopg2.connect(**conn_info) - db.autocommit = True - cursor = db.cursor() - - try: - cursor.execute(f'DROP DATABASE IF EXISTS {TEST_DB_NAME};') - db.commit() - - # Create the test database - cursor.execute(f'CREATE DATABASE {TEST_DB_NAME};') - db.commit() - - # Reconnect to the new database - conn_info['database'] = TEST_DB_NAME - db = psycopg2.connect(**conn_info) - db.autocommit = True - cursor = db.cursor() - - # Seed the database with data - curr_dir = os.path.dirname(os.path.realpath(__file__)) - 
seed_sql_path = os.path.join(curr_dir, 'data', 'pgvector', 'seed.sql') - with open(seed_sql_path, 'r') as sql_seed_file: - cursor.execute(sql_seed_file.read()) - db.commit() - - finally: - # Close the cursor and the connection - cursor.close() - db.close() - - -@pytest.fixture(scope='module') -def handler(): - init_db() - handler = PgVectorHandler('test_handler', **HANDLER_KWARGS) - yield handler - - -@pytest.mark.skipif(os.environ.get('MDB_TEST_PGVECTOR_HOST') is None, reason='MDB_TEST_PGVECTOR_HOST environment variable not set') -class TestPgvectorConnection: - def test_connect(self, handler): - handler.connect() - assert handler.is_connected, 'connection error' - - def test_check_connection(self, handler): - res = handler.check_connection() - assert res.success, res.error_message - - -@pytest.mark.skipif(os.environ.get('MDB_TEST_PGVECTOR_HOST') is None, reason='MDB_TEST_PGVECTOR_HOST environment variable not set') -class TestPgvectorQuery: - def test_select(self, handler): - result = handler.select(TEST_TABLE_NAME) - assert not result.empty - for col in COLUMN_NAMES: - assert col in result.columns diff --git a/tests/unused/unit/interfaces/agents/test_api_key_handling.py b/tests/unused/unit/interfaces/agents/test_api_key_handling.py deleted file mode 100644 index 8a45b931d6c..00000000000 --- a/tests/unused/unit/interfaces/agents/test_api_key_handling.py +++ /dev/null @@ -1,101 +0,0 @@ -import os -import unittest -from unittest.mock import patch, MagicMock - -from mindsdb.integrations.utilities.handler_utils import get_api_key -from mindsdb.interfaces.agents.agents_controller import AgentsController - - -class TestAgentApiKeyHandling(unittest.TestCase): - """Test API key handling in agent creation and usage.""" - - def setUp(self): - """Set up test environment.""" - # Mock environment variables - self.env_patcher = patch.dict( - os.environ, {"OPENAI_API_KEY": "test-env-api-key", "ANTHROPIC_API_KEY": "test-env-anthropic-key"} - ) - self.env_patcher.start() - - def 
tearDown(self): - """Clean up after tests.""" - self.env_patcher.stop() - - def test_get_api_key_from_env(self): - """Test retrieving API key from environment variables.""" - # Test getting API key from environment variable - api_key = get_api_key("openai", {}) - self.assertEqual(api_key, "test-env-api-key") - - def test_get_api_key_from_args(self): - """Test retrieving API key from create_args.""" - # Test getting API key from create_args - api_key = get_api_key("openai", {"openai_api_key": "test-args-api-key"}) - self.assertEqual(api_key, "test-args-api-key") - - def test_get_api_key_from_params(self): - """Test retrieving API key from params dictionary.""" - # Test getting API key from params dictionary - api_key = get_api_key("openai", {"params": {"openai_api_key": "test-params-api-key"}}) - self.assertEqual(api_key, "test-params-api-key") - - def test_get_api_key_priority(self): - """Test API key retrieval priority.""" - # Test that create_args takes priority over environment variables - api_key = get_api_key("openai", {"openai_api_key": "test-args-api-key"}) - self.assertEqual(api_key, "test-args-api-key") - - # Test that params takes priority over environment variables - api_key = get_api_key("openai", {"params": {"openai_api_key": "test-params-api-key"}}) - self.assertEqual(api_key, "test-params-api-key") - - # Test that create_args takes priority over params - api_key = get_api_key( - "openai", {"openai_api_key": "test-args-api-key", "params": {"openai_api_key": "test-params-api-key"}} - ) - self.assertEqual(api_key, "test-args-api-key") - - @patch("mindsdb.interfaces.agents.agents_controller.AgentsController.check_model_provider") - @patch("mindsdb.interfaces.agents.agents_controller.AgentsController.get_agent") - @patch("mindsdb.interfaces.agents.agents_controller.ProjectController") - @patch("mindsdb.interfaces.storage.db.session") - def test_add_agent_with_api_key( - self, mock_session, mock_project_controller, mock_get_agent, mock_check_model_provider 
- ): - """Test adding an agent with an API key in params.""" - # Mock project controller - mock_project = MagicMock() - mock_project_controller.return_value.get.return_value = mock_project - - # Mock get_agent to return None (agent doesn't exist yet) - mock_get_agent.return_value = None - - # Mock check_model_provider to return a provider - mock_check_model_provider.return_value = (None, "openai") - - # Create an instance of AgentsController - agent_controller = AgentsController() - - # Test adding an agent with an API key in params - params = {"openai_api_key": "test-agent-api-key", "other_param": "value"} - - # Create a mock agent with proper params - mock_agent = MagicMock() - mock_agent.params = params.copy() # Set params directly - - # Mock db.Agents to return our prepared mock agent - with patch("mindsdb.interfaces.storage.db.Agents", return_value=mock_agent): - # Add the agent - agent = agent_controller.add_agent( - name="test_agent", - project_name="mindsdb", - model={"model_name": "gpt-4", "provider": "openai"}, - params=params, - ) - - # Verify that the API key was preserved in the params - self.assertEqual(agent.params.get("openai_api_key"), "test-agent-api-key") - - -if __name__ == "__main__": - unittest.main()