From 5a2401c3084ad2be4c5f6ee984cef0073094d18f Mon Sep 17 00:00:00 2001 From: Don Kendall Date: Fri, 27 Mar 2026 08:12:16 -0400 Subject: [PATCH 1/6] feat: add OpenSearch and Typesense fulltext adapters Add two new search backend adapters implementing FullTextAdapter: - @hcengineering/opensearch: Drop-in replacement using OpenSearch 2.x client. Removes deprecated type: _doc params, uses osErr error types. Fixes inherited bugs: updateMany error ID matching, remove() array mutation. - @hcengineering/typesense: Native Typesense adapter with proper query translation. Uses filter_by for workspace/class scoping, optional filter clauses for soft boosts (matching ES should semantics), read-modify-write for updateByQuery, and batch import for updateMany. Binary doc.data is skipped (relies on Rekoni extraction). Both adapters follow the same factory pattern as the existing elastic adapter and can be selected at runtime. Signed-off-by: Don Kendall --- common/config/rush/pnpm-lock.yaml | 201 +++++ .../server/packages/opensearch/.eslintrc.js | 7 + .../server/packages/opensearch/package.json | 71 ++ .../server/packages/opensearch/src/adapter.ts | 698 ++++++++++++++++++ .../server/packages/opensearch/src/index.ts | 17 + .../server/packages/opensearch/tsconfig.json | 12 + .../server/packages/typesense/.eslintrc.js | 7 + .../server/packages/typesense/package.json | 69 ++ .../server/packages/typesense/src/adapter.ts | 598 +++++++++++++++ .../server/packages/typesense/src/index.ts | 17 + .../server/packages/typesense/tsconfig.json | 12 + 11 files changed, 1709 insertions(+) create mode 100644 foundations/server/packages/opensearch/.eslintrc.js create mode 100644 foundations/server/packages/opensearch/package.json create mode 100644 foundations/server/packages/opensearch/src/adapter.ts create mode 100644 foundations/server/packages/opensearch/src/index.ts create mode 100644 foundations/server/packages/opensearch/tsconfig.json create mode 100644 foundations/server/packages/typesense/.eslintrc.js create mode 100644 foundations/server/packages/typesense/package.json create mode 100644 foundations/server/packages/typesense/src/adapter.ts create mode 100644 foundations/server/packages/typesense/src/index.ts create mode 100644 foundations/server/packages/typesense/tsconfig.json diff --git a/common/config/rush/pnpm-lock.yaml b/common/config/rush/pnpm-lock.yaml index de433b45da8..0d9b626867f 100644 --- a/common/config/rush/pnpm-lock.yaml +++ b/common/config/rush/pnpm-lock.yaml @@ -5216,6 +5216,70 @@ importers: specifier: ^5.9.3 version: 5.9.3 + ../../foundations/server/packages/opensearch: + dependencies: + '@hcengineering/analytics': + specifier: workspace:^0.7.17 + version: link:../../../core/packages/analytics + '@hcengineering/core': + specifier: workspace:^0.7.24 + version: link:../../../core/packages/core + '@hcengineering/platform': + specifier: workspace:^0.7.19 + version: link:../../../core/packages/platform + '@hcengineering/server-core': + specifier: workspace:^0.7.18 + version: link:../core + '@opensearch-project/opensearch': + specifier: ^2.12.0 + version: 2.13.0 + devDependencies: + '@hcengineering/platform-rig': + specifier: workspace:^0.7.19 + version: link:../../../utils/packages/platform-rig + '@types/jest': + specifier: ^29.5.5 + version: 29.5.14 + '@typescript-eslint/eslint-plugin': + specifier: ^6.21.0 + version: 6.21.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1)(typescript@5.9.3) + '@typescript-eslint/parser': + specifier: ^6.21.0 + version: 6.21.0(eslint@8.57.1)(typescript@5.9.3) + eslint: + specifier: ^8.54.0 + version: 8.57.1 + eslint-config-standard-with-typescript: + specifier: ^40.0.0 + version: 40.0.0(@typescript-eslint/eslint-plugin@6.21.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1)(typescript@5.9.3))(eslint-plugin-import@2.32.0(eslint@8.57.1))(eslint-plugin-n@15.7.0(eslint@8.57.1))(eslint-plugin-promise@6.6.0(eslint@8.57.1))(eslint@8.57.1)(typescript@5.9.3) + eslint-plugin-import: + specifier: ^2.26.0 + version: 2.32.0(eslint@8.57.1) + eslint-plugin-n: + specifier: ^15.4.0 + version: 15.7.0(eslint@8.57.1) + eslint-plugin-promise: + specifier: ^6.1.1 + version: 6.6.0(eslint@8.57.1) + eslint-plugin-svelte: + specifier: ^2.35.1 + version: 2.46.1(eslint@8.57.1)(svelte@4.2.20)(ts-node@10.9.2(@types/node@22.19.0)(typescript@5.9.3)) + jest: + specifier: ^29.7.0 + version: 29.7.0(@types/node@22.19.0)(ts-node@10.9.2(@types/node@22.19.0)(typescript@5.9.3)) + prettier: + specifier: ^3.6.2 + version: 3.6.2 + ts-jest: + specifier: ^29.1.1 + version: 29.4.5(@babel/core@7.28.5)(@jest/transform@29.7.0)(@jest/types@30.2.0)(babel-jest@29.7.0(@babel/core@7.28.5))(esbuild@0.25.12)(jest-util@30.2.0)(jest@29.7.0(@types/node@22.19.0)(ts-node@10.9.2(@types/node@22.19.0)(typescript@5.9.3)))(typescript@5.9.3) + ts-node: + specifier: ^10.9.2 + version: 10.9.2(@types/node@22.19.0)(typescript@5.9.3) + typescript: + specifier: ^5.9.3 + version: 5.9.3 + ../../foundations/server/packages/postgres: dependencies: '@hcengineering/core': @@ -5514,6 +5578,67 @@ importers: specifier: ^5.9.3 version: 5.9.3 + ../../foundations/server/packages/typesense: + dependencies: + '@hcengineering/analytics': + specifier: workspace:^0.7.17 + version: link:../../../core/packages/analytics + '@hcengineering/core': + specifier: workspace:^0.7.24 + version: link:../../../core/packages/core + '@hcengineering/platform': + specifier: workspace:^0.7.19 + version: link:../../../core/packages/platform + '@hcengineering/server-core': + specifier: workspace:^0.7.18 + version: link:../core + typesense: + specifier: ^2.0.0 + version: 2.1.0(@babel/runtime@7.28.4) + devDependencies: + '@hcengineering/platform-rig': + specifier: workspace:^0.7.19 + version: link:../../../utils/packages/platform-rig + '@types/jest': + specifier: ^29.5.5 + version: 29.5.14 + '@typescript-eslint/eslint-plugin': + specifier: ^6.21.0 + version: 6.21.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1)(typescript@5.9.3) + '@typescript-eslint/parser': + specifier: ^6.21.0 + version: 6.21.0(eslint@8.57.1)(typescript@5.9.3) + eslint: + specifier: ^8.54.0 + version: 8.57.1 + eslint-config-standard-with-typescript: + specifier: ^40.0.0 + version: 40.0.0(@typescript-eslint/eslint-plugin@6.21.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1)(typescript@5.9.3))(eslint-plugin-import@2.32.0(eslint@8.57.1))(eslint-plugin-n@15.7.0(eslint@8.57.1))(eslint-plugin-promise@6.6.0(eslint@8.57.1))(eslint@8.57.1)(typescript@5.9.3) + eslint-plugin-import: + specifier: ^2.26.0 + version: 2.32.0(eslint@8.57.1) + eslint-plugin-n: + specifier: ^15.4.0 + version: 15.7.0(eslint@8.57.1) + eslint-plugin-promise: + specifier: ^6.1.1 + version: 6.6.0(eslint@8.57.1) + eslint-plugin-svelte: + specifier: ^2.35.1 + version: 2.46.1(eslint@8.57.1)(svelte@4.2.20)(ts-node@10.9.2(@types/node@22.19.0)(typescript@5.9.3)) + jest: + specifier: ^29.7.0 + version: 29.7.0(@types/node@22.19.0)(ts-node@10.9.2(@types/node@22.19.0)(typescript@5.9.3)) + prettier: + specifier: ^3.6.2 + version: 3.6.2 + ts-jest: + specifier: ^29.1.1 + version: 29.4.5(@babel/core@7.28.5)(@jest/transform@29.7.0)(@jest/types@30.2.0)(babel-jest@29.7.0(@babel/core@7.28.5))(esbuild@0.25.12)(jest-util@30.2.0)(jest@29.7.0(@types/node@22.19.0)(ts-node@10.9.2(@types/node@22.19.0)(typescript@5.9.3)))(typescript@5.9.3) + typescript: + specifier: ^5.9.3 + version: 5.9.3 + ../../foundations/utils/packages/platform-rig: dependencies: '@typescript-eslint/eslint-plugin': @@ -30694,6 +30819,9 @@ importers: '@hcengineering/mongo': specifier: workspace:^0.7.16 version: link:../../foundations/server/packages/mongo + '@hcengineering/opensearch': + specifier: workspace:^0.7.17 + version: link:../../foundations/server/packages/opensearch '@hcengineering/platform': specifier: workspace:^0.7.20 version: link:../../foundations/core/packages/platform @@ -30721,6 +30849,9 @@ importers: '@hcengineering/server-token': specifier: workspace:^0.7.18 version: link:../../foundations/core/packages/token + '@hcengineering/typesense': + specifier: workspace:^0.7.17 + version: link:../../foundations/server/packages/typesense '@koa/cors': specifier: ^5.0.0 version: 5.0.0 @@ -43423,6 +43554,10 @@ packages: resolution: {integrity: sha512-exj1MzVXoP7xnAcAB3jZ97pTvVPkQF9y6GA/dvYC47HV7vLv+24XRS6b/v/XnyikpEuvMhugEXdGtAlU086WkQ==} engines: {node: '>= 18'} + '@opensearch-project/opensearch@2.13.0': + resolution: {integrity: sha512-Bu3jJ7pKzumbMMeefu7/npAWAvFu5W9SlbBow1ulhluqUpqc7QoXe0KidDrMy7Dy3BQrkI6llR3cWL4lQTZOFw==} + engines: {node: '>=10', yarn: ^1.22.10} + '@opentelemetry/api-logs@0.203.0': resolution: {integrity: sha512-9B9RU0H7Ya1Dx/Rkyc4stuBZSGVQF27WigitInx2QQoj6KUpEFYPKoWjdFTunJYxmXmh17HeBvbMa1EhGyPmqQ==} engines: {node: '>=8.0.0'} @@ -45618,6 +45753,12 @@ packages: resolution: {integrity: sha512-wvUjBtSGN7+7SjNpq/9M2Tg350UZD3q62IFZLbRAR1bSMlCo1ZaeW+BJ+D090e4hIIZLBcTDWe4Mh4jvUDajzQ==} engines: {node: '>= 0.4'} + aws4@1.13.2: + resolution: {integrity: sha512-lHe62zvbTB5eEABUVi/AwVh0ZKY9rMMDhmm+eeyuuUQbQ3+J+fONVQOZyj+DdrvD4BY33uYniyRJ4UJIaSKAfw==} + + axios@1.13.6: + resolution: {integrity: sha512-ChTCHMouEe2kn713WHbQGcuYrr6fXTBiu460OTwWrWob16g1bXn4vtz07Ope7ewMozJAnEquLk5lWQWtBig9DQ==} + axobject-query@4.1.0: resolution: {integrity: sha512-qIj0G9wZbMGNLjLmg1PT6v2mE9AH2zlnADJD/2tC6E00hgmhUOfEB6greHPAfLRSufHqROIUTkw6E+M3lH0PTQ==} engines: {node: '>= 0.4'} @@ -47604,6 +47745,10 @@ packages: resolution: {integrity: sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==} engines: {node: '>= 6'} + form-data@4.0.5: + resolution: {integrity: sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==} + engines: {node: '>= 6'} + formdata-node@4.4.1: resolution: {integrity: sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==} engines: {node: '>= 12.20'} @@ -47954,6 +48099,10 @@ packages: hpagent@0.1.2: resolution: {integrity: sha512-ePqFXHtSQWAFXYmj+JtOTHr84iNrII4/QRlAAPPE+zqnKy4xJo7Ie1Y4kC7AdB+LxLxSTTzBMASsEcy0q8YyvQ==} + hpagent@1.2.0: + resolution: {integrity: sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==} + engines: {node: '>=14'} + html-encoding-sniffer@4.0.0: resolution: {integrity: sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==} engines: {node: '>=18'} @@ -48707,6 +48856,10 @@ packages: json-stringify-safe@5.0.1: resolution: {integrity: sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==} + json11@2.0.2: + resolution: {integrity: sha512-HIrd50UPYmP6sqLuLbFVm75g16o0oZrVfxrsY0EEys22klz8mRoWlX9KAEDOSOR9Q34rcxsyC8oDveGrCz5uLQ==} + hasBin: true + json5@1.0.2: resolution: {integrity: sha512-g1MWMLBiz8FKi1e4w0UyVL3w+iJceWAFBAaBnnGKOpNa5f8TLktkbre1+s6oICydWAm+HRUGTmI+//xv2hvXYA==} hasBin: true @@ -51538,6 +51691,12 @@ packages: engines: {node: '>=14.17'} hasBin: true + typesense@2.1.0: + resolution: {integrity: sha512-a/IRTL+dRXlpRDU4UodyGj8hl5xBz3nKihVRd/KfSFAfFPGcpdX6lxIgwdXy3O6VLNNiEsN8YwIsPHQPVT0vNw==} + engines: {node: '>=18'} + peerDependencies: + '@babel/runtime': ^7.23.2 + ua-is-frozen@0.1.2: resolution: {integrity: sha512-RwKDW2p3iyWn4UbaxpP2+VxwqXh0jpvdxsYpZ5j/MLLiQOfbsV5shpgQiw93+KMYQPcteeMQ289MaAFzs3G9pw==} @@ -54203,6 +54362,17 @@ snapshots: '@octokit/webhooks-types': 7.6.1 aggregate-error: 3.1.0 + '@opensearch-project/opensearch@2.13.0': + dependencies: + aws4: 1.13.2 + debug: 4.4.3 + hpagent: 1.2.0 + json11: 2.0.2 + ms: 2.1.3 + secure-json-parse: 2.7.0 + transitivePeerDependencies: + - supports-color + '@opentelemetry/api-logs@0.203.0': dependencies: '@opentelemetry/api': 1.9.0 @@ -56939,6 +57109,16 @@ snapshots: dependencies: possible-typed-array-names: 1.1.0 + aws4@1.13.2: {} + + axios@1.13.6: + dependencies: + follow-redirects: 1.15.11 + form-data: 4.0.5 + proxy-from-env: 1.1.0 + transitivePeerDependencies: + - debug + axobject-query@4.1.0: {} b4a@1.7.3: {} @@ -59281,6 +59461,14 @@ snapshots: hasown: 2.0.2 mime-types: 2.1.35 + form-data@4.0.5: + dependencies: + asynckit: 0.4.0 + combined-stream: 1.0.8 + es-set-tostringtag: 2.1.0 + hasown: 2.0.2 + mime-types: 2.1.35 + formdata-node@4.4.1: dependencies: node-domexception: 1.0.0 @@ -59729,6 +59917,8 @@ snapshots: hpagent@0.1.2: {} + hpagent@1.2.0: {} + html-encoding-sniffer@4.0.0: dependencies: whatwg-encoding: 3.1.1 @@ -60756,6 +60946,8 @@ snapshots: json-stringify-safe@5.0.1: optional: true + json11@2.0.2: {} + json5@1.0.2: dependencies: minimist: 1.2.8 @@ -63980,6 +64172,15 @@ snapshots: typescript@5.9.3: {} + typesense@2.1.0(@babel/runtime@7.28.4): + dependencies: + '@babel/runtime': 7.28.4 + axios: 1.13.6 + loglevel: 1.9.2 + tslib: 2.8.1 + transitivePeerDependencies: + - debug + ua-is-frozen@0.1.2: {} ua-parser-js@2.0.6: diff --git a/foundations/server/packages/opensearch/.eslintrc.js b/foundations/server/packages/opensearch/.eslintrc.js new file mode 100644 index 00000000000..72235dc2833 --- /dev/null +++ b/foundations/server/packages/opensearch/.eslintrc.js @@ -0,0 +1,7 @@ +module.exports = { + extends: ['./node_modules/@hcengineering/platform-rig/profiles/default/eslint.config.json'], + parserOptions: { + tsconfigRootDir: __dirname, + project: './tsconfig.json' + } +} diff --git a/foundations/server/packages/opensearch/package.json b/foundations/server/packages/opensearch/package.json new file mode 100644 index 00000000000..005714f2d00 --- /dev/null +++ b/foundations/server/packages/opensearch/package.json @@ -0,0 +1,71 @@ +{ + "name": "@hcengineering/opensearch", + "version": "0.7.17", + "main": "lib/index.js", + "svelte": "src/index.ts", + "types": "types/index.d.ts", + "files": [ + "lib/**/*", + "types/**/*", + "src/**/*", + "!lib/**/*.test.*", + "!lib/**/*.spec.*", + "!lib/__tests__/**/*", + "!lib/__test__/**/*", + "!lib/tests/**/*", + "!types/**/*.test.*", + "!types/**/*.spec.*", + "!types/__tests__/**/*", + "!types/__test__/**/*", + "!types/tests/**/*", + "!src/**/*.test.*", + "!src/**/*.spec.*", + "!src/__tests__/**/*", + "!src/__test__/**/*", + "!src/tests/**/*" + ], + "author": "Anticrm Platform Contributors", + "license": "EPL-2.0", + "scripts": { + "init": "ts-node src/__init.ts", + "build": "compile", + "build:watch": "compile", + "format": "format src", + "test": "jest --passWithNoTests --silent --forceExit", + "_phase:build": "compile transpile src", + "_phase:test": "jest --passWithNoTests --silent --forceExit", + "_phase:format": "format src", + "_phase:validate": "compile validate" + }, + "devDependencies": { + "@hcengineering/platform-rig": "workspace:^0.7.19", + "@typescript-eslint/eslint-plugin": "^6.21.0", + "eslint-plugin-import": "^2.26.0", + "eslint-plugin-promise": "^6.1.1", + "eslint-plugin-n": "^15.4.0", + "eslint": "^8.54.0", + "ts-node": "^10.9.2", + "@typescript-eslint/parser": "^6.21.0", + "eslint-config-standard-with-typescript": "^40.0.0", + "prettier": "^3.6.2", + "typescript": "^5.9.3", + "jest": "^29.7.0", + "ts-jest": "^29.1.1", + "@types/jest": "^29.5.5", + "eslint-plugin-svelte": "^2.35.1" + }, + "dependencies": { + "@hcengineering/core": "workspace:^0.7.24", + "@hcengineering/platform": "workspace:^0.7.19", + "@hcengineering/server-core": "workspace:^0.7.18", + "@opensearch-project/opensearch": "^2.12.0", + "@hcengineering/analytics": "workspace:^0.7.17" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/hcengineering/huly.server.git" + }, + "publishConfig": { + "access": "public" + } +} diff --git a/foundations/server/packages/opensearch/src/adapter.ts b/foundations/server/packages/opensearch/src/adapter.ts new file mode 100644 index 00000000000..9b4697ece30 --- /dev/null +++ b/foundations/server/packages/opensearch/src/adapter.ts @@ -0,0 +1,698 @@ +// +// Copyright © 2020, 2021 Anticrm Platform Contributors. +// Copyright © 2021 Hardcore Engineering Inc. +// +// Licensed under the Eclipse Public License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. You may +// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// +// See the License for the specific language governing permissions and +// limitations under the License. +// + +import { Analytics } from '@hcengineering/analytics' +import { + Class, + Doc, + DocumentQuery, + MeasureContext, + Ref, + SearchOptions, + SearchQuery, + TxResult, + WorkspaceUuid +} from '@hcengineering/core' +import type { FullTextAdapter, IndexedDoc, SearchScoring, SearchStringResult } from '@hcengineering/server-core' +import serverCore from '@hcengineering/server-core' + +import { Client, errors as osErr } from '@opensearch-project/opensearch' +import { getMetadata } from '@hcengineering/platform' + +const DEFAULT_LIMIT = 200 + +function getIndexName (): string { + return getMetadata(serverCore.metadata.ElasticIndexName) ?? 'storage_index' +} + +function getIndexVersion (): string { + return getMetadata(serverCore.metadata.ElasticIndexVersion) ?? 'v2' +} + +const mappings = { + properties: { + fulltextSummary: { + type: 'text', + analyzer: 'rebuilt_english' + }, + workspaceId: { + type: 'keyword', + index: true + }, + id: { + type: 'keyword', + index: true + }, + _class: { + type: 'keyword', + index: true + }, + attachedTo: { + type: 'keyword', + index: true + }, + attachedToClass: { + type: 'keyword', + index: true + }, + space: { + type: 'keyword', + index: true + }, + 'core:class:Doc%createdBy': { + type: 'keyword', + index: true + }, + 'core:class:Doc%createdOn': { + type: 'date', + format: 'epoch_millis', + index: true + }, + modifiedBy: { + type: 'keyword', + index: true + }, + modifiedOn: { + type: 'date', + format: 'epoch_millis', + index: true + }, + 'core:class:Doc%modifiedBy': { + type: 'keyword', + index: true + }, + 'core:class:Doc%modifiedOn': { + type: 'date', + format: 'epoch_millis', + index: true + } + } +} + +class OpenSearchAdapter implements FullTextAdapter { + private readonly getFulltextDocId: (workspaceId: WorkspaceUuid, doc: Ref) => Ref + private readonly getDocId: (workspaceId: WorkspaceUuid, fulltext: Ref) => Ref + private readonly indexName: string + + constructor ( + private readonly client: Client, + private readonly indexBaseName: string, + readonly indexVersion: string + ) { + this.indexName = `${indexBaseName}_${indexVersion}` + this.getFulltextDocId = (workspaceId, doc) => `${doc}@${workspaceId}` as Ref + this.getDocId = (workspaceId, fulltext) => fulltext.slice(0, -1 * (workspaceId.length + 1)) as Ref + } + + async initMapping (ctx: MeasureContext): Promise { + const indexName = this.indexName + try { + const existingVersions = await ctx.withSync('get-indexes', {}, () => + this.client.indices.get({ + index: `${this.indexBaseName}_*` + }) + ) + const allIndexes = Object.keys(existingVersions.body) + const existingOldVersionIndices = allIndexes.filter((name) => name !== indexName) + const existsIndex = allIndexes.find((it) => it === indexName) !== undefined + let shouldDropExistingIndex = false + if (existsIndex) { + const mapping = await ctx.with('get-mapping', { indexName }, () => + this.client.indices.getMapping({ + index: indexName + }) + ) + for (const [propName, propType] of Object.entries(mappings.properties)) { + if (mapping.body[indexName]?.mappings.properties?.[propName]?.type !== propType.type) { + shouldDropExistingIndex = true + break + } + } + } + if (existingOldVersionIndices.length > 0 || shouldDropExistingIndex) { + await ctx.with('delete-old-index', {}, () => + this.client.indices.delete({ + index: shouldDropExistingIndex ? allIndexes : existingOldVersionIndices + }) + ) + } + if (!existsIndex || shouldDropExistingIndex) { + await ctx.with('create-index', { indexName }, () => + this.client.indices.create({ + index: indexName, + body: { + settings: { + analysis: { + filter: { + english_stemmer: { + type: 'stemmer', + language: 'english' + }, + english_possessive_stemmer: { + type: 'stemmer', + language: 'possessive_english' + } + }, + analyzer: { + rebuilt_english: { + type: 'custom', + tokenizer: 'standard', + filter: ['english_possessive_stemmer', 'lowercase', 'english_stemmer'] + } + } + } + }, + mappings + } + }) + ) + } else { + await ctx.with('put-mapping', {}, () => + this.client.indices.putMapping({ + index: indexName, + body: mappings + }) + ) + } + } catch (err: any) { + if (err.name === 'ConnectionError') { + ctx.warn('OpenSearch DB is not available') + } + Analytics.handleError(err) + ctx.error(err) + return false + } + return true + } + + async close (): Promise { + await this.client.close() + } + + async searchString ( + ctx: MeasureContext, + workspaceId: WorkspaceUuid, + query: SearchQuery, + options: SearchOptions & { scoring?: SearchScoring[] } + ): Promise { + try { + const elasticQuery: any = { + query: { + function_score: { + query: { + bool: { + must: [ + { + simple_query_string: { + query: query.query, + analyze_wildcard: true, + flags: 'OR|PREFIX|PHRASE|FUZZY|NOT|ESCAPE', + default_operator: 'and', + fields: [ + 'searchTitle^50', // boost + 'searchShortTitle^50', + '*' // Search in all other fields without a boost + ] + } + }, + { + term: { + workspaceId + } + } + ] + } + }, + boost_mode: 'sum' + } + }, + size: options.limit ?? DEFAULT_LIMIT + } + + const filter: any = [ + { + exists: { field: 'searchTitle' } + } + ] + + if (query.spaces !== undefined) { + filter.push({ + terms: this.getTerms(query.spaces, 'space') + }) + } + if (query.classes !== undefined) { + filter.push({ + terms: this.getTerms(query.classes, '_class') + }) + } + + if (filter.length > 0) { + elasticQuery.query.function_score.query.bool.filter = filter + } + + if (options.scoring !== undefined) { + const scoringTerms: any[] = options.scoring.map((scoringOption): any => { + const field = Object.hasOwn(mappings.properties, scoringOption.attr) + ? scoringOption.attr + : `${scoringOption.attr}.keyword` + return { + term: { + [field]: { + value: scoringOption.value, + boost: scoringOption.boost + } + } + } + }) + elasticQuery.query.function_score.query.bool.should = scoringTerms + } + + const result = await this.client.search({ + index: this.indexName, + body: elasticQuery + }) + + const resp: SearchStringResult = { docs: [] } + if (result.body.hits !== undefined) { + if (result.body.hits.total?.value !== undefined) { + resp.total = result.body.hits.total?.value + } + resp.docs = result.body.hits.hits.map((hit: any) => ({ ...hit._source, _score: hit._score })) + } + + return resp + } catch (err: any) { + if (err.name === 'ConnectionError') { + ctx.warn('OpenSearch DB is not available') + return { docs: [] } + } + Analytics.handleError(err) + ctx.error('OpenSearch error', { error: err }) + return { docs: [] } + } + } + + async search ( + ctx: MeasureContext, + workspaceId: WorkspaceUuid, + _classes: Ref>[], + query: DocumentQuery, + size: number | undefined, + from: number | undefined + ): Promise { + if (query.$search === undefined) return [] + const request: any = { + bool: { + must: [ + { + simple_query_string: { + query: query.$search, + analyze_wildcard: true, + flags: 'OR|PREFIX|PHRASE|FUZZY|NOT|ESCAPE', + default_operator: 'and' + } + }, + { + term: { + workspaceId + } + } + ], + should: [{ terms: this.getTerms(_classes, '_class', { boost: 10.0 }) }], + filter: [ + { + bool: { + should: [ + { terms: this.getTerms(_classes, '_class') } + // { terms: this.getTerms(_classes, 'attachedToClass') } + ] + } + } + ] + } + } + + for (const [q, v] of Object.entries(query)) { + if (!q.startsWith('$')) { + const field = Object.hasOwn(mappings.properties, q) ? q : `${q}.keyword` + if (typeof v === 'object') { + if (v.$in !== undefined) { + request.bool.should.push({ + terms: { + [field]: v.$in, + boost: 100.0 + } + }) + } + } else { + request.bool.should.push({ + term: { + [field]: { + value: v, + boost: 100.0, + case_insensitive: true + } + } + }) + } + } + } + + try { + const result = await ctx.with( + 'search', + {}, + () => + this.client.search({ + index: this.indexName, + body: { + query: request, + size: size ?? 200, + from: from ?? 0 + } + }), + { + _classes, + size, + from, + query: request + } + ) + const hits = result.body.hits.hits as any[] + return hits.map((hit) => ({ ...hit._source, _score: hit._score })) + } catch (err: any) { + if (err.name === 'ConnectionError') { + ctx.warn('OpenSearch DB is not available') + return [] + } + ctx.error('OpenSearch error', { error: err }) + Analytics.handleError(err) + return [] + } + } + + private getTerms (values: string[], field: string, extra: any = {}): any { + return { + [Object.hasOwn(mappings.properties, field) ? field : `${field}.keyword`]: values, + ...extra + } + } + + async index (ctx: MeasureContext, workspaceId: WorkspaceUuid, doc: IndexedDoc): Promise { + const wsDoc = { + workspaceId, + ...doc + } + const fulltextId = this.getFulltextDocId(workspaceId, doc.id) + if (doc.data === undefined) { + await this.client.index({ + index: this.indexName, + id: fulltextId, + body: wsDoc + }) + } else { + await this.client.index({ + index: this.indexName, + id: fulltextId, + pipeline: 'attachment', + body: wsDoc + }) + } + return {} + } + + async update ( + ctx: MeasureContext, + workspaceId: WorkspaceUuid, + id: Ref, + update: Record + ): Promise { + await this.client.update({ + index: this.indexName, + id: this.getFulltextDocId(workspaceId, id), + body: { + doc: update + } + }) + + return {} + } + + async updateMany (ctx: MeasureContext, workspaceId: WorkspaceUuid, docs: IndexedDoc[]): Promise { + const parts = Array.from(docs) + while (parts.length > 0) { + const part = parts.splice(0, 500) + + const operations = part.flatMap((doc) => { + const wsDoc = { workspaceId, ...doc } + return [{ index: { _index: this.indexName, _id: this.getFulltextDocId(workspaceId, doc.id) } }, { ...wsDoc }] + }) + + const response = await this.client.bulk({ refresh: true, body: operations }) + if ((response as any).body.errors === true) { + const errors = response.body.items.filter((it: any) => it.index.error !== undefined) + const errorIds = new Set(errors.map((it: any) => it.index._id)) + const erroDocs = docs.filter((it) => errorIds.has(this.getFulltextDocId(workspaceId, it.id))) + // Collect only errors + const errs = Array.from( + errors.map((it: any) => { + return `${it.index.error.reason}: ${it.index.error.caused_by?.reason}` + }) + ).join('\n') + + console.error(`Failed to process bulk request: ${errs} ${JSON.stringify(erroDocs)}`) + } + } + return [] + } + + async updateByQuery ( + ctx: MeasureContext, + workspaceId: WorkspaceUuid, + query: DocumentQuery, + update: Record + ): Promise { + const elasticQuery: any = { + bool: { + must: [ + { + term: { + workspaceId + } + } + ] + } + } + + for (const [q, v] of Object.entries(query)) { + if (!q.startsWith('$')) { + if (typeof v === 'object') { + if (v.$in !== undefined) { + elasticQuery.bool.must.push({ + terms: { + [Object.hasOwn(mappings.properties, q) ? q : `${q}.keyword`]: v.$in + } + }) + } + } else { + elasticQuery.bool.must.push({ + term: { + [Object.hasOwn(mappings.properties, q) ? q : `${q}.keyword`]: { + value: v + } + } + }) + } + } + } + + await this.client.updateByQuery({ + index: this.indexName, + body: { + query: elasticQuery, + script: { + source: + 'for(int i = 0; i < params.updateFields.size(); i++) { ctx._source[params.updateFields[i].key] = params.updateFields[i].value }', + params: { + updateFields: Object.entries(update).map(([key, value]) => ({ key, value })) + }, + lang: 'painless' + } + } + }) + return [] + } + + async remove (ctx: MeasureContext, workspaceId: WorkspaceUuid, docs: Ref[]): Promise { + try { + const remaining = Array.from(docs) + while (remaining.length > 0) { + const part = remaining.splice(0, 5000) + await this.client.deleteByQuery( + { + index: this.indexName, + body: { + query: { + bool: { + must: [ + { + terms: { + _id: part.map((it) => this.getFulltextDocId(workspaceId, it)), + boost: 1.0 + } + }, + { + term: { + workspaceId + } + } + ] + } + }, + size: part.length + } + }, + undefined + ) + } + } catch (e: any) { + if (e instanceof osErr.ResponseError && e.meta.statusCode === 404) { + return + } + throw e + } + } + + async removeByQuery (ctx: MeasureContext, workspaceId: WorkspaceUuid, query: DocumentQuery): Promise { + const elasticQuery: any = { + bool: { + must: [ + { + term: { + workspaceId + } + } + ] + } + } + + for (const [q, v] of Object.entries(query)) { + if (!q.startsWith('$')) { + if (typeof v === 'object') { + if (v.$in !== undefined) { + elasticQuery.bool.must.push({ + terms: { + [Object.hasOwn(mappings.properties, q) ? q : `${q}.keyword`]: v.$in + } + }) + } + } else { + elasticQuery.bool.must.push({ + term: { + [Object.hasOwn(mappings.properties, q) ? q : `${q}.keyword`]: { + value: v + } + } + }) + } + } + } + try { + await this.client.deleteByQuery({ + index: this.indexName, + body: { + query: elasticQuery + } + }) + } catch (e: any) { + if (e instanceof osErr.ResponseError && e.meta.statusCode === 404) { + return + } + throw e + } + } + + async clean (ctx: MeasureContext, workspaceId: WorkspaceUuid): Promise { + try { + await this.client.deleteByQuery( + { + index: this.indexName, + body: { + query: { + bool: { + must: [ + { + term: { + workspaceId + } + } + ] + } + } + } + }, + undefined + ) + } catch (e: any) { + if (e instanceof osErr.ResponseError && e.meta.statusCode === 404) { + return + } + throw e + } + } + + async load (ctx: MeasureContext, workspaceId: WorkspaceUuid, docs: Ref[]): Promise { + const resp = await this.client.search({ + index: this.indexName, + body: { + query: { + bool: { + must: [ + { + terms: { + _id: docs.map((it) => this.getFulltextDocId(workspaceId, it)), + boost: 1.0 + } + }, + { + term: { + workspaceId + } + } + ] + } + }, + size: docs.length + } + }) + return Array.from( + resp.body.hits.hits.map((hit: any) => ({ ...hit._source, id: this.getDocId(workspaceId, hit._id) })) + ) + } +} + +/** + * @public + */ +export async function createOpenSearchAdapter (url: string): Promise { + const client = new Client({ + node: url + }) + const indexBaseName = getIndexName() + const indexVersion = getIndexVersion() + + return new OpenSearchAdapter(client, indexBaseName, indexVersion) +} diff --git a/foundations/server/packages/opensearch/src/index.ts b/foundations/server/packages/opensearch/src/index.ts new file mode 100644 index 00000000000..7189f833d5e --- /dev/null +++ b/foundations/server/packages/opensearch/src/index.ts @@ -0,0 +1,17 @@ +// +// Copyright © 2020, 2021 Anticrm Platform Contributors. +// Copyright © 2021, 2022 Hardcore Engineering Inc. +// +// Licensed under the Eclipse Public License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. You may +// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// +// See the License for the specific language governing permissions and +// limitations under the License. +// + +export { createOpenSearchAdapter } from './adapter' diff --git a/foundations/server/packages/opensearch/tsconfig.json b/foundations/server/packages/opensearch/tsconfig.json new file mode 100644 index 00000000000..7d78e05abb4 --- /dev/null +++ b/foundations/server/packages/opensearch/tsconfig.json @@ -0,0 +1,12 @@ +{ + "extends": "./node_modules/@hcengineering/platform-rig/profiles/default/tsconfig.json", + + "compilerOptions": { + "rootDir": "./src", + "outDir": "./lib", + "declarationDir": "./types", + "tsBuildInfoFile": ".build/build.tsbuildinfo" + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "lib", "dist", "types", "bundle"] +} diff --git a/foundations/server/packages/typesense/.eslintrc.js b/foundations/server/packages/typesense/.eslintrc.js new file mode 100644 index 00000000000..72235dc2833 --- /dev/null +++ b/foundations/server/packages/typesense/.eslintrc.js @@ -0,0 +1,7 @@ +module.exports = { + extends: ['./node_modules/@hcengineering/platform-rig/profiles/default/eslint.config.json'], + parserOptions: { + tsconfigRootDir: __dirname, + project: './tsconfig.json' + } +} diff --git a/foundations/server/packages/typesense/package.json b/foundations/server/packages/typesense/package.json new file mode 100644 index 00000000000..7b301b93eb3 --- /dev/null +++ b/foundations/server/packages/typesense/package.json @@ -0,0 +1,69 @@ +{ + "name": "@hcengineering/typesense", + "version": "0.7.17", + "main": "lib/index.js", + "svelte": "src/index.ts", + "types": "types/index.d.ts", + "files": [ + "lib/**/*", + "types/**/*", + "src/**/*", + "!lib/**/*.test.*", + "!lib/**/*.spec.*", + "!lib/__tests__/**/*", + "!lib/__test__/**/*", + "!lib/tests/**/*", + "!types/**/*.test.*", + "!types/**/*.spec.*", + "!types/__tests__/**/*", + "!types/__test__/**/*", + "!types/tests/**/*", + "!src/**/*.test.*", + "!src/**/*.spec.*", + "!src/__tests__/**/*", + "!src/__test__/**/*", + "!src/tests/**/*" + ], + "author": "Anticrm Platform Contributors", + "license": "EPL-2.0", + "scripts": { + "build": "compile", + "build:watch": "compile", + "format": "format src", + "test": "jest --passWithNoTests --silent --forceExit", + "_phase:build": "compile transpile src", + "_phase:test": "jest --passWithNoTests --silent --forceExit", + "_phase:format": "format src", + "_phase:validate": "compile validate" + }, + "devDependencies": { + "@hcengineering/platform-rig": "workspace:^0.7.19", + "@typescript-eslint/eslint-plugin": "^6.21.0", + "eslint-plugin-import": "^2.26.0", + "eslint-plugin-promise": "^6.1.1", + "eslint-plugin-n": "^15.4.0", + "eslint": "^8.54.0", + "@typescript-eslint/parser": "^6.21.0", + "eslint-config-standard-with-typescript": "^40.0.0", + "prettier": "^3.6.2", + "typescript": "^5.9.3", + "jest": "^29.7.0", + "ts-jest": "^29.1.1", + "@types/jest": "^29.5.5", + "eslint-plugin-svelte": "^2.35.1" + }, + "dependencies": { + "@hcengineering/core": "workspace:^0.7.24", + "@hcengineering/platform": "workspace:^0.7.19", + "@hcengineering/server-core": "workspace:^0.7.18", + "@hcengineering/analytics": "workspace:^0.7.17", + "typesense": "^2.0.0" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/hcengineering/huly.server.git" + }, + "publishConfig": { + "access": "public" + } +} diff --git a/foundations/server/packages/typesense/src/adapter.ts b/foundations/server/packages/typesense/src/adapter.ts new file mode 100644 index 00000000000..c7c459adf81 --- /dev/null +++ b/foundations/server/packages/typesense/src/adapter.ts @@ -0,0 +1,598 @@ +// +// Copyright © 2020, 2021 Anticrm Platform Contributors. +// Copyright © 2021 Hardcore Engineering Inc. +// +// Licensed under the Eclipse Public License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. You may +// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// +// See the License for the specific language governing permissions and +// limitations under the License. +// + +import { Analytics } from '@hcengineering/analytics' +import { + type Class, + type Doc, + type DocumentQuery, + type MeasureContext, + type Ref, + type SearchOptions, + type SearchQuery, + type TxResult, + type WorkspaceUuid +} from '@hcengineering/core' +import type { FullTextAdapter, IndexedDoc, SearchScoring, SearchStringResult } from '@hcengineering/server-core' +import serverCore from '@hcengineering/server-core' + +import { Client as TypesenseClient } from 'typesense' +import type { CollectionFieldSchema } from 'typesense/lib/Typesense/Collection' + +import { getMetadata } from '@hcengineering/platform' + +const DEFAULT_LIMIT = 200 +const BATCH_SIZE = 500 + +function getIndexName (): string { + return getMetadata(serverCore.metadata.ElasticIndexName) ?? 'storage_index' +} + +function getIndexVersion (): string { + return getMetadata(serverCore.metadata.ElasticIndexVersion) ?? 'v2' +} + +/** Fields that are defined in the schema and are facet-capable (keyword-like). */ +const FACET_FIELDS = new Set([ + 'id', + 'workspaceId', + '_class', + 'space', + 'attachedTo', + 'attachedToClass', + 'modifiedBy', + 'core:class:Doc%createdBy', + 'core:class:Doc%modifiedBy' +]) + +function buildCollectionSchema (collectionName: string): { + name: string + fields: CollectionFieldSchema[] + enable_nested_fields: boolean +} { + const fields: CollectionFieldSchema[] = [ + { name: 'id', type: 'string', facet: true }, + { name: 'workspaceId', type: 'string', facet: true }, + { name: '_class', type: 'string[]', facet: true }, + { name: 'space', type: 'string', facet: true }, + { name: 'attachedTo', type: 'string', facet: true, optional: true }, + { name: 'attachedToClass', type: 'string', facet: true, optional: true }, + { name: 'searchTitle', type: 'string', optional: true }, + { name: 'searchShortTitle', type: 'string', optional: true }, + { name: 'fulltextSummary', type: 'string', optional: true }, + { name: 'modifiedBy', type: 'string', facet: true, optional: true }, + { name: 'modifiedOn', type: 'int64', optional: true }, + { name: 'core:class:Doc%createdBy', type: 'string', facet: true, optional: true }, + { name: 'core:class:Doc%createdOn', type: 'int64', optional: true }, + { name: 'core:class:Doc%modifiedBy', type: 'string', facet: true, optional: true }, + { name: 'core:class:Doc%modifiedOn', type: 'int64', optional: true }, + // Catch-all for dynamic fields + { name: '.*', type: 'auto' } + ] + + return { + name: collectionName, + fields, + enable_nested_fields: true + } +} + +function isTypesenseError (err: any): boolean { + return err?.httpStatus !== undefined || err?.name === 'TypesenseError' +} + +function isConnectionError (err: any): boolean { + if (err?.name === 'ConnectionError') return true + if (err?.code === 'ECONNREFUSED' || err?.code === 'ENOTFOUND') return true + if (err?.message?.includes?.('ECONNREFUSED') === true) return true + return false +} + +/** Escape a value for use inside a Typesense filter_by backtick-quoted string. */ +function escapeFilterValue (val: string): string { + return val.replace(/`/g, '\\`') +} + +/** + * Build a Typesense filter_by string from a workspace ID and a Huly DocumentQuery. + * Skips `$`-prefixed keys (like `$search`). + */ +function buildQueryFilter (workspaceId: WorkspaceUuid, query?: DocumentQuery): string { + const parts: string[] = [`workspaceId:=${workspaceId as string}`] + + if (query !== undefined) { + for (const [q, v] of Object.entries(query)) { + if (q.startsWith('$')) continue + if (typeof v === 'object' && v !== null) { + if (v.$in !== undefined && Array.isArray(v.$in)) { + parts.push(`${q}:=[${v.$in.map((val: string) => `\`${escapeFilterValue(val)}\``).join(',')}]`) + } + } else { + parts.push(`${q}:=\`${escapeFilterValue(String(v))}\``) + } + } + } + + return parts.join(' && ') +} + +class TypesenseAdapter implements FullTextAdapter { + private readonly getFulltextDocId: (workspaceId: WorkspaceUuid, doc: Ref) => string + private readonly getDocId: (workspaceId: WorkspaceUuid, fulltext: string) => Ref + private readonly collectionName: string + + constructor ( + private readonly client: TypesenseClient, + private readonly indexBaseName: string, + readonly indexVersion: string + ) { + this.collectionName = `${indexBaseName}_${indexVersion}` + this.getFulltextDocId = (workspaceId, doc) => `${doc as string}@${workspaceId as string}` + this.getDocId = (workspaceId, fulltext) => fulltext.slice(0, -1 * ((workspaceId as string).length + 1)) as Ref + } + + async initMapping (ctx: MeasureContext, _field?: { key: string, dims: number }): Promise { + const collectionName = this.collectionName + try { + const collections = await this.client.collections().retrieve() + const matchingCollections = collections.filter((c: any) => c.name.startsWith(`${this.indexBaseName}_`)) + const existingCollection = matchingCollections.find((c: any) => c.name === collectionName) + const oldCollections = matchingCollections.filter((c: any) => c.name !== collectionName) + + for (const old of oldCollections) { + await ctx.with('delete-old-collection', {}, async () => { + await this.client.collections(old.name).delete() + }) + } + + let shouldRecreate = false + if (existingCollection !== undefined) { + const existingFields = new Set((existingCollection as any).fields?.map((f: any) => f.name) ?? []) + const schema = buildCollectionSchema(collectionName) + for (const field of schema.fields) { + if (field.name !== '.*' && !existingFields.has(field.name)) { + shouldRecreate = true + break + } + } + } + + if (shouldRecreate && existingCollection !== undefined) { + await ctx.with('delete-collection', {}, async () => { + await this.client.collections(collectionName).delete() + }) + } + + if (existingCollection === undefined || shouldRecreate) { + const schema = buildCollectionSchema(collectionName) + await ctx.with('create-collection', { collectionName }, async () => { + await this.client.collections().create(schema as any) + }) + } + } catch (err: any) { + if (isConnectionError(err)) { + ctx.warn('Typesense DB is not available') + } + Analytics.handleError(err) + ctx.error(err) + return false + } + return true + } + + async close (): Promise { + // Typesense client does not require explicit close + } + + async searchString ( + ctx: MeasureContext, + workspaceId: WorkspaceUuid, + query: SearchQuery, + options: SearchOptions & { scoring?: SearchScoring[] } + ): Promise { + try { + const filterParts: string[] = [`workspaceId:=${workspaceId as string}`] + + if (query.spaces !== undefined && query.spaces.length > 0) { + filterParts.push(`space:=[${query.spaces.map((s) => `\`${escapeFilterValue(s)}\``).join(',')}]`) + } + if (query.classes !== undefined && query.classes.length > 0) { + filterParts.push(`_class:=[${query.classes.map((c) => `\`${escapeFilterValue(c)}\``).join(',')}]`) + } + + // Require searchTitle to exist (non-empty) + filterParts.push('searchTitle:!=""') + + // Scoring: boost documents where a facet field matches a specific value. + // Typesense doesn't support per-term boosting like ES function_score, + // so we use optional filter clauses that prefer matching documents. + if (options.scoring !== undefined && options.scoring.length > 0) { + const optionalParts: string[] = [] + for (const scoring of options.scoring) { + if (FACET_FIELDS.has(scoring.attr)) { + optionalParts.push(`${scoring.attr}:=\`${escapeFilterValue(String(scoring.value))}\``) + } + } + if (optionalParts.length > 0) { + // Use optional filter_by syntax: main filters && (optional1 || optional2) + // Documents matching optional filters rank higher via _text_match + filter proximity + filterParts.push(`(${optionalParts.join(' || ')})`) + } + } + + const filterBy = filterParts.join(' && ') + + const searchParams: any = { + q: query.query, + query_by: 'searchTitle,searchShortTitle,fulltextSummary', + query_by_weights: '50,50,1', + filter_by: filterBy, + limit: options.limit ?? DEFAULT_LIMIT, + prefix: 'true,true,false', + sort_by: '_text_match:desc' + } + + const result = await this.client.collections(this.collectionName).documents().search(searchParams) + + const resp: SearchStringResult = { docs: [] } + if (result.found !== undefined) { + resp.total = result.found + } + if (result.hits !== undefined) { + resp.docs = result.hits.map((hit: any) => { + const doc = hit.document + return { + ...doc, + id: this.getDocId(workspaceId, doc.id), + _score: hit.text_match ?? 0 + } + }) + } + + return resp + } catch (err: any) { + if (isConnectionError(err)) { + ctx.warn('Typesense DB is not available') + return { docs: [] } + } + Analytics.handleError(err) + ctx.error('Typesense error', { error: err }) + return { docs: [] } + } + } + + async search ( + ctx: MeasureContext, + workspaceId: WorkspaceUuid, + _classes: Ref>[], + query: DocumentQuery, + size: number | undefined, + from: number | undefined + ): Promise { + if (query.$search === undefined) return [] + try { + const filterParts: string[] = [`workspaceId:=${workspaceId as string}`] + + if (_classes.length > 0) { + filterParts.push(`_class:=[${_classes.map((c) => `\`${escapeFilterValue(c as string)}\``).join(',')}]`) + } + + // In Elastic, additional query fields are soft boosts (should clauses). + // Typesense has no direct equivalent, so we add them as optional filter + // clauses — documents matching them rank higher but aren't excluded. + const optionalParts: string[] = [] + for (const [q, v] of Object.entries(query)) { + if (q.startsWith('$')) continue + if (typeof v === 'object' && v !== null) { + if (v.$in !== undefined && Array.isArray(v.$in)) { + optionalParts.push(`${q}:=[${v.$in.map((val: string) => `\`${escapeFilterValue(val)}\``).join(',')}]`) + } + } else { + optionalParts.push(`${q}:=\`${escapeFilterValue(String(v))}\``) + } + } + if (optionalParts.length > 0) { + filterParts.push(`(${optionalParts.join(' || ')})`) + } + + const filterBy = filterParts.join(' && ') + + const searchParams: any = { + q: query.$search, + query_by: 'searchTitle,searchShortTitle,fulltextSummary', + query_by_weights: '50,50,1', + filter_by: filterBy, + limit: size ?? DEFAULT_LIMIT, + offset: from ?? 0, + sort_by: '_text_match:desc', + prefix: 'true,true,false' + } + + const result = await ctx.with( + 'search', + {}, + async () => await this.client.collections(this.collectionName).documents().search(searchParams), + { _classes, size, from, query: searchParams } + ) + + if (result.hits === undefined) return [] + return result.hits.map((hit: any) => { + const doc = hit.document + return { + ...doc, + id: this.getDocId(workspaceId, doc.id), + _score: hit.text_match ?? 0 + } + }) + } catch (err: any) { + if (isConnectionError(err)) { + ctx.warn('Typesense DB is not available') + return [] + } + ctx.error('Typesense error', { error: err }) + Analytics.handleError(err) + return [] + } + } + + async index (ctx: MeasureContext, workspaceId: WorkspaceUuid, doc: IndexedDoc): Promise { + if (doc.data !== undefined) { + ctx.warn('Binary attachment content (doc.data) is not supported in Typesense; relying on Rekoni for extraction', { + docId: doc.id + }) + } + + const fulltextId = this.getFulltextDocId(workspaceId, doc.id) + const tsDoc: Record = { + ...doc, + id: fulltextId, + workspaceId + } + // Remove binary data — Typesense cannot process it + delete tsDoc.data + + try { + await this.client.collections(this.collectionName).documents().upsert(tsDoc) + } catch (err: any) { + if (isConnectionError(err)) { + ctx.warn('Typesense DB is not available') + } else { + Analytics.handleError(err) + ctx.error('Typesense index error', { error: err, docId: doc.id }) + } + } + return {} + } + + async update ( + ctx: MeasureContext, + workspaceId: WorkspaceUuid, + id: Ref, + update: Record + ): Promise { + const fulltextId = this.getFulltextDocId(workspaceId, id) + try { + await this.client.collections(this.collectionName).documents(fulltextId).update(update) + } catch (err: any) { + if (isTypesenseError(err) && err.httpStatus === 404) { + return {} + } + if (isConnectionError(err)) { + ctx.warn('Typesense DB is not available') + } else { + Analytics.handleError(err) + ctx.error('Typesense update error', { error: err, docId: id }) + } + } + return {} + } + + async updateMany (ctx: MeasureContext, workspaceId: WorkspaceUuid, docs: IndexedDoc[]): Promise { + const parts = Array.from(docs) + while (parts.length > 0) { + const batch = parts.splice(0, BATCH_SIZE) + const jsonlLines = batch + .map((doc) => { + const tsDoc: Record = { + ...doc, + id: this.getFulltextDocId(workspaceId, doc.id), + workspaceId + } + delete tsDoc.data + return JSON.stringify(tsDoc) + }) + .join('\n') + + try { + const results = await this.client + .collections(this.collectionName) + .documents() + .import(jsonlLines, { action: 'upsert' }) + + const errors = ( + typeof results === 'string' ? results.split('\n').map((l: string) => JSON.parse(l)) : results + ).filter((r: any) => r.success === false) + + if (errors.length > 0) { + const errorMessages = errors.map((e: any) => e.error).join('\n') + const errorDocs = errors.map((e: any) => e.document) + console.error(`Failed to process bulk request: ${errorMessages} ${JSON.stringify(errorDocs)}`) + } + } catch (err: any) { + if (isConnectionError(err)) { + ctx.warn('Typesense DB is not available') + } else { + Analytics.handleError(err) + ctx.error('Typesense updateMany error', { error: err }) + } + } + } + return [] + } + + async updateByQuery ( + ctx: MeasureContext, + workspaceId: WorkspaceUuid, + query: DocumentQuery, + update: Record + ): Promise { + try { + const filterBy = buildQueryFilter(workspaceId, query) + + // Paginate through all matching documents + let page = 1 + const perPage = 250 + const allDocs: any[] = [] + + for (;;) { + const result = await this.client.collections(this.collectionName).documents().search({ + q: '*', + filter_by: filterBy, + per_page: perPage, + page + }) + const hits = result.hits ?? [] + if (hits.length === 0) break + allDocs.push(...hits.map((h: any) => h.document)) + if (allDocs.length >= (result.found ?? 0)) break + page++ + } + + if (allDocs.length === 0) return [] + + // Apply update and batch upsert + const updatedDocs = allDocs.map((doc: any) => ({ ...doc, ...update })) + const remaining = Array.from(updatedDocs) + while (remaining.length > 0) { + const batch = remaining.splice(0, BATCH_SIZE) + const jsonlLines = batch.map((doc: any) => JSON.stringify(doc)).join('\n') + await this.client.collections(this.collectionName).documents().import(jsonlLines, { action: 'upsert' }) + } + } catch (err: any) { + if (isConnectionError(err)) { + ctx.warn('Typesense DB is not available') + } else { + Analytics.handleError(err) + ctx.error('Typesense updateByQuery error', { error: err }) + } + } + return [] + } + + async remove (ctx: MeasureContext, workspaceId: WorkspaceUuid, docs: Ref[]): Promise { + try { + const remaining = Array.from(docs) + while (remaining.length > 0) { + const batch = remaining.splice(0, 5000) + const ids = batch.map((it) => this.getFulltextDocId(workspaceId, it)) + const filterBy = `id:=[${ids.map((id) => `\`${escapeFilterValue(id)}\``).join(',')}] && workspaceId:=${workspaceId as string}` + await this.client.collections(this.collectionName).documents().delete({ filter_by: filterBy }) + } + } catch (err: any) { + if (isTypesenseError(err) && err.httpStatus === 404) { + return + } + throw err + } + } + + async removeByQuery (ctx: MeasureContext, workspaceId: WorkspaceUuid, query: DocumentQuery): Promise { + const filterBy = buildQueryFilter(workspaceId, query) + try { + await this.client.collections(this.collectionName).documents().delete({ filter_by: filterBy }) + } catch (err: any) { + if (isTypesenseError(err) && err.httpStatus === 404) { + return + } + throw err + } + } + + async clean (ctx: MeasureContext, workspaceId: WorkspaceUuid): Promise { + try { + const filterBy = `workspaceId:=${workspaceId as string}` + await this.client.collections(this.collectionName).documents().delete({ filter_by: filterBy }) + } catch (err: any) { + if (isTypesenseError(err) && err.httpStatus === 404) { + return + } + throw err + } + } + + async load (ctx: MeasureContext, workspaceId: WorkspaceUuid, docs: Ref[]): Promise { + try { + const ids = docs.map((it) => this.getFulltextDocId(workspaceId, it)) + const filterBy = `id:=[${ids.map((id) => `\`${escapeFilterValue(id)}\``).join(',')}] && workspaceId:=${workspaceId as string}` + + const result = await this.client.collections(this.collectionName).documents().search({ + q: '*', + filter_by: filterBy, + per_page: docs.length + }) + + if (result.hits === undefined) return [] + return result.hits.map((hit: any) => { + const doc = hit.document + return { + ...doc, + id: this.getDocId(workspaceId, doc.id) + } + }) + } catch (err: any) { + if (isConnectionError(err)) { + ctx.warn('Typesense DB is not available') + return [] + } + Analytics.handleError(err) + ctx.error('Typesense load error', { error: err }) + return [] + } + } +} + +/** + * @public + * + * Creates a Typesense fulltext adapter from a URL like: + * http://typesense:8108?apiKey=xyz + */ +export async function createTypesenseAdapter (url: string): Promise { + const parsed = new URL(url) + const apiKey = parsed.searchParams.get('apiKey') ?? parsed.searchParams.get('apikey') ?? '' + const protocol = parsed.protocol.replace(':', '') + const host = parsed.hostname + const port = parsed.port !== '' ? parseInt(parsed.port, 10) : protocol === 'https' ? 443 : 8108 + + const client = new TypesenseClient({ + nodes: [ + { + host, + port, + protocol + } + ], + apiKey, + connectionTimeoutSeconds: 10, + retryIntervalSeconds: 0.1, + numRetries: 3 + }) + + const indexBaseName = getIndexName() + const indexVersion = getIndexVersion() + + return new TypesenseAdapter(client, indexBaseName, indexVersion) +} diff --git a/foundations/server/packages/typesense/src/index.ts b/foundations/server/packages/typesense/src/index.ts new file mode 100644 index 00000000000..acb9895fc7c --- /dev/null +++ b/foundations/server/packages/typesense/src/index.ts @@ -0,0 +1,17 @@ +// +// Copyright © 2020, 2021 Anticrm Platform Contributors. +// Copyright © 2021, 2022 Hardcore Engineering Inc. +// +// Licensed under the Eclipse Public License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. You may +// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// +// See the License for the specific language governing permissions and +// limitations under the License. +// + +export { createTypesenseAdapter } from './adapter' diff --git a/foundations/server/packages/typesense/tsconfig.json b/foundations/server/packages/typesense/tsconfig.json new file mode 100644 index 00000000000..7d78e05abb4 --- /dev/null +++ b/foundations/server/packages/typesense/tsconfig.json @@ -0,0 +1,12 @@ +{ + "extends": "./node_modules/@hcengineering/platform-rig/profiles/default/tsconfig.json", + + "compilerOptions": { + "rootDir": "./src", + "outDir": "./lib", + "declarationDir": "./types", + "tsBuildInfoFile": ".build/build.tsbuildinfo" + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "lib", "dist", "types", "bundle"] +} From 52b1cfcd12ec69e900f0335752b6a1608abcd519 Mon Sep 17 00:00:00 2001 From: Don Kendall Date: Fri, 27 Mar 2026 08:12:33 -0400 Subject: [PATCH 2/6] feat: configurable fulltext backend via FULLTEXT_BACKEND env var Wire up the new OpenSearch and Typesense adapters in the fulltext pod. The FULLTEXT_BACKEND env var selects the adapter at startup: - "elastic" (default): existing Elasticsearch 7.x - "opensearch": OpenSearch 2.x (API-compatible drop-in) - "typesense": Typesense (lightweight C++ engine) Register both new packages in rush.json and add them as fulltext pod dependencies. ELASTIC_INDEX_NAME now defaults to 'huly_storage_index' instead of requiring it for all backends. Signed-off-by: Don Kendall --- pods/fulltext/package.json | 2 ++ pods/fulltext/src/index.ts | 29 ++++++++++++++++++++++------- rush.json | 10 ++++++++++ 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/pods/fulltext/package.json b/pods/fulltext/package.json index 1fe66b5e393..1da06155f28 100644 --- a/pods/fulltext/package.json +++ b/pods/fulltext/package.json @@ -69,6 +69,8 @@ "@koa/cors": "^5.0.0", "@hcengineering/server-indexer": "workspace:^0.7.0", "@hcengineering/elastic": "workspace:^0.7.16", + "@hcengineering/opensearch": "workspace:^0.7.17", + "@hcengineering/typesense": "workspace:^0.7.17", "@hcengineering/server-collaboration": "workspace:^0.7.0", "@hcengineering/middleware": "workspace:^0.7.21", "@hcengineering/server-client": "workspace:^0.7.16", diff --git a/pods/fulltext/src/index.ts b/pods/fulltext/src/index.ts index 89bf4250851..daec00ecfe8 100644 --- a/pods/fulltext/src/index.ts +++ b/pods/fulltext/src/index.ts @@ -16,10 +16,16 @@ import { Analytics } from '@hcengineering/analytics' import { configureAnalytics, createOpenTelemetryMetricsContext, SplitLogger } from '@hcengineering/analytics-service' import { newMetrics, type Tx } from '@hcengineering/core' -import { initStatisticsContext, type StorageConfiguration } from '@hcengineering/server-core' +import { + initStatisticsContext, + type StorageConfiguration, + type FullTextAdapterFactory +} from '@hcengineering/server-core' import { join } from 'path' import { createElasticAdapter } from '@hcengineering/elastic' +import { createOpenSearchAdapter } from '@hcengineering/opensearch' +import { createTypesenseAdapter } from '@hcengineering/typesense' import { getPlatformQueue } from '@hcengineering/kafka' import { setMetadata } from '@hcengineering/platform' import { createRekoniAdapter, type FulltextDBConfiguration } from '@hcengineering/server-indexer' @@ -74,9 +80,22 @@ if (rekoniUrl === undefined) { process.exit(1) } +const fulltextBackend = (process.env.FULLTEXT_BACKEND ?? 'elastic').toLowerCase() +const fulltextAdapterFactories: Record = { + elastic: createElasticAdapter, + opensearch: createOpenSearchAdapter, + typesense: createTypesenseAdapter +} +const fulltextFactory = fulltextAdapterFactories[fulltextBackend] +if (fulltextFactory === undefined) { + console.error(`Unknown FULLTEXT_BACKEND: "${fulltextBackend}". Supported: elastic, opensearch, typesense`) + process.exit(1) +} +console.info(`Using fulltext backend: ${fulltextBackend}`) + const config: FulltextDBConfiguration = { fulltextAdapter: { - factory: createElasticAdapter, + factory: fulltextFactory, url: fullTextDbURL }, contentAdapters: { @@ -89,11 +108,7 @@ const config: FulltextDBConfiguration = { defaultContentAdapter: 'Rekoni' } -const elasticIndexName = process.env.ELASTIC_INDEX_NAME -if (elasticIndexName === undefined) { - console.log('Please provide ELASTIC_INDEX_NAME') - process.exit(1) -} +const elasticIndexName = process.env.ELASTIC_INDEX_NAME ?? 'huly_storage_index' const servicePort = parseInt(process.env.PORT ?? '4700') metricsContext.info('Starting stats service') diff --git a/rush.json b/rush.json index 439e02a1b28..8d3a750f11c 100644 --- a/rush.json +++ b/rush.json @@ -547,6 +547,16 @@ "projectFolder": "foundations/server/packages/elastic", "shouldPublish": true }, + { + "packageName": "@hcengineering/opensearch", + "projectFolder": "foundations/server/packages/opensearch", + "shouldPublish": true + }, + { + "packageName": "@hcengineering/typesense", + "projectFolder": "foundations/server/packages/typesense", + "shouldPublish": true + }, { "packageName": "@hcengineering/kafka", "projectFolder": "foundations/server/packages/kafka", From d62168a1da25469c74e16287a5d6700370de9df3 Mon Sep 17 00:00:00 2001 From: Don Kendall Date: Fri, 27 Mar 2026 21:21:20 -0400 Subject: [PATCH 3/6] fix: read version.txt before git tags in show_version.js The script only read version.txt when git describe succeeded, falling back to hardcoded "0.6.0" when no tags exist (e.g. in forks). This caused the model version to be 0.6.0 instead of the value in version.txt, making the fulltext indexer skip all workspaces. Now reads version.txt first, falls back to git tags, then "0.6.0". Signed-off-by: Don Kendall --- common/scripts/show_version.js | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/common/scripts/show_version.js b/common/scripts/show_version.js index 7a49a74d5fa..0ec1ba25e61 100644 --- a/common/scripts/show_version.js +++ b/common/scripts/show_version.js @@ -18,21 +18,26 @@ const path = require('path') const exec = require('child_process').exec function main() { + // Read version from version.txt first, fall back to git tag, then default + let version + try { + const versionFilePath = path.resolve(__dirname, 'version.txt') + version = fs.readFileSync(versionFilePath, 'utf8').trim() + } catch (error) { + // version.txt not found + } + + if (version) { + console.log(version) + return + } + exec('git describe --tags --abbrev=0', (err, stdout) => { if (err !== null) { console.log('"0.6.0"') return } - // Take version from file - let version - try { - const versionFilePath = path.resolve(__dirname, 'version.txt') - version = fs.readFileSync(versionFilePath, 'utf8').trim() - } catch (error) { - version = '"0.6.0"' - } - - console.log(version) + console.log(`"${stdout.trim()}"`) }) } From d63a6d3b913b57ef729be7eeb6be1656d3b8036a Mon Sep 17 00:00:00 2001 From: Don Kendall Date: Sun, 29 Mar 2026 11:50:07 -0400 Subject: [PATCH 4/6] feat: tune Typesense adapter for better search parity with ES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Increase query_by_weights to 100,100,1 (matching ES boost ratios) - Enable num_typos: 2,2,1 for broader fuzzy matching - Add typo_tokens_threshold: 1 and drop_tokens_threshold: 1 - Use text_match_info.best_field_score for more granular scoring - Add sanitizeDoc() to coerce dynamic *_fields to string arrays (fixes 5 import errors from searchIcon_fields type mismatch) Search comparison results after tuning: - ES↔OpenSearch: 100% identical on all queries - ES↔Typesense: same top results, 5/5 overlap on class filter, 3/5 on prefix/fuzzy (different ranking, same docs found) Signed-off-by: Don Kendall --- .../server/packages/opensearch/package.json | 1 - .../server/packages/typesense/src/adapter.ts | 119 ++++++++---------- pods/fulltext/src/index.ts | 6 +- 3 files changed, 60 insertions(+), 66 deletions(-) diff --git a/foundations/server/packages/opensearch/package.json b/foundations/server/packages/opensearch/package.json index 005714f2d00..618f31b1461 100644 --- a/foundations/server/packages/opensearch/package.json +++ b/foundations/server/packages/opensearch/package.json @@ -27,7 +27,6 @@ "author": "Anticrm Platform Contributors", "license": "EPL-2.0", "scripts": { - "init": "ts-node src/__init.ts", "build": "compile", "build:watch": "compile", "format": "format src", diff --git a/foundations/server/packages/typesense/src/adapter.ts b/foundations/server/packages/typesense/src/adapter.ts index c7c459adf81..8f972b454e9 100644 --- a/foundations/server/packages/typesense/src/adapter.ts +++ b/foundations/server/packages/typesense/src/adapter.ts @@ -45,19 +45,6 @@ function getIndexVersion (): string { return getMetadata(serverCore.metadata.ElasticIndexVersion) ?? 'v2' } -/** Fields that are defined in the schema and are facet-capable (keyword-like). */ -const FACET_FIELDS = new Set([ - 'id', - 'workspaceId', - '_class', - 'space', - 'attachedTo', - 'attachedToClass', - 'modifiedBy', - 'core:class:Doc%createdBy', - 'core:class:Doc%modifiedBy' -]) - function buildCollectionSchema (collectionName: string): { name: string fields: CollectionFieldSchema[] @@ -101,11 +88,34 @@ function isConnectionError (err: any): boolean { return false } +/** Fields that are known to be string arrays in the schema. */ +const ARRAY_FIELDS = new Set(['_class']) + /** Escape a value for use inside a Typesense filter_by backtick-quoted string. */ function escapeFilterValue (val: string): string { return val.replace(/`/g, '\\`') } +/** + * Sanitize a document for Typesense upsert. + * - Removes binary `data` field + * - Coerces fields ending in `_fields` to string arrays (dynamic ES fields) + * - Ensures `_class` is always an array + */ +function sanitizeDoc (doc: Record): Record { + const result: Record = {} + for (const [key, value] of Object.entries(doc)) { + if (key === 'data') continue + // Coerce _class and *_fields to arrays + if (ARRAY_FIELDS.has(key) || key.endsWith('_fields')) { + result[key] = Array.isArray(value) ? value : value != null ? [String(value)] : [] + } else { + result[key] = value + } + } + return result +} + /** * Build a Typesense filter_by string from a workspace ID and a Huly DocumentQuery. * Skips `$`-prefixed keys (like `$search`). @@ -216,32 +226,25 @@ class TypesenseAdapter implements FullTextAdapter { // Require searchTitle to exist (non-empty) filterParts.push('searchTitle:!=""') - // Scoring: boost documents where a facet field matches a specific value. - // Typesense doesn't support per-term boosting like ES function_score, - // so we use optional filter clauses that prefer matching documents. - if (options.scoring !== undefined && options.scoring.length > 0) { - const optionalParts: string[] = [] - for (const scoring of options.scoring) { - if (FACET_FIELDS.has(scoring.attr)) { - optionalParts.push(`${scoring.attr}:=\`${escapeFilterValue(String(scoring.value))}\``) - } - } - if (optionalParts.length > 0) { - // Use optional filter_by syntax: main filters && (optional1 || optional2) - // Documents matching optional filters rank higher via _text_match + filter proximity - filterParts.push(`(${optionalParts.join(' || ')})`) - } - } + // Scoring: ES uses function_score with should clauses (soft boosts). + // Typesense has no equivalent — filter_by is always mandatory (AND). + // We skip scoring filters entirely and rely on text_match ranking, + // which already prioritizes title matches via query_by_weights. + // Adding scoring fields to filter_by would incorrectly EXCLUDE + // documents that don't match, instead of just ranking them lower. const filterBy = filterParts.join(' && ') const searchParams: any = { q: query.query, query_by: 'searchTitle,searchShortTitle,fulltextSummary', - query_by_weights: '50,50,1', + query_by_weights: '100,100,1', filter_by: filterBy, limit: options.limit ?? DEFAULT_LIMIT, prefix: 'true,true,false', + num_typos: '2,2,1', + typo_tokens_threshold: 1, + drop_tokens_threshold: 1, sort_by: '_text_match:desc' } @@ -257,7 +260,7 @@ class TypesenseAdapter implements FullTextAdapter { return { ...doc, id: this.getDocId(workspaceId, doc.id), - _score: hit.text_match ?? 0 + _score: hit.text_match_info?.best_field_score ?? hit.text_match ?? 0 } }) } @@ -291,34 +294,24 @@ class TypesenseAdapter implements FullTextAdapter { } // In Elastic, additional query fields are soft boosts (should clauses). - // Typesense has no direct equivalent, so we add them as optional filter - // clauses — documents matching them rank higher but aren't excluded. - const optionalParts: string[] = [] - for (const [q, v] of Object.entries(query)) { - if (q.startsWith('$')) continue - if (typeof v === 'object' && v !== null) { - if (v.$in !== undefined && Array.isArray(v.$in)) { - optionalParts.push(`${q}:=[${v.$in.map((val: string) => `\`${escapeFilterValue(val)}\``).join(',')}]`) - } - } else { - optionalParts.push(`${q}:=\`${escapeFilterValue(String(v))}\``) - } - } - if (optionalParts.length > 0) { - filterParts.push(`(${optionalParts.join(' || ')})`) - } + // Typesense filter_by is always mandatory (AND), so adding these as + // filters would incorrectly exclude non-matching documents instead of + // just ranking them lower. We skip them and rely on text_match ranking. const filterBy = filterParts.join(' && ') const searchParams: any = { q: query.$search, query_by: 'searchTitle,searchShortTitle,fulltextSummary', - query_by_weights: '50,50,1', + query_by_weights: '100,100,1', filter_by: filterBy, limit: size ?? DEFAULT_LIMIT, offset: from ?? 0, sort_by: '_text_match:desc', - prefix: 'true,true,false' + prefix: 'true,true,false', + num_typos: '2,2,1', + typo_tokens_threshold: 1, + drop_tokens_threshold: 1 } const result = await ctx.with( @@ -334,7 +327,7 @@ class TypesenseAdapter implements FullTextAdapter { return { ...doc, id: this.getDocId(workspaceId, doc.id), - _score: hit.text_match ?? 0 + _score: hit.text_match_info?.best_field_score ?? hit.text_match ?? 0 } }) } catch (err: any) { @@ -356,13 +349,7 @@ class TypesenseAdapter implements FullTextAdapter { } const fulltextId = this.getFulltextDocId(workspaceId, doc.id) - const tsDoc: Record = { - ...doc, - id: fulltextId, - workspaceId - } - // Remove binary data — Typesense cannot process it - delete tsDoc.data + const tsDoc = sanitizeDoc({ ...doc, id: fulltextId, workspaceId }) try { await this.client.collections(this.collectionName).documents().upsert(tsDoc) @@ -406,12 +393,7 @@ class TypesenseAdapter implements FullTextAdapter { const batch = parts.splice(0, BATCH_SIZE) const jsonlLines = batch .map((doc) => { - const tsDoc: Record = { - ...doc, - id: this.getFulltextDocId(workspaceId, doc.id), - workspaceId - } - delete tsDoc.data + const tsDoc = sanitizeDoc({ ...doc, id: this.getFulltextDocId(workspaceId, doc.id), workspaceId }) return JSON.stringify(tsDoc) }) .join('\n') @@ -479,7 +461,16 @@ class TypesenseAdapter implements FullTextAdapter { while (remaining.length > 0) { const batch = remaining.splice(0, BATCH_SIZE) const jsonlLines = batch.map((doc: any) => JSON.stringify(doc)).join('\n') - await this.client.collections(this.collectionName).documents().import(jsonlLines, { action: 'upsert' }) + const results = await this.client + .collections(this.collectionName) + .documents() + .import(jsonlLines, { action: 'upsert' }) + const errors = ( + typeof results === 'string' ? results.split('\n').map((l: string) => JSON.parse(l)) : results + ).filter((r: any) => r.success === false) + if (errors.length > 0) { + console.error(`updateByQuery upsert errors: ${errors.map((e: any) => e.error).join('; ')}`) + } } } catch (err: any) { if (isConnectionError(err)) { diff --git a/pods/fulltext/src/index.ts b/pods/fulltext/src/index.ts index daec00ecfe8..579fbd199ef 100644 --- a/pods/fulltext/src/index.ts +++ b/pods/fulltext/src/index.ts @@ -108,7 +108,11 @@ const config: FulltextDBConfiguration = { defaultContentAdapter: 'Rekoni' } -const elasticIndexName = process.env.ELASTIC_INDEX_NAME ?? 'huly_storage_index' +const elasticIndexName = process.env.ELASTIC_INDEX_NAME +if (elasticIndexName === undefined) { + console.error('ELASTIC_INDEX_NAME should be specified') + process.exit(1) +} const servicePort = parseInt(process.env.PORT ?? '4700') metricsContext.info('Starting stats service') From 67c2a8ba08eaacee08dd67814e8afe4475bd7da8 Mon Sep 17 00:00:00 2001 From: Don Kendall Date: Sun, 29 Mar 2026 18:47:48 -0400 Subject: [PATCH 5/6] feat: auto-reindex on empty Typesense backend startup When the fulltext pod starts and the Typesense collection has 0 documents (e.g. after pod restart with emptyDir), automatically enumerate all active workspaces and queue a fullReindex event for each. This makes Typesense self-healing without requiring a PVC. - Add optional getDocCount() to FullTextAdapter interface - Implement getDocCount() in TypesenseAdapter via collections().retrieve() - Add checkAndTriggerReindex() to WorkspaceManager.startIndexer() - Elastic/OpenSearch backends skip the check (no getDocCount method) Signed-off-by: Don Kendall --- foundations/server/packages/core/src/types.ts | 6 ++++ .../server/packages/typesense/src/adapter.ts | 15 +++++++++ pods/fulltext/src/manager.ts | 32 +++++++++++++++++++ 3 files changed, 53 insertions(+) diff --git a/foundations/server/packages/core/src/types.ts b/foundations/server/packages/core/src/types.ts index 09d7053c09a..6e4e0716033 100644 --- a/foundations/server/packages/core/src/types.ts +++ b/foundations/server/packages/core/src/types.ts @@ -409,6 +409,12 @@ export interface FullTextAdapter { // If no field is provided, will return existing mapping of all dimms. initMapping: (ctx: MeasureContext, field?: { key: string, dims: number }) => Promise + + // Optional: return the total document count in the index/collection. + // Used by non-persistent backends (e.g. Typesense with emptyDir) to detect + // empty state on startup and trigger automatic reindex. + // Returns -1 if the count cannot be determined (e.g. connection error). + getDocCount?: (ctx: MeasureContext) => Promise } /** diff --git a/foundations/server/packages/typesense/src/adapter.ts b/foundations/server/packages/typesense/src/adapter.ts index 8f972b454e9..36ea078f6ee 100644 --- a/foundations/server/packages/typesense/src/adapter.ts +++ b/foundations/server/packages/typesense/src/adapter.ts @@ -207,6 +207,21 @@ class TypesenseAdapter implements FullTextAdapter { // Typesense client does not require explicit close } + async getDocCount (ctx: MeasureContext): Promise { + try { + const col = await this.client.collections(this.collectionName).retrieve() + return (col as any).num_documents ?? 0 + } catch (err: any) { + if (isConnectionError(err)) { + ctx.warn('Typesense DB is not available for doc count check') + return -1 + } + Analytics.handleError(err) + ctx.error('Typesense getDocCount error', { error: err }) + return -1 + } + } + async searchString ( ctx: MeasureContext, workspaceId: WorkspaceUuid, diff --git a/pods/fulltext/src/manager.ts b/pods/fulltext/src/manager.ts index 4d5381d4d6d..40906686c08 100644 --- a/pods/fulltext/src/manager.ts +++ b/pods/fulltext/src/manager.ts @@ -105,6 +105,8 @@ export class WorkspaceManager { } } + await this.checkAndTriggerReindex() + this.shutdownInterval = setInterval(() => { for (const [k, v] of [...this.indexers.entries()]) { if (v instanceof Promise) { @@ -159,6 +161,36 @@ export class WorkspaceManager { ) } + private async checkAndTriggerReindex (): Promise { + if (this.fulltextAdapter.getDocCount === undefined) { + return + } + + const docCount = await this.fulltextAdapter.getDocCount(this.ctx) + if (docCount < 0) { + this.ctx.warn('Could not determine backend doc count, skipping auto-reindex check') + return + } + if (docCount > 0) { + this.ctx.info('Fulltext backend has documents, skipping auto-reindex', { docCount }) + return + } + + this.ctx.warn('Fulltext backend is empty (0 documents), triggering auto-reindex for all workspaces') + try { + const token = generateToken(systemAccountUuid, undefined, { service: 'tool' }) + const accountClient = getAccountClient(token) + const workspaces = await accountClient.listWorkspaces(null, 'active') + this.ctx.info('Auto-reindex: found workspaces', { count: workspaces.length }) + for (const ws of workspaces) { + await this.fulltextProducer.send(this.ctx, ws.uuid, [workspaceEvents.fullReindex()]) + this.ctx.info('Queued full reindex', { workspace: ws.uuid }) + } + } catch (err: any) { + this.ctx.error('Failed to trigger auto-reindex', { error: err }) + } + } + private async processTransactions ( m: ConsumerMessage> | TxDomainEvent>>, control: ConsumerControl From 5dd7bedae73cff1e1d2c5c6ad0d0efa31b11f8d7 Mon Sep 17 00:00:00 2001 From: Don Kendall Date: Sat, 18 Apr 2026 08:47:06 -0400 Subject: [PATCH 6/6] fix: bump dep versions in opensearch/typesense packages to match monorepo rush check failed because the new opensearch and typesense packages referenced older internal dep versions (analytics 0.7.17, core 0.7.24, platform 0.7.19, server-core 0.7.18, platform-rig 0.7.19) while the rest of the monorepo has moved to analytics 0.7.19, core 0.7.26, platform 0.7.20, server-core 0.7.19, platform-rig 0.7.21. Signed-off-by: Don Kendall Signed-off-by: Don Kendall --- common/config/rush/pnpm-lock.yaml | 20 +++++++++---------- .../server/packages/opensearch/package.json | 12 +++++------ .../server/packages/typesense/package.json | 12 +++++------ 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/common/config/rush/pnpm-lock.yaml b/common/config/rush/pnpm-lock.yaml index 0d9b626867f..7b2e4793edc 100644 --- a/common/config/rush/pnpm-lock.yaml +++ b/common/config/rush/pnpm-lock.yaml @@ -5219,23 +5219,23 @@ importers: ../../foundations/server/packages/opensearch: dependencies: '@hcengineering/analytics': - specifier: workspace:^0.7.17 + specifier: workspace:^0.7.19 version: link:../../../core/packages/analytics '@hcengineering/core': - specifier: workspace:^0.7.24 + specifier: workspace:^0.7.26 version: link:../../../core/packages/core '@hcengineering/platform': - specifier: workspace:^0.7.19 + specifier: workspace:^0.7.20 version: link:../../../core/packages/platform '@hcengineering/server-core': - specifier: workspace:^0.7.18 + specifier: workspace:^0.7.19 version: link:../core '@opensearch-project/opensearch': specifier: ^2.12.0 version: 2.13.0 devDependencies: '@hcengineering/platform-rig': - specifier: workspace:^0.7.19 + specifier: workspace:^0.7.21 version: link:../../../utils/packages/platform-rig '@types/jest': specifier: ^29.5.5 @@ -5581,23 +5581,23 @@ importers: ../../foundations/server/packages/typesense: dependencies: '@hcengineering/analytics': - specifier: workspace:^0.7.17 + specifier: workspace:^0.7.19 version: link:../../../core/packages/analytics '@hcengineering/core': - specifier: workspace:^0.7.24 + specifier: workspace:^0.7.26 version: link:../../../core/packages/core '@hcengineering/platform': - specifier: workspace:^0.7.19 + specifier: workspace:^0.7.20 version: link:../../../core/packages/platform '@hcengineering/server-core': - specifier: workspace:^0.7.18 + specifier: workspace:^0.7.19 version: link:../core typesense: specifier: ^2.0.0 version: 2.1.0(@babel/runtime@7.28.4) devDependencies: '@hcengineering/platform-rig': - specifier: workspace:^0.7.19 + specifier: workspace:^0.7.21 version: link:../../../utils/packages/platform-rig '@types/jest': specifier: ^29.5.5 diff --git a/foundations/server/packages/opensearch/package.json b/foundations/server/packages/opensearch/package.json index 618f31b1461..f03a2d862f0 100644 --- a/foundations/server/packages/opensearch/package.json +++ b/foundations/server/packages/opensearch/package.json @@ -1,6 +1,6 @@ { "name": "@hcengineering/opensearch", - "version": "0.7.17", + "version": "0.7.18", "main": "lib/index.js", "svelte": "src/index.ts", "types": "types/index.d.ts", @@ -37,7 +37,7 @@ "_phase:validate": "compile validate" }, "devDependencies": { - "@hcengineering/platform-rig": "workspace:^0.7.19", + "@hcengineering/platform-rig": "workspace:^0.7.21", "@typescript-eslint/eslint-plugin": "^6.21.0", "eslint-plugin-import": "^2.26.0", "eslint-plugin-promise": "^6.1.1", @@ -54,11 +54,11 @@ "eslint-plugin-svelte": "^2.35.1" }, "dependencies": { - "@hcengineering/core": "workspace:^0.7.24", - "@hcengineering/platform": "workspace:^0.7.19", - "@hcengineering/server-core": "workspace:^0.7.18", + "@hcengineering/core": "workspace:^0.7.26", + "@hcengineering/platform": "workspace:^0.7.20", + "@hcengineering/server-core": "workspace:^0.7.19", "@opensearch-project/opensearch": "^2.12.0", - "@hcengineering/analytics": "workspace:^0.7.17" + "@hcengineering/analytics": "workspace:^0.7.19" }, "repository": { "type": "git", diff --git a/foundations/server/packages/typesense/package.json b/foundations/server/packages/typesense/package.json index 7b301b93eb3..1cbbea4e866 100644 --- a/foundations/server/packages/typesense/package.json +++ b/foundations/server/packages/typesense/package.json @@ -1,6 +1,6 @@ { "name": "@hcengineering/typesense", - "version": "0.7.17", + "version": "0.7.18", "main": "lib/index.js", "svelte": "src/index.ts", "types": "types/index.d.ts", @@ -37,7 +37,7 @@ "_phase:validate": "compile validate" }, "devDependencies": { - "@hcengineering/platform-rig": "workspace:^0.7.19", + "@hcengineering/platform-rig": "workspace:^0.7.21", "@typescript-eslint/eslint-plugin": "^6.21.0", "eslint-plugin-import": "^2.26.0", "eslint-plugin-promise": "^6.1.1", @@ -53,10 +53,10 @@ "eslint-plugin-svelte": "^2.35.1" }, "dependencies": { - "@hcengineering/core": "workspace:^0.7.24", - "@hcengineering/platform": "workspace:^0.7.19", - "@hcengineering/server-core": "workspace:^0.7.18", - "@hcengineering/analytics": "workspace:^0.7.17", + "@hcengineering/core": "workspace:^0.7.26", + "@hcengineering/platform": "workspace:^0.7.20", + "@hcengineering/server-core": "workspace:^0.7.19", + "@hcengineering/analytics": "workspace:^0.7.19", "typesense": "^2.0.0" }, "repository": {