From 753193186afa0fbb17f5b52218c832f32bf2bd47 Mon Sep 17 00:00:00 2001 From: "Michael B. Klein" Date: Thu, 19 Mar 2026 15:01:08 +0000 Subject: [PATCH] Refactor auth filtering to be more efficient Add `visibility` query param Update OpenAPI spec --- api/src/api/request/pipeline.js | 79 +++++++++++++++------- api/test/unit/api/request/pipeline.test.js | 61 ++++++++++------- docs/docs/spec/openapi.yaml | 17 ++++- docs/docs/spec/types.yaml | 14 ++++ 4 files changed, 120 insertions(+), 51 deletions(-) diff --git a/api/src/api/request/pipeline.js b/api/src/api/request/pipeline.js index 66fa8a0e..87049232 100644 --- a/api/src/api/request/pipeline.js +++ b/api/src/api/request/pipeline.js @@ -1,22 +1,57 @@ const sortJson = require("sort-json"); const { defaultSearchSize } = require("../../environment"); -function filterFor(query, event) { - const matchTheQuery = query; - const beUnpublished = { term: { published: false } }; - const beRestricted = { term: { visibility: "Private" } }; - - let filter = { must: [matchTheQuery], must_not: [] }; - - if (!event.userToken.can("read:Unpublished")) { - filter.must_not.push(beUnpublished); - } +function filterFor(event) { + const publishedValues = event.userToken.can("read:Unpublished") + ? [true, false] + : [true]; + const userVisibility = new Set( + event.userToken.can("read:Private") + ? ["Private", "Institution", "Public"] + : ["Institution", "Public"] + ); + const requestVisibility = event?.queryStringParameters?.visibility + ?.split(",") + ?.map((v) => v[0].toUpperCase() + v.slice(1)) || [ + "Private", + "Institution", + "Public", + ]; + const visibilityValues = requestVisibility.filter((v) => + userVisibility.has(v) + ); + + return [ + { terms: { published: publishedValues } }, + { terms: { visibility: visibilityValues } }, + ]; +} - if (!event.userToken.can("read:Private")) { - filter.must_not.push(beRestricted); +function addFilter(query, filter) { + let result = { ...query }; + if (result.bool) { + result.bool.filter ||= []; + result.bool.filter.push(...filter); + } else if (result.neural) { + const boolFilter = { bool: { filter: filter } }; + if (result.neural.filter) { + boolFilter.bool.filter.push(result.neural.filter); + } + const neuralField = Object.keys(result.neural)[0]; + result.neural[neuralField].filter = boolFilter; + } else if (result.hybrid) { + result.hybrid.queries = result.hybrid.queries.map((subQuery) => + addFilter(subQuery, filter) + ); + } else { + result = { + bool: { + must: [result], + filter: filter, + }, + }; } - - return { bool: filter }; + return result; } module.exports = class RequestPipeline { @@ -33,19 +68,11 @@ module.exports = class RequestPipeline { // - Add `track_total_hits` to search context (so we can get accurate hits.total.value) authFilter(event) { - if (this.searchContext.query?.hybrid?.queries) { - this.searchContext.query = { - hybrid: { - queries: this.searchContext.query.hybrid.queries.map((query) => - filterFor(query, event) - ), - }, - }; - } else { - this.searchContext.query = filterFor(this.searchContext.query, event); - } + this.searchContext.query = addFilter( + this.searchContext.query, + filterFor(event) + ); this.searchContext.track_total_hits = true; - return this; } diff --git a/api/test/unit/api/request/pipeline.test.js b/api/test/unit/api/request/pipeline.test.js index bd15c917..701e51f3 100644 --- a/api/test/unit/api/request/pipeline.test.js +++ b/api/test/unit/api/request/pipeline.test.js @@ -36,9 +36,9 @@ describe("RequestPipeline", () => { expect(result.searchContext.query.bool.must).to.deep.include( requestBody.query ); - expect(result.searchContext.query.bool.must_not).to.deep.include( - { term: { visibility: "Private" } }, - { term: { published: false } } + expect(result.searchContext.query.bool.filter).to.deep.include( + { terms: { visibility: ["Institution", "Public"] } }, + { terms: { published: [true] } } ); }); @@ -56,9 +56,9 @@ describe("RequestPipeline", () => { expect(result.searchContext.query.bool.must).to.deep.include( requestBody.query ); - expect(result.searchContext.query.bool.must_not).to.deep.include( - { term: { visibility: "Private" } }, - { term: { published: false } } + expect(result.searchContext.query.bool.filter).to.deep.include( + { terms: { visibility: ["Institution", "Public"] } }, + { terms: { published: [true] } } ); }); @@ -71,11 +71,11 @@ describe("RequestPipeline", () => { expect(result.searchContext.query.bool.must).to.deep.include( requestBody.query ); - expect(result.searchContext.query.bool.must_not).to.deep.include({ - term: { published: false }, + expect(result.searchContext.query.bool.filter).to.deep.include({ + terms: { published: [true] }, }); - expect(result.searchContext.query.bool.must_not).not.to.deep.include({ - term: { visibility: "Private" }, + expect(result.searchContext.query.bool.filter).to.deep.include({ + terms: { visibility: ["Private", "Institution", "Public"] }, }); }); }); @@ -90,9 +90,9 @@ describe("RequestPipeline", () => { expect(result.searchContext.query.bool.must).to.deep.include( requestBody.query ); - expect(result.searchContext.query.bool.must_not).to.deep.include( - { term: { visibility: "Private" } }, - { term: { published: false } } + expect(result.searchContext.query.bool.filter).to.deep.include( + { terms: { visibility: ["Institution", "Public"] } }, + { terms: { published: [true] } } ); }); @@ -105,7 +105,12 @@ describe("RequestPipeline", () => { expect(result.searchContext.query.bool.must).to.deep.include( requestBody.query ); - expect(result.searchContext.query.bool.must_not).to.be.empty; + expect(result.searchContext.query.bool.filter).to.deep.include({ + terms: { published: [true, false] }, + }); + expect(result.searchContext.query.bool.filter).to.deep.include({ + terms: { visibility: ["Private", "Institution", "Public"] }, + }); }); }); @@ -140,15 +145,25 @@ describe("RequestPipeline", () => { }; pipeline = new RequestPipeline(requestBody); const result = pipeline.authFilter(event); - for (const i in requestBody.query.hybrid.queries) { - const originalQuery = requestBody.query.hybrid.queries[i]; - const newQuery = result.searchContext.query.hybrid.queries[i]; - expect(newQuery.bool.must).to.deep.include(originalQuery); - expect(newQuery.bool.must_not).to.deep.include( - { term: { visibility: "Private" } }, - { term: { published: false } } - ); - } + const [originalNeuralQuery, originalMatchQuery] = + requestBody.query.hybrid.queries; + const [newNeuralQuery, newMatchQuery] = + result.searchContext.query.hybrid.queries; + + expect(newNeuralQuery.neural.embedding).to.deep.include( + originalNeuralQuery.neural.embedding + ); + expect(newMatchQuery).to.deep.include(originalMatchQuery); + expect( + newNeuralQuery.neural.embedding.filter.bool.filter + ).to.deep.include( + { terms: { visibility: ["Institution", "Public"] } }, + { terms: { published: [true] } } + ); + expect(newMatchQuery.bool.filter).to.deep.include( + { terms: { visibility: ["Institution", "Public"] } }, + { terms: { published: [true] } } + ); }); }); diff --git a/docs/docs/spec/openapi.yaml b/docs/docs/spec/openapi.yaml index 53303c2e..8d47ca22 100644 --- a/docs/docs/spec/openapi.yaml +++ b/docs/docs/spec/openapi.yaml @@ -36,7 +36,7 @@ components: bearerAuth: type: http scheme: bearer - bearerFormat: JWT + bearerFormat: JWT security: - anonymous: [] - bearerAuth: [] @@ -279,6 +279,7 @@ paths: - $ref: "./types.yaml#/components/parameters/size" - $ref: "./types.yaml#/components/parameters/sort" - $ref: "./types.yaml#/components/parameters/as" + - $ref: "./types.yaml#/components/parameters/visibility" responses: 200: $ref: "./types.yaml#/components/responses/SearchResponse" @@ -286,12 +287,17 @@ paths: operationId: postSearch tags: - Search + parameters: + - $ref: "./types.yaml#/components/parameters/page" + - $ref: "./types.yaml#/components/parameters/size" + - $ref: "./types.yaml#/components/parameters/sort" + - $ref: "./types.yaml#/components/parameters/as" + - $ref: "./types.yaml#/components/parameters/visibility" requestBody: content: application/json: schema: type: object - responses: 200: $ref: "./types.yaml#/components/responses/SearchResponse" @@ -308,6 +314,7 @@ paths: - $ref: "./types.yaml#/components/parameters/size" - $ref: "./types.yaml#/components/parameters/sort" - $ref: "./types.yaml#/components/parameters/as" + - $ref: "./types.yaml#/components/parameters/visibility" responses: 200: $ref: "./types.yaml#/components/responses/SearchResponse" @@ -321,6 +328,12 @@ paths: - $ref: "./types.yaml#/components/parameters/size" - $ref: "./types.yaml#/components/parameters/sort" - $ref: "./types.yaml#/components/parameters/as" + - $ref: "./types.yaml#/components/parameters/visibility" + requestBody: + content: + application/json: + schema: + type: object responses: 200: $ref: "./types.yaml#/components/responses/SearchResponse" diff --git a/docs/docs/spec/types.yaml b/docs/docs/spec/types.yaml index 3a75e2d5..95edac92 100644 --- a/docs/docs/spec/types.yaml +++ b/docs/docs/spec/types.yaml @@ -103,6 +103,20 @@ components: type: integer minimum: 1 maximum: 300 + visibility: + name: visibility + in: query + required: false + description: Filter search results by visibility + explode: false + schema: + type: array + items: + type: string + enum: + - public + - institution + - private responses: DocumentResponse: description: A single document response