From 8ca1749d96351814c6b51eb393ee69b2acd7c52f Mon Sep 17 00:00:00 2001 From: Wilson Date: Thu, 30 Apr 2026 15:19:47 +0100 Subject: [PATCH 1/3] fix(migrations): fix postgres integer to boolean cast in meter_snapshots (#320) --- .../backend/drizzle/migrations_pg/0040_fuzzy_metal_master.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/backend/drizzle/migrations_pg/0040_fuzzy_metal_master.sql b/packages/backend/drizzle/migrations_pg/0040_fuzzy_metal_master.sql index 73d49de0..1a87c3da 100644 --- a/packages/backend/drizzle/migrations_pg/0040_fuzzy_metal_master.sql +++ b/packages/backend/drizzle/migrations_pg/0040_fuzzy_metal_master.sql @@ -1,2 +1,3 @@ -ALTER TABLE "meter_snapshots" ALTER COLUMN "success" SET DATA TYPE boolean;--> statement-breakpoint +ALTER TABLE "meter_snapshots" ALTER COLUMN "success" DROP DEFAULT;--> statement-breakpoint +ALTER TABLE "meter_snapshots" ALTER COLUMN "success" SET DATA TYPE boolean USING (success::integer::boolean);--> statement-breakpoint ALTER TABLE "meter_snapshots" ALTER COLUMN "success" SET DEFAULT true; \ No newline at end of file From c40d694e1b2b2f4fb9fd400db36d972fa285110d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 30 Apr 2026 07:20:08 -0700 Subject: [PATCH 2/3] build(deps): bump @mariozechner/pi-ai from 0.70.5 to 0.70.6 (#321) --- bun.lock | 4 ++-- packages/backend/package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bun.lock b/bun.lock index 8360dbe7..dc041f92 100644 --- a/bun.lock +++ b/bun.lock @@ -23,7 +23,7 @@ "@fastify/multipart": "^10.0.0", "@fastify/static": "^9.1.3", "@google/genai": "^1.50.1", - "@mariozechner/pi-ai": "0.70.5", + "@mariozechner/pi-ai": "0.70.6", "@plexus/shared": "workspace:*", "@sinclair/typebox": "^0.34.49", "dotenv": "^17.4.2", @@ -317,7 +317,7 @@ "@lukeed/ms": ["@lukeed/ms@2.0.2", "", {}, 
"sha512-9I2Zn6+NJLfaGoz9jN3lpwDgAYvfGeNYdbAIjJOqzs4Tpc+VU3Jqq4IofSUBKajiDS8k9fZIg18/z13mpk1bsA=="], - "@mariozechner/pi-ai": ["@mariozechner/pi-ai@0.70.5", "", { "dependencies": { "@anthropic-ai/sdk": "^0.90.0", "@aws-sdk/client-bedrock-runtime": "^3.1030.0", "@google/genai": "^1.40.0", "@mistralai/mistralai": "^2.2.0", "chalk": "^5.6.2", "openai": "6.26.0", "partial-json": "^0.1.7", "proxy-agent": "^6.5.0", "typebox": "^1.1.24", "undici": "^7.19.1", "zod-to-json-schema": "^3.24.6" }, "bin": { "pi-ai": "dist/cli.js" } }, "sha512-eyeyOfu/YiqzY6q391oRYdmnPIIU1VTKAn3hWIvzqkRHkcArd41/YynG8mw6bgoLdmCnIBoY3fD6nzEHEHLIMA=="], + "@mariozechner/pi-ai": ["@mariozechner/pi-ai@0.70.6", "", { "dependencies": { "@anthropic-ai/sdk": "^0.90.0", "@aws-sdk/client-bedrock-runtime": "^3.1030.0", "@google/genai": "^1.40.0", "@mistralai/mistralai": "^2.2.0", "chalk": "^5.6.2", "openai": "6.26.0", "partial-json": "^0.1.7", "proxy-agent": "^6.5.0", "typebox": "^1.1.24", "undici": "^7.19.1", "zod-to-json-schema": "^3.24.6" }, "bin": { "pi-ai": "dist/cli.js" } }, "sha512-LVAadu0Y+hb7Bj7EDiLsx6AuGxHlxDq0euLzyqX698i9qt0BW6a+oQSUIZQz4rJwExF18OvyL7ygJ5781ojrIQ=="], "@mistralai/mistralai": ["@mistralai/mistralai@2.2.1", "", { "dependencies": { "ws": "^8.18.0", "zod": "^3.25.0 || ^4.0.0", "zod-to-json-schema": "^3.25.0" } }, "sha512-uKU8CZmL2RzYKmplsU01hii4p3pe4HqJefpWNRWXm1Tcm0Sm4xXfwSLIy4k7ZCPlbETCGcp69E7hZs+WOJ5itQ=="], diff --git a/packages/backend/package.json b/packages/backend/package.json index aa381df9..b1a086e4 100644 --- a/packages/backend/package.json +++ b/packages/backend/package.json @@ -17,7 +17,7 @@ "@fastify/multipart": "^10.0.0", "@fastify/static": "^9.1.3", "@google/genai": "^1.50.1", - "@mariozechner/pi-ai": "0.70.5", + "@mariozechner/pi-ai": "0.70.6", "@plexus/shared": "workspace:*", "@sinclair/typebox": "^0.34.49", "dotenv": "^17.4.2", From 68d10a7e7f5647861cbbd9603d19f42189b0af1b Mon Sep 17 00:00:00 2001 From: Matt Cowger Date: Thu, 30 Apr 2026 10:07:06 -0700 Subject: 
[PATCH 3/3] feat(quota): add disableQuotaCooldown option to quota checkers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a per-quota-checker disable_quota_cooldown flag that suppresses proactive cooldown injection from the quota scheduler when utilization exceeds the exhaustion threshold. Unlike the provider-level disable_cooldown (which blocks all cooldowns including circuit-breaker), this only prevents quota-based cooldowns — failure/429 cooldowns continue to work normally. - Extract QuotaCheckerBaseSchema to share common fields across all 22 discriminated union variants, adding disable_quota_cooldown there - Add disableQuotaCooldown to QuotaConfigSchema and thread it through buildProviderQuotaConfigs in both config.ts and config-service.ts - Add quotaCheckerDisableCooldown column to both SQLite and Postgres provider schemas (integer/boolean with default false) - Read/write the new column in config-repository using existing fromBool/toBool helpers to avoid the integer-vs-boolean dialect issue - Gate cooldown injection in QuotaScheduler.applyCooldownsFromResult - Add test suite covering skip, default path, and persistence behaviour --- .../drizzle/schema/postgres/providers.ts | 1 + .../drizzle/schema/sqlite/providers.ts | 1 + packages/backend/src/config.ts | 134 ++++++------------ packages/backend/src/db/config-repository.ts | 2 + .../backend/src/services/config-service.ts | 1 + .../quota/__tests__/quota-scheduler.test.ts | 79 ++++++++++- .../src/services/quota/quota-scheduler.ts | 7 + 7 files changed, 136 insertions(+), 89 deletions(-) diff --git a/packages/backend/drizzle/schema/postgres/providers.ts b/packages/backend/drizzle/schema/postgres/providers.ts index 8525e4ec..56b74a35 100644 --- a/packages/backend/drizzle/schema/postgres/providers.ts +++ b/packages/backend/drizzle/schema/postgres/providers.ts @@ -38,6 +38,7 @@ export const providers = pgTable( quotaCheckerEnabled: 
boolean('quota_checker_enabled').notNull().default(true), quotaCheckerInterval: integer('quota_checker_interval').notNull().default(30), quotaCheckerOptions: text('quota_checker_options'), // JSON or encrypted string + quotaCheckerDisableCooldown: boolean('quota_checker_disable_cooldown').notNull().default(false), // GPU Profile settings — display hint + resolved numeric params // gpu_profile is kept as a display hint; the 4 numeric fields are the source of truth. gpuProfile: text('gpu_profile'), // GPU profile name (e.g. 'H100', 'custom') — display hint only diff --git a/packages/backend/drizzle/schema/sqlite/providers.ts b/packages/backend/drizzle/schema/sqlite/providers.ts index 7630960d..409705b7 100644 --- a/packages/backend/drizzle/schema/sqlite/providers.ts +++ b/packages/backend/drizzle/schema/sqlite/providers.ts @@ -25,6 +25,7 @@ export const providers = sqliteTable( quotaCheckerEnabled: integer('quota_checker_enabled').notNull().default(1), quotaCheckerInterval: integer('quota_checker_interval').notNull().default(30), quotaCheckerOptions: text('quota_checker_options'), // JSON + quotaCheckerDisableCooldown: integer('quota_checker_disable_cooldown').notNull().default(0), // GPU Profile settings — display hint + resolved numeric params // gpu_profile is kept as a display hint; the 4 numeric fields are the source of truth. gpuProfile: text('gpu_profile'), // GPU profile name (e.g. 'H100', 'custom') — display hint only diff --git a/packages/backend/src/config.ts b/packages/backend/src/config.ts index bbd83bc7..ff24cfa6 100644 --- a/packages/backend/src/config.ts +++ b/packages/backend/src/config.ts @@ -196,159 +196,108 @@ const PoeQuotaCheckerOptionsSchema = z.object({ endpoint: z.string().url().optional(), }); +// Common fields shared by every quota checker variant. 
+const QuotaCheckerBaseSchema = z.object({ + enabled: z.boolean().default(true), + intervalMinutes: z.number().min(1).default(30), + id: z.string().trim().min(1).optional(), + /** + * When true, the quota scheduler will not inject a provider-wide cooldown + * when utilization exceeds the exhaustion threshold. Quota data is still + * fetched and persisted. Circuit-breaker cooldowns (from failures/429s) + * are unaffected — use the provider-level disable_cooldown for those. + * Defaults to false (current behaviour). + */ + disable_quota_cooldown: z.boolean().optional().default(false), +}); + const ProviderQuotaCheckerSchema = z.discriminatedUnion('type', [ - z.object({ + QuotaCheckerBaseSchema.extend({ type: z.literal('naga'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: NagaQuotaCheckerOptionsSchema.optional(), }), - z.object({ + QuotaCheckerBaseSchema.extend({ type: z.literal('synthetic'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: SyntheticQuotaCheckerOptionsSchema.optional().default({}), }), - z.object({ + QuotaCheckerBaseSchema.extend({ type: z.literal('nanogpt'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: NanoGPTQuotaCheckerOptionsSchema.optional().default({}), }), - z.object({ + QuotaCheckerBaseSchema.extend({ type: z.literal('zai'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: ZAIQuotaCheckerOptionsSchema.optional().default({}), }), - z.object({ + QuotaCheckerBaseSchema.extend({ type: z.literal('moonshot'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: 
MoonshotQuotaCheckerOptionsSchema.optional().default({}), }), - z.object({ + QuotaCheckerBaseSchema.extend({ type: z.literal('novita'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: NovitaQuotaCheckerOptionsSchema.optional().default({}), }), - z.object({ + QuotaCheckerBaseSchema.extend({ type: z.literal('minimax'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: MiniMaxQuotaCheckerOptionsSchema, }), - z.object({ + QuotaCheckerBaseSchema.extend({ type: z.literal('openrouter'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: OpenRouterQuotaCheckerOptionsSchema, }), - z.object({ + QuotaCheckerBaseSchema.extend({ type: z.literal('kilo'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: KiloQuotaCheckerOptionsSchema.optional().default({}), }), - z.object({ + QuotaCheckerBaseSchema.extend({ type: z.literal('openai-codex'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: OpenAICodexQuotaCheckerOptionsSchema.optional().default({}), }), - z.object({ + QuotaCheckerBaseSchema.extend({ type: z.literal('kimi-code'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: KimiCodeQuotaCheckerOptionsSchema.optional().default({}), }), - z.object({ + QuotaCheckerBaseSchema.extend({ type: z.literal('claude-code'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: ClaudeCodeQuotaCheckerOptionsSchema.optional().default({}), }), - z.object({ + 
QuotaCheckerBaseSchema.extend({ type: z.literal('copilot'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: CopilotQuotaCheckerOptionsSchema.optional().default({}), }), - z.object({ + QuotaCheckerBaseSchema.extend({ type: z.literal('wisdomgate'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: WisdomGateQuotaCheckerOptionsSchema.optional(), }), - z.object({ + QuotaCheckerBaseSchema.extend({ type: z.literal('apertis'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: ApertisQuotaCheckerOptionsSchema.optional().default({}), }), - z.object({ + QuotaCheckerBaseSchema.extend({ type: z.literal('minimax-coding'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: MiniMaxCodingQuotaCheckerOptionsSchema.optional().default({}), }), - z.object({ + QuotaCheckerBaseSchema.extend({ type: z.literal('poe'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: PoeQuotaCheckerOptionsSchema.optional().default({}), }), - z.object({ + QuotaCheckerBaseSchema.extend({ type: z.literal('gemini-cli'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: GeminiCliQuotaCheckerOptionsSchema.optional().default({}), }), - z.object({ + QuotaCheckerBaseSchema.extend({ type: z.literal('antigravity'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: AntigravityQuotaCheckerOptionsSchema.optional().default({}), }), - z.object({ + QuotaCheckerBaseSchema.extend({ type: 
z.literal('neuralwatt'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: NeuralwattQuotaCheckerOptionsSchema.optional().default({}), }), - z.object({ + QuotaCheckerBaseSchema.extend({ type: z.literal('ollama'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: OllamaQuotaCheckerOptionsSchema, }), - z.object({ + QuotaCheckerBaseSchema.extend({ type: z.literal('zenmux'), - enabled: z.boolean().default(true), - intervalMinutes: z.number().min(1).default(30), - id: z.string().trim().min(1).optional(), options: ZenmuxQuotaCheckerOptionsSchema.optional(), }), ]); @@ -560,6 +509,14 @@ const QuotaConfigSchema = z.object({ enabled: z.boolean().default(true), intervalMinutes: z.number().min(1).default(30), options: z.record(z.string(), z.any()).default({}), + /** + * When true, the quota scheduler will not inject a provider-wide cooldown + * when utilization exceeds the exhaustion threshold. Quota data is still + * fetched and persisted. Circuit-breaker cooldowns (from failures/429s) + * are unaffected — use the provider-level disable_cooldown for those. + * Defaults to false (current behaviour). + */ + disableQuotaCooldown: z.boolean().default(false), }); export const McpServerConfigSchema = z.object({ @@ -829,6 +786,7 @@ function buildProviderQuotaConfigs(config: z.infer enabled: true, intervalMinutes: quotaChecker.intervalMinutes, options, + disableQuotaCooldown: quotaChecker.disable_quota_cooldown === true, }); } diff --git a/packages/backend/src/db/config-repository.ts b/packages/backend/src/db/config-repository.ts index 4db69c57..33184f7e 100644 --- a/packages/backend/src/db/config-repository.ts +++ b/packages/backend/src/db/config-repository.ts @@ -299,6 +299,7 @@ export class ConfigRepository { quotaCheckerOptions: config.quota_checker?.options ? 
encryptJsonField(config.quota_checker.options) : null, + quotaCheckerDisableCooldown: fromBool(config.quota_checker?.disable_quota_cooldown === true), // GPU Profile settings for inference energy calculation gpuProfile: config.gpu_profile ?? null, gpuRamGb: config.gpu_ram_gb ?? null, @@ -442,6 +443,7 @@ export class ConfigRepository { type: row.quotaCheckerType, enabled: toBool(row.quotaCheckerEnabled), intervalMinutes: row.quotaCheckerInterval, + disable_quota_cooldown: toBool(row.quotaCheckerDisableCooldown), ...(row.quotaCheckerId ? { id: row.quotaCheckerId } : {}), ...(row.quotaCheckerOptions ? { options: decryptJsonField(row.quotaCheckerOptions) } : {}), }; diff --git a/packages/backend/src/services/config-service.ts b/packages/backend/src/services/config-service.ts index 154a2a58..545c7265 100644 --- a/packages/backend/src/services/config-service.ts +++ b/packages/backend/src/services/config-service.ts @@ -445,6 +445,7 @@ export class ConfigService { enabled: true, intervalMinutes: quotaChecker.intervalMinutes, options, + disableQuotaCooldown: quotaChecker.disable_quota_cooldown === true, }); } diff --git a/packages/backend/src/services/quota/__tests__/quota-scheduler.test.ts b/packages/backend/src/services/quota/__tests__/quota-scheduler.test.ts index c1d39ba5..af9786e0 100644 --- a/packages/backend/src/services/quota/__tests__/quota-scheduler.test.ts +++ b/packages/backend/src/services/quota/__tests__/quota-scheduler.test.ts @@ -16,13 +16,17 @@ import type { QuotaConfig } from '../../../config'; const CHECKER_ID = 'quota-persistence-checker'; const makeConfig = ( - overrides: Partial<{ maxUtilizationPercent: number }> & { id?: string; provider?: string } = {} + overrides: Partial<{ maxUtilizationPercent: number; disableQuotaCooldown: boolean }> & { + id?: string; + provider?: string; + } = {} ): QuotaConfig => ({ id: overrides.id ?? CHECKER_ID, provider: overrides.provider ?? 
'test-provider', type: 'synthetic', enabled: true, intervalMinutes: 60, + disableQuotaCooldown: overrides.disableQuotaCooldown ?? false, options: { ...(overrides.maxUtilizationPercent !== undefined ? { maxUtilizationPercent: overrides.maxUtilizationPercent } @@ -279,3 +283,76 @@ describe('QuotaScheduler maxUtilizationPercent', () => { expect(isHealthy).toBe(false); }); }); + +describe('QuotaScheduler disableQuotaCooldown', () => { + const PROVIDER = 'disable-quota-cooldown-test-provider'; + + beforeEach(async () => { + await closeDatabase(); + process.env.DATABASE_URL = process.env.PLEXUS_TEST_DB_URL ?? process.env.DATABASE_URL; + initializeDatabase(process.env.DATABASE_URL); + await runMigrations(); + + const db = getDatabase() as any; + const schema = getSchema() as any; + await db.delete(schema.meterSnapshots); + }); + + afterEach(async () => { + QuotaScheduler.getInstance().stop(); + const cooldownManager = CooldownManager.getInstance(); + await cooldownManager.markProviderSuccess(PROVIDER, ''); + await closeDatabase(); + }); + + it('does not inject a quota cooldown when disableQuotaCooldown is true, even at 100% utilization', async () => { + const scheduler = QuotaScheduler.getInstance() as any; + const config = makeConfig({ provider: PROVIDER, disableQuotaCooldown: true }); + scheduler.configs.set('no-quota-cooldown-checker', config); + + await scheduler.applyCooldownsFromResult( + makeMeterResult(100, 'no-quota-cooldown-checker', PROVIDER), + config + ); + + const isHealthy = await CooldownManager.getInstance().isProviderHealthy(PROVIDER, ''); + expect(isHealthy).toBe(true); + }); + + it('still injects a cooldown when disableQuotaCooldown is false (default)', async () => { + const scheduler = QuotaScheduler.getInstance() as any; + const config = makeConfig({ provider: PROVIDER, disableQuotaCooldown: false }); + scheduler.configs.set('with-quota-cooldown-checker', config); + + await scheduler.applyCooldownsFromResult( + makeMeterResult(99, 
'with-quota-cooldown-checker', PROVIDER), + config + ); + + const isHealthy = await CooldownManager.getInstance().isProviderHealthy(PROVIDER, ''); + expect(isHealthy).toBe(false); + }); + + it('disableQuotaCooldown does not affect quota data persistence', async () => { + const scheduler = QuotaScheduler.getInstance() as any; + const config = makeConfig({ provider: PROVIDER, disableQuotaCooldown: true }); + scheduler.configs.set('no-quota-cooldown-persist-checker', config); + + const result = makeMeterResult(100, 'no-quota-cooldown-persist-checker', PROVIDER); + await scheduler.persistResult(result); + await scheduler.applyCooldownsFromResult(result, config); + + const db = getDatabase() as any; + const schema = getSchema() as any; + const rows = await db + .select() + .from(schema.meterSnapshots) + .where(eq(schema.meterSnapshots.checkerId, 'no-quota-cooldown-persist-checker')); + + // Meter data was still persisted + expect(rows.length).toBeGreaterThan(0); + // But no cooldown was injected + const isHealthy = await CooldownManager.getInstance().isProviderHealthy(PROVIDER, ''); + expect(isHealthy).toBe(true); + }); +}); diff --git a/packages/backend/src/services/quota/quota-scheduler.ts b/packages/backend/src/services/quota/quota-scheduler.ts index c3fd6d5e..4508ebf1 100644 --- a/packages/backend/src/services/quota/quota-scheduler.ts +++ b/packages/backend/src/services/quota/quota-scheduler.ts @@ -136,6 +136,13 @@ export class QuotaScheduler { ): Promise<void> { if (!result.success || result.meters.length === 0) return; + if (config.disableQuotaCooldown) { + logger.debug( + `Quota checker '${result.checkerId}' has disableQuotaCooldown=true — skipping cooldown injection for provider '${result.provider}'.` + ); + return; + } + const exhaustionThreshold = (config.options.maxUtilizationPercent as number | undefined) ?? DEFAULT_EXHAUSTION_THRESHOLD; const cooldownManager = CooldownManager.getInstance();