Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@

## Unreleased

### Fixed
- **`hippo invalidate` tag matching is now EXACT** (incident 2026-06-09: a pattern containing the word "hippo" weakened 10 bystander memories tagged `hippo` while the actual target escaped). A memory's tags now match only when the FULL pattern (trimmed, compared case-insensitively) equals one of its tags; token-level matching applies to content only. Both the CLI command and the auto-learn-from-git invalidation path inherit the safer contract.

### Added
- **`hippo invalidate --dry-run`** previews exactly which memories would be hit (id + headline) and writes nothing.
- **`hippo invalidate --id <memory-id>`** invalidates exactly one memory (tenant-scoped; pattern and `--id` are mutually exclusive). Pinned memories are never touched and are now reported as skipped.

### Changed
- **Value-less boolean flags no longer swallow a following positional.** `--dry-run` is parsed as boolean everywhere, so `hippo <cmd> --dry-run <arg>` now works in any argument order (previously the argument was silently consumed as the flag's value).

## 1.23.0 (2026-06-08): pluggable embedding providers (bring a frontier embedder)

### Added
Expand Down
59 changes: 50 additions & 9 deletions src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,12 @@ async function runViaServerIfAvailable(
}
}

function parseArgs(argv: string[]): { command: string; args: string[]; flags: Record<string, string | boolean | string[]> } {
// Flags that NEVER take a value. Without this, a positional following the
// flag is silently swallowed as its value (`invalidate --dry-run "X"` would
// eat the pattern). Every existing --dry-run consumer reads it as boolean.
const BOOLEAN_FLAGS = new Set(['dry-run']);

export function parseArgs(argv: string[]): { command: string; args: string[]; flags: Record<string, string | boolean | string[]> } {
const [, , command = '', ...rest] = argv;
const args: string[] = [];
const flags: Record<string, string | boolean | string[]> = {};
Expand All @@ -356,7 +361,7 @@ function parseArgs(argv: string[]): { command: string; args: string[]; flags: Re
const key = part.slice(2);
const next = rest[i + 1];

if (!next || next.startsWith('--')) {
if (!next || next.startsWith('--') || BOOLEAN_FLAGS.has(key)) {
// Boolean flag
flags[key] = true;
i++;
Expand Down Expand Up @@ -7824,6 +7829,13 @@ Commands:
graph extract Rebuild the entity/relation graph from consolidated objects
(decisions/policies/customer-notes/project-briefs); idempotent
invalidate "<pattern>" Actively weaken memories matching an old pattern
(content overlap, or a tag EXACTLY equal to the
full pattern - never token-level tag matching)
--id <memory-id> Invalidate exactly one memory (instead of a pattern)
--dry-run Preview what would be hit; writes nothing
Note: a pattern equal to the system tag
'invalidated' re-weakens previously invalidated
memories - preview with --dry-run first
--reason "<why>" Optional: what replaced it
wm <sub> Working memory — bounded buffer for current state
wm push Push a working memory entry
Expand Down Expand Up @@ -7915,7 +7927,9 @@ Examples:
hippo note new "Acme Corp" --text "Renewal call: wants SSO before Q3; champion is the VP Eng"
hippo note list --customer "Acme Corp" --status active
hippo graph extract
hippo invalidate "REST API" --dry-run
hippo invalidate "REST API" --reason "migrated to GraphQL"
hippo invalidate --id mem_a1b2c3d4e5f6 --reason "superseded by new policy"
hippo export memories.json
hippo export --format markdown memories.md
hippo sleep --dry-run
Expand Down Expand Up @@ -8527,23 +8541,50 @@ async function main(): Promise<void> {
case 'invalidate': {
requireInit(hippoRoot);
const target = args[0];
if (!target) {
console.error('Usage: hippo invalidate "<old pattern>" [--reason "<why>"]');
if (flags['id'] === true) {
// Value-less --id must never silently fall through to pattern mode
// (pattern mode writes broadly; an ignored --id reverses user intent).
console.error('--id requires a memory id');
process.exit(1);
}
const onlyId = typeof flags['id'] === 'string' ? (flags['id'] as string) : undefined;
if (typeof flags['dry-run'] === 'string') {
// Unreachable via argv (dry-run is in BOOLEAN_FLAGS); guards
// programmatically-built flags objects.
console.error('--dry-run takes no value');
process.exit(1);
}
const dryRun = flags['dry-run'] === true;
if ((target && onlyId) || (!target && !onlyId)) {
console.error('Usage: hippo invalidate "<old pattern>" [--dry-run] [--reason "<why>"]');
console.error(' hippo invalidate --id <memory-id> [--dry-run] [--reason "<why>"]');
console.error('Pass a pattern OR --id, not both. Tag matching is EXACT: the full pattern must equal a tag.');
process.exit(1);
}
const reason = flags['reason'] as string || null;
const invTarget: InvalidationTarget = {
from: target,
from: target ?? `id:${onlyId}`,
to: reason,
type: 'migration',
};
const result = invalidateMatching(hippoRoot, invTarget, resolveTenantId({}));
if (result.invalidated === 0) {
console.log(`No memories matched "${target}".`);
const result = invalidateMatching(hippoRoot, invTarget, resolveTenantId({}), { dryRun, onlyId });
const label = target ? `"${target}"` : `--id ${onlyId}`;
if (result.dryRun) {
if (result.invalidated === 0) {
console.log(`DRY RUN - no memories would match ${label}.`);
} else {
console.log(`DRY RUN - ${result.invalidated} memories WOULD be invalidated:`);
result.preview.forEach(p => console.log(` ${p.id} ${p.headline}`));
}
} else if (result.invalidated === 0) {
console.log(`No memories matched ${label}.`);
} else {
console.log(`Invalidated ${result.invalidated} memories referencing "${target}".`);
console.log(`Invalidated ${result.invalidated} memories referencing ${label}.`);
result.targets.forEach(id => console.log(` ${id}`));
}
if (result.skippedPinned.length > 0) {
console.log(`Skipped ${result.skippedPinned.length} pinned: ${result.skippedPinned.join(', ')}`);
}
break;
}

Expand Down
82 changes: 62 additions & 20 deletions src/invalidation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,16 @@ export interface InvalidationTarget {
/**
 * Outcome of one invalidation pass, as filled in by `invalidateMatching`.
 * A dry run populates the same fields but writes no entry.
 */
export interface InvalidationResult {
// Count of matched, non-pinned memories (incremented in dry runs too).
invalidated: number;
targets: string[]; // IDs of affected memories
skippedPinned: string[]; // matched but pinned — never touched
// True when the pass was requested as a dry run.
dryRun: boolean;
// headline = entry content with whitespace runs collapsed, cut to 60 chars.
preview: { id: string; headline: string }[]; // what was (or would be) hit
}

/**
 * Optional switches for `invalidateMatching`. Omitting the object means
 * pattern matching with writes enabled.
 */
export interface InvalidationOptions {
/** Evaluate matches but write nothing. */
dryRun?: boolean;
/**
 * Consider ONLY this memory id (no content/tag matching). A pinned memory
 * is still skipped in this mode and reported in `skippedPinned`.
 */
onlyId?: string;
}

/**
Expand Down Expand Up @@ -64,40 +74,72 @@ export function extractInvalidationTarget(message: string): InvalidationTarget |
* - Halves half_life_days
* - Sets confidence to 'stale'
* - Adds 'invalidated' tag
* - Skips pinned memories
* - Skips pinned memories (reported in skippedPinned)
*
* Tag matching is EXACT (2026-06-09 incident): the FULL pattern must equal a
* tag. Token-level matching applies to content only, so a pattern that merely
* CONTAINS a common tag word ("hippo") can no longer mass-weaken every memory
* carrying that tag. Both callers (the CLI `invalidate` command and the
* auto-learn-from-git path) inherit this contract.
*/
export function invalidateMatching(
hippoRoot: string,
target: InvalidationTarget,
tenantId?: string,
options?: InvalidationOptions,
): InvalidationResult {
// L9: tenantId opt-in. When provided, only this tenant's memories are
// considered for weakening. When undefined, behaves as it did pre-1.12.1
// (host-wide invalidation across all tenants in the store).
// options.onlyId resolves by FILTERING this tenant-scoped list — never a
// direct id lookup — so an id from another tenant is invisible here.
const entries = loadAllEntries(hippoRoot, tenantId);
const fromTokens = invalidationTokenize(target.from);
const result: InvalidationResult = { invalidated: 0, targets: [] };
const exactTag = target.from.toLowerCase().trim();
const dryRun = options?.dryRun === true;
const result: InvalidationResult = {
invalidated: 0,
targets: [],
skippedPinned: [],
dryRun,
preview: [],
};

for (const entry of entries) {
if (entry.pinned) continue;

const contentTokens = invalidationTokenize(entry.content);
const tagTokens = entry.tags.map(t => t.toLowerCase());

// Check if the memory references the old pattern
const tokenMatch = matchScore(fromTokens, contentTokens);
const tagMatch = fromTokens.some(t => tagTokens.includes(t));

if (tokenMatch >= 0.5 || tagMatch) {
entry.half_life_days = Math.max(1, Math.floor(entry.half_life_days / 2));
entry.confidence = 'stale';
if (!entry.tags.includes('invalidated')) {
entry.tags.push('invalidated');
}
writeEntry(hippoRoot, entry);
result.invalidated++;
result.targets.push(entry.id);
if (options?.onlyId !== undefined) {
if (entry.id !== options.onlyId) continue;
} else {
const contentTokens = invalidationTokenize(entry.content);
const tagTokens = entry.tags.map(t => t.toLowerCase());

// Check if the memory references the old pattern
const tokenMatch = matchScore(fromTokens, contentTokens);
const tagMatch = tagTokens.includes(exactTag);

if (!(tokenMatch >= 0.5 || tagMatch)) continue;
}

// Pinned check runs AFTER matching so pinned would-be targets are
// observable in skippedPinned (pattern mode and onlyId mode alike).
if (entry.pinned) {
result.skippedPinned.push(entry.id);
continue;
}

result.invalidated++;
result.targets.push(entry.id);
result.preview.push({
id: entry.id,
headline: entry.content.replace(/\s+/g, ' ').slice(0, 60),
});
if (dryRun) continue;

entry.half_life_days = Math.max(1, Math.floor(entry.half_life_days / 2));
entry.confidence = 'stale';
if (!entry.tags.includes('invalidated')) {
entry.tags.push('invalidated');
}
writeEntry(hippoRoot, entry);
}

return result;
Expand Down
100 changes: 100 additions & 0 deletions tests/cli-invalidate.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/**
* CLI integration tests for `hippo invalidate` safety (2026-06-09 fix):
* pattern-XOR-id argument contract, value-less --id rejection, --dry-run
* preview output, and exact-tag matching through the real CLI surface.
*/

import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { mkdtempSync, rmSync, existsSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join, resolve } from 'node:path';
import { execFileSync } from 'node:child_process';
import { initStore, writeEntry, readEntry } from '../src/store.js';
import { createMemory } from '../src/memory.js';

const CLI = resolve(__dirname, '..', 'bin', 'hippo.js');

/**
 * Spawns the built CLI (`node bin/hippo.js ...args`) synchronously and
 * returns what it printed.
 *
 * The child runs with `cwd` as its working directory and `HIPPO_HOME`
 * pointing at `<cwd>/.hippo`.
 *
 * @param cwd  Working directory for the child process.
 * @param args Arguments placed after the CLI script path.
 * @param opts Pass `{ ok: false }` when a non-zero exit is expected; the
 *             captured streams are then returned instead of thrown.
 * @returns The child's stdout. `stderr` is always `''` on a zero exit and
 *          only carries text on the tolerated-failure path.
 * @throws Error when the CLI bundle has not been built, or when the child
 *         fails and the failure was not tolerated via `opts.ok === false`.
 */
function runCli(
  cwd: string,
  args: string[],
  opts: { ok?: boolean } = {},
): { stdout: string; stderr: string } {
  // Fail fast with a build hint rather than an opaque spawn error.
  const cliBuilt = existsSync(CLI);
  if (!cliBuilt) {
    throw new Error(`bin/hippo.js not found at ${CLI} - run \`npm run build\` first`);
  }

  let printed: string;
  try {
    printed = execFileSync('node', [CLI, ...args], {
      cwd,
      encoding: 'utf8',
      env: { ...process.env, HIPPO_HOME: join(cwd, '.hippo') },
    });
  } catch (thrown) {
    // execFileSync attaches the captured streams and exit status to the error.
    const failure = thrown as { stdout?: string; stderr?: string; status?: number };
    if (opts.ok !== false) {
      throw new Error(`CLI exit ${failure.status}: ${failure.stderr ?? ''}\nstdout: ${failure.stdout ?? ''}`);
    }
    return { stdout: failure.stdout ?? '', stderr: failure.stderr ?? '' };
  }

  return { stdout: printed, stderr: '' };
}

describe('hippo invalidate CLI contract', () => {
  // Each test gets its own scratch directory with a freshly initialised store.
  let sandbox: string;
  let storeRoot: string;

  beforeEach(() => {
    sandbox = mkdtempSync(join(tmpdir(), 'hippo-cli-invalidate-'));
    storeRoot = join(sandbox, '.hippo');
    initStore(storeRoot);
  });

  afterEach(() => {
    rmSync(sandbox, { recursive: true, force: true });
  });

  it('rejects pattern + --id together', () => {
    const failed = runCli(sandbox, ['invalidate', 'pattern', '--id', 'mem_x'], { ok: false });
    expect(failed.stderr).toContain('Pass a pattern OR --id, not both');
  });

  it('rejects neither pattern nor --id', () => {
    const failed = runCli(sandbox, ['invalidate'], { ok: false });
    expect(failed.stderr).toContain('Usage: hippo invalidate');
  });

  it('rejects a value-less --id instead of falling through to pattern mode', () => {
    const failed = runCli(sandbox, ['invalidate', 'pattern', '--id'], { ok: false });
    expect(failed.stderr).toContain('--id requires a memory id');
  });

  it('--dry-run before the pattern previews without writing (parser allowlist end-to-end)', () => {
    const seeded = createMemory('REST API uses Bearer tokens everywhere', { tags: ['api'] });
    writeEntry(storeRoot, seeded);

    const output = runCli(sandbox, ['invalidate', '--dry-run', 'REST API']).stdout;
    expect(output).toContain('DRY RUN');
    expect(output).toContain(seeded.id);

    // The stored entry must be exactly as it was before the preview.
    const reread = readEntry(storeRoot, seeded.id);
    expect(reread!.tags).not.toContain('invalidated');
    expect(reread!.confidence).toBe(seeded.confidence);
  });

  it('--id invalidates exactly one memory end-to-end', () => {
    const chosen = createMemory('Unrelated gardening notes', { tags: ['garden'] });
    const sibling = createMemory('Unrelated cooking notes', { tags: ['cooking'] });
    for (const entry of [chosen, sibling]) {
      writeEntry(storeRoot, entry);
    }

    const output = runCli(sandbox, ['invalidate', '--id', chosen.id]).stdout;
    expect(output).toContain('Invalidated 1 memories');
    expect(readEntry(storeRoot, chosen.id)!.confidence).toBe('stale');
    expect(readEntry(storeRoot, sibling.id)!.confidence).toBe(sibling.confidence);
  });

  it('a pattern merely CONTAINING a tag word does not hit that tag (incident shape)', () => {
    // The content shares no words with the pattern; only the tag overlaps.
    const bystander = createMemory('Weekly grocery budget tracking notes', { tags: ['hippo'] });
    writeEntry(storeRoot, bystander);

    const output = runCli(sandbox, ['invalidate', 'hippo salience gate experiment']).stdout;
    expect(output).toContain('No memories matched');
    expect(readEntry(storeRoot, bystander.id)!.tags).not.toContain('invalidated');
  });
});
45 changes: 45 additions & 0 deletions tests/cli-parse-boolean-flags.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/**
* Locks the BOOLEAN_FLAGS allowlist behavior in parseArgs (2026-06-09
* invalidate-safety fix): a value-less boolean flag followed by a POSITIONAL
* must not swallow the positional as its value. Pre-fix,
* `hippo invalidate --dry-run "REST API"` parsed as
* flags['dry-run']="REST API", args=[] — eating the pattern.
*
* Import note: src/cli.ts runs main() at module load; under vitest argv the
* command resolves to '' (usage print, no exit), same pattern
* cli-context-render-snapshot.test.ts relies on.
*/
import { describe, it, expect } from 'vitest';
import { parseArgs } from '../src/cli.js';

/** Builds a process.argv-shaped array: two leading placeholder slots, then `rest`. */
const argv = (...rest: string[]) => ['node', 'hippo'].concat(rest);

describe('parseArgs boolean-flag allowlist', () => {
  it('--dry-run followed by a positional keeps the positional', () => {
    const parsed = parseArgs(argv('invalidate', '--dry-run', 'REST API'));
    expect(parsed.command).toBe('invalidate');
    expect(parsed.flags['dry-run']).toBe(true);
    expect(parsed.args).toEqual(['REST API']);
  });

  it('positional before --dry-run parses identically', () => {
    const parsed = parseArgs(argv('invalidate', 'REST API', '--dry-run'));
    expect(parsed.flags['dry-run']).toBe(true);
    expect(parsed.args).toEqual(['REST API']);
  });

  it('--dry-run composes with a value flag', () => {
    // The boolean flag must not consume `--id`, and `--id` must keep its value.
    const input = argv('invalidate', '--dry-run', '--id', 'mem_abc123');
    const parsed = parseArgs(input);
    expect(parsed.flags['dry-run']).toBe(true);
    expect(parsed.flags['id']).toBe('mem_abc123');
    expect(parsed.args).toEqual([]);
  });

  it('non-allowlisted flags keep value semantics', () => {
    const parsed = parseArgs(argv('invalidate', '--reason', 'migrated away', 'pattern'));
    expect(parsed.flags['reason']).toBe('migrated away');
    expect(parsed.args).toEqual(['pattern']);
  });
});
Loading
Loading