diff --git a/README.md b/README.md index a400e81c..241586b7 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ Each plugin lives in `plugins/`. The directory name is the install keyword | `linear` | Linear SDK scripting skill for issue, project, team, cycle, and comment workflows. | | `mac-notify` | macOS notifications when a Cline run completes. | | `nanobanana` | Image generation through OpenRouter and Gemini image models. | +| `redis-development` | Redis data modeling, Query Engine, vector search, caching, security, clustering, and observability skills. | | `speak` | Speaks completed Cline replies with ElevenLabs text to speech. | | `typescript-lsp` | TypeScript language service `goto_definition` support. | | `weather-metrics` | Demo weather tool plus runtime metrics hooks. | diff --git a/plugins/redis-development/NOTICE.redis-agent-skills b/plugins/redis-development/NOTICE.redis-agent-skills new file mode 100644 index 00000000..85589eae --- /dev/null +++ b/plugins/redis-development/NOTICE.redis-agent-skills @@ -0,0 +1,24 @@ +This plugin includes Redis workflow skill material adapted from Redis agent skills. + +Source: https://github.com/redis/agent-skills +License: MIT + +Copyright (c) Redis, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/plugins/redis-development/README.md b/plugins/redis-development/README.md new file mode 100644 index 00000000..12560fd5 --- /dev/null +++ b/plugins/redis-development/README.md @@ -0,0 +1,46 @@ +# redis-development + +Use Redis development guidance in Cline for data modeling, connection tuning, Query Engine indexes, vector search, semantic caching, clustering, security, observability, and Redis AI product workflows. + +## What It Adds + +The plugin bundles Redis workflow skills for: + +- Choosing Redis data structures and key naming conventions. +- Designing Redis Query Engine indexes and search queries. +- Building vector search and RAG retrieval flows. +- Tuning client connections, pooling, pipelining, timeouts, and client-side caching. +- Planning cluster hash tags, multi-key operations, and replica reads. +- Hardening authentication, ACLs, TLS, network exposure, and dangerous commands. +- Monitoring Redis health with INFO, SLOWLOG, MEMORY, CLIENT, and FT.PROFILE guidance. +- Using Redis LangCache and Redis Agent Memory services. + +## Install + +```bash +cline plugin install redis-development +``` + +For local development from this repository: + +```bash +cline plugin install ./plugins/redis-development --cwd . +``` + +## Example Usage + +Ask Cline to design a Redis key model for a cache-heavy feature, review a Redis Query Engine index, troubleshoot connection pool exhaustion, plan Redis Cluster hash tags, or harden ACL/TLS/network settings before production. 
+ +## Requirements + +No external runtime is required to install the plugin. Some workflows may ask the user to run Redis CLI commands, Redis client libraries, Redis Insight, Redis Cloud setup steps, or SDK examples depending on the user's project and task. + +## Safety Notes + +The bundled skills include live Redis safety guidance: confirm the target environment and ask before destructive, broad, blocking, administrative, credential-changing, SDK/REST write, delete, bulk-read, or production smoke-test operations. + +The plugin does not connect to Redis or start local services at install time. It is an offline skill pack with safety guidance inside the skills. + +## License + +The bundled Redis workflow skills are adapted from Redis agent skill material under the MIT license. See `NOTICE.redis-agent-skills`. diff --git a/plugins/redis-development/index.ts b/plugins/redis-development/index.ts new file mode 100644 index 00000000..e553d3ce --- /dev/null +++ b/plugins/redis-development/index.ts @@ -0,0 +1,10 @@ +import type { AgentPlugin } from "@cline/sdk" + +const plugin: AgentPlugin = { + name: "redis-development", + manifest: { + capabilities: ["skills"], + }, +} + +export default plugin diff --git a/plugins/redis-development/package.json b/plugins/redis-development/package.json new file mode 100644 index 00000000..642822b4 --- /dev/null +++ b/plugins/redis-development/package.json @@ -0,0 +1,19 @@ +{ + "name": "redis-development", + "version": "0.0.0", + "private": true, + "type": "module", + "description": "Cline plugin that bundles Redis development, Query Engine, vector search, caching, security, clustering, and observability skills.", + "cline": { + "plugins": [ + { + "paths": [ + "./index.ts" + ], + "capabilities": [ + "skills" + ] + } + ] + } +} diff --git a/plugins/redis-development/skills/iris-development/SKILL.md b/plugins/redis-development/skills/iris-development/SKILL.md new file mode 100644 index 00000000..a64f887e --- /dev/null +++ 
b/plugins/redis-development/skills/iris-development/SKILL.md @@ -0,0 +1,92 @@ +--- +name: iris-development +description: Iris is Redis's umbrella for AI-focused products. Use this skill when integrating with the Iris Redis Agent Memory (RAM) data plane on Redis Cloud - recording session events for an AI agent, creating or searching long-term memories, configuring a memory store, or tuning background memory promotion. Code examples use the official `redis-agent-memory` (Python) and `@redis-iris/agent-memory` (TypeScript) SDKs. +license: MIT +metadata: + author: redis + version: "1.0.0" +--- + +# Iris: Redis Agent Memory + +Iris is the umbrella brand for Redis's AI-focused products. This skill currently covers one product in that family: Redis Agent Memory (RAM) - the persistent memory layer for AI agents, delivered as a managed service on Redis Cloud. Additional Iris products will be added as separate sections when they ship. + +Redis Agent Memory exposes a REST/JSON data-plane API with two memory tiers: + +- Session memory - append-only conversation history per session (working memory). +- Long-term memory - semantically searchable records extracted from sessions (or created directly). + +A background promotion worker - managed by Redis Cloud - extracts durable facts from session events and writes them into long-term memory. + +## Official SDKs + +All code samples use the official SDKs: + + +| Language | Package | Class | Install | +| ---------- | -------------------------- | ------------- | ---------------------------------- | +| Python | `redis-agent-memory` | `AgentMemory` | `pip install redis-agent-memory` | +| TypeScript | `@redis-iris/agent-memory` | `AgentMemory` | `npm add @redis-iris/agent-memory` | + + +Both SDKs read the bearer token from `AGENT_MEMORY_API_KEY` and the default store ID from `AGENT_MEMORY_STORE_ID`. 
The production data-plane URL is `https://gcp-us-east4.memory.redis.io`; the exact URL for your service is also shown in the Cloud console after provisioning. + +## When to Apply + +Reference these guidelines when: + +- Creating a memory service on Redis Cloud ([https://cloud.redis.io/#/agent-memory](https://cloud.redis.io/#/agent-memory)) +- Wiring an agent to call `AgentMemory.add_session_event(...)` / `addSessionEvent(...)` +- Searching long-term memory with `search_long_term_memory(...)` / `searchLongTermMemory(...)` +- Choosing between session events and direct long-term memory writes + +## Rule Categories by Priority + + +| Priority | Category | Impact | Prefix | +| -------- | ----------------------- | ------ | ------------ | +| 1 | Setup & Cloud Service | HIGH | `setup-` | +| 2 | Session Memory / Events | HIGH | `session-` | +| 3 | Long-Term Memory | HIGH | `ltm-` | +| 4 | Memory Promotion | MEDIUM | `promotion-` | + + +## Quick Reference + +### 1. Setup & Cloud Service (HIGH) + +- [`setup-cloud-service`](references/setup-cloud-service.md) - Create a Memory service on Redis Cloud +- [`setup-auth-token`](references/setup-auth-token.md) - Authenticate the SDK with a store API key + +### 2. Session Memory / Events (HIGH) + +- [`session-when-to-use`](references/session-when-to-use.md) - Choose session events vs direct long-term memory +- [`session-add-event`](references/session-add-event.md) - Append a session event correctly +- [`session-retrieval`](references/session-retrieval.md) - Retrieve session memory and individual events + +### 3. Long-Term Memory (HIGH) + +- [`ltm-bulk-create`](references/ltm-bulk-create.md) - Create long-term memories in bulk with idempotent IDs +- [`ltm-search`](references/ltm-search.md) - Search long-term memory semantically with filters +- [`ltm-organize`](references/ltm-organize.md) - Organize records with namespace, ownerId, topics, and memoryType + +### 4. 
Memory Promotion (MEDIUM) + +- [`promotion-overview`](references/promotion-overview.md) - How background promotion works + +## How to Use + +Read individual rule files under `references/` for detailed explanations and code examples: + +``` +references/setup-cloud-service.md +references/session-add-event.md +references/promotion-overview.md +``` + +Each rule file contains: + +- Brief explanation of why it matters +- Correct example(s) with Python and TypeScript SDK code +- Either an "Incorrect" example or "When to use / When NOT needed" guidance +- Additional context and references \ No newline at end of file diff --git a/plugins/redis-development/skills/iris-development/references/ltm-bulk-create.md b/plugins/redis-development/skills/iris-development/references/ltm-bulk-create.md new file mode 100644 index 00000000..7f7ba92d --- /dev/null +++ b/plugins/redis-development/skills/iris-development/references/ltm-bulk-create.md @@ -0,0 +1,113 @@ + +## Create Long-Term Memories in Bulk with Idempotent IDs + +`bulk_create_long_term_memories` (Python) / `bulkCreateLongTermMemories` (TypeScript) accepts up to 100 records per call. The client supplies the `id` for each record so a retry never creates a duplicate. The response splits into `created` (IDs that landed) and `errors` (per-ID failures). + +Correct: Generate a deterministic ID per logical fact and batch up to 100. + +Python: + +```python +import uuid +from redis_agent_memory import AgentMemory, models + +def upsert_facts(agent_memory: AgentMemory, facts: list[dict]): + # Cap at 100 per call - the API enforces this. Only facts[:100] are sent; chunk larger lists in the caller. 
+ res = agent_memory.bulk_create_long_term_memories(memories=[ + { + "id": fact["id"], # stable, deterministic + "text": fact["text"], # 1-50000 chars + "memory_type": fact.get("memory_type", models.MemoryType.SEMANTIC), + "session_id": fact.get("session_id"), + "owner_id": fact.get("owner_id"), + "namespace": fact.get("namespace"), + "topics": fact.get("topics", []), + } + for fact in facts[:100] + ]) + # res.created = [...ids...], res.errors = [BulkOperationError(...)] + return res + +# Deterministic IDs make retries safe: same fact -> same id -> no duplicate. +facts = [{ + "id": f"user-42-pref-{uuid.uuid5(uuid.NAMESPACE_OID, 'theme:dark')}", + "text": "User 42 prefers dark mode.", + "owner_id": "user-42", + "topics": ["profile", "ui-preferences"], +}] +upsert_facts(agent_memory, facts) +``` + +TypeScript: + +```typescript +import { AgentMemory } from "@redis-iris/agent-memory"; + +async function upsertFacts( + agentMemory: AgentMemory, + facts: Array<{ + id: string; text: string; + memoryType?: "semantic" | "episodic" | "message"; + sessionId?: string; ownerId?: string; namespace?: string; + topics?: string[]; + }>, +) { + const res = await agentMemory.bulkCreateLongTermMemories({ + memories: facts.slice(0, 100).map((f) => ({ + id: f.id, + text: f.text, + memoryType: f.memoryType ?? "semantic", + sessionId: f.sessionId, + ownerId: f.ownerId, + namespace: f.namespace, + topics: f.topics ?? [], + })), + }); + // res.created: string[], res.errors?: Array<{id: string; error: string}> + return res; +} +``` + +Incorrect: One call per memory, or random IDs on every retry. + +```python +# Bad: N round-trips + N embedding calls - slow and hammers your rate limit. 
+for fact in facts: + agent_memory.bulk_create_long_term_memories(memories=[{ + "id": str(uuid.uuid4()), # <-- new id on every retry -> duplicates on transient failures + "text": fact["text"], + }]) +``` + +Partial-success contract - always inspect `errors`: + +```python +res = upsert_facts(agent_memory, facts) +if res.errors: + for err in res.errors: + log.warning("LTM create failed", id=err.id, reason=err.error) + # res.created IS persisted; do not retry those. + failed_ids = {e.id for e in res.errors} + retry_later([f for f in facts if f["id"] in failed_ids]) +``` + +```typescript +const res = await upsertFacts(agentMemory, facts); +if (res.errors?.length) { + for (const err of res.errors) { + console.warn("LTM create failed", err.id, err.error); + } + const failedIds = new Set(res.errors.map((e) => e.id)); + await retryLater(facts.filter((f) => failedIds.has(f.id))); +} +``` + +Constraints: +- `memories`: 1-100 items per call. +- `id`: 1-64 chars, `[a-zA-Z0-9-]`. +- `text`: 1-50000 chars. +- `memory_type` / `memoryType`: `semantic` | `episodic` | `message`. +- `topics`: up to 50, each 1-100 chars. +- TTL: defaults to 1 year (`31_536_000` seconds) unless the store's long-term-memory TTL overrides it. + +To update a record's text or tags later, use `update_long_term_memory(memory_id=...)` / `updateLongTermMemory(memoryId, ...)` rather than re-creating with the same ID. diff --git a/plugins/redis-development/skills/iris-development/references/ltm-organize.md b/plugins/redis-development/skills/iris-development/references/ltm-organize.md new file mode 100644 index 00000000..c4690d41 --- /dev/null +++ b/plugins/redis-development/skills/iris-development/references/ltm-organize.md @@ -0,0 +1,125 @@ + +## Organize Long-Term Memory with namespace, ownerId, topics, and memoryType + +LTM records carry four structured fields that exist purely to scope search. They cost nothing extra to populate at write time and make every later search call faster and more precise. 
+ +| Field | Type | Purpose | Typical use | +|---|---|---|---| +| `owner_id` / `ownerId` | 1-64 chars `[a-zA-Z0-9-]` | The user/agent the memory is *about* | Multi-tenant scoping - always set this for per-user memories | +| `namespace` | 1-64 chars `[a-zA-Z0-9-]` | Logical bucket within a store | Separate `profile` facts from `interactions` from `tools` | +| `topics` | List of up to 50 tags, each 1-100 chars | Categorical labels | `["preferences", "ui"]`, `["incident", "p1"]` | +| `memory_type` / `memoryType` | `semantic` \| `episodic` \| `message` | What the record *is* | See below | + +`memory_type` semantics: +- `semantic` - a durable fact ("user prefers dark mode"). Cheapest to keep around long-term; survives across sessions. +- `episodic` - something that happened at a point in time ("user asked about pricing on 2026-05-10"). Pair with `created_at` filters. +- `message` - a raw conversational turn that was deemed worth retaining verbatim. + +Correct: Populate every applicable field at create time. 
+ +Python: + +```python +from redis_agent_memory import models + +agent_memory.bulk_create_long_term_memories(memories=[ + { + "id": "user-42-pref-theme", + "text": "User 42 prefers dark mode in the dashboard.", + "memory_type": models.MemoryType.SEMANTIC, + "owner_id": "user-42", + "namespace": "preferences", + "topics": ["ui", "theme"], + }, + { + "id": "user-42-incident-7821", + "text": "User 42 hit a 500 on /api/checkout on 2026-05-10 and was refunded.", + "memory_type": models.MemoryType.EPISODIC, + "owner_id": "user-42", + "namespace": "interactions", + "topics": ["incident", "billing"], + }, +]) +``` + +TypeScript: + +```typescript +await agentMemory.bulkCreateLongTermMemories({ + memories: [ + { + id: "user-42-pref-theme", + text: "User 42 prefers dark mode in the dashboard.", + memoryType: "semantic", + ownerId: "user-42", + namespace: "preferences", + topics: ["ui", "theme"], + }, + { + id: "user-42-incident-7821", + text: "User 42 hit a 500 on /api/checkout on 2026-05-10 and was refunded.", + memoryType: "episodic", + ownerId: "user-42", + namespace: "interactions", + topics: ["incident", "billing"], + }, + ], +}); +``` + +Later searches can then scope cheaply: + +Python: + +```python +from datetime import datetime, timedelta, timezone + +# All preferences for one user +agent_memory.search_long_term_memory( + filter_={"owner_id": {"eq": "user-42"}, "namespace": {"eq": "preferences"}}, +) + +# Incidents across all users in the last 7 days +seven_days_ago = datetime.now(timezone.utc) - timedelta(days=7) +agent_memory.search_long_term_memory( + text="checkout failure", + filter_={ + "topics": {"all": ["incident", "billing"]}, + "created_at": {"gte": seven_days_ago}, # tz-aware UTC datetime + }, +) +``` + +TypeScript: + +```typescript +// All preferences for one user +await agentMemory.searchLongTermMemory({ + filter: { ownerId: { eq: "user-42" }, namespace: { eq: "preferences" } }, +}); + +// Incidents across all users in the last 7 days +const sevenDaysAgo 
= new Date(Date.now() - 7 * 24 * 60 * 60 * 1000); +await agentMemory.searchLongTermMemory({ + text: "checkout failure", + filter: { + topics: { all: ["incident", "billing"] }, + createdAt: { gte: sevenDaysAgo }, // Date + }, +}); +``` + +Incorrect: Stuffing all of these into the `text` field. + +```python +# Bad: structured signals hidden inside free text. Search can't filter on them +# without an LLM re-parse, and similarity threshold becomes the only knob. +agent_memory.bulk_create_long_term_memories(memories=[{ + "id": "fact-1", + "text": "[owner=user-42][namespace=preferences][topic=ui] prefers dark mode", +}]) +``` + +Updating organization later: `update_long_term_memory(memory_id=..., ...)` / `updateLongTermMemory(memoryId, ...)` accepts `namespace`, `owner_id`, `session_id`, `topics`, and `memory_type`. To clear a field, send an empty string (`""`) - omitting the field leaves it unchanged. + +Avoid leakage between owners. If a record can be attributed to one user, set `owner_id`. A search request without an `owner_id` filter will happily return facts from any user in the same store. diff --git a/plugins/redis-development/skills/iris-development/references/ltm-search.md b/plugins/redis-development/skills/iris-development/references/ltm-search.md new file mode 100644 index 00000000..ae0403e6 --- /dev/null +++ b/plugins/redis-development/skills/iris-development/references/ltm-search.md @@ -0,0 +1,124 @@ + +## Search Long-Term Memory Semantically with Filters + +`search_long_term_memory(...)` (Python) / `searchLongTermMemory(...)` (TypeScript) runs a vector search over LTM records and applies structured filters in the same call. Combining both is the supported path - do not pull a wide vector result and filter on the client. + +Correct: Pre-filter by the structured fields you already know, then rank by semantic similarity. 
+ +Python: + +```python +from redis_agent_memory import AgentMemory, models + +def recall( + agent_memory: AgentMemory, + *, + owner_id: str, + query: str, + namespace: str | None = None, + k: int = 5, +): + filt = { + "owner_id": {"eq": owner_id}, + "memory_type": {"in": ["semantic", "episodic"]}, + } + if namespace is not None: + filt["namespace"] = {"eq": namespace} + + res = agent_memory.search_long_term_memory( + text=query, # embedded server-side + similarity_threshold=0.7, # normalized cosine, 0-1 + filter_op=models.FilterConjunction.ALL, # AND across filter keys + filter_=filt, # NB: trailing underscore - avoids shadowing the Python builtin `filter` + limit=k, # 1-100, default 10 + ) + return res.memories +``` + +TypeScript: + +```typescript +import { AgentMemory } from "@redis-iris/agent-memory"; + +async function recall( + agentMemory: AgentMemory, + args: { ownerId: string; query: string; namespace?: string; k?: number }, +) { + const res = await agentMemory.searchLongTermMemory({ + text: args.query, + similarityThreshold: 0.7, + filterOp: "all", // AND across filter keys + filter: { + ownerId: { eq: args.ownerId }, + ...(args.namespace ? { namespace: { eq: args.namespace } } : {}), + memoryType: { in: ["semantic", "episodic"] }, + }, + limit: args.k ?? 5, + }); + return res.memories; +} +``` + +Incorrect: Querying with only `text` and filtering client-side. + +```python +# Bad: pulls up to 100 unrelated records per user, then re-filters in Python. +# Pays the vector-search cost on the full store, and capped at 100 results +# you may miss the one you needed. +hits = agent_memory.search_long_term_memory(text=query, limit=100).memories +for m in hits: + if m.owner_id == owner_id and m.namespace == namespace: + ... 
+``` + +Filter operators (per field): + +| Field | Operators | +|---|---| +| `session_id`, `owner_id`, `namespace` | `eq`, `ne`, `in`, `all` | +| `topics`, `memory_type` | `eq`, `ne`, `in`, `all` (tag filter) | +| `created_at` | `gt`, `lt`, `gte`, `lte`, `eq` (tz-aware `datetime` / `Date`) | + +`filter_op` / `filterOp` controls how the top-level filter fields combine: `"all"` (default, AND) or `"any"` (OR). Inside one field, `eq` / `ne` / `in` / `all` are mutually exclusive - set exactly one. + +Similarity threshold: Normalized cosine similarity (0-1). Start at 0.7 and tune per workload - too high returns empty pages; too low returns noise. + +Pagination: Pass `next_page_token` / `nextPageToken` back verbatim. Don't decode it; the server may change the encoding. + +```python +def iter_results(agent_memory, *, query: str, owner_id: str): + token = None + while True: + page = agent_memory.search_long_term_memory( + text=query, + filter_={"owner_id": {"eq": owner_id}}, + limit=50, + page_token=token, + ) + yield from page.memories + token = page.next_page_token + if not token: + return +``` + +```typescript +async function* iterResults( + agentMemory: AgentMemory, + args: { query: string; ownerId: string }, +) { + let pageToken: string | undefined; + while (true) { + const page = await agentMemory.searchLongTermMemory({ + text: args.query, + filter: { ownerId: { eq: args.ownerId } }, + limit: 50, + pageToken, + }); + yield* page.memories; + if (!page.nextPageToken) return; + pageToken = page.nextPageToken; + } +} +``` + +No-query browsing: Omit `text` to apply only the structured filters (vector ranking is skipped, results are returned in record order). 
diff --git a/plugins/redis-development/skills/iris-development/references/promotion-overview.md b/plugins/redis-development/skills/iris-development/references/promotion-overview.md new file mode 100644 index 00000000..c5da1c91 --- /dev/null +++ b/plugins/redis-development/skills/iris-development/references/promotion-overview.md @@ -0,0 +1,90 @@ + +## Understand Background Memory Promotion + +Every successful `add_session_event` / `addSessionEvent` enqueues a promote-working-memory job, fire-and-forget. The data plane never blocks on the LLM call; Redis Cloud's worker pool consumes the job, reads the session's events, calls an LLM to extract durable facts, and writes resulting records into long-term memory. + +``` +Agent -> Data plane: addSessionEvent +Data plane -> Job queue: enqueue +Data plane -> Agent: 200 OK +Worker -> Job queue: poll +Worker -> Data plane: read session +Worker -> LLM: call LLM +Worker -> Long-term memory: write LTM +``` + +### Deduplication window + +Submitting a job per event would mean an LLM call per turn. To prevent that, the worker groups events into time windows. Jobs whose deduplication key would collide are run only once for that window. + +- Two events landing in the same window for the same session share a deduplication key, so only one promotion job runs for that bucket. +- Window: 5 minutes (managed by Redis Cloud - not user-configurable today). +- The job is delayed until the end of the window so it sees every event in that bucket. + +### Eventually consistent - design for it + +After an `add_session_event` returns 200, a `search_long_term_memory` for the extracted facts may not see them for *up to one deduplication window plus the LLM round-trip*. Don't assert synchronously in tests; poll. 
+ +Python: + +```python +import time +from redis_agent_memory import AgentMemory + +def wait_for_ltm( + agent_memory: AgentMemory, + *, + query: str, + owner_id: str, + timeout_s: float = 360, # must exceed the 5-minute deduplication window plus the LLM round-trip +): + deadline = time.monotonic() + timeout_s + while time.monotonic() < deadline: + hits = agent_memory.search_long_term_memory( + text=query, + filter_={"owner_id": {"eq": owner_id}}, + limit=5, + ).memories + if hits: + return hits + time.sleep(1.0) + raise AssertionError("promotion did not materialize in time") +``` + +TypeScript: + +```typescript +import { AgentMemory } from "@redis-iris/agent-memory"; + +async function waitForLtm( + agentMemory: AgentMemory, + args: { query: string; ownerId: string; timeoutMs?: number }, +) { + const deadline = Date.now() + (args.timeoutMs ?? 360_000); // must exceed the 5-minute deduplication window + while (Date.now() < deadline) { + const res = await agentMemory.searchLongTermMemory({ + text: args.query, + filter: { ownerId: { eq: args.ownerId } }, + limit: 5, + }); + if (res.memories.length) return res.memories; + await new Promise((r) => setTimeout(r, 1000)); + } + throw new Error("promotion did not materialize in time"); +} +``` + +Incorrect: Assuming LTM is updated synchronously with the session write. + +```python +# Bad: race. The promotion job is enqueued but the worker hasn't run yet. +agent_memory.add_session_event(...) +results = agent_memory.search_long_term_memory(text="...").memories +assert results, "expected the new fact to be retrievable" # flaky +``` + +### What if a promotion fails? + +- Submission errors are logged on the data plane but do not fail the write - `add_session_event` still returns 200. The trade-off is that a queue outage silently delays promotion until Cloud's monitoring picks it up. +- Worker-side failures (LLM timeout, embedding-provider 429) are retried by the workflow engine. +- Sessions that have stopped receiving events may keep trailing turns un-promoted until the next event for that session arrives. 
diff --git a/plugins/redis-development/skills/iris-development/references/session-add-event.md b/plugins/redis-development/skills/iris-development/references/session-add-event.md new file mode 100644 index 00000000..fd320323 --- /dev/null +++ b/plugins/redis-development/skills/iris-development/references/session-add-event.md @@ -0,0 +1,121 @@ + +## Append a Session Event Correctly + +`AgentMemory.add_session_event(...)` (Python) / `agentMemory.addSessionEvent(...)` (TypeScript) appends a single event to a session. The session is created on first write; if `session_id` / `sessionId` is omitted the server generates one (32-char UUID without dashes) and returns it on the response. Every successful write also enqueues a promotion job - so payload quality directly affects what lands in long-term memory. + +Correct: Pass `actor_id`, `role`, `content`, and a tz-aware UTC `created_at` on every turn. Carry the same `session_id` for the whole conversation. + +Python - `created_at` is a `datetime.datetime` (UTC, tz-aware): + +```python +from datetime import datetime, timezone +from redis_agent_memory import AgentMemory, models + +def append_event( + agent_memory: AgentMemory, + *, + session_id: str, + actor_id: str, + role: models.MessageRole, + text: str, + metadata: dict | None = None, +): + return agent_memory.add_session_event( + session_id=session_id, # client-supplied - keeps the turn ordered with prior turns + actor_id=actor_id, # who said this (user-42, agent-1, system) + role=role, # MessageRole.USER | .ASSISTANT | .SYSTEM + content=[{"text": text}], # list of typed content parts + created_at=datetime.now(timezone.utc), # tz-aware UTC datetime - required + metadata=metadata, # any JSON, <= 16 KB + ).event # -> server-assigned eventId, etc. 
+ +append_event( + agent_memory, + session_id="chat-2026-05-18-42", + actor_id="user-42", + role=models.MessageRole.USER, + text="What did we agree on yesterday?", +) +``` + +TypeScript - `createdAt` is a `Date` (SDK serializes to UTC ISO-8601): + +```typescript +import { AgentMemory } from "@redis-iris/agent-memory"; + +async function appendEvent( + agentMemory: AgentMemory, + args: { + sessionId: string; + actorId: string; + role: "USER" | "ASSISTANT" | "SYSTEM"; + text: string; + metadata?: Record<string, unknown>; + }, +) { + const res = await agentMemory.addSessionEvent({ + sessionId: args.sessionId, + actorId: args.actorId, + role: args.role, + content: [{ text: args.text }], + createdAt: new Date(), // UTC Date - required + metadata: args.metadata, + }); + return res.event; // server-assigned eventId, etc. +} + +await appendEvent(agentMemory, { + sessionId: "chat-2026-05-18-42", + actorId: "user-42", + role: "USER", + text: "What did we agree on yesterday?", +}); +``` + +Incorrect: Letting the server generate a new `session_id` on every turn, or passing a naive (tz-less) datetime in Python. + +```python +from datetime import datetime + +# Bad: omitting session_id on every call creates a new session per turn, +# so the session memory contains exactly one event and promotion has no +# context to extract from. +agent_memory.add_session_event( + actor_id="user-42", + role=models.MessageRole.USER, + content=[{"text": msg}], + created_at=datetime.now(), # <-- naive datetime; ambiguous timezone. + # Use datetime.now(timezone.utc). +) +``` + +Constraints worth remembering: +- `store_id`, `session_id`, `actor_id`: 1-64 chars, `[a-zA-Z0-9-]` only. +- `role`: one of `USER`, `ASSISTANT`, `SYSTEM`. +- `content`: list of typed parts; today only `{"text": "..."}` is supported. +- `created_at` / `createdAt`: tz-aware UTC `datetime` (Python) or `Date` (TypeScript). The SDKs serialize to ISO-8601 on the wire. +- `metadata`: any valid JSON document, <= 16 KB. 
+- Session TTL is governed by the store's short-memory TTL (configured at store creation). Each new event refreshes the TTL on the session key. + +The response (`res.event` / `result.event`) includes the server-generated `event_id` / `eventId` (32-char UUID without dashes) - store it if you might need `delete_session_event` / `deleteSessionEvent` later. It also includes a `system_timestamp` / `systemTimestamp` (set by the data plane on ingestion) alongside the client-supplied `created_at` - see [`session-retrieval`](session-retrieval.md) for how to use the two timestamps. + +### Async (Python) + +The Python SDK exposes an `_async` variant for every method when used inside an `async` function: + +```python +import asyncio +from datetime import datetime, timezone + +async def main(): + async with AgentMemory(URL, store_id=SID, api_key=KEY) as agent_memory: + await agent_memory.add_session_event_async( + session_id="chat-1", + actor_id="user-42", + role=models.MessageRole.USER, + content=[{"text": "hi"}], + created_at=datetime.now(timezone.utc), + ) + +asyncio.run(main()) +``` diff --git a/plugins/redis-development/skills/iris-development/references/session-retrieval.md b/plugins/redis-development/skills/iris-development/references/session-retrieval.md new file mode 100644 index 00000000..4e9b34c6 --- /dev/null +++ b/plugins/redis-development/skills/iris-development/references/session-retrieval.md @@ -0,0 +1,90 @@ + +## Retrieve Session Memory and Individual Events + +The SDK exposes three read paths against session memory; pick the narrowest one for the job. + +| Python | TypeScript | Returns | Use when | +|---|---|---|---| +| `get_session_memory(session_id=...)` | `getSessionMemory(sessionId)` | All events for the session in order, plus `owner_id` | Rebuilding the prompt context for a conversation | +| `get_session_event(session_id=..., event_id=...)` | `getSessionEvent(sessionId, eventId)` | One event | You already have the `eventId` (e.g. 
from `addSessionEvent` response) | +| `list_sessions(limit=..., page_token=...)` | `listSessions({limit, pageToken})` | Page of session IDs + `total` | Admin/debug listing of sessions in a store | + +The session's `owner_id` is set from the first event's `actor_id` and is immutable for the lifetime of the session. + +Correct: Fetch the whole session when reconstructing the agent's working context. + +Python: + +```python +def load_session(agent_memory, session_id: str) -> list: + res = agent_memory.get_session_memory(session_id=session_id) + # res.session_id, res.owner_id, res.events (ordered by created_at) + return res.events +``` + +TypeScript: + +```typescript +async function loadSession(agentMemory: AgentMemory, sessionId: string) { + const res = await agentMemory.getSessionMemory(sessionId); + // res.sessionId, res.ownerId, res.events (ordered by createdAt) + return res.events; +} +``` + +Two timestamps on each event. Every `SessionEvent` in the response carries: + +- `created_at` / `createdAt` - the client-supplied UTC timestamp you passed at write time. This is what the agent considers "when the turn happened" and what events are ordered by. +- `system_timestamp` / `systemTimestamp` - a server-set UTC timestamp recording when the data plane ingested the event. Useful for diagnostics (e.g. clock skew between agent and server, or detecting replayed events with stale `created_at` values). + +Both are `datetime` (Python) / `Date` (TypeScript) on the SDK side; serialized as UTC ISO-8601 on the wire. + +Correct: Page through sessions for admin tools. 
+ +Python: + +```python +def iter_session_ids(agent_memory): + token = None + while True: + page = agent_memory.list_sessions(limit=200, page_token=token) + yield from page.sessions + token = page.next_page_token + if not token: + return +``` + +TypeScript: + +```typescript +async function* iterSessionIds(agentMemory: AgentMemory) { + let pageToken: string | undefined; + while (true) { + const page = await agentMemory.listSessions({ limit: 200, pageToken }); + yield* page.sessions; + if (!page.nextPageToken) return; + pageToken = page.nextPageToken; + } +} +``` + +Incorrect: Paging the full session list to find one event you already have an ID for. + +```python +# Bad: O(sessions) just to find one eventId you already have. +for sid in iter_session_ids(agent_memory): + for ev in load_session(agent_memory, sid): + if ev.event_id == target_event_id: + return ev +``` + +Use `get_session_event` / `getSessionEvent` instead - it is an O(1) lookup. + +Pagination limits: +- `list_sessions.limit` defaults to 100, max 1000. +- `next_page_token` / `nextPageToken` is opaque - pass it back verbatim, don't try to decode it. + +Deletion: +- `delete_session_memory(session_id=...)` / `deleteSessionMemory(sessionId)` removes the entire session and all its events. +- `delete_session_event(session_id=..., event_id=...)` / `deleteSessionEvent(sessionId, eventId)` removes one event. +- Already-promoted long-term memories are not affected - delete those separately via `bulk_delete_long_term_memories` / `bulkDeleteLongTermMemories`. diff --git a/plugins/redis-development/skills/iris-development/references/session-when-to-use.md b/plugins/redis-development/skills/iris-development/references/session-when-to-use.md new file mode 100644 index 00000000..a9b5e1c4 --- /dev/null +++ b/plugins/redis-development/skills/iris-development/references/session-when-to-use.md @@ -0,0 +1,103 @@ + +## Choose Session Events vs Long-Term Memory + +Redis Agent Memory has two tiers. 
They serve different jobs - picking the wrong one is the single biggest source of cost and correctness problems. + +| Tier | What it stores | Retrieval | Lifetime | Cost shape | +|---|---|---|---|---| +| Session memory | Raw, ordered conversation events for one session | Whole session or by `eventId` | Session-scoped TTL (configured at store creation) | Cheap writes, no LLM cost on the write path | +| Long-term memory | Extracted facts/summaries/messages | Semantic search across sessions | Default 1 year TTL | Each promotion runs an LLM call | + +Correct: Append every turn of the conversation as a session event. Let the background promotion worker decide what becomes long-term memory. + +Python: + +```python +from datetime import datetime, timezone +from redis_agent_memory import AgentMemory, models + +# Every user/assistant turn -> add_session_event. That's it. +agent_memory.add_session_event( + session_id=session_id, + actor_id="user-42", + role=models.MessageRole.USER, + content=[{"text": user_msg}], + created_at=datetime.now(timezone.utc), +) +agent_memory.add_session_event( + session_id=session_id, + actor_id="agent-1", + role=models.MessageRole.ASSISTANT, + content=[{"text": reply}], + created_at=datetime.now(timezone.utc), +) +# Promotion happens asynchronously - see promotion-overview. +``` + +TypeScript: + +```typescript +await agentMemory.addSessionEvent({ + sessionId: sessionId, + actorId: "user-42", + role: "USER", + content: [{ text: userMsg }], + createdAt: new Date(), +}); +await agentMemory.addSessionEvent({ + sessionId: sessionId, + actorId: "agent-1", + role: "ASSISTANT", + content: [{ text: reply }], + createdAt: new Date(), +}); +``` + +Correct: Write to long-term memory directly when you already have a structured fact and don't want to pay for extraction. + +Python: + +```python +# Pre-known fact - skip the LLM and write LTM directly. 
+agent_memory.bulk_create_long_term_memories(memories=[ + { + "id": "user-42-timezone", + "text": "User 42 is in Europe/Sofia (UTC+2/+3).", + "memory_type": models.MemoryType.SEMANTIC, + "owner_id": "user-42", + "topics": ["profile", "timezone"], + }, +]) +``` + +TypeScript: + +```typescript +await agentMemory.bulkCreateLongTermMemories({ + memories: [ + { + id: "user-42-timezone", + text: "User 42 is in Europe/Sofia (UTC+2/+3).", + memoryType: "semantic", + ownerId: "user-42", + topics: ["profile", "timezone"], + }, + ], +}); +``` + +Incorrect: Using long-term memory as the conversation buffer. + +```python +# Bad: each turn pays for embedding + LTM write, and the agent loses turn order. +for turn in conversation: + agent_memory.bulk_create_long_term_memories(memories=[{ + "id": f"{session_id}-{turn.idx}", + "text": turn.text, + "memory_type": models.MemoryType.MESSAGE, + }]) +``` + +Why it's bad: LTM is vector-indexed (cost per write) and unordered (you re-paginate to reconstruct a session). Session memory is append-only and keeps `createdAt` order for free. + +Rule of thumb: if you'd want to retrieve it in a *different* future conversation, it belongs in LTM (usually via promotion). If you only need it for the current turn or the rest of this session, it stays in session memory. diff --git a/plugins/redis-development/skills/iris-development/references/setup-auth-token.md b/plugins/redis-development/skills/iris-development/references/setup-auth-token.md new file mode 100644 index 00000000..fd7d90b1 --- /dev/null +++ b/plugins/redis-development/skills/iris-development/references/setup-auth-token.md @@ -0,0 +1,80 @@ + +## Authenticate the SDK with a Store API Key + +Every data-plane request carries `Authorization: Bearer <store-api-key>`. The SDKs add this header for you - just pass the key (and store ID) at client construction. 
Both SDKs follow the same convention: + +| Field | Python | TypeScript | Environment variable | +|---|---|---|---| +| Server URL | `server_url` (1st positional) | `serverURL` | (set yourself, e.g. `AGENT_MEMORY_BASE_URL`) | +| API key | `api_key` | `apiKey` | `AGENT_MEMORY_API_KEY` | +| Store ID | `store_id` (global) | `storeId` (global) | `AGENT_MEMORY_STORE_ID` | + +`store_id` / `storeId` is a *global parameter* - set it on the client once and every per-store operation uses it by default. You can still override it per call. + +Correct: Read the key from a secrets manager (or environment) and construct the client once per process. + +Python: + +```python +import os +from redis_agent_memory import AgentMemory + +# Construct once at startup; reuse across requests. +# The `with` block ensures the underlying httpx client is closed cleanly. +def make_client() -> AgentMemory: + return AgentMemory( + os.environ["AGENT_MEMORY_BASE_URL"], # e.g. https://gcp-us-east4.memory.redis.io + store_id=os.environ["AGENT_MEMORY_STORE_ID"], + api_key=os.environ["AGENT_MEMORY_API_KEY"], + ) + +with make_client() as agent_memory: + agent_memory.health() +``` + +TypeScript: + +```typescript +import { AgentMemory } from "@redis-iris/agent-memory"; + +// Construct once at module scope; the SDK is safe to share across requests. +export const agentMemory = new AgentMemory({ + serverURL: process.env.AGENT_MEMORY_BASE_URL!, + storeId: process.env.AGENT_MEMORY_STORE_ID!, + apiKey: process.env.AGENT_MEMORY_API_KEY!, +}); + +await agentMemory.health(); +``` + +Incorrect: Hard-coding the key, building the client per request, or constructing the bearer header by hand. + +```python +# Bad: key in source - leaks through git history and any traceback log. +agent_memory = AgentMemory( + "https://gcp-us-east4.memory.redis.io", + store_id="01HZ...", + api_key="example-store-api-key-do-not-use", # <-- never +) + +# Bad: new client per request - wastes the underlying TCP/TLS pool. 
+def handle(req): + with AgentMemory(URL, store_id=SID, api_key=KEY) as agent_memory: + return agent_memory.get_session_memory(session_id=req.session_id) +``` + +```typescript +// Bad: rebuilding the Authorization header manually defeats the SDK's typing, +// retry, and error-class machinery. +const res = await fetch(`${URL}/v1/stores/${SID}/session-memory/events`, { + method: "POST", + headers: { Authorization: `Bearer ${KEY}` }, + body: JSON.stringify(event), +}); +``` + +Rotation: Regenerate the key from the Cloud console. There is no short-lived-token flow - rotation is the only mitigation if a key leaks. + +Per-operation override: Both SDKs accept a `store_id` / `storeId` argument on every call, which overrides the global. Use this when one process talks to multiple stores; do not use it to "scope" calls - global is fine for single-store apps. + +Reference: [Redis Cloud Agent Memory](https://cloud.redis.io/#/agent-memory) diff --git a/plugins/redis-development/skills/iris-development/references/setup-cloud-service.md b/plugins/redis-development/skills/iris-development/references/setup-cloud-service.md new file mode 100644 index 00000000..c5ebaf68 --- /dev/null +++ b/plugins/redis-development/skills/iris-development/references/setup-cloud-service.md @@ -0,0 +1,77 @@ + +## Create a Memory Service on Redis Cloud + +Redis Cloud provisions the memory store, the backing Redis database, the background promotion worker, and the LLM/embedding provider credentials. Each store gets a unique `storeId` and a store API key used as a bearer token on every data-plane request. + +Correct: Provision through the Redis Cloud Agent Memory console. + +1. Sign in at [https://cloud.redis.io/#/agent-memory](https://cloud.redis.io/#/agent-memory). +2. Click New service and pick the correct settings for the user. +3. 
After provisioning, copy three values from the console: + - Server URL - the production data-plane URL is `https://gcp-us-east4.memory.redis.io` (your exact URL is shown in the Cloud console) + - Store ID - 32-character UUID without dashes + - Store API key - Bearer token (treat like a secret) +4. Export them so the SDKs can read them from the environment: + +```bash +export AGENT_MEMORY_BASE_URL="https://gcp-us-east4.memory.redis.io" +export AGENT_MEMORY_STORE_ID="<your-store-id>" +export AGENT_MEMORY_API_KEY="<your-store-api-key>" +``` + +5. Install the SDK and run a smoke test. + +Python - `pip install redis-agent-memory`: + +```python +import os +from datetime import datetime, timezone +from redis_agent_memory import AgentMemory, models + +with AgentMemory( + os.environ["AGENT_MEMORY_BASE_URL"], + store_id=os.environ["AGENT_MEMORY_STORE_ID"], + api_key=os.environ["AGENT_MEMORY_API_KEY"], +) as agent_memory: + # Health check + print(agent_memory.health()) + + # Sanity write + res = agent_memory.add_session_event( + actor_id="user-42", + role=models.MessageRole.USER, + content=[{"text": "hello"}], + created_at=datetime.now(timezone.utc), # tz-aware UTC datetime + ) + print(res.event.event_id) +``` + +TypeScript - `npm add @redis-iris/agent-memory`: + +```typescript +import { AgentMemory } from "@redis-iris/agent-memory"; + +const agentMemory = new AgentMemory({ + serverURL: process.env.AGENT_MEMORY_BASE_URL!, + storeId: process.env.AGENT_MEMORY_STORE_ID!, + apiKey: process.env.AGENT_MEMORY_API_KEY!, +}); + +async function smokeTest() { + console.log(await agentMemory.health()); + + const res = await agentMemory.addSessionEvent({ + actorId: "user-42", + role: "USER", + content: [{ text: "hello" }], + createdAt: new Date(), // SDK serializes to UTC ISO-8601 + }); + console.log(res.event.eventId); +} + +smokeTest(); +``` + +Store the API key in a secrets manager. It scopes access to a single store; rotating it requires regenerating from the Cloud console. 
+ +Reference: [Redis Cloud Agent Memory](https://cloud.redis.io/#/agent-memory) [Python SDK](https://pypi.org/project/redis-agent-memory/) [TypeScript SDK](https://www.npmjs.com/package/@redis-iris/agent-memory) diff --git a/plugins/redis-development/skills/redis-clustering/SKILL.md b/plugins/redis-development/skills/redis-clustering/SKILL.md new file mode 100644 index 00000000..8f66437f --- /dev/null +++ b/plugins/redis-development/skills/redis-clustering/SKILL.md @@ -0,0 +1,82 @@ +--- +name: redis-clustering +description: Redis Cluster and replication guidance covering hash tags for multi-key operations, avoiding CROSSSLOT errors, and reading from replicas to scale read-heavy workloads. Use when designing keys for a sharded Redis Cluster, debugging CROSSSLOT errors on MGET / SDIFF / pipelines, configuring a multi-key transaction in a cluster, or routing reads to replicas for caches, analytics, or dashboards. +license: MIT +metadata: + author: Redis, Inc. + version: "0.1.0" +--- + +# Redis Clustering + +Guidance for designing keys and routing reads in a sharded Redis Cluster (and in standalone primary/replica replication). Covers the two failure modes that bite most new cluster users: `CROSSSLOT` errors on multi-key operations, and overloading primaries with read traffic. + +## When to apply + +- Designing keys for a Redis Cluster deployment. +- Debugging a `CROSSSLOT` error on `MGET`, `SDIFF`, transactions, or pipelines. +- Implementing transactions / Lua scripts that touch multiple keys. +- Scaling out read traffic without adding shards. + +## 1. Hash tags for multi-key operations + +Redis Cluster distributes keys across 16,384 slots by hashing the key name. Any command that touches multiple keys (`MGET`, `SDIFF`, `SUNIONSTORE`, transactions, pipelines, Lua scripts with multiple `KEYS[]`) requires all keys to live on the same slot - otherwise the server returns a `CROSSSLOT` error. 
+ +Hash tags force this: the part between `{` and `}` is the only thing hashed for slot assignment, so two keys sharing a hash tag always land together. + +```python +# Same slot - multi-key ops work +redis.set("{user:1001}:profile", "...") +redis.set("{user:1001}:settings", "...") +redis.lmove("{user:1001}:pending", "{user:1001}:processed", "LEFT", "RIGHT") +``` + +```python +# Different keys, no hash tag - CROSSSLOT on multi-key commands in cluster mode +redis.set("user:1001:profile", "...") +redis.set("user:1001:settings", "...") +pipe = redis.pipeline() +pipe.get("user:1001:profile") +pipe.get("user:1001:settings") +pipe.execute() # CROSSSLOT error in cluster +``` + +Rules of thumb: + +- Use a tag scoped to the meaningful entity, e.g. `{user:1001}`. Avoid bare `{1001}` - unrelated namespaces (`purchase:{1001}`, `employee:{1001}`) would all collide on the same slot. +- Only tag where you actually need multi-key ops. Tagging everything creates hotspots and defeats the point of sharding. +- A single-key command on a hash-tagged key works fine, so adding tags later is incremental - but renaming keys in production is painful, so plan tagging up front for entities you'll group. + +See [references/hash-tags.md](references/hash-tags.md). + +## 2. Read replicas for read-heavy workloads + +If reads dominate writes, route them to replicas to free primary capacity. Works both in Redis Cluster (each shard has 1+ replica) and in standalone primary/replica replication. 
+ +```python +# Redis Cluster: enable replica reads on the client +from redis.cluster import RedisCluster + +rc = RedisCluster(host="localhost", port=6379, read_from_replicas=True) +rc.set("key", "value") # -> primary +value = rc.get("key") # -> may be served by a replica +``` + +For non-cluster setups, point two clients at the right nodes: + +```python +primary = Redis(host="primary-host", port=6379) +replica = Redis(host="replica-host", port=6379) +primary.set("key", "value") +value = replica.get("key") +``` + +The trade-off is consistency: replicas are eventually consistent. Don't read your own writes from a replica; don't use replica reads for anything that requires strict freshness (financial balances, idempotency state). Good fits: cache layers, analytics, dashboards, recommendation feeds. + +See [references/read-replicas.md](references/read-replicas.md). + +## References + +- [Redis Cluster spec - hash tags](https://redis.io/docs/latest/operate/oss_and_stack/reference/cluster-spec/#hash-tags) +- [Redis: multi-key operations in cluster](https://redis.io/docs/latest/operate/rs/databases/durability-ha/clustering/#multikey-operations) +- [Redis: Replication](https://redis.io/docs/latest/operate/oss_and_stack/management/replication/) diff --git a/plugins/redis-development/skills/redis-clustering/references/hash-tags.md b/plugins/redis-development/skills/redis-clustering/references/hash-tags.md new file mode 100644 index 00000000..cc61e27c --- /dev/null +++ b/plugins/redis-development/skills/redis-clustering/references/hash-tags.md @@ -0,0 +1,69 @@ +# Use Hash Tags for Multi-Key Operations + +In Redis Cluster, keys are distributed across slots based on their hash. Use hash tags to ensure keys that must be used together in [multi-key operations](https://redis.io/docs/latest/operate/rs/databases/durability-ha/clustering/#multikey-operations) are on the same slot. + +Correct: Use hash tags for keys used in multi-key operations. 
+ +Python (redis-py): +```python +# These keys go to the same slot because {user:1001} is the hash tag +redis.set("{user:1001}:profile", "...") +redis.set("{user:1001}:settings", "...") +redis.set("{user:1001}:cart", "...") + +# Now you can use transactions and pipelines +pipe = redis.pipeline() +pipe.get("{user:1001}:profile") +pipe.get("{user:1001}:settings") +pipe.execute() + +# Multi-key commands also work +redis.lmove("{user:1001}:pending", "{user:1001}:processed", "LEFT", "RIGHT") +``` + +Java (Jedis): +```java +import redis.clients.jedis.UnifiedJedis; +import java.util.Set; + +try (UnifiedJedis jedis = new UnifiedJedis("redis://localhost:6379")) { + // Hash tags ensure keys go to the same slot + jedis.sadd("{bikes:racing}:france", "bike:1", "bike:2", "bike:3"); + jedis.sadd("{bikes:racing}:usa", "bike:1", "bike:4"); + + // Multi-key operation works because of matching hash tags + Set<String> result = jedis.sdiff("{bikes:racing}:france", "{bikes:racing}:usa"); +} +``` + +Incorrect: Keys without hash tags that need multi-key operations. 
+ +Python (redis-py): +```python +# Bad: These may be on different slots +redis.set("user:1001:profile", "...") # No hash tag +redis.set("user:1001:settings", "...") + +# This will fail in cluster mode +pipe = redis.pipeline() +pipe.get("user:1001:profile") +pipe.get("user:1001:settings") +pipe.execute() # CROSSSLOT error +``` + +Java (Jedis): +```java +// Bad: No hash tags - keys may be on different slots +jedis.sadd("bikes:racing:france", "bike:1", "bike:2", "bike:3"); +jedis.sadd("bikes:racing:usa", "bike:1", "bike:4"); + +// This will fail in cluster mode with CROSSSLOT error +Set<String> result = jedis.sdiff("bikes:racing:france", "bikes:racing:usa"); +``` + +Hash tag rules: +- Only the part between `{` and `}` is hashed for slot assignment +- Use meaningful identifiers like `{user:1001}` not just `{1001}` to avoid unrelated keys (e.g., `purchase:{1001}`, `employee:{1001}`) saturating the same slot +- Use hash tags only where multi-key operations are needed, not as a general habit + +Reference: [Redis Cluster Key Distribution](https://redis.io/docs/latest/operate/oss_and_stack/reference/cluster-spec/#hash-tags) diff --git a/plugins/redis-development/skills/redis-clustering/references/read-replicas.md b/plugins/redis-development/skills/redis-clustering/references/read-replicas.md new file mode 100644 index 00000000..d926b86a --- /dev/null +++ b/plugins/redis-development/skills/redis-clustering/references/read-replicas.md @@ -0,0 +1,46 @@ +# Use Read Replicas for Read-Heavy Workloads + +For read-heavy workloads, distribute reads across replicas to reduce load on primaries. + +Correct: Configure replica reads in Redis Cluster. 
+ +```python +from redis.cluster import RedisCluster + +rc = RedisCluster( + host='localhost', + port=6379, + read_from_replicas=True # Distribute reads to replicas +) + +# Writes go to primary +rc.set("key", "value") + +# Reads can be served by replicas (eventually consistent) +value = rc.get("key") +``` + +Correct: Use replica reads in standalone replication setup. + +```python +from redis import Redis + +# Connect to primary for writes +primary = Redis(host='primary-host', port=6379) + +# Connect to replica for reads +replica = Redis(host='replica-host', port=6379) + +# Write to primary +primary.set("key", "value") + +# Read from replica (eventually consistent) +value = replica.get("key") +``` + +Considerations: +- Replica reads are eventually consistent +- Don't read from replicas for data that was just written +- Use for read-heavy, slightly-stale-OK workloads (caches, analytics, dashboards) + +Reference: [Redis Replication](https://redis.io/docs/latest/operate/oss_and_stack/management/replication/) diff --git a/plugins/redis-development/skills/redis-connections/SKILL.md b/plugins/redis-development/skills/redis-connections/SKILL.md new file mode 100644 index 00000000..ae3c8e73 --- /dev/null +++ b/plugins/redis-development/skills/redis-connections/SKILL.md @@ -0,0 +1,122 @@ +--- +name: redis-connections +description: Redis client and connection guidance covering connection pooling, multiplexing, pipelining, client-side caching with RESP3, avoiding slow commands (KEYS, SMEMBERS, HGETALL), and tuning socket timeouts. Use when configuring a Redis client (redis-py, Jedis, Lettuce, NRedisStack), batching commands for throughput, eliminating per-request connection creation, iterating large keyspaces with SCAN, enabling client-side caching for read-heavy workloads, or setting connect and read timeouts. +license: MIT +metadata: + author: Redis, Inc. 
+ version: "0.1.0" +--- + +# Redis Connections + +Client-side guidance for talking to Redis efficiently: how to share connections, how to batch commands, which commands not to call in production, when to turn on client-side caching, and how to set timeouts that fail fast without breaking healthy traffic. + +## When to apply + +- Creating or reviewing a Redis client setup (redis-py, Jedis, Lettuce, go-redis, NRedisStack). +- Making many small Redis calls and wondering where the latency is going. +- Iterating large keyspaces, sets, hashes, or lists. +- Enabling client-side caching for hot keys. +- Tuning connect / read / write timeouts. + +## 1. Pool or multiplex - never one connection per request + +The single biggest mistake in Redis client code is opening a new TCP connection for every operation. Always either: + +- Pool - keep N persistent connections that the application leases per call (redis-py `ConnectionPool`, Jedis `JedisPooled`, go-redis client). +- Multiplex - share a single connection across all requests (Lettuce, NRedisStack). + +| Style | Used by | Note | +|---|---|---| +| Pool | redis-py, Jedis, go-redis | Each lease blocks if pool exhausted; size the pool to your concurrency | +| Multiplex | Lettuce, NRedisStack | Single connection; cannot carry blocking commands like `BLPOP` | + +```python +# redis-py - connection pool +pool = redis.ConnectionPool(host="localhost", port=6379, max_connections=50) +r = redis.Redis(connection_pool=pool) +``` + +See [references/pooling.md](references/pooling.md) for Python + Java + Lettuce examples. + +## 2. Pipeline bulk work + +For N commands that don't depend on each other's results, send them as a single batch with pipelining. One round-trip instead of N. 
+ +```python +pipe = redis.pipeline() +for user_id in user_ids: + pipe.get(f"user:{user_id}") +results = pipe.execute() +``` + +Use non-transactional pipelining for performance, and `pipeline(transaction=True)` only when you actually need atomicity (see redis-core's transactions guidance). + +See [references/pipelining.md](references/pipelining.md). + +## 3. Avoid commands that scan everything + +Anything that walks the whole keyspace (or a whole large container) blocks the server. Use incremental variants instead. + +| Don't | Use | +|---|---| +| `KEYS pattern` | `SCAN` cursor loop | +| `SMEMBERS large_set` | `SSCAN` | +| `HGETALL large_hash` | `HSCAN` | +| `LRANGE 0 -1` on a huge list | Paginate (`LRANGE 0 100`) | + +```python +cursor = 0 +while True: + cursor, keys = redis.scan(cursor, match="user:*", count=100) + for key in keys: + process(key) + if cursor == 0: + break +``` + +Blocking commands (`BLPOP`, `BRPOP`, `BLMOVE`) are different - they intentionally wait for data and are fine for queue consumers, but always pass a timeout, and don't issue them on a multiplexed connection (Lettuce, NRedisStack). + +See [references/blocking.md](references/blocking.md). + +## 4. Client-side caching for hot keys + +For data that's read often and written rarely (config, feature flags, sessions on every request), enable RESP3 client-side caching. The client keeps a local copy and the server invalidates it on writes - saving the round trip for hot reads. + +```python +client = redis.Redis( + host="localhost", + port=6379, + protocol=3, # RESP3 is required + cache_config=redis.CacheConfig(max_size=1000), +) +``` + +Skip it for write-heavy workloads or data that changes constantly - the invalidation traffic overruns the savings. + +See [references/client-cache.md](references/client-cache.md). + +## 5. Set explicit timeouts + +Defaults vary by client and may be too generous. 
Pick values that match the *application's* failure model: + +```python +r = redis.Redis( + host="localhost", + socket_connect_timeout=2.0, # fail fast on dead nodes + socket_timeout=5.0, # tune to expected operation time + retry_on_timeout=True, +) +``` + +Rule of thumb: connect timeout shorter than read/write timeout. Tight timeouts + retry-on-timeout for latency-sensitive paths; longer timeouts for batch jobs. + +See [references/timeouts.md](references/timeouts.md). + +## References + +- [Redis: Connection Pools and Multiplexing](https://redis.io/docs/latest/develop/clients/pools-and-muxing/) +- [Redis: Pipelining](https://redis.io/docs/latest/develop/use/pipelining/) +- [Redis: SCAN](https://redis.io/docs/latest/commands/scan/) +- [Redis: Client-side caching](https://redis.io/docs/latest/develop/clients/client-side-caching/) +- [Redis: Clients](https://redis.io/docs/latest/develop/clients/) diff --git a/plugins/redis-development/skills/redis-connections/references/blocking.md b/plugins/redis-development/skills/redis-connections/references/blocking.md new file mode 100644 index 00000000..5355e1ab --- /dev/null +++ b/plugins/redis-development/skills/redis-connections/references/blocking.md @@ -0,0 +1,66 @@ +# Avoid Slow Commands in Production + +Some Redis commands are slow because they scan large datasets. Use incremental alternatives to avoid blocking the server. + +| Avoid | Use Instead | +|-------|-------------| +| `KEYS *` | `SCAN` with cursor | +| `SMEMBERS` on large sets | `SSCAN` | +| `HGETALL` on large hashes | `HSCAN` | +| `LRANGE 0 -1` on large lists | Paginate with `LRANGE 0 100` | + +Correct: Use SCAN for iteration. 
+ +Python (redis-py): +```python +# Good: Non-blocking iteration +cursor = 0 +while True: + cursor, keys = redis.scan(cursor, match="user:*", count=100) + for key in keys: + process(key) + if cursor == 0: + break +``` + +Java (Jedis): +```java +import redis.clients.jedis.ScanIteration; +import redis.clients.jedis.UnifiedJedis; +import java.util.List; + +try (UnifiedJedis jedis = new UnifiedJedis("redis://localhost:6379")) { + // ScanIteration manages the cursor automatically + ScanIteration scan = jedis.scanIteration(10, "user:*", "hash"); + + while (!scan.isIterationCompleted()) { + List<String> result = scan.nextBatch().getResult(); + for (String key : result) { + process(key); + } + } +} +``` + +Incorrect: Using KEYS in production. + +Python (redis-py): +```python +# Bad: Scans all keys, slow on large datasets +keys = redis.keys("user:*") +``` + +Java (Jedis): +```java +// Bad: Scans all keys, blocks the server +Set<String> result = jedis.keys("*"); +``` + +Note: Truly blocking commands (like `BLPOP`, `BRPOP`, `BLMOVE`) that wait indefinitely for data are appropriate for some use cases like job queues, but should be used with timeouts. + +```python +# Blocking pop with timeout - appropriate for queue consumers +result = redis.blpop("task_queue", timeout=5) +``` + +Reference: [Redis SCAN](https://redis.io/docs/latest/commands/scan/) diff --git a/plugins/redis-development/skills/redis-connections/references/client-cache.md b/plugins/redis-development/skills/redis-connections/references/client-cache.md new file mode 100644 index 00000000..3ff9b19f --- /dev/null +++ b/plugins/redis-development/skills/redis-connections/references/client-cache.md @@ -0,0 +1,60 @@ +# Use Client-Side Caching for Frequently Read Data + +Use a connection with client-side caching enabled for any data that will be read frequently but written only occasionally. 
Client-side caching avoids contacting the server for repeated access to data that has recently been read, reducing network traffic and improving performance. + +Correct: Enable client-side caching with RESP3 protocol for frequently accessed data. + +Python (redis-py): +```python +import redis + +# Enable client-side caching with RESP3 +client = redis.Redis( + host='localhost', + port=6379, + protocol=3, # RESP3 required for client-side caching + cache_config=redis.CacheConfig(max_size=1000) +) + +# Cached reads avoid server round-trips +value = client.get("frequently:read:key") +``` + +Java (Jedis): +```java +import redis.clients.jedis.DefaultJedisClientConfig; +import redis.clients.jedis.UnifiedJedis; +import redis.clients.jedis.HostAndPort; +import redis.clients.jedis.CacheConfig; + +HostAndPort endpoint = new HostAndPort("localhost", 6379); + +DefaultJedisClientConfig config = DefaultJedisClientConfig + .builder() + .password("secretPassword") + .protocol(RedisProtocol.RESP3) + .build(); + +CacheConfig cacheConfig = CacheConfig.builder().maxSize(1000).build(); + +UnifiedJedis client = new UnifiedJedis(endpoint, config, cacheConfig); +``` + +When to use: +- Configuration data read frequently, updated rarely +- User session data accessed on every request +- Feature flags or settings checked repeatedly +- Any read-heavy workload with low write frequency + +When NOT needed: +- Data that changes frequently (cache invalidation overhead outweighs benefits) +- Write-heavy workloads +- Simple applications where network latency is not a bottleneck +- When you need guaranteed real-time consistency + +Trade-offs: +- Adds memory overhead on the client +- Requires RESP3 protocol +- Cache invalidation adds complexity for frequently changing data + +Reference: [Client-side caching](https://redis.io/docs/latest/develop/clients/client-side-caching/) diff --git a/plugins/redis-development/skills/redis-connections/references/pipelining.md 
b/plugins/redis-development/skills/redis-connections/references/pipelining.md new file mode 100644 index 00000000..59496f88 --- /dev/null +++ b/plugins/redis-development/skills/redis-connections/references/pipelining.md @@ -0,0 +1,48 @@ +# Use Pipelining for Bulk Operations + +Batch multiple commands into a single round trip to reduce network latency. + +Correct: Use pipeline for multiple commands. + +Python (redis-py): +```python +# Good: Single round trip for multiple commands +pipe = redis.pipeline() +for user_id in user_ids: + pipe.get(f"user:{user_id}") +results = pipe.execute() +``` + +Java (Jedis): +```java +import redis.clients.jedis.Pipeline; + +// Good: Buffer commands and send as single batch +Pipeline pipe = (Pipeline) jedis.pipelined(); + +pipe.set("person:1:name", "Alex"); +pipe.set("person:1:rank", "Captain"); +pipe.set("person:1:serial", "AB1234"); + +pipe.sync(); +``` + +Incorrect: Sequential commands in a loop. + +Python (redis-py): +```python +# Bad: N round trips +results = [] +for user_id in user_ids: + results.append(redis.get(f"user:{user_id}")) +``` + +Java (Jedis): +```java +// Bad: 3 separate round trips +jedis.set("person:1:name", "Alex"); +jedis.set("person:1:rank", "Captain"); +jedis.set("person:1:serial", "AB1234"); +``` + +Reference: [Redis Pipelining](https://redis.io/docs/latest/develop/use/pipelining/) diff --git a/plugins/redis-development/skills/redis-connections/references/pooling.md b/plugins/redis-development/skills/redis-connections/references/pooling.md new file mode 100644 index 00000000..579aa458 --- /dev/null +++ b/plugins/redis-development/skills/redis-connections/references/pooling.md @@ -0,0 +1,62 @@ +# Use Connection Pooling or Multiplexing + +Reuse connections via a pool or multiplexing instead of creating new connections per request. + +Correct: Use a connection pool. 
+ +Python (redis-py): +```python +import redis + +# Good: Connection pool - reuses existing connections +pool = redis.ConnectionPool(host='localhost', port=6379, max_connections=50) +r = redis.Redis(connection_pool=pool) +``` + +Java (Jedis): +```java +import redis.clients.jedis.JedisPooled; + +// JedisPooled manages a connection pool internally +try (JedisPooled jedis = new JedisPooled("redis://localhost:6379")) { + jedis.set("testKey", "testValue"); +} +``` + +Correct: Use multiplexing (Lettuce, NRedisStack). + +```java +// Lettuce uses multiplexing by default - single connection handles all traffic +RedisClient client = RedisClient.create("redis://localhost:6379"); +StatefulRedisConnection<String, String> connection = client.connect(); + +// All commands share the single connection efficiently +connection.sync().set("key", "value"); +``` + +Incorrect: Creating new connections per request. + +Python (redis-py): +```python +# Bad: New connection every time +def get_user(user_id): + r = redis.Redis(host='localhost', port=6379) # Don't do this + return r.get(f"user:{user_id}") +``` + +Java (Jedis): +```java +// Bad: Creating new client per request +public String getUser(String userId) { + try (UnifiedJedis jedis = new UnifiedJedis("redis://localhost:6379")) { + return jedis.get("user:" + userId); // Don't do this + } +} +``` + +Pooling vs Multiplexing: +- Pooling: Multiple connections shared across requests (redis-py, Jedis, go-redis) +- Multiplexing: Single connection handles all traffic (NRedisStack, Lettuce) +- Multiplexing cannot support blocking commands (BLPOP, etc.)
as they would stall all callers + +Reference: [Connection Pools and Multiplexing](https://redis.io/docs/latest/develop/clients/pools-and-muxing/) diff --git a/plugins/redis-development/skills/redis-connections/references/timeouts.md b/plugins/redis-development/skills/redis-connections/references/timeouts.md new file mode 100644 index 00000000..f981502f --- /dev/null +++ b/plugins/redis-development/skills/redis-connections/references/timeouts.md @@ -0,0 +1,32 @@ +# Configure Connection Timeouts + +Configure appropriate timeout values to improve your application's connection resilience. While most Redis clients set default timeouts, choosing well-tuned values based on your application's usage patterns leads to better failure recovery. + +Correct: Set timeouts based on your application needs. + +```python +r = redis.Redis( + host='localhost', + socket_timeout=5.0, # Read/write timeout - tune based on expected operation time + socket_connect_timeout=2.0, # Connection timeout - shorter for fast failure detection + retry_on_timeout=True # Automatic retry on timeout +) +``` + +Incorrect: Relying solely on defaults without considering your use case. 
+ +```python +# Not ideal: Default timeouts may not match your application's needs +r = redis.Redis(host='localhost') + +# For example, if your app needs fast failure detection, +# the default timeouts might be too generous +``` + +Considerations: +- Set `socket_connect_timeout` shorter than `socket_timeout` for quick connection failure detection +- For latency-sensitive apps, use tighter timeouts with retry logic +- For batch operations, allow longer timeouts to complete large operations +- Consider using health checks alongside timeouts for robust failure handling + +Reference: [Redis Client Configuration](https://redis.io/docs/latest/develop/clients/) diff --git a/plugins/redis-development/skills/redis-core/SKILL.md b/plugins/redis-development/skills/redis-core/SKILL.md new file mode 100644 index 00000000..731bac72 --- /dev/null +++ b/plugins/redis-development/skills/redis-core/SKILL.md @@ -0,0 +1,67 @@ +--- +name: redis-core +description: Core Redis modeling guidance - choose the right data structure (String, Hash, List, Set, Sorted Set, JSON, Stream, Vector Set) and use consistent colon-separated key names. Use when designing a Redis data model, caching objects, deciding between Hash and JSON, building counters, leaderboards, membership sets, or session stores, or when reviewing/cleaning up Redis key naming. +license: MIT +metadata: + author: Redis, Inc. + version: "0.1.0" +--- + +# Redis Core + +Foundational guidance for modeling data in Redis. Covers data-type selection and key-name conventions - the two decisions that most directly drive memory, performance, and maintainability. + +## When to apply + +- Caching objects, sessions, or per-user state. +- Counters, leaderboards, recent-items lists, unique-membership sets. +- Reviewing or refactoring Redis key names. +- Deciding between a Redis Hash and a JSON document for an entity. + +## 1. Choose the right data structure + +Pick the type that matches the *access pattern*, not just the shape of the data. 
+ +| Use case | Recommended type | Why | +|---|---|---| +| Simple values, counters | String | Atomic `INCR`/`DECR`, `SET`/`GET` | +| Object with independently updated fields | Hash | Per-field reads/writes, no whole-object rewrite | +| Queue, recent-N items | List | O(1) push/pop at ends | +| Unique items, membership checks | Set | O(1) `SADD`/`SISMEMBER`/`SCARD` | +| Rankings, score-based ranges | Sorted Set | Score-ordered; `ZADD`/`ZRANGE`/`ZRANK` | +| Nested / hierarchical data | JSON | Path-level updates, nested arrays, RQE indexing | +| Event log, fan-out messaging | Stream | Persistent, consumer groups | +| Vector similarity | Vector Set | Native vector storage with HNSW | + +Common anti-pattern: stuffing a flat object into a serialized string. Updating one field means fetch + parse + mutate + rewrite. Use a Hash instead. + +See [references/choose-data-structure.md](references/choose-data-structure.md) for full rationale and Python/Java examples. + +## 2. Use consistent key names + +Use `colon-separated` segments with a stable hierarchy: + +``` +{entity}:{id}:{attribute} +user:1001:profile +user:1001:settings +order:2024:items +session:abc123 +article:987:likes +game:space-invaders:leaderboard +``` + +Rules of thumb: + +- Lowercase, colon-separated. No spaces, no mixed casing (`User_1001_Profile` is bad). +- Keep keys short but readable - keys live in memory and appear in every command. +- Don't use full URLs or long strings as keys. Extract a short identifier, or use a hash digest of the URL. +- Prefix for multi-tenancy (`tenant:42:user:7:cart`) so scans and ACLs can target a tenant cleanly. +- Be consistent. Pick one convention per service and apply it across all keys. + +See [references/key-naming.md](references/key-naming.md) for cleanup examples and edge cases. 
+ +## References + +- [Redis: Choosing the right data type](https://redis.io/docs/latest/develop/data-types/compare-data-types/) +- [Redis: Keys](https://redis.io/docs/latest/develop/use/keyspace/) diff --git a/plugins/redis-development/skills/redis-core/references/choose-data-structure.md b/plugins/redis-development/skills/redis-core/references/choose-data-structure.md new file mode 100644 index 00000000..f518e351 --- /dev/null +++ b/plugins/redis-development/skills/redis-core/references/choose-data-structure.md @@ -0,0 +1,67 @@ +# Choose the Right Data Structure + +Selecting the appropriate Redis data type for your use case is fundamental to performance and memory efficiency. + +| Use Case | Recommended Type | Why | +|----------|------------------|-----| +| Simple values, counters | String | Fast, atomic operations | +| Object with fields | Hash | Memory efficient, partial updates, field-level expiration | +| Queue, recent items | List | O(1) push/pop at ends | +| Unique items, membership | Set | O(1) add/remove/check | +| Rankings, ranges | Sorted Set | Score-based ordering | +| Nested/hierarchical data | JSON | Path queries, nested structures, geospatial indexing with RQE | +| Event logs, messaging | Stream | Persistent, consumer groups | +| Similarity search | Vector Set | Native vector storage with built-in HNSW indexing | + +Incorrect: Using strings for everything. 
+ +Python (redis-py): +```python +# Storing object as JSON string loses atomic field updates +redis.set("user:1001", json.dumps({"name": "Alice", "email": "alice@example.com"})) + +# To update email, must fetch, parse, modify, and rewrite entire object +user = json.loads(redis.get("user:1001")) +user["email"] = "new@example.com" +redis.set("user:1001", json.dumps(user)) +``` + +Java (Jedis): +```java +// Bad: Storing as delimited string requires manual parsing +jedis.set("bicycle", "Deimos;Ergonom;Enduro bikes;4972"); +String bike = jedis.get("bicycle"); +String[] fields = bike.split(";"); +String model = fields[0]; // Fragile and error-prone +``` + +Correct: Use Hash for objects with fields. + +Python (redis-py): +```python +# Hash allows atomic field updates +redis.hset("user:1001", mapping={"name": "Alice", "email": "alice@example.com"}) + +# Update single field without touching others +redis.hset("user:1001", "email", "new@example.com") +``` + +Java (Jedis): +```java +import java.util.Map; +import java.util.HashMap; + +// Good: Hash models properties naturally +Map<String, String> hashFields = new HashMap<>(); +hashFields.put("model", "Deimos"); +hashFields.put("brand", "Ergonom"); +hashFields.put("type", "Enduro bikes"); +hashFields.put("price", "4972"); + +jedis.hset("bicycle", hashFields); + +// Read individual field +String model = jedis.hget("bicycle", "model"); +``` + +Reference: [Choosing the Right Data Type](https://redis.io/docs/latest/develop/data-types/compare-data-types/) diff --git a/plugins/redis-development/skills/redis-core/references/key-naming.md b/plugins/redis-development/skills/redis-core/references/key-naming.md new file mode 100644 index 00000000..028dfae7 --- /dev/null +++ b/plugins/redis-development/skills/redis-core/references/key-naming.md @@ -0,0 +1,53 @@ +# Use Consistent Key Naming Conventions + +Well-structured key names improve code maintainability, debugging, and enable efficient key scanning.
+ +Correct: Use colons as separators with a consistent hierarchy. + +``` +# Pattern: service:entity:id:attribute +user:1001:profile +user:1001:settings +order:2024:items +cache:api:users:list +session:abc123 +``` + +Python (redis-py): +```python +# Good: Short, meaningful key +redis.set("product:8361", cached_html) +page = redis.get("product:8361") +``` + +Java (Jedis): +```java +// Good: Short, meaningful key derived from URL +jedis.set("product:8361", "<some cached HTML>"); +String page = jedis.get("product:8361"); +``` + +Incorrect: Inconsistent naming, spaces, or very long keys. + +``` +# These cause confusion and waste memory +User_1001_Profile +my key with spaces +com.mycompany.myapp.production.users.profile.data.1001 +``` + +Java (Jedis): +```java +// Bad: Using full URL as key wastes memory and slows comparisons +jedis.set("http://www.verylongurlkey.com/store/products/product.html?id=8361", + "<some cached HTML>"); +``` + +Key naming tips: +- Keep keys short but readable - they consume memory +- Consider key prefixes for multi-tenant applications +- Extract short identifiers from URLs or long strings rather than using the whole thing +- For large binary values, consider using a hash digest as the key instead of the value itself +- Use consistent separators (colons are conventional) + +Reference: [Redis Keys](https://redis.io/docs/latest/develop/use/keyspace/) diff --git a/plugins/redis-development/skills/redis-observability/SKILL.md b/plugins/redis-development/skills/redis-observability/SKILL.md new file mode 100644 index 00000000..6638ecd7 --- /dev/null +++ b/plugins/redis-development/skills/redis-observability/SKILL.md @@ -0,0 +1,78 @@ +--- +name: redis-observability +description: Redis observability guidance - which metrics to monitor (memory, connections, hit ratio, ops/sec, rejected connections), which built-in commands to reach for during incident triage (SLOWLOG, INFO, MEMORY DOCTOR, CLIENT LIST, FT.PROFILE), and when to use the Redis Insight GUI.
Use when setting up monitoring or alerts for a Redis instance, diagnosing a performance regression, profiling a slow FT.SEARCH query, or wiring Redis metrics into Prometheus, Datadog, or similar. +license: MIT +metadata: + author: Redis, Inc. + version: "0.1.0" +--- + +# Redis Observability + +What to watch, what to run, and what to alert on. Covers the metrics every Redis deployment should monitor and the built-in commands for ad-hoc diagnosis. + +## When to apply + +- Setting up monitoring or alerts for a Redis instance. +- Diagnosing a Redis performance regression (high latency, memory pressure, connection storms). +- Profiling a slow `FT.SEARCH` or pipeline. +- Wiring Redis metrics into Prometheus, Datadog, CloudWatch, or similar. + +## 1. Monitor these metrics + +These come from `INFO` and should be exported to your monitoring system. + +| Metric | What it tells you | Alert when | +|---|---|---| +| `used_memory` | Current memory usage | > 80% of `maxmemory` | +| `connected_clients` | Open connections | Sudden spikes or drops | +| `blocked_clients` | Clients waiting on blocking ops | > 0 sustained | +| `instantaneous_ops_per_sec` | Current throughput | Significant drops | +| `keyspace_hits` / `keyspace_misses` | Cache hit ratio | Hit ratio < 80% | +| `rejected_connections` | Hit `maxclients` cap | > 0 | +| `rdb_last_save_time` | Last persistence snapshot | Too old vs. RPO | + +```python +info = redis.info() +hit_ratio = info["keyspace_hits"] / max(1, info["keyspace_hits"] + info["keyspace_misses"]) +print(f"Memory: {info['used_memory_human']}") +print(f"Clients: {info['connected_clients']}") +print(f"Ops/sec: {info['instantaneous_ops_per_sec']}") +print(f"Hit ratio: {hit_ratio:.1%}") +``` + +See [references/metrics.md](references/metrics.md). + +## 2. Built-in commands for debugging + +Reach for these when something looks off. 
+ +| Topic | Command | +|---|---| +| Slow commands | `SLOWLOG GET 10` / `SLOWLOG LEN` | +| Server snapshot | `INFO all` (or `INFO memory` / `INFO stats` / `INFO clients` / `INFO replication`) | +| Memory diagnostics | `MEMORY DOCTOR` / `MEMORY STATS` / `MEMORY USAGE <key>` | +| Connections | `CLIENT LIST` / `CLIENT INFO` | +| RQE / Search | `FT.INFO <index>` / `FT.PROFILE <index> SEARCH QUERY "..."` | + +The two most useful for incident triage are `SLOWLOG GET` and `MEMORY DOCTOR`: + +- `SLOWLOG GET` to find queries that exceeded the `slowlog-log-slower-than` threshold (10ms by default). The output shows the exact command and duration in microseconds. +- Caution: `SLOWLOG RESET` clears diagnostic history. Use it only after exporting needed entries and getting explicit confirmation. +- `MEMORY DOCTOR` for memory pressure - it returns a one-paragraph summary of what's unusual about memory usage right now. + +```python +for entry in redis.slowlog_get(10): + print(f"{entry['duration']}us {entry['command']}") +``` + +See [references/commands.md](references/commands.md). + +## 3. Redis Insight + +For interactive use (running queries, browsing keys, profiling indexes), [Redis Insight](https://redis.io/insight/) is the official GUI. It surfaces the same `SLOWLOG` / `INFO` / `FT.PROFILE` data visually and includes Redis Copilot for natural-language queries. Useful during development and incident response; not a replacement for exporting metrics to your monitoring system.
+ +## References + +- [Redis: Latency monitoring](https://redis.io/docs/latest/operate/oss_and_stack/management/optimization/latency/) +- [Redis Insight](https://redis.io/insight/) diff --git a/plugins/redis-development/skills/redis-observability/references/commands.md b/plugins/redis-development/skills/redis-observability/references/commands.md new file mode 100644 index 00000000..adeeb3ed --- /dev/null +++ b/plugins/redis-development/skills/redis-observability/references/commands.md @@ -0,0 +1,45 @@ +# Use Observability Commands for Debugging + +Redis provides built-in commands for monitoring and debugging. + +Key commands: + +``` +# Slow query log - find slow commands +SLOWLOG GET 10 +SLOWLOG LEN +# SLOWLOG RESET clears history; run only after export and explicit confirmation. + +# Server info - comprehensive stats +INFO all +INFO memory +INFO stats +INFO replication +INFO clients + +# Memory analysis +MEMORY DOCTOR +MEMORY STATS +MEMORY USAGE mykey + +# Client connections +CLIENT LIST +CLIENT INFO + +# Index info (RQE) +FT.INFO idx:products +FT.PROFILE idx:products SEARCH QUERY "@name:laptop" +``` + +Do not run `SLOWLOG RESET` during incident triage unless the user has confirmed the current slow log is exported or no longer needed. + +Correct: Check SLOWLOG regularly. 
+ +```python +# Get recent slow queries +slow_queries = redis.slowlog_get(10) +for query in slow_queries: + print(f"Duration: {query['duration']}us, Command: {query['command']}") +``` + +Reference: [Redis Monitoring](https://redis.io/docs/latest/operate/oss_and_stack/management/optimization/latency/) diff --git a/plugins/redis-development/skills/redis-observability/references/metrics.md b/plugins/redis-development/skills/redis-observability/references/metrics.md new file mode 100644 index 00000000..7408528f --- /dev/null +++ b/plugins/redis-development/skills/redis-observability/references/metrics.md @@ -0,0 +1,29 @@ +# Monitor Key Redis Metrics + +Track these metrics to catch issues before they impact users. + +| Metric | What It Tells You | Alert When | +|--------|-------------------|------------| +| `used_memory` | Current memory usage | > 80% of maxmemory | +| `connected_clients` | Number of connections | Sudden spikes or drops | +| `blocked_clients` | Clients waiting on blocking ops | > 0 sustained | +| `instantaneous_ops_per_sec` | Current throughput | Significant drops | +| `keyspace_hits/misses` | Cache hit ratio | Hit ratio < 80% | +| `rejected_connections` | Connection limit issues | > 0 | +| `rdb_last_save_time` | Last persistence snapshot | Too old | + +Correct: Export metrics to your monitoring system. + +```python +# Get key metrics +info = redis.info() +print(f"Memory: {info['used_memory_human']}") +print(f"Connections: {info['connected_clients']}") +print(f"Ops/sec: {info['instantaneous_ops_per_sec']}") +print(f"Hit ratio: {info['keyspace_hits'] / max(1, info['keyspace_hits'] + info['keyspace_misses']) * 100:.1f}%") +``` + +Redis Insight: +Use Redis Insight for visual monitoring, query profiling, and debugging. It includes Redis Copilot for natural language queries.
+ +Reference: [Redis Insight](https://redis.io/insight/) diff --git a/plugins/redis-development/skills/redis-query-engine/SKILL.md b/plugins/redis-development/skills/redis-query-engine/SKILL.md new file mode 100644 index 00000000..878653b4 --- /dev/null +++ b/plugins/redis-development/skills/redis-query-engine/SKILL.md @@ -0,0 +1,132 @@ +--- +name: redis-query-engine +description: Redis Query Engine (RQE) guidance covering FT.CREATE schema design, field type selection (TEXT, TAG, NUMERIC, GEO, GEOSHAPE, VECTOR), DIALECT 2 query syntax, efficient FT.SEARCH and FT.AGGREGATE queries, zero-downtime index updates via aliases, and the SKIPINITIALSCAN option. Use when defining a search index on Hash or JSON documents, picking between TEXT and TAG for filtering, writing FT.SEARCH queries with filters and SORTBY, managing or swapping indexes in production, or troubleshooting slow searches with FT.PROFILE. +license: MIT +metadata: + author: Redis, Inc. + version: "0.1.0" +--- + +# Redis Query Engine + +Guidance for using the Redis Query Engine (RQE) to index and search Hash or JSON documents. Covers schema design with `FT.CREATE`, field-type choices, query syntax, index lifecycle management, and the most common performance pitfalls. + +## When to apply + +- Creating, modifying, or reviewing an RQE index (`FT.CREATE`, `FT.ALTER`). +- Writing or optimizing `FT.SEARCH` / `FT.AGGREGATE` queries. +- Deciding between `TEXT`, `TAG`, `NUMERIC`, `GEO`, `GEOSHAPE`, or `VECTOR` for a field. +- Rolling out a new index schema without downtime. +- Spinning up an index that should only cover newly written keys. + +## 1. Use DIALECT 2 (the modern default) + +`DIALECT 2` is the baseline. Other dialects (1, 3, 4) are deprecated as of Redis 8. Most modern client libraries already default to it - but specify it explicitly in raw commands for portability. + +``` +FT.SEARCH idx:products "@name:laptop" DIALECT 2 +``` + +`DIALECT 2` is required for vector search queries. 
It also handles special characters and NULLs predictably. + +See [references/dialect.md](references/dialect.md). + +## 2. Pick the right field type + +The field type decides both what you can query and how fast that query is. Use the narrowest type that supports your access pattern. + +| Field type | Use when | Notes | +|---|---|---| +| `TEXT` | Full-text search needed | Tokenized + stemmed; not for exact match | +| `TAG` | Exact match / filtering | Add `SORTABLE UNF` for fastest tag queries | +| `NUMERIC` | Range queries, sorting | Prices, counts, timestamps | +| `GEO` | Lat/long point queries | Single points (stores, users) | +| `GEOSHAPE` | Polygon / area queries | Delivery zones, regions | +| `VECTOR` | Similarity search | HNSW or FLAT; see redis-vector-search | + +The classic mistake is using `TEXT` for a category or status field because "it's a string." `TAG` is 10x faster for those. + +See [references/field-types.md](references/field-types.md). + +## 3. Index only what you query - and always set a prefix + +`FT.CREATE` without a `PREFIX` indexes every Hash (or JSON) key in the database; with a wide schema it can blow up index size and write latency. + +``` +FT.CREATE idx:products ON HASH PREFIX 1 product: + SCHEMA + name TEXT WEIGHT 2.0 + category TAG SORTABLE + price NUMERIC SORTABLE + location GEO +``` + +Rules of thumb: + +- Start with the minimum schema. Add fields as new query patterns emerge. +- Always set `PREFIX` (or filter via `FILTER` expression). +- Use `FT.INFO idx:<name>` to monitor index size after adding fields. +- Use `SORTABLE` only on fields you actually sort by; it has a memory cost. + +See [references/index-creation.md](references/index-creation.md). + +## 4. Zero-downtime index updates - use aliases + +For schema changes in production, keep application queries pointed at an alias and swap the underlying index. + +``` +FT.CREATE idx:products_v2 ON HASH PREFIX 1 product: SCHEMA ...
+FT.ALIASUPDATE products idx:products_v2 + +# App queries are stable: +FT.SEARCH products "@category:{electronics}" +``` + +Useful management commands: `FT.INFO`, `FT._LIST`, `FT.ALIASADD/UPDATE/DEL`. Treat `FT.DROPINDEX` as destructive for application query surfaces; use it only in development or after explicit production confirmation and rollback planning. + +See [references/index-management.md](references/index-management.md). + +## 5. SKIPINITIALSCAN - only when historical data is irrelevant + +By default `FT.CREATE` walks all existing keys that match the prefix and indexes them. Use `SKIPINITIALSCAN` only when: + +- You're standing up the index for a *new* feature and existing data shouldn't be queryable. +- Existing data is too large to scan synchronously. +- You're indexing event streams where only future events matter. + +For most schema migrations, the default (scan everything) is what you want. + +See [references/skip-initial-scan.md](references/skip-initial-scan.md). + +## 6. Write specific queries, not `*` + +Narrow the result set with filters before paging or aggregating. + +``` +# Good - specific filter, limited fields returned +FT.SEARCH idx:products "@category:{electronics} @price:[100 500]" + LIMIT 0 20 + RETURN 3 name price category +``` + +``` +# Bad - full scan plus oversized LIMIT +FT.SEARCH idx:products "*" LIMIT 0 10000 +``` + +Other levers: + +- `SORTBY` requires `SORTABLE` on the sort field. Without it, sort is slow. +- Keep `LIMIT` small; a large page size forces the engine to collect and return every result up to that limit. +- `RETURN` specific fields - don't fetch the whole document if you only need a few. +- Profile with `FT.PROFILE idx:<name> SEARCH QUERY "<query>"` when a query is slow. + +See [references/query-optimization.md](references/query-optimization.md).
+ +## References + +- [Redis: Query Engine - Indexing](https://redis.io/docs/latest/develop/interact/search-and-query/indexing/) +- [Redis: Query syntax](https://redis.io/docs/latest/develop/interact/search-and-query/query/) +- [Redis: Query dialects](https://redis.io/docs/latest/develop/interact/search-and-query/advanced-concepts/dialects/) +- [Redis: Administration (aliases, dropindex)](https://redis.io/docs/latest/develop/interact/search-and-query/administration/) +- [FT.CREATE](https://redis.io/docs/latest/commands/ft.create/) diff --git a/plugins/redis-development/skills/redis-query-engine/references/dialect.md b/plugins/redis-development/skills/redis-query-engine/references/dialect.md new file mode 100644 index 00000000..22d67f6d --- /dev/null +++ b/plugins/redis-development/skills/redis-query-engine/references/dialect.md @@ -0,0 +1,38 @@ +# Use DIALECT 2 for Query Syntax + +Use DIALECT 2 for consistent query behavior. Many Redis client libraries now default to DIALECT 2, and other dialects (1, 3, 4) are deprecated as of Redis 8. + +Correct: Use DIALECT 2 explicitly or rely on modern client defaults. + +```python +from redis import Redis + +r = Redis() + +# Modern redis-py (6.0+) defaults to DIALECT 2 +# You can also set it explicitly +results = r.ft("idx:products").search( + "@name:laptop", + dialect=2 +) +``` + +``` +# In raw commands, specify DIALECT 2 +FT.SEARCH idx:products "@name:laptop" DIALECT 2 + +FT.AGGREGATE idx:products "@category:{electronics}" + GROUPBY 1 @category + REDUCE COUNT 0 AS count + DIALECT 2 +``` + +Note: DIALECT 2 is required for vector search queries. Most modern client libraries (redis-py 6.0+, go-redis, Lettuce) now use DIALECT 2 by default. 
+ +Why DIALECT 2: +- Consistent handling of special characters +- Better NULL value handling +- More predictable query parsing +- Required for vector search + +Reference: [Query Dialects](https://redis.io/docs/latest/develop/interact/search-and-query/advanced-concepts/dialects/) diff --git a/plugins/redis-development/skills/redis-query-engine/references/field-types.md b/plugins/redis-development/skills/redis-query-engine/references/field-types.md new file mode 100644 index 00000000..26f20cb5 --- /dev/null +++ b/plugins/redis-development/skills/redis-query-engine/references/field-types.md @@ -0,0 +1,72 @@ +# Choose the Correct Field Type + +Each field type has different capabilities and performance characteristics. + +| Field Type | Use When | Notes | +|------------|----------|-------| +| TEXT | Full-text search needed | Tokenized, stemmed | +| TAG | Exact match, filtering | Faster than TEXT for filtering | +| NUMERIC | Range queries, sorting | Use for prices, counts, timestamps | +| GEO | Point location queries | Lat/long coordinates (single points) | +| GEOSHAPE | Area/region queries | Polygons, circles, rectangles | +| VECTOR | Similarity search | HNSW or FLAT algorithm | + +Correct: Use TAG for exact matching. + +``` +# Good: TAG for exact category matching +FT.CREATE idx:products ON HASH PREFIX 1 product: + SCHEMA + category TAG SORTABLE + status TAG +``` + +Java (Jedis): +```java +import redis.clients.jedis.search.*; + +Schema schema = new Schema() + .addTextField("name", 1) + .addTagField("categories"); // TAG for exact matching + +IndexDefinition def = new IndexDefinition(IndexDefinition.Type.HASH); + +jedis.ftCreate("idx", IndexOptions.defaultOptions().setDefinition(def), schema); + +// Query with TAG syntax +SearchResult result = jedis.ftSearch("idx", "@categories:{chef|runner}"); +``` + +Incorrect: Using TEXT when you don't need full-text features. 
+ +``` +# Overkill: TEXT for category adds unnecessary tokenization +FT.CREATE idx:products ON HASH PREFIX 1 product: + SCHEMA + category TEXT + status TEXT +``` + +Java (Jedis): +```java +// Bad: TEXT for categories adds unnecessary overhead +Schema schema = new Schema() + .addTextField("name", 1) + .addTextField("categories", 1); // Overkill for exact matching +``` + +Correct: Use GEO for points, GEOSHAPE for areas. + +``` +# GEO for point locations (stores, users) +FT.CREATE idx:stores ON HASH PREFIX 1 store: + SCHEMA + location GEO + +# GEOSHAPE for areas (delivery zones, boundaries) +FT.CREATE idx:zones ON JSON PREFIX 1 zone: + SCHEMA + $.boundary AS boundary GEOSHAPE +``` + +Reference: [Redis Search Field Types](https://redis.io/docs/latest/develop/interact/search-and-query/indexing/geoindex/) diff --git a/plugins/redis-development/skills/redis-query-engine/references/index-creation.md b/plugins/redis-development/skills/redis-query-engine/references/index-creation.md new file mode 100644 index 00000000..fc56616a --- /dev/null +++ b/plugins/redis-development/skills/redis-query-engine/references/index-creation.md @@ -0,0 +1,63 @@ +# Index Only Fields You Query + +Create indexes with only the fields you need to search, filter, or sort on. + +Correct: Index specific fields and use prefixes. + +``` +FT.CREATE idx:products ON HASH PREFIX 1 product: + SCHEMA + name TEXT WEIGHT 2.0 + description TEXT + category TAG SORTABLE + price NUMERIC SORTABLE + location GEO +``` + +Java (Jedis): +```java +import redis.clients.jedis.search.*; + +Schema schema = new Schema() + .addTextField("name", 1) + .addTagField("categories"); + +// Good: Specify prefix to index only matching keys +IndexDefinition def = new IndexDefinition(IndexDefinition.Type.HASH) + .setPrefixes("person:"); + +jedis.ftCreate("idx", IndexOptions.defaultOptions().setDefinition(def), schema); +``` + +Incorrect: Over-indexing or indexing unused fields. 
+ +``` +# Bad: Indexing every field "just in case" +FT.CREATE idx:products ON HASH PREFIX 1 product: + SCHEMA + name TEXT + description TEXT + category TEXT + subcategory TEXT + brand TEXT + sku TEXT + price NUMERIC + cost NUMERIC + margin NUMERIC + ... +``` + +Java (Jedis): +```java +// Bad: No prefix means all hashes get indexed +IndexDefinition def = new IndexDefinition(IndexDefinition.Type.HASH); +// This will index every hash in the database! +``` + +Tips: +- Start with the minimum required fields +- Add fields as query patterns emerge +- Use `FT.INFO` to monitor index size +- Always specify a prefix to avoid indexing unrelated keys + +Reference: [Redis Search Indexing](https://redis.io/docs/latest/develop/interact/search-and-query/indexing/) diff --git a/plugins/redis-development/skills/redis-query-engine/references/index-management.md b/plugins/redis-development/skills/redis-query-engine/references/index-management.md new file mode 100644 index 00000000..229445af --- /dev/null +++ b/plugins/redis-development/skills/redis-query-engine/references/index-management.md @@ -0,0 +1,37 @@ +# Manage Indexes for Zero-Downtime Updates + +Use aliases to swap indexes without application changes. + +Correct: Use aliases for production indexes. + +``` +# Create versioned index +FT.CREATE idx:products_v2 ON HASH PREFIX 1 product: + SCHEMA + name TEXT + category TAG SORTABLE + price NUMERIC SORTABLE + +# Point alias to new index +FT.ALIASADD products idx:products_v2 + +# Application queries use alias +FT.SEARCH products "@category:{electronics}" + +# Later, swap to new version +FT.ALIASUPDATE products idx:products_v3 +``` + +Useful management commands: + +``` +# Check index info +FT.INFO idx:products + +# List all indexes +FT._LIST +``` + +`FT.DROPINDEX` removes an index surface and can break application queries. 
Use aliases for production schema changes; drop old indexes only after traffic has moved, rollback is no longer needed, and the user has explicitly confirmed the cleanup. + +Reference: [Redis Search Index Management](https://redis.io/docs/latest/develop/interact/search-and-query/administration/) diff --git a/plugins/redis-development/skills/redis-query-engine/references/query-optimization.md b/plugins/redis-development/skills/redis-query-engine/references/query-optimization.md new file mode 100644 index 00000000..aa3fbb37 --- /dev/null +++ b/plugins/redis-development/skills/redis-query-engine/references/query-optimization.md @@ -0,0 +1,39 @@ +# Write Efficient Queries + +Be specific and use filters to reduce the result set early. + +Correct: Use specific filters and limit results. + +``` +# Good: Specific query with filters +FT.SEARCH idx:products "@category:{electronics} @price:[100 500]" + LIMIT 0 20 + RETURN 3 name price category + +# Good: Use SORTBY and LIMIT +FT.SEARCH idx:products "@name:laptop" + SORTBY price ASC + LIMIT 0 10 +``` + +Incorrect: Broad queries returning large result sets. 
+ +``` +# Bad: Match-all query with a huge LIMIT returns an oversized result set +FT.SEARCH idx:products "*" LIMIT 0 10000 + +# Bad: Loading all fields from source document +FT.AGGREGATE idx:products "*" LOAD * +``` + +Performance tips: +- Add `SORTABLE` to fields used in `SORTBY` +- Use `TAG SORTABLE UNF` for best performance on tag fields +- Use `NOSTEM` if you don't need stemming +- Profile queries with `FT.PROFILE` + +``` +FT.PROFILE idx:products SEARCH QUERY "@category:{electronics}" +``` + +Reference: [Redis Search Query Syntax](https://redis.io/docs/latest/develop/interact/search-and-query/query/) diff --git a/plugins/redis-development/skills/redis-query-engine/references/skip-initial-scan.md b/plugins/redis-development/skills/redis-query-engine/references/skip-initial-scan.md new file mode 100644 index 00000000..9860047e --- /dev/null +++ b/plugins/redis-development/skills/redis-query-engine/references/skip-initial-scan.md @@ -0,0 +1,72 @@ +# Use SKIPINITIALSCAN for New Data Only Indexes + +Enable the `SKIPINITIALSCAN` option when creating an index if you only want to include items that are added after the index is created. This makes index creation faster and avoids indexing existing data that you don't need to search. + +Correct: Use SKIPINITIALSCAN when you only need to index new data. 
+ +Python (redis-py): +```python +import redis +from redis.commands.search.field import TextField, TagField +from redis.commands.search.index_definition import IndexDefinition, IndexType + +client = redis.Redis(host='localhost', port=6379) + +# Create index that only indexes new documents +schema = ( + TextField("name"), + TagField("categories") +) + +definition = IndexDefinition( + prefix=["person:"], + index_type=IndexType.HASH +) + +# SKIPINITIALSCAN - only index documents added after creation +client.ft("idx").create_index( + schema, + definition=definition, + skip_initial_scan=True +) +``` + +Java (Jedis): +```java +import redis.clients.jedis.UnifiedJedis; +import redis.clients.jedis.search.FTCreateParams; +import redis.clients.jedis.search.IndexDataType; +import redis.clients.jedis.search.schemafields.SchemaField; +import redis.clients.jedis.search.schemafields.TagField; +import redis.clients.jedis.search.schemafields.TextField; + +try (UnifiedJedis jedis = new UnifiedJedis("redis://localhost:6379")) { + FTCreateParams params = new FTCreateParams() + .on(IndexDataType.HASH).prefix("person:") + .skipInitialScan(); // Only index new documents + + jedis.ftCreate( + "idx", + params, + new SchemaField[]{ + new TextField("name"), + new TagField("categories") + } + ); +} +``` + +When to use SKIPINITIALSCAN: +- Creating an index for a new feature where existing data is irrelevant +- Setting up indexes in advance before data arrives +- When existing data would be too large to scan during index creation +- Event-driven architectures where you only care about new events + +When NOT to use (default behavior is correct): +- You need to search existing data immediately after index creation +- Migrating to a new index schema and need all data indexed +- Most typical use cases where historical data matters + +Note: The default behavior (without SKIPINITIALSCAN) indexes all existing matching keys, which is usually what you want. 
+ +Reference: [FT.CREATE SKIPINITIALSCAN](https://redis.io/docs/latest/commands/ft.create/) diff --git a/plugins/redis-development/skills/redis-security/SKILL.md b/plugins/redis-development/skills/redis-security/SKILL.md new file mode 100644 index 00000000..abe3b293 --- /dev/null +++ b/plugins/redis-development/skills/redis-security/SKILL.md @@ -0,0 +1,106 @@ +--- +name: redis-security +description: Redis security guidance covering authentication (requirepass and ACL users), TLS, ACL-based least-privilege access control, restricting network exposure via bind and protected-mode, firewall rules, and disabling dangerous commands. Use when deploying Redis to production, defining ACL users for an application, configuring TLS connections, locking down a Redis instance behind a firewall, or auditing a Redis deployment for security hardening. +license: MIT +metadata: + author: Redis, Inc. + version: "0.1.0" +--- + +# Redis Security + +Production hardening for Redis: authentication, ACL-based access control, and network exposure. Cover all three together - any one of them on its own leaves an exploitable gap. + +## When to apply + +- Deploying or reviewing a Redis instance destined for production. +- Setting up application credentials beyond a shared password. +- Auditing a Redis deployment against a security checklist. +- Receiving "Redis exposed to the internet" findings from a scanner. + +## 1. Always authenticate (and use TLS) + +Never run a production Redis without a password. Pair authentication with TLS so credentials and data aren't sent in clear text. 
+ +``` +# redis.conf +requirepass your-strong-password +tls-port 6380 +tls-cert-file /path/to/redis.crt +tls-key-file /path/to/redis.key +``` + +```python +r = redis.Redis( + host="localhost", + port=6380, + password="your-strong-password", + ssl=True, + ssl_cert_reqs="required", +) +``` + +If you can use ACL users (next section) instead of the single `requirepass`, do - `requirepass` is effectively the legacy "default user" shortcut. + +See [references/auth.md](references/auth.md). + +## 2. ACLs for least-privilege access + +The `default` user with a shared password is fine for development. For production, give each application a dedicated ACL user with only the commands and key patterns it actually needs. + +``` +# Cache-only reader +ACL SETUSER app_readonly on >password ~cache:* +get +mget +scan + +# Writer that can't run dangerous ops +ACL SETUSER app_writer on >password ~* +@all -@dangerous + +# Admin (use sparingly, never for application traffic) +ACL SETUSER admin on >strong-password ~* +@all +``` + +Useful command categories: + +| Category | What it covers | +|---|---| +| `@read` | Read commands (`GET`, `MGET`, `HGET`, ...) | +| `@write` | Write commands (`SET`, `DEL`, `XADD`, ...) | +| `@dangerous` | `FLUSHALL`, `DEBUG`, `KEYS`, etc. | +| `@admin` | Administrative commands | + +If app credentials leak, a tight ACL bounds the blast radius - the attacker can't `FLUSHALL` your DB just because they grabbed a cache reader's password. + +See [references/acls.md](references/acls.md). + +## 3. Restrict network access + +The most common Redis breach is a public-internet Redis with no auth. 
Avoid that with three layers: + +``` +# redis.conf - bind to specific interfaces, keep protected-mode on +bind 127.0.0.1 192.168.1.100 +protected-mode yes +``` + +```bash +# Firewall - allow only application subnets +iptables -A INPUT -p tcp --dport 6379 -s 192.168.1.0/24 -j ACCEPT +iptables -A INPUT -p tcp --dport 6379 -j DROP +``` + +Anti-pattern: `bind 0.0.0.0` + `protected-mode no` - exposes Redis to the whole network without protection. + +Optional but recommended: rename or disable destructive commands so a compromised client can't trash the DB: + +``` +rename-command FLUSHALL "" +rename-command DEBUG "" +rename-command CONFIG "" +``` + +See [references/network.md](references/network.md). + +## References + +- [Redis: Security](https://redis.io/docs/latest/operate/oss_and_stack/management/security/) +- [Redis: ACL](https://redis.io/docs/latest/operate/oss_and_stack/management/security/acl/) diff --git a/plugins/redis-development/skills/redis-security/references/acls.md b/plugins/redis-development/skills/redis-security/references/acls.md new file mode 100644 index 00000000..6a3d01c3 --- /dev/null +++ b/plugins/redis-development/skills/redis-security/references/acls.md @@ -0,0 +1,31 @@ +# Use ACLs for Fine-Grained Access Control + +Create users with only the permissions they need (principle of least privilege). + +Correct: Create specific users with limited permissions. + +``` +# Read-only user for cache access +ACL SETUSER app_readonly on >password ~cache:* +get +mget +scan + +# Writer that can't run dangerous commands +ACL SETUSER app_writer on >password ~* +@all -@dangerous + +# Admin user (use sparingly) +ACL SETUSER admin on >strong-password ~* +@all +``` + +Incorrect: Using the default user for everything. 
+ +``` +# Bad: Single password for all access +requirepass shared-password +``` + +ACL categories: +- `@read` - Read commands +- `@write` - Write commands +- `@dangerous` - Commands like FLUSHALL, DEBUG +- `@admin` - Administrative commands + +Reference: [Redis ACL](https://redis.io/docs/latest/operate/oss_and_stack/management/security/acl/) diff --git a/plugins/redis-development/skills/redis-security/references/auth.md b/plugins/redis-development/skills/redis-security/references/auth.md new file mode 100644 index 00000000..2daf2de4 --- /dev/null +++ b/plugins/redis-development/skills/redis-security/references/auth.md @@ -0,0 +1,68 @@ +# Always Use Authentication in Production + +Never run Redis without authentication in production environments. + +Correct: Use password and TLS. + +Python (redis-py): +```python +r = redis.Redis( + host='localhost', + port=6380, + password='your-strong-password', + ssl=True, + ssl_cert_reqs='required' +) +``` + +Java (Jedis): +```java +import redis.clients.jedis.*; +import javax.net.ssl.*; +import java.security.KeyStore; + +// Create SSL context backed by a trust store (server certificate verification) +KeyStore trustStore = KeyStore.getInstance("jks"); +trustStore.load(new FileInputStream("./truststore.jks"), "password".toCharArray()); + +TrustManagerFactory tmf = TrustManagerFactory.getInstance("X509"); +tmf.init(trustStore); + +SSLContext sslContext = SSLContext.getInstance("TLS"); +sslContext.init(null, tmf.getTrustManagers(), null); + +JedisClientConfig config = DefaultJedisClientConfig.builder() + .ssl(true) + .sslSocketFactory(sslContext.getSocketFactory()) + .user("redisUser") + .password("redisPassword") + .build(); + +JedisPooled jedis = new JedisPooled(new HostAndPort("redis-host", 6380), config); +``` + +Incorrect: Connecting without authentication. 
+ +Python (redis-py): +```python +# Bad: No authentication +r = redis.Redis(host='localhost', port=6379) +``` + +Java (Jedis): +```java +// Bad: No authentication or TLS +UnifiedJedis jedis = new UnifiedJedis("redis://localhost:6379"); +``` + +Configuration: + +``` +# redis.conf +requirepass your-strong-password +tls-port 6380 +tls-cert-file /path/to/redis.crt +tls-key-file /path/to/redis.key +``` + +Reference: [Redis Security](https://redis.io/docs/latest/operate/oss_and_stack/management/security/) diff --git a/plugins/redis-development/skills/redis-security/references/network.md b/plugins/redis-development/skills/redis-security/references/network.md new file mode 100644 index 00000000..537b8f5b --- /dev/null +++ b/plugins/redis-development/skills/redis-security/references/network.md @@ -0,0 +1,42 @@ +# Secure Network Access + +Restrict network access to Redis to only trusted sources. + +Correct: Bind to specific interfaces. + +``` +# redis.conf +bind 127.0.0.1 192.168.1.100 +protected-mode yes +``` + +Correct: Use firewall rules. + +```bash +# Allow only application servers +iptables -A INPUT -p tcp --dport 6379 -s 192.168.1.0/24 -j ACCEPT +iptables -A INPUT -p tcp --dport 6379 -j DROP +``` + +Incorrect: Exposing Redis to the internet. 
+ +``` +# Bad: Binds to all interfaces +bind 0.0.0.0 +protected-mode no +``` + +Security checklist: +- Use TLS for connections +- Bind to specific interfaces, not `0.0.0.0` +- Use firewall rules to restrict access +- Disable dangerous commands in production + +``` +# Disable dangerous commands +rename-command FLUSHALL "" +rename-command DEBUG "" +rename-command CONFIG "" +``` + +Reference: [Redis Security](https://redis.io/docs/latest/operate/oss_and_stack/management/security/) diff --git a/plugins/redis-development/skills/redis-semantic-cache/SKILL.md b/plugins/redis-development/skills/redis-semantic-cache/SKILL.md new file mode 100644 index 00000000..e6368a44 --- /dev/null +++ b/plugins/redis-development/skills/redis-semantic-cache/SKILL.md @@ -0,0 +1,88 @@ +--- +name: redis-semantic-cache +description: Redis LangCache guidance for semantic caching of LLM responses on Redis Cloud - calling search/set via the SDK or REST API, tuning the similarity threshold, separating caches per task type, and filtering with custom attributes. Use when caching LLM completions or RAG answers to cut API cost and latency, building a cache-aside layer in front of OpenAI / Anthropic / etc., tuning hit rate vs precision, or splitting one app's LLM workloads into multiple LangCache caches. +license: MIT +metadata: + author: Redis, Inc. + version: "0.1.0" +--- + +# Redis Semantic Cache + +Semantic caching for LLM responses with Redis Cloud's LangCache service. Stores prompts as embeddings; subsequent semantically-similar prompts return the cached response without re-calling the model. + +> LangCache is currently in preview on Redis Cloud. Features and behavior may change. + +## When to apply + +- Wrapping an LLM call (OpenAI, Anthropic, etc.) with a cache layer to cut cost and latency. +- Caching RAG answers, classification outputs, or any deterministic LLM workload. +- Tuning the precision/hit-rate trade-off for a semantic cache. 
+- Splitting one application's LLM workloads across multiple cache instances. + +## 1. The cache-aside flow + +LangCache fits in front of any LLM call as a standard cache-aside pattern: + +1. Send the user's prompt to LangCache's `search`. +2. Cache hit - return the stored response directly. +3. Cache miss - call the LLM, then `set` the response so future similar prompts hit. + +```python +from langcache import LangCache +import os + +lang_cache = LangCache( + server_url=f"https://{os.getenv('HOST')}", + cache_id=os.getenv("CACHE_ID"), + api_key=os.getenv("API_KEY"), +) + +result = lang_cache.search(prompt="What is Redis?", similarity_threshold=0.9) +if result: + response = result[0]["response"] +else: + response = llm.generate("What is Redis?") + lang_cache.set(prompt="What is Redis?", response=response) +``` + +The same operations are available via REST (`POST /v1/caches/{cacheId}/entries/search` and `POST /v1/caches/{cacheId}/entries`) when an SDK isn't an option. + +See [references/langcache-usage.md](references/langcache-usage.md) for full SDK + REST samples and attribute-based storage. + +## 2. Tune the similarity threshold + +The threshold controls how similar (by embedding cosine similarity) a new prompt must be to a cached one to count as a hit. Higher = stricter match, fewer false positives. Lower = more hits, more risk of returning an off-topic answer. 
+ +| Threshold | Behavior | Use when | +|---|---|---| +| 0.95+ | Near-exact match required | Customer-facing answers where wrong responses are costly | +| 0.9 | Balanced default | Most workloads - start here | +| 0.8 | Loose semantic match | Internal tools, exploratory queries, FAQ deduplication | + +```python +# Stricter - fewer false positives +result = lang_cache.search(prompt="What is Redis?", similarity_threshold=0.95) + +# Looser - higher hit rate +result = lang_cache.search(prompt="What is Redis?", similarity_threshold=0.8) +``` + +Adjust by watching the actual cache-hit rate and spot-checking that returned answers are still relevant. + +See [references/best-practices.md](references/best-practices.md). + +## 3. Separate caches per task type + +Different LLM workloads should not share one cache - a "code question" prompt is semantically close to other code questions but has nothing to do with a password-reset support query, and crossing them returns garbage. + +```python +support_cache = LangCache(server_url=..., cache_id="support-cache-id", api_key=...) +code_cache = LangCache(server_url=..., cache_id="code-cache-id", api_key=...) +``` + +Create distinct cache IDs in Redis Cloud per task, and route each call to the right one. As a finer-grained alternative, store and search with custom attributes (e.g. `{"category": "database"}`) to keep tasks in the same cache but isolated by attribute filter - useful when the same prompt format spans subtopics. 
+ +## References + +- [LangCache documentation](https://redis.io/docs/latest/develop/ai/langcache/) diff --git a/plugins/redis-development/skills/redis-semantic-cache/references/best-practices.md b/plugins/redis-development/skills/redis-semantic-cache/references/best-practices.md new file mode 100644 index 00000000..8f011130 --- /dev/null +++ b/plugins/redis-development/skills/redis-semantic-cache/references/best-practices.md @@ -0,0 +1,62 @@ +# Configure Semantic Cache Properly + +> Note: LangCache is currently in preview on Redis Cloud. Features and behavior may change. + +Tune similarity threshold and cache separation for optimal LangCache results. + +Correct: Tune similarity threshold for your use case. + +```python +from langcache import LangCache + +lang_cache = LangCache( + server_url=f"https://{os.getenv('HOST')}", + cache_id=os.getenv("CACHE_ID"), + api_key=os.getenv("API_KEY") +) + +# Stricter matching - fewer false positives (0.95 = very similar) +result = lang_cache.search( + prompt="What is Redis?", + similarity_threshold=0.95 +) + +# Looser matching - higher hit rate (0.8 = somewhat similar) +result = lang_cache.search( + prompt="What is Redis?", + similarity_threshold=0.8 +) +``` + +Correct: Use separate caches for different use cases. + +```python +# Create different cache IDs in Redis Cloud for different LLM tasks +support_cache = LangCache( + server_url=server_url, + cache_id="support-cache-id", + api_key=api_key +) + +code_cache = LangCache( + server_url=server_url, + cache_id="code-cache-id", + api_key=api_key +) +``` + +Incorrect: Using a single cache for all LLM tasks. 
+ +```python +# All tasks share one cache - responses may not be relevant +result = lang_cache.search(prompt="How do I reset my password?") +# Could return a code snippet if someone asked a similar coding question +``` + +Best practices: +- Start with threshold 0.9, adjust based on your use case +- Use custom attributes to filter results within a single cache +- Monitor cache hit rates to evaluate effectiveness +- Use separate cache IDs for fundamentally different LLM tasks + +Reference: [LangCache Best Practices](https://redis.io/docs/latest/develop/ai/langcache/) diff --git a/plugins/redis-development/skills/redis-semantic-cache/references/langcache-usage.md b/plugins/redis-development/skills/redis-semantic-cache/references/langcache-usage.md new file mode 100644 index 00000000..945d7261 --- /dev/null +++ b/plugins/redis-development/skills/redis-semantic-cache/references/langcache-usage.md @@ -0,0 +1,76 @@ +# Use LangCache for LLM Response Caching + +> Note: LangCache is currently in preview on Redis Cloud. Features and behavior may change. + +LangCache is a fully-managed semantic caching service on Redis Cloud that reduces LLM costs and latency. + +How it works: +1. Your app sends a prompt to LangCache via `POST /v1/caches/{cacheId}/entries/search` +2. LangCache generates an embedding and searches for similar cached responses +3. If found (cache hit), returns the cached response instantly +4. If not found (cache miss), your app calls the LLM and stores the response + +Correct: Use the LangCache Python SDK. 
+ +```python +from langcache import LangCache +import os + +lang_cache = LangCache( + server_url=f"https://{os.getenv('HOST')}", + cache_id=os.getenv("CACHE_ID"), + api_key=os.getenv("API_KEY") +) + +# Search for cached response +result = lang_cache.search( + prompt="What is Redis?", + similarity_threshold=0.9 +) + +if result: + response = result[0]["response"] +else: + response = llm.generate("What is Redis?") + # Store for future queries + lang_cache.set( + prompt="What is Redis?", + response=response + ) +``` + +LangCache REST API: + +```bash +# Search cache +curl -X POST "https://$HOST/v1/caches/$CACHE_ID/entries/search" \ + -H "Authorization: Bearer $API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"prompt": "What is Redis?"}' + +# Store a response +curl -X POST "https://$HOST/v1/caches/$CACHE_ID/entries" \ + -H "Authorization: Bearer $API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"prompt": "What is Redis?", "response": "Redis is an in-memory database..."}' +``` + +With custom attributes for filtering: + +```python +# Store with attributes +lang_cache.set( + prompt="What is Redis?", + response="Redis is an in-memory database...", + attributes={"category": "database", "version": "v1"} +) + +# Search with attribute filter +result = lang_cache.search( + prompt="Tell me about Redis", + attributes={"category": "database"}, + similarity_threshold=0.9 +) +``` + +Reference: [LangCache Documentation](https://redis.io/docs/latest/develop/ai/langcache/) diff --git a/plugins/redis-development/skills/redis-vector-search/SKILL.md b/plugins/redis-development/skills/redis-vector-search/SKILL.md new file mode 100644 index 00000000..6d02f9b7 --- /dev/null +++ b/plugins/redis-development/skills/redis-vector-search/SKILL.md @@ -0,0 +1,140 @@ +--- +name: redis-vector-search +description: Redis vector search guidance covering HNSW vs FLAT algorithm choice, vector index configuration (dims, distance metric, datatype), filtered hybrid search combining vector 
similarity with TAG or NUMERIC filters, and the RAG retrieval pattern with RedisVL. Use when defining a VECTOR field in FT.CREATE, integrating embeddings (OpenAI, Cohere, sentence-transformers), tuning HNSW parameters (M, EF_CONSTRUCTION, EF_RUNTIME), building a retrieval-augmented generation pipeline, or filtering vector results by attribute. +license: MIT +metadata: + author: Redis, Inc. + version: "0.1.0" +--- + +# Redis Vector Search + +Guidance for storing and searching embeddings in Redis. Covers index configuration, algorithm selection, hybrid filtering, and the RAG retrieval pattern with RedisVL. + +## When to apply + +- Defining a `VECTOR` field in `FT.CREATE` (raw RQE) or a RedisVL `IndexSchema`. +- Choosing HNSW vs FLAT and tuning HNSW parameters. +- Adding category, date, or tenant filters to a vector query. +- Building a retrieval-augmented generation (RAG) pipeline on top of Redis. + +This skill builds on the `redis-query-engine` skill - vector fields live inside RQE indexes and share the same `FT.CREATE` / `FT.SEARCH` machinery. + +## 1. Configure the vector index properly + +Three settings must match the embedding model: + +- `DIM` - the model's output dimensionality (e.g. 1536 for OpenAI `text-embedding-3-small`). A mismatch produces silent garbage. +- `DISTANCE_METRIC` - `COSINE` for normalized text embeddings (the common case), `IP` for unnormalized inner-product, `L2` for raw Euclidean. +- `TYPE` / `datatype` - usually `FLOAT32`. Use `FLOAT16` or quantized variants only when memory cost is a hard constraint. 
+ +Raw RQE: + +``` +FT.CREATE idx:docs ON HASH PREFIX 1 doc: + SCHEMA + content TEXT + embedding VECTOR HNSW 6 + TYPE FLOAT32 + DIM 1536 + DISTANCE_METRIC COSINE +``` + +RedisVL: + +```python +schema = IndexSchema.from_dict({ + "index": {"name": "idx:docs", "prefix": "doc:"}, + "fields": [ + {"name": "content", "type": "text"}, + {"name": "embedding", "type": "vector", "attrs": { + "dims": 1536, "algorithm": "HNSW", + "datatype": "FLOAT32", "distance_metric": "COSINE", + }}, + ] +}) +``` + +See [references/index-creation.md](references/index-creation.md) for redis-py and RedisVL variants. + +## 2. HNSW vs FLAT + +| Algorithm | Speed | Accuracy | Memory | Best for | +|---|---|---|---|---| +| HNSW | Fast (approximate) | ~95%+ recall (tunable) | Higher | Large datasets (>10k vectors), latency-sensitive | +| FLAT | Slow (exact) | 100% | Lower | Small datasets (<10k), accuracy-critical | + +Default to HNSW for any production-scale workload. Tuning levers: + +- `M` - connections per node (16-64). Higher = better recall, more memory. +- `EF_CONSTRUCTION` - build-time graph quality (100-500). Higher = better index, slower build. +- `EF_RUNTIME` - query-time candidate-list size. Higher = better recall, slower queries. + +Use FLAT when the corpus is small and you need exact results (e.g. semantic dedup over a few thousand items). + +See [references/algorithm-choice.md](references/algorithm-choice.md). + +## 3. Hybrid search - filter before vector + +Apply attribute filters (TAG / NUMERIC) so the engine narrows the search space *before* the vector comparison. Don't fetch a wide result set and then filter client-side - that's slower and less accurate. 
+ +```python +from redisvl.query import VectorQuery +from redisvl.query.filter import Num, Tag + +filters = (Tag("category") == "technology") & (Num("date") >= 2024) + +query = VectorQuery( + vector=query_embedding, + vector_field_name="embedding", + return_fields=["content", "category", "date"], + num_results=10, + filter_expression=filters, +) +results = index.query(query) +``` + +For text + vector fusion (BM25-weighted text scoring combined with vector similarity), use `HybridQuery` on Redis >= 8.4 with redis-py >= 7.1, or `AggregateHybridQuery` on older Redis. That's a different "hybrid" from filtered vector search above. + +See [references/hybrid-search.md](references/hybrid-search.md). + +## 4. RAG pattern + +Standard pipeline: embed the user query -> vector search Redis -> pass top-K context to the LLM. + +```python +# Index documents with embeddings +records = [{"content": doc.content, + "embedding": embed_model.encode(doc.content).tolist(), + "source": doc.source} + for doc in documents] +index.load(records) + +# Retrieve relevant context for a user question +q_emb = embed_model.encode(user_question) +results = index.query(VectorQuery( + vector=q_emb, + vector_field_name="embedding", + return_fields=["content", "source"], + num_results=5, +)) + +# Generate with retrieved context +context = "\n".join(r["content"] for r in results) +response = llm.generate(f"Context: {context}\n\nQuestion: {user_question}") +``` + +Practical tips: + +- Match metric to model. Most modern text embedding models pair best with `COSINE`. +- Chunk long documents before indexing - retrieval over 200-500-token chunks usually beats indexing whole pages. +- Batch inserts with `index.load([...])` instead of one call per record. +- Pre-filter with attributes (tenant, recency, document type) before the vector search. + +See [references/rag-pattern.md](references/rag-pattern.md). 
+ +## References + +- [Redis: Vectors](https://redis.io/docs/latest/develop/ai/search-and-query/vectors/) +- [Redis: RAG quickstart](https://redis.io/docs/latest/develop/get-started/rag/) +- [RedisVL documentation](https://docs.redisvl.com/en/latest/) diff --git a/plugins/redis-development/skills/redis-vector-search/references/algorithm-choice.md b/plugins/redis-development/skills/redis-vector-search/references/algorithm-choice.md new file mode 100644 index 00000000..78eec811 --- /dev/null +++ b/plugins/redis-development/skills/redis-vector-search/references/algorithm-choice.md @@ -0,0 +1,52 @@ +# Choose HNSW vs FLAT Based on Requirements + +Select the right algorithm based on your accuracy requirements and dataset size. + +| Algorithm | Speed | Accuracy | Memory | Best For | +|-----------|-------|----------|--------|----------| +| HNSW | Fast (approximate) | ~95%+ recall tunable | Higher | Large datasets (>10k vectors) | +| FLAT | Slower (exact) | 100% (exact) | Lower | Small datasets, accuracy-critical | + +Correct: Use HNSW for large-scale production workloads. + +```python +from redisvl.schema import IndexSchema + +# HNSW - fast approximate search, tunable accuracy +schema = IndexSchema.from_dict({ + "index": {"name": "idx:docs", "prefix": "doc:"}, + "fields": [ + {"name": "embedding", "type": "vector", "attrs": { + "dims": 1536, + "algorithm": "HNSW", + "distance_metric": "COSINE", + "datatype": "FLOAT32", + "m": 16, # Higher = more accurate, more memory + "ef_construction": 200 # Higher = better index quality, slower build + }} + ] +}) +``` + +Correct: Use FLAT when exact results are required. 
+ +```python +# FLAT - exact brute-force search, guaranteed accuracy +schema = IndexSchema.from_dict({ + "index": {"name": "idx:small", "prefix": "small:"}, + "fields": [ + {"name": "embedding", "type": "vector", "attrs": { + "dims": 1536, + "algorithm": "FLAT", + "distance_metric": "COSINE" + }} + ] +}) +``` + +Tuning HNSW accuracy vs speed: +- `M`: Connections per node (16-64). Higher = better recall, more memory +- `EF_CONSTRUCTION`: Build-time parameter (100-500). Higher = better graph quality +- `EF_RUNTIME`: Query-time parameter. Higher = better recall, slower queries + +Reference: [Redis Vector Search](https://redis.io/docs/latest/develop/ai/search-and-query/vectors/) diff --git a/plugins/redis-development/skills/redis-vector-search/references/hybrid-search.md b/plugins/redis-development/skills/redis-vector-search/references/hybrid-search.md new file mode 100644 index 00000000..b9960203 --- /dev/null +++ b/plugins/redis-development/skills/redis-vector-search/references/hybrid-search.md @@ -0,0 +1,43 @@ +# Use Hybrid Search for Better Results + +Combine vector similarity with attribute filtering for more relevant results. In this rule, "hybrid" means filtered vector search. Redis and RedisVL also use "hybrid search" for text + vector fusion via `FT.HYBRID` / `HybridQuery`. + +Correct: Apply filters to reduce search space. + +```python +from redisvl.query import VectorQuery +from redisvl.query.filter import Num, Tag + +filters = (Tag("category") == "technology") & (Num("date") >= 2024) & (Num("date") <= 2025) + +query = VectorQuery( + vector=query_embedding, + vector_field_name="embedding", + return_fields=["content", "category", "date"], + num_results=10, + filter_expression=filters +) + +results = index.query(query) +``` + +Incorrect: Searching entire vector space when filters apply. 
+ +```python +# Bad: No filter - searches all vectors then filters client-side +results = index.query(VectorQuery( + vector=query_embedding, + vector_field_name="embedding", + num_results=1000 +)) +# Client-side filtering - wasteful +filtered = [r for r in results if r["category"] == "technology"] +``` + +Tips: +- Use TAG fields for category filters +- Use NUMERIC fields for date/price ranges +- Redis auto-selects the filtered vector execution strategy; tune `hybrid_policy` only when needed +- For true text + vector fusion, use `HybridQuery` on Redis >= 8.4.0 with redis-py >= 7.1.0; use `AggregateHybridQuery` on earlier Redis versions + +Reference: [Redis Vector Search](https://redis.io/docs/latest/develop/ai/search-and-query/vectors/) diff --git a/plugins/redis-development/skills/redis-vector-search/references/index-creation.md b/plugins/redis-development/skills/redis-vector-search/references/index-creation.md new file mode 100644 index 00000000..8b73905e --- /dev/null +++ b/plugins/redis-development/skills/redis-vector-search/references/index-creation.md @@ -0,0 +1,76 @@ +# Configure Vector Indexes Properly + +Set the correct dimensions, algorithm, and distance metric for your embeddings. Vector indexes can be created via CLI, Redis Insight, or any client library. + +Correct: Create index via Redis CLI or Insight. + +``` +FT.CREATE idx:docs ON HASH PREFIX 1 doc: + SCHEMA + content TEXT + embedding VECTOR HNSW 6 + TYPE FLOAT32 + DIM 1536 + DISTANCE_METRIC COSINE +``` + +Correct: Create index via Python (redis-py). 
+ +```python +from redis import Redis +from redis.commands.search.field import TextField, VectorField +from redis.commands.search.index_definition import IndexDefinition + +r = Redis() + +# Define schema with vector field +schema = [ + TextField("content"), + VectorField( + "embedding", + algorithm="HNSW", + attributes={ + "TYPE": "FLOAT32", + "DIM": 1536, # Must match your embedding model + "DISTANCE_METRIC": "COSINE" + } + ) +] + +r.ft("idx:docs").create_index(schema, definition=IndexDefinition(prefix=["doc:"])) +``` + +Correct: Create index via RedisVL. + +```python +from redisvl.index import SearchIndex +from redisvl.schema import IndexSchema + +schema = IndexSchema.from_dict({ + "index": {"name": "idx:docs", "prefix": "doc:"}, + "fields": [ + {"name": "content", "type": "text"}, + {"name": "embedding", "type": "vector", "attrs": { + "dims": 1536, + "algorithm": "HNSW", + "datatype": "FLOAT32", + "distance_metric": "COSINE" + }} + ] +}) + +index = SearchIndex(schema) +index.create(overwrite=True) +``` + +Incorrect: Mismatched dimensions or wrong distance metric. + +```python +# Bad: Wrong dimensions for your model +{"dims": 768} # But your selected embedding model outputs a different size + +# Bad: Wrong metric for normalized embeddings +{"distance_metric": "L2"} # When embeddings are normalized for COSINE +``` + +Reference: [Redis Vector Search](https://redis.io/docs/latest/develop/ai/search-and-query/vectors/) diff --git a/plugins/redis-development/skills/redis-vector-search/references/rag-pattern.md b/plugins/redis-development/skills/redis-vector-search/references/rag-pattern.md new file mode 100644 index 00000000..e8709698 --- /dev/null +++ b/plugins/redis-development/skills/redis-vector-search/references/rag-pattern.md @@ -0,0 +1,43 @@ +# Implement RAG Pattern Correctly + +Store documents with embeddings, retrieve relevant context, and pass to LLM. + +Correct: Full RAG pipeline with RedisVL. 
+ +```python +from redisvl.index import SearchIndex +from redisvl.query import VectorQuery + +# 1. Store documents with embeddings (assumes `index` is an already-created SearchIndex) +records = [] +for doc in documents: + records.append({ + "content": doc["content"], + "embedding": embed_model.encode(doc["content"]).tolist(), + "source": doc["source"] + }) + +index.load(records) + +# 2. Query with vector similarity +query_embedding = embed_model.encode(user_question) +results = index.query(VectorQuery( + vector=query_embedding, + vector_field_name="embedding", + return_fields=["content", "source"], + num_results=5 +)) + +# 3. Pass context to LLM +context = "\n".join([r["content"] for r in results]) +response = llm.generate(f"Context: {context}\n\nQuestion: {user_question}") +``` + +Best practices: +- Match your distance metric to your embedding model; many modern text embeddings already work well with COSINE +- Batch inserts using `index.load()` with lists +- Set appropriate M and EF_CONSTRUCTION for HNSW based on dataset size +- Use filters to reduce the search space before vector comparison +- Consider chunking long documents for better retrieval + +Reference: [Redis RAG Quickstart](https://redis.io/docs/latest/develop/get-started/rag/)