diff --git a/package-lock.json b/package-lock.json
index d1b3279..249a0ee 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1396,7 +1396,6 @@
       "cpu": [
         "arm"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1410,7 +1409,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1424,7 +1422,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1438,7 +1435,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1452,7 +1448,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1466,7 +1461,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1480,7 +1474,6 @@
       "cpu": [
         "arm"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1494,7 +1487,6 @@
       "cpu": [
         "arm"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1508,7 +1500,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1522,7 +1513,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1536,7 +1526,6 @@
       "cpu": [
         "loong64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1550,7 +1539,6 @@
       "cpu": [
         "loong64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1564,7 +1552,6 @@
       "cpu": [
         "ppc64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1578,7 +1565,6 @@
       "cpu": [
         "ppc64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1592,7 +1578,6 @@
       "cpu": [
         "riscv64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1606,7 +1591,6 @@
       "cpu": [
         "riscv64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1620,7 +1604,6 @@
       "cpu": [
         "s390x"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1634,7 +1617,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1648,7 +1630,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1662,7 +1643,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1676,7 +1656,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1690,7 +1669,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1704,7 +1682,6 @@
       "cpu": [
         "ia32"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1718,7 +1695,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1732,7 +1708,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1879,7 +1854,7 @@
       "version": "1.0.8",
       "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz",
       "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT"
     },
     "node_modules/@types/json-schema": {
@@ -1903,7 +1878,6 @@
       "version": "25.5.0",
       "resolved": "https://registry.npmjs.org/@types/node/-/node-25.5.0.tgz",
       "integrity": "sha512-jp2P3tQMSxWugkCUKLRPVUpGaL5MVFwF8RDuSRztfwgN1wmqJeMSbKlnEtQqU8UrhTmzEmZdu2I6v2dpp7XIxw==",
-      "devOptional": true,
       "license": "MIT",
       "dependencies": {
         "undici-types": "~7.18.0"
@@ -3776,7 +3750,6 @@
       "version": "2.3.3",
       "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
       "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
-      "dev": true,
       "hasInstallScript": true,
       "license": "MIT",
       "optional": true,
@@ -4704,6 +4677,18 @@
         "node": ">=8.6"
       }
     },
+    "node_modules/micromatch/node_modules/picomatch": {
+      "version": "2.3.2",
+      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
+      "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=8.6"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/jonschlinkert"
+      }
+    },
     "node_modules/mimic-response": {
       "version": "3.1.0",
       "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz",
@@ -18245,12 +18230,13 @@
       "license": "ISC"
     },
     "node_modules/picomatch": {
-      "version": "2.3.2",
-      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
-      "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
+      "version": "4.0.4",
+      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
+      "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
+      "dev": true,
       "license": "MIT",
       "engines": {
-        "node": ">=8.6"
+        "node": ">=12"
       },
       "funding": {
         "url": "https://github.com/sponsors/jonschlinkert"
@@ -18673,7 +18659,7 @@
       "version": "4.60.1",
       "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.60.1.tgz",
       "integrity": "sha512-VmtB2rFU/GroZ4oL8+ZqXgSA38O6GR8KSIvWmEFv63pQ0G6KaBH9s07PO8XTXP4vI+3UJUEypOfjkGfmSBBR0w==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT",
       "dependencies": {
         "@types/estree": "1.0.8"
@@ -19230,19 +19216,6 @@
         "url": "https://github.com/sponsors/SuperchupuDev"
       }
     },
-    "node_modules/tinyglobby/node_modules/picomatch": {
-      "version": "4.0.4",
-      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
-      "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
-      "dev": true,
-      "license": "MIT",
-      "engines": {
-        "node": ">=12"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/jonschlinkert"
-      }
-    },
     "node_modules/tinyrainbow": {
       "version": "3.1.0",
       "resolved": "https://registry.npmjs.org/tinyrainbow/-/tinyrainbow-3.1.0.tgz",
@@ -19490,7 +19463,7 @@
       "version": "5.9.3",
       "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
       "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
-      "dev": true,
+      "devOptional": true,
       "license": "Apache-2.0",
       "bin": {
         "tsc": "bin/tsc",
@@ -19544,7 +19517,6 @@
       "version": "7.18.2",
       "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz",
       "integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==",
-      "devOptional": true,
       "license": "MIT"
     },
     "node_modules/uri-js": {
@@ -19648,19 +19620,6 @@
         }
       }
     },
-    "node_modules/vite/node_modules/picomatch": {
-      "version": "4.0.4",
-      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
-      "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
-      "dev": true,
-      "license": "MIT",
-      "engines": {
-        "node": ">=12"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/jonschlinkert"
-      }
-    },
     "node_modules/vitest": {
       "version": "4.1.2",
       "resolved": "https://registry.npmjs.org/vitest/-/vitest-4.1.2.tgz",
@@ -19743,19 +19702,6 @@
         }
       }
     },
-    "node_modules/vitest/node_modules/picomatch": {
-      "version": "4.0.4",
-      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
-      "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
-      "dev": true,
-      "license": "MIT",
-      "engines": {
-        "node": ">=12"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/jonschlinkert"
-      }
-    },
     "node_modules/web-tree-sitter": {
       "version": "0.22.6",
       "resolved": "https://registry.npmjs.org/web-tree-sitter/-/web-tree-sitter-0.22.6.tgz",
@@ -20011,6 +19957,7 @@
       "license": "Apache-2.0",
       "dependencies": {
         "better-sqlite3": "^12.6.2",
+        "cheerio": "^1.0.0-rc.12",
         "fast-glob": "^3.3.3",
         "openai": "^6.18.0"
       },
@@ -20026,7 +19973,7 @@
       }
     },
     "packages/toolpack-sdk": {
-      "version": "1.3.0",
+      "version": "1.2.0",
       "license": "Apache-2.0",
       "dependencies": {
         "@anthropic-ai/sdk": "^0.73.0",
diff --git a/packages/toolpack-knowledge/README.md b/packages/toolpack-knowledge/README.md
index 8acfd9d..bbf92fe 100644
--- a/packages/toolpack-knowledge/README.md
+++ b/packages/toolpack-knowledge/README.md
@@ -1,6 +1,6 @@
 # toolpack-knowledge
 
-RAG (Retrieval-Augmented Generation) package for Toolpack SDK.
+RAG (Retrieval-Augmented Generation) package for Toolpack SDK with advanced features for web crawling, API indexing, streaming ingestion, and hybrid search.
 
 ## Installation
 
@@ -54,6 +54,38 @@ const results = await kb.query('authentication setup', {
 });
 ```
 
+### Advanced Usage
+
+```typescript
+import { Knowledge, WebUrlSource, ApiDataSource, PersistentKnowledgeProvider, OllamaEmbedder } from '@toolpack-sdk/knowledge';
+
+// Web crawling + API indexing with hybrid search
+const kb = await Knowledge.create({
+  provider: new PersistentKnowledgeProvider({ namespace: 'advanced-docs' }),
+  sources: [
+    new WebUrlSource(['https://docs.example.com'], {
+      maxDepth: 2,
+      delayMs: 1000,
+    }),
+    new ApiDataSource('https://api.example.com/docs', {
+      pagination: { param: 'page', start: 1, maxPages: 5 },
+      contentExtractor: (doc) => `${doc.title}\n\n${doc.content}`,
+    }),
+  ],
+  embedder: new OllamaEmbedder({ model: 'nomic-embed-text' }),
+  streamingBatchSize: 50,  // Efficient processing of large datasets
+  description: 'Comprehensive documentation from web and API sources.',
+});
+
+// Hybrid search combining semantic and keyword matching
+const results = await kb.query('authentication setup', {
+  searchType: 'hybrid',
+  semanticWeight: 0.6,  // 60% semantic, 40% keyword
+  limit: 10,
+  threshold: 0.7,
+});
+```
+
 ### Agent Integration
 
 ```typescript
@@ -75,6 +107,126 @@ const toolpack = await Toolpack.init({
 const response = await toolpack.chat('How do I configure authentication?');
 ```
 
+## Advanced Features
+
+### Web URL Sources
+
+Crawl and index websites with automatic HTML parsing and link following.
+
+```typescript
+import { Knowledge, MemoryProvider, OllamaEmbedder, WebUrlSource } from '@toolpack-sdk/knowledge';
+
+const webSource = new WebUrlSource(['https://docs.example.com'], {
+  maxDepth: 3,                    // Follow links up to 3 levels deep
+  delayMs: 1000,                  // Respectful crawling delay
+  userAgent: 'MyApp/1.0',         // Custom user agent
+  maxChunkSize: 1500,             // Chunk size for web content
+  timeoutMs: 30000,               // Request timeout
+});
+
+const kb = await Knowledge.create({
+  provider: new MemoryProvider(),
+  sources: [webSource],
+  embedder: new OllamaEmbedder({ model: 'nomic-embed-text' }),
+  description: 'Web documentation and guides.',
+});
+```
+
+**Features:**
+- Recursive website crawling with depth control
+- Automatic HTML text extraction (removes scripts/styles)
+- Link discovery and following
+- Respectful crawling with configurable delays
+- Metadata includes title, URL, and source type
+
+### API Data Sources
+
+Index data from REST APIs with pagination support.
+
+```typescript
+import { Knowledge, ApiDataSource, PersistentKnowledgeProvider, OpenAIEmbedder } from '@toolpack-sdk/knowledge';
+
+const apiSource = new ApiDataSource('https://api.github.com/repos/toolpack-ai/toolpack-sdk/issues', {
+  headers: {
+    'Authorization': `Bearer ${process.env.GITHUB_TOKEN}`,
+    'Accept': 'application/vnd.github.v3+json',
+  },
+  pagination: {
+    param: 'page',
+    start: 1,
+    maxPages: 5,
+  },
+  dataPath: '',  // Root level array
+  contentExtractor: (issue: any) => `${issue.title}\n\n${issue.body}`,
+  metadataExtractor: (issue: any) => ({
+    id: issue.id,
+    state: issue.state,
+    labels: issue.labels?.map((l: { name: string }) => l.name),
+  }),
+});
+
+const kb = await Knowledge.create({
+  provider: new PersistentKnowledgeProvider({ namespace: 'github-issues' }),
+  sources: [apiSource],
+  embedder: new OpenAIEmbedder({ model: 'text-embedding-3-small' }),
+  description: 'GitHub issues and discussions.',
+});
+```
+
+**Features:**
+- REST API data ingestion (GET/POST)
+- Automatic pagination handling
+- Custom content and metadata extractors
+- JSON path support for nested data
+- Flexible data transformation
+
+### Streaming Ingestion
+
+Process large datasets efficiently with batch processing.
+
+```typescript
+const kb = await Knowledge.create({
+  provider: new PersistentKnowledgeProvider({ namespace: 'large-dataset' }),
+  sources: [new ApiDataSource('https://api.example.com/large-dataset')],
+  embedder: new OllamaEmbedder({ model: 'nomic-embed-text' }),
+  streamingBatchSize: 50,  // Process 50 chunks at a time
+  description: 'Large dataset with streaming ingestion.',
+  onEmbeddingProgress: (event) => {
+    console.log(`Processed: ${event.current}/${event.total} chunks`);
+  },
+});
+```
+
+### Hybrid Search
+
+Combine semantic and keyword search for better results.
+
+```typescript
+// Semantic search (default)
+const semanticResults = await kb.query('machine learning algorithms', {
+  searchType: 'semantic',
+  limit: 5,
+});
+
+// Keyword search
+const keywordResults = await kb.query('machine learning algorithms', {
+  searchType: 'keyword',
+  limit: 5,
+});
+
+// Hybrid search (recommended)
+const hybridResults = await kb.query('machine learning algorithms', {
+  searchType: 'hybrid',
+  semanticWeight: 0.7,  // 70% semantic, 30% keyword
+  limit: 5,
+});
+```
+
+**Search Types:**
+- `semantic` — Vector similarity search (default)
+- `keyword` — Text matching search
+- `hybrid` — Combined semantic + keyword search
+
 ## Providers
 
 ### MemoryProvider
@@ -121,6 +273,70 @@ new MarkdownSource('./docs/**/*.md', {
 - Code block detection (`hasCode` metadata)
 - Deterministic chunk IDs
 
+### WebUrlSource
+
+Crawl and index web pages with HTML parsing.
+
+```typescript
+new WebUrlSource(['https://example.com', 'https://docs.example.com'], {
+  maxDepth: 2,                    // Crawl depth (default: 1)
+  delayMs: 1000,                  // Delay between requests (default: 1000ms)
+  userAgent: 'MyApp/1.0',         // Custom user agent
+  maxChunkSize: 2000,             // Max tokens per chunk
+  chunkOverlap: 200,              // Overlap between chunks
+  timeoutMs: 30000,               // Request timeout (default: 30000ms)
+  namespace: 'web',               // Chunk ID prefix
+  metadata: { source: 'web' },    // Added to all chunks
+})
+```
+
+**Features:**
+- Recursive website crawling
+- Automatic HTML text extraction
+- Link discovery and following
+- Respectful crawling with delays
+- Error handling for failed requests
+
+### ApiDataSource
+
+Index data from REST APIs with pagination.
+
+```typescript
+new ApiDataSource('https://api.example.com/data', {
+  method: 'GET',                  // HTTP method (default: 'GET')
+  headers: {                      // Request headers
+    'Authorization': 'Bearer token',
+    'Content-Type': 'application/json',
+  },
+  body: JSON.stringify({}),       // Request body for POST
+  pagination: {                   // Pagination config
+    param: 'page',                // Query param name
+    start: 1,                     // Starting page number
+    step: 1,                      // Page increment
+    maxPages: 10,                 // Max pages to fetch
+  },
+  dataPath: 'data.items',         // JSON path to data array
+  contentExtractor: (item) =>     // Custom content extraction
+    `${item.title}\n\n${item.description}`,
+  metadataExtractor: (item) => ({ // Custom metadata extraction
+    id: item.id,
+    category: item.category,
+  }),
+  maxChunkSize: 2000,             // Max tokens per chunk
+  chunkOverlap: 200,              // Overlap between chunks
+  timeoutMs: 30000,               // Request timeout
+  namespace: 'api',               // Chunk ID prefix
+  metadata: { source: 'api' },    // Added to all chunks
+})
+```
+
+**Features:**
+- REST API data ingestion
+- Automatic pagination handling
+- Custom data extractors
+- JSON path support
+- Flexible content transformation
+
 ## Embedders
 
 ### OllamaEmbedder
@@ -159,6 +375,7 @@ interface KnowledgeOptions {
   embedder: Embedder;
   description: string;                        // Required: used as tool description
   reSync?: boolean;                           // default: true
+  streamingBatchSize?: number;                // Process chunks in batches (default: 100)
   onError?: (error, context) => 'skip' | 'abort';
   onSync?: (event: SyncEvent) => void;
   onEmbeddingProgress?: (event: EmbeddingProgressEvent) => void;
@@ -171,6 +388,8 @@ interface KnowledgeOptions {
 await kb.query('search query', {
   limit: 10,              // Max results
   threshold: 0.7,         // Similarity threshold (0-1)
+  searchType: 'hybrid',   // 'semantic' | 'keyword' | 'hybrid' (default: 'semantic')
+  semanticWeight: 0.7,    // Semantic share of the hybrid score, 0-1 (default: 0.7); keyword gets the rest
   filter: {               // Metadata filters
     hasCode: true,
     category: { $in: ['api', 'guide'] },
@@ -180,6 +399,20 @@ await kb.query('search query', {
 });
 ```
 
+### Utility Functions
+
+```typescript
+import { keywordSearch, combineScores } from '@toolpack-sdk/knowledge';
+
+// Manual keyword search
+const score = keywordSearch('document content', 'search query');
+// Returns: number between 0-1
+
+// Combine semantic and keyword scores
+const combinedScore = combineScores(semanticScore, keywordScore, 0.7);
+// Returns: weighted combination
+```
+
 ### Metadata Filters
 
 ```typescript
diff --git a/packages/toolpack-knowledge/examples/advanced-features.ts b/packages/toolpack-knowledge/examples/advanced-features.ts
new file mode 100644
index 0000000..22c026a
--- /dev/null
+++ b/packages/toolpack-knowledge/examples/advanced-features.ts
@@ -0,0 +1,103 @@
+import {
+  Knowledge,
+  MemoryProvider,
+  WebUrlSource,
+  ApiDataSource,
+  MarkdownSource,
+  OllamaEmbedder
+} from '../src/index.js';
+
+/** Result list type returned by `Knowledge.query`. */
+type SearchResults = Awaited<ReturnType<Knowledge['query']>>;
+
+/**
+ * Prints the result count, then the score, a 100-character content preview,
+ * and the `source` metadata field of every result.
+ *
+ * @param label - Search mode name shown in the "Found N <label> results" line.
+ * @param results - Results returned by `kb.query`.
+ */
+function printResults(label: string, results: SearchResults): void {
+  console.log(`Found ${results.length} ${label} results:`);
+  for (const result of results) {
+    console.log(`Score: ${result.score.toFixed(3)}`);
+    console.log(`Content: ${result.chunk.content.substring(0, 100)}...`);
+    console.log(`Source: ${result.chunk.metadata.source}`);
+    console.log('---\n');
+  }
+}
+
+/**
+ * Builds a knowledge base backed by `MemoryProvider` from web, API, and
+ * markdown sources, then queries it in semantic, keyword, and hybrid mode.
+ */
+async function main() {
+  console.log('Creating advanced knowledge base...');
+
+  const kb = await Knowledge.create({
+    provider: new MemoryProvider(),
+    sources: [
+      // Web URL source - crawl websites
+      new WebUrlSource(['https://example.com', 'https://httpbin.org'], {
+        maxDepth: 2,
+        delayMs: 1000, // Be respectful to servers
+        maxChunkSize: 1500,
+      }),
+
+      // API data source - index REST API data
+      new ApiDataSource('https://jsonplaceholder.typicode.com/posts', {
+        dataPath: '', // Root level array
+        contentExtractor: (item: any) => `${item.title}\n\n${item.body}`,
+        metadataExtractor: (item: any) => ({
+          id: item.id,
+          userId: item.userId,
+        }),
+      }),
+
+      // Traditional markdown source
+      new MarkdownSource('./docs/**/*.md'),
+    ],
+    embedder: new OllamaEmbedder({ model: 'nomic-embed-text' }),
+    description: 'Advanced knowledge base with web crawling, API indexing, and hybrid search',
+    streamingBatchSize: 50, // Process in batches for large datasets
+    onSync: (event) => {
+      if (event.type === 'start') {
+        console.log('Starting sync...');
+      } else if (event.type === 'complete') {
+        console.log(`Sync complete! Indexed ${event.chunksAffected} chunks`);
+      }
+    },
+    onEmbeddingProgress: (event) => {
+      console.log(`Embedding progress: ${event.percent}% (${event.current}/${event.total})`);
+    },
+  });
+
+  // Stop the knowledge base even when one of the queries below throws.
+  try {
+    console.log('\n=== Semantic Search ===');
+    const semanticResults = await kb.query('web development technologies', {
+      limit: 3,
+      searchType: 'semantic',
+    });
+    printResults('semantic', semanticResults);
+
+    console.log('\n=== Keyword Search ===');
+    const keywordResults = await kb.query('web development', {
+      limit: 3,
+      searchType: 'keyword',
+    });
+    printResults('keyword', keywordResults);
+
+    console.log('\n=== Hybrid Search ===');
+    const hybridResults = await kb.query('web development technologies', {
+      limit: 3,
+      searchType: 'hybrid',
+      semanticWeight: 0.6, // 60% semantic, 40% keyword
+    });
+    printResults('hybrid', hybridResults);
+  } finally {
+    await kb.stop();
+  }
+}
+
+main().catch(console.error);
\ No newline at end of file
diff --git a/packages/toolpack-knowledge/package.json b/packages/toolpack-knowledge/package.json
index 1a497d9..aabcd10 100644
--- a/packages/toolpack-knowledge/package.json
+++ b/packages/toolpack-knowledge/package.json
@@ -31,7 +31,11 @@
     "embeddings",
     "vector-search",
     "knowledge-base",
-    "sdk"
+    "sdk",
+    "web-crawling",
+    "api-indexing",
+    "hybrid-search",
+    "streaming-ingestion"
   ],
   "engines": {
     "node": ">=20"
@@ -48,6 +52,7 @@
   },
   "dependencies": {
     "better-sqlite3": "^12.6.2",
+    "cheerio": "^1.0.0-rc.12",
     "fast-glob": "^3.3.3",
     "openai": "^6.18.0"
   },
diff --git a/packages/toolpack-knowledge/src/__tests__/api-source.test.ts b/packages/toolpack-knowledge/src/__tests__/api-source.test.ts
new file mode 100644
index 0000000..5117a0b
--- /dev/null
+++ b/packages/toolpack-knowledge/src/__tests__/api-source.test.ts
@@ -0,0 +1,98 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import { ApiDataSource } from '../sources/api.js';
+
+// The tests swap the global fetch for this mock instead of assigning global.fetch.
+const fetchMock = vi.fn();
+
+describe('ApiDataSource', () => {
+  // Start every test with an empty mock so queued responses cannot leak over.
+  beforeEach(() => {
+    fetchMock.mockReset();
+    vi.stubGlobal('fetch', fetchMock);
+  });
+
+  // Restore the original global fetch after each test.
+  afterEach(() => {
+    vi.unstubAllGlobals();
+  });
+
+  it('should fetch and chunk API data', async () => {
+    const mockData = {
+      data: [
+        {
+          id: 1,
+          title: 'First Item',
+          content: 'This is the content of the first item.',
+          author: 'Author 1',
+        },
+        {
+          id: 2,
+          title: 'Second Item',
+          content: 'This is the content of the second item.',
+          author: 'Author 2',
+        },
+      ],
+    };
+
+    fetchMock.mockResolvedValueOnce({
+      ok: true,
+      json: () => Promise.resolve(mockData),
+    });
+
+    const source = new ApiDataSource('https://api.example.com/data', {
+      dataPath: 'data',
+      contentExtractor: (item: any) => `${item.title}\n\n${item.content}`,
+    });
+
+    const chunks = [];
+    for await (const chunk of source.load()) {
+      chunks.push(chunk);
+    }
+
+    expect(chunks.length).toBe(2);
+    expect(chunks[0].content).toContain('First Item');
+    expect(chunks[0].content).toContain('content of the first item');
+    expect(chunks[0].metadata.id).toBe(1);
+    expect(chunks[0].metadata.author).toBe('Author 1');
+    expect(chunks[1].metadata.title).toBe('Second Item');
+  });
+
+  it('should handle pagination', async () => {
+    const mockPage1 = { data: [{ id: 1, content: 'Page 1 content' }] };
+    const mockPage2 = { data: [{ id: 2, content: 'Page 2 content' }] };
+    const mockPage3 = { data: [] }; // Empty page to stop pagination
+
+    fetchMock
+      .mockResolvedValueOnce({
+        ok: true,
+        json: () => Promise.resolve(mockPage1),
+      })
+      .mockResolvedValueOnce({
+        ok: true,
+        json: () => Promise.resolve(mockPage2),
+      })
+      .mockResolvedValueOnce({
+        ok: true,
+        json: () => Promise.resolve(mockPage3),
+      });
+
+    const source = new ApiDataSource('https://api.example.com/data', {
+      dataPath: 'data',
+      pagination: {
+        param: 'page',
+        start: 1,
+        step: 1,
+        maxPages: 10,
+      },
+    });
+
+    const chunks = [];
+    for await (const chunk of source.load()) {
+      chunks.push(chunk);
+    }
+
+    expect(chunks.length).toBe(2);
+    expect(chunks[0].metadata.id).toBe(1);
+    expect(chunks[1].metadata.id).toBe(2);
+  });
+});
\ No newline at end of file
diff --git a/packages/toolpack-knowledge/src/__tests__/keyword.test.ts b/packages/toolpack-knowledge/src/__tests__/keyword.test.ts
new file mode 100644
index 0000000..f0a115d
--- /dev/null
+++ b/packages/toolpack-knowledge/src/__tests__/keyword.test.ts
@@ -0,0 +1,48 @@
+import { describe, it, expect } from 'vitest';
+// Import the source module directly so the tests do not depend on a prior build.
+import { keywordSearch, combineScores } from '../utils/keyword.js';
+
+describe('keywordSearch', () => {
+  it('should return 1.0 for exact matches', () => {
+    const text = 'This is a test document with some content.';
+    const query = 'test document';
+    expect(keywordSearch(text, query)).toBe(1.0);
+  });
+
+  it('should return partial scores for word matches', () => {
+    const text = 'This is a test document with some content.';
+    const query = 'test content extra';
+    const score = keywordSearch(text, query);
+    expect(score).toBeGreaterThan(0);
+    expect(score).toBeLessThan(1.0);
+  });
+
+  it('should return 0 for no matches', () => {
+    const text = 'This is a test document.';
+    const query = 'nonexistent';
+    expect(keywordSearch(text, query)).toBe(0);
+  });
+
+  it('should handle case insensitive matching', () => {
+    const text = 'This is a TEST document.';
+    const query = 'test';
+    expect(keywordSearch(text, query)).toBe(1.0);
+  });
+});
+
+describe('combineScores', () => {
+  it('should combine semantic and keyword scores', () => {
+    const semanticScore = 0.8;
+    const keywordScore = 0.6;
+    const combined = combineScores(semanticScore, keywordScore, 0.7);
+    // Weighted sums of floats are compared with a tolerance, not exact equality.
+    expect(combined).toBeCloseTo(0.8 * 0.7 + 0.6 * 0.3, 10);
+  });
+
+  it('should handle equal weights', () => {
+    const semanticScore = 0.9;
+    const keywordScore = 0.5;
+    const combined = combineScores(semanticScore, keywordScore, 0.5);
+    expect(combined).toBeCloseTo(0.7, 10);
+  });
+});
\ No newline at end of file
diff --git a/packages/toolpack-knowledge/src/__tests__/web-url-source.test.ts b/packages/toolpack-knowledge/src/__tests__/web-url-source.test.ts
new file mode 100644
index 0000000..5569921
--- /dev/null
+++ b/packages/toolpack-knowledge/src/__tests__/web-url-source.test.ts
@@ -0,0 +1,71 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import { WebUrlSource } from '../sources/web-url.js';
+
+// The tests swap the global fetch for this mock instead of assigning global.fetch.
+const fetchMock = vi.fn();
+
+describe('WebUrlSource', () => {
+  // Start every test with an empty mock so queued responses cannot leak over.
+  beforeEach(() => {
+    fetchMock.mockReset();
+    vi.stubGlobal('fetch', fetchMock);
+  });
+
+  // Restore the original global fetch after each test.
+  afterEach(() => {
+    vi.unstubAllGlobals();
+  });
+
+  it('should crawl and chunk web pages', async () => {
+    const mockHtml = `
+      <html>
+        <head><title>Test Page</title></head>
+        <body>
+          <h1>Main Title</h1>
+          <p>This is some content from a web page.</p>
+          <p>This is more content that should be extracted.</p>
+          <a href="/internal">Internal Link</a>
+          <a href="https://external.com">External Link</a>
+        </body>
+      </html>
+    `;
+
+    fetchMock.mockResolvedValueOnce({
+      ok: true,
+      text: () => Promise.resolve(mockHtml),
+    });
+
+    const source = new WebUrlSource(['https://example.com'], {
+      maxDepth: 1,
+      delayMs: 0, // No delay for tests
+    });
+
+    const chunks = [];
+    for await (const chunk of source.load()) {
+      chunks.push(chunk);
+    }
+
+    expect(chunks.length).toBeGreaterThan(0);
+    expect(chunks[0].content).toContain('Main Title');
+    expect(chunks[0].content).toContain('content from a web page');
+    expect(chunks[0].metadata.title).toBe('Test Page');
+    expect(chunks[0].metadata.url).toBe('https://example.com');
+    expect(chunks[0].metadata.source).toBe('web');
+  });
+
+  it('should handle fetch errors gracefully', async () => {
+    fetchMock.mockRejectedValueOnce(new Error('Network error'));
+
+    const source = new WebUrlSource(['https://failing-url.com'], {
+      delayMs: 0,
+    });
+
+    const chunks = [];
+    for await (const chunk of source.load()) {
+      chunks.push(chunk);
+    }
+
+    // Should not throw, just skip the failing URL
+    expect(chunks.length).toBe(0);
+  });
+});
\ No newline at end of file
diff --git a/packages/toolpack-knowledge/src/index.ts b/packages/toolpack-knowledge/src/index.ts
index 020414a..13a0ef3 100644
--- a/packages/toolpack-knowledge/src/index.ts
+++ b/packages/toolpack-knowledge/src/index.ts
@@ -11,8 +11,17 @@ export type { PersistentKnowledgeProviderOptions } from './providers/persistent.
 export { MarkdownSource } from './sources/markdown.js';
 export type { MarkdownSourceOptions } from './sources/markdown.js';
 
+export { WebUrlSource } from './sources/web-url.js';
+export type { WebUrlSourceOptions } from './sources/web-url.js';
+
+export { ApiDataSource } from './sources/api.js';
+export type { ApiDataSourceOptions } from './sources/api.js';
+
 export { OllamaEmbedder } from './embedders/ollama.js';
 export type { OllamaEmbedderOptions } from './embedders/ollama.js';
 
 export { OpenAIEmbedder } from './embedders/openai.js';
 export type { OpenAIEmbedderOptions } from './embedders/openai.js';
+
+// Utility functions
+export { keywordSearch, combineScores } from './utils/keyword.js';
diff --git a/packages/toolpack-knowledge/src/interfaces.ts b/packages/toolpack-knowledge/src/interfaces.ts
index 5846f75..c7882c2 100644
--- a/packages/toolpack-knowledge/src/interfaces.ts
+++ b/packages/toolpack-knowledge/src/interfaces.ts
@@ -16,6 +16,8 @@ export interface QueryOptions {
   filter?: MetadataFilter;
   includeMetadata?: boolean;
   includeVectors?: boolean;
+  searchType?: 'semantic' | 'keyword' | 'hybrid';
+  semanticWeight?: number; // For hybrid search, weight of semantic vs keyword (0-1)
 }
 
 export interface MetadataFilter {
@@ -34,9 +36,11 @@ export interface QueryResult {
 export interface KnowledgeProvider {
   add(chunks: Chunk[]): Promise<void>;
   query(queryVector: number[], options?: QueryOptions): Promise<QueryResult[]>;
+  keywordQuery?(query: string, options?: QueryOptions): Promise<QueryResult[]>;
   delete(ids: string[]): Promise<void>;
   clear(): Promise<void>;
   validateDimensions(dimensions: number): Promise<void>;
+  getAllChunks?(): Promise<Chunk[]>;
   close?(): void;
 }
 
diff --git a/packages/toolpack-knowledge/src/knowledge.ts b/packages/toolpack-knowledge/src/knowledge.ts
index 51a105e..13790bd 100644
--- a/packages/toolpack-knowledge/src/knowledge.ts
+++ b/packages/toolpack-knowledge/src/knowledge.ts
@@ -1,4 +1,6 @@
 import { KnowledgeProvider, KnowledgeSource, Embedder, QueryOptions, QueryResult, Chunk } from './interfaces.js';
+import { keywordSearch, combineScores } from './utils/keyword.js';
+import { matchesFilter } from './utils/cosine.js';
 
 export interface KnowledgeOptions {
   provider: KnowledgeProvider;
@@ -9,6 +11,7 @@ export interface KnowledgeOptions {
   onError?: ErrorHandler;
   onSync?: SyncEventHandler;
   onEmbeddingProgress?: EmbeddingProgressHandler;
+  streamingBatchSize?: number;
 }
 
 export type ErrorHandler = (
@@ -71,10 +74,134 @@ export class Knowledge {
   }
 
   async query(text: string, options?: QueryOptions): Promise<QueryResult[]> {
+    // Dispatch on the requested search mode; anything but 'keyword'/'hybrid' is semantic.
+    switch (options?.searchType ?? 'semantic') {
+      case 'keyword':
+        return this.keywordQuery(text, options);
+      case 'hybrid': {
+        const [semanticResults, keywordResults] = await Promise.all([
+          this.semanticQuery(text, options),
+          this.keywordQuery(text, options),
+        ]);
+        const semanticWeight = options?.semanticWeight ?? 0.7;
+        return this.combineHybridResults(semanticResults, keywordResults, semanticWeight, options);
+      }
+      default:
+        return this.semanticQuery(text, options);
+    }
+  }
+
+  private async semanticQuery(text: string, options?: QueryOptions): Promise<QueryResult[]> { // Embeds `text`, then runs the provider's vector query with that embedding.
     const vector = await this.embedder.embed(text);
     return this.provider.query(vector, options);
   }
 
+  /**
+   * Keyword search over chunk text.
+   *
+   * Delegates to the provider's own `keywordQuery` when it has one; otherwise loads every
+   * chunk via `getAllChunks()` and scores it in memory with `keywordSearch`.
+   */
+  private async keywordQuery(text: string, options?: QueryOptions): Promise<QueryResult[]> {
+    const provider = this.provider;
+    if (typeof provider.keywordQuery === 'function') {
+      return provider.keywordQuery(text, options);
+    }
+
+    // Fallback: score every chunk in memory.
+    const { limit = 10, threshold = 0.1, filter, includeMetadata = true, includeVectors = false } = options ?? {};
+
+    const candidates = await this.getAllChunks();
+    const scored: QueryResult[] = [];
+
+    for (const candidate of candidates) {
+      // Drop chunks excluded by the metadata filter before scoring them.
+      if (filter && !matchesFilter(candidate.metadata, filter)) {
+        continue;
+      }
+
+      const score = keywordSearch(candidate.content, text);
+      if (score >= threshold) {
+        scored.push({
+          chunk: {
+            id: candidate.id,
+            content: candidate.content,
+            metadata: includeMetadata ? candidate.metadata : {},
+            vector: includeVectors ? candidate.vector : undefined,
+          },
+          score,
+          distance: 1 - score,
+        });
+      }
+    }
+
+    // Best matches first, capped at `limit`.
+    scored.sort((a, b) => b.score - a.score);
+    return scored.slice(0, limit);
+  }
+
+  /**
+   * Merges semantic and keyword hits into one ranked list.
+   *
+   * Each chunk's score is `combineScores(semantic, keyword, semanticWeight)`; a chunk missing
+   * from one list contributes 0 for that side. Chunk fields come from the semantic hit when
+   * present, otherwise from the keyword hit.
+   *
+   * @returns Up to `limit` (default 10) results scoring at least `threshold` (default 0.5), best first.
+   */
+  private combineHybridResults(
+    semanticResults: QueryResult[],
+    keywordResults: QueryResult[],
+    semanticWeight: number,
+    options?: QueryOptions
+  ): QueryResult[] {
+    const { limit = 10, threshold = 0.5, includeMetadata = true, includeVectors = false } = options ?? {};
+
+    // Index both result lists by chunk id for direct lookup.
+    const semanticMap = new Map(semanticResults.map(r => [r.chunk.id, r]));
+    const keywordMap = new Map(keywordResults.map(r => [r.chunk.id, r]));
+
+    const combinedResults: QueryResult[] = [];
+
+    // Union of ids, semantic hits first (a Set iterates in insertion order).
+    for (const id of new Set([...semanticMap.keys(), ...keywordMap.keys()])) {
+      const semanticResult = semanticMap.get(id);
+      const keywordResult = keywordMap.get(id);
+
+      const combinedScore = combineScores(semanticResult?.score ?? 0, keywordResult?.score ?? 0, semanticWeight);
+
+      if (combinedScore >= threshold) {
+        // Per field, prefer the semantic hit and fall back to the keyword hit. Optional chaining
+        // (instead of `!`) keeps a semantic-only hit with an absent field from throwing.
+        combinedResults.push({
+          chunk: {
+            id,
+            content: semanticResult?.chunk.content ?? keywordResult?.chunk.content ?? '',
+            metadata: includeMetadata ? (semanticResult?.chunk.metadata ?? keywordResult?.chunk.metadata ?? {}) : {},
+            vector: includeVectors ? (semanticResult?.chunk.vector ?? keywordResult?.chunk.vector) : undefined,
+          },
+          score: combinedScore,
+          distance: 1 - combinedScore,
+        });
+      }
+    }
+
+    combinedResults.sort((a, b) => b.score - a.score);
+    return combinedResults.slice(0, limit);
+  }
+
+  /** Returns every stored chunk, using the provider's `getAllChunks` when it offers one. */
+  private async getAllChunks(): Promise<Chunk[]> {
+    const { provider } = this;
+    if (typeof provider.getAllChunks === 'function') {
+      return provider.getAllChunks();
+    }
+    // Best-effort fallback (provider-dependent): zero-vector query, threshold 0, at most 10000 hits.
+    const zeroVector = new Array(this.embedder.dimensions).fill(0);
+    const hits = await this.provider.query(zeroVector, { limit: 10000, threshold: 0 });
+    return hits.map(hit => hit.chunk);
+  }
+
   async sync(): Promise<void> {
     this.options.onSync?.({ type: 'start' });
 
@@ -83,21 +210,35 @@ export class Knowledge {
       await this.provider.clear();
       await this.provider.validateDimensions(dimensions);
 
-      const allChunks: Chunk[] = [];
-      
+      const batchSize = this.options.streamingBatchSize ?? 100;
+      let totalChunks = 0;
+      const chunkBuffer: Chunk[] = [];
+
       for (const source of this.sources) {
         for await (const chunk of source.load()) {
-          allChunks.push(chunk);
+          chunkBuffer.push(chunk);
+
+          if (chunkBuffer.length >= batchSize) {
+            const embeddedBatch = await this.embedChunks(chunkBuffer);
+            if (embeddedBatch.length > 0) {
+              await this.provider.add(embeddedBatch);
+              totalChunks += embeddedBatch.length;
+            }
+            chunkBuffer.length = 0; // Clear buffer
+          }
         }
       }
 
-      const embeddedChunks = await this.embedChunks(allChunks);
-
-      if (embeddedChunks.length > 0) {
-        await this.provider.add(embeddedChunks);
+      // Process remaining chunks
+      if (chunkBuffer.length > 0) {
+        const embeddedBatch = await this.embedChunks(chunkBuffer);
+        if (embeddedBatch.length > 0) {
+          await this.provider.add(embeddedBatch);
+          totalChunks += embeddedBatch.length;
+        }
       }
 
-      this.options.onSync?.({ type: 'complete', chunksAffected: embeddedChunks.length });
+      this.options.onSync?.({ type: 'complete', chunksAffected: totalChunks });
     } catch (error) {
       this.options.onSync?.({ type: 'error', error: error as Error });
       throw error;
diff --git a/packages/toolpack-knowledge/src/providers/memory.ts b/packages/toolpack-knowledge/src/providers/memory.ts
index d9b03c0..3e34f22 100644
--- a/packages/toolpack-knowledge/src/providers/memory.ts
+++ b/packages/toolpack-knowledge/src/providers/memory.ts
@@ -1,6 +1,7 @@
 import { KnowledgeProvider, Chunk, QueryOptions, QueryResult } from '../interfaces.js';
 import { DimensionMismatchError, KnowledgeProviderError } from '../errors.js';
 import { cosineSimilarity, matchesFilter } from '../utils/cosine.js';
+import { keywordSearch } from '../utils/keyword.js';
 
 export interface MemoryProviderOptions {
   maxChunks?: number;
@@ -77,6 +78,43 @@ export class MemoryProvider implements KnowledgeProvider {
     return results.slice(0, limit);
   }
 
+  /**
+   * Scores every stored chunk against `query` with `keywordSearch`.
+   *
+   * @returns Up to `limit` (default 10) chunks scoring at least `threshold` (default 0.1), best first.
+   */
+  async keywordQuery(query: string, options: QueryOptions = {}): Promise<QueryResult[]> {
+    const { limit = 10, threshold = 0.1, filter, includeMetadata = true, includeVectors = false } = options;
+
+    const matches: QueryResult[] = [];
+
+    for (const entry of this.chunks.values()) {
+      const stored = entry.chunk;
+      if (filter && !matchesFilter(stored.metadata, filter)) {
+        continue;
+      }
+
+      const score = keywordSearch(stored.content, query);
+      if (score >= threshold) {
+        matches.push({
+          chunk: {
+            id: stored.id,
+            content: stored.content,
+            metadata: includeMetadata ? stored.metadata : {},
+            // The vector is kept next to the chunk in the store entry.
+            vector: includeVectors ? entry.vector : undefined,
+          },
+          score,
+          distance: 1 - score,
+        });
+      }
+    }
+
+    // Highest score first, then cap at `limit`.
+    matches.sort((a, b) => b.score - a.score);
+    return matches.slice(0, limit);
+  }
+
   async delete(ids: string[]): Promise<void> {
     for (const id of ids) {
       this.chunks.delete(id);
@@ -87,4 +125,11 @@ export class MemoryProvider implements KnowledgeProvider {
     this.chunks.clear();
     this.dimensions = undefined;
   }
+
+  /** Returns a shallow copy of every stored chunk with its vector attached. */
+  async getAllChunks(): Promise<Chunk[]> {
+    const all: Chunk[] = [];
+    for (const { chunk, vector } of this.chunks.values()) all.push({ ...chunk, vector });
+    return all;
+  }
 }
diff --git a/packages/toolpack-knowledge/src/providers/persistent.ts b/packages/toolpack-knowledge/src/providers/persistent.ts
index e4127db..708a33a 100644
--- a/packages/toolpack-knowledge/src/providers/persistent.ts
+++ b/packages/toolpack-knowledge/src/providers/persistent.ts
@@ -5,6 +5,7 @@ import * as os from 'os';
 import { KnowledgeProvider, Chunk, QueryOptions, QueryResult } from '../interfaces.js';
 import { DimensionMismatchError, KnowledgeProviderError } from '../errors.js';
 import { cosineSimilarity, matchesFilter } from '../utils/cosine.js';
+import { keywordSearch } from '../utils/keyword.js';
 
 export interface PersistentKnowledgeProviderOptions {
   namespace: string;
@@ -40,10 +41,29 @@ export class PersistentKnowledgeProvider implements KnowledgeProvider {
         synced_at INTEGER NOT NULL
       );
 
+      CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
+        id, content, metadata
+      );
+
       CREATE TABLE IF NOT EXISTS provider_meta (
         key   TEXT PRIMARY KEY,
         value TEXT NOT NULL
       );
+
+      CREATE TRIGGER IF NOT EXISTS chunks_fts_insert AFTER INSERT ON chunks
+      BEGIN
+        INSERT INTO chunks_fts (id, content, metadata) VALUES (new.id, new.content, new.metadata);
+      END;
+
+      CREATE TRIGGER IF NOT EXISTS chunks_fts_delete AFTER DELETE ON chunks
+      BEGIN
+        DELETE FROM chunks_fts WHERE id = old.id;
+      END;
+
+      CREATE TRIGGER IF NOT EXISTS chunks_fts_update AFTER UPDATE ON chunks
+      BEGIN
+        UPDATE chunks_fts SET content = new.content, metadata = new.metadata WHERE id = new.id;
+      END;
     `);
   }
 
@@ -139,6 +159,65 @@ export class PersistentKnowledgeProvider implements KnowledgeProvider {
     return results.slice(0, limit);
   }
 
+  /**
+   * Keyword search backed by the `chunks_fts` FTS5 index.
+   *
+   * Candidates are fetched best-first by BM25, then re-scored with `keywordSearch` so the
+   * returned scores are on the same scale that `threshold` is compared against.
+   *
+   * @param query - Free-text query; its whitespace-separated terms are OR-ed together.
+   * @param options - `limit` (default 10), `threshold` (default 0.1), `filter`, `includeMetadata`, `includeVectors`.
+   * @returns Up to `limit` results, highest score first.
+   */
+  async keywordQuery(query: string, options: QueryOptions = {}): Promise<QueryResult[]> {
+    const { limit = 10, threshold = 0.1, filter, includeMetadata = true, includeVectors = false } = options;
+
+    // Quote every term as an FTS5 string; embedded double quotes are escaped by doubling them.
+    const terms = query.split(/\s+/).filter(term => term.length > 0);
+    if (terms.length === 0) {
+      return []; // A blank query has nothing to match.
+    }
+    const ftsQuery = terms.map(term => `"${term.replace(/"/g, '""')}"`).join(' OR ');
+
+    // bm25() is numerically smaller for better matches, so sort ascending (DESC kept the worst rows).
+    // Over-fetch (limit * 2) because the metadata filter and threshold run after the SQL LIMIT.
+    const rows = this.db.prepare(`
+      SELECT c.id, c.content, c.metadata, c.vector
+      FROM chunks_fts fts
+      JOIN chunks c ON fts.id = c.id
+      WHERE chunks_fts MATCH ?
+      ORDER BY bm25(chunks_fts)
+      LIMIT ?
+    `).all(ftsQuery, limit * 2) as Array<{ id: string; content: string; metadata: string; vector: Buffer }>;
+
+    const results: QueryResult[] = [];
+
+    for (const row of rows) {
+      const metadata = JSON.parse(row.metadata);
+      if (filter && !matchesFilter(metadata, filter)) {
+        continue;
+      }
+
+      const score = keywordSearch(row.content, query);
+      if (score >= threshold) {
+        // The vector column is read as 32-bit floats; decode it only when the caller asked for it.
+        const vector = includeVectors
+          ? Array.from(new Float32Array(row.vector.buffer, row.vector.byteOffset, row.vector.byteLength / 4))
+          : undefined;
+
+        results.push({
+          chunk: { id: row.id, content: row.content, metadata: includeMetadata ? metadata : {}, vector },
+          score,
+          distance: 1 - score,
+        });
+      }
+    }
+
+    results.sort((a, b) => b.score - a.score);
+
+    return results.slice(0, limit);
+  }
+
   async delete(ids: string[]): Promise<void> {
     const del = this.db.prepare('DELETE FROM chunks WHERE id = ?');
     const transaction = this.db.transaction((ids: string[]) => {
@@ -155,6 +234,27 @@ export class PersistentKnowledgeProvider implements KnowledgeProvider {
     this.dimensions = undefined;
   }
 
+  /**
+   * Loads every row of the `chunks` table as a `Chunk`.
+   *
+   * `metadata` is parsed from its JSON text and `vector` is decoded from the stored
+   * bytes as 32-bit floats.
+   */
+  async getAllChunks(): Promise<Chunk[]> {
+    type ChunkRow = { id: string; content: string; metadata: string; vector: Buffer };
+    const rows = this.db.prepare('SELECT id, content, metadata, vector FROM chunks').all() as ChunkRow[];
+
+    const chunks: Chunk[] = [];
+    for (const { id, content, metadata, vector } of rows) {
+      const parsed = JSON.parse(metadata);
+      // View the Buffer's bytes in place; Array.from then copies them into a plain number[].
+      const floats = new Float32Array(vector.buffer, vector.byteOffset, vector.byteLength / 4);
+      chunks.push({ id, content, metadata: parsed, vector: Array.from(floats) });
+    }
+
+    return chunks;
+  }
+
   shouldReSync(): boolean {
     if (this.options.reSync === false) {
       const count = this.db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number };
diff --git a/packages/toolpack-knowledge/src/sources/api.ts b/packages/toolpack-knowledge/src/sources/api.ts
new file mode 100644
index 0000000..785be66
--- /dev/null
+++ b/packages/toolpack-knowledge/src/sources/api.ts
@@ -0,0 +1,272 @@
+import * as crypto from 'crypto';
+import { KnowledgeSource, Chunk } from '../interfaces.js';
+import { IngestionError } from '../errors.js';
+import { estimateTokens, splitLargeChunk, applyOverlap } from '../utils/chunking.js';
+
+/** Options accepted by `ApiDataSource`. */
+export interface ApiDataSourceOptions {
+  maxChunkSize?: number;
+  chunkOverlap?: number;
+  minChunkSize?: number;
+  namespace?: string;
+  metadata?: Record<string, unknown>;
+  headers?: Record<string, string>;
+  method?: 'GET' | 'POST';
+  body?: unknown; // JSON-serialised request body; only sent when the method is not GET
+  timeoutMs?: number;
+  // The page value starts at `start`, grows by `step` per request, and is sent as query
+  // parameter `param`; at most `maxPages` (default 1) requests are made.
+  pagination?: {
+    param: string;
+    start: number;
+    step: number;
+    maxPages?: number;
+  } | null;
+  dataPath?: string; // JSON path to extract data array (e.g., 'data.items')
+  contentExtractor?: (item: unknown) => string;
+  metadataExtractor?: (item: unknown) => Record<string, unknown>;
+}
+
+/**
+ * Knowledge source that fetches JSON from an HTTP endpoint (optionally page by page) and
+ * turns each returned item into one or more chunks.
+ */
+export class ApiDataSource implements KnowledgeSource {
+  private options: ApiDataSourceOptions;
+
+  constructor(
+    private url: string,
+    options: ApiDataSourceOptions = {}
+  ) {
+    this.options = {
+      maxChunkSize: options.maxChunkSize ?? 2000,
+      chunkOverlap: options.chunkOverlap ?? 200,
+      minChunkSize: options.minChunkSize ?? 100,
+      namespace: options.namespace ?? 'api',
+      metadata: options.metadata ?? {},
+      headers: options.headers ?? {},
+      method: options.method ?? 'GET',
+      body: options.body, // previously dropped here, so a configured body was never sent
+      timeoutMs: options.timeoutMs ?? 30000,
+      pagination: options.pagination,
+      dataPath: options.dataPath ?? '',
+      contentExtractor: options.contentExtractor ?? this.defaultContentExtractor,
+      metadataExtractor: options.metadataExtractor ?? this.defaultMetadataExtractor,
+    };
+  }
+
+  /**
+   * Fetches all items, then yields the chunks of each item in order.
+   *
+   * @throws IngestionError when extracting or chunking an item fails.
+   */
+  async *load(): AsyncIterable<Chunk> {
+    const items = await this.fetchData();
+
+    for (const item of items) {
+      try {
+        const chunks = this.chunkItem(item);
+
+        for (const chunk of chunks) {
+          yield chunk;
+        }
+      } catch (error) {
+        throw new IngestionError(`Failed to process API item: ${(error as Error).message}`, this.url);
+      }
+    }
+  }
+
+  /**
+   * Collects the items of every page.
+   *
+   * Without pagination a single request is made. With pagination, up to `maxPages` requests
+   * are made, advancing the page value by `step` each time and stopping at the first empty page.
+   */
+  private async fetchData(): Promise<unknown[]> {
+    const pagination = this.options.pagination;
+    if (!pagination) {
+      return this.fetchPage(this.url);
+    }
+
+    const allItems: unknown[] = [];
+    const maxPages = pagination.maxPages ?? 1;
+    let page = pagination.start;
+
+    // Count requests rather than comparing the page value against maxPages, so a non-zero
+    // `start` or a `step` other than 1 still fetches the configured number of pages.
+    for (let fetched = 0; fetched < maxPages; fetched++) {
+      const items = await this.fetchPage(this.buildUrl(page));
+      if (items.length === 0) {
+        break; // No more data
+      }
+
+      for (const item of items) {
+        allItems.push(item);
+      }
+      page += pagination.step;
+    }
+
+    return allItems;
+  }
+
+  /** Returns the request URL for `page`: the base URL, plus the page query parameter when paginating. */
+  private buildUrl(page: number): string {
+    if (!this.options.pagination) {
+      return this.url;
+    }
+
+    const url = new URL(this.url);
+    url.searchParams.set(this.options.pagination.param, page.toString());
+    return url.href;
+  }
+
+  /**
+   * Requests `url` and returns the items found in the JSON response.
+   *
+   * @throws Error on a non-2xx status; the request is aborted after `timeoutMs`.
+   */
+  private async fetchPage(url: string): Promise<unknown[]> {
+    const controller = new AbortController();
+    const timeoutId = setTimeout(() => controller.abort(), this.options.timeoutMs);
+    // fetch() rejects GET requests that carry a body, so only attach one for other methods.
+    const sendBody = this.options.method !== 'GET' && this.options.body !== undefined;
+
+    try {
+      const response = await fetch(url, {
+        method: this.options.method,
+        headers: {
+          'Content-Type': 'application/json',
+          ...this.options.headers,
+        },
+        body: sendBody ? JSON.stringify(this.options.body) : undefined,
+        signal: controller.signal,
+      });
+
+      if (!response.ok) {
+        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+      }
+
+      const data = await response.json();
+      return this.extractItems(data);
+    } finally {
+      clearTimeout(timeoutId);
+    }
+  }
+
+  /**
+   * Walks the dot-separated `dataPath` into `data` and returns the value found there as an
+   * array (a non-array value is wrapped). Without a `dataPath` the whole response is used.
+   *
+   * @throws Error when a path segment is missing.
+   */
+  private extractItems(data: unknown): unknown[] {
+    if (!this.options.dataPath) {
+      return Array.isArray(data) ? data : [data];
+    }
+
+    const path = this.options.dataPath.split('.');
+    let current: unknown = data;
+
+    for (const key of path) {
+      if (current && typeof current === 'object' && key in current) {
+        current = (current as Record<string, unknown>)[key];
+      } else {
+        throw new Error(`Data path '${this.options.dataPath}' not found in response`);
+      }
+    }
+
+    return Array.isArray(current) ? current : [current];
+  }
+
+  /** Builds the chunks for one item: extract text and metadata, split oversized text, then apply overlap. */
+  private chunkItem(item: unknown): Chunk[] {
+    const content = this.options.contentExtractor!(item);
+    const itemMetadata = this.options.metadataExtractor!(item);
+
+    const tokens = estimateTokens(content);
+
+    let itemChunks: string[];
+    if (tokens > (this.options.maxChunkSize ?? 2000)) {
+      itemChunks = splitLargeChunk(content, this.options.maxChunkSize ?? 2000);
+    } else {
+      itemChunks = [content];
+    }
+
+    if ((this.options.chunkOverlap ?? 200) > 0 && itemChunks.length > 1) {
+      itemChunks = applyOverlap(itemChunks, this.options.chunkOverlap ?? 200);
+    }
+
+    const chunks: Chunk[] = [];
+
+    for (let i = 0; i < itemChunks.length; i++) {
+      const chunkContent = itemChunks[i];
+      const chunkId = this.generateChunkId(item, chunkContent, i);
+
+      chunks.push({
+        id: chunkId,
+        content: chunkContent,
+        metadata: {
+          ...this.options.metadata,
+          ...itemMetadata,
+          source: 'api',
+          apiUrl: this.url,
+          chunkIndex: i,
+          totalChunks: itemChunks.length,
+        },
+      });
+    }
+
+    return chunks;
+  }
+
+  /** Default text extractor: strings as-is, the first string among common content fields, else the JSON form. */
+  private defaultContentExtractor(item: unknown): string {
+    if (typeof item === 'string') {
+      return item;
+    }
+
+    if (typeof item === 'object' && item !== null) {
+      // Try common content fields
+      const contentFields = ['content', 'text', 'description', 'body', 'message'];
+
+      for (const field of contentFields) {
+        if (field in item && typeof (item as Record<string, unknown>)[field] === 'string') {
+          return (item as Record<string, unknown>)[field] as string;
+        }
+      }
+
+      // Fallback to JSON string
+      return JSON.stringify(item);
+    }
+
+    return String(item);
+  }
+
+  /** Default metadata extractor: copies a fixed set of common fields that are present on the item. */
+  private defaultMetadataExtractor(item: unknown): Record<string, unknown> {
+    if (typeof item === 'object' && item !== null) {
+      const metadata: Record<string, unknown> = {};
+
+      // Extract common metadata fields
+      const metadataFields = ['id', 'title', 'name', 'created_at', 'updated_at', 'author', 'tags'];
+
+      for (const field of metadataFields) {
+        if (field in item) {
+          metadata[field] = (item as Record<string, unknown>)[field];
+        }
+      }
+
+      return metadata;
+    }
+
+    return {};
+  }
+
+  /** Builds a chunk id from the namespace, a hash of the item, the chunk index and a hash of the chunk text. */
+  private generateChunkId(item: unknown, content: string, index: number): string {
+    const hash = crypto.createHash('md5').update(content).digest('hex').substring(0, 8);
+    // JSON.stringify returns undefined for values such as `undefined`; fall back to String() so hashing cannot throw.
+    const itemHash = crypto.createHash('md5').update(JSON.stringify(item) ?? String(item)).digest('hex').substring(0, 8);
+    return `${this.options.namespace}:${itemHash}:${index}:${hash}`;
+  }
+}
\ No newline at end of file
diff --git a/packages/toolpack-knowledge/src/sources/web-url.ts b/packages/toolpack-knowledge/src/sources/web-url.ts
new file mode 100644
index 0000000..1763820
--- /dev/null
+++ b/packages/toolpack-knowledge/src/sources/web-url.ts
@@ -0,0 +1,291 @@
+import * as crypto from 'crypto';
+import * as cheerio from 'cheerio';
+import { KnowledgeSource, Chunk } from '../interfaces.js';
+import { IngestionError } from '../errors.js';
+import { estimateTokens, splitLargeChunk, applyOverlap } from '../utils/chunking.js';
+
+/** Options accepted by `WebUrlSource`. */
+export interface WebUrlSourceOptions {
+  maxChunkSize?: number;
+  chunkOverlap?: number;
+  minChunkSize?: number;
+  namespace?: string;
+  metadata?: Record<string, unknown>;
+  maxDepth?: number;
+  userAgent?: string;
+  delayMs?: number;
+  timeoutMs?: number;
+  sameDomainOnly?: boolean;
+  maxPagesPerDomain?: number;
+}
+
+/** A fetched page reduced to its title, text content and outgoing links. */
+interface CrawledPage {
+  url: string;
+  title: string;
+  content: string;
+  links: string[];
+}
+
+/**
+ * Knowledge source that crawls web pages starting from a list of seed URLs and yields
+ * their text content as chunks.
+ */
+export class WebUrlSource implements KnowledgeSource {
+  private options: Required<WebUrlSourceOptions>;
+  private crawledUrls = new Set<string>();
+  private domainPageCount = new Map<string, number>();
+  private lastRequestTime = new Map<string, number>();
+
+  constructor(
+    private urls: string[],
+    options: WebUrlSourceOptions = {}
+  ) {
+    this.options = {
+      maxChunkSize: options.maxChunkSize ?? 2000,
+      chunkOverlap: options.chunkOverlap ?? 200,
+      minChunkSize: options.minChunkSize ?? 100,
+      namespace: options.namespace ?? 'web',
+      metadata: options.metadata ?? {},
+      maxDepth: options.maxDepth ?? 1,
+      userAgent: options.userAgent ?? 'Toolpack-Knowledge/1.0',
+      delayMs: options.delayMs ?? 1000,
+      timeoutMs: options.timeoutMs ?? 30000,
+      sameDomainOnly: options.sameDomainOnly ?? true,
+      maxPagesPerDomain: options.maxPagesPerDomain ?? 10,
+    };
+  }
+
+  /**
+   * Crawls the seed URLs and yields the chunks of every page that could be fetched.
+   *
+   * @throws IngestionError when chunking a fetched page fails.
+   */
+  async *load(): AsyncIterable<Chunk> {
+    // Start every load from a clean slate; otherwise a second load() would treat all URLs
+    // as already crawled and yield nothing.
+    this.crawledUrls.clear();
+    this.domainPageCount.clear();
+
+    const pages = await this.crawlUrls(this.urls, 0);
+
+    for (const page of pages) {
+      try {
+        const chunks = this.chunkPage(page);
+
+        for (const chunk of chunks) {
+          yield chunk;
+        }
+      } catch (error) {
+        throw new IngestionError(`Failed to process URL ${page.url}: ${(error as Error).message}`, page.url);
+      }
+    }
+  }
+
+  /** Returns the hostname of `url`, or undefined when `url` cannot be parsed. */
+  private hostnameOf(url: string): string | undefined {
+    try {
+      return new URL(url).hostname;
+    } catch {
+      return undefined;
+    }
+  }
+
+  /**
+   * Fetches `urls`, then recursively follows their links until `maxDepth` levels were crawled.
+   *
+   * URLs that fail to parse or fetch are logged and skipped.
+   *
+   * @param urls - URLs to fetch at this level.
+   * @param depth - Zero-based crawl level.
+   * @param allowedDomains - Hostnames accepted when `sameDomainOnly` is set; defaults to the
+   *   hostnames of `urls`, i.e. the seed URLs on the initial call.
+   */
+  private async crawlUrls(
+    urls: string[],
+    depth: number,
+    allowedDomains?: Set<string>
+  ): Promise<CrawledPage[]> {
+    if (depth >= this.options.maxDepth) {
+      return [];
+    }
+
+    // Fix the allow-list once, from the seed URLs, and hand it down the recursion. Deriving it
+    // from each level's own URLs would accept every discovered link.
+    const domains = allowedDomains ?? new Set(
+      urls.map(url => this.hostnameOf(url)).filter((host): host is string => host !== undefined)
+    );
+    const pages: CrawledPage[] = [];
+    const newUrls: string[] = [];
+
+    for (const url of urls) {
+      if (this.crawledUrls.has(url)) {
+        continue;
+      }
+
+      const domain = this.hostnameOf(url);
+      if (domain === undefined) {
+        console.warn(`Failed to crawl ${url}: Invalid URL`);
+        continue;
+      }
+      const pageCount = this.domainPageCount.get(domain) ?? 0;
+
+      if (this.options.sameDomainOnly && !domains.has(domain)) {
+        continue; // Skip external domains
+      }
+
+      if (pageCount >= this.options.maxPagesPerDomain) {
+        continue; // Skip if too many pages from this domain
+      }
+
+      this.crawledUrls.add(url);
+      this.domainPageCount.set(domain, pageCount + 1);
+
+      try {
+        // Rate limiting per domain
+        const lastTime = this.lastRequestTime.get(domain) ?? 0;
+        const timeSince = Date.now() - lastTime;
+        if (timeSince < this.options.delayMs) {
+          await new Promise(resolve => setTimeout(resolve, this.options.delayMs - timeSince));
+        }
+
+        const page = await this.fetchPage(url);
+        pages.push(page);
+        this.lastRequestTime.set(domain, Date.now());
+
+        if (depth < this.options.maxDepth - 1) {
+          newUrls.push(...page.links);
+        }
+      } catch (error) {
+        console.warn(`Failed to crawl ${url}: ${(error as Error).message}`);
+      }
+    }
+
+    if (newUrls.length > 0) {
+      const subPages = await this.crawlUrls(newUrls, depth + 1, domains);
+      pages.push(...subPages);
+    }
+
+    return pages;
+  }
+
+  /**
+   * Downloads `url` and extracts its title, text content and links (absolute, starting with 'http', no '#').
+   *
+   * @throws Error on a non-2xx status; the request is aborted after `timeoutMs`.
+   */
+  private async fetchPage(url: string): Promise<CrawledPage> {
+    const controller = new AbortController();
+    const timeoutId = setTimeout(() => controller.abort(), this.options.timeoutMs);
+
+    try {
+      const response = await fetch(url, {
+        signal: controller.signal,
+        headers: {
+          'User-Agent': this.options.userAgent,
+        },
+      });
+
+      if (!response.ok) {
+        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+      }
+
+      const html = await response.text();
+      const $ = cheerio.load(html);
+
+      // Remove script and style elements
+      $('script, style, nav, header, footer, aside').remove();
+
+      // Extract title
+      const title = $('title').text().trim() || $('h1').first().text().trim() || 'Untitled';
+
+      // Extract main content
+      const contentSelectors = ['main', 'article', '.content', '#content', 'body'];
+      let content = '';
+
+      for (const selector of contentSelectors) {
+        const element = $(selector);
+        if (element.length > 0) {
+          content = element.text().trim();
+          break;
+        }
+      }
+
+      if (!content) {
+        content = $('body').text().trim();
+      }
+
+      // Clean up whitespace
+      content = content.replace(/\s+/g, ' ').trim();
+
+      // Extract links
+      const links: string[] = [];
+      $('a[href]').each((_, element) => {
+        const href = $(element).attr('href');
+        if (href) {
+          try {
+            const absoluteUrl = new URL(href, url).href;
+            if (absoluteUrl.startsWith('http') && !absoluteUrl.includes('#')) {
+              links.push(absoluteUrl);
+            }
+          } catch {
+            // Invalid URL, skip
+          }
+        }
+      });
+
+      return {
+        url,
+        title,
+        content,
+        links: [...new Set(links)], // Remove duplicates
+      };
+    } finally {
+      clearTimeout(timeoutId);
+    }
+  }
+
+  /** Splits a page's text into chunks (with overlap when it was split) and attaches page metadata. */
+  private chunkPage(page: CrawledPage): Chunk[] {
+    const chunks: Chunk[] = [];
+    const tokens = estimateTokens(page.content);
+
+    let pageChunks: string[];
+    if (tokens > this.options.maxChunkSize) {
+      pageChunks = splitLargeChunk(page.content, this.options.maxChunkSize);
+    } else {
+      pageChunks = [page.content];
+    }
+
+    if (this.options.chunkOverlap > 0 && pageChunks.length > 1) {
+      pageChunks = applyOverlap(pageChunks, this.options.chunkOverlap);
+    }
+
+    for (let i = 0; i < pageChunks.length; i++) {
+      const chunkContent = pageChunks[i];
+      const chunkId = this.generateChunkId(page.url, chunkContent, i);
+
+      chunks.push({
+        id: chunkId,
+        content: chunkContent,
+        metadata: {
+          ...this.options.metadata,
+          title: page.title,
+          url: page.url,
+          source: 'web',
+          chunkIndex: i,
+          totalChunks: pageChunks.length,
+        },
+      });
+    }
+
+    return chunks;
+  }
+
+  /** Builds a chunk id from the namespace, a hash of the URL, the chunk index and a hash of the chunk text. */
+  private generateChunkId(url: string, content: string, index: number): string {
+    const hash = crypto.createHash('md5').update(content).digest('hex').substring(0, 8);
+    const urlHash = crypto.createHash('md5').update(url).digest('hex').substring(0, 8);
+    return `${this.options.namespace}:${urlHash}:${index}:${hash}`;
+  }
+}
\ No newline at end of file
diff --git a/packages/toolpack-knowledge/src/utils/keyword.ts b/packages/toolpack-knowledge/src/utils/keyword.ts
new file mode 100644
index 0000000..1ac7640
--- /dev/null
+++ b/packages/toolpack-knowledge/src/utils/keyword.ts
@@ -0,0 +1,29 @@
+/** Scores `text` against `query` in [0, 1]: 1 for a substring hit, else the share of query words (3+ chars) found. */
+export function keywordSearch(text: string, query: string): number {
+  const textLower = text.toLowerCase();
+  const queryLower = query.toLowerCase();
+
+  // A blank query would otherwise score 1.0 against every text, because includes('') is always true.
+  if (queryLower.trim().length === 0) {
+    return 0.0;
+  }
+  // A case-insensitive substring match gets the highest score.
+  if (textLower.includes(queryLower)) {
+    return 1.0;
+  }
+
+  // Word-level matching; words shorter than three characters are ignored.
+  const queryWords = queryLower.split(/\s+/).filter(word => word.length > 2);
+  if (queryWords.length === 0) {
+    return 0.0;
+  }
+
+  const matchCount = queryWords.filter(word => textLower.includes(word)).length;
+
+  return matchCount / queryWords.length;
+}
+
+export function combineScores(semanticScore: number, keywordScore: number, semanticWeight: number = 0.7): number {
+  // Weighted sum: `semanticWeight` applies to the semantic score, the remainder to the keyword score.
+  return semanticScore * semanticWeight + keywordScore * (1 - semanticWeight);
+}
\ No newline at end of file