From 750f3dd669b3a415c5a8589253658c2ef7bfbdce Mon Sep 17 00:00:00 2001
From: Anand Krishnamoorthy <ananduk1993@gmail.com>
Date: Sun, 3 May 2026 22:52:09 -0400
Subject: [PATCH 1/9] Add cookbook firecrawl

---
 examples/cookbook/firecrawl/.env.example      |  18 +
 .../.ipynb_checkpoints/README-checkpoint.md   |  50 +++
 .../firecrawl_moss-checkpoint.ipynb           | 267 +++++++++++++++
 examples/cookbook/firecrawl/README.md         |  50 +++
 .../cookbook/firecrawl/firecrawl_moss.ipynb   | 308 ++++++++++++++++++
 5 files changed, 693 insertions(+)
 create mode 100644 examples/cookbook/firecrawl/.env.example
 create mode 100644 examples/cookbook/firecrawl/.ipynb_checkpoints/README-checkpoint.md
 create mode 100644 examples/cookbook/firecrawl/.ipynb_checkpoints/firecrawl_moss-checkpoint.ipynb
 create mode 100644 examples/cookbook/firecrawl/README.md
 create mode 100644 examples/cookbook/firecrawl/firecrawl_moss.ipynb

diff --git a/examples/cookbook/firecrawl/.env.example b/examples/cookbook/firecrawl/.env.example
new file mode 100644
index 00000000..d95c476c
--- /dev/null
+++ b/examples/cookbook/firecrawl/.env.example
@@ -0,0 +1,18 @@
+# Example env for Firecrawl + Moss cookbook
+# Copy to .env and fill in values before running the notebook.
+
+# Moss credentials
+MOSS_PROJECT_ID=your_moss_project_id
+MOSS_PROJECT_KEY=your_moss_project_key
+
+# Firecrawl API key
+FIRECRAWL_API_KEY=your_firecrawl_api_key
+
+# Optional: default index name used by the notebook
+MOSS_INDEX_NAME=firecrawl-demo
+
+# Optional LLM keys for RAG synthesis (set one or both)
+# Set `GEMINI_API_KEY` to use Google Gemini (if you adapt the helper),
+# or set `OPENAI_API_KEY` to use OpenAI in the provided example.
+GEMINI_API_KEY=
+OPENAI_API_KEY=
diff --git a/examples/cookbook/firecrawl/.ipynb_checkpoints/README-checkpoint.md b/examples/cookbook/firecrawl/.ipynb_checkpoints/README-checkpoint.md
new file mode 100644
index 00000000..123ab8d4
--- /dev/null
+++ b/examples/cookbook/firecrawl/.ipynb_checkpoints/README-checkpoint.md
@@ -0,0 +1,50 @@
+# Firecrawl + Moss Cookbook Example
+
+Use Firecrawl to turn one or more URLs into clean markdown, then index the results into Moss and query them semantically from a notebook.
+
+> This is a cookbook example, not a packaged integration. Open [firecrawl_moss.ipynb](firecrawl_moss.ipynb) to follow the full URL-to-query pipeline.
+
+## Installation
+
+```bash
+pip install firecrawl-py moss python-dotenv
+```
+
+## Setup
+
+Set these environment variables in your shell or a `.env` file:
+
+```bash
+FIRECRAWL_API_KEY=your-firecrawl-api-key
+MOSS_PROJECT_ID=your-project-id
+MOSS_PROJECT_KEY=your-project-key
+```
+
+## Quick Start
+
+1. Open [firecrawl_moss.ipynb](firecrawl_moss.ipynb) in Jupyter or VS Code.
+2. Run the setup and helper cells.
+3. Set `urls` to the pages you want to ingest.
+4. Run `await build_and_query_knowledge_base(urls)` to crawl, index, and query the content.
+
+## What the notebook does
+
+```python
+from firecrawl import Firecrawl
+from moss import DocumentInfo, MossClient, QueryOptions
+
+job = Firecrawl(api_key=FIRECRAWL_API_KEY).crawl(
+	url="https://example.com",
+	limit=3,
+	scrape_options={"formats": ["markdown"]},
+)
+
+documents = [DocumentInfo(id="1", text=job.data[0].markdown, metadata={"source_url": "https://example.com"})]
+await MossClient(MOSS_PROJECT_ID, MOSS_PROJECT_KEY).create_index("firecrawl-demo", documents)
+```
+
+## Files
+
+| File | Description |
+|------|-------------|
+| `firecrawl_moss.ipynb` | Notebook that crawls URLs, indexes markdown into Moss, and runs semantic search |
diff --git a/examples/cookbook/firecrawl/.ipynb_checkpoints/firecrawl_moss-checkpoint.ipynb b/examples/cookbook/firecrawl/.ipynb_checkpoints/firecrawl_moss-checkpoint.ipynb
new file mode 100644
index 00000000..d47416a9
--- /dev/null
+++ b/examples/cookbook/firecrawl/.ipynb_checkpoints/firecrawl_moss-checkpoint.ipynb
@@ -0,0 +1,267 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "15050d77",
+   "metadata": {},
+   "source": [
+    "# Firecrawl + Moss Cookbook\n",
+    "\n",
+    "Crawl one or more URLs with Firecrawl, convert the results to clean markdown, index them into Moss, and query the knowledge base semantically."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ca524d0e",
+   "metadata": {},
+   "source": [
+    "## 1. Set Up Project Environment\n",
+    "\n",
+    "Install the SDKs and set your credentials before running the notebook.\n",
+    "\n",
+    "```bash\n",
+    "pip install firecrawl-py moss python-dotenv\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c22437b8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from __future__ import annotations\n",
+    "\n",
+    "import os\n",
+    "import uuid\n",
+    "from dataclasses import dataclass\n",
+    "from typing import Any\n",
+    "\n",
+    "from dotenv import load_dotenv\n",
+    "from firecrawl import Firecrawl\n",
+    "from moss import DocumentInfo, MossClient, QueryOptions\n",
+    "\n",
+    "load_dotenv()\n",
+    "\n",
+    "FIRECRAWL_API_KEY = os.getenv(\"FIRECRAWL_API_KEY\")\n",
+    "MOSS_PROJECT_ID = os.getenv(\"MOSS_PROJECT_ID\")\n",
+    "MOSS_PROJECT_KEY = os.getenv(\"MOSS_PROJECT_KEY\")\n",
+    "DEFAULT_QUERY = \"What does the knowledge base say about the topic?\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ce4dae7b",
+   "metadata": {},
+   "source": [
+    "## 2. Define Core Data Structures\n",
+    "\n",
+    "Normalize each crawled page into a small Python structure before converting it into Moss documents."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bf5da039",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@dataclass\n",
+    "class CrawledPage:\n",
+    "    url: str\n",
+    "    markdown: str\n",
+    "    title: str | None = None\n",
+    "\n",
+    "\n",
+    "def page_to_crawled_page(page: Any) -> CrawledPage:\n",
+    "    markdown = getattr(page, \"markdown\", None)\n",
+    "    if markdown is None and isinstance(page, dict):\n",
+    "        markdown = page.get(\"markdown\")\n",
+    "\n",
+    "    metadata = getattr(page, \"metadata\", None)\n",
+    "    if metadata is None and isinstance(page, dict):\n",
+    "        metadata = page.get(\"metadata\", {})\n",
+    "\n",
+    "    url = None\n",
+    "    title = None\n",
+    "    if isinstance(metadata, dict):\n",
+    "        url = metadata.get(\"source_url\") or metadata.get(\"sourceURL\") or metadata.get(\"url\")\n",
+    "        title = metadata.get(\"title\") or metadata.get(\"og_title\")\n",
+    "    elif metadata is not None:\n",
+    "        url = getattr(metadata, \"source_url\", None) or getattr(metadata, \"sourceURL\", None) or getattr(metadata, \"url\", None)\n",
+    "        title = getattr(metadata, \"title\", None) or getattr(metadata, \"og_title\", None)\n",
+    "\n",
+    "    return CrawledPage(url=url or \"unknown\", markdown=markdown or \"\", title=title)\n",
+    "\n",
+    "\n",
+    "def crawled_pages_to_moss_docs(pages: list[CrawledPage]) -> list[DocumentInfo]:\n",
+    "    docs: list[DocumentInfo] = []\n",
+    "    for index, page in enumerate(pages, start=1):\n",
+    "        docs.append(\n",
+    "            DocumentInfo(\n",
+    "                id=f\"firecrawl-{index}\",\n",
+    "                text=page.markdown,\n",
+    "                metadata={\"source_url\": page.url, \"title\": page.title or \"\"},\n",
+    "            )\n",
+    "        )\n",
+    "    return docs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fcb41889",
+   "metadata": {},
+   "source": [
+    "## 3. Implement Main Functionality\n",
+    "\n",
+    "Firecrawl handles URL-to-markdown extraction. Moss handles indexing and semantic search."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "42c24a13",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def validate_configuration(urls: list[str]) -> None:\n",
+    "    if not urls:\n",
+    "        raise ValueError(\"Provide at least one URL to crawl.\")\n",
+    "    if not FIRECRAWL_API_KEY:\n",
+    "        raise ValueError(\"Set FIRECRAWL_API_KEY before running the notebook.\")\n",
+    "    if not MOSS_PROJECT_ID or not MOSS_PROJECT_KEY:\n",
+    "        raise ValueError(\"Set MOSS_PROJECT_ID and MOSS_PROJECT_KEY before running the notebook.\")\n",
+    "\n",
+    "\n",
+    "def crawl_urls(urls: list[str], limit: int = 3) -> list[CrawledPage]:\n",
+    "    firecrawl = Firecrawl(api_key=FIRECRAWL_API_KEY)\n",
+    "    pages: list[CrawledPage] = []\n",
+    "\n",
+    "    for url in urls:\n",
+    "        job = firecrawl.crawl(url=url, limit=limit, scrape_options={\"formats\": [\"markdown\"]})\n",
+    "        raw_pages = getattr(job, \"data\", None) or (job.get(\"data\") if isinstance(job, dict) else []) or []\n",
+    "        pages.extend(page_to_crawled_page(page) for page in raw_pages)\n",
+    "\n",
+    "    return [page for page in pages if page.markdown.strip()]\n",
+    "\n",
+    "\n",
+    "async def build_and_query_knowledge_base(urls: list[str], query: str = DEFAULT_QUERY) -> None:\n",
+    "    validate_configuration(urls)\n",
+    "    crawled_pages = crawl_urls(urls)\n",
+    "    documents = crawled_pages_to_moss_docs(crawled_pages)\n",
+    "\n",
+    "    if not documents:\n",
+    "        raise RuntimeError(\"Firecrawl returned no markdown content to index.\")\n",
+    "\n",
+    "    index_name = f\"firecrawl-cookbook-{uuid.uuid4().hex[:8]}\"\n",
+    "    client = MossClient(MOSS_PROJECT_ID, MOSS_PROJECT_KEY)\n",
+    "\n",
+    "    await client.create_index(index_name, documents)\n",
+    "    await client.load_index(index_name)\n",
+    "    results = await client.query(index_name, query, QueryOptions(top_k=5, alpha=0.8))\n",
+    "\n",
+    "    print(f\"Indexed {len(documents)} documents into {index_name}\")\n",
+    "    print(f\"Query: {query}\")\n",
+    "    for item in results.docs:\n",
+    "        source_url = item.metadata.get(\"source_url\", \"unknown\") if item.metadata else \"unknown\"\n",
+    "        print(f\"- [{item.score:.3f}] {source_url}\")\n",
+    "        print(f\"  {item.text[:200].strip()}\")\n",
+    "\n",
+    "    return None"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5e8c4a3b",
+   "metadata": {},
+   "source": [
+    "## 4. Add Input Validation and Error Handling\n",
+    "\n",
+    "Keep the notebook explicit about missing credentials and empty crawl results."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dcc9ed11",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def preview_documents(documents: list[Any]) -> None:\n",
+    "    if not documents:\n",
+    "        print(\"No documents to preview.\")\n",
+    "        return\n",
+    "\n",
+    "    print(f\"Previewing {len(documents)} documents:\")\n",
+    "    for document in documents[:3]:\n",
+    "        source_url = document.metadata.get(\"source_url\", \"unknown\") if getattr(document, \"metadata\", None) else \"unknown\"\n",
+    "        print(f\"- {document.id} from {source_url}\")\n",
+    "        print(document.text[:120].replace(\"\\n\", \" \"))\n",
+    "\n",
+    "\n",
+    "def safe_run(urls: list[str], query: str = DEFAULT_QUERY) -> None:\n",
+    "    try:\n",
+    "        validate_configuration(urls)\n",
+    "        pages = crawl_urls(urls)\n",
+    "        documents = crawled_pages_to_moss_docs(pages)\n",
+    "        preview_documents(documents)\n",
+    "    except Exception as exc:\n",
+    "        print(f\"Validation or crawl failed: {exc}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c6892bde",
+   "metadata": {},
+   "source": [
+    "## 5. Write Unit Tests\n",
+    "\n",
+    "These lightweight tests verify the page normalization and document conversion logic without requiring live API calls."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7bd9052d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class FakePage:\n",
+    "    def __init__(self, markdown: str, source_url: str, title: str | None = None) -> None:\n",
+    "        self.markdown = markdown\n",
+    "        self.metadata = {\"source_url\": source_url, \"title\": title}\n",
+    "\n",
+    "\n",
+    "sample_page = FakePage(\"# Sample page\\n\\nThis is a test.\", \"https://example.com\", \"Example\")\n",
+    "normalized = page_to_crawled_page(sample_page)\n",
+    "assert normalized.url == \"https://example.com\"\n",
+    "assert normalized.title == \"Example\"\n",
+    "assert \"Sample page\" in normalized.markdown\n",
+    "\n",
+    "documents = crawled_pages_to_moss_docs([normalized])\n",
+    "assert documents[0].id == \"firecrawl-1\"\n",
+    "assert documents[0].metadata[\"source_url\"] == \"https://example.com\"\n",
+    "assert documents[0].text.startswith(\"# Sample page\")\n",
+    "print(\"All notebook sanity checks passed.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "45761029",
+   "metadata": {},
+   "source": [
+    "## 6. Run and Inspect Results\n",
+    "\n",
+    "After setting the API keys, call `await build_and_query_knowledge_base(urls, query)` with your own URLs and inspect the returned passages."
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/cookbook/firecrawl/README.md b/examples/cookbook/firecrawl/README.md
new file mode 100644
index 00000000..123ab8d4
--- /dev/null
+++ b/examples/cookbook/firecrawl/README.md
@@ -0,0 +1,50 @@
+# Firecrawl + Moss Cookbook Example
+
+Use Firecrawl to turn one or more URLs into clean markdown, then index the results into Moss and query them semantically from a notebook.
+
+> This is a cookbook example, not a packaged integration. Open [firecrawl_moss.ipynb](firecrawl_moss.ipynb) to follow the full URL-to-query pipeline.
+
+## Installation
+
+```bash
+pip install firecrawl-py moss python-dotenv
+```
+
+## Setup
+
+Set these environment variables in your shell, or copy `.env.example` to `.env` and fill in the values:
+
+```bash
+FIRECRAWL_API_KEY=your-firecrawl-api-key
+MOSS_PROJECT_ID=your-project-id
+MOSS_PROJECT_KEY=your-project-key
+```
+
+## Quick Start
+
+1. Open [firecrawl_moss.ipynb](firecrawl_moss.ipynb) in Jupyter or VS Code.
+2. Run the setup and helper cells.
+3. Set `urls` to the pages you want to ingest.
+4. Run `await build_and_query_knowledge_base(urls)` to crawl, index, and query the content.
+
+## What the notebook does
+
+```python
+from firecrawl import Firecrawl
+from moss import DocumentInfo, MossClient, QueryOptions
+
+job = Firecrawl(api_key=FIRECRAWL_API_KEY).crawl(
+    url="https://example.com",
+    limit=3,
+    scrape_options={"formats": ["markdown"]},
+)
+
+documents = [DocumentInfo(id="1", text=job.data[0].markdown, metadata={"source_url": "https://example.com"})]
+await MossClient(MOSS_PROJECT_ID, MOSS_PROJECT_KEY).create_index("firecrawl-demo", documents)
+```
+
+## Files
+
+| File | Description |
+|------|-------------|
+| `firecrawl_moss.ipynb` | Notebook that crawls URLs, indexes markdown into Moss, and runs semantic search |
diff --git a/examples/cookbook/firecrawl/firecrawl_moss.ipynb b/examples/cookbook/firecrawl/firecrawl_moss.ipynb
new file mode 100644
index 00000000..a64ae6da
--- /dev/null
+++ b/examples/cookbook/firecrawl/firecrawl_moss.ipynb
@@ -0,0 +1,308 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "15050d77",
+   "metadata": {},
+   "source": [
+    "# Firecrawl + Moss Cookbook\n",
+    "\n",
+    "Crawl one or more URLs with Firecrawl, convert the results to clean markdown, index them into Moss, and query the knowledge base semantically."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ca524d0e",
+   "metadata": {},
+   "source": [
+    "## 1. Set Up Project Environment\n",
+    "\n",
+    "Install the SDKs and set your credentials before running the notebook."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "6da4124d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#pip install firecrawl-py moss python-dotenv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c22437b8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from __future__ import annotations\n",
+    "\n",
+    "import os\n",
+    "import uuid\n",
+    "from dataclasses import dataclass\n",
+    "from typing import Any\n",
+    "\n",
+    "from dotenv import load_dotenv\n",
+    "from firecrawl import Firecrawl\n",
+    "from moss import DocumentInfo, MossClient, QueryOptions\n",
+    "\n",
+    "load_dotenv()\n",
+    "\n",
+    "FIRECRAWL_API_KEY = os.getenv(\"FIRECRAWL_API_KEY\")\n",
+    "MOSS_PROJECT_ID = os.getenv(\"MOSS_PROJECT_ID\")\n",
+    "MOSS_PROJECT_KEY = os.getenv(\"MOSS_PROJECT_KEY\")\n",
+    "DEFAULT_QUERY = \"What does the knowledge base say about the topic?\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ce4dae7b",
+   "metadata": {},
+   "source": [
+    "## 2. Define Core Data Structures\n",
+    "\n",
+    "Normalize each crawled page into a small Python structure before converting it into Moss documents."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "bf5da039",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@dataclass\n",
+    "class CrawledPage:\n",
+    "    url: str\n",
+    "    markdown: str\n",
+    "    title: str | None = None\n",
+    "\n",
+    "\n",
+    "def page_to_crawled_page(page: Any) -> CrawledPage:\n",
+    "    markdown = getattr(page, \"markdown\", None)\n",
+    "    if markdown is None and isinstance(page, dict):\n",
+    "        markdown = page.get(\"markdown\")\n",
+    "\n",
+    "    metadata = getattr(page, \"metadata\", None)\n",
+    "    if metadata is None and isinstance(page, dict):\n",
+    "        metadata = page.get(\"metadata\", {})\n",
+    "\n",
+    "    url = None\n",
+    "    title = None\n",
+    "    if isinstance(metadata, dict):\n",
+    "        url = metadata.get(\"source_url\") or metadata.get(\"sourceURL\") or metadata.get(\"url\")\n",
+    "        title = metadata.get(\"title\") or metadata.get(\"og_title\")\n",
+    "    elif metadata is not None:\n",
+    "        url = getattr(metadata, \"source_url\", None) or getattr(metadata, \"sourceURL\", None) or getattr(metadata, \"url\", None)\n",
+    "        title = getattr(metadata, \"title\", None) or getattr(metadata, \"og_title\", None)\n",
+    "\n",
+    "    return CrawledPage(url=url or \"unknown\", markdown=markdown or \"\", title=title)\n",
+    "\n",
+    "\n",
+    "def crawled_pages_to_moss_docs(pages: list[CrawledPage]) -> list[DocumentInfo]:\n",
+    "    docs: list[DocumentInfo] = []\n",
+    "    for index, page in enumerate(pages, start=1):\n",
+    "        docs.append(\n",
+    "            DocumentInfo(\n",
+    "                id=f\"firecrawl-{index}\",\n",
+    "                text=page.markdown,\n",
+    "                metadata={\"source_url\": page.url, \"title\": page.title or \"\"},\n",
+    "            )\n",
+    "        )\n",
+    "    return docs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fcb41889",
+   "metadata": {},
+   "source": [
+    "## 3. Implement Main Functionality\n",
+    "\n",
+    "Firecrawl handles URL-to-markdown extraction. Moss handles indexing and semantic search."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "42c24a13",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def validate_configuration(urls: list[str]) -> None:\n",
+    "    if not urls:\n",
+    "        raise ValueError(\"Provide at least one URL to crawl.\")\n",
+    "    if not FIRECRAWL_API_KEY:\n",
+    "        raise ValueError(\"Set FIRECRAWL_API_KEY before running the notebook.\")\n",
+    "    if not MOSS_PROJECT_ID or not MOSS_PROJECT_KEY:\n",
+    "        raise ValueError(\"Set MOSS_PROJECT_ID and MOSS_PROJECT_KEY before running the notebook.\")\n",
+    "\n",
+    "\n",
+    "def crawl_urls(urls: list[str], limit: int = 3) -> list[CrawledPage]:\n",
+    "    firecrawl = Firecrawl(api_key=FIRECRAWL_API_KEY)\n",
+    "    pages: list[CrawledPage] = []\n",
+    "\n",
+    "    for url in urls:\n",
+    "        job = firecrawl.crawl(url=url, limit=limit, scrape_options={\"formats\": [\"markdown\"]})\n",
+    "        raw_pages = getattr(job, \"data\", None) or (job.get(\"data\") if isinstance(job, dict) else []) or []\n",
+    "        pages.extend(page_to_crawled_page(page) for page in raw_pages)\n",
+    "\n",
+    "    return [page for page in pages if page.markdown.strip()]\n",
+    "\n",
+    "\n",
+    "async def prepare_knowledge_base(urls: list[str], limit: int = 10) -> tuple[MossClient, str]:\n",
+    "    validate_configuration(urls)\n",
+    "    crawled_pages = crawl_urls(urls, limit=limit)\n",
+    "    documents = crawled_pages_to_moss_docs(crawled_pages)\n",
+    "\n",
+    "    if not documents:\n",
+    "        raise RuntimeError(\"Firecrawl returned no markdown content to index.\")\n",
+    "\n",
+    "    index_name = f\"firecrawl-cookbook-{uuid.uuid4().hex[:8]}\"\n",
+    "    client = MossClient(MOSS_PROJECT_ID, MOSS_PROJECT_KEY)\n",
+    "\n",
+    "    await client.create_index(index_name, documents)\n",
+    "    await client.load_index(index_name)\n",
+    "\n",
+    "    print(f\"Indexed {len(documents)} documents into {index_name}\")\n",
+    "    return client, index_name\n",
+    "\n",
+    "\n",
+    "async def query_knowledge_base(client: MossClient, index_name: str, query: str = DEFAULT_QUERY) -> None:\n",
+    "    results = await client.query(index_name, query, QueryOptions(top_k=3, alpha=0.8))\n",
+    "\n",
+    "    print(f\"Query: {query}\")\n",
+    "    for item in results.docs:\n",
+    "        source_url = item.metadata.get(\"source_url\", \"unknown\") if item.metadata else \"unknown\"\n",
+    "        print(f\"- [{item.score:.3f}] {source_url}\")\n",
+    "        print(f\"  {item.text[:200].strip()}\")\n",
+    "\n",
+    "\n",
+    "# Build the knowledge base and query it in one step\n",
+    "async def build_and_query_knowledge_base(urls: list[str], query: str = DEFAULT_QUERY) -> None:\n",
+    "    client, index_name = await prepare_knowledge_base(urls)\n",
+    "    await query_knowledge_base(client, index_name, query)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b47066ee",
+   "metadata": {},
+   "source": [
+    "## 4. Full Firecrawl + Moss Test (Crawl, Index, and Query)\n",
+    "\n",
+    "\n",
+    "Enter URLs and a question to run end-to-end Firecrawl ingestion and Moss semantic search."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "bb2790da",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Indexed 10 documents into firecrawl-cookbook-af681b7b\n"
+     ]
+    }
+   ],
+   "source": [
+    "urls = [\"https://docs.moss.dev\"]\n",
+    "\n",
+    "# Crawl + index once\n",
+    "client, index_name = await prepare_knowledge_base(urls)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "1bfe1d30",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Query: What is Moss used for?\n",
+      "- [1.000] https://docs.moss.dev/docs/start/what-is-moss\n",
+      "  [Skip to main content](https://docs.moss.dev/docs/start/what-is-moss#content-area)\n",
+      "\n",
+      "[Moss Docs home page![light logo](https://mintcdn.com/moss-afcfb0b6/b460p8xEydp14WML/logo/moss-wordmark-light.svg?fi\n",
+      "- [0.939] https://docs.moss.dev/docs/api-reference/v1/getting-started/introduction\n",
+      "  [Skip to main content](https://docs.moss.dev/docs/api-reference/v1/getting-started/introduction#content-area)\n",
+      "\n",
+      "[Moss Docs home page![light logo](https://mintcdn.com/moss-afcfb0b6/b460p8xEydp14WML/logo\n",
+      "- [0.912] https://docs.moss.dev/docs/reference/python/interfaces/JobStatus\n",
+      "  [Skip to main content](https://docs.moss.dev/docs/reference/python/interfaces/JobStatus#content-area)\n",
+      "\n",
+      "[Moss Docs home page![light logo](https://mintcdn.com/moss-afcfb0b6/b460p8xEydp14WML/logo/moss-wo\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Query multiple times without crawling again\n",
+    "await query_knowledge_base(client, index_name, \"What is Moss used for?\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "6956e7a8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Query: What evidence in the docs supports the claim of sub-10 ms search, and what assumptions or caveats should an engineering team validate before adoption?\n",
+      "- [0.952] https://docs.moss.dev/docs/start/what-is-moss\n",
+      "  [Skip to main content](https://docs.moss.dev/docs/start/what-is-moss#content-area)\n",
+      "\n",
+      "[Moss Docs home page![light logo](https://mintcdn.com/moss-afcfb0b6/b460p8xEydp14WML/logo/moss-wordmark-light.svg?fi\n",
+      "- [0.907] https://docs.moss.dev/docs/api-reference/v1/document-operations/getDocs\n",
+      "  [Skip to main content](https://docs.moss.dev/docs/api-reference/v1/document-operations/getDocs#content-area)\n",
+      "\n",
+      "[Moss Docs home page![light logo](https://mintcdn.com/moss-afcfb0b6/b460p8xEydp14WML/logo/\n",
+      "- [0.891] https://docs.moss.dev/docs/api-reference/v1/document-operations/deleteDocs\n",
+      "  [Skip to main content](https://docs.moss.dev/docs/api-reference/v1/document-operations/deleteDocs#content-area)\n",
+      "\n",
+      "[Moss Docs home page![light logo](https://mintcdn.com/moss-afcfb0b6/b460p8xEydp14WML/lo\n"
+     ]
+    }
+   ],
+   "source": [
+    "await query_knowledge_base(\n",
+    "    client,\n",
+    "    index_name,\n",
+    "    \"What evidence in the docs supports the claim of sub-10 ms search, and what assumptions or caveats should an engineering team validate before adoption?\",\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [conda env:base] *",
+   "language": "python",
+   "name": "conda-base-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From 32fb7a4d859a183270b0d4b9de9cc3876f4d087f Mon Sep 17 00:00:00 2001
From: Anand Krishnamoorthy <ananduk1993@gmail.com>
Date: Sun, 3 May 2026 23:05:23 -0400
Subject: [PATCH 2/9] Add Moss+firecrawl architecture

---
 examples/cookbook/firecrawl/README.md | 48 +++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/examples/cookbook/firecrawl/README.md b/examples/cookbook/firecrawl/README.md
index 123ab8d4..04b050c3 100644
--- a/examples/cookbook/firecrawl/README.md
+++ b/examples/cookbook/firecrawl/README.md
@@ -27,6 +27,54 @@ MOSS_PROJECT_KEY=your-project-key
 3. Set `urls` to the pages you want to ingest.
 4. Run `await build_and_query_knowledge_base(urls)` to crawl, index, and query the content.
 
+## Workflow
+
+The notebook is structured for efficiency:
+
+1. **Prepare** (one-time): Crawl URLs → normalize markdown → index into Moss
+2. **Query** (repeated): Run semantic queries against the indexed knowledge base without re-crawling
+
+This design lets you crawl once (which can be slow/expensive) and then iterate on queries quickly.
+
+## Architecture
+
+```
+┌─────────────┐
+│   URLs      │
+└──────┬──────┘
+       │
+       ├──> Firecrawl (crawl + scrape)
+       │
+┌──────▼─────────────────┐
+│  Crawled Pages         │
+│  (Firecrawl markdown)  │
+└──────┬─────────────────┘
+       │
+       ├──> Page Normalization
+       │    (extract markdown, URL, title; skip empty pages)
+       │
+┌──────▼─────────────────┐
+│  Cleaned Markdown      │
+│  (one DocumentInfo     │
+│   per page)            │
+└──────┬─────────────────┘
+       │
+       ├──> Moss Create Index
+       │
+┌──────▼─────────────────┐
+│  Indexed Knowledge     │
+│  Base (local or cloud) │
+└──────┬─────────────────┘
+       │
+       ├──> Semantic Query (reusable)
+       │    (no re-crawling needed)
+       │
+┌──────▼─────────────────┐
+│  Top-K Results         │
+│  (scored passages)     │
+└────────────────────────┘
+```
+
 ## What the notebook does
 
 ```python

From 02db2ef460c8e164a5c340919163f9bc54dff188 Mon Sep 17 00:00:00 2001
From: Anand Krishnamoorthy
 <56905709+AnandKrishnamoorthy1@users.noreply.github.com>
Date: Sun, 3 May 2026 23:25:03 -0400
Subject: [PATCH 3/9] Delete firecrawl/.ipynb_checkpoints directory

---
 .../.ipynb_checkpoints/README-checkpoint.md   |  50 ----
 .../firecrawl_moss-checkpoint.ipynb           | 267 ------------------
 2 files changed, 317 deletions(-)
 delete mode 100644 examples/cookbook/firecrawl/.ipynb_checkpoints/README-checkpoint.md
 delete mode 100644 examples/cookbook/firecrawl/.ipynb_checkpoints/firecrawl_moss-checkpoint.ipynb

diff --git a/examples/cookbook/firecrawl/.ipynb_checkpoints/README-checkpoint.md b/examples/cookbook/firecrawl/.ipynb_checkpoints/README-checkpoint.md
deleted file mode 100644
index 123ab8d4..00000000
--- a/examples/cookbook/firecrawl/.ipynb_checkpoints/README-checkpoint.md
+++ /dev/null
@@ -1,50 +0,0 @@
-# Firecrawl + Moss Cookbook Example
-
-Use Firecrawl to turn one or more URLs into clean markdown, then index the results into Moss and query them semantically from a notebook.
-
-> This is a cookbook example, not a packaged integration. Open [firecrawl_moss.ipynb](firecrawl_moss.ipynb) to follow the full URL-to-query pipeline.
-
-## Installation
-
-```bash
-pip install firecrawl-py moss python-dotenv
-```
-
-## Setup
-
-Set these environment variables in your shell or a `.env` file:
-
-```bash
-FIRECRAWL_API_KEY=your-firecrawl-api-key
-MOSS_PROJECT_ID=your-project-id
-MOSS_PROJECT_KEY=your-project-key
-```
-
-## Quick Start
-
-1. Open [firecrawl_moss.ipynb](firecrawl_moss.ipynb) in Jupyter or VS Code.
-2. Run the setup and helper cells.
-3. Set `urls` to the pages you want to ingest.
-4. Run `await build_and_query_knowledge_base(urls)` to crawl, index, and query the content.
-
-## What the notebook does
-
-```python
-from firecrawl import Firecrawl
-from moss import DocumentInfo, MossClient, QueryOptions
-
-job = Firecrawl(api_key=FIRECRAWL_API_KEY).crawl(
-	url="https://example.com",
-	limit=3,
-	scrape_options={"formats": ["markdown"]},
-)
-
-documents = [DocumentInfo(id="1", text=job.data[0].markdown, metadata={"source_url": "https://example.com"})]
-await MossClient(MOSS_PROJECT_ID, MOSS_PROJECT_KEY).create_index("firecrawl-demo", documents)
-```
-
-## Files
-
-| File | Description |
-|------|-------------|
-| `firecrawl_moss.ipynb` | Notebook that crawls URLs, indexes markdown into Moss, and runs semantic search |
diff --git a/examples/cookbook/firecrawl/.ipynb_checkpoints/firecrawl_moss-checkpoint.ipynb b/examples/cookbook/firecrawl/.ipynb_checkpoints/firecrawl_moss-checkpoint.ipynb
deleted file mode 100644
index d47416a9..00000000
--- a/examples/cookbook/firecrawl/.ipynb_checkpoints/firecrawl_moss-checkpoint.ipynb
+++ /dev/null
@@ -1,267 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "15050d77",
-   "metadata": {},
-   "source": [
-    "# Firecrawl + Moss Cookbook\n",
-    "\n",
-    "Crawl one or more URLs with Firecrawl, convert the results to clean markdown, index them into Moss, and query the knowledge base semantically."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "ca524d0e",
-   "metadata": {},
-   "source": [
-    "## 1. Set Up Project Environment\n",
-    "\n",
-    "Install the SDKs and set your credentials before running the notebook.\n",
-    "\n",
-    "```bash\n",
-    "pip install firecrawl-py moss python-dotenv\n",
-    "```"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c22437b8",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from __future__ import annotations\n",
-    "\n",
-    "import os\n",
-    "import uuid\n",
-    "from dataclasses import dataclass\n",
-    "from typing import Any\n",
-    "\n",
-    "from dotenv import load_dotenv\n",
-    "from firecrawl import Firecrawl\n",
-    "from moss import DocumentInfo, MossClient, QueryOptions\n",
-    "\n",
-    "load_dotenv()\n",
-    "\n",
-    "FIRECRAWL_API_KEY = os.getenv(\"FIRECRAWL_API_KEY\")\n",
-    "MOSS_PROJECT_ID = os.getenv(\"MOSS_PROJECT_ID\")\n",
-    "MOSS_PROJECT_KEY = os.getenv(\"MOSS_PROJECT_KEY\")\n",
-    "DEFAULT_QUERY = \"What does the knowledge base say about the topic?\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "ce4dae7b",
-   "metadata": {},
-   "source": [
-    "## 2. Define Core Data Structures\n",
-    "\n",
-    "Normalize each crawled page into a small Python structure before converting it into Moss documents."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "bf5da039",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "@dataclass\n",
-    "class CrawledPage:\n",
-    "    url: str\n",
-    "    markdown: str\n",
-    "    title: str | None = None\n",
-    "\n",
-    "\n",
-    "def page_to_crawled_page(page: Any) -> CrawledPage:\n",
-    "    markdown = getattr(page, \"markdown\", None)\n",
-    "    if markdown is None and isinstance(page, dict):\n",
-    "        markdown = page.get(\"markdown\")\n",
-    "\n",
-    "    metadata = getattr(page, \"metadata\", None)\n",
-    "    if metadata is None and isinstance(page, dict):\n",
-    "        metadata = page.get(\"metadata\", {})\n",
-    "\n",
-    "    url = None\n",
-    "    title = None\n",
-    "    if isinstance(metadata, dict):\n",
-    "        url = metadata.get(\"source_url\") or metadata.get(\"sourceURL\") or metadata.get(\"url\")\n",
-    "        title = metadata.get(\"title\") or metadata.get(\"og_title\")\n",
-    "    elif metadata is not None:\n",
-    "        url = getattr(metadata, \"source_url\", None) or getattr(metadata, \"sourceURL\", None) or getattr(metadata, \"url\", None)\n",
-    "        title = getattr(metadata, \"title\", None) or getattr(metadata, \"og_title\", None)\n",
-    "\n",
-    "    return CrawledPage(url=url or \"unknown\", markdown=markdown or \"\", title=title)\n",
-    "\n",
-    "\n",
-    "def crawled_pages_to_moss_docs(pages: list[CrawledPage]) -> list[DocumentInfo]:\n",
-    "    docs: list[DocumentInfo] = []\n",
-    "    for index, page in enumerate(pages, start=1):\n",
-    "        docs.append(\n",
-    "            DocumentInfo(\n",
-    "                id=f\"firecrawl-{index}\",\n",
-    "                text=page.markdown,\n",
-    "                metadata={\"source_url\": page.url, \"title\": page.title or \"\"},\n",
-    "            )\n",
-    "        )\n",
-    "    return docs"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "fcb41889",
-   "metadata": {},
-   "source": [
-    "## 3. Implement Main Functionality\n",
-    "\n",
-    "Firecrawl handles URL-to-markdown extraction. Moss handles indexing and semantic search."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "42c24a13",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def validate_configuration(urls: list[str]) -> None:\n",
-    "    if not urls:\n",
-    "        raise ValueError(\"Provide at least one URL to crawl.\")\n",
-    "    if not FIRECRAWL_API_KEY:\n",
-    "        raise ValueError(\"Set FIRECRAWL_API_KEY before running the notebook.\")\n",
-    "    if not MOSS_PROJECT_ID or not MOSS_PROJECT_KEY:\n",
-    "        raise ValueError(\"Set MOSS_PROJECT_ID and MOSS_PROJECT_KEY before running the notebook.\")\n",
-    "\n",
-    "\n",
-    "def crawl_urls(urls: list[str], limit: int = 3) -> list[CrawledPage]:\n",
-    "    firecrawl = Firecrawl(api_key=FIRECRAWL_API_KEY)\n",
-    "    pages: list[CrawledPage] = []\n",
-    "\n",
-    "    for url in urls:\n",
-    "        job = firecrawl.crawl(url=url, limit=limit, scrape_options={\"formats\": [\"markdown\"]})\n",
-    "        raw_pages = getattr(job, \"data\", None) or (job.get(\"data\") if isinstance(job, dict) else []) or []\n",
-    "        pages.extend(page_to_crawled_page(page) for page in raw_pages)\n",
-    "\n",
-    "    return [page for page in pages if page.markdown.strip()]\n",
-    "\n",
-    "\n",
-    "async def build_and_query_knowledge_base(urls: list[str], query: str = DEFAULT_QUERY) -> None:\n",
-    "    validate_configuration(urls)\n",
-    "    crawled_pages = crawl_urls(urls)\n",
-    "    documents = crawled_pages_to_moss_docs(crawled_pages)\n",
-    "\n",
-    "    if not documents:\n",
-    "        raise RuntimeError(\"Firecrawl returned no markdown content to index.\")\n",
-    "\n",
-    "    index_name = f\"firecrawl-cookbook-{uuid.uuid4().hex[:8]}\"\n",
-    "    client = MossClient(MOSS_PROJECT_ID, MOSS_PROJECT_KEY)\n",
-    "\n",
-    "    await client.create_index(index_name, documents)\n",
-    "    await client.load_index(index_name)\n",
-    "    results = await client.query(index_name, query, QueryOptions(top_k=5, alpha=0.8))\n",
-    "\n",
-    "    print(f\"Indexed {len(documents)} documents into {index_name}\")\n",
-    "    print(f\"Query: {query}\")\n",
-    "    for item in results.docs:\n",
-    "        source_url = item.metadata.get(\"source_url\", \"unknown\") if item.metadata else \"unknown\"\n",
-    "        print(f\"- [{item.score:.3f}] {source_url}\")\n",
-    "        print(f\"  {item.text[:200].strip()}\")\n",
-    "\n",
-    "    return None"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "5e8c4a3b",
-   "metadata": {},
-   "source": [
-    "## 4. Add Input Validation and Error Handling\n",
-    "\n",
-    "Keep the notebook explicit about missing credentials and empty crawl results."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "dcc9ed11",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def preview_documents(documents: list[Any]) -> None:\n",
-    "    if not documents:\n",
-    "        print(\"No documents to preview.\")\n",
-    "        return\n",
-    "\n",
-    "    print(f\"Previewing {len(documents)} documents:\")\n",
-    "    for document in documents[:3]:\n",
-    "        source_url = document.metadata.get(\"source_url\", \"unknown\") if getattr(document, \"metadata\", None) else \"unknown\"\n",
-    "        print(f\"- {document.id} from {source_url}\")\n",
-    "        print(document.text[:120].replace(\"\\n\", \" \"))\n",
-    "\n",
-    "\n",
-    "def safe_run(urls: list[str], query: str = DEFAULT_QUERY) -> None:\n",
-    "    try:\n",
-    "        validate_configuration(urls)\n",
-    "        pages = crawl_urls(urls)\n",
-    "        documents = crawled_pages_to_moss_docs(pages)\n",
-    "        preview_documents(documents)\n",
-    "    except Exception as exc:\n",
-    "        print(f\"Validation or crawl failed: {exc}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "c6892bde",
-   "metadata": {},
-   "source": [
-    "## 5. Write Unit Tests\n",
-    "\n",
-    "These lightweight tests verify the page normalization and document conversion logic without requiring live API calls."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7bd9052d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class FakePage:\n",
-    "    def __init__(self, markdown: str, source_url: str, title: str | None = None) -> None:\n",
-    "        self.markdown = markdown\n",
-    "        self.metadata = {\"source_url\": source_url, \"title\": title}\n",
-    "\n",
-    "\n",
-    "sample_page = FakePage(\"# Sample page\\n\\nThis is a test.\", \"https://example.com\", \"Example\")\n",
-    "normalized = page_to_crawled_page(sample_page)\n",
-    "assert normalized.url == \"https://example.com\"\n",
-    "assert normalized.title == \"Example\"\n",
-    "assert \"Sample page\" in normalized.markdown\n",
-    "\n",
-    "documents = crawled_pages_to_moss_docs([normalized])\n",
-    "assert documents[0].id == \"firecrawl-1\"\n",
-    "assert documents[0].metadata[\"source_url\"] == \"https://example.com\"\n",
-    "assert documents[0].text.startswith(\"# Sample page\")\n",
-    "print(\"All notebook sanity checks passed.\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "45761029",
-   "metadata": {},
-   "source": [
-    "## 6. Run and Inspect Results\n",
-    "\n",
-    "After setting the API keys, call `await build_and_query_knowledge_base(urls, query)` with your own URLs and inspect the returned passages."
-   ]
-  }
- ],
- "metadata": {
-  "language_info": {
-   "name": "python"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

From 17d3bc80801e25c0e373c5af06fa9718ba68f2d9 Mon Sep 17 00:00:00 2001
From: Anand Krishnamoorthy
 <56905709+AnandKrishnamoorthy1@users.noreply.github.com>
Date: Mon, 4 May 2026 23:41:39 -0400
Subject: [PATCH 4/9] Remove unused API keys

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 examples/cookbook/firecrawl/.env.example | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/examples/cookbook/firecrawl/.env.example b/examples/cookbook/firecrawl/.env.example
index d95c476c..b284a227 100644
--- a/examples/cookbook/firecrawl/.env.example
+++ b/examples/cookbook/firecrawl/.env.example
@@ -10,9 +10,3 @@ FIRECRAWL_API_KEY=your_firecrawl_api_key
 
 # Optional: default index name used by the notebook
 MOSS_INDEX_NAME=firecrawl-demo
-
-# Optional LLM keys for RAG synthesis (set one or both)
-# Set `GEMINI_API_KEY` to use Google Gemini (if you adapt the helper),
-# or set `OPENAI_API_KEY` to use OpenAI in the provided example.
-GEMINI_API_KEY=
-OPENAI_API_KEY=

From df24bd9cae84eb5962349c27f857932c7fcf4724 Mon Sep 17 00:00:00 2001
From: Anand Krishnamoorthy <ananduk1993@gmail.com>
Date: Mon, 4 May 2026 23:55:08 -0400
Subject: [PATCH 5/9] Implement Copilot fixes

---
 examples/cookbook/firecrawl/.env.example         | 11 +----------
 examples/cookbook/firecrawl/firecrawl_moss.ipynb |  8 ++++----
 2 files changed, 5 insertions(+), 14 deletions(-)

diff --git a/examples/cookbook/firecrawl/.env.example b/examples/cookbook/firecrawl/.env.example
index d95c476c..07a84e0d 100644
--- a/examples/cookbook/firecrawl/.env.example
+++ b/examples/cookbook/firecrawl/.env.example
@@ -6,13 +6,4 @@ MOSS_PROJECT_ID=your_moss_project_id
 MOSS_PROJECT_KEY=your_moss_project_key
 
 # Firecrawl API key
-FIRECRAWL_API_KEY=your_firecrawl_api_key
-
-# Optional: default index name used by the notebook
-MOSS_INDEX_NAME=firecrawl-demo
-
-# Optional LLM keys for RAG synthesis (set one or both)
-# Set `GEMINI_API_KEY` to use Google Gemini (if you adapt the helper),
-# or set `OPENAI_API_KEY` to use OpenAI in the provided example.
-GEMINI_API_KEY=
-OPENAI_API_KEY=
+FIRECRAWL_API_KEY=your_firecrawl_api_key
\ No newline at end of file
diff --git a/examples/cookbook/firecrawl/firecrawl_moss.ipynb b/examples/cookbook/firecrawl/firecrawl_moss.ipynb
index a64ae6da..06c95ec9 100644
--- a/examples/cookbook/firecrawl/firecrawl_moss.ipynb
+++ b/examples/cookbook/firecrawl/firecrawl_moss.ipynb
@@ -126,7 +126,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "42c24a13",
    "metadata": {},
    "outputs": [],
@@ -180,7 +180,7 @@
     "        print(f\"  {item.text[:200].strip()}\")\n",
     "\n",
     "\n",
-    "# Build knowledgebase and query it in one step\n",
+    "# Build knowledge base and query it in one step\n",
     "async def build_and_query_knowledge_base(urls: list[str], query: str = DEFAULT_QUERY) -> None:\n",
     "    client, index_name = await prepare_knowledge_base(urls)\n",
     "    await query_knowledge_base(client, index_name, query)"
@@ -286,9 +286,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python [conda env:base] *",
+   "display_name": "base",
    "language": "python",
-   "name": "conda-base-py"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {

From 6c01c0c4fab0234b0e54674936a5d2496d42cc2f Mon Sep 17 00:00:00 2001
From: Anand Krishnamoorthy <ananduk1993@gmail.com>
Date: Mon, 11 May 2026 21:18:48 -0400
Subject: [PATCH 6/9] Add pyproject.toml for firecrawl cookbook module and
 update README.md

---
 examples/cookbook/firecrawl/README.md      |  7 +++---
 examples/cookbook/firecrawl/pyproject.toml | 28 ++++++++++++++++++++++
 2 files changed, 31 insertions(+), 4 deletions(-)
 create mode 100644 examples/cookbook/firecrawl/pyproject.toml

diff --git a/examples/cookbook/firecrawl/README.md b/examples/cookbook/firecrawl/README.md
index 04b050c3..cd2a31cc 100644
--- a/examples/cookbook/firecrawl/README.md
+++ b/examples/cookbook/firecrawl/README.md
@@ -43,18 +43,17 @@ This design lets you crawl once (which can be slow/expensive) and then iterate o
 │   URLs      │
 └──────┬──────┘
        │
-       ├──> Firecrawl (crawl + scrape)
+       │
        │
 ┌──────▼─────────────────┐
 │  Crawled Pages         │
 │  (raw HTML/markdown)   │
 └──────┬─────────────────┘
        │
-       ├──> Markdown Normalization
-       │    (clean text, remove chrome)
+       │
        │
 ┌──────▼─────────────────┐
-│  Cleaned Markdown      │
+│   Markdown             │
 │  (one DocumentInfo     │
 │   per page)            │
 └──────┬─────────────────┘
diff --git a/examples/cookbook/firecrawl/pyproject.toml b/examples/cookbook/firecrawl/pyproject.toml
new file mode 100644
index 00000000..8fdc17ff
--- /dev/null
+++ b/examples/cookbook/firecrawl/pyproject.toml
@@ -0,0 +1,28 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "firecrawl-moss"
+version = "0.1.0"
+description = "Firecrawl integration for Moss semantic search"
+readme = "README.md"
+requires-python = ">=3.11,<3.14"
+license = { text = "Apache-2.0" }
+authors = [
+    { name = "InferEdge Inc.", email = "contact@moss.dev" }
+]
+dependencies = [
+    "firecrawl-py",
+    "moss>=1.0.0",
+    "python-dotenv",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["firecrawl_moss.py"]
+
+[tool.hatch.build.targets.sdist]
+include = [
+    "README.md",
+    ".env.example",
+]

From 485a3caec6a334b12129c51dc6dfc7d21875760b Mon Sep 17 00:00:00 2001
From: Anand Krishnamoorthy <ananduk1993@gmail.com>
Date: Wed, 13 May 2026 19:58:35 -0400
Subject: [PATCH 7/9] fix: Remove wheel target and update sdist to include
 notebook

---
 examples/cookbook/firecrawl/pyproject.toml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/examples/cookbook/firecrawl/pyproject.toml b/examples/cookbook/firecrawl/pyproject.toml
index 8fdc17ff..b6c7591f 100644
--- a/examples/cookbook/firecrawl/pyproject.toml
+++ b/examples/cookbook/firecrawl/pyproject.toml
@@ -18,11 +18,9 @@ dependencies = [
     "python-dotenv",
 ]
 
-[tool.hatch.build.targets.wheel]
-packages = ["firecrawl_moss.py"]
-
 [tool.hatch.build.targets.sdist]
 include = [
     "README.md",
     ".env.example",
+    "firecrawl_moss.ipynb",
 ]

From 0c96b952d956f9b2ca19e23145e31cdbdb4c8337 Mon Sep 17 00:00:00 2001
From: Anand Krishnamoorthy <ananduk1993@gmail.com>
Date: Mon, 15 Jun 2026 22:12:19 -0400
Subject: [PATCH 8/9] Update firecrawl cookbook license to BSD-2-Clause

---
 examples/cookbook/firecrawl/pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/cookbook/firecrawl/pyproject.toml b/examples/cookbook/firecrawl/pyproject.toml
index b6c7591f..986e2a57 100644
--- a/examples/cookbook/firecrawl/pyproject.toml
+++ b/examples/cookbook/firecrawl/pyproject.toml
@@ -8,7 +8,7 @@ version = "0.1.0"
 description = "Firecrawl integration for Moss semantic search"
 readme = "README.md"
 requires-python = ">=3.11,<3.14"
-license = { text = "Apache-2.0" }
+license = { text = "BSD-2-Clause" }
 authors = [
     { name = "InferEdge Inc.", email = "contact@moss.dev" }
 ]

From 4ea0d28ee166b97d9c514dec62c9785be5911776 Mon Sep 17 00:00:00 2001
From: Anand Krishnamoorthy <ananduk1993@gmail.com>
Date: Mon, 15 Jun 2026 22:24:53 -0400
Subject: [PATCH 9/9] fix: remove unsupported esbuild.jsx option and add
 firecrawl-py version constraint

---
 apps/next-js/vitest.config.ts              | 3 ---
 examples/cookbook/firecrawl/pyproject.toml | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/apps/next-js/vitest.config.ts b/apps/next-js/vitest.config.ts
index 2db69d1a..70d9c675 100644
--- a/apps/next-js/vitest.config.ts
+++ b/apps/next-js/vitest.config.ts
@@ -1,9 +1,6 @@
 import { defineConfig } from 'vitest/config'
 
 export default defineConfig({
-  esbuild: {
-    jsx: 'automatic',
-  },
   test: {
     environment: 'jsdom',
     globals: true,
diff --git a/examples/cookbook/firecrawl/pyproject.toml b/examples/cookbook/firecrawl/pyproject.toml
index 986e2a57..91c4bf5e 100644
--- a/examples/cookbook/firecrawl/pyproject.toml
+++ b/examples/cookbook/firecrawl/pyproject.toml
@@ -13,7 +13,7 @@ authors = [
     { name = "InferEdge Inc.", email = "contact@moss.dev" }
 ]
 dependencies = [
-    "firecrawl-py",
+    "firecrawl-py>=1.0.0",
     "moss>=1.0.0",
     "python-dotenv",
 ]