From 750f3dd669b3a415c5a8589253658c2ef7bfbdce Mon Sep 17 00:00:00 2001 From: Anand Krishnamoorthy Date: Sun, 3 May 2026 22:52:09 -0400 Subject: [PATCH 1/9] Add cookbook firecrawl --- examples/cookbook/firecrawl/.env.example | 18 + .../.ipynb_checkpoints/README-checkpoint.md | 50 +++ .../firecrawl_moss-checkpoint.ipynb | 267 +++++++++++++++ examples/cookbook/firecrawl/README.md | 50 +++ .../cookbook/firecrawl/firecrawl_moss.ipynb | 308 ++++++++++++++++++ 5 files changed, 693 insertions(+) create mode 100644 examples/cookbook/firecrawl/.env.example create mode 100644 examples/cookbook/firecrawl/.ipynb_checkpoints/README-checkpoint.md create mode 100644 examples/cookbook/firecrawl/.ipynb_checkpoints/firecrawl_moss-checkpoint.ipynb create mode 100644 examples/cookbook/firecrawl/README.md create mode 100644 examples/cookbook/firecrawl/firecrawl_moss.ipynb diff --git a/examples/cookbook/firecrawl/.env.example b/examples/cookbook/firecrawl/.env.example new file mode 100644 index 00000000..d95c476c --- /dev/null +++ b/examples/cookbook/firecrawl/.env.example @@ -0,0 +1,18 @@ +# Example env for Firecrawl + Moss cookbook +# Copy to .env and fill in values before running the notebook. + +# Moss credentials +MOSS_PROJECT_ID=your_moss_project_id +MOSS_PROJECT_KEY=your_moss_project_key + +# Firecrawl API key +FIRECRAWL_API_KEY=your_firecrawl_api_key + +# Optional: default index name used by the notebook +MOSS_INDEX_NAME=firecrawl-demo + +# Optional LLM keys for RAG synthesis (set one or both) +# Set `GEMINI_API_KEY` to use Google Gemini (if you adapt the helper), +# or set `OPENAI_API_KEY` to use OpenAI in the provided example. 
+GEMINI_API_KEY= +OPENAI_API_KEY= diff --git a/examples/cookbook/firecrawl/.ipynb_checkpoints/README-checkpoint.md b/examples/cookbook/firecrawl/.ipynb_checkpoints/README-checkpoint.md new file mode 100644 index 00000000..123ab8d4 --- /dev/null +++ b/examples/cookbook/firecrawl/.ipynb_checkpoints/README-checkpoint.md @@ -0,0 +1,50 @@ +# Firecrawl + Moss Cookbook Example + +Use Firecrawl to turn one or more URLs into clean markdown, then index the results into Moss and query them semantically from a notebook. + +> This is a cookbook example, not a packaged integration. Open [firecrawl_moss.ipynb](firecrawl_moss.ipynb) to follow the full URL-to-query pipeline. + +## Installation + +```bash +pip install firecrawl-py moss python-dotenv +``` + +## Setup + +Set these environment variables in your shell or a `.env` file: + +```bash +FIRECRAWL_API_KEY=your-firecrawl-api-key +MOSS_PROJECT_ID=your-project-id +MOSS_PROJECT_KEY=your-project-key +``` + +## Quick Start + +1. Open [firecrawl_moss.ipynb](firecrawl_moss.ipynb) in Jupyter or VS Code. +2. Run the setup and helper cells. +3. Set `urls` to the pages you want to ingest. +4. Run `await build_and_query_knowledge_base(urls)` to crawl, index, and query the content. 
+ +## What the notebook does + +```python +from firecrawl import Firecrawl +from moss import DocumentInfo, MossClient, QueryOptions + +job = Firecrawl(api_key=FIRECRAWL_API_KEY).crawl( + url="https://example.com", + limit=3, + scrape_options={"formats": ["markdown"]}, +) + +documents = [DocumentInfo(id="1", text=job.data[0].markdown, metadata={"source_url": "https://example.com"})] +await MossClient(MOSS_PROJECT_ID, MOSS_PROJECT_KEY).create_index("firecrawl-demo", documents) +``` + +## Files + +| File | Description | +|------|-------------| +| `firecrawl_moss.ipynb` | Notebook that crawls URLs, indexes markdown into Moss, and runs semantic search | diff --git a/examples/cookbook/firecrawl/.ipynb_checkpoints/firecrawl_moss-checkpoint.ipynb b/examples/cookbook/firecrawl/.ipynb_checkpoints/firecrawl_moss-checkpoint.ipynb new file mode 100644 index 00000000..d47416a9 --- /dev/null +++ b/examples/cookbook/firecrawl/.ipynb_checkpoints/firecrawl_moss-checkpoint.ipynb @@ -0,0 +1,267 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "15050d77", + "metadata": {}, + "source": [ + "# Firecrawl + Moss Cookbook\n", + "\n", + "Crawl one or more URLs with Firecrawl, convert the results to clean markdown, index them into Moss, and query the knowledge base semantically." + ] + }, + { + "cell_type": "markdown", + "id": "ca524d0e", + "metadata": {}, + "source": [ + "## 1. 
Set Up Project Environment\n", + "\n", + "Install the SDKs and set your credentials before running the notebook.\n", + "\n", + "```bash\n", + "pip install firecrawl-py moss python-dotenv\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c22437b8", + "metadata": {}, + "outputs": [], + "source": [ + "from __future__ import annotations\n", + "\n", + "import os\n", + "import uuid\n", + "from dataclasses import dataclass\n", + "from typing import Any\n", + "\n", + "from dotenv import load_dotenv\n", + "from firecrawl import Firecrawl\n", + "from moss import DocumentInfo, MossClient, QueryOptions\n", + "\n", + "load_dotenv()\n", + "\n", + "FIRECRAWL_API_KEY = os.getenv(\"FIRECRAWL_API_KEY\")\n", + "MOSS_PROJECT_ID = os.getenv(\"MOSS_PROJECT_ID\")\n", + "MOSS_PROJECT_KEY = os.getenv(\"MOSS_PROJECT_KEY\")\n", + "DEFAULT_QUERY = \"What does the knowledge base say about the topic?\"" + ] + }, + { + "cell_type": "markdown", + "id": "ce4dae7b", + "metadata": {}, + "source": [ + "## 2. Define Core Data Structures\n", + "\n", + "Normalize each crawled page into a small Python structure before converting it into Moss documents." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf5da039", + "metadata": {}, + "outputs": [], + "source": [ + "@dataclass\n", + "class CrawledPage:\n", + " url: str\n", + " markdown: str\n", + " title: str | None = None\n", + "\n", + "\n", + "def page_to_crawled_page(page: Any) -> CrawledPage:\n", + " markdown = getattr(page, \"markdown\", None)\n", + " if markdown is None and isinstance(page, dict):\n", + " markdown = page.get(\"markdown\")\n", + "\n", + " metadata = getattr(page, \"metadata\", None)\n", + " if metadata is None and isinstance(page, dict):\n", + " metadata = page.get(\"metadata\", {})\n", + "\n", + " url = None\n", + " title = None\n", + " if isinstance(metadata, dict):\n", + " url = metadata.get(\"source_url\") or metadata.get(\"sourceURL\") or metadata.get(\"url\")\n", + " title = metadata.get(\"title\") or metadata.get(\"og_title\")\n", + " elif metadata is not None:\n", + " url = getattr(metadata, \"source_url\", None) or getattr(metadata, \"sourceURL\", None) or getattr(metadata, \"url\", None)\n", + " title = getattr(metadata, \"title\", None) or getattr(metadata, \"og_title\", None)\n", + "\n", + " return CrawledPage(url=url or \"unknown\", markdown=markdown or \"\", title=title)\n", + "\n", + "\n", + "def crawled_pages_to_moss_docs(pages: list[CrawledPage]) -> list[DocumentInfo]:\n", + " docs: list[DocumentInfo] = []\n", + " for index, page in enumerate(pages, start=1):\n", + " docs.append(\n", + " DocumentInfo(\n", + " id=f\"firecrawl-{index}\",\n", + " text=page.markdown,\n", + " metadata={\"source_url\": page.url, \"title\": page.title or \"\"},\n", + " )\n", + " )\n", + " return docs" + ] + }, + { + "cell_type": "markdown", + "id": "fcb41889", + "metadata": {}, + "source": [ + "## 3. Implement Main Functionality\n", + "\n", + "Firecrawl handles URL-to-markdown extraction. Moss handles indexing and semantic search." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42c24a13", + "metadata": {}, + "outputs": [], + "source": [ + "def validate_configuration(urls: list[str]) -> None:\n", + " if not urls:\n", + " raise ValueError(\"Provide at least one URL to crawl.\")\n", + " if not FIRECRAWL_API_KEY:\n", + " raise ValueError(\"Set FIRECRAWL_API_KEY before running the notebook.\")\n", + " if not MOSS_PROJECT_ID or not MOSS_PROJECT_KEY:\n", + " raise ValueError(\"Set MOSS_PROJECT_ID and MOSS_PROJECT_KEY before running the notebook.\")\n", + "\n", + "\n", + "def crawl_urls(urls: list[str], limit: int = 3) -> list[CrawledPage]:\n", + " firecrawl = Firecrawl(api_key=FIRECRAWL_API_KEY)\n", + " pages: list[CrawledPage] = []\n", + "\n", + " for url in urls:\n", + " job = firecrawl.crawl(url=url, limit=limit, scrape_options={\"formats\": [\"markdown\"]})\n", + " raw_pages = getattr(job, \"data\", None) or (job.get(\"data\") if isinstance(job, dict) else []) or []\n", + " pages.extend(page_to_crawled_page(page) for page in raw_pages)\n", + "\n", + " return [page for page in pages if page.markdown.strip()]\n", + "\n", + "\n", + "async def build_and_query_knowledge_base(urls: list[str], query: str = DEFAULT_QUERY) -> None:\n", + " validate_configuration(urls)\n", + " crawled_pages = crawl_urls(urls)\n", + " documents = crawled_pages_to_moss_docs(crawled_pages)\n", + "\n", + " if not documents:\n", + " raise RuntimeError(\"Firecrawl returned no markdown content to index.\")\n", + "\n", + " index_name = f\"firecrawl-cookbook-{uuid.uuid4().hex[:8]}\"\n", + " client = MossClient(MOSS_PROJECT_ID, MOSS_PROJECT_KEY)\n", + "\n", + " await client.create_index(index_name, documents)\n", + " await client.load_index(index_name)\n", + " results = await client.query(index_name, query, QueryOptions(top_k=5, alpha=0.8))\n", + "\n", + " print(f\"Indexed {len(documents)} documents into {index_name}\")\n", + " print(f\"Query: {query}\")\n", + " for item in results.docs:\n", + " source_url 
= item.metadata.get(\"source_url\", \"unknown\") if item.metadata else \"unknown\"\n", + " print(f\"- [{item.score:.3f}] {source_url}\")\n", + " print(f\" {item.text[:200].strip()}\")\n", + "\n", + " return None" + ] + }, + { + "cell_type": "markdown", + "id": "5e8c4a3b", + "metadata": {}, + "source": [ + "## 4. Add Input Validation and Error Handling\n", + "\n", + "Keep the notebook explicit about missing credentials and empty crawl results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dcc9ed11", + "metadata": {}, + "outputs": [], + "source": [ + "def preview_documents(documents: list[Any]) -> None:\n", + " if not documents:\n", + " print(\"No documents to preview.\")\n", + " return\n", + "\n", + " print(f\"Previewing {len(documents)} documents:\")\n", + " for document in documents[:3]:\n", + " source_url = document.metadata.get(\"source_url\", \"unknown\") if getattr(document, \"metadata\", None) else \"unknown\"\n", + " print(f\"- {document.id} from {source_url}\")\n", + " print(document.text[:120].replace(\"\\n\", \" \"))\n", + "\n", + "\n", + "def safe_run(urls: list[str], query: str = DEFAULT_QUERY) -> None:\n", + " try:\n", + " validate_configuration(urls)\n", + " pages = crawl_urls(urls)\n", + " documents = crawled_pages_to_moss_docs(pages)\n", + " preview_documents(documents)\n", + " except Exception as exc:\n", + " print(f\"Validation or crawl failed: {exc}\")" + ] + }, + { + "cell_type": "markdown", + "id": "c6892bde", + "metadata": {}, + "source": [ + "## 5. Write Unit Tests\n", + "\n", + "These lightweight tests verify the page normalization and document conversion logic without requiring live API calls." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7bd9052d", + "metadata": {}, + "outputs": [], + "source": [ + "class FakePage:\n", + " def __init__(self, markdown: str, source_url: str, title: str | None = None) -> None:\n", + " self.markdown = markdown\n", + " self.metadata = {\"source_url\": source_url, \"title\": title}\n", + "\n", + "\n", + "sample_page = FakePage(\"# Sample page\\n\\nThis is a test.\", \"https://example.com\", \"Example\")\n", + "normalized = page_to_crawled_page(sample_page)\n", + "assert normalized.url == \"https://example.com\"\n", + "assert normalized.title == \"Example\"\n", + "assert \"Sample page\" in normalized.markdown\n", + "\n", + "documents = crawled_pages_to_moss_docs([normalized])\n", + "assert documents[0].id == \"firecrawl-1\"\n", + "assert documents[0].metadata[\"source_url\"] == \"https://example.com\"\n", + "assert documents[0].text.startswith(\"# Sample page\")\n", + "print(\"All notebook sanity checks passed.\")" + ] + }, + { + "cell_type": "markdown", + "id": "45761029", + "metadata": {}, + "source": [ + "## 6. Run and Inspect Results\n", + "\n", + "After setting the API keys, call `await build_and_query_knowledge_base(urls, query)` with your own URLs and inspect the returned passages." + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/cookbook/firecrawl/README.md b/examples/cookbook/firecrawl/README.md new file mode 100644 index 00000000..123ab8d4 --- /dev/null +++ b/examples/cookbook/firecrawl/README.md @@ -0,0 +1,50 @@ +# Firecrawl + Moss Cookbook Example + +Use Firecrawl to turn one or more URLs into clean markdown, then index the results into Moss and query them semantically from a notebook. + +> This is a cookbook example, not a packaged integration. Open [firecrawl_moss.ipynb](firecrawl_moss.ipynb) to follow the full URL-to-query pipeline. 
+ +## Installation + +```bash +pip install firecrawl-py moss python-dotenv +``` + +## Setup + +Set these environment variables in your shell or a `.env` file: + +```bash +FIRECRAWL_API_KEY=your-firecrawl-api-key +MOSS_PROJECT_ID=your-project-id +MOSS_PROJECT_KEY=your-project-key +``` + +## Quick Start + +1. Open [firecrawl_moss.ipynb](firecrawl_moss.ipynb) in Jupyter or VS Code. +2. Run the setup and helper cells. +3. Set `urls` to the pages you want to ingest. +4. Run `await build_and_query_knowledge_base(urls)` to crawl, index, and query the content. + +## What the notebook does + +```python +from firecrawl import Firecrawl +from moss import DocumentInfo, MossClient, QueryOptions + +job = Firecrawl(api_key=FIRECRAWL_API_KEY).crawl( + url="https://example.com", + limit=3, + scrape_options={"formats": ["markdown"]}, +) + +documents = [DocumentInfo(id="1", text=job.data[0].markdown, metadata={"source_url": "https://example.com"})] +await MossClient(MOSS_PROJECT_ID, MOSS_PROJECT_KEY).create_index("firecrawl-demo", documents) +``` + +## Files + +| File | Description | +|------|-------------| +| `firecrawl_moss.ipynb` | Notebook that crawls URLs, indexes markdown into Moss, and runs semantic search | diff --git a/examples/cookbook/firecrawl/firecrawl_moss.ipynb b/examples/cookbook/firecrawl/firecrawl_moss.ipynb new file mode 100644 index 00000000..a64ae6da --- /dev/null +++ b/examples/cookbook/firecrawl/firecrawl_moss.ipynb @@ -0,0 +1,308 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "15050d77", + "metadata": {}, + "source": [ + "# Firecrawl + Moss Cookbook\n", + "\n", + "Crawl one or more URLs with Firecrawl, convert the results to clean markdown, index them into Moss, and query the knowledge base semantically." + ] + }, + { + "cell_type": "markdown", + "id": "ca524d0e", + "metadata": {}, + "source": [ + "## 1. Set Up Project Environment\n", + "\n", + "Install the SDKs and set your credentials before running the notebook." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6da4124d", + "metadata": {}, + "outputs": [], + "source": [ + "#pip install firecrawl-py moss python-dotenv" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c22437b8", + "metadata": {}, + "outputs": [], + "source": [ + "from __future__ import annotations\n", + "\n", + "import os\n", + "import uuid\n", + "from dataclasses import dataclass\n", + "from typing import Any\n", + "\n", + "from dotenv import load_dotenv\n", + "from firecrawl import Firecrawl\n", + "from moss import DocumentInfo, MossClient, QueryOptions\n", + "\n", + "load_dotenv()\n", + "\n", + "FIRECRAWL_API_KEY = os.getenv(\"FIRECRAWL_API_KEY\")\n", + "MOSS_PROJECT_ID = os.getenv(\"MOSS_PROJECT_ID\")\n", + "MOSS_PROJECT_KEY = os.getenv(\"MOSS_PROJECT_KEY\")\n", + "DEFAULT_QUERY = \"What does the knowledge base say about the topic?\"" + ] + }, + { + "cell_type": "markdown", + "id": "ce4dae7b", + "metadata": {}, + "source": [ + "## 2. Define Core Data Structures\n", + "\n", + "Normalize each crawled page into a small Python structure before converting it into Moss documents." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "bf5da039", + "metadata": {}, + "outputs": [], + "source": [ + "@dataclass\n", + "class CrawledPage:\n", + " url: str\n", + " markdown: str\n", + " title: str | None = None\n", + "\n", + "\n", + "def page_to_crawled_page(page: Any) -> CrawledPage:\n", + " markdown = getattr(page, \"markdown\", None)\n", + " if markdown is None and isinstance(page, dict):\n", + " markdown = page.get(\"markdown\")\n", + "\n", + " metadata = getattr(page, \"metadata\", None)\n", + " if metadata is None and isinstance(page, dict):\n", + " metadata = page.get(\"metadata\", {})\n", + "\n", + " url = None\n", + " title = None\n", + " if isinstance(metadata, dict):\n", + " url = metadata.get(\"source_url\") or metadata.get(\"sourceURL\") or metadata.get(\"url\")\n", + " title = metadata.get(\"title\") or metadata.get(\"og_title\")\n", + " elif metadata is not None:\n", + " url = getattr(metadata, \"source_url\", None) or getattr(metadata, \"sourceURL\", None) or getattr(metadata, \"url\", None)\n", + " title = getattr(metadata, \"title\", None) or getattr(metadata, \"og_title\", None)\n", + "\n", + " return CrawledPage(url=url or \"unknown\", markdown=markdown or \"\", title=title)\n", + "\n", + "\n", + "def crawled_pages_to_moss_docs(pages: list[CrawledPage]) -> list[DocumentInfo]:\n", + " docs: list[DocumentInfo] = []\n", + " for index, page in enumerate(pages, start=1):\n", + " docs.append(\n", + " DocumentInfo(\n", + " id=f\"firecrawl-{index}\",\n", + " text=page.markdown,\n", + " metadata={\"source_url\": page.url, \"title\": page.title or \"\"},\n", + " )\n", + " )\n", + " return docs" + ] + }, + { + "cell_type": "markdown", + "id": "fcb41889", + "metadata": {}, + "source": [ + "## 3. Implement Main Functionality\n", + "\n", + "Firecrawl handles URL-to-markdown extraction. Moss handles indexing and semantic search." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "42c24a13", + "metadata": {}, + "outputs": [], + "source": [ + "def validate_configuration(urls: list[str]) -> None:\n", + " if not urls:\n", + " raise ValueError(\"Provide at least one URL to crawl.\")\n", + " if not FIRECRAWL_API_KEY:\n", + " raise ValueError(\"Set FIRECRAWL_API_KEY before running the notebook.\")\n", + " if not MOSS_PROJECT_ID or not MOSS_PROJECT_KEY:\n", + " raise ValueError(\"Set MOSS_PROJECT_ID and MOSS_PROJECT_KEY before running the notebook.\")\n", + "\n", + "\n", + "def crawl_urls(urls: list[str], limit: int = 3) -> list[CrawledPage]:\n", + " firecrawl = Firecrawl(api_key=FIRECRAWL_API_KEY)\n", + " pages: list[CrawledPage] = []\n", + "\n", + " for url in urls:\n", + " job = firecrawl.crawl(url=url, limit=limit, scrape_options={\"formats\": [\"markdown\"]})\n", + " raw_pages = getattr(job, \"data\", None) or (job.get(\"data\") if isinstance(job, dict) else []) or []\n", + " pages.extend(page_to_crawled_page(page) for page in raw_pages)\n", + "\n", + " return [page for page in pages if page.markdown.strip()]\n", + "\n", + "\n", + "async def prepare_knowledge_base(urls: list[str], limit: int = 10) -> tuple[MossClient, str]:\n", + " validate_configuration(urls)\n", + " crawled_pages = crawl_urls(urls, limit=limit)\n", + " documents = crawled_pages_to_moss_docs(crawled_pages)\n", + "\n", + " if not documents:\n", + " raise RuntimeError(\"Firecrawl returned no markdown content to index.\")\n", + "\n", + " index_name = f\"firecrawl-cookbook-{uuid.uuid4().hex[:8]}\"\n", + " client = MossClient(MOSS_PROJECT_ID, MOSS_PROJECT_KEY)\n", + "\n", + " await client.create_index(index_name, documents)\n", + " await client.load_index(index_name)\n", + "\n", + " print(f\"Indexed {len(documents)} documents into {index_name}\")\n", + " return client, index_name\n", + "\n", + "\n", + "async def query_knowledge_base(client: MossClient, index_name: str, query: str = DEFAULT_QUERY) -> None:\n", + " 
results = await client.query(index_name, query, QueryOptions(top_k=3, alpha=0.8))\n", + "\n", + " print(f\"Query: {query}\")\n", + " for item in results.docs:\n", + " source_url = item.metadata.get(\"source_url\", \"unknown\") if item.metadata else \"unknown\"\n", + " print(f\"- [{item.score:.3f}] {source_url}\")\n", + " print(f\" {item.text[:200].strip()}\")\n", + "\n", + "\n", + "# Build the knowledge base and query it in one step\n", + "async def build_and_query_knowledge_base(urls: list[str], query: str = DEFAULT_QUERY) -> None:\n", + " client, index_name = await prepare_knowledge_base(urls)\n", + " await query_knowledge_base(client, index_name, query)" + ] + }, + { + "cell_type": "markdown", + "id": "b47066ee", + "metadata": {}, + "source": [ + "## 4. Full Firecrawl + Moss Test (Crawl, Index, and Query)\n", + "\n", + "\n", + "Enter URLs and a question to run end-to-end Firecrawl ingestion and Moss semantic search." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "bb2790da", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Indexed 10 documents into firecrawl-cookbook-af681b7b\n" + ] + } + ], + "source": [ + "urls = [\"https://docs.moss.dev\"]\n", + "\n", + "# Crawl + index once\n", + "client, index_name = await prepare_knowledge_base(urls)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "1bfe1d30", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Query: What is Moss used for?\n", + "- [1.000] https://docs.moss.dev/docs/start/what-is-moss\n", + " [Skip to main content](https://docs.moss.dev/docs/start/what-is-moss#content-area)\n", + "\n", + "[Moss Docs home page![light logo](https://mintcdn.com/moss-afcfb0b6/b460p8xEydp14WML/logo/moss-wordmark-light.svg?fi\n", + "- [0.939] https://docs.moss.dev/docs/api-reference/v1/getting-started/introduction\n", + " [Skip to main 
content](https://docs.moss.dev/docs/api-reference/v1/getting-started/introduction#content-area)\n", + "\n", + "[Moss Docs home page![light logo](https://mintcdn.com/moss-afcfb0b6/b460p8xEydp14WML/logo\n", + "- [0.912] https://docs.moss.dev/docs/reference/python/interfaces/JobStatus\n", + " [Skip to main content](https://docs.moss.dev/docs/reference/python/interfaces/JobStatus#content-area)\n", + "\n", + "[Moss Docs home page![light logo](https://mintcdn.com/moss-afcfb0b6/b460p8xEydp14WML/logo/moss-wo\n" + ] + } + ], + "source": [ + "# Query multiple times without crawling again\n", + "await query_knowledge_base(client, index_name, \"What is Moss used for?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "6956e7a8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Query: What evidence in the docs supports the claim of sub-10 ms search, and what assumptions or caveats should an engineering team validate before adoption?\n", + "- [0.952] https://docs.moss.dev/docs/start/what-is-moss\n", + " [Skip to main content](https://docs.moss.dev/docs/start/what-is-moss#content-area)\n", + "\n", + "[Moss Docs home page![light logo](https://mintcdn.com/moss-afcfb0b6/b460p8xEydp14WML/logo/moss-wordmark-light.svg?fi\n", + "- [0.907] https://docs.moss.dev/docs/api-reference/v1/document-operations/getDocs\n", + " [Skip to main content](https://docs.moss.dev/docs/api-reference/v1/document-operations/getDocs#content-area)\n", + "\n", + "[Moss Docs home page![light logo](https://mintcdn.com/moss-afcfb0b6/b460p8xEydp14WML/logo/\n", + "- [0.891] https://docs.moss.dev/docs/api-reference/v1/document-operations/deleteDocs\n", + " [Skip to main content](https://docs.moss.dev/docs/api-reference/v1/document-operations/deleteDocs#content-area)\n", + "\n", + "[Moss Docs home page![light logo](https://mintcdn.com/moss-afcfb0b6/b460p8xEydp14WML/lo\n" + ] + } + ], + "source": [ + "await query_knowledge_base(\n", + " client,\n", 
+ " index_name,\n", + " \"What evidence in the docs supports the claim of sub-10 ms search, and what assumptions or caveats should an engineering team validate before adoption?\",\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:base] *", + "language": "python", + "name": "conda-base-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 32fb7a4d859a183270b0d4b9de9cc3876f4d087f Mon Sep 17 00:00:00 2001 From: Anand Krishnamoorthy Date: Sun, 3 May 2026 23:05:23 -0400 Subject: [PATCH 2/9] Add Moss+firecrawl architecture --- examples/cookbook/firecrawl/README.md | 48 +++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/examples/cookbook/firecrawl/README.md b/examples/cookbook/firecrawl/README.md index 123ab8d4..04b050c3 100644 --- a/examples/cookbook/firecrawl/README.md +++ b/examples/cookbook/firecrawl/README.md @@ -27,6 +27,54 @@ MOSS_PROJECT_KEY=your-project-key 3. Set `urls` to the pages you want to ingest. 4. Run `await build_and_query_knowledge_base(urls)` to crawl, index, and query the content. +## Workflow + +The notebook is structured for efficiency: + +1. **Prepare** (one-time): `await prepare_knowledge_base(urls)` crawls the URLs, normalizes the markdown, indexes it into Moss, and returns `(client, index_name)` +2. **Query** (repeated): `await query_knowledge_base(client, index_name, query)` runs semantic queries against the indexed knowledge base without re-crawling + +This design lets you crawl once (which can be slow/expensive) and then iterate on queries quickly. 
+ +## Architecture + +``` +┌─────────────┐ +│ URLs │ +└──────┬──────┘ + │ + ├──> Firecrawl (crawl + scrape) + │ +┌──────▼─────────────────┐ +│ Crawled Pages │ +│ (raw HTML/markdown) │ +└──────┬─────────────────┘ + │ + ├──> Markdown Normalization + │ (clean text, remove chrome) + │ +┌──────▼─────────────────┐ +│ Cleaned Markdown │ +│ (one DocumentInfo │ +│ per page) │ +└──────┬─────────────────┘ + │ + ├──> Moss Create Index + │ +┌──────▼─────────────────┐ +│ Indexed Knowledge │ +│ Base (local or cloud) │ +└──────┬─────────────────┘ + │ + ├──> Semantic Query (reusable) + │ (no re-crawling needed) + │ +┌──────▼─────────────────┐ +│ Top-K Results │ +│ (scored passages) │ +└─────────────────────────┘ +``` + ## What the notebook does ```python From 02db2ef460c8e164a5c340919163f9bc54dff188 Mon Sep 17 00:00:00 2001 From: Anand Krishnamoorthy <56905709+AnandKrishnamoorthy1@users.noreply.github.com> Date: Sun, 3 May 2026 23:25:03 -0400 Subject: [PATCH 3/9] Delete firecrawl/.ipynb_checkpoints directory --- .../.ipynb_checkpoints/README-checkpoint.md | 50 ---- .../firecrawl_moss-checkpoint.ipynb | 267 ------------------ 2 files changed, 317 deletions(-) delete mode 100644 examples/cookbook/firecrawl/.ipynb_checkpoints/README-checkpoint.md delete mode 100644 examples/cookbook/firecrawl/.ipynb_checkpoints/firecrawl_moss-checkpoint.ipynb diff --git a/examples/cookbook/firecrawl/.ipynb_checkpoints/README-checkpoint.md b/examples/cookbook/firecrawl/.ipynb_checkpoints/README-checkpoint.md deleted file mode 100644 index 123ab8d4..00000000 --- a/examples/cookbook/firecrawl/.ipynb_checkpoints/README-checkpoint.md +++ /dev/null @@ -1,50 +0,0 @@ -# Firecrawl + Moss Cookbook Example - -Use Firecrawl to turn one or more URLs into clean markdown, then index the results into Moss and query them semantically from a notebook. - -> This is a cookbook example, not a packaged integration. Open [firecrawl_moss.ipynb](firecrawl_moss.ipynb) to follow the full URL-to-query pipeline. 
- -## Installation - -```bash -pip install firecrawl-py moss python-dotenv -``` - -## Setup - -Set these environment variables in your shell or a `.env` file: - -```bash -FIRECRAWL_API_KEY=your-firecrawl-api-key -MOSS_PROJECT_ID=your-project-id -MOSS_PROJECT_KEY=your-project-key -``` - -## Quick Start - -1. Open [firecrawl_moss.ipynb](firecrawl_moss.ipynb) in Jupyter or VS Code. -2. Run the setup and helper cells. -3. Set `urls` to the pages you want to ingest. -4. Run `await build_and_query_knowledge_base(urls)` to crawl, index, and query the content. - -## What the notebook does - -```python -from firecrawl import Firecrawl -from moss import DocumentInfo, MossClient, QueryOptions - -job = Firecrawl(api_key=FIRECRAWL_API_KEY).crawl( - url="https://example.com", - limit=3, - scrape_options={"formats": ["markdown"]}, -) - -documents = [DocumentInfo(id="1", text=job.data[0].markdown, metadata={"source_url": "https://example.com"})] -await MossClient(MOSS_PROJECT_ID, MOSS_PROJECT_KEY).create_index("firecrawl-demo", documents) -``` - -## Files - -| File | Description | -|------|-------------| -| `firecrawl_moss.ipynb` | Notebook that crawls URLs, indexes markdown into Moss, and runs semantic search | diff --git a/examples/cookbook/firecrawl/.ipynb_checkpoints/firecrawl_moss-checkpoint.ipynb b/examples/cookbook/firecrawl/.ipynb_checkpoints/firecrawl_moss-checkpoint.ipynb deleted file mode 100644 index d47416a9..00000000 --- a/examples/cookbook/firecrawl/.ipynb_checkpoints/firecrawl_moss-checkpoint.ipynb +++ /dev/null @@ -1,267 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "15050d77", - "metadata": {}, - "source": [ - "# Firecrawl + Moss Cookbook\n", - "\n", - "Crawl one or more URLs with Firecrawl, convert the results to clean markdown, index them into Moss, and query the knowledge base semantically." - ] - }, - { - "cell_type": "markdown", - "id": "ca524d0e", - "metadata": {}, - "source": [ - "## 1. 
Set Up Project Environment\n", - "\n", - "Install the SDKs and set your credentials before running the notebook.\n", - "\n", - "```bash\n", - "pip install firecrawl-py moss python-dotenv\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c22437b8", - "metadata": {}, - "outputs": [], - "source": [ - "from __future__ import annotations\n", - "\n", - "import os\n", - "import uuid\n", - "from dataclasses import dataclass\n", - "from typing import Any\n", - "\n", - "from dotenv import load_dotenv\n", - "from firecrawl import Firecrawl\n", - "from moss import DocumentInfo, MossClient, QueryOptions\n", - "\n", - "load_dotenv()\n", - "\n", - "FIRECRAWL_API_KEY = os.getenv(\"FIRECRAWL_API_KEY\")\n", - "MOSS_PROJECT_ID = os.getenv(\"MOSS_PROJECT_ID\")\n", - "MOSS_PROJECT_KEY = os.getenv(\"MOSS_PROJECT_KEY\")\n", - "DEFAULT_QUERY = \"What does the knowledge base say about the topic?\"" - ] - }, - { - "cell_type": "markdown", - "id": "ce4dae7b", - "metadata": {}, - "source": [ - "## 2. Define Core Data Structures\n", - "\n", - "Normalize each crawled page into a small Python structure before converting it into Moss documents." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bf5da039", - "metadata": {}, - "outputs": [], - "source": [ - "@dataclass\n", - "class CrawledPage:\n", - " url: str\n", - " markdown: str\n", - " title: str | None = None\n", - "\n", - "\n", - "def page_to_crawled_page(page: Any) -> CrawledPage:\n", - " markdown = getattr(page, \"markdown\", None)\n", - " if markdown is None and isinstance(page, dict):\n", - " markdown = page.get(\"markdown\")\n", - "\n", - " metadata = getattr(page, \"metadata\", None)\n", - " if metadata is None and isinstance(page, dict):\n", - " metadata = page.get(\"metadata\", {})\n", - "\n", - " url = None\n", - " title = None\n", - " if isinstance(metadata, dict):\n", - " url = metadata.get(\"source_url\") or metadata.get(\"sourceURL\") or metadata.get(\"url\")\n", - " title = metadata.get(\"title\") or metadata.get(\"og_title\")\n", - " elif metadata is not None:\n", - " url = getattr(metadata, \"source_url\", None) or getattr(metadata, \"sourceURL\", None) or getattr(metadata, \"url\", None)\n", - " title = getattr(metadata, \"title\", None) or getattr(metadata, \"og_title\", None)\n", - "\n", - " return CrawledPage(url=url or \"unknown\", markdown=markdown or \"\", title=title)\n", - "\n", - "\n", - "def crawled_pages_to_moss_docs(pages: list[CrawledPage]) -> list[DocumentInfo]:\n", - " docs: list[DocumentInfo] = []\n", - " for index, page in enumerate(pages, start=1):\n", - " docs.append(\n", - " DocumentInfo(\n", - " id=f\"firecrawl-{index}\",\n", - " text=page.markdown,\n", - " metadata={\"source_url\": page.url, \"title\": page.title or \"\"},\n", - " )\n", - " )\n", - " return docs" - ] - }, - { - "cell_type": "markdown", - "id": "fcb41889", - "metadata": {}, - "source": [ - "## 3. Implement Main Functionality\n", - "\n", - "Firecrawl handles URL-to-markdown extraction. Moss handles indexing and semantic search." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "42c24a13", - "metadata": {}, - "outputs": [], - "source": [ - "def validate_configuration(urls: list[str]) -> None:\n", - " if not urls:\n", - " raise ValueError(\"Provide at least one URL to crawl.\")\n", - " if not FIRECRAWL_API_KEY:\n", - " raise ValueError(\"Set FIRECRAWL_API_KEY before running the notebook.\")\n", - " if not MOSS_PROJECT_ID or not MOSS_PROJECT_KEY:\n", - " raise ValueError(\"Set MOSS_PROJECT_ID and MOSS_PROJECT_KEY before running the notebook.\")\n", - "\n", - "\n", - "def crawl_urls(urls: list[str], limit: int = 3) -> list[CrawledPage]:\n", - " firecrawl = Firecrawl(api_key=FIRECRAWL_API_KEY)\n", - " pages: list[CrawledPage] = []\n", - "\n", - " for url in urls:\n", - " job = firecrawl.crawl(url=url, limit=limit, scrape_options={\"formats\": [\"markdown\"]})\n", - " raw_pages = getattr(job, \"data\", None) or (job.get(\"data\") if isinstance(job, dict) else []) or []\n", - " pages.extend(page_to_crawled_page(page) for page in raw_pages)\n", - "\n", - " return [page for page in pages if page.markdown.strip()]\n", - "\n", - "\n", - "async def build_and_query_knowledge_base(urls: list[str], query: str = DEFAULT_QUERY) -> None:\n", - " validate_configuration(urls)\n", - " crawled_pages = crawl_urls(urls)\n", - " documents = crawled_pages_to_moss_docs(crawled_pages)\n", - "\n", - " if not documents:\n", - " raise RuntimeError(\"Firecrawl returned no markdown content to index.\")\n", - "\n", - " index_name = f\"firecrawl-cookbook-{uuid.uuid4().hex[:8]}\"\n", - " client = MossClient(MOSS_PROJECT_ID, MOSS_PROJECT_KEY)\n", - "\n", - " await client.create_index(index_name, documents)\n", - " await client.load_index(index_name)\n", - " results = await client.query(index_name, query, QueryOptions(top_k=5, alpha=0.8))\n", - "\n", - " print(f\"Indexed {len(documents)} documents into {index_name}\")\n", - " print(f\"Query: {query}\")\n", - " for item in results.docs:\n", - " source_url 
= item.metadata.get(\"source_url\", \"unknown\") if item.metadata else \"unknown\"\n", - " print(f\"- [{item.score:.3f}] {source_url}\")\n", - " print(f\" {item.text[:200].strip()}\")\n", - "\n", - " return None" - ] - }, - { - "cell_type": "markdown", - "id": "5e8c4a3b", - "metadata": {}, - "source": [ - "## 4. Add Input Validation and Error Handling\n", - "\n", - "Keep the notebook explicit about missing credentials and empty crawl results." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dcc9ed11", - "metadata": {}, - "outputs": [], - "source": [ - "def preview_documents(documents: list[Any]) -> None:\n", - " if not documents:\n", - " print(\"No documents to preview.\")\n", - " return\n", - "\n", - " print(f\"Previewing {len(documents)} documents:\")\n", - " for document in documents[:3]:\n", - " source_url = document.metadata.get(\"source_url\", \"unknown\") if getattr(document, \"metadata\", None) else \"unknown\"\n", - " print(f\"- {document.id} from {source_url}\")\n", - " print(document.text[:120].replace(\"\\n\", \" \"))\n", - "\n", - "\n", - "def safe_run(urls: list[str], query: str = DEFAULT_QUERY) -> None:\n", - " try:\n", - " validate_configuration(urls)\n", - " pages = crawl_urls(urls)\n", - " documents = crawled_pages_to_moss_docs(pages)\n", - " preview_documents(documents)\n", - " except Exception as exc:\n", - " print(f\"Validation or crawl failed: {exc}\")" - ] - }, - { - "cell_type": "markdown", - "id": "c6892bde", - "metadata": {}, - "source": [ - "## 5. Write Unit Tests\n", - "\n", - "These lightweight tests verify the page normalization and document conversion logic without requiring live API calls." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7bd9052d", - "metadata": {}, - "outputs": [], - "source": [ - "class FakePage:\n", - " def __init__(self, markdown: str, source_url: str, title: str | None = None) -> None:\n", - " self.markdown = markdown\n", - " self.metadata = {\"source_url\": source_url, \"title\": title}\n", - "\n", - "\n", - "sample_page = FakePage(\"# Sample page\\n\\nThis is a test.\", \"https://example.com\", \"Example\")\n", - "normalized = page_to_crawled_page(sample_page)\n", - "assert normalized.url == \"https://example.com\"\n", - "assert normalized.title == \"Example\"\n", - "assert \"Sample page\" in normalized.markdown\n", - "\n", - "documents = crawled_pages_to_moss_docs([normalized])\n", - "assert documents[0].id == \"firecrawl-1\"\n", - "assert documents[0].metadata[\"source_url\"] == \"https://example.com\"\n", - "assert documents[0].text.startswith(\"# Sample page\")\n", - "print(\"All notebook sanity checks passed.\")" - ] - }, - { - "cell_type": "markdown", - "id": "45761029", - "metadata": {}, - "source": [ - "## 6. Run and Inspect Results\n", - "\n", - "After setting the API keys, call `await build_and_query_knowledge_base(urls, query)` with your own URLs and inspect the returned passages." 
- ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 17d3bc80801e25c0e373c5af06fa9718ba68f2d9 Mon Sep 17 00:00:00 2001 From: Anand Krishnamoorthy <56905709+AnandKrishnamoorthy1@users.noreply.github.com> Date: Mon, 4 May 2026 23:41:39 -0400 Subject: [PATCH 4/9] Remove unused API keys Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- examples/cookbook/firecrawl/.env.example | 6 ------ 1 file changed, 6 deletions(-) diff --git a/examples/cookbook/firecrawl/.env.example b/examples/cookbook/firecrawl/.env.example index d95c476c..b284a227 100644 --- a/examples/cookbook/firecrawl/.env.example +++ b/examples/cookbook/firecrawl/.env.example @@ -10,9 +10,3 @@ FIRECRAWL_API_KEY=your_firecrawl_api_key # Optional: default index name used by the notebook MOSS_INDEX_NAME=firecrawl-demo - -# Optional LLM keys for RAG synthesis (set one or both) -# Set `GEMINI_API_KEY` to use Google Gemini (if you adapt the helper), -# or set `OPENAI_API_KEY` to use OpenAI in the provided example. 
-GEMINI_API_KEY= -OPENAI_API_KEY= From df24bd9cae84eb5962349c27f857932c7fcf4724 Mon Sep 17 00:00:00 2001 From: Anand Krishnamoorthy Date: Mon, 4 May 2026 23:55:08 -0400 Subject: [PATCH 5/9] Implement copilot fixes --- examples/cookbook/firecrawl/.env.example | 11 +---------- examples/cookbook/firecrawl/firecrawl_moss.ipynb | 8 ++++---- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/examples/cookbook/firecrawl/.env.example b/examples/cookbook/firecrawl/.env.example index d95c476c..07a84e0d 100644 --- a/examples/cookbook/firecrawl/.env.example +++ b/examples/cookbook/firecrawl/.env.example @@ -6,13 +6,4 @@ MOSS_PROJECT_ID=your_moss_project_id MOSS_PROJECT_KEY=your_moss_project_key # Firecrawl API key -FIRECRAWL_API_KEY=your_firecrawl_api_key - -# Optional: default index name used by the notebook -MOSS_INDEX_NAME=firecrawl-demo - -# Optional LLM keys for RAG synthesis (set one or both) -# Set `GEMINI_API_KEY` to use Google Gemini (if you adapt the helper), -# or set `OPENAI_API_KEY` to use OpenAI in the provided example. 
-GEMINI_API_KEY= -OPENAI_API_KEY= +FIRECRAWL_API_KEY=your_firecrawl_api_key \ No newline at end of file diff --git a/examples/cookbook/firecrawl/firecrawl_moss.ipynb b/examples/cookbook/firecrawl/firecrawl_moss.ipynb index a64ae6da..06c95ec9 100644 --- a/examples/cookbook/firecrawl/firecrawl_moss.ipynb +++ b/examples/cookbook/firecrawl/firecrawl_moss.ipynb @@ -126,7 +126,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "42c24a13", "metadata": {}, "outputs": [], @@ -180,7 +180,7 @@ " print(f\" {item.text[:200].strip()}\")\n", "\n", "\n", - "# Build knowledgebase and query it in one step\n", + "# Build knowledge base and query it in one step\n", "async def build_and_query_knowledge_base(urls: list[str], query: str = DEFAULT_QUERY) -> None:\n", " client, index_name = await prepare_knowledge_base(urls)\n", " await query_knowledge_base(client, index_name, query)" @@ -286,9 +286,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:base] *", + "display_name": "base", "language": "python", - "name": "conda-base-py" + "name": "python3" }, "language_info": { "codemirror_mode": { From 6c01c0c4fab0234b0e54674936a5d2496d42cc2f Mon Sep 17 00:00:00 2001 From: Anand Krishnamoorthy Date: Mon, 11 May 2026 21:18:48 -0400 Subject: [PATCH 6/9] Add pyproject.toml for firecrawl cookbook module - Updated README.md --- examples/cookbook/firecrawl/README.md | 7 +++--- examples/cookbook/firecrawl/pyproject.toml | 28 ++++++++++++++++++++++ 2 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 examples/cookbook/firecrawl/pyproject.toml diff --git a/examples/cookbook/firecrawl/README.md b/examples/cookbook/firecrawl/README.md index 04b050c3..cd2a31cc 100644 --- a/examples/cookbook/firecrawl/README.md +++ b/examples/cookbook/firecrawl/README.md @@ -43,18 +43,17 @@ This design lets you crawl once (which can be slow/expensive) and then iterate o │ URLs │ └──────┬──────┘ │ - ├──> Firecrawl (crawl + scrape) + | │ 
┌──────▼─────────────────┐ │ Crawled Pages │ │ (raw HTML/markdown) │ └──────┬─────────────────┘ │ - ├──> Markdown Normalization - │ (clean text, remove chrome) + | │ ┌──────▼─────────────────┐ -│ Cleaned Markdown │ +│ Markdown │ │ (one DocumentInfo │ │ per page) │ └──────┬─────────────────┘ diff --git a/examples/cookbook/firecrawl/pyproject.toml b/examples/cookbook/firecrawl/pyproject.toml new file mode 100644 index 00000000..8fdc17ff --- /dev/null +++ b/examples/cookbook/firecrawl/pyproject.toml @@ -0,0 +1,28 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "firecrawl-moss" +version = "0.1.0" +description = "Firecrawl integration for Moss semantic search" +readme = "README.md" +requires-python = ">=3.11,<3.14" +license = { text = "Apache-2.0" } +authors = [ + { name = "InferEdge Inc.", email = "contact@moss.dev" } +] +dependencies = [ + "firecrawl-py", + "moss>=1.0.0", + "python-dotenv", +] + +[tool.hatch.build.targets.wheel] +packages = ["firecrawl_moss.py"] + +[tool.hatch.build.targets.sdist] +include = [ + "README.md", + ".env.example", +] From 485a3caec6a334b12129c51dc6dfc7d21875760b Mon Sep 17 00:00:00 2001 From: Anand Krishnamoorthy Date: Wed, 13 May 2026 19:58:35 -0400 Subject: [PATCH 7/9] fix: Remove wheel target and update sdist to include notebook --- examples/cookbook/firecrawl/pyproject.toml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/cookbook/firecrawl/pyproject.toml b/examples/cookbook/firecrawl/pyproject.toml index 8fdc17ff..b6c7591f 100644 --- a/examples/cookbook/firecrawl/pyproject.toml +++ b/examples/cookbook/firecrawl/pyproject.toml @@ -18,11 +18,9 @@ dependencies = [ "python-dotenv", ] -[tool.hatch.build.targets.wheel] -packages = ["firecrawl_moss.py"] - [tool.hatch.build.targets.sdist] include = [ "README.md", ".env.example", + "firecrawl_moss.ipynb", ] From 0c96b952d956f9b2ca19e23145e31cdbdb4c8337 Mon Sep 17 00:00:00 2001 From: Anand Krishnamoorthy Date: 
Mon, 15 Jun 2026 22:12:19 -0400 Subject: [PATCH 8/9] Update firecrawl cookbook license to BSD-2-Clause --- examples/cookbook/firecrawl/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cookbook/firecrawl/pyproject.toml b/examples/cookbook/firecrawl/pyproject.toml index b6c7591f..986e2a57 100644 --- a/examples/cookbook/firecrawl/pyproject.toml +++ b/examples/cookbook/firecrawl/pyproject.toml @@ -8,7 +8,7 @@ version = "0.1.0" description = "Firecrawl integration for Moss semantic search" readme = "README.md" requires-python = ">=3.11,<3.14" -license = { text = "Apache-2.0" } +license = { text = "BSD-2-Clause" } authors = [ { name = "InferEdge Inc.", email = "contact@moss.dev" } ] From 4ea0d28ee166b97d9c514dec62c9785be5911776 Mon Sep 17 00:00:00 2001 From: Anand Krishnamoorthy Date: Mon, 15 Jun 2026 22:24:53 -0400 Subject: [PATCH 9/9] fix: remove unsupported esbuild.jsx option and add firecrawl-py version constraint --- apps/next-js/vitest.config.ts | 3 --- examples/cookbook/firecrawl/pyproject.toml | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/apps/next-js/vitest.config.ts b/apps/next-js/vitest.config.ts index 2db69d1a..70d9c675 100644 --- a/apps/next-js/vitest.config.ts +++ b/apps/next-js/vitest.config.ts @@ -1,9 +1,6 @@ import { defineConfig } from 'vitest/config' export default defineConfig({ - esbuild: { - jsx: 'automatic', - }, test: { environment: 'jsdom', globals: true, diff --git a/examples/cookbook/firecrawl/pyproject.toml b/examples/cookbook/firecrawl/pyproject.toml index 986e2a57..91c4bf5e 100644 --- a/examples/cookbook/firecrawl/pyproject.toml +++ b/examples/cookbook/firecrawl/pyproject.toml @@ -13,7 +13,7 @@ authors = [ { name = "InferEdge Inc.", email = "contact@moss.dev" } ] dependencies = [ - "firecrawl-py", + "firecrawl-py>=1.0.0", "moss>=1.0.0", "python-dotenv", ]