From 7d5258668491bf979edb4b659e39b6709db61c22 Mon Sep 17 00:00:00 2001
From: Pranav Shashidhara <pranavmay22@gmail.com>
Date: Mon, 20 Apr 2026 12:32:30 -0400
Subject: [PATCH 1/5] Update README.md and MCP server
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks:
All checks passed ✅
---
 tutorials/NanoClaw/README.md             |   59 ++
 tutorials/NanoClaw/hpandas_mcp_server.py | 1065 ++++++++++++++++++++++
 2 files changed, 1124 insertions(+)
 create mode 100644 tutorials/NanoClaw/README.md
 create mode 100644 tutorials/NanoClaw/hpandas_mcp_server.py

diff --git a/tutorials/NanoClaw/README.md b/tutorials/NanoClaw/README.md
new file mode 100644
index 000000000..520c3723f
--- /dev/null
+++ b/tutorials/NanoClaw/README.md
@@ -0,0 +1,59 @@
+# hpandas MCP Server
+
+## Overview
+
+This project exposes a collection of pandas-based data processing utilities as **MCP (Model Context Protocol) tools**. The goal is to make structured data operations accessible to LLM agents in a controlled, observable, and reusable way.
+
+This repository is part of an ongoing effort to integrate **NanoClaw agents with external MCP tool servers**, enabling agents to reason about data while delegating execution to a well-defined tool layer.
+
+## Motivation
+
+Large language models are effective at reasoning about tasks, but they should not directly execute arbitrary code or access raw datasets. This project separates concerns:
+
+- The **agent (NanoClaw / Claude)** decides what operations to perform  
+- The **MCP server (this project)** executes those operations  
+- The **container environment (Docker / NanoClaw runtime)** enforces isolation  
+
+This architecture improves safety, reproducibility, and transparency while enabling complex workflows over structured data.
+
+## What This Server Provides
+
+The server exposes a wide range of DataFrame operations as MCP tools, including:
+
+### Data Loading and I/O
+- `read_csv`, `read_parquet`
+- `write_csv`, `write_parquet`
+
+### Cleaning and Transformation
+- `dropna`, `drop_duplicates`, `remove_outliers`
+- `filter_df`, `merge_dfs`, `trim_df`, `resample_df`
+
+### Analysis
+- `describe_df`, `rolling_corr_over_time`
+- `print_column_variability`
+
+### Validation and Checks
+- Index and schema validation tools
+- DataFrame comparison utilities
+
+### Utilities
+- DataFrame ↔ JSON conversion
+- Sampling, formatting, and column resolution
+
+All DataFrames are passed as JSON strings to ensure compatibility with LLM tool interfaces.
+
+## Architecture
+NanoClaw Agent (LLM planner)  
+↓  
+MCP Client (tool invocation layer)  
+↓  
+hpandas_mcp_server (this project)  
+↓  
+pandas / numpy execution  
+
+The agent does not directly manipulate data. Instead, it issues structured tool calls, which are executed by the MCP server and returned as structured outputs.
+
+## Running the Server
+```bash
+python hpandas_mcp_server.py
+```
\ No newline at end of file
diff --git a/tutorials/NanoClaw/hpandas_mcp_server.py b/tutorials/NanoClaw/hpandas_mcp_server.py
new file mode 100644
index 000000000..97512c8de
--- /dev/null
+++ b/tutorials/NanoClaw/hpandas_mcp_server.py
@@ -0,0 +1,1065 @@
+#!/usr/bin/env python3
+"""
+hpandas MCP Server
+==================
+Exposes the hpandas helper library as MCP tools so any MCP client
+(Claude Desktop, Claude Code, nano-claw, etc.) can call them.
+
+Run as a standalone stdio server:
+    python hpandas_mcp_server.py
+
+Or register it in Claude Desktop's config:
+    {
+      "mcpServers": {
+        "hpandas": {
+          "command": "python",
+          "args": ["/absolute/path/to/hpandas_mcp_server.py"]
+        }
+      }
+    }
+
+All DataFrames are exchanged as JSON strings (orient="records") with an
+optional "index" key for the row labels.  Timestamps should be ISO-8601
+strings.
+
+Import as:
+    import hpandas_mcp_server
+"""
+
+import io
+import json
+import traceback
+from typing import Any, Dict, List, Optional, Union
+
+import numpy as np
+import pandas as pd
+from mcp.server.fastmcp import FastMCP
+
+# ---------------------------------------------------------------------------
+# Helpers – JSON ↔ DataFrame
+# ---------------------------------------------------------------------------
+
+def _df_from_json(payload: str) -> pd.DataFrame:
+    """
+    Build a DataFrame from its JSON wire format.
+
+    Two payload layouts are understood:
+    * an object ``{"records": [...], "index": [...]}`` (``index`` is optional)
+    * a plain array ``[{...}, ...]`` of records, which keeps the default index
+    Columns named like *time*/*date* and an object-dtype index become datetimes if they parse.
+    """
+    parsed = json.loads(payload)
+    has_records = isinstance(parsed, dict) and "records" in parsed
+    df = pd.DataFrame(parsed["records"] if has_records else parsed)
+    if has_records and "index" in parsed:
+        df.index = parsed["index"]
+    for name in df.columns:
+        lowered = str(name).lower()
+        if "time" not in lowered and "date" not in lowered:
+            continue
+        try:
+            df[name] = pd.to_datetime(df[name])
+        except Exception:
+            pass  # Best effort: an unparseable column is left as it was.
+    if isinstance(df.index, pd.Index) and df.index.dtype == object:
+        try:
+            df.index = pd.to_datetime(df.index)
+        except Exception:
+            pass  # Best effort: keep the original labels.
+    return df
+
+
+def _df_to_json(df: pd.DataFrame) -> str:
+    """Dump a DataFrame as JSON text holding records, index, shape and columns."""
+    records = json.loads(
+        df.to_json(orient="records", date_format="iso", default_handler=str)
+    )
+    body = {
+        "records": records,
+        # Index labels are stringified; column labels are emitted unchanged.
+        "index": [str(label) for label in df.index],
+        "shape": list(df.shape),
+        "columns": list(df.columns),
+    }
+    return json.dumps(body, indent=2)
+
+
+def _srs_to_json(srs: pd.Series) -> str:
+    """
+    Dump a Series as JSON text with "values", "name" and "dtype" keys.
+
+    "values" is whatever ``Series.to_json`` emits, decoded back into a JSON value.
+    """
+    values = json.loads(srs.to_json(date_format="iso", default_handler=str))
+    body = {"values": values, "name": srs.name, "dtype": str(srs.dtype)}
+    return json.dumps(body, indent=2)
+
+
+def _safe(fn, *args, **kwargs):
+    """Call ``fn(*args, **kwargs)``; return ``(result, None)`` or ``(None, traceback_str)``."""
+    try:
+        return fn(*args, **kwargs), None
+    except Exception:
+        return None, traceback.format_exc()
+
+
+# ---------------------------------------------------------------------------
+# Server
+# ---------------------------------------------------------------------------
+
+mcp = FastMCP(  # Server instance that the @mcp.tool() decorators below register on.
+    "hpandas",
+    instructions=(
+        "Tools that wrap the hpandas helper library for pandas DataFrames. "
+        "DataFrames are passed / returned as JSON strings produced by "
+        "_df_to_json / _df_from_json helpers inside this server."
+    ),
+)
+
+
+# ===========================================================================
+# ── DISPLAY ─────────────────────────────────────────────────────────────────
+# ===========================================================================
+
+@mcp.tool()
+def get_df_signature(df_json: str, num_rows: int = 6) -> str:
+    """
+    Summarise a DataFrame as text: its shape plus sample rows.
+
+    :param df_json: DataFrame in the JSON layout read by _df_from_json.
+    :param num_rows: row budget; longer frames show num_rows // 2 head and tail rows.
+    :return: multi-line signature string.
+    """
+    df = _df_from_json(df_json)
+    half = num_rows // 2
+    display_opts = ("display.max_colwidth", int(1e6), "display.max_columns", None)
+    with pd.option_context(*display_opts):
+        if len(df) <= num_rows:
+            parts = [f"df.full=\n{df}"]
+        else:
+            parts = [f"df.head=\n{df.head(half)}", f"df.tail=\n{df.tail(half)}"]
+    return "\n".join([f"df.shape={df.shape}"] + parts)
+
+
+@mcp.tool()
+def convert_df_to_json_string(
+    df_json: str,
+    n_head: Optional[int] = 10,
+    n_tail: Optional[int] = 10,
+) -> str:
+    """
+    Render the first and last rows of a DataFrame as indented JSON text.
+
+    :param df_json: DataFrame in the JSON layout read by _df_from_json.
+    :param n_head: how many leading rows to render (None = every row).
+    :param n_tail: how many trailing rows to render (None = empty tail section).
+    :return: the original shape line followed by "Head:" and "Tail:" sections.
+    """
+    df = _df_from_json(df_json)
+    json_opts: Dict[str, Any] = {
+        "orient": "index", "force_ascii": False, "indent": 4,
+        "default_handler": str, "date_format": "iso", "date_unit": "s",
+    }
+    top = df if n_head is None else df.head(n_head)
+    bottom = "" if n_tail is None else df.tail(n_tail).to_json(**json_opts)
+    return "\n".join([
+        f"original shape={df.shape}",
+        "Head:", top.to_json(**json_opts),
+        "Tail:", bottom,
+    ])
+
+
+# ===========================================================================
+# ── CLEAN ───────────────────────────────────────────────────────────────────
+# ===========================================================================
+
+@mcp.tool()
+def drop_duplicates(
+    df_json: str,
+    use_index: bool = False,
+    column_subset: Optional[List[str]] = None,
+    keep: Union[str, bool] = "first",
+) -> str:
+    """
+    Drop duplicate rows from a DataFrame.
+
+    :param df_json: DataFrame serialised with _df_to_json.
+    :param use_index: if True, the index is included when detecting duplicates.
+    :param column_subset: columns to consider; None = all columns.
+    :param keep: "first", "last", or False / "false" to drop every duplicate.
+    :return: deduplicated DataFrame as JSON.
+    """
+    df = _df_from_json(df_json)
+    # pandas wants the boolean False, which a string-typed tool argument cannot carry.
+    if isinstance(keep, str) and keep.strip().lower() == "false":
+        keep = False
+    cols = column_subset or df.columns.tolist()
+    if not use_index:
+        return _df_to_json(df.drop_duplicates(subset=cols, keep=keep))
+    tmp = "__idx_tmp__"
+    df[tmp] = df.index
+    df = df.drop_duplicates(subset=[tmp] + cols, keep=keep)
+    return _df_to_json(df.drop(columns=[tmp]))
+
+
+@mcp.tool()
+def dropna(
+    df_json: str,
+    drop_infs: bool = False,
+    axis: int = 0,
+    how: str = "any",
+    subset: Optional[List[str]] = None,
+) -> str:
+    """
+    Drop rows (or columns) that contain NaN values.
+
+    :param df_json: DataFrame serialised with _df_to_json.
+    :param drop_infs: if True, treat ±inf as NaN before dropping.
+    :param axis: 0 = drop rows, 1 = drop columns.
+    :param how: "any" or "all".
+    :param subset: columns to check; honoured for axis=0 only, ignored otherwise.
+    :return: cleaned DataFrame as JSON.
+    """
+    df = _df_from_json(df_json)
+    if drop_infs:
+        df = df.replace([np.inf, -np.inf], np.nan)
+    # With axis=1 pandas reads `subset` as row labels, so only pass it for axis=0.
+    return _df_to_json(df.dropna(axis=axis, how=how, subset=subset if axis == 0 else None))
+
+
+@mcp.tool()
+def drop_axis_with_all_nans(
+    df_json: str,
+    drop_rows: bool = True,
+    drop_columns: bool = False,
+    drop_infs: bool = False,
+) -> str:
+    """
+    Prune rows and/or columns in which every value is NaN.
+
+    :param df_json: DataFrame in the JSON layout read by _df_from_json.
+    :param drop_rows: prune rows that are NaN throughout.
+    :param drop_columns: prune columns that are NaN throughout.
+    :param drop_infs: turn ±inf into NaN before looking for all-NaN axes.
+    :return: pruned DataFrame as JSON.
+    """
+    cleaned = _df_from_json(df_json)
+    if drop_infs:
+        cleaned = cleaned.replace([np.inf, -np.inf], np.nan)
+    # Columns are pruned before rows.
+    for axis, enabled in ((1, drop_columns), (0, drop_rows)):
+        if enabled:
+            cleaned = cleaned.dropna(axis=axis, how="all")
+    return _df_to_json(cleaned)
+
+
+@mcp.tool()
+def impute_nans(df_json: str, column: str, value: Any) -> str:
+    """
+    Cast a column to strings and swap every "nan" entry for a given value.
+
+    :param df_json: DataFrame in the JSON layout read by _df_from_json.
+    :param column: column to rewrite; it is converted with ``astype(str)`` first.
+    :param value: what to put wherever the stringified entry equals "nan".
+    :return: DataFrame with the rewritten column, as JSON.
+    """
+    df = _df_from_json(df_json)
+    text = df[column].astype(str)
+    is_nan_literal = text == "nan"
+    df[column] = np.where(is_nan_literal, value, text)
+    return _df_to_json(df)
+
+
+@mcp.tool()
+def remove_outliers(
+    df_json: str,
+    lower_quantile: float,
+    column_set: Optional[List[str]] = None,
+    upper_quantile: Optional[float] = None,
+) -> str:
+    """
+    Blank out (set to NaN) values that fall outside a per-column quantile band.
+
+    :param df_json: DataFrame in the JSON layout read by _df_from_json.
+    :param lower_quantile: quantile marking the bottom of the band.
+    :param column_set: columns to process; None or empty = every numeric column.
+    :param upper_quantile: quantile marking the top; None = 1 - lower_quantile.
+    :return: DataFrame with out-of-band values replaced by NaN, as JSON.
+    """
+    df = _df_from_json(df_json)
+    hi_q = 1.0 - lower_quantile if upper_quantile is None else upper_quantile
+    targets = column_set or df.select_dtypes(include=[np.number]).columns.tolist()
+    for name in targets:
+        values = df[name]
+        low, high = values.quantile(lower_quantile), values.quantile(hi_q)
+        # Series.where keeps in-band values and substitutes NaN elsewhere.
+        df[name] = values.where((values >= low) & (values <= high), np.nan)
+    return _df_to_json(df)
+
+
+# ===========================================================================
+# ── COMPARE ─────────────────────────────────────────────────────────────────
+# ===========================================================================
+
+@mcp.tool()
+def compare_dfs(
+    df1_json: str,
+    df2_json: str,
+    row_mode: str = "inner",
+    column_mode: str = "inner",
+    diff_mode: str = "diff",
+) -> str:
+    """
+    Compare two DataFrames element-wise and return a diff DataFrame.
+
+    :param df1_json: first DataFrame as JSON.
+    :param df2_json: second DataFrame as JSON.
+    :param row_mode: "equal" (indices must match) or "inner" (intersect index).
+    :param column_mode: "equal" (columns must match) or "inner" (intersect columns).
+    :param diff_mode: "diff" (df1 - df2) or "pct_change" (100 * diff / |df2|).
+    :return: diff of the numeric columns as JSON; ±inf is replaced by NaN.
+    :raises ValueError: on an unknown mode, or when an "equal" check fails.
+    """
+    if row_mode not in ("equal", "inner") or column_mode not in ("equal", "inner"):
+        raise ValueError("row_mode and column_mode must be 'equal' or 'inner'")
+    if diff_mode not in ("diff", "pct_change"):
+        raise ValueError(f"diff_mode must be 'diff' or 'pct_change', got {diff_mode!r}")
+    df1, df2 = _df_from_json(df1_json), _df_from_json(df2_json)
+    if row_mode == "inner":
+        common = df1.index.intersection(df2.index)
+        df1, df2 = df1.loc[common], df2.loc[common]
+    elif not df1.index.equals(df2.index):  # "equal" is a check, not an alignment
+        raise ValueError("row_mode='equal' requires identical indices")
+    if column_mode == "inner":
+        common_cols = sorted(set(df1.columns) & set(df2.columns))
+        df1, df2 = df1[common_cols], df2[common_cols]
+    elif not df1.columns.equals(df2.columns):
+        raise ValueError("column_mode='equal' requires identical columns")
+    num_cols = df1.select_dtypes(include=[np.number]).columns.tolist()
+    diff = df1[num_cols] - df2[num_cols]
+    if diff_mode == "pct_change":
+        diff = 100 * diff / df2[num_cols].abs()
+    return _df_to_json(diff.replace([np.inf, -np.inf], np.nan).add_suffix(f".{diff_mode}"))
+
+
+@mcp.tool()
+def compare_nans_in_dataframes(df1_json: str, df2_json: str) -> str:
+    """
+    Show the cells where exactly one of two DataFrames holds NaN.
+
+    :param df1_json: first DataFrame as JSON.
+    :param df2_json: second DataFrame as JSON.
+    :return: comparison over the shared rows and columns, labelled df1 / df2, as JSON.
+    """
+    left, right = _df_from_json(df1_json), _df_from_json(df2_json)
+    rows = left.index.intersection(right.index)
+    cols = sorted(set(left.columns) & set(right.columns))
+    left, right = left.loc[rows, cols], right.loc[rows, cols]
+    # A mismatch is a cell where exactly one of the two frames holds NaN.
+    mismatch = left.isna() ^ right.isna()
+    diff = left[mismatch].compare(right[mismatch], result_names=("df1", "df2"))
+    return _df_to_json(diff)
+
+
+@mcp.tool()
+def find_common_columns(names_json: str, dfs_json: List[str]) -> str:
+    """
+    Tabulate, for each unordered pair of DataFrames, the columns they share.
+
+    :param names_json: JSON array with one label per DataFrame, in the same order.
+    :param dfs_json: the DataFrames, each in the JSON layout read by _df_from_json.
+    :return: one summary row per pair, as a DataFrame in JSON.
+    """
+    names = json.loads(names_json)
+    frames = [_df_from_json(item) for item in dfs_json]
+    summary = []
+    for i, first in enumerate(frames):
+        for j, second in enumerate(frames[i + 1:], start=i + 1):
+            shared = [c for c in first.columns if c in second.columns]
+            summary.append({
+                "table1": names[i], "num_cols1": len(first.columns),
+                "table2": names[j], "num_cols2": len(second.columns),
+                "num_common": len(shared), "common_cols": ", ".join(shared),
+            })
+    return _df_to_json(pd.DataFrame(summary))
+
+
+# ===========================================================================
+# ── CONVERSION ──────────────────────────────────────────────────────────────
+# ===========================================================================
+
+@mcp.tool()
+def to_series(df_json: str, series_dtype: str = "float64") -> str:
+    """
+    Convert a single-column DataFrame into a Series.
+
+    :param df_json: single-column DataFrame as JSON; a frame without rows is accepted.
+    :param series_dtype: dtype of the empty Series returned for a frame without rows.
+    :return: Series as JSON; ValueError is raised unless exactly one column is present.
+    """
+    df = _df_from_json(df_json)
+    # Records-style JSON cannot describe columns without rows, so test emptiness first.
+    if df.shape[0] == 0:
+        return _srs_to_json(pd.Series(dtype=series_dtype))
+    if df.shape[1] != 1:
+        raise ValueError(f"Expected a single-column DataFrame, got {df.shape[1]} columns.")
+    if df.shape[0] > 1:
+        return _srs_to_json(df.squeeze())
+    # squeeze() would collapse a 1x1 frame to a scalar, so build the Series by hand.
+    srs = pd.Series(df.iloc[0, 0], index=[df.index.values[0]], name=df.index.name)
+    return _srs_to_json(srs)
+
+
+@mcp.tool()
+def infer_column_types(df_json: str) -> str:
+    """
+    Label each column with its dominant kind: "is_bool", "is_numeric" or "is_string".
+
+    :param df_json: DataFrame as JSON.
+    :return: JSON object keyed by column name, holding the chosen label.
+    """
+    df = _df_from_json(df_json)
+    kinds: Dict[str, str] = {}
+    for name in df.columns:
+        values = df[name]
+        bool_share = float(values.map(lambda v: isinstance(v, bool)).mean())
+        num_share = float(pd.to_numeric(values, errors="coerce").notna().mean())
+        str_share = float(values.map(lambda v: isinstance(v, str)).mean())
+        if bool_share != 0 and bool_share >= num_share:
+            kind = "is_bool"
+        else:
+            kind = "is_numeric" if num_share != 0 and num_share >= str_share else "is_string"
+        kinds[name] = kind
+    return json.dumps(kinds, indent=2)
+
+
+@mcp.tool()
+def convert_df_types(df_json: str) -> str:
+    """
+    Cast each column to its dominant kind (bool, numeric or string).
+
+    :param df_json: DataFrame as JSON.
+    :return: DataFrame with every column cast accordingly, as JSON.
+    """
+    df = _df_from_json(df_json)
+    yes, no = ["True", 1, "1", "true", True], [0, "0", "False", False, "false"]
+    converted = pd.DataFrame(index=df.index)
+    for name in df.columns:
+        values = df[name]
+        bool_share = float(values.map(lambda v: isinstance(v, bool)).mean())
+        num_share = float(pd.to_numeric(values, errors="coerce").notna().mean())
+        str_share = float(values.map(lambda v: isinstance(v, str)).mean())
+        if bool_share != 0 and bool_share >= num_share:
+            converted[name] = values.map(lambda v: v in yes or (False if v in no else None))
+        elif num_share != 0 and num_share >= str_share:
+            converted[name] = pd.to_numeric(values, errors="coerce")
+        else:
+            converted[name] = values.astype(str)
+    return _df_to_json(converted)
+
+
+@mcp.tool()
+def convert_col_to_int(df_json: str, col: str) -> str:
+    """
+    Re-type one column as int64, leaving the others untouched.
+
+    :param df_json: DataFrame as JSON.
+    :param col: column to cast with ``astype("int64")``.
+    :return: DataFrame with the cast column, as JSON.
+    """
+    df = _df_from_json(df_json)
+    df = df.assign(**{col: df[col].astype("int64")})
+    return _df_to_json(df)
+
+
+# ===========================================================================
+# ── DASSERT (validation) ────────────────────────────────────────────────────
+# ===========================================================================
+
+@mcp.tool()
+def check_index_is_datetime(df_json: str) -> Dict[str, Any]:
+    """
+    Report whether the row index is a pandas DatetimeIndex.
+
+    :param df_json: DataFrame as JSON.
+    :return: {"is_datetime": bool, "index_type": class name of the index}.
+    """
+    index = _df_from_json(df_json).index
+    report: Dict[str, Any] = {}
+    report["is_datetime"] = isinstance(index, pd.DatetimeIndex)
+    report["index_type"] = type(index).__name__
+    return report
+
+
+@mcp.tool()
+def check_unique_index(df_json: str) -> Dict[str, Any]:
+    """
+    Report duplicated labels in the row index.
+
+    :param df_json: DataFrame as JSON.
+    :return: "is_unique", "num_duplicates" (every occurrence counted), and up to 20 labels.
+    """
+    index = _df_from_json(df_json).index
+    repeated = index[index.duplicated(keep=False)].tolist()
+    return dict(
+        is_unique=index.is_unique,
+        num_duplicates=len(repeated),
+        duplicate_values=[str(label) for label in repeated[:20]],
+    )
+
+
+@mcp.tool()
+def check_increasing_index(df_json: str) -> Dict[str, Any]:
+    """
+    Report whether the row index is sorted in increasing order.
+
+    :param df_json: DataFrame as JSON.
+    :return: {"is_monotonic_increasing": bool, "is_strictly_increasing": bool}.
+    """
+    index = _df_from_json(df_json).index
+    non_decreasing = bool(index.is_monotonic_increasing)
+    # Strictly increasing = non-decreasing with no repeated labels.
+    strictly = bool(index.is_monotonic_increasing and index.is_unique)
+    return {"is_monotonic_increasing": non_decreasing, "is_strictly_increasing": strictly}
+
+
+@mcp.tool()
+def check_axes_equal(df1_json: str, df2_json: str) -> Dict[str, Any]:
+    """
+    Compare the row index and the columns of two DataFrames.
+
+    :param df1_json: first DataFrame as JSON.
+    :param df2_json: second DataFrame as JSON.
+    :return: "index_equal" / "columns_equal" flags plus the labels unique to each side.
+    """
+    left, right = _df_from_json(df1_json), _df_from_json(df2_json)
+    rows_l, rows_r = left.index, right.index
+    cols_l, cols_r = left.columns, right.columns
+    report: Dict[str, Any] = {}
+    report["index_equal"] = rows_l.equals(rows_r)
+    report["columns_equal"] = cols_l.equals(cols_r)
+    # Index differences are capped at ten labels per side.
+    report["index_only_in_df1"] = [str(x) for x in rows_l.difference(rows_r)[:10]]
+    report["index_only_in_df2"] = [str(x) for x in rows_r.difference(rows_l)[:10]]
+    report["columns_only_in_df1"] = list(cols_l.difference(cols_r))
+    report["columns_only_in_df2"] = list(cols_r.difference(cols_l))
+    return report
+
+
+@mcp.tool()
+def check_series_dtype(series_json: str, expected_dtype: str) -> Dict[str, Any]:
+    """
+    Compare the "dtype" recorded in a serialised Series with an expected dtype string.
+
+    :param series_json: JSON object carrying a "dtype" key, as written by _srs_to_json.
+    :param expected_dtype: dtype string to compare against, e.g. "float64".
+    :return: {"matches": bool, "actual_dtype": str, "expected_dtype": str}.
+    """
+    actual = json.loads(series_json).get("dtype")
+    return {
+        "matches": actual == expected_dtype,
+        "actual_dtype": actual,
+        "expected_dtype": expected_dtype,
+    }
+
+
+# ===========================================================================
+# ── TRANSFORM ───────────────────────────────────────────────────────────────
+# ===========================================================================
+
+@mcp.tool()
+def trim_df(
+    df_json: str,
+    start_ts: Optional[str] = None,
+    end_ts: Optional[str] = None,
+    ts_col_name: Optional[str] = None,
+    left_close: bool = True,
+    right_close: bool = True,
+) -> str:
+    """
+    Keep only the rows whose timestamp lies inside a given range.
+
+    :param df_json: DataFrame as JSON.
+    :param start_ts: lower bound, parsed with pd.Timestamp; None = unbounded.
+    :param end_ts: upper bound, parsed with pd.Timestamp; None = unbounded.
+    :param ts_col_name: column holding the timestamps; None = use the index.
+    :param left_close: whether rows exactly at start_ts are kept.
+    :param right_close: whether rows exactly at end_ts are kept.
+    :return: the rows inside the range, as JSON.
+    """
+    df = _df_from_json(df_json)
+    # Timestamps come from the named column, or from the index when none is given.
+    raw = df.index.to_series() if ts_col_name is None else df[ts_col_name]
+    stamps = pd.to_datetime(raw)
+    keep = pd.Series(True, index=df.index)
+    # A missing (or empty) bound leaves that side of the range open.
+    if start_ts:
+        lower = pd.Timestamp(start_ts)
+        keep &= (stamps >= lower) if left_close else (stamps > lower)
+    if end_ts:
+        upper = pd.Timestamp(end_ts)
+        keep &= (stamps <= upper) if right_close else (stamps < upper)
+    return _df_to_json(df[keep])
+
+
+@mcp.tool()
+def resample_df(df_json: str, frequency: str) -> str:
+    """
+    Resample a time-indexed DataFrame to a new frequency (mean of numeric columns).
+
+    :param df_json: DataFrame whose index parses as datetimes, as JSON.
+    :param frequency: pandas frequency string, e.g. "D" or "15min".
+    :return: resampled DataFrame as JSON; non-numeric columns are left out of the mean.
+    """
+    df = _df_from_json(df_json)
+    df.index = pd.to_datetime(df.index)
+    return _df_to_json(df.resample(frequency).mean(numeric_only=True))
+
+
+@mcp.tool()
+def merge_dfs(
+    df1_json: str,
+    df2_json: str,
+    how: str = "outer",
+    on: Optional[List[str]] = None,
+    left_on: Optional[List[str]] = None,
+    right_on: Optional[List[str]] = None,
+    suffixes: Optional[List[str]] = None,
+) -> str:
+    """
+    Join two DataFrames with the pandas merge machinery.
+
+    :param df1_json: left-hand DataFrame as JSON.
+    :param df2_json: right-hand DataFrame as JSON.
+    :param how: join type passed to pandas, e.g. "inner", "outer", "left", "right".
+    :param on: key column(s) present in both frames.
+    :param left_on: key column(s) taken from the left frame.
+    :param right_on: key column(s) taken from the right frame.
+    :param suffixes: suffixes for overlapping column names; default ("_x", "_y").
+    :return: joined DataFrame as JSON.
+    """
+    left = _df_from_json(df1_json)
+    right = _df_from_json(df2_json)
+    keys = {"on": on, "left_on": left_on, "right_on": right_on}
+    # An empty or missing suffix list falls back to ("_x", "_y").
+    pair = ("_x", "_y") if not suffixes else tuple(suffixes)
+    return _df_to_json(left.merge(right, how=how, suffixes=pair, **keys))
+
+
+@mcp.tool()
+def filter_df(
+    df_json: str,
+    filter_col: str,
+    filter_values: List[Any],
+    mode: str = "keep",
+) -> str:
+    """
+    Select rows according to whether a column's value is in a given list.
+
+    :param df_json: DataFrame as JSON.
+    :param filter_col: column whose values are looked up in filter_values.
+    :param filter_values: the values that count as a match.
+    :param mode: "drop" removes the matching rows; anything else keeps only them.
+    :return: the selected rows, as JSON.
+    """
+    df = _df_from_json(df_json)
+    hits = df[filter_col].isin(filter_values)
+    # Any mode other than "drop" behaves like "keep".
+    selected = df[~hits] if mode == "drop" else df[hits]
+    return _df_to_json(selected)
+
+
+@mcp.tool()
+def remove_columns(df_json: str, columns: List[str]) -> str:
+    """
+    Return the DataFrame without the named columns.
+
+    :param df_json: DataFrame as JSON.
+    :param columns: names to remove; names not present in the frame are skipped.
+    :return: the remaining columns, as JSON.
+    """
+    df = _df_from_json(df_json)
+    # errors="ignore" skips names that are not present instead of raising.
+    return _df_to_json(df.drop(columns=columns, errors="ignore"))
+
+
+@mcp.tool()
+def str_to_df(csv_string: str, sep: str = ",") -> str:
+    """
+    Turn in-memory CSV text into a DataFrame.
+
+    :param csv_string: the CSV content itself (not a path).
+    :param sep: delimiter handed to ``pd.read_csv``.
+    :return: parsed DataFrame as JSON.
+    """
+    buffer = io.StringIO(csv_string)
+    return _df_to_json(pd.read_csv(buffer, sep=sep))
+
+
+@mcp.tool()
+def head(df_json: str, nrows: int = 5) -> str:
+    """
+    Take the leading rows of a DataFrame.
+
+    :param df_json: DataFrame as JSON.
+    :param nrows: row count handed to ``DataFrame.head``.
+    :return: the leading rows, as JSON.
+    """
+    leading = _df_from_json(df_json).head(nrows)
+    return _df_to_json(leading)
+
+
+@mcp.tool()
+def subset_df(df_json: str, nrows: int, seed: int = 42) -> str:
+    """
+    Draw a seeded random sample of rows.
+
+    :param df_json: DataFrame as JSON.
+    :param nrows: requested sample size; capped at the number of available rows.
+    :param seed: value used as ``random_state`` so the draw is repeatable.
+    :return: the sampled rows, as JSON.
+    """
+    df = _df_from_json(df_json)
+    size = len(df) if nrows > len(df) else nrows
+    return _df_to_json(df.sample(n=size, random_state=seed))
+
+
+# ===========================================================================
+# ── UTILS ───────────────────────────────────────────────────────────────────
+# ===========================================================================
+
+@mcp.tool()
+def df_to_str(df_json: str, num_rows: int = 6) -> str:
+    """
+    Render a DataFrame as plain text, eliding the middle of long frames.
+
+    :param df_json: DataFrame as JSON.
+    :param num_rows: row budget; longer frames show num_rows // 2 rows from each end.
+    :return: the rendered text, with "..." between head and tail when elided.
+    """
+    df = _df_from_json(df_json)
+    options = (
+        "display.max_columns", None,
+        "display.max_colwidth", 200,
+        "display.width", 10000,
+    )
+    with pd.option_context(*options):
+        if len(df) > num_rows:
+            half = num_rows // 2
+            pieces = [df.head(half).to_string(), "...", df.tail(half).to_string()]
+            return "\n".join(pieces)
+        return df.to_string()
+
+
+@mcp.tool()
+def add_pct(
+    df_json: str,
+    col: str,
+    total_col: str,
+    pct_col: str = "pct",
+) -> str:
+    """
+    Append a column holding ``col`` as a percentage of ``total_col``.
+
+    :param df_json: DataFrame as JSON.
+    :param col: column supplying the numerator.
+    :param total_col: column supplying the denominator.
+    :param pct_col: name of the column that receives the result (2 decimals).
+    :return: DataFrame including the percentage column, as JSON.
+    """
+    df = _df_from_json(df_json)
+    df[pct_col] = df[col].div(df[total_col]).mul(100).round(2)
+    return _df_to_json(df)
+
+
+@mcp.tool()
+def find_gaps_in_time_series(
+    df_json: str,
+    frequency: str,
+) -> str:
+    """
+    Identify missing timestamps in a regularly-spaced time series.
+
+    :param df_json: time-indexed DataFrame as JSON; an empty frame yields "[]".
+    :param frequency: expected pandas frequency string, e.g. "D" or "15min".
+    :return: JSON list of missing timestamps in ISO-8601 ("T"-separated) form.
+    """
+    index = pd.to_datetime(_df_from_json(df_json).index)
+    if index.empty:
+        return json.dumps([])  # nothing to scan; date_range rejects NaT bounds
+    expected = pd.date_range(index.min(), index.max(), freq=frequency)
+    return json.dumps([ts.isoformat() for ts in expected.difference(index)], indent=2)
+
+
+@mcp.tool()
+def resolve_column_names(
+    df_json: str,
+    column_set: Optional[Union[str, List[str]]] = None,
+) -> List[str]:
+    """
+    Expand a column selection into an explicit list of names.
+
+    :param df_json: DataFrame as JSON; its columns define which names are valid.
+    :param column_set: None selects every column, a string one column, a list several.
+    :return: the selected names; ValueError is raised if any of them is unknown.
+    """
+    available = _df_from_json(df_json).columns.tolist()
+    if column_set is None:
+        return available
+    # A lone name is treated as a one-element selection.
+    wanted = [column_set] if isinstance(column_set, str) else column_set
+    # Reject the request outright if any requested name is absent.
+    unknown = [name for name in wanted if name not in available]
+    if unknown:
+        raise ValueError(f"Columns not found: {unknown}")
+    return wanted
+
+
+# ===========================================================================
+# ── MULTI-INDEX ─────────────────────────────────────────────────────────────
+# ===========================================================================
+
+@mcp.tool()
+def multiindex_df_info(df_json: str) -> str:
+    """
+    Describe a DataFrame whose columns form a MultiIndex (levels 0 and 1 are read).
+
+    :param df_json: DataFrame as JSON.
+    :return: multi-line summary, or a one-line notice when columns are not a MultiIndex.
+    """
+    df = _df_from_json(df_json)
+    columns, index = df.columns, df.index
+    if not isinstance(columns, pd.MultiIndex):
+        return f"Not a MultiIndex DataFrame. columns type: {type(columns).__name__}"
+    outer = columns.get_level_values(0).unique().tolist()
+    inner = columns.get_level_values(1).unique().tolist()
+    rows = index.tolist()
+    n_rows = len(rows)
+    report = [f"shape={len(outer)} x {len(inner)} x {n_rows}"]
+    report.append(f"columns_level0={outer}")
+    report.append(f"columns_level1={inner}")
+    report.append(f"num_rows={n_rows}")
+    if isinstance(index, pd.DatetimeIndex):
+        # Fall back to inference when the index carries no explicit frequency.
+        freq = index.freq or pd.infer_freq(index)
+        report.append(f"start_timestamp={index.min()}")
+        report.append(f"end_timestamp={index.max()}")
+        report.append(f"frequency={freq}")
+    return "\n".join(report)
+
+
+# ===========================================================================
+# ── IO ──────────────────────────────────────────────────────────────────────
+# ===========================================================================
+
+@mcp.tool()
+def read_csv(
+    file_path: str,
+    index_col: Optional[Union[int, str]] = None,
+    parse_dates: bool = True,
+) -> str:
+    """
+    Read a CSV file from disk into a DataFrame.
+
+    :param file_path: path to the CSV (or .gz / .zip) file.
+    :param index_col: column to use as the row index.
+    :param parse_dates: attempt to parse the index as dates.
+    :return: DataFrame as JSON.
+    """
+    kwargs: Dict[str, Any] = {}
+    if index_col is not None:
+        kwargs["index_col"] = index_col
+    if parse_dates:
+        kwargs["parse_dates"] = True
+    if any(file_path.endswith(ext) for ext in (".gz", ".gzip", ".tgz")):
+        kwargs["compression"] = "gzip"
+    elif file_path.endswith(".zip"):
+        kwargs["compression"] = "zip"
+    df = pd.read_csv(file_path, **kwargs)
+    return _df_to_json(df)
+
+
+@mcp.tool()
+def read_parquet(file_path: str) -> str:
+    """
+    Read a Parquet file from disk into a DataFrame.
+
+    :param file_path: path to the Parquet file.
+    :return: DataFrame as JSON.
+    """
+    df = pd.read_parquet(file_path)
+    return _df_to_json(df)
+
+
+@mcp.tool()
+def write_csv(df_json: str, file_path: str, index: bool = True) -> str:
+    """
+    Write a DataFrame to a CSV file.
+
+    :param df_json: DataFrame as JSON.
+    :param file_path: destination path.
+    :param index: whether to write the row index.
+    :return: confirmation message.
+    """
+    df = _df_from_json(df_json)
+    df.to_csv(file_path, index=index)
+    return f"Saved {df.shape[0]} rows × {df.shape[1]} columns to '{file_path}'"
+
+
+@mcp.tool()
+def write_parquet(df_json: str, file_path: str) -> str:
+    """
+    Write a DataFrame to a Parquet file.
+
+    :param df_json: DataFrame as JSON.
+    :param file_path: destination path.
+    :return: confirmation message.
+    """
+    df = _df_from_json(df_json)
+    df.to_parquet(file_path)
+    return f"Saved {df.shape[0]} rows × {df.shape[1]} columns to '{file_path}'"
+
+
+# ===========================================================================
+# ── ANALYSIS ────────────────────────────────────────────────────────────────
+# ===========================================================================
+
+@mcp.tool()
+def rolling_corr_over_time(
+    df_json: str,
+    com: float,
+    nan_mode: str = "drop",
+) -> str:
+    """
+    Compute an exponentially-weighted rolling correlation matrix over time.
+
+    :param df_json: time-indexed DataFrame as JSON.
+    :param com: center-of-mass for the EWM calculation.
+    :param nan_mode: "drop", "fill_with_zero", or "abort".
+    :return: multi-index correlation DataFrame as JSON.
+    """
+    df = _df_from_json(df_json)
+    df.index = pd.to_datetime(df.index)
+    if nan_mode == "drop":
+        df = df.dropna(how="any")
+    elif nan_mode == "fill_with_zero":
+        df = df.fillna(0.0)
+    elif nan_mode == "abort":
+        n = int(df.isna().sum().sum())
+        if n > 0:
+            raise ValueError(f"DataFrame has {n} NaN values.")
+    corr_df = df.ewm(com=com, min_periods=int(3 * com)).corr()
+    return _df_to_json(corr_df.reset_index())
+
+
+@mcp.tool()
+def describe_df(df_json: str, percentiles: Optional[List[float]] = None) -> str:
+    """
+    Return descriptive statistics for a DataFrame (wrapper of df.describe()).
+
+    :param df_json: DataFrame as JSON.
+    :param percentiles: list of percentiles to include, e.g. [0.1, 0.5, 0.9].
+    :return: describe DataFrame as JSON.
+    """
+    df = _df_from_json(df_json)
+    stats = df.describe(percentiles=percentiles)
+    return _df_to_json(stats)
+
+
+@mcp.tool()
+def print_column_variability(df_json: str) -> str:
+    """
+    Report the number of unique values and coefficient of variation per column.
+
+    :param df_json: DataFrame as JSON.
+    :return: JSON object mapping column → {nunique, cv, dtype}.
+    """
+    df = _df_from_json(df_json)
+    result: Dict[str, Any] = {}
+    for col in df.columns:
+        s = df[col]
+        info: Dict[str, Any] = {
+            "nunique": int(s.nunique()),
+            "dtype": str(s.dtype),
+        }
+        num = pd.to_numeric(s, errors="coerce")
+        if num.notna().any() and num.mean() != 0:
+            info["cv"] = round(float(num.std() / abs(num.mean())), 4)
+        result[col] = info
+    return json.dumps(result, indent=2)
+
+
+# ===========================================================================
+# ── CHECK SUMMARY ───────────────────────────────────────────────────────────
+# ===========================================================================
+
+# In-process store for CheckSummary objects keyed by a session_id string.
+_SUMMARIES: Dict[str, Any] = {}
+
+
+@mcp.tool()
+def check_summary_create(session_id: str, title: str = "") -> str:
+    """
+    Create a new CheckSummary session.
+
+    :param session_id: unique string identifier for this session.
+    :param title: optional title shown in reports.
+    :return: confirmation message.
+    """
+    _SUMMARIES[session_id] = {"title": title, "rows": []}
+    return f"CheckSummary '{session_id}' created."
+
+
+@mcp.tool()
+def check_summary_add(
+    session_id: str,
+    description: str,
+    comment: str,
+    is_ok: bool,
+) -> str:
+    """
+    Add a check result to an existing CheckSummary session.
+
+    :param session_id: session created with check_summary_create.
+    :param description: short label for this check.
+    :param comment: details / evidence.
+    :param is_ok: True if the check passed.
+    :return: confirmation message.
+    """
+    if session_id not in _SUMMARIES:
+        raise KeyError(f"Session '{session_id}' not found. Call check_summary_create first.")
+    _SUMMARIES[session_id]["rows"].append(
+        {"description": description, "comment": comment, "is_ok": is_ok}
+    )
+    return f"Added check '{description}' (is_ok={is_ok})."
+
+
+@mcp.tool()
+def check_summary_report(session_id: str) -> str:
+    """
+    Return a formatted text report for a CheckSummary session.
+
+    :param session_id: session to report on.
+    :return: plain-text summary table.
+    """
+    if session_id not in _SUMMARIES:
+        raise KeyError(f"Session '{session_id}' not found.")
+    sess = _SUMMARIES[session_id]
+    rows = sess["rows"]
+    title = sess["title"]
+    df = pd.DataFrame(rows)
+    all_ok = all(r["is_ok"] for r in rows)
+    report_lines = []
+    if title:
+        report_lines.append(f"# {title}")
+    report_lines.append(df.to_string(index=False))
+    report_lines.append(f"\nis_ok={all_ok}")
+    return "\n".join(report_lines)
+
+
+# ===========================================================================
+# ── Entry point ─────────────────────────────────────────────────────────────
+# ===========================================================================
+
+if __name__ == "__main__":
+    mcp.run(transport="stdio")
\ No newline at end of file

From fff0984959632ad37a256567b21ee67ac0dc1412 Mon Sep 17 00:00:00 2001
From: Pranav Shashidhara <pranavmay22@gmail.com>
Date: Wed, 22 Apr 2026 12:21:07 -0400
Subject: [PATCH 2/5] Update README.md and dataset_generator.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks:
All checks passed ✅
---
 tutorials/NanoClaw/README.md            | 290 ++++++++++++++++++++----
 tutorials/NanoClaw/dataset_generator.py |  28 +++
 2 files changed, 280 insertions(+), 38 deletions(-)
 create mode 100644 tutorials/NanoClaw/dataset_generator.py

diff --git a/tutorials/NanoClaw/README.md b/tutorials/NanoClaw/README.md
index 520c3723f..d743136c6 100644
--- a/tutorials/NanoClaw/README.md
+++ b/tutorials/NanoClaw/README.md
@@ -1,59 +1,273 @@
-# hpandas MCP Server
+# NanoClaw Tutorial - Agentic EDA System Using MCP Tools
 
-## Overview
+An end-to-end exploratory data analysis system powered by a Claude agent (NanoClaw) and a custom MCP server (`hpandas`). Instead of writing pandas code manually, you describe your analysis in plain English — the agent translates it into tool calls against your dataset in real time.
 
-This project exposes a collection of pandas-based data processing utilities as **MCP (Model Context Protocol) tools**. The goal is to make structured data operations accessible to LLM agents in a controlled, observable, and reusable way.
+## How It Works
 
-This repository is part of an ongoing effort to integrate **NanoClaw agents with external MCP tool servers**, enabling agents to reason about data while delegating execution to a well-defined tool layer.
+A Claude agent runs inside a NanoClaw container. When you describe an analysis task, the agent issues structured tool calls over JSON-RPC to a Python MCP server on your host machine. That server executes pandas operations and streams results back to the agent — no manual scripting required.
 
-## Motivation
+```
+You (natural language prompt)
+        │
+        ▼
+Claude Agent (NanoClaw container)
+        │  tool call
+        ▼
+MCP Client (NanoClaw)
+        │  JSON-RPC
+        ▼
+hpandas MCP Server (host Python process)
+        │
+        ▼
+pandas → results returned to agent
+```
 
-Large language models are effective at reasoning about tasks, but they should not directly execute arbitrary code or access raw datasets. This project separates concerns:
+## Prerequisites
 
-- The **agent (NanoClaw / Claude)** decides what operations to perform  
-- The **MCP server (this project)** executes those operations  
-- The **container environment (Docker / NanoClaw runtime)** enforces isolation  
+- GitHub CLI (`gh`) or `git`
+- Python 3.x
+- Docker or Apple Container runtime
+- Claude Code (`claude` CLI)
 
-This architecture improves safety, reproducibility, and transparency while enabling complex workflows over structured data.
+## 1. Clone the Repository
 
-## What This Server Provides
+**With GitHub CLI (recommended):**
+```bash
+gh repo fork qwibitai/nanoclaw --clone
+cd nanoclaw
+```
+
+**With Git:**
+```bash
+git clone https://github.com/qwibitai/nanoclaw.git
+cd nanoclaw
+```
+
+## 2. Install Dependencies & Bootstrap
+
+```bash
+claude
+```
+
+Inside Claude Code, run:
+```
+/setup
+```
+
+This will:
+- Install Node dependencies
+- Configure the container runtime (Docker / Apple Container)
+- Initialize the MCP system
+- Set up default channels
+
+## 3. Register the `hpandas` MCP Server
+
+Create or edit `.mcp.json` in the NanoClaw root:
+
+```bash
+nano .mcp.json
+```
+
+Add the following:
+
+```json
+{
+  "mcpServers": {
+    "hpandas": {
+      "command": "python",
+      "args": [
+        "absolute_path/umd_msml610/tutorials/NanoClaw/hpandas_mcp_server.py"
+      ]
+    }
+  }
+}
+```
+
+> Update `absolute_path` to match your local filesystem.
+
+## 4. Verify the MCP Server
+
+Before starting NanoClaw, confirm the server runs without errors:
+
+```bash
+python umd_msml610/tutorials/NanoClaw/hpandas_mcp_server.py
+```
+
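+A healthy server will stay running without crashes or JSON-RPC errors. It uses the stdio transport, so it will appear to hang while it waits for JSON-RPC messages on stdin; press Ctrl+C to stop it.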
+
+## 5. Generate the Dataset
+
+```bash
+cd umd_msml610/tutorials/NanoClaw
+python dataset_generator.py
+```
+
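+This produces `dummy_users.csv` in the current working directory (here, `tutorials/NanoClaw`). The `read_csv` tool takes the file path as an argument, so keep the file here and give the agent this path when asking it to load the dataset.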
+
+## 6. Start NanoClaw
+
+```bash
+cd nanoclaw
+claude
+```
+
+Then inside Claude Code:
+```
+/setup
+```
+
+NanoClaw will:
+- Read `.mcp.json`
+- Spawn the MCP server process
+- Register `hpandas` tools under the `mcp__hpandas__*` namespace
 
-The server exposes a wide range of DataFrame operations as MCP tools, including:
+## 7. Verify MCP Is Connected
 
-### Data Loading and I/O
-- `read_csv`, `read_parquet`
-- `write_csv`, `write_parquet`
+Inside Claude Code, prompt:
+```
+Load dummy_users.csv and show the first 10 rows.
+```
 
-### Cleaning and Transformation
-- `dropna`, `drop_duplicates`, `remove_outliers`
-- `filter_df`, `merge_dfs`, `trim_df`, `resample_df`
+You should see tool calls like:
+```
+mcp__hpandas__read_csv
+mcp__hpandas__df_to_str
+```
+
+## 8. Run EDA — Natural Language Prompts
+
+Once connected, interact with your dataset entirely in natural language:
+
+| Task | Prompt |
+|||
+| Overview | `"Describe the dataset"` |
+| Data quality | `"Find missing values"` |
+| Filtering | `"Show users with income > 50k"` |
+| Aggregation | `"Average spend score by country"` |
+
+## MCP Tools Reference
+
+All tools are exposed under the `mcp__hpandas__` namespace. DataFrames are passed between tools as JSON strings using the internal `_df_to_json` / `_df_from_json` format (`records` + `index` + `shape` + `columns`).
+
+### I/O
+
+| Tool | Description |
+|||
+| `read_csv` | Read a CSV (or `.gz` / `.zip`) file from disk into a DataFrame |
+| `read_parquet` | Read a Parquet file from disk into a DataFrame |
+| `write_csv` | Write a DataFrame to a CSV file |
+| `write_parquet` | Write a DataFrame to a Parquet file |
+| `str_to_df` | Parse a raw CSV string into a DataFrame |
+
+### Display
+
+| Tool | Description |
+|||
+| `get_df_signature` | Compact shape + head/tail summary of a DataFrame |
+| `convert_df_to_json_string` | Pretty-printed JSON showing head and tail rows |
+| `df_to_str` | Human-readable string representation (head + tail) |
+
+### Cleaning
+
+| Tool | Description |
+|||
+| `drop_duplicates` | Remove duplicate rows, optionally considering the index |
+| `dropna` | Drop rows or columns containing NaN values |
+| `drop_axis_with_all_nans` | Remove rows and/or columns that are entirely NaN |
+| `impute_nans` | Replace string literal `"nan"` entries in a column with a given value |
+| `remove_outliers` | Clip values outside a quantile range to NaN |
+| `remove_columns` | Drop specified columns from a DataFrame |
 
 ### Analysis
-- `describe_df`, `rolling_corr_over_time`
-- `print_column_variability`
 
-### Validation and Checks
-- Index and schema validation tools
-- DataFrame comparison utilities
+| Tool | Description |
+|||
+| `describe_df` | Descriptive statistics (wraps `df.describe()`) |
+| `print_column_variability` | Unique value counts and coefficient of variation per column |
+| `rolling_corr_over_time` | Exponentially-weighted rolling correlation matrix over a time index |
+
+### Filtering & Transformation
+
+| Tool | Description |
+|||
+| `filter_df` | Keep or drop rows matching specific values in a column |
+| `head` | Return the first N rows |
+| `subset_df` | Return a random sample of N rows |
+| `add_pct` | Add a percentage column (`col / total_col * 100`) |
+| `resample_df` | Resample a time-indexed DataFrame to a new frequency (mean) |
+| `trim_df_by_time_period` | Slice a DataFrame to a timestamp range |
+| `find_gaps_in_time_series` | Identify missing timestamps in a regularly-spaced time series |
+| `merge_dfs` | Merge two DataFrames (wraps `pd.merge`) |
+
+### Type Conversion
 
-### Utilities
-- DataFrame ↔ JSON conversion
-- Sampling, formatting, and column resolution
+| Tool | Description |
+|||
+| `infer_column_types` | Detect whether each column is bool / numeric / string |
+| `convert_df_types` | Auto-convert each column to its detected type |
+| `convert_col_to_int` | Cast a single column to `int64` |
+| `to_series` | Convert a single-column DataFrame to a Series |
 
-All DataFrames are passed as JSON strings to ensure compatibility with LLM tool interfaces.
+### Comparison
 
-## Architecture
-NanoClaw Agent (LLM planner)  
-↓  
-MCP Client (tool invocation layer)  
-↓  
-hpandas_mcp_server (this project)  
-↓  
-pandas / numpy execution  
+| Tool | Description |
+|||
+| `compare_dfs` | Element-wise diff (absolute or % change) between two DataFrames |
+| `compare_nans_in_dataframes` | Highlight positions where NaN status differs between two DataFrames |
+| `find_common_columns` | Report columns shared across multiple DataFrames |
 
-The agent does not directly manipulate data. Instead, it issues structured tool calls, which are executed by the MCP server and returned as structured outputs.
+### Validation
 
-## Running the Server
+| Tool | Description |
+|||
+| `check_index_is_datetime` | Assert the DataFrame index is a `DatetimeIndex` |
+| `resolve_column_names` | Validate and resolve a column specification to a concrete list |
+
+### Check Summary (Audit Log)
+
+A lightweight session-based reporting system for logging pass/fail checks during analysis.
+
+| Tool | Description |
+|||
+| `check_summary_create` | Start a new named check session |
+| `check_summary_add` | Append a pass/fail check result to a session |
+| `check_summary_report` | Print a formatted summary table for a session |
+
+### MultiIndex
+
+| Tool | Description |
+|||
+| `multiindex_df_info` | Return shape, level values, and time range metadata for a 2-level MultiIndex DataFrame |
+
+## Deployment Layout
+
+NanoClaw and the MCP server are **separate processes** — they communicate over JSON-RPC, not shared memory or a shared container.
+
+| Component | Location |
+|||
+| NanoClaw agent | Docker / Apple Container |
+| `hpandas` MCP server | Host machine (Python process) |
+| Dataset (`dummy_users.csv`) | Local filesystem |
+
+## Troubleshooting
+
+**Error: `Invalid JSON: EOF while parsing` / `Internal Server Error`**
+
+This means the MCP server either crashed mid-response or wrote non-JSON output to stdout, corrupting the JSON-RPC stream.
+
+**Fix:**
+- Remove all `print()` debug statements from the MCP server
+- Ensure the server outputs **only** valid JSON-RPC messages to stdout
+- Run the server standalone first to confirm it stays healthy
+
+## Quick Demo Flow
 
 ```bash
-python hpandas_mcp_server.py
\ No newline at end of file
+gh repo fork qwibitai/nanoclaw --clone
+cd nanoclaw
+claude
+# → /setup
+
+# Inside Claude Code:
+# "Load dummy_users.csv and analyze it using hpandas MCP server"
+```
\ No newline at end of file
diff --git a/tutorials/NanoClaw/dataset_generator.py b/tutorials/NanoClaw/dataset_generator.py
new file mode 100644
index 000000000..912ab3aba
--- /dev/null
+++ b/tutorials/NanoClaw/dataset_generator.py
@@ -0,0 +1,28 @@
+import pandas as pd
+import numpy as np
+
+np.random.seed(42)
+
+n = 100
+
+df = pd.DataFrame({
+    "user_id": range(1, n + 1),
+    "age": np.random.normal(30, 8, n).round(0),
+    "income": np.random.normal(60000, 15000, n).round(0),
+    "spend_score": np.random.uniform(1, 100, n).round(2),
+    "country": np.random.choice(["US", "UK", "IN", "DE"], n),
+    "signup_date": pd.date_range("2024-01-01", periods=n, freq="D")
+})
+
+# Add missing values
+df.loc[np.random.choice(n, 10, replace=False), "income"] = np.nan
+df.loc[np.random.choice(n, 8, replace=False), "age"] = np.nan
+
+# Add outliers
+df.loc[np.random.choice(n, 2), "income"] = 500000
+df.loc[np.random.choice(n, 2), "spend_score"] = 1000
+
+# SAVE IN SAME FOLDER
+df.to_csv("dummy_users.csv")
+
+print("Saved dummy_users.csv in current folder")
\ No newline at end of file

From c23ad3f171ee8d3682f8eccdcfeb6237264b8b4a Mon Sep 17 00:00:00 2001
From: Pranav Shashidhara <pranavmay22@gmail.com>
Date: Thu, 23 Apr 2026 11:12:10 -0400
Subject: [PATCH 3/5] Lint and Update files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks:
All checks passed ✅
---
 tutorials/NanoClaw/README.md             |  25 ++-
 tutorials/NanoClaw/__init__.py           |   0
 tutorials/NanoClaw/dataset_generator.py  |  28 ++-
 tutorials/NanoClaw/hpandas_mcp_server.py | 274 ++++++++++++++---------
 4 files changed, 203 insertions(+), 124 deletions(-)
 create mode 100644 tutorials/NanoClaw/__init__.py

diff --git a/tutorials/NanoClaw/README.md b/tutorials/NanoClaw/README.md
index d743136c6..f2619b718 100644
--- a/tutorials/NanoClaw/README.md
+++ b/tutorials/NanoClaw/README.md
@@ -139,12 +139,13 @@ mcp__hpandas__df_to_str
 Once connected, interact with your dataset entirely in natural language:
 
 | Task | Prompt |
-|||
+|---|---|
 | Overview | `"Describe the dataset"` |
 | Data quality | `"Find missing values"` |
 | Filtering | `"Show users with income > 50k"` |
 | Aggregation | `"Average spend score by country"` |
 
+
 ## MCP Tools Reference
 
 All tools are exposed under the `mcp__hpandas__` namespace. DataFrames are passed between tools as JSON strings using the internal `_df_to_json` / `_df_from_json` format (`records` + `index` + `shape` + `columns`).
@@ -152,7 +153,7 @@ All tools are exposed under the `mcp__hpandas__` namespace. DataFrames are passe
 ### I/O
 
 | Tool | Description |
-|||
+|---|---|
 | `read_csv` | Read a CSV (or `.gz` / `.zip`) file from disk into a DataFrame |
 | `read_parquet` | Read a Parquet file from disk into a DataFrame |
 | `write_csv` | Write a DataFrame to a CSV file |
@@ -162,7 +163,7 @@ All tools are exposed under the `mcp__hpandas__` namespace. DataFrames are passe
 ### Display
 
 | Tool | Description |
-|||
+|---|---|
 | `get_df_signature` | Compact shape + head/tail summary of a DataFrame |
 | `convert_df_to_json_string` | Pretty-printed JSON showing head and tail rows |
 | `df_to_str` | Human-readable string representation (head + tail) |
@@ -170,7 +171,7 @@ All tools are exposed under the `mcp__hpandas__` namespace. DataFrames are passe
 ### Cleaning
 
 | Tool | Description |
-|||
+|---|---|
 | `drop_duplicates` | Remove duplicate rows, optionally considering the index |
 | `dropna` | Drop rows or columns containing NaN values |
 | `drop_axis_with_all_nans` | Remove rows and/or columns that are entirely NaN |
@@ -181,7 +182,7 @@ All tools are exposed under the `mcp__hpandas__` namespace. DataFrames are passe
 ### Analysis
 
 | Tool | Description |
-|||
+|---|---|
 | `describe_df` | Descriptive statistics (wraps `df.describe()`) |
 | `print_column_variability` | Unique value counts and coefficient of variation per column |
 | `rolling_corr_over_time` | Exponentially-weighted rolling correlation matrix over a time index |
@@ -189,7 +190,7 @@ All tools are exposed under the `mcp__hpandas__` namespace. DataFrames are passe
 ### Filtering & Transformation
 
 | Tool | Description |
-|||
+|---|---|
 | `filter_df` | Keep or drop rows matching specific values in a column |
 | `head` | Return the first N rows |
 | `subset_df` | Return a random sample of N rows |
@@ -202,7 +203,7 @@ All tools are exposed under the `mcp__hpandas__` namespace. DataFrames are passe
 ### Type Conversion
 
 | Tool | Description |
-|||
+|---|---|
 | `infer_column_types` | Detect whether each column is bool / numeric / string |
 | `convert_df_types` | Auto-convert each column to its detected type |
 | `convert_col_to_int` | Cast a single column to `int64` |
@@ -211,7 +212,7 @@ All tools are exposed under the `mcp__hpandas__` namespace. DataFrames are passe
 ### Comparison
 
 | Tool | Description |
-|||
+|---|---|
 | `compare_dfs` | Element-wise diff (absolute or % change) between two DataFrames |
 | `compare_nans_in_dataframes` | Highlight positions where NaN status differs between two DataFrames |
 | `find_common_columns` | Report columns shared across multiple DataFrames |
@@ -219,7 +220,7 @@ All tools are exposed under the `mcp__hpandas__` namespace. DataFrames are passe
 ### Validation
 
 | Tool | Description |
-|||
+|---|---|
 | `check_index_is_datetime` | Assert the DataFrame index is a `DatetimeIndex` |
 | `resolve_column_names` | Validate and resolve a column specification to a concrete list |
 
@@ -228,7 +229,7 @@ All tools are exposed under the `mcp__hpandas__` namespace. DataFrames are passe
 A lightweight session-based reporting system for logging pass/fail checks during analysis.
 
 | Tool | Description |
-|||
+|---|---|
 | `check_summary_create` | Start a new named check session |
 | `check_summary_add` | Append a pass/fail check result to a session |
 | `check_summary_report` | Print a formatted summary table for a session |
@@ -236,7 +237,7 @@ A lightweight session-based reporting system for logging pass/fail checks during
 ### MultiIndex
 
 | Tool | Description |
-|||
+|---|---|
 | `multiindex_df_info` | Return shape, level values, and time range metadata for a 2-level MultiIndex DataFrame |
 
 ## Deployment Layout
@@ -244,7 +245,7 @@ A lightweight session-based reporting system for logging pass/fail checks during
 NanoClaw and the MCP server are **separate processes** — they communicate over JSON-RPC, not shared memory or a shared container.
 
 | Component | Location |
-|||
+|---|---|
 | NanoClaw agent | Docker / Apple Container |
 | `hpandas` MCP server | Host machine (Python process) |
 | Dataset (`dummy_users.csv`) | Local filesystem |
diff --git a/tutorials/NanoClaw/__init__.py b/tutorials/NanoClaw/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tutorials/NanoClaw/dataset_generator.py b/tutorials/NanoClaw/dataset_generator.py
index 912ab3aba..ef2cd5e4c 100644
--- a/tutorials/NanoClaw/dataset_generator.py
+++ b/tutorials/NanoClaw/dataset_generator.py
@@ -1,18 +1,26 @@
-import pandas as pd
+"""
+Import as:
+
+import tutorials.NanoClaw.dataset_generator as tnadagen
+"""
+
 import numpy as np
+import pandas as pd
 
 np.random.seed(42)
 
 n = 100
 
-df = pd.DataFrame({
-    "user_id": range(1, n + 1),
-    "age": np.random.normal(30, 8, n).round(0),
-    "income": np.random.normal(60000, 15000, n).round(0),
-    "spend_score": np.random.uniform(1, 100, n).round(2),
-    "country": np.random.choice(["US", "UK", "IN", "DE"], n),
-    "signup_date": pd.date_range("2024-01-01", periods=n, freq="D")
-})
+df = pd.DataFrame(
+    {
+        "user_id": range(1, n + 1),
+        "age": np.random.normal(30, 8, n).round(0),
+        "income": np.random.normal(60000, 15000, n).round(0),
+        "spend_score": np.random.uniform(1, 100, n).round(2),
+        "country": np.random.choice(["US", "UK", "IN", "DE"], n),
+        "signup_date": pd.date_range("2024-01-01", periods=n, freq="D"),
+    }
+)
 
 # Add missing values
 df.loc[np.random.choice(n, 10, replace=False), "income"] = np.nan
@@ -25,4 +33,4 @@
 # SAVE IN SAME FOLDER
 df.to_csv("dummy_users.csv")
 
-print("Saved dummy_users.csv in current folder")
\ No newline at end of file
+print("Saved dummy_users.csv in current folder")
diff --git a/tutorials/NanoClaw/hpandas_mcp_server.py b/tutorials/NanoClaw/hpandas_mcp_server.py
index 97512c8de..43d965a89 100644
--- a/tutorials/NanoClaw/hpandas_mcp_server.py
+++ b/tutorials/NanoClaw/hpandas_mcp_server.py
@@ -29,15 +29,16 @@
 import io
 import json
 import traceback
-from typing import Any, Dict, List, Optional, Union
+import typing 
 
 import numpy as np
 import pandas as pd
-from mcp.server.fastmcp import FastMCP
+import mcp.server.fastmcp  as mcp_fastmcp #import FastMCP
 
-# ---------------------------------------------------------------------------
+# -----------------------------------------------------------------------------
 # Helpers – JSON ↔ DataFrame
-# ---------------------------------------------------------------------------
+# -----------------------------------------------------------------------------
+
 
 def _df_from_json(payload: str) -> pd.DataFrame:
     """
@@ -70,11 +71,15 @@ def _df_from_json(payload: str) -> pd.DataFrame:
 
 
 def _df_to_json(df: pd.DataFrame) -> str:
-    """Serialise a DataFrame to a JSON string (records + index)."""
+    """
+    Serialise a DataFrame to a JSON string (records + index).
+    """
     return json.dumps(
         {
             "records": json.loads(
-                df.to_json(orient="records", date_format="iso", default_handler=str)
+                df.to_json(
+                    orient="records", date_format="iso", default_handler=str
+                )
             ),
             "index": [str(i) for i in df.index],
             "shape": list(df.shape),
@@ -87,7 +92,9 @@ def _df_to_json(df: pd.DataFrame) -> str:
 def _srs_to_json(srs: pd.Series) -> str:
     return json.dumps(
         {
-            "values": json.loads(srs.to_json(date_format="iso", default_handler=str)),
+            "values": json.loads(
+                srs.to_json(date_format="iso", default_handler=str)
+            ),
             "name": srs.name,
             "dtype": str(srs.dtype),
         },
@@ -96,18 +103,20 @@ def _srs_to_json(srs: pd.Series) -> str:
 
 
 def _safe(fn, *args, **kwargs):
-    """Call *fn* and return (result, error_str) tuple."""
+    """
+    Call *fn* and return (result, error_str) tuple.
+    """
     try:
         return fn(*args, **kwargs), None
     except Exception:
         return None, traceback.format_exc()
 
 
-# ---------------------------------------------------------------------------
+# -----------------------------------------------------------------------------
 # Server
-# ---------------------------------------------------------------------------
+# -----------------------------------------------------------------------------
 
-mcp = FastMCP(
+mcp = mcp_fastmcp.FastMCP(
     "hpandas",
     instructions=(
         "Tools that wrap the hpandas helper library for pandas DataFrames. "
@@ -117,9 +126,10 @@ def _safe(fn, *args, **kwargs):
 )
 
 
-# ===========================================================================
+# =============================================================================
 # ── DISPLAY ─────────────────────────────────────────────────────────────────
-# ===========================================================================
+# =============================================================================
+
 
 @mcp.tool()
 def get_df_signature(df_json: str, num_rows: int = 6) -> str:
@@ -131,8 +141,10 @@ def get_df_signature(df_json: str, num_rows: int = 6) -> str:
     :return: human-readable signature string.
     """
     df = _df_from_json(df_json)
-    txt: List[str] = [f"df.shape={df.shape}"]
-    with pd.option_context("display.max_colwidth", int(1e6), "display.max_columns", None):
+    txt: typing.List[str] = [f"df.shape={df.shape}"]
+    with pd.option_context(
+        "display.max_colwidth", int(1e6), "display.max_columns", None
+    ):
         if len(df) > num_rows:
             txt.append(f"df.head=\n{df.head(num_rows // 2)}")
             txt.append(f"df.tail=\n{df.tail(num_rows // 2)}")
@@ -144,8 +156,8 @@ def get_df_signature(df_json: str, num_rows: int = 6) -> str:
 @mcp.tool()
 def convert_df_to_json_string(
     df_json: str,
-    n_head: Optional[int] = 10,
-    n_tail: Optional[int] = 10,
+    n_head: typing.Optional[int] = 10,
+    n_tail: typing.Optional[int] = 10,
 ) -> str:
     """
     Convert a DataFrame to a pretty-printed JSON string showing head and tail.
@@ -158,33 +170,46 @@ def convert_df_to_json_string(
     df = _df_from_json(df_json)
     shape = f"original shape={df.shape}"
     head_df = df.head(n_head) if n_head is not None else df
-    head_json = head_df.to_json(orient="index", force_ascii=False, indent=4,
-                                default_handler=str, date_format="iso", date_unit="s")
+    head_json = head_df.to_json(
+        orient="index",
+        force_ascii=False,
+        indent=4,
+        default_handler=str,
+        date_format="iso",
+        date_unit="s",
+    )
     if n_tail is not None:
         tail_json = df.tail(n_tail).to_json(
-            orient="index", force_ascii=False, indent=4,
-            default_handler=str, date_format="iso", date_unit="s")
+            orient="index",
+            force_ascii=False,
+            indent=4,
+            default_handler=str,
+            date_format="iso",
+            date_unit="s",
+        )
     else:
         tail_json = ""
     return "\n".join([shape, "Head:", head_json, "Tail:", tail_json])
 
 
-# ===========================================================================
+# =============================================================================
 # ── CLEAN ───────────────────────────────────────────────────────────────────
-# ===========================================================================
+# =============================================================================
+
 
 @mcp.tool()
 def drop_duplicates(
     df_json: str,
     use_index: bool = False,
-    column_subset: Optional[List[str]] = None,
+    column_subset: typing.Optional[typing.List[str]] = None,
     keep: str = "first",
 ) -> str:
     """
     Drop duplicate rows from a DataFrame.
 
     :param df_json: DataFrame serialised with _df_to_json.
-    :param use_index: if True, the index is included when detecting duplicates.
+    :param use_index: if True, the index is included when detecting
+        duplicates.
     :param column_subset: columns to consider; None = all columns.
     :param keep: which duplicate to keep – "first", "last", or False.
     :return: deduplicated DataFrame as JSON.
@@ -208,7 +233,7 @@ def dropna(
     drop_infs: bool = False,
     axis: int = 0,
     how: str = "any",
-    subset: Optional[List[str]] = None,
+    subset: typing.Optional[typing.List[str]] = None,
 ) -> str:
     """
     Drop rows (or columns) that contain NaN values.
@@ -254,7 +279,7 @@ def drop_axis_with_all_nans(
 
 
 @mcp.tool()
-def impute_nans(df_json: str, column: str, value: Any) -> str:
+def impute_nans(df_json: str, column: str, value: typing.Any) -> str:
     """
     Replace string literal "nan" values in a column with a specified value.
 
@@ -274,8 +299,8 @@ def impute_nans(df_json: str, column: str, value: Any) -> str:
 def remove_outliers(
     df_json: str,
     lower_quantile: float,
-    column_set: Optional[List[str]] = None,
-    upper_quantile: Optional[float] = None,
+    column_set: typing.Optional[typing.List[str]] = None,
+    upper_quantile: typing.Optional[float] = None,
 ) -> str:
     """
     Clip values outside the given quantile range to NaN.
@@ -297,9 +322,10 @@ def remove_outliers(
     return _df_to_json(df)
 
 
-# ===========================================================================
+# =============================================================================
 # ── COMPARE ─────────────────────────────────────────────────────────────────
-# ===========================================================================
+# =============================================================================
+
 
 @mcp.tool()
 def compare_dfs(
@@ -314,8 +340,10 @@ def compare_dfs(
 
     :param df1_json: first DataFrame as JSON.
     :param df2_json: second DataFrame as JSON.
-    :param row_mode: "equal" (must share index) or "inner" (intersect index).
-    :param column_mode: "equal" (must share columns) or "inner" (intersect columns).
+    :param row_mode: "equal" (must share index) or "inner" (intersect
+        index).
+    :param column_mode: "equal" (must share columns) or "inner"
+        (intersect columns).
     :param diff_mode: "diff" (absolute) or "pct_change" (percentage).
     :return: diff DataFrame as JSON.
     """
@@ -362,7 +390,7 @@ def compare_nans_in_dataframes(df1_json: str, df2_json: str) -> str:
 
 
 @mcp.tool()
-def find_common_columns(names_json: str, dfs_json: List[str]) -> str:
+def find_common_columns(names_json: str, dfs_json: typing.List[str]) -> str:
     """
     Report columns shared between every pair of DataFrames.
 
@@ -376,17 +404,23 @@ def find_common_columns(names_json: str, dfs_json: List[str]) -> str:
     for i in range(len(dfs)):
         for j in range(i + 1, len(dfs)):
             common = [c for c in dfs[i].columns if c in dfs[j].columns]
-            rows.append({
-                "table1": names[i], "num_cols1": len(dfs[i].columns),
-                "table2": names[j], "num_cols2": len(dfs[j].columns),
-                "num_common": len(common), "common_cols": ", ".join(common),
-            })
+            rows.append(
+                {
+                    "table1": names[i],
+                    "num_cols1": len(dfs[i].columns),
+                    "table2": names[j],
+                    "num_cols2": len(dfs[j].columns),
+                    "num_common": len(common),
+                    "common_cols": ", ".join(common),
+                }
+            )
     return _df_to_json(pd.DataFrame(rows))
 
 
-# ===========================================================================
+# =============================================================================
 # ── CONVERSION ──────────────────────────────────────────────────────────────
-# ===========================================================================
+# =============================================================================
+
 
 @mcp.tool()
 def to_series(df_json: str, series_dtype: str = "float64") -> str:
@@ -399,7 +433,9 @@ def to_series(df_json: str, series_dtype: str = "float64") -> str:
     """
     df = _df_from_json(df_json)
     if df.shape[1] != 1:
-        raise ValueError(f"Expected a single-column DataFrame, got {df.shape[1]} columns.")
+        raise ValueError(
+            f"Expected a single-column DataFrame, got {df.shape[1]} columns."
+        )
     if df.empty:
         return _srs_to_json(pd.Series(dtype=series_dtype))
     if df.shape[0] > 1:
@@ -419,7 +455,7 @@ def infer_column_types(df_json: str) -> str:
     :return: JSON object mapping column name → type string.
     """
     df = _df_from_json(df_json)
-    result: Dict[str, str] = {}
+    result: typing.Dict[str, str] = {}
     for col in df.columns:
         is_bool = float(df[col].map(lambda x: isinstance(x, bool)).mean())
         is_num = float(pd.to_numeric(df[col], errors="coerce").notna().mean())
@@ -436,7 +472,8 @@ def infer_column_types(df_json: str) -> str:
 @mcp.tool()
 def convert_df_types(df_json: str) -> str:
     """
-    Convert every column to its detected predominant type (bool / numeric / string).
+    Convert every column to its detected predominant type (bool / numeric /
+    string).
 
     :param df_json: DataFrame as JSON.
     :return: type-converted DataFrame as JSON.
@@ -449,8 +486,15 @@ def convert_df_types(df_json: str) -> str:
         is_num = float(pd.to_numeric(s, errors="coerce").notna().mean())
         is_str = float(s.map(lambda x: isinstance(x, str)).mean())
         if is_bool >= is_num and is_bool != 0:
-            out[col] = s.map(lambda x: True if x in ["True", 1, "1", "true", True]
-                             else (False if x in [0, "0", "False", False, "false"] else None))
+            out[col] = s.map(
+                lambda x: (
+                    True
+                    if x in ["True", 1, "1", "true", True]
+                    else (
+                        False if x in [0, "0", "False", False, "false"] else None
+                    )
+                )
+            )
         elif is_num >= is_str and is_num != 0:
             out[col] = pd.to_numeric(s, errors="coerce")
         else:
@@ -472,12 +516,13 @@ def convert_col_to_int(df_json: str, col: str) -> str:
     return _df_to_json(df)
 
 
-# ===========================================================================
+# =============================================================================
 # ── DASSERT (validation) ────────────────────────────────────────────────────
-# ===========================================================================
+# =============================================================================
+
 
 @mcp.tool()
-def check_index_is_datetime(df_json: str) -> Dict[str, Any]:
+def check_index_is_datetime(df_json: str) -> typing.Dict[str, typing.Any]:
     """
     Check whether the DataFrame index is a DatetimeIndex.
 
@@ -492,12 +537,13 @@ def check_index_is_datetime(df_json: str) -> Dict[str, Any]:
 
 
 @mcp.tool()
-def check_unique_index(df_json: str) -> Dict[str, Any]:
+def check_unique_index(df_json: str) -> typing.Dict[str, typing.Any]:
     """
     Check whether the DataFrame index contains duplicates.
 
     :param df_json: DataFrame as JSON.
-    :return: {"is_unique": bool, "num_duplicates": int, "duplicate_values": list}.
+    :return: {"is_unique": bool, "num_duplicates": int,
+        "duplicate_values": list}.
     """
     df = _df_from_json(df_json)
     dups = df.index[df.index.duplicated(keep=False)].tolist()
@@ -509,28 +555,32 @@ def check_unique_index(df_json: str) -> Dict[str, Any]:
 
 
 @mcp.tool()
-def check_increasing_index(df_json: str) -> Dict[str, Any]:
+def check_increasing_index(df_json: str) -> typing.Dict[str, typing.Any]:
     """
     Check whether the DataFrame index is monotonically increasing.
 
     :param df_json: DataFrame as JSON.
-    :return: {"is_monotonic_increasing": bool, "is_strictly_increasing": bool}.
+    :return: {"is_monotonic_increasing": bool, "is_strictly_increasing":
+        bool}.
     """
     df = _df_from_json(df_json)
     return {
         "is_monotonic_increasing": bool(df.index.is_monotonic_increasing),
-        "is_strictly_increasing": bool(df.index.is_monotonic_increasing and df.index.is_unique),
+        "is_strictly_increasing": bool(
+            df.index.is_monotonic_increasing and df.index.is_unique
+        ),
     }
 
 
 @mcp.tool()
-def check_axes_equal(df1_json: str, df2_json: str) -> Dict[str, Any]:
+def check_axes_equal(df1_json: str, df2_json: str) -> typing.Dict[str, typing.Any]:
     """
     Check whether two DataFrames share identical indices and columns.
 
     :param df1_json: first DataFrame as JSON.
     :param df2_json: second DataFrame as JSON.
-    :return: dict with "index_equal", "columns_equal" booleans and difference lists.
+    :return: dict with "index_equal", "columns_equal" booleans and
+        difference lists.
     """
     df1 = _df_from_json(df1_json)
     df2 = _df_from_json(df2_json)
@@ -539,20 +589,25 @@ def check_axes_equal(df1_json: str, df2_json: str) -> Dict[str, Any]:
     return {
         "index_equal": idx_eq,
         "columns_equal": col_eq,
-        "index_only_in_df1": [str(x) for x in df1.index.difference(df2.index)[:10]],
-        "index_only_in_df2": [str(x) for x in df2.index.difference(df1.index)[:10]],
+        "index_only_in_df1": [
+            str(x) for x in df1.index.difference(df2.index)[:10]
+        ],
+        "index_only_in_df2": [
+            str(x) for x in df2.index.difference(df1.index)[:10]
+        ],
         "columns_only_in_df1": list(df1.columns.difference(df2.columns)),
         "columns_only_in_df2": list(df2.columns.difference(df1.columns)),
     }
 
 
 @mcp.tool()
-def check_series_dtype(series_json: str, expected_dtype: str) -> Dict[str, Any]:
+def check_series_dtype(series_json: str, expected_dtype: str) -> typing.Dict[str, typing.Any]:
     """
     Check whether a Series has the expected dtype.
 
     :param series_json: Series serialised by _srs_to_json.
-    :param expected_dtype: dtype string to check against, e.g. "float64".
+    :param expected_dtype: dtype string to check against, e.g.
+        "float64".
     :return: {"matches": bool, "actual_dtype": str}.
     """
     data = json.loads(series_json)
@@ -563,16 +618,17 @@ def check_series_dtype(series_json: str, expected_dtype: str) -> Dict[str, Any]:
     }
 
 
-# ===========================================================================
+# =============================================================================
 # ── TRANSFORM ───────────────────────────────────────────────────────────────
-# ===========================================================================
+# =============================================================================
+
 
 @mcp.tool()
 def trim_df(
     df_json: str,
-    start_ts: Optional[str] = None,
-    end_ts: Optional[str] = None,
-    ts_col_name: Optional[str] = None,
+    start_ts: typing.Optional[str] = None,
+    end_ts: typing.Optional[str] = None,
+    ts_col_name: typing.Optional[str] = None,
     left_close: bool = True,
     right_close: bool = True,
 ) -> str:
@@ -621,10 +677,10 @@ def merge_dfs(
     df1_json: str,
     df2_json: str,
     how: str = "outer",
-    on: Optional[List[str]] = None,
-    left_on: Optional[List[str]] = None,
-    right_on: Optional[List[str]] = None,
-    suffixes: Optional[List[str]] = None,
+    on: typing.Optional[typing.List[str]] = None,
+    left_on: typing.Optional[typing.List[str]] = None,
+    right_on: typing.Optional[typing.List[str]] = None,
+    suffixes: typing.Optional[typing.List[str]] = None,
 ) -> str:
     """
     Merge two DataFrames (wrapper around pd.merge).
@@ -641,8 +697,9 @@ def merge_dfs(
     df1 = _df_from_json(df1_json)
     df2 = _df_from_json(df2_json)
     sfx = tuple(suffixes) if suffixes else ("_x", "_y")
-    merged = pd.merge(df1, df2, how=how, on=on, left_on=left_on,
-                      right_on=right_on, suffixes=sfx)
+    merged = pd.merge(
+        df1, df2, how=how, on=on, left_on=left_on, right_on=right_on, suffixes=sfx
+    )
     return _df_to_json(merged)
 
 
@@ -650,7 +707,7 @@ def merge_dfs(
 def filter_df(
     df_json: str,
     filter_col: str,
-    filter_values: List[Any],
+    filter_values: typing.List[typing.Any],
     mode: str = "keep",
 ) -> str:
     """
@@ -659,7 +716,8 @@ def filter_df(
     :param df_json: DataFrame as JSON.
     :param filter_col: column whose values are tested.
     :param filter_values: list of values to match.
-    :param mode: "keep" (rows matching filter_values) or "drop" (rows not matching).
+    :param mode: "keep" (rows matching filter_values) or "drop" (rows
+        not matching).
     :return: filtered DataFrame as JSON.
     """
     df = _df_from_json(df_json)
@@ -670,7 +728,7 @@ def filter_df(
 
 
 @mcp.tool()
-def remove_columns(df_json: str, columns: List[str]) -> str:
+def remove_columns(df_json: str, columns:  typing.List[str]) -> str:
     """
     Drop specified columns from a DataFrame.
 
@@ -724,9 +782,10 @@ def subset_df(df_json: str, nrows: int, seed: int = 42) -> str:
     return _df_to_json(df.sample(n, random_state=seed))
 
 
-# ===========================================================================
+# =============================================================================
 # ── UTILS ───────────────────────────────────────────────────────────────────
-# ===========================================================================
+# =============================================================================
+
 
 @mcp.tool()
 def df_to_str(df_json: str, num_rows: int = 6) -> str:
@@ -739,9 +798,12 @@ def df_to_str(df_json: str, num_rows: int = 6) -> str:
     """
     df = _df_from_json(df_json)
     with pd.option_context(
-        "display.max_columns", None,
-        "display.max_colwidth", 200,
-        "display.width", 10000,
+        "display.max_columns",
+        None,
+        "display.max_colwidth",
+        200,
+        "display.width",
+        10000,
     ):
         if len(df) <= num_rows:
             return df.to_string()
@@ -794,13 +856,15 @@ def find_gaps_in_time_series(
 @mcp.tool()
 def resolve_column_names(
     df_json: str,
-    column_set: Optional[Union[str, List[str]]] = None,
-) -> List[str]:
+    column_set: typing.Optional[typing.Union[str, typing.List[str]]] = None,
+) -> typing.List[str]:
     """
     Resolve a column specification to a concrete list of column names.
 
-    :param df_json: DataFrame as JSON (used to validate column existence).
-    :param column_set: None = all columns, str = single column, list = subset.
+    :param df_json: DataFrame as JSON (used to validate column
+        existence).
+    :param column_set: None = all columns, str = single column, list =
+        subset.
     :return: resolved list of column names.
     """
     df = _df_from_json(df_json)
@@ -815,9 +879,10 @@ def resolve_column_names(
     return column_set
 
 
-# ===========================================================================
+# =============================================================================
 # ── MULTI-INDEX ─────────────────────────────────────────────────────────────
-# ===========================================================================
+# =============================================================================
+
 
 @mcp.tool()
 def multiindex_df_info(df_json: str) -> str:
@@ -848,14 +913,15 @@ def multiindex_df_info(df_json: str) -> str:
     return "\n".join(lines)
 
 
-# ===========================================================================
+# =============================================================================
 # ── IO ──────────────────────────────────────────────────────────────────────
-# ===========================================================================
+# =============================================================================
+
 
 @mcp.tool()
 def read_csv(
     file_path: str,
-    index_col: Optional[Union[int, str]] = None,
+    index_col: typing.Optional[typing.Union[int, str]] = None,
     parse_dates: bool = True,
 ) -> str:
     """
@@ -866,7 +932,7 @@ def read_csv(
     :param parse_dates: attempt to parse the index as dates.
     :return: DataFrame as JSON.
     """
-    kwargs: Dict[str, Any] = {}
+    kwargs: typing.Dict[str, typing.Any] = {}
     if index_col is not None:
         kwargs["index_col"] = index_col
     if parse_dates:
@@ -920,9 +986,10 @@ def write_parquet(df_json: str, file_path: str) -> str:
     return f"Saved {df.shape[0]} rows × {df.shape[1]} columns to '{file_path}'"
 
 
-# ===========================================================================
+# =============================================================================
 # ── ANALYSIS ────────────────────────────────────────────────────────────────
-# ===========================================================================
+# =============================================================================
+
 
 @mcp.tool()
 def rolling_corr_over_time(
@@ -953,12 +1020,13 @@ def rolling_corr_over_time(
 
 
 @mcp.tool()
-def describe_df(df_json: str, percentiles: Optional[List[float]] = None) -> str:
+def describe_df(df_json: str, percentiles: typing.Optional[typing.List[float]] = None) -> str:
     """
     Return descriptive statistics for a DataFrame (wrapper of df.describe()).
 
     :param df_json: DataFrame as JSON.
-    :param percentiles: list of percentiles to include, e.g. [0.1, 0.5, 0.9].
+    :param percentiles: list of percentiles to include, e.g. [0.1, 0.5,
+        0.9].
     :return: describe DataFrame as JSON.
     """
     df = _df_from_json(df_json)
@@ -975,10 +1043,10 @@ def print_column_variability(df_json: str) -> str:
     :return: JSON object mapping column → {nunique, cv, dtype}.
     """
     df = _df_from_json(df_json)
-    result: Dict[str, Any] = {}
+    result: typing.Dict[str, typing.Any] = {}
     for col in df.columns:
         s = df[col]
-        info: Dict[str, Any] = {
+        info: typing.Dict[str, typing.Any] = {
             "nunique": int(s.nunique()),
             "dtype": str(s.dtype),
         }
@@ -989,12 +1057,12 @@ def print_column_variability(df_json: str) -> str:
     return json.dumps(result, indent=2)
 
 
-# ===========================================================================
+# =============================================================================
 # ── CHECK SUMMARY ───────────────────────────────────────────────────────────
-# ===========================================================================
+# =============================================================================
 
 # In-process store for CheckSummary objects keyed by a session_id string.
-_SUMMARIES: Dict[str, Any] = {}
+_SUMMARIES: typing.Dict[str, typing.Any] = {}
 
 
 @mcp.tool()
@@ -1027,7 +1095,9 @@ def check_summary_add(
     :return: confirmation message.
     """
     if session_id not in _SUMMARIES:
-        raise KeyError(f"Session '{session_id}' not found. Call check_summary_create first.")
+        raise KeyError(
+            f"Session '{session_id}' not found. Call check_summary_create first."
+        )
     _SUMMARIES[session_id]["rows"].append(
         {"description": description, "comment": comment, "is_ok": is_ok}
     )
@@ -1057,9 +1127,9 @@ def check_summary_report(session_id: str) -> str:
     return "\n".join(report_lines)
 
 
-# ===========================================================================
+# =============================================================================
 # ── Entry point ─────────────────────────────────────────────────────────────
-# ===========================================================================
+# =============================================================================
 
 if __name__ == "__main__":
-    mcp.run(transport="stdio")
\ No newline at end of file
+    mcp.run(transport="stdio")

From af4d845a9ad9cdf09adca06bd4a39862cb1e5975 Mon Sep 17 00:00:00 2001
From: Pranav Shashidhara <pranavmay22@gmail.com>
Date: Fri, 24 Apr 2026 10:31:02 -0400
Subject: [PATCH 4/5] Add Blog
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks:
All checks passed ✅
---
 .../docs/blog/posts/NanoClaw_in_60_mins.md    | 116 ++++++++++++++++++
 1 file changed, 116 insertions(+)
 create mode 100644 website/docs/blog/posts/NanoClaw_in_60_mins.md

diff --git a/website/docs/blog/posts/NanoClaw_in_60_mins.md b/website/docs/blog/posts/NanoClaw_in_60_mins.md
new file mode 100644
index 000000000..282c99492
--- /dev/null
+++ b/website/docs/blog/posts/NanoClaw_in_60_mins.md
@@ -0,0 +1,116 @@
+---
+title: "NanoClaw: Agentic EDA with MCP Tools in 60 Minutes"
+authors:
+  - PranavShashidhara
+  - gpsaggese
+date: 2026-04-24
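+description: Build an agentic exploratory data analysis (EDA) system with a Claude agent and a custom pandas MCP server.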
+categories:
+  - AI Research
+  - Software Engineering
+---
+
+TL;DR: Learn how to build an agentic exploratory data analysis (EDA) system
+using Claude and a custom MCP server — no manual pandas scripting required.
+
+<!-- more -->
+
+## Tutorial in 30 Seconds
+
+NanoClaw is an end-to-end EDA system powered by a Claude agent and a custom
+Python MCP server called `hpandas`. Instead of writing pandas code manually,
+you describe your analysis in plain English — the agent translates it into
+structured tool calls against your dataset in real time.
+
+Key capabilities:
+
+- **Natural language EDA**: Describe tasks like filtering, aggregation, or
+  missing value detection — Claude handles the rest
+- **MCP tool architecture**: All pandas operations are exposed as structured
+  JSON-RPC tools under the `mcp__hpandas__*` namespace
+- **Modular design**: The Claude agent and MCP server run as separate processes,
+  communicating over JSON-RPC — not shared memory
+- **Extensible toolset**: 30+ tools covering I/O, cleaning, filtering,
+  transformation, type conversion, and validation
+
+This tutorial's goal is to show you in 60 minutes:
+
+- How to wire a Claude agent (running inside a NanoClaw container) to a custom
+  Python MCP server on your host machine
+- Concrete examples of natural language EDA prompts against a real dataset
+
+## Official References
+
+- [NanoClaw GitHub repo](https://github.com/qwibitai/nanoclaw)
+
+## Tutorial Content
+
+All the material for this tutorial, including the MCP server and the dataset
+generator, lives in `tutorials/NanoClaw`:
+
+- [`README.md`](../../../../tutorials/NanoClaw/README.md):
+  Full setup instructions for the NanoClaw environment
+- [`hpandas_mcp_server.py`](../../../../tutorials/NanoClaw/hpandas_mcp_server.py): The custom
+  MCP server that exposes pandas operations as JSON-RPC tools
+- [`dataset_generator.py`](../../../../tutorials/NanoClaw/dataset_generator.py): Generates
+  `dummy_users.csv` — the sample dataset used throughout the tutorial
+- `.mcp.json` configuration: Registers `hpandas` with the NanoClaw agent so
+  tools are available under the `mcp__hpandas__*` namespace
+
+### System Architecture
+
+```text
+You (natural language prompt)
+        │
+        ▼
+Claude Agent (NanoClaw container)
+        │  tool call
+        ▼
+MCP Client (NanoClaw)
+        │  JSON-RPC
+        ▼
+hpandas MCP Server (host Python process)
+        │
+        ▼
+pandas → results returned to agent
+```
+
+### Example EDA Prompts
+
+Once the system is running, interact with your dataset entirely in natural
+language:
+
+| Task | Prompt |
+|---|---|
+| Overview | `"Describe the dataset"` |
+| Data quality | `"Find missing values"` |
+| Filtering | `"Show users with income > 50k"` |
+| Aggregation | `"Average spend score by country"` |
+
+### MCP Tool Categories
+
+The `hpandas` server exposes 30+ tools across seven categories:
+
+- **I/O**: `read_csv`, `read_parquet`, `write_csv`, `write_parquet`
+- **Display**: `df_to_str`, `get_df_signature`, `convert_df_to_json_string`
+- **Cleaning**: `drop_duplicates`, `dropna`, `impute_nans`, `remove_outliers`
+- **Analysis**: `describe_df`, `print_column_variability`, `rolling_corr_over_time`
+- **Filtering & Transformation**: `filter_df`, `merge_dfs`, `resample_df`, `add_pct`
+- **Type Conversion**: `infer_column_types`, `convert_df_types`, `convert_col_to_int`
+- **Validation**: `check_index_is_datetime`, `resolve_column_names`
+
+## Quick Demo Flow
+
+```bash
+gh repo fork qwibitai/nanoclaw --clone
+cd nanoclaw
+claude
+# → /setup
+
+# Generate the dataset (run from the directory that contains your umd_msml610 checkout)
+cd umd_msml610/tutorials/NanoClaw
+python dataset_generator.py
+
+# Inside Claude Code:
+# "Load dummy_users.csv and analyze it using hpandas MCP server"
+```
\ No newline at end of file

From a7a961a85c18603c368eb165bffeb49001a0a822 Mon Sep 17 00:00:00 2001
From: Pranav Shashidhara <pranavmay22@gmail.com>
Date: Mon, 4 May 2026 10:28:01 -0400
Subject: [PATCH 5/5] Update README.md

---
 tutorials/NanoClaw/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tutorials/NanoClaw/README.md b/tutorials/NanoClaw/README.md
index f2619b718..099d6f9d1 100644
--- a/tutorials/NanoClaw/README.md
+++ b/tutorials/NanoClaw/README.md
@@ -1,4 +1,4 @@
-# Nanoclaw Tutorial - Agentic EDA System Using MCP Tools
+# NanoClaw
 
 An end-to-end exploratory data analysis system powered by a Claude agent (NanoClaw) and a custom MCP server (`hpandas`). Instead of writing pandas code manually, you describe your analysis in plain English — the agent translates it into tool calls against your dataset in real time.