diff --git a/OpenAI_Open_Source_Pro_Mode.ipynb b/OpenAI_Open_Source_Pro_Mode_Groq.ipynb
similarity index 100%
rename from OpenAI_Open_Source_Pro_Mode.ipynb
rename to OpenAI_Open_Source_Pro_Mode_Groq.ipynb
diff --git a/OpenAI_Open_Source_Pro_Mode_Ollama_Local.ipynb b/OpenAI_Open_Source_Pro_Mode_Ollama_Local.ipynb
new file mode 100644
index 0000000..fe4a172
--- /dev/null
+++ b/OpenAI_Open_Source_Pro_Mode_Ollama_Local.ipynb
@@ -0,0 +1,421 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "view-in-github"
+ },
+ "source": [
+    ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3LLlSvTtU0Y3"
+ },
+ "source": [
+ "Made by Matt Shumer ([@mattshumer_](https://x.com/mattshumer_) on X)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "cellView": "form",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "dzWS8LaqUw3N",
+ "outputId": "d4031e82-2131-4208-8f62-3923baca7cc4"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: ollama in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (0.4.5)\n",
+ "Requirement already satisfied: httpx<0.28.0,>=0.27.0 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from ollama) (0.27.2)\n",
+ "Requirement already satisfied: pydantic<3.0.0,>=2.9.0 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from ollama) (2.11.7)\n",
+ "Requirement already satisfied: anyio in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (4.9.0)\n",
+ "Requirement already satisfied: certifi in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (2024.8.30)\n",
+ "Requirement already satisfied: httpcore==1.* in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (1.0.5)\n",
+ "Requirement already satisfied: idna in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (3.8)\n",
+ "Requirement already satisfied: sniffio in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (1.3.1)\n",
+ "Requirement already satisfied: h11<0.15,>=0.13 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpcore==1.*->httpx<0.28.0,>=0.27.0->ollama) (0.14.0)\n",
+ "Requirement already satisfied: annotated-types>=0.6.0 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.9.0->ollama) (0.7.0)\n",
+ "Requirement already satisfied: pydantic-core==2.33.2 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.9.0->ollama) (2.33.2)\n",
+ "Requirement already satisfied: typing-extensions>=4.12.2 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.9.0->ollama) (4.12.2)\n",
+ "Requirement already satisfied: typing-inspection>=0.4.0 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.9.0->ollama) (0.4.1)\n",
+ "\n",
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m25.1.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.2\u001b[0m\n",
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
+ ]
+ }
+ ],
+ "source": [
+ "# @title Run this cell to set up Pro Mode\n",
+ "!pip3 install ollama\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "from typing import List, Dict, Any\n",
+ "import time, os\n",
+ "import concurrent.futures as cf\n",
+ "import ollama\n",
+ "\n",
+ "MODEL = \"gpt-oss:120b\"\n",
+    "MAX_COMPLETION_TOKENS = 30000  # generation cap, passed to Ollama as the num_predict option\n",
+ "\n",
+ "\n",
+    "def _one_completion(client: ollama.Client, question: str, temperature: float) -> str:\n",
+    "    \"\"\"\n",
+    "    Send a question to the Ollama chat endpoint and return the response text.\n",
+    "    Retries up to three times with exponential backoff on failure.\n",
+    "    \"\"\"\n",
+ " delay = 0.5\n",
+ " for attempt in range(3):\n",
+ " try:\n",
+ " response = client.chat(\n",
+ " model=MODEL,\n",
+ " messages=[\n",
+ " {'role': 'user', 'content': question},\n",
+ " ],\n",
+    "            options={'temperature': temperature, 'num_predict': MAX_COMPLETION_TOKENS}\n",
+ " )\n",
+ " return response['message']['content']\n",
+    "        except Exception:\n",
+ " if attempt == 2:\n",
+ " raise\n",
+ " time.sleep(delay)\n",
+ " delay *= 2\n",
+ "\n",
+ "\n",
+ "def _build_synthesis_messages(candidates: List[str]) -> List[Dict[str, str]]:\n",
+ " numbered = \"\\n\\n\".join(\n",
+    "        f\"<candidate {i+1}>\\n{txt}\\n</candidate {i+1}>\" for i, txt in enumerate(candidates)\n",
+ " )\n",
+ " system = (\n",
+ " \"You are an expert editor. Synthesize ONE best answer from the candidate \"\n",
+ " \"answers provided, merging strengths, correcting errors, and removing repetition. \"\n",
+ " \"Do not mention the candidates or the synthesis process. Be decisive and clear.\"\n",
+ " )\n",
+ " user = (\n",
+    "        f\"You are given {len(candidates)} candidate answers, each delimited by <candidate i> tags.\\n\\n\"\n",
+ " f\"{numbered}\\n\\nReturn the single best final answer.\"\n",
+ " )\n",
+ " return [{\"role\": \"system\", \"content\": system},\n",
+ " {\"role\": \"user\", \"content\": user}]\n",
+ "\n",
+    "def pro_mode(client: ollama.Client, prompt: str, n_runs: int) -> Dict[str, Any]:\n",
+    "    \"\"\"\n",
+    "    Fan out n_runs parallel generations at T=0.9 and synthesize a final answer at T=0.2.\n",
+    "    Returns: {\"final\": str, \"candidates\": List[str]}\n",
+    "    \"\"\"\n",
+ " assert n_runs >= 1, \"n_runs must be >= 1\"\n",
+ "\n",
+ " # Parallel candidate generations (threaded; Colab-friendly)\n",
+ " max_workers = min(n_runs, 16)\n",
+ " candidates: List[str] = [None] * n_runs # preserve order\n",
+ " with cf.ThreadPoolExecutor(max_workers=max_workers) as ex:\n",
+ " fut_to_idx = {\n",
+ " ex.submit(_one_completion, client, prompt, 0.9): i\n",
+ " for i in range(n_runs)\n",
+ " }\n",
+ " for fut in cf.as_completed(fut_to_idx):\n",
+ " i = fut_to_idx[fut]\n",
+ " candidates[i] = fut.result()\n",
+ "\n",
+ " # Synthesis pass\n",
+ " messages = _build_synthesis_messages(candidates)\n",
+ " final_resp = client.chat(\n",
+ " model=MODEL,\n",
+ " messages=messages,\n",
+    "        options={'temperature': 0.2, 'num_predict': MAX_COMPLETION_TOKENS}\n",
+ " )\n",
+ " final = final_resp['message']['content']\n",
+ "\n",
+ " return {\"final\": final, \"candidates\": candidates}\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "id": "OoBF5UbSVksz"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "=== FINAL ===\n",
+ " **Self‑Play in Reinforcement Learning**\n",
+ "\n",
+ "Self‑play is a training paradigm in which an RL agent learns by repeatedly playing against a copy of itself (or against a mixture of its past copies). Because the opponent’s strength automatically tracks the learner’s strength, the task provides a **self‑generated curriculum**: the game is never too easy and never hopelessly hard. The only requirement is a simulator of the environment; no external data or hand‑crafted opponent is needed.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 1. Core Loop\n",
+ "\n",
+ "| Step | What happens | Why it matters |\n",
+ "|------|--------------|----------------|\n",
+ "| **1️⃣ Initialise** | Randomly initialise a policy (and optionally a value) network θ. | Gives the agent a starting point. |\n",
+ "| **2️⃣ Create opponent** | Keep a *lagged* copy θ̄ (the “target” network) that will act as the opponent. | Prevents the opponent from being identical at every move, which would give no learning signal. |\n",
+ "| **3️⃣ Self‑play** | Play many episodes where the current policy θ and the opponent θ̄ alternate turns, recording every (state, action, reward, done) tuple. | Generates the data on which the agent will improve. |\n",
+ "| **4️⃣ Compute targets** | From the trajectories compute returns (or advantages) and, if using a value head, a baseline. | Provides the supervised signal for policy‑gradient or value‑based updates. |\n",
+ "| **5️⃣ Update θ** | Perform an RL update (PPO, A2C, REINFORCE + baseline, or AlphaZero‑style loss) on the collected data. | Improves the policy so that actions leading to higher returns become more likely. |\n",
+ "| **6️⃣ Refresh opponent** | Every *K* training steps copy the updated θ into θ̄ (or perform a soft update θ̄←τθ+(1‑τ)θ̄). | Keeps the opponent slightly behind the learner, preserving a moving‑target curriculum. |\n",
+ "| **7️⃣ Evaluate** | Periodically test θ against a simple baseline (random, minimax, or the previous version) and log win‑rates. | Gives an external sanity check and a metric of progress. |\n",
+ "\n",
+ "The loop repeats until performance plateaus or a desired level is reached.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 2. Concrete Example – Connect‑4 with PPO\n",
+ "\n",
+ "Below is a compact, runnable‑style pseudocode that demonstrates the full pipeline for the classic two‑player board game **Connect‑4** (7 × 6 grid, win by aligning four discs). The same structure works for any turn‑based, zero‑sum game.\n",
+ "\n",
+ "```python\n",
+ "# -------------------------------------------------\n",
+ "# 1. Initialise networks\n",
+ "# -------------------------------------------------\n",
+ "policy_net = PolicyCNN() # outputs logits for 7 columns\n",
+ "optimizer = torch.optim.Adam(policy_net.parameters(), lr=1e-3)\n",
+ "\n",
+ "# -------------------------------------------------\n",
+ "# 2. Lagged opponent (target network)\n",
+ "# -------------------------------------------------\n",
+ "target_net = copy.deepcopy(policy_net) # frozen copy\n",
+ "\n",
+ "# -------------------------------------------------\n",
+ "# 3. Hyper‑parameters\n",
+ "# -------------------------------------------------\n",
+ "NUM_SELFPLAY_GAMES = 1000 # games per training epoch\n",
+ "UPDATE_EVERY = 200 # refresh opponent every 200 epochs\n",
+ "BATCH_SIZE = 64\n",
+ "CLIP_EPS = 0.2 # PPO clipping\n",
+ "GAMMA = 0.99\n",
+ "\n",
+ "# -------------------------------------------------\n",
+ "# 4. Experience buffer\n",
+ "# -------------------------------------------------\n",
+ "replay = [] # list of (state, action, reward, done)\n",
+ "\n",
+ "def play_one_game(policy, opponent):\n",
+ " \"\"\"Play a full Connect‑4 game, store (s,a,r,done) from the learner’s view.\"\"\"\n",
+ " env = Connect4Env()\n",
+ " traj = []\n",
+ " player = 1 # 1 = policy, -1 = opponent\n",
+ "\n",
+ " while not env.done:\n",
+ " # Choose action with the appropriate network\n",
+ " net = policy if player == 1 else opponent\n",
+ " logits = net(env.board_tensor())\n",
+ " dist = torch.distributions.Categorical(logits=logits)\n",
+ " action = dist.sample().item()\n",
+ "\n",
+ " # Step the environment (reward is from the *current* player’s perspective)\n",
+ " next_state, reward, done, _ = env.step(action, player)\n",
+ "\n",
+ " traj.append((env.board_tensor(), action, reward, done))\n",
+ " player *= -1 # swap turn\n",
+ "\n",
+ " # Convert opponent‑reward to learner‑reward and store\n",
+ " for i, (s,a,r,d) in enumerate(traj):\n",
+ " if i % 2 == 1: # opponent’s move\n",
+ " r = -r\n",
+ " replay.append((s, a, r, d))\n",
+ "\n",
+ " return traj\n",
+ "\n",
+ "# -------------------------------------------------\n",
+ "# 5. Main training loop\n",
+ "# -------------------------------------------------\n",
+ "for epoch in range(1, 10001):\n",
+ " # ---- Self‑play -------------------------------------------------\n",
+ " for _ in range(NUM_SELFPLAY_GAMES):\n",
+ " play_one_game(policy_net, target_net)\n",
+ "\n",
+ " # ---- PPO update ------------------------------------------------\n",
+ " # Sample a random minibatch\n",
+ " batch = random.sample(replay, BATCH_SIZE)\n",
+ " states, actions, rewards, dones = zip(*batch)\n",
+ "\n",
+ " states = torch.stack(states) # (B, C, H, W)\n",
+ " actions = torch.tensor(actions)\n",
+ " rewards = torch.tensor(rewards, dtype=torch.float)\n",
+ "\n",
+ " # Compute discounted returns\n",
+ " returns = []\n",
+ " G = 0.0\n",
+ " for r in reversed(rewards):\n",
+ " G = r + GAMMA * G\n",
+ " returns.insert(0, G)\n",
+ " returns = torch.tensor(returns)\n",
+ "\n",
+ " # Policy forward pass\n",
+ " logits = policy_net(states) # (B, 7)\n",
+ " logp = torch.distributions.Categorical(logits=logits)\\\n",
+ " .log_prob(actions)\n",
+ "\n",
+ " # PPO surrogate loss\n",
+ " old_logp = logp.detach()\n",
+ " ratio = torch.exp(logp - old_logp)\n",
+ " surr1 = ratio * (returns - returns.mean()) # advantage ≈ return‑baseline\n",
+ " surr2 = torch.clamp(ratio, 1-CLIP_EPS, 1+CLIP_EPS) * (returns - returns.mean())\n",
+ " loss_pi = -torch.min(surr1, surr2).mean()\n",
+ "\n",
+ " optimizer.zero_grad()\n",
+ " loss_pi.backward()\n",
+ " optimizer.step()\n",
+ "\n",
+ " # ---- Refresh opponent -------------------------------------------\n",
+ " if epoch % UPDATE_EVERY == 0:\n",
+ " target_net = copy.deepcopy(policy_net)\n",
+ "\n",
+ " # ---- Logging ----------------------------------------------------\n",
+ " if epoch % 100 == 0:\n",
+ " win_rate = evaluate_against_random(policy_net, n=200)\n",
+ " print(f\"Epoch {epoch:5d} PPO loss {loss_pi.item():.4f} vs‑random win {win_rate:.2%}\")\n",
+ "\n",
+ " # ---- Optional buffer trimming to keep memory bounded ------------\n",
+ " if len(replay) > 50000:\n",
+ " replay = replay[-25000:]\n",
+ "```\n",
+ "\n",
+ "**Key points illustrated by the code**\n",
+ "\n",
+ "| Part of the loop | Self‑play role |\n",
+ "|------------------|----------------|\n",
+ "| `play_one_game` | Two copies of the same algorithm (`policy_net` vs. `target_net`) generate the experience. |\n",
+ "| Reward sign flip (`r = -r` on opponent moves) | Guarantees that the learner receives a reward signal that reflects the zero‑sum nature of the game. |\n",
+ "| PPO clipping (`CLIP_EPS`) | Stabilises policy updates while still allowing improvement. |\n",
+ "| Periodic copy of `policy_net` into `target_net` | Provides the “slightly older” opponent that keeps the curriculum moving. |\n",
+ "| Evaluation against a random player | Gives a quick sanity check of learning progress. |\n",
+ "\n",
+ "After a few hundred thousand self‑play games the agent converges to a **near‑optimal Connect‑4 strategy**: it never loses and wins whenever the opponent makes a mistake.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 3. AlphaZero Blueprint (MCTS + Neural Nets)\n",
+ "\n",
+ "For larger games (Go, Chess, Shogi) the same self‑play loop is enriched with **Monte‑Carlo Tree Search (MCTS)**:\n",
+ "\n",
+ "1. **Policy head** supplies a prior over legal moves; MCTS uses these priors to bias rollouts.\n",
+ "2. **Search** is run for each move, producing a *search‑enhanced* policy distribution π\\*_t (the proportion of visits to each child).\n",
+ "3. The training target for the policy network is the **search‑enhanced distribution**, while the value head is trained on the game outcome.\n",
+ "4. The loss (used in AlphaZero) is \n",
+ "\n",
+ "\\[\n",
+ "L = (z - V_\\phi(s))^2 \\;-\\; \\pi^\\*_t \\cdot \\log \\pi_\\theta(a|s) \\;+\\; c \\| \\theta \\|^2,\n",
+ "\\]\n",
+ "\n",
+ "where *z* is the final game result (±1 or 0). \n",
+ "5. The opponent is simply the **latest checkpoint** of the same network (or a soft‑updated mixture of several past checkpoints).\n",
+ "\n",
+ "This combination of **self‑play + MCTS + deep nets** is what enabled AlphaGo Zero, AlphaZero, and MuZero to achieve superhuman performance from scratch.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 4. Why Self‑Play Works\n",
+ "\n",
+ "| Benefit | Explanation |\n",
+ "|---------|-------------|\n",
+ "| **Automatic curriculum** | The opponent improves together with the learner, so the difficulty scales naturally. |\n",
+ "| **No external data** | The agent creates its own training set; only a simulator is needed. |\n",
+ "| **Symmetry & fairness** | Both sides share the same inductive biases, eliminating bias from a hand‑crafted opponent. |\n",
+ "| **Domain‑agnostic** | Works for any environment with a simulator—board games, video games, multi‑agent tasks, robotics competitions, etc. |\n",
+ "| **Proven track record** | TD‑Gammon → AlphaGo/AlphaZero → OpenAI Five → MuZero. |\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 5. Common Pitfalls & Stabilisation Tricks\n",
+ "\n",
+ "| Pitfall | Mitigation |\n",
+ "|---------|------------|\n",
+ "| **Cyclic policies** (the learner only beats its immediate predecessor) | Keep a *population* of the last K checkpoints and sample one as the opponent each game. |\n",
+ "| **High variance in policy‑gradient updates** | Use a value baseline, advantage estimation, or entropy regularisation. |\n",
+ "| **Instability when the opponent is exactly equal** | Refresh the opponent **slowly** (soft update with τ ≈ 0.01) or keep it a few steps behind. |\n",
+ "| **Sparse rewards** (only at the end of the episode) | Combine self‑play with MCTS, epsilon‑greedy exploration, or add a small shaping reward for progress. |\n",
+ "| **Over‑fitting to a single opponent** | Randomise which copy starts (X vs. O) and optionally randomise the opponent from a buffer of past policies. |\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 6. Checklist for a Self‑Play Project\n",
+ "\n",
+ "1. **Environment** – deterministic or stochastic simulator, turn‑based or simultaneous actions. \n",
+ "2. **Network architecture** – policy head (softmax over legal moves) and optionally a value head; for board games a small CNN works well. \n",
+ "3. **Opponent handling** – lagged copy, soft update, or a replay buffer of past policies. \n",
+ "4. **RL algorithm** – PPO/A2C for on‑policy updates, REINFORCE + baseline for simplicity, or AlphaZero loss for MCTS‑augmented training. \n",
+ "5. **Exploration** – entropy bonus, temperature annealing, or MCTS depth limits for early stages. \n",
+ "6. **Training schedule** – number of self‑play games per epoch, batch size, opponent refresh frequency. \n",
+ "7. **Evaluation** – against random/minimax players and against previous checkpoints; log win‑rates, loss curves, and compute Elo if appropriate. \n",
+ "8. **Compute resources** – self‑play can be parallelised across many CPU cores or GPUs; MCTS adds CPU load, while deep‑net inference can be batched on GPUs.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 7. TL;DR\n",
+ "\n",
+ "* **Self‑play** lets an RL agent generate its own training data by competing against a slightly older copy of itself. \n",
+ "* The opponent’s skill automatically tracks the learner’s skill, providing a **self‑adjusting curriculum** without any human data. \n",
+ "* A minimal implementation (e.g., Connect‑4 with PPO) follows the loop: initialise → lagged opponent → self‑play → compute returns → update policy → refresh opponent → evaluate. \n",
+ "* When combined with Monte‑Carlo Tree Search and a value head, this loop becomes the **AlphaZero** algorithm that has mastered Chess, Go, Shogi, and many other complex games. \n",
+ "* Benefits: data on‑demand, automatic curriculum, symmetry, and broad applicability. \n",
+ "* Pitfalls (cycling, high variance, instability) are mitigated with target networks, replay buffers of past policies, soft updates, and entropy or baseline techniques. \n",
+ "\n",
+ "Self‑play is therefore a cornerstone of modern game‑playing AI and a versatile tool for any RL problem where a simulator exists.\n"
+ ]
+ }
+ ],
+ "source": [
+ "PROMPT = \"Explain self-play in reinforcement learning with a concrete example.\"\n",
+    "NUMBER_OF_CANDIDATES = 2 # two keeps a local run fast; go up if you need more intelligence!\n",
+ "\n",
+ "client = ollama.Client()\n",
+ "\n",
+ "\n",
+ "\n",
+ "result = pro_mode(client, PROMPT, NUMBER_OF_CANDIDATES)\n",
+ "\n",
+ "print(\"\\n=== FINAL ===\\n\", result[\"final\"])\n",
+ "# To inspect candidates:\n",
+ "# for i, c in enumerate(result[\"candidates\"], 1): print(f\"\\n--- Candidate {i} ---\\n{c}\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "authorship_tag": "ABX9TyMEoKFi+0SrPi/OXiwQ1i58",
+ "include_colab_link": true,
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/OpenAI_Open_Source_Pro_Mode_Ollama_Turbo.ipynb b/OpenAI_Open_Source_Pro_Mode_Ollama_Turbo.ipynb
new file mode 100644
index 0000000..632544a
--- /dev/null
+++ b/OpenAI_Open_Source_Pro_Mode_Ollama_Turbo.ipynb
@@ -0,0 +1,441 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "view-in-github"
+ },
+ "source": [
+    ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3LLlSvTtU0Y3"
+ },
+ "source": [
+ "Made by Matt Shumer ([@mattshumer_](https://x.com/mattshumer_) on X)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "cellView": "form",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "dzWS8LaqUw3N",
+ "outputId": "d4031e82-2131-4208-8f62-3923baca7cc4"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: ollama in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (0.4.5)\n",
+ "Requirement already satisfied: httpx<0.28.0,>=0.27.0 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from ollama) (0.27.2)\n",
+ "Requirement already satisfied: pydantic<3.0.0,>=2.9.0 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from ollama) (2.11.7)\n",
+ "Requirement already satisfied: anyio in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (4.9.0)\n",
+ "Requirement already satisfied: certifi in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (2024.8.30)\n",
+ "Requirement already satisfied: httpcore==1.* in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (1.0.5)\n",
+ "Requirement already satisfied: idna in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (3.8)\n",
+ "Requirement already satisfied: sniffio in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (1.3.1)\n",
+ "Requirement already satisfied: h11<0.15,>=0.13 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpcore==1.*->httpx<0.28.0,>=0.27.0->ollama) (0.14.0)\n",
+ "Requirement already satisfied: annotated-types>=0.6.0 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.9.0->ollama) (0.7.0)\n",
+ "Requirement already satisfied: pydantic-core==2.33.2 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.9.0->ollama) (2.33.2)\n",
+ "Requirement already satisfied: typing-extensions>=4.12.2 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.9.0->ollama) (4.12.2)\n",
+ "Requirement already satisfied: typing-inspection>=0.4.0 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.9.0->ollama) (0.4.1)\n",
+ "\n",
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m25.1.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.2\u001b[0m\n",
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
+ ]
+ }
+ ],
+ "source": [
+ "# @title Run this cell to set up Pro Mode\n",
+ "!pip3 install ollama\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "from typing import List, Dict, Any\n",
+ "import time, os\n",
+ "import concurrent.futures as cf\n",
+ "import ollama\n",
+ "\n",
+ "MODEL = \"gpt-oss:120b\"\n",
+    "MAX_COMPLETION_TOKENS = 30000  # generation cap, passed to Ollama as the num_predict option\n",
+ "\n",
+ "\n",
+    "def _one_completion(client: ollama.Client, question: str, temperature: float) -> str:\n",
+    "    \"\"\"\n",
+    "    Send a question to the Ollama chat endpoint and return the response text.\n",
+    "    Retries up to three times with exponential backoff on failure.\n",
+    "    \"\"\"\n",
+ " delay = 0.5\n",
+ " for attempt in range(3):\n",
+ " try:\n",
+ " response = client.chat(\n",
+ " model=MODEL,\n",
+ " messages=[\n",
+ " {'role': 'user', 'content': question},\n",
+ " ],\n",
+    "            options={'temperature': temperature, 'num_predict': MAX_COMPLETION_TOKENS}\n",
+ " )\n",
+ " return response['message']['content']\n",
+    "        except Exception:\n",
+ " if attempt == 2:\n",
+ " raise\n",
+ " time.sleep(delay)\n",
+ " delay *= 2\n",
+ "\n",
+ "\n",
+ "def _build_synthesis_messages(candidates: List[str]) -> List[Dict[str, str]]:\n",
+ " numbered = \"\\n\\n\".join(\n",
+    "        f\"<candidate {i+1}>\\n{txt}\\n</candidate {i+1}>\" for i, txt in enumerate(candidates)\n",
+ " )\n",
+ " system = (\n",
+ " \"You are an expert editor. Synthesize ONE best answer from the candidate \"\n",
+ " \"answers provided, merging strengths, correcting errors, and removing repetition. \"\n",
+ " \"Do not mention the candidates or the synthesis process. Be decisive and clear.\"\n",
+ " )\n",
+ " user = (\n",
+    "        f\"You are given {len(candidates)} candidate answers, each delimited by <candidate i> tags.\\n\\n\"\n",
+ " f\"{numbered}\\n\\nReturn the single best final answer.\"\n",
+ " )\n",
+ " return [{\"role\": \"system\", \"content\": system},\n",
+ " {\"role\": \"user\", \"content\": user}]\n",
+ "\n",
+    "def pro_mode(client: ollama.Client, prompt: str, n_runs: int) -> Dict[str, Any]:\n",
+    "    \"\"\"\n",
+    "    Fan out n_runs parallel generations at T=0.9 and synthesize a final answer at T=0.2.\n",
+    "    Returns: {\"final\": str, \"candidates\": List[str]}\n",
+    "    \"\"\"\n",
+ " assert n_runs >= 1, \"n_runs must be >= 1\"\n",
+ "\n",
+ " # Parallel candidate generations (threaded; Colab-friendly)\n",
+ " max_workers = min(n_runs, 16)\n",
+ " candidates: List[str] = [None] * n_runs # preserve order\n",
+ " with cf.ThreadPoolExecutor(max_workers=max_workers) as ex:\n",
+ " fut_to_idx = {\n",
+ " ex.submit(_one_completion, client, prompt, 0.9): i\n",
+ " for i in range(n_runs)\n",
+ " }\n",
+ " for fut in cf.as_completed(fut_to_idx):\n",
+ " i = fut_to_idx[fut]\n",
+ " candidates[i] = fut.result()\n",
+ "\n",
+ " # Synthesis pass\n",
+ " messages = _build_synthesis_messages(candidates)\n",
+ " final_resp = client.chat(\n",
+ " model=MODEL,\n",
+ " messages=messages,\n",
+    "        options={'temperature': 0.2, 'num_predict': MAX_COMPLETION_TOKENS}\n",
+ " )\n",
+ " final = final_resp['message']['content']\n",
+ "\n",
+ " return {\"final\": final, \"candidates\": candidates}\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "OoBF5UbSVksz"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "=== FINAL ===\n",
+ " **Self‑play in reinforcement learning** \n",
+ "Self‑play turns a two‑player (or multi‑agent) game into its own data‑generator: the learning agent repeatedly plays against a copy of itself (or a past version). Because the opponent improves together with the learner, the difficulty of the task automatically adapts, eliminating the need for hand‑crafted opponents or expert demonstrations.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 1. Why self‑play works\n",
+ "\n",
+ "| Reason | Effect on learning |\n",
+ "|--------|--------------------|\n",
+ "| **Automatic curriculum** | Early games are easy (both agents are weak); later games become harder as the policy improves, keeping the learning signal informative. |\n",
+ "| **No external labels** | The only reward needed is the game outcome (win = +1, loss = ‑1, draw = 0). |\n",
+ "| **Full‑tree exploration** | An evolving opponent forces the learner to discover strategies that would never appear against a static opponent. |\n",
+ "| **Convergence to equilibrium** | In deterministic zero‑sum games the process drives both sides toward a Nash (minimax) policy; for solved games this is the optimal strategy. |\n",
+ "| **Sample efficiency** | One episode yields a training example for *both* players, doubling the amount of useful data per simulation. |\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 2. Core ingredients\n",
+ "\n",
+ "| Component | Typical implementation |\n",
+ "|-----------|------------------------|\n",
+ "| **Environment** | Any perfect‑information game (board, video‑game, robot duel). Must expose state, legal actions, transition, and a terminal reward. |\n",
+ "| **Policy‑value network** `fθ(s) → (π, v)` | Small MLP for toy games; deep residual CNN or Transformer for large board games. `π` is a probability distribution over legal moves, `v∈[-1,1]` estimates the expected outcome from the current player’s perspective. |\n",
+ "| **Opponent** | Usually the *current* network itself. For stability a lagged copy (target network) or a pool of older checkpoints can be used. |\n",
+ "| **Search layer (optional)** | Monte‑Carlo Tree Search (MCTS) that uses `π` and `v` to produce a stronger move distribution `π_MCTS`. The search result becomes the training target for the policy head. |\n",
+ "| **Replay buffer** `D` | Stores tuples `(s, π_target, z)` where `z` is the final game result (+1/0/‑1) from the player who acted in `s`. |\n",
+ "| **Loss** | `L(θ) = (z‑v)² – π_target·log π + λ‖θ‖²` (value MSE + policy cross‑entropy + L2 regularisation). |\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 3. General self‑play training loop (high‑level pseudocode)\n",
+ "\n",
+ "```python\n",
+ "initialize network fθ randomly\n",
+ "D = empty replay buffer\n",
+ "target = copy of fθ # optional lagged opponent\n",
+ "\n",
+ "while not done:\n",
+ " # ---------- self‑play ----------\n",
+ " for _ in range(num_episodes):\n",
+ " game = new_game()\n",
+ " states, policies = [], []\n",
+ " while not game.terminal():\n",
+ " s = game.state()\n",
+ " # run MCTS if available, otherwise use raw policy\n",
+ " π = MCTS(s, fθ) if use_mcts else softmax(fθ(s).policy)\n",
+ " a = sample_move(π) # ε‑greedy or temperature >0\n",
+ " states.append(s)\n",
+ " policies.append(π)\n",
+ " game.step(a) # opponent = same net, other side\n",
+ " z = game.result() # +1 / 0 / -1 from the player who moved first\n",
+ " # store every position with the perspective‑corrected outcome\n",
+ " for i, s in enumerate(states):\n",
+ " zi = z if i % 2 == 0 else -z # flip sign every move\n",
+ " D.add((s, policies[i], zi))\n",
+ "\n",
+ " # ---------- learning ----------\n",
+ " for _ in range(train_steps):\n",
+ " batch = D.sample(batch_size)\n",
+ " loss = compute_loss(fθ, batch) # value + policy loss\n",
+ " optimizer.zero_grad()\n",
+ " loss.backward()\n",
+ " optimizer.step()\n",
+ "\n",
+ " # ---------- opponent update ----------\n",
+ " if iteration % target_update == 0:\n",
+ " target.load_state_dict(fθ.state_dict()) # or add fθ to a pool of past nets\n",
+ "```\n",
+ "\n",
+ "*Both sides query the **same** network; the only difference is the sign of the value (or a flip of the policy logits) to reflect which player is acting.*\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 4. Minimal working example – Tic‑Tac‑Toe\n",
+ "\n",
+ "### 4.1 Game definition (Python‑like)\n",
+ "\n",
+ "```python\n",
+ "class TicTacToe:\n",
+ " def __init__(self):\n",
+ " self.board = np.zeros(9, dtype=int) # 0 empty, 1 X, -1 O\n",
+ " self.player = 1 # X starts\n",
+ "\n",
+ " def legal(self):\n",
+ " return np.where(self.board == 0)[0]\n",
+ "\n",
+ " def step(self, a):\n",
+ " self.board[a] = self.player\n",
+ " win = self.check_winner()\n",
+ " done = win is not None or not self.legal().size\n",
+ " reward = 0\n",
+ " if done:\n",
+ " reward = 0 if win == 0 else (1 if win == self.player else -1)\n",
+ " self.player *= -1\n",
+ " return self.board.copy(), reward, done\n",
+ "\n",
+ " # returns 1 if X wins, -1 if O wins, 0 for draw, None otherwise\n",
+ " def check_winner(self):\n",
+ " lines = [(0,1,2),(3,4,5),(6,7,8),\n",
+ " (0,3,6),(1,4,7),(2,5,8),\n",
+ " (0,4,8),(2,4,6)]\n",
+ " for a,b,c in lines:\n",
+ " s = self.board[a] + self.board[b] + self.board[c]\n",
+ " if s == 3: return 1\n",
+ " if s == -3: return -1\n",
+ " if not (self.board == 0).any(): return 0\n",
+ " return None\n",
+ "```\n",
+ "\n",
+ "### 4.2 Tiny policy‑value net (MLP)\n",
+ "\n",
+ "```python\n",
+ "class Net(nn.Module):\n",
+ " def __init__(self):\n",
+ " super().__init__()\n",
+ " self.fc = nn.Sequential(\n",
+ " nn.Linear(9, 128), nn.ReLU(),\n",
+ " nn.Linear(128, 128), nn.ReLU()\n",
+ " )\n",
+ " self.policy_head = nn.Linear(128, 9) # logits\n",
+ " self.value_head = nn.Linear(128, 1) # tanh output\n",
+ "\n",
+ " def forward(self, board):\n",
+ " x = self.fc(board.float())\n",
+ " return self.policy_head(x), torch.tanh(self.value_head(x))\n",
+ "```\n",
+ "\n",
+ "### 4.3 Self‑play episode (no MCTS)\n",
+ "\n",
+ "```python\n",
+ "def play_episode(net, eps=0.1):\n",
+ " env = TicTacToe()\n",
+ " traj = [] # (state, player, policy)\n",
+ " while True:\n",
+ " s = torch.from_numpy(env.board).float()\n",
+ " logits, _ = net(s)\n",
+ " # mask illegal moves\n",
+ " mask = (env.board == 0).astype(float)\n",
+ " probs = torch.softmax(logits, dim=0) * torch.from_numpy(mask)\n",
+ " probs = probs / probs.sum()\n",
+ "\n",
+ " # ε‑greedy exploration\n",
+ " if random.random() < eps:\n",
+ " a = random.choice(env.legal())\n",
+ " else:\n",
+ " a = torch.multinomial(probs, 1).item()\n",
+ "\n",
+ " traj.append((s.clone(), env.player, probs.clone()))\n",
+ " _, reward, done = env.step(a)\n",
+ " if done:\n",
+ " # propagate final outcome to every step\n",
+ " for state, player, pi in traj:\n",
+ " z = reward * player # perspective‑corrected result\n",
+ " replay.add(state, pi, z)\n",
+ " break\n",
+ "```\n",
+ "\n",
+ "### 4.4 Training step\n",
+ "\n",
+ "```python\n",
+ "def train_step(net, batch):\n",
+ " states, pis, zs = zip(*batch) # each is a list of tensors\n",
+ " states = torch.stack(states)\n",
+ " target_pi = torch.stack(pis)\n",
+ " target_z = torch.tensor(zs).unsqueeze(1).float()\n",
+ "\n",
+ " logits, values = net(states)\n",
+ " policy_loss = -torch.mean(torch.sum(target_pi * torch.log_softmax(logits, dim=1), dim=1))\n",
+ " value_loss = torch.mean((values - target_z) ** 2)\n",
+ " loss = policy_loss + value_loss\n",
+ "\n",
+ " optimizer.zero_grad()\n",
+ " loss.backward()\n",
+ " optimizer.step()\n",
+ " return loss.item()\n",
+ "```\n",
+ "\n",
+ "Running the loop for a few hundred thousand episodes drives the network to a **draw‑only** policy—the optimal solution for Tic‑Tac‑Toe. The same code, with a deeper net and MCTS, scales to Go, Chess, Shogi (AlphaZero) and even to multi‑agent video games.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 5. Scaling up – what changes for AlphaZero‑style systems\n",
+ "\n",
+ "| Change | Reason |\n",
+ "|--------|--------|\n",
+ "| **Monte‑Carlo Tree Search** | Provides a strong move distribution `π_MCTS` that is used as the policy target; dramatically improves data efficiency in huge state spaces. |\n",
+ "| **Large replay buffer (≈10⁶–10⁷ games)** | Guarantees a diverse training set and smooths the non‑stationarity caused by the moving opponent. |\n",
+ "| **Lagged or pooled opponents** | Instead of always playing against the current net, sample from a *league* of older checkpoints; prevents cyclic strategies and stabilises convergence. |\n",
+ "| **Curriculum on temperature & Dirichlet noise** | Early games use high temperature (more random) and added Dirichlet noise to explore openings; later games become more deterministic. |\n",
+ "| **Distributed generation** | Self‑play is parallelised over thousands of CPU cores / GPUs; training is performed on GPUs. |\n",
+ "| **Regularisation & target network** | L2 weight decay, dropout, and a slowly‑updated target net reduce catastrophic policy swings. |\n",
+ "\n",
+ "With these additions AlphaZero learned master‑level play in Chess, Shogi and Go from scratch in a few days of compute.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 6. Common pitfalls & practical tips\n",
+ "\n",
+ "| Pitfall | Mitigation |\n",
+ "|---------|------------|\n",
+ "| **Mode collapse (repeating the same line)** | Keep a pool of past opponents, inject exploration noise, or use a temperature schedule. |\n",
+ "| **Non‑stationary targets** | Use a target network for generating `π_target` and `v_target`, or sample from a replay buffer that contains older games. |\n",
+ "| **Sparse reward** (only win/loss) | Add intermediate shaping (e.g., material advantage in Chess) *only* for the value head; keep the policy target derived from search. |\n",
+ "| **Credit assignment in long games** | Use bootstrapped value estimates from the network inside MCTS, or employ TD(λ) / n‑step returns instead of pure Monte‑Carlo returns. |\n",
+ "| **Over‑fitting to a single opponent** | Periodically evaluate against a fixed strong baseline (e.g., a handcrafted engine) to detect regression. |\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 7. Quick checklist for a new self‑play project\n",
+ "\n",
+ "1. **Define a two‑player environment** with clear win/loss reward. \n",
+ "2. **Choose a network architecture** (MLP for tiny games, CNN/Transformer for board games). \n",
+ "3. **Implement a copy‑as‑opponent** routine (direct clone, lagged copy, or opponent pool). \n",
+ "4. **Write a self‑play generator** that records `(state, policy, outcome)` for every move. \n",
+ "5. **Select an RL algorithm** – REINFORCE, PPO, or Q‑learning; combine with a value head if possible. \n",
+ "6. **Set up a replay buffer** and a training step that minimises the combined policy‑value loss. \n",
+ "7. **Add stabilisation tricks** – target network, opponent pool, exploration noise, temperature schedule. \n",
+ "8. **Periodically evaluate** against older checkpoints or a known baseline to monitor progress. \n",
+ "\n",
+ "---\n",
+ "\n",
+ "## TL;DR\n",
+ "\n",
+ "*Self‑play* lets an RL agent improve by repeatedly playing a game against a copy of itself. The opponent’s strength rises together with the learner, providing an automatic curriculum and removing the need for external data. The basic loop is:\n",
+ "\n",
+ "1. **Generate games** with the current policy (optionally guided by MCTS). \n",
+ "2. **Store** for each move the state, the improved move distribution, and the final win/loss outcome. \n",
+ "3. **Update** a shared policy‑value network with a loss that combines value regression and policy cross‑entropy. \n",
+ "4. **Refresh** the opponent (lagged copy or pool) and repeat.\n",
+ "\n",
+ "A tiny neural net trained on self‑play Tic‑Tac‑Toe converges to perfect (draw‑only) play; the same framework, augmented with MCTS, large replay buffers, and opponent leagues, underlies world‑class systems such as AlphaZero, AlphaStar, OpenAI Five, and many multi‑agent robotics or language‑game projects.\n"
+ ]
+ }
+ ],
+ "source": [
+ "PROMPT = \"Explain self-play in reinforcement learning with a concrete example.\"\n",
+ "NUMBER_OF_CANDIDATES = 5 # start with five, go up if you need more intelligence!\n",
+    "OLLAMA_API_KEY = \"yourkey\"  # replace with your Ollama API key from ollama.com\n",
+ "\n",
+ "client = ollama.Client(\n",
+ " host=\"https://ollama.com\",\n",
+ " headers={'Authorization': OLLAMA_API_KEY}\n",
+ ")\n",
+ "\n",
+ "\n",
+ "\n",
+ "result = pro_mode(client, PROMPT, NUMBER_OF_CANDIDATES)\n",
+ "\n",
+ "print(\"\\n=== FINAL ===\\n\", result[\"final\"])\n",
+ "# To inspect candidates:\n",
+ "# for i, c in enumerate(result[\"candidates\"], 1): print(f\"\\n--- Candidate {i} ---\\n{c}\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "authorship_tag": "ABX9TyMEoKFi+0SrPi/OXiwQ1i58",
+ "include_colab_link": true,
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/README.md b/README.md
index e3de401..31b27f2 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,151 @@
+# GPT-OSS Pro Mode
+
+A collection of Jupyter notebooks that implement **Pro Mode**, an AI reasoning technique that generates multiple candidate responses in parallel and then synthesizes them into a single, higher-quality answer.
+
+## 🎯 What is Pro Mode?
+
+Pro Mode is a sophisticated approach to AI reasoning that mimics how expert humans think through complex problems:
+
+1. **Generate Multiple Perspectives**: Creates several candidate responses to the same question
+2. **Parallel Processing**: Uses multiple AI instances simultaneously for efficiency
+3. **Intelligent Synthesis**: Combines the best parts of each candidate into a final, refined answer
+
+Compared to a single-shot interaction, this tends to improve answer quality, reduce errors, and produce more comprehensive responses, at the cost of extra tokens and latency.
+
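+The core pattern is small. Below is a minimal sketch of the fan-out/synthesize loop using the `ollama` Python client, mirroring the notebooks in this repo; the model name and the three-candidate count are illustrative, not requirements:
+
+```python
+import concurrent.futures as cf
+import ollama
+
+client = ollama.Client()  # local server; pass host/headers for the hosted Turbo endpoint
+
+def ask(prompt: str, temperature: float) -> str:
+    resp = client.chat(
+        model="gpt-oss:120b",
+        messages=[{"role": "user", "content": prompt}],
+        options={"temperature": temperature},
+    )
+    return resp["message"]["content"]
+
+question = "Explain self-play in reinforcement learning."
+
+# Fan out: several high-temperature candidates in parallel
+with cf.ThreadPoolExecutor() as ex:
+    candidates = list(ex.map(lambda _: ask(question, 0.9), range(3)))
+
+# Synthesize: one low-temperature editorial pass over all candidates
+merged = "\n\n".join(
+    f"<candidate {i+1}>\n{c}\n</candidate {i+1}>" for i, c in enumerate(candidates)
+)
+final = ask("Synthesize ONE best answer from these candidates:\n\n" + merged, 0.2)
+print(final)
+```
+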
+## 🚀 Key Benefits
+
+- **Higher-Quality Answers**: Multiple perspectives tend to yield more thorough, accurate responses
+- **Error Reduction**: The synthesis pass catches and corrects individual candidates' mistakes
+- **Better Reasoning**: Combines strengths from different approaches
+- **Comprehensive Coverage**: Addresses aspects that a single response might miss
+
+## 📁 Available Implementations
+
+This repository contains three different implementations of Pro Mode:
+
+### 1. **Groq Implementation** (`OpenAI_Open_Source_Pro_Mode_Groq.ipynb`)
+- Uses Groq's fast inference API
+- Requires Groq API key
+- Optimized for speed and cost-effectiveness
+- Best for production use cases
+
+### 2. **Ollama Local Implementation** (`OpenAI_Open_Source_Pro_Mode_Ollama_Local.ipynb`)
+- Runs locally using Ollama
+- No API costs or internet connection required
+- Uses the `gpt-oss:120b` model, which needs substantial local memory; substitute a smaller model if your hardware is limited
+- A good fit for privacy-sensitive work
+
+### 3. **Ollama Turbo Implementation** (`OpenAI_Open_Source_Pro_Mode_Ollama_Turbo.ipynb`)
+- Same Pro Mode pipeline as the local notebook
+- Runs against Ollama's hosted Turbo endpoint (`https://ollama.com`) instead of a local server
+- Requires an Ollama API key; no local model download or GPU needed
+
+## 🔧 How It Works
+
+### The Pro Mode Process
+
+1. **Parallel Generation**:
+ - Takes your prompt and generates `n_runs` candidate responses simultaneously
+ - Uses high temperature (0.9) for creative diversity
+ - Runs in parallel threads for efficiency
+
+2. **Synthesis Phase**:
+ - An expert editor AI analyzes all candidates
+ - Merges strengths, corrects errors, removes repetition
+ - Uses low temperature (0.2) for focused synthesis
+ - Produces a single, refined final answer
+
+3. **Quality Output**:
+ - Returns both the final synthesized answer and all candidates
+ - Allows inspection of individual candidates if needed
+
+### Example Usage
+
+```python
+# Set up your preferred implementation
+# (Groq, Ollama Local, or Ollama Turbo)
+
+# Define your question
+PROMPT = "Explain self-play in reinforcement learning with a concrete example."
+NUMBER_OF_CANDIDATES = 3 # Adjust based on complexity
+
+# Run Pro Mode
+result = pro_mode(client, PROMPT, NUMBER_OF_CANDIDATES)
+
+# Get the final synthesized answer
+print("=== FINAL ANSWER ===")
+print(result["final"])
+
+# Optionally inspect individual candidates
+for i, candidate in enumerate(result["candidates"], 1):
+ print(f"\n--- Candidate {i} ---")
+ print(candidate)
+```
+
+## 🛠️ Setup Instructions
+
+### For Groq Implementation:
+1. Get a Groq API key from [groq.com](https://groq.com)
+2. Set environment variable: `export GROQ_API_KEY="your-key-here"`
+3. Run the notebook: `OpenAI_Open_Source_Pro_Mode_Groq.ipynb`
+
+### For Ollama Local Implementation:
+1. Install Ollama: `curl -fsSL https://ollama.ai/install.sh | sh`
+2. Pull the model: `ollama pull gpt-oss:120b`
+3. Run the notebook: `OpenAI_Open_Source_Pro_Mode_Ollama_Local.ipynb`
+
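+Before running Pro Mode, you can sanity-check the local setup with a one-off chat call. A minimal sketch, assuming `ollama serve` is running and the model has been pulled:
+
+```python
+import ollama
+
+client = ollama.Client()  # defaults to http://localhost:11434
+reply = client.chat(
+    model="gpt-oss:120b",
+    messages=[{"role": "user", "content": "Say hello."}],
+)
+print(reply["message"]["content"])
+```
+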
+### For Ollama Turbo Implementation:
+1. Get an Ollama API key from [ollama.com](https://ollama.com)
+2. Set `OLLAMA_API_KEY` in the notebook
+3. Run the notebook: `OpenAI_Open_Source_Pro_Mode_Ollama_Turbo.ipynb`
+
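+The Turbo notebook constructs its client like this (excerpted from the notebook; `OLLAMA_API_KEY` is your key from ollama.com):
+
+```python
+import ollama
+
+OLLAMA_API_KEY = "yourkey"  # replace with your Ollama API key
+
+client = ollama.Client(
+    host="https://ollama.com",
+    headers={"Authorization": OLLAMA_API_KEY},
+)
+```
+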
+## ⚙️ Configuration Options
+
+- **`n_runs`**: Number of candidate responses (2-5 recommended)
+- **`MAX_COMPLETION_TOKENS`**: Upper bound on response length, passed to Ollama as `num_predict` (default: 30000)
+- **Temperature**: 0.9 for candidates, 0.2 for synthesis
+- **Model**: `openai/gpt-oss-120b` (Groq) or `gpt-oss:120b` (Ollama)
+
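+For example, to trade some answer quality for speed, you might shrink the token budget and candidate count before calling `pro_mode`; the values below are illustrative rather than tuned defaults:
+
+```python
+MODEL = "gpt-oss:120b"        # swap in a smaller model if hardware is limited
+MAX_COMPLETION_TOKENS = 8000  # passed to Ollama as num_predict in these notebooks
+
+result = pro_mode(client, "Summarize the CAP theorem in two paragraphs.", n_runs=2)
+print(result["final"])
+```
+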
+## 🎯 When to Use Pro Mode
+
+**Best for:**
+- Complex reasoning problems
+- Technical explanations
+- Creative writing tasks
+- Problem-solving scenarios
+- Research and analysis
+
+**Not needed for:**
+- Simple factual questions
+- Basic text generation
+- Latency-sensitive or real-time applications (the candidate fan-out and synthesis pass add wall-clock time)
+
+## 📊 Performance Tips
+
+- Start with 2-3 candidates for most questions
+- Increase to 4-5 for very complex problems
+- Monitor API costs when using Groq
+- Consider local Ollama for privacy-sensitive tasks
+
+## 🤝 Contributing
+
+Created by Matt Shumer ([@mattshumer_](https://x.com/mattshumer_) on X)
+
+Feel free to:
+- Submit issues and feature requests
+- Contribute improvements to the synthesis logic
+- Add new model implementations
+- Share your use cases and results
+
+## 📄 License
+
+This project is open source. Please check individual notebook headers for specific licensing information.
+
+---
+
+**Pro Mode transforms single-shot AI interactions into collaborative reasoning sessions, delivering higher-quality results through intelligent synthesis of multiple perspectives.**
+
# gpt-oss-pro-mode
[@mattshumer_](https://x.com/mattshumer_)
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1XeYmOHJwACtavCjJM-eOqlPxHgTD2KNP?usp=sharing)
-
-Run the attached notebook to access Pro mode! Star this repo and let me know what you want me to add!
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1XeYmOHJwACtavCjJM-eOqlPxHgTD2KNP?usp=sharing)
\ No newline at end of file