diff --git a/OpenAI_Open_Source_Pro_Mode.ipynb b/OpenAI_Open_Source_Pro_Mode_Groq.ipynb similarity index 100% rename from OpenAI_Open_Source_Pro_Mode.ipynb rename to OpenAI_Open_Source_Pro_Mode_Groq.ipynb diff --git a/OpenAI_Open_Source_Pro_Mode_Ollama_Local.ipynb b/OpenAI_Open_Source_Pro_Mode_Ollama_Local.ipynb new file mode 100644 index 0000000..fe4a172 --- /dev/null +++ b/OpenAI_Open_Source_Pro_Mode_Ollama_Local.ipynb @@ -0,0 +1,421 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "view-in-github" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3LLlSvTtU0Y3" + }, + "source": [ + "Made by Matt Shumer ([@mattshumer_](https://x.com/mattshumer_) on X)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "cellView": "form", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dzWS8LaqUw3N", + "outputId": "d4031e82-2131-4208-8f62-3923baca7cc4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: ollama in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (0.4.5)\n", + "Requirement already satisfied: httpx<0.28.0,>=0.27.0 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from ollama) (0.27.2)\n", + "Requirement already satisfied: pydantic<3.0.0,>=2.9.0 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from ollama) (2.11.7)\n", + "Requirement already satisfied: anyio in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (4.9.0)\n", + "Requirement already satisfied: certifi in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (2024.8.30)\n", + "Requirement already satisfied: httpcore==1.* in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (1.0.5)\n", + "Requirement 
already satisfied: idna in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (3.8)\n", + "Requirement already satisfied: sniffio in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (1.3.1)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpcore==1.*->httpx<0.28.0,>=0.27.0->ollama) (0.14.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.9.0->ollama) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.33.2 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.9.0->ollama) (2.33.2)\n", + "Requirement already satisfied: typing-extensions>=4.12.2 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.9.0->ollama) (4.12.2)\n", + "Requirement already satisfied: typing-inspection>=0.4.0 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.9.0->ollama) (0.4.1)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m25.1.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "# @title Run this cell to set up Pro Mode\n", + "!pip3 install ollama\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "from typing import List, Dict, Any\n", + "import time, os\n", + "import concurrent.futures as cf\n", + "import ollama\n", + "\n", + "MODEL = \"gpt-oss:120b\"\n", + "MAX_COMPLETION_TOKENS = 30000\n", 
+ "\n", + "\n", + "def _one_completion(client: ollama, question: str, temperature: float ) -> str:\n", + " \"\"\"\n", + " \n", + " Sends a question to the Ollama API and returns the response.\n", + " \"\"\"\n", + " delay = 0.5\n", + " for attempt in range(3):\n", + " try:\n", + " response = client.chat(\n", + " model=MODEL,\n", + " messages=[\n", + " {'role': 'user', 'content': question},\n", + " ],\n", + " options={'temperature': temperature, 'max_completion_tokens':MAX_COMPLETION_TOKENS}\n", + " )\n", + " return response['message']['content']\n", + " except Exception as e:\n", + " if attempt == 2:\n", + " raise\n", + " time.sleep(delay)\n", + " delay *= 2\n", + "\n", + "\n", + "def _build_synthesis_messages(candidates: List[str]) -> List[Dict[str, str]]:\n", + " numbered = \"\\n\\n\".join(\n", + " f\"\\n{txt}\\n\" for i, txt in enumerate(candidates)\n", + " )\n", + " system = (\n", + " \"You are an expert editor. Synthesize ONE best answer from the candidate \"\n", + " \"answers provided, merging strengths, correcting errors, and removing repetition. \"\n", + " \"Do not mention the candidates or the synthesis process. 
Be decisive and clear.\"\n", + " )\n", + " user = (\n", + " f\"You are given {len(candidates)} candidate answers delimited by tags.\\n\\n\"\n", + " f\"{numbered}\\n\\nReturn the single best final answer.\"\n", + " )\n", + " return [{\"role\": \"system\", \"content\": system},\n", + " {\"role\": \"user\", \"content\": user}]\n", + "\n", + "def pro_mode(client: ollama, prompt: str, n_runs: int) -> Dict[str, Any]:\n", + " \"\"\"\n", + " Fan out n_runs parallel generations at T=0.9 and synthesize a final answer at T=0.2.\n", + " If groq_api_key is provided, it will be used; otherwise GROQ_API_KEY env var is used.\n", + " Returns: {\"final\": str, \"candidates\": List[str]}\n", + " \"\"\"\n", + " assert n_runs >= 1, \"n_runs must be >= 1\"\n", + "\n", + " # Parallel candidate generations (threaded; Colab-friendly)\n", + " max_workers = min(n_runs, 16)\n", + " candidates: List[str] = [None] * n_runs # preserve order\n", + " with cf.ThreadPoolExecutor(max_workers=max_workers) as ex:\n", + " fut_to_idx = {\n", + " ex.submit(_one_completion, client, prompt, 0.9): i\n", + " for i in range(n_runs)\n", + " }\n", + " for fut in cf.as_completed(fut_to_idx):\n", + " i = fut_to_idx[fut]\n", + " candidates[i] = fut.result()\n", + "\n", + " # Synthesis pass\n", + " messages = _build_synthesis_messages(candidates)\n", + " final_resp = client.chat(\n", + " model=MODEL,\n", + " messages=messages,\n", + " options={'temperature': 0.2, 'max_completion_tokens':MAX_COMPLETION_TOKENS}\n", + " )\n", + " final = final_resp['message']['content']\n", + "\n", + " return {\"final\": final, \"candidates\": candidates}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "OoBF5UbSVksz" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=== FINAL ===\n", + " **Self‑Play in Reinforcement Learning**\n", + "\n", + "Self‑play is a training paradigm in which an RL agent learns by repeatedly playing against a copy of itself (or 
against a mixture of its past copies). Because the opponent’s strength automatically tracks the learner’s strength, the task provides a **self‑generated curriculum**: the game is never too easy and never hopelessly hard. The only requirement is a simulator of the environment; no external data or hand‑crafted opponent is needed.\n", + "\n", + "---\n", + "\n", + "## 1. Core Loop\n", + "\n", + "| Step | What happens | Why it matters |\n", + "|------|--------------|----------------|\n", + "| **1️⃣ Initialise** | Randomly initialise a policy (and optionally a value) network θ. | Gives the agent a starting point. |\n", + "| **2️⃣ Create opponent** | Keep a *lagged* copy θ̄ (the “target” network) that will act as the opponent. | Prevents the opponent from being identical at every move, which would give no learning signal. |\n", + "| **3️⃣ Self‑play** | Play many episodes where the current policy θ and the opponent θ̄ alternate turns, recording every (state, action, reward, done) tuple. | Generates the data on which the agent will improve. |\n", + "| **4️⃣ Compute targets** | From the trajectories compute returns (or advantages) and, if using a value head, a baseline. | Provides the supervised signal for policy‑gradient or value‑based updates. |\n", + "| **5️⃣ Update θ** | Perform an RL update (PPO, A2C, REINFORCE + baseline, or AlphaZero‑style loss) on the collected data. | Improves the policy so that actions leading to higher returns become more likely. |\n", + "| **6️⃣ Refresh opponent** | Every *K* training steps copy the updated θ into θ̄ (or perform a soft update θ̄←τθ+(1‑τ)θ̄). | Keeps the opponent slightly behind the learner, preserving a moving‑target curriculum. |\n", + "| **7️⃣ Evaluate** | Periodically test θ against a simple baseline (random, minimax, or the previous version) and log win‑rates. | Gives an external sanity check and a metric of progress. 
|\n", + "\n", + "The loop repeats until performance plateaus or a desired level is reached.\n", + "\n", + "---\n", + "\n", + "## 2. Concrete Example – Connect‑4 with PPO\n", + "\n", + "Below is a compact, runnable‑style pseudocode that demonstrates the full pipeline for the classic two‑player board game **Connect‑4** (7 × 6 grid, win by aligning four discs). The same structure works for any turn‑based, zero‑sum game.\n", + "\n", + "```python\n", + "# -------------------------------------------------\n", + "# 1. Initialise networks\n", + "# -------------------------------------------------\n", + "policy_net = PolicyCNN() # outputs logits for 7 columns\n", + "optimizer = torch.optim.Adam(policy_net.parameters(), lr=1e-3)\n", + "\n", + "# -------------------------------------------------\n", + "# 2. Lagged opponent (target network)\n", + "# -------------------------------------------------\n", + "target_net = copy.deepcopy(policy_net) # frozen copy\n", + "\n", + "# -------------------------------------------------\n", + "# 3. Hyper‑parameters\n", + "# -------------------------------------------------\n", + "NUM_SELFPLAY_GAMES = 1000 # games per training epoch\n", + "UPDATE_EVERY = 200 # refresh opponent every 200 epochs\n", + "BATCH_SIZE = 64\n", + "CLIP_EPS = 0.2 # PPO clipping\n", + "GAMMA = 0.99\n", + "\n", + "# -------------------------------------------------\n", + "# 4. 
Experience buffer\n", + "# -------------------------------------------------\n", + "replay = [] # list of (state, action, reward, done)\n", + "\n", + "def play_one_game(policy, opponent):\n", + " \"\"\"Play a full Connect‑4 game, store (s,a,r,done) from the learner’s view.\"\"\"\n", + " env = Connect4Env()\n", + " traj = []\n", + " player = 1 # 1 = policy, -1 = opponent\n", + "\n", + " while not env.done:\n", + " # Choose action with the appropriate network\n", + " net = policy if player == 1 else opponent\n", + " logits = net(env.board_tensor())\n", + " dist = torch.distributions.Categorical(logits=logits)\n", + " action = dist.sample().item()\n", + "\n", + " # Step the environment (reward is from the *current* player’s perspective)\n", + " next_state, reward, done, _ = env.step(action, player)\n", + "\n", + " traj.append((env.board_tensor(), action, reward, done))\n", + " player *= -1 # swap turn\n", + "\n", + " # Convert opponent‑reward to learner‑reward and store\n", + " for i, (s,a,r,d) in enumerate(traj):\n", + " if i % 2 == 1: # opponent’s move\n", + " r = -r\n", + " replay.append((s, a, r, d))\n", + "\n", + " return traj\n", + "\n", + "# -------------------------------------------------\n", + "# 5. 
Main training loop\n", + "# -------------------------------------------------\n", + "for epoch in range(1, 10001):\n", + " # ---- Self‑play -------------------------------------------------\n", + " for _ in range(NUM_SELFPLAY_GAMES):\n", + " play_one_game(policy_net, target_net)\n", + "\n", + " # ---- PPO update ------------------------------------------------\n", + " # Sample a random minibatch\n", + " batch = random.sample(replay, BATCH_SIZE)\n", + " states, actions, rewards, dones = zip(*batch)\n", + "\n", + " states = torch.stack(states) # (B, C, H, W)\n", + " actions = torch.tensor(actions)\n", + " rewards = torch.tensor(rewards, dtype=torch.float)\n", + "\n", + " # Compute discounted returns\n", + " returns = []\n", + " G = 0.0\n", + " for r in reversed(rewards):\n", + " G = r + GAMMA * G\n", + " returns.insert(0, G)\n", + " returns = torch.tensor(returns)\n", + "\n", + " # Policy forward pass\n", + " logits = policy_net(states) # (B, 7)\n", + " logp = torch.distributions.Categorical(logits=logits)\\\n", + " .log_prob(actions)\n", + "\n", + " # PPO surrogate loss\n", + " old_logp = logp.detach()\n", + " ratio = torch.exp(logp - old_logp)\n", + " surr1 = ratio * (returns - returns.mean()) # advantage ≈ return‑baseline\n", + " surr2 = torch.clamp(ratio, 1-CLIP_EPS, 1+CLIP_EPS) * (returns - returns.mean())\n", + " loss_pi = -torch.min(surr1, surr2).mean()\n", + "\n", + " optimizer.zero_grad()\n", + " loss_pi.backward()\n", + " optimizer.step()\n", + "\n", + " # ---- Refresh opponent -------------------------------------------\n", + " if epoch % UPDATE_EVERY == 0:\n", + " target_net = copy.deepcopy(policy_net)\n", + "\n", + " # ---- Logging ----------------------------------------------------\n", + " if epoch % 100 == 0:\n", + " win_rate = evaluate_against_random(policy_net, n=200)\n", + " print(f\"Epoch {epoch:5d} PPO loss {loss_pi.item():.4f} vs‑random win {win_rate:.2%}\")\n", + "\n", + " # ---- Optional buffer trimming to keep memory bounded ------------\n", 
+ " if len(replay) > 50000:\n", + " replay = replay[-25000:]\n", + "```\n", + "\n", + "**Key points illustrated by the code**\n", + "\n", + "| Part of the loop | Self‑play role |\n", + "|------------------|----------------|\n", + "| `play_one_game` | Two copies of the same algorithm (`policy_net` vs. `target_net`) generate the experience. |\n", + "| Reward sign flip (`r = -r` on opponent moves) | Guarantees that the learner receives a reward signal that reflects the zero‑sum nature of the game. |\n", + "| PPO clipping (`CLIP_EPS`) | Stabilises policy updates while still allowing improvement. |\n", + "| Periodic copy of `policy_net` into `target_net` | Provides the “slightly older” opponent that keeps the curriculum moving. |\n", + "| Evaluation against a random player | Gives a quick sanity check of learning progress. |\n", + "\n", + "After a few hundred thousand self‑play games the agent converges to a **near‑optimal Connect‑4 strategy**: it never loses and wins whenever the opponent makes a mistake.\n", + "\n", + "---\n", + "\n", + "## 3. AlphaZero Blueprint (MCTS + Neural Nets)\n", + "\n", + "For larger games (Go, Chess, Shogi) the same self‑play loop is enriched with **Monte‑Carlo Tree Search (MCTS)**:\n", + "\n", + "1. **Policy head** supplies a prior over legal moves; MCTS uses these priors to bias rollouts.\n", + "2. **Search** is run for each move, producing a *search‑enhanced* policy distribution π\\*_t (the proportion of visits to each child).\n", + "3. The training target for the policy network is the **search‑enhanced distribution**, while the value head is trained on the game outcome.\n", + "4. The loss (used in AlphaZero) is \n", + "\n", + "\\[\n", + "L = (z - V_\\phi(s))^2 \\;-\\; \\pi^\\*_t \\cdot \\log \\pi_\\theta(a|s) \\;+\\; c \\| \\theta \\|^2,\n", + "\\]\n", + "\n", + "where *z* is the final game result (±1 or 0). \n", + "5. 
The opponent is simply the **latest checkpoint** of the same network (or a soft‑updated mixture of several past checkpoints).\n", + "\n", + "This combination of **self‑play + MCTS + deep nets** is what enabled AlphaGo Zero, AlphaZero, and MuZero to achieve superhuman performance from scratch.\n", + "\n", + "---\n", + "\n", + "## 4. Why Self‑Play Works\n", + "\n", + "| Benefit | Explanation |\n", + "|---------|-------------|\n", + "| **Automatic curriculum** | The opponent improves together with the learner, so the difficulty scales naturally. |\n", + "| **No external data** | The agent creates its own training set; only a simulator is needed. |\n", + "| **Symmetry & fairness** | Both sides share the same inductive biases, eliminating bias from a hand‑crafted opponent. |\n", + "| **Domain‑agnostic** | Works for any environment with a simulator—board games, video games, multi‑agent tasks, robotics competitions, etc. |\n", + "| **Proven track record** | TD‑Gammon → AlphaGo/AlphaZero → OpenAI Five → MuZero. |\n", + "\n", + "---\n", + "\n", + "## 5. Common Pitfalls & Stabilisation Tricks\n", + "\n", + "| Pitfall | Mitigation |\n", + "|---------|------------|\n", + "| **Cyclic policies** (the learner only beats its immediate predecessor) | Keep a *population* of the last K checkpoints and sample one as the opponent each game. |\n", + "| **High variance in policy‑gradient updates** | Use a value baseline, advantage estimation, or entropy regularisation. |\n", + "| **Instability when the opponent is exactly equal** | Refresh the opponent **slowly** (soft update with τ ≈ 0.01) or keep it a few steps behind. |\n", + "| **Sparse rewards** (only at the end of the episode) | Combine self‑play with MCTS, epsilon‑greedy exploration, or add a small shaping reward for progress. |\n", + "| **Over‑fitting to a single opponent** | Randomise which copy starts (X vs. O) and optionally randomise the opponent from a buffer of past policies. |\n", + "\n", + "---\n", + "\n", + "## 6. 
Checklist for a Self‑Play Project\n", + "\n", + "1. **Environment** – deterministic or stochastic simulator, turn‑based or simultaneous actions. \n", + "2. **Network architecture** – policy head (softmax over legal moves) and optionally a value head; for board games a small CNN works well. \n", + "3. **Opponent handling** – lagged copy, soft update, or a replay buffer of past policies. \n", + "4. **RL algorithm** – PPO/A2C for on‑policy updates, REINFORCE + baseline for simplicity, or AlphaZero loss for MCTS‑augmented training. \n", + "5. **Exploration** – entropy bonus, temperature annealing, or MCTS depth limits for early stages. \n", + "6. **Training schedule** – number of self‑play games per epoch, batch size, opponent refresh frequency. \n", + "7. **Evaluation** – against random/minimax players and against previous checkpoints; log win‑rates, loss curves, and compute Elo if appropriate. \n", + "8. **Compute resources** – self‑play can be parallelised across many CPU cores or GPUs; MCTS adds CPU load, while deep‑net inference can be batched on GPUs.\n", + "\n", + "---\n", + "\n", + "## 7. TL;DR\n", + "\n", + "* **Self‑play** lets an RL agent generate its own training data by competing against a slightly older copy of itself. \n", + "* The opponent’s skill automatically tracks the learner’s skill, providing a **self‑adjusting curriculum** without any human data. \n", + "* A minimal implementation (e.g., Connect‑4 with PPO) follows the loop: initialise → lagged opponent → self‑play → compute returns → update policy → refresh opponent → evaluate. \n", + "* When combined with Monte‑Carlo Tree Search and a value head, this loop becomes the **AlphaZero** algorithm that has mastered Chess, Go, Shogi, and many other complex games. \n", + "* Benefits: data on‑demand, automatic curriculum, symmetry, and broad applicability. 
\n", + "* Pitfalls (cycling, high variance, instability) are mitigated with target networks, replay buffers of past policies, soft updates, and entropy or baseline techniques. \n", + "\n", + "Self‑play is therefore a cornerstone of modern game‑playing AI and a versatile tool for any RL problem where a simulator exists.\n" + ] + } + ], + "source": [ + "PROMPT = \"Explain self-play in reinforcement learning with a concrete example.\"\n", + "NUMBER_OF_CANDIDATES = 2 # start with five, go up if you need more intelligence!\n", + "\n", + "client = ollama.Client()\n", + "\n", + "\n", + "\n", + "result = pro_mode(client, PROMPT, NUMBER_OF_CANDIDATES)\n", + "\n", + "print(\"\\n=== FINAL ===\\n\", result[\"final\"])\n", + "# To inspect candidates:\n", + "# for i, c in enumerate(result[\"candidates\"], 1): print(f\"\\n--- Candidate {i} ---\\n{c}\")" + ] + } + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyMEoKFi+0SrPi/OXiwQ1i58", + "include_colab_link": true, + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/OpenAI_Open_Source_Pro_Mode_Ollama_Turbo.ipynb b/OpenAI_Open_Source_Pro_Mode_Ollama_Turbo.ipynb new file mode 100644 index 0000000..632544a --- /dev/null +++ b/OpenAI_Open_Source_Pro_Mode_Ollama_Turbo.ipynb @@ -0,0 +1,441 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "view-in-github" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3LLlSvTtU0Y3" + }, + "source": [ + "Made by Matt Shumer ([@mattshumer_](https://x.com/mattshumer_) on X)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "cellView": "form", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dzWS8LaqUw3N", + "outputId": "d4031e82-2131-4208-8f62-3923baca7cc4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: ollama in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (0.4.5)\n", + "Requirement already satisfied: httpx<0.28.0,>=0.27.0 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from ollama) (0.27.2)\n", + "Requirement already satisfied: pydantic<3.0.0,>=2.9.0 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from ollama) (2.11.7)\n", + "Requirement already satisfied: anyio in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (4.9.0)\n", + "Requirement already satisfied: certifi in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (2024.8.30)\n", + "Requirement already satisfied: httpcore==1.* in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (1.0.5)\n", + "Requirement already satisfied: idna in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (3.8)\n", + "Requirement already satisfied: sniffio in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (1.3.1)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from httpcore==1.*->httpx<0.28.0,>=0.27.0->ollama) (0.14.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/meirm/.pyenv/versions/3.12.8/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.9.0->ollama) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.33.2 in 
from typing import List, Dict, Any
import os
import time
import concurrent.futures as cf

import ollama

MODEL = "gpt-oss:120b"           # model name served by Ollama
MAX_COMPLETION_TOKENS = 30000    # upper bound on generated tokens per call


def _one_completion(client: "ollama.Client", question: str, temperature: float) -> str:
    """Send one question to the Ollama chat API and return the reply text.

    Retries up to 3 times with exponential back-off (0.5s, 1.0s) and
    re-raises the last exception if every attempt fails.

    Args:
        client: An ``ollama.Client`` instance.
        question: The user prompt to send.
        temperature: Sampling temperature for this generation.

    Returns:
        The assistant message content.
    """
    delay = 0.5
    for attempt in range(3):
        try:
            response = client.chat(
                model=MODEL,
                messages=[{'role': 'user', 'content': question}],
                # Ollama's option for capping generated tokens is 'num_predict';
                # 'max_completion_tokens' is an OpenAI parameter that Ollama
                # silently ignores, so the cap never took effect.
                options={'temperature': temperature, 'num_predict': MAX_COMPLETION_TOKENS},
            )
            return response['message']['content']
        except Exception:
            if attempt == 2:
                raise
            time.sleep(delay)
            delay *= 2


def _build_synthesis_messages(candidates: List[str]) -> List[Dict[str, str]]:
    """Build the system+user messages asking the model to merge candidates.

    Each candidate is wrapped in explicit <candidate_N> ... </candidate_N>
    tags, so the user prompt's claim that the answers are "delimited by
    tags" is actually true (the original joined bare texts and never used
    the enumeration index).
    """
    numbered = "\n\n".join(
        f"<candidate_{i + 1}>\n{txt}\n</candidate_{i + 1}>"
        for i, txt in enumerate(candidates)
    )
    system = (
        "You are an expert editor. Synthesize ONE best answer from the candidate "
        "answers provided, merging strengths, correcting errors, and removing repetition. "
        "Do not mention the candidates or the synthesis process. Be decisive and clear."
    )
    user = (
        f"You are given {len(candidates)} candidate answers delimited by tags.\n\n"
        f"{numbered}\n\nReturn the single best final answer."
    )
    return [{"role": "system", "content": system},
            {"role": "user", "content": user}]


def pro_mode(client: "ollama.Client", prompt: str, n_runs: int) -> Dict[str, Any]:
    """Fan out n_runs parallel generations at T=0.9 and synthesize at T=0.2.

    Args:
        client: An ``ollama.Client`` instance used for all requests.
        prompt: The user question.
        n_runs: Number of candidate generations; must be >= 1.

    Returns:
        {"final": str, "candidates": List[str]}

    Raises:
        ValueError: If ``n_runs`` is less than 1.
    """
    # Explicit raise instead of assert: asserts are stripped under `python -O`.
    if n_runs < 1:
        raise ValueError("n_runs must be >= 1")

    # Parallel candidate generations (threaded; Colab-friendly).
    max_workers = min(n_runs, 16)
    candidates: List[str] = [None] * n_runs  # filled by index to preserve order
    with cf.ThreadPoolExecutor(max_workers=max_workers) as ex:
        fut_to_idx = {
            ex.submit(_one_completion, client, prompt, 0.9): i
            for i in range(n_runs)
        }
        for fut in cf.as_completed(fut_to_idx):
            candidates[fut_to_idx[fut]] = fut.result()

    # Synthesis pass at low temperature for a decisive final answer.
    messages = _build_synthesis_messages(candidates)
    final_resp = client.chat(
        model=MODEL,
        messages=messages,
        options={'temperature': 0.2, 'num_predict': MAX_COMPLETION_TOKENS},
    )
    final = final_resp['message']['content']

    return {"final": final, "candidates": candidates}
|\n", + "| **Policy‑value network** `fθ(s) → (π, v)` | Small MLP for toy games; deep residual CNN or Transformer for large board games. `π` is a probability distribution over legal moves, `v∈[-1,1]` estimates the expected outcome from the current player’s perspective. |\n", + "| **Opponent** | Usually the *current* network itself. For stability a lagged copy (target network) or a pool of older checkpoints can be used. |\n", + "| **Search layer (optional)** | Monte‑Carlo Tree Search (MCTS) that uses `π` and `v` to produce a stronger move distribution `π_MCTS`. The search result becomes the training target for the policy head. |\n", + "| **Replay buffer** `D` | Stores tuples `(s, π_target, z)` where `z` is the final game result (+1/0/‑1) from the player who acted in `s`. |\n", + "| **Loss** | `L(θ) = (z‑v)² – π_target·log π + λ‖θ‖²` (value MSE + policy cross‑entropy + L2 regularisation). |\n", + "\n", + "---\n", + "\n", + "## 3. General self‑play training loop (high‑level pseudocode)\n", + "\n", + "```python\n", + "initialize network fθ randomly\n", + "D = empty replay buffer\n", + "target = copy of fθ # optional lagged opponent\n", + "\n", + "while not done:\n", + " # ---------- self‑play ----------\n", + " for _ in range(num_episodes):\n", + " game = new_game()\n", + " states, policies = [], []\n", + " while not game.terminal():\n", + " s = game.state()\n", + " # run MCTS if available, otherwise use raw policy\n", + " π = MCTS(s, fθ) if use_mcts else softmax(fθ(s).policy)\n", + " a = sample_move(π) # ε‑greedy or temperature >0\n", + " states.append(s)\n", + " policies.append(π)\n", + " game.step(a) # opponent = same net, other side\n", + " z = game.result() # +1 / 0 / -1 from the player who moved first\n", + " # store every position with the perspective‑corrected outcome\n", + " for i, s in enumerate(states):\n", + " zi = z if i % 2 == 0 else -z # flip sign every move\n", + " D.add((s, policies[i], zi))\n", + "\n", + " # ---------- learning ----------\n", + " for _ 
in range(train_steps):\n", + " batch = D.sample(batch_size)\n", + " loss = compute_loss(fθ, batch) # value + policy loss\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " # ---------- opponent update ----------\n", + " if iteration % target_update == 0:\n", + " target.load_state_dict(fθ.state_dict()) # or add fθ to a pool of past nets\n", + "```\n", + "\n", + "*Both sides query the **same** network; the only difference is the sign of the value (or a flip of the policy logits) to reflect which player is acting.*\n", + "\n", + "---\n", + "\n", + "## 4. Minimal working example – Tic‑Tac‑Toe\n", + "\n", + "### 4.1 Game definition (Python‑like)\n", + "\n", + "```python\n", + "class TicTacToe:\n", + " def __init__(self):\n", + " self.board = np.zeros(9, dtype=int) # 0 empty, 1 X, -1 O\n", + " self.player = 1 # X starts\n", + "\n", + " def legal(self):\n", + " return np.where(self.board == 0)[0]\n", + "\n", + " def step(self, a):\n", + " self.board[a] = self.player\n", + " win = self.check_winner()\n", + " done = win is not None or not self.legal().size\n", + " reward = 0\n", + " if done:\n", + " reward = 0 if win == 0 else (1 if win == self.player else -1)\n", + " self.player *= -1\n", + " return self.board.copy(), reward, done\n", + "\n", + " # returns 1 if X wins, -1 if O wins, 0 for draw, None otherwise\n", + " def check_winner(self):\n", + " lines = [(0,1,2),(3,4,5),(6,7,8),\n", + " (0,3,6),(1,4,7),(2,5,8),\n", + " (0,4,8),(2,4,6)]\n", + " for a,b,c in lines:\n", + " s = self.board[a] + self.board[b] + self.board[c]\n", + " if s == 3: return 1\n", + " if s == -3: return -1\n", + " if not (self.board == 0).any(): return 0\n", + " return None\n", + "```\n", + "\n", + "### 4.2 Tiny policy‑value net (MLP)\n", + "\n", + "```python\n", + "class Net(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.fc = nn.Sequential(\n", + " nn.Linear(9, 128), nn.ReLU(),\n", + " nn.Linear(128, 128), nn.ReLU()\n", + 
" )\n", + " self.policy_head = nn.Linear(128, 9) # logits\n", + " self.value_head = nn.Linear(128, 1) # tanh output\n", + "\n", + " def forward(self, board):\n", + " x = self.fc(board.float())\n", + " return self.policy_head(x), torch.tanh(self.value_head(x))\n", + "```\n", + "\n", + "### 4.3 Self‑play episode (no MCTS)\n", + "\n", + "```python\n", + "def play_episode(net, eps=0.1):\n", + " env = TicTacToe()\n", + " traj = [] # (state, player, policy)\n", + " while True:\n", + " s = torch.from_numpy(env.board).float()\n", + " logits, _ = net(s)\n", + " # mask illegal moves\n", + " mask = (env.board == 0).astype(float)\n", + " probs = torch.softmax(logits, dim=0) * torch.from_numpy(mask)\n", + " probs = probs / probs.sum()\n", + "\n", + " # ε‑greedy exploration\n", + " if random.random() < eps:\n", + " a = random.choice(env.legal())\n", + " else:\n", + " a = torch.multinomial(probs, 1).item()\n", + "\n", + " traj.append((s.clone(), env.player, probs.clone()))\n", + " _, reward, done = env.step(a)\n", + " if done:\n", + " # propagate final outcome to every step\n", + " for state, player, pi in traj:\n", + " z = reward * player # perspective‑corrected result\n", + " replay.add(state, pi, z)\n", + " break\n", + "```\n", + "\n", + "### 4.4 Training step\n", + "\n", + "```python\n", + "def train_step(net, batch):\n", + " states, pis, zs = zip(*batch) # each is a list of tensors\n", + " states = torch.stack(states)\n", + " target_pi = torch.stack(pis)\n", + " target_z = torch.tensor(zs).unsqueeze(1).float()\n", + "\n", + " logits, values = net(states)\n", + " policy_loss = -torch.mean(torch.sum(target_pi * torch.log_softmax(logits, dim=1), dim=1))\n", + " value_loss = torch.mean((values - target_z) ** 2)\n", + " loss = policy_loss + value_loss\n", + "\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + " optimizer.step()\n", + " return loss.item()\n", + "```\n", + "\n", + "Running the loop for a few hundred thousand episodes drives the network to a **draw‑only** 
policy—the optimal solution for Tic‑Tac‑Toe. The same code, with a deeper net and MCTS, scales to Go, Chess, Shogi (AlphaZero) and even to multi‑agent video games.\n", + "\n", + "---\n", + "\n", + "## 5. Scaling up – what changes for AlphaZero‑style systems\n", + "\n", + "| Change | Reason |\n", + "|--------|--------|\n", + "| **Monte‑Carlo Tree Search** | Provides a strong move distribution `π_MCTS` that is used as the policy target; dramatically improves data efficiency in huge state spaces. |\n", + "| **Large replay buffer (≈10⁶–10⁷ games)** | Guarantees a diverse training set and smooths the non‑stationarity caused by the moving opponent. |\n", + "| **Lagged or pooled opponents** | Instead of always playing against the current net, sample from a *league* of older checkpoints; prevents cyclic strategies and stabilises convergence. |\n", + "| **Curriculum on temperature & Dirichlet noise** | Early games use high temperature (more random) and added Dirichlet noise to explore openings; later games become more deterministic. |\n", + "| **Distributed generation** | Self‑play is parallelised over thousands of CPU cores / GPUs; training is performed on GPUs. |\n", + "| **Regularisation & target network** | L2 weight decay, dropout, and a slowly‑updated target net reduce catastrophic policy swings. |\n", + "\n", + "With these additions AlphaZero learned master‑level play in Chess, Shogi and Go from scratch in a few days of compute.\n", + "\n", + "---\n", + "\n", + "## 6. Common pitfalls & practical tips\n", + "\n", + "| Pitfall | Mitigation |\n", + "|---------|------------|\n", + "| **Mode collapse (repeating the same line)** | Keep a pool of past opponents, inject exploration noise, or use a temperature schedule. |\n", + "| **Non‑stationary targets** | Use a target network for generating `π_target` and `v_target`, or sample from a replay buffer that contains older games. 
|\n", + "| **Sparse reward** (only win/loss) | Add intermediate shaping (e.g., material advantage in Chess) *only* for the value head; keep the policy target derived from search. |\n", + "| **Credit assignment in long games** | Use bootstrapped value estimates from the network inside MCTS, or employ TD(λ) / n‑step returns instead of pure Monte‑Carlo returns. |\n", + "| **Over‑fitting to a single opponent** | Periodically evaluate against a fixed strong baseline (e.g., a handcrafted engine) to detect regression. |\n", + "\n", + "---\n", + "\n", + "## 7. Quick checklist for a new self‑play project\n", + "\n", + "1. **Define a two‑player environment** with clear win/loss reward. \n", + "2. **Choose a network architecture** (MLP for tiny games, CNN/Transformer for board games). \n", + "3. **Implement a copy‑as‑opponent** routine (direct clone, lagged copy, or opponent pool). \n", + "4. **Write a self‑play generator** that records `(state, policy, outcome)` for every move. \n", + "5. **Select an RL algorithm** – REINFORCE, PPO, or Q‑learning; combine with a value head if possible. \n", + "6. **Set up a replay buffer** and a training step that minimises the combined policy‑value loss. \n", + "7. **Add stabilisation tricks** – target network, opponent pool, exploration noise, temperature schedule. \n", + "8. **Periodically evaluate** against older checkpoints or a known baseline to monitor progress. \n", + "\n", + "---\n", + "\n", + "## TL;DR\n", + "\n", + "*Self‑play* lets an RL agent improve by repeatedly playing a game against a copy of itself. The opponent’s strength rises together with the learner, providing an automatic curriculum and removing the need for external data. The basic loop is:\n", + "\n", + "1. **Generate games** with the current policy (optionally guided by MCTS). \n", + "2. **Store** for each move the state, the improved move distribution, and the final win/loss outcome. \n", + "3. 
**Update** a shared policy‑value network with a loss that combines value regression and policy cross‑entropy. \n", + "4. **Refresh** the opponent (lagged copy or pool) and repeat.\n", + "\n", + "A tiny neural net trained on self‑play Tic‑Tac‑Toe converges to perfect (draw‑only) play; the same framework, augmented with MCTS, large replay buffers, and opponent leagues, underlies world‑class systems such as AlphaZero, AlphaStar, OpenAI Five, and many multi‑agent robotics or language‑game projects.\n" + ] + } + ], + "source": [ + "PROMPT = \"Explain self-play in reinforcement learning with a concrete example.\"\n", + "NUMBER_OF_CANDIDATES = 5 # start with five, go up if you need more intelligence!\n", + "OLLAMA_API_KEY = \"yourkey\"\n", + "\n", + "client = ollama.Client(\n", + " host=\"https://ollama.com\",\n", + " headers={'Authorization': OLLAMA_API_KEY}\n", + ")\n", + "\n", + "\n", + "\n", + "result = pro_mode(client, PROMPT, NUMBER_OF_CANDIDATES)\n", + "\n", + "print(\"\\n=== FINAL ===\\n\", result[\"final\"])\n", + "# To inspect candidates:\n", + "# for i, c in enumerate(result[\"candidates\"], 1): print(f\"\\n--- Candidate {i} ---\\n{c}\")" + ] + } + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyMEoKFi+0SrPi/OXiwQ1i58", + "include_colab_link": true, + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/README.md b/README.md index e3de401..31b27f2 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,151 @@ +# GPT-OSS Pro Mode + +A collection of Jupyter notebooks that implement **Pro Mode** - an advanced AI reasoning technique that generates multiple candidate responses in parallel and then synthesizes 
them into a single, high-quality answer. + +## 🎯 What is Pro Mode? + +Pro Mode is a sophisticated approach to AI reasoning that mimics how expert humans think through complex problems: + +1. **Generate Multiple Perspectives**: Creates several candidate responses to the same question +2. **Parallel Processing**: Uses multiple AI instances simultaneously for efficiency +3. **Intelligent Synthesis**: Combines the best parts of each candidate into a final, refined answer + +This technique significantly improves answer quality, reduces errors, and provides more comprehensive responses compared to single-shot AI interactions. + +## 🚀 Key Benefits + +- **Higher Quality Answers**: Multiple perspectives lead to more thorough and accurate responses +- **Error Reduction**: Synthesis process catches and corrects individual mistakes +- **Better Reasoning**: Combines strengths from different approaches +- **Comprehensive Coverage**: Addresses aspects that single responses might miss + +## 📁 Available Implementations + +This repository contains three different implementations of Pro Mode: + +### 1. **Groq Implementation** (`OpenAI_Open_Source_Pro_Mode_Groq.ipynb`) +- Uses Groq's fast inference API +- Requires Groq API key +- Optimized for speed and cost-effectiveness +- Best for production use cases + +### 2. **Ollama Local Implementation** (`OpenAI_Open_Source_Pro_Mode_Ollama_Local.ipynb`) +- Runs against Ollama +- No API costs or internet required when pointed at a local Ollama server (note: the sample cell as written connects to the hosted `ollama.com` endpoint with an API key — change the client `host` for fully offline use) +- Uses the `gpt-oss:120b` model +- Perfect for privacy-conscious users + +### 3. **Ollama Turbo Implementation** (`OpenAI_Open_Source_Pro_Mode_Ollama_Turbo.ipynb`) +- Enhanced version with additional features +- More sophisticated synthesis process +- Better error handling and retry logic + +## 🔧 How It Works + +### The Pro Mode Process + +1. 
**Parallel Generation**: + - Takes your prompt and generates `n_runs` candidate responses simultaneously + - Uses high temperature (0.9) for creative diversity + - Runs in parallel threads for efficiency + +2. **Synthesis Phase**: + - An expert editor AI analyzes all candidates + - Merges strengths, corrects errors, removes repetition + - Uses low temperature (0.2) for focused synthesis + - Produces a single, refined final answer + +3. **Quality Output**: + - Returns both the final synthesized answer and all candidates + - Allows inspection of individual candidates if needed + +### Example Usage + +```python +# Set up your preferred implementation +# (Groq, Ollama Local, or Ollama Turbo) + +# Define your question +PROMPT = "Explain self-play in reinforcement learning with a concrete example." +NUMBER_OF_CANDIDATES = 3 # Adjust based on complexity + +# Run Pro Mode +result = pro_mode(client, PROMPT, NUMBER_OF_CANDIDATES) + +# Get the final synthesized answer +print("=== FINAL ANSWER ===") +print(result["final"]) + +# Optionally inspect individual candidates +for i, candidate in enumerate(result["candidates"], 1): + print(f"\n--- Candidate {i} ---") + print(candidate) +``` + +## 🛠️ Setup Instructions + +### For Groq Implementation: +1. Get a Groq API key from [groq.com](https://groq.com) +2. Set environment variable: `export GROQ_API_KEY="your-key-here"` +3. Run the notebook: `OpenAI_Open_Source_Pro_Mode_Groq.ipynb` + +### For Ollama Local Implementation: +1. Install Ollama: `curl -fsSL https://ollama.ai/install.sh | sh` +2. Pull the model: `ollama pull gpt-oss:120b` +3. Run the notebook: `OpenAI_Open_Source_Pro_Mode_Ollama_Local.ipynb` + +### For Ollama Turbo Implementation: +1. Follow same setup as Ollama Local +2. 
Run the notebook: `OpenAI_Open_Source_Pro_Mode_Ollama_Turbo.ipynb` + +## ⚙️ Configuration Options + +- **`n_runs`**: Number of candidate responses (2-5 recommended) +- **`MAX_COMPLETION_TOKENS`**: Maximum response length (default: 30000) +- **Temperature**: 0.9 for candidates, 0.2 for synthesis +- **Model**: `openai/gpt-oss-120b` (Groq) or `gpt-oss:120b` (Ollama) + +## 🎯 When to Use Pro Mode + +**Best for:** +- Complex reasoning problems +- Technical explanations +- Creative writing tasks +- Problem-solving scenarios +- Research and analysis + +**Not needed for:** +- Simple factual questions +- Basic text generation +- Real-time applications (due to parallel processing time) + +## 📊 Performance Tips + +- Start with 2-3 candidates for most questions +- Increase to 4-5 for very complex problems +- Monitor API costs when using Groq +- Consider local Ollama for privacy-sensitive tasks + +## 🤝 Contributing + +Created by Matt Shumer ([@mattshumer_](https://x.com/mattshumer_) on X) + +Feel free to: +- Submit issues and feature requests +- Contribute improvements to the synthesis logic +- Add new model implementations +- Share your use cases and results + +## 📄 License + +This project is open source. Please check individual notebook headers for specific licensing information. + +--- + +**Pro Mode transforms single-shot AI interactions into collaborative reasoning sessions, delivering significantly higher quality results through intelligent synthesis of multiple perspectives.** + # gpt-oss-pro-mode [![Twitter Follow](https://img.shields.io/twitter/follow/mattshumer_?style=social)](https://x.com/mattshumer_) -[![Open Notebook In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1XeYmOHJwACtavCjJM-eOqlPxHgTD2KNP?usp=sharing) - -Run the attached notebook to access Pro mode! Star this repo and let me know what you want me to add! 
+[![Open Notebook In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1XeYmOHJwACtavCjJM-eOqlPxHgTD2KNP?usp=sharing) \ No newline at end of file