Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 30 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ With mini-browser: clean text → 800 tokens → $

**mini-browser** sits between your AI agent and the web:

1. **Searches** — DuckDuckGo, no API key required, spam filtered, recency-aware
1. **Searches** — DuckDuckGo (default, no API key) or Tavily (optional, higher quality), spam filtered, recency-aware
2. **Fetches** — full Playwright browser for JS-heavy sites (Yahoo Finance, TradingView, Bloomberg), fast httpx for simple sites
3. **Extracts** — trafilatura + BeautifulSoup strip all noise (nav/ads/footer)
4. **Compresses** — sentence-level relevance scoring keeps only what matters for your query
Expand Down Expand Up @@ -94,6 +94,11 @@ python -m playwright install chromium
pip install "mini-browser[mcp] @ git+https://github.com/ghanibot/mini-browser.git"
```

### With Tavily search
```bash
pip install "mini-browser[tavily] @ git+https://github.com/ghanibot/mini-browser.git"
```

### With PDF support
```bash
pip install "mini-browser[pdf] @ git+https://github.com/ghanibot/mini-browser.git"
Expand All @@ -105,7 +110,7 @@ pip install "mini-browser[full] @ git+https://github.com/ghanibot/mini-browser.g
python -m playwright install chromium
```

**Requirements:** Python 3.10+, internet connection, no API keys needed.
**Requirements:** Python 3.10+, internet connection. No API keys needed for default DuckDuckGo search; Tavily requires a `TAVILY_API_KEY` (see [Configuration](#search-provider) below).

---

Expand Down Expand Up @@ -238,7 +243,7 @@ result = handle_tool_call(tool_name, tool_arguments)
| **Recency detection** | Queries containing "terbaru/latest/hari ini" auto-apply a search time filter (DuckDuckGo `timelimit`, mapped to Tavily's `time_range`) |
| **Retry with backoff** | Failed fetches retry 2x with exponential backoff |
| **Configurable domains** | Add custom JS-heavy domains via env var or `.mini-browser.json` |
| **No API keys** | Uses DuckDuckGo — completely free |
| **No API keys required** | Uses DuckDuckGo by default — completely free. Optional Tavily provider (requires `TAVILY_API_KEY`) for higher-quality results |

---

Expand All @@ -258,6 +263,28 @@ Fetching a news article:

## Configuration

### Search Provider

By default, mini-browser uses DuckDuckGo (no API key required). You can switch to [Tavily](https://tavily.com) for higher-quality, AI-optimized search results.

| Environment Variable | Description | Default |
|----------------------|-------------|---------|
| `MINI_BROWSER_SEARCH_PROVIDER` | Search backend to use: `duckduckgo` or `tavily` | `duckduckgo` |
| `TAVILY_API_KEY` | Your Tavily API key (required when provider is `tavily`) | — |

```bash
# Install with Tavily support
pip install "mini-browser[tavily] @ git+https://github.com/ghanibot/mini-browser.git"

# Use Tavily as search provider
export MINI_BROWSER_SEARCH_PROVIDER=tavily
export TAVILY_API_KEY=tvly-YOUR_API_KEY

mini-browser search "latest AI news today"
```

Get a free Tavily API key (1,000 credits/month) at [app.tavily.com](https://app.tavily.com).

### Custom JS-heavy domains

**Via env var:**
Expand Down
5 changes: 5 additions & 0 deletions mini_browser/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,8 @@ def add_js_domain(domain: str) -> None:
domains = get_js_domains()
domains.add(domain.removeprefix("www."))
_cached_domains = domains


def get_search_provider() -> str:
    """Return the configured search provider ('duckduckgo' or 'tavily').

    The value is read from the ``MINI_BROWSER_SEARCH_PROVIDER`` environment
    variable on every call. It is normalised by stripping surrounding
    whitespace and lower-casing, so ``" Tavily "`` selects ``"tavily"``.

    Returns:
        The normalised provider name. ``"duckduckgo"`` is returned when the
        variable is unset, empty, or contains only whitespace. Any other
        value is returned as-is (normalised); it is not validated here.
    """
    raw = os.environ.get("MINI_BROWSER_SEARCH_PROVIDER", "")
    # An exported-but-empty variable should behave like an unset one rather
    # than yielding an empty provider name.
    return raw.strip().lower() or "duckduckgo"
65 changes: 64 additions & 1 deletion mini_browser/search.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import re
from urllib.parse import urlparse

Expand All @@ -6,6 +7,10 @@
except ImportError:
from duckduckgo_search import DDGS

from mini_browser.config import get_search_provider

_log = logging.getLogger(__name__)

# Domains known for spam, clickbait, or low-quality content
_BLOCKLIST_PATTERNS = [
r"\.store/",
Expand All @@ -31,10 +36,25 @@ def search_urls(
timelimit: str | None = None,
) -> list[dict]:
"""
Search DuckDuckGo. Returns list of dicts: href, title, body.
Search the web. Returns list of dicts: href, title, body.

Provider is selected via MINI_BROWSER_SEARCH_PROVIDER env var
('duckduckgo' default, or 'tavily').

timelimit: "d" (day), "w" (week), "m" (month), "y" (year)
"""
provider = get_search_provider()
if provider == "tavily":
return _search_tavily(query, max_results, timelimit)
return _search_ddgs(query, max_results, timelimit)


def _search_ddgs(
query: str,
max_results: int = 5,
timelimit: str | None = None,
) -> list[dict]:
"""Search via DuckDuckGo."""
try:
kwargs: dict = {"max_results": max_results * 3}
if timelimit:
Expand All @@ -47,6 +67,49 @@ def search_urls(
return []


# Translates the single-letter ``timelimit`` codes accepted by the search
# functions into the ``time_range`` values passed to Tavily.
_TIMELIMIT_TO_TIME_RANGE = {
    "d": "day",
    "w": "week",
    "m": "month",
    "y": "year",
}


def _search_tavily(
    query: str,
    max_results: int = 5,
    timelimit: str | None = None,
) -> list[dict]:
    """Search via Tavily and normalise results to {href, title, body}.

    Args:
        query: The search query string.
        max_results: Maximum number of results requested from Tavily.
        timelimit: Optional recency code: "d" (day), "w" (week),
            "m" (month) or "y" (year). Unknown codes are ignored and the
            search runs without a time filter.

    Returns:
        A list of dicts with the keys ``href``, ``title`` and ``body``.
        Results that carry no URL are dropped, since they cannot be fetched.
        An empty list is returned if the Tavily call fails for any reason;
        the failure is logged as a warning with the traceback.

    Raises:
        ImportError: If the optional ``tavily-python`` package is not
            installed. This is raised deliberately instead of returning an
            empty list, so a misconfigured install is not mistaken for
            "no results".
    """
    try:
        from tavily import TavilyClient
    except ImportError as exc:
        # Chain the original error so the real import failure stays visible.
        raise ImportError(
            "tavily-python is required for the Tavily search provider. "
            "Install it with: pip install mini-browser[tavily]"
        ) from exc

    try:
        # No key is passed explicitly; presumably the client reads
        # TAVILY_API_KEY from the environment (per the README) — confirm
        # against the tavily-python docs.
        client = TavilyClient()
        kwargs: dict = {"max_results": max_results}
        time_range = _TIMELIMIT_TO_TIME_RANGE.get(timelimit) if timelimit else None
        if time_range:
            kwargs["time_range"] = time_range
        response = client.search(query, **kwargs)
        # `or` guards normalise missing or null fields to empty values.
        return [
            {
                "href": item["url"],
                "title": item.get("title") or "",
                "body": item.get("content") or "",
            }
            for item in (response.get("results") or [])
            if item.get("url")
        ]
    except Exception:
        # Best-effort provider: network/API/auth errors degrade to no results.
        _log.warning("Tavily search failed for query %r", query, exc_info=True)
        return []


def _is_quality(result: dict) -> bool:
url = result.get("href", "")
snippet = result.get("body", "")
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ dependencies = [
mcp = ["mcp>=1.0.0"]
playwright = ["playwright>=1.40.0"]
pdf = ["pdfplumber>=0.10.0", "pypdf>=4.0.0"]
full = ["mcp>=1.0.0", "playwright>=1.40.0", "pdfplumber>=0.10.0", "pypdf>=4.0.0"]
tavily = ["tavily-python>=0.5.0"]
full = ["mcp>=1.0.0", "playwright>=1.40.0", "pdfplumber>=0.10.0", "pypdf>=4.0.0", "tavily-python>=0.5.0"]
dev = [
"pytest>=8.0.0",
"pytest-asyncio>=0.23.0",
Expand Down