diff --git a/CLAUDE.md b/CLAUDE.md index 71bf4d7..e38f285 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -51,24 +51,13 @@ StackOne AI SDK is a Python library that provides a unified interface for access - `Tools`: Container for managing multiple tools - Format converters for different AI frameworks -3. **OpenAPI Parser** (`stackone_ai/specs/parser.py`): Spec conversion - - Converts OpenAPI specs to tool definitions - - Handles file upload detection (`format: binary` → `type: file`) - - Resolves schema references - -### OpenAPI Specifications - -All tool definitions are generated from OpenAPI specs in `stackone_ai/oas/`: - -- `core.json`, `ats.json`, `crm.json`, `documents.json`, `hris.json`, `iam.json`, `lms.json`, `marketing.json` - ## Key Development Patterns ### Tool Filtering ```python # Use glob patterns for tool selection -tools = StackOneToolSet(include_tools=["hris_*", "!hris_create_*"]) +tools = StackOneToolSet(include_tools=["bamboohr_*", "!bamboohr_create_*"]) ``` ### Authentication diff --git a/README.md b/README.md index 7e26dbd..293e986 100644 --- a/README.md +++ b/README.md @@ -16,10 +16,11 @@ StackOne AI provides a unified interface for accessing various SaaS tools throug - **Tool Calling**: Direct method calling with `tool.call()` for intuitive usage - **MCP-backed Dynamic Discovery**: Fetch tools at runtime via `fetch_tools()` with provider, action, and account filtering - **Advanced Tool Filtering**: - - Glob pattern filtering with patterns like `"hris_*"` and exclusions `"!hris_delete_*"` + - Glob pattern filtering with patterns like `"salesforce_*"` and exclusions `"!*_delete_*"` - Provider and action filtering - Multi-account support -- **Utility Tools** (Beta): Dynamic tool discovery and execution based on natural language queries +- **Semantic Search**: AI-powered tool discovery using natural language queries +- **Search Tool**: Callable tool discovery for agent loops via `get_search_tool()` - Integration with popular AI frameworks: - OpenAI 
Functions - LangChain Tools @@ -58,10 +59,10 @@ toolset = StackOneToolSet() # Uses STACKONE_API_KEY env var # Or explicitly: toolset = StackOneToolSet(api_key="your-api-key") # Get HRIS-related tools with glob patterns -tools = toolset.fetch_tools(actions=["hris_*"], account_ids=["your-account-id"]) +tools = toolset.fetch_tools(actions=["bamboohr_*"], account_ids=["your-account-id"]) # Use a specific tool with the call method -employee_tool = tools.get_tool("hris_get_employee") +employee_tool = tools.get_tool("bamboohr_get_employee") # Call with keyword arguments employee = employee_tool.call(id="employee-id") # Or with traditional execute method @@ -107,9 +108,9 @@ tools = toolset.fetch_tools(providers=["hibob"]) - **`account_ids`**: Filter tools by account IDs. Tools will be loaded for each specified account. - **`providers`**: Filter by provider names (e.g., `["hibob", "bamboohr"]`). Case-insensitive matching. - **`actions`**: Filter by action patterns with glob support: - - Exact match: `["hris_list_employees"]` + - Exact match: `["bamboohr_list_employees"]` - Glob pattern: `["*_list_employees"]` matches all tools ending with `_list_employees` - - Provider prefix: `["hris_*"]` matches all HRIS tools + - Provider prefix: `["bamboohr_*"]` matches all BambooHR tools ## Implicit Feedback (Beta) @@ -169,7 +170,7 @@ from stackone_ai import StackOneToolSet # Initialize StackOne tools toolset = StackOneToolSet() -tools = toolset.fetch_tools(actions=["hris_*"], account_ids=["your-account-id"]) +tools = toolset.fetch_tools(actions=["bamboohr_*"], account_ids=["your-account-id"]) # Convert to LangChain format langchain_tools = tools.to_langchain() @@ -216,7 +217,7 @@ from stackone_ai.integrations.langgraph import to_tool_node, bind_model_with_too # Prepare tools toolset = StackOneToolSet() -tools = toolset.fetch_tools(actions=["hris_*"], account_ids=["your-account-id"]) +tools = toolset.fetch_tools(actions=["bamboohr_*"], account_ids=["your-account-id"]) langchain_tools = 
tools.to_langchain() class State(TypedDict): @@ -254,7 +255,7 @@ from stackone_ai import StackOneToolSet # Get tools and convert to LangChain format toolset = StackOneToolSet() -tools = toolset.fetch_tools(actions=["hris_*"], account_ids=["your-account-id"]) +tools = toolset.fetch_tools(actions=["bamboohr_*"], account_ids=["your-account-id"]) langchain_tools = tools.to_langchain() # Create CrewAI agent with StackOne tools @@ -296,7 +297,7 @@ feedback_tool = tools.get_tool("tool_feedback") result = feedback_tool.call( feedback="The HRIS tools are working great! Very fast response times.", account_id="acc_123456", - tool_names=["hris_list_employees", "hris_get_employee"] + tool_names=["bamboohr_list_employees", "bamboohr_get_employee"] ) ``` @@ -305,26 +306,59 @@ result = feedback_tool.call( - "Are you ok with sending feedback to StackOne? The LLM will take care of sending it." - Only call the tool after the user explicitly agrees. -## Utility Tools (Beta) +## Search Tool -Utility tools enable dynamic tool discovery and execution without hardcoding tool names. +Search for tools using natural language queries. Works with both semantic (cloud) and local BM25+TF-IDF search. 
### Basic Usage ```python -# Get utility tools for dynamic discovery -tools = toolset.fetch_tools(actions=["hris_*"]) -utility_tools = tools.utility_tools() +# Get a callable search tool +toolset = StackOneToolSet() +all_tools = toolset.fetch_tools(account_ids=["your-account-id"]) +search_tool = toolset.get_search_tool() -# Search for relevant tools using natural language -filter_tool = utility_tools.get_tool("tool_search") -results = filter_tool.call(query="manage employees", limit=5) +# Search for relevant tools — returns a Tools collection +tools = search_tool("manage employees", top_k=5) -# Execute discovered tools dynamically -execute_tool = utility_tools.get_tool("tool_execute") -result = execute_tool.call(toolName="hris_list_employees", params={"limit": 10}) +# Execute a discovered tool directly +tools[0](limit=10) ``` +## Semantic Search + +Discover tools using natural language instead of exact names. Queries like "onboard new hire" resolve to the right actions even when the tool is called `bamboohr_create_employee`. 
+ +```python +from stackone_ai import StackOneToolSet + +toolset = StackOneToolSet() + +# Search by intent — returns Tools collection ready for any framework +tools = toolset.search_tools("manage employee records", account_ids=["your-account-id"], top_k=5) +openai_tools = tools.to_openai() + +# Lightweight: inspect results without fetching full tool definitions +results = toolset.search_action_names("time off requests", top_k=5) +``` + +### Search Modes + +Control which search backend `search_tools()` uses via the `search` parameter: + +```python +# "auto" (default) — tries semantic search first, falls back to local +tools = toolset.search_tools("manage employees", search="auto") + +# "semantic" — semantic API only, raises if unavailable +tools = toolset.search_tools("manage employees", search="semantic") + +# "local" — local BM25+TF-IDF only, no semantic API call +tools = toolset.search_tools("manage employees", search="local") +``` + +Results are automatically scoped to connectors in your linked accounts. See [Semantic Search Example](examples/semantic_search_example.py) for `SearchTool` (`get_search_tool`) integration, OpenAI, and LangChain patterns. + ## Examples For more examples, check out the [examples/](examples/) directory: @@ -334,7 +368,8 @@ For more examples, check out the [examples/](examples/) directory: - [OpenAI Integration](examples/openai_integration.py) - [LangChain Integration](examples/langchain_integration.py) - [CrewAI Integration](examples/crewai_integration.py) -- [Utility Tools](examples/utility_tools_example.py) +- [Search Tool](examples/search_tool_example.py) +- [Semantic Search](examples/semantic_search_example.py) ## Development diff --git a/examples/search_tool_example.py b/examples/search_tool_example.py new file mode 100644 index 0000000..ea7dde2 --- /dev/null +++ b/examples/search_tool_example.py @@ -0,0 +1,314 @@ +#!/usr/bin/env python +""" +Example demonstrating dynamic tool discovery using search_tool. 
+ +The search tool allows AI agents to discover relevant tools based on natural language +queries without hardcoding tool names. + +Prerequisites: +- STACKONE_API_KEY environment variable set +- STACKONE_ACCOUNT_ID environment variable set (comma-separated for multiple) +- At least one linked account in StackOne (this example uses BambooHR) + +This example is runnable with the following command: +```bash +uv run examples/search_tool_example.py +``` +""" + +import os + +from stackone_ai import StackOneToolSet + +try: + from dotenv import load_dotenv + + load_dotenv() +except ModuleNotFoundError: + pass + +# Read account IDs from environment — supports comma-separated values +_account_ids = [aid.strip() for aid in os.getenv("STACKONE_ACCOUNT_ID", "").split(",") if aid.strip()] + + +def example_search_tool_basic(): + """Basic example of using the search tool for tool discovery""" + print("Example 1: Dynamic tool discovery\n") + + # Initialize StackOne toolset + toolset = StackOneToolSet() + + # Get all available tools using MCP-backed fetch_tools() + all_tools = toolset.fetch_tools(account_ids=_account_ids) + print(f"Total tools available: {len(all_tools)}") + + if not all_tools: + print("No tools found. Check your linked accounts.") + return + + # Get a search tool for dynamic discovery + search_tool = toolset.get_search_tool() + + # Search for employee management tools — returns a Tools collection + tools = search_tool("manage employees create update list", top_k=5, account_ids=_account_ids) + + print(f"Found {len(tools)} relevant tools:") + for tool in tools: + print(f" - {tool.name}: {tool.description}") + + print() + + +def example_search_modes(): + """Comparing semantic vs local search modes. 
+ + Search config can be set at the constructor level or overridden per call: + - Constructor: StackOneToolSet(search={"method": "semantic"}) + - Per-call: toolset.search_tools(query, search="local") + + The search method controls which backend search_tools() uses: + - "semantic": cloud-based semantic vector search (higher accuracy for natural language) + - "local": local BM25+TF-IDF hybrid search (no network call to semantic API) + - "auto" (default): tries semantic first, falls back to local on failure + """ + print("Example 2: Semantic vs local search modes\n") + + query = "manage employee time off" + + # Constructor-level config — semantic search as the default for this toolset + print('Constructor config: StackOneToolSet(search={"method": "semantic"})') + toolset_semantic = StackOneToolSet(search={"method": "semantic"}) + try: + tools_semantic = toolset_semantic.search_tools(query, account_ids=_account_ids, top_k=5) + print(f" Found {len(tools_semantic)} tools:") + for tool in tools_semantic: + print(f" - {tool.name}") + except Exception as e: + print(f" Semantic search unavailable: {e}") + print() + + # Constructor-level config — local search (no network call to semantic API) + print('Constructor config: StackOneToolSet(search={"method": "local"})') + toolset_local = StackOneToolSet(search={"method": "local"}) + tools_local = toolset_local.search_tools(query, account_ids=_account_ids, top_k=5) + print(f" Found {len(tools_local)} tools:") + for tool in tools_local: + print(f" - {tool.name}") + print() + + # Per-call override — constructor defaults can be overridden on each call + print("Per-call override: constructor uses semantic, but this call uses local") + tools_override = toolset_semantic.search_tools(query, account_ids=_account_ids, top_k=5, search="local") + print(f" Found {len(tools_override)} tools:") + for tool in tools_override: + print(f" - {tool.name}") + print() + + # Auto (default) — tries semantic, falls back to local + print('Default: 
StackOneToolSet() uses search="auto" (semantic with local fallback)') + toolset_auto = StackOneToolSet() + tools_auto = toolset_auto.search_tools(query, account_ids=_account_ids, top_k=5) + print(f" Found {len(tools_auto)} tools:") + for tool in tools_auto: + print(f" - {tool.name}") + print() + + +def example_top_k_config(): + """Configuring top_k at the constructor level vs per-call. + + Constructor-level top_k applies to all search_tools() and search_action_names() + calls. Per-call top_k overrides the constructor default for that single call. + """ + print("Example 3: top_k at constructor vs per-call\n") + + # Constructor-level top_k — all calls default to returning 3 results + toolset = StackOneToolSet(search={"top_k": 3}) + + query = "manage employee records" + print(f'Constructor top_k=3: searching for "{query}"') + tools_default = toolset.search_tools(query, account_ids=_account_ids) + print(f" Got {len(tools_default)} tools (constructor default)") + for tool in tools_default: + print(f" - {tool.name}") + print() + + # Per-call override — this single call returns up to 10 results + print("Per-call top_k=10: overriding constructor default") + tools_override = toolset.search_tools(query, account_ids=_account_ids, top_k=10) + print(f" Got {len(tools_override)} tools (per-call override)") + for tool in tools_override: + print(f" - {tool.name}") + print() + + +def example_search_tool_with_execution(): + """Example of discovering and executing tools dynamically""" + print("Example 4: Dynamic tool execution\n") + + # Initialize toolset + toolset = StackOneToolSet() + + # Get all tools using MCP-backed fetch_tools() + all_tools = toolset.fetch_tools(account_ids=_account_ids) + + if not all_tools: + print("No tools found. 
Check your linked accounts.") + return + + search_tool = toolset.get_search_tool() + + # Step 1: Search for relevant tools + tools = search_tool("list all employees", top_k=1, account_ids=_account_ids) + + if tools: + best_tool = tools[0] + print(f"Best matching tool: {best_tool.name}") + print(f"Description: {best_tool.description}") + + # Step 2: Execute the found tool directly + try: + print(f"\nExecuting {best_tool.name}...") + result = best_tool(limit=5) + print(f"Execution result: {result}") + except Exception as e: + print(f"Execution failed (expected in example): {e}") + + print() + + +def example_with_openai(): + """Example of using search tool with OpenAI""" + print("Example 5: Using search tool with OpenAI\n") + + try: + from openai import OpenAI + + # Initialize OpenAI client + client = OpenAI() + + # Initialize StackOne toolset + toolset = StackOneToolSet() + + # Search for BambooHR employee tools + tools = toolset.search_tools("manage employees", account_ids=_account_ids, top_k=5) + + # Convert to OpenAI format + openai_tools = tools.to_openai() + + # Create a chat completion with discovered tools + response = client.chat.completions.create( + model="gpt-4", + messages=[ + { + "role": "system", + "content": "You are an HR assistant with access to employee management tools.", + }, + {"role": "user", "content": "Can you help me find tools for managing employee records?"}, + ], + tools=openai_tools, + tool_choice="auto", + ) + + print("OpenAI Response:", response.choices[0].message.content) + + if response.choices[0].message.tool_calls: + print("\nTool calls made:") + for tool_call in response.choices[0].message.tool_calls: + print(f" - {tool_call.function.name}") + + except ImportError: + print("OpenAI library not installed. 
Install with: pip install openai") + except Exception as e: + print(f"OpenAI example failed: {e}") + + print() + + +def example_with_langchain(): + """Example of using tools with LangChain""" + print("Example 6: Using tools with LangChain\n") + + try: + from langchain.agents import AgentExecutor, create_tool_calling_agent + from langchain_core.prompts import ChatPromptTemplate + from langchain_openai import ChatOpenAI + + # Initialize StackOne toolset + toolset = StackOneToolSet() + + # Discover tools via search_tools() and convert to LangChain format + tools = toolset.search_tools("list employees", account_ids=_account_ids, top_k=5) + langchain_tools = list(tools.to_langchain()) + + print(f"Available tools for LangChain: {len(langchain_tools)}") + for tool in langchain_tools: + print(f" - {tool.name}: {tool.description}") + + # Create LangChain agent + llm = ChatOpenAI(model="gpt-4", temperature=0) + + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + "You are an HR assistant. 
Use the available tools to help the user.", + ), + ("human", "{input}"), + ("placeholder", "{agent_scratchpad}"), + ] + ) + + agent = create_tool_calling_agent(llm, langchain_tools, prompt) + agent_executor = AgentExecutor(agent=agent, tools=langchain_tools, verbose=True) + + # Run the agent + result = agent_executor.invoke({"input": "Find tools that can list employee data"}) + + print(f"\nAgent result: {result['output']}") + + except ImportError as e: + print(f"LangChain dependencies not installed: {e}") + print("Install with: pip install langchain-openai") + except Exception as e: + print(f"LangChain example failed: {e}") + + print() + + +def main(): + """Run all examples""" + print("=" * 60) + print("StackOne AI SDK - Search Tool Examples") + print("=" * 60) + print() + + if not os.getenv("STACKONE_API_KEY"): + print("Set STACKONE_API_KEY to run these examples.") + return + + if not _account_ids: + print("Set STACKONE_ACCOUNT_ID to run these examples.") + print("(Comma-separated for multiple accounts)") + return + + # Basic examples that work without external APIs + example_search_tool_basic() + example_search_modes() + example_top_k_config() + example_search_tool_with_execution() + + # Examples that require OpenAI API + if os.getenv("OPENAI_API_KEY"): + example_with_openai() + example_with_langchain() + else: + print("Set OPENAI_API_KEY to run OpenAI and LangChain examples\n") + + print("=" * 60) + print("Examples completed!") + print("=" * 60) + + +if __name__ == "__main__": + main() diff --git a/examples/semantic_search_example.py b/examples/semantic_search_example.py new file mode 100644 index 0000000..425661a --- /dev/null +++ b/examples/semantic_search_example.py @@ -0,0 +1,424 @@ +#!/usr/bin/env python +""" +Example demonstrating semantic search for AI-powered tool discovery. 
+ +Semantic search understands natural language intent and synonyms, so queries like +"book a meeting" or "cancel an event" resolve to the right StackOne actions — +unlike keyword matching which requires exact tool names. + +This example uses a Calendly-linked account to demonstrate how semantic search +discovers scheduling, event, and organization management tools from natural +language queries. + + +How Semantic Search Works (Overview) +===================================== + +The SDK provides three paths for semantic tool discovery, each with a different +trade-off between speed, filtering, and completeness: + +1. search_tools(query) — Full discovery (recommended for agent frameworks) + + This is the method you should use when integrating with OpenAI, LangChain, + CrewAI, or any other agent framework. + + Recommended usage — pass ``connector`` to scope to a single provider: + + tools = toolset.search_tools("book a meeting", connector="calendly") + + This is faster and returns more relevant results than searching all + connectors. When the target provider is known, always pass ``connector``. + + When ``connector`` is not specified, the SDK searches all connectors + available in the user's linked accounts in parallel: + + a) Fetch tools from the user's linked accounts via MCP + b) Extract available connectors (e.g. {bamboohr, calendly}) + c) Search each connector in parallel via the semantic search API + d) Collect results, sort by relevance score + e) If top_k was specified, keep only the top K results + f) Match results back to the fetched tool definitions + g) Return a Tools collection sorted by relevance score + + Key point: only the user's own connectors are searched — no wasted results + from connectors the user doesn't have. When top_k is not specified, the + backend decides how many results to return per connector. If the semantic + API is unavailable, the SDK falls back to local BM25+TF-IDF search + automatically. + +2. 
search_action_names(query) — Lightweight preview + + Queries the semantic API directly and returns metadata (name, connector, + score, description) without fetching full tool definitions. Useful for + inspecting results before committing to a full fetch. When account_ids are + provided, each connector is searched in parallel (same as search_tools). + +3. get_search_tool() — Agent-loop pattern + + Returns a callable SearchTool that wraps search_tools(). Call it + with a natural language query to get a Tools collection back. + Designed for agent loops where the LLM decides what to search for. + + +This example is runnable with the following command: +```bash +uv run examples/semantic_search_example.py +``` + +Prerequisites: +- STACKONE_API_KEY environment variable set +- STACKONE_ACCOUNT_ID environment variable set (required for examples that fetch tools) +- At least one linked account in StackOne (this example uses Calendly) + +Note: search_action_names() works with just STACKONE_API_KEY — no account ID needed. +""" + +import logging +import os + +from stackone_ai import StackOneToolSet + +try: + from dotenv import load_dotenv + + load_dotenv() +except ModuleNotFoundError: + pass + +# Show SDK warnings (e.g., semantic search fallback to local search) +logging.basicConfig(level=logging.WARNING) + +# Read account IDs from environment — supports comma-separated values +_account_ids = [aid.strip() for aid in os.getenv("STACKONE_ACCOUNT_ID", "").split(",") if aid.strip()] + + +def example_search_action_names(): + """Lightweight search returning action names and scores without fetching tools. + + search_action_names() queries the semantic search API directly — it does NOT + need account IDs or MCP. This makes it the simplest way to try semantic search. + + When called without account_ids, results come from the full StackOne catalog + (all connectors). When called with account_ids, results are filtered to only + connectors available in your linked accounts. 
+ """ + print("=" * 60) + print("Example 1: search_action_names() — lightweight discovery") + print("=" * 60) + print() + print("This searches the StackOne action catalog using semantic vectors.") + print("No account ID needed — results come from all available connectors.") + print() + + # Constructor-level config sets defaults; per-call params override them. + # Here we set method="semantic" at the constructor level. + toolset = StackOneToolSet(search={"method": "semantic"}) + + query = "get user schedule" + + # --- top_k behavior --- + # When top_k is NOT specified, the backend decides how many results to return. + # When top_k IS specified, results are explicitly limited to that number. + print(f'Searching for: "{query}" (no top_k — backend decides count)') + results_default = toolset.search_action_names(query) + print(f" Backend returned {len(results_default)} results (its default)") + print() + + print(f'Searching for: "{query}" (top_k=3 — explicitly limited)') + results_limited = toolset.search_action_names(query, top_k=3) + print(f" Got exactly {len(results_limited)} results") + print() + + # Show the limited results + print(f"Top {len(results_limited)} matches from the full catalog:") + for r in results_limited: + print(f" [{r.similarity_score:.2f}] {r.action_name} ({r.connector_key})") + print(f" {r.description}") + print() + + # Show filtering effect when account_ids are available + if _account_ids: + print(f"Now filtering to your linked accounts ({', '.join(_account_ids)})...") + print(" (Each connector is searched in parallel — only your connectors are queried)") + filtered = toolset.search_action_names(query, account_ids=_account_ids, top_k=5) + print(f" Filtered to {len(filtered)} matches (only your connectors):") + for r in filtered: + print(f" [{r.similarity_score:.2f}] {r.action_name} ({r.connector_key})") + else: + print("Tip: Set STACKONE_ACCOUNT_ID to see results filtered to your linked connectors.") + + print() + + +def example_search_tools(): 
+ """High-level semantic search returning a Tools collection. + + search_tools() is the recommended way to use semantic search. It: + 1. Fetches tool definitions from your linked accounts via MCP + 2. Searches each of your connectors in parallel via the semantic search API + 3. Sorts results by relevance and matches back to tool definitions + 4. Returns a Tools collection ready for any framework (.to_openai(), .to_langchain(), etc.) + + Search config can be set at the constructor level: + toolset = StackOneToolSet(search={"method": "semantic", "top_k": 5}) + Per-call parameters (e.g. top_k, search) override the constructor defaults. + """ + print("=" * 60) + print("Example 2: search_tools() — full tool discovery") + print("=" * 60) + print() + + # Constructor-level search config: always use semantic search with top_k=5 + toolset = StackOneToolSet(search={"method": "semantic", "top_k": 5}) + + query = "cancel an event" + print(f'Step 1: Searching for "{query}" via semantic search (constructor config)...') + print() + + # top_k and method are already set via the constructor — no need to pass them here + tools = toolset.search_tools(query, account_ids=_account_ids) + + connectors = tools.get_connectors() + print(f"Found {len(tools)} tools from your linked account(s) ({', '.join(sorted(connectors))}):") + for tool in tools: + print(f" - {tool.name}") + print(f" {tool.description}") + print() + + +def example_search_tools_with_connector(): + """Semantic search filtered by connector. + + Use the connector parameter to scope results to a specific provider, + for example when you know the user works with Calendly. 
+ """ + print("=" * 60) + print("Example 3: search_tools() with connector filter") + print("=" * 60) + print() + + toolset = StackOneToolSet() + + query = "book a meeting" + connector = "calendly" + print(f'Searching for "{query}" filtered to connector="{connector}"...') + print() + + tools = toolset.search_tools( + query, + connector=connector, + account_ids=_account_ids, + top_k=3, + ) + + print(f"Found {len(tools)} {connector} tools:") + for tool in tools: + print(f" - {tool.name}") + print(f" {tool.description}") + print() + + +def example_search_tool_agent_loop(): + """Using get_search_tool() for agent loops. + + get_search_tool() returns a callable that wraps search_tools(). + Call it with a query to get a Tools collection back — designed + for agent loops where the LLM decides what to search for. + """ + print("=" * 60) + print("Example 4: Search tool for agent loops") + print("=" * 60) + print() + + toolset = StackOneToolSet() + + print("Step 1: Fetching tools from your linked accounts via MCP...") + all_tools = toolset.fetch_tools(account_ids=_account_ids) + print(f"Loaded {len(all_tools)} tools.") + print() + + print("Step 2: Getting a callable search tool...") + search_tool = toolset.get_search_tool() + + query = "cancel an event or meeting" + print() + print(f'Step 3: Calling search_tool("{query}")...') + print(" (Searches are scoped to your linked connectors)") + print() + tools = search_tool(query, top_k=5, account_ids=_account_ids) + print(f"search_tool returned {len(tools)} tools:") + for tool in tools: + print(f" - {tool.name}") + print(f" {tool.description}") + + print() + + +def example_openai_agent_loop(): + """Complete agent loop: semantic search -> LLM -> execute. + + This demonstrates the full pattern for building an AI agent that + discovers tools via semantic search and executes them via an LLM. + + Supports both OpenAI and Google Gemini (via its OpenAI-compatible API). + Set OPENAI_API_KEY for OpenAI, or GOOGLE_API_KEY for Gemini. 
+ """ + print("=" * 60) + print("Example 5: LLM agent loop with semantic search") + print("=" * 60) + print() + + try: + from openai import OpenAI + except ImportError: + print("Skipped: OpenAI library not installed. Install with: pip install openai") + print() + return + + # Support both OpenAI and Gemini (via OpenAI-compatible endpoint) + openai_key = os.getenv("OPENAI_API_KEY") + google_key = os.getenv("GOOGLE_API_KEY") + + if openai_key: + client = OpenAI() + model = "gpt-4o-mini" + provider = "OpenAI" + elif google_key: + client = OpenAI( + api_key=google_key, + base_url="https://generativelanguage.googleapis.com/v1beta/openai/", + ) + model = "gemini-2.5-flash" + provider = "Gemini" + else: + print("Skipped: Set OPENAI_API_KEY or GOOGLE_API_KEY to run this example.") + print() + return + + print(f"Using {provider} ({model})") + print() + + toolset = StackOneToolSet() + + query = "list upcoming events" + print(f'Step 1: Discovering tools for "{query}" via semantic search...') + tools = toolset.search_tools(query, account_ids=_account_ids, top_k=3) + print(f"Found {len(tools)} tools:") + for tool in tools: + print(f" - {tool.name}") + print() + + print(f"Step 2: Sending tools to {provider} as function definitions...") + openai_tools = tools.to_openai() + + messages = [ + {"role": "system", "content": "You are a helpful scheduling assistant."}, + {"role": "user", "content": "Can you show me my upcoming events?"}, + ] + + response = client.chat.completions.create( + model=model, + messages=messages, + tools=openai_tools, + tool_choice="auto", + ) + + if response.choices[0].message.tool_calls: + print(f"Step 3: {provider} chose to call these tools:") + for tool_call in response.choices[0].message.tool_calls: + print(f" - {tool_call.function.name}({tool_call.function.arguments})") + + tool = tools.get_tool(tool_call.function.name) + if tool: + result = tool.execute(tool_call.function.arguments) + print( + f" Response keys: {list(result.keys()) if isinstance(result, 
dict) else type(result)}" + ) + else: + print(f"{provider} responded with text: {response.choices[0].message.content}") + + print() + + +def example_langchain_semantic(): + """Semantic search with LangChain tools. + + search_tools() returns a Tools collection that converts directly + to LangChain format — no extra steps needed. + """ + print("=" * 60) + print("Example 6: Semantic search with LangChain") + print("=" * 60) + print() + + try: + from langchain_core.tools import BaseTool # noqa: F401 + except ImportError: + print("Skipped: LangChain not installed. Install with: pip install langchain-core") + print() + return + + toolset = StackOneToolSet() + + query = "remove a user from the team" + print(f'Step 1: Searching for "{query}" via semantic search...') + tools = toolset.search_tools(query, account_ids=_account_ids, top_k=5) + print(f"Found {len(tools)} tools.") + print() + + print("Step 2: Converting to LangChain tools...") + langchain_tools = tools.to_langchain() + + print(f"Created {len(langchain_tools)} LangChain tools (ready for use with agents):") + for tool in langchain_tools: + print(f" - {tool.name} (type: {type(tool).__name__})") + print(f" {tool.description}") + + print() + + +def main(): + """Run all semantic search examples.""" + print() + print("############################################################") + print("# StackOne AI SDK — Semantic Search Examples #") + print("############################################################") + print() + + if not os.getenv("STACKONE_API_KEY"): + print("Set STACKONE_API_KEY to run these examples.") + return + + # --- Examples that work without account IDs --- + example_search_action_names() + + # --- Examples that require account IDs (MCP needs x-account-id) --- + if not _account_ids: + print("=" * 60) + print("Remaining examples require STACKONE_ACCOUNT_ID") + print("=" * 60) + print() + print("Set STACKONE_ACCOUNT_ID (comma-separated for multiple) to run") + print("examples that fetch full tool 
definitions from your linked accounts:") + print(" - search_tools() with natural language queries") + print(" - search_tools() with connector filter") + print(" - Search tool for agent loops") + print(" - OpenAI agent loop") + print(" - LangChain integration") + return + + example_search_tools() + example_search_tools_with_connector() + example_search_tool_agent_loop() + + # Framework integration patterns + example_openai_agent_loop() + example_langchain_semantic() + + print("############################################################") + print("# All examples completed! #") + print("############################################################") + + +if __name__ == "__main__": + main() diff --git a/examples/test_examples.py b/examples/test_examples.py index 45d631e..8a4c899 100644 --- a/examples/test_examples.py +++ b/examples/test_examples.py @@ -30,7 +30,8 @@ def get_example_files() -> list[str]: "index.py": ["mcp"], "file_uploads.py": ["mcp"], "stackone_account_ids.py": ["mcp"], - "utility_tools_example.py": ["mcp"], + "search_tool_example.py": ["mcp"], + "semantic_search_example.py": ["mcp"], "mcp_server.py": ["mcp"], } diff --git a/examples/utility_tools_example.py b/examples/utility_tools_example.py deleted file mode 100644 index 3291f7e..0000000 --- a/examples/utility_tools_example.py +++ /dev/null @@ -1,214 +0,0 @@ -#!/usr/bin/env python -""" -Example demonstrating utility tools for dynamic tool discovery and execution. - -Utility tools allow AI agents to search for relevant tools based on natural language queries -and execute them dynamically without hardcoding tool names. 
-""" - -import os - -from dotenv import load_dotenv - -from stackone_ai import StackOneToolSet - -# Load environment variables -load_dotenv() - - -def example_utility_tools_basic(): - """Basic example of using utility tools for tool discovery""" - print("Example 1: Dynamic tool discovery\n") - - # Initialize StackOne toolset - toolset = StackOneToolSet() - - # Get all available tools using MCP-backed fetch_tools() - all_tools = toolset.fetch_tools(actions=["bamboohr_*"]) - print(f"Total BambooHR tools available: {len(all_tools)}") - - # Get utility tools for dynamic discovery - utility_tools = all_tools.utility_tools() - - # Get the filter tool to search for relevant tools - filter_tool = utility_tools.get_tool("tool_search") - if filter_tool: - # Search for employee management tools - result = filter_tool.call(query="manage employees create update list", limit=5, minScore=0.0) - - print("Found relevant tools:") - for tool in result.get("tools", []): - print(f" - {tool['name']} (score: {tool['score']:.2f}): {tool['description']}") - - print() - - -def example_utility_tools_with_execution(): - """Example of discovering and executing tools dynamically""" - print("Example 2: Dynamic tool execution\n") - - # Initialize toolset - toolset = StackOneToolSet() - - # Get all tools using MCP-backed fetch_tools() - all_tools = toolset.fetch_tools() - utility_tools = all_tools.utility_tools() - - # Step 1: Search for relevant tools - filter_tool = utility_tools.get_tool("tool_search") - execute_tool = utility_tools.get_tool("tool_execute") - - if filter_tool and execute_tool: - # Find tools for listing employees - search_result = filter_tool.call(query="list all employees", limit=1) - - tools_found = search_result.get("tools", []) - if tools_found: - best_tool = tools_found[0] - print(f"Best matching tool: {best_tool['name']}") - print(f"Description: {best_tool['description']}") - print(f"Relevance score: {best_tool['score']:.2f}") - - # Step 2: Execute the found tool - try: - 
print(f"\nExecuting {best_tool['name']}...") - result = execute_tool.call(toolName=best_tool["name"], params={"limit": 5}) - print(f"Execution result: {result}") - except Exception as e: - print(f"Execution failed (expected in example): {e}") - - print() - - -def example_with_openai(): - """Example of using utility tools with OpenAI""" - print("Example 3: Using utility tools with OpenAI\n") - - try: - from openai import OpenAI - - # Initialize OpenAI client - client = OpenAI() - - # Initialize StackOne toolset - toolset = StackOneToolSet() - - # Get BambooHR tools and their utility tools using MCP-backed fetch_tools() - bamboohr_tools = toolset.fetch_tools(actions=["bamboohr_*"]) - utility_tools = bamboohr_tools.utility_tools() - - # Convert to OpenAI format - openai_tools = utility_tools.to_openai() - - # Create a chat completion with utility tools - response = client.chat.completions.create( - model="gpt-4", - messages=[ - { - "role": "system", - "content": "You are an HR assistant. Use tool_search to find appropriate tools, then tool_execute to execute them.", - }, - {"role": "user", "content": "Can you help me find tools for managing employee records?"}, - ], - tools=openai_tools, - tool_choice="auto", - ) - - print("OpenAI Response:", response.choices[0].message.content) - - if response.choices[0].message.tool_calls: - print("\nTool calls made:") - for tool_call in response.choices[0].message.tool_calls: - print(f" - {tool_call.function.name}") - - except ImportError: - print("OpenAI library not installed. 
Install with: pip install openai") - except Exception as e: - print(f"OpenAI example failed: {e}") - - print() - - -def example_with_langchain(): - """Example of using tools with LangChain""" - print("Example 4: Using tools with LangChain\n") - - try: - from langchain.agents import AgentExecutor, create_tool_calling_agent - from langchain_core.prompts import ChatPromptTemplate - from langchain_openai import ChatOpenAI - - # Initialize StackOne toolset - toolset = StackOneToolSet() - - # Get tools and convert to LangChain format using MCP-backed fetch_tools() - tools = toolset.fetch_tools(actions=["bamboohr_list_*"]) - langchain_tools = tools.to_langchain() - - # Get utility tools as well - utility_tools = tools.utility_tools() - langchain_utility_tools = utility_tools.to_langchain() - - # Combine all tools - all_langchain_tools = list(langchain_tools) + list(langchain_utility_tools) - - print(f"Available tools for LangChain: {len(all_langchain_tools)}") - for tool in all_langchain_tools: - print(f" - {tool.name}: {tool.description}") - - # Create LangChain agent - llm = ChatOpenAI(model="gpt-4", temperature=0) - - prompt = ChatPromptTemplate.from_messages( - [ - ( - "system", - "You are an HR assistant. 
Use the utility tools to discover and execute relevant tools.", - ), - ("human", "{input}"), - ("placeholder", "{agent_scratchpad}"), - ] - ) - - agent = create_tool_calling_agent(llm, all_langchain_tools, prompt) - agent_executor = AgentExecutor(agent=agent, tools=all_langchain_tools, verbose=True) - - # Run the agent - result = agent_executor.invoke({"input": "Find tools that can list employee data"}) - - print(f"\nAgent result: {result['output']}") - - except ImportError as e: - print(f"LangChain dependencies not installed: {e}") - print("Install with: pip install langchain-openai") - except Exception as e: - print(f"LangChain example failed: {e}") - - print() - - -def main(): - """Run all examples""" - print("=" * 60) - print("StackOne AI SDK - Utility Tools Examples") - print("=" * 60) - print() - - # Basic examples that work without external APIs - example_utility_tools_basic() - example_utility_tools_with_execution() - - # Examples that require OpenAI API - if os.getenv("OPENAI_API_KEY"): - example_with_openai() - example_with_langchain() - else: - print("Set OPENAI_API_KEY to run OpenAI and LangChain examples\n") - - print("=" * 60) - print("Examples completed!") - print("=" * 60) - - -if __name__ == "__main__": - main() diff --git a/stackone_ai/__init__.py b/stackone_ai/__init__.py index f7a0aba..e5b5fb4 100644 --- a/stackone_ai/__init__.py +++ b/stackone_ai/__init__.py @@ -1,11 +1,25 @@ """StackOne AI SDK""" -from .models import StackOneTool, Tools -from .toolset import StackOneToolSet +from stackone_ai.models import StackOneTool, Tools +from stackone_ai.semantic_search import ( + SemanticSearchClient, + SemanticSearchError, + SemanticSearchResponse, + SemanticSearchResult, +) +from stackone_ai.toolset import SearchConfig, SearchMode, SearchTool, StackOneToolSet __all__ = [ "StackOneToolSet", "StackOneTool", "Tools", + "SearchConfig", + "SearchMode", + "SearchTool", + # Semantic search + "SemanticSearchClient", + "SemanticSearchResult", + 
"SemanticSearchResponse", + "SemanticSearchError", ] __version__ = "2.3.1" diff --git a/stackone_ai/constants.py b/stackone_ai/constants.py index a33428d..1472f50 100644 --- a/stackone_ai/constants.py +++ b/stackone_ai/constants.py @@ -1,3 +1,6 @@ +# StackOne API base URL +DEFAULT_BASE_URL: str = "https://api.stackone.com" + # Hybrid search default weight for BM25 vs TF-IDF # alpha=0.2 means: 20% BM25 + 80% TF-IDF # This value was optimized through validation testing and provides diff --git a/stackone_ai/feedback/tool.py b/stackone_ai/feedback/tool.py index ae493e9..bcc3ef7 100644 --- a/stackone_ai/feedback/tool.py +++ b/stackone_ai/feedback/tool.py @@ -6,7 +6,8 @@ from pydantic import BaseModel, Field, field_validator -from ..models import ( +from stackone_ai.constants import DEFAULT_BASE_URL +from stackone_ai.models import ( ExecuteConfig, JsonDict, ParameterLocation, @@ -147,7 +148,7 @@ def execute( def create_feedback_tool( api_key: str, account_id: str | None = None, - base_url: str = "https://api.stackone.com", + base_url: str = DEFAULT_BASE_URL, ) -> FeedbackTool: """ Create a feedback collection tool. 
diff --git a/stackone_ai/local_search.py b/stackone_ai/local_search.py
new file mode 100644
index 0000000..1a38f0e
--- /dev/null
+++ b/stackone_ai/local_search.py
@@ -0,0 +1,170 @@
+"""Local BM25 + TF-IDF hybrid keyword search for tool discovery."""
+
+from __future__ import annotations
+
+import bm25s
+import numpy as np
+from pydantic import BaseModel
+
+from stackone_ai.constants import DEFAULT_HYBRID_ALPHA
+from stackone_ai.models import StackOneTool
+from stackone_ai.utils.tfidf_index import TfidfDocument, TfidfIndex
+
+
+class ToolSearchResult(BaseModel):
+    """Result from tool_search"""
+
+    name: str
+    description: str
+    score: float
+
+
+class ToolIndex:
+    """Hybrid BM25 + TF-IDF tool search index"""
+
+    def __init__(self, tools: list[StackOneTool], hybrid_alpha: float | None = None) -> None:
+        """Initialize tool index with hybrid search
+
+        Args:
+            tools: List of tools to index
+            hybrid_alpha: Weight for BM25 in hybrid search (0-1). If not provided,
+                uses DEFAULT_HYBRID_ALPHA (0.2), which gives more weight to TF-IDF scoring
+                (20% BM25 + 80% TF-IDF) and has been shown to provide better tool discovery
+                accuracy (10.8% improvement in validation testing).
+ """ + self.tools = tools + self.tool_map = {tool.name: tool for tool in tools} + # Use default if not provided, then clamp to [0, 1] + alpha = hybrid_alpha if hybrid_alpha is not None else DEFAULT_HYBRID_ALPHA + self.hybrid_alpha = max(0.0, min(1.0, alpha)) + + # Prepare corpus for both BM25 and TF-IDF + corpus = [] + tfidf_docs = [] + self.tool_names = [] + + for tool in tools: + # Extract category and action from tool name + parts = tool.name.split("_") + category = parts[0] if parts else "" + + # Extract action types + action_types = ["create", "update", "delete", "get", "list", "search"] + actions = [p for p in parts if p in action_types] + + # Combine name, description, category and tags for indexing + # For TF-IDF: use weighted approach similar to Node.js + tfidf_text = " ".join( + [ + f"{tool.name} {tool.name} {tool.name}", # boost name + f"{category} {' '.join(actions)}", + tool.description, + " ".join(parts), + ] + ) + + # For BM25: simpler approach + bm25_text = " ".join( + [ + tool.name, + tool.description, + category, + " ".join(parts), + " ".join(actions), + ] + ) + + corpus.append(bm25_text) + tfidf_docs.append(TfidfDocument(id=tool.name, text=tfidf_text)) + self.tool_names.append(tool.name) + + # Create BM25 index + self.bm25_retriever = bm25s.BM25() + if corpus: + corpus_tokens = bm25s.tokenize(corpus, stemmer=None, show_progress=False) # ty: ignore[invalid-argument-type] + self.bm25_retriever.index(corpus_tokens) + + # Create TF-IDF index + self.tfidf_index = TfidfIndex() + if tfidf_docs: + self.tfidf_index.build(tfidf_docs) + + def search(self, query: str, limit: int = 5, min_score: float = 0.0) -> list[ToolSearchResult]: + """Search for relevant tools using hybrid BM25 + TF-IDF + + Args: + query: Natural language query + limit: Maximum number of results + min_score: Minimum relevance score (0-1) + + Returns: + List of search results sorted by relevance + """ + if not self.tools: + return [] + + # Get more results initially to have better 
candidate pool for fusion + fetch_limit = max(50, limit) + + # Tokenize query for BM25 + query_tokens = bm25s.tokenize([query], stemmer=None, show_progress=False) # ty: ignore[invalid-argument-type] + + # Search with BM25 + bm25_results, bm25_scores = self.bm25_retriever.retrieve( + query_tokens, k=min(fetch_limit, len(self.tools)) + ) + + # Search with TF-IDF + tfidf_results = self.tfidf_index.search(query, k=min(fetch_limit, len(self.tools))) + + # Build score map for fusion + score_map: dict[str, dict[str, float]] = {} + + # Add BM25 scores + for idx, score in zip(bm25_results[0], bm25_scores[0], strict=True): + tool_name = self.tool_names[idx] + # Normalize BM25 score to 0-1 range + normalized_score = float(1 / (1 + np.exp(-score / 10))) + # Clamp to [0, 1] + clamped_score = max(0.0, min(1.0, normalized_score)) + score_map[tool_name] = {"bm25": clamped_score} + + # Add TF-IDF scores + for result in tfidf_results: + if result.id not in score_map: + score_map[result.id] = {} + score_map[result.id]["tfidf"] = result.score + + # Fuse scores: hybrid_score = alpha * bm25 + (1 - alpha) * tfidf + fused_results: list[tuple[str, float]] = [] + for tool_name, scores in score_map.items(): + bm25_score = scores.get("bm25", 0.0) + tfidf_score = scores.get("tfidf", 0.0) + hybrid_score = self.hybrid_alpha * bm25_score + (1 - self.hybrid_alpha) * tfidf_score + fused_results.append((tool_name, hybrid_score)) + + # Sort by score descending + fused_results.sort(key=lambda x: x[1], reverse=True) + + # Build final results + search_results = [] + for tool_name, score in fused_results: + if score < min_score: + continue + + tool = self.tool_map.get(tool_name) + if tool is None: + continue + + search_results.append( + ToolSearchResult( + name=tool.name, + description=tool.description, + score=score, + ) + ) + + if len(search_results) >= limit: + break + + return search_results diff --git a/stackone_ai/models.py b/stackone_ai/models.py index fcd32d7..aabc802 100644 --- 
a/stackone_ai/models.py +++ b/stackone_ai/models.py @@ -98,6 +98,18 @@ class StackOneTool(BaseModel): "feedback_metadata", } + @property + def connector(self) -> str: + """Extract connector from tool name. + + Tool names follow the format: {connector}_{action}_{entity} + e.g., 'bamboohr_create_employee' -> 'bamboohr' + + Returns: + Connector name in lowercase + """ + return self.name.split("_")[0].lower() + def __init__( self, description: str, @@ -318,6 +330,13 @@ def call(self, *args: Any, options: JsonDict | None = None, **kwargs: Any) -> Js return self.execute(kwargs if kwargs else None) + def __call__(self, *args: Any, options: JsonDict | None = None, **kwargs: Any) -> JsonDict: + """Make the tool directly callable. + + Alias for :meth:`call` so that ``tool(query="…")`` works. + """ + return self.call(*args, options=options, **kwargs) + def to_openai_function(self) -> JsonDict: """Convert this tool to OpenAI's function format @@ -455,7 +474,10 @@ def get_account_id(self) -> str | None: class Tools: """Container for Tool instances with lookup capabilities""" - def __init__(self, tools: list[StackOneTool]) -> None: + def __init__( + self, + tools: list[StackOneTool], + ) -> None: """Initialize Tools container Args: @@ -514,6 +536,19 @@ def get_account_id(self) -> str | None: return account_id return None + def get_connectors(self) -> set[str]: + """Get unique connector names from all tools. 
+ + Returns: + Set of connector names (lowercase) + + Example: + tools = toolset.fetch_tools() + connectors = tools.get_connectors() + # {'bamboohr', 'hibob', 'slack', ...} + """ + return {tool.connector for tool in self.tools} + def to_openai(self) -> list[JsonDict]: """Convert all tools to OpenAI function format @@ -529,36 +564,3 @@ def to_langchain(self) -> Sequence[BaseTool]: Sequence of tools in LangChain format """ return [tool.to_langchain() for tool in self.tools] - - def utility_tools(self, hybrid_alpha: float | None = None) -> Tools: - """Return utility tools for tool discovery and execution - - Utility tools enable dynamic tool discovery and execution based on natural language queries - using hybrid BM25 + TF-IDF search. - - Args: - hybrid_alpha: Weight for BM25 in hybrid search (0-1). If not provided, uses - ToolIndex.DEFAULT_HYBRID_ALPHA (0.2), which gives more weight to BM25 scoring - and has been shown to provide better tool discovery accuracy - (10.8% improvement in validation testing). - - Returns: - Tools collection containing tool_search and tool_execute - - Note: - This feature is in beta and may change in future versions - """ - from stackone_ai.utility_tools import ( - ToolIndex, - create_tool_execute, - create_tool_search, - ) - - # Create search index with hybrid search - index = ToolIndex(self.tools, hybrid_alpha=hybrid_alpha) - - # Create utility tools - filter_tool = create_tool_search(index) - execute_tool = create_tool_execute(self) - - return Tools([filter_tool, execute_tool]) diff --git a/stackone_ai/semantic_search.py b/stackone_ai/semantic_search.py new file mode 100644 index 0000000..1a1e2b7 --- /dev/null +++ b/stackone_ai/semantic_search.py @@ -0,0 +1,213 @@ +"""Semantic search client for StackOne action search API. + +How Semantic Search Works +========================= + +The SDK provides three ways to discover tools using semantic search. +Each path trades off between speed, filtering, and completeness. + +1. 
``search_tools(query)`` — Full tool discovery (recommended for agent frameworks) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is the primary method used when integrating with OpenAI, LangChain, or CrewAI. +The internal flow is: + +1. Fetch ALL tools from linked accounts via MCP (uses account_ids to scope the request) +2. Extract available connectors from the fetched tools (e.g. {bamboohr, hibob}) +3. Search EACH connector in parallel via the semantic search API (/actions/search) +4. Collect results, sort by relevance score, apply top_k if specified +5. Match semantic results back to the fetched tool definitions +6. Return Tools sorted by relevance score + +Key point: only the user's own connectors are searched — no wasted results +from connectors the user doesn't have. Tools are fetched first, semantic +search runs second, and only tools that exist in the user's linked +accounts AND match the semantic query are returned. This prevents +suggesting tools the user cannot execute. + +If the semantic API is unavailable, the SDK falls back to a local +BM25 + TF-IDF hybrid search over the fetched tools (unless +``search="semantic"`` is specified). + + +2. ``search_action_names(query)`` — Lightweight discovery +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Queries the semantic API directly and returns action name metadata +(name, connector, score, description) **without** fetching full tool +definitions. This is useful for previewing results before committing +to a full fetch. + +When ``account_ids`` are provided, each connector is searched in +parallel (same as ``search_tools``). Without ``account_ids``, results +come from the full StackOne catalog. + + +3. ``toolset.get_search_tool()`` — Agent-loop callable +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Returns a callable ``SearchTool`` that wraps ``search_tools()``. +Call it with a natural language query to get a ``Tools`` collection +back. 
Designed for agent loops where the LLM decides what to search for. +""" + +from __future__ import annotations + +import base64 +from typing import Any + +import httpx +from pydantic import BaseModel + +from stackone_ai.constants import DEFAULT_BASE_URL + + +class SemanticSearchError(Exception): + """Raised when semantic search fails.""" + + pass + + +class SemanticSearchResult(BaseModel): + """Single result from semantic search API.""" + + action_name: str + connector_key: str + similarity_score: float + label: str + description: str + project_id: str = "global" + + +class SemanticSearchResponse(BaseModel): + """Response from /actions/search endpoint.""" + + results: list[SemanticSearchResult] + total_count: int + query: str + connector_filter: str | None = None + project_filter: str | None = None + + +class SemanticSearchClient: + """Client for StackOne semantic search API. + + This client provides access to the semantic search endpoint which uses + enhanced embeddings for higher accuracy than local BM25+TF-IDF search. + + Example: + client = SemanticSearchClient(api_key="sk-xxx") + response = client.search("create employee", connector="bamboohr", top_k=5) + for result in response.results: + print(f"{result.action_name}: {result.similarity_score:.2f}") + """ + + def __init__( + self, + api_key: str, + base_url: str = DEFAULT_BASE_URL, + timeout: float = 30.0, + ) -> None: + """Initialize the semantic search client. 
+ + Args: + api_key: StackOne API key + base_url: Base URL for API requests + timeout: Request timeout in seconds + """ + self.api_key = api_key + self.base_url = base_url.rstrip("/") + self.timeout = timeout + + def _build_auth_header(self) -> str: + """Build the Basic auth header.""" + token = base64.b64encode(f"{self.api_key}:".encode()).decode() + return f"Basic {token}" + + def search( + self, + query: str, + connector: str | None = None, + top_k: int | None = None, + project_id: str | None = None, + min_similarity: float | None = None, + ) -> SemanticSearchResponse: + """Search for relevant actions using semantic search. + + Args: + query: Natural language query describing what tools/actions you need + connector: Optional connector/provider filter (e.g., "bamboohr", "slack") + top_k: Maximum number of results to return. If not provided, uses the backend default. + project_id: Optional project scope (e.g., "103/dev-56501"). When provided, + results include both global actions and project-specific actions. + min_similarity: Minimum similarity score threshold (0-1). If not provided, + the server uses its default (currently 0.4). 
+ + Returns: + SemanticSearchResponse containing matching actions with similarity scores + + Raises: + SemanticSearchError: If the API call fails + + Example: + response = client.search("onboard a new team member", top_k=5) + for result in response.results: + print(f"{result.action_name}: {result.similarity_score:.2f}") + """ + url = f"{self.base_url}/actions/search" + headers = { + "Authorization": self._build_auth_header(), + "Content-Type": "application/json", + } + payload: dict[str, Any] = {"query": query} + if top_k is not None: + payload["top_k"] = top_k + if connector: + payload["connector"] = connector + if project_id: + payload["project_id"] = project_id + if min_similarity is not None: + payload["min_similarity"] = min_similarity + + try: + response = httpx.post(url, json=payload, headers=headers, timeout=self.timeout) + response.raise_for_status() + data = response.json() + return SemanticSearchResponse(**data) + except httpx.HTTPStatusError as e: + raise SemanticSearchError(f"API error: {e.response.status_code} - {e.response.text}") from e + except httpx.RequestError as e: + raise SemanticSearchError(f"Request failed: {e}") from e + except Exception as e: + raise SemanticSearchError(f"Search failed: {e}") from e + + def search_action_names( + self, + query: str, + connector: str | None = None, + top_k: int | None = None, + min_similarity: float | None = None, + project_id: str | None = None, + ) -> list[str]: + """Convenience method returning just action names. + + Args: + query: Natural language query + connector: Optional connector/provider filter + top_k: Maximum number of results. If not provided, uses the backend default. + min_similarity: Minimum similarity score threshold (0-1). If not provided, + the server uses its default. 
+ project_id: Optional project scope for multi-tenant filtering + + Returns: + List of action names sorted by relevance + + Example: + action_names = client.search_action_names( + "create employee", + connector="bamboohr", + min_similarity=0.5 + ) + """ + response = self.search(query, connector, top_k, project_id, min_similarity=min_similarity) + return [r.action_name for r in response.results] diff --git a/stackone_ai/toolset.py b/stackone_ai/toolset.py index 126078a..998dbc0 100644 --- a/stackone_ai/toolset.py +++ b/stackone_ai/toolset.py @@ -2,15 +2,18 @@ import asyncio import base64 +import concurrent.futures import fnmatch import json +import logging import os import threading from collections.abc import Coroutine from dataclasses import dataclass from importlib import metadata -from typing import Any, TypeVar +from typing import Any, Literal, TypedDict, TypeVar +from stackone_ai.constants import DEFAULT_BASE_URL from stackone_ai.models import ( ExecuteConfig, ParameterLocation, @@ -18,13 +21,43 @@ ToolParameters, Tools, ) +from stackone_ai.semantic_search import ( + SemanticSearchClient, + SemanticSearchError, + SemanticSearchResult, +) +from stackone_ai.utils.normalize import _normalize_action_name + +logger = logging.getLogger("stackone.tools") + +SearchMode = Literal["auto", "semantic", "local"] + + +class SearchConfig(TypedDict, total=False): + """Search configuration for the StackOneToolSet constructor. + + When provided as a dict, sets default search options that flow through + to ``search_tools()``, ``get_search_tool()``, and ``search_action_names()``. + Per-call options override these defaults. + + When set to ``None``, search is disabled entirely. + When omitted, defaults to ``{"method": "auto"}``. + """ + + method: SearchMode + """Search backend to use. 
Defaults to ``"auto"``.""" + top_k: int + """Maximum number of tools to return.""" + min_similarity: float + """Minimum similarity score threshold 0-1.""" + + +_SEARCH_DEFAULT: SearchConfig = {"method": "auto"} try: _SDK_VERSION = metadata.version("stackone-ai") except metadata.PackageNotFoundError: # pragma: no cover - best-effort fallback when running from source _SDK_VERSION = "dev" - -DEFAULT_BASE_URL = "https://api.stackone.com" _RPC_PARAMETER_LOCATIONS = { "action": ParameterLocation.BODY, "body": ParameterLocation.BODY, @@ -34,6 +67,7 @@ } _USER_AGENT = f"stackone-ai-python/{_SDK_VERSION}" + T = TypeVar("T") @@ -222,6 +256,60 @@ def _build_action_headers(self, additional_headers: dict[str, Any] | None) -> di return headers +class SearchTool: + """Callable search tool that wraps StackOneToolSet.search_tools(). + + Designed for agent loops — call it with a query to get Tools back. + + Example:: + + toolset = StackOneToolSet() + search_tool = toolset.get_search_tool() + tools = search_tool("manage employee records", account_ids=["acc-123"]) + """ + + def __init__(self, toolset: StackOneToolSet, config: SearchConfig | None = None) -> None: + self._toolset = toolset + self._config: SearchConfig = config or {} + + def __call__( + self, + query: str, + *, + connector: str | None = None, + top_k: int | None = None, + min_similarity: float | None = None, + account_ids: list[str] | None = None, + search: SearchMode | None = None, + ) -> Tools: + """Search for tools using natural language. + + Args: + query: Natural language description of needed functionality + connector: Optional provider/connector filter (e.g., "bamboohr", "slack") + top_k: Maximum number of tools to return. Overrides constructor default. + min_similarity: Minimum similarity score threshold 0-1. Overrides constructor default. 
+ account_ids: Optional account IDs (uses set_accounts() if not provided) + search: Override the default search mode for this call + + Returns: + Tools collection with matched tools + """ + effective_top_k = top_k if top_k is not None else self._config.get("top_k") + effective_min_sim = ( + min_similarity if min_similarity is not None else self._config.get("min_similarity") + ) + effective_search = search if search is not None else self._config.get("method", "auto") + return self._toolset.search_tools( + query, + connector=connector, + top_k=effective_top_k, + min_similarity=effective_min_sim, + account_ids=account_ids, + search=effective_search, + ) + + class StackOneToolSet: """Main class for accessing StackOne tools""" @@ -230,6 +318,7 @@ def __init__( api_key: str | None = None, account_id: str | None = None, base_url: str | None = None, + search: SearchConfig | None = _SEARCH_DEFAULT, ) -> None: """Initialize StackOne tools with authentication @@ -237,6 +326,11 @@ def __init__( api_key: Optional API key. If not provided, will try to get from STACKONE_API_KEY env var account_id: Optional account ID base_url: Optional base URL override for API requests + search: Search configuration. Controls default search behavior. + Omit or pass ``{}`` for defaults (method="auto"). + Pass ``None`` to disable search. + Pass ``{"method": "semantic", "top_k": 5}`` for custom defaults. + Per-call options always override these defaults. 
Raises: ToolsetConfigError: If no API key is provided or found in environment @@ -251,6 +345,8 @@ def __init__( self.account_id = account_id self.base_url = base_url or DEFAULT_BASE_URL self._account_ids: list[str] = [] + self._semantic_client: SemanticSearchClient | None = None + self._search_config: SearchConfig | None = search def set_accounts(self, account_ids: list[str]) -> StackOneToolSet: """Set account IDs for filtering tools @@ -264,6 +360,346 @@ def set_accounts(self, account_ids: list[str]) -> StackOneToolSet: self._account_ids = account_ids return self + def get_search_tool(self, *, search: SearchMode | None = None) -> SearchTool: + """Get a callable search tool that returns Tools collections. + + Returns a callable that wraps :meth:`search_tools` for use in agent loops. + The returned tool is directly callable: ``search_tool("query")`` returns + :class:`Tools`. + + Uses the constructor's search config as defaults. Per-call options override. + + Args: + search: Override the default search mode. If not provided, uses + the constructor's search config. + + Returns: + SearchTool instance + + Example:: + + toolset = StackOneToolSet() + search_tool = toolset.get_search_tool() + tools = search_tool("manage employee records", account_ids=["acc-123"]) + """ + if self._search_config is None: + raise ToolsetConfigError( + "Search is disabled. Initialize StackOneToolSet with a search config to enable." + ) + + config: SearchConfig = {**self._search_config} + if search is not None: + config["method"] = search + + return SearchTool(self, config=config) + + @property + def semantic_client(self) -> SemanticSearchClient: + """Lazy initialization of semantic search client. 
+ + Returns: + SemanticSearchClient instance configured with the toolset's API key and base URL + """ + if self._semantic_client is None: + self._semantic_client = SemanticSearchClient( + api_key=self.api_key, + base_url=self.base_url, + ) + return self._semantic_client + + def _local_search( + self, + query: str, + all_tools: Tools, + *, + connector: str | None = None, + top_k: int | None = None, + min_similarity: float | None = None, + ) -> Tools: + """Run local BM25+TF-IDF search over already-fetched tools.""" + from stackone_ai.local_search import ToolIndex + + available_connectors = all_tools.get_connectors() + if not available_connectors: + return Tools([]) + + index = ToolIndex(list(all_tools)) + results = index.search( + query, + limit=top_k if top_k is not None else 5, + min_score=min_similarity if min_similarity is not None else 0.0, + ) + matched_names = [r.name for r in results] + tool_map = {t.name: t for t in all_tools} + filter_connectors = {connector.lower()} if connector else available_connectors + matched_tools = [ + tool_map[name] + for name in matched_names + if name in tool_map and name.split("_")[0].lower() in filter_connectors + ] + return Tools(matched_tools[:top_k] if top_k is not None else matched_tools) + + def search_tools( + self, + query: str, + *, + connector: str | None = None, + top_k: int | None = None, + min_similarity: float | None = None, + account_ids: list[str] | None = None, + search: SearchMode | None = None, + ) -> Tools: + """Search for and fetch tools using semantic or local search. + + This method discovers relevant tools based on natural language queries. + Constructor search config provides defaults; per-call args override. + + Args: + query: Natural language description of needed functionality + (e.g., "create employee", "send a message") + connector: Optional provider/connector filter (e.g., "bamboohr", "slack") + top_k: Maximum number of tools to return. Overrides constructor default. 
+ min_similarity: Minimum similarity score threshold 0-1. Overrides constructor default. + account_ids: Optional account IDs (uses set_accounts() if not provided) + search: Search backend to use. Overrides constructor default. + - ``"auto"`` (default): try semantic search first, fall back to local + BM25+TF-IDF if the API is unavailable. + - ``"semantic"``: use only the semantic search API; raises + ``SemanticSearchError`` on failure. + - ``"local"``: use only local BM25+TF-IDF search (no API call to the + semantic search endpoint). + + Returns: + Tools collection with matched tools from linked accounts + + Raises: + ToolsetConfigError: If search is disabled (``search=None`` in constructor) + SemanticSearchError: If the API call fails and search is ``"semantic"`` + + Examples: + # Semantic search (default with local fallback) + tools = toolset.search_tools("manage employee records", top_k=5) + + # Explicit semantic search + tools = toolset.search_tools("manage employees", search="semantic") + + # Local BM25+TF-IDF search + tools = toolset.search_tools("manage employees", search="local") + + # Filter by connector + tools = toolset.search_tools( + "create time off request", + connector="bamboohr", + search="semantic", + ) + """ + if self._search_config is None: + raise ToolsetConfigError( + "Search is disabled. Initialize StackOneToolSet with a search config to enable." 
+ ) + + # Merge constructor defaults with per-call overrides + effective_search: SearchMode = ( + search if search is not None else self._search_config.get("method", "auto") + ) + effective_top_k = top_k if top_k is not None else self._search_config.get("top_k") + effective_min_sim = ( + min_similarity if min_similarity is not None else self._search_config.get("min_similarity") + ) + + all_tools = self.fetch_tools(account_ids=account_ids) + available_connectors = all_tools.get_connectors() + + if not available_connectors: + return Tools([]) + + # Local-only search — skip semantic API entirely + if effective_search == "local": + return self._local_search( + query, all_tools, connector=connector, top_k=effective_top_k, min_similarity=effective_min_sim + ) + + try: + # Determine which connectors to search + if connector: + connectors_to_search = {connector.lower()} & available_connectors + if not connectors_to_search: + return Tools([]) + else: + connectors_to_search = available_connectors + + # Search each connector in parallel + def _search_one(c: str) -> list[SemanticSearchResult]: + resp = self.semantic_client.search( + query=query, connector=c, top_k=effective_top_k, min_similarity=effective_min_sim + ) + return list(resp.results) + + all_results: list[SemanticSearchResult] = [] + last_error: SemanticSearchError | None = None + max_workers = min(len(connectors_to_search), 10) + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool: + futures = {pool.submit(_search_one, c): c for c in connectors_to_search} + for future in concurrent.futures.as_completed(futures): + try: + all_results.extend(future.result()) + except SemanticSearchError as e: + last_error = e + + # If ALL connector searches failed, re-raise to trigger fallback + if not all_results and last_error is not None: + raise last_error + + # Sort by score, apply top_k + all_results.sort(key=lambda r: r.similarity_score, reverse=True) + if effective_top_k is not None: + all_results = 
all_results[:effective_top_k] + + if not all_results: + return Tools([]) + + # Match back to fetched tool definitions + action_names = {_normalize_action_name(r.action_name) for r in all_results} + matched_tools = [t for t in all_tools if t.name in action_names] + + # Sort matched tools by semantic search score order + action_order = {_normalize_action_name(r.action_name): i for i, r in enumerate(all_results)} + matched_tools.sort(key=lambda t: action_order.get(t.name, float("inf"))) + + return Tools(matched_tools) + + except SemanticSearchError as e: + if effective_search == "semantic": + raise + + logger.warning("Semantic search failed (%s), falling back to local BM25+TF-IDF search", e) + return self._local_search( + query, all_tools, connector=connector, top_k=effective_top_k, min_similarity=effective_min_sim + ) + + def search_action_names( + self, + query: str, + *, + connector: str | None = None, + account_ids: list[str] | None = None, + top_k: int | None = None, + min_similarity: float | None = None, + ) -> list[SemanticSearchResult]: + """Search for action names without fetching tools. + + Useful when you need to inspect search results before fetching, + or when building custom filtering logic. + + Args: + query: Natural language description of needed functionality + connector: Optional provider/connector filter (single connector) + account_ids: Optional account IDs to scope results to connectors + available in those accounts (uses set_accounts() if not provided). + When provided, results are filtered to only matching connectors. + top_k: Maximum number of results. If None, uses the backend default. + min_similarity: Minimum similarity score threshold 0-1. If not provided, + the server uses its default. + + Returns: + List of SemanticSearchResult with action names, scores, and metadata. + Versioned API names are normalized to MCP format but results are NOT + deduplicated — multiple API versions of the same action may appear + with their individual scores. 
+ + Examples: + # Lightweight: inspect results before fetching + results = toolset.search_action_names("manage employees") + for r in results: + print(f"{r.action_name}: {r.similarity_score:.2f}") + + # Account-scoped: only results for connectors in linked accounts + results = toolset.search_action_names( + "create employee", + account_ids=["acc-123"], + top_k=5 + ) + + # Then fetch specific high-scoring actions + selected = [r.action_name for r in results if r.similarity_score > 0.7] + tools = toolset.fetch_tools(actions=selected) + """ + if self._search_config is None: + raise ToolsetConfigError( + "Search is disabled. Initialize StackOneToolSet with search config to enable." + ) + + # Merge constructor defaults with per-call overrides + effective_top_k = top_k if top_k is not None else self._search_config.get("top_k") + effective_min_sim = ( + min_similarity if min_similarity is not None else self._search_config.get("min_similarity") + ) + + # Resolve available connectors from account_ids (same pattern as search_tools) + available_connectors: set[str] | None = None + effective_account_ids = account_ids or self._account_ids + if effective_account_ids: + all_tools = self.fetch_tools(account_ids=effective_account_ids) + available_connectors = all_tools.get_connectors() + if not available_connectors: + return [] + + try: + if available_connectors: + # Parallel per-connector search (only user's connectors) + if connector: + connectors_to_search = {connector.lower()} & available_connectors + else: + connectors_to_search = available_connectors + + def _search_one(c: str) -> list[SemanticSearchResult]: + try: + resp = self.semantic_client.search( + query=query, + connector=c, + top_k=effective_top_k, + min_similarity=effective_min_sim, + ) + return list(resp.results) + except SemanticSearchError: + return [] + + all_results: list[SemanticSearchResult] = [] + if connectors_to_search: + max_workers = min(len(connectors_to_search), 10) + with 
concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool: + futures = [pool.submit(_search_one, c) for c in connectors_to_search] + for future in concurrent.futures.as_completed(futures): + all_results.extend(future.result()) + else: + # No account filtering — single global search + response = self.semantic_client.search( + query=query, + connector=connector, + top_k=effective_top_k, + min_similarity=effective_min_sim, + ) + all_results = list(response.results) + + except SemanticSearchError as e: + logger.warning("Semantic search failed: %s", e) + return [] + + # Sort by score, normalize action names + all_results.sort(key=lambda r: r.similarity_score, reverse=True) + normalized: list[SemanticSearchResult] = [] + for r in all_results: + normalized.append( + SemanticSearchResult( + action_name=_normalize_action_name(r.action_name), + connector_key=r.connector_key, + similarity_score=r.similarity_score, + label=r.label, + description=r.description, + ) + ) + return normalized[:effective_top_k] if effective_top_k is not None else normalized + def _filter_by_provider(self, tool_name: str, providers: list[str]) -> bool: """Check if a tool name matches any of the provider filters diff --git a/stackone_ai/utility_tools.py b/stackone_ai/utility_tools.py deleted file mode 100644 index 0d9a209..0000000 --- a/stackone_ai/utility_tools.py +++ /dev/null @@ -1,347 +0,0 @@ -"""Utility tools for dynamic tool discovery and execution""" - -from __future__ import annotations - -import json -from typing import TYPE_CHECKING - -import bm25s -import numpy as np -from pydantic import BaseModel - -from stackone_ai.constants import DEFAULT_HYBRID_ALPHA -from stackone_ai.models import ExecuteConfig, JsonDict, StackOneTool, ToolParameters -from stackone_ai.utils.tfidf_index import TfidfDocument, TfidfIndex - -if TYPE_CHECKING: - from stackone_ai.models import Tools - - -class ToolSearchResult(BaseModel): - """Result from tool_search""" - - name: str - description: str - score: 
float - - -class ToolIndex: - """Hybrid BM25 + TF-IDF tool search index""" - - def __init__(self, tools: list[StackOneTool], hybrid_alpha: float | None = None) -> None: - """Initialize tool index with hybrid search - - Args: - tools: List of tools to index - hybrid_alpha: Weight for BM25 in hybrid search (0-1). If not provided, - uses DEFAULT_HYBRID_ALPHA (0.2), which gives more weight to BM25 scoring - and has been shown to provide better tool discovery accuracy - (10.8% improvement in validation testing). - """ - self.tools = tools - self.tool_map = {tool.name: tool for tool in tools} - # Use default if not provided, then clamp to [0, 1] - alpha = hybrid_alpha if hybrid_alpha is not None else DEFAULT_HYBRID_ALPHA - self.hybrid_alpha = max(0.0, min(1.0, alpha)) - - # Prepare corpus for both BM25 and TF-IDF - corpus = [] - tfidf_docs = [] - self.tool_names = [] - - for tool in tools: - # Extract category and action from tool name - parts = tool.name.split("_") - category = parts[0] if parts else "" - - # Extract action types - action_types = ["create", "update", "delete", "get", "list", "search"] - actions = [p for p in parts if p in action_types] - - # Combine name, description, category and tags for indexing - # For TF-IDF: use weighted approach similar to Node.js - tfidf_text = " ".join( - [ - f"{tool.name} {tool.name} {tool.name}", # boost name - f"{category} {' '.join(actions)}", - tool.description, - " ".join(parts), - ] - ) - - # For BM25: simpler approach - bm25_text = " ".join( - [ - tool.name, - tool.description, - category, - " ".join(parts), - " ".join(actions), - ] - ) - - corpus.append(bm25_text) - tfidf_docs.append(TfidfDocument(id=tool.name, text=tfidf_text)) - self.tool_names.append(tool.name) - - # Create BM25 index - self.bm25_retriever = bm25s.BM25() - corpus_tokens = bm25s.tokenize(corpus, stemmer=None, show_progress=False) # ty: ignore[invalid-argument-type] - self.bm25_retriever.index(corpus_tokens) - - # Create TF-IDF index - 
self.tfidf_index = TfidfIndex() - self.tfidf_index.build(tfidf_docs) - - def search(self, query: str, limit: int = 5, min_score: float = 0.0) -> list[ToolSearchResult]: - """Search for relevant tools using hybrid BM25 + TF-IDF - - Args: - query: Natural language query - limit: Maximum number of results - min_score: Minimum relevance score (0-1) - - Returns: - List of search results sorted by relevance - """ - # Get more results initially to have better candidate pool for fusion - fetch_limit = max(50, limit) - - # Tokenize query for BM25 - query_tokens = bm25s.tokenize([query], stemmer=None, show_progress=False) # ty: ignore[invalid-argument-type] - - # Search with BM25 - bm25_results, bm25_scores = self.bm25_retriever.retrieve( - query_tokens, k=min(fetch_limit, len(self.tools)) - ) - - # Search with TF-IDF - tfidf_results = self.tfidf_index.search(query, k=min(fetch_limit, len(self.tools))) - - # Build score map for fusion - score_map: dict[str, dict[str, float]] = {} - - # Add BM25 scores - for idx, score in zip(bm25_results[0], bm25_scores[0], strict=True): - tool_name = self.tool_names[idx] - # Normalize BM25 score to 0-1 range - normalized_score = float(1 / (1 + np.exp(-score / 10))) - # Clamp to [0, 1] - clamped_score = max(0.0, min(1.0, normalized_score)) - score_map[tool_name] = {"bm25": clamped_score} - - # Add TF-IDF scores - for result in tfidf_results: - if result.id not in score_map: - score_map[result.id] = {} - score_map[result.id]["tfidf"] = result.score - - # Fuse scores: hybrid_score = alpha * bm25 + (1 - alpha) * tfidf - fused_results: list[tuple[str, float]] = [] - for tool_name, scores in score_map.items(): - bm25_score = scores.get("bm25", 0.0) - tfidf_score = scores.get("tfidf", 0.0) - hybrid_score = self.hybrid_alpha * bm25_score + (1 - self.hybrid_alpha) * tfidf_score - fused_results.append((tool_name, hybrid_score)) - - # Sort by score descending - fused_results.sort(key=lambda x: x[1], reverse=True) - - # Build final results - 
search_results = [] - for tool_name, score in fused_results: - if score < min_score: - continue - - tool = self.tool_map.get(tool_name) - if tool is None: - continue - - search_results.append( - ToolSearchResult( - name=tool.name, - description=tool.description, - score=score, - ) - ) - - if len(search_results) >= limit: - break - - return search_results - - -def create_tool_search(index: ToolIndex) -> StackOneTool: - """Create the tool_search tool - - Args: - index: Tool search index - - Returns: - Utility tool for searching relevant tools - """ - name = "tool_search" - description = ( - f"Searches for relevant tools based on a natural language query using hybrid BM25 + TF-IDF search " - f"(alpha={index.hybrid_alpha}). This tool should be called first to discover available tools " - f"before executing them." - ) - - parameters = ToolParameters( - type="object", - properties={ - "query": { - "type": "string", - "description": ( - "Natural language query describing what tools you need " - '(e.g., "tools for managing employees", "create time off request")' - ), - }, - "limit": { - "type": "number", - "description": "Maximum number of tools to return (default: 5)", - "default": 5, - }, - "minScore": { - "type": "number", - "description": "Minimum relevance score (0-1) to filter results (default: 0.0)", - "default": 0.0, - }, - }, - ) - - def execute_filter(arguments: str | JsonDict | None = None) -> JsonDict: - """Execute the filter tool""" - # Parse arguments - if isinstance(arguments, str): - kwargs = json.loads(arguments) - else: - kwargs = arguments or {} - - query = kwargs.get("query", "") - limit = int(kwargs.get("limit", 5)) - min_score = float(kwargs.get("minScore", 0.0)) - - # Search for tools - results = index.search(query, limit, min_score) - - # Format results - tools_data = [ - { - "name": r.name, - "description": r.description, - "score": r.score, - } - for r in results - ] - - return {"tools": tools_data} - - # Create execute config for the meta tool - 
execute_config = ExecuteConfig( - name=name, - method="POST", - url="", # Utility tools don't make HTTP requests - headers={}, - ) - - # Create a wrapper class that delegates execute to our custom function - class ToolSearchTool(StackOneTool): - """Utility tool for searching relevant tools""" - - def __init__(self) -> None: - super().__init__( - description=description, - parameters=parameters, - _execute_config=execute_config, - _api_key="", # Utility tools don't need API key - _account_id=None, - ) - - def execute( - self, arguments: str | JsonDict | None = None, *, options: JsonDict | None = None - ) -> JsonDict: - return execute_filter(arguments) - - return ToolSearchTool() - - -def create_tool_execute(tools_collection: Tools) -> StackOneTool: - """Create the tool_execute tool - - Args: - tools_collection: Collection of tools to execute from - - Returns: - Utility tool for executing discovered tools - """ - name = "tool_execute" - description = ( - "Executes a tool by name with the provided parameters. " - "Use this after discovering tools with tool_search." 
- ) - - parameters = ToolParameters( - type="object", - properties={ - "toolName": { - "type": "string", - "description": "Name of the tool to execute", - }, - "params": { - "type": "object", - "description": "Parameters to pass to the tool", - "additionalProperties": True, - }, - }, - ) - - def execute_tool(arguments: str | JsonDict | None = None) -> JsonDict: - """Execute the meta execute tool""" - # Parse arguments - if isinstance(arguments, str): - kwargs = json.loads(arguments) - else: - kwargs = arguments or {} - - tool_name = kwargs.get("toolName") - params = kwargs.get("params", {}) - - if not tool_name: - raise ValueError("toolName is required") - - # Get the tool - tool = tools_collection.get_tool(tool_name) - if not tool: - raise ValueError(f"Tool '{tool_name}' not found") - - # Execute the tool - return tool.execute(params) - - # Create execute config for the meta tool - execute_config = ExecuteConfig( - name=name, - method="POST", - url="", # Utility tools don't make HTTP requests - headers={}, - ) - - # Create a wrapper class that delegates execute to our custom function - class ToolExecuteTool(StackOneTool): - """Utility tool for executing discovered tools""" - - def __init__(self) -> None: - super().__init__( - description=description, - parameters=parameters, - _execute_config=execute_config, - _api_key="", # Utility tools don't need API key - _account_id=None, - ) - - def execute( - self, arguments: str | JsonDict | None = None, *, options: JsonDict | None = None - ) -> JsonDict: - return execute_tool(arguments) - - return ToolExecuteTool() diff --git a/stackone_ai/utils/normalize.py b/stackone_ai/utils/normalize.py new file mode 100644 index 0000000..e6ff0d8 --- /dev/null +++ b/stackone_ai/utils/normalize.py @@ -0,0 +1,17 @@ +"""Action name normalization utilities.""" + +from __future__ import annotations + +import re + +_VERSIONED_ACTION_RE = re.compile(r"^[a-z][a-z0-9]*_\d+(?:\.\d+)+_(.+)_global$") + + +def _normalize_action_name(action_name: 
str) -> str: + """Convert semantic search API action name to MCP tool name. + + API: 'calendly_1.0.0_calendly_create_scheduling_link_global' + MCP: 'calendly_create_scheduling_link' + """ + match = _VERSIONED_ACTION_RE.match(action_name) + return match.group(1) if match else action_name diff --git a/tests/conftest.py b/tests/conftest.py index 56459bb..b25b4f8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,6 +11,12 @@ import pytest +# Test base URL - used instead of production URLs in all test mocks. +# Since respx intercepts at the HTTP client level before DNS resolution, +# any URL string works for matching; http://localhost avoids exposing +# real infrastructure URLs. +TEST_BASE_URL = "http://localhost" + def _find_free_port() -> int: """Find a free port on localhost.""" @@ -57,8 +63,8 @@ def test_mcp_integration(mcp_mock_server): if not serve_script.exists(): pytest.skip("MCP mock server script not found at tests/mocks/serve.ts") - if not vendor_dir.exists(): - pytest.skip("stackone-ai-node submodule not found. Run 'git submodule update --init'") + if not (vendor_dir / "package.json").exists(): + pytest.skip("stackone-ai-node submodule not initialized. 
Run 'git submodule update --init'") # find port port = _find_free_port() @@ -79,10 +85,16 @@ def test_mcp_integration(mcp_mock_server): try: # Wait for server to start if not _wait_for_server("localhost", port, timeout=30.0): - stdout, stderr = process.communicate(timeout=5) - raise RuntimeError( - f"MCP mock server failed to start:\nstdout: {stdout.decode()}\nstderr: {stderr.decode()}" - ) + try: + stdout, stderr = process.communicate(timeout=5) + msg = ( + f"MCP mock server failed to start:\nstdout: {stdout.decode()}\nstderr: {stderr.decode()}" + ) + except subprocess.TimeoutExpired: + process.kill() + stdout, stderr = process.communicate() + msg = f"MCP mock server timed out:\nstdout: {stdout.decode()}\nstderr: {stderr.decode()}" + raise RuntimeError(msg) yield base_url diff --git a/tests/test_feedback.py b/tests/test_feedback.py index ab3132a..79ba51e 100644 --- a/tests/test_feedback.py +++ b/tests/test_feedback.py @@ -12,8 +12,10 @@ from hypothesis import given, settings from hypothesis import strategies as st +from stackone_ai.constants import DEFAULT_BASE_URL from stackone_ai.feedback import create_feedback_tool from stackone_ai.models import StackOneError +from tests.conftest import TEST_BASE_URL # Hypothesis strategies for PBT # Various whitespace characters including Unicode @@ -48,7 +50,7 @@ class TestFeedbackToolValidation: def test_missing_required_fields(self) -> None: """Test validation errors for missing required fields.""" - tool = create_feedback_tool(api_key="test_key") + tool = create_feedback_tool(api_key="test_key", base_url=TEST_BASE_URL) with pytest.raises(StackOneError, match="account_id"): tool.execute({"feedback": "Great tools!", "tool_names": ["test_tool"]}) @@ -61,7 +63,7 @@ def test_missing_required_fields(self) -> None: def test_empty_and_whitespace_validation(self) -> None: """Test validation for empty and whitespace-only strings.""" - tool = create_feedback_tool(api_key="test_key") + tool = create_feedback_tool(api_key="test_key", 
base_url=TEST_BASE_URL) with pytest.raises(StackOneError, match="non-empty"): tool.execute({"feedback": " ", "account_id": "acc_123456", "tool_names": ["test_tool"]}) @@ -77,7 +79,7 @@ def test_empty_and_whitespace_validation(self) -> None: def test_multiple_account_ids_validation(self) -> None: """Test validation with multiple account IDs.""" - tool = create_feedback_tool(api_key="test_key") + tool = create_feedback_tool(api_key="test_key", base_url=TEST_BASE_URL) with pytest.raises(StackOneError, match="At least one account ID is required"): tool.execute({"feedback": "Great tools!", "account_id": [], "tool_names": ["test_tool"]}) @@ -87,7 +89,7 @@ def test_multiple_account_ids_validation(self) -> None: def test_invalid_account_id_type(self) -> None: """Test validation with invalid account ID type (not string or list).""" - tool = create_feedback_tool(api_key="test_key") + tool = create_feedback_tool(api_key="test_key", base_url=TEST_BASE_URL) # Pydantic validates input types before our custom validator runs with pytest.raises(StackOneError, match="(account_id|Input should be a valid)"): @@ -100,7 +102,7 @@ def test_invalid_account_id_type(self) -> None: def test_invalid_json_input(self) -> None: """Test that invalid JSON input raises appropriate error.""" - tool = create_feedback_tool(api_key="test_key") + tool = create_feedback_tool(api_key="test_key", base_url=TEST_BASE_URL) with pytest.raises(StackOneError, match="Invalid JSON"): tool.execute("not valid json {}") @@ -112,7 +114,7 @@ def test_invalid_json_input(self) -> None: @settings(max_examples=50) def test_whitespace_feedback_validation_pbt(self, whitespace: str) -> None: """PBT: Test validation for various whitespace patterns in feedback.""" - tool = create_feedback_tool(api_key="test_key") + tool = create_feedback_tool(api_key="test_key", base_url=TEST_BASE_URL) with pytest.raises(StackOneError, match="non-empty"): tool.execute({"feedback": whitespace, "account_id": "acc_123456", "tool_names": 
["test_tool"]}) @@ -121,7 +123,7 @@ def test_whitespace_feedback_validation_pbt(self, whitespace: str) -> None: @settings(max_examples=50) def test_whitespace_account_id_validation_pbt(self, whitespace: str) -> None: """PBT: Test validation for various whitespace patterns in account_id.""" - tool = create_feedback_tool(api_key="test_key") + tool = create_feedback_tool(api_key="test_key", base_url=TEST_BASE_URL) with pytest.raises(StackOneError, match="non-empty"): tool.execute({"feedback": "Great!", "account_id": whitespace, "tool_names": ["test_tool"]}) @@ -130,7 +132,7 @@ def test_whitespace_account_id_validation_pbt(self, whitespace: str) -> None: @settings(max_examples=50) def test_whitespace_tool_names_validation_pbt(self, whitespace_list: list[str]) -> None: """PBT: Test validation for lists containing only whitespace tool names.""" - tool = create_feedback_tool(api_key="test_key") + tool = create_feedback_tool(api_key="test_key", base_url=TEST_BASE_URL) with pytest.raises(StackOneError, match="At least one tool name"): tool.execute({"feedback": "Great!", "account_id": "acc_123456", "tool_names": whitespace_list}) @@ -141,7 +143,7 @@ def test_whitespace_tool_names_validation_pbt(self, whitespace_list: list[str]) @settings(max_examples=50) def test_whitespace_account_ids_list_validation_pbt(self, whitespace_list: list[str]) -> None: """PBT: Test validation for lists containing only whitespace account IDs.""" - tool = create_feedback_tool(api_key="test_key") + tool = create_feedback_tool(api_key="test_key", base_url=TEST_BASE_URL) with pytest.raises(StackOneError, match="At least one valid account ID is required"): tool.execute( @@ -156,7 +158,7 @@ def test_whitespace_account_ids_list_validation_pbt(self, whitespace_list: list[ @settings(max_examples=50) def test_invalid_json_input_pbt(self, invalid_json: str) -> None: """PBT: Test that various invalid JSON inputs raise appropriate error.""" - tool = create_feedback_tool(api_key="test_key") + tool = 
create_feedback_tool(api_key="test_key", base_url=TEST_BASE_URL) with pytest.raises(StackOneError, match="Invalid JSON"): tool.execute(invalid_json) @@ -164,9 +166,9 @@ def test_invalid_json_input_pbt(self, invalid_json: str) -> None: @respx.mock def test_json_string_input(self) -> None: """Test that JSON string input is properly parsed.""" - tool = create_feedback_tool(api_key="test_key") + tool = create_feedback_tool(api_key="test_key", base_url=TEST_BASE_URL) - route = respx.post("https://api.stackone.com/ai/tool-feedback").mock( + route = respx.post(f"{TEST_BASE_URL}/ai/tool-feedback").mock( return_value=httpx.Response(200, json={"message": "Success"}) ) @@ -185,10 +187,10 @@ class TestFeedbackToolExecution: @respx.mock def test_single_account_execution(self) -> None: """Test execution with single account ID.""" - tool = create_feedback_tool(api_key="test_key") + tool = create_feedback_tool(api_key="test_key", base_url=TEST_BASE_URL) api_response = {"message": "Feedback successfully stored", "trace_id": "test-trace-id"} - route = respx.post("https://api.stackone.com/ai/tool-feedback").mock( + route = respx.post(f"{TEST_BASE_URL}/ai/tool-feedback").mock( return_value=httpx.Response(200, json=api_response) ) @@ -213,10 +215,10 @@ def test_single_account_execution(self) -> None: @respx.mock def test_call_method_interface(self) -> None: """Test that the .call() method works correctly.""" - tool = create_feedback_tool(api_key="test_key") + tool = create_feedback_tool(api_key="test_key", base_url=TEST_BASE_URL) api_response = {"message": "Success", "trace_id": "test-trace-id"} - route = respx.post("https://api.stackone.com/ai/tool-feedback").mock( + route = respx.post(f"{TEST_BASE_URL}/ai/tool-feedback").mock( return_value=httpx.Response(200, json=api_response) ) @@ -234,9 +236,9 @@ def test_call_method_interface(self) -> None: @respx.mock def test_api_error_handling(self) -> None: """Test that API errors are handled properly.""" - tool = 
create_feedback_tool(api_key="test_key") + tool = create_feedback_tool(api_key="test_key", base_url=TEST_BASE_URL) - route = respx.post("https://api.stackone.com/ai/tool-feedback").mock( + route = respx.post(f"{TEST_BASE_URL}/ai/tool-feedback").mock( return_value=httpx.Response(401, json={"error": "Unauthorized"}) ) @@ -255,11 +257,11 @@ def test_api_error_handling(self) -> None: @respx.mock def test_multiple_account_ids_execution(self) -> None: """Test execution with multiple account IDs - both success and mixed scenarios.""" - tool = create_feedback_tool(api_key="test_key") + tool = create_feedback_tool(api_key="test_key", base_url=TEST_BASE_URL) api_response = {"message": "Feedback successfully stored", "trace_id": "test-trace-id"} # Test all successful case - route = respx.post("https://api.stackone.com/ai/tool-feedback").mock( + route = respx.post(f"{TEST_BASE_URL}/ai/tool-feedback").mock( return_value=httpx.Response(200, json=api_response) ) @@ -302,7 +304,7 @@ def test_multiple_account_ids_execution(self) -> None: @respx.mock def test_multiple_account_ids_mixed_success(self) -> None: """Test execution with multiple account IDs - mixed success and error.""" - tool = create_feedback_tool(api_key="test_key") + tool = create_feedback_tool(api_key="test_key", base_url=TEST_BASE_URL) def custom_side_effect(request: httpx.Request) -> httpx.Response: body = json.loads(request.content) @@ -312,7 +314,7 @@ def custom_side_effect(request: httpx.Request) -> httpx.Response: else: return httpx.Response(401, json={"error": "Unauthorized"}) - route = respx.post("https://api.stackone.com/ai/tool-feedback").mock(side_effect=custom_side_effect) + route = respx.post(f"{TEST_BASE_URL}/ai/tool-feedback").mock(side_effect=custom_side_effect) result = tool.execute( { @@ -338,7 +340,7 @@ def custom_side_effect(request: httpx.Request) -> httpx.Response: "status": "error", "error": ( "Client error '401 Unauthorized' for url " - "'https://api.stackone.com/ai/tool-feedback'\n" + 
f"'{TEST_BASE_URL}/ai/tool-feedback'\n" "For more information check: " "https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/401" ), @@ -351,7 +353,7 @@ def custom_side_effect(request: httpx.Request) -> httpx.Response: def test_tool_integration(self) -> None: """Test that feedback tool integrates properly with toolset.""" - feedback_tool = create_feedback_tool(api_key="test_key") + feedback_tool = create_feedback_tool(api_key="test_key", base_url=TEST_BASE_URL) assert feedback_tool is not None assert feedback_tool.name == "tool_feedback" @@ -376,7 +378,7 @@ def test_live_feedback_submission() -> None: if not api_key: pytest.skip("STACKONE_API_KEY env var required for live feedback test") - base_url = os.getenv("STACKONE_BASE_URL", "https://api.stackone.com") + base_url = os.getenv("STACKONE_BASE_URL", DEFAULT_BASE_URL) feedback_tool = create_feedback_tool(api_key=api_key, base_url=base_url) assert feedback_tool is not None, "Feedback tool must be available" diff --git a/tests/test_utility_tools.py b/tests/test_local_search.py similarity index 52% rename from tests/test_utility_tools.py rename to tests/test_local_search.py index c8b6c97..32e09e7 100644 --- a/tests/test_utility_tools.py +++ b/tests/test_local_search.py @@ -1,18 +1,12 @@ """Tests for utility tools functionality""" -import httpx import pytest -import respx from hypothesis import given, settings from hypothesis import strategies as st from stackone_ai import StackOneTool, Tools +from stackone_ai.local_search import ToolIndex from stackone_ai.models import ExecuteConfig, ToolParameters -from stackone_ai.utility_tools import ( - ToolIndex, - create_tool_execute, - create_tool_search, -) # Hypothesis strategies for PBT # Score threshold strategy @@ -89,63 +83,7 @@ def _create_sample_tools() -> list[StackOneTool]: @pytest.fixture def sample_tools(): """Create sample tools for testing""" - tools = [] - - # Create HiBob tools - for action in ["create", "list", "update", "delete"]: - for entity in 
["employee", "department", "timeoff"]: - tool_name = f"hibob_{action}_{entity}" - execute_config = ExecuteConfig( - name=tool_name, - method="POST" if action in ["create", "update"] else "GET", - url=f"https://api.example.com/hibob/{entity}", - headers={}, - ) - - parameters = ToolParameters( - type="object", - properties={ - "id": {"type": "string", "description": "Entity ID"}, - "data": {"type": "object", "description": "Entity data"}, - }, - ) - - tool = StackOneTool( - description=f"{action.capitalize()} {entity} in HiBob system", - parameters=parameters, - _execute_config=execute_config, - _api_key="test_key", - ) - tools.append(tool) - - # Create BambooHR tools - for action in ["create", "list", "search"]: - for entity in ["candidate", "job", "application"]: - tool_name = f"bamboohr_{action}_{entity}" - execute_config = ExecuteConfig( - name=tool_name, - method="POST" if action == "create" else "GET", - url=f"https://api.example.com/bamboohr/{entity}", - headers={}, - ) - - parameters = ToolParameters( - type="object", - properties={ - "query": {"type": "string", "description": "Search query"}, - "filters": {"type": "object", "description": "Filter criteria"}, - }, - ) - - tool = StackOneTool( - description=f"{action.capitalize()} {entity} in BambooHR system", - parameters=parameters, - _execute_config=execute_config, - _api_key="test_key", - ) - tools.append(tool) - - return tools + return _create_sample_tools() @pytest.fixture @@ -236,166 +174,6 @@ def test_search_limit_pbt(self, limit: int): assert len(results) <= len(tools) -class TestToolSearch: - """Test the tool_search functionality""" - - def test_filter_tool_creation(self, sample_tools): - """Test creating the filter tool""" - index = ToolIndex(sample_tools) - filter_tool = create_tool_search(index) - - assert filter_tool.name == "tool_search" - assert "natural language query" in filter_tool.description.lower() - - def test_filter_tool_execute_with_json_string(self, sample_tools): - """Test executing 
the filter tool with JSON string input.""" - import json - - index = ToolIndex(sample_tools) - filter_tool = create_tool_search(index) - - # Execute with JSON string - json_input = json.dumps({"query": "employee", "limit": 2, "minScore": 0.0}) - result = filter_tool.execute(json_input) - - assert "tools" in result - assert isinstance(result["tools"], list) - assert len(result["tools"]) <= 2 - - def test_filter_tool_execute(self, sample_tools): - """Test executing the filter tool""" - index = ToolIndex(sample_tools) - filter_tool = create_tool_search(index) - - # Execute with a query - result = filter_tool.execute( - { - "query": "manage employees", - "limit": 3, - "minScore": 0.0, - } - ) - - assert "tools" in result - assert isinstance(result["tools"], list) - assert len(result["tools"]) <= 3 - - # Check tool structure - if result["tools"]: - tool = result["tools"][0] - assert "name" in tool - assert "description" in tool - assert "score" in tool - - def test_filter_tool_call(self, sample_tools): - """Test calling the filter tool with call method""" - index = ToolIndex(sample_tools) - filter_tool = create_tool_search(index) - - # Call with kwargs - result = filter_tool.call(query="candidate", limit=2) - - assert "tools" in result - assert len(result["tools"]) <= 2 - - -class TestToolExecute: - """Test the tool_execute functionality""" - - def test_execute_tool_creation(self, tools_collection): - """Test creating the execute tool""" - execute_tool = create_tool_execute(tools_collection) - - assert execute_tool.name == "tool_execute" - assert "executes a tool" in execute_tool.description.lower() - - def test_execute_tool_missing_name(self, tools_collection): - """Test execute tool with missing tool name""" - execute_tool = create_tool_execute(tools_collection) - - with pytest.raises(ValueError, match="toolName is required"): - execute_tool.execute({"params": {}}) - - def test_execute_tool_with_json_string(self, tools_collection): - """Test execute tool with JSON 
string input.""" - import json - - execute_tool = create_tool_execute(tools_collection) - - # Execute with JSON string - should raise ValueError for invalid tool - json_input = json.dumps({"toolName": "nonexistent_tool", "params": {}}) - with pytest.raises(ValueError, match="Tool 'nonexistent_tool' not found"): - execute_tool.execute(json_input) - - def test_execute_tool_invalid_name(self, tools_collection): - """Test execute tool with invalid tool name""" - execute_tool = create_tool_execute(tools_collection) - - with pytest.raises(ValueError, match="Tool 'invalid_tool' not found"): - execute_tool.execute( - { - "toolName": "invalid_tool", - "params": {}, - } - ) - - @respx.mock - def test_execute_tool_call(self, tools_collection): - """Test calling the execute tool with call method""" - execute_tool = create_tool_execute(tools_collection) - - # Mock the actual tool execution - route = respx.get("https://api.example.com/hibob/employee").mock( - return_value=httpx.Response(200, json={"success": True, "employees": []}) - ) - - # Call the tool_execute tool - result = execute_tool.call(toolName="hibob_list_employee", params={"limit": 10}) - - assert result == {"success": True, "employees": []} - assert route.called - assert route.calls[0].response.status_code == 200 - - -class TestToolsUtilityTools: - """Test the utility_tools method on Tools collection""" - - def test_utility_tools_creation(self, tools_collection): - """Test creating utility tools from a Tools collection""" - utility_tools = tools_collection.utility_tools() - - assert isinstance(utility_tools, Tools) - assert len(utility_tools) == 2 - - # Check tool names - tool_names = [tool.name for tool in utility_tools.tools] - assert "tool_search" in tool_names - assert "tool_execute" in tool_names - - def test_utility_tools_functionality(self, tools_collection): - """Test that utility tools work correctly""" - utility_tools = tools_collection.utility_tools() - - # Get the filter tool - filter_tool = 
utility_tools.get_tool("tool_search") - assert filter_tool is not None - - # Search for tools - result = filter_tool.execute( - { - "query": "create employee", - "limit": 1, - } - ) - - assert "tools" in result - assert len(result["tools"]) > 0 - - # The top result should be related to creating employees - top_tool = result["tools"][0] - assert "employee" in top_tool["name"].lower() or "create" in top_tool["name"].lower() - - class TestHybridSearch: """Test hybrid search functionality""" @@ -474,21 +252,3 @@ def test_hybrid_search_with_different_alphas(self, sample_tools): assert any("employee" in r.name and "create" in r.name for r in results_balanced), ( f"Balanced results: {[r.name for r in results_balanced]}" ) - - def test_utility_tools_with_custom_alpha(self, sample_tools): - """Test that utility_tools() accepts hybrid_alpha parameter""" - tools_collection = Tools(sample_tools) - - # Create utility tools with custom alpha - utility_tools = tools_collection.utility_tools(hybrid_alpha=0.3) - - filter_tool = utility_tools.get_tool("tool_search") - assert filter_tool is not None - - # Check that description mentions the alpha value - assert "alpha=0.3" in filter_tool.description - - # Test it works - result = filter_tool.execute({"query": "list employees", "limit": 3}) - assert "tools" in result - assert len(result["tools"]) > 0 diff --git a/tests/test_semantic_search.py b/tests/test_semantic_search.py new file mode 100644 index 0000000..13bef94 --- /dev/null +++ b/tests/test_semantic_search.py @@ -0,0 +1,1014 @@ +"""Tests for semantic search client and integration.""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import httpx +import pytest + +from stackone_ai.constants import DEFAULT_BASE_URL +from stackone_ai.semantic_search import ( + SemanticSearchClient, + SemanticSearchError, + SemanticSearchResponse, + SemanticSearchResult, +) + + +class TestSemanticSearchResult: + """Tests for SemanticSearchResult model.""" + + 
def test_create_result(self) -> None: + """Test creating a search result.""" + result = SemanticSearchResult( + action_name="bamboohr_create_employee", + connector_key="bamboohr", + similarity_score=0.92, + label="Create Employee", + description="Creates a new employee in BambooHR", + ) + + assert result.action_name == "bamboohr_create_employee" + assert result.connector_key == "bamboohr" + assert result.similarity_score == 0.92 + assert result.label == "Create Employee" + assert result.description == "Creates a new employee in BambooHR" + + +class TestSemanticSearchResponse: + """Tests for SemanticSearchResponse model.""" + + def test_create_response(self) -> None: + """Test creating a search response.""" + results = [ + SemanticSearchResult( + action_name="bamboohr_create_employee", + connector_key="bamboohr", + similarity_score=0.92, + label="Create Employee", + description="Creates a new employee", + ), + SemanticSearchResult( + action_name="hibob_create_employee", + connector_key="hibob", + similarity_score=0.85, + label="Create Employee", + description="Creates a new employee", + ), + ] + response = SemanticSearchResponse( + results=results, + total_count=2, + query="create employee", + ) + + assert len(response.results) == 2 + assert response.total_count == 2 + assert response.query == "create employee" + + +class TestSemanticSearchClient: + """Tests for SemanticSearchClient.""" + + def test_init(self) -> None: + """Test client initialization.""" + client = SemanticSearchClient(api_key="test-key") + + assert client.api_key == "test-key" + assert client.base_url == DEFAULT_BASE_URL + assert client.timeout == 30.0 + + def test_init_custom_base_url(self) -> None: + """Test client initialization with custom base URL.""" + client = SemanticSearchClient( + api_key="test-key", + base_url="https://custom.api.com/", + ) + + assert client.base_url == "https://custom.api.com" # Trailing slash stripped + + def test_build_auth_header(self) -> None: + """Test building the 
authorization header.""" + client = SemanticSearchClient(api_key="test-key") + header = client._build_auth_header() + + # test-key: encoded in base64 = dGVzdC1rZXk6 + assert header == "Basic dGVzdC1rZXk6" + + @patch("httpx.post") + def test_search_success(self, mock_post: MagicMock) -> None: + """Test successful search request.""" + mock_response = MagicMock() + mock_response.json.return_value = { + "results": [ + { + "action_name": "bamboohr_create_employee", + "connector_key": "bamboohr", + "similarity_score": 0.92, + "label": "Create Employee", + "description": "Creates a new employee", + } + ], + "total_count": 1, + "query": "create employee", + } + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + client = SemanticSearchClient(api_key="test-key") + response = client.search("create employee", top_k=5) + + assert len(response.results) == 1 + assert response.results[0].action_name == "bamboohr_create_employee" + assert response.total_count == 1 + assert response.query == "create employee" + + # Verify request was made correctly + mock_post.assert_called_once() + call_kwargs = mock_post.call_args + assert call_kwargs.kwargs["json"] == {"query": "create employee", "top_k": 5} + assert "Authorization" in call_kwargs.kwargs["headers"] + + @patch("httpx.post") + def test_search_with_connector(self, mock_post: MagicMock) -> None: + """Test search with connector filter.""" + mock_response = MagicMock() + mock_response.json.return_value = { + "results": [], + "total_count": 0, + "query": "create employee", + } + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + client = SemanticSearchClient(api_key="test-key") + client.search("create employee", connector="bamboohr", top_k=10) + + call_kwargs = mock_post.call_args + assert call_kwargs.kwargs["json"] == { + "query": "create employee", + "connector": "bamboohr", + "top_k": 10, + } + + @patch("httpx.post") + def test_search_http_error(self, 
mock_post: MagicMock) -> None: + """Test search with HTTP error.""" + mock_response = MagicMock() + mock_response.status_code = 401 + mock_response.text = "Unauthorized" + mock_post.return_value = mock_response + mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( + "Unauthorized", + request=MagicMock(), + response=mock_response, + ) + + client = SemanticSearchClient(api_key="invalid-key") + + with pytest.raises(SemanticSearchError) as exc_info: + client.search("create employee") + + assert "API error: 401" in str(exc_info.value) + + @patch("httpx.post") + def test_search_request_error(self, mock_post: MagicMock) -> None: + """Test search with request error.""" + mock_post.side_effect = httpx.RequestError("Connection failed") + + client = SemanticSearchClient(api_key="test-key") + + with pytest.raises(SemanticSearchError) as exc_info: + client.search("create employee") + + assert "Request failed" in str(exc_info.value) + + @patch("httpx.post") + def test_search_action_names(self, mock_post: MagicMock) -> None: + """Test search_action_names convenience method.""" + mock_response = MagicMock() + mock_response.json.return_value = { + "results": [ + { + "action_name": "bamboohr_create_employee", + "connector_key": "bamboohr", + "similarity_score": 0.92, + "label": "Create Employee", + "description": "Creates a new employee", + }, + { + "action_name": "hibob_create_employee", + "connector_key": "hibob", + "similarity_score": 0.45, + "label": "Create Employee", + "description": "Creates a new employee", + }, + ], + "total_count": 2, + "query": "create employee", + } + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + client = SemanticSearchClient(api_key="test-key") + + # Without min_similarity — returns all results + names = client.search_action_names("create employee") + assert len(names) == 2 + assert "bamboohr_create_employee" in names + assert "hibob_create_employee" in names + + # With min_similarity — passes 
threshold to server + names = client.search_action_names("create employee", min_similarity=0.5) + assert len(names) == 2 # Mock returns same data; filtering is server-side + # Verify min_similarity was sent in the request payload + last_call_kwargs = mock_post.call_args + payload = last_call_kwargs.kwargs.get("json") or last_call_kwargs[1].get("json") + assert payload["min_similarity"] == 0.5 + + +class TestSemanticSearchIntegration: + """Integration tests for semantic search with toolset.""" + + def test_toolset_semantic_client_lazy_init(self) -> None: + """Test that semantic_client is lazily initialized.""" + from stackone_ai import StackOneToolSet + + toolset = StackOneToolSet(api_key="test-key") + + # Access semantic_client + client = toolset.semantic_client + assert isinstance(client, SemanticSearchClient) + assert client.api_key == "test-key" + + # Same instance on second access + assert toolset.semantic_client is client + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_toolset_search_tools( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test toolset.search_tools() method with connector filtering.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + # Mock semantic search to return versioned API names (including some for unavailable connectors) + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="bamboohr_1.0.0_bamboohr_create_employee_global", + connector_key="bamboohr", + similarity_score=0.95, + label="Create Employee", + description="Creates a new employee", + ), + SemanticSearchResult( + action_name="workday_1.0.0_workday_create_worker_global", + connector_key="workday", # User doesn't have this connector + similarity_score=0.90, + label="Create Worker", + description="Creates a new worker", + ), + SemanticSearchResult( + 
action_name="hibob_1.0.0_hibob_create_employee_global", + connector_key="hibob", + similarity_score=0.85, + label="Create Employee", + description="Creates a new employee", + ), + ], + total_count=3, + query="create employee", + ) + + # Mock MCP fetch to return only bamboohr and hibob tools (user's linked accounts) + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates a new employee", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="hibob_create_employee", + description="Creates a new employee", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="bamboohr_list_employees", + description="Lists employees", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + tools = toolset.search_tools("create employee", top_k=5) + + # Should only return tools for available connectors (bamboohr, hibob) + # workday_create_worker should be filtered out + assert len(tools) == 2 + tool_names = [t.name for t in tools] + assert "bamboohr_create_employee" in tool_names + assert "hibob_create_employee" in tool_names + assert "workday_create_worker" not in tool_names # Filtered out - connector not available + + # Results should be sorted by semantic score + assert tools[0].name == "bamboohr_create_employee" # score 0.95 + assert tools[1].name == "hibob_create_employee" # score 0.85 + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_toolset_search_tools_fallback( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test search_tools() fallback when semantic search fails.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + # Semantic search raises an error to trigger fallback + mock_search.side_effect = SemanticSearchError("API unavailable") + + # Mock MCP fetch to return 
tools from multiple connectors + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates a new employee in BambooHR", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="bamboohr_list_employees", + description="Lists all employees in BambooHR", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="workday_create_worker", + description="Creates a new worker in Workday", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + tools = toolset.search_tools("create employee", top_k=5, search="auto") + + # Should return results from the local BM25+TF-IDF fallback + assert len(tools) > 0 + tool_names = [t.name for t in tools] + # Should only include tools for available connectors (bamboohr, workday) + for name in tool_names: + connector = name.split("_")[0] + assert connector in {"bamboohr", "workday"} + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_toolset_search_tools_fallback_respects_connector( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test BM25 fallback filters to the requested connector.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + mock_search.side_effect = SemanticSearchError("API unavailable") + + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates a new employee in BambooHR", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="bamboohr_list_employees", + description="Lists all employees in BambooHR", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="workday_create_worker", + description="Creates a new worker in Workday", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = 
StackOneToolSet(api_key="test-key") + tools = toolset.search_tools("create employee", connector="bamboohr", search="auto") + + assert len(tools) > 0 + tool_names = [t.name for t in tools] + for name in tool_names: + assert name.split("_")[0] == "bamboohr" + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_toolset_search_tools_fallback_disabled( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test search_tools() raises when fallback is disabled.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + mock_search.side_effect = SemanticSearchError("API unavailable") + # Must provide tools so the flow reaches the semantic search call + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates a new employee", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + with pytest.raises(SemanticSearchError): + toolset.search_tools("create employee", search="semantic") + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_toolset_search_action_names( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test toolset.search_action_names() method.""" + from stackone_ai import StackOneToolSet + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="bamboohr_1.0.0_bamboohr_create_employee_global", + connector_key="bamboohr", + similarity_score=0.92, + label="Create Employee", + description="Creates a new employee", + ), + SemanticSearchResult( + action_name="hibob_1.0.0_hibob_create_employee_global", + connector_key="hibob", + similarity_score=0.45, + label="Create Employee", + description="Creates a new employee", + ), + ], + total_count=2, + query="create employee", + ) + + toolset = 
StackOneToolSet(api_key="test-key") + results = toolset.search_action_names("create employee", min_similarity=0.5) + + # min_similarity is passed to server; mock returns both results + # Verify results are normalized + assert len(results) == 2 + assert results[0].action_name == "bamboohr_create_employee" + assert results[1].action_name == "hibob_create_employee" + # Verify min_similarity was passed to the search call + mock_search.assert_called_with( + query="create employee", connector=None, top_k=None, min_similarity=0.5 + ) + + def test_tools_no_longer_has_utility_tools(self) -> None: + """Test that utility_tools abstraction has been removed from Tools.""" + from stackone_ai.models import StackOneTool, Tools + + tool = MagicMock(spec=StackOneTool) + tool.name = "test_tool" + tool.description = "Test tool" + tool.connector = "test" + tools = Tools([tool]) + + assert not hasattr(tools, "utility_tools") + + +class TestSearchModes: + """Tests for the search parameter on search_tools().""" + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_local_mode_skips_semantic_api( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test search='local' uses BM25+TF-IDF without calling semantic API.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates a new employee in BambooHR", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="bamboohr_list_employees", + description="Lists all employees in BambooHR", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + tools = toolset.search_tools("create employee", top_k=5, search="local") + + assert len(tools) > 0 + mock_search.assert_not_called() + + @patch.object(SemanticSearchClient, "search") + 
@patch("stackone_ai.toolset._fetch_mcp_tools") + def test_semantic_mode_raises_on_failure( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test search='semantic' raises SemanticSearchError on failure.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + mock_search.side_effect = SemanticSearchError("API unavailable") + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates a new employee", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + with pytest.raises(SemanticSearchError): + toolset.search_tools("create employee", search="semantic") + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_auto_mode_falls_back_to_local( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test search='auto' falls back to local on semantic failure.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + mock_search.side_effect = SemanticSearchError("API unavailable") + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates a new employee in BambooHR", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + tools = toolset.search_tools("create employee", top_k=5, search="auto") + + assert len(tools) > 0 + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_search_tool_passes_search_mode( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test that get_search_tool(search='local') passes mode through.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + mock_fetch.return_value = [ + _McpToolDefinition( + 
name="bamboohr_list_employees", + description="Lists employees in BambooHR", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + search_tool = toolset.get_search_tool(search="local") + tools = search_tool("list employees", top_k=5) + + assert len(tools) > 0 + mock_search.assert_not_called() + + +class TestConnectorProperty: + """Tests for StackOneTool.connector property.""" + + def test_connector_extracts_from_name(self) -> None: + """Test that connector is extracted from tool name.""" + from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters + + execute_config = ExecuteConfig( + name="bamboohr_create_employee", + method="POST", + url="https://api.example.com", + headers={}, + ) + tool = StackOneTool( + description="Creates employee", + parameters=ToolParameters(type="object", properties={}), + _execute_config=execute_config, + _api_key="test-key", + ) + + assert tool.connector == "bamboohr" + + def test_connector_is_lowercase(self) -> None: + """Test that connector is always lowercase.""" + from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters + + execute_config = ExecuteConfig( + name="BambooHR_Create_Employee", + method="POST", + url="https://api.example.com", + headers={}, + ) + tool = StackOneTool( + description="Creates employee", + parameters=ToolParameters(type="object", properties={}), + _execute_config=execute_config, + _api_key="test-key", + ) + + assert tool.connector == "bamboohr" + + def test_connector_with_single_word_name(self) -> None: + """Test connector extraction with single-word tool name.""" + from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters + + execute_config = ExecuteConfig( + name="utility", + method="POST", + url="https://api.example.com", + headers={}, + ) + tool = StackOneTool( + description="Utility tool", + parameters=ToolParameters(type="object", properties={}), + _execute_config=execute_config, + 
_api_key="test-key", + ) + + assert tool.connector == "utility" + + +class TestToolsConnectorHelpers: + """Tests for Tools.get_connectors().""" + + def test_get_connectors(self) -> None: + """Test getting unique connectors from tools collection.""" + from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters, Tools + + def make_tool(name: str) -> StackOneTool: + return StackOneTool( + description=f"Tool {name}", + parameters=ToolParameters(type="object", properties={}), + _execute_config=ExecuteConfig(name=name, method="POST", url="", headers={}), + _api_key="test-key", + ) + + tools = Tools( + [ + make_tool("bamboohr_create_employee"), + make_tool("bamboohr_list_employees"), + make_tool("hibob_create_employee"), + make_tool("slack_send_message"), + ] + ) + + connectors = tools.get_connectors() + + assert connectors == {"bamboohr", "hibob", "slack"} + + def test_get_connectors_empty(self) -> None: + """Test get_connectors with empty tools collection.""" + from stackone_ai.models import Tools + + tools = Tools([]) + assert tools.get_connectors() == set() + + +class TestSearchActionNamesWithAccountIds: + """Tests for search_action_names with account_ids parameter.""" + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_filters_by_account_connectors(self, mock_fetch: MagicMock, mock_search: MagicMock) -> None: + """Test that only connectors from linked accounts are searched (per-connector parallel).""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + def _search_side_effect( + query: str, + connector: str | None = None, + top_k: int | None = None, + min_similarity: float | None = None, + ) -> SemanticSearchResponse: + if connector == "bamboohr": + return SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="bamboohr_1.0.0_bamboohr_create_employee_global", + connector_key="bamboohr", + similarity_score=0.95, + label="Create 
Employee", + description="Creates employee", + ), + ], + total_count=1, + query=query, + ) + elif connector == "hibob": + return SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="hibob_1.0.0_hibob_create_employee_global", + connector_key="hibob", + similarity_score=0.85, + label="Create Employee", + description="Creates employee", + ), + ], + total_count=1, + query=query, + ) + return SemanticSearchResponse(results=[], total_count=0, query=query) + + mock_search.side_effect = _search_side_effect + + # Mock MCP to return only bamboohr and hibob tools (user's linked accounts) + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates employee", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="hibob_create_employee", + description="Creates employee", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + results = toolset.search_action_names( + "create employee", + account_ids=["acc-123"], + top_k=10, + ) + + # Only bamboohr and hibob searched (workday never queried) + assert len(results) == 2 + action_names = [r.action_name for r in results] + assert "bamboohr_create_employee" in action_names + assert "hibob_create_employee" in action_names + # Verify only per-connector calls were made (no global call) + assert mock_search.call_count == 2 + called_connectors = {call.kwargs.get("connector") for call in mock_search.call_args_list} + assert called_connectors == {"bamboohr", "hibob"} + + @patch.object(SemanticSearchClient, "search") + def test_search_action_names_returns_empty_on_failure(self, mock_search: MagicMock) -> None: + """Test that search_action_names returns [] when semantic search fails.""" + from stackone_ai import StackOneToolSet + + mock_search.side_effect = SemanticSearchError("API unavailable") + + toolset = StackOneToolSet(api_key="test-key") + results = 
toolset.search_action_names("create employee") + + assert results == [] + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_searches_all_connectors_in_parallel(self, mock_fetch: MagicMock, mock_search: MagicMock) -> None: + """Test that all available connectors are searched directly (no global call + fallback).""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + mock_search.return_value = SemanticSearchResponse( + results=[], + total_count=0, + query="test", + ) + + # Mock MCP to return tools from two connectors + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_list_employees", + description="Lists employees", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="hibob_list_employees", + description="Lists employees", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + toolset.search_action_names( + "test", + account_ids=["acc-123"], + top_k=5, + ) + + # Each connector gets its own search call (parallel, not sequential fallback) + assert mock_search.call_count == 2 + called_connectors = {call.kwargs.get("connector") for call in mock_search.call_args_list} + assert called_connectors == {"bamboohr", "hibob"} + # top_k is passed to each per-connector call + for call in mock_search.call_args_list: + assert call.kwargs["top_k"] == 5 + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_respects_top_k_after_filtering(self, mock_fetch: MagicMock, mock_search: MagicMock) -> None: + """Test that results are limited to top_k after filtering.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + # Return more results than top_k using versioned API names + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + 
action_name=f"bamboohr_1.0.0_bamboohr_action_{i}_global", + connector_key="bamboohr", + similarity_score=0.9 - i * 0.1, + label=f"Action {i}", + description=f"Action {i}", + ) + for i in range(10) + ], + total_count=10, + query="test", + ) + + # Mock MCP to return bamboohr tools + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_action_0", + description="Action 0", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + results = toolset.search_action_names( + "test", + account_ids=["acc-123"], + top_k=3, + ) + + # Should be limited to top_k after normalization + assert len(results) == 3 + # Names should be normalized + assert results[0].action_name == "bamboohr_action_0" + + +class TestNormalizeActionName: + """Tests for _normalize_action_name() function.""" + + def test_versioned_name_is_normalized(self) -> None: + """Test that versioned API names are normalized to MCP format.""" + from stackone_ai.utils.normalize import _normalize_action_name + + assert ( + _normalize_action_name("calendly_1.0.0_calendly_create_scheduling_link_global") + == "calendly_create_scheduling_link" + ) + + def test_multi_segment_version(self) -> None: + """Test normalization with multi-segment semver.""" + from stackone_ai.utils.normalize import _normalize_action_name + + assert ( + _normalize_action_name("breathehr_1.0.1_breathehr_list_employees_global") + == "breathehr_list_employees" + ) + + def test_already_normalized_name_unchanged(self) -> None: + """Test that MCP-format names pass through unchanged.""" + from stackone_ai.utils.normalize import _normalize_action_name + + assert _normalize_action_name("bamboohr_create_employee") == "bamboohr_create_employee" + + def test_non_matching_name_unchanged(self) -> None: + """Test that names that don't match the pattern pass through unchanged.""" + from stackone_ai.utils.normalize import _normalize_action_name + + assert _normalize_action_name("some_random_tool") 
== "some_random_tool" + + def test_empty_string(self) -> None: + """Test empty string input.""" + from stackone_ai.utils.normalize import _normalize_action_name + + assert _normalize_action_name("") == "" + + def test_multiple_versions_normalize_to_same(self) -> None: + """Test that different versions of the same action normalize identically.""" + from stackone_ai.utils.normalize import _normalize_action_name + + name_v1 = _normalize_action_name("breathehr_1.0.0_breathehr_list_employees_global") + name_v2 = _normalize_action_name("breathehr_1.0.1_breathehr_list_employees_global") + assert name_v1 == name_v2 == "breathehr_list_employees" + + +class TestSemanticSearchDeduplication: + """Tests for deduplication after name normalization.""" + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_search_tools_deduplicates_versions(self, mock_fetch: MagicMock, mock_search: MagicMock) -> None: + """Test that search_tools deduplicates multiple API versions of the same action.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="breathehr_1.0.0_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.95, + label="List Employees", + description="Lists employees", + ), + SemanticSearchResult( + action_name="breathehr_1.0.1_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.90, + label="List Employees v2", + description="Lists employees v2", + ), + SemanticSearchResult( + action_name="bamboohr_1.0.0_bamboohr_create_employee_global", + connector_key="bamboohr", + similarity_score=0.85, + label="Create Employee", + description="Creates employee", + ), + ], + total_count=3, + query="list employees", + ) + + mock_fetch.return_value = [ + _McpToolDefinition( + name="breathehr_list_employees", + description="Lists 
employees", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates employee", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + tools = toolset.search_tools("list employees", top_k=5) + + # Should deduplicate: both breathehr versions -> breathehr_list_employees + tool_names = [t.name for t in tools] + assert tool_names.count("breathehr_list_employees") == 1 + assert "bamboohr_create_employee" in tool_names + assert len(tools) == 2 + + @patch.object(SemanticSearchClient, "search") + def test_search_action_names_normalizes_versions(self, mock_search: MagicMock) -> None: + """Test that search_action_names normalizes versioned API names.""" + from stackone_ai import StackOneToolSet + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="breathehr_1.0.0_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.95, + label="List Employees", + description="Lists employees", + ), + SemanticSearchResult( + action_name="breathehr_1.0.1_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.90, + label="List Employees v2", + description="Lists employees v2", + ), + ], + total_count=2, + query="list employees", + ) + + toolset = StackOneToolSet(api_key="test-key") + results = toolset.search_action_names("list employees", top_k=5) + + # Both results are returned with normalized names (no dedup in global path) + assert len(results) == 2 + assert results[0].action_name == "breathehr_list_employees" + assert results[1].action_name == "breathehr_list_employees" + # Sorted by score descending + assert results[0].similarity_score == 0.95 + assert results[1].similarity_score == 0.90 diff --git a/tests/test_tool_calling.py b/tests/test_tool_calling.py index 5dc73c8..1902b7e 100644 --- a/tests/test_tool_calling.py +++ 
b/tests/test_tool_calling.py @@ -9,6 +9,7 @@ from stackone_ai import StackOneTool from stackone_ai.models import ExecuteConfig, ToolParameters from stackone_ai.toolset import _StackOneRpcTool +from tests.conftest import TEST_BASE_URL @pytest.fixture @@ -154,14 +155,14 @@ def rpc_tool(self): description="Get employee details", parameters=parameters, api_key="test_api_key", - base_url="https://api.stackone.com", + base_url=TEST_BASE_URL, account_id="test_account", ) @respx.mock def test_execute_basic(self, rpc_tool): """Test basic RPC tool execution""" - route = respx.post("https://api.stackone.com/actions/rpc").mock( + route = respx.post(f"{TEST_BASE_URL}/actions/rpc").mock( return_value=httpx.Response(200, json={"data": {"id": "123", "name": "John"}}) ) @@ -178,7 +179,7 @@ def test_execute_basic(self, rpc_tool): @respx.mock def test_execute_with_json_string(self, rpc_tool): """Test RPC tool execution with JSON string arguments""" - route = respx.post("https://api.stackone.com/actions/rpc").mock( + route = respx.post(f"{TEST_BASE_URL}/actions/rpc").mock( return_value=httpx.Response(200, json={"success": True}) ) @@ -192,7 +193,7 @@ def test_execute_with_json_string(self, rpc_tool): @respx.mock def test_execute_with_body_payload(self, rpc_tool): """Test RPC tool execution with nested body payload""" - route = respx.post("https://api.stackone.com/actions/rpc").mock( + route = respx.post(f"{TEST_BASE_URL}/actions/rpc").mock( return_value=httpx.Response(200, json={"success": True}) ) @@ -206,7 +207,7 @@ def test_execute_with_body_payload(self, rpc_tool): @respx.mock def test_execute_with_path_payload(self, rpc_tool): """Test RPC tool execution with path parameters""" - route = respx.post("https://api.stackone.com/actions/rpc").mock( + route = respx.post(f"{TEST_BASE_URL}/actions/rpc").mock( return_value=httpx.Response(200, json={"success": True}) ) @@ -219,7 +220,7 @@ def test_execute_with_path_payload(self, rpc_tool): @respx.mock def 
test_execute_with_query_payload(self, rpc_tool): """Test RPC tool execution with query parameters""" - route = respx.post("https://api.stackone.com/actions/rpc").mock( + route = respx.post(f"{TEST_BASE_URL}/actions/rpc").mock( return_value=httpx.Response(200, json={"success": True}) ) @@ -232,7 +233,7 @@ def test_execute_with_query_payload(self, rpc_tool): @respx.mock def test_execute_with_headers_payload(self, rpc_tool): """Test RPC tool execution with custom headers""" - route = respx.post("https://api.stackone.com/actions/rpc").mock( + route = respx.post(f"{TEST_BASE_URL}/actions/rpc").mock( return_value=httpx.Response(200, json={"success": True}) ) @@ -246,7 +247,7 @@ def test_execute_with_headers_payload(self, rpc_tool): @respx.mock def test_execute_headers_strips_authorization(self, rpc_tool): """Test that Authorization header is stripped from action headers""" - route = respx.post("https://api.stackone.com/actions/rpc").mock( + route = respx.post(f"{TEST_BASE_URL}/actions/rpc").mock( return_value=httpx.Response(200, json={"success": True}) ) @@ -260,7 +261,7 @@ def test_execute_headers_strips_authorization(self, rpc_tool): @respx.mock def test_execute_headers_skips_none_values(self, rpc_tool): """Test that None header values are skipped""" - route = respx.post("https://api.stackone.com/actions/rpc").mock( + route = respx.post(f"{TEST_BASE_URL}/actions/rpc").mock( return_value=httpx.Response(200, json={"success": True}) ) @@ -283,11 +284,11 @@ def test_execute_without_account_id(self): description="Test", parameters=parameters, api_key="test_key", - base_url="https://api.stackone.com", + base_url=TEST_BASE_URL, account_id=None, ) - route = respx.post("https://api.stackone.com/actions/rpc").mock( + route = respx.post(f"{TEST_BASE_URL}/actions/rpc").mock( return_value=httpx.Response(200, json={"success": True}) ) @@ -300,7 +301,7 @@ def test_execute_without_account_id(self): @respx.mock def test_execute_with_none_arguments(self, rpc_tool): """Test RPC tool 
execution with None arguments""" - route = respx.post("https://api.stackone.com/actions/rpc").mock( + route = respx.post(f"{TEST_BASE_URL}/actions/rpc").mock( return_value=httpx.Response(200, json={"success": True}) ) diff --git a/tests/test_toolset.py b/tests/test_toolset.py index f26b6a0..8ccd26a 100644 --- a/tests/test_toolset.py +++ b/tests/test_toolset.py @@ -11,6 +11,7 @@ from hypothesis import given, settings from hypothesis import strategies as st +from stackone_ai.constants import DEFAULT_BASE_URL from stackone_ai.toolset import ( StackOneToolSet, ToolsetConfigError, @@ -173,7 +174,7 @@ def test_init_with_api_key(self): toolset = StackOneToolSet(api_key="test_key") assert toolset.api_key == "test_key" assert toolset.account_id is None - assert toolset.base_url == "https://api.stackone.com" + assert toolset.base_url == DEFAULT_BASE_URL def test_init_with_env_api_key(self): """Test initialization with API key from environment."""