From d74b0b9e5243873390b4d9cc0c886a4b29bb81f4 Mon Sep 17 00:00:00 2001 From: Herklos Date: Tue, 3 Feb 2026 14:36:40 +0100 Subject: [PATCH] [Services] Add searxng and migrate to AbstractWebSearchService --- .../searxng_service/__init__.py | 21 ++ .../searxng_service/metadata.json | 6 + .../Services_bases/searxng_service/searxng.py | 302 ++++++++++++++++++ .../Services_bases/tavily_service/tavily.py | 91 +++++- 4 files changed, 417 insertions(+), 3 deletions(-) create mode 100644 packages/tentacles/Services/Services_bases/searxng_service/__init__.py create mode 100644 packages/tentacles/Services/Services_bases/searxng_service/metadata.json create mode 100644 packages/tentacles/Services/Services_bases/searxng_service/searxng.py diff --git a/packages/tentacles/Services/Services_bases/searxng_service/__init__.py b/packages/tentacles/Services/Services_bases/searxng_service/__init__.py new file mode 100644 index 0000000000..32134f7330 --- /dev/null +++ b/packages/tentacles/Services/Services_bases/searxng_service/__init__.py @@ -0,0 +1,21 @@ +# Drakkar-Software OctoBot-Tentacles +# Copyright (c) Drakkar-Software, All rights reserved. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3.0 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library. + +from .searxng import SearXNGService + +__all__ = [ + "SearXNGService", +] diff --git a/packages/tentacles/Services/Services_bases/searxng_service/metadata.json b/packages/tentacles/Services/Services_bases/searxng_service/metadata.json new file mode 100644 index 0000000000..36fa58f46f --- /dev/null +++ b/packages/tentacles/Services/Services_bases/searxng_service/metadata.json @@ -0,0 +1,6 @@ +{ + "version": "1.2.0", + "origin_package": "OctoBot-Default-Tentacles", + "tentacles": ["SearXNGService"], + "tentacles-requirements": [] +} diff --git a/packages/tentacles/Services/Services_bases/searxng_service/searxng.py b/packages/tentacles/Services/Services_bases/searxng_service/searxng.py new file mode 100644 index 0000000000..b3608921f7 --- /dev/null +++ b/packages/tentacles/Services/Services_bases/searxng_service/searxng.py @@ -0,0 +1,302 @@ +# Drakkar-Software OctoBot-Tentacles +# Copyright (c) Drakkar-Software, All rights reserved. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3.0 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library. +import aiohttp +from typing import Any, Dict, List, Optional, Sequence + +import octobot_services.constants as services_constants +import octobot_services.services as services + + +class SearXNGService(services.AbstractWebSearchService): + """ + SearXNG web search service implementation. + + SearXNG is a free, privacy-respecting metasearch engine. + See: https://github.com/searxng/searxng + + Connects to a self-hosted SearXNG instance to perform web searches. + SearXNG is a privacy-respecting metasearch engine that aggregates + results from multiple search engines. + + Configuration: + - url: Base URL of the SearXNG instance (e.g., "http://localhost") + - port: Port number (e.g., 8080) + - categories: Default search categories (e.g., ["general", "news"]) + - language: Default language code (e.g., "en") + - time_range: Default time range filter + - safe_search: Safe search level (0=off, 1=moderate, 2=strict) + - engines: Comma-separated list of engines to use + """ + + SEARXNG_DOCS_URL = "https://docs.searxng.org/" + SEARXNG_GITHUB_URL = "https://github.com/searxng/searxng" + + def __init__(self): + super().__init__() + self._base_url: Optional[str] = None + self._port: Optional[int] = None + self._default_categories: Optional[List[str]] = None + self._default_language: Optional[str] = None + self._default_time_range: Optional[str] = None + self._safe_search: int = 0 + self._default_engines: Optional[str] = None + + def _get_api_url(self) -> str: + if not self._base_url: + return "" + url = self._base_url.rstrip("/") + if self._port: + url = f"{url}:{self._port}" + return url + + async def _get(self, path: str, params: Dict[str, Any], timeout: float = 30) -> Dict[str, Any]: + url = f"{self._get_api_url()}/{path.lstrip('/')}" + if not url or not self._base_url: + self.logger.error("SearXNG URL not configured") + return {} + + try: + async with aiohttp.ClientSession() as session: + async with session.get( + url, + params=params, + timeout=aiohttp.ClientTimeout(total=timeout), + ) as resp: + if resp.status != 200: + text = await resp.text() + self.logger.error( + f"SearXNG API error {resp.status}: {text[:200]}" + ) + return {} + return await resp.json() + except aiohttp.ClientError as e: + self.logger.exception(e, True, f"SearXNG API request failed: {e}") + return {} + except Exception as e: + self.logger.error(f"SearXNG API error: {e}") + return {} + + def get_type(self): + return services_constants.CONFIG_SEARXNG + + def get_endpoint(self): + return self + + @staticmethod + def is_setup_correctly(config): + return ( + services_constants.CONFIG_CATEGORY_SERVICES in config + and services_constants.CONFIG_SEARXNG + in config[services_constants.CONFIG_CATEGORY_SERVICES] + and services_constants.CONFIG_SERVICE_INSTANCE + in config[services_constants.CONFIG_CATEGORY_SERVICES][ + services_constants.CONFIG_SEARXNG + ] + ) + + def has_required_configuration(self): + return ( + services_constants.CONFIG_CATEGORY_SERVICES in self.config + and services_constants.CONFIG_SEARXNG + in self.config[services_constants.CONFIG_CATEGORY_SERVICES] + and self.check_required_config( + self.config[services_constants.CONFIG_CATEGORY_SERVICES][ + services_constants.CONFIG_SEARXNG + ] + ) + ) + + async def prepare(self): + searxng_config = self.config.get(services_constants.CONFIG_CATEGORY_SERVICES, {}).get( + services_constants.CONFIG_SEARXNG, {} + ) + + self._base_url = ( + searxng_config.get(services_constants.CONFIG_SEARXNG_URL) or "" + ).strip() or None + + port_str = str(searxng_config.get(services_constants.CONFIG_SEARXNG_PORT) or "").strip() + self._port = int(port_str) if port_str.isdigit() else None + + categories = searxng_config.get(services_constants.CONFIG_SEARXNG_CATEGORIES) + if isinstance(categories, str): + self._default_categories = [c.strip() for c in categories.split(",") if c.strip()] + elif isinstance(categories, list): + self._default_categories = categories + else: + self._default_categories = None + + self._default_language = ( + searxng_config.get(services_constants.CONFIG_SEARXNG_LANGUAGE) or "" + ).strip() or None + + self._default_time_range = ( + searxng_config.get(services_constants.CONFIG_SEARXNG_TIME_RANGE) or "" + ).strip() or None + + safe_str = str(searxng_config.get(services_constants.CONFIG_SEARXNG_SAFE_SEARCH) or "0").strip() + self._safe_search = int(safe_str) if safe_str.isdigit() else 0 + + self._default_engines = ( + searxng_config.get(services_constants.CONFIG_SEARXNG_ENGINES) or "" + ).strip() or None + + # Test connection + if self._base_url: + try: + # SearXNG returns JSON when format=json is specified + test_result = await self._get("search", {"q": "test", "format": "json"}, timeout=10) + self._startup_healthy = bool(test_result) + self._startup_message = ( + f"SearXNG connected at {self._get_api_url()}" + if self._startup_healthy + else "" + ) + except Exception as e: + self._startup_healthy = False + self._startup_message = "" + self.logger.warning(f"SearXNG connection test failed: {e}") + else: + self._startup_healthy = False + self._startup_message = "" + + def get_fields_description(self): + return { + services_constants.CONFIG_SEARXNG_URL: "SearXNG instance URL (e.g., http://localhost or https://searxng.example.com).", + services_constants.CONFIG_SEARXNG_PORT: "Port number (optional, e.g., 8080).", + services_constants.CONFIG_SEARXNG_CATEGORIES: "Default search categories (comma-separated, e.g., 'general,news').", + services_constants.CONFIG_SEARXNG_LANGUAGE: "Default language code (e.g., 'en', 'fr', 'de').", + services_constants.CONFIG_SEARXNG_TIME_RANGE: "Default time range filter (e.g., 'day', 'week', 'month', 'year').", + services_constants.CONFIG_SEARXNG_SAFE_SEARCH: "Safe search level: 0=off, 1=moderate, 2=strict.", + services_constants.CONFIG_SEARXNG_ENGINES: "Engines to use (comma-separated, e.g., 'google,bing,duckduckgo').", + } + + def get_default_value(self): + return { + services_constants.CONFIG_SEARXNG_URL: "http://localhost", + services_constants.CONFIG_SEARXNG_PORT: "8080", + services_constants.CONFIG_SEARXNG_CATEGORIES: "general", + services_constants.CONFIG_SEARXNG_LANGUAGE: "en", + services_constants.CONFIG_SEARXNG_TIME_RANGE: "", + services_constants.CONFIG_SEARXNG_SAFE_SEARCH: "0", + services_constants.CONFIG_SEARXNG_ENGINES: "", + } + + def get_required_config(self): + return [services_constants.CONFIG_SEARXNG_URL] + + @classmethod + def get_help_page(cls) -> str: + return cls.SEARXNG_DOCS_URL + + def get_website_url(self) -> str: + return self.SEARXNG_GITHUB_URL + + async def search( + self, + query: str, + max_results: Optional[int] = None, + categories: Optional[Sequence[str]] = None, + language: Optional[str] = None, + time_range: Optional[str] = None, + include_domains: Optional[Sequence[str]] = None, + exclude_domains: Optional[Sequence[str]] = None, + timeout: Optional[float] = None, + engines: Optional[str] = None, + safe_search: Optional[int] = None, + **kwargs, + ) -> services.WebSearchResponse: + params: Dict[str, Any] = { + "q": query, + "format": "json", + } + + cats = categories or self._default_categories + if cats: + params["categories"] = ",".join(cats) if isinstance(cats, (list, tuple)) else cats + + lang = language or self._default_language + if lang: + params["language"] = lang + + tr = time_range or self._default_time_range + if tr: + params["time_range"] = tr + + ss = safe_search if safe_search is not None else self._safe_search + params["safesearch"] = ss + + eng = engines or self._default_engines + if eng: + params["engines"] = eng + params.update(kwargs) + + request_timeout = timeout or self.DEFAULT_TIMEOUT + raw = await self._get("search", params, timeout=request_timeout) + + if not raw: + return services.WebSearchResponse(query=query) + + results: List[services.WebSearchResult] = [] + for r in raw.get("results", []): + if not isinstance(r, dict): + continue + + url = str(r.get("url", "")) + + if include_domains: + if not any(domain in url for domain in include_domains): + continue + if exclude_domains: + if any(domain in url for domain in exclude_domains): + continue + + results.append(services.WebSearchResult( + title=str(r.get("title", "")), + url=url, + content=str(r.get("content", "")), + score=float(r.get("score", 0)), + engine=r.get("engine"), + )) + + # Limit results if max_results specified + if max_results and len(results) >= max_results: + break + + return services.WebSearchResponse( + query=query, + results=results, + total_results=raw.get("number_of_results"), + ) + + async def search_news( + self, + query: str, + max_results: Optional[int] = None, + language: Optional[str] = None, + time_range: Optional[str] = None, + timeout: Optional[float] = None, + **kwargs, + ) -> services.WebSearchResponse: + return await self.search( + query=query, + max_results=max_results, + categories=["news"], + language=language, + time_range=time_range or "week", # Default to recent news + timeout=timeout, + **kwargs, + ) diff --git a/packages/tentacles/Services/Services_bases/tavily_service/tavily.py b/packages/tentacles/Services/Services_bases/tavily_service/tavily.py index 708a7870f1..e4b2e18d7e 100644 --- a/packages/tentacles/Services/Services_bases/tavily_service/tavily.py +++ b/packages/tentacles/Services/Services_bases/tavily_service/tavily.py @@ -34,7 +34,7 @@ TAVILY_API_BASE = "https://api.tavily.com" -class TavilyService(services.AbstractService): +class TavilyService(services.AbstractWebSearchService): TAVILY_DOCS_URL = "https://docs.tavily.com/documentation/api-reference/introduction" @@ -42,8 +42,6 @@ def __init__(self): super().__init__() self._api_key: Optional[str] = None self._headers: Dict[str, str] = {} - self._startup_message: str = "" - self._startup_healthy: bool = False async def _post(self, path: str, data: dict, timeout: float = 60) -> dict: if not self._api_key: @@ -190,6 +188,73 @@ def get_website_url(self) -> str: return self.TAVILY_DOCS_URL async def search( + self, + query: str, + max_results: Optional[int] = None, + categories: Optional[Sequence[str]] = None, + language: Optional[str] = None, + time_range: Optional[str] = None, + include_domains: Optional[Sequence[str]] = None, + exclude_domains: Optional[Sequence[str]] = None, + timeout: Optional[float] = None, + # Tavily-specific parameters + search_depth: Optional[str] = None, + topic: Optional[str] = None, + include_answer: Optional[Union[bool, str]] = None, + include_raw_content: Optional[Union[bool, str]] = None, + start_date: Optional[str] = None, + end_date: Optional[str] = None, + days: Optional[int] = None, + include_images: Optional[bool] = None, + country: Optional[str] = None, + auto_parameters: Optional[bool] = None, + include_favicon: Optional[bool] = None, + include_usage: Optional[bool] = None, + chunks_per_source: Optional[int] = None, + **kwargs, + ) -> services.WebSearchResponse: + tavily_response = await self.search_tavily( + query=query, + search_depth=search_depth, + topic=topic, + max_results=max_results, + include_answer=include_answer, + include_raw_content=include_raw_content, + include_domains=include_domains, + exclude_domains=exclude_domains, + time_range=time_range, + start_date=start_date, + end_date=end_date, + days=days, + include_images=include_images, + country=country, + auto_parameters=auto_parameters, + include_favicon=include_favicon, + include_usage=include_usage, + chunks_per_source=chunks_per_source, + timeout=timeout or 60.0, + ) + + web_results = [] + for tavily_result in tavily_response.results: + web_results.append(services.WebSearchResult( + title=tavily_result.title, + url=tavily_result.url, + content=tavily_result.content, + score=tavily_result.score, + raw_content=tavily_result.raw_content, + favicon=tavily_result.favicon, + )) + + return services.WebSearchResponse( + query=tavily_response.query, + results=web_results, + answer=tavily_response.answer, + response_time=tavily_response.response_time, + total_results=len(web_results), + ) + + async def search_tavily( self, query: str, search_depth: Optional[str] = None, @@ -249,6 +314,26 @@ async def search( raw = await self._post("search", data, timeout=min(timeout, 120)) return SearchResponse.from_dict(raw) + async def search_news( + self, + query: str, + max_results: Optional[int] = None, + language: Optional[str] = None, + time_range: Optional[str] = None, + timeout: Optional[float] = None, + **kwargs, + ) -> services.WebSearchResponse: + return await self.search( + query=query, + max_results=max_results, + categories=None, # Not used by Tavily + language=language, + time_range=time_range or "week", # Default to recent news + timeout=timeout, + topic="news", # Tavily-specific: use news topic + **kwargs, + ) + async def extract( self, urls: Union[List[str], str],