From c9a582b43dd3e9b4bf4b6d742d2df2a0a63ecf96 Mon Sep 17 00:00:00 2001
From: samsucik
Date: Fri, 6 Feb 2026 20:15:57 +0100
Subject: [PATCH] update POWO url and use curl as the API refuses to serve Python requests

---
Review notes (text between "---" and the diffstat is ignored by git am):

* Line structure of this patch was restored from a whitespace-collapsed
  copy; hunk contents match the declared hunk sizes and the diffstat, but
  context-line indentation should be confirmed against blob 5af2578.
* For POWO, Api.get() drops the "cursor" parameter and
  SearchResult.__next__() re-raises StopIteration, so iteration stops after
  the first page (perPage is 500 in _build_params).
* _run_query_with_retries() catches urllib.error.HTTPError; nothing in the
  visible code appears to raise that type (requests uses its own
  requests.exceptions.* hierarchy) -- TODO confirm the intended exception.
* _get_using_curl() unpacks raw_response.split("\n\n", 1) into two names;
  if curl produces no output (e.g. it is missing or the request fails) the
  unpack raises ValueError, which the retry loop does not catch.
* The untouched context line still tests status code 249; presumably 429
  (Too Many Requests) was meant -- out of scope for this patch.

 pykew/core.py | 76 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 70 insertions(+), 6 deletions(-)

diff --git a/pykew/core.py b/pykew/core.py
index 5af2578..736f894 100644
--- a/pykew/core.py
+++ b/pykew/core.py
@@ -1,11 +1,17 @@
+from urllib.error import HTTPError
 import requests
 import time
 import urllib
+import subprocess
+
 
 IPNI_URL = 'http://beta.ipni.org/api/1'
-POWO_URL = 'http://www.plantsoftheworldonline.org/api/2'
+POWO_URL = 'https://powo.science.kew.org/api/2'
 KPL_URL = 'http://kewplantlist.org/api/v1'
 
+class SearchError(Exception):
+    pass
+
 class Api:
     def __init__(self, url):
         self._base_url = url
@@ -17,19 +23,53 @@ def _url(self, method, params):
             opt=urllib.parse.urlencode(params))
 
     def get(self, method, params = {}):
-        resp = requests.get(self._url(method, params))
+        # POWO doesn't like calls from Python, so use curl
+        if self._base_url == POWO_URL:
+            resp = self._get_using_curl(self._url(method, {k: v for k, v in params.items() if k != "cursor"}))
+        else:
+            resp = requests.get(self._url(method, params))
+
         if resp.status_code == 249: # too many requests, retry after 5 sec
             time.sleep(5)
             return self.get(method, params)
         return resp
 
+    def _get_using_curl(self, url):
+        raw_response = subprocess.run(
+            ["curl", "-i", url],
+            capture_output=True,
+            text=True,
+        ).stdout
+        header_part, body = raw_response.split("\n\n", 1)
+
+        header_lines = header_part.splitlines()
+        status_line = header_lines[0]
+        _, status_code, *_ = status_line.split(" ")
+        status_code = int(status_code)
+
+        headers = {}
+        for line in header_lines[1:]:
+            if ":" in line:
+                k, v = line.split(":", 1)
+                headers[k.strip()] = v.strip()
+
+        response = requests.Response()
+        response.status_code = status_code
+        response.headers = headers
+        response._content = body.encode("utf-8")
+        response.url = url
+        response.encoding = "utf-8"
+
+        return response
+
 class SearchResult:
     def __init__(self, api, query, filters = None):
         self._query = query
         self._filters = filters
         self._api = api
         self._cursor = "*"
-        self._run_query()
+        self._max_retry_attempts = 3
+        self._run_query_with_retries()
 
     def _build_params(self):
         params = {'perPage': 500, 'cursor': self._cursor}
@@ -53,6 +93,24 @@ def _format_filters(self):
         else:
             return self._filters.value
 
+    def _run_query_with_retries(self):
+        if hasattr(self, "_results"):
+            del self._results
+        retry_attempt = 0
+        while retry_attempt < self._max_retry_attempts:
+            try:
+                self._run_query()
+            except HTTPError as e:
+                retry_attempt += 1
+                continue
+
+            if hasattr(self, "_results"):
+                return
+            else:
+                retry_attempt += 1
+
+        raise SearchError(f"Couldn't retrieve results within {self._max_retry_attempts} attempts. The last response was: {self._response}")
+
     def _run_query(self):
         params = self._build_params()
         response = self._api.get('search', params)
@@ -60,7 +118,9 @@ def _run_query(self):
         # before making subsequent calls
         self._wait_time = response.elapsed.total_seconds() / 2.0
         self._response = response.json()
-        if 'results' in self._response:
+        if self._response.get('totalResults') == 0:
+            self._results = iter([])
+        elif 'results' in self._response:
             self._results = iter(self._response['results'])
         if 'cursor' in self._response:
             self._cursor = self._response['cursor']
@@ -71,9 +131,13 @@ def __iter__(self):
     def __next__(self):
         try:
             return next(self._results)
-        except StopIteration:
+        except StopIteration as e:
+            # avoid repeated calls for POWO (they might still make sense for other apps – to be verified later)
+            if self._api._base_url == POWO_URL:
+                raise e
+
             time.sleep(self._wait_time)
-            self._run_query()
+            self._run_query_with_retries()
             return next(self._results)
 
     def size(self):