From 6f463c48cffae6a2670e020d8d5a0e3db2344f07 Mon Sep 17 00:00:00 2001 From: janmatzek Date: Wed, 21 May 2025 16:42:59 +0200 Subject: [PATCH 1/3] update: fetch child and org workspaces via api, paginate through results --- docs/BACKUP.md | 3 + requirements-test.txt | 1 + requirements.txt | 1 + scripts/backup.py | 227 ++---------------- scripts/restore.py | 40 +-- scripts/utils/__init__.py | 0 scripts/utils/backup_utils/__init__.py | 0 scripts/utils/backup_utils/input_loader.py | 191 +++++++++++++++ scripts/utils/gd_api.py | 87 +++++++ scripts/utils/logger.py | 22 ++ scripts/utils/models/__init__.py | 0 scripts/utils/models/workspace_response.py | 33 +++ tests/__init__.py | 2 +- tests/data/backup/mock_values.py | 57 ----- tests/test_backup.py | 143 +---------- tests/test_utils/__init__.py | 0 .../test_utils/test_backup_utils/__init__.py | 0 .../test_backup_utils/test_input_loader.py | 204 ++++++++++++++++ 18 files changed, 578 insertions(+), 433 deletions(-) create mode 100644 scripts/utils/__init__.py create mode 100644 scripts/utils/backup_utils/__init__.py create mode 100644 scripts/utils/backup_utils/input_loader.py create mode 100644 scripts/utils/gd_api.py create mode 100644 scripts/utils/logger.py create mode 100644 scripts/utils/models/__init__.py create mode 100644 scripts/utils/models/workspace_response.py delete mode 100644 tests/data/backup/mock_values.py create mode 100644 tests/test_utils/__init__.py create mode 100644 tests/test_utils/test_backup_utils/__init__.py create mode 100644 tests/test_utils/test_backup_utils/test_input_loader.py diff --git a/docs/BACKUP.md b/docs/BACKUP.md index 43e0f35..a878649 100644 --- a/docs/BACKUP.md +++ b/docs/BACKUP.md @@ -69,12 +69,15 @@ python scripts/backup.py input.csv conf.yaml -p path/to/profiles.yaml --profile ## Configuration file (conf) The configuration files let you define which type of storage the export tool will save the backups to, and any additional storage-specific information that might be required. 
Currently AWS S3 and Local storage are supported. +If you run the script with `list-of-parents` or `entire-organization`, the script will fetch the IDs of workspaces to process (either hierarchies under the specified parents or all the workspaces within the organization) in batches. By default, the batch size is `100`, but you can change it by setting the `api_page_size` parameter in your configuration YAML file. + The configuration file has the following format: ```yaml storage_type: some_storage storage: arg1: foo arg2: bar +api_page_size: 1000 ``` ### AWS S3 diff --git a/requirements-test.txt b/requirements-test.txt index 94b458a..89abb30 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,2 +1,3 @@ pytest~=7.3.2 moto~=4.1.11 +pytest-mock==3.14.0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index b61bdd9..7a07e8d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ boto3==1.37.21 gooddata_sdk==1.39.0 requests==2.32.0 +pydantic==2.11.3 \ No newline at end of file diff --git a/scripts/backup.py b/scripts/backup.py index 118f497..10d869e 100644 --- a/scripts/backup.py +++ b/scripts/backup.py @@ -1,29 +1,27 @@ -# (C) 2023 GoodData Corporation +# (C) 2025 GoodData Corporation import abc import argparse -import csv import datetime import json -import logging import os import shutil import tempfile from pathlib import Path -from typing import Any, Iterator, Optional, Type, TypeAlias +from typing import Any, Type import boto3 # type: ignore[import] import requests import yaml -from gooddata_api_client.exceptions import NotFoundException # type: ignore[import] -from gooddata_sdk import GoodDataSdk # type: ignore[import] +from gooddata_api_client.exceptions import NotFoundException from gooddata_sdk import __version__ as sdk_version # type: ignore[import] -from gooddata_sdk.catalog.workspace.declarative_model.workspace.automation import ( - CatalogDeclarativeAutomation, -) -from 
gooddata_sdk.catalog.workspace.declarative_model.workspace.workspace import ( - CatalogDeclarativeWorkspace, - CatalogDeclarativeWorkspaces, +from gooddata_sdk.sdk import GoodDataSdk # type: ignore[import] +from utils.backup_utils.input_loader import InputLoader # type: ignore[import] +from utils.gd_api import ( # type: ignore[import] + BEARER_TKN_PREFIX, + GDApi, + GoodDataRestApiError, ) +from utils.logger import logger # type: ignore[import] TIMESTAMP_SDK_FOLDER = ( str(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")) @@ -31,34 +29,25 @@ + sdk_version.replace(".", "_") ) -API_VERSION = "v1" -BEARER_TKN_PREFIX = "Bearer" PROFILES_FILE = "profiles.yaml" PROFILES_DIRECTORY = ".gooddata" PROFILES_FILE_PATH = Path.home() / PROFILES_DIRECTORY / PROFILES_FILE -FORMAT = "%(asctime)s [%(levelname)s] %(message)s" -logger = logging.getLogger(__name__) -logging.getLogger(__name__).setLevel(logging.INFO) -logger.setLevel(logging.INFO) -ch = logging.StreamHandler() -ch.setFormatter(logging.Formatter(fmt=FORMAT)) -logger.addHandler(ch) LAYOUTS_DIR = "gooddata_layouts" LDM_DIR = "ldm" - -class GoodDataRestApiError(Exception): - """Wrapper for errors occurring from interaction with GD REST API.""" +API_PAGE_SIZE = 100 +# TODO: consider moving storage related logic to a separate module and reuse it in restore class BackupRestoreConfig: def __init__(self, conf_path: str): with open(conf_path, "r") as stream: conf = yaml.safe_load(stream) self.storage_type = conf["storage_type"] self.storage = conf["storage"] + self.api_page_size = conf.get("api_page_size", API_PAGE_SIZE) class BackupStorage(abc.ABC): @@ -125,79 +114,6 @@ def export(self, folder, org_id, export_folder="local_backups"): ) -MaybeResponse: TypeAlias = Optional[requests.Response] - - -class GDApi: - """Wrapper for GoodData REST API client.""" - - def __init__(self, host: str, api_token: str, headers=None): - self.endpoint = self._handle_endpoint(host) - self.api_token = api_token - self.headers = headers if headers 
else {} - self.wait_api_time = 10 - - @staticmethod - def _handle_endpoint(host: str) -> str: - """Ensures that the endpoint URL is correctly formatted.""" - return ( - f"{host}api/{API_VERSION}" - if host[-1] == "/" - else f"{host}/api/{API_VERSION}" - ) - - def get( - self, - path: str, - params, - ok_code: int = 200, - not_found_code: int = 404, - ) -> MaybeResponse: - """Sends a GET request to the GoodData API.""" - kwargs = self._prepare_request(path, params) - logger.debug(f"GET request: {json.dumps(kwargs)}") - response = requests.get(**kwargs) - return self._resolve_return_code( - response, ok_code, kwargs["url"], "RestApi.get", not_found_code - ) - - def _prepare_request(self, path: str, params=None) -> dict[str, Any]: - """Prepares the request to be sent to the GoodData API.""" - kwargs: dict[str, Any] = { - "url": f"{self.endpoint}/{path}", - "headers": self.headers.copy(), - } - if params: - kwargs["params"] = params - if self.api_token: - kwargs["headers"]["Authorization"] = f"{BEARER_TKN_PREFIX} {self.api_token}" - else: - raise RuntimeError( - "Token required for authentication against GD API is missing." - ) - # TODO - Currently no credentials validation - # TODO - do we also support username+pwd auth? Or do we enforce token only? 
- # else: - # kwargs['auth'] = (self.user, self.password) if self.user is not None else None # noqa - return kwargs - - @staticmethod - def _resolve_return_code( - response, ok_code: int, url, method, not_found_code: Optional[int] = None - ) -> MaybeResponse: - """Resolves the return code of the response.""" - if response.status_code == ok_code: - logger.debug(f"{method} to {url} succeeded") - return response - if not_found_code and response.status_code == not_found_code: - logger.debug(f"{method} to {url} failed - target not found") - return None - raise GoodDataRestApiError( - f"{method} to {url} failed - " - f"response_code={response.status_code} message={response.text}" - ) - - def create_api_client_from_profile(profile: str, profile_config: Path) -> GDApi: """Creates a GoodData API client from the specified profile.""" with open(profile_config, "r") as file: @@ -347,31 +263,6 @@ def store_automations(api: GDApi, export_path: Path, org_id: str, ws_id: str) -> json.dump(automations, f) -def store_declarative_automations( - sdk: GoodDataSdk, export_path: Path, org_id: str, ws_id: str -) -> None: - """Stores the declarative automations in the specified export path.""" - # TODO: Currently not working because of a bug in the SDK. There is an alternative way to - # get the automations from the API, which is implemented here, but it will be better to use - # the SDK method once the bug is fixed. 
- - # Construct path to automations folder to put it in the same subfolder as the analytics model - automations_path: Path = Path( - export_path, "gooddata_layouts", org_id, "workspaces", ws_id, "automations" - ) - os.mkdir(automations_path) - - # Get the automations via the SDK - automations: list[CatalogDeclarativeAutomation] = ( - sdk.catalog_workspace.get_declarative_automations(ws_id) - ) - - # Store the automations - for automation in automations: - with open(f"{automations_path}/{automation.id}.yaml", "w") as f: - f.write(yaml.dump(automation.to_dict())) - - def store_declarative_filter_views( sdk: GoodDataSdk, export_path: Path, org_id: str, ws_id: str ) -> None: @@ -393,93 +284,6 @@ def store_declarative_filter_views( ) -def read_csv_input_for_backup(file_path: str) -> list[str]: - """Reads the input CSV file and returns its content from the first column as a list of string.""" - - with open(file_path) as csv_file: - reader: Iterator[list[str]] = csv.reader(csv_file, skipinitialspace=True) - - try: - # Skip the header - headers = next(reader) - - if len(headers) > 1: - raise ValueError( - "Input file contains more than one column. Please check the input and try again." 
- ) - - except StopIteration: - # Raise an error if the iterator is empty - raise ValueError("No content found in the CSV file.") - - # Read the content - content = [row[0] for row in reader] - - # If the content is empty (no rows), raise an error - if not content: - raise ValueError("No workspaces found in the CSV file.") - - return content - - -def get_recursive_children( - all_workspaces: list[CatalogDeclarativeWorkspace], parent_id: str -) -> list[str]: - """Recursively gets the children of the specified parent workspace.""" - children = [] - for workspace in all_workspaces: - if workspace.parent and workspace.parent.id == parent_id: - children.append(workspace.id) - children.extend(get_recursive_children(all_workspaces, workspace.id)) - - return children - - -def get_workspaces_to_backup( - input_type: str, path_to_csv: str, sdk: GoodDataSdk -) -> list[str]: - """Returns the list of workspace IDs to back up based on the input type.""" - if input_type == "list-of-workspaces": - return read_csv_input_for_backup(path_to_csv) - - else: - declarative_workspaces: CatalogDeclarativeWorkspaces = ( - sdk.catalog_workspace.get_declarative_workspaces() - ) - - workspaces: list[CatalogDeclarativeWorkspace] = ( - declarative_workspaces.workspaces - ) - - if not workspaces: - raise RuntimeError("No workspaces found in the organization.") - - if input_type == "list-of-parents": - list_of_parents = read_csv_input_for_backup(path_to_csv) - list_of_children: list[str] = [] - - for parent in list_of_parents: - list_of_children.extend(get_recursive_children(workspaces, parent)) - - if not list_of_children: - raise RuntimeError( - "No child workspaces found for the provided list of parents." 
- ) - - # Include the parent workspaces in the backup - return list_of_parents + list_of_children - - if input_type == "entire-organization": - list_of_workspaces: list[str] = [] - - for workspace in workspaces: - list_of_workspaces.append(workspace.id) - - return list_of_workspaces - - raise RuntimeError("Invalid input type provided.") - - def get_workspace_export( sdk: GoodDataSdk, api: GDApi, @@ -589,8 +393,9 @@ def main(args: argparse.Namespace) -> None: storage_class: Type[BackupStorage] = get_storage(conf.storage_type) storage: BackupStorage = storage_class(conf) - workspaces_to_export: list[str] = get_workspaces_to_backup( - args.input_type, args.ws_csv, sdk + loader = InputLoader(api, conf.api_page_size) + workspaces_to_export: list[str] = loader.get_ids_to_backup( + args.input_type, args.ws_csv ) with tempfile.TemporaryDirectory() as tmpdir: diff --git a/scripts/restore.py b/scripts/restore.py index 381ad41..675930d 100644 --- a/scripts/restore.py +++ b/scripts/restore.py @@ -5,22 +5,22 @@ import json import logging import os +import sys import tempfile import traceback -import requests -import sys -import yaml import zipfile from pathlib import Path -from typing import Any, Optional, TypeAlias, Type +from typing import Any, Optional, Type, TypeAlias import boto3 +import requests +import yaml from gooddata_sdk import ( - GoodDataSdk, CatalogDeclarativeAnalytics, - CatalogDeclarativeModel, - CatalogDeclarativeFilterView, CatalogDeclarativeAutomation, + CatalogDeclarativeFilterView, + CatalogDeclarativeModel, + GoodDataSdk, ) BEARER_TKN_PREFIX = "Bearer" @@ -64,6 +64,7 @@ def _load_conf(path: str) -> dict[str, Any]: return yaml.safe_load(conf) +# TODO: storage logic also defined in backup.py, consider moving to utils class BackupStorage(abc.ABC): """ Retrieves archive of backed up hierarchical export of workspace declaration. 
@@ -147,6 +148,7 @@ def get_ws_declaration(self, s3_target_path: str, local_target_path: Path) -> No class GDApi: + # TODO: also defined in utils, consider importing from there def __init__(self, host: str, api_token: str, headers: dict[str, Any] = {}): self.endpoint = self._handle_endpoint(host) self.api_token = api_token @@ -370,30 +372,6 @@ def _load_and_put_filter_views(self, ws_id: str, src_path: Path) -> None: ws_id, filter_views ) - def _load_and_put_declarative_automations(self, ws_id: str, src_path: Path) -> None: - """Loads and puts automations into GoodData workspace.""" - # TODO: This should potentially replace the _load_and_post_automations method - # once the SDK methods are working properly. Currently the CatalogDeclarativeAutomation - # object is received without relationships attribute, which means the automation is - # created created in Panther, but is not applied to anything. - - automations_folder_path = Path(src_path / "automations") - if not automations_folder_path.exists(): - # Skip if the automations directory does not exist - return - - automations: list[CatalogDeclarativeAutomation] = [] - - for file in automations_folder_path.iterdir(): - automation_content: dict[str, Any] = dict(self._safe_load_yaml(file)) - automation: CatalogDeclarativeAutomation = ( - CatalogDeclarativeAutomation.from_dict(automation_content) - ) - automations.append(automation) - - if automations: - self._sdk.catalog_workspace.put_declarative_automations(ws_id, automations) - def _load_and_post_automations(self, ws_id: str, source_path: Path) -> None: """Loads automations from specified json file and creates them in the workspace.""" # Load automations from JSON diff --git a/scripts/utils/__init__.py b/scripts/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/utils/backup_utils/__init__.py b/scripts/utils/backup_utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/utils/backup_utils/input_loader.py 
b/scripts/utils/backup_utils/input_loader.py new file mode 100644 index 0000000..62c4b49 --- /dev/null +++ b/scripts/utils/backup_utils/input_loader.py @@ -0,0 +1,191 @@ +# (C) 2023 GoodData Corporation +import csv +from dataclasses import dataclass +from typing import Iterator + +from utils.gd_api import ( # type: ignore[import] + API_VERSION, + GDApi, + GoodDataRestApiError, + MaybeResponse, +) +from utils.logger import logger # type: ignore[import] +from utils.models.workspace_response import ( # type: ignore[import] + Workspace, + WorkspaceResponse, +) + + +class InputLoader: + """Class to handle loading and parsing the input data.""" + + api_client: GDApi + base_workspace_endpoint: str + hierarchy_endpoint: str + all_workspaces_endpoint: str + + def __init__(self, api_client: GDApi, page_size: int) -> None: + self.api_client = api_client + self.page_size = page_size + self.set_endpoints() + + def set_endpoints(self) -> None: + """Sets the hierarchy endpoint for the API client.""" + self.base_workspace_endpoint = "/api/v1/entities/workspaces" + self.hierarchy_endpoint = ( + f"{self.base_workspace_endpoint}?" + + "filter=parent.id=={parent_id}" + + f"&include=parent&page=0&size={self.page_size}&sort=name,asc&metaInclude=page,hierarchy" + ) + self.all_workspaces_endpoint = f"{self.base_workspace_endpoint}?page=0&size={self.page_size}&sort=name,asc&metaInclude=page" + + @dataclass + class _ProcessDataOutput: + workspace_ids: list[str] + sub_parents: list[str] | None = None + + @staticmethod + def read_csv_input_for_backup(file_path: str) -> list[str]: + """Reads the input CSV file and returns its content from the first column as a list of string.""" + + with open(file_path) as csv_file: + reader: Iterator[list[str]] = csv.reader(csv_file, skipinitialspace=True) + + try: + # Skip the header + headers = next(reader) + + if len(headers) > 1: + raise ValueError( + "Input file contains more than one column. Please check the input and try again." 
+ ) + + except StopIteration: + # Raise an error if the iterator is empty + raise ValueError("No content found in the CSV file.") + + # Read the content + content = [row[0] for row in reader] + + # If the content is empty (no rows), raise an error + if not content: + raise ValueError("No workspaces found in the CSV file.") + + return content + + def fetch_page(self, url: str) -> WorkspaceResponse: + """Fetch a page of workspaces.""" + + # Separate the API path from the URL so that it can be fed to the GDApi class + endpoint: str = url.split(f"api/{API_VERSION}")[1] + response: MaybeResponse = self.api_client.get(endpoint, None) + if response: + return WorkspaceResponse(**response.json()) + else: + raise GoodDataRestApiError( + f"Failed to fetch data from the API. URL: {endpoint}" + ) + + @staticmethod + def process_data(data: list[Workspace]) -> _ProcessDataOutput: + """Extract children and sub-parents from workspace data.""" + children: list[str] = [] + sub_parents: list[str] = [] + + for workspace in data: + # append child workspace IDs + children.append(workspace.id) + + # if hierarchy is present and has children, append child workspace ID to sub_parents + if workspace.meta and workspace.meta.hierarchy: + if workspace.meta.hierarchy.childrenCount > 0: + sub_parents.append(workspace.id) + return InputLoader._ProcessDataOutput(children, sub_parents) + + @staticmethod + def log_paging_progress(response: WorkspaceResponse) -> None: + """Log the progress of paging through API responses if pagination data is present""" + current_page: int | None + total_pages: int | None + + if response.meta.page: + current_page = response.meta.page.number + 1 + total_pages = response.meta.page.totalPages + else: + current_page = None + total_pages = None + + if current_page and total_pages: + logger.info(f"Fetched page: {current_page} of {total_pages}") + + def _paginate(self, url: str | None): + result: list[InputLoader._ProcessDataOutput] = [] + while url: + response: 
WorkspaceResponse = self.fetch_page(url) + self.log_paging_progress(response) + result.append(self.process_data(response.data)) + url = response.links.next + + return result + + def get_hierarchy(self, parent_id: str) -> list[str]: + """Returns a list of workspace IDs in the hierarchy.""" + logger.info(f"Fetching children of {parent_id}") + url = self.hierarchy_endpoint.format(parent_id=parent_id) + + all_children, sub_parents = [], [] + + results: list[InputLoader._ProcessDataOutput] = self._paginate(url) + + for result in results: + all_children.extend(result.workspace_ids) + if result.sub_parents: + sub_parents.extend(result.sub_parents) + + for subparent in sub_parents: + all_children += self.get_hierarchy(subparent) + + return all_children + + def get_all_workspaces(self) -> list[str]: + """Returns a list of all workspace IDs in the organization.""" + logger.info("Fetching all workspaces") + url = self.all_workspaces_endpoint + + all_workspaces: list[str] = [] + + results: list[InputLoader._ProcessDataOutput] = self._paginate(url) + + for result in results: + all_workspaces.extend(result.workspace_ids) + + return all_workspaces + + def get_ids_to_backup(self, input_type: str, path_to_csv: str) -> list[str]: + """Returns the list of workspace IDs to back up based on the input type.""" + if input_type == "list-of-workspaces": + return self.read_csv_input_for_backup(path_to_csv) + + else: + if input_type == "list-of-parents": + list_of_parents = self.read_csv_input_for_backup(path_to_csv) + list_of_children: list[str] = [] + + for parent in list_of_parents: + list_of_children.extend(self.get_hierarchy(parent)) + + if not list_of_children: + raise RuntimeError( + "No child workspaces found for the provided list of parents." 
+ ) + + # Include the parent workspaces in the backup + return list_of_parents + list_of_children + + if input_type == "entire-organization": + list_of_workspaces = self.get_all_workspaces() + if not list_of_workspaces: + raise RuntimeError("No workspaces found in the organization.") + return list_of_workspaces + + raise RuntimeError("Invalid input type provided.") diff --git a/scripts/utils/gd_api.py b/scripts/utils/gd_api.py new file mode 100644 index 0000000..de0578d --- /dev/null +++ b/scripts/utils/gd_api.py @@ -0,0 +1,87 @@ +# (C) 2023 GoodData Corporation + +import json +from typing import Any, TypeAlias + +import requests +from utils.logger import logger # type: ignore[import] + +API_VERSION = "v1" +BEARER_TKN_PREFIX = "Bearer" + +MaybeResponse: TypeAlias = requests.Response | None + + +class GoodDataRestApiError(Exception): + """Wrapper for errors occurring from interaction with GD REST API.""" + + +class GDApi: + """Wrapper for GoodData REST API client.""" + + # TODO: also defined in restore.py, consider moving to utils + def __init__(self, host: str, api_token: str, headers=None): + self.endpoint = self._handle_endpoint(host) + self.api_token = api_token + self.headers = headers if headers else {} + self.wait_api_time = 10 + + @staticmethod + def _handle_endpoint(host: str) -> str: + """Ensures that the endpoint URL is correctly formatted.""" + return ( + f"{host}api/{API_VERSION}" + if host[-1] == "/" + else f"{host}/api/{API_VERSION}" + ) + + def get( + self, + path: str, + params, + ok_code: int = 200, + not_found_code: int = 404, + ) -> MaybeResponse: + """Sends a GET request to the GoodData API.""" + kwargs = self._prepare_request(path, params) + logger.debug(f"GET request: {json.dumps(kwargs)}") + response = requests.get(**kwargs) + return self._resolve_return_code( + response, ok_code, kwargs["url"], "RestApi.get", not_found_code + ) + + def _prepare_request(self, path: str, params=None) -> dict[str, Any]: + """Prepares the request to be sent to 
the GoodData API.""" + kwargs: dict[str, Any] = { + "url": f"{self.endpoint}/{path}", + "headers": self.headers.copy(), + } + if params: + kwargs["params"] = params + if self.api_token: + kwargs["headers"]["Authorization"] = f"{BEARER_TKN_PREFIX} {self.api_token}" + else: + raise RuntimeError( + "Token required for authentication against GD API is missing." + ) + # TODO - Currently no credentials validation + # TODO - do we also support username+pwd auth? Or do we enforce token only? + # else: + # kwargs['auth'] = (self.user, self.password) if self.user is not None else None # noqa + return kwargs + + @staticmethod + def _resolve_return_code( + response, ok_code: int, url, method, not_found_code: int | None = None + ) -> MaybeResponse: + """Resolves the return code of the response.""" + if response.status_code == ok_code: + logger.debug(f"{method} to {url} succeeded") + return response + if not_found_code and response.status_code == not_found_code: + logger.debug(f"{method} to {url} failed - target not found") + return None + raise GoodDataRestApiError( + f"{method} to {url} failed - " + f"response_code={response.status_code} message={response.text}" + ) diff --git a/scripts/utils/logger.py b/scripts/utils/logger.py new file mode 100644 index 0000000..576fa3c --- /dev/null +++ b/scripts/utils/logger.py @@ -0,0 +1,22 @@ +import logging + + +class LevelFormatter(logging.Formatter): + BASE_FORMAT = "%(asctime)s [%(levelname)s] %(message)s" + FORMATS = { + logging.WARNING: "\033[33m%(asctime)s [%(levelname)s] %(message)s\033[00m", + logging.ERROR: "\033[31m%(asctime)s [%(levelname)s] %(message)s\033[00m", + } + + def format(self, record): + fmt = self.FORMATS.get(record.levelno, self.BASE_FORMAT) + formatter = logging.Formatter(fmt) + return formatter.format(record) + + +logger = logging.getLogger(__name__) +logging.getLogger(__name__).setLevel(logging.INFO) +logger.setLevel(logging.INFO) +ch = logging.StreamHandler() +ch.setFormatter(LevelFormatter()) 
+logger.addHandler(ch) diff --git a/scripts/utils/models/__init__.py b/scripts/utils/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/utils/models/workspace_response.py b/scripts/utils/models/workspace_response.py new file mode 100644 index 0000000..f6a3d8e --- /dev/null +++ b/scripts/utils/models/workspace_response.py @@ -0,0 +1,33 @@ +from pydantic import BaseModel # type: ignore[import] # missing type stub + + +class Page(BaseModel): + size: int + totalElements: int + totalPages: int + number: int + + +class Hierarchy(BaseModel): + childrenCount: int + + +class Meta(BaseModel): + page: Page | None = None + hierarchy: Hierarchy | None = None + + +class Workspace(BaseModel): + id: str + meta: Meta | None = None + + +class Links(BaseModel): + self: str + next: str | None = None + + +class WorkspaceResponse(BaseModel): + data: list[Workspace] + links: Links + meta: Meta diff --git a/tests/__init__.py b/tests/__init__.py index 332df81..37d863d 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1 +1 @@ -# (C) 2023 GoodData Corporation +# (C) 2025 GoodData Corporation diff --git a/tests/data/backup/mock_values.py b/tests/data/backup/mock_values.py deleted file mode 100644 index b74f974..0000000 --- a/tests/data/backup/mock_values.py +++ /dev/null @@ -1,57 +0,0 @@ -from gooddata_sdk.catalog.identifier import CatalogWorkspaceIdentifier -from gooddata_sdk.catalog.workspace.declarative_model.workspace.workspace import ( - CatalogDeclarativeWorkspace, - CatalogDeclarativeWorkspaces, -) - -NO_CHILDREN_RETURN_VALUE = CatalogDeclarativeWorkspaces( - workspaces=[ - CatalogDeclarativeWorkspace( - id="ws_id", - name="ws_name", - parent=CatalogWorkspaceIdentifier(id="recognized_parent_id"), - ) - ], - workspace_data_filters=[], -) - -WORKSPACE_HIERARCHY = CatalogDeclarativeWorkspaces( - workspaces=[ - CatalogDeclarativeWorkspace( - id="parent", - name="parent", - parent=None, - ), - CatalogDeclarativeWorkspace( - id="direct_child", - 
name="direct_child", - parent=CatalogWorkspaceIdentifier(id="parent"), - ), - CatalogDeclarativeWorkspace( - id="indirect_child", - name="indirect_child", - parent=CatalogWorkspaceIdentifier(id="direct_child"), - ), - CatalogDeclarativeWorkspace( - id="another_direct_child", - name="another_direct_child", - parent=CatalogWorkspaceIdentifier(id="parent"), - ), - CatalogDeclarativeWorkspace( - id="another_indirect_child", - name="another_indirect_child", - parent=CatalogWorkspaceIdentifier(id="another_direct_child"), - ), - CatalogDeclarativeWorkspace( - id="unrelated_workspace", - name="unrelated_workspace", - parent=None, - ), - CatalogDeclarativeWorkspace( - id="another_unrelated_workspace", - name="another_unrelated_workspace", - parent=None, - ), - ], - workspace_data_filters=[], -) diff --git a/tests/test_backup.py b/tests/test_backup.py index fb5a00a..61daece 100644 --- a/tests/test_backup.py +++ b/tests/test_backup.py @@ -1,4 +1,11 @@ -# (C) 2023 GoodData Corporation +# (C) 2025 GoodData Corporation +import os +import sys + +sys.path.insert( + 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../scripts")) +) + import argparse import os import shutil @@ -10,13 +17,9 @@ import boto3 import pytest from gooddata_sdk import GoodDataSdk # type: ignore[import] -from gooddata_sdk.catalog.workspace.declarative_model.workspace.workspace import ( - CatalogDeclarativeWorkspaces, -) from moto import mock_s3 -from scripts import backup -from tests.data.backup.mock_values import NO_CHILDREN_RETURN_VALUE, WORKSPACE_HIERARCHY +import scripts.backup as backup LOGGER_NAME = "scripts.backup" MOCK_DL_TARGET = Path("overlays.zip") @@ -172,83 +175,6 @@ def test_wrong_input_type_raises_error(): backup.validate_args(args) -@mock.patch.object( - MOCK_SDK.catalog_workspace, - "get_declarative_workspaces", - return_value=CatalogDeclarativeWorkspaces(workspaces=[], workspace_data_filters=[]), -) -def test_get_workspaces_to_backup_empty_org(mock_get_declarative_workspaces): - 
with pytest.raises(RuntimeError, match="No workspaces found in the organization."): - backup.get_workspaces_to_backup( - "tests/data/backup/test_exports/services", - "services", - MOCK_SDK, - ) - - -@mock.patch( - "scripts.backup.read_csv_input_for_backup", return_value=["unrecognized_parent_id"] -) -@mock.patch.object( - MOCK_SDK.catalog_workspace, - "get_declarative_workspaces", - return_value=NO_CHILDREN_RETURN_VALUE, -) -def test_get_workspaces_to_backup_no_children( - mock_get_declarative_workspaces, mock_read_csv_input -): - with pytest.raises( - RuntimeError, - match="No child workspaces found for the provided list of parents.", - ): - backup.get_workspaces_to_backup( - "list-of-parents", - "some-csv-file.csv", - MOCK_SDK, - ) - - -@mock.patch( - "scripts.backup.read_csv_input_for_backup", return_value=["unrecognized_parent_id"] -) -@mock.patch.object( - MOCK_SDK.catalog_workspace, - "get_declarative_workspaces", - return_value=NO_CHILDREN_RETURN_VALUE, -) -def test_get_workspaces_to_backup_wrong_input_type( - mock_get_declarative_workspaces, mock_read_csv_input -): - with pytest.raises(RuntimeError, match="Invalid input type provided."): - backup.get_workspaces_to_backup( - "invalid-input-type", - "some-csv-file.csv", - MOCK_SDK, - ) - - -def test_get_recursive_children(): - workspaces = WORKSPACE_HIERARCHY.workspaces - workspace_id = "parent" - expected_children = [ - "direct_child", - "indirect_child", - "another_direct_child", - "another_indirect_child", - ] - - result = backup.get_recursive_children(workspaces, workspace_id) - - assert sorted(result) == sorted(expected_children) - - -def test_get_recursive_children_empty_workspaces(): - workspaces: list = [] - workspace_id = "parent" - result = backup.get_recursive_children(workspaces, workspace_id) - assert result == [] - - def test_get_s3_storage(): s3_storage_type = backup.get_storage("s3") assert s3_storage_type == backup.S3Storage @@ -309,7 +235,7 @@ def test_archive_gooddata_layouts_to_zip(): 
assert zip_exists -@mock.patch("scripts.backup.requests", new_callable=mock_requests) +@mock.patch("utils.gd_api.requests", new_callable=mock_requests) def test_get_user_data_filters_normal_response(requests): api = backup.GDApi("some.host.com", "token") @@ -421,52 +347,3 @@ def test_file_upload(s3, s3_bucket): S3_BUCKET, "some/s3/backup/path/org_id/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/filter_contexts/id.yaml", ).load() - - -def test_read_csv_input_empty_file() -> None: - """Test with an empty CSV file.""" - with tempfile.NamedTemporaryFile() as temp_csv: - path_to_csv = temp_csv.name - with pytest.raises(ValueError, match="No content found in the CSV file."): - backup.read_csv_input_for_backup(path_to_csv) - - -def test_read_csv_input_only_header() -> None: - """Test with a CSV file that contains only the header.""" - with tempfile.NamedTemporaryFile() as temp_csv: - temp_csv.write(b"header1\n") - temp_csv.flush() - temp_csv.seek(0) - path_to_csv = temp_csv.name - with pytest.raises(ValueError, match="No workspaces found in the CSV file."): - backup.read_csv_input_for_backup(path_to_csv) - - -def test_read_csv_input_valid() -> None: - """Test with a valid CSV file.""" - with tempfile.NamedTemporaryFile(delete=False) as temp_csv: - temp_csv.write(b"header1\n") - temp_csv.write(b"workspace1\n") - temp_csv.write(b"workspace2\n") - temp_csv.flush() - temp_csv.seek(0) - path_to_csv = temp_csv.name - result = backup.read_csv_input_for_backup(path_to_csv) - assert result == ["workspace1", "workspace2"] - os.remove(path_to_csv) - - -def test_read_csv_input_too_many_columns() -> None: - """Test with a CSV file that contains too many columns.""" - with tempfile.NamedTemporaryFile(delete=False) as temp_csv: - temp_csv.write(b"header1,header2\n") - temp_csv.write(b"workspace1,extra_column\n") - temp_csv.flush() - temp_csv.seek(0) - path_to_csv = temp_csv.name - with pytest.raises( - ValueError, - match="Input 
file contains more than one column. Please check the input and try again.", - ): - backup.read_csv_input_for_backup(path_to_csv) - os.remove(path_to_csv) diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_utils/test_backup_utils/__init__.py b/tests/test_utils/test_backup_utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_utils/test_backup_utils/test_input_loader.py b/tests/test_utils/test_backup_utils/test_input_loader.py new file mode 100644 index 0000000..5282d40 --- /dev/null +++ b/tests/test_utils/test_backup_utils/test_input_loader.py @@ -0,0 +1,204 @@ +# (C) 2025 GoodData Corporation +import os +import sys + +sys.path.insert( + 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../scripts")) +) + +import tempfile + +import pytest + +from scripts.utils.backup_utils.input_loader import InputLoader +from scripts.utils.gd_api import GDApi +from scripts.utils.models.workspace_response import ( + Hierarchy, + Links, + Meta, + Page, + Workspace, + WorkspaceResponse, +) + +MOCK_GDP_API = GDApi( + host="https://fake.host/", + api_token="fake_token", +) + +# MOCK_INPUT_LOADER = backup.InputLoader(MOCK_GD_API, 100) + + +@pytest.fixture +def input_loader(): + loader = InputLoader(MOCK_GDP_API, page_size=2) + loader.hierarchy_endpoint = "/fake/hierarchy?filter=parent.id=={parent_id}" + loader.all_workspaces_endpoint = "/fake/all" + return loader + + +def test_process_data_extracts_children_and_subparents(): + ws1 = Workspace(id="ws1", meta=Meta(hierarchy=Hierarchy(childrenCount=2))) + ws2 = Workspace(id="ws2", meta=Meta(hierarchy=Hierarchy(childrenCount=0))) + ws3 = Workspace(id="ws3", meta=None) + + result = InputLoader.process_data([ws1, ws2, ws3]) + assert result.workspace_ids == ["ws1", "ws2", "ws3"] + assert result.sub_parents == ["ws1"] + + +def test_log_paging_progress_logs_info(mocker): + response = WorkspaceResponse( + 
data=[], + meta=Meta( + page=Page(size=5, totalElements=25, number=1, totalPages=5), hierarchy=None + ), + links=Links(self="self", next="next"), + ) + + mock_logger = mocker.patch("scripts.utils.logger.logger.info") + InputLoader.log_paging_progress(response) + mock_logger.assert_called_once + + +def test_log_paging_progress_no_page(mocker): + response = WorkspaceResponse( + data=[], + meta=Meta(page=None, hierarchy=None), + links=Links(self="self", next="next"), + ) + + mock_logger = mocker.patch("scripts.utils.logger.logger.info") + InputLoader.log_paging_progress(response) + assert mock_logger.call_count == 0 + + +def test_paginate_calls_fetch_page_and_process_data(input_loader, monkeypatch): + ws1 = Workspace(id="ws1", meta=Meta(hierarchy=Hierarchy(childrenCount=1))) + ws2 = Workspace(id="ws2", meta=Meta(hierarchy=Hierarchy(childrenCount=0))) + links1 = Links(self="self", next="next_url") + links2 = Links(self="self", next=None) + resp1 = WorkspaceResponse( + data=[ws1], meta=Meta(hierarchy=None, page=None), links=links1 + ) + resp2 = WorkspaceResponse( + data=[ws2], meta=Meta(hierarchy=None, page=None), links=links2 + ) + + fetch_page_calls = [] + + def fetch_page_side_effect(url): + fetch_page_calls.append(url) + return resp1 if len(fetch_page_calls) == 1 else resp2 + + input_loader.fetch_page = fetch_page_side_effect + + process_data_calls = [] + + def process_data_side_effect(data): + process_data_calls.append(data) + if len(process_data_calls) == 1: + return InputLoader._ProcessDataOutput(["ws1"], ["ws1"]) + else: + return InputLoader._ProcessDataOutput(["ws2"], []) + + monkeypatch.setattr( + InputLoader, "process_data", staticmethod(process_data_side_effect) + ) + monkeypatch.setattr( + InputLoader, "log_paging_progress", staticmethod(lambda resp: None) + ) + + result = input_loader._paginate("first_url") + assert len(result) == 2 + assert result[0].workspace_ids == ["ws1"] + assert result[1].workspace_ids == ["ws2"] + assert len(fetch_page_calls) == 2 + 
assert len(process_data_calls) == 2 + + +def test_get_hierarchy_recurses(input_loader, monkeypatch): + def fake_paginate(url): + if "p1" in url: + return [InputLoader._ProcessDataOutput(["c1"], ["c1"])] + if "c1" in url: + return [InputLoader._ProcessDataOutput(["c2"], [])] + return [] + + input_loader._paginate = fake_paginate + monkeypatch.setattr( + "scripts.utils.backup_utils.input_loader.logger", + type("Logger", (), {"info": lambda self, msg: None})(), + ) + result = input_loader.get_hierarchy("p1") + assert set(result) == {"c1", "c2"} + + +def test_get_workspaces_to_backup_empty_org(input_loader, monkeypatch): + monkeypatch.setattr( + input_loader, + "get_all_workspaces", + lambda: [], + ) + with pytest.raises(RuntimeError, match="No workspaces found in the organization."): + input_loader.get_ids_to_backup( + "entire-organization", + "some-csv-file.csv", + ) + + +def test_get_workspaces_to_backup_wrong_input_type(input_loader): + with pytest.raises(RuntimeError, match="Invalid input type provided."): + input_loader.get_ids_to_backup( + "invalid-input-type", + "some-csv-file.csv", + ) + + +def test_read_csv_input_empty_file(input_loader) -> None: + """Test with an empty CSV file.""" + with tempfile.NamedTemporaryFile() as temp_csv: + path_to_csv = temp_csv.name + with pytest.raises(ValueError, match="No content found in the CSV file."): + input_loader.read_csv_input_for_backup(path_to_csv) + + +def test_read_csv_input_only_header(input_loader) -> None: + """Test with a CSV file that contains only the header.""" + with tempfile.NamedTemporaryFile() as temp_csv: + temp_csv.write(b"header1\n") + temp_csv.flush() + temp_csv.seek(0) + path_to_csv = temp_csv.name + with pytest.raises(ValueError, match="No workspaces found in the CSV file."): + input_loader.read_csv_input_for_backup(path_to_csv) + + +def test_read_csv_input_valid(input_loader) -> None: + """Test with a valid CSV file.""" + with tempfile.NamedTemporaryFile(delete=False) as temp_csv: + 
temp_csv.write(b"header1\n") + temp_csv.write(b"workspace1\n") + temp_csv.write(b"workspace2\n") + temp_csv.flush() + temp_csv.seek(0) + path_to_csv = temp_csv.name + result = input_loader.read_csv_input_for_backup(path_to_csv) + assert result == ["workspace1", "workspace2"] + os.remove(path_to_csv) + + +def test_read_csv_input_too_many_columns(input_loader) -> None: + """Test with a CSV file that contains too many columns.""" + with tempfile.NamedTemporaryFile(delete=False) as temp_csv: + temp_csv.write(b"header1,header2\n") + temp_csv.write(b"workspace1,extra_column\n") + temp_csv.flush() + temp_csv.seek(0) + path_to_csv = temp_csv.name + with pytest.raises( + ValueError, + match="Input file contains more than one column. Please check the input and try again.", + ): + input_loader.read_csv_input_for_backup(path_to_csv) + os.remove(path_to_csv) From 38667eea04b1c18f11eeb02c0b9d97bb687b6682 Mon Sep 17 00:00:00 2001 From: janmatzek Date: Thu, 22 May 2025 14:16:50 +0200 Subject: [PATCH 2/3] chore: update (c) year --- LICENCE | 2 +- scripts/__init__.py | 2 +- scripts/permission_mgmt.py | 3 +-- scripts/restore.py | 2 +- scripts/user_mgmt.py | 6 ++---- scripts/utils/backup_utils/input_loader.py | 2 +- scripts/utils/gd_api.py | 2 +- tests/test_permissions.py | 2 +- tests/test_restore.py | 6 +++--- tests/test_user_mgmt.py | 7 +++---- 10 files changed, 15 insertions(+), 19 deletions(-) diff --git a/LICENCE b/LICENCE index 332301a..db896a4 100644 --- a/LICENCE +++ b/LICENCE @@ -1,6 +1,6 @@ BSD License -Copyright (c) 2023-2024, GoodData Corporation. All rights reserved. +Copyright (c) 2023-2025, GoodData Corporation. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted, provided that the following conditions are met: diff --git a/scripts/__init__.py b/scripts/__init__.py index 332df81..37d863d 100644 --- a/scripts/__init__.py +++ b/scripts/__init__.py @@ -1 +1 @@ -# (C) 2023 GoodData Corporation +# (C) 2025 GoodData Corporation diff --git a/scripts/permission_mgmt.py b/scripts/permission_mgmt.py index 41151f0..6275d3a 100644 --- a/scripts/permission_mgmt.py +++ b/scripts/permission_mgmt.py @@ -1,4 +1,4 @@ -# (C) 2023 GoodData Corporation +# (C) 2025 GoodData Corporation import argparse import csv import logging @@ -11,7 +11,6 @@ import gooddata_sdk as gd_sdk from gooddata_api_client.exceptions import NotFoundException - USER_TYPE = "user" USER_GROUP_TYPE = "userGroup" diff --git a/scripts/restore.py b/scripts/restore.py index 675930d..9914f59 100644 --- a/scripts/restore.py +++ b/scripts/restore.py @@ -1,4 +1,4 @@ -# (C) 2023 GoodData Corporation +# (C) 2025 GoodData Corporation import abc import argparse import csv diff --git a/scripts/user_mgmt.py b/scripts/user_mgmt.py index caa6923..059bf30 100644 --- a/scripts/user_mgmt.py +++ b/scripts/user_mgmt.py @@ -1,4 +1,4 @@ -# (C) 2023 GoodData Corporation +# (C) 2025 GoodData Corporation import argparse import csv import logging @@ -228,9 +228,7 @@ def create_clients(args: argparse.Namespace) -> gd_sdk.GoodDataSdk: profile_config, profile = args.profile_config, args.profile if os.path.exists(profile_config): - logger.info( - f"Using GoodData profile {profile} " f"sourced from {profile_config}." 
- ) + logger.info(f"Using GoodData profile {profile} sourced from {profile_config}.") sdk = gd_sdk.GoodDataSdk.create_from_profile(profile, profile_config) return sdk diff --git a/scripts/utils/backup_utils/input_loader.py b/scripts/utils/backup_utils/input_loader.py index 62c4b49..e636c62 100644 --- a/scripts/utils/backup_utils/input_loader.py +++ b/scripts/utils/backup_utils/input_loader.py @@ -1,4 +1,4 @@ -# (C) 2023 GoodData Corporation +# (C) 2025 GoodData Corporation import csv from dataclasses import dataclass from typing import Iterator diff --git a/scripts/utils/gd_api.py b/scripts/utils/gd_api.py index de0578d..56a5450 100644 --- a/scripts/utils/gd_api.py +++ b/scripts/utils/gd_api.py @@ -1,4 +1,4 @@ -# (C) 2023 GoodData Corporation +# (C) 2025 GoodData Corporation import json from typing import Any, TypeAlias diff --git a/tests/test_permissions.py b/tests/test_permissions.py index bdd9c69..4d50c5f 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -1,4 +1,4 @@ -# (C) 2023 GoodData Corporation +# (C) 2025 GoodData Corporation import argparse from unittest import mock diff --git a/tests/test_restore.py b/tests/test_restore.py index f92a4ee..f014ea3 100644 --- a/tests/test_restore.py +++ b/tests/test_restore.py @@ -1,16 +1,16 @@ -# (C) 2023 GoodData Corporation +# (C) 2025 GoodData Corporation import argparse +import json import logging import os import tempfile -import json from pathlib import Path from unittest import mock import boto3 +import gooddata_sdk as gd_sdk import pytest from moto import mock_s3 -import gooddata_sdk as gd_sdk from scripts import restore diff --git a/tests/test_user_mgmt.py b/tests/test_user_mgmt.py index fbe811e..4d318ee 100644 --- a/tests/test_user_mgmt.py +++ b/tests/test_user_mgmt.py @@ -1,16 +1,15 @@ -# (C) 2023 GoodData Corporation +# (C) 2025 GoodData Corporation import argparse from dataclasses import dataclass -from unittest import mock from typing import Any, Optional +from unittest import mock 
-import pytest import gooddata_sdk as gd_sdk +import pytest from gooddata_api_client.exceptions import NotFoundException from scripts import user_mgmt - TEST_CSV_PATH = "tests/data/user_mgmt/input.csv" From 9326798b3905038b508e443107a14eb517c02666 Mon Sep 17 00:00:00 2001 From: janmatzek Date: Fri, 23 May 2025 15:27:39 +0200 Subject: [PATCH 3/3] fixup: use snake_case and aliases in workspace model, add todos --- scripts/utils/backup_utils/input_loader.py | 8 +++++-- scripts/utils/models/workspace_response.py | 24 +++++++++++++++---- tests/test_backup.py | 2 +- .../test_backup_utils/test_input_loader.py | 11 +++++---- 4 files changed, 33 insertions(+), 12 deletions(-) diff --git a/scripts/utils/backup_utils/input_loader.py b/scripts/utils/backup_utils/input_loader.py index e636c62..3f0df38 100644 --- a/scripts/utils/backup_utils/input_loader.py +++ b/scripts/utils/backup_utils/input_loader.py @@ -98,7 +98,7 @@ def process_data(data: list[Workspace]) -> _ProcessDataOutput: # if hierarchy is present and has children, append child workspace ID to sub_parents if workspace.meta and workspace.meta.hierarchy: - if workspace.meta.hierarchy.childrenCount > 0: + if workspace.meta.hierarchy.children_count > 0: sub_parents.append(workspace.id) return InputLoader._ProcessDataOutput(children, sub_parents) @@ -110,7 +110,7 @@ def log_paging_progress(response: WorkspaceResponse) -> None: if response.meta.page: current_page = response.meta.page.number + 1 - total_pages = response.meta.page.totalPages + total_pages = response.meta.page.total_pages else: current_page = None total_pages = None @@ -149,6 +149,10 @@ def get_hierarchy(self, parent_id: str) -> list[str]: def get_all_workspaces(self) -> list[str]: """Returns a list of all workspace IDs in the organization.""" + # TODO: can be optimized - requests can be sent asynchronously. + # Use the total number of pages to calculate the number of requests + # to be sent. 
Use semaphore or otherwise limit the number of concurrent + # requests to avoid putting too much load on the server. logger.info("Fetching all workspaces") url = self.all_workspaces_endpoint diff --git a/scripts/utils/models/workspace_response.py b/scripts/utils/models/workspace_response.py index f6a3d8e..9a7ae99 100644 --- a/scripts/utils/models/workspace_response.py +++ b/scripts/utils/models/workspace_response.py @@ -1,15 +1,31 @@ -from pydantic import BaseModel # type: ignore[import] # missing type stub +from pydantic import ( # type: ignore[import] # missing type stub + BaseModel, + ConfigDict, +) +from pydantic.alias_generators import ( # type: ignore[import] # missing type stub + to_camel, +) class Page(BaseModel): size: int - totalElements: int - totalPages: int + total_elements: int + total_pages: int number: int + model_config = ConfigDict( + alias_generator=to_camel, + populate_by_name=True, + ) + class Hierarchy(BaseModel): - childrenCount: int + children_count: int + + model_config = ConfigDict( + alias_generator=to_camel, + populate_by_name=True, + ) class Meta(BaseModel): diff --git a/tests/test_backup.py b/tests/test_backup.py index 61daece..43b7b48 100644 --- a/tests/test_backup.py +++ b/tests/test_backup.py @@ -16,7 +16,7 @@ import boto3 import pytest -from gooddata_sdk import GoodDataSdk # type: ignore[import] +from gooddata_sdk.sdk import GoodDataSdk from moto import mock_s3 import scripts.backup as backup diff --git a/tests/test_utils/test_backup_utils/test_input_loader.py b/tests/test_utils/test_backup_utils/test_input_loader.py index 5282d40..a520e88 100644 --- a/tests/test_utils/test_backup_utils/test_input_loader.py +++ b/tests/test_utils/test_backup_utils/test_input_loader.py @@ -38,8 +38,8 @@ def input_loader(): def test_process_data_extracts_children_and_subparents(): - ws1 = Workspace(id="ws1", meta=Meta(hierarchy=Hierarchy(childrenCount=2))) - ws2 = Workspace(id="ws2", meta=Meta(hierarchy=Hierarchy(childrenCount=0))) + ws1 = 
Workspace(id="ws1", meta=Meta(hierarchy=Hierarchy(children_count=2))) + ws2 = Workspace(id="ws2", meta=Meta(hierarchy=Hierarchy(children_count=0))) ws3 = Workspace(id="ws3", meta=None) result = InputLoader.process_data([ws1, ws2, ws3]) @@ -51,7 +51,8 @@ def test_log_paging_progress_logs_info(mocker): response = WorkspaceResponse( data=[], meta=Meta( - page=Page(size=5, totalElements=25, number=1, totalPages=5), hierarchy=None + page=Page(size=5, total_elements=25, number=1, total_pages=5), + hierarchy=None, ), links=Links(self="self", next="next"), ) @@ -74,8 +75,8 @@ def test_log_paging_progress_no_page(mocker): def test_paginate_calls_fetch_page_and_process_data(input_loader, monkeypatch): - ws1 = Workspace(id="ws1", meta=Meta(hierarchy=Hierarchy(childrenCount=1))) - ws2 = Workspace(id="ws2", meta=Meta(hierarchy=Hierarchy(childrenCount=0))) + ws1 = Workspace(id="ws1", meta=Meta(hierarchy=Hierarchy(children_count=1))) + ws2 = Workspace(id="ws2", meta=Meta(hierarchy=Hierarchy(children_count=0))) links1 = Links(self="self", next="next_url") links2 = Links(self="self", next=None) resp1 = WorkspaceResponse(