From 6f463c48cffae6a2670e020d8d5a0e3db2344f07 Mon Sep 17 00:00:00 2001
From: janmatzek <jan.matzek@gmail.com>
Date: Wed, 21 May 2025 16:42:59 +0200
Subject: [PATCH 1/3] update: fetch child and org workspaces via api, paginate
 through results

---
 docs/BACKUP.md                                |   3 +
 requirements-test.txt                         |   1 +
 requirements.txt                              |   1 +
 scripts/backup.py                             | 227 ++----------------
 scripts/restore.py                            |  40 +--
 scripts/utils/__init__.py                     |   0
 scripts/utils/backup_utils/__init__.py        |   0
 scripts/utils/backup_utils/input_loader.py    | 191 +++++++++++++++
 scripts/utils/gd_api.py                       |  87 +++++++
 scripts/utils/logger.py                       |  22 ++
 scripts/utils/models/__init__.py              |   0
 scripts/utils/models/workspace_response.py    |  33 +++
 tests/__init__.py                             |   2 +-
 tests/data/backup/mock_values.py              |  57 -----
 tests/test_backup.py                          | 143 +----------
 tests/test_utils/__init__.py                  |   0
 .../test_utils/test_backup_utils/__init__.py  |   0
 .../test_backup_utils/test_input_loader.py    | 204 ++++++++++++++++
 18 files changed, 578 insertions(+), 433 deletions(-)
 create mode 100644 scripts/utils/__init__.py
 create mode 100644 scripts/utils/backup_utils/__init__.py
 create mode 100644 scripts/utils/backup_utils/input_loader.py
 create mode 100644 scripts/utils/gd_api.py
 create mode 100644 scripts/utils/logger.py
 create mode 100644 scripts/utils/models/__init__.py
 create mode 100644 scripts/utils/models/workspace_response.py
 delete mode 100644 tests/data/backup/mock_values.py
 create mode 100644 tests/test_utils/__init__.py
 create mode 100644 tests/test_utils/test_backup_utils/__init__.py
 create mode 100644 tests/test_utils/test_backup_utils/test_input_loader.py

diff --git a/docs/BACKUP.md b/docs/BACKUP.md
index 43e0f35..a878649 100644
--- a/docs/BACKUP.md
+++ b/docs/BACKUP.md
@@ -69,12 +69,15 @@ python scripts/backup.py input.csv conf.yaml -p path/to/profiles.yaml --profile
 ## Configuration file (conf)
 The configuration files let you define which type of storage the export tool will save the backups to, and any additional storage-specific information that might be required. Currently AWS S3 and Local storage are supported.
 
+If you run the script with `list-of-parents` or `entire-organization`, the script will fetch the IDs of workspaces to process (either hierarchies under the specified parents or all the workspaces within the organization) in batches. By default, the batch size is set to `100`, but you can change it by setting the `api_page_size` parameter in your configuration YAML file.
+
 The configuration file has the following format:
 ```yaml
 storage_type: some_storage
 storage:
   arg1: foo
   arg2: bar
+api_page_size: 1000
 ```
 
 ### AWS S3
diff --git a/requirements-test.txt b/requirements-test.txt
index 94b458a..89abb30 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,2 +1,3 @@
 pytest~=7.3.2
 moto~=4.1.11
+pytest-mock==3.14.0
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index b61bdd9..7a07e8d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 boto3==1.37.21
 gooddata_sdk==1.39.0
 requests==2.32.0
+pydantic==2.11.3
\ No newline at end of file
diff --git a/scripts/backup.py b/scripts/backup.py
index 118f497..10d869e 100644
--- a/scripts/backup.py
+++ b/scripts/backup.py
@@ -1,29 +1,27 @@
-# (C) 2023 GoodData Corporation
+# (C) 2025 GoodData Corporation
 import abc
 import argparse
-import csv
 import datetime
 import json
-import logging
 import os
 import shutil
 import tempfile
 from pathlib import Path
-from typing import Any, Iterator, Optional, Type, TypeAlias
+from typing import Any, Type
 
 import boto3  # type: ignore[import]
 import requests
 import yaml
-from gooddata_api_client.exceptions import NotFoundException  # type: ignore[import]
-from gooddata_sdk import GoodDataSdk  # type: ignore[import]
+from gooddata_api_client.exceptions import NotFoundException
 from gooddata_sdk import __version__ as sdk_version  # type: ignore[import]
-from gooddata_sdk.catalog.workspace.declarative_model.workspace.automation import (
-    CatalogDeclarativeAutomation,
-)
-from gooddata_sdk.catalog.workspace.declarative_model.workspace.workspace import (
-    CatalogDeclarativeWorkspace,
-    CatalogDeclarativeWorkspaces,
+from gooddata_sdk.sdk import GoodDataSdk  # type: ignore[import]
+from utils.backup_utils.input_loader import InputLoader  # type: ignore[import]
+from utils.gd_api import (  # type: ignore[import]
+    BEARER_TKN_PREFIX,
+    GDApi,
+    GoodDataRestApiError,
 )
+from utils.logger import logger  # type: ignore[import]
 
 TIMESTAMP_SDK_FOLDER = (
     str(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
@@ -31,34 +29,25 @@
     + sdk_version.replace(".", "_")
 )
 
-API_VERSION = "v1"
-BEARER_TKN_PREFIX = "Bearer"
 PROFILES_FILE = "profiles.yaml"
 PROFILES_DIRECTORY = ".gooddata"
 PROFILES_FILE_PATH = Path.home() / PROFILES_DIRECTORY / PROFILES_FILE
 
-FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
-logger = logging.getLogger(__name__)
-logging.getLogger(__name__).setLevel(logging.INFO)
-logger.setLevel(logging.INFO)
-ch = logging.StreamHandler()
-ch.setFormatter(logging.Formatter(fmt=FORMAT))
-logger.addHandler(ch)
 
 LAYOUTS_DIR = "gooddata_layouts"
 LDM_DIR = "ldm"
 
-
-class GoodDataRestApiError(Exception):
-    """Wrapper for errors occurring from interaction with GD REST API."""
+API_PAGE_SIZE = 100
 
 
+# TODO: consider moving storage related logic to a separate module and reuse it in restore
 class BackupRestoreConfig:
     def __init__(self, conf_path: str):
         with open(conf_path, "r") as stream:
             conf = yaml.safe_load(stream)
             self.storage_type = conf["storage_type"]
             self.storage = conf["storage"]
+            self.api_page_size = conf.get("api_page_size", API_PAGE_SIZE)
 
 
 class BackupStorage(abc.ABC):
@@ -125,79 +114,6 @@ def export(self, folder, org_id, export_folder="local_backups"):
         )
 
 
-MaybeResponse: TypeAlias = Optional[requests.Response]
-
-
-class GDApi:
-    """Wrapper for GoodData REST API client."""
-
-    def __init__(self, host: str, api_token: str, headers=None):
-        self.endpoint = self._handle_endpoint(host)
-        self.api_token = api_token
-        self.headers = headers if headers else {}
-        self.wait_api_time = 10
-
-    @staticmethod
-    def _handle_endpoint(host: str) -> str:
-        """Ensures that the endpoint URL is correctly formatted."""
-        return (
-            f"{host}api/{API_VERSION}"
-            if host[-1] == "/"
-            else f"{host}/api/{API_VERSION}"
-        )
-
-    def get(
-        self,
-        path: str,
-        params,
-        ok_code: int = 200,
-        not_found_code: int = 404,
-    ) -> MaybeResponse:
-        """Sends a GET request to the GoodData API."""
-        kwargs = self._prepare_request(path, params)
-        logger.debug(f"GET request: {json.dumps(kwargs)}")
-        response = requests.get(**kwargs)
-        return self._resolve_return_code(
-            response, ok_code, kwargs["url"], "RestApi.get", not_found_code
-        )
-
-    def _prepare_request(self, path: str, params=None) -> dict[str, Any]:
-        """Prepares the request to be sent to the GoodData API."""
-        kwargs: dict[str, Any] = {
-            "url": f"{self.endpoint}/{path}",
-            "headers": self.headers.copy(),
-        }
-        if params:
-            kwargs["params"] = params
-        if self.api_token:
-            kwargs["headers"]["Authorization"] = f"{BEARER_TKN_PREFIX} {self.api_token}"
-        else:
-            raise RuntimeError(
-                "Token required for authentication against GD API is missing."
-            )
-        # TODO - Currently no credentials validation
-        # TODO - do we also support username+pwd auth? Or do we enforce token only?
-        # else:
-        #     kwargs['auth'] = (self.user, self.password) if self.user is not None else None  # noqa
-        return kwargs
-
-    @staticmethod
-    def _resolve_return_code(
-        response, ok_code: int, url, method, not_found_code: Optional[int] = None
-    ) -> MaybeResponse:
-        """Resolves the return code of the response."""
-        if response.status_code == ok_code:
-            logger.debug(f"{method} to {url} succeeded")
-            return response
-        if not_found_code and response.status_code == not_found_code:
-            logger.debug(f"{method} to {url} failed - target not found")
-            return None
-        raise GoodDataRestApiError(
-            f"{method} to {url} failed - "
-            f"response_code={response.status_code} message={response.text}"
-        )
-
-
 def create_api_client_from_profile(profile: str, profile_config: Path) -> GDApi:
     """Creates a GoodData API client from the specified profile."""
     with open(profile_config, "r") as file:
@@ -347,31 +263,6 @@ def store_automations(api: GDApi, export_path: Path, org_id: str, ws_id: str) ->
             json.dump(automations, f)
 
 
-def store_declarative_automations(
-    sdk: GoodDataSdk, export_path: Path, org_id: str, ws_id: str
-) -> None:
-    """Stores the declarative automations in the specified export path."""
-    # TODO: Currently not working because of a bug in the SDK. There is an alternative way to
-    # get the automations from the API, which is implemented here, but it will be better to use
-    # the SDK method once the bug is fixed.
-
-    # Construct path to automations folder to put it in the same subfolder as the analytics model
-    automations_path: Path = Path(
-        export_path, "gooddata_layouts", org_id, "workspaces", ws_id, "automations"
-    )
-    os.mkdir(automations_path)
-
-    # Get the automations via the SDK
-    automations: list[CatalogDeclarativeAutomation] = (
-        sdk.catalog_workspace.get_declarative_automations(ws_id)
-    )
-
-    # Store the automations
-    for automation in automations:
-        with open(f"{automations_path}/{automation.id}.yaml", "w") as f:
-            f.write(yaml.dump(automation.to_dict()))
-
-
 def store_declarative_filter_views(
     sdk: GoodDataSdk, export_path: Path, org_id: str, ws_id: str
 ) -> None:
@@ -393,93 +284,6 @@ def store_declarative_filter_views(
     )
 
 
-def read_csv_input_for_backup(file_path: str) -> list[str]:
-    """Reads the input CSV file and returns its content from the first column as a list of string."""
-
-    with open(file_path) as csv_file:
-        reader: Iterator[list[str]] = csv.reader(csv_file, skipinitialspace=True)
-
-        try:
-            # Skip the header
-            headers = next(reader)
-
-            if len(headers) > 1:
-                raise ValueError(
-                    "Input file contains more than one column. Please check the input and try again."
-                )
-
-        except StopIteration:
-            # Raise an error if the iterator is empty
-            raise ValueError("No content found in the CSV file.")
-
-        # Read the content
-        content = [row[0] for row in reader]
-
-        # If the content is empty (no rows), raise an error
-        if not content:
-            raise ValueError("No workspaces found in the CSV file.")
-
-    return content
-
-
-def get_recursive_children(
-    all_workspaces: list[CatalogDeclarativeWorkspace], parent_id: str
-) -> list[str]:
-    """Recursively gets the children of the specified parent workspace."""
-    children = []
-    for workspace in all_workspaces:
-        if workspace.parent and workspace.parent.id == parent_id:
-            children.append(workspace.id)
-            children.extend(get_recursive_children(all_workspaces, workspace.id))
-
-    return children
-
-
-def get_workspaces_to_backup(
-    input_type: str, path_to_csv: str, sdk: GoodDataSdk
-) -> list[str]:
-    """Returns the list of workspace IDs to back up based on the input type."""
-    if input_type == "list-of-workspaces":
-        return read_csv_input_for_backup(path_to_csv)
-
-    else:
-        declarative_workspaces: CatalogDeclarativeWorkspaces = (
-            sdk.catalog_workspace.get_declarative_workspaces()
-        )
-
-        workspaces: list[CatalogDeclarativeWorkspace] = (
-            declarative_workspaces.workspaces
-        )
-
-        if not workspaces:
-            raise RuntimeError("No workspaces found in the organization.")
-
-        if input_type == "list-of-parents":
-            list_of_parents = read_csv_input_for_backup(path_to_csv)
-            list_of_children: list[str] = []
-
-            for parent in list_of_parents:
-                list_of_children.extend(get_recursive_children(workspaces, parent))
-
-            if not list_of_children:
-                raise RuntimeError(
-                    "No child workspaces found for the provided list of parents."
-                )
-
-            # Include the parent workspaces in the backup
-            return list_of_parents + list_of_children
-
-        if input_type == "entire-organization":
-            list_of_workspaces: list[str] = []
-
-            for workspace in workspaces:
-                list_of_workspaces.append(workspace.id)
-
-            return list_of_workspaces
-
-    raise RuntimeError("Invalid input type provided.")
-
-
 def get_workspace_export(
     sdk: GoodDataSdk,
     api: GDApi,
@@ -589,8 +393,9 @@ def main(args: argparse.Namespace) -> None:
     storage_class: Type[BackupStorage] = get_storage(conf.storage_type)
     storage: BackupStorage = storage_class(conf)
 
-    workspaces_to_export: list[str] = get_workspaces_to_backup(
-        args.input_type, args.ws_csv, sdk
+    loader = InputLoader(api, conf.api_page_size)
+    workspaces_to_export: list[str] = loader.get_ids_to_backup(
+        args.input_type, args.ws_csv
     )
 
     with tempfile.TemporaryDirectory() as tmpdir:
diff --git a/scripts/restore.py b/scripts/restore.py
index 381ad41..675930d 100644
--- a/scripts/restore.py
+++ b/scripts/restore.py
@@ -5,22 +5,22 @@
 import json
 import logging
 import os
+import sys
 import tempfile
 import traceback
-import requests
-import sys
-import yaml
 import zipfile
 from pathlib import Path
-from typing import Any, Optional, TypeAlias, Type
+from typing import Any, Optional, Type, TypeAlias
 
 import boto3
+import requests
+import yaml
 from gooddata_sdk import (
-    GoodDataSdk,
     CatalogDeclarativeAnalytics,
-    CatalogDeclarativeModel,
-    CatalogDeclarativeFilterView,
     CatalogDeclarativeAutomation,
+    CatalogDeclarativeFilterView,
+    CatalogDeclarativeModel,
+    GoodDataSdk,
 )
 
 BEARER_TKN_PREFIX = "Bearer"
@@ -64,6 +64,7 @@ def _load_conf(path: str) -> dict[str, Any]:
             return yaml.safe_load(conf)
 
 
+# TODO: storage logic also defined in backup.py, consider moving to utils
 class BackupStorage(abc.ABC):
     """
     Retrieves archive of backed up hierarchical export of workspace declaration.
@@ -147,6 +148,7 @@ def get_ws_declaration(self, s3_target_path: str, local_target_path: Path) -> No
 
 
 class GDApi:
+    # TODO: also defined in utils, consider importing from there
     def __init__(self, host: str, api_token: str, headers: dict[str, Any] = {}):
         self.endpoint = self._handle_endpoint(host)
         self.api_token = api_token
@@ -370,30 +372,6 @@ def _load_and_put_filter_views(self, ws_id: str, src_path: Path) -> None:
                 ws_id, filter_views
             )
 
-    def _load_and_put_declarative_automations(self, ws_id: str, src_path: Path) -> None:
-        """Loads and puts automations into GoodData workspace."""
-        # TODO: This should potentially replace the _load_and_post_automations method
-        # once the SDK methods are working properly. Currently the CatalogDeclarativeAutomation
-        # object is received without relationships attribute, which means the automation is
-        # created created in Panther, but is not applied to anything.
-
-        automations_folder_path = Path(src_path / "automations")
-        if not automations_folder_path.exists():
-            # Skip if the automations directory does not exist
-            return
-
-        automations: list[CatalogDeclarativeAutomation] = []
-
-        for file in automations_folder_path.iterdir():
-            automation_content: dict[str, Any] = dict(self._safe_load_yaml(file))
-            automation: CatalogDeclarativeAutomation = (
-                CatalogDeclarativeAutomation.from_dict(automation_content)
-            )
-            automations.append(automation)
-
-        if automations:
-            self._sdk.catalog_workspace.put_declarative_automations(ws_id, automations)
-
     def _load_and_post_automations(self, ws_id: str, source_path: Path) -> None:
         """Loads automations from specified json file and creates them in the workspace."""
         # Load automations from JSON
diff --git a/scripts/utils/__init__.py b/scripts/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/utils/backup_utils/__init__.py b/scripts/utils/backup_utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/utils/backup_utils/input_loader.py b/scripts/utils/backup_utils/input_loader.py
new file mode 100644
index 0000000..62c4b49
--- /dev/null
+++ b/scripts/utils/backup_utils/input_loader.py
@@ -0,0 +1,191 @@
+# (C) 2023 GoodData Corporation
+import csv
+from dataclasses import dataclass
+from typing import Iterator
+
+from utils.gd_api import (  # type: ignore[import]
+    API_VERSION,
+    GDApi,
+    GoodDataRestApiError,
+    MaybeResponse,
+)
+from utils.logger import logger  # type: ignore[import]
+from utils.models.workspace_response import (  # type: ignore[import]
+    Workspace,
+    WorkspaceResponse,
+)
+
+
+class InputLoader:
+    """Class to handle loading and parsing the input data."""
+
+    api_client: GDApi
+    base_workspace_endpoint: str
+    hierarchy_endpoint: str
+    all_workspaces_endpoint: str
+
+    def __init__(self, api_client: GDApi, page_size: int) -> None:
+        self.api_client = api_client
+        self.page_size = page_size
+        self.set_endpoints()
+
+    def set_endpoints(self) -> None:
+        """Sets the base, hierarchy and all-workspaces endpoint templates."""
+        self.base_workspace_endpoint = "/api/v1/entities/workspaces"
+        self.hierarchy_endpoint = (
+            f"{self.base_workspace_endpoint}?"
+            + "filter=parent.id=={parent_id}"
+            + f"&include=parent&page=0&size={self.page_size}&sort=name,asc&metaInclude=page,hierarchy"
+        )
+        self.all_workspaces_endpoint = f"{self.base_workspace_endpoint}?page=0&size={self.page_size}&sort=name,asc&metaInclude=page"
+
+    @dataclass
+    class _ProcessDataOutput:
+        workspace_ids: list[str]
+        sub_parents: list[str] | None = None
+
+    @staticmethod
+    def read_csv_input_for_backup(file_path: str) -> list[str]:
+        """Reads the input CSV file and returns its content from the first column as a list of string."""
+
+        with open(file_path) as csv_file:
+            reader: Iterator[list[str]] = csv.reader(csv_file, skipinitialspace=True)
+
+            try:
+                # Skip the header
+                headers = next(reader)
+
+                if len(headers) > 1:
+                    raise ValueError(
+                        "Input file contains more than one column. Please check the input and try again."
+                    )
+
+            except StopIteration:
+                # Raise an error if the iterator is empty
+                raise ValueError("No content found in the CSV file.")
+
+            # Read the content
+            content = [row[0] for row in reader]
+
+            # If the content is empty (no rows), raise an error
+            if not content:
+                raise ValueError("No workspaces found in the CSV file.")
+
+        return content
+
+    def fetch_page(self, url: str) -> WorkspaceResponse:
+        """Fetch a page of workspaces."""
+
+        # Separate the API path from the URL so that it can be fed to the GDApi class
+        endpoint: str = url.split(f"api/{API_VERSION}")[1]
+        response: MaybeResponse = self.api_client.get(endpoint, None)
+        if response:
+            return WorkspaceResponse(**response.json())
+        else:
+            raise GoodDataRestApiError(
+                f"Failed to fetch data from the API. URL: {endpoint}"
+            )
+
+    @staticmethod
+    def process_data(data: list[Workspace]) -> _ProcessDataOutput:
+        """Extract children and sub-parents from workspace data."""
+        children: list[str] = []
+        sub_parents: list[str] = []
+
+        for workspace in data:
+            # append child workspace IDs
+            children.append(workspace.id)
+
+            # if hierarchy is present and has children, append child workspace ID to sub_parents
+            if workspace.meta and workspace.meta.hierarchy:
+                if workspace.meta.hierarchy.childrenCount > 0:
+                    sub_parents.append(workspace.id)
+        return InputLoader._ProcessDataOutput(children, sub_parents)
+
+    @staticmethod
+    def log_paging_progress(response: WorkspaceResponse) -> None:
+        """Log the progress of paging through API responses if pagination data is present."""
+        current_page: int | None
+        total_pages: int | None
+
+        if response.meta.page:
+            current_page = response.meta.page.number + 1
+            total_pages = response.meta.page.totalPages
+        else:
+            current_page = None
+            total_pages = None
+
+        if current_page and total_pages:
+            logger.info(f"Fetched page: {current_page} of {total_pages}")
+
+    def _paginate(self, url: str | None):
+        result: list[InputLoader._ProcessDataOutput] = []
+        while url:
+            response: WorkspaceResponse = self.fetch_page(url)
+            self.log_paging_progress(response)
+            result.append(self.process_data(response.data))
+            url = response.links.next
+
+        return result
+
+    def get_hierarchy(self, parent_id: str) -> list[str]:
+        """Returns a list of workspace IDs in the hierarchy."""
+        logger.info(f"Fetching children of {parent_id}")
+        url = self.hierarchy_endpoint.format(parent_id=parent_id)
+
+        all_children, sub_parents = [], []
+
+        results: list[InputLoader._ProcessDataOutput] = self._paginate(url)
+
+        for result in results:
+            all_children.extend(result.workspace_ids)
+            if result.sub_parents:
+                sub_parents.extend(result.sub_parents)
+
+        for subparent in sub_parents:
+            all_children += self.get_hierarchy(subparent)
+
+        return all_children
+
+    def get_all_workspaces(self) -> list[str]:
+        """Returns a list of all workspace IDs in the organization."""
+        logger.info("Fetching all workspaces")
+        url = self.all_workspaces_endpoint
+
+        all_workspaces: list[str] = []
+
+        results: list[InputLoader._ProcessDataOutput] = self._paginate(url)
+
+        for result in results:
+            all_workspaces.extend(result.workspace_ids)
+
+        return all_workspaces
+
+    def get_ids_to_backup(self, input_type: str, path_to_csv: str) -> list[str]:
+        """Returns the list of workspace IDs to back up based on the input type."""
+        if input_type == "list-of-workspaces":
+            return self.read_csv_input_for_backup(path_to_csv)
+
+        else:
+            if input_type == "list-of-parents":
+                list_of_parents = self.read_csv_input_for_backup(path_to_csv)
+                list_of_children: list[str] = []
+
+                for parent in list_of_parents:
+                    list_of_children.extend(self.get_hierarchy(parent))
+
+                if not list_of_children:
+                    raise RuntimeError(
+                        "No child workspaces found for the provided list of parents."
+                    )
+
+                # Include the parent workspaces in the backup
+                return list_of_parents + list_of_children
+
+            if input_type == "entire-organization":
+                list_of_workspaces = self.get_all_workspaces()
+                if not list_of_workspaces:
+                    raise RuntimeError("No workspaces found in the organization.")
+                return list_of_workspaces
+
+        raise RuntimeError("Invalid input type provided.")
diff --git a/scripts/utils/gd_api.py b/scripts/utils/gd_api.py
new file mode 100644
index 0000000..de0578d
--- /dev/null
+++ b/scripts/utils/gd_api.py
@@ -0,0 +1,87 @@
+# (C) 2023 GoodData Corporation
+
+import json
+from typing import Any, TypeAlias
+
+import requests
+from utils.logger import logger  # type: ignore[import]
+
+API_VERSION = "v1"
+BEARER_TKN_PREFIX = "Bearer"
+
+MaybeResponse: TypeAlias = requests.Response | None
+
+
+class GoodDataRestApiError(Exception):
+    """Wrapper for errors occurring from interaction with GD REST API."""
+
+
+class GDApi:
+    """Wrapper for GoodData REST API client."""
+
+    # TODO: also defined in restore.py, consider importing it from here instead
+    def __init__(self, host: str, api_token: str, headers=None):
+        self.endpoint = self._handle_endpoint(host)
+        self.api_token = api_token
+        self.headers = headers if headers else {}
+        self.wait_api_time = 10
+
+    @staticmethod
+    def _handle_endpoint(host: str) -> str:
+        """Ensures that the endpoint URL is correctly formatted."""
+        return (
+            f"{host}api/{API_VERSION}"
+            if host[-1] == "/"
+            else f"{host}/api/{API_VERSION}"
+        )
+
+    def get(
+        self,
+        path: str,
+        params,
+        ok_code: int = 200,
+        not_found_code: int = 404,
+    ) -> MaybeResponse:
+        """Sends a GET request to the GoodData API."""
+        kwargs = self._prepare_request(path, params)
+        logger.debug(f"GET request: {json.dumps(kwargs)}")
+        response = requests.get(**kwargs)
+        return self._resolve_return_code(
+            response, ok_code, kwargs["url"], "RestApi.get", not_found_code
+        )
+
+    def _prepare_request(self, path: str, params=None) -> dict[str, Any]:
+        """Prepares the request to be sent to the GoodData API."""
+        kwargs: dict[str, Any] = {
+            "url": f"{self.endpoint}/{path}",
+            "headers": self.headers.copy(),
+        }
+        if params:
+            kwargs["params"] = params
+        if self.api_token:
+            kwargs["headers"]["Authorization"] = f"{BEARER_TKN_PREFIX} {self.api_token}"
+        else:
+            raise RuntimeError(
+                "Token required for authentication against GD API is missing."
+            )
+        # TODO - Currently no credentials validation
+        # TODO - do we also support username+pwd auth? Or do we enforce token only?
+        # else:
+        #     kwargs['auth'] = (self.user, self.password) if self.user is not None else None  # noqa
+        return kwargs
+
+    @staticmethod
+    def _resolve_return_code(
+        response, ok_code: int, url, method, not_found_code: int | None = None
+    ) -> MaybeResponse:
+        """Resolves the return code of the response."""
+        if response.status_code == ok_code:
+            logger.debug(f"{method} to {url} succeeded")
+            return response
+        if not_found_code and response.status_code == not_found_code:
+            logger.debug(f"{method} to {url} failed - target not found")
+            return None
+        raise GoodDataRestApiError(
+            f"{method} to {url} failed - "
+            f"response_code={response.status_code} message={response.text}"
+        )
diff --git a/scripts/utils/logger.py b/scripts/utils/logger.py
new file mode 100644
index 0000000..576fa3c
--- /dev/null
+++ b/scripts/utils/logger.py
@@ -0,0 +1,22 @@
+import logging
+
+
+class LevelFormatter(logging.Formatter):
+    BASE_FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
+    FORMATS = {
+        logging.WARNING: "\033[33m%(asctime)s [%(levelname)s] %(message)s\033[00m",
+        logging.ERROR: "\033[31m%(asctime)s [%(levelname)s] %(message)s\033[00m",
+    }
+
+    def format(self, record):
+        fmt = self.FORMATS.get(record.levelno, self.BASE_FORMAT)
+        formatter = logging.Formatter(fmt)
+        return formatter.format(record)
+
+
+logger = logging.getLogger(__name__)
+logging.getLogger(__name__).setLevel(logging.INFO)
+logger.setLevel(logging.INFO)
+ch = logging.StreamHandler()
+ch.setFormatter(LevelFormatter())
+logger.addHandler(ch)
diff --git a/scripts/utils/models/__init__.py b/scripts/utils/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/utils/models/workspace_response.py b/scripts/utils/models/workspace_response.py
new file mode 100644
index 0000000..f6a3d8e
--- /dev/null
+++ b/scripts/utils/models/workspace_response.py
@@ -0,0 +1,33 @@
+from pydantic import BaseModel  # type: ignore[import] # missing type stub
+
+
+class Page(BaseModel):
+    size: int
+    totalElements: int
+    totalPages: int
+    number: int
+
+
+class Hierarchy(BaseModel):
+    childrenCount: int
+
+
+class Meta(BaseModel):
+    page: Page | None = None
+    hierarchy: Hierarchy | None = None
+
+
+class Workspace(BaseModel):
+    id: str
+    meta: Meta | None = None
+
+
+class Links(BaseModel):
+    self: str
+    next: str | None = None
+
+
+class WorkspaceResponse(BaseModel):
+    data: list[Workspace]
+    links: Links
+    meta: Meta
diff --git a/tests/__init__.py b/tests/__init__.py
index 332df81..37d863d 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1 +1 @@
-# (C) 2023 GoodData Corporation
+# (C) 2025 GoodData Corporation
diff --git a/tests/data/backup/mock_values.py b/tests/data/backup/mock_values.py
deleted file mode 100644
index b74f974..0000000
--- a/tests/data/backup/mock_values.py
+++ /dev/null
@@ -1,57 +0,0 @@
-from gooddata_sdk.catalog.identifier import CatalogWorkspaceIdentifier
-from gooddata_sdk.catalog.workspace.declarative_model.workspace.workspace import (
-    CatalogDeclarativeWorkspace,
-    CatalogDeclarativeWorkspaces,
-)
-
-NO_CHILDREN_RETURN_VALUE = CatalogDeclarativeWorkspaces(
-    workspaces=[
-        CatalogDeclarativeWorkspace(
-            id="ws_id",
-            name="ws_name",
-            parent=CatalogWorkspaceIdentifier(id="recognized_parent_id"),
-        )
-    ],
-    workspace_data_filters=[],
-)
-
-WORKSPACE_HIERARCHY = CatalogDeclarativeWorkspaces(
-    workspaces=[
-        CatalogDeclarativeWorkspace(
-            id="parent",
-            name="parent",
-            parent=None,
-        ),
-        CatalogDeclarativeWorkspace(
-            id="direct_child",
-            name="direct_child",
-            parent=CatalogWorkspaceIdentifier(id="parent"),
-        ),
-        CatalogDeclarativeWorkspace(
-            id="indirect_child",
-            name="indirect_child",
-            parent=CatalogWorkspaceIdentifier(id="direct_child"),
-        ),
-        CatalogDeclarativeWorkspace(
-            id="another_direct_child",
-            name="another_direct_child",
-            parent=CatalogWorkspaceIdentifier(id="parent"),
-        ),
-        CatalogDeclarativeWorkspace(
-            id="another_indirect_child",
-            name="another_indirect_child",
-            parent=CatalogWorkspaceIdentifier(id="another_direct_child"),
-        ),
-        CatalogDeclarativeWorkspace(
-            id="unrelated_workspace",
-            name="unrelated_workspace",
-            parent=None,
-        ),
-        CatalogDeclarativeWorkspace(
-            id="another_unrelated_workspace",
-            name="another_unrelated_workspace",
-            parent=None,
-        ),
-    ],
-    workspace_data_filters=[],
-)
diff --git a/tests/test_backup.py b/tests/test_backup.py
index fb5a00a..61daece 100644
--- a/tests/test_backup.py
+++ b/tests/test_backup.py
@@ -1,4 +1,11 @@
-# (C) 2023 GoodData Corporation
+# (C) 2025 GoodData Corporation
+import os
+import sys
+
+sys.path.insert(
+    0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../scripts"))
+)
+
 import argparse
 import os
 import shutil
@@ -10,13 +17,9 @@
 import boto3
 import pytest
 from gooddata_sdk import GoodDataSdk  # type: ignore[import]
-from gooddata_sdk.catalog.workspace.declarative_model.workspace.workspace import (
-    CatalogDeclarativeWorkspaces,
-)
 from moto import mock_s3
 
-from scripts import backup
-from tests.data.backup.mock_values import NO_CHILDREN_RETURN_VALUE, WORKSPACE_HIERARCHY
+import scripts.backup as backup
 
 LOGGER_NAME = "scripts.backup"
 MOCK_DL_TARGET = Path("overlays.zip")
@@ -172,83 +175,6 @@ def test_wrong_input_type_raises_error():
         backup.validate_args(args)
 
 
-@mock.patch.object(
-    MOCK_SDK.catalog_workspace,
-    "get_declarative_workspaces",
-    return_value=CatalogDeclarativeWorkspaces(workspaces=[], workspace_data_filters=[]),
-)
-def test_get_workspaces_to_backup_empty_org(mock_get_declarative_workspaces):
-    with pytest.raises(RuntimeError, match="No workspaces found in the organization."):
-        backup.get_workspaces_to_backup(
-            "tests/data/backup/test_exports/services",
-            "services",
-            MOCK_SDK,
-        )
-
-
-@mock.patch(
-    "scripts.backup.read_csv_input_for_backup", return_value=["unrecognized_parent_id"]
-)
-@mock.patch.object(
-    MOCK_SDK.catalog_workspace,
-    "get_declarative_workspaces",
-    return_value=NO_CHILDREN_RETURN_VALUE,
-)
-def test_get_workspaces_to_backup_no_children(
-    mock_get_declarative_workspaces, mock_read_csv_input
-):
-    with pytest.raises(
-        RuntimeError,
-        match="No child workspaces found for the provided list of parents.",
-    ):
-        backup.get_workspaces_to_backup(
-            "list-of-parents",
-            "some-csv-file.csv",
-            MOCK_SDK,
-        )
-
-
-@mock.patch(
-    "scripts.backup.read_csv_input_for_backup", return_value=["unrecognized_parent_id"]
-)
-@mock.patch.object(
-    MOCK_SDK.catalog_workspace,
-    "get_declarative_workspaces",
-    return_value=NO_CHILDREN_RETURN_VALUE,
-)
-def test_get_workspaces_to_backup_wrong_input_type(
-    mock_get_declarative_workspaces, mock_read_csv_input
-):
-    with pytest.raises(RuntimeError, match="Invalid input type provided."):
-        backup.get_workspaces_to_backup(
-            "invalid-input-type",
-            "some-csv-file.csv",
-            MOCK_SDK,
-        )
-
-
-def test_get_recursive_children():
-    workspaces = WORKSPACE_HIERARCHY.workspaces
-    workspace_id = "parent"
-    expected_children = [
-        "direct_child",
-        "indirect_child",
-        "another_direct_child",
-        "another_indirect_child",
-    ]
-
-    result = backup.get_recursive_children(workspaces, workspace_id)
-
-    assert sorted(result) == sorted(expected_children)
-
-
-def test_get_recursive_children_empty_workspaces():
-    workspaces: list = []
-    workspace_id = "parent"
-    result = backup.get_recursive_children(workspaces, workspace_id)
-    assert result == []
-
-
 def test_get_s3_storage():
     s3_storage_type = backup.get_storage("s3")
     assert s3_storage_type == backup.S3Storage
@@ -309,7 +235,7 @@ def test_archive_gooddata_layouts_to_zip():
         assert zip_exists
 
 
-@mock.patch("scripts.backup.requests", new_callable=mock_requests)
+@mock.patch("utils.gd_api.requests", new_callable=mock_requests)
 def test_get_user_data_filters_normal_response(requests):
     api = backup.GDApi("some.host.com", "token")
 
@@ -421,52 +347,3 @@ def test_file_upload(s3, s3_bucket):
         S3_BUCKET,
         "some/s3/backup/path/org_id/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/filter_contexts/id.yaml",
     ).load()
-
-
-def test_read_csv_input_empty_file() -> None:
-    """Test with an empty CSV file."""
-    with tempfile.NamedTemporaryFile() as temp_csv:
-        path_to_csv = temp_csv.name
-        with pytest.raises(ValueError, match="No content found in the CSV file."):
-            backup.read_csv_input_for_backup(path_to_csv)
-
-
-def test_read_csv_input_only_header() -> None:
-    """Test with a CSV file that contains only the header."""
-    with tempfile.NamedTemporaryFile() as temp_csv:
-        temp_csv.write(b"header1\n")
-        temp_csv.flush()
-        temp_csv.seek(0)
-        path_to_csv = temp_csv.name
-        with pytest.raises(ValueError, match="No workspaces found in the CSV file."):
-            backup.read_csv_input_for_backup(path_to_csv)
-
-
-def test_read_csv_input_valid() -> None:
-    """Test with a valid CSV file."""
-    with tempfile.NamedTemporaryFile(delete=False) as temp_csv:
-        temp_csv.write(b"header1\n")
-        temp_csv.write(b"workspace1\n")
-        temp_csv.write(b"workspace2\n")
-        temp_csv.flush()
-        temp_csv.seek(0)
-        path_to_csv = temp_csv.name
-        result = backup.read_csv_input_for_backup(path_to_csv)
-        assert result == ["workspace1", "workspace2"]
-    os.remove(path_to_csv)
-
-
-def test_read_csv_input_too_many_columns() -> None:
-    """Test with a CSV file that contains too many columns."""
-    with tempfile.NamedTemporaryFile(delete=False) as temp_csv:
-        temp_csv.write(b"header1,header2\n")
-        temp_csv.write(b"workspace1,extra_column\n")
-        temp_csv.flush()
-        temp_csv.seek(0)
-        path_to_csv = temp_csv.name
-        with pytest.raises(
-            ValueError,
-            match="Input file contains more than one column. Please check the input and try again.",
-        ):
-            backup.read_csv_input_for_backup(path_to_csv)
-    os.remove(path_to_csv)
diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_utils/test_backup_utils/__init__.py b/tests/test_utils/test_backup_utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_utils/test_backup_utils/test_input_loader.py b/tests/test_utils/test_backup_utils/test_input_loader.py
new file mode 100644
index 0000000..5282d40
--- /dev/null
+++ b/tests/test_utils/test_backup_utils/test_input_loader.py
@@ -0,0 +1,204 @@
+# (C) 2025 GoodData Corporation
+import os
+import sys
+
+sys.path.insert(
+    0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../scripts"))
+)
+
+import tempfile
+
+import pytest
+
+from scripts.utils.backup_utils.input_loader import InputLoader
+from scripts.utils.gd_api import GDApi
+from scripts.utils.models.workspace_response import (
+    Hierarchy,
+    Links,
+    Meta,
+    Page,
+    Workspace,
+    WorkspaceResponse,
+)
+
+MOCK_GDP_API = GDApi(
+    host="https://fake.host/",
+    api_token="fake_token",
+)
+
+# MOCK_INPUT_LOADER = backup.InputLoader(MOCK_GD_API, 100)
+
+
+@pytest.fixture
+def input_loader():
+    loader = InputLoader(MOCK_GDP_API, page_size=2)
+    loader.hierarchy_endpoint = "/fake/hierarchy?filter=parent.id=={parent_id}"
+    loader.all_workspaces_endpoint = "/fake/all"
+    return loader
+
+
+def test_process_data_extracts_children_and_subparents():
+    ws1 = Workspace(id="ws1", meta=Meta(hierarchy=Hierarchy(childrenCount=2)))
+    ws2 = Workspace(id="ws2", meta=Meta(hierarchy=Hierarchy(childrenCount=0)))
+    ws3 = Workspace(id="ws3", meta=None)
+
+    result = InputLoader.process_data([ws1, ws2, ws3])
+    assert result.workspace_ids == ["ws1", "ws2", "ws3"]
+    assert result.sub_parents == ["ws1"]
+
+
+def test_log_paging_progress_logs_info(mocker):
+    response = WorkspaceResponse(
+        data=[],
+        meta=Meta(
+            page=Page(size=5, totalElements=25, number=1, totalPages=5), hierarchy=None
+        ),
+        links=Links(self="self", next="next"),
+    )
+
+    mock_logger = mocker.patch("scripts.utils.logger.logger.info")
+    InputLoader.log_paging_progress(response)
+    mock_logger.assert_called_once()
+
+
+def test_log_paging_progress_no_page(mocker):
+    response = WorkspaceResponse(
+        data=[],
+        meta=Meta(page=None, hierarchy=None),
+        links=Links(self="self", next="next"),
+    )
+
+    mock_logger = mocker.patch("scripts.utils.logger.logger.info")
+    InputLoader.log_paging_progress(response)
+    assert mock_logger.call_count == 0
+
+
+def test_paginate_calls_fetch_page_and_process_data(input_loader, monkeypatch):
+    ws1 = Workspace(id="ws1", meta=Meta(hierarchy=Hierarchy(childrenCount=1)))
+    ws2 = Workspace(id="ws2", meta=Meta(hierarchy=Hierarchy(childrenCount=0)))
+    links1 = Links(self="self", next="next_url")
+    links2 = Links(self="self", next=None)
+    resp1 = WorkspaceResponse(
+        data=[ws1], meta=Meta(hierarchy=None, page=None), links=links1
+    )
+    resp2 = WorkspaceResponse(
+        data=[ws2], meta=Meta(hierarchy=None, page=None), links=links2
+    )
+
+    fetch_page_calls = []
+
+    def fetch_page_side_effect(url):
+        fetch_page_calls.append(url)
+        return resp1 if len(fetch_page_calls) == 1 else resp2
+
+    input_loader.fetch_page = fetch_page_side_effect
+
+    process_data_calls = []
+
+    def process_data_side_effect(data):
+        process_data_calls.append(data)
+        if len(process_data_calls) == 1:
+            return InputLoader._ProcessDataOutput(["ws1"], ["ws1"])
+        else:
+            return InputLoader._ProcessDataOutput(["ws2"], [])
+
+    monkeypatch.setattr(
+        InputLoader, "process_data", staticmethod(process_data_side_effect)
+    )
+    monkeypatch.setattr(
+        InputLoader, "log_paging_progress", staticmethod(lambda resp: None)
+    )
+
+    result = input_loader._paginate("first_url")
+    assert len(result) == 2
+    assert result[0].workspace_ids == ["ws1"]
+    assert result[1].workspace_ids == ["ws2"]
+    assert len(fetch_page_calls) == 2
+    assert len(process_data_calls) == 2
+
+
+def test_get_hierarchy_recurses(input_loader, monkeypatch):
+    def fake_paginate(url):
+        if "p1" in url:
+            return [InputLoader._ProcessDataOutput(["c1"], ["c1"])]
+        if "c1" in url:
+            return [InputLoader._ProcessDataOutput(["c2"], [])]
+        return []
+
+    input_loader._paginate = fake_paginate
+    monkeypatch.setattr(
+        "scripts.utils.backup_utils.input_loader.logger",
+        type("Logger", (), {"info": lambda self, msg: None})(),
+    )
+    result = input_loader.get_hierarchy("p1")
+    assert set(result) == {"c1", "c2"}
+
+
+def test_get_workspaces_to_backup_empty_org(input_loader, monkeypatch):
+    monkeypatch.setattr(
+        input_loader,
+        "get_all_workspaces",
+        lambda: [],
+    )
+    with pytest.raises(RuntimeError, match="No workspaces found in the organization."):
+        input_loader.get_ids_to_backup(
+            "entire-organization",
+            "some-csv-file.csv",
+        )
+
+
+def test_get_workspaces_to_backup_wrong_input_type(input_loader):
+    with pytest.raises(RuntimeError, match="Invalid input type provided."):
+        input_loader.get_ids_to_backup(
+            "invalid-input-type",
+            "some-csv-file.csv",
+        )
+
+
+def test_read_csv_input_empty_file(input_loader) -> None:
+    """Test with an empty CSV file."""
+    with tempfile.NamedTemporaryFile() as temp_csv:
+        path_to_csv = temp_csv.name
+        with pytest.raises(ValueError, match="No content found in the CSV file."):
+            input_loader.read_csv_input_for_backup(path_to_csv)
+
+
+def test_read_csv_input_only_header(input_loader) -> None:
+    """Test with a CSV file that contains only the header."""
+    with tempfile.NamedTemporaryFile() as temp_csv:
+        temp_csv.write(b"header1\n")
+        temp_csv.flush()
+        temp_csv.seek(0)
+        path_to_csv = temp_csv.name
+        with pytest.raises(ValueError, match="No workspaces found in the CSV file."):
+            input_loader.read_csv_input_for_backup(path_to_csv)
+
+
+def test_read_csv_input_valid(input_loader) -> None:
+    """Test with a valid CSV file."""
+    with tempfile.NamedTemporaryFile(delete=False) as temp_csv:
+        temp_csv.write(b"header1\n")
+        temp_csv.write(b"workspace1\n")
+        temp_csv.write(b"workspace2\n")
+        temp_csv.flush()
+        temp_csv.seek(0)
+        path_to_csv = temp_csv.name
+        result = input_loader.read_csv_input_for_backup(path_to_csv)
+        assert result == ["workspace1", "workspace2"]
+    os.remove(path_to_csv)
+
+
+def test_read_csv_input_too_many_columns(input_loader) -> None:
+    """Test with a CSV file that contains too many columns."""
+    with tempfile.NamedTemporaryFile(delete=False) as temp_csv:
+        temp_csv.write(b"header1,header2\n")
+        temp_csv.write(b"workspace1,extra_column\n")
+        temp_csv.flush()
+        temp_csv.seek(0)
+        path_to_csv = temp_csv.name
+        with pytest.raises(
+            ValueError,
+            match="Input file contains more than one column. Please check the input and try again.",
+        ):
+            input_loader.read_csv_input_for_backup(path_to_csv)
+    os.remove(path_to_csv)

From 38667eea04b1c18f11eeb02c0b9d97bb687b6682 Mon Sep 17 00:00:00 2001
From: janmatzek <jan.matzek@gmail.com>
Date: Thu, 22 May 2025 14:16:50 +0200
Subject: [PATCH 2/3] chore: update (c) year

---
 LICENCE                                    | 2 +-
 scripts/__init__.py                        | 2 +-
 scripts/permission_mgmt.py                 | 3 +--
 scripts/restore.py                         | 2 +-
 scripts/user_mgmt.py                       | 6 ++----
 scripts/utils/backup_utils/input_loader.py | 2 +-
 scripts/utils/gd_api.py                    | 2 +-
 tests/test_permissions.py                  | 2 +-
 tests/test_restore.py                      | 6 +++---
 tests/test_user_mgmt.py                    | 7 +++----
 10 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/LICENCE b/LICENCE
index 332301a..db896a4 100644
--- a/LICENCE
+++ b/LICENCE
@@ -1,6 +1,6 @@
 BSD License
 
-Copyright (c) 2023-2024, GoodData Corporation. All rights reserved.
+Copyright (c) 2023-2025, GoodData Corporation. All rights reserved.
 
 Redistribution and use in source and binary forms, with or without modification, are permitted, provided that the following conditions are met:
 
diff --git a/scripts/__init__.py b/scripts/__init__.py
index 332df81..37d863d 100644
--- a/scripts/__init__.py
+++ b/scripts/__init__.py
@@ -1 +1 @@
-# (C) 2023 GoodData Corporation
+# (C) 2025 GoodData Corporation
diff --git a/scripts/permission_mgmt.py b/scripts/permission_mgmt.py
index 41151f0..6275d3a 100644
--- a/scripts/permission_mgmt.py
+++ b/scripts/permission_mgmt.py
@@ -1,4 +1,4 @@
-# (C) 2023 GoodData Corporation
+# (C) 2025 GoodData Corporation
 import argparse
 import csv
 import logging
@@ -11,7 +11,6 @@
 import gooddata_sdk as gd_sdk
 from gooddata_api_client.exceptions import NotFoundException
 
-
 USER_TYPE = "user"
 USER_GROUP_TYPE = "userGroup"
 
diff --git a/scripts/restore.py b/scripts/restore.py
index 675930d..9914f59 100644
--- a/scripts/restore.py
+++ b/scripts/restore.py
@@ -1,4 +1,4 @@
-# (C) 2023 GoodData Corporation
+# (C) 2025 GoodData Corporation
 import abc
 import argparse
 import csv
diff --git a/scripts/user_mgmt.py b/scripts/user_mgmt.py
index caa6923..059bf30 100644
--- a/scripts/user_mgmt.py
+++ b/scripts/user_mgmt.py
@@ -1,4 +1,4 @@
-# (C) 2023 GoodData Corporation
+# (C) 2025 GoodData Corporation
 import argparse
 import csv
 import logging
@@ -228,9 +228,7 @@ def create_clients(args: argparse.Namespace) -> gd_sdk.GoodDataSdk:
 
     profile_config, profile = args.profile_config, args.profile
     if os.path.exists(profile_config):
-        logger.info(
-            f"Using GoodData profile {profile} " f"sourced from {profile_config}."
-        )
+        logger.info(f"Using GoodData profile {profile} sourced from {profile_config}.")
         sdk = gd_sdk.GoodDataSdk.create_from_profile(profile, profile_config)
         return sdk
 
diff --git a/scripts/utils/backup_utils/input_loader.py b/scripts/utils/backup_utils/input_loader.py
index 62c4b49..e636c62 100644
--- a/scripts/utils/backup_utils/input_loader.py
+++ b/scripts/utils/backup_utils/input_loader.py
@@ -1,4 +1,4 @@
-# (C) 2023 GoodData Corporation
+# (C) 2025 GoodData Corporation
 import csv
 from dataclasses import dataclass
 from typing import Iterator
diff --git a/scripts/utils/gd_api.py b/scripts/utils/gd_api.py
index de0578d..56a5450 100644
--- a/scripts/utils/gd_api.py
+++ b/scripts/utils/gd_api.py
@@ -1,4 +1,4 @@
-# (C) 2023 GoodData Corporation
+# (C) 2025 GoodData Corporation
 
 import json
 from typing import Any, TypeAlias
diff --git a/tests/test_permissions.py b/tests/test_permissions.py
index bdd9c69..4d50c5f 100644
--- a/tests/test_permissions.py
+++ b/tests/test_permissions.py
@@ -1,4 +1,4 @@
-# (C) 2023 GoodData Corporation
+# (C) 2025 GoodData Corporation
 import argparse
 from unittest import mock
 
diff --git a/tests/test_restore.py b/tests/test_restore.py
index f92a4ee..f014ea3 100644
--- a/tests/test_restore.py
+++ b/tests/test_restore.py
@@ -1,16 +1,16 @@
-# (C) 2023 GoodData Corporation
+# (C) 2025 GoodData Corporation
 import argparse
+import json
 import logging
 import os
 import tempfile
-import json
 from pathlib import Path
 from unittest import mock
 
 import boto3
+import gooddata_sdk as gd_sdk
 import pytest
 from moto import mock_s3
-import gooddata_sdk as gd_sdk
 
 from scripts import restore
 
diff --git a/tests/test_user_mgmt.py b/tests/test_user_mgmt.py
index fbe811e..4d318ee 100644
--- a/tests/test_user_mgmt.py
+++ b/tests/test_user_mgmt.py
@@ -1,16 +1,15 @@
-# (C) 2023 GoodData Corporation
+# (C) 2025 GoodData Corporation
 import argparse
 from dataclasses import dataclass
-from unittest import mock
 from typing import Any, Optional
+from unittest import mock
 
-import pytest
 import gooddata_sdk as gd_sdk
+import pytest
 from gooddata_api_client.exceptions import NotFoundException
 
 from scripts import user_mgmt
 
-
 TEST_CSV_PATH = "tests/data/user_mgmt/input.csv"
 
 

From 9326798b3905038b508e443107a14eb517c02666 Mon Sep 17 00:00:00 2001
From: janmatzek <jan.matzek@gmail.com>
Date: Fri, 23 May 2025 15:27:39 +0200
Subject: [PATCH 3/3] fixup: use snake_case and aliases in workspace model, add
 todos

---
 scripts/utils/backup_utils/input_loader.py    |  8 +++++--
 scripts/utils/models/workspace_response.py    | 24 +++++++++++++++----
 tests/test_backup.py                          |  2 +-
 .../test_backup_utils/test_input_loader.py    | 11 +++++----
 4 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/scripts/utils/backup_utils/input_loader.py b/scripts/utils/backup_utils/input_loader.py
index e636c62..3f0df38 100644
--- a/scripts/utils/backup_utils/input_loader.py
+++ b/scripts/utils/backup_utils/input_loader.py
@@ -98,7 +98,7 @@ def process_data(data: list[Workspace]) -> _ProcessDataOutput:
 
             # if hierarchy is present and has children, append child workspace ID to sub_parents
             if workspace.meta and workspace.meta.hierarchy:
-                if workspace.meta.hierarchy.childrenCount > 0:
+                if workspace.meta.hierarchy.children_count > 0:
                     sub_parents.append(workspace.id)
         return InputLoader._ProcessDataOutput(children, sub_parents)
 
@@ -110,7 +110,7 @@ def log_paging_progress(response: WorkspaceResponse) -> None:
 
         if response.meta.page:
             current_page = response.meta.page.number + 1
-            total_pages = response.meta.page.totalPages
+            total_pages = response.meta.page.total_pages
         else:
             current_page = None
             total_pages = None
@@ -149,6 +149,10 @@ def get_hierarchy(self, parent_id: str) -> list[str]:
 
     def get_all_workspaces(self) -> list[str]:
         """Returns a list of all workspace IDs in the organization."""
+        # TODO: can be optimized - requests can be sent asynchronously.
+        # Use the total number of pages to calculate the number of requests
+        # to be sent. Use semaphore or otherwise limit the number of concurrent
+        # requests to avoid putting too much load on the server.
         logger.info("Fetching all workspaces")
         url = self.all_workspaces_endpoint
 
diff --git a/scripts/utils/models/workspace_response.py b/scripts/utils/models/workspace_response.py
index f6a3d8e..9a7ae99 100644
--- a/scripts/utils/models/workspace_response.py
+++ b/scripts/utils/models/workspace_response.py
@@ -1,15 +1,31 @@
-from pydantic import BaseModel  # type: ignore[import] # missing type stub
+from pydantic import (  # type: ignore[import] # missing type stub
+    BaseModel,
+    ConfigDict,
+)
+from pydantic.alias_generators import (  # type: ignore[import] # missing type stub
+    to_camel,
+)
 
 
 class Page(BaseModel):
     size: int
-    totalElements: int
-    totalPages: int
+    total_elements: int
+    total_pages: int
     number: int
 
+    model_config = ConfigDict(
+        alias_generator=to_camel,
+        populate_by_name=True,
+    )
+
 
 class Hierarchy(BaseModel):
-    childrenCount: int
+    children_count: int
+
+    model_config = ConfigDict(
+        alias_generator=to_camel,
+        populate_by_name=True,
+    )
 
 
 class Meta(BaseModel):
diff --git a/tests/test_backup.py b/tests/test_backup.py
index 61daece..43b7b48 100644
--- a/tests/test_backup.py
+++ b/tests/test_backup.py
@@ -16,7 +16,7 @@
 
 import boto3
 import pytest
-from gooddata_sdk import GoodDataSdk  # type: ignore[import]
+from gooddata_sdk.sdk import GoodDataSdk
 from moto import mock_s3
 
 import scripts.backup as backup
diff --git a/tests/test_utils/test_backup_utils/test_input_loader.py b/tests/test_utils/test_backup_utils/test_input_loader.py
index 5282d40..a520e88 100644
--- a/tests/test_utils/test_backup_utils/test_input_loader.py
+++ b/tests/test_utils/test_backup_utils/test_input_loader.py
@@ -38,8 +38,8 @@ def input_loader():
 
 
 def test_process_data_extracts_children_and_subparents():
-    ws1 = Workspace(id="ws1", meta=Meta(hierarchy=Hierarchy(childrenCount=2)))
-    ws2 = Workspace(id="ws2", meta=Meta(hierarchy=Hierarchy(childrenCount=0)))
+    ws1 = Workspace(id="ws1", meta=Meta(hierarchy=Hierarchy(children_count=2)))
+    ws2 = Workspace(id="ws2", meta=Meta(hierarchy=Hierarchy(children_count=0)))
     ws3 = Workspace(id="ws3", meta=None)
 
     result = InputLoader.process_data([ws1, ws2, ws3])
@@ -51,7 +51,8 @@ def test_log_paging_progress_logs_info(mocker):
     response = WorkspaceResponse(
         data=[],
         meta=Meta(
-            page=Page(size=5, totalElements=25, number=1, totalPages=5), hierarchy=None
+            page=Page(size=5, total_elements=25, number=1, total_pages=5),
+            hierarchy=None,
         ),
         links=Links(self="self", next="next"),
     )
@@ -74,8 +75,8 @@ def test_log_paging_progress_no_page(mocker):
 
 
 def test_paginate_calls_fetch_page_and_process_data(input_loader, monkeypatch):
-    ws1 = Workspace(id="ws1", meta=Meta(hierarchy=Hierarchy(childrenCount=1)))
-    ws2 = Workspace(id="ws2", meta=Meta(hierarchy=Hierarchy(childrenCount=0)))
+    ws1 = Workspace(id="ws1", meta=Meta(hierarchy=Hierarchy(children_count=1)))
+    ws2 = Workspace(id="ws2", meta=Meta(hierarchy=Hierarchy(children_count=0)))
     links1 = Links(self="self", next="next_url")
     links2 = Links(self="self", next=None)
     resp1 = WorkspaceResponse(