Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion LICENCE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
BSD License

Copyright (c) 2023-2024, GoodData Corporation. All rights reserved.
Copyright (c) 2023-2025, GoodData Corporation. All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted, provided that the following conditions are met:

Expand Down
3 changes: 3 additions & 0 deletions docs/BACKUP.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,15 @@ python scripts/backup.py input.csv conf.yaml -p path/to/profiles.yaml --profile
## Configuration file (conf)
The configuration files let you define which type of storage the export tool will save the backups to, and any additional storage-specific information that might be required. Currently AWS S3 and Local storage are supported.

If you run the script with the `list-of-parents` or `entire-organization` input type, the script will fetch the IDs of the workspaces to process (either the hierarchies under the specified parents or all the workspaces within the organization) in batches. By default, the batch size is `100`, but you can change it by setting the `api_page_size` parameter in your configuration YAML file.

The configuration file has the following format:
```yaml
storage_type: some_storage
storage:
arg1: foo
arg2: bar
api_page_size: 1000
```

### AWS S3
Expand Down
1 change: 1 addition & 0 deletions requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pytest~=7.3.2
moto~=4.1.11
pytest-mock==3.14.0
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
boto3==1.37.21
gooddata_sdk==1.39.0
requests==2.32.0
pydantic==2.11.3
2 changes: 1 addition & 1 deletion scripts/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
# (C) 2023 GoodData Corporation
# (C) 2025 GoodData Corporation
227 changes: 16 additions & 211 deletions scripts/backup.py
Original file line number Diff line number Diff line change
@@ -1,64 +1,53 @@
# (C) 2023 GoodData Corporation
# (C) 2025 GoodData Corporation
import abc
import argparse
import csv
import datetime
import json
import logging
import os
import shutil
import tempfile
from pathlib import Path
from typing import Any, Iterator, Optional, Type, TypeAlias
from typing import Any, Type

import boto3 # type: ignore[import]
import requests
import yaml
from gooddata_api_client.exceptions import NotFoundException # type: ignore[import]
from gooddata_sdk import GoodDataSdk # type: ignore[import]
from gooddata_api_client.exceptions import NotFoundException
from gooddata_sdk import __version__ as sdk_version # type: ignore[import]
from gooddata_sdk.catalog.workspace.declarative_model.workspace.automation import (
CatalogDeclarativeAutomation,
)
from gooddata_sdk.catalog.workspace.declarative_model.workspace.workspace import (
CatalogDeclarativeWorkspace,
CatalogDeclarativeWorkspaces,
from gooddata_sdk.sdk import GoodDataSdk # type: ignore[import]
from utils.backup_utils.input_loader import InputLoader # type: ignore[import]
from utils.gd_api import ( # type: ignore[import]
BEARER_TKN_PREFIX,
GDApi,
GoodDataRestApiError,
)
from utils.logger import logger # type: ignore[import]

TIMESTAMP_SDK_FOLDER = (
str(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
+ "-"
+ sdk_version.replace(".", "_")
)

API_VERSION = "v1"
BEARER_TKN_PREFIX = "Bearer"
PROFILES_FILE = "profiles.yaml"
PROFILES_DIRECTORY = ".gooddata"
PROFILES_FILE_PATH = Path.home() / PROFILES_DIRECTORY / PROFILES_FILE

FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
logger = logging.getLogger(__name__)
logging.getLogger(__name__).setLevel(logging.INFO)
logger.setLevel(logging.INFO)
ch = logging.StreamHandler()
ch.setFormatter(logging.Formatter(fmt=FORMAT))
logger.addHandler(ch)

LAYOUTS_DIR = "gooddata_layouts"
LDM_DIR = "ldm"


class GoodDataRestApiError(Exception):
"""Wrapper for errors occurring from interaction with GD REST API."""
API_PAGE_SIZE = 100


# TODO: consider moving storage related logic to a separate module and reuse it in restore
class BackupRestoreConfig:
    """Settings for backup/restore, loaded from a YAML configuration file.

    After construction the instance exposes:
      * ``storage_type``  - value of the mandatory ``storage_type`` key,
      * ``storage``       - value of the mandatory ``storage`` key,
      * ``api_page_size`` - value of the optional ``api_page_size`` key,
        falling back to the module-level ``API_PAGE_SIZE`` when absent.
    """

    def __init__(self, conf_path: str):
        """Parse the YAML file at ``conf_path`` and copy its settings to attributes.

        Raises:
            KeyError: if ``storage_type`` or ``storage`` is missing from the file.
        """
        with open(conf_path, "r") as conf_file:
            parsed = yaml.safe_load(conf_file)

        # Mandatory keys are read with [] so that a missing key fails loudly.
        self.storage_type = parsed["storage_type"]
        self.storage = parsed["storage"]
        # Optional key: use the module default when it is not configured.
        self.api_page_size = parsed.get("api_page_size", API_PAGE_SIZE)


class BackupStorage(abc.ABC):
Expand Down Expand Up @@ -125,79 +114,6 @@ def export(self, folder, org_id, export_folder="local_backups"):
)


MaybeResponse: TypeAlias = Optional[requests.Response]


class GDApi:
    """Wrapper for GoodData REST API client.

    Every request is authenticated with a bearer token; requests cannot be
    prepared without one (see ``_prepare_request``).
    """

    def __init__(self, host: str, api_token: str, headers=None):
        """Store connection settings.

        Args:
            host: Base URL of the GoodData host, with or without a trailing slash.
            api_token: Token sent in the ``Authorization`` header.
            headers: Optional extra headers added to every request.
        """
        self.endpoint = self._handle_endpoint(host)
        self.api_token = api_token
        self.headers = headers if headers else {}
        # Not read anywhere inside this class.
        self.wait_api_time = 10

    @staticmethod
    def _handle_endpoint(host: str) -> str:
        """Ensures that the endpoint URL is correctly formatted.

        Trailing slashes are stripped before the API path is appended, so
        ``https://host`` and ``https://host/`` yield the same endpoint and an
        empty host no longer raises ``IndexError``.
        """
        return f"{host.rstrip('/')}/api/{API_VERSION}"

    def get(
        self,
        path: str,
        params,
        ok_code: int = 200,
        not_found_code: int = 404,
    ) -> MaybeResponse:
        """Sends a GET request to the GoodData API.

        Args:
            path: Path relative to the API endpoint.
            params: Query parameters; omitted from the request when falsy.
            ok_code: Status code treated as success.
            not_found_code: Status code treated as "target not found".

        Returns:
            The response when its status equals ``ok_code``, ``None`` when it
            equals ``not_found_code``.

        Raises:
            RuntimeError: if no API token is configured.
            GoodDataRestApiError: for any other status code.
        """
        kwargs = self._prepare_request(path, params)

        # Never write the bearer token to the log: mask the Authorization header.
        # Lazy %-formatting also avoids serializing the request when DEBUG is off
        # and cannot fail on params that are not JSON-serializable.
        loggable_headers = {
            name: "***" if name.lower() == "authorization" else value
            for name, value in kwargs["headers"].items()
        }
        logger.debug(
            "GET request: url=%s params=%s headers=%s",
            kwargs["url"],
            kwargs.get("params"),
            loggable_headers,
        )

        # NOTE(review): no timeout is passed, so this call can block indefinitely
        # on an unresponsive server - consider adding one.
        response = requests.get(**kwargs)
        return self._resolve_return_code(
            response, ok_code, kwargs["url"], "RestApi.get", not_found_code
        )

    def _prepare_request(self, path: str, params=None) -> dict[str, Any]:
        """Prepares the request to be sent to the GoodData API.

        Returns:
            Keyword arguments for ``requests``: ``url``, ``headers`` (a copy of
            the instance headers plus ``Authorization``) and, when given,
            ``params``.

        Raises:
            RuntimeError: if the API token is missing.
        """
        if not self.api_token:
            raise RuntimeError(
                "Token required for authentication against GD API is missing."
            )
        # TODO - Currently no credentials validation
        # TODO - do we also support username+pwd auth? Or do we enforce token only?

        # Copy the headers so the Authorization entry never leaks into self.headers.
        request_headers = self.headers.copy()
        request_headers["Authorization"] = f"{BEARER_TKN_PREFIX} {self.api_token}"

        kwargs: dict[str, Any] = {
            "url": f"{self.endpoint}/{path}",
            "headers": request_headers,
        }
        if params:
            kwargs["params"] = params
        return kwargs

    @staticmethod
    def _resolve_return_code(
        response, ok_code: int, url, method, not_found_code: Optional[int] = None
    ) -> MaybeResponse:
        """Resolves the return code of the response.

        Returns the response on ``ok_code``, ``None`` on ``not_found_code`` (when
        one is given) and raises ``GoodDataRestApiError`` otherwise.
        """
        if response.status_code == ok_code:
            logger.debug(f"{method} to {url} succeeded")
            return response
        if not_found_code and response.status_code == not_found_code:
            logger.debug(f"{method} to {url} failed - target not found")
            return None
        raise GoodDataRestApiError(
            f"{method} to {url} failed - "
            f"response_code={response.status_code} message={response.text}"
        )


def create_api_client_from_profile(profile: str, profile_config: Path) -> GDApi:
"""Creates a GoodData API client from the specified profile."""
with open(profile_config, "r") as file:
Expand Down Expand Up @@ -347,31 +263,6 @@ def store_automations(api: GDApi, export_path: Path, org_id: str, ws_id: str) ->
json.dump(automations, f)


def store_declarative_automations(
    sdk: GoodDataSdk, export_path: Path, org_id: str, ws_id: str
) -> None:
    """Stores the declarative automations in the specified export path.

    One ``<automation id>.yaml`` file is written per automation into
    ``<export_path>/gooddata_layouts/<org_id>/workspaces/<ws_id>/automations``.

    Args:
        sdk: SDK instance used to fetch the automations of the workspace.
        export_path: Root folder of the export.
        org_id: Organization ID used in the folder structure.
        ws_id: ID of the workspace whose automations are stored.
    """
    # TODO: Currently not working because of a bug in the SDK. There is an alternative way to
    # get the automations from the API, which is implemented here, but it will be better to use
    # the SDK method once the bug is fixed.

    # Construct path to automations folder to put it in the same subfolder as the analytics model
    automations_path: Path = Path(
        export_path, "gooddata_layouts", org_id, "workspaces", ws_id, "automations"
    )
    # Create missing parents too and tolerate an existing folder, so the call
    # does not depend on another step having created the workspace folder first.
    automations_path.mkdir(parents=True, exist_ok=True)

    # Get the automations via the SDK
    automations: list[CatalogDeclarativeAutomation] = (
        sdk.catalog_workspace.get_declarative_automations(ws_id)
    )

    # Store the automations
    for automation in automations:
        with open(automations_path / f"{automation.id}.yaml", "w") as f:
            f.write(yaml.dump(automation.to_dict()))


def store_declarative_filter_views(
sdk: GoodDataSdk, export_path: Path, org_id: str, ws_id: str
) -> None:
Expand All @@ -393,93 +284,6 @@ def store_declarative_filter_views(
)


def read_csv_input_for_backup(file_path: str) -> list[str]:
    """Reads the input CSV file and returns its content from the first column as a list of string.

    The first row is treated as a header and skipped. Rows that are blank or
    whose first cell is empty are ignored.

    Args:
        file_path: Path to a single-column CSV file with a header row.

    Returns:
        Values of the first column, in file order.

    Raises:
        ValueError: if the file is empty, the header has more than one column,
            or no values follow the header.
    """
    # newline="" lets the csv module do its own newline handling.
    with open(file_path, newline="") as csv_file:
        reader: Iterator[list[str]] = csv.reader(csv_file, skipinitialspace=True)

        try:
            # Skip the header
            headers = next(reader)
        except StopIteration:
            # The iterator is empty; the StopIteration context is just noise.
            raise ValueError("No content found in the CSV file.") from None

        if len(headers) > 1:
            raise ValueError(
                "Input file contains more than one column. Please check the input and try again."
            )

        # csv.reader yields [] for blank lines; indexing those would raise
        # IndexError, so skip rows without a usable first cell.
        content = [row[0] for row in reader if row and row[0].strip()]

    # If the content is empty (no rows), raise an error
    if not content:
        raise ValueError("No workspaces found in the CSV file.")

    return content


def get_recursive_children(
    all_workspaces: list[CatalogDeclarativeWorkspace], parent_id: str
) -> list[str]:
    """Gets the IDs of all direct and indirect children of the specified parent workspace.

    Args:
        all_workspaces: Workspaces to search; each item is read through its
            ``id`` and ``parent`` (``parent.id``) attributes.
        parent_id: ID of the workspace whose descendants are collected.

    Returns:
        Descendant IDs in depth-first order (each child is followed by its own
        descendants), with siblings in the order they appear in
        ``all_workspaces``. The parent itself is not included.
    """
    # Index children by parent once instead of rescanning the whole list for
    # every visited workspace.
    children_by_parent: dict[str, list[str]] = {}
    for workspace in all_workspaces:
        if workspace.parent:
            children_by_parent.setdefault(workspace.parent.id, []).append(
                workspace.id
            )

    descendants: list[str] = []
    # Tracking visited IDs makes a cyclic or self-referencing parent link
    # terminate instead of recursing forever.
    visited = {parent_id}
    # Explicit stack; children are pushed reversed so they pop in list order.
    pending = list(reversed(children_by_parent.get(parent_id, [])))
    while pending:
        workspace_id = pending.pop()
        if workspace_id in visited:
            continue
        visited.add(workspace_id)
        descendants.append(workspace_id)
        pending.extend(reversed(children_by_parent.get(workspace_id, [])))

    return descendants


def get_workspaces_to_backup(
    input_type: str, path_to_csv: str, sdk: GoodDataSdk
) -> list[str]:
    """Returns the list of workspace IDs to back up based on the input type.

    Args:
        input_type: One of ``list-of-workspaces``, ``list-of-parents`` or
            ``entire-organization``.
        path_to_csv: Path to the input CSV; read for ``list-of-workspaces``
            and ``list-of-parents``.
        sdk: SDK instance used to fetch the declarative workspaces; used for
            ``list-of-parents`` and ``entire-organization``.

    Returns:
        * ``list-of-workspaces``: the IDs from the CSV as they are.
        * ``list-of-parents``: the parents from the CSV followed by all their
          descendants, without duplicates.
        * ``entire-organization``: the IDs of all fetched workspaces.

    Raises:
        RuntimeError: for an unknown input type, when no workspaces are
            fetched, or when none of the listed parents has any children.
    """
    # Validate first so a typo fails fast, before fetching every workspace,
    # and cannot be masked by the "No workspaces found" error.
    if input_type not in (
        "list-of-workspaces",
        "list-of-parents",
        "entire-organization",
    ):
        raise RuntimeError("Invalid input type provided.")

    if input_type == "list-of-workspaces":
        return read_csv_input_for_backup(path_to_csv)

    declarative_workspaces: CatalogDeclarativeWorkspaces = (
        sdk.catalog_workspace.get_declarative_workspaces()
    )
    workspaces: list[CatalogDeclarativeWorkspace] = declarative_workspaces.workspaces

    if not workspaces:
        raise RuntimeError("No workspaces found in the organization.")

    if input_type == "entire-organization":
        return [workspace.id for workspace in workspaces]

    # Remaining case: list-of-parents
    list_of_parents = read_csv_input_for_backup(path_to_csv)
    list_of_children: list[str] = []
    for parent in list_of_parents:
        list_of_children.extend(get_recursive_children(workspaces, parent))

    if not list_of_children:
        raise RuntimeError(
            "No child workspaces found for the provided list of parents."
        )

    # Include the parent workspaces in the backup. When the CSV lists both a
    # parent and one of its descendants, the descendant would appear twice;
    # dict.fromkeys removes duplicates while keeping the first-seen order.
    return list(dict.fromkeys(list_of_parents + list_of_children))


def get_workspace_export(
sdk: GoodDataSdk,
api: GDApi,
Expand Down Expand Up @@ -589,8 +393,9 @@ def main(args: argparse.Namespace) -> None:
storage_class: Type[BackupStorage] = get_storage(conf.storage_type)
storage: BackupStorage = storage_class(conf)

workspaces_to_export: list[str] = get_workspaces_to_backup(
args.input_type, args.ws_csv, sdk
loader = InputLoader(api, conf.api_page_size)
workspaces_to_export: list[str] = loader.get_ids_to_backup(
args.input_type, args.ws_csv
)

with tempfile.TemporaryDirectory() as tmpdir:
Expand Down
3 changes: 1 addition & 2 deletions scripts/permission_mgmt.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# (C) 2023 GoodData Corporation
# (C) 2025 GoodData Corporation
import argparse
import csv
import logging
Expand All @@ -11,7 +11,6 @@
import gooddata_sdk as gd_sdk
from gooddata_api_client.exceptions import NotFoundException


USER_TYPE = "user"
USER_GROUP_TYPE = "userGroup"

Expand Down
Loading